From 9baa7cd9fafa7a4705f19e441eeb51fc4cfc5ed7 Mon Sep 17 00:00:00 2001 From: Nickolas Davis Date: Thu, 9 Sep 2021 12:12:20 -0600 Subject: [PATCH] Implement tbb runtime device configuration and update vtkm to use it --- benchmarking/BenchmarkCopySpeeds.cxx | 42 ---------------- benchmarking/BenchmarkDeviceAdapter.cxx | 38 --------------- benchmarking/BenchmarkFilters.cxx | 44 ----------------- .../contour_tree_augmented/ContourTreeApp.cxx | 35 -------------- .../ContourTreeApp.cxx | 33 ------------- .../internal/RuntimeDeviceConfigurationTBB.h | 48 +++++++++++++++++-- 6 files changed, 43 insertions(+), 197 deletions(-) diff --git a/benchmarking/BenchmarkCopySpeeds.cxx b/benchmarking/BenchmarkCopySpeeds.cxx index a648d4e4e..c42d00764 100644 --- a/benchmarking/BenchmarkCopySpeeds.cxx +++ b/benchmarking/BenchmarkCopySpeeds.cxx @@ -21,13 +21,6 @@ #include -#ifdef VTKM_ENABLE_TBB -#include -#endif // TBB - -// For the TBB implementation, the number of threads can be customized using a -// "NumThreads [numThreads]" argument. - namespace { @@ -111,41 +104,6 @@ int main(int argc, char* argv[]) vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device); } -// Handle NumThreads command-line arg: -// TODO: Use the VTK-m library to set the number of threads (when that becomes available). -#ifdef VTKM_ENABLE_TBB -#if TBB_VERSION_MAJOR >= 2020 - int numThreads = tbb::task_arena{}.max_concurrency(); -#else - int numThreads = tbb::task_scheduler_init::automatic; -#endif -#endif // TBB - - if (argc == 3) - { - if (std::string(argv[1]) == "NumThreads") - { -#ifdef VTKM_ENABLE_TBB - std::istringstream parse(argv[2]); - parse >> numThreads; - std::cout << "Selected " << numThreads << " TBB threads." << std::endl; -#else - std::cerr << "NumThreads valid only on TBB. Ignoring." << std::endl; -#endif // TBB - } - } - - // TODO: Use the VTK-m library to set the number of threads (when that becomes available). -#ifdef VTKM_ENABLE_TBB -#if TBB_VERSION_MAJOR >= 2020 - // Must not be destroyed as long as benchmarks are running: - tbb::global_control tbbControl(tbb::global_control::max_allowed_parallelism, numThreads); -#else - // Must not be destroyed as long as benchmarks are running: - tbb::task_scheduler_init init(numThreads); -#endif -#endif // TBB - // handle benchmarking related args and run benchmarks: VTKM_EXECUTE_BENCHMARKS(argc, args.data()); } diff --git a/benchmarking/BenchmarkDeviceAdapter.cxx b/benchmarking/BenchmarkDeviceAdapter.cxx index 5cbf745dd..159e2c7ab 100644 --- a/benchmarking/BenchmarkDeviceAdapter.cxx +++ b/benchmarking/BenchmarkDeviceAdapter.cxx @@ -33,9 +33,6 @@ #include -#ifdef VTKM_ENABLE_TBB -#include -#endif #ifdef VTKM_ENABLE_OPENMP #include #endif @@ -1242,41 +1239,6 @@ int main(int argc, char* argv[]) vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device); } -// Handle NumThreads command-line arg: -// TODO: Use the VTK-m library to set the number of threads (when that becomes available). -#ifdef VTKM_ENABLE_TBB -#if TBB_VERSION_MAJOR >= 2020 - int numThreads = tbb::task_arena{}.max_concurrency(); -#else - int numThreads = tbb::task_scheduler_init::automatic; -#endif -#endif // TBB - - if (argc == 3) - { - if (std::string(argv[1]) == "NumThreads") - { -#ifdef VTKM_ENABLE_TBB - std::istringstream parse(argv[2]); - parse >> numThreads; - std::cout << "Selected " << numThreads << " TBB threads." << std::endl; -#else - std::cerr << "NumThreads valid only on TBB. Ignoring." << std::endl; -#endif // TBB - } - } - - // TODO: Use the VTK-m library to set the number of threads (when that becomes available). -#ifdef VTKM_ENABLE_TBB -#if TBB_VERSION_MAJOR >= 2020 - // Must not be destroyed as long as benchmarks are running: - tbb::global_control tbbControl(tbb::global_control::max_allowed_parallelism, numThreads); -#else - // Must not be destroyed as long as benchmarks are running: - tbb::task_scheduler_init init(numThreads); -#endif -#endif // TBB - // handle benchmarking related args and run benchmarks: VTKM_EXECUTE_BENCHMARKS(argc, args.data()); } diff --git a/benchmarking/BenchmarkFilters.cxx b/benchmarking/BenchmarkFilters.cxx index 0c319f510..236e2e596 100644 --- a/benchmarking/BenchmarkFilters.cxx +++ b/benchmarking/BenchmarkFilters.cxx @@ -80,9 +80,6 @@ // If the fields are not specified, the first field with the correct association // is used. If no such field exists, one will be generated from the data. -// For the TBB/OpenMP implementations, the number of threads can be customized -// using a "NumThreads [numThreads]" argument. - namespace { @@ -791,7 +788,6 @@ enum optionIndex { UNKNOWN, HELP, - NUM_THREADS, FILENAME, POINT_SCALARS, CELL_SCALARS, @@ -802,7 +798,6 @@ enum optionIndex void InitDataSet(int& argc, char** argv) { - int numThreads = 0; std::string filename; vtkm::Id waveletDim = 256; bool tetra = false; @@ -817,12 +812,6 @@ void InitDataSet(int& argc, char** argv) usage.push_back({ UNKNOWN, 0, "", "", Arg::None, "Input data options are:" }); usage.push_back({ HELP, 0, "h", "help", Arg::None, " -h, --help\tDisplay this help." }); usage.push_back({ UNKNOWN, 0, "", "", Arg::None, Config.Usage.c_str() }); - usage.push_back({ NUM_THREADS, - 0, - "", - "num-threads", - Arg::Number, - " --num-threads \tSpecify the number of threads to use." }); usage.push_back({ FILENAME, 0, "", @@ -880,22 +869,6 @@ void InitDataSet(int& argc, char** argv) exit(0); } - if (options[NUM_THREADS]) - { - std::istringstream parse(options[NUM_THREADS].arg); - parse >> numThreads; - if (Config.Device == vtkm::cont::DeviceAdapterTagTBB() || - Config.Device == vtkm::cont::DeviceAdapterTagOpenMP()) - { - std::cout << "Selected " << numThreads << " " << Config.Device.GetName() << " threads." - << std::endl; - } - else - { - std::cerr << options[NUM_THREADS].name << " not valid on this device. Ignoring." << std::endl; - } - } - if (options[FILENAME]) { filename = options[FILENAME].arg; @@ -922,23 +895,6 @@ void InitDataSet(int& argc, char** argv) tetra = (options[TETRA] != nullptr); - // TODO: Use the VTK-m library to set the number of threads (when that becomes available). -#ifdef VTKM_ENABLE_TBB -#if TBB_VERSION_MAJOR >= 2020 - if (numThreads < 1) - { - // Ask TBB how many threads are available. - numThreads = tbb::task_arena{}.max_concurrency(); - } - // Must not be destroyed as long as benchmarks are running: - tbb::global_control tbbControl(tbb::global_control::max_allowed_parallelism, numThreads); -#else // TBB_VERSION_MAJOR < 2020 - // Must not be destroyed as long as benchmarks are running: - tbb::task_scheduler_init init((numThreads > 0) ? numThreads - : tbb::task_scheduler_init::automatic); -#endif -#endif - // Now go back through the arg list and remove anything that is not in the list of // unknown options or non-option arguments. int destArg = 1; diff --git a/examples/contour_tree_augmented/ContourTreeApp.cxx b/examples/contour_tree_augmented/ContourTreeApp.cxx index fcd71961c..28882d322 100644 --- a/examples/contour_tree_augmented/ContourTreeApp.cxx +++ b/examples/contour_tree_augmented/ContourTreeApp.cxx @@ -77,10 +77,6 @@ #include #include -#ifdef ENABLE_SET_NUM_THREADS -#include "tbb/task_scheduler_init.h" -#endif - // clang-format off VTKM_THIRDPARTY_PRE_INCLUDE #include @@ -227,29 +223,6 @@ int main(int argc, char* argv[]) computeBranchDecomposition = false; } - -#ifdef ENABLE_SET_NUM_THREADS - int numThreads = tbb::task_scheduler_init::default_num_threads(); - if (parser.hasOption("--numThreads")) - { - bool deviceIsTBB = (device.GetName() == "TBB"); - // Set the number of threads to be used for TBB - if (deviceIsTBB) - { - numThreads = std::stoi(parser.getOption("--numThreads")); - tbb::task_scheduler_init schedulerInit(numThreads); - } - // Print warning about mismatch between the --numThreads and -d/--device option - else - { - VTKM_LOG_S(vtkm::cont::LogLevel::Warn, - "WARNING: Mismatch between --numThreads and -d/--device option." - "numThreads option requires the use of TBB as device. " - "Ignoring the numThread option."); - } - } -#endif - // Iso value selection parameters // Approach to be used to select contours based on the tree vtkm::Id contourType = 0; @@ -316,10 +289,6 @@ int main(int argc, char* argv[]) "Requires --augmentTree (Default=True)" << std::endl; std::cout << "--printCT Print the contour tree. (Default=False)" << std::endl; -#ifdef ENABLE_SET_NUM_THREADS - std::cout << "--numThreads Specifiy the number of threads to use. Available only with TBB." - << std::endl; -#endif std::cout << std::endl; std::cout << "---------------------- Isovalue Selection Options ----------------------" << std::endl; @@ -360,10 +329,6 @@ int main(int argc, char* argv[]) #ifdef WITH_MPI " nblocks=" << numBlocks << std::endl << -#endif -#ifdef ENABLE_SET_NUM_THREADS - " numThreads=" << numThreads << std::endl - << #endif " computeIsovalues=" << (numLevels > 0); VTKM_LOG_S(vtkm::cont::LogLevel::Info, std::endl << logmessage.str()); diff --git a/examples/contour_tree_distributed/ContourTreeApp.cxx b/examples/contour_tree_distributed/ContourTreeApp.cxx index 59b7088db..741aac475 100644 --- a/examples/contour_tree_distributed/ContourTreeApp.cxx +++ b/examples/contour_tree_distributed/ContourTreeApp.cxx @@ -77,10 +77,6 @@ #include #include -#ifdef ENABLE_SET_NUM_THREADS -#include "tbb/task_scheduler_init.h" -#endif - // clang-format off VTKM_THIRDPARTY_PRE_INCLUDE #include @@ -237,28 +233,6 @@ int main(int argc, char* argv[]) forwardSummary = true; } -#ifdef ENABLE_SET_NUM_THREADS - int numThreads = tbb::task_scheduler_init::default_num_threads(); - if (parser.hasOption("--numThreads")) - { - bool deviceIsTBB = (device.GetName() == "TBB"); - // Set the number of threads to be used for TBB - if (deviceIsTBB) - { - numThreads = std::stoi(parser.getOption("--numThreads")); - tbb::task_scheduler_init schedulerInit(numThreads); - } - // Print warning about mismatch between the --numThreads and -d/--device option - else - { - VTKM_LOG_S(vtkm::cont::LogLevel::Warn, - "WARNING: Mismatch between --numThreads and -d/--device option." - "numThreads option requires the use of TBB as device. " - "Ignoring the numThread option."); - } - } -#endif - int numBlocks = size; int blocksPerRank = 1; if (parser.hasOption("--numBlocks")) @@ -305,10 +279,6 @@ int main(int argc, char* argv[]) << "computation (Default=False). " << std::endl; std::cout << "--saveTreeCompilerData Save data files needed for the tree compiler" << std::endl; -#ifdef ENABLE_SET_NUM_THREADS - std::cout << "--numThreads Specifiy the number of threads to use. " - << "Available only with TBB." << std::endl; -#endif std::cout << "--numBlocks Number of blocks to use during computation " << "(Default=number of MPI ranks.)" << std::endl; std::cout << "--forwardSummary Forward the summary timings also to the per-rank " @@ -332,9 +302,6 @@ int main(int argc, char* argv[]) << " saveDot=" << saveDotFiles << std::endl << " saveTreeCompilerData=" << saveTreeCompilerData << std::endl << " forwardSummary=" << forwardSummary << std::endl -#ifdef ENABLE_SET_NUM_THREADS - << " numThreads=" << numThreads << std::endl -#endif << " nblocks=" << numBlocks << std::endl); } diff --git a/vtkm/cont/tbb/internal/RuntimeDeviceConfigurationTBB.h b/vtkm/cont/tbb/internal/RuntimeDeviceConfigurationTBB.h index 10df385fd..8c567eddb 100644 --- a/vtkm/cont/tbb/internal/RuntimeDeviceConfigurationTBB.h +++ b/vtkm/cont/tbb/internal/RuntimeDeviceConfigurationTBB.h @@ -13,6 +13,12 @@ #include #include +VTKM_THIRDPARTY_PRE_INCLUDE +#include +VTKM_THIRDPARTY_POST_INCLUDE + +#include + namespace vtkm { namespace cont @@ -24,27 +30,59 @@ template <> class RuntimeDeviceConfiguration : public vtkm::cont::internal::RuntimeDeviceConfigurationBase { +public: + VTKM_CONT + RuntimeDeviceConfiguration() + : +#if TBB_VERSION_MAJOR >= 2020 + HardwareMaxThreads(::tbb::task_arena{}.max_concurrency()) +#else + HardwareMaxThreads(::tbb::task_scheduler_init::default_num_threads()) +#endif + { + } + VTKM_CONT vtkm::cont::DeviceAdapterId GetDevice() const override final { return vtkm::cont::DeviceAdapterTagTBB{}; } - VTKM_CONT virtual RuntimeDeviceConfigReturnCode SetThreads(const vtkm::Id&) override final + VTKM_CONT virtual RuntimeDeviceConfigReturnCode SetThreads(const vtkm::Id& value) override final { - // TODO: vtk-m set the number of global threads +#if TBB_VERSION_MAJOR >= 2020 + GlobalControl.reset(new ::tbb::global_control(::tbb::global_control::max_allowed_parallelism, + value > 0 ? value : this->HardwareMaxThreads)); +#else + TaskSchedulerInit.reset(new ::tbb::task_scheduler_init( + value > 0 ? static_cast(value) : static_cast(this->HardwareMaxThreads))); +#endif return RuntimeDeviceConfigReturnCode::SUCCESS; } - VTKM_CONT virtual RuntimeDeviceConfigReturnCode GetThreads(vtkm::Id&) const override final + VTKM_CONT virtual RuntimeDeviceConfigReturnCode GetThreads(vtkm::Id& value) const override final { - // TODO: Get number of TBB threads here (essentially just threads supported by architecture) +#if TBB_VERSION_MAJOR >= 2020 + value = ::tbb::global_control::active_value(::tbb::global_control::max_allowed_parallelism); +#else + value = ::tbb::task_scheduler_init::default_num_threads(); +#endif return RuntimeDeviceConfigReturnCode::SUCCESS; } - VTKM_CONT virtual RuntimeDeviceConfigReturnCode GetMaxThreads(vtkm::Id&) const override final + VTKM_CONT virtual RuntimeDeviceConfigReturnCode GetMaxThreads( + vtkm::Id& value) const override final { + value = this->HardwareMaxThreads; return RuntimeDeviceConfigReturnCode::SUCCESS; } + +private: +#if TBB_VERSION_MAJOR >= 2020 + std::unique_ptr<::tbb::global_control> GlobalControl; +#else + std::unique_ptr<::tbb::task_scheduler_init> TaskSchedulerInit; +#endif + vtkm::Id HardwareMaxThreads; }; } // namespace vktm::cont::internal } // namespace vtkm::cont