diff --git a/intern/cycles/util/CMakeLists.txt b/intern/cycles/util/CMakeLists.txt index cceec8d444c..e6140b3ed09 100644 --- a/intern/cycles/util/CMakeLists.txt +++ b/intern/cycles/util/CMakeLists.txt @@ -19,8 +19,10 @@ set(SRC util_simd.cpp util_system.cpp util_task.cpp + util_thread.cpp util_time.cpp util_transform.cpp + util_windows.cpp ) if(NOT CYCLES_STANDALONE_REPOSITORY) diff --git a/intern/cycles/util/util_system.cpp b/intern/cycles/util/util_system.cpp index 4ff0ee91d73..16f713e1ea6 100644 --- a/intern/cycles/util/util_system.cpp +++ b/intern/cycles/util/util_system.cpp @@ -15,7 +15,9 @@ */ #include "util_system.h" + #include "util_debug.h" +#include "util_logging.h" #include "util_types.h" #include "util_string.h" @@ -33,28 +35,57 @@ CCL_NAMESPACE_BEGIN +int system_cpu_group_count() +{ +#ifdef _WIN32 + util_windows_init_numa_groups(); + return GetActiveProcessorGroupCount(); +#else + /* TODO(sergey): Need to adopt for other platforms. */ + return 1; +#endif +} + +int system_cpu_group_thread_count(int group) +{ + /* TODO(sergey): Need make other platforms aware of groups. */ +#ifdef _WIN32 + util_windows_init_numa_groups(); + return GetActiveProcessorCount(group); +#elif defined(__APPLE__) + (void)group; + size_t len = sizeof(count); + int mib[2] = { CTL_HW, HW_NCPU }; + + int count; + sysctl(mib, 2, &count, &len, NULL, 0); + return count; +#else + (void)group; + return sysconf(_SC_NPROCESSORS_ONLN); +#endif +} + int system_cpu_thread_count() { static uint count = 0; - if(count > 0) + if(count > 0) { return count; + } -#ifdef _WIN32 - SYSTEM_INFO info; - GetSystemInfo(&info); - count = (uint)info.dwNumberOfProcessors; -#elif defined(__APPLE__) - size_t len = sizeof(count); - int mib[2] = { CTL_HW, HW_NCPU }; - - sysctl(mib, 2, &count, &len, NULL, 0); -#else - count = (uint)sysconf(_SC_NPROCESSORS_ONLN); -#endif + int max_group = system_cpu_group_count(); + VLOG(1) << "Detected " << max_group << " CPU groups."; + for(int group = 0; group < max_group; ++group) { + int num_threads = system_cpu_group_thread_count(group); + VLOG(1) << "Group " << group + << " has " << num_threads << " threads."; + count += num_threads; + } - if(count < 1) + if(count < 1) { count = 1; + } return count; } diff --git a/intern/cycles/util/util_system.h b/intern/cycles/util/util_system.h index 4e7e00f85fd..557aab6cbae 100644 --- a/intern/cycles/util/util_system.h +++ b/intern/cycles/util/util_system.h @@ -21,7 +21,15 @@ CCL_NAMESPACE_BEGIN +/* Get number of available CPU groups. */ +int system_cpu_group_count(); + +/* Get number of threads/processors in the specified group. */ +int system_cpu_group_thread_count(int group); + +/* Get total number of threads in all groups. */ int system_cpu_thread_count(); + string system_cpu_brand_string(); int system_cpu_bits(); bool system_cpu_support_sse2(); diff --git a/intern/cycles/util/util_task.cpp b/intern/cycles/util/util_task.cpp index d86aa8a4a46..352ba81c95a 100644 --- a/intern/cycles/util/util_task.cpp +++ b/intern/cycles/util/util_task.cpp @@ -16,6 +16,7 @@ #include "util_debug.h" #include "util_foreach.h" +#include "util_logging.h" #include "util_system.h" #include "util_task.h" #include "util_time.h" @@ -198,12 +199,30 @@ void TaskScheduler::init(int num_threads) /* automatic number of threads */ num_threads = system_cpu_thread_count(); } + VLOG(1) << "Creating pool of " << num_threads << " threads."; /* launch threads that will be waiting for work */ threads.resize(num_threads); - for(size_t i = 0; i < threads.size(); i++) - threads[i] = new thread(function_bind(&TaskScheduler::thread_run, i + 1)); + int num_groups = system_cpu_group_count(); + int thread_index = 0; + for(int group = 0; group < num_groups; ++group) { + /* NOTE: That's not really efficient from threading point of view, + * but it is simple to read and it doesn't make sense to use more + * user-specified threads than logical threads anyway. + */ + int num_group_threads = (group == num_groups - 1) + ? (threads.size() - thread_index) + : system_cpu_group_thread_count(group); + for(int group_thread = 0; + group_thread < num_group_threads && thread_index < threads.size(); + ++group_thread, ++thread_index) + { + threads[thread_index] = new thread(function_bind(&TaskScheduler::thread_run, + thread_index + 1), + group); + } + } } users++; diff --git a/intern/cycles/util/util_thread.cpp b/intern/cycles/util/util_thread.cpp new file mode 100644 index 00000000000..3db8b4bd197 --- /dev/null +++ b/intern/cycles/util/util_thread.cpp @@ -0,0 +1,66 @@ +/* + * Copyright 2011-2016 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "util_thread.h" + +#include "util_system.h" +#include "util_windows.h" + +CCL_NAMESPACE_BEGIN + +thread::thread(function run_cb, int group) + : run_cb_(run_cb), + joined_(false), + group_(group) +{ + pthread_create(&pthread_id_, NULL, run, (void*)this); +} + +thread::~thread() +{ + if(!joined_) { + join(); + } +} + +void *thread::run(void *arg) +{ + thread *self = (thread*)(arg); + if(self->group_ != -1) { +#ifdef _WIN32 + HANDLE thread_handle = GetCurrentThread(); + GROUP_AFFINITY group_affinity = { 0 }; + int num_threads = system_cpu_group_thread_count(self->group_); + group_affinity.Group = self->group_; + group_affinity.Mask = (num_threads == 64) + ? -1 + : (1ull << num_threads) - 1; + if(SetThreadGroupAffinity(thread_handle, &group_affinity, NULL) == 0) { + fprintf(stderr, "Error setting thread affinity.\n"); + } +#endif + } + self->run_cb_(); + return NULL; +} + +bool thread::join() +{ + joined_ = true; + return pthread_join(pthread_id_, NULL) == 0; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_thread.h b/intern/cycles/util/util_thread.h index 59575f31c13..427c633d2ce 100644 --- a/intern/cycles/util/util_thread.h +++ b/intern/cycles/util/util_thread.h @@ -52,37 +52,17 @@ typedef boost::condition_variable thread_condition_variable; class thread { public: - thread(function run_cb_) + thread(function run_cb, int group = -1); + ~thread(); - { - joined = false; - run_cb = run_cb_; - - pthread_create(&pthread_id, NULL, run, (void*)this); - } - - ~thread() - { - if(!joined) - join(); - } - - static void *run(void *arg) - { - ((thread*)arg)->run_cb(); - return NULL; - } - - bool join() - { - joined = true; - return pthread_join(pthread_id, NULL) == 0; - } + static void *run(void *arg); + bool join(); protected: - function run_cb; - pthread_t pthread_id; - bool joined; + function run_cb_; + pthread_t pthread_id_; + bool joined_; + int group_; }; /* Own wrapper around pthread's spin lock to make it's use easier. */ diff --git a/intern/cycles/util/util_windows.cpp b/intern/cycles/util/util_windows.cpp new file mode 100644 index 00000000000..6fd259713d9 --- /dev/null +++ b/intern/cycles/util/util_windows.cpp @@ -0,0 +1,82 @@ +/* + * Copyright 2011-2016 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "util_windows.h" + +#ifdef _WIN32 + +CCL_NAMESPACE_BEGIN + +#include + +#if _WIN32_WINNT < 0x0601 +tGetActiveProcessorGroupCount *GetActiveProcessorGroupCount; +tGetActiveProcessorCount *GetActiveProcessorCount; +tSetThreadGroupAffinity *SetThreadGroupAffinity; +#endif + +static WORD GetActiveProcessorGroupCount_stub() +{ + return 1; +} + +static DWORD GetActiveProcessorCount_stub(WORD /*GroupNumber*/) +{ + SYSTEM_INFO info; + GetSystemInfo(&info); + return info.dwNumberOfProcessors; +} + +static BOOL SetThreadGroupAffinity_stub( + HANDLE /*hThread*/, + const GROUP_AFFINITY * /*GroupAffinity*/, + PGROUP_AFFINITY /*PreviousGroupAffinity*/) +{ + return TRUE; +} + +static bool supports_numa() +{ + return IsWindows7OrGreater(); +} + +void util_windows_init_numa_groups() +{ + static bool initialized = false; + if(initialized) { + return; + } + initialized = true; +#if _WIN32_WINNT < 0x0601 + if(!supports_numa()) { + /* Use stubs on platforms which doesn't have rean NUMA/Groups. */ + GetActiveProcessorGroupCount = GetActiveProcessorGroupCount_stub; + GetActiveProcessorCount = GetActiveProcessorCount_stub; + SetThreadGroupAffinity = SetThreadGroupAffinity_stub; + return; + } + HMODULE kernel = GetModuleHandleA("kernel32.dll"); +# define READ_SYMBOL(sym) sym = (t##sym*)GetProcAddress(kernel, #sym) + READ_SYMBOL(GetActiveProcessorGroupCount); + READ_SYMBOL(GetActiveProcessorCount); + READ_SYMBOL(SetThreadGroupAffinity); +# undef READ_SUMBOL +#endif +} + +CCL_NAMESPACE_END + +#endif /* _WIN32 */ diff --git a/intern/cycles/util/util_windows.h b/intern/cycles/util/util_windows.h index f67e34d0f31..ac61d5348c3 100644 --- a/intern/cycles/util/util_windows.h +++ b/intern/cycles/util/util_windows.h @@ -31,6 +31,25 @@ #include +CCL_NAMESPACE_BEGIN + +#if _WIN32_WINNT < 0x0601 +typedef WORD tGetActiveProcessorGroupCount(); +typedef DWORD tGetActiveProcessorCount(WORD GroupNumber); +typedef BOOL tSetThreadGroupAffinity(HANDLE hThread, + const GROUP_AFFINITY *GroupAffinity, + PGROUP_AFFINITY PreviousGroupAffinity); + +extern tGetActiveProcessorGroupCount *GetActiveProcessorGroupCount; +extern tGetActiveProcessorCount *GetActiveProcessorCount; +extern tSetThreadGroupAffinity *SetThreadGroupAffinity; +#endif + +/* Make sure NUMA and processor groups API is initialized. */ +void util_windows_init_numa_groups(); + +CCL_NAMESPACE_END + #endif /* WIN32 */ #endif /* __UTIL_WINDOWS_H__ */