forked from bartvdbraak/blender
Cycles: Add support of processor groups
Currently for windows only, this is an initial commit towards native support of NUMA. Current commit makes it so Cycles will use all logical processors on Windows running on system with more than 64 threads. Reviewers: juicyfruit, dingto, lukasstockner97, maiself, brecht Subscribers: LazyDodo Differential Revision: https://developer.blender.org/D2049
This commit is contained in:
parent
9d090ed1cd
commit
b62faa54de
@ -19,8 +19,10 @@ set(SRC
|
||||
util_simd.cpp
|
||||
util_system.cpp
|
||||
util_task.cpp
|
||||
util_thread.cpp
|
||||
util_time.cpp
|
||||
util_transform.cpp
|
||||
util_windows.cpp
|
||||
)
|
||||
|
||||
if(NOT CYCLES_STANDALONE_REPOSITORY)
|
||||
|
@ -15,7 +15,9 @@
|
||||
*/
|
||||
|
||||
#include "util_system.h"
|
||||
|
||||
#include "util_debug.h"
|
||||
#include "util_logging.h"
|
||||
#include "util_types.h"
|
||||
#include "util_string.h"
|
||||
|
||||
@ -33,28 +35,57 @@
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* Get number of available CPU groups.
 *
 * On Windows this makes sure the processor groups API is initialized and
 * then asks it for the number of active groups. Every other platform
 * currently reports a single group.
 */
int system_cpu_group_count()
{
#ifndef _WIN32
    /* TODO(sergey): Need to adapt for other platforms. */
    return 1;
#else
    util_windows_init_numa_groups();
    const int num_groups = GetActiveProcessorGroupCount();
    return num_groups;
#endif
}
|
||||
|
||||
/* Get number of threads/processors in the specified group.
 *
 * group: index of the CPU group to query. Only Windows makes use of it;
 *        the other platforms ignore the argument and report the processor
 *        count of the whole system.
 *
 * Returns the value reported by the platform API. On OS X a failed sysctl()
 * query falls back to 1 instead of returning an uninitialized value.
 */
int system_cpu_group_thread_count(int group)
{
    /* TODO(sergey): Need to make other platforms aware of groups. */
#ifdef _WIN32
    util_windows_init_numa_groups();
    return GetActiveProcessorCount(group);
#elif defined(__APPLE__)
    (void)group;
    /* The counter has to be declared before its size is taken. */
    int count = 0;
    size_t len = sizeof(count);
    int mib[2] = { CTL_HW, HW_NCPU };
    if(sysctl(mib, 2, &count, &len, NULL, 0) != 0) {
        /* Query failed, assume a single processor. */
        return 1;
    }
    return count;
#else
    (void)group;
    return sysconf(_SC_NPROCESSORS_ONLN);
#endif
}
|
||||
|
||||
int system_cpu_thread_count()
|
||||
{
|
||||
static uint count = 0;
|
||||
|
||||
if(count > 0)
|
||||
if(count > 0) {
|
||||
return count;
|
||||
}
|
||||
|
||||
#ifdef _WIN32
|
||||
SYSTEM_INFO info;
|
||||
GetSystemInfo(&info);
|
||||
count = (uint)info.dwNumberOfProcessors;
|
||||
#elif defined(__APPLE__)
|
||||
size_t len = sizeof(count);
|
||||
int mib[2] = { CTL_HW, HW_NCPU };
|
||||
int max_group = system_cpu_group_count();
|
||||
VLOG(1) << "Detected " << max_group << " CPU groups.";
|
||||
for(int group = 0; group < max_group; ++group) {
|
||||
int num_threads = system_cpu_group_thread_count(group);
|
||||
VLOG(1) << "Group " << group
|
||||
<< " has " << num_threads << " threads.";
|
||||
count += num_threads;
|
||||
}
|
||||
|
||||
sysctl(mib, 2, &count, &len, NULL, 0);
|
||||
#else
|
||||
count = (uint)sysconf(_SC_NPROCESSORS_ONLN);
|
||||
#endif
|
||||
|
||||
if(count < 1)
|
||||
if(count < 1) {
|
||||
count = 1;
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
@ -21,7 +21,15 @@
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* Get number of available CPU groups. */
|
||||
int system_cpu_group_count();
|
||||
|
||||
/* Get number of threads/processors in the specified group. */
|
||||
int system_cpu_group_thread_count(int group);
|
||||
|
||||
/* Get total number of threads in all groups. */
|
||||
int system_cpu_thread_count();
|
||||
|
||||
string system_cpu_brand_string();
|
||||
int system_cpu_bits();
|
||||
bool system_cpu_support_sse2();
|
||||
|
@ -16,6 +16,7 @@
|
||||
|
||||
#include "util_debug.h"
|
||||
#include "util_foreach.h"
|
||||
#include "util_logging.h"
|
||||
#include "util_system.h"
|
||||
#include "util_task.h"
|
||||
#include "util_time.h"
|
||||
@ -198,12 +199,30 @@ void TaskScheduler::init(int num_threads)
|
||||
/* automatic number of threads */
|
||||
num_threads = system_cpu_thread_count();
|
||||
}
|
||||
VLOG(1) << "Creating pool of " << num_threads << " threads.";
|
||||
|
||||
/* launch threads that will be waiting for work */
|
||||
threads.resize(num_threads);
|
||||
|
||||
for(size_t i = 0; i < threads.size(); i++)
|
||||
threads[i] = new thread(function_bind(&TaskScheduler::thread_run, i + 1));
|
||||
int num_groups = system_cpu_group_count();
|
||||
int thread_index = 0;
|
||||
for(int group = 0; group < num_groups; ++group) {
|
||||
/* NOTE: That's not really efficient from threading point of view,
|
||||
* but it is simple to read and it doesn't make sense to use more
|
||||
* user-specified threads than logical threads anyway.
|
||||
*/
|
||||
int num_group_threads = (group == num_groups - 1)
|
||||
? (threads.size() - thread_index)
|
||||
: system_cpu_group_thread_count(group);
|
||||
for(int group_thread = 0;
|
||||
group_thread < num_group_threads && thread_index < threads.size();
|
||||
++group_thread, ++thread_index)
|
||||
{
|
||||
threads[thread_index] = new thread(function_bind(&TaskScheduler::thread_run,
|
||||
thread_index + 1),
|
||||
group);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
users++;
|
||||
|
66
intern/cycles/util/util_thread.cpp
Normal file
66
intern/cycles/util/util_thread.cpp
Normal file
@ -0,0 +1,66 @@
|
||||
/*
|
||||
* Copyright 2011-2016 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "util_thread.h"
|
||||
|
||||
#include "util_system.h"
|
||||
#include "util_windows.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* Store the callback and the requested CPU group, then start a pthread
 * which enters thread::run() with this object as its argument.
 */
thread::thread(function<void(void)> run_cb, int group)
    : run_cb_(run_cb), joined_(false), group_(group)
{
    void *thread_arg = static_cast<void*>(this);
    pthread_create(&pthread_id_, NULL, run, thread_arg);
}
|
||||
|
||||
/* Wait for the thread to finish unless join() was already called. */
thread::~thread()
{
    if(joined_) {
        return;
    }
    join();
}
|
||||
|
||||
void *thread::run(void *arg)
|
||||
{
|
||||
thread *self = (thread*)(arg);
|
||||
if(self->group_ != -1) {
|
||||
#ifdef _WIN32
|
||||
HANDLE thread_handle = GetCurrentThread();
|
||||
GROUP_AFFINITY group_affinity = { 0 };
|
||||
int num_threads = system_cpu_group_thread_count(self->group_);
|
||||
group_affinity.Group = self->group_;
|
||||
group_affinity.Mask = (num_threads == 64)
|
||||
? -1
|
||||
: (1ull << num_threads) - 1;
|
||||
if(SetThreadGroupAffinity(thread_handle, &group_affinity, NULL) == 0) {
|
||||
fprintf(stderr, "Error setting thread affinity.\n");
|
||||
}
|
||||
#endif
|
||||
}
|
||||
self->run_cb_();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
bool thread::join()
|
||||
{
|
||||
joined_ = true;
|
||||
return pthread_join(pthread_id_, NULL) == 0;
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
@ -52,37 +52,17 @@ typedef boost::condition_variable thread_condition_variable;
|
||||
|
||||
class thread {
|
||||
public:
|
||||
thread(function<void(void)> run_cb_)
|
||||
thread(function<void(void)> run_cb, int group = -1);
|
||||
~thread();
|
||||
|
||||
{
|
||||
joined = false;
|
||||
run_cb = run_cb_;
|
||||
|
||||
pthread_create(&pthread_id, NULL, run, (void*)this);
|
||||
}
|
||||
|
||||
~thread()
|
||||
{
|
||||
if(!joined)
|
||||
join();
|
||||
}
|
||||
|
||||
static void *run(void *arg)
|
||||
{
|
||||
((thread*)arg)->run_cb();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
bool join()
|
||||
{
|
||||
joined = true;
|
||||
return pthread_join(pthread_id, NULL) == 0;
|
||||
}
|
||||
static void *run(void *arg);
|
||||
bool join();
|
||||
|
||||
protected:
|
||||
function<void(void)> run_cb;
|
||||
pthread_t pthread_id;
|
||||
bool joined;
|
||||
function<void(void)> run_cb_;
|
||||
pthread_t pthread_id_;
|
||||
bool joined_;
|
||||
int group_;
|
||||
};
|
||||
|
||||
/* Own wrapper around pthread's spin lock to make its use easier. */
|
||||
|
82
intern/cycles/util/util_windows.cpp
Normal file
82
intern/cycles/util/util_windows.cpp
Normal file
@ -0,0 +1,82 @@
|
||||
/*
|
||||
* Copyright 2011-2016 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "util_windows.h"
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
#include <VersionHelpers.h>
|
||||
|
||||
#if _WIN32_WINNT < 0x0601
|
||||
tGetActiveProcessorGroupCount *GetActiveProcessorGroupCount;
|
||||
tGetActiveProcessorCount *GetActiveProcessorCount;
|
||||
tSetThreadGroupAffinity *SetThreadGroupAffinity;
|
||||
#endif
|
||||
|
||||
/* Replacement for GetActiveProcessorGroupCount() on systems without the
 * processor groups API: always reports a single group.
 */
static WORD GetActiveProcessorGroupCount_stub()
{
    const WORD single_group = 1;
    return single_group;
}
|
||||
|
||||
/* Replacement for GetActiveProcessorCount() on systems without the
 * processor groups API: the group number is ignored and the processor
 * count reported by GetSystemInfo() is returned.
 */
static DWORD GetActiveProcessorCount_stub(WORD /*GroupNumber*/)
{
    SYSTEM_INFO system_info;
    GetSystemInfo(&system_info);
    const DWORD num_processors = system_info.dwNumberOfProcessors;
    return num_processors;
}
|
||||
|
||||
/* Replacement for SetThreadGroupAffinity() on systems without the
 * processor groups API: does nothing and reports success.
 */
static BOOL SetThreadGroupAffinity_stub(HANDLE /*hThread*/,
                                        const GROUP_AFFINITY * /*GroupAffinity*/,
                                        PGROUP_AFFINITY /*PreviousGroupAffinity*/)
{
    const BOOL succeeded = TRUE;
    return succeeded;
}
|
||||
|
||||
static bool supports_numa()
|
||||
{
|
||||
return IsWindows7OrGreater();
|
||||
}
|
||||
|
||||
void util_windows_init_numa_groups()
|
||||
{
|
||||
static bool initialized = false;
|
||||
if(initialized) {
|
||||
return;
|
||||
}
|
||||
initialized = true;
|
||||
#if _WIN32_WINNT < 0x0601
|
||||
if(!supports_numa()) {
|
||||
/* Use stubs on platforms which doesn't have rean NUMA/Groups. */
|
||||
GetActiveProcessorGroupCount = GetActiveProcessorGroupCount_stub;
|
||||
GetActiveProcessorCount = GetActiveProcessorCount_stub;
|
||||
SetThreadGroupAffinity = SetThreadGroupAffinity_stub;
|
||||
return;
|
||||
}
|
||||
HMODULE kernel = GetModuleHandleA("kernel32.dll");
|
||||
# define READ_SYMBOL(sym) sym = (t##sym*)GetProcAddress(kernel, #sym)
|
||||
READ_SYMBOL(GetActiveProcessorGroupCount);
|
||||
READ_SYMBOL(GetActiveProcessorCount);
|
||||
READ_SYMBOL(SetThreadGroupAffinity);
|
||||
# undef READ_SUMBOL
|
||||
#endif
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* _WIN32 */
|
@ -31,6 +31,25 @@
|
||||
|
||||
#include <windows.h>
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
#if _WIN32_WINNT < 0x0601
|
||||
typedef WORD tGetActiveProcessorGroupCount();
|
||||
typedef DWORD tGetActiveProcessorCount(WORD GroupNumber);
|
||||
typedef BOOL tSetThreadGroupAffinity(HANDLE hThread,
|
||||
const GROUP_AFFINITY *GroupAffinity,
|
||||
PGROUP_AFFINITY PreviousGroupAffinity);
|
||||
|
||||
extern tGetActiveProcessorGroupCount *GetActiveProcessorGroupCount;
|
||||
extern tGetActiveProcessorCount *GetActiveProcessorCount;
|
||||
extern tSetThreadGroupAffinity *SetThreadGroupAffinity;
|
||||
#endif
|
||||
|
||||
/* Make sure NUMA and processor groups API is initialized. */
|
||||
void util_windows_init_numa_groups();
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* WIN32 */
|
||||
|
||||
#endif /* __UTIL_WINDOWS_H__ */
|
||||
|
Loading…
Reference in New Issue
Block a user