Cycles: threading optimizations

* Multithreaded image loading, each thread can load a separate image.
* Better multithreading for multiple instanced meshes, different threads can now
  build BVH's for different meshes, rather than all cooperating on the same mesh.
  Especially noticeable for dynamic BVH building for the viewport, gave about
  2x faster build on 8 core in fairly complex scene with many objects.
* The main thread waiting for worker threads can now also work itself, so
  (num_cores + 1) threads will be working, this supposedly gives better
  performance on some operating systems, but did not measure performance for
  this very detailed yet.
This commit is contained in:
Brecht Van Lommel 2012-05-05 19:44:33 +00:00
parent c53fe94bb4
commit 8103381ded
9 changed files with 182 additions and 109 deletions

@ -36,12 +36,12 @@ CCL_NAMESPACE_BEGIN
class BVHBuildTask : public Task {
public:
BVHBuildTask(InnerNode *node_, int child_, BVHObjectBinning& range_, int level_)
: node(node_), child(child_), level(level_), range(range_) {}
BVHBuildTask(BVHBuild *build, InnerNode *node, int child, BVHObjectBinning& range_, int level)
: range(range_)
{
run = function_bind(&BVHBuild::thread_build_node, build, node, child, &range, level);
}
InnerNode *node;
int child;
int level;
BVHObjectBinning range;
};
@ -55,8 +55,7 @@ BVHBuild::BVHBuild(const vector<Object*>& objects_,
prim_object(prim_object_),
params(params_),
progress(progress_),
progress_start_time(0.0),
task_pool(function_bind(&BVHBuild::thread_build_node, this, _1, _2))
progress_start_time(0.0)
{
spatial_min_overlap = 0.0f;
}
@ -177,7 +176,7 @@ BVHNode* BVHBuild::run()
/* multithreaded binning build */
BVHObjectBinning rootbin(root, (references.size())? &references[0]: NULL);
rootnode = build_node(rootbin, 0);
task_pool.wait();
task_pool.wait_work();
}
/* delete if we cancelled */
@ -210,25 +209,24 @@ void BVHBuild::progress_update()
progress_start_time = time_dt();
}
void BVHBuild::thread_build_node(Task *task_, int thread_id)
void BVHBuild::thread_build_node(InnerNode *inner, int child, BVHObjectBinning *range, int level)
{
if(progress.get_cancel())
return;
/* build nodes */
BVHBuildTask *task = (BVHBuildTask*)task_;
BVHNode *node = build_node(task->range, task->level);
BVHNode *node = build_node(*range, level);
/* set child in inner node */
task->node->children[task->child] = node;
inner->children[child] = node;
/* update progress */
if(task->range.size() < THREAD_TASK_SIZE) {
if(range->size() < THREAD_TASK_SIZE) {
/*rotate(node, INT_MAX, 5);*/
thread_scoped_lock lock(build_mutex);
progress_count += task->range.size();
progress_count += range->size();
progress_update();
}
}
@ -262,8 +260,8 @@ BVHNode* BVHBuild::build_node(const BVHObjectBinning& range, int level)
/* threaded build */
inner = new InnerNode(range.bounds());
task_pool.push(new BVHBuildTask(inner, 0, left, level + 1), true);
task_pool.push(new BVHBuildTask(inner, 1, right, level + 1), true);
task_pool.push(new BVHBuildTask(this, inner, 0, left, level + 1), true);
task_pool.push(new BVHBuildTask(this, inner, 1, right, level + 1), true);
}
return inner;

@ -29,7 +29,9 @@
CCL_NAMESPACE_BEGIN
class BVHBuildTask;
class BVHParams;
class InnerNode;
class Mesh;
class Object;
class Progress;
@ -54,6 +56,7 @@ protected:
friend class BVHMixedSplit;
friend class BVHObjectSplit;
friend class BVHSpatialSplit;
friend class BVHBuildTask;
/* adding references */
void add_reference_mesh(BoundBox& root, BoundBox& center, Mesh *mesh, int i);
@ -68,7 +71,7 @@ protected:
/* threads */
enum { THREAD_TASK_SIZE = 4096 };
void thread_build_node(Task *task_, int thread_id);
void thread_build_node(InnerNode *node, int child, BVHObjectBinning *range, int level);
thread_mutex build_mutex;
/* progress */

@ -44,7 +44,6 @@ public:
KernelGlobals *kg;
CPUDevice(int threads_num)
: task_pool(function_bind(&CPUDevice::thread_run, this, _1, _2))
{
kg = kernel_globals_create();
@ -113,10 +112,8 @@ public:
#endif
}
void thread_run(Task *task_, int thread_id)
void thread_run(DeviceTask *task)
{
DeviceTask *task = (DeviceTask*)task_;
if(task->type == DeviceTask::PATH_TRACE)
thread_path_trace(*task);
else if(task->type == DeviceTask::TONEMAP)
@ -125,6 +122,15 @@ public:
thread_shader(*task);
}
class CPUDeviceTask : public DeviceTask {
public:
CPUDeviceTask(CPUDevice *device, DeviceTask& task)
: DeviceTask(task)
{
run = function_bind(&CPUDevice::thread_run, device, this);
}
};
void thread_path_trace(DeviceTask& task)
{
if(task_pool.cancelled())
@ -226,12 +232,12 @@ public:
task.split(tasks, TaskScheduler::num_threads()*10);
foreach(DeviceTask& task, tasks)
task_pool.push(new DeviceTask(task));
task_pool.push(new CPUDeviceTask(this, task));
}
void task_wait()
{
task_pool.wait();
task_pool.wait_work();
}
void task_cancel()

@ -324,8 +324,10 @@ bool ImageManager::file_load_float_image(Image *img, device_vector<float4>& tex_
return true;
}
void ImageManager::device_load_image(Device *device, DeviceScene *dscene, int slot)
void ImageManager::device_load_image(Device *device, DeviceScene *dscene, int slot, Progress *progress)
{
if(progress->get_cancel())
return;
if(osl_texture_system)
return;
@ -342,6 +344,9 @@ void ImageManager::device_load_image(Device *device, DeviceScene *dscene, int sl
}
if(is_float) {
string filename = path_filename(float_images[slot]->filename);
progress->set_status("Updating Images", "Loading " + filename);
device_vector<float4>& tex_img = dscene->tex_float_image[slot - TEX_IMAGE_FLOAT_START];
if(tex_img.device_pointer)
@ -365,6 +370,9 @@ void ImageManager::device_load_image(Device *device, DeviceScene *dscene, int sl
device->tex_alloc(name.c_str(), tex_img, true, true);
}
else {
string filename = path_filename(images[slot]->filename);
progress->set_status("Updating Images", "Loading " + filename);
device_vector<uchar4>& tex_img = dscene->tex_image[slot];
if(tex_img.device_pointer)
@ -387,6 +395,8 @@ void ImageManager::device_load_image(Device *device, DeviceScene *dscene, int sl
device->tex_alloc(name.c_str(), tex_img, true, true);
}
img->need_load = false;
}
void ImageManager::device_free_image(Device *device, DeviceScene *dscene, int slot)
@ -431,39 +441,37 @@ void ImageManager::device_update(Device *device, DeviceScene *dscene, Progress&
{
if(!need_update)
return;
TaskPool pool;
for(size_t slot = 0; slot < images.size(); slot++) {
if(images[slot]) {
if(images[slot]->users == 0) {
device_free_image(device, dscene, slot);
}
else if(images[slot]->need_load) {
string name = path_filename(images[slot]->filename);
progress.set_status("Updating Images", "Loading " + name);
device_load_image(device, dscene, slot);
images[slot]->need_load = false;
}
if(!images[slot])
continue;
if(progress.get_cancel()) return;
if(images[slot]->users == 0) {
device_free_image(device, dscene, slot);
}
else if(images[slot]->need_load) {
if(!osl_texture_system)
pool.push(function_bind(&ImageManager::device_load_image, this, device, dscene, slot, &progress));
}
}
for(size_t slot = 0; slot < float_images.size(); slot++) {
if(float_images[slot]) {
if(float_images[slot]->users == 0) {
device_free_image(device, dscene, slot + TEX_IMAGE_FLOAT_START);
}
else if(float_images[slot]->need_load) {
string name = path_filename(float_images[slot]->filename);
progress.set_status("Updating Images", "Loading " + name);
device_load_image(device, dscene, slot + TEX_IMAGE_FLOAT_START);
float_images[slot]->need_load = false;
}
if(!float_images[slot])
continue;
if(progress.get_cancel()) return;
if(float_images[slot]->users == 0) {
device_free_image(device, dscene, slot + TEX_IMAGE_FLOAT_START);
}
else if(float_images[slot]->need_load) {
if(!osl_texture_system)
pool.push(function_bind(&ImageManager::device_load_image, this, device, dscene, slot + TEX_IMAGE_FLOAT_START, &progress));
}
}
pool.wait_work();
need_update = false;
}

@ -65,7 +65,7 @@ private:
bool file_load_image(Image *img, device_vector<uchar4>& tex_img);
bool file_load_float_image(Image *img, device_vector<float4>& tex_img);
void device_load_image(Device *device, DeviceScene *dscene, int slot);
void device_load_image(Device *device, DeviceScene *dscene, int slot, Progress *progess);
void device_free_image(Device *device, DeviceScene *dscene, int slot);
};

@ -242,31 +242,47 @@ void Mesh::pack_verts(float4 *tri_verts, float4 *tri_vindex, size_t vert_offset)
}
}
void Mesh::compute_bvh(SceneParams *params, Progress& progress)
void Mesh::compute_bvh(SceneParams *params, Progress *progress, int n, int total)
{
Object object;
object.mesh = this;
if(progress->get_cancel())
return;
vector<Object*> objects;
objects.push_back(&object);
compute_bounds();
if(bvh && !need_update_rebuild) {
progress.set_substatus("Refitting BVH");
bvh->objects = objects;
bvh->refit(progress);
if(!transform_applied) {
string msg = "Updating Mesh BVH ";
if(name == "")
msg += string_printf("%u/%u", (uint)(n+1), (uint)total);
else
msg += string_printf("%s %u/%u", name.c_str(), (uint)(n+1), (uint)total);
Object object;
object.mesh = this;
vector<Object*> objects;
objects.push_back(&object);
if(bvh && !need_update_rebuild) {
progress->set_status(msg, "Refitting BVH");
bvh->objects = objects;
bvh->refit(*progress);
}
else {
progress->set_status(msg, "Building BVH");
BVHParams bparams;
bparams.use_cache = params->use_bvh_cache;
bparams.use_spatial_split = params->use_bvh_spatial_split;
bparams.use_qbvh = params->use_qbvh;
delete bvh;
bvh = BVH::create(bparams, objects);
bvh->build(*progress);
}
}
else {
progress.set_substatus("Building BVH");
BVHParams bparams;
bparams.use_cache = params->use_bvh_cache;
bparams.use_spatial_split = params->use_bvh_spatial_split;
bparams.use_qbvh = params->use_qbvh;
delete bvh;
bvh = BVH::create(bparams, objects);
bvh->build(progress);
}
need_update = false;
need_update_rebuild = false;
}
void Mesh::tag_update(Scene *scene, bool rebuild)
@ -686,35 +702,22 @@ void MeshManager::device_update(Device *device, DeviceScene *dscene, Scene *scen
}
/* update bvh */
size_t i = 0, num_instance_bvh = 0;
size_t i = 0, num_bvh = 0;
foreach(Mesh *mesh, scene->meshes)
if(mesh->need_update && !mesh->transform_applied)
num_instance_bvh++;
num_bvh++;
TaskPool pool;
foreach(Mesh *mesh, scene->meshes) {
if(mesh->need_update) {
mesh->compute_bounds();
if(!mesh->transform_applied) {
string msg = "Updating Mesh BVH ";
if(mesh->name == "")
msg += string_printf("%u/%u", (uint)(i+1), (uint)num_instance_bvh);
else
msg += string_printf("%s %u/%u", mesh->name.c_str(), (uint)(i+1), (uint)num_instance_bvh);
progress.set_status(msg, "Building BVH");
mesh->compute_bvh(&scene->params, progress);
i++;
}
if(progress.get_cancel()) return;
mesh->need_update = false;
mesh->need_update_rebuild = false;
pool.push(function_bind(&Mesh::compute_bvh, mesh, &scene->params, &progress, i, num_bvh));
i++;
}
}
pool.wait_work();
foreach(Shader *shader, scene->shaders)
shader->need_update_attributes = false;

@ -96,7 +96,7 @@ public:
void pack_normals(Scene *scene, float4 *normal, float4 *vnormal);
void pack_verts(float4 *tri_verts, float4 *tri_vindex, size_t vert_offset);
void compute_bvh(SceneParams *params, Progress& progress);
void compute_bvh(SceneParams *params, Progress *progress, int n, int total);
bool need_attribute(Scene *scene, AttributeStandard std);
bool need_attribute(Scene *scene, ustring name);

@ -25,14 +25,12 @@ CCL_NAMESPACE_BEGIN
/* Task Pool */
TaskPool::TaskPool(const TaskRunFunction& run_)
TaskPool::TaskPool()
{
num = 0;
num_done = 0;
do_cancel = false;
run = run_;
}
TaskPool::~TaskPool()
@ -50,12 +48,55 @@ void TaskPool::push(Task *task, bool front)
TaskScheduler::push(entry, front);
}
void TaskPool::wait()
void TaskPool::push(const TaskRunFunction& run, bool front)
{
thread_scoped_lock lock(done_mutex);
push(new Task(run), front);
}
while(num_done != num)
done_cond.wait(lock);
void TaskPool::wait_work()
{
thread_scoped_lock done_lock(done_mutex);
while(num_done != num) {
thread_scoped_lock queue_lock(TaskScheduler::queue_mutex);
/* find task from this pool. if we get a task from another pool,
* we can get into deadlock */
TaskScheduler::Entry work_entry;
bool found_entry = false;
list<TaskScheduler::Entry>::iterator it;
for(it = TaskScheduler::queue.begin(); it != TaskScheduler::queue.end(); it++) {
TaskScheduler::Entry& entry = *it;
if(entry.pool == this) {
work_entry = entry;
found_entry = true;
TaskScheduler::queue.erase(it);
break;
}
}
queue_lock.unlock();
/* if found task, do it, otherwise wait until other tasks are done */
if(found_entry) {
done_lock.unlock();
/* run task */
work_entry.task->run();
/* delete task */
delete work_entry.task;
/* notify pool task was done */
done_increase(1);
done_lock.lock();
}
else
done_cond.wait(done_lock);
}
}
void TaskPool::cancel()
@ -63,7 +104,12 @@ void TaskPool::cancel()
TaskScheduler::clear(this);
do_cancel = true;
wait();
{
thread_scoped_lock lock(done_mutex);
while(num_done != num)
done_cond.wait(lock);
}
do_cancel = false;
}
@ -94,6 +140,7 @@ void TaskPool::done_increase(int done)
thread_mutex TaskScheduler::mutex;
int TaskScheduler::users = 0;
vector<thread*> TaskScheduler::threads;
vector<int> TaskScheduler::thread_level;
volatile bool TaskScheduler::do_exit = false;
list<TaskScheduler::Entry> TaskScheduler::queue;
@ -114,9 +161,12 @@ void TaskScheduler::init(int num_threads)
num_threads = system_cpu_thread_count();
threads.resize(num_threads);
thread_level.resize(num_threads);
for(size_t i = 0; i < threads.size(); i++)
for(size_t i = 0; i < threads.size(); i++) {
threads[i] = new thread(function_bind(&TaskScheduler::thread_run, i));
thread_level[i] = 0;
}
}
users++;
@ -140,6 +190,7 @@ void TaskScheduler::exit()
}
threads.clear();
thread_level.clear();
}
}
@ -170,7 +221,7 @@ void TaskScheduler::thread_run(int thread_id)
/* keep popping off tasks */
while(thread_wait_pop(entry)) {
/* run task */
entry.pool->run(entry.task, thread_id);
entry.task->run();
/* delete task */
delete entry.task;
@ -196,20 +247,20 @@ void TaskScheduler::push(Entry& entry, bool front)
void TaskScheduler::clear(TaskPool *pool)
{
thread_scoped_lock lock(TaskScheduler::queue_mutex);
thread_scoped_lock lock(queue_mutex);
/* erase all tasks from this pool from the queue */
list<TaskScheduler::Entry>::iterator it = TaskScheduler::queue.begin();
list<Entry>::iterator it = queue.begin();
int done = 0;
while(it != TaskScheduler::queue.end()) {
TaskScheduler::Entry& entry = *it;
while(it != queue.end()) {
Entry& entry = *it;
if(entry.pool == pool) {
done++;
delete entry.task;
it = TaskScheduler::queue.erase(it);
it = queue.erase(it);
}
else
it++;

@ -29,7 +29,7 @@ class Task;
class TaskPool;
class TaskScheduler;
typedef boost::function<void(Task*,int)> TaskRunFunction;
typedef boost::function<void(void)> TaskRunFunction;
/* Task
*
@ -39,7 +39,11 @@ class Task
{
public:
Task() {};
Task(const TaskRunFunction& run_) : run(run_) {}
virtual ~Task() {}
TaskRunFunction run;
};
/* Task Pool
@ -54,12 +58,13 @@ public:
class TaskPool
{
public:
TaskPool(const TaskRunFunction& run);
TaskPool();
~TaskPool();
void push(Task *task, bool front = false);
void push(const TaskRunFunction& run, bool front = false);
void wait(); /* wait until all tasks are done */
void wait_work(); /* work and wait until all tasks are done */
void cancel(); /* cancel all tasks, keep worker threads running */
void stop(); /* stop all worker threads */
@ -70,8 +75,6 @@ protected:
void done_increase(int done);
TaskRunFunction run;
thread_mutex done_mutex;
thread_condition_variable done_cond;
@ -103,6 +106,7 @@ protected:
static thread_mutex mutex;
static int users;
static vector<thread*> threads;
static vector<int> thread_level;
static volatile bool do_exit;
static list<Entry> queue;