Cycles: make TBB a required library dependency, and use in a few places
Now that the rest of Blender also relies on TBB, there is no point in maintaining custom code for parallel_for and thread-local storage.
This commit is contained in:
parent
ace3268482
commit
d8c2092b15
@ -654,6 +654,7 @@ if(WITH_BOOST AND NOT (WITH_CYCLES OR WITH_OPENIMAGEIO OR WITH_INTERNATIONAL OR
|
||||
set(WITH_BOOST OFF)
|
||||
endif()
|
||||
|
||||
set_and_warn_dependency(WITH_TBB WITH_CYCLES OFF)
|
||||
set_and_warn_dependency(WITH_TBB WITH_USD OFF)
|
||||
set_and_warn_dependency(WITH_TBB WITH_OPENIMAGEDENOISE OFF)
|
||||
set_and_warn_dependency(WITH_TBB WITH_OPENVDB OFF)
|
||||
|
@ -286,6 +286,7 @@ include_directories(
|
||||
${OPENEXR_INCLUDE_DIR}
|
||||
${OPENEXR_INCLUDE_DIRS}
|
||||
${PUGIXML_INCLUDE_DIR}
|
||||
${TBB_INCLUDE_DIRS}
|
||||
)
|
||||
|
||||
if(CYCLES_STANDALONE_REPOSITORY)
|
||||
|
@ -423,22 +423,6 @@ BVHNode *BVHBuild::run()
|
||||
}
|
||||
|
||||
spatial_min_overlap = root.bounds().safe_area() * params.spatial_split_alpha;
|
||||
if (params.use_spatial_split) {
|
||||
/* NOTE: The API here tries to be as much ready for multi-threaded build
|
||||
* as possible, but at the same time it tries not to introduce any
|
||||
* changes in behavior for until all refactoring needed for threading is
|
||||
* finished.
|
||||
*
|
||||
* So we currently allocate single storage for now, which is only used by
|
||||
* the only thread working on the spatial BVH build.
|
||||
*/
|
||||
spatial_storage.resize(TaskScheduler::num_threads() + 1);
|
||||
size_t num_bins = max(root.size(), (int)BVHParams::NUM_SPATIAL_BINS) - 1;
|
||||
foreach (BVHSpatialStorage &storage, spatial_storage) {
|
||||
storage.right_bounds.clear();
|
||||
}
|
||||
spatial_storage[0].right_bounds.resize(num_bins);
|
||||
}
|
||||
spatial_free_index = 0;
|
||||
|
||||
need_prim_time = params.num_motion_curve_steps > 0 || params.num_motion_triangle_steps > 0;
|
||||
@ -475,6 +459,9 @@ BVHNode *BVHBuild::run()
|
||||
task_pool.wait_work();
|
||||
}
|
||||
|
||||
/* clean up temporary memory usage by threads */
|
||||
spatial_storage.clear();
|
||||
|
||||
/* delete if we canceled */
|
||||
if (rootnode) {
|
||||
if (progress.get_cancel()) {
|
||||
@ -551,19 +538,18 @@ void BVHBuild::thread_build_node(InnerNode *inner, int child, BVHObjectBinning *
|
||||
}
|
||||
}
|
||||
|
||||
void BVHBuild::thread_build_spatial_split_node(InnerNode *inner,
|
||||
int child,
|
||||
BVHRange *range,
|
||||
vector<BVHReference> *references,
|
||||
int level,
|
||||
int thread_id)
|
||||
void BVHBuild::thread_build_spatial_split_node(
|
||||
InnerNode *inner, int child, BVHRange *range, vector<BVHReference> *references, int level)
|
||||
{
|
||||
if (progress.get_cancel()) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* Get per-thread memory for spatial split. */
|
||||
BVHSpatialStorage *local_storage = &spatial_storage.local();
|
||||
|
||||
/* build nodes */
|
||||
BVHNode *node = build_node(*range, references, level, thread_id);
|
||||
BVHNode *node = build_node(*range, references, level, local_storage);
|
||||
|
||||
/* set child in inner node */
|
||||
inner->children[child] = node;
|
||||
@ -690,7 +676,7 @@ BVHNode *BVHBuild::build_node(const BVHObjectBinning &range, int level)
|
||||
BVHNode *BVHBuild::build_node(const BVHRange &range,
|
||||
vector<BVHReference> *references,
|
||||
int level,
|
||||
int thread_id)
|
||||
BVHSpatialStorage *storage)
|
||||
{
|
||||
/* Update progress.
|
||||
*
|
||||
@ -712,7 +698,6 @@ BVHNode *BVHBuild::build_node(const BVHRange &range,
|
||||
}
|
||||
|
||||
/* Perform splitting test. */
|
||||
BVHSpatialStorage *storage = &spatial_storage[thread_id];
|
||||
BVHMixedSplit split(this, storage, range, references, level);
|
||||
|
||||
if (!(range.size() > 0 && params.top_level && level == 0)) {
|
||||
|
@ -76,7 +76,7 @@ class BVHBuild {
|
||||
BVHNode *build_node(const BVHRange &range,
|
||||
vector<BVHReference> *references,
|
||||
int level,
|
||||
int thread_id);
|
||||
BVHSpatialStorage *storage);
|
||||
BVHNode *build_node(const BVHObjectBinning &range, int level);
|
||||
BVHNode *create_leaf_node(const BVHRange &range, const vector<BVHReference> &references);
|
||||
BVHNode *create_object_leaf_nodes(const BVHReference *ref, int start, int num);
|
||||
@ -87,12 +87,8 @@ class BVHBuild {
|
||||
/* Threads. */
|
||||
enum { THREAD_TASK_SIZE = 4096 };
|
||||
void thread_build_node(InnerNode *node, int child, BVHObjectBinning *range, int level);
|
||||
void thread_build_spatial_split_node(InnerNode *node,
|
||||
int child,
|
||||
BVHRange *range,
|
||||
vector<BVHReference> *references,
|
||||
int level,
|
||||
int thread_id);
|
||||
void thread_build_spatial_split_node(
|
||||
InnerNode *node, int child, BVHRange *range, vector<BVHReference> *references, int level);
|
||||
thread_mutex build_mutex;
|
||||
|
||||
/* Progress. */
|
||||
@ -127,7 +123,7 @@ class BVHBuild {
|
||||
|
||||
/* Spatial splitting. */
|
||||
float spatial_min_overlap;
|
||||
vector<BVHSpatialStorage> spatial_storage;
|
||||
enumerable_thread_specific<BVHSpatialStorage> spatial_storage;
|
||||
size_t spatial_free_index;
|
||||
thread_spin_lock spatial_spin_lock;
|
||||
|
||||
|
@ -20,6 +20,7 @@
|
||||
#include "util/util_logging.h"
|
||||
#include "util/util_path.h"
|
||||
#include "util/util_sky_model.h"
|
||||
#include "util/util_task.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
@ -58,26 +59,21 @@ bool SkyLoader::load_pixels(const ImageMetaData &metadata,
|
||||
float altitude_f = (float)altitude;
|
||||
|
||||
/* precompute sky texture */
|
||||
const int num_chunks = TaskScheduler::num_threads();
|
||||
const int chunk_size = height / num_chunks;
|
||||
TaskPool pool;
|
||||
for (int chunk = 0; chunk < num_chunks; chunk++) {
|
||||
const int chunk_start = chunk * chunk_size;
|
||||
const int chunk_end = (chunk + 1 < num_chunks) ? (chunk + 1) * chunk_size : height;
|
||||
pool.push(function_bind(&nishita_skymodel_precompute_texture,
|
||||
pixel_data,
|
||||
metadata.channels,
|
||||
chunk_start,
|
||||
chunk_end,
|
||||
width,
|
||||
height,
|
||||
sun_elevation,
|
||||
altitude_f,
|
||||
air_density,
|
||||
dust_density,
|
||||
ozone_density));
|
||||
}
|
||||
pool.wait_work();
|
||||
const int rows_per_task = divide_up(1024, width);
|
||||
parallel_for(blocked_range<size_t>(0, height, rows_per_task),
|
||||
[&](const blocked_range<size_t> &r) {
|
||||
nishita_skymodel_precompute_texture(pixel_data,
|
||||
metadata.channels,
|
||||
r.begin(),
|
||||
r.end(),
|
||||
width,
|
||||
height,
|
||||
sun_elevation,
|
||||
altitude_f,
|
||||
air_density,
|
||||
dust_density,
|
||||
ozone_density);
|
||||
});
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -680,29 +680,13 @@ void LightManager::device_update_background(Device *device,
|
||||
float2 *cond_cdf = dscene->light_background_conditional_cdf.alloc(cdf_width * res.y);
|
||||
|
||||
double time_start = time_dt();
|
||||
if (max(res.x, res.y) < 512) {
|
||||
/* Small enough resolution, faster to do single-threaded. */
|
||||
background_cdf(0, res.y, res.x, res.y, &pixels, cond_cdf);
|
||||
}
|
||||
else {
|
||||
/* Threaded evaluation for large resolution. */
|
||||
const int num_blocks = TaskScheduler::num_threads();
|
||||
const int chunk_size = res.y / num_blocks;
|
||||
int start_row = 0;
|
||||
TaskPool pool;
|
||||
for (int i = 0; i < num_blocks; ++i) {
|
||||
const int current_chunk_size = (i != num_blocks - 1) ? chunk_size : (res.y - i * chunk_size);
|
||||
pool.push(function_bind(&background_cdf,
|
||||
start_row,
|
||||
start_row + current_chunk_size,
|
||||
res.x,
|
||||
res.y,
|
||||
&pixels,
|
||||
cond_cdf));
|
||||
start_row += current_chunk_size;
|
||||
}
|
||||
pool.wait_work();
|
||||
}
|
||||
|
||||
/* Create CDF in parallel. */
|
||||
const int rows_per_task = divide_up(10240, res.x);
|
||||
parallel_for(blocked_range<size_t>(0, res.y, rows_per_task),
|
||||
[&](const blocked_range<size_t> &r) {
|
||||
background_cdf(r.begin(), r.end(), res.x, res.y, &pixels, cond_cdf);
|
||||
});
|
||||
|
||||
/* marginal CDFs (column, V direction, sum of rows) */
|
||||
marg_cdf[0].x = cond_cdf[res.x].x;
|
||||
|
@ -78,7 +78,6 @@ struct UpdateObjectTransformState {
|
||||
Scene *scene;
|
||||
|
||||
/* Some locks to keep everything thread-safe. */
|
||||
thread_spin_lock queue_lock;
|
||||
thread_spin_lock surface_area_lock;
|
||||
|
||||
/* First unused object index in the queue. */
|
||||
@ -551,41 +550,6 @@ void ObjectManager::device_update_object_transform(UpdateObjectTransformState *s
|
||||
}
|
||||
}
|
||||
|
||||
/* Pop the next chunk of work (up to OBJECTS_PER_TASK objects) off the shared
 * object queue. On success fills *start_index / *num_objects and returns true;
 * returns false once every scene object has been handed out. Safe to call from
 * multiple worker threads: the queue cursor is guarded by state->queue_lock. */
bool ObjectManager::device_update_object_transform_pop_work(UpdateObjectTransformState *state,
|
||||
int *start_index,
|
||||
int *num_objects)
|
||||
{
|
||||
/* Tweakable parameter, number of objects per chunk.
|
||||
 * Too small value will cause some extra overhead due to spin lock,
|
||||
 * too big value might not use all threads nicely.
|
||||
 */
|
||||
static const int OBJECTS_PER_TASK = 32;
|
||||
bool have_work = false;
|
||||
/* Critical section: read and advance the shared queue cursor. */
state->queue_lock.lock();
|
||||
int num_scene_objects = state->scene->objects.size();
|
||||
if (state->queue_start_object < num_scene_objects) {
|
||||
/* Clamp the chunk so the last one does not run past the object array. */
int count = min(OBJECTS_PER_TASK, num_scene_objects - state->queue_start_object);
|
||||
*start_index = state->queue_start_object;
|
||||
*num_objects = count;
|
||||
state->queue_start_object += count;
|
||||
have_work = true;
|
||||
}
|
||||
state->queue_lock.unlock();
|
||||
return have_work;
|
||||
}
|
||||
|
||||
/* Worker-thread entry point: repeatedly pop chunks of objects from the shared
 * queue and update each object's transform until no work remains. One instance
 * of this task runs per scheduler thread (see the caller's TaskPool loop). */
void ObjectManager::device_update_object_transform_task(UpdateObjectTransformState *state)
|
||||
{
|
||||
int start_index, num_objects;
|
||||
/* Keep pulling chunks until pop_work() reports the queue is exhausted. */
while (device_update_object_transform_pop_work(state, &start_index, &num_objects)) {
|
||||
for (int i = 0; i < num_objects; ++i) {
|
||||
const int object_index = start_index + i;
|
||||
Object *ob = state->scene->objects[object_index];
|
||||
device_update_object_transform(state, ob);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ObjectManager::device_update_transforms(DeviceScene *dscene, Scene *scene, Progress &progress)
|
||||
{
|
||||
UpdateObjectTransformState state;
|
||||
@ -631,29 +595,16 @@ void ObjectManager::device_update_transforms(DeviceScene *dscene, Scene *scene,
|
||||
numparticles += psys->particles.size();
|
||||
}
|
||||
|
||||
/* NOTE: If it's just a handful of objects we deal with them in a single
|
||||
* thread to avoid threading overhead. However, this threshold is might
|
||||
* need some tweaks to make mid-complex scenes optimal.
|
||||
*/
|
||||
if (scene->objects.size() < 64) {
|
||||
foreach (Object *ob, scene->objects) {
|
||||
device_update_object_transform(&state, ob);
|
||||
if (progress.get_cancel()) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
const int num_threads = TaskScheduler::num_threads();
|
||||
TaskPool pool;
|
||||
for (int i = 0; i < num_threads; ++i) {
|
||||
pool.push(function_bind(&ObjectManager::device_update_object_transform_task, this, &state));
|
||||
}
|
||||
pool.wait_work();
|
||||
if (progress.get_cancel()) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
/* Parallel object update, with grain size to avoid too much threadng overhead
|
||||
* for individual objects. */
|
||||
static const int OBJECTS_PER_TASK = 32;
|
||||
parallel_for(blocked_range<size_t>(0, scene->objects.size(), OBJECTS_PER_TASK),
|
||||
[&](const blocked_range<size_t> &r) {
|
||||
for (size_t i = r.begin(); i != r.end(); i++) {
|
||||
Object *ob = state.scene->objects[i];
|
||||
device_update_object_transform(&state, ob);
|
||||
}
|
||||
});
|
||||
|
||||
dscene->objects.copy_to_device();
|
||||
if (state.need_motion == Scene::MOTION_PASS) {
|
||||
|
@ -94,8 +94,7 @@ void SVMShaderManager::device_update(Device *device,
|
||||
scene,
|
||||
scene->shaders[i],
|
||||
&progress,
|
||||
&shader_svm_nodes[i]),
|
||||
false);
|
||||
&shader_svm_nodes[i]));
|
||||
}
|
||||
task_pool.wait_work();
|
||||
|
||||
|
@ -29,7 +29,7 @@ set(SRC
|
||||
)
|
||||
|
||||
set(LIB
|
||||
|
||||
${TBB_LIBRARIES}
|
||||
)
|
||||
|
||||
if(WITH_CYCLES_STANDALONE)
|
||||
|
@ -22,8 +22,15 @@
|
||||
#include "util/util_thread.h"
|
||||
#include "util/util_vector.h"
|
||||
|
||||
#define TBB_SUPPRESS_DEPRECATED_MESSAGES 1
|
||||
#include <tbb/tbb.h>
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
using tbb::blocked_range;
|
||||
using tbb::enumerable_thread_specific;
|
||||
using tbb::parallel_for;
|
||||
|
||||
class Task;
|
||||
class TaskPool;
|
||||
class TaskScheduler;
|
||||
|
Loading…
Reference in New Issue
Block a user