Cycles: Rework tile scheduling for denoising

This fixes denoising being delayed until after all rendering has finished. Tile-based denoising
is now part of the "RENDER" task again, so that rendering and denoising run in a single task and
no longer cause issues with dedicated task pools, where tasks are serialized.

Reviewed By: brecht

Differential Revision: https://developer.blender.org/D6940
This commit is contained in:
Patrick Mours 2020-02-26 16:30:42 +01:00
parent 03e04d4db7
commit af54bbd61c
12 changed files with 127 additions and 119 deletions

@ -2144,7 +2144,7 @@ void CUDADevice::thread_run(DeviceTask *task)
{
CUDAContextScope scope(this);
if (task->type == DeviceTask::RENDER || task->type == DeviceTask::DENOISE) {
if (task->type == DeviceTask::RENDER) {
DeviceRequestedFeatures requested_features;
if (use_split_kernel()) {
if (split_kernel == NULL) {
@ -2159,7 +2159,7 @@ void CUDADevice::thread_run(DeviceTask *task)
RenderTile tile;
DenoisingTask denoising(this, *task);
while (task->acquire_tile(this, tile)) {
while (task->acquire_tile(this, tile, task->tile_types)) {
if (tile.task == RenderTile::PATH_TRACE) {
if (use_split_kernel()) {
device_only_memory<uchar> void_buffer(this, "void_buffer");

@ -511,7 +511,7 @@ class CPUDevice : public Device {
void thread_run(DeviceTask *task)
{
if (task->type == DeviceTask::RENDER || task->type == DeviceTask::DENOISE)
if (task->type == DeviceTask::RENDER)
thread_render(*task);
else if (task->type == DeviceTask::SHADER)
thread_shader(*task);
@ -927,7 +927,7 @@ class CPUDevice : public Device {
DenoisingTask denoising(this, task);
denoising.profiler = &kg->profiler;
while (task.acquire_tile(this, tile)) {
while (task.acquire_tile(this, tile, task.tile_types)) {
if (tile.task == RenderTile::PATH_TRACE) {
if (use_split_kernel) {
device_only_memory<uchar> void_buffer(this, "void_buffer");

@ -482,11 +482,24 @@ class MultiDevice : public Device {
void task_add(DeviceTask &task)
{
list<SubDevice> &task_devices = denoising_devices.empty() ||
(task.type != DeviceTask::DENOISE &&
task.type != DeviceTask::DENOISE_BUFFER) ?
devices :
denoising_devices;
list<SubDevice> task_devices = devices;
if (!denoising_devices.empty()) {
if (task.type == DeviceTask::DENOISE_BUFFER) {
/* Denoising tasks should be redirected to the denoising devices entirely. */
task_devices = denoising_devices;
}
else if (task.type == DeviceTask::RENDER && (task.tile_types & RenderTile::DENOISE)) {
const uint tile_types = task.tile_types;
/* For normal rendering tasks only redirect the denoising part to the denoising devices.
* Do not need to split the task here, since they all run through 'acquire_tile'. */
task.tile_types = RenderTile::DENOISE;
foreach (SubDevice &sub, denoising_devices) {
sub.device->task_add(task);
}
/* Rendering itself should still be executed on the rendering devices. */
task.tile_types = tile_types ^ RenderTile::DENOISE;
}
}
list<DeviceTask> tasks;
task.split(tasks, task_devices.size());

@ -569,9 +569,14 @@ class OptiXDevice : public CUDADevice {
if (have_error())
return; // Abort early if there was an error previously
if (task.type == DeviceTask::RENDER || task.type == DeviceTask::DENOISE) {
if (task.type == DeviceTask::RENDER) {
if (thread_index != 0) {
// Only execute denoising in a single thread (see also 'task_add')
task.tile_types &= ~RenderTile::DENOISE;
}
RenderTile tile;
while (task.acquire_tile(this, tile)) {
while (task.acquire_tile(this, tile, task.tile_types)) {
if (tile.task == RenderTile::PATH_TRACE)
launch_render(task, tile, thread_index);
else if (tile.task == RenderTile::DENOISE)
@ -1451,7 +1456,7 @@ class OptiXDevice : public CUDADevice {
return;
}
if (task.type == DeviceTask::DENOISE || task.type == DeviceTask::DENOISE_BUFFER) {
if (task.type == DeviceTask::DENOISE_BUFFER) {
// Execute denoising in a single thread (e.g. to avoid race conditions during creation)
task_pool.push(new OptiXDeviceTask(this, task, 0));
return;

@ -68,7 +68,7 @@ int DeviceTask::get_subtask_count(int num, int max_size)
if (type == SHADER) {
num = min(shader_w, num);
}
else if (type == RENDER || type == DENOISE) {
else if (type == RENDER) {
}
else {
num = min(h, num);
@ -94,7 +94,7 @@ void DeviceTask::split(list<DeviceTask> &tasks, int num, int max_size)
tasks.push_back(task);
}
}
else if (type == RENDER || type == DENOISE) {
else if (type == RENDER) {
for (int i = 0; i < num; i++)
tasks.push_back(*this);
}

@ -64,7 +64,7 @@ class DenoiseParams {
class DeviceTask : public Task {
public:
typedef enum { RENDER, DENOISE, DENOISE_BUFFER, FILM_CONVERT, SHADER } Type;
typedef enum { RENDER, FILM_CONVERT, SHADER, DENOISE_BUFFER } Type;
Type type;
int x, y, w, h;
@ -90,7 +90,7 @@ class DeviceTask : public Task {
void update_progress(RenderTile *rtile, int pixel_samples = -1);
function<bool(Device *device, RenderTile &)> acquire_tile;
function<bool(Device *device, RenderTile &, uint)> acquire_tile;
function<void(long, int)> update_progress_sample;
function<void(RenderTile &)> update_tile_sample;
function<void(RenderTile &)> release_tile;
@ -98,6 +98,7 @@ class DeviceTask : public Task {
function<void(RenderTile *, Device *)> map_neighbor_tiles;
function<void(RenderTile *, Device *)> unmap_neighbor_tiles;
uint tile_types;
DenoiseParams denoising;
bool denoising_from_render;
vector<int> denoising_frames;

@ -1308,7 +1308,7 @@ void OpenCLDevice::thread_run(DeviceTask *task)
{
flush_texture_buffers();
if (task->type == DeviceTask::RENDER || task->type == DeviceTask::DENOISE) {
if (task->type == DeviceTask::RENDER) {
RenderTile tile;
DenoisingTask denoising(this, *task);
@ -1317,7 +1317,7 @@ void OpenCLDevice::thread_run(DeviceTask *task)
kgbuffer.alloc_to_device(1);
/* Keep rendering tiles until done. */
while (task->acquire_tile(this, tile)) {
while (task->acquire_tile(this, tile, task->tile_types)) {
if (tile.task == RenderTile::PATH_TRACE) {
assert(tile.task == RenderTile::PATH_TRACE);
scoped_timer timer(&tile.buffers->render_time);

@ -130,7 +130,7 @@ class DisplayBuffer {
class RenderTile {
public:
typedef enum { PATH_TRACE, DENOISE } Task;
typedef enum { PATH_TRACE = (1 << 0), DENOISE = (1 << 1) } Task;
Task task;
int x, y, w, h;

@ -301,12 +301,7 @@ void Session::run_gpu()
update_status_time();
/* render */
render();
/* denoise */
if (need_denoise) {
denoise();
}
render(need_denoise);
device->task_wait();
@ -384,7 +379,7 @@ bool Session::draw_cpu(BufferParams &buffer_params, DeviceDrawParams &draw_param
return false;
}
bool Session::acquire_tile(Device *tile_device, RenderTile &rtile, RenderTile::Task task)
bool Session::acquire_tile(RenderTile &rtile, Device *tile_device, uint tile_types)
{
if (progress.get_cancel()) {
if (params.progressive_refine == false) {
@ -399,9 +394,9 @@ bool Session::acquire_tile(Device *tile_device, RenderTile &rtile, RenderTile::T
Tile *tile;
int device_num = device->device_number(tile_device);
while (!tile_manager.next_tile(tile, device_num, task == RenderTile::DENOISE)) {
while (!tile_manager.next_tile(tile, device_num, tile_types)) {
/* Wait for denoising tiles to become available */
if (task == RenderTile::DENOISE && !progress.get_cancel() && tile_manager.has_tiles()) {
if ((tile_types & RenderTile::DENOISE) && !progress.get_cancel() && tile_manager.has_tiles()) {
denoising_cond.wait(tile_lock);
continue;
}
@ -417,7 +412,7 @@ bool Session::acquire_tile(Device *tile_device, RenderTile &rtile, RenderTile::T
rtile.num_samples = tile_manager.state.num_samples;
rtile.resolution = tile_manager.state.resolution_divider;
rtile.tile_index = tile->index;
rtile.task = task;
rtile.task = tile->state == Tile::DENOISE ? RenderTile::DENOISE : RenderTile::PATH_TRACE;
tile_lock.unlock();
@ -700,12 +695,7 @@ void Session::run_cpu()
update_status_time();
/* render */
render();
/* denoise */
if (need_denoise) {
denoise();
}
render(need_denoise);
/* update status and timing */
update_status_time();
@ -1089,99 +1079,90 @@ void Session::update_status_time(bool show_pause, bool show_done)
progress.set_status(status, substatus);
}
void Session::render()
void Session::render(bool with_denoising)
{
/* Clear buffers. */
if (buffers && tile_manager.state.sample == tile_manager.range_start_sample) {
/* Clear buffers. */
buffers->zero();
}
if (tile_manager.state.buffer.width == 0 || tile_manager.state.buffer.height == 0) {
return; /* Avoid empty launches. */
}
/* Add path trace task. */
DeviceTask task(DeviceTask::RENDER);
task.acquire_tile = function_bind(&Session::acquire_tile, this, _1, _2, RenderTile::PATH_TRACE);
task.acquire_tile = function_bind(&Session::acquire_tile, this, _2, _1, _3);
task.release_tile = function_bind(&Session::release_tile, this, _1);
task.map_neighbor_tiles = function_bind(&Session::map_neighbor_tiles, this, _1, _2);
task.unmap_neighbor_tiles = function_bind(&Session::unmap_neighbor_tiles, this, _1, _2);
task.get_cancel = function_bind(&Progress::get_cancel, &this->progress);
task.update_tile_sample = function_bind(&Session::update_tile_sample, this, _1);
task.update_progress_sample = function_bind(&Progress::add_samples, &this->progress, _1, _2);
task.need_finish_queue = params.progressive_refine;
task.integrator_branched = scene->integrator->method == Integrator::BRANCHED_PATH;
device->task_add(task);
}
/* Acquire render tiles by default. */
task.tile_types = RenderTile::PATH_TRACE;
void Session::denoise()
{
if (!params.run_denoising) {
return;
}
/* Do not denoise viewport until the sample at which denoising should start is reached. */
if (!params.background && tile_manager.state.sample < params.denoising_start_sample) {
return;
}
/* Cannot denoise with resolution divider and separate denoising devices.
* It breaks the copy in 'MultiDevice::map_neighbor_tiles' (which operates on the full buffer
* dimensions and not the scaled ones). */
if (!params.device.denoising_devices.empty() && tile_manager.state.resolution_divider > 1) {
return;
}
/* It can happen that denoising was already enabled, but the scene still needs an update. */
if (scene->film->need_update || !scene->film->denoising_data_offset) {
return;
}
/* Add separate denoising task. */
DeviceTask task(DeviceTask::DENOISE);
if (tile_manager.schedule_denoising) {
/* Run denoising on each tile. */
task.acquire_tile = function_bind(&Session::acquire_tile, this, _1, _2, RenderTile::DENOISE);
task.release_tile = function_bind(&Session::release_tile, this, _1);
task.update_tile_sample = function_bind(&Session::update_tile_sample, this, _1);
task.update_progress_sample = function_bind(&Progress::add_samples, &this->progress, _1, _2);
}
else {
assert(buffers);
if (tile_manager.state.buffer.width == 0 || tile_manager.state.buffer.height == 0) {
return; /* Avoid empty launches. */
with_denoising = params.run_denoising && with_denoising;
if (with_denoising) {
/* Do not denoise viewport until the sample at which denoising should start is reached. */
if (!params.background && tile_manager.state.sample < params.denoising_start_sample) {
with_denoising = false;
}
/* Wait for rendering to finish. */
device->task_wait();
/* Cannot denoise with resolution divider and separate denoising devices.
* It breaks the copy in 'MultiDevice::map_neighbor_tiles' (which operates on the full buffer
* dimensions and not the scaled ones). */
if (!params.device.denoising_devices.empty() && tile_manager.state.resolution_divider > 1) {
with_denoising = false;
}
/* Run denoising on the whole image at once. */
task.type = DeviceTask::DENOISE_BUFFER;
task.x = tile_manager.state.buffer.full_x;
task.y = tile_manager.state.buffer.full_y;
task.w = tile_manager.state.buffer.width;
task.h = tile_manager.state.buffer.height;
task.buffer = buffers->buffer.device_pointer;
task.sample = tile_manager.state.sample;
task.num_samples = tile_manager.state.num_samples;
tile_manager.state.buffer.get_offset_stride(task.offset, task.stride);
task.buffers = buffers;
/* It can happen that denoising was already enabled, but the scene still needs an update. */
if (scene->film->need_update || !scene->film->denoising_data_offset) {
with_denoising = false;
}
}
task.get_cancel = function_bind(&Progress::get_cancel, &this->progress);
task.need_finish_queue = params.progressive_refine;
task.map_neighbor_tiles = function_bind(&Session::map_neighbor_tiles, this, _1, _2);
task.unmap_neighbor_tiles = function_bind(&Session::unmap_neighbor_tiles, this, _1, _2);
if (with_denoising) {
task.denoising = params.denoising;
task.denoising = params.denoising;
task.pass_stride = scene->film->pass_stride;
task.target_pass_stride = task.pass_stride;
task.pass_denoising_data = scene->film->denoising_data_offset;
task.pass_denoising_clean = scene->film->denoising_clean_offset;
task.pass_stride = scene->film->pass_stride;
task.target_pass_stride = task.pass_stride;
task.pass_denoising_data = scene->film->denoising_data_offset;
task.pass_denoising_clean = scene->film->denoising_clean_offset;
task.denoising_from_render = true;
task.denoising_do_filter = params.full_denoising;
task.denoising_use_optix = params.optix_denoising;
task.denoising_write_passes = params.write_denoising_passes;
task.denoising_from_render = true;
task.denoising_do_filter = params.full_denoising;
task.denoising_use_optix = params.optix_denoising;
task.denoising_write_passes = params.write_denoising_passes;
if (tile_manager.schedule_denoising) {
/* Acquire denoising tiles during rendering. */
task.tile_types |= RenderTile::DENOISE;
}
else {
assert(buffers);
/* Schedule rendering and wait for it to finish. */
device->task_add(task);
device->task_wait();
/* Then run denoising on the whole image at once. */
task.type = DeviceTask::DENOISE_BUFFER;
task.x = tile_manager.state.buffer.full_x;
task.y = tile_manager.state.buffer.full_y;
task.w = tile_manager.state.buffer.width;
task.h = tile_manager.state.buffer.height;
task.buffer = buffers->buffer.device_pointer;
task.sample = tile_manager.state.sample;
task.num_samples = tile_manager.state.num_samples;
tile_manager.state.buffer.get_offset_stride(task.offset, task.stride);
task.buffers = buffers;
}
}
device->task_add(task);
}

@ -183,8 +183,7 @@ class Session {
void update_status_time(bool show_pause = false, bool show_done = false);
void render();
void denoise();
void render(bool with_denoising);
void copy_to_display_buffer(int sample);
void reset_(BufferParams &params, int samples);
@ -197,7 +196,7 @@ class Session {
bool draw_gpu(BufferParams &params, DeviceDrawParams &draw_params);
void reset_gpu(BufferParams &params, int samples);
bool acquire_tile(Device *tile_device, RenderTile &tile, RenderTile::Task task);
bool acquire_tile(RenderTile &tile, Device *tile_device, uint tile_types);
void update_tile_sample(RenderTile &tile);
void release_tile(RenderTile &tile);

@ -495,20 +495,20 @@ bool TileManager::finish_tile(int index, bool &delete_tile)
}
}
bool TileManager::next_tile(Tile *&tile, int device, bool denoising)
bool TileManager::next_tile(Tile *&tile, int device, uint tile_types)
{
/* Preserve device if requested, unless this is a separate denoising device that just wants to
* grab any available tile. */
const bool preserve_device = preserve_tile_device && device < num_devices;
int tile_index = -1;
int logical_device = preserve_device ? device : 0;
if (tile_types & RenderTile::DENOISE) {
int tile_index = -1;
int logical_device = preserve_device ? device : 0;
if (denoising) {
while (logical_device < state.denoising_tiles.size()) {
if (state.denoising_tiles[logical_device].empty()) {
if (preserve_device) {
return false;
break;
}
else {
logical_device++;
@ -520,12 +520,21 @@ bool TileManager::next_tile(Tile *&tile, int device, bool denoising)
state.denoising_tiles[logical_device].pop_front();
break;
}
if (tile_index >= 0) {
tile = &state.tiles[tile_index];
return true;
}
}
else {
if (tile_types & RenderTile::PATH_TRACE) {
int tile_index = -1;
int logical_device = preserve_device ? device : 0;
while (logical_device < state.render_tiles.size()) {
if (state.render_tiles[logical_device].empty()) {
if (preserve_device) {
return false;
break;
}
else {
logical_device++;
@ -537,11 +546,11 @@ bool TileManager::next_tile(Tile *&tile, int device, bool denoising)
state.render_tiles[logical_device].pop_front();
break;
}
}
if (tile_index >= 0) {
tile = &state.tiles[tile_index];
return true;
if (tile_index >= 0) {
tile = &state.tiles[tile_index];
return true;
}
}
return false;

@ -106,7 +106,7 @@ class TileManager {
void reset(BufferParams &params, int num_samples);
void set_samples(int num_samples);
bool next();
bool next_tile(Tile *&tile, int device, bool denoising);
bool next_tile(Tile *&tile, int device, uint tile_types);
bool finish_tile(int index, bool &delete_tile);
bool done();
bool has_tiles();