diff --git a/intern/cycles/device/cuda/device_cuda_impl.cpp b/intern/cycles/device/cuda/device_cuda_impl.cpp index f6a4f93a690..4a7c45d8b93 100644 --- a/intern/cycles/device/cuda/device_cuda_impl.cpp +++ b/intern/cycles/device/cuda/device_cuda_impl.cpp @@ -2144,7 +2144,7 @@ void CUDADevice::thread_run(DeviceTask *task) { CUDAContextScope scope(this); - if (task->type == DeviceTask::RENDER || task->type == DeviceTask::DENOISE) { + if (task->type == DeviceTask::RENDER) { DeviceRequestedFeatures requested_features; if (use_split_kernel()) { if (split_kernel == NULL) { @@ -2159,7 +2159,7 @@ void CUDADevice::thread_run(DeviceTask *task) RenderTile tile; DenoisingTask denoising(this, *task); - while (task->acquire_tile(this, tile)) { + while (task->acquire_tile(this, tile, task->tile_types)) { if (tile.task == RenderTile::PATH_TRACE) { if (use_split_kernel()) { device_only_memory void_buffer(this, "void_buffer"); diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index 795781ee072..1c9d2227ac3 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -511,7 +511,7 @@ class CPUDevice : public Device { void thread_run(DeviceTask *task) { - if (task->type == DeviceTask::RENDER || task->type == DeviceTask::DENOISE) + if (task->type == DeviceTask::RENDER) thread_render(*task); else if (task->type == DeviceTask::SHADER) thread_shader(*task); @@ -927,7 +927,7 @@ class CPUDevice : public Device { DenoisingTask denoising(this, task); denoising.profiler = &kg->profiler; - while (task.acquire_tile(this, tile)) { + while (task.acquire_tile(this, tile, task.tile_types)) { if (tile.task == RenderTile::PATH_TRACE) { if (use_split_kernel) { device_only_memory void_buffer(this, "void_buffer"); diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp index 8226221ea08..0044610eeb4 100644 --- a/intern/cycles/device/device_multi.cpp +++ b/intern/cycles/device/device_multi.cpp @@ -482,11 +482,24 @@ class MultiDevice : public Device { void task_add(DeviceTask &task) { - list &task_devices = denoising_devices.empty() || - (task.type != DeviceTask::DENOISE && - task.type != DeviceTask::DENOISE_BUFFER) ? - devices : - denoising_devices; + list task_devices = devices; + if (!denoising_devices.empty()) { + if (task.type == DeviceTask::DENOISE_BUFFER) { + /* Denoising tasks should be redirected to the denoising devices entirely. */ + task_devices = denoising_devices; + } + else if (task.type == DeviceTask::RENDER && (task.tile_types & RenderTile::DENOISE)) { + const uint tile_types = task.tile_types; + /* For normal rendering tasks only redirect the denoising part to the denoising devices. + * Do not need to split the task here, since they all run through 'acquire_tile'. */ + task.tile_types = RenderTile::DENOISE; + foreach (SubDevice &sub, denoising_devices) { + sub.device->task_add(task); + } + /* Rendering itself should still be executed on the rendering devices. */ + task.tile_types = tile_types ^ RenderTile::DENOISE; + } + } list tasks; task.split(tasks, task_devices.size()); diff --git a/intern/cycles/device/device_optix.cpp b/intern/cycles/device/device_optix.cpp index 0121b89e9d8..61a5c74f69e 100644 --- a/intern/cycles/device/device_optix.cpp +++ b/intern/cycles/device/device_optix.cpp @@ -569,9 +569,14 @@ class OptiXDevice : public CUDADevice { if (have_error()) return; // Abort early if there was an error previously - if (task.type == DeviceTask::RENDER || task.type == DeviceTask::DENOISE) { + if (task.type == DeviceTask::RENDER) { + if (thread_index != 0) { + // Only execute denoising in a single thread (see also 'task_add') + task.tile_types &= ~RenderTile::DENOISE; + } + RenderTile tile; - while (task.acquire_tile(this, tile)) { + while (task.acquire_tile(this, tile, task.tile_types)) { if (tile.task == RenderTile::PATH_TRACE) launch_render(task, tile, thread_index); else if (tile.task == RenderTile::DENOISE) @@ -1451,7 +1456,7 @@ class OptiXDevice : public CUDADevice { return; } - if (task.type == DeviceTask::DENOISE || task.type == DeviceTask::DENOISE_BUFFER) { + if (task.type == DeviceTask::DENOISE_BUFFER) { // Execute denoising in a single thread (e.g. to avoid race conditions during creation) task_pool.push(new OptiXDeviceTask(this, task, 0)); return; diff --git a/intern/cycles/device/device_task.cpp b/intern/cycles/device/device_task.cpp index 8f15e8c8c1e..36522b874ab 100644 --- a/intern/cycles/device/device_task.cpp +++ b/intern/cycles/device/device_task.cpp @@ -68,7 +68,7 @@ int DeviceTask::get_subtask_count(int num, int max_size) if (type == SHADER) { num = min(shader_w, num); } - else if (type == RENDER || type == DENOISE) { + else if (type == RENDER) { } else { num = min(h, num); @@ -94,7 +94,7 @@ void DeviceTask::split(list &tasks, int num, int max_size) tasks.push_back(task); } } - else if (type == RENDER || type == DENOISE) { + else if (type == RENDER) { for (int i = 0; i < num; i++) tasks.push_back(*this); } diff --git a/intern/cycles/device/device_task.h b/intern/cycles/device/device_task.h index 0f718528b86..972f6131092 100644 --- a/intern/cycles/device/device_task.h +++ b/intern/cycles/device/device_task.h @@ -64,7 +64,7 @@ class DenoiseParams { class DeviceTask : public Task { public: - typedef enum { RENDER, DENOISE, DENOISE_BUFFER, FILM_CONVERT, SHADER } Type; + typedef enum { RENDER, FILM_CONVERT, SHADER, DENOISE_BUFFER } Type; Type type; int x, y, w, h; @@ -90,7 +90,7 @@ class DeviceTask : public Task { void update_progress(RenderTile *rtile, int pixel_samples = -1); - function acquire_tile; + function acquire_tile; function update_progress_sample; function update_tile_sample; function release_tile; @@ -98,6 +98,7 @@ class DeviceTask : public Task { function map_neighbor_tiles; function unmap_neighbor_tiles; + uint tile_types; DenoiseParams denoising; bool denoising_from_render; vector denoising_frames; diff --git a/intern/cycles/device/opencl/device_opencl_impl.cpp b/intern/cycles/device/opencl/device_opencl_impl.cpp index 012f6dbe114..68cdfd5238c 100644 --- a/intern/cycles/device/opencl/device_opencl_impl.cpp +++ b/intern/cycles/device/opencl/device_opencl_impl.cpp @@ -1308,7 +1308,7 @@ void OpenCLDevice::thread_run(DeviceTask *task) { flush_texture_buffers(); - if (task->type == DeviceTask::RENDER || task->type == DeviceTask::DENOISE) { + if (task->type == DeviceTask::RENDER) { RenderTile tile; DenoisingTask denoising(this, *task); @@ -1317,7 +1317,7 @@ void OpenCLDevice::thread_run(DeviceTask *task) kgbuffer.alloc_to_device(1); /* Keep rendering tiles until done. */ - while (task->acquire_tile(this, tile)) { + while (task->acquire_tile(this, tile, task->tile_types)) { if (tile.task == RenderTile::PATH_TRACE) { assert(tile.task == RenderTile::PATH_TRACE); scoped_timer timer(&tile.buffers->render_time); diff --git a/intern/cycles/render/buffers.h b/intern/cycles/render/buffers.h index 1042b42810f..42efb031843 100644 --- a/intern/cycles/render/buffers.h +++ b/intern/cycles/render/buffers.h @@ -130,7 +130,7 @@ class DisplayBuffer { class RenderTile { public: - typedef enum { PATH_TRACE, DENOISE } Task; + typedef enum { PATH_TRACE = (1 << 0), DENOISE = (1 << 1) } Task; Task task; int x, y, w, h; diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp index f45e6d68c97..0d1f8df3610 100644 --- a/intern/cycles/render/session.cpp +++ b/intern/cycles/render/session.cpp @@ -301,12 +301,7 @@ void Session::run_gpu() update_status_time(); /* render */ - render(); - - /* denoise */ - if (need_denoise) { - denoise(); - } + render(need_denoise); device->task_wait(); @@ -384,7 +379,7 @@ bool Session::draw_cpu(BufferParams &buffer_params, DeviceDrawParams &draw_param return false; } -bool Session::acquire_tile(Device *tile_device, RenderTile &rtile, RenderTile::Task task) +bool Session::acquire_tile(RenderTile &rtile, Device *tile_device, uint tile_types) { if (progress.get_cancel()) { if (params.progressive_refine == false) { @@ -399,9 +394,9 @@ bool Session::acquire_tile(Device *tile_device, RenderTile &rtile, RenderTile::T Tile *tile; int device_num = device->device_number(tile_device); - while (!tile_manager.next_tile(tile, device_num, task == RenderTile::DENOISE)) { + while (!tile_manager.next_tile(tile, device_num, tile_types)) { /* Wait for denoising tiles to become available */ - if (task == RenderTile::DENOISE && !progress.get_cancel() && tile_manager.has_tiles()) { + if ((tile_types & RenderTile::DENOISE) && !progress.get_cancel() && tile_manager.has_tiles()) { denoising_cond.wait(tile_lock); continue; } @@ -417,7 +412,7 @@ bool Session::acquire_tile(Device *tile_device, RenderTile &rtile, RenderTile::T rtile.num_samples = tile_manager.state.num_samples; rtile.resolution = tile_manager.state.resolution_divider; rtile.tile_index = tile->index; - rtile.task = task; + rtile.task = tile->state == Tile::DENOISE ? RenderTile::DENOISE : RenderTile::PATH_TRACE; tile_lock.unlock(); @@ -700,12 +695,7 @@ void Session::run_cpu() update_status_time(); /* render */ - render(); - - /* denoise */ - if (need_denoise) { - denoise(); - } + render(need_denoise); /* update status and timing */ update_status_time(); @@ -1089,99 +1079,90 @@ void Session::update_status_time(bool show_pause, bool show_done) progress.set_status(status, substatus); } -void Session::render() +void Session::render(bool with_denoising) { - /* Clear buffers. */ if (buffers && tile_manager.state.sample == tile_manager.range_start_sample) { + /* Clear buffers. */ buffers->zero(); } + if (tile_manager.state.buffer.width == 0 || tile_manager.state.buffer.height == 0) { + return; /* Avoid empty launches. */ + } + /* Add path trace task. */ DeviceTask task(DeviceTask::RENDER); - task.acquire_tile = function_bind(&Session::acquire_tile, this, _1, _2, RenderTile::PATH_TRACE); + task.acquire_tile = function_bind(&Session::acquire_tile, this, _2, _1, _3); task.release_tile = function_bind(&Session::release_tile, this, _1); + task.map_neighbor_tiles = function_bind(&Session::map_neighbor_tiles, this, _1, _2); + task.unmap_neighbor_tiles = function_bind(&Session::unmap_neighbor_tiles, this, _1, _2); task.get_cancel = function_bind(&Progress::get_cancel, &this->progress); task.update_tile_sample = function_bind(&Session::update_tile_sample, this, _1); task.update_progress_sample = function_bind(&Progress::add_samples, &this->progress, _1, _2); task.need_finish_queue = params.progressive_refine; task.integrator_branched = scene->integrator->method == Integrator::BRANCHED_PATH; - device->task_add(task); -} + /* Acquire render tiles by default. */ + task.tile_types = RenderTile::PATH_TRACE; -void Session::denoise() -{ - if (!params.run_denoising) { - return; - } - - /* Do not denoise viewport until the sample at which denoising should start is reached. */ - if (!params.background && tile_manager.state.sample < params.denoising_start_sample) { - return; - } - - /* Cannot denoise with resolution divider and separate denoising devices. - * It breaks the copy in 'MultiDevice::map_neighbor_tiles' (which operates on the full buffer - * dimensions and not the scaled ones). */ - if (!params.device.denoising_devices.empty() && tile_manager.state.resolution_divider > 1) { - return; - } - - /* It can happen that denoising was already enabled, but the scene still needs an update. */ - if (scene->film->need_update || !scene->film->denoising_data_offset) { - return; - } - - /* Add separate denoising task. */ - DeviceTask task(DeviceTask::DENOISE); - - if (tile_manager.schedule_denoising) { - /* Run denoising on each tile. */ - task.acquire_tile = function_bind(&Session::acquire_tile, this, _1, _2, RenderTile::DENOISE); - task.release_tile = function_bind(&Session::release_tile, this, _1); - task.update_tile_sample = function_bind(&Session::update_tile_sample, this, _1); - task.update_progress_sample = function_bind(&Progress::add_samples, &this->progress, _1, _2); - } - else { - assert(buffers); - - if (tile_manager.state.buffer.width == 0 || tile_manager.state.buffer.height == 0) { - return; /* Avoid empty launches. */ + with_denoising = params.run_denoising && with_denoising; + if (with_denoising) { + /* Do not denoise viewport until the sample at which denoising should start is reached. */ + if (!params.background && tile_manager.state.sample < params.denoising_start_sample) { + with_denoising = false; } - /* Wait for rendering to finish. */ - device->task_wait(); + /* Cannot denoise with resolution divider and separate denoising devices. + * It breaks the copy in 'MultiDevice::map_neighbor_tiles' (which operates on the full buffer + * dimensions and not the scaled ones). */ + if (!params.device.denoising_devices.empty() && tile_manager.state.resolution_divider > 1) { + with_denoising = false; + } - /* Run denoising on the whole image at once. */ - task.type = DeviceTask::DENOISE_BUFFER; - task.x = tile_manager.state.buffer.full_x; - task.y = tile_manager.state.buffer.full_y; - task.w = tile_manager.state.buffer.width; - task.h = tile_manager.state.buffer.height; - task.buffer = buffers->buffer.device_pointer; - task.sample = tile_manager.state.sample; - task.num_samples = tile_manager.state.num_samples; - tile_manager.state.buffer.get_offset_stride(task.offset, task.stride); - task.buffers = buffers; + /* It can happen that denoising was already enabled, but the scene still needs an update. */ + if (scene->film->need_update || !scene->film->denoising_data_offset) { + with_denoising = false; + } } - task.get_cancel = function_bind(&Progress::get_cancel, &this->progress); - task.need_finish_queue = params.progressive_refine; - task.map_neighbor_tiles = function_bind(&Session::map_neighbor_tiles, this, _1, _2); - task.unmap_neighbor_tiles = function_bind(&Session::unmap_neighbor_tiles, this, _1, _2); + if (with_denoising) { + task.denoising = params.denoising; - task.denoising = params.denoising; + task.pass_stride = scene->film->pass_stride; + task.target_pass_stride = task.pass_stride; + task.pass_denoising_data = scene->film->denoising_data_offset; + task.pass_denoising_clean = scene->film->denoising_clean_offset; - task.pass_stride = scene->film->pass_stride; - task.target_pass_stride = task.pass_stride; - task.pass_denoising_data = scene->film->denoising_data_offset; - task.pass_denoising_clean = scene->film->denoising_clean_offset; + task.denoising_from_render = true; + task.denoising_do_filter = params.full_denoising; + task.denoising_use_optix = params.optix_denoising; + task.denoising_write_passes = params.write_denoising_passes; - task.denoising_from_render = true; - task.denoising_do_filter = params.full_denoising; - task.denoising_use_optix = params.optix_denoising; - task.denoising_write_passes = params.write_denoising_passes; + if (tile_manager.schedule_denoising) { + /* Acquire denoising tiles during rendering. */ + task.tile_types |= RenderTile::DENOISE; + } + else { + assert(buffers); + + /* Schedule rendering and wait for it to finish. */ + device->task_add(task); + device->task_wait(); + + /* Then run denoising on the whole image at once. */ + task.type = DeviceTask::DENOISE_BUFFER; + task.x = tile_manager.state.buffer.full_x; + task.y = tile_manager.state.buffer.full_y; + task.w = tile_manager.state.buffer.width; + task.h = tile_manager.state.buffer.height; + task.buffer = buffers->buffer.device_pointer; + task.sample = tile_manager.state.sample; + task.num_samples = tile_manager.state.num_samples; + tile_manager.state.buffer.get_offset_stride(task.offset, task.stride); + task.buffers = buffers; + } + } device->task_add(task); } diff --git a/intern/cycles/render/session.h b/intern/cycles/render/session.h index 40ec3979afd..7f3614ccb19 100644 --- a/intern/cycles/render/session.h +++ b/intern/cycles/render/session.h @@ -183,8 +183,7 @@ class Session { void update_status_time(bool show_pause = false, bool show_done = false); - void render(); - void denoise(); + void render(bool with_denoising); void copy_to_display_buffer(int sample); void reset_(BufferParams ¶ms, int samples); @@ -197,7 +196,7 @@ class Session { bool draw_gpu(BufferParams ¶ms, DeviceDrawParams &draw_params); void reset_gpu(BufferParams ¶ms, int samples); - bool acquire_tile(Device *tile_device, RenderTile &tile, RenderTile::Task task); + bool acquire_tile(RenderTile &tile, Device *tile_device, uint tile_types); void update_tile_sample(RenderTile &tile); void release_tile(RenderTile &tile); diff --git a/intern/cycles/render/tile.cpp b/intern/cycles/render/tile.cpp index 4ddfd56cd01..1480b6d1aab 100644 --- a/intern/cycles/render/tile.cpp +++ b/intern/cycles/render/tile.cpp @@ -495,20 +495,20 @@ bool TileManager::finish_tile(int index, bool &delete_tile) } } -bool TileManager::next_tile(Tile *&tile, int device, bool denoising) +bool TileManager::next_tile(Tile *&tile, int device, uint tile_types) { /* Preserve device if requested, unless this is a separate denoising device that just wants to * grab any available tile. */ const bool preserve_device = preserve_tile_device && device < num_devices; - int tile_index = -1; - int logical_device = preserve_device ? device : 0; + if (tile_types & RenderTile::DENOISE) { + int tile_index = -1; + int logical_device = preserve_device ? device : 0; - if (denoising) { while (logical_device < state.denoising_tiles.size()) { if (state.denoising_tiles[logical_device].empty()) { if (preserve_device) { - return false; + break; } else { logical_device++; @@ -520,12 +520,21 @@ bool TileManager::next_tile(Tile *&tile, int device, bool denoising) state.denoising_tiles[logical_device].pop_front(); break; } + + if (tile_index >= 0) { + tile = &state.tiles[tile_index]; + return true; + } } - else { + + if (tile_types & RenderTile::PATH_TRACE) { + int tile_index = -1; + int logical_device = preserve_device ? device : 0; + while (logical_device < state.render_tiles.size()) { if (state.render_tiles[logical_device].empty()) { if (preserve_device) { - return false; + break; } else { logical_device++; @@ -537,11 +546,11 @@ bool TileManager::next_tile(Tile *&tile, int device, bool denoising) state.render_tiles[logical_device].pop_front(); break; } - } - if (tile_index >= 0) { - tile = &state.tiles[tile_index]; - return true; + if (tile_index >= 0) { + tile = &state.tiles[tile_index]; + return true; + } } return false; diff --git a/intern/cycles/render/tile.h b/intern/cycles/render/tile.h index 14c693683c4..9fb9c1ca782 100644 --- a/intern/cycles/render/tile.h +++ b/intern/cycles/render/tile.h @@ -106,7 +106,7 @@ class TileManager { void reset(BufferParams ¶ms, int num_samples); void set_samples(int num_samples); bool next(); - bool next_tile(Tile *&tile, int device, bool denoising); + bool next_tile(Tile *&tile, int device, uint tile_types); bool finish_tile(int index, bool &delete_tile); bool done(); bool has_tiles();