1387 lines
41 KiB
C++
1387 lines
41 KiB
C++
/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
|
|
*
|
|
* SPDX-License-Identifier: Apache-2.0 */
|
|
|
|
#include "integrator/path_trace.h"

#include <atomic>

#include "device/cpu/device.h"
#include "device/device.h"
#include "integrator/pass_accessor.h"
#include "integrator/path_trace_display.h"
#include "integrator/path_trace_tile.h"
#include "integrator/render_scheduler.h"
#include "scene/pass.h"
#include "scene/scene.h"
#include "session/tile.h"
#include "util/algorithm.h"
#include "util/log.h"
#include "util/progress.h"
#include "util/tbb.h"
#include "util/time.h"
|
|
|
|
CCL_NAMESPACE_BEGIN
|
|
|
|
/* Set up the path tracer: remember the collaborating objects, create an auxiliary CPU device, and
 * create one path trace work for every device reported by `device->foreach_device()`. */
PathTrace::PathTrace(Device *device,
                     Film *film,
                     DeviceScene *device_scene,
                     RenderScheduler &render_scheduler,
                     TileManager &tile_manager)
    : device_(device),
      film_(film),
      device_scene_(device_scene),
      render_scheduler_(render_scheduler),
      tile_manager_(tile_manager)
{
  DCHECK_NE(device_, nullptr);

  /* The CPU device is created from the first entry reported by `device_cpu_info()`. */
  {
    vector<DeviceInfo> cpu_device_infos;
    device_cpu_info(cpu_device_infos);

    cpu_device_.reset(device_cpu_create(cpu_device_infos[0], device->stats, device->profiler));
  }

  /* Path tracing works are created up-front so that incremental sampling can re-use them as much
   * as possible. Devices for which no work could be created are skipped. */
  device_->foreach_device([&](Device *sub_device) {
    unique_ptr<PathTraceWork> new_work = PathTraceWork::create(
        sub_device, film, device_scene, &render_cancel_.is_requested);
    if (!new_work) {
      return;
    }
    path_trace_works_.emplace_back(std::move(new_work));
  });

  /* One balance entry per work, initialized to the default distribution. */
  work_balance_infos_.resize(path_trace_works_.size());
  work_balance_do_initial(work_balance_infos_);

  /* Re-balancing only makes sense when there is more than one work to balance between. */
  const bool has_multiple_works = path_trace_works_.size() > 1;
  render_scheduler.set_need_schedule_rebalance(has_multiple_works);
}
|
|
|
|
/* Calls `destroy_gpu_resources()` before the members of the object are destroyed. */
PathTrace::~PathTrace()
{
  destroy_gpu_resources();
}
|
|
|
|
void PathTrace::load_kernels()
|
|
{
|
|
if (denoiser_) {
|
|
/* Activate graphics interop while denoiser device is created, so that it can choose a device
|
|
* that supports interop for faster display updates. */
|
|
if (display_ && path_trace_works_.size() > 1) {
|
|
display_->graphics_interop_activate();
|
|
}
|
|
|
|
denoiser_->load_kernels(progress_);
|
|
|
|
if (display_ && path_trace_works_.size() > 1) {
|
|
display_->graphics_interop_deactivate();
|
|
}
|
|
}
|
|
}
|
|
|
|
void PathTrace::alloc_work_memory()
|
|
{
|
|
for (auto &&path_trace_work : path_trace_works_) {
|
|
path_trace_work->alloc_work_memory();
|
|
}
|
|
}
|
|
|
|
bool PathTrace::ready_to_reset()
|
|
{
|
|
/* The logic here is optimized for the best feedback in the viewport, which implies having a GPU
|
|
* display. Of there is no such display, the logic here will break. */
|
|
DCHECK(display_);
|
|
|
|
/* The logic here tries to provide behavior which feels the most interactive feel to artists.
|
|
* General idea is to be able to reset as quickly as possible, while still providing interactive
|
|
* feel.
|
|
*
|
|
* If the render result was ever drawn after previous reset, consider that reset is now possible.
|
|
* This way camera navigation gives the quickest feedback of rendered pixels, regardless of
|
|
* whether CPU or GPU drawing pipeline is used.
|
|
*
|
|
* Consider reset happening after redraw "slow" enough to not clog anything. This is a bit
|
|
* arbitrary, but seems to work very well with viewport navigation in Blender. */
|
|
|
|
if (did_draw_after_reset_) {
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
void PathTrace::reset(const BufferParams &full_params,
|
|
const BufferParams &big_tile_params,
|
|
const bool reset_rendering)
|
|
{
|
|
if (big_tile_params_.modified(big_tile_params)) {
|
|
big_tile_params_ = big_tile_params;
|
|
render_state_.need_reset_params = true;
|
|
}
|
|
|
|
full_params_ = full_params;
|
|
|
|
/* NOTE: GPU display checks for buffer modification and avoids unnecessary re-allocation.
|
|
* It is requires to inform about reset whenever it happens, so that the redraw state tracking is
|
|
* properly updated. */
|
|
if (display_) {
|
|
display_->reset(big_tile_params, reset_rendering);
|
|
}
|
|
|
|
render_state_.has_denoised_result = false;
|
|
render_state_.tile_written = false;
|
|
|
|
did_draw_after_reset_ = false;
|
|
}
|
|
|
|
void PathTrace::device_free()
|
|
{
|
|
/* Free render buffers used by the path trace work to reduce memory peak. */
|
|
BufferParams empty_params;
|
|
empty_params.pass_stride = 0;
|
|
empty_params.update_offset_stride();
|
|
for (auto &&path_trace_work : path_trace_works_) {
|
|
path_trace_work->get_render_buffers()->reset(empty_params);
|
|
}
|
|
render_state_.need_reset_params = true;
|
|
}
|
|
|
|
/* Store the progress object used for status reporting and cancel checks. May be null. */
void PathTrace::set_progress(Progress *progress)
{
  progress_ = progress;
}
|
|
|
|
void PathTrace::render(const RenderWork &render_work)
|
|
{
|
|
/* Indicate that rendering has started and that it can be requested to cancel. */
|
|
{
|
|
thread_scoped_lock lock(render_cancel_.mutex);
|
|
if (render_cancel_.is_requested) {
|
|
return;
|
|
}
|
|
render_cancel_.is_rendering = true;
|
|
}
|
|
|
|
render_pipeline(render_work);
|
|
|
|
/* Indicate that rendering has finished, making it so thread which requested `cancel()` can carry
|
|
* on. */
|
|
{
|
|
thread_scoped_lock lock(render_cancel_.mutex);
|
|
render_cancel_.is_rendering = false;
|
|
render_cancel_.condition.notify_one();
|
|
}
|
|
}
|
|
|
|
/* Execute all steps of a single render work in order: buffer initialization, rebalance, path
 * tracing, adaptive sampling filter, cryptomatte post-processing, denoising, tile write, display
 * update, progress update and full-frame finalization.
 *
 * The pipeline is left early, after any of the compute steps, when an "instant" cancel was
 * requested. */
void PathTrace::render_pipeline(RenderWork render_work)
{
  /* NOTE: Only the "instant" cancel is checked for here. The user-requested cancel via progress is
   * checked in Session, and in that case the work is expected to be finished here. */

  const bool need_cryptomatte = device_scene_->data.film.cryptomatte_passes != 0;
  render_scheduler_.set_need_schedule_cryptomatte(need_cryptomatte);

  render_init_kernel_execution();

  render_scheduler_.report_work_begin(render_work);

  init_render_buffers(render_work);

  rebalance(render_work);

  /* All per-thread guiding structures are to be prepared before the next rendering
   * iteration/progression starts. */
  const bool use_guiding = device_scene_->data.integrator.use_guiding;
  if (use_guiding) {
    guiding_prepare_structures();
  }

  path_trace(render_work);
  if (render_cancel_.is_requested) {
    return;
  }

  /* The guiding field is updated from the training data/samples which were collected during the
   * rendering iteration/progression. */
  const bool train_guiding = device_scene_->data.integrator.train_guiding;
  if (use_guiding && train_guiding) {
    guiding_update_structures();
  }

  adaptive_sample(render_work);
  if (render_cancel_.is_requested) {
    return;
  }

  cryptomatte_postprocess(render_work);
  if (render_cancel_.is_requested) {
    return;
  }

  denoise(render_work);
  if (render_cancel_.is_requested) {
    return;
  }

  /* Output steps. */
  write_tile_buffer(render_work);
  update_display(render_work);

  progress_update_if_needed(render_work);

  finalize_full_buffer_on_disk(render_work);
}
|
|
|
|
void PathTrace::render_init_kernel_execution()
|
|
{
|
|
for (auto &&path_trace_work : path_trace_works_) {
|
|
path_trace_work->init_execution();
|
|
}
|
|
}
|
|
|
|
/* TODO(sergey): Look into `std::function` rather than using a template. Should not be a
|
|
* measurable performance impact at runtime, but will make compilation faster and binary somewhat
|
|
* smaller. */
|
|
template<typename Callback>
|
|
static void foreach_sliced_buffer_params(const vector<unique_ptr<PathTraceWork>> &path_trace_works,
|
|
const vector<WorkBalanceInfo> &work_balance_infos,
|
|
const BufferParams &buffer_params,
|
|
const int overscan,
|
|
const Callback &callback)
|
|
{
|
|
const int num_works = path_trace_works.size();
|
|
const int window_height = buffer_params.window_height;
|
|
|
|
int current_y = 0;
|
|
for (int i = 0; i < num_works; ++i) {
|
|
const double weight = work_balance_infos[i].weight;
|
|
const int slice_window_full_y = buffer_params.full_y + buffer_params.window_y + current_y;
|
|
const int slice_window_height = max(lround(window_height * weight), 1);
|
|
|
|
/* Disallow negative values to deal with situations when there are more compute devices than
|
|
* scan-lines. */
|
|
const int remaining_window_height = max(0, window_height - current_y);
|
|
|
|
BufferParams slice_params = buffer_params;
|
|
|
|
slice_params.full_y = max(slice_window_full_y - overscan, buffer_params.full_y);
|
|
slice_params.window_y = slice_window_full_y - slice_params.full_y;
|
|
|
|
if (i < num_works - 1) {
|
|
slice_params.window_height = min(slice_window_height, remaining_window_height);
|
|
}
|
|
else {
|
|
slice_params.window_height = remaining_window_height;
|
|
}
|
|
|
|
slice_params.height = slice_params.window_y + slice_params.window_height + overscan;
|
|
slice_params.height = min(slice_params.height,
|
|
buffer_params.height + buffer_params.full_y - slice_params.full_y);
|
|
|
|
slice_params.update_offset_stride();
|
|
|
|
callback(path_trace_works[i].get(), slice_params);
|
|
|
|
current_y += slice_params.window_height;
|
|
}
|
|
}
|
|
|
|
void PathTrace::update_allocated_work_buffer_params()
|
|
{
|
|
const int overscan = tile_manager_.get_tile_overscan();
|
|
foreach_sliced_buffer_params(path_trace_works_,
|
|
work_balance_infos_,
|
|
big_tile_params_,
|
|
overscan,
|
|
[](PathTraceWork *path_trace_work, const BufferParams ¶ms) {
|
|
RenderBuffers *buffers = path_trace_work->get_render_buffers();
|
|
buffers->reset(params);
|
|
});
|
|
}
|
|
|
|
/* Return a copy of `params` with all sizes and offsets divided by `resolution_divider` (integer
 * division). Sizes are clamped so that they never become smaller than 1; offsets are not clamped.
 *
 * NOTE(review): `resolution_divider` is used as a divisor without a check; callers are presumably
 * expected to pass a value >= 1 -- confirm. */
static BufferParams scale_buffer_params(const BufferParams &params, int resolution_divider)
{
  BufferParams scaled_params = params;

  /* Buffer size. */
  scaled_params.width = max(1, params.width / resolution_divider);
  scaled_params.height = max(1, params.height / resolution_divider);

  /* Window inside of the buffer. */
  scaled_params.window_x = params.window_x / resolution_divider;
  scaled_params.window_y = params.window_y / resolution_divider;
  scaled_params.window_width = max(1, params.window_width / resolution_divider);
  scaled_params.window_height = max(1, params.window_height / resolution_divider);

  /* Placement within the full frame. */
  scaled_params.full_x = params.full_x / resolution_divider;
  scaled_params.full_y = params.full_y / resolution_divider;
  scaled_params.full_width = max(1, params.full_width / resolution_divider);
  scaled_params.full_height = max(1, params.full_height / resolution_divider);

  scaled_params.update_offset_stride();

  return scaled_params;
}
|
|
|
|
void PathTrace::update_effective_work_buffer_params(const RenderWork &render_work)
|
|
{
|
|
const int resolution_divider = render_work.resolution_divider;
|
|
|
|
const BufferParams scaled_full_params = scale_buffer_params(full_params_, resolution_divider);
|
|
const BufferParams scaled_big_tile_params = scale_buffer_params(big_tile_params_,
|
|
resolution_divider);
|
|
|
|
const int overscan = tile_manager_.get_tile_overscan();
|
|
|
|
foreach_sliced_buffer_params(path_trace_works_,
|
|
work_balance_infos_,
|
|
scaled_big_tile_params,
|
|
overscan,
|
|
[&](PathTraceWork *path_trace_work, const BufferParams params) {
|
|
path_trace_work->set_effective_buffer_params(
|
|
scaled_full_params, scaled_big_tile_params, params);
|
|
});
|
|
|
|
render_state_.effective_big_tile_params = scaled_big_tile_params;
|
|
}
|
|
|
|
void PathTrace::update_work_buffer_params_if_needed(const RenderWork &render_work)
|
|
{
|
|
if (render_state_.need_reset_params) {
|
|
update_allocated_work_buffer_params();
|
|
}
|
|
|
|
if (render_state_.need_reset_params ||
|
|
render_state_.resolution_divider != render_work.resolution_divider)
|
|
{
|
|
update_effective_work_buffer_params(render_work);
|
|
}
|
|
|
|
render_state_.resolution_divider = render_work.resolution_divider;
|
|
render_state_.need_reset_params = false;
|
|
}
|
|
|
|
void PathTrace::init_render_buffers(const RenderWork &render_work)
|
|
{
|
|
update_work_buffer_params_if_needed(render_work);
|
|
|
|
/* Handle initialization scheduled by the render scheduler. */
|
|
if (render_work.init_render_buffers) {
|
|
parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) {
|
|
path_trace_work->zero_render_buffers();
|
|
});
|
|
|
|
tile_buffer_read();
|
|
}
|
|
}
|
|
|
|
/* Path trace the samples scheduled in the render work on all works in parallel, gather per-work
 * timing and occupancy for work balancing, and report the average occupancy and the total time to
 * the render scheduler. Does nothing when no samples are scheduled. */
void PathTrace::path_trace(RenderWork &render_work)
{
  if (!render_work.path_trace.num_samples) {
    return;
  }

  VLOG_WORK << "Will path trace " << render_work.path_trace.num_samples
            << " samples at the resolution divider " << render_work.resolution_divider;

  const double start_time = time_dt();

  const int num_works = path_trace_works_.size();

  thread_capture_fp_settings();

  parallel_for(0, num_works, [&](int work_index) {
    const double work_start_time = time_dt();
    const int num_samples = render_work.path_trace.num_samples;

    /* Works whose device is in an error state are skipped. */
    PathTraceWork *work = path_trace_works_[work_index].get();
    if (work->get_device()->have_error()) {
      return;
    }

    PathTraceWork::RenderStatistics statistics;
    work->render_samples(statistics,
                         render_work.path_trace.start_sample,
                         num_samples,
                         render_work.path_trace.sample_offset);

    /* Every work only touches its own balance entry. */
    const double work_time = time_dt() - work_start_time;
    WorkBalanceInfo &balance_info = work_balance_infos_[work_index];
    balance_info.time_spent += work_time;
    balance_info.occupancy = statistics.occupancy;

    VLOG_INFO << "Rendered " << num_samples << " samples in " << work_time << " seconds ("
              << work_time / num_samples
              << " seconds per sample), occupancy: " << statistics.occupancy;
  });

  /* Report the occupancy averaged over all works. */
  float occupancy_accum = 0.0f;
  for (const WorkBalanceInfo &balance_info : work_balance_infos_) {
    occupancy_accum += balance_info.occupancy;
  }
  const float occupancy = occupancy_accum / num_works;
  render_scheduler_.report_path_trace_occupancy(render_work, occupancy);

  const double total_time = time_dt() - start_time;
  render_scheduler_.report_path_trace_time(render_work, total_time, is_cancel_requested());
}
|
|
|
|
/* Run the adaptive sampling convergence filter on all works and count the pixels which are still
 * active. Depending on the count the render scheduler is asked to re-schedule the work with a
 * lower threshold, in which case the filter is run again. Does nothing when the filter is not
 * scheduled in the render work. */
void PathTrace::adaptive_sample(RenderWork &render_work)
{
  if (!render_work.adaptive_sampling.filter) {
    return;
  }

  /* Re-scheduling because of a low number of active pixels is only done once per call. */
  bool did_reschedule_on_idle = false;

  for (;;) {
    VLOG_WORK << "Will filter adaptive stopping buffer, threshold "
              << render_work.adaptive_sampling.threshold;
    if (render_work.adaptive_sampling.reset) {
      VLOG_WORK << "Will re-calculate convergency flag for currently converged pixels.";
    }

    const double start_time = time_dt();

    /* Accumulated atomically, since the works are filtered in parallel. */
    uint num_active_pixels = 0;
    parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &work) {
      const uint num_active_pixels_in_work = work->adaptive_sampling_converge_filter_count_active(
          render_work.adaptive_sampling.threshold, render_work.adaptive_sampling.reset);
      if (num_active_pixels_in_work) {
        atomic_add_and_fetch_u(&num_active_pixels, num_active_pixels_in_work);
      }
    });

    render_scheduler_.report_adaptive_filter_time(
        render_work, time_dt() - start_time, is_cancel_requested());

    if (num_active_pixels == 0) {
      VLOG_WORK << "All pixels converged.";
      if (!render_scheduler_.render_work_reschedule_on_converge(render_work)) {
        break;
      }
      VLOG_WORK << "Continuing with lower threshold.";
      continue;
    }

    if (did_reschedule_on_idle) {
      break;
    }

    if (num_active_pixels >= 128 * 128) {
      break;
    }

    /* NOTE: The hardcoded value of 128^2 is more of an empirical value to keep the GPU busy, so
     * that there is no performance loss from the progressive noise floor feature.
     *
     * A better heuristic is possible here: for example, use the maximum of 128^2 and a percentage
     * of the final resolution. */
    if (!render_scheduler_.render_work_reschedule_on_idle(render_work)) {
      VLOG_WORK << "Rescheduling is not possible: final threshold is reached.";
      break;
    }
    VLOG_WORK << "Rescheduling lower threshold.";
    did_reschedule_on_idle = true;
  }
}
|
|
|
|
void PathTrace::set_denoiser_params(const DenoiseParams ¶ms)
|
|
{
|
|
render_scheduler_.set_denoiser_params(params);
|
|
|
|
if (!params.use) {
|
|
denoiser_.reset();
|
|
return;
|
|
}
|
|
|
|
if (denoiser_) {
|
|
const DenoiseParams old_denoiser_params = denoiser_->get_params();
|
|
if (old_denoiser_params.type == params.type) {
|
|
denoiser_->set_params(params);
|
|
return;
|
|
}
|
|
}
|
|
|
|
denoiser_ = Denoiser::create(device_, params);
|
|
|
|
/* Only take into account the "immediate" cancel to have interactive rendering responding to
|
|
* navigation as quickly as possible, but allow to run denoiser after user hit Escape key while
|
|
* doing offline rendering. */
|
|
denoiser_->is_cancelled_cb = [this]() { return render_cancel_.is_requested; };
|
|
}
|
|
|
|
void PathTrace::set_adaptive_sampling(const AdaptiveSampling &adaptive_sampling)
|
|
{
|
|
render_scheduler_.set_adaptive_sampling(adaptive_sampling);
|
|
}
|
|
|
|
void PathTrace::cryptomatte_postprocess(const RenderWork &render_work)
|
|
{
|
|
if (!render_work.cryptomatte.postprocess) {
|
|
return;
|
|
}
|
|
VLOG_WORK << "Perform cryptomatte work.";
|
|
|
|
parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) {
|
|
path_trace_work->cryptomatte_postproces();
|
|
});
|
|
}
|
|
|
|
void PathTrace::denoise(const RenderWork &render_work)
|
|
{
|
|
if (!render_work.tile.denoise) {
|
|
return;
|
|
}
|
|
|
|
if (!denoiser_) {
|
|
/* Denoiser was not configured, so nothing to do here. */
|
|
return;
|
|
}
|
|
|
|
VLOG_WORK << "Perform denoising work.";
|
|
|
|
const double start_time = time_dt();
|
|
|
|
RenderBuffers *buffer_to_denoise = nullptr;
|
|
bool allow_inplace_modification = false;
|
|
|
|
Device *denoiser_device = denoiser_->get_denoiser_device();
|
|
if (path_trace_works_.size() > 1 && denoiser_device && !big_tile_denoise_work_) {
|
|
big_tile_denoise_work_ = PathTraceWork::create(denoiser_device, film_, device_scene_, nullptr);
|
|
}
|
|
|
|
if (big_tile_denoise_work_) {
|
|
big_tile_denoise_work_->set_effective_buffer_params(render_state_.effective_big_tile_params,
|
|
render_state_.effective_big_tile_params,
|
|
render_state_.effective_big_tile_params);
|
|
|
|
buffer_to_denoise = big_tile_denoise_work_->get_render_buffers();
|
|
buffer_to_denoise->reset(render_state_.effective_big_tile_params);
|
|
|
|
copy_to_render_buffers(buffer_to_denoise);
|
|
|
|
allow_inplace_modification = true;
|
|
}
|
|
else {
|
|
DCHECK_EQ(path_trace_works_.size(), 1);
|
|
|
|
buffer_to_denoise = path_trace_works_.front()->get_render_buffers();
|
|
}
|
|
|
|
if (denoiser_->denoise_buffer(render_state_.effective_big_tile_params,
|
|
buffer_to_denoise,
|
|
get_num_samples_in_buffer(),
|
|
allow_inplace_modification))
|
|
{
|
|
render_state_.has_denoised_result = true;
|
|
}
|
|
|
|
render_scheduler_.report_denoise_time(render_work, time_dt() - start_time);
|
|
}
|
|
|
|
/* Take ownership of the output driver, replacing the previous one (if any). */
void PathTrace::set_output_driver(unique_ptr<OutputDriver> driver)
{
  output_driver_ = std::move(driver);
}
|
|
|
|
/* Replace the display with one which wraps the given driver. Passing a null driver removes the
 * display. */
void PathTrace::set_display_driver(unique_ptr<DisplayDriver> driver)
{
  /* The display driver provides the drawing context which might be used by the path trace works.
   * Ensure no graphics interop keeps using resources of the old display, as those might no longer
   * be available after this call. */
  destroy_gpu_resources();

  if (!driver) {
    display_ = nullptr;
    return;
  }

  display_ = make_unique<PathTraceDisplay>(std::move(driver));
}
|
|
|
|
void PathTrace::clear_display()
|
|
{
|
|
if (display_) {
|
|
display_->clear();
|
|
}
|
|
}
|
|
|
|
void PathTrace::draw()
|
|
{
|
|
if (!display_) {
|
|
return;
|
|
}
|
|
|
|
did_draw_after_reset_ |= display_->draw();
|
|
}
|
|
|
|
void PathTrace::flush_display()
|
|
{
|
|
if (!display_) {
|
|
return;
|
|
}
|
|
|
|
display_->flush();
|
|
}
|
|
|
|
void PathTrace::update_display(const RenderWork &render_work)
|
|
{
|
|
if (!render_work.display.update) {
|
|
return;
|
|
}
|
|
|
|
if (!display_ && !output_driver_) {
|
|
VLOG_WORK << "Ignore display update.";
|
|
return;
|
|
}
|
|
|
|
if (full_params_.width == 0 || full_params_.height == 0) {
|
|
VLOG_WORK << "Skipping PathTraceDisplay update due to 0 size of the render buffer.";
|
|
return;
|
|
}
|
|
|
|
const double start_time = time_dt();
|
|
|
|
if (output_driver_) {
|
|
VLOG_WORK << "Invoke buffer update callback.";
|
|
|
|
PathTraceTile tile(*this);
|
|
output_driver_->update_render_tile(tile);
|
|
}
|
|
|
|
if (display_) {
|
|
VLOG_WORK << "Perform copy to GPUDisplay work.";
|
|
|
|
const int texture_width = render_state_.effective_big_tile_params.window_width;
|
|
const int texture_height = render_state_.effective_big_tile_params.window_height;
|
|
if (!display_->update_begin(texture_width, texture_height)) {
|
|
LOG(ERROR) << "Error beginning GPUDisplay update.";
|
|
return;
|
|
}
|
|
|
|
const PassMode pass_mode = render_work.display.use_denoised_result &&
|
|
render_state_.has_denoised_result ?
|
|
PassMode::DENOISED :
|
|
PassMode::NOISY;
|
|
|
|
/* TODO(sergey): When using multi-device rendering map the GPUDisplay once and copy data from
|
|
* all works in parallel. */
|
|
const int num_samples = get_num_samples_in_buffer();
|
|
if (big_tile_denoise_work_ && render_state_.has_denoised_result) {
|
|
big_tile_denoise_work_->copy_to_display(display_.get(), pass_mode, num_samples);
|
|
}
|
|
else {
|
|
for (auto &&path_trace_work : path_trace_works_) {
|
|
path_trace_work->copy_to_display(display_.get(), pass_mode, num_samples);
|
|
}
|
|
}
|
|
|
|
display_->update_end();
|
|
}
|
|
|
|
render_scheduler_.report_display_update_time(render_work, time_dt() - start_time);
|
|
}
|
|
|
|
void PathTrace::rebalance(const RenderWork &render_work)
|
|
{
|
|
if (!render_work.rebalance) {
|
|
return;
|
|
}
|
|
|
|
const int num_works = path_trace_works_.size();
|
|
|
|
if (num_works == 1) {
|
|
VLOG_WORK << "Ignoring rebalance work due to single device render.";
|
|
return;
|
|
}
|
|
|
|
const double start_time = time_dt();
|
|
|
|
if (VLOG_IS_ON(3)) {
|
|
VLOG_WORK << "Perform rebalance work.";
|
|
VLOG_WORK << "Per-device path tracing time (seconds):";
|
|
for (int i = 0; i < num_works; ++i) {
|
|
VLOG_WORK << path_trace_works_[i]->get_device()->info.description << ": "
|
|
<< work_balance_infos_[i].time_spent;
|
|
}
|
|
}
|
|
|
|
const bool did_rebalance = work_balance_do_rebalance(work_balance_infos_);
|
|
|
|
if (VLOG_IS_ON(3)) {
|
|
VLOG_WORK << "Calculated per-device weights for works:";
|
|
for (int i = 0; i < num_works; ++i) {
|
|
VLOG_WORK << path_trace_works_[i]->get_device()->info.description << ": "
|
|
<< work_balance_infos_[i].weight;
|
|
}
|
|
}
|
|
|
|
if (!did_rebalance) {
|
|
VLOG_WORK << "Balance in path trace works did not change.";
|
|
render_scheduler_.report_rebalance_time(render_work, time_dt() - start_time, false);
|
|
return;
|
|
}
|
|
|
|
RenderBuffers big_tile_cpu_buffers(cpu_device_.get());
|
|
big_tile_cpu_buffers.reset(render_state_.effective_big_tile_params);
|
|
|
|
copy_to_render_buffers(&big_tile_cpu_buffers);
|
|
|
|
render_state_.need_reset_params = true;
|
|
update_work_buffer_params_if_needed(render_work);
|
|
|
|
copy_from_render_buffers(&big_tile_cpu_buffers);
|
|
|
|
render_scheduler_.report_rebalance_time(render_work, time_dt() - start_time, true);
|
|
}
|
|
|
|
void PathTrace::write_tile_buffer(const RenderWork &render_work)
|
|
{
|
|
if (!render_work.tile.write) {
|
|
return;
|
|
}
|
|
|
|
VLOG_WORK << "Write tile result.";
|
|
|
|
render_state_.tile_written = true;
|
|
|
|
const bool has_multiple_tiles = tile_manager_.has_multiple_tiles();
|
|
|
|
/* Write render tile result, but only if not using tiled rendering.
|
|
*
|
|
* Tiles are written to a file during rendering, and written to the software at the end
|
|
* of rendering (wither when all tiles are finished, or when rendering was requested to be
|
|
* canceled).
|
|
*
|
|
* Important thing is: tile should be written to the software via callback only once. */
|
|
if (!has_multiple_tiles) {
|
|
VLOG_WORK << "Write tile result via buffer write callback.";
|
|
tile_buffer_write();
|
|
}
|
|
/* Write tile to disk, so that the render work's render buffer can be re-used for the next tile.
|
|
*/
|
|
else {
|
|
VLOG_WORK << "Write tile result to disk.";
|
|
tile_buffer_write_to_disk();
|
|
}
|
|
}
|
|
|
|
void PathTrace::finalize_full_buffer_on_disk(const RenderWork &render_work)
|
|
{
|
|
if (!render_work.full.write) {
|
|
return;
|
|
}
|
|
|
|
VLOG_WORK << "Handle full-frame render buffer work.";
|
|
|
|
if (!tile_manager_.has_written_tiles()) {
|
|
VLOG_WORK << "No tiles on disk.";
|
|
return;
|
|
}
|
|
|
|
/* Make sure writing to the file is fully finished.
|
|
* This will include writing all possible missing tiles, ensuring validness of the file. */
|
|
tile_manager_.finish_write_tiles();
|
|
|
|
/* NOTE: The rest of full-frame post-processing (such as full-frame denoising) will be done after
|
|
* all scenes and layers are rendered by the Session (which happens after freeing Session memory,
|
|
* so that we never hold scene and full-frame buffer in memory at the same time). */
|
|
}
|
|
|
|
void PathTrace::cancel()
|
|
{
|
|
thread_scoped_lock lock(render_cancel_.mutex);
|
|
|
|
render_cancel_.is_requested = true;
|
|
|
|
while (render_cancel_.is_rendering) {
|
|
render_cancel_.condition.wait(lock);
|
|
}
|
|
|
|
render_cancel_.is_requested = false;
|
|
}
|
|
|
|
int PathTrace::get_num_samples_in_buffer()
|
|
{
|
|
return render_scheduler_.get_num_rendered_samples();
|
|
}
|
|
|
|
bool PathTrace::is_cancel_requested()
|
|
{
|
|
if (render_cancel_.is_requested) {
|
|
return true;
|
|
}
|
|
|
|
if (progress_ != nullptr) {
|
|
if (progress_->get_cancel()) {
|
|
return true;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
void PathTrace::tile_buffer_write()
|
|
{
|
|
if (!output_driver_) {
|
|
return;
|
|
}
|
|
|
|
PathTraceTile tile(*this);
|
|
output_driver_->write_render_tile(tile);
|
|
}
|
|
|
|
void PathTrace::tile_buffer_read()
|
|
{
|
|
if (!device_scene_->data.bake.use) {
|
|
return;
|
|
}
|
|
|
|
if (!output_driver_) {
|
|
return;
|
|
}
|
|
|
|
/* Read buffers back from device. */
|
|
parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) {
|
|
path_trace_work->copy_render_buffers_from_device();
|
|
});
|
|
|
|
/* Read (subset of) passes from output driver. */
|
|
PathTraceTile tile(*this);
|
|
if (output_driver_->read_render_tile(tile)) {
|
|
/* Copy buffers to device again. */
|
|
parallel_for_each(path_trace_works_, [](unique_ptr<PathTraceWork> &path_trace_work) {
|
|
path_trace_work->copy_render_buffers_to_device();
|
|
});
|
|
}
|
|
}
|
|
|
|
void PathTrace::tile_buffer_write_to_disk()
|
|
{
|
|
/* Sample count pass is required to support per-tile partial results stored in the file. */
|
|
DCHECK_NE(big_tile_params_.get_pass_offset(PASS_SAMPLE_COUNT), PASS_UNUSED);
|
|
|
|
const int num_rendered_samples = render_scheduler_.get_num_rendered_samples();
|
|
|
|
if (num_rendered_samples == 0) {
|
|
/* The tile has zero samples, no need to write it. */
|
|
return;
|
|
}
|
|
|
|
/* Get access to the CPU-side render buffers of the current big tile. */
|
|
RenderBuffers *buffers;
|
|
RenderBuffers big_tile_cpu_buffers(cpu_device_.get());
|
|
|
|
if (path_trace_works_.size() == 1) {
|
|
path_trace_works_[0]->copy_render_buffers_from_device();
|
|
buffers = path_trace_works_[0]->get_render_buffers();
|
|
}
|
|
else {
|
|
big_tile_cpu_buffers.reset(render_state_.effective_big_tile_params);
|
|
copy_to_render_buffers(&big_tile_cpu_buffers);
|
|
|
|
buffers = &big_tile_cpu_buffers;
|
|
}
|
|
|
|
if (!tile_manager_.write_tile(*buffers)) {
|
|
device_->set_error("Error writing tile to file");
|
|
}
|
|
}
|
|
|
|
void PathTrace::progress_update_if_needed(const RenderWork &render_work)
|
|
{
|
|
if (progress_ != nullptr) {
|
|
const int2 tile_size = get_render_tile_size();
|
|
const uint64_t num_samples_added = uint64_t(tile_size.x) * tile_size.y *
|
|
render_work.path_trace.num_samples;
|
|
const int current_sample = render_work.path_trace.start_sample +
|
|
render_work.path_trace.num_samples -
|
|
render_work.path_trace.sample_offset;
|
|
progress_->add_samples(num_samples_added, current_sample);
|
|
}
|
|
|
|
if (progress_update_cb) {
|
|
progress_update_cb();
|
|
}
|
|
}
|
|
|
|
void PathTrace::progress_set_status(const string &status, const string &substatus)
|
|
{
|
|
if (progress_ != nullptr) {
|
|
progress_->set_status(status, substatus);
|
|
}
|
|
}
|
|
|
|
/* Gather the content of all path trace works into the given render buffers (in parallel), and
 * then copy the given buffers to their device. */
void PathTrace::copy_to_render_buffers(RenderBuffers *render_buffers)
{
  parallel_for_each(path_trace_works_, [render_buffers](unique_ptr<PathTraceWork> &work) {
    work->copy_to_render_buffers(render_buffers);
  });

  render_buffers->copy_to_device();
}
|
|
|
|
/* Copy the given render buffers from their device, and then scatter their content to all path
 * trace works (in parallel). */
void PathTrace::copy_from_render_buffers(RenderBuffers *render_buffers)
{
  render_buffers->copy_from_device();

  parallel_for_each(path_trace_works_, [render_buffers](unique_ptr<PathTraceWork> &work) {
    work->copy_from_render_buffers(render_buffers);
  });
}
|
|
|
|
bool PathTrace::copy_render_tile_from_device()
|
|
{
|
|
if (full_frame_state_.render_buffers) {
|
|
/* Full-frame buffer is always allocated on CPU. */
|
|
return true;
|
|
}
|
|
|
|
if (big_tile_denoise_work_ && render_state_.has_denoised_result) {
|
|
return big_tile_denoise_work_->copy_render_buffers_from_device();
|
|
}
|
|
|
|
bool success = true;
|
|
|
|
parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) {
|
|
if (!success) {
|
|
return;
|
|
}
|
|
if (!path_trace_work->copy_render_buffers_from_device()) {
|
|
success = false;
|
|
}
|
|
});
|
|
|
|
return success;
|
|
}
|
|
|
|
/* Build a name from the layer and view stored in the buffer parameters. Empty parts are left out;
 * when both parts are present they are separated by ", ". */
static string get_layer_view_name(const RenderBuffers &buffers)
{
  const auto &layer = buffers.params.layer;
  const auto &view = buffers.params.view;

  string name;

  if (layer.size()) {
    name += string(layer);
  }

  if (view.size()) {
    /* Only add the separator when there is something to separate from. */
    if (!name.empty()) {
      name += ", ";
    }
    name += string(view);
  }

  return name;
}
|
|
|
|
/* Read the full-frame render buffer from the given tiles file, denoise it when the denoising
 * parameters stored in the file request so, and write the result via the output driver as if it
 * was a single tile.
 *
 * A failure to read the file is reported via the progress object when there is one (as both an
 * error and a cancel), and logged otherwise. */
void PathTrace::process_full_buffer_from_disk(string_view filename)
{
  VLOG_WORK << "Processing full frame buffer file " << filename;

  progress_set_status("Reading full buffer from disk");

  RenderBuffers full_frame_buffers(cpu_device_.get());

  DenoiseParams denoise_params;
  if (!tile_manager_.read_full_buffer_from_disk(filename, &full_frame_buffers, &denoise_params)) {
    const string error_message = "Error reading tiles from file";
    if (progress_) {
      progress_->set_error(error_message);
      progress_->set_cancel(error_message);
    }
    else {
      LOG(ERROR) << error_message;
    }
    return;
  }

  const string layer_view_name = get_layer_view_name(full_frame_buffers);

  render_state_.has_denoised_result = false;

  if (denoise_params.use) {
    progress_set_status(layer_view_name, "Denoising");

    /* Re-use the denoiser as much as possible, avoiding possible device re-initialization.
     *
     * It will not conflict with the regular rendering as:
     *  - Rendering is supposed to be finished here.
     *  - The next rendering will go via Session's `run_update_for_next_iteration` which will
     *    ensure proper denoiser is used. */
    set_denoiser_params(denoise_params);

    /* Only mark the result as denoised when there is a denoiser and it reports success. This
     * matches `denoise()` and avoids dereferencing a missing denoiser.
     *
     * Number of samples doesn't matter too much, since the samples count pass will be used. */
    if (denoiser_ &&
        denoiser_->denoise_buffer(full_frame_buffers.params, &full_frame_buffers, 0, false))
    {
      render_state_.has_denoised_result = true;
    }
  }

  /* The full-frame buffers live on the stack of this function: they are only exposed via the
   * state for the duration of the write below. */
  full_frame_state_.render_buffers = &full_frame_buffers;

  progress_set_status(layer_view_name, "Finishing");

  /* Write the full result pretending that there is a single tile.
   * Requires some state change, but allows to use same communication API with the software. */
  tile_buffer_write();

  full_frame_state_.render_buffers = nullptr;
}
|
|
|
|
int PathTrace::get_num_render_tile_samples() const
|
|
{
|
|
if (full_frame_state_.render_buffers) {
|
|
return full_frame_state_.render_buffers->params.samples;
|
|
}
|
|
|
|
return render_scheduler_.get_num_rendered_samples();
|
|
}
|
|
|
|
bool PathTrace::get_render_tile_pixels(const PassAccessor &pass_accessor,
|
|
const PassAccessor::Destination &destination)
|
|
{
|
|
if (full_frame_state_.render_buffers) {
|
|
return pass_accessor.get_render_tile_pixels(full_frame_state_.render_buffers, destination);
|
|
}
|
|
|
|
if (big_tile_denoise_work_ && render_state_.has_denoised_result) {
|
|
/* Only use the big tile denoised buffer to access the denoised passes.
|
|
* The guiding passes are allowed to be modified in-place for the needs of the denoiser,
|
|
* so copy those from the original devices buffers. */
|
|
if (pass_accessor.get_pass_access_info().mode == PassMode::DENOISED) {
|
|
return big_tile_denoise_work_->get_render_tile_pixels(pass_accessor, destination);
|
|
}
|
|
}
|
|
|
|
bool success = true;
|
|
|
|
parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) {
|
|
if (!success) {
|
|
return;
|
|
}
|
|
if (!path_trace_work->get_render_tile_pixels(pass_accessor, destination)) {
|
|
success = false;
|
|
}
|
|
});
|
|
|
|
return success;
|
|
}
|
|
|
|
/* Write pixels from the source into every path trace work, in parallel.
 *
 * Returns false when any of the works reports a failure. */
bool PathTrace::set_render_tile_pixels(PassAccessor &pass_accessor,
                                       const PassAccessor::Source &source)
{
  bool all_succeeded = true;

  parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &work) {
    /* Once a failure has been observed the remaining works are not updated. */
    if (all_succeeded && !work->set_render_tile_pixels(pass_accessor, source)) {
      all_succeeded = false;
    }
  });

  return all_succeeded;
}
|
|
|
|
/* Size of the window of the current render tile.
 *
 * Taken from the full-frame buffers parameters when those are assigned, and from the current tile
 * of the tile manager otherwise. */
int2 PathTrace::get_render_tile_size() const
{
  const RenderBuffers *full_frame_buffers = full_frame_state_.render_buffers;
  if (full_frame_buffers == nullptr) {
    const Tile &current_tile = tile_manager_.get_current_tile();
    return make_int2(current_tile.window_width, current_tile.window_height);
  }

  const BufferParams &full_frame_params = full_frame_buffers->params;
  return make_int2(full_frame_params.window_width, full_frame_params.window_height);
}
|
|
|
|
/* Offset of the window of the current render tile.
 *
 * The full-frame result is reported with a zero offset. For a regular tile the offset is the tile
 * position plus the position of the window within the tile. */
int2 PathTrace::get_render_tile_offset() const
{
  if (full_frame_state_.render_buffers != nullptr) {
    return make_int2(0, 0);
  }

  const Tile &current_tile = tile_manager_.get_current_tile();
  const int2 offset = make_int2(current_tile.x + current_tile.window_x,
                                current_tile.y + current_tile.window_y);
  return offset;
}
|
|
|
|
/* Size of the render as reported by the tile manager. */
int2 PathTrace::get_render_size() const
{
  const int2 render_size = tile_manager_.get_size();
  return render_size;
}
|
|
|
|
/* Buffer parameters of the current render tile.
 *
 * These are the parameters of the full-frame buffers when those are assigned, and the big tile
 * parameters otherwise. */
const BufferParams &PathTrace::get_render_tile_params() const
{
  const RenderBuffers *full_frame_buffers = full_frame_state_.render_buffers;
  if (full_frame_buffers == nullptr) {
    return big_tile_params_;
  }

  return full_frame_buffers->params;
}
|
|
|
|
bool PathTrace::has_denoised_result() const
|
|
{
|
|
return render_state_.has_denoised_result;
|
|
}
|
|
|
|
void PathTrace::destroy_gpu_resources()
|
|
{
|
|
/* Destroy any GPU resource which was used for graphics interop.
|
|
* Need to have access to the PathTraceDisplay as it is the only source of drawing context which
|
|
* is used for interop. */
|
|
if (display_) {
|
|
for (auto &&path_trace_work : path_trace_works_) {
|
|
path_trace_work->destroy_gpu_resources(display_.get());
|
|
}
|
|
|
|
if (big_tile_denoise_work_) {
|
|
big_tile_denoise_work_->destroy_gpu_resources(display_.get());
|
|
}
|
|
}
|
|
}
|
|
|
|
/* --------------------------------------------------------------------
|
|
* Report generation.
|
|
*/
|
|
|
|
static const char *device_type_for_description(const DeviceType type)
|
|
{
|
|
switch (type) {
|
|
case DEVICE_NONE:
|
|
return "None";
|
|
|
|
case DEVICE_CPU:
|
|
return "CPU";
|
|
case DEVICE_CUDA:
|
|
return "CUDA";
|
|
case DEVICE_OPTIX:
|
|
return "OptiX";
|
|
case DEVICE_HIP:
|
|
return "HIP";
|
|
case DEVICE_HIPRT:
|
|
return "HIPRT";
|
|
case DEVICE_ONEAPI:
|
|
return "oneAPI";
|
|
case DEVICE_DUMMY:
|
|
return "Dummy";
|
|
case DEVICE_MULTI:
|
|
return "Multi";
|
|
case DEVICE_METAL:
|
|
return "Metal";
|
|
}
|
|
|
|
return "UNKNOWN";
|
|
}
|
|
|
|
/* Construct description of the device which will appear in the full report. */
|
|
/* TODO(sergey): Consider making it more reusable utility. */
|
|
static string full_device_info_description(const DeviceInfo &device_info)
|
|
{
|
|
string full_description = device_info.description;
|
|
|
|
full_description += " (" + string(device_type_for_description(device_info.type)) + ")";
|
|
|
|
if (device_info.display_device) {
|
|
full_description += " (display)";
|
|
}
|
|
|
|
if (device_info.type == DEVICE_CPU) {
|
|
full_description += " (" + to_string(device_info.cpu_threads) + " threads)";
|
|
}
|
|
|
|
full_description += " [" + device_info.id + "]";
|
|
|
|
return full_description;
|
|
}
|
|
|
|
/* Construct string which will contain information about devices, possibly multiple of the devices.
|
|
*
|
|
* In the simple case the result looks like:
|
|
*
|
|
* Message: Full Device Description
|
|
*
|
|
* If there are multiple devices then the result looks like:
|
|
*
|
|
* Message: Full First Device Description
|
|
* Full Second Device Description
|
|
*
|
|
* Note that the newlines are placed in a way so that the result can be easily concatenated to the
|
|
* full report. */
|
|
static string device_info_list_report(const string &message, const DeviceInfo &device_info)
|
|
{
|
|
string result = "\n" + message + ": ";
|
|
const string pad(message.length() + 2, ' ');
|
|
|
|
if (device_info.multi_devices.empty()) {
|
|
result += full_device_info_description(device_info) + "\n";
|
|
return result;
|
|
}
|
|
|
|
bool is_first = true;
|
|
for (const DeviceInfo &sub_device_info : device_info.multi_devices) {
|
|
if (!is_first) {
|
|
result += pad;
|
|
}
|
|
|
|
result += full_device_info_description(sub_device_info) + "\n";
|
|
|
|
is_first = false;
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/* Report section listing the devices of all given path trace works.
 *
 * The devices are gathered as sub-devices of a temporary multi-device info, so that the common
 * list formatting can be used. */
static string path_trace_devices_report(const vector<unique_ptr<PathTraceWork>> &path_trace_works)
{
  DeviceInfo combined_info;
  combined_info.type = DEVICE_MULTI;

  for (const unique_ptr<PathTraceWork> &work : path_trace_works) {
    combined_info.multi_devices.push_back(work->get_device()->info);
  }

  return device_info_list_report("Path tracing on", combined_info);
}
|
|
|
|
/* Report section listing the device(s) used for denoising.
 *
 * An empty string is returned when there is no denoiser, when denoising is disabled in its
 * parameters, or when the denoiser has no device. */
static string denoiser_device_report(const Denoiser *denoiser)
{
  if (denoiser == nullptr || !denoiser->get_params().use) {
    return "";
  }

  const Device *device = denoiser->get_denoiser_device();
  if (device == nullptr) {
    return "";
  }

  return device_info_list_report("Denoising on", device->info);
}
|
|
|
|
/* Build the full textual report of the path tracing.
 *
 * The report consists of a header followed by the path tracing devices, the denoiser devices
 * (when there are any to report) and the report of the render scheduler. */
string PathTrace::full_report() const
{
  const string devices_section = path_trace_devices_report(path_trace_works_);
  const string denoiser_section = denoiser_device_report(denoiser_.get());

  /* Report from the render scheduler, which includes:
   *  - Render mode (interactive, offline, headless)
   *  - Adaptive sampling and denoiser parameters
   *  - Breakdown of timing. */
  const string scheduler_section = render_scheduler_.full_report();

  string report = "\nFull path tracing report\n";
  report += devices_section;
  report += denoiser_section;
  report += scheduler_section;

  return report;
}
|
|
|
|
/* Apply path guiding parameters.
 *
 * When the parameters differ from the current ones they are stored and the guiding field and
 * sample storage are re-created, or released when guiding is disabled or no guiding device is
 * available. When the parameters are unchanged and `reset` is set, an existing guiding field is
 * reset instead.
 *
 * Does nothing when compiled without path guiding support. */
void PathTrace::set_guiding_params(const GuidingParams &guiding_params, const bool reset)
{
#ifdef WITH_PATH_GUIDING
  if (!guiding_params_.modified(guiding_params)) {
    /* Same parameters: the structures are kept, optionally resetting the field. */
    if (reset && guiding_field_) {
      guiding_field_->Reset();
    }
    return;
  }

  guiding_params_ = guiding_params;

  /* Release the sample storage and the field, in this order. */
  const auto release_guiding_structures = [this]() {
    guiding_sample_data_storage_ = nullptr;
    guiding_field_ = nullptr;
  };

  if (!guiding_params_.use) {
    release_guiding_structures();
    return;
  }

  /* Parallax-aware von Mises-Fisher mixture models are used for the matching guiding type and for
   * any type which is not listed explicitly. */
  PGL_DIRECTIONAL_DISTRIBUTION_TYPE directional_type =
      PGL_DIRECTIONAL_DISTRIBUTION_TYPE::PGL_DIRECTIONAL_DISTRIBUTION_PARALLAX_AWARE_VMM;
  switch (guiding_params_.type) {
    /* Directional quad-trees. */
    case GUIDING_TYPE_DIRECTIONAL_QUAD_TREE:
      directional_type = PGL_DIRECTIONAL_DISTRIBUTION_TYPE::PGL_DIRECTIONAL_DISTRIBUTION_QUADTREE;
      break;
    /* von Mises-Fisher mixture models. */
    case GUIDING_TYPE_VMM:
      directional_type = PGL_DIRECTIONAL_DISTRIBUTION_TYPE::PGL_DIRECTIONAL_DISTRIBUTION_VMM;
      break;
    default:
      break;
  }

  /* All guiding types share the same spatial structure. */
  PGLFieldArguments field_args;
  pglFieldArgumentsSetDefaults(
      field_args, PGL_SPATIAL_STRUCTURE_TYPE::PGL_SPATIAL_STRUCTURE_KDTREE, directional_type);
  field_args.deterministic = guiding_params.deterministic;
  /* Override the maximal depth of the KD-tree after the defaults have been applied. */
  reinterpret_cast<PGLKDTreeArguments *>(field_args.spatialSturctureArguments)->maxDepth = 16;

  openpgl::cpp::Device *guiding_device = static_cast<openpgl::cpp::Device *>(
      device_->get_guiding_device());
  if (guiding_device == nullptr) {
    release_guiding_structures();
    return;
  }

  guiding_sample_data_storage_ = make_unique<openpgl::cpp::SampleStorage>();
  guiding_field_ = make_unique<openpgl::cpp::Field>(guiding_device, field_args);
#else
  (void)guiding_params;
  (void)reset;
#endif
}
|
|
|
|
/* Pass the guiding field and sample storage to all path trace works, and configure the render
 * scheduler depending on whether the guiding field is still being trained.
 *
 * Does nothing when compiled without path guiding support. */
void PathTrace::guiding_prepare_structures()
{
#ifdef WITH_PATH_GUIDING
  /* Training never finishes when the number of training samples is zero. Otherwise it is finished
   * once the field has gone through the configured number of iterations. */
  const bool is_training_finished = (guiding_params_.training_samples != 0) &&
                                    (guiding_field_->GetIteration() >=
                                     guiding_params_.training_samples);
  const bool train = !is_training_finished;

  for (const unique_ptr<PathTraceWork> &work : path_trace_works_) {
    work->guiding_init_kernel_globals(
        guiding_field_.get(), guiding_sample_data_storage_.get(), train);
  }

  /* For training the guiding distribution we need to force the number of samples
   * per update to be limited, for reproducible results and reasonable training size.
   *
   * Idea: we could stochastically discard samples with a probability of 1/num_samples_per_update
   * we can then update only after the num_samples_per_update iterations are rendered.
   *
   * When not training a limit of zero is passed instead. */
  render_scheduler_.set_limit_samples_per_update(train ? 4 : 0);
#endif
}
|
|
|
|
/* Update the guiding field from the collected samples, once enough of them are available.
 *
 * Does nothing when compiled without path guiding support. */
void PathTrace::guiding_update_structures()
{
#ifdef WITH_PATH_GUIDING
  VLOG_WORK << "Update path guiding structures";

  const size_t num_surface_samples = guiding_sample_data_storage_->GetSizeSurface();
  const size_t num_volume_samples = guiding_sample_data_storage_->GetSizeVolume();

  VLOG_DEBUG << "Number of surface samples: " << num_surface_samples;
  VLOG_DEBUG << "Number of volume samples: " << num_volume_samples;

  /* Wait until there are at least 1024 samples; until then the storage keeps accumulating. */
  const size_t num_valid_samples = num_surface_samples + num_volume_samples;
  if (num_valid_samples < 1024) {
    return;
  }

  guiding_field_->Update(*guiding_sample_data_storage_);
  guiding_update_count++;

  VLOG_DEBUG << "Path guiding field valid: " << guiding_field_->Validate();

  /* The samples have been consumed by the update, start collecting from scratch. */
  guiding_sample_data_storage_->Clear();
#endif
}
|
|
|
|
CCL_NAMESPACE_END
|