From 366262bef5426fa98e75c96a1562dd16001fba26 Mon Sep 17 00:00:00 2001 From: William Leeson Date: Tue, 26 Oct 2021 08:30:15 +0200 Subject: [PATCH] Distance Scrambling for Cycles X - Sobol version Cycles: Distance Scrambling for Cycles Sobol Sampler This option implements micro jittering and is based on the INRIA research paper [[ https://hal.inria.fr/hal-01325702/document | on micro jittering ]] and work by Lukas Stockner for implementing the scrambling distance. It works by controlling the correlation between pixels by either using a user-supplied value or an adaptive algorithm to limit the maximum deviation of the sample values between pixels. This is a follow-up to https://developer.blender.org/D12316 The PMJ version can be found here: https://developer.blender.org/D12511 Reviewed By: leesonw Differential Revision: https://developer.blender.org/D12318 --- intern/cycles/blender/addon/properties.py | 18 ++++++ intern/cycles/blender/addon/ui.py | 7 +++ intern/cycles/blender/sync.cpp | 15 +++++ .../cycles/integrator/path_trace_work_gpu.cpp | 5 +- intern/cycles/integrator/tile.cpp | 58 +++++++++++++++---- intern/cycles/integrator/tile.h | 3 +- .../cycles/integrator/work_tile_scheduler.cpp | 9 ++- .../cycles/integrator/work_tile_scheduler.h | 8 ++- intern/cycles/kernel/sample/pattern.h | 2 +- intern/cycles/kernel/types.h | 2 +- intern/cycles/scene/integrator.cpp | 2 + intern/cycles/scene/integrator.h | 1 + intern/cycles/test/integrator_tile_test.cpp | 19 +++--- release/datafiles/locale | 2 +- release/scripts/addons | 2 +- release/scripts/addons_contrib | 2 +- source/tools | 2 +- 17 files changed, 128 insertions(+), 29 deletions(-) diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py index 0f92238015d..e5853529d1c 100644 --- a/intern/cycles/blender/addon/properties.py +++ b/intern/cycles/blender/addon/properties.py @@ -342,6 +342,24 @@ class CyclesRenderSettings(bpy.types.PropertyGroup): default='PROGRESSIVE_MUTI_JITTER', 
) + scrambling_distance: FloatProperty( + name="Scrambling Distance", + default=1.0, + min=0.0, max=1.0, + description="Lower values give faster rendering with GPU rendering and less noise with all devices at the cost of possible artifacts if set too low", + ) + preview_scrambling_distance: BoolProperty( + name="Scrambling Distance viewport", + default=False, + description="Uses the Scrambling Distance value for the viewport. Faster but may flicker", + ) + + adaptive_scrambling_distance: BoolProperty( + name="Adaptive Scrambling Distance", + default=False, + description="Uses a formula to adapt the scrambling distance strength based on the sample count", + ) + use_layer_samples: EnumProperty( name="Layer Samples", description="How to use per view layer sample settings", diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py index facf1b08676..47907481b03 100644 --- a/intern/cycles/blender/addon/ui.py +++ b/intern/cycles/blender/addon/ui.py @@ -289,6 +289,13 @@ class CYCLES_RENDER_PT_sampling_advanced(CyclesButtonsPanel, Panel): col = layout.column(align=True) col.active = not(cscene.use_adaptive_sampling) col.prop(cscene, "sampling_pattern", text="Pattern") + col = layout.column(align=True) + col.active = cscene.sampling_pattern == 'SOBOL' and not cscene.use_adaptive_sampling + col.prop(cscene, "scrambling_distance", text="Scrambling Distance Strength") + col.prop(cscene, "adaptive_scrambling_distance", text="Adaptive Scrambling Distance") + col = layout.column(align=True) + col.active = ((cscene.scrambling_distance < 1.0) or cscene.adaptive_scrambling_distance) and cscene.sampling_pattern == 'SOBOL' and not cscene.use_adaptive_sampling + col.prop(cscene, "preview_scrambling_distance", text="Viewport Scrambling Distance") layout.separator() diff --git a/intern/cycles/blender/sync.cpp b/intern/cycles/blender/sync.cpp index 73d3a4436b5..f6f490077a7 100644 --- a/intern/cycles/blender/sync.cpp +++ b/intern/cycles/blender/sync.cpp @@ -352,6 
+352,21 @@ void BlenderSync::sync_integrator(BL::ViewLayer &b_view_layer, bool background) integrator->set_adaptive_min_samples(get_int(cscene, "adaptive_min_samples")); } + int samples = get_int(cscene, "samples"); + float scrambling_distance = get_float(cscene, "scrambling_distance"); + bool adaptive_scrambling_distance = get_boolean(cscene, "adaptive_scrambling_distance"); + if (adaptive_scrambling_distance) { + scrambling_distance *= 4.0f / sqrtf(samples); + } + + /* only use scrambling distance in the viewport if user wants to and disable with AS */ + bool preview_scrambling_distance = get_boolean(cscene, "preview_scrambling_distance"); + if ((preview && !preview_scrambling_distance) || sampling_pattern != SAMPLING_PATTERN_SOBOL) + scrambling_distance = 1.0f; + + VLOG(1) << "Used Scrambling Distance: " << scrambling_distance; + integrator->set_scrambling_distance(scrambling_distance); + if (get_boolean(cscene, "use_fast_gi")) { if (preview) { integrator->set_ao_bounces(get_int(cscene, "ao_bounces")); diff --git a/intern/cycles/integrator/path_trace_work_gpu.cpp b/intern/cycles/integrator/path_trace_work_gpu.cpp index b7dc4e5d181..251bec0dc8f 100644 --- a/intern/cycles/integrator/path_trace_work_gpu.cpp +++ b/intern/cycles/integrator/path_trace_work_gpu.cpp @@ -258,7 +258,10 @@ void PathTraceWorkGPU::render_samples(RenderStatistics &statistics, * schedules work in halves of available number of paths. 
*/ work_tile_scheduler_.set_max_num_path_states(max_num_paths_ / 8); - work_tile_scheduler_.reset(effective_buffer_params_, start_sample, samples_num); + work_tile_scheduler_.reset(effective_buffer_params_, + start_sample, + samples_num, + device_scene_->data.integrator.scrambling_distance); enqueue_reset(); diff --git a/intern/cycles/integrator/tile.cpp b/intern/cycles/integrator/tile.cpp index 7ea73451d80..b49e1b27b83 100644 --- a/intern/cycles/integrator/tile.cpp +++ b/intern/cycles/integrator/tile.cpp @@ -48,7 +48,8 @@ ccl_device_inline uint round_up_to_power_of_two(uint x) TileSize tile_calculate_best_size(const int2 &image_size, const int num_samples, - const int max_num_path_states) + const int max_num_path_states, + const float scrambling_distance) { if (max_num_path_states == 1) { /* Simple case: avoid any calculation, which could cause rounding issues. */ @@ -71,17 +72,54 @@ TileSize tile_calculate_best_size(const int2 &image_size, * - Keep values a power of two, for more integer fit into the maximum number of paths. */ TileSize tile_size; - - /* Calculate tile size as if it is the most possible one to fit an entire range of samples. - * The idea here is to keep tiles as small as possible, and keep device occupied by scheduling - * multiple tiles with the same coordinates rendering different samples. */ const int num_path_states_per_sample = max_num_path_states / num_samples; - if (num_path_states_per_sample != 0) { - tile_size.width = round_down_to_power_of_two(lround(sqrt(num_path_states_per_sample))); - tile_size.height = tile_size.width; + if (scrambling_distance < 0.9f) { + /* Prefer large tiles for scrambling distance. 
*/ + if (image_size.x * image_size.y <= num_path_states_per_sample) { + tile_size.width = image_size.x; + tile_size.height = image_size.y; + } + else { + /* Pick the option with the biggest tile size */ + int heightOption = num_path_states_per_sample / image_size.x; + int widthOption = num_path_states_per_sample / image_size.y; + // Check if these options are possible + if ((heightOption > 0) || (widthOption > 0)) { + int area1 = image_size.x * heightOption; + int area2 = widthOption * image_size.y; + /* The option with the biggest pixel area */ + if (area1 >= area2) { + tile_size.width = image_size.x; + tile_size.height = heightOption; + } + else { + tile_size.width = widthOption; + tile_size.height = image_size.y; + } + } + else { // Large tiles are not an option so use square tiles + if (num_path_states_per_sample != 0) { + tile_size.width = round_down_to_power_of_two(lround(sqrt(num_path_states_per_sample))); + tile_size.height = tile_size.width; + } + else { + tile_size.width = tile_size.height = 1; + } + } + } } else { - tile_size.width = tile_size.height = 1; + /* Calculate tile size as if it is the most possible one to fit an entire range of samples. + * The idea here is to keep tiles as small as possible, and keep device occupied by scheduling + * multiple tiles with the same coordinates rendering different samples. 
*/ + + if (num_path_states_per_sample != 0) { + tile_size.width = round_down_to_power_of_two(lround(sqrt(num_path_states_per_sample))); + tile_size.height = tile_size.width; + } + else { + tile_size.width = tile_size.height = 1; + } } if (num_samples == 1) { @@ -93,7 +131,7 @@ TileSize tile_calculate_best_size(const int2 &image_size, tile_size.num_samples = min(round_up_to_power_of_two(lround(sqrt(num_samples / 2))), static_cast(num_samples)); - const int tile_area = tile_size.width / tile_size.height; + const int tile_area = tile_size.width * tile_size.height; tile_size.num_samples = min(tile_size.num_samples, max_num_path_states / tile_area); } diff --git a/intern/cycles/integrator/tile.h b/intern/cycles/integrator/tile.h index 879c68b875c..61f7d736115 100644 --- a/intern/cycles/integrator/tile.h +++ b/intern/cycles/integrator/tile.h @@ -51,6 +51,7 @@ std::ostream &operator<<(std::ostream &os, const TileSize &tile_size); * possible, and have as many threads active for every tile as possible. */ TileSize tile_calculate_best_size(const int2 &image_size, const int num_samples, - const int max_num_path_states); + const int max_num_path_states, + const float scrambling_distance); CCL_NAMESPACE_END diff --git a/intern/cycles/integrator/work_tile_scheduler.cpp b/intern/cycles/integrator/work_tile_scheduler.cpp index c874dffde91..2d1ac07db7f 100644 --- a/intern/cycles/integrator/work_tile_scheduler.cpp +++ b/intern/cycles/integrator/work_tile_scheduler.cpp @@ -33,13 +33,17 @@ void WorkTileScheduler::set_max_num_path_states(int max_num_path_states) max_num_path_states_ = max_num_path_states; } -void WorkTileScheduler::reset(const BufferParams &buffer_params, int sample_start, int samples_num) +void WorkTileScheduler::reset(const BufferParams &buffer_params, + int sample_start, + int samples_num, + float scrambling_distance) { /* Image buffer parameters. 
*/ image_full_offset_px_.x = buffer_params.full_x; image_full_offset_px_.y = buffer_params.full_y; image_size_px_ = make_int2(buffer_params.width, buffer_params.height); + scrambling_distance_ = scrambling_distance; offset_ = buffer_params.offset; stride_ = buffer_params.stride; @@ -54,7 +58,8 @@ void WorkTileScheduler::reset(const BufferParams &buffer_params, int sample_star void WorkTileScheduler::reset_scheduler_state() { - tile_size_ = tile_calculate_best_size(image_size_px_, samples_num_, max_num_path_states_); + tile_size_ = tile_calculate_best_size( + image_size_px_, samples_num_, max_num_path_states_, scrambling_distance_); VLOG(3) << "Will schedule tiles of size " << tile_size_; diff --git a/intern/cycles/integrator/work_tile_scheduler.h b/intern/cycles/integrator/work_tile_scheduler.h index 155bba5cb68..d9fa7e84431 100644 --- a/intern/cycles/integrator/work_tile_scheduler.h +++ b/intern/cycles/integrator/work_tile_scheduler.h @@ -38,7 +38,10 @@ class WorkTileScheduler { void set_max_num_path_states(int max_num_path_states); /* Scheduling will happen for pixels within a big tile denotes by its parameters. */ - void reset(const BufferParams &buffer_params, int sample_start, int samples_num); + void reset(const BufferParams &buffer_params, + int sample_start, + int samples_num, + float scrambling_distance); /* Get work for a device. * Returns true if there is still work to be done and initialize the work tile to all @@ -68,6 +71,9 @@ class WorkTileScheduler { * Will be passed over to the KernelWorkTile. */ int offset_, stride_; + /* Scrambling Distance requires adapted tile size */ + float scrambling_distance_; + /* Start sample of index and number of samples which are to be rendered. * The scheduler will cover samples range of [start, start + num] over the entire image * (splitting into a smaller work tiles). 
*/ diff --git a/intern/cycles/kernel/sample/pattern.h b/intern/cycles/kernel/sample/pattern.h index 191b24a5f2a..0c27992c7f6 100644 --- a/intern/cycles/kernel/sample/pattern.h +++ b/intern/cycles/kernel/sample/pattern.h @@ -79,7 +79,7 @@ ccl_device_forceinline float path_rng_1D(KernelGlobals kg, * See T38710, T50116. */ uint tmp_rng = cmj_hash_simple(dimension, rng_hash); - shift = tmp_rng * (1.0f / (float)0xFFFFFFFF); + shift = tmp_rng * (kernel_data.integrator.scrambling_distance / (float)0xFFFFFFFF); return r + shift - floorf(r + shift); #endif diff --git a/intern/cycles/kernel/types.h b/intern/cycles/kernel/types.h index 4109dd6a486..2827139d511 100644 --- a/intern/cycles/kernel/types.h +++ b/intern/cycles/kernel/types.h @@ -1184,9 +1184,9 @@ typedef struct KernelIntegrator { float volume_step_rate; int has_shadow_catcher; + float scrambling_distance; /* padding */ - int pad1; } KernelIntegrator; static_assert_align(KernelIntegrator, 16); diff --git a/intern/cycles/scene/integrator.cpp b/intern/cycles/scene/integrator.cpp index 3e795b30e7f..e9ff868c3fc 100644 --- a/intern/cycles/scene/integrator.cpp +++ b/intern/cycles/scene/integrator.cpp @@ -81,6 +81,7 @@ NODE_DEFINE(Integrator) sampling_pattern_enum.insert("sobol", SAMPLING_PATTERN_SOBOL); sampling_pattern_enum.insert("pmj", SAMPLING_PATTERN_PMJ); SOCKET_ENUM(sampling_pattern, "Sampling Pattern", sampling_pattern_enum, SAMPLING_PATTERN_SOBOL); + SOCKET_FLOAT(scrambling_distance, "Scrambling Distance", 1.0f); static NodeEnum denoiser_type_enum; denoiser_type_enum.insert("optix", DENOISER_OPTIX); @@ -192,6 +193,7 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene sample_clamp_indirect * 3.0f; kintegrator->sampling_pattern = new_sampling_pattern; + kintegrator->scrambling_distance = scrambling_distance; if (light_sampling_threshold > 0.0f) { kintegrator->light_inv_rr_threshold = 1.0f / light_sampling_threshold; diff --git a/intern/cycles/scene/integrator.h 
b/intern/cycles/scene/integrator.h index c380203f4f3..75764bcdedc 100644 --- a/intern/cycles/scene/integrator.h +++ b/intern/cycles/scene/integrator.h @@ -76,6 +76,7 @@ class Integrator : public Node { NODE_SOCKET_API(float, adaptive_threshold) NODE_SOCKET_API(SamplingPattern, sampling_pattern) + NODE_SOCKET_API(float, scrambling_distance) NODE_SOCKET_API(bool, use_denoise); NODE_SOCKET_API(DenoiserType, denoiser_type); diff --git a/intern/cycles/test/integrator_tile_test.cpp b/intern/cycles/test/integrator_tile_test.cpp index e5ffa7c153d..8bb0856d6a9 100644 --- a/intern/cycles/test/integrator_tile_test.cpp +++ b/intern/cycles/test/integrator_tile_test.cpp @@ -24,23 +24,26 @@ CCL_NAMESPACE_BEGIN TEST(tile_calculate_best_size, Basic) { /* Make sure CPU-like case is handled properly. */ - EXPECT_EQ(tile_calculate_best_size(make_int2(1920, 1080), 1, 1), TileSize(1, 1, 1)); - EXPECT_EQ(tile_calculate_best_size(make_int2(1920, 1080), 100, 1), TileSize(1, 1, 1)); + EXPECT_EQ(tile_calculate_best_size(make_int2(1920, 1080), 1, 1, 1.0f), TileSize(1, 1, 1)); + EXPECT_EQ(tile_calculate_best_size(make_int2(1920, 1080), 100, 1, 1.0f), TileSize(1, 1, 1)); /* Enough path states to fit an entire image with all samples. 
*/ - EXPECT_EQ(tile_calculate_best_size(make_int2(1920, 1080), 1, 1920 * 1080), + EXPECT_EQ(tile_calculate_best_size(make_int2(1920, 1080), 1, 1920 * 1080, 1.0f), TileSize(1920, 1080, 1)); - EXPECT_EQ(tile_calculate_best_size(make_int2(1920, 1080), 100, 1920 * 1080 * 100), + EXPECT_EQ(tile_calculate_best_size(make_int2(1920, 1080), 100, 1920 * 1080 * 100, 1.0f), TileSize(1920, 1080, 100)); } TEST(tile_calculate_best_size, Extreme) { - EXPECT_EQ(tile_calculate_best_size(make_int2(32, 32), 262144, 131072), TileSize(1, 1, 512)); - EXPECT_EQ(tile_calculate_best_size(make_int2(32, 32), 1048576, 131072), TileSize(1, 1, 1024)); - EXPECT_EQ(tile_calculate_best_size(make_int2(32, 32), 10485760, 131072), TileSize(1, 1, 4096)); + EXPECT_EQ(tile_calculate_best_size(make_int2(32, 32), 262144, 131072, 1.0f), + TileSize(1, 1, 512)); + EXPECT_EQ(tile_calculate_best_size(make_int2(32, 32), 1048576, 131072, 1.0f), + TileSize(1, 1, 1024)); + EXPECT_EQ(tile_calculate_best_size(make_int2(32, 32), 10485760, 131072, 1.0f), + TileSize(1, 1, 4096)); - EXPECT_EQ(tile_calculate_best_size(make_int2(32, 32), 8192 * 8192 * 2, 1024), + EXPECT_EQ(tile_calculate_best_size(make_int2(32, 32), 8192 * 8192 * 2, 1024, 1.0f), TileSize(1, 1, 1024)); } diff --git a/release/datafiles/locale b/release/datafiles/locale index 80d9e7ee122..8ee2942570f 160000 --- a/release/datafiles/locale +++ b/release/datafiles/locale @@ -1 +1 @@ -Subproject commit 80d9e7ee122c626cbbcd1da554683bce79f8d3df +Subproject commit 8ee2942570f08d10484bb2328d0d1b0aaaa0367c diff --git a/release/scripts/addons b/release/scripts/addons index 27fe7f3a4f9..f2a08d80ccd 160000 --- a/release/scripts/addons +++ b/release/scripts/addons @@ -1 +1 @@ -Subproject commit 27fe7f3a4f964b53af436c4da4ddea337eff0c7e +Subproject commit f2a08d80ccd3c13af304525778df3905f95bd44d diff --git a/release/scripts/addons_contrib b/release/scripts/addons_contrib index 42da56aa737..16467648282 160000 --- a/release/scripts/addons_contrib +++ 
b/release/scripts/addons_contrib @@ -1 +1 @@ -Subproject commit 42da56aa73726710107031787af5eea186797984 +Subproject commit 16467648282500cc229c271f62201ef897f2c2c3 diff --git a/source/tools b/source/tools index 7c5acb95df9..2e8c8792488 160000 --- a/source/tools +++ b/source/tools @@ -1 +1 @@ -Subproject commit 7c5acb95df918503d11cfc43172ce13901019289 +Subproject commit 2e8c879248822c8e500ed49d79acc605e5aa75b9