Cycles: Implement blue-noise dithered sampling

This patch implements blue-noise dithered sampling as described by Nathan Vegdahl (https://psychopath.io/post/2022_07_24_owen_scrambling_based_dithered_blue_noise_sampling), which in turn is based on "Screen-Space Blue-Noise Diffusion of Monte Carlo Sampling Error via Hierarchical Ordering of Pixels" (https://repository.kaust.edu.sa/items/1269ae24-2596-400b-a839-e54486033a93).

The basic idea is simple: Instead of generating independent sequences for each pixel by scrambling them, we use a single sequence for the entire image, with each pixel getting one chunk of the samples. The ordering across pixels is determined by hierarchical scrambling of the pixel's position along a space-filling curve, which ends up being pretty much the same operation as already used for the underlying sequence.

This results in a more high-frequency noise distribution, which appears smoother despite not being less noisy overall.

The main limitation at the moment is that the improvement is only clear if the full sample amount is used per pixel, so interactive preview rendering and adaptive sampling will not receive the benefit. One exception to this is that when using the new "Automatic" setting, the first sample in interactive rendering will also be blue-noise-distributed.

The sampling mode option is now exposed in the UI, with the three options being Blue Noise (the new mode), Classic (the previous Tabulated Sobol method) and the new default, Automatic (blue noise, with the additional property of ensuring the first sample is also blue-noise-distributed in interactive rendering). When debug mode is enabled, additional options appear, such as Sobol-Burley.

Note that the scrambling distance option is not compatible with the blue-noise pattern.

Pull Request: https://projects.blender.org/blender/blender/pulls/118479
This commit is contained in:
Lukas Stockner 2024-06-05 02:29:47 +02:00 committed by Lukas Stockner
parent 2ddc936cbb
commit 5246fb5a57
22 changed files with 276 additions and 113 deletions

@ -70,10 +70,38 @@ enum_use_layer_samples = (
('IGNORE', "Ignore", "Ignore per render layer number of samples"),
)
enum_sampling_pattern = (
('SOBOL_BURLEY', "Sobol-Burley", "Use on-the-fly computed Owen-scrambled Sobol for random sampling", 0),
('TABULATED_SOBOL', "Tabulated Sobol", "Use pre-computed tables of Owen-scrambled Sobol for random sampling", 1),
)
def enum_sampling_pattern(self, context):
    """Dynamic EnumProperty items callback for the Sampling Pattern option.

    The visible choices depend on the user preferences: the full set of
    debug patterns is only listed when both the Cycles debug experimental
    flag and the developer UI are enabled; otherwise a reduced, user-facing
    set (Automatic / Classic / Blue-Noise) is shown.
    """
    preferences = context.preferences
    show_debug_items = (preferences.experimental.use_cycles_debug and
                        preferences.view.show_developer_ui)

    # 'AUTOMATIC' (value 5) is always the first entry.
    automatic = ('AUTOMATIC',
                 "Automatic",
                 "Use a blue-noise sampling pattern, which optimizes the frequency distribution of noise, for random sampling. For viewport rendering, optimize first sample quality for interactive preview",
                 5)

    if show_debug_items:
        extra = [
            ('SOBOL_BURLEY', "Sobol-Burley", "Use on-the-fly computed Owen-scrambled Sobol for random sampling", 0),
            ('TABULATED_SOBOL', "Tabulated Sobol", "Use pre-computed tables of Owen-scrambled Sobol for random sampling", 1),
            ('BLUE_NOISE', "Blue-Noise (pure)", "Blue-Noise (pure)", 2),
            ('BLUE_NOISE_FIRST', "Blue-Noise (first)", "Blue-Noise (first)", 3),
            ('BLUE_NOISE_ROUND', "Blue-Noise (round)", "Blue-Noise (round)", 4),
        ]
    else:
        extra = [
            ('TABULATED_SOBOL',
             "Classic",
             "Use pre-computed tables of Owen-scrambled Sobol for random sampling",
             1),
            ('BLUE_NOISE',
             "Blue-Noise",
             "Use a blue-noise pattern, which optimizes the frequency distribution of noise, for random sampling",
             2),
        ]
    return [automatic] + extra
enum_emission_sampling = (
('NONE',
@ -461,7 +489,7 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
name="Sampling Pattern",
description="Random sampling pattern used by the integrator",
items=enum_sampling_pattern,
default='TABULATED_SOBOL',
default=5,
)
scrambling_distance: FloatProperty(

@ -425,6 +425,9 @@ class CYCLES_RENDER_PT_sampling_advanced(CyclesButtonsPanel, Panel):
scene = context.scene
cscene = scene.cycles
row = layout.row(align=True)
row.prop(cscene, "sampling_pattern", text="Pattern")
row = layout.row(align=True)
row.prop(cscene, "seed")
row.prop(cscene, "use_animated_seed", text="", icon='TIME')
@ -436,7 +439,7 @@ class CYCLES_RENDER_PT_sampling_advanced(CyclesButtonsPanel, Panel):
heading = layout.column(align=True, heading="Scrambling Distance")
# Tabulated Sobol is used when the debug UI is turned off.
heading.active = cscene.sampling_pattern == 'TABULATED_SOBOL' or not CyclesDebugButtonsPanel.poll(context)
heading.active = cscene.sampling_pattern == 'TABULATED_SOBOL'
heading.prop(cscene, "auto_scrambling_distance", text="Automatic")
heading.prop(cscene, "preview_scrambling_distance", text="Viewport")
heading.prop(cscene, "scrambling_distance", text="Multiplier")
@ -474,23 +477,6 @@ class CYCLES_RENDER_PT_sampling_lights(CyclesButtonsPanel, Panel):
sub.active = not cscene.use_light_tree
class CYCLES_RENDER_PT_sampling_debug(CyclesDebugButtonsPanel, Panel):
bl_label = "Debug"
bl_parent_id = "CYCLES_RENDER_PT_sampling"
bl_options = {'DEFAULT_CLOSED'}
def draw(self, context):
layout = self.layout
layout.use_property_split = True
layout.use_property_decorate = False
scene = context.scene
cscene = scene.cycles
col = layout.column(align=True)
col.prop(cscene, "sampling_pattern", text="Pattern")
class CYCLES_RENDER_PT_subdivision(CyclesButtonsPanel, Panel):
bl_label = "Subdivision"
bl_options = {'DEFAULT_CLOSED'}
@ -2576,7 +2562,6 @@ classes = (
CYCLES_RENDER_PT_sampling_path_guiding_debug,
CYCLES_RENDER_PT_sampling_lights,
CYCLES_RENDER_PT_sampling_advanced,
CYCLES_RENDER_PT_sampling_debug,
CYCLES_RENDER_PT_light_paths,
CYCLES_RENDER_PT_light_paths_max_bounces,
CYCLES_RENDER_PT_light_paths_clamping,

@ -101,7 +101,7 @@ def do_versions(self):
library_versions.setdefault(library.version, []).append(library)
# Do versioning per library, since they might have different versions.
max_need_versioning = (3, 5, 2)
max_need_versioning = (4, 2, 52)
for version, libraries in library_versions.items():
if version > max_need_versioning:
continue
@ -253,6 +253,13 @@ def do_versions(self):
# Tabulated Sobol.
cscene.sampling_pattern = 'TABULATED_SOBOL'
if version <= (4, 2, 52):
cscene = scene.cycles
# Previous versions defaulted to Tabulated Sobol unless debugging options
# were enabled, so keep this behavior instead of suddenly defaulting to
# blue noise if the file happens to contain a different option for the enum.
cscene.sampling_pattern = 'TABULATED_SOBOL'
# Lamps
for light in bpy.data.lights:
if light.library not in libraries:

@ -359,13 +359,39 @@ void BlenderSync::sync_integrator(BL::ViewLayer &b_view_layer,
scene->light_manager->tag_update(scene, LightManager::UPDATE_ALL);
}
SamplingPattern sampling_pattern;
if (use_developer_ui) {
sampling_pattern = (SamplingPattern)get_enum(
cscene, "sampling_pattern", SAMPLING_NUM_PATTERNS, SAMPLING_PATTERN_TABULATED_SOBOL);
}
else {
sampling_pattern = SAMPLING_PATTERN_TABULATED_SOBOL;
const bool is_vertex_baking = scene->bake_manager->get_baking() &&
b_scene.render().bake().target() != BL::BakeSettings::target_IMAGE_TEXTURES;
SamplingPattern sampling_pattern = (SamplingPattern)get_enum(
cscene, "sampling_pattern", SAMPLING_NUM_PATTERNS, SAMPLING_PATTERN_TABULATED_SOBOL);
switch (sampling_pattern) {
case SAMPLING_PATTERN_AUTOMATIC:
if (is_vertex_baking) {
/* When baking vertex colors, the "pixels" in the output are unrelated to their neighbors,
* so blue-noise sampling makes no sense. */
sampling_pattern = SAMPLING_PATTERN_TABULATED_SOBOL;
}
else if (!background) {
/* For interactive rendering, ensure that the first sample is in itself
* blue-noise-distributed for smooth viewport navigation. */
sampling_pattern = SAMPLING_PATTERN_BLUE_NOISE_FIRST;
}
else {
/* For non-interactive rendering, default to a full blue-noise pattern. */
sampling_pattern = SAMPLING_PATTERN_BLUE_NOISE_PURE;
}
break;
case SAMPLING_PATTERN_TABULATED_SOBOL:
case SAMPLING_PATTERN_BLUE_NOISE_PURE:
/* Always allowed. */
break;
default:
/* If not using developer UI, default to blue noise for "advanced" patterns. */
if (!use_developer_ui) {
sampling_pattern = SAMPLING_PATTERN_BLUE_NOISE_PURE;
}
break;
}
integrator->set_sampling_pattern(sampling_pattern);
@ -409,7 +435,7 @@ void BlenderSync::sync_integrator(BL::ViewLayer &b_view_layer,
/* Only use scrambling distance in the viewport if user wants to. */
bool preview_scrambling_distance = get_boolean(cscene, "preview_scrambling_distance");
if ((preview && !preview_scrambling_distance) ||
sampling_pattern == SAMPLING_PATTERN_SOBOL_BURLEY)
sampling_pattern != SAMPLING_PATTERN_TABULATED_SOBOL)
{
scrambling_distance = 1.0f;
}
@ -465,9 +491,7 @@ void BlenderSync::sync_integrator(BL::ViewLayer &b_view_layer,
/* No denoising support for vertex color baking, vertices packed into image
* buffer have no relation to neighbors. */
if (scene->bake_manager->get_baking() &&
b_scene.render().bake().target() != BL::BakeSettings::target_IMAGE_TEXTURES)
{
if (is_vertex_baking) {
denoise_params.use = false;
}

@ -195,6 +195,8 @@ KERNEL_STRUCT_MEMBER_DONT_SPECIALIZE
KERNEL_STRUCT_MEMBER(integrator, int, tabulated_sobol_sequence_size)
KERNEL_STRUCT_MEMBER_DONT_SPECIALIZE
KERNEL_STRUCT_MEMBER(integrator, int, sobol_index_mask)
KERNEL_STRUCT_MEMBER_DONT_SPECIALIZE
KERNEL_STRUCT_MEMBER(integrator, int, blue_noise_sequence_length)
/* Volume render. */
KERNEL_STRUCT_MEMBER(integrator, int, use_volumes)
KERNEL_STRUCT_MEMBER(integrator, int, volume_max_steps)

@ -130,7 +130,6 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg,
ccl_global float *primitive = buffer + kernel_data.film.pass_bake_primitive;
ccl_global float *differential = buffer + kernel_data.film.pass_bake_differential;
const int seed = __float_as_uint(primitive[0]);
int prim = __float_as_uint(primitive[1]);
if (prim == -1) {
/* Accumulate transparency for empty pixels. */
@ -141,13 +140,19 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg,
prim += kernel_data.bake.tri_offset;
/* Random number generator. */
const uint rng_hash = hash_uint(seed) ^ kernel_data.integrator.seed;
uint rng_pixel = __float_as_uint(primitive[0]);
if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_TABULATED_SOBOL) {
rng_pixel = hash_uint(rng_pixel) ^ kernel_data.integrator.seed;
}
else {
rng_pixel = path_rng_pixel_init(kg, sample, x, y);
}
const float2 rand_filter = (sample == 0) ? make_float2(0.5f, 0.5f) :
path_rng_2D(kg, rng_hash, sample, PRNG_FILTER);
path_rng_2D(kg, rng_pixel, sample, PRNG_FILTER);
/* Initialize path state for path integration. */
path_state_init_integrator(kg, state, sample, rng_hash);
path_state_init_integrator(kg, state, sample, rng_pixel);
/* Barycentric UV. */
float u = primitive[2];

@ -20,17 +20,17 @@ ccl_device_inline void integrate_camera_sample(KernelGlobals kg,
const int sample,
const int x,
const int y,
const uint rng_hash,
const uint rng_pixel,
ccl_private Ray *ray)
{
/* Filter sampling. */
const float2 rand_filter = (sample == 0) ? make_float2(0.5f, 0.5f) :
path_rng_2D(kg, rng_hash, sample, PRNG_FILTER);
path_rng_2D(kg, rng_pixel, sample, PRNG_FILTER);
/* Motion blur (time) and depth of field (lens) sampling. (time, lens_x, lens_y) */
const float3 rand_time_lens = (kernel_data.cam.shuttertime != -1.0f ||
kernel_data.cam.aperturesize > 0.0f) ?
path_rng_3D(kg, rng_hash, sample, PRNG_LENS_TIME) :
path_rng_3D(kg, rng_pixel, sample, PRNG_LENS_TIME) :
zero_float3();
/* We use x for time and y,z for lens because in practice with Sobol
@ -78,12 +78,12 @@ ccl_device bool integrator_init_from_camera(KernelGlobals kg,
kg, state, render_buffer, scheduled_sample, tile->sample_offset);
/* Initialize random number seed for path. */
const uint rng_hash = path_rng_hash_init(kg, sample, x, y);
const uint rng_pixel = path_rng_pixel_init(kg, sample, x, y);
{
/* Generate camera ray. */
Ray ray;
integrate_camera_sample(kg, sample, x, y, rng_hash, &ray);
integrate_camera_sample(kg, sample, x, y, rng_pixel, &ray);
if (ray.tmax == 0.0f) {
return true;
}
@ -93,7 +93,7 @@ ccl_device bool integrator_init_from_camera(KernelGlobals kg,
}
/* Initialize path state for path integration. */
path_state_init_integrator(kg, state, sample, rng_hash);
path_state_init_integrator(kg, state, sample, rng_pixel);
/* Continue with intersect_closest kernel, optionally initializing volume
* stack before that if the camera may be inside a volume. */

@ -129,7 +129,7 @@ ccl_device bool shadow_linking_pick_light_intersection(KernelGlobals kg,
const int object_receiver = light_link_receiver_forward(kg, state);
uint lcg_state = lcg_state_init(INTEGRATOR_STATE(state, path, rng_hash),
uint lcg_state = lcg_state_init(INTEGRATOR_STATE(state, path, rng_pixel),
INTEGRATOR_STATE(state, path, rng_offset),
INTEGRATOR_STATE(state, path, sample),
0x68bc21eb);

@ -38,7 +38,7 @@ ccl_device_inline void path_state_init(IntegratorState state,
ccl_device_inline void path_state_init_integrator(KernelGlobals kg,
IntegratorState state,
const int sample,
const uint rng_hash)
const uint rng_pixel)
{
INTEGRATOR_STATE_WRITE(state, path, sample) = sample;
INTEGRATOR_STATE_WRITE(state, path, bounce) = 0;
@ -48,7 +48,7 @@ ccl_device_inline void path_state_init_integrator(KernelGlobals kg,
INTEGRATOR_STATE_WRITE(state, path, transparent_bounce) = 0;
INTEGRATOR_STATE_WRITE(state, path, volume_bounce) = 0;
INTEGRATOR_STATE_WRITE(state, path, volume_bounds_bounce) = 0;
INTEGRATOR_STATE_WRITE(state, path, rng_hash) = rng_hash;
INTEGRATOR_STATE_WRITE(state, path, rng_pixel) = rng_pixel;
INTEGRATOR_STATE_WRITE(state, path, rng_offset) = PRNG_BOUNCE_NUM;
INTEGRATOR_STATE_WRITE(state, path, flag) = PATH_RAY_CAMERA | PATH_RAY_MIS_SKIP |
PATH_RAY_TRANSPARENT_BACKGROUND;
@ -307,7 +307,7 @@ ccl_device_inline bool path_state_ao_bounce(KernelGlobals kg, ConstIntegratorSta
/* RNG State loaded onto stack. */
typedef struct RNGState {
uint rng_hash;
uint rng_pixel;
uint rng_offset;
int sample;
} RNGState;
@ -315,7 +315,7 @@ typedef struct RNGState {
ccl_device_inline void path_state_rng_load(ConstIntegratorState state,
ccl_private RNGState *rng_state)
{
rng_state->rng_hash = INTEGRATOR_STATE(state, path, rng_hash);
rng_state->rng_pixel = INTEGRATOR_STATE(state, path, rng_pixel);
rng_state->rng_offset = INTEGRATOR_STATE(state, path, rng_offset);
rng_state->sample = INTEGRATOR_STATE(state, path, sample);
}
@ -323,17 +323,25 @@ ccl_device_inline void path_state_rng_load(ConstIntegratorState state,
ccl_device_inline void shadow_path_state_rng_load(ConstIntegratorShadowState state,
ccl_private RNGState *rng_state)
{
rng_state->rng_hash = INTEGRATOR_STATE(state, shadow_path, rng_hash);
rng_state->rng_pixel = INTEGRATOR_STATE(state, shadow_path, rng_pixel);
rng_state->rng_offset = INTEGRATOR_STATE(state, shadow_path, rng_offset);
rng_state->sample = INTEGRATOR_STATE(state, shadow_path, sample);
}
/* Decorrelate the RNG state from the rest of the path (e.g. for the subsurface
 * random walk) by jumping to a new, seed-dependent dimension offset. */
ccl_device_inline void path_state_rng_scramble(ccl_private RNGState *rng_state, const int seed)
{
  /* To get an uncorrelated sequence of samples (e.g. for subsurface random walk), just change
   * the dimension offset since all implemented samplers can generate unlimited numbers of
   * dimensions anyways. The only thing to ensure is that the offset is divisible by 4.
   * The `& ~0x3` clears the low two bits to keep that alignment. */
  rng_state->rng_offset = hash_hp_seeded_uint(rng_state->rng_offset, seed) & ~0x3;
}
ccl_device_inline float path_state_rng_1D(KernelGlobals kg,
ccl_private const RNGState *rng_state,
const int dimension)
{
return path_rng_1D(
kg, rng_state->rng_hash, rng_state->sample, rng_state->rng_offset + dimension);
kg, rng_state->rng_pixel, rng_state->sample, rng_state->rng_offset + dimension);
}
ccl_device_inline float2 path_state_rng_2D(KernelGlobals kg,
@ -341,7 +349,7 @@ ccl_device_inline float2 path_state_rng_2D(KernelGlobals kg,
const int dimension)
{
return path_rng_2D(
kg, rng_state->rng_hash, rng_state->sample, rng_state->rng_offset + dimension);
kg, rng_state->rng_pixel, rng_state->sample, rng_state->rng_offset + dimension);
}
ccl_device_inline float3 path_state_rng_3D(KernelGlobals kg,
@ -349,7 +357,7 @@ ccl_device_inline float3 path_state_rng_3D(KernelGlobals kg,
const int dimension)
{
return path_rng_3D(
kg, rng_state->rng_hash, rng_state->sample, rng_state->rng_offset + dimension);
kg, rng_state->rng_pixel, rng_state->sample, rng_state->rng_offset + dimension);
}
ccl_device_inline float path_branched_rng_1D(KernelGlobals kg,
@ -359,7 +367,7 @@ ccl_device_inline float path_branched_rng_1D(KernelGlobals kg,
const int dimension)
{
return path_rng_1D(kg,
rng_state->rng_hash,
rng_state->rng_pixel,
rng_state->sample * num_branches + branch,
rng_state->rng_offset + dimension);
}
@ -371,7 +379,7 @@ ccl_device_inline float2 path_branched_rng_2D(KernelGlobals kg,
const int dimension)
{
return path_rng_2D(kg,
rng_state->rng_hash,
rng_state->rng_pixel,
rng_state->sample * num_branches + branch,
rng_state->rng_offset + dimension);
}
@ -383,7 +391,7 @@ ccl_device_inline float3 path_branched_rng_3D(KernelGlobals kg,
const int dimension)
{
return path_rng_3D(kg,
rng_state->rng_hash,
rng_state->rng_pixel,
rng_state->sample * num_branches + branch,
rng_state->rng_offset + dimension);
}

@ -232,8 +232,8 @@ integrate_direct_light_shadow_init_common(KernelGlobals kg,
state, path, render_pixel_index);
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, rng_offset) = INTEGRATOR_STATE(
state, path, rng_offset);
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, rng_hash) = INTEGRATOR_STATE(
state, path, rng_hash);
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, rng_pixel) = INTEGRATOR_STATE(
state, path, rng_pixel);
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, sample) = INTEGRATOR_STATE(
state, path, sample);
@ -675,8 +675,8 @@ ccl_device_forceinline void integrate_surface_ao(KernelGlobals kg,
state, path, render_pixel_index);
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, rng_offset) = INTEGRATOR_STATE(
state, path, rng_offset);
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, rng_hash) = INTEGRATOR_STATE(
state, path, rng_hash);
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, rng_pixel) = INTEGRATOR_STATE(
state, path, rng_pixel);
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, sample) = INTEGRATOR_STATE(
state, path, sample);
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, flag) = shadow_flag;
@ -724,7 +724,7 @@ ccl_device int integrate_surface(KernelGlobals kg,
/* Initialize additional RNG for BSDFs. */
if (sd.flag & SD_BSDF_NEEDS_LCG) {
sd.lcg_state = lcg_state_init(INTEGRATOR_STATE(state, path, rng_hash),
sd.lcg_state = lcg_state_init(INTEGRATOR_STATE(state, path, rng_pixel),
INTEGRATOR_STATE(state, path, rng_offset),
INTEGRATOR_STATE(state, path, sample),
0xb4bc3953);

@ -850,8 +850,8 @@ ccl_device_forceinline void integrate_volume_direct_light(
state, path, render_pixel_index);
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, rng_offset) = INTEGRATOR_STATE(
state, path, rng_offset);
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, rng_hash) = INTEGRATOR_STATE(
state, path, rng_hash);
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, rng_pixel) = INTEGRATOR_STATE(
state, path, rng_pixel);
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, sample) = INTEGRATOR_STATE(
state, path, sample);
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, flag) = shadow_flag;

@ -9,8 +9,8 @@ KERNEL_STRUCT_BEGIN(shadow_path)
KERNEL_STRUCT_MEMBER(shadow_path, uint32_t, render_pixel_index, KERNEL_FEATURE_PATH_TRACING)
/* Current sample number. */
KERNEL_STRUCT_MEMBER(shadow_path, uint32_t, sample, KERNEL_FEATURE_PATH_TRACING)
/* Random number generator seed. */
KERNEL_STRUCT_MEMBER(shadow_path, uint32_t, rng_hash, KERNEL_FEATURE_PATH_TRACING)
/* Random number generator per-pixel info. */
KERNEL_STRUCT_MEMBER(shadow_path, uint32_t, rng_pixel, KERNEL_FEATURE_PATH_TRACING)
/* Random number dimension offset. */
KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, rng_offset, KERNEL_FEATURE_PATH_TRACING)
/* Current ray bounce depth. */

@ -29,8 +29,8 @@ KERNEL_STRUCT_MEMBER(path, uint16_t, volume_bounce, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_MEMBER(path, uint16_t, volume_bounds_bounce, KERNEL_FEATURE_PATH_TRACING)
/* DeviceKernel bit indicating queued kernels. */
KERNEL_STRUCT_MEMBER(path, uint16_t, queued_kernel, KERNEL_FEATURE_PATH_TRACING)
/* Random number generator seed. */
KERNEL_STRUCT_MEMBER(path, uint32_t, rng_hash, KERNEL_FEATURE_PATH_TRACING)
/* Random number generator per-pixel info. */
KERNEL_STRUCT_MEMBER(path, uint32_t, rng_pixel, KERNEL_FEATURE_PATH_TRACING)
/* Random number dimension offset. */
KERNEL_STRUCT_MEMBER(path, uint16_t, rng_offset, KERNEL_FEATURE_PATH_TRACING)
/* enum PathRayFlag */

@ -98,7 +98,7 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg,
/* Intersect with the same object. if multiple intersections are found it
* will use at most BSSRDF_MAX_HITS hits, a random subset of all hits. */
uint lcg_state = lcg_state_init(
rng_state.rng_hash, rng_state.rng_offset, rng_state.sample, 0x68bc21eb);
rng_state.rng_pixel, rng_state.rng_offset, rng_state.sample, 0x68bc21eb);
const int max_hits = BSSRDF_MAX_HITS;
scene_intersect_local(kg, &ray, &ss_isect, object, &lcg_state, max_hits);

@ -222,7 +222,7 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
const float phase_log = logf((diffusion_length + 1.0f) / (diffusion_length - 1.0f));
/* Modify state for RNGs, decorrelated from other paths. */
rng_state.rng_hash = hash_hp_seeded_uint(rng_state.rng_hash + rng_state.rng_offset, 0xdeadbeef);
path_state_rng_scramble(&rng_state, 0xdeadbeef);
/* Random walk until we hit the surface again. */
bool hit = false;

@ -28,26 +28,59 @@ CCL_NAMESPACE_BEGIN
* x,y over x,z.
*/
/* Map a (pixel, sample) pair to the triple consumed by the Sobol-Burley sampler,
 * depending on the active sampling pattern:
 *   .x = sample index within the sequence,
 *   .y = sequence seed,
 *   .z = index mask (0xffffffff disables the length-mask optimization). */
ccl_device_forceinline uint3 blue_noise_indexing(KernelGlobals kg, uint pixel_index, uint sample)
{
  if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_SOBOL_BURLEY) {
    /* One sequence per pixel, using the length mask optimization. */
    return make_uint3(sample, pixel_index, kernel_data.integrator.sobol_index_mask);
  }
  else if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_BLUE_NOISE_PURE) {
    /* For blue-noise samples, we use a single sequence (seed 0) with each pixel receiving
     * a section of it.
     * The total length is expected to get very large (effectively pixel count times sample count),
     * so we don't use the length mask optimization here. */
    pixel_index *= kernel_data.integrator.blue_noise_sequence_length;
    return make_uint3(sample + pixel_index, 0, 0xffffffff);
  }
  else if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_BLUE_NOISE_FIRST) {
    /* The "first" pattern uses a 1SPP blue-noise sequence for the first sample, and a separate
     * N-1 SPP sequence for the remaining samples. The purpose of this is to get blue-noise
     * properties during viewport navigation, which will generally use 1 SPP.
     * Unfortunately using just the first sample of a full blue-noise sequence doesn't give
     * its benefits, so we combine the two as a tradeoff between quality at 1 SPP and full SPP. */
    if (sample == 0) {
      /* Dedicated sequence (arbitrary fixed seed) indexed directly by pixel, so sample 0
       * alone forms a complete blue-noise pattern across the image. */
      return make_uint3(pixel_index, 0x0cd0519f, 0xffffffff);
    }
    else {
      /* Remaining samples behave like the pure pattern, shifted by one so the
       * per-pixel section starts at index 0 of the shared sequence. */
      pixel_index *= kernel_data.integrator.blue_noise_sequence_length;
      return make_uint3((sample - 1) + pixel_index, 0, 0xffffffff);
    }
  }
  else {
    /* All other patterns must be resolved before reaching the kernel. */
    kernel_assert(false);
    return make_uint3(0, 0, 0);
  }
}
ccl_device_forceinline float path_rng_1D(KernelGlobals kg,
uint rng_hash,
int sample,
uint rng_pixel,
uint sample,
int dimension)
{
#ifdef __DEBUG_CORRELATION__
return (float)drand48();
#endif
if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_SOBOL_BURLEY) {
const uint index_mask = kernel_data.integrator.sobol_index_mask;
return sobol_burley_sample_1D(sample, dimension, rng_hash, index_mask);
}
else {
return tabulated_sobol_sample_1D(kg, sample, rng_hash, dimension);
if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_TABULATED_SOBOL) {
return tabulated_sobol_sample_1D(kg, sample, rng_pixel, dimension);
}
uint3 index = blue_noise_indexing(kg, rng_pixel, sample);
return sobol_burley_sample_1D(index.x, dimension, index.y, index.z);
}
ccl_device_forceinline float2 path_rng_2D(KernelGlobals kg,
uint rng_hash,
uint rng_pixel,
int sample,
int dimension)
{
@ -55,17 +88,16 @@ ccl_device_forceinline float2 path_rng_2D(KernelGlobals kg,
return make_float2((float)drand48(), (float)drand48());
#endif
if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_SOBOL_BURLEY) {
const uint index_mask = kernel_data.integrator.sobol_index_mask;
return sobol_burley_sample_2D(sample, dimension, rng_hash, index_mask);
}
else {
return tabulated_sobol_sample_2D(kg, sample, rng_hash, dimension);
if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_TABULATED_SOBOL) {
return tabulated_sobol_sample_2D(kg, sample, rng_pixel, dimension);
}
uint3 index = blue_noise_indexing(kg, rng_pixel, sample);
return sobol_burley_sample_2D(index.x, dimension, index.y, index.z);
}
ccl_device_forceinline float3 path_rng_3D(KernelGlobals kg,
uint rng_hash,
uint rng_pixel,
int sample,
int dimension)
{
@ -73,17 +105,16 @@ ccl_device_forceinline float3 path_rng_3D(KernelGlobals kg,
return make_float3((float)drand48(), (float)drand48(), (float)drand48());
#endif
if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_SOBOL_BURLEY) {
const uint index_mask = kernel_data.integrator.sobol_index_mask;
return sobol_burley_sample_3D(sample, dimension, rng_hash, index_mask);
}
else {
return tabulated_sobol_sample_3D(kg, sample, rng_hash, dimension);
if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_TABULATED_SOBOL) {
return tabulated_sobol_sample_3D(kg, sample, rng_pixel, dimension);
}
uint3 index = blue_noise_indexing(kg, rng_pixel, sample);
return sobol_burley_sample_3D(index.x, dimension, index.y, index.z);
}
ccl_device_forceinline float4 path_rng_4D(KernelGlobals kg,
uint rng_hash,
uint rng_pixel,
int sample,
int dimension)
{
@ -91,29 +122,43 @@ ccl_device_forceinline float4 path_rng_4D(KernelGlobals kg,
return make_float4((float)drand48(), (float)drand48(), (float)drand48(), (float)drand48());
#endif
if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_SOBOL_BURLEY) {
const uint index_mask = kernel_data.integrator.sobol_index_mask;
return sobol_burley_sample_4D(sample, dimension, rng_hash, index_mask);
}
else {
return tabulated_sobol_sample_4D(kg, sample, rng_hash, dimension);
if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_TABULATED_SOBOL) {
return tabulated_sobol_sample_4D(kg, sample, rng_pixel, dimension);
}
uint3 index = blue_noise_indexing(kg, rng_pixel, sample);
return sobol_burley_sample_4D(index.x, dimension, index.y, index.z);
}
ccl_device_inline uint path_rng_hash_init(KernelGlobals kg,
ccl_device_inline uint path_rng_pixel_init(KernelGlobals kg,
const int sample,
const int x,
const int y)
{
const uint rng_hash = hash_iqnt2d(x, y) ^ kernel_data.integrator.seed;
const uint pattern = kernel_data.integrator.sampling_pattern;
if (pattern == SAMPLING_PATTERN_TABULATED_SOBOL || pattern == SAMPLING_PATTERN_SOBOL_BURLEY) {
#ifdef __DEBUG_CORRELATION__
srand48(rng_hash + sample);
return srand48(rng_pixel + sample);
#else
(void)sample;
(void)sample;
#endif
return rng_hash;
/* The white-noise samplers use a random per-pixel hash to generate independent sequences. */
return hash_iqnt2d(x, y) ^ kernel_data.integrator.seed;
}
else {
/* The blue-noise samplers use a single sequence for all pixels, but offset the index within
* the sequence for each pixel. We use a hierarchically shuffled 2D morton curve to determine
* each pixel's offset along the sequence.
*
* Based on:
* https://psychopath.io/post/2022_07_24_owen_scrambling_based_dithered_blue_noise_sampling.
*
* TODO(lukas): Use a precomputed Hilbert curve to avoid directionality bias in the noise
* distribution. We can just precompute a small-ish tile and repeat it in morton code order.
*/
return nested_uniform_scramble_base4(morton2d(x, y), kernel_data.integrator.seed);
}
}
/**

@ -25,6 +25,25 @@ ccl_device_inline uint reversed_bit_owen(uint n, uint seed)
return n;
}
/*
 * Performs base-4 Owen scrambling on a reversed-bit unsigned integer.
 *
 * See https://psychopath.io/post/2022_08_14_a_fast_hash_for_base_4_owen_scrambling
 */
ccl_device_inline uint reversed_bit_owen_base4(uint n, uint seed)
{
  /* Hash constants and structure are taken from the blog post above; the
   * `(n >> 1) & (n << 1) & 0x55555555` terms presumably couple the two bits of
   * each base-4 digit so scrambling decisions act per digit — see the post. */
  n ^= n * 0x3d20adea;
  n ^= (n >> 1) & (n << 1) & 0x55555555;
  n += seed;
  /* Multiplier is forced odd so the multiplication stays invertible mod 2^32. */
  n *= (seed >> 16) | 1;
  n ^= (n >> 1) & (n << 1) & 0x55555555;
  n ^= n * 0x05526c56;
  n ^= n * 0x53a22864;
  return n;
}
/*
* Performs base-2 Owen scrambling on an unsigned integer.
*/
@ -33,4 +52,27 @@ ccl_device_inline uint nested_uniform_scramble(uint i, uint seed)
return reverse_integer_bits(reversed_bit_owen(reverse_integer_bits(i), seed));
}
/*
 * Performs base-4 Owen scrambling on an unsigned integer.
 *
 * The input is bit-reversed before and after the scramble so that the hash,
 * which operates from the low bits upward, effectively scrambles the value
 * hierarchically from its most significant digits.
 */
ccl_device_inline uint nested_uniform_scramble_base4(uint i, uint seed)
{
  return reverse_integer_bits(reversed_bit_owen_base4(reverse_integer_bits(i), seed));
}
/* Spread the lower 16 bits of x apart, inserting a zero bit between each
 * (the classic "interleave by binary magic numbers" step), so the input bits
 * end up in the even positions of the 32-bit result. */
ccl_device_inline uint expand_bits(uint x)
{
  x &= 0x0000ffff;
  x = (x ^ (x << 8)) & 0x00ff00ff;
  x = (x ^ (x << 4)) & 0x0f0f0f0f;
  x = (x ^ (x << 2)) & 0x33333333;
  x = (x ^ (x << 1)) & 0x55555555;
  return x;
}
/* Interleave the lower 16 bits of x and y into a 2D Morton (Z-order) code,
 * with x occupying the odd bit positions and y the even ones. */
ccl_device_inline uint morton2d(uint x, uint y)
{
  return (expand_bits(x) << 1) | expand_bits(y);
}
CCL_NAMESPACE_END

@ -118,7 +118,7 @@ ccl_device float3 svm_bevel(
/* Setup for multi intersection. */
LocalIntersection isect;
uint lcg_state = lcg_state_init(INTEGRATOR_STATE(state, path, rng_hash),
uint lcg_state = lcg_state_init(INTEGRATOR_STATE(state, path, rng_pixel),
INTEGRATOR_STATE(state, path, rng_offset),
INTEGRATOR_STATE(state, path, sample),
0x64c6a40e);

@ -340,6 +340,11 @@ enum PathTraceDimension {
enum SamplingPattern {
SAMPLING_PATTERN_SOBOL_BURLEY = 0,
SAMPLING_PATTERN_TABULATED_SOBOL = 1,
SAMPLING_PATTERN_BLUE_NOISE_PURE = 2,
SAMPLING_PATTERN_BLUE_NOISE_FIRST = 3,
SAMPLING_PATTERN_BLUE_NOISE_ROUND = 4,
/* Never used in kernel. */
SAMPLING_PATTERN_AUTOMATIC = 5,
SAMPLING_NUM_PATTERNS,
};

@ -121,6 +121,9 @@ NODE_DEFINE(Integrator)
static NodeEnum sampling_pattern_enum;
sampling_pattern_enum.insert("sobol_burley", SAMPLING_PATTERN_SOBOL_BURLEY);
sampling_pattern_enum.insert("tabulated_sobol", SAMPLING_PATTERN_TABULATED_SOBOL);
sampling_pattern_enum.insert("blue_noise_pure", SAMPLING_PATTERN_BLUE_NOISE_PURE);
sampling_pattern_enum.insert("blue_noise_round", SAMPLING_PATTERN_BLUE_NOISE_ROUND);
sampling_pattern_enum.insert("blue_noise_first", SAMPLING_PATTERN_BLUE_NOISE_FIRST);
SOCKET_ENUM(sampling_pattern,
"Sampling Pattern",
sampling_pattern_enum,
@ -274,6 +277,16 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene
kintegrator->sampling_pattern = sampling_pattern;
kintegrator->scrambling_distance = scrambling_distance;
kintegrator->sobol_index_mask = reverse_integer_bits(next_power_of_two(aa_samples - 1) - 1);
kintegrator->blue_noise_sequence_length = aa_samples;
if (kintegrator->sampling_pattern == SAMPLING_PATTERN_BLUE_NOISE_ROUND) {
if (!is_power_of_two(aa_samples)) {
kintegrator->blue_noise_sequence_length = next_power_of_two(aa_samples);
}
kintegrator->sampling_pattern = SAMPLING_PATTERN_BLUE_NOISE_PURE;
}
if (kintegrator->sampling_pattern == SAMPLING_PATTERN_BLUE_NOISE_FIRST) {
kintegrator->blue_noise_sequence_length -= 1;
}
/* NOTE: The kintegrator->use_light_tree is assigned to the efficient value in the light manager,
* and the synchronization code is expected to tag the light manager for update when the
@ -288,17 +301,16 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene
/* Build pre-tabulated Sobol samples if needed. */
int sequence_size = clamp(
next_power_of_two(aa_samples - 1), MIN_TAB_SOBOL_SAMPLES, MAX_TAB_SOBOL_SAMPLES);
const int table_size = sequence_size * NUM_TAB_SOBOL_PATTERNS * NUM_TAB_SOBOL_DIMENSIONS;
if (kintegrator->sampling_pattern == SAMPLING_PATTERN_TABULATED_SOBOL &&
dscene->sample_pattern_lut.size() !=
(sequence_size * NUM_TAB_SOBOL_PATTERNS * NUM_TAB_SOBOL_DIMENSIONS))
dscene->sample_pattern_lut.size() != table_size)
{
kintegrator->tabulated_sobol_sequence_size = sequence_size;
if (dscene->sample_pattern_lut.size() != 0) {
dscene->sample_pattern_lut.free();
}
float4 *directions = (float4 *)dscene->sample_pattern_lut.alloc(
sequence_size * NUM_TAB_SOBOL_PATTERNS * NUM_TAB_SOBOL_DIMENSIONS);
float4 *directions = (float4 *)dscene->sample_pattern_lut.alloc(table_size);
TaskPool pool;
for (int j = 0; j < NUM_TAB_SOBOL_PATTERNS; ++j) {
float4 *sequence = directions + j * sequence_size;

@ -29,7 +29,7 @@ extern "C" {
/* Blender file format version. */
#define BLENDER_FILE_VERSION BLENDER_VERSION
#define BLENDER_FILE_SUBVERSION 52
#define BLENDER_FILE_SUBVERSION 53
/* Minimum Blender version that supports reading file written with the current
* version. Older Blender versions will test this and cancel loading the file, showing a warning to

@ -1 +1 @@
Subproject commit 363d42173a72ff8e9d0bc7c3be17b9739559b74c
Subproject commit dcdda07d27cb95e8b388ab80cbf2bae62778d24d