diff --git a/intern/cycles/kernel/data_template.h b/intern/cycles/kernel/data_template.h index 6b89de13797..c7b50b20c70 100644 --- a/intern/cycles/kernel/data_template.h +++ b/intern/cycles/kernel/data_template.h @@ -183,6 +183,7 @@ KERNEL_STRUCT_MEMBER(integrator, int, use_lamp_mis) KERNEL_STRUCT_MEMBER(integrator, int, use_caustics) /* Sampling pattern. */ KERNEL_STRUCT_MEMBER(integrator, int, sampling_pattern) +KERNEL_STRUCT_MEMBER(integrator, int, pmj_sequence_size) KERNEL_STRUCT_MEMBER(integrator, float, scrambling_distance) /* Volume render. */ KERNEL_STRUCT_MEMBER(integrator, int, use_volumes) @@ -205,6 +206,11 @@ KERNEL_STRUCT_MEMBER(integrator, int, use_surface_guiding) KERNEL_STRUCT_MEMBER(integrator, int, use_volume_guiding) KERNEL_STRUCT_MEMBER(integrator, int, use_guiding_direct_light) KERNEL_STRUCT_MEMBER(integrator, int, use_guiding_mis_weights) + +/* Padding. */ +KERNEL_STRUCT_MEMBER(integrator, int, pad1) +KERNEL_STRUCT_MEMBER(integrator, int, pad2) +KERNEL_STRUCT_MEMBER(integrator, int, pad3) KERNEL_STRUCT_END(KernelIntegrator) /* SVM. For shader specialization. */ diff --git a/intern/cycles/kernel/sample/jitter.h b/intern/cycles/kernel/sample/jitter.h index e748f95fc7d..1cde9f9d3de 100644 --- a/intern/cycles/kernel/sample/jitter.h +++ b/intern/cycles/kernel/sample/jitter.h @@ -7,6 +7,25 @@ #pragma once CCL_NAMESPACE_BEGIN +ccl_device uint pmj_shuffled_sample_index(KernelGlobals kg, uint sample, uint dimension, uint seed) +{ + const uint sample_count = kernel_data.integrator.pmj_sequence_size; + + /* Shuffle the pattern order and sample index to better decorrelate + * dimensions and make the most of the finite patterns we have. + * The funky sample mask stuff is to ensure that we only shuffle + * *within* the current sample pattern, which is necessary to avoid + * early repeat pattern use. */ + const uint pattern_i = hash_shuffle_uint(dimension, NUM_PMJ_PATTERNS, seed); + /* sample_count should always be a power of two, so this results in a mask. */ + const uint sample_mask = sample_count - 1; + const uint sample_shuffled = nested_uniform_scramble(sample, + hash_wang_seeded_uint(dimension, seed)); + sample = (sample & ~sample_mask) | (sample_shuffled & sample_mask); + + return ((pattern_i * sample_count) + sample) % (sample_count * NUM_PMJ_PATTERNS); +} + ccl_device float pmj_sample_1D(KernelGlobals kg, uint sample, const uint rng_hash, @@ -20,22 +39,9 @@ ccl_device float pmj_sample_1D(KernelGlobals kg, seed = kernel_data.integrator.seed; } - /* Shuffle the pattern order and sample index to better decorrelate - * dimensions and make the most of the finite patterns we have. - * The funky sample mask stuff is to ensure that we only shuffle - * *within* the current sample pattern, which is necessary to avoid - * early repeat pattern use. */ - const uint pattern_i = hash_shuffle_uint(dimension, NUM_PMJ_PATTERNS, seed); - /* NUM_PMJ_SAMPLES should be a power of two, so this results in a mask. */ - const uint sample_mask = NUM_PMJ_SAMPLES - 1; - const uint sample_shuffled = nested_uniform_scramble(sample, - hash_wang_seeded_uint(dimension, seed)); - sample = (sample & ~sample_mask) | (sample_shuffled & sample_mask); - /* Fetch the sample. */ - const uint index = ((pattern_i * NUM_PMJ_SAMPLES) + sample) % - (NUM_PMJ_SAMPLES * NUM_PMJ_PATTERNS); - float x = kernel_data_fetch(sample_pattern_lut, index * 2); + const uint index = pmj_shuffled_sample_index(kg, sample, dimension, seed); + float x = kernel_data_fetch(sample_pattern_lut, index * NUM_PMJ_DIMENSIONS); /* Do limited Cranley-Patterson rotation when using scrambling distance. */ if (kernel_data.integrator.scrambling_distance < 1.0f) { @@ -61,23 +67,10 @@ ccl_device float2 pmj_sample_2D(KernelGlobals kg, seed = kernel_data.integrator.seed; } - /* Shuffle the pattern order and sample index to better decorrelate - * dimensions and make the most of the finite patterns we have. - * The funky sample mask stuff is to ensure that we only shuffle - * *within* the current sample pattern, which is necessary to avoid - * early repeat pattern use. */ - const uint pattern_i = hash_shuffle_uint(dimension, NUM_PMJ_PATTERNS, seed); - /* NUM_PMJ_SAMPLES should be a power of two, so this results in a mask. */ - const uint sample_mask = NUM_PMJ_SAMPLES - 1; - const uint sample_shuffled = nested_uniform_scramble(sample, - hash_wang_seeded_uint(dimension, seed)); - sample = (sample & ~sample_mask) | (sample_shuffled & sample_mask); - /* Fetch the sample. */ - const uint index = ((pattern_i * NUM_PMJ_SAMPLES) + sample) % - (NUM_PMJ_SAMPLES * NUM_PMJ_PATTERNS); - float x = kernel_data_fetch(sample_pattern_lut, index * 2); - float y = kernel_data_fetch(sample_pattern_lut, index * 2 + 1); + const uint index = pmj_shuffled_sample_index(kg, sample, dimension, seed); + float x = kernel_data_fetch(sample_pattern_lut, index * NUM_PMJ_DIMENSIONS); + float y = kernel_data_fetch(sample_pattern_lut, index * NUM_PMJ_DIMENSIONS + 1); /* Do limited Cranley-Patterson rotation when using scrambling distance. */ if (kernel_data.integrator.scrambling_distance < 1.0f) { diff --git a/intern/cycles/kernel/types.h b/intern/cycles/kernel/types.h index 24c5a6a4540..6d80fd3425c 100644 --- a/intern/cycles/kernel/types.h +++ b/intern/cycles/kernel/types.h @@ -1382,12 +1382,13 @@ static_assert_align(KernelShaderEvalInput, 16); /* Pre-computed sample table sizes for PMJ02 sampler. * - * NOTE: divisions *must* be a power of two, and patterns + * NOTE: min and max samples *must* be a power of two, and patterns * ideally should be as well. */ -#define NUM_PMJ_DIVISIONS 32 -#define NUM_PMJ_SAMPLES ((NUM_PMJ_DIVISIONS) * (NUM_PMJ_DIVISIONS)) -#define NUM_PMJ_PATTERNS 64 +#define MIN_PMJ_SAMPLES 256 +#define MAX_PMJ_SAMPLES 8192 +#define NUM_PMJ_DIMENSIONS 2 +#define NUM_PMJ_PATTERNS 256 /* Device kernels. * diff --git a/intern/cycles/scene/integrator.cpp b/intern/cycles/scene/integrator.cpp index ade4716242b..23f9e8b7aa8 100644 --- a/intern/cycles/scene/integrator.cpp +++ b/intern/cycles/scene/integrator.cpp @@ -257,12 +257,18 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene kintegrator->light_inv_rr_threshold = 0.0f; } + constexpr int num_sequences = NUM_PMJ_PATTERNS; + int sequence_size = clamp(next_power_of_two(aa_samples - 1), MIN_PMJ_SAMPLES, MAX_PMJ_SAMPLES); if (kintegrator->sampling_pattern == SAMPLING_PATTERN_PMJ && - dscene->sample_pattern_lut.size() == 0) { - constexpr int sequence_size = NUM_PMJ_SAMPLES; - constexpr int num_sequences = NUM_PMJ_PATTERNS; + dscene->sample_pattern_lut.size() != + (sequence_size * NUM_PMJ_DIMENSIONS * NUM_PMJ_PATTERNS)) { + kintegrator->pmj_sequence_size = sequence_size; + + if (dscene->sample_pattern_lut.size() != 0) { + dscene->sample_pattern_lut.free(); + } float2 *directions = (float2 *)dscene->sample_pattern_lut.alloc(sequence_size * num_sequences * - 2); + NUM_PMJ_DIMENSIONS); TaskPool pool; for (int j = 0; j < num_sequences; ++j) { float2 *sequence = directions + j * sequence_size;