Merge branch 'blender-v4.2-release'

2024-07-03 16:01:21 +02:00 · 2024-07-03 16:01:21 +02:00 · 35788ca3c9
commit 35788ca3c9
parent 78e9621bcd 659e19607d
6 changed files with 46 additions and 43 deletions
--- a/intern/cycles/blender/addon/properties.py
+++ b/intern/cycles/blender/addon/properties.py
@ -1580,6 +1580,17 @@ class CyclesPreferences(bpy.types.AddonPreferences):
        ),
    )

+    # Be careful when deciding when to call this function,
+    # as Blender can crash with `_cycles.available_devices()` on some drivers.
+    def get_device_list(self, compute_device_type):
+        import _cycles
+        device_list = _cycles.available_devices(compute_device_type)
+        # Make sure device entries are up to date and not referenced before
+        # we know we won't add new devices. This way we guarantee to not
+        # hold pointers to a resized array.
+        self.update_device_entries(device_list)
+        return device_list
+
    def find_existing_device_entry(self, device):
        for device_entry in self.devices:
            if device_entry.id == device[2] and device_entry.type == device[1]:
@ -1605,13 +1616,9 @@ class CyclesPreferences(bpy.types.AddonPreferences):

    # Gets all devices types for a compute device type.
    def get_devices_for_type(self, compute_device_type):
-        import _cycles
        # Layout of the device tuples: (Name, Type, Persistent ID)
-        device_list = _cycles.available_devices(compute_device_type)
-        # Make sure device entries are up to date and not referenced before
-        # we know we don't add new devices. This way we guarantee to not
-        # hold pointers to a resized array.
-        self.update_device_entries(device_list)
+        device_list = self.get_device_list(compute_device_type)
+
        # Sort entries into lists
        devices = []
        cpu_devices = []
@ -1627,13 +1634,15 @@ class CyclesPreferences(bpy.types.AddonPreferences):
        return devices

    # Refresh device list. This does not happen automatically on Blender
-    # startup due to unstable OpenCL implementations that can cause crashes.
+    # startup due to unstable drivers that can cause crashes.
    def refresh_devices(self):
-        import _cycles
        # Ensure `self.devices` is not re-allocated when the second call to
        # get_devices_for_type is made, freeing items from the first list.
        for device_type in ('CUDA', 'OPTIX', 'HIP', 'METAL', 'ONEAPI'):
-            self.update_device_entries(_cycles.available_devices(device_type))
+            # Query the device list to trigger all required updates.
+            # Note that even though the device list is unused,
+            # the function has side-effects with internal state updates.
+            _device_list = self.get_device_list(device_type)

    # Deprecated: use refresh_devices instead.
    def get_devices(self, compute_device_type=''):
@ -1646,15 +1655,11 @@ class CyclesPreferences(bpy.types.AddonPreferences):
        return self.compute_device_type

    def get_num_gpu_devices(self):
-        import _cycles
        compute_device_type = self.get_compute_device_type()

        num = 0
        if compute_device_type != 'NONE':
-            device_list = _cycles.available_devices(compute_device_type)
-            # Device list might be out of sync if the user hasn't opened preference yet
-            self.update_device_entries(device_list)
-            for device in device_list:
+            for device in self.get_device_list(compute_device_type):
                if device[1] != compute_device_type:
                    continue
                for dev in self.devices:
@ -1663,15 +1668,14 @@ class CyclesPreferences(bpy.types.AddonPreferences):
        return num

    def has_multi_device(self):
-        import _cycles
        compute_device_type = self.get_compute_device_type()
-        device_list = _cycles.available_devices(compute_device_type)
-        for device in device_list:
-            if device[1] == compute_device_type:
-                continue
-            for dev in self.devices:
-                if dev.use and dev.id == device[2]:
-                    return True
+        if compute_device_type != 'NONE':
+            for device in self.get_device_list(compute_device_type):
+                if device[1] == compute_device_type:
+                    continue
+                for dev in self.devices:
+                    if dev.use and dev.id == device[2]:
+                        return True

        return False

@ -1679,15 +1683,11 @@ class CyclesPreferences(bpy.types.AddonPreferences):
        return self.get_num_gpu_devices() > 0

    def has_oidn_gpu_devices(self):
-        import _cycles
        compute_device_type = self.get_compute_device_type()

        # We need non-CPU devices, used for rendering and supporting OIDN GPU denoising
        if compute_device_type != 'NONE':
-            device_list = _cycles.available_devices(compute_device_type)
-            # Device list might be out of sync if the user hasn't opened preference yet
-            self.update_device_entries(device_list)
-            for device in device_list:
+            for device in self.get_device_list(compute_device_type):
                device_type = device[1]
                if device_type == 'CPU':
                    continue
@ -1699,12 +1699,11 @@ class CyclesPreferences(bpy.types.AddonPreferences):
        return False

    def has_optixdenoiser_gpu_devices(self):
-        import _cycles
        compute_device_type = self.get_compute_device_type()

        if compute_device_type == 'OPTIX':
            # We need any OptiX devices, used for rendering
-            for device in _cycles.available_devices(compute_device_type):
+            for device in self.get_device_list(compute_device_type):
                device_type = device[1]
                if device_type == 'CPU':
                    continue
@ -1807,7 +1806,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
        import _cycles
        has_peer_memory = 0
        has_rt_api_support = {'METAL': False, 'HIP': False, 'ONEAPI': False}
-        for device in _cycles.available_devices(compute_device_type):
+        for device in self.get_device_list(compute_device_type):
            if device[3] and self.find_existing_device_entry(device).use:
                has_peer_memory += 1
            if device[4] and self.find_existing_device_entry(device).use:
--- a/intern/cycles/blender/addon/ui.py
+++ b/intern/cycles/blender/addon/ui.py
@ -178,7 +178,7 @@ def get_effective_preview_denoiser(context, has_oidn_gpu):
    if has_oidn_gpu:
        return 'OPENIMAGEDENOISE'

-    if context.preferences.addons[__package__].preferences.get_devices_for_type('OPTIX'):
+    if has_optixdenoiser_gpu_devices(context):
        return 'OPTIX'

    return 'OPENIMAGEDENOISE'
--- a/intern/cycles/integrator/denoiser.cpp
+++ b/intern/cycles/integrator/denoiser.cpp
@ -137,7 +137,7 @@ DenoiserType Denoiser::automatic_viewport_denoiser_type(const DeviceInfo &denois
 #endif

 #ifdef WITH_OPTIX
-  if (!Device::available_devices(DEVICE_MASK_OPTIX).empty()) {
+  if (OptiXDenoiser::is_device_supported(denoise_device_info)) {
    return DENOISER_OPTIX;
  }
 #endif
--- a/source/blender/draw/engines/eevee_next/eevee_shader_shared.hh
+++ b/source/blender/draw/engines/eevee_next/eevee_shader_shared.hh
@ -1987,12 +1987,14 @@ struct SubsurfaceData {
  /** xy: 2D sample position [-1..1], zw: sample_bounds. */
  /* NOTE(fclem) Using float4 for alignment. */
  float4 samples[SSS_SAMPLE_MAX];
-  /** Sample index after which samples are not randomly rotated anymore. */
-  int jitter_threshold;
  /** Number of samples precomputed in the set. */
  int sample_len;
-  int _pad0;
+  /** WORKAROUND: To avoid invalid integral for components that have very small radius, we clamp
+   * the minimal radius. This adds bias to the SSS effect but this is the simplest workaround I
+   * could find to ship this without visible artifacts. */
+  float min_radius;
  int _pad1;
+  int _pad2;
 };
 BLI_STATIC_ASSERT_ALIGN(SubsurfaceData, 16)

--- a/source/blender/draw/engines/eevee_next/eevee_subsurface.cc
+++ b/source/blender/draw/engines/eevee_next/eevee_subsurface.cc
@ -21,13 +21,7 @@ namespace blender::eevee {

 void SubsurfaceModule::end_sync()
 {
-  data_.jitter_threshold = inst_.scene->eevee.sss_jitter_threshold;
-  if (data_.sample_len != inst_.scene->eevee.sss_samples) {
-    /* Convert sample count from old implementation which was using a separable filter. */
-    /* TODO(fclem) better remapping. */
-    // data_.sample_len = square_f(1 + 2 * inst_.scene->eevee.sss_samples);
-    data_.sample_len = 16;
-  }
+  data_.sample_len = 16;

  {
    PassSimple &pass = setup_ps_;
@ -113,15 +107,21 @@ void SubsurfaceModule::precompute_samples_location()
  float rand_u = inst_.sampling.rng_get(SAMPLING_SSS_U);
  float rand_v = inst_.sampling.rng_get(SAMPLING_SSS_V);

+  /* Find minimum radius that we can represent because we are only sampling the largest radius. */
+  data_.min_radius = 1.0f;
+
  double golden_angle = M_PI * (3.0 - sqrt(5.0));
  for (auto i : IndexRange(data_.sample_len)) {
    float theta = golden_angle * i + M_PI * 2.0f * rand_u;
    float x = (rand_v + i) / data_.sample_len;
    float r = SubsurfaceModule::burley_sample(d, x);
+    data_.min_radius = min_ff(data_.min_radius, r);
    data_.samples[i].x = cosf(theta) * r;
    data_.samples[i].y = sinf(theta) * r;
    data_.samples[i].z = 1.0f / burley_pdf(d, r);
  }
+  /* Avoid float imprecision. */
+  data_.min_radius = max_ff(data_.min_radius, 1e-4f);

  inst_.uniform_data.push_update();
 }
--- a/source/blender/draw/engines/eevee_next/shaders/eevee_subsurface_convolve_comp.glsl
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_subsurface_convolve_comp.glsl
@ -112,7 +112,9 @@ void main(void)
  }

  /* Avoid too small radii that have float imprecision. */
-  vec3 clamped_sss_radius = max(vec3(1e-4), closure.sss_radius / max_radius) * max_radius;
+  vec3 clamped_sss_radius = max(vec3(uniform_buf.subsurface.min_radius),
+                                closure.sss_radius / max_radius) *
+                            max_radius;
  /* Scale albedo because we can have HDR value caused by BSDF sampling. */
  vec3 albedo = closure.color / max(1e-6, reduce_max(closure.color));
  vec3 d = burley_setup(clamped_sss_radius, albedo);