Merge branch 'blender-v4.2-release'

Miguel Pozo 2024-06-07 16:25:37 +02:00
commit ca7f1785fc
19 changed files with 472 additions and 136 deletions

@ -51,6 +51,7 @@ set(OSL_EXTRA_ARGS
-DCMAKE_DEBUG_POSTFIX=_d
-DPython_ROOT=${LIBDIR}/python
-DPython_EXECUTABLE=${PYTHON_BINARY}
-Dlibdeflate_DIR=${LIBDIR}/deflate/lib/cmake/libdeflate
)
if(NOT APPLE)

@ -148,7 +148,7 @@ DenoiserType Denoiser::automatic_viewport_denoiser_type(const DeviceInfo &path_t
}
Denoiser::Denoiser(Device *denoiser_device, const DenoiseParams &params)
: denoiser_device_(denoiser_device), params_(params)
: denoiser_device_(denoiser_device), denoise_kernels_are_loaded_(false), params_(params)
{
DCHECK(denoiser_device_);
DCHECK(params.use);
@ -173,6 +173,11 @@ const DenoiseParams &Denoiser::get_params() const
bool Denoiser::load_kernels(Progress *progress)
{
/* If the kernels have been loaded successfully once, there is no need to load them again. */
if (denoise_kernels_are_loaded_) {
return denoise_kernels_are_loaded_;
}
if (progress) {
progress->set_status("Loading denoising kernels (may take a few minutes the first time)");
}
@ -195,6 +200,7 @@ bool Denoiser::load_kernels(Progress *progress)
VLOG_WORK << "Will denoise on " << denoiser_device_->info.description << " ("
<< denoiser_device_->info.id << ")";
denoise_kernels_are_loaded_ = true;
return true;
}

@ -115,6 +115,7 @@ class Denoiser {
virtual uint get_device_type_mask() const = 0;
Device *denoiser_device_;
bool denoise_kernels_are_loaded_;
DenoiseParams params_;
};

@ -65,6 +65,11 @@ void Instance::init(const int2 &output_res,
info = "";
shaders_are_ready_ = shaders.is_ready(is_image_render());
if (!shaders_are_ready_) {
return;
}
if (assign_if_different(debug_mode, (eDebugMode)G.debug_value)) {
sampling.reset();
}
@ -121,6 +126,8 @@ void Instance::init_light_bake(Depsgraph *depsgraph, draw::Manager *manager)
debug_mode = (eDebugMode)G.debug_value;
info = "";
shaders.is_ready(true);
update_eval_members();
sampling.init(scene);
@ -175,6 +182,10 @@ void Instance::view_update()
void Instance::begin_sync()
{
if (!shaders_are_ready_) {
return;
}
/* Needs to be first for sun light parameters. */
world.sync();
@ -218,6 +229,10 @@ void Instance::begin_sync()
void Instance::object_sync(Object *ob)
{
if (!shaders_are_ready_) {
return;
}
const bool is_renderable_type = ELEM(ob->type,
OB_CURVES,
OB_GPENCIL_LEGACY,
@ -303,6 +318,10 @@ void Instance::object_sync_render(void *instance_,
void Instance::end_sync()
{
if (!shaders_are_ready_) {
return;
}
velocity.end_sync();
volume.end_sync(); /* Needs to be before shadows. */
shadows.end_sync(); /* Needs to be before lights. */
@ -352,26 +371,24 @@ void Instance::render_sync()
DRW_curves_update();
}
bool Instance::needs_lightprobe_sphere_passes() const
{
return sphere_probes.update_probes_this_sample_;
}
bool Instance::do_lightprobe_sphere_sync() const
{
if (!sphere_probes.update_probes_this_sample_) {
return false;
}
if (materials.queued_shaders_count > 0) {
return false;
}
return true;
return (materials.queued_shaders_count == 0) && needs_lightprobe_sphere_passes();
}
bool Instance::needs_planar_probe_passes() const
{
return planar_probes.update_probes_;
}
bool Instance::do_planar_probe_sync() const
{
if (!planar_probes.update_probes_) {
return false;
}
if (materials.queued_shaders_count > 0) {
return false;
}
return true;
return (materials.queued_shaders_count == 0) && needs_planar_probe_passes();
}
/** \} */
@ -515,6 +532,14 @@ void Instance::render_frame(RenderLayer *render_layer, const char *view_name)
void Instance::draw_viewport()
{
if (!shaders_are_ready_) {
DefaultFramebufferList *dfbl = DRW_viewport_framebuffer_list_get();
GPU_framebuffer_clear_color_depth(dfbl->default_fb, float4(0.0f), 1.0f);
info += "Compiling EEVEE Engine Shaders\n";
DRW_viewport_request_redraw();
return;
}
render_sample();
velocity.step_swap();
@ -529,6 +554,7 @@ void Instance::draw_viewport()
std::stringstream ss;
ss << "Compiling Shaders (" << materials.queued_shaders_count << " remaining)";
info = ss.str();
DRW_viewport_request_redraw();
}
else if (materials.queued_optimize_shaders_count > 0) {
std::stringstream ss;

@ -75,6 +75,8 @@ class Instance {
uint64_t depsgraph_last_update_ = 0;
bool overlays_enabled_ = false;
bool shaders_are_ready_ = true;
public:
ShaderModule &shaders;
SyncModule sync;
@ -195,6 +197,13 @@ class Instance {
bool do_lightprobe_sphere_sync() const;
bool do_planar_probe_sync() const;
/**
* Return true when probe passes should be loaded.
* It can be true even when do_<type>_probe_sync() is false because shaders are still being compiled.
*/
bool needs_lightprobe_sphere_passes() const;
bool needs_planar_probe_passes() const;
/* Render. */
void render_sync();

@ -322,7 +322,8 @@ Material &MaterialModule::material_sync(Object *ob,
mat.overlap_masking = MaterialPass();
mat.capture = MaterialPass();
if (inst_.do_lightprobe_sphere_sync() && !(ob->visibility_flag & OB_HIDE_PROBE_CUBEMAP)) {
if (inst_.needs_lightprobe_sphere_passes() && !(ob->visibility_flag & OB_HIDE_PROBE_CUBEMAP))
{
mat.lightprobe_sphere_prepass = material_pass_get(
ob, blender_mat, MAT_PIPE_PREPASS_DEFERRED, geometry_type, MAT_PROBE_REFLECTION);
mat.lightprobe_sphere_shading = material_pass_get(
@ -333,7 +334,7 @@ Material &MaterialModule::material_sync(Object *ob,
mat.lightprobe_sphere_shading = MaterialPass();
}
if (inst_.do_planar_probe_sync() && !(ob->visibility_flag & OB_HIDE_PROBE_PLANAR)) {
if (inst_.needs_planar_probe_passes() && !(ob->visibility_flag & OB_HIDE_PROBE_PLANAR)) {
mat.planar_probe_prepass = material_pass_get(
ob, blender_mat, MAT_PIPE_PREPASS_PLANAR, geometry_type, MAT_PROBE_PLANAR);
mat.planar_probe_shading = material_pass_get(

@ -53,19 +53,27 @@ ShaderModule::ShaderModule()
shader = nullptr;
}
#ifndef NDEBUG
/* Ensure all shaders are described. */
Vector<const GPUShaderCreateInfo *> infos;
infos.reserve(MAX_SHADER_TYPE);
for (auto i : IndexRange(MAX_SHADER_TYPE)) {
const char *name = static_shader_create_info_name_get(eShaderType(i));
const GPUShaderCreateInfo *create_info = GPU_shader_create_info_get(name);
infos.append(create_info);
#ifndef NDEBUG
if (name == nullptr) {
std::cerr << "EEVEE: Missing case for eShaderType(" << i
<< ") in static_shader_create_info_name_get().";
BLI_assert(0);
}
const GPUShaderCreateInfo *create_info = GPU_shader_create_info_get(name);
BLI_assert_msg(create_info != nullptr, "EEVEE: Missing create info for static shader.");
}
#endif
}
if (GPU_use_parallel_compilation()) {
compilation_handle_ = GPU_shader_batch_create_from_infos(infos);
}
}
ShaderModule::~ShaderModule()
@ -82,6 +90,22 @@ ShaderModule::~ShaderModule()
*
* \{ */
bool ShaderModule::is_ready(bool block)
{
if (compilation_handle_ == 0) {
return true;
}
if (block || GPU_shader_batch_is_ready(compilation_handle_)) {
Vector<GPUShader *> shaders = GPU_shader_batch_finalize(compilation_handle_);
for (int i : IndexRange(MAX_SHADER_TYPE)) {
shaders_[i] = shaders[i];
}
}
return compilation_handle_ == 0;
}
const char *ShaderModule::static_shader_create_info_name_get(eShaderType shader_type)
{
switch (shader_type) {
@ -300,15 +324,16 @@ const char *ShaderModule::static_shader_create_info_name_get(eShaderType shader_
GPUShader *ShaderModule::static_shader_get(eShaderType shader_type)
{
BLI_assert(is_ready());
if (shaders_[shader_type] == nullptr) {
const char *shader_name = static_shader_create_info_name_get(shader_type);
shaders_[shader_type] = GPU_shader_create_from_info_name(shader_name);
if (shaders_[shader_type] == nullptr) {
if (GPU_use_parallel_compilation()) {
fprintf(stderr, "EEVEE: error: Could not compile static shader \"%s\"\n", shader_name);
BLI_assert(0);
}
else {
shaders_[shader_type] = GPU_shader_create_from_info_name(shader_name);
}
BLI_assert(shaders_[shader_type] != nullptr);
}
return shaders_[shader_type];
}

@ -157,6 +157,7 @@ enum eShaderType {
class ShaderModule {
private:
std::array<GPUShader *, MAX_SHADER_TYPE> shaders_;
BatchHandle compilation_handle_ = 0;
/** Shared shader module across all engine instances. */
static ShaderModule *g_shader_module;
@ -165,6 +166,8 @@ class ShaderModule {
ShaderModule();
~ShaderModule();
bool is_ready(bool block = false);
GPUShader *static_shader_get(eShaderType shader_type);
GPUMaterial *material_default_shader_get(eMaterialPipeline pipeline_type,
eMaterialGeometry geometry_type);

@ -12,6 +12,7 @@
#include "BLI_dynstr.h"
#include "BLI_listbase.h"
#include "BLI_map.hh"
#include "BLI_string_utils.hh"
#include "BLI_threads.h"
#include "BLI_time.h"
@ -66,6 +67,8 @@ struct DRWShaderCompiler {
static void drw_deferred_shader_compilation_exec(void *custom_data,
wmJobWorkerStatus *worker_status)
{
using namespace blender;
GPU_render_begin();
DRWShaderCompiler *comp = (DRWShaderCompiler *)custom_data;
void *system_gpu_context = comp->system_gpu_context;
@ -80,11 +83,16 @@ static void drw_deferred_shader_compilation_exec(void *custom_data,
GPU_context_main_lock();
}
const bool use_parallel_compilation = GPU_use_parallel_compilation();
WM_system_gpu_context_activate(system_gpu_context);
GPU_context_active_set(blender_gpu_context);
Vector<GPUMaterial *> next_batch;
Map<BatchHandle, Vector<GPUMaterial *>> batches;
while (true) {
if (worker_status->stop != 0) {
if (worker_status->stop) {
break;
}
@ -96,14 +104,44 @@ static void drw_deferred_shader_compilation_exec(void *custom_data,
if (mat) {
/* Avoid another thread freeing the material mid compilation. */
GPU_material_acquire(mat);
MEM_freeN(link);
}
BLI_spin_unlock(&comp->list_lock);
if (mat) {
/* Do the compilation. */
GPU_material_compile(mat);
GPU_material_release(mat);
MEM_freeN(link);
/* We have a new material that must be compiled.
* Either compile it directly or add it to a parallel compilation batch. */
if (use_parallel_compilation) {
next_batch.append(mat);
}
else {
GPU_material_compile(mat);
GPU_material_release(mat);
}
}
else if (!next_batch.is_empty()) {
/* (only if use_parallel_compilation == true)
* We ran out of pending materials. Request the compilation of the current batch. */
BatchHandle batch_handle = GPU_material_batch_compile(next_batch);
batches.add(batch_handle, next_batch);
next_batch.clear();
}
else if (!batches.is_empty()) {
/* (only if use_parallel_compilation == true)
* Keep querying the requested batches until all of them are ready. */
Vector<BatchHandle> ready_handles;
for (BatchHandle handle : batches.keys()) {
if (GPU_material_batch_is_ready(handle)) {
ready_handles.append(handle);
}
}
for (BatchHandle handle : ready_handles) {
Vector<GPUMaterial *> batch = batches.pop(handle);
GPU_material_batch_finalize(handle, batch);
for (GPUMaterial *mat : batch) {
GPU_material_release(mat);
}
}
}
else {
/* Check for Material Optimization job once there are no more
@ -111,7 +149,7 @@ static void drw_deferred_shader_compilation_exec(void *custom_data,
BLI_spin_lock(&comp->list_lock);
/* Pop tail because it will be less likely to lock the main thread
* if all GPUMaterials are to be freed (see DRW_deferred_shader_remove()). */
link = (LinkData *)BLI_poptail(&comp->optimize_queue);
LinkData *link = (LinkData *)BLI_poptail(&comp->optimize_queue);
GPUMaterial *optimize_mat = link ? (GPUMaterial *)link->data : nullptr;
if (optimize_mat) {
/* Avoid another thread freeing the material during optimization. */
@ -136,6 +174,16 @@ static void drw_deferred_shader_compilation_exec(void *custom_data,
}
}
/* We have to wait until all the requested batches are ready,
* even if worker_status->stop is true. */
for (BatchHandle handle : batches.keys()) {
Vector<GPUMaterial *> &batch = batches.lookup(handle);
GPU_material_batch_finalize(handle, batch);
for (GPUMaterial *mat : batch) {
GPU_material_release(mat);
}
}
GPU_context_active_set(nullptr);
WM_system_gpu_context_release(system_gpu_context);
if (use_main_context_workaround) {

@ -260,6 +260,30 @@ void GPU_material_compile(GPUMaterial *mat);
void GPU_material_free_single(GPUMaterial *material);
void GPU_material_free(ListBase *gpumaterial);
/**
* Request the creation of multiple `GPUMaterial`s at once, allowing the backend to use
* multithreaded compilation.
* Returns a handle that can be used to poll whether all materials have been
* compiled, and to retrieve the compiled result.
* NOTE: This function is asynchronous on OpenGL, but blocking on Vulkan and Metal.
* WARNING: The material pointers and their pass->create_info must remain valid until
* `GPU_material_batch_finalize` has returned.
*/
BatchHandle GPU_material_batch_compile(blender::Span<GPUMaterial *> mats);
/**
* Returns true if all the materials from the batch have finished their compilation.
*/
bool GPU_material_batch_is_ready(BatchHandle handle);
/**
* Assign the compiled shaders to their respective materials and flag their status.
* The materials list should have the same length and order as in the `GPU_material_batch_compile`
* call.
* If the compilation has not finished yet, this call will block the thread until all the
* shaders are ready.
* WARNING: The handle will be invalidated by this call, you can't process the same batch twice.
*/
void GPU_material_batch_finalize(BatchHandle &handle, blender::Span<GPUMaterial *> mats);
void GPU_material_acquire(GPUMaterial *mat);
void GPU_material_release(GPUMaterial *mat);
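As a rough illustration of the intended lifecycle of this batch API (the real consumer is the deferred compilation job in the draw manager hunk above), here is a minimal caller-side sketch; `gather_queued_materials()` and `do_other_work()` are hypothetical placeholders, not part of this commit:
/* Hedged sketch: request, poll, and finalize a material compilation batch. */
blender::Vector<GPUMaterial *> mats = gather_queued_materials(); /* Hypothetical helper. */
for (GPUMaterial *mat : mats) {
  GPU_material_acquire(mat); /* Keep the materials alive while the batch compiles. */
}
BatchHandle handle = GPU_material_batch_compile(mats);
while (!GPU_material_batch_is_ready(handle)) {
  do_other_work(); /* Hypothetical: poll instead of blocking. */
}
/* Blocks if compilation is still running; invalidates `handle`. */
GPU_material_batch_finalize(handle, mats);
for (GPUMaterial *mat : mats) {
  GPU_material_release(mat);
}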

@ -102,6 +102,8 @@ struct GPUPass {
uint32_t hash;
/** Whether we already tried to compile the attached GPUShader. */
bool compiled;
/** Whether this pass is already being compiled (a GPUPass can be shared by multiple GPUMaterials). */
bool compilation_requested;
/** Hint that an optimized variant of this pass should be created based on a complexity heuristic
* during pass code generation. */
bool should_optimize;
@ -805,6 +807,7 @@ GPUPass *GPU_generate_pass(GPUMaterial *material,
pass->engine = engine;
pass->hash = codegen.hash_get();
pass->compiled = false;
pass->compilation_requested = false;
pass->cached = false;
/* Only flag pass optimization hint if this is the first generated pass for a material.
* Optimized passes cannot be optimized further, even if the heuristic is still not
@ -881,17 +884,22 @@ static bool gpu_pass_shader_validate(GPUPass *pass, GPUShader *shader)
return (active_samplers_len * 3 <= GPU_max_textures());
}
bool GPU_pass_compile(GPUPass *pass, const char *shname)
GPUShaderCreateInfo *GPU_pass_begin_compilation(GPUPass *pass, const char *shname)
{
if (!pass->compilation_requested) {
pass->compilation_requested = true;
pass->create_info->name_ = shname;
GPUShaderCreateInfo *info = reinterpret_cast<GPUShaderCreateInfo *>(
static_cast<ShaderCreateInfo *>(pass->create_info));
return info;
}
return nullptr;
}
bool GPU_pass_finalize_compilation(GPUPass *pass, GPUShader *shader)
{
bool success = true;
if (!pass->compiled) {
GPUShaderCreateInfo *info = reinterpret_cast<GPUShaderCreateInfo *>(
static_cast<ShaderCreateInfo *>(pass->create_info));
pass->create_info->name_ = shname;
GPUShader *shader = GPU_shader_create_from_info(info);
/* NOTE: Some drivers / GPUs allow more active samplers than the OpenGL limit.
* We need to make sure to count active samplers to avoid undefined behavior. */
if (!gpu_pass_shader_validate(pass, shader)) {
@ -908,6 +916,16 @@ bool GPU_pass_compile(GPUPass *pass, const char *shname)
return success;
}
bool GPU_pass_compile(GPUPass *pass, const char *shname)
{
bool success = true;
if (GPUShaderCreateInfo *info = GPU_pass_begin_compilation(pass, shname)) {
GPUShader *shader = GPU_shader_create_from_info(info);
success = GPU_pass_finalize_compilation(pass, shader);
}
return success;
}
GPUShader *GPU_pass_shader_get(GPUPass *pass)
{
return pass->shader;

@ -31,6 +31,11 @@ void GPU_pass_acquire(GPUPass *pass);
void GPU_pass_release(GPUPass *pass);
bool GPU_pass_should_optimize(GPUPass *pass);
/* Custom pass compilation. */
GPUShaderCreateInfo *GPU_pass_begin_compilation(GPUPass *pass, const char *shname);
bool GPU_pass_finalize_compilation(GPUPass *pass, GPUShader *shader);
/* Module */
void gpu_codegen_init();

@ -143,6 +143,8 @@ struct GPUMaterial {
uint32_t refcount;
bool do_batch_compilation;
#ifndef NDEBUG
char name[64];
#else
@ -951,21 +953,8 @@ void GPU_material_release(GPUMaterial *mat)
GPU_material_free_single(mat);
}
void GPU_material_compile(GPUMaterial *mat)
static void gpu_material_finalize(GPUMaterial *mat, bool success)
{
bool success;
BLI_assert(ELEM(mat->status, GPU_MAT_QUEUED, GPU_MAT_CREATED));
BLI_assert(mat->pass);
/* NOTE: The shader may have already been compiled here since we are
* sharing GPUShader across GPUMaterials. In this case it's a no-op. */
#ifndef NDEBUG
success = GPU_pass_compile(mat->pass, mat->name);
#else
success = GPU_pass_compile(mat->pass, __func__);
#endif
mat->flag |= GPU_MATFLAG_UPDATED;
if (success) {
@ -1017,6 +1006,64 @@ void GPU_material_compile(GPUMaterial *mat)
}
}
void GPU_material_compile(GPUMaterial *mat)
{
bool success;
BLI_assert(ELEM(mat->status, GPU_MAT_QUEUED, GPU_MAT_CREATED));
BLI_assert(mat->pass);
/* NOTE: The shader may have already been compiled here since we are
* sharing GPUShader across GPUMaterials. In this case it's a no-op. */
#ifndef NDEBUG
success = GPU_pass_compile(mat->pass, mat->name);
#else
success = GPU_pass_compile(mat->pass, __func__);
#endif
gpu_material_finalize(mat, success);
}
BatchHandle GPU_material_batch_compile(blender::Span<GPUMaterial *> mats)
{
blender::Vector<GPUShaderCreateInfo *> infos;
infos.reserve(mats.size());
for (GPUMaterial *mat : mats) {
BLI_assert(ELEM(mat->status, GPU_MAT_QUEUED, GPU_MAT_CREATED));
BLI_assert(mat->pass);
#ifndef NDEBUG
const char *name = mat->name;
#else
const char *name = __func__;
#endif
mat->do_batch_compilation = false;
if (GPUShaderCreateInfo *info = GPU_pass_begin_compilation(mat->pass, name)) {
infos.append(info);
mat->do_batch_compilation = true;
}
}
return GPU_shader_batch_create_from_infos(infos);
}
bool GPU_material_batch_is_ready(BatchHandle handle)
{
return GPU_shader_batch_is_ready(handle);
}
void GPU_material_batch_finalize(BatchHandle &handle, blender::Span<GPUMaterial *> mats)
{
blender::Vector<GPUShader *> shaders = GPU_shader_batch_finalize(handle);
int i = 0;
for (GPUMaterial *mat : mats) {
bool success = true;
if (mat->do_batch_compilation) {
success = GPU_pass_finalize_compilation(mat->pass, shaders[i++]);
}
gpu_material_finalize(mat, success);
}
}
void GPU_material_optimize(GPUMaterial *mat)
{
/* If shader is flagged for skipping optimization or has already been successfully

@ -25,35 +25,57 @@
namespace blender::gpu {
class SubprocessShader {
GLuint comp_ = 0;
GLuint vert_ = 0;
GLuint geom_ = 0;
GLuint frag_ = 0;
GLuint program_ = 0;
bool success_ = false;
public:
SubprocessShader(const char *vert_src, const char *frag_src)
SubprocessShader(const char *comp_src,
const char *vert_src,
const char *geom_src,
const char *frag_src)
{
GLint status;
vert_ = glCreateShader(GL_VERTEX_SHADER);
glShaderSource(vert_, 1, &vert_src, nullptr);
glCompileShader(vert_);
glGetShaderiv(vert_, GL_COMPILE_STATUS, &status);
if (!status) {
return;
}
frag_ = glCreateShader(GL_FRAGMENT_SHADER);
glShaderSource(frag_, 1, &frag_src, nullptr);
glCompileShader(frag_);
glGetShaderiv(frag_, GL_COMPILE_STATUS, &status);
if (!status) {
return;
}
program_ = glCreateProgram();
glAttachShader(program_, vert_);
glAttachShader(program_, frag_);
auto compile_stage = [&](const char *src, GLenum stage) -> GLuint {
if (src == nullptr) {
/* Unused stage: report success so only real compilation failures trip the status checks. */
status = GL_TRUE;
return 0;
}
GLuint shader = glCreateShader(stage);
glShaderSource(shader, 1, &src, nullptr);
glCompileShader(shader);
glGetShaderiv(shader, GL_COMPILE_STATUS, &status);
glAttachShader(program_, shader);
return shader;
};
comp_ = compile_stage(comp_src, GL_COMPUTE_SHADER);
if (!status) {
return;
}
vert_ = compile_stage(vert_src, GL_VERTEX_SHADER);
if (!status) {
return;
}
geom_ = compile_stage(geom_src, GL_GEOMETRY_SHADER);
if (!status) {
return;
}
frag_ = compile_stage(frag_src, GL_FRAGMENT_SHADER);
if (!status) {
return;
}
glLinkProgram(program_);
glGetProgramiv(program_, GL_LINK_STATUS, &status);
if (!status) {
@ -65,7 +87,9 @@ class SubprocessShader {
~SubprocessShader()
{
glDeleteShader(comp_);
glDeleteShader(vert_);
glDeleteShader(geom_);
glDeleteShader(frag_);
glDeleteProgram(program_);
}
@ -78,8 +102,8 @@ class SubprocessShader {
if (success_) {
glGetProgramiv(program_, GL_PROGRAM_BINARY_LENGTH, &bin->size);
if (bin->size + sizeof(ShaderBinaryHeader) < compilation_subprocess_shared_memory_size) {
glGetProgramBinary(program_, bin->size, nullptr, &bin->format, &bin->data_start);
if (bin->size <= sizeof(ShaderBinaryHeader::data)) {
glGetProgramBinary(program_, bin->size, nullptr, &bin->format, bin->data);
}
}
@ -92,7 +116,7 @@ static bool validate_binary(void *binary)
{
ShaderBinaryHeader *bin = reinterpret_cast<ShaderBinaryHeader *>(binary);
GLuint program = glCreateProgram();
glProgramBinary(program, bin->format, &bin->data_start, bin->size);
glProgramBinary(program, bin->format, bin->data, bin->size);
GLint status;
glGetProgramiv(program, GL_LINK_STATUS, &status);
glDeleteProgram(program);
@ -165,15 +189,34 @@ void GPU_compilation_subprocess_run(const char *subprocess_name)
break;
}
const char *shaders = reinterpret_cast<const char *>(shared_mem.get_data());
const char *vert_src = shaders;
const char *frag_src = shaders + strlen(shaders) + 1;
ShaderSourceHeader *source = reinterpret_cast<ShaderSourceHeader *>(shared_mem.get_data());
const char *next_src = source->sources;
const char *comp_src = nullptr;
const char *vert_src = nullptr;
const char *geom_src = nullptr;
const char *frag_src = nullptr;
DefaultHash<StringRefNull> hasher;
uint64_t vert_hash = hasher(vert_src);
uint64_t frag_hash = hasher(frag_src);
std::string hash_str = std::to_string(vert_hash) + "_" + std::to_string(frag_hash);
std::string hash_str = "_";
auto get_src = [&]() {
const char *src = next_src;
next_src += strlen(src) + sizeof('\0');
hash_str += std::to_string(hasher(src)) + "_";
return src;
};
if (source->type == ShaderSourceHeader::Type::COMPUTE) {
comp_src = get_src();
}
else {
vert_src = get_src();
if (source->type == ShaderSourceHeader::Type::GRAPHICS_WITH_GEOMETRY_STAGE) {
geom_src = get_src();
}
frag_src = get_src();
}
std::string cache_path = cache_dir + SEP_STR + hash_str;
/* TODO: This should lock the files? */
@ -203,14 +246,14 @@ void GPU_compilation_subprocess_run(const char *subprocess_name)
}
}
SubprocessShader shader(vert_src, frag_src);
SubprocessShader shader(comp_src, vert_src, geom_src, frag_src);
ShaderBinaryHeader *binary = shader.get_binary(shared_mem.get_data());
end_semaphore.increment();
fstream file(cache_path, std::ios::binary | std::ios::out);
file.write(reinterpret_cast<char *>(shared_mem.get_data()),
binary->size + offsetof(ShaderBinaryHeader, data_start));
binary->size + offsetof(ShaderBinaryHeader, data));
}
GPU_exit();

@ -13,7 +13,19 @@
namespace blender::gpu {
/* The size of the memory pools shared by Blender and the compilation subprocesses. */
constexpr size_t compilation_subprocess_shared_memory_size = 1024 * 1024 * 5; /* 5mB */
constexpr size_t compilation_subprocess_shared_memory_size = 1024 * 1024 * 5; /* 5 MiB */
struct ShaderSourceHeader {
enum Type { COMPUTE, GRAPHICS, GRAPHICS_WITH_GEOMETRY_STAGE };
/* The type of program being compiled. */
Type type;
/* The source code for all the shader stages (each terminated by a null character).
* The stages follow the execution order (e.g. vert > geom > frag). */
char sources[compilation_subprocess_shared_memory_size - sizeof(type)];
};
static_assert(sizeof(ShaderSourceHeader) == compilation_subprocess_shared_memory_size,
"Size must match the shared memory size");
struct ShaderBinaryHeader {
/* Size of the shader binary data. */
@ -21,11 +33,13 @@ struct ShaderBinaryHeader {
/* Magic number that identifies the format of this shader binary (Driver-defined).
* This (and size) is set to 0 when the shader has failed to compile. */
uint32_t format;
/* When casting a shared memory pool into a ShaderBinaryHeader*, this is the first byte of the
* shader binary data. */
uint8_t data_start;
/* The serialized shader binary data. */
uint8_t data[compilation_subprocess_shared_memory_size - sizeof(size) - sizeof(format)];
};
static_assert(sizeof(ShaderBinaryHeader) == compilation_subprocess_shared_memory_size,
"Size must match the shared memory size");
} // namespace blender::gpu
#endif

@ -1472,6 +1472,30 @@ Vector<const char *> GLSources::sources_get() const
return result;
}
std::string GLSources::to_string() const
{
std::string result;
for (const GLSource &source : *this) {
if (source.source_ref) {
result.append(source.source_ref);
}
else {
result.append(source.source);
}
}
return result;
}
size_t GLSourcesBaked::size()
{
size_t result = 0;
result += comp.empty() ? 0 : comp.size() + sizeof('\0');
result += vert.empty() ? 0 : vert.size() + sizeof('\0');
result += geom.empty() ? 0 : geom.size() + sizeof('\0');
result += frag.empty() ? 0 : frag.size() + sizeof('\0');
return result;
}
/** \} */
/* -------------------------------------------------------------------- */
@ -1588,6 +1612,16 @@ GLuint GLShader::program_get()
return program_active_->program_id;
}
GLSourcesBaked GLShader::get_sources()
{
GLSourcesBaked result;
result.comp = compute_sources_.to_string();
result.vert = vertex_sources_.to_string();
result.geom = geometry_sources_.to_string();
result.frag = fragment_sources_.to_string();
return result;
}
/** \} */
#if BLI_SUBPROCESS_SUPPORT
@ -1620,12 +1654,37 @@ GLCompilerWorker::~GLCompilerWorker()
start_semaphore_->increment();
}
void GLCompilerWorker::compile(StringRefNull vert, StringRefNull frag)
void GLCompilerWorker::compile(const GLSourcesBaked &sources)
{
BLI_assert(state_ == AVAILABLE);
strcpy((char *)shared_mem_->get_data(), vert.c_str());
strcpy((char *)shared_mem_->get_data() + vert.size() + sizeof('\0'), frag.c_str());
ShaderSourceHeader *shared_src = reinterpret_cast<ShaderSourceHeader *>(shared_mem_->get_data());
char *next_src = shared_src->sources;
auto add_src = [&](const std::string &src) {
if (!src.empty()) {
strcpy(next_src, src.c_str());
next_src += src.size() + sizeof('\0');
}
};
add_src(sources.comp);
add_src(sources.vert);
add_src(sources.geom);
add_src(sources.frag);
BLI_assert(size_t(next_src) <= size_t(shared_src) + compilation_subprocess_shared_memory_size);
if (!sources.comp.empty()) {
BLI_assert(sources.vert.empty() && sources.geom.empty() && sources.frag.empty());
shared_src->type = ShaderSourceHeader::Type::COMPUTE;
}
else {
BLI_assert(sources.comp.empty() && !sources.vert.empty() && !sources.frag.empty());
shared_src->type = sources.geom.empty() ?
ShaderSourceHeader::Type::GRAPHICS :
ShaderSourceHeader::Type::GRAPHICS_WITH_GEOMETRY_STAGE;
}
start_semaphore_->increment();
@ -1668,7 +1727,7 @@ bool GLCompilerWorker::load_program_binary(GLint program)
state_ = COMPILATION_FINISHED;
if (binary->size > 0) {
glProgramBinary(program, binary->format, &binary->data_start, binary->size);
glProgramBinary(program, binary->format, binary->data, binary->size);
return true;
}
@ -1695,7 +1754,7 @@ GLShaderCompiler::~GLShaderCompiler()
}
}
GLCompilerWorker *GLShaderCompiler::get_compiler_worker(const char *vert, const char *frag)
GLCompilerWorker *GLShaderCompiler::get_compiler_worker(const GLSourcesBaked &sources)
{
GLCompilerWorker *result = nullptr;
for (GLCompilerWorker *compiler : workers_) {
@ -1709,7 +1768,7 @@ GLCompilerWorker *GLShaderCompiler::get_compiler_worker(const char *vert, const
workers_.append(result);
}
if (result) {
result->compile(vert, frag);
result->compile(sources);
}
return result;
}
@ -1739,31 +1798,21 @@ BatchHandle GLShaderCompiler::batch_compile(Span<const shader::ShaderCreateInfo
for (const shader::ShaderCreateInfo *info : infos) {
const_cast<ShaderCreateInfo *>(info)->finalize();
CompilationWork item = {};
batch.items.append({});
CompilationWork &item = batch.items.last();
item.info = info;
item.do_async_compilation = !info->vertex_source_.is_empty() &&
!info->fragment_source_.is_empty() &&
info->compute_source_.is_empty() &&
info->geometry_source_.is_empty();
if (item.do_async_compilation) {
item.shader = static_cast<GLShader *>(compile(*info, true));
for (const char *src : item.shader->vertex_sources_.sources_get()) {
item.vertex_src.append(src);
}
for (const char *src : item.shader->fragment_sources_.sources_get()) {
item.fragment_src.append(src);
}
item.shader = static_cast<GLShader *>(compile(*info, true));
item.sources = item.shader->get_sources();
size_t required_size = item.vertex_src.size() + item.fragment_src.size();
if (required_size < compilation_subprocess_shared_memory_size) {
item.worker = get_compiler_worker(item.vertex_src.c_str(), item.fragment_src.c_str());
}
else {
delete item.shader;
item.do_async_compilation = false;
}
size_t required_size = item.sources.size();
item.do_async_compilation = required_size <= sizeof(ShaderSourceHeader::sources);
if (item.do_async_compilation) {
item.worker = get_compiler_worker(item.sources);
}
else {
delete item.shader;
item.sources = {};
}
batch.items.append(item);
}
return handle;
}
@ -1791,7 +1840,7 @@ bool GLShaderCompiler::batch_is_ready(BatchHandle handle)
if (!item.worker) {
/* Try to acquire an available worker. */
item.worker = get_compiler_worker(item.vertex_src.c_str(), item.fragment_src.c_str());
item.worker = get_compiler_worker(item.sources);
}
else if (item.worker->is_ready()) {
/* Retrieve the binary compiled by the worker. */

@ -44,6 +44,21 @@ class GLSources : public Vector<GLSource> {
public:
GLSources &operator=(Span<const char *> other);
Vector<const char *> sources_get() const;
std::string to_string() const;
};
/**
* The full sources for each shader stage, baked into a single string from their respective
* GLSources. (Can be retrieved from GLShader::get_sources())
*/
struct GLSourcesBaked : NonCopyable {
std::string comp;
std::string vert;
std::string geom;
std::string frag;
/* Returns the size (in bytes) required to store the source of all the used stages. */
size_t size();
};
/**
@ -194,6 +209,8 @@ class GLShader : public Shader {
return program_active_->compute_shader != 0;
}
GLSourcesBaked get_sources();
private:
const char *glsl_patch_get(GLenum gl_stage);
@ -240,7 +257,7 @@ class GLCompilerWorker {
GLCompilerWorker();
~GLCompilerWorker();
void compile(StringRefNull vert, StringRefNull frag);
void compile(const GLSourcesBaked &sources);
bool is_ready();
bool load_program_binary(GLint program);
void release();
@ -255,14 +272,12 @@ class GLShaderCompiler : public ShaderCompiler {
Vector<GLCompilerWorker *> workers_;
struct CompilationWork {
GLCompilerWorker *worker = nullptr;
GLShader *shader = nullptr;
const shader::ShaderCreateInfo *info = nullptr;
GLShader *shader = nullptr;
GLSourcesBaked sources;
GLCompilerWorker *worker = nullptr;
bool do_async_compilation = false;
std::string vertex_src;
std::string fragment_src;
bool is_ready = false;
};
@ -274,7 +289,7 @@ class GLShaderCompiler : public ShaderCompiler {
BatchHandle next_batch_handle = 1;
Map<BatchHandle, Batch> batches;
GLCompilerWorker *get_compiler_worker(const char *vert, const char *frag);
GLCompilerWorker *get_compiler_worker(const GLSourcesBaked &sources);
bool worker_is_lost(GLCompilerWorker *&worker);
public:

@ -44,11 +44,13 @@ void main()
vec2 center = vec2(strip.right_handle + strip.left_handle, strip.top + strip.bottom) * 0.5;
/* Transform strip rectangle into pixel coordinates, so that
* rounded corners have proper aspect ratio and can be expressed in pixels. */
* rounded corners have proper aspect ratio and can be expressed in pixels.
* Also snap to pixel grid coordinates, so that the outline/border is drawn at
* clear, non-fractional pixel sizes. */
vec2 view_to_pixel = vec2(context_data.inv_pixelx, context_data.inv_pixely);
size *= view_to_pixel;
center *= view_to_pixel;
vec2 pos = co * view_to_pixel;
size = round(size * view_to_pixel);
center = round(center * view_to_pixel);
vec2 pos = round(co * view_to_pixel);
float radius = context_data.round_radius;
if (radius > size.x) {

@ -9,11 +9,10 @@ void main()
int vid = gl_VertexID;
SeqStripDrawData strip = strip_data[id];
vec4 rect = vec4(strip.left_handle, strip.bottom, strip.right_handle, strip.top);
/* Expand rasterized rectangle by 1px so that we can do outlines. */
rect.x -= context_data.pixelx;
rect.z += context_data.pixelx;
rect.y -= context_data.pixely;
rect.w += context_data.pixely;
/* Expand by 2px to fit possible outline and pixel grid rounding. */
vec2 expand = vec2(context_data.pixelx, context_data.pixely) * 2.0;
rect.xy -= expand;
rect.zw += expand;
vec2 co;
if (vid == 0) {