Fix crash when editing shaders on Intel HD 4000.

With the Intel HD 4000 driver, a shader has to be deleted in the same OpenGL context in which it was created.
However, a rendering context cannot be used by more than one thread. So, to keep multithreaded shader compilation, the solution is to use `GL_ARB_get_program_binary`: copy the binary generated for the shader on the worker thread, and create the shader again in the main context from that binary.
This workaround is enabled only for Intel HD 4000 on Windows.
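
For reference, the round trip through `GL_ARB_get_program_binary` looks roughly like this. This is a minimal standalone sketch of the extension API (assuming a GLEW-style loader and an already linked `program`; the helper names are hypothetical and error handling is omitted), not the Blender code itself:

#include <GL/glew.h>
#include <stdlib.h>

/* Worker thread, in its own context: serialize the linked program,
 * then delete it in the context that created it. */
static void *serialize_program(GLuint program, GLenum *format, GLint *len)
{
  glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, len);
  void *blob = malloc((size_t)*len);
  glGetProgramBinary(program, *len, NULL, format, blob);
  glDeleteProgram(program);
  return blob;
}

/* Main thread, main context: rebuild the program from the binary.
 * Returns 0 if the driver rejects the binary, so callers must check. */
static GLuint load_program(const void *blob, GLenum format, GLint len)
{
  GLuint program = glCreateProgram();
  GLint ok = GL_FALSE;
  glProgramBinary(program, format, blob, len);
  glGetProgramiv(program, GL_LINK_STATUS, &ok);
  if (!ok) {
    glDeleteProgram(program);
    return 0;
  }
  return program;
}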

Reviewers: fclem

Reviewed By: fclem

Differential Revision: https://developer.blender.org/D5019
mano-wii 2019-06-05 13:06:11 -03:00
parent dd81efa4a3
commit ce66b22c42
8 changed files with 145 additions and 40 deletions

source/blender/draw/intern/draw_manager_shader.c

@@ -63,7 +63,8 @@ typedef struct DRWDeferredShader {
 } DRWDeferredShader;

 typedef struct DRWShaderCompiler {
-  ListBase queue; /* DRWDeferredShader */
+  ListBase queue;          /* DRWDeferredShader */
+  ListBase queue_conclude; /* DRWDeferredShader */
   SpinLock list_lock;
   DRWDeferredShader *mat_compiling;
@@ -134,7 +135,12 @@ static void drw_deferred_shader_compilation_exec(void *custom_data,
     BLI_mutex_unlock(&comp->compilation_lock);

     BLI_spin_lock(&comp->list_lock);
-    drw_deferred_shader_free(comp->mat_compiling);
+    if (GPU_material_status(comp->mat_compiling->mat) == GPU_MAT_QUEUED) {
+      BLI_addtail(&comp->queue_conclude, comp->mat_compiling);
+    }
+    else {
+      drw_deferred_shader_free(comp->mat_compiling);
+    }
     comp->mat_compiling = NULL;
     BLI_spin_unlock(&comp->list_lock);
   }
@@ -148,6 +154,17 @@ static void drw_deferred_shader_compilation_free(void *custom_data)

   drw_deferred_shader_queue_free(&comp->queue);

+  if (!BLI_listbase_is_empty(&comp->queue_conclude)) {
+    /* Compile the shaders in the context in which they will be deleted. */
+    DRW_opengl_context_enable_ex(false);
+    DRWDeferredShader *mat_conclude;
+    while ((mat_conclude = BLI_poptail(&comp->queue_conclude))) {
+      GPU_material_compile(mat_conclude->mat);
+      drw_deferred_shader_free(mat_conclude);
+    }
+    DRW_opengl_context_disable_ex(true);
+  }
+
   BLI_spin_end(&comp->list_lock);
   BLI_mutex_end(&comp->compilation_lock);

source/blender/gpu/GPU_extensions.h

@@ -46,6 +46,7 @@ void GPU_get_dfdy_factors(float fac[2]);
 bool GPU_mip_render_workaround(void);
 bool GPU_depth_blitting_workaround(void);
 bool GPU_unused_fb_slot_workaround(void);
+bool GPU_context_local_shaders_workaround(void);
 bool GPU_crappy_amd_driver(void);
 bool GPU_mem_stats_supported(void);

source/blender/gpu/GPU_shader.h

@@ -58,6 +58,10 @@ GPUShader *GPU_shader_create_ex(const char *vertexcode,
                                 const char **tf_names,
                                 const int tf_count,
                                 const char *shader_name);
+GPUShader *GPU_shader_load_from_binary(const char *binary,
+                                       const int binary_format,
+                                       const int binary_len,
+                                       const char *shname);
 struct GPU_ShaderCreateFromArray_Params {
   const char **vert, **geom, **frag, **defs;
 };
@@ -95,6 +99,8 @@ void GPU_shader_uniform_int(GPUShader *shader, int location, int value);
 int GPU_shader_get_attribute(GPUShader *shader, const char *name);

+char *GPU_shader_get_binary(GPUShader *shader, int *r_binary_format, int *r_binary_len);
+
 /* Builtin/Non-generated shaders */
 typedef enum eGPUBuiltinShader {
   /* specialized drawing */

source/blender/gpu/intern/gpu_codegen.c

@@ -2105,17 +2105,17 @@ static int count_active_texture_sampler(GPUShader *shader, char *source)
   return sampler_len;
 }

-static bool gpu_pass_shader_validate(GPUPass *pass)
+static bool gpu_pass_shader_validate(GPUPass *pass, GPUShader *shader)
 {
-  if (pass->shader == NULL) {
+  if (shader == NULL) {
     return false;
   }

   /* NOTE: The only drawback of this method is that it will count a sampler
    * used in the fragment shader and only declared (but not used) in the vertex
    * shader as used by both. But this corner case is not happening for now. */
-  int vert_samplers_len = count_active_texture_sampler(pass->shader, pass->vertexcode);
-  int frag_samplers_len = count_active_texture_sampler(pass->shader, pass->fragmentcode);
+  int vert_samplers_len = count_active_texture_sampler(shader, pass->vertexcode);
+  int frag_samplers_len = count_active_texture_sampler(shader, pass->fragmentcode);

   int total_samplers_len = vert_samplers_len + frag_samplers_len;
@@ -2126,7 +2126,7 @@ static bool gpu_pass_shader_validate(GPUPass *pass)
   }

   if (pass->geometrycode) {
-    int geom_samplers_len = count_active_texture_sampler(pass->shader, pass->geometrycode);
+    int geom_samplers_len = count_active_texture_sampler(shader, pass->geometrycode);
     total_samplers_len += geom_samplers_len;

     if (geom_samplers_len > GPU_max_textures_geom()) {
       return false;
@@ -2136,30 +2136,40 @@ static bool gpu_pass_shader_validate(GPUPass *pass)
   return (total_samplers_len <= GPU_max_textures());
 }

-void GPU_pass_compile(GPUPass *pass, const char *shname)
+bool GPU_pass_compile(GPUPass *pass, const char *shname)
 {
+  bool success = true;
   if (!pass->compiled) {
-    pass->shader = GPU_shader_create(
+    GPUShader *shader = GPU_shader_create(
         pass->vertexcode, pass->fragmentcode, pass->geometrycode, NULL, pass->defines, shname);

     /* NOTE: Some drivers / gpu allows more active samplers than the opengl limit.
      * We need to make sure to count active samplers to avoid undefined behavior. */
-    if (!gpu_pass_shader_validate(pass)) {
-      if (pass->shader != NULL) {
+    if (!gpu_pass_shader_validate(pass, shader)) {
+      success = false;
+      if (shader != NULL) {
         fprintf(stderr, "GPUShader: error: too many samplers in shader.\n");
-        GPU_shader_free(pass->shader);
+        GPU_shader_free(shader);
+        shader = NULL;
       }
-      pass->shader = NULL;
     }
-    else if (!BLI_thread_is_main()) {
-      /* For some Intel drivers, you must use the program at least once
-       * in the rendering context that it is linked. */
-      glUseProgram(GPU_shader_get_program(pass->shader));
-      glUseProgram(0);
+    else if (!BLI_thread_is_main() && GPU_context_local_shaders_workaround()) {
+      pass->binary.content = GPU_shader_get_binary(
+          shader, &pass->binary.format, &pass->binary.len);
+      GPU_shader_free(shader);
+      shader = NULL;
     }

+    pass->shader = shader;
     pass->compiled = true;
   }
+  else if (pass->binary.content && BLI_thread_is_main()) {
+    pass->shader = GPU_shader_load_from_binary(
+        pass->binary.content, pass->binary.format, pass->binary.len, shname);
+    MEM_SAFE_FREE(pass->binary.content);
+  }
+
+  return success;
 }

 void GPU_pass_release(GPUPass *pass)
@@ -2178,6 +2188,9 @@ static void gpu_pass_free(GPUPass *pass)
   MEM_SAFE_FREE(pass->geometrycode);
   MEM_SAFE_FREE(pass->vertexcode);
   MEM_SAFE_FREE(pass->defines);
+  if (pass->binary.content) {
+    MEM_freeN(pass->binary.content);
+  }
   MEM_freeN(pass);
 }
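
Taken together, these gpu_codegen.c changes make GPU_pass_compile two-phased when the workaround is active: the first call, on the worker thread, compiles the GLSL and keeps only the program binary; a later call on the main thread materializes the GPUShader from that binary. A rough sketch of the intended call sequence (hypothetical driver code written against the patch, not part of it):

/* Worker thread, workaround active: compiles, stores pass->binary via
 * GPU_shader_get_binary() and frees the context-local shader, so
 * pass->shader is still NULL afterwards. */
bool success = GPU_pass_compile(pass, "my_material");

/* Main thread: pass->compiled is already true but pass->binary.content
 * is set, so this call rebuilds the program with
 * GPU_shader_load_from_binary() in the main context. */
if (success && BLI_thread_is_main()) {
  GPU_pass_compile(pass, "my_material");
}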

source/blender/gpu/intern/gpu_codegen.h

@@ -164,6 +164,11 @@ struct GPUPass {
   char *defines;
   uint refcount;  /* Orphaned GPUPasses gets freed by the garbage collector. */
   uint32_t hash;  /* Identity hash generated from all GLSL code. */
+  struct {
+    char *content;
+    int format;
+    int len;
+  } binary;
   bool compiled;  /* Did we already tried to compile the attached GPUShader. */
 };
@@ -185,7 +190,7 @@ void GPU_nodes_extract_dynamic_inputs(struct GPUShader *shader, ListBase *inputs
 void GPU_nodes_get_vertex_attrs(ListBase *nodes, struct GPUVertAttrLayers *attrs);
 void GPU_nodes_prune(ListBase *nodes, struct GPUNodeLink *outlink);

-void GPU_pass_compile(GPUPass *pass, const char *shname);
+bool GPU_pass_compile(GPUPass *pass, const char *shname);
 void GPU_pass_release(GPUPass *pass);
 void GPU_pass_free_nodes(ListBase *nodes);

source/blender/gpu/intern/gpu_extensions.c

@@ -89,6 +89,9 @@ static struct GPUGlobal {
   /* Crappy driver don't know how to map framebuffer slot to output vars...
    * We need to have no "holes" in the output buffer slots. */
   bool unused_fb_slot_workaround;
+  /* Some crappy Intel drivers don't work well with shaders created in
+   * different rendering contexts. */
+  bool context_local_shaders_workaround;
 } GG = {1, 0};

 static void gpu_detect_mip_render_workaround(void)
@@ -209,6 +212,11 @@ bool GPU_unused_fb_slot_workaround(void)
   return GG.unused_fb_slot_workaround;
 }

+bool GPU_context_local_shaders_workaround(void)
+{
+  return GG.context_local_shaders_workaround;
+}
+
 bool GPU_crappy_amd_driver(void)
 {
   /* Currently are the same drivers with the `unused_fb_slot` problem. */
@@ -347,6 +355,7 @@ void gpu_extensions_init(void)
     GG.mip_render_workaround = true;
     GG.depth_blitting_workaround = true;
     GG.unused_fb_slot_workaround = true;
+    GG.context_local_shaders_workaround = true;
   }

   /* df/dy calculation factors, those are dependent on driver */
@@ -354,19 +363,24 @@ void gpu_extensions_init(void)
     GG.dfdyfactors[0] = 1.0;
     GG.dfdyfactors[1] = -1.0;
   }
-  else if ((GG.device == GPU_DEVICE_INTEL) && (GG.os == GPU_OS_WIN) &&
-           (strstr(version, "4.0.0 - Build 10.18.10.3308") ||
-            strstr(version, "4.0.0 - Build 9.18.10.3186") ||
-            strstr(version, "4.0.0 - Build 9.18.10.3165") ||
-            strstr(version, "3.1.0 - Build 9.17.10.3347") ||
-            strstr(version, "3.1.0 - Build 9.17.10.4101") ||
-            strstr(version, "3.3.0 - Build 8.15.10.2618"))) {
-    GG.dfdyfactors[0] = -1.0;
-    GG.dfdyfactors[1] = 1.0;
-  }
-  else {
-    GG.dfdyfactors[0] = 1.0;
-    GG.dfdyfactors[1] = 1.0;
-  }
+  else if ((GG.device == GPU_DEVICE_INTEL) && (GG.os == GPU_OS_WIN)) {
+    if (strstr(version, "4.0.0 - Build 10.18.10.3308") ||
+        strstr(version, "4.0.0 - Build 9.18.10.3186") ||
+        strstr(version, "4.0.0 - Build 9.18.10.3165") ||
+        strstr(version, "3.1.0 - Build 9.17.10.3347") ||
+        strstr(version, "3.1.0 - Build 9.17.10.4101") ||
+        strstr(version, "3.3.0 - Build 8.15.10.2618")) {
+      GG.dfdyfactors[0] = -1.0;
+      GG.dfdyfactors[1] = 1.0;
+    }
+    else {
+      GG.dfdyfactors[0] = 1.0;
+      GG.dfdyfactors[1] = 1.0;
+    }
+
+    if (strstr(renderer, "HD Graphics 4000")) {
+      GG.context_local_shaders_workaround = true;
+    }
+  }

   GPU_invalid_tex_init();

source/blender/gpu/intern/gpu_material.c

@@ -733,23 +733,25 @@ GPUMaterial *GPU_material_from_nodetree(Scene *scene,
 void GPU_material_compile(GPUMaterial *mat)
 {
+  bool success;
+
   /* Only run once! */
   BLI_assert(mat->status == GPU_MAT_QUEUED);
   BLI_assert(mat->pass);

   /* NOTE: The shader may have already been compiled here since we are
    * sharing GPUShader across GPUMaterials. In this case it's a no-op. */
 #ifndef NDEBUG
-  GPU_pass_compile(mat->pass, mat->name);
+  success = GPU_pass_compile(mat->pass, mat->name);
 #else
-  GPU_pass_compile(mat->pass, __func__);
+  success = GPU_pass_compile(mat->pass, __func__);
 #endif
-  GPUShader *sh = GPU_pass_shader_get(mat->pass);
-  if (sh != NULL) {
-    mat->status = GPU_MAT_SUCCESS;
-    GPU_nodes_extract_dynamic_inputs(sh, &mat->inputs, &mat->nodes);
+
+  if (success) {
+    GPUShader *sh = GPU_pass_shader_get(mat->pass);
+    if (sh != NULL) {
+      mat->status = GPU_MAT_SUCCESS;
+      GPU_nodes_extract_dynamic_inputs(sh, &mat->inputs, &mat->nodes);
+    }
   }
   else {
     mat->status = GPU_MAT_FAILED;

source/blender/gpu/intern/gpu_shader.c

@@ -292,6 +292,36 @@ GPUShader *GPU_shader_create(const char *vertexcode,
       vertexcode, fragcode, geocode, libcode, defines, GPU_SHADER_TFB_NONE, NULL, 0, shname);
 }

+GPUShader *GPU_shader_load_from_binary(const char *binary,
+                                       const int binary_format,
+                                       const int binary_len,
+                                       const char *shname)
+{
+  BLI_assert(GLEW_ARB_get_program_binary);
+  int success;
+  int program = glCreateProgram();
+
+  glProgramBinary(program, binary_format, binary, binary_len);
+  glGetProgramiv(program, GL_LINK_STATUS, &success);
+
+  if (success) {
+    GPUShader *shader = MEM_callocN(sizeof(*shader), __func__);
+    shader->interface = GPU_shaderinterface_create(program);
+    shader->program = program;
+
+#ifndef NDEBUG
+    BLI_snprintf(shader->name, sizeof(shader->name), "%s_%u", shname, g_shaderid++);
+#else
+    UNUSED_VARS(shname);
+#endif
+
+    return shader;
+  }
+
+  glDeleteProgram(program);
+  return NULL;
+}
+
 #define DEBUG_SHADER_NONE ""
 #define DEBUG_SHADER_VERTEX "vert"
 #define DEBUG_SHADER_FRAGMENT "frag"
@@ -815,6 +845,23 @@ int GPU_shader_get_attribute(GPUShader *shader, const char *name)
   return attr ? attr->location : -1;
 }

+char *GPU_shader_get_binary(GPUShader *shader, int *r_binary_format, int *r_binary_len)
+{
+  BLI_assert(GLEW_ARB_get_program_binary);
+  char *r_binary;
+  int binary_len = 0;
+
+  glGetProgramiv(shader->program, GL_PROGRAM_BINARY_LENGTH, &binary_len);
+  r_binary = MEM_mallocN(binary_len, __func__);
+  glGetProgramBinary(shader->program, binary_len, NULL, r_binary_format, r_binary);
+
+  if (r_binary_len) {
+    *r_binary_len = binary_len;
+  }
+
+  return r_binary;
+}
+
 static const GPUShaderStages builtin_shader_stages[GPU_SHADER_BUILTIN_LEN] = {
     [GPU_SHADER_TEXT] =
         {
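
The two new gpu_shader.c entry points are symmetric, and a caller is expected to pair them across contexts roughly as follows. This is a hedged usage sketch with hypothetical variable names; note that the returned binary is owned by the caller, and NULL from the load must be handled:

int format = 0, len = 0;

/* Worker context: snapshot the program, then drop the context-local shader. */
char *binary = GPU_shader_get_binary(worker_shader, &format, &len);
GPU_shader_free(worker_shader);

/* Main context: recreate the shader from the snapshot; NULL on failure. */
GPUShader *shader = GPU_shader_load_from_binary(binary, format, len, "my_shader");
MEM_freeN(binary);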