Fix crash when editing shaders on Intel HD 4000.

With the Intel HD 4000 driver, a shader has to be deleted in the same OpenGL context in which it was created.
However, a rendering context cannot be used by more than one thread. So, to keep multithreaded shader compilation, the solution is to use `GL_ARB_get_program_binary`: copy the binary generated for the shader on the worker thread, and create the shader again in the main context from that binary.
This workaround is enabled only for Intel HD 4000 on Windows.
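
For reference, the round trip through `GL_ARB_get_program_binary` looks roughly like this. This is a minimal standalone sketch of the extension API (assuming a GLEW-style loader and an already linked `program`; the helper names are hypothetical and error handling is omitted), not the Blender code itself:

#include <GL/glew.h>
#include <stdlib.h>

/* Worker thread, in its own context: serialize the linked program,
 * then delete it in the context that created it. */
static void *serialize_program(GLuint program, GLenum *format, GLint *len)
{
  glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, len);
  void *blob = malloc((size_t)*len);
  glGetProgramBinary(program, *len, NULL, format, blob);
  glDeleteProgram(program);
  return blob;
}

/* Main thread, main context: rebuild the program from the binary.
 * Returns 0 if the driver rejects the binary, so callers must check. */
static GLuint load_program(const void *blob, GLenum format, GLint len)
{
  GLuint program = glCreateProgram();
  GLint ok = GL_FALSE;
  glProgramBinary(program, format, blob, len);
  glGetProgramiv(program, GL_LINK_STATUS, &ok);
  if (!ok) {
    glDeleteProgram(program);
    return 0;
  }
  return program;
}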

Reviewers: fclem

Reviewed By: fclem

Differential Revision: https://developer.blender.org/D5019
mano-wii 2019-06-05 13:06:11 -03:00
parent dd81efa4a3
commit ce66b22c42
8 changed files with 145 additions and 40 deletions

source/blender/draw/intern/draw_manager_shader.c

@@ -63,7 +63,8 @@ typedef struct DRWDeferredShader {
 } DRWDeferredShader;

 typedef struct DRWShaderCompiler {
-  ListBase queue; /* DRWDeferredShader */
+  ListBase queue;          /* DRWDeferredShader */
+  ListBase queue_conclude; /* DRWDeferredShader */
   SpinLock list_lock;
   DRWDeferredShader *mat_compiling;
@@ -134,7 +135,12 @@ static void drw_deferred_shader_compilation_exec(void *custom_data,
     BLI_mutex_unlock(&comp->compilation_lock);

     BLI_spin_lock(&comp->list_lock);
-    drw_deferred_shader_free(comp->mat_compiling);
+    if (GPU_material_status(comp->mat_compiling->mat) == GPU_MAT_QUEUED) {
+      BLI_addtail(&comp->queue_conclude, comp->mat_compiling);
+    }
+    else {
+      drw_deferred_shader_free(comp->mat_compiling);
+    }
     comp->mat_compiling = NULL;
     BLI_spin_unlock(&comp->list_lock);
   }
@@ -148,6 +154,17 @@ static void drw_deferred_shader_compilation_free(void *custom_data)

   drw_deferred_shader_queue_free(&comp->queue);

+  if (!BLI_listbase_is_empty(&comp->queue_conclude)) {
+    /* Compile the shaders in the context in which they will be deleted. */
+    DRW_opengl_context_enable_ex(false);
+    DRWDeferredShader *mat_conclude;
+    while ((mat_conclude = BLI_poptail(&comp->queue_conclude))) {
+      GPU_material_compile(mat_conclude->mat);
+      drw_deferred_shader_free(mat_conclude);
+    }
+    DRW_opengl_context_disable_ex(true);
+  }
+
   BLI_spin_end(&comp->list_lock);
   BLI_mutex_end(&comp->compilation_lock);

source/blender/gpu/GPU_extensions.h

@@ -46,6 +46,7 @@ void GPU_get_dfdy_factors(float fac[2]);
 bool GPU_mip_render_workaround(void);
 bool GPU_depth_blitting_workaround(void);
 bool GPU_unused_fb_slot_workaround(void);
+bool GPU_context_local_shaders_workaround(void);
 bool GPU_crappy_amd_driver(void);
 bool GPU_mem_stats_supported(void);

source/blender/gpu/GPU_shader.h

@@ -58,6 +58,10 @@ GPUShader *GPU_shader_create_ex(const char *vertexcode,
                                 const char **tf_names,
                                 const int tf_count,
                                 const char *shader_name);
+GPUShader *GPU_shader_load_from_binary(const char *binary,
+                                       const int binary_format,
+                                       const int binary_len,
+                                       const char *shname);
 struct GPU_ShaderCreateFromArray_Params {
   const char **vert, **geom, **frag, **defs;
 };
@@ -95,6 +99,8 @@ void GPU_shader_uniform_int(GPUShader *shader, int location, int value);
 int GPU_shader_get_attribute(GPUShader *shader, const char *name);

+char *GPU_shader_get_binary(GPUShader *shader, int *r_binary_format, int *r_binary_len);
+
 /* Builtin/Non-generated shaders */
 typedef enum eGPUBuiltinShader {
   /* specialized drawing */

source/blender/gpu/intern/gpu_codegen.c

@@ -2105,17 +2105,17 @@ static int count_active_texture_sampler(GPUShader *shader, char *source)
   return sampler_len;
 }

-static bool gpu_pass_shader_validate(GPUPass *pass)
+static bool gpu_pass_shader_validate(GPUPass *pass, GPUShader *shader)
 {
-  if (pass->shader == NULL) {
+  if (shader == NULL) {
     return false;
   }

   /* NOTE: The only drawback of this method is that it will count a sampler
    * used in the fragment shader and only declared (but not used) in the vertex
    * shader as used by both. But this corner case is not happening for now. */
-  int vert_samplers_len = count_active_texture_sampler(pass->shader, pass->vertexcode);
-  int frag_samplers_len = count_active_texture_sampler(pass->shader, pass->fragmentcode);
+  int vert_samplers_len = count_active_texture_sampler(shader, pass->vertexcode);
+  int frag_samplers_len = count_active_texture_sampler(shader, pass->fragmentcode);

   int total_samplers_len = vert_samplers_len + frag_samplers_len;
@@ -2126,7 +2126,7 @@ static bool gpu_pass_shader_validate(GPUPass *pass)
   }

   if (pass->geometrycode) {
-    int geom_samplers_len = count_active_texture_sampler(pass->shader, pass->geometrycode);
+    int geom_samplers_len = count_active_texture_sampler(shader, pass->geometrycode);
     total_samplers_len += geom_samplers_len;

     if (geom_samplers_len > GPU_max_textures_geom()) {
       return false;
@@ -2136,30 +2136,40 @@ static bool gpu_pass_shader_validate(GPUPass *pass)
   return (total_samplers_len <= GPU_max_textures());
 }

-void GPU_pass_compile(GPUPass *pass, const char *shname)
+bool GPU_pass_compile(GPUPass *pass, const char *shname)
 {
+  bool success = true;
   if (!pass->compiled) {
-    pass->shader = GPU_shader_create(
+    GPUShader *shader = GPU_shader_create(
         pass->vertexcode, pass->fragmentcode, pass->geometrycode, NULL, pass->defines, shname);

     /* NOTE: Some drivers / gpu allows more active samplers than the opengl limit.
      * We need to make sure to count active samplers to avoid undefined behavior. */
-    if (!gpu_pass_shader_validate(pass)) {
-      if (pass->shader != NULL) {
+    if (!gpu_pass_shader_validate(pass, shader)) {
+      success = false;
+      if (shader != NULL) {
         fprintf(stderr, "GPUShader: error: too many samplers in shader.\n");
-        GPU_shader_free(pass->shader);
+        GPU_shader_free(shader);
+        shader = NULL;
       }
-      pass->shader = NULL;
     }
-    else if (!BLI_thread_is_main()) {
-      /* For some Intel drivers, you must use the program at least once
-       * in the rendering context that it is linked. */
-      glUseProgram(GPU_shader_get_program(pass->shader));
-      glUseProgram(0);
+    else if (!BLI_thread_is_main() && GPU_context_local_shaders_workaround()) {
+      pass->binary.content = GPU_shader_get_binary(
+          shader, &pass->binary.format, &pass->binary.len);
+      GPU_shader_free(shader);
+      shader = NULL;
     }

+    pass->shader = shader;
     pass->compiled = true;
   }
+  else if (pass->binary.content && BLI_thread_is_main()) {
+    pass->shader = GPU_shader_load_from_binary(
+        pass->binary.content, pass->binary.format, pass->binary.len, shname);
+    MEM_SAFE_FREE(pass->binary.content);
+  }
+
+  return success;
 }

 void GPU_pass_release(GPUPass *pass)
@@ -2178,6 +2188,9 @@ static void gpu_pass_free(GPUPass *pass)
   MEM_SAFE_FREE(pass->geometrycode);
   MEM_SAFE_FREE(pass->vertexcode);
   MEM_SAFE_FREE(pass->defines);
+  if (pass->binary.content) {
+    MEM_freeN(pass->binary.content);
+  }
   MEM_freeN(pass);
 }
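
Taken together, these gpu_codegen.c changes make GPU_pass_compile two-phased when the workaround is active: the first call, on the worker thread, compiles the GLSL and keeps only the program binary; a later call on the main thread materializes the GPUShader from that binary. A rough sketch of the intended call sequence (hypothetical driver code written against the patch, not part of it):

/* Worker thread, workaround active: compiles, stores pass->binary via
 * GPU_shader_get_binary() and frees the context-local shader, so
 * pass->shader is still NULL afterwards. */
bool success = GPU_pass_compile(pass, "my_material");

/* Main thread: pass->compiled is already true but pass->binary.content
 * is set, so this call rebuilds the program with
 * GPU_shader_load_from_binary() in the main context. */
if (success && BLI_thread_is_main()) {
  GPU_pass_compile(pass, "my_material");
}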

source/blender/gpu/intern/gpu_codegen.h

@@ -164,6 +164,11 @@ struct GPUPass {
   char *defines;
   uint refcount;  /* Orphaned GPUPasses gets freed by the garbage collector. */
   uint32_t hash;  /* Identity hash generated from all GLSL code. */
+  struct {
+    char *content;
+    int format;
+    int len;
+  } binary;
   bool compiled;  /* Did we already tried to compile the attached GPUShader. */
 };
@@ -185,7 +190,7 @@ void GPU_nodes_extract_dynamic_inputs(struct GPUShader *shader, ListBase *inputs
 void GPU_nodes_get_vertex_attrs(ListBase *nodes, struct GPUVertAttrLayers *attrs);
 void GPU_nodes_prune(ListBase *nodes, struct GPUNodeLink *outlink);

-void GPU_pass_compile(GPUPass *pass, const char *shname);
+bool GPU_pass_compile(GPUPass *pass, const char *shname);
 void GPU_pass_release(GPUPass *pass);
 void GPU_pass_free_nodes(ListBase *nodes);

source/blender/gpu/intern/gpu_extensions.c

@@ -89,6 +89,9 @@ static struct GPUGlobal {
   /* Crappy driver don't know how to map framebuffer slot to output vars...
    * We need to have no "holes" in the output buffer slots. */
   bool unused_fb_slot_workaround;
+  /* Some crappy Intel drivers don't work well with shaders created in
+   * different rendering contexts. */
+  bool context_local_shaders_workaround;
 } GG = {1, 0};

 static void gpu_detect_mip_render_workaround(void)
@@ -209,6 +212,11 @@ bool GPU_unused_fb_slot_workaround(void)
   return GG.unused_fb_slot_workaround;
 }

+bool GPU_context_local_shaders_workaround(void)
+{
+  return GG.context_local_shaders_workaround;
+}
+
 bool GPU_crappy_amd_driver(void)
 {
   /* Currently are the same drivers with the `unused_fb_slot` problem. */
@@ -347,6 +355,7 @@ void gpu_extensions_init(void)
     GG.mip_render_workaround = true;
     GG.depth_blitting_workaround = true;
     GG.unused_fb_slot_workaround = true;
+    GG.context_local_shaders_workaround = true;
   }

   /* df/dy calculation factors, those are dependent on driver */
@@ -354,19 +363,24 @@ void gpu_extensions_init(void)
     GG.dfdyfactors[0] = 1.0;
     GG.dfdyfactors[1] = -1.0;
   }
-  else if ((GG.device == GPU_DEVICE_INTEL) && (GG.os == GPU_OS_WIN) &&
-           (strstr(version, "4.0.0 - Build 10.18.10.3308") ||
-            strstr(version, "4.0.0 - Build 9.18.10.3186") ||
-            strstr(version, "4.0.0 - Build 9.18.10.3165") ||
-            strstr(version, "3.1.0 - Build 9.17.10.3347") ||
-            strstr(version, "3.1.0 - Build 9.17.10.4101") ||
-            strstr(version, "3.3.0 - Build 8.15.10.2618"))) {
-    GG.dfdyfactors[0] = -1.0;
-    GG.dfdyfactors[1] = 1.0;
-  }
-  else {
-    GG.dfdyfactors[0] = 1.0;
-    GG.dfdyfactors[1] = 1.0;
-  }
+  else if ((GG.device == GPU_DEVICE_INTEL) && (GG.os == GPU_OS_WIN)) {
+    if (strstr(version, "4.0.0 - Build 10.18.10.3308") ||
+        strstr(version, "4.0.0 - Build 9.18.10.3186") ||
+        strstr(version, "4.0.0 - Build 9.18.10.3165") ||
+        strstr(version, "3.1.0 - Build 9.17.10.3347") ||
+        strstr(version, "3.1.0 - Build 9.17.10.4101") ||
+        strstr(version, "3.3.0 - Build 8.15.10.2618")) {
+      GG.dfdyfactors[0] = -1.0;
+      GG.dfdyfactors[1] = 1.0;
+    }
+    else {
+      GG.dfdyfactors[0] = 1.0;
+      GG.dfdyfactors[1] = 1.0;
+    }
+
+    if (strstr(renderer, "HD Graphics 4000")) {
+      GG.context_local_shaders_workaround = true;
+    }
+  }

   GPU_invalid_tex_init();

source/blender/gpu/intern/gpu_material.c

@@ -733,23 +733,25 @@ GPUMaterial *GPU_material_from_nodetree(Scene *scene,
 void GPU_material_compile(GPUMaterial *mat)
 {
+  bool success;
+
   /* Only run once! */
   BLI_assert(mat->status == GPU_MAT_QUEUED);
   BLI_assert(mat->pass);

   /* NOTE: The shader may have already been compiled here since we are
    * sharing GPUShader across GPUMaterials. In this case it's a no-op. */
 #ifndef NDEBUG
-  GPU_pass_compile(mat->pass, mat->name);
+  success = GPU_pass_compile(mat->pass, mat->name);
 #else
-  GPU_pass_compile(mat->pass, __func__);
+  success = GPU_pass_compile(mat->pass, __func__);
 #endif
-  GPUShader *sh = GPU_pass_shader_get(mat->pass);
-  if (sh != NULL) {
-    mat->status = GPU_MAT_SUCCESS;
-    GPU_nodes_extract_dynamic_inputs(sh, &mat->inputs, &mat->nodes);
+
+  if (success) {
+    GPUShader *sh = GPU_pass_shader_get(mat->pass);
+    if (sh != NULL) {
+      mat->status = GPU_MAT_SUCCESS;
+      GPU_nodes_extract_dynamic_inputs(sh, &mat->inputs, &mat->nodes);
+    }
   }
   else {
     mat->status = GPU_MAT_FAILED;

source/blender/gpu/intern/gpu_shader.c

@@ -292,6 +292,36 @@ GPUShader *GPU_shader_create(const char *vertexcode,
       vertexcode, fragcode, geocode, libcode, defines, GPU_SHADER_TFB_NONE, NULL, 0, shname);
 }

+GPUShader *GPU_shader_load_from_binary(const char *binary,
+                                       const int binary_format,
+                                       const int binary_len,
+                                       const char *shname)
+{
+  BLI_assert(GLEW_ARB_get_program_binary);
+  int success;
+  int program = glCreateProgram();
+
+  glProgramBinary(program, binary_format, binary, binary_len);
+  glGetProgramiv(program, GL_LINK_STATUS, &success);
+
+  if (success) {
+    GPUShader *shader = MEM_callocN(sizeof(*shader), __func__);
+    shader->interface = GPU_shaderinterface_create(program);
+    shader->program = program;
+
+#ifndef NDEBUG
+    BLI_snprintf(shader->name, sizeof(shader->name), "%s_%u", shname, g_shaderid++);
+#else
+    UNUSED_VARS(shname);
+#endif
+
+    return shader;
+  }
+
+  glDeleteProgram(program);
+  return NULL;
+}
+
 #define DEBUG_SHADER_NONE ""
 #define DEBUG_SHADER_VERTEX "vert"
 #define DEBUG_SHADER_FRAGMENT "frag"
@@ -815,6 +845,23 @@ int GPU_shader_get_attribute(GPUShader *shader, const char *name)
   return attr ? attr->location : -1;
 }

+char *GPU_shader_get_binary(GPUShader *shader, int *r_binary_format, int *r_binary_len)
+{
+  BLI_assert(GLEW_ARB_get_program_binary);
+  char *r_binary;
+  int binary_len = 0;
+
+  glGetProgramiv(shader->program, GL_PROGRAM_BINARY_LENGTH, &binary_len);
+  r_binary = MEM_mallocN(binary_len, __func__);
+  glGetProgramBinary(shader->program, binary_len, NULL, r_binary_format, r_binary);
+
+  if (r_binary_len) {
+    *r_binary_len = binary_len;
+  }
+
+  return r_binary;
+}
+
 static const GPUShaderStages builtin_shader_stages[GPU_SHADER_BUILTIN_LEN] = {
     [GPU_SHADER_TEXT] =
         {
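
The two new gpu_shader.c entry points are symmetric, and a caller is expected to pair them across contexts roughly as follows. This is a hedged usage sketch with hypothetical variable names; note that the returned binary is owned by the caller, and NULL from the load must be handled:

int format = 0, len = 0;

/* Worker context: snapshot the program, then drop the context-local shader. */
char *binary = GPU_shader_get_binary(worker_shader, &format, &len);
GPU_shader_free(worker_shader);

/* Main context: recreate the shader from the snapshot; NULL on failure. */
GPUShader *shader = GPU_shader_load_from_binary(binary, format, len, "my_shader");
MEM_freeN(binary);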