Fix crash when editing shaders on Intel HD 4000.

On the Intel HD 4000 driver, a shader must be deleted in the same OpenGL context in which it was created. However, a rendering context cannot be used from multiple threads, so to keep multithreaded shader compilation working, the fix uses `GL_ARB_get_program_binary`: the worker thread compiles the shader, copies out its program binary, and the main context then recreates the shader from that binary. This workaround is enabled only for Intel HD 4000 on Windows.

Reviewers: fclem

Reviewed By: fclem

Differential Revision: https://developer.blender.org/D5019
commit ce66b22c42
parent dd81efa4a3
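For context, the heart of the fix is the `GL_ARB_get_program_binary` round trip. The sketch below is illustrative only and is not part of the patch: the helper names are hypothetical, and it assumes a GLEW-based setup where the extension is available.

```c
/* Illustrative sketch, not from the commit: the program-binary round trip
 * that lets a shader compiled on a worker thread's context be recreated
 * (and later deleted) on the main context. */
#include <GL/glew.h>
#include <stdlib.h>

/* Worker context: serialize the linked program, then delete it here, in the
 * same context that created it (the Intel HD 4000 requirement). */
static void *serialize_and_delete(GLuint program, GLenum *r_format, GLint *r_len)
{
  GLint len = 0;
  glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &len);
  void *blob = malloc((size_t)len);
  glGetProgramBinary(program, len, NULL, r_format, blob);
  glDeleteProgram(program);
  *r_len = len;
  return blob;
}

/* Main context: rebuild the program from the blob. Returns 0 if the driver
 * rejects the binary, in which case the caller must recompile from source. */
static GLuint restore_program(const void *blob, GLenum format, GLint len)
{
  GLuint program = glCreateProgram();
  glProgramBinary(program, format, blob, len);
  GLint ok = GL_FALSE;
  glGetProgramiv(program, GL_LINK_STATUS, &ok);
  if (!ok) {
    glDeleteProgram(program);
    program = 0;
  }
  return program;
}
```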
@@ -63,7 +63,8 @@ typedef struct DRWDeferredShader {
 } DRWDeferredShader;
 
 typedef struct DRWShaderCompiler {
-  ListBase queue; /* DRWDeferredShader */
+  ListBase queue;          /* DRWDeferredShader */
+  ListBase queue_conclude; /* DRWDeferredShader */
   SpinLock list_lock;
 
   DRWDeferredShader *mat_compiling;
@@ -134,7 +135,12 @@ static void drw_deferred_shader_compilation_exec(void *custom_data,
     BLI_mutex_unlock(&comp->compilation_lock);
 
     BLI_spin_lock(&comp->list_lock);
-    drw_deferred_shader_free(comp->mat_compiling);
+    if (GPU_material_status(comp->mat_compiling->mat) == GPU_MAT_QUEUED) {
+      BLI_addtail(&comp->queue_conclude, comp->mat_compiling);
+    }
+    else {
+      drw_deferred_shader_free(comp->mat_compiling);
+    }
     comp->mat_compiling = NULL;
     BLI_spin_unlock(&comp->list_lock);
   }
@@ -148,6 +154,17 @@ static void drw_deferred_shader_compilation_free(void *custom_data)
 
   drw_deferred_shader_queue_free(&comp->queue);
 
+  if (!BLI_listbase_is_empty(&comp->queue_conclude)) {
+    /* Compile the shaders in the context in which they will be deleted. */
+    DRW_opengl_context_enable_ex(false);
+    DRWDeferredShader *mat_conclude;
+    while ((mat_conclude = BLI_poptail(&comp->queue_conclude))) {
+      GPU_material_compile(mat_conclude->mat);
+      drw_deferred_shader_free(mat_conclude);
+    }
+    DRW_opengl_context_disable_ex(true);
+  }
+
   BLI_spin_end(&comp->list_lock);
   BLI_mutex_end(&comp->compilation_lock);
@@ -46,6 +46,7 @@ void GPU_get_dfdy_factors(float fac[2]);
 bool GPU_mip_render_workaround(void);
 bool GPU_depth_blitting_workaround(void);
 bool GPU_unused_fb_slot_workaround(void);
+bool GPU_context_local_shaders_workaround(void);
 bool GPU_crappy_amd_driver(void);
 
 bool GPU_mem_stats_supported(void);
@@ -58,6 +58,10 @@ GPUShader *GPU_shader_create_ex(const char *vertexcode,
                                 const char **tf_names,
                                 const int tf_count,
                                 const char *shader_name);
+GPUShader *GPU_shader_load_from_binary(const char *binary,
+                                       const int binary_format,
+                                       const int binary_len,
+                                       const char *shname);
 struct GPU_ShaderCreateFromArray_Params {
   const char **vert, **geom, **frag, **defs;
 };
@@ -95,6 +99,8 @@ void GPU_shader_uniform_int(GPUShader *shader, int location, int value);
 
 int GPU_shader_get_attribute(GPUShader *shader, const char *name);
 
+char *GPU_shader_get_binary(GPUShader *shader, int *r_binary_format, int *r_binary_len);
+
 /* Builtin/Non-generated shaders */
 typedef enum eGPUBuiltinShader {
   /* specialized drawing */
@@ -2105,17 +2105,17 @@ static int count_active_texture_sampler(GPUShader *shader, char *source)
   return sampler_len;
 }
 
-static bool gpu_pass_shader_validate(GPUPass *pass)
+static bool gpu_pass_shader_validate(GPUPass *pass, GPUShader *shader)
 {
-  if (pass->shader == NULL) {
+  if (shader == NULL) {
     return false;
   }
 
   /* NOTE: The only drawback of this method is that it will count a sampler
    * used in the fragment shader and only declared (but not used) in the vertex
    * shader as used by both. But this corner case is not happening for now. */
-  int vert_samplers_len = count_active_texture_sampler(pass->shader, pass->vertexcode);
-  int frag_samplers_len = count_active_texture_sampler(pass->shader, pass->fragmentcode);
+  int vert_samplers_len = count_active_texture_sampler(shader, pass->vertexcode);
+  int frag_samplers_len = count_active_texture_sampler(shader, pass->fragmentcode);
 
   int total_samplers_len = vert_samplers_len + frag_samplers_len;
 
@@ -2126,7 +2126,7 @@ static bool gpu_pass_shader_validate(GPUPass *pass)
   }
 
   if (pass->geometrycode) {
-    int geom_samplers_len = count_active_texture_sampler(pass->shader, pass->geometrycode);
+    int geom_samplers_len = count_active_texture_sampler(shader, pass->geometrycode);
     total_samplers_len += geom_samplers_len;
     if (geom_samplers_len > GPU_max_textures_geom()) {
       return false;
@@ -2136,30 +2136,40 @@ static bool gpu_pass_shader_validate(GPUPass *pass)
   return (total_samplers_len <= GPU_max_textures());
 }
 
-void GPU_pass_compile(GPUPass *pass, const char *shname)
+bool GPU_pass_compile(GPUPass *pass, const char *shname)
 {
+  bool success = true;
   if (!pass->compiled) {
-    pass->shader = GPU_shader_create(
+    GPUShader *shader = GPU_shader_create(
         pass->vertexcode, pass->fragmentcode, pass->geometrycode, NULL, pass->defines, shname);
 
     /* NOTE: Some drivers / GPUs allow more active samplers than the OpenGL limit.
      * We need to make sure to count active samplers to avoid undefined behavior. */
-    if (!gpu_pass_shader_validate(pass)) {
-      if (pass->shader != NULL) {
+    if (!gpu_pass_shader_validate(pass, shader)) {
+      success = false;
+      if (shader != NULL) {
         fprintf(stderr, "GPUShader: error: too many samplers in shader.\n");
-        GPU_shader_free(pass->shader);
+        GPU_shader_free(shader);
+        shader = NULL;
       }
-      pass->shader = NULL;
     }
-    else if (!BLI_thread_is_main()) {
-      /* For some Intel drivers, you must use the program at least once
-       * in the rendering context in which it was linked. */
-      glUseProgram(GPU_shader_get_program(pass->shader));
-      glUseProgram(0);
+    else if (!BLI_thread_is_main() && GPU_context_local_shaders_workaround()) {
+      pass->binary.content = GPU_shader_get_binary(
+          shader, &pass->binary.format, &pass->binary.len);
+      GPU_shader_free(shader);
+      shader = NULL;
     }
 
+    pass->shader = shader;
     pass->compiled = true;
   }
+  else if (pass->binary.content && BLI_thread_is_main()) {
+    pass->shader = GPU_shader_load_from_binary(
+        pass->binary.content, pass->binary.format, pass->binary.len, shname);
+    MEM_SAFE_FREE(pass->binary.content);
+  }
+
+  return success;
 }
 
 void GPU_pass_release(GPUPass *pass)
@@ -2178,6 +2188,9 @@ static void gpu_pass_free(GPUPass *pass)
   MEM_SAFE_FREE(pass->geometrycode);
   MEM_SAFE_FREE(pass->vertexcode);
   MEM_SAFE_FREE(pass->defines);
+  if (pass->binary.content) {
+    MEM_freeN(pass->binary.content);
+  }
   MEM_freeN(pass);
 }
 
@@ -164,6 +164,11 @@ struct GPUPass {
   char *defines;
   uint refcount;  /* Orphaned GPUPasses get freed by the garbage collector. */
   uint32_t hash;  /* Identity hash generated from all GLSL code. */
+  struct {
+    char *content;
+    int format;
+    int len;
+  } binary;
   bool compiled;  /* Whether we already tried to compile the attached GPUShader. */
 };
@@ -185,7 +190,7 @@ void GPU_nodes_extract_dynamic_inputs(struct GPUShader *shader, ListBase *inputs
 void GPU_nodes_get_vertex_attrs(ListBase *nodes, struct GPUVertAttrLayers *attrs);
 void GPU_nodes_prune(ListBase *nodes, struct GPUNodeLink *outlink);
 
-void GPU_pass_compile(GPUPass *pass, const char *shname);
+bool GPU_pass_compile(GPUPass *pass, const char *shname);
 void GPU_pass_release(GPUPass *pass);
 void GPU_pass_free_nodes(ListBase *nodes);
 
@@ -89,6 +89,9 @@ static struct GPUGlobal {
   /* Crappy drivers don't know how to map framebuffer slots to output vars...
    * We need to have no "holes" in the output buffer slots. */
   bool unused_fb_slot_workaround;
+  /* Some crappy Intel drivers don't work well with shaders created in different
+   * rendering contexts. */
+  bool context_local_shaders_workaround;
 } GG = {1, 0};
 
 static void gpu_detect_mip_render_workaround(void)
@@ -209,6 +212,11 @@ bool GPU_unused_fb_slot_workaround(void)
   return GG.unused_fb_slot_workaround;
 }
 
+bool GPU_context_local_shaders_workaround(void)
+{
+  return GG.context_local_shaders_workaround;
+}
+
 bool GPU_crappy_amd_driver(void)
 {
   /* Currently these are the same drivers that have the `unused_fb_slot` problem. */
@@ -347,6 +355,7 @@ void gpu_extensions_init(void)
     GG.mip_render_workaround = true;
     GG.depth_blitting_workaround = true;
     GG.unused_fb_slot_workaround = true;
+    GG.context_local_shaders_workaround = true;
   }
 
   /* df/dy calculation factors; these depend on the driver. */
@@ -354,19 +363,24 @@ void gpu_extensions_init(void)
     GG.dfdyfactors[0] = 1.0;
     GG.dfdyfactors[1] = -1.0;
   }
-  else if ((GG.device == GPU_DEVICE_INTEL) && (GG.os == GPU_OS_WIN) &&
-           (strstr(version, "4.0.0 - Build 10.18.10.3308") ||
-            strstr(version, "4.0.0 - Build 9.18.10.3186") ||
-            strstr(version, "4.0.0 - Build 9.18.10.3165") ||
-            strstr(version, "3.1.0 - Build 9.17.10.3347") ||
-            strstr(version, "3.1.0 - Build 9.17.10.4101") ||
-            strstr(version, "3.3.0 - Build 8.15.10.2618"))) {
-    GG.dfdyfactors[0] = -1.0;
-    GG.dfdyfactors[1] = 1.0;
-  }
-  else {
-    GG.dfdyfactors[0] = 1.0;
-    GG.dfdyfactors[1] = 1.0;
+  else if ((GG.device == GPU_DEVICE_INTEL) && (GG.os == GPU_OS_WIN)) {
+    if (strstr(version, "4.0.0 - Build 10.18.10.3308") ||
+        strstr(version, "4.0.0 - Build 9.18.10.3186") ||
+        strstr(version, "4.0.0 - Build 9.18.10.3165") ||
+        strstr(version, "3.1.0 - Build 9.17.10.3347") ||
+        strstr(version, "3.1.0 - Build 9.17.10.4101") ||
+        strstr(version, "3.3.0 - Build 8.15.10.2618")) {
+      GG.dfdyfactors[0] = -1.0;
+      GG.dfdyfactors[1] = 1.0;
+    }
+    else {
+      GG.dfdyfactors[0] = 1.0;
+      GG.dfdyfactors[1] = 1.0;
+    }
+
+    if (strstr(renderer, "HD Graphics 4000")) {
+      GG.context_local_shaders_workaround = true;
+    }
   }
 
   GPU_invalid_tex_init();
@@ -733,23 +733,25 @@ GPUMaterial *GPU_material_from_nodetree(Scene *scene,
 
 void GPU_material_compile(GPUMaterial *mat)
 {
-  /* Only run once! */
+  bool success;
+
   BLI_assert(mat->status == GPU_MAT_QUEUED);
   BLI_assert(mat->pass);
 
   /* NOTE: The shader may have already been compiled here since we are
    * sharing GPUShader across GPUMaterials. In this case it's a no-op. */
 #ifndef NDEBUG
-  GPU_pass_compile(mat->pass, mat->name);
+  success = GPU_pass_compile(mat->pass, mat->name);
 #else
-  GPU_pass_compile(mat->pass, __func__);
+  success = GPU_pass_compile(mat->pass, __func__);
 #endif
 
-  GPUShader *sh = GPU_pass_shader_get(mat->pass);
-
-  if (sh != NULL) {
-    mat->status = GPU_MAT_SUCCESS;
-    GPU_nodes_extract_dynamic_inputs(sh, &mat->inputs, &mat->nodes);
+  if (success) {
+    GPUShader *sh = GPU_pass_shader_get(mat->pass);
+    if (sh != NULL) {
+      mat->status = GPU_MAT_SUCCESS;
+      GPU_nodes_extract_dynamic_inputs(sh, &mat->inputs, &mat->nodes);
+    }
   }
   else {
     mat->status = GPU_MAT_FAILED;
@@ -292,6 +292,36 @@ GPUShader *GPU_shader_create(const char *vertexcode,
       vertexcode, fragcode, geocode, libcode, defines, GPU_SHADER_TFB_NONE, NULL, 0, shname);
 }
 
+GPUShader *GPU_shader_load_from_binary(const char *binary,
+                                       const int binary_format,
+                                       const int binary_len,
+                                       const char *shname)
+{
+  BLI_assert(GL_ARB_get_program_binary);
+  int success;
+  int program = glCreateProgram();
+
+  glProgramBinary(program, binary_format, binary, binary_len);
+  glGetProgramiv(program, GL_LINK_STATUS, &success);
+
+  if (success) {
+    GPUShader *shader = MEM_callocN(sizeof(*shader), __func__);
+    shader->interface = GPU_shaderinterface_create(program);
+    shader->program = program;
+
+#ifndef NDEBUG
+    BLI_snprintf(shader->name, sizeof(shader->name), "%s_%u", shname, g_shaderid++);
+#else
+    UNUSED_VARS(shname);
+#endif
+
+    return shader;
+  }
+
+  glDeleteProgram(program);
+  return NULL;
+}
+
 #define DEBUG_SHADER_NONE ""
 #define DEBUG_SHADER_VERTEX "vert"
 #define DEBUG_SHADER_FRAGMENT "frag"
@@ -815,6 +845,23 @@ int GPU_shader_get_attribute(GPUShader *shader, const char *name)
   return attr ? attr->location : -1;
 }
 
+char *GPU_shader_get_binary(GPUShader *shader, int *r_binary_format, int *r_binary_len)
+{
+  BLI_assert(GLEW_ARB_get_program_binary);
+  char *r_binary;
+  int binary_len = 0;
+
+  glGetProgramiv(shader->program, GL_PROGRAM_BINARY_LENGTH, &binary_len);
+  r_binary = MEM_mallocN(binary_len, __func__);
+  glGetProgramBinary(shader->program, binary_len, NULL, (GLenum *)r_binary_format, r_binary);
+
+  if (r_binary_len) {
+    *r_binary_len = binary_len;
+  }
+
+  return r_binary;
+}
+
 static const GPUShaderStages builtin_shader_stages[GPU_SHADER_BUILTIN_LEN] = {
     [GPU_SHADER_TEXT] =
         {
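As a usage note, the two new functions are the two halves of the hand-off that `GPU_pass_compile()` performs through `pass->binary`. The glue below is hypothetical and not part of the patch; it only shows how the pair is meant to be used across the two contexts.

```c
/* Hypothetical illustration, not in the patch: the two halves of the
 * binary hand-off between the worker context and the main context. */
#include "GPU_shader.h"
#include "MEM_guardedalloc.h"

typedef struct ShaderBlob {
  char *content;
  int format;
  int len;
} ShaderBlob;

/* Worker thread, in its own GL context: serialize the shader, then free it
 * in the same context that created it (the Intel HD 4000 requirement). */
static ShaderBlob worker_hand_off(GPUShader *shader)
{
  ShaderBlob blob;
  blob.content = GPU_shader_get_binary(shader, &blob.format, &blob.len);
  GPU_shader_free(shader);
  return blob;
}

/* Main thread, in the main GL context: rebuild the shader from the binary.
 * Returns NULL if the driver rejects the binary. */
static GPUShader *main_pick_up(ShaderBlob *blob)
{
  GPUShader *shader = GPU_shader_load_from_binary(
      blob->content, blob->format, blob->len, "relinked");
  MEM_freeN(blob->content);
  blob->content = NULL;
  return shader;
}
```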