From 9a71bc176b04eee0235427e94cf1e2d80a769b48 Mon Sep 17 00:00:00 2001
From: Ray Molenkamp
Date: Fri, 7 Jun 2024 07:37:13 -0600
Subject: [PATCH 1/8] deps: Fix osl build

OSL needs to know where to find deflate; this was mentioned during review
but not addressed before landing, my bad.
---
 build_files/build_environment/cmake/osl.cmake | 1 +
 1 file changed, 1 insertion(+)

diff --git a/build_files/build_environment/cmake/osl.cmake b/build_files/build_environment/cmake/osl.cmake
index 2038210dba2..5db83a02a52 100644
--- a/build_files/build_environment/cmake/osl.cmake
+++ b/build_files/build_environment/cmake/osl.cmake
@@ -51,6 +51,7 @@ set(OSL_EXTRA_ARGS
   -DCMAKE_DEBUG_POSTFIX=_d
   -DPython_ROOT=${LIBDIR}/python
   -DPython_EXECUTABLE=${PYTHON_BINARY}
+  -Dlibdeflate_DIR=${LIBDIR}/deflate/lib/cmake/libdeflate
 )
 
 if(NOT APPLE)

From 1790314f898a416d8ea0b28cba0f88d58216b223 Mon Sep 17 00:00:00 2001
From: Nikita Sirgienko
Date: Fri, 7 Jun 2024 16:05:48 +0200
Subject: [PATCH 2/8] Cycles: Fix multiple "Loading denoising kernels" messages

Pull Request: https://projects.blender.org/blender/blender/pulls/122880
---
 intern/cycles/integrator/denoiser.cpp | 8 +++++++-
 intern/cycles/integrator/denoiser.h   | 1 +
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/intern/cycles/integrator/denoiser.cpp b/intern/cycles/integrator/denoiser.cpp
index 326dfd06b44..e2f28a8dfc4 100644
--- a/intern/cycles/integrator/denoiser.cpp
+++ b/intern/cycles/integrator/denoiser.cpp
@@ -148,7 +148,7 @@ DenoiserType Denoiser::automatic_viewport_denoiser_type(const DeviceInfo &path_trace_device_info)
 }
 
 Denoiser::Denoiser(Device *denoiser_device, const DenoiseParams &params)
-    : denoiser_device_(denoiser_device), params_(params)
+    : denoiser_device_(denoiser_device), denoise_kernels_are_loaded_(false), params_(params)
 {
   DCHECK(denoiser_device_);
   DCHECK(params.use);
@@ -173,6 +173,11 @@ const DenoiseParams &Denoiser::get_params() const
 
 bool Denoiser::load_kernels(Progress *progress)
 {
+  /* If the kernels were already loaded successfully once, there is no need to load them again. */
+  if (denoise_kernels_are_loaded_) {
+    return denoise_kernels_are_loaded_;
+  }
+
   if (progress) {
     progress->set_status("Loading denoising kernels (may take a few minutes the first time)");
   }
@@ -195,6 +200,7 @@ bool Denoiser::load_kernels(Progress *progress)
   VLOG_WORK << "Will denoise on " << denoiser_device_->info.description << " ("
             << denoiser_device_->info.id << ")";
 
+  denoise_kernels_are_loaded_ = true;
   return true;
 }
 
diff --git a/intern/cycles/integrator/denoiser.h b/intern/cycles/integrator/denoiser.h
index ac34a473920..eb118da46b2 100644
--- a/intern/cycles/integrator/denoiser.h
+++ b/intern/cycles/integrator/denoiser.h
@@ -115,6 +115,7 @@ class Denoiser {
   virtual uint get_device_type_mask() const = 0;
 
   Device *denoiser_device_;
+  bool denoise_kernels_are_loaded_;
   DenoiseParams params_;
 };
 

From ec0dd18de5b80007e51f9f56a8bf0c799f7f6673 Mon Sep 17 00:00:00 2001
From: Miguel Pozo
Date: Fri, 7 Jun 2024 16:10:43 +0200
Subject: [PATCH 3/8] GPU: OpenGL: Support parallel geometry and compute shader compilation

Support parallel compilation of geometry and compute shaders when using
the `GPU_shader_batch` API.
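
For reviewers, this is roughly how a caller drives the batch API (a sketch;
the function names match the calls introduced later in this series, while
`gather_create_infos()` and `do_other_work()` are hypothetical placeholders):

  /* The create infos must stay valid until the batch is finalized. */
  blender::Vector<const GPUShaderCreateInfo *> infos = gather_create_infos();
  BatchHandle handle = GPU_shader_batch_create_from_infos(infos);

  /* Poll instead of blocking; compilation runs in worker subprocesses. */
  while (!GPU_shader_batch_is_ready(handle)) {
    do_other_work();
  }

  /* Returns one GPUShader per info, in order, and invalidates the handle. */
  blender::Vector<GPUShader *> shaders = GPU_shader_batch_finalize(handle);
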
Pull Request: https://projects.blender.org/blender/blender/pulls/122792
---
 .../gpu/opengl/gl_compilation_subprocess.cc   | 107 ++++++++++++------
 .../gpu/opengl/gl_compilation_subprocess.hh   |  22 +++-
 source/blender/gpu/opengl/gl_shader.cc        | 107 +++++++++++++-----
 source/blender/gpu/opengl/gl_shader.hh        |  31 +++--
 4 files changed, 194 insertions(+), 73 deletions(-)

diff --git a/source/blender/gpu/opengl/gl_compilation_subprocess.cc b/source/blender/gpu/opengl/gl_compilation_subprocess.cc
index 5cdf88f804d..27e3f8dc5ab 100644
--- a/source/blender/gpu/opengl/gl_compilation_subprocess.cc
+++ b/source/blender/gpu/opengl/gl_compilation_subprocess.cc
@@ -25,35 +25,57 @@ namespace blender::gpu {
 
 class SubprocessShader {
+  GLuint comp_ = 0;
   GLuint vert_ = 0;
+  GLuint geom_ = 0;
   GLuint frag_ = 0;
   GLuint program_ = 0;
   bool success_ = false;
 
 public:
-  SubprocessShader(const char *vert_src, const char *frag_src)
+  SubprocessShader(const char *comp_src,
+                   const char *vert_src,
+                   const char *geom_src,
+                   const char *frag_src)
   {
     GLint status;
-
-    vert_ = glCreateShader(GL_VERTEX_SHADER);
-    glShaderSource(vert_, 1, &vert_src, nullptr);
-    glCompileShader(vert_);
-    glGetShaderiv(vert_, GL_COMPILE_STATUS, &status);
-    if (!status) {
-      return;
-    }
-
-    frag_ = glCreateShader(GL_FRAGMENT_SHADER);
-    glShaderSource(frag_, 1, &frag_src, nullptr);
-    glCompileShader(frag_);
-    glGetShaderiv(frag_, GL_COMPILE_STATUS, &status);
-    if (!status) {
-      return;
-    }
-
     program_ = glCreateProgram();
-    glAttachShader(program_, vert_);
-    glAttachShader(program_, frag_);
+
+    auto compile_stage = [&](const char *src, GLenum stage) -> GLuint {
+      if (src == nullptr) {
+        /* A missing stage is not an error; report success so the caller continues. */
+        status = GL_TRUE;
+        return 0;
+      }
+
+      GLuint shader = glCreateShader(stage);
+      glShaderSource(shader, 1, &src, nullptr);
+      glCompileShader(shader);
+      glGetShaderiv(shader, GL_COMPILE_STATUS, &status);
+      glAttachShader(program_, shader);
+      return shader;
+    };
+
+    comp_ = compile_stage(comp_src, GL_COMPUTE_SHADER);
+    if (!status) {
+      return;
+    }
+
+    vert_ = compile_stage(vert_src, GL_VERTEX_SHADER);
+    if (!status) {
+      return;
+    }
+
+    geom_ = compile_stage(geom_src, GL_GEOMETRY_SHADER);
+    if (!status) {
+      return;
+    }
+
+    frag_ = compile_stage(frag_src, GL_FRAGMENT_SHADER);
+    if (!status) {
+      return;
+    }
 
     glLinkProgram(program_);
     glGetProgramiv(program_, GL_LINK_STATUS, &status);
     if (!status) {
@@ -65,7 +87,9 @@ class SubprocessShader {
 
   ~SubprocessShader()
   {
+    glDeleteShader(comp_);
     glDeleteShader(vert_);
+    glDeleteShader(geom_);
     glDeleteShader(frag_);
     glDeleteProgram(program_);
   }
@@ -78,8 +102,8 @@ class SubprocessShader {
     if (success_) {
       glGetProgramiv(program_, GL_PROGRAM_BINARY_LENGTH, &bin->size);
-      if (bin->size + sizeof(ShaderBinaryHeader) < compilation_subprocess_shared_memory_size) {
-        glGetProgramBinary(program_, bin->size, nullptr, &bin->format, &bin->data_start);
+      if (bin->size <= sizeof(ShaderBinaryHeader::data)) {
+        glGetProgramBinary(program_, bin->size, nullptr, &bin->format, bin->data);
       }
     }
 
@@ -92,7 +116,7 @@ static bool validate_binary(void *binary)
 {
   ShaderBinaryHeader *bin = reinterpret_cast<ShaderBinaryHeader *>(binary);
   GLuint program = glCreateProgram();
-  glProgramBinary(program, bin->format, &bin->data_start, bin->size);
+  glProgramBinary(program, bin->format, bin->data, bin->size);
   GLint status;
   glGetProgramiv(program, GL_LINK_STATUS, &status);
   glDeleteProgram(program);
@@ -165,15 +189,34 @@ void GPU_compilation_subprocess_run(const char *subprocess_name)
       break;
     }
 
-    const char *shaders = reinterpret_cast<const char *>(shared_mem.get_data());
-
-    const char *vert_src = shaders;
-    const char *frag_src = shaders + strlen(shaders) + 1;
+    ShaderSourceHeader *source = reinterpret_cast<ShaderSourceHeader *>(shared_mem.get_data());
+    const char *next_src = source->sources;
+    const char *comp_src = nullptr;
+    const char *vert_src = nullptr;
+    const char *geom_src = nullptr;
+    const char *frag_src = nullptr;
 
     DefaultHash<StringRefNull> hasher;
-    uint64_t vert_hash = hasher(vert_src);
-    uint64_t frag_hash = hasher(frag_src);
-    std::string hash_str = std::to_string(vert_hash) + "_" + std::to_string(frag_hash);
+    std::string hash_str = "_";
+
+    auto get_src = [&]() {
+      const char *src = next_src;
+      next_src += strlen(src) + sizeof('\0');
+      hash_str += std::to_string(hasher(src)) + "_";
+      return src;
+    };
+
+    if (source->type == ShaderSourceHeader::Type::COMPUTE) {
+      comp_src = get_src();
+    }
+    else {
+      vert_src = get_src();
+      if (source->type == ShaderSourceHeader::Type::GRAPHICS_WITH_GEOMETRY_STAGE) {
+        geom_src = get_src();
+      }
+      frag_src = get_src();
+    }
+
     std::string cache_path = cache_dir + SEP_STR + hash_str;
 
     /* TODO: This should lock the files? */
@@ -203,14 +246,14 @@ void GPU_compilation_subprocess_run(const char *subprocess_name)
       }
     }
 
-    SubprocessShader shader(vert_src, frag_src);
+    SubprocessShader shader(comp_src, vert_src, geom_src, frag_src);
     ShaderBinaryHeader *binary = shader.get_binary(shared_mem.get_data());
     end_semaphore.increment();
 
     fstream file(cache_path, std::ios::binary | std::ios::out);
     file.write(reinterpret_cast<char *>(shared_mem.get_data()),
-               binary->size + offsetof(ShaderBinaryHeader, data_start));
+               binary->size + offsetof(ShaderBinaryHeader, data));
   }
 
   GPU_exit();
diff --git a/source/blender/gpu/opengl/gl_compilation_subprocess.hh b/source/blender/gpu/opengl/gl_compilation_subprocess.hh
index 022e731f51c..0cdbf1db34d 100644
--- a/source/blender/gpu/opengl/gl_compilation_subprocess.hh
+++ b/source/blender/gpu/opengl/gl_compilation_subprocess.hh
@@ -13,7 +13,19 @@ namespace blender::gpu {
 
 /* The size of the memory pools shared by Blender and the compilation subprocesses. */
-constexpr size_t compilation_subprocess_shared_memory_size = 1024 * 1024 * 5; /* 5mB */
+constexpr size_t compilation_subprocess_shared_memory_size = 1024 * 1024 * 5; /* 5 MiB */
+
+struct ShaderSourceHeader {
+  enum Type { COMPUTE, GRAPHICS, GRAPHICS_WITH_GEOMETRY_STAGE };
+  /* The type of program being compiled. */
+  Type type;
+  /* The source code for all the shader stages (separated by a null terminator).
+   * The stages follow the execution order (e.g. vert > geom > frag). */
+  char sources[compilation_subprocess_shared_memory_size - sizeof(type)];
+};
+
+static_assert(sizeof(ShaderSourceHeader) == compilation_subprocess_shared_memory_size,
+              "Size must match the shared memory size");
 
 struct ShaderBinaryHeader {
   /* Size of the shader binary data. */
@@ -21,11 +33,13 @@ struct ShaderBinaryHeader {
   /* Magic number that identifies the format of this shader binary (Driver-defined).
    * This (and size) is set to 0 when the shader has failed to compile. */
   uint32_t format;
-  /* When casting a shared memory pool into a ShaderBinaryHeader*, this is the first byte of the
-   * shader binary data. */
-  uint8_t data_start;
+  /* The serialized shader binary data. */
+  uint8_t data[compilation_subprocess_shared_memory_size - sizeof(size) - sizeof(format)];
 };
 
+static_assert(sizeof(ShaderBinaryHeader) == compilation_subprocess_shared_memory_size,
+              "Size must match the shared memory size");
+
 } // namespace blender::gpu
 
 #endif
diff --git a/source/blender/gpu/opengl/gl_shader.cc b/source/blender/gpu/opengl/gl_shader.cc
index 15d1718d200..01641359b3f 100644
--- a/source/blender/gpu/opengl/gl_shader.cc
+++ b/source/blender/gpu/opengl/gl_shader.cc
@@ -1472,6 +1472,30 @@ Vector<const char *> GLSources::sources_get() const
   return result;
 }
 
+std::string GLSources::to_string() const
+{
+  std::string result;
+  for (const GLSource &source : *this) {
+    if (source.source_ref) {
+      result.append(source.source_ref);
+    }
+    else {
+      result.append(source.source);
+    }
+  }
+  return result;
+}
+
+size_t GLSourcesBaked::size()
+{
+  size_t result = 0;
+  result += comp.empty() ? 0 : comp.size() + sizeof('\0');
+  result += vert.empty() ? 0 : vert.size() + sizeof('\0');
+  result += geom.empty() ? 0 : geom.size() + sizeof('\0');
+  result += frag.empty() ? 0 : frag.size() + sizeof('\0');
+  return result;
+}
+
 /** \} */
 
 /* -------------------------------------------------------------------- */
@@ -1588,6 +1612,16 @@ GLuint GLShader::program_get()
   return program_active_->program_id;
 }
 
+GLSourcesBaked GLShader::get_sources()
+{
+  GLSourcesBaked result;
+  result.comp = compute_sources_.to_string();
+  result.vert = vertex_sources_.to_string();
+  result.geom = geometry_sources_.to_string();
+  result.frag = fragment_sources_.to_string();
+  return result;
+}
+
 /** \} */
 
 #if BLI_SUBPROCESS_SUPPORT
@@ -1620,12 +1654,37 @@ GLCompilerWorker::~GLCompilerWorker()
   start_semaphore_->increment();
 }
 
-void GLCompilerWorker::compile(StringRefNull vert, StringRefNull frag)
+void GLCompilerWorker::compile(const GLSourcesBaked &sources)
 {
   BLI_assert(state_ == AVAILABLE);
 
-  strcpy((char *)shared_mem_->get_data(), vert.c_str());
-  strcpy((char *)shared_mem_->get_data() + vert.size() + sizeof('\0'), frag.c_str());
+  ShaderSourceHeader *shared_src = reinterpret_cast<ShaderSourceHeader *>(
+      shared_mem_->get_data());
+  char *next_src = shared_src->sources;
+
+  auto add_src = [&](const std::string &src) {
+    if (!src.empty()) {
+      strcpy(next_src, src.c_str());
+      next_src += src.size() + sizeof('\0');
+    }
+  };
+
+  add_src(sources.comp);
+  add_src(sources.vert);
+  add_src(sources.geom);
+  add_src(sources.frag);
+
+  BLI_assert(size_t(next_src) <= size_t(shared_src) + compilation_subprocess_shared_memory_size);
+
+  if (!sources.comp.empty()) {
+    BLI_assert(sources.vert.empty() && sources.geom.empty() && sources.frag.empty());
+    shared_src->type = ShaderSourceHeader::Type::COMPUTE;
+  }
+  else {
+    BLI_assert(sources.comp.empty() && !sources.vert.empty() && !sources.frag.empty());
+    shared_src->type = sources.geom.empty() ?
+                           ShaderSourceHeader::Type::GRAPHICS :
+                           ShaderSourceHeader::Type::GRAPHICS_WITH_GEOMETRY_STAGE;
+  }
 
   start_semaphore_->increment();
 
@@ -1668,7 +1727,7 @@ bool GLCompilerWorker::load_program_binary(GLint program)
   state_ = COMPILATION_FINISHED;
 
   if (binary->size > 0) {
-    glProgramBinary(program, binary->format, &binary->data_start, binary->size);
+    glProgramBinary(program, binary->format, binary->data, binary->size);
     return true;
   }
 
@@ -1695,7 +1754,7 @@ GLShaderCompiler::~GLShaderCompiler()
   }
 }
 
-GLCompilerWorker *GLShaderCompiler::get_compiler_worker(const char *vert, const char *frag)
+GLCompilerWorker *GLShaderCompiler::get_compiler_worker(const GLSourcesBaked &sources)
 {
   GLCompilerWorker *result = nullptr;
   for (GLCompilerWorker *compiler : workers_) {
@@ -1709,7 +1768,7 @@ GLCompilerWorker *GLShaderCompiler::get_compiler_worker(const char *vert, const
     workers_.append(result);
   }
   if (result) {
-    result->compile(vert, frag);
+    result->compile(sources);
   }
   return result;
 }
@@ -1739,31 +1798,21 @@ BatchHandle GLShaderCompiler::batch_compile(Span<const shader::ShaderCreateInfo *> infos)
   for (const shader::ShaderCreateInfo *info : infos) {
     const_cast<shader::ShaderCreateInfo *>(info)->finalize();
-    CompilationWork item = {};
+    batch.items.append({});
+    CompilationWork &item = batch.items.last();
     item.info = info;
-    item.do_async_compilation = !info->vertex_source_.is_empty() &&
-                                !info->fragment_source_.is_empty() &&
-                                info->compute_source_.is_empty() &&
-                                info->geometry_source_.is_empty();
-    if (item.do_async_compilation) {
-      item.shader = static_cast<GLShader *>(compile(*info, true));
-      for (const char *src : item.shader->vertex_sources_.sources_get()) {
-        item.vertex_src.append(src);
-      }
-      for (const char *src : item.shader->fragment_sources_.sources_get()) {
-        item.fragment_src.append(src);
-      }
+    item.shader = static_cast<GLShader *>(compile(*info, true));
+    item.sources = item.shader->get_sources();
 
-      size_t required_size = item.vertex_src.size() + item.fragment_src.size();
-      if (required_size < compilation_subprocess_shared_memory_size) {
-        item.worker = get_compiler_worker(item.vertex_src.c_str(), item.fragment_src.c_str());
-      }
-      else {
-        delete item.shader;
-        item.do_async_compilation = false;
-      }
+    size_t required_size = item.sources.size();
+    item.do_async_compilation = required_size <= sizeof(ShaderSourceHeader::sources);
+    if (item.do_async_compilation) {
+      item.worker = get_compiler_worker(item.sources);
+    }
+    else {
+      delete item.shader;
+      item.sources = {};
     }
-    batch.items.append(item);
   }
   return handle;
 }
@@ -1791,7 +1840,7 @@ bool GLShaderCompiler::batch_is_ready(BatchHandle handle)
 
     if (!item.worker) {
       /* Try to acquire an available worker. */
-      item.worker = get_compiler_worker(item.vertex_src.c_str(), item.fragment_src.c_str());
+      item.worker = get_compiler_worker(item.sources);
     }
     else if (item.worker->is_ready()) {
      /* Retrieve the binary compiled by the worker. */
diff --git a/source/blender/gpu/opengl/gl_shader.hh b/source/blender/gpu/opengl/gl_shader.hh
index 38a99c276cf..0e15a494683 100644
--- a/source/blender/gpu/opengl/gl_shader.hh
+++ b/source/blender/gpu/opengl/gl_shader.hh
@@ -44,6 +44,21 @@ class GLSources : public Vector<GLSource> {
 public:
   GLSources &operator=(Span<const char *> other);
   Vector<const char *> sources_get() const;
+  std::string to_string() const;
+};
+
+/**
+ * The full sources for each shader stage, baked into a single string from their respective
+ * GLSources. (Can be retrieved from GLShader::get_sources())
+ */
+struct GLSourcesBaked : NonCopyable {
+  std::string comp;
+  std::string vert;
+  std::string geom;
+  std::string frag;
+
+  /* Returns the size (in bytes) required to store the source of all the used stages. */
+  size_t size();
 };
 
 /**
@@ -194,6 +209,8 @@ class GLShader : public Shader {
     return program_active_->compute_shader != 0;
   }
 
+  GLSourcesBaked get_sources();
+
 private:
   const char *glsl_patch_get(GLenum gl_stage);
 
@@ -240,7 +257,7 @@ class GLCompilerWorker {
   GLCompilerWorker();
   ~GLCompilerWorker();
 
-  void compile(StringRefNull vert, StringRefNull frag);
+  void compile(const GLSourcesBaked &sources);
   bool is_ready();
   bool load_program_binary(GLint program);
   void release();
@@ -255,14 +272,12 @@ class GLShaderCompiler : public ShaderCompiler {
   Vector<GLCompilerWorker *> workers_;
 
   struct CompilationWork {
-    GLCompilerWorker *worker = nullptr;
-    GLShader *shader = nullptr;
     const shader::ShaderCreateInfo *info = nullptr;
+    GLShader *shader = nullptr;
+    GLSourcesBaked sources;
+
+    GLCompilerWorker *worker = nullptr;
     bool do_async_compilation = false;
-
-    std::string vertex_src;
-    std::string fragment_src;
-
     bool is_ready = false;
   };
 
@@ -274,7 +289,7 @@ class GLShaderCompiler : public ShaderCompiler {
   BatchHandle next_batch_handle = 1;
   Map<BatchHandle, Batch> batches;
 
-  GLCompilerWorker *get_compiler_worker(const char *vert, const char *frag);
+  GLCompilerWorker *get_compiler_worker(const GLSourcesBaked &sources);
   bool worker_is_lost(GLCompilerWorker *&worker);
 
 public:

From fb98edf32296a41c186acf7b315d82218b499d6a Mon Sep 17 00:00:00 2001
From: Miguel Pozo
Date: Fri, 7 Jun 2024 16:11:50 +0200
Subject: [PATCH 4/8] GPU: Add GPU_material_batch_compile

Add the `GPU_material_batch` API. It uses the new `GPU_shader_batch` API
from #122232 internally and works in the same way.

Note: This doesn't implement parallel material optimizations.

Pull Request: https://projects.blender.org/blender/blender/pulls/122793
---
 .../draw/intern/draw_manager_shader.cc        | 60 +++++++++++++--
 source/blender/gpu/GPU_material.hh            | 24 ++++++
 source/blender/gpu/intern/gpu_codegen.cc      | 34 +++++++--
 source/blender/gpu/intern/gpu_codegen.hh      |  5 ++
 source/blender/gpu/intern/gpu_material.cc     | 75 +++++++++++++++----
 5 files changed, 170 insertions(+), 28 deletions(-)

diff --git a/source/blender/draw/intern/draw_manager_shader.cc b/source/blender/draw/intern/draw_manager_shader.cc
index f60864c20b7..efb4b0a2a90 100644
--- a/source/blender/draw/intern/draw_manager_shader.cc
+++ b/source/blender/draw/intern/draw_manager_shader.cc
@@ -12,6 +12,7 @@
 
 #include "BLI_dynstr.h"
 #include "BLI_listbase.h"
+#include "BLI_map.hh"
 #include "BLI_string_utils.hh"
 #include "BLI_threads.h"
 #include "BLI_time.h"
@@ -66,6 +67,8 @@ struct DRWShaderCompiler {
 static void drw_deferred_shader_compilation_exec(void *custom_data,
                                                  wmJobWorkerStatus *worker_status)
 {
+  using namespace blender;
+
   GPU_render_begin();
   DRWShaderCompiler *comp = (DRWShaderCompiler *)custom_data;
   void *system_gpu_context = comp->system_gpu_context;
@@ -80,11 +83,16 @@ static void drw_deferred_shader_compilation_exec(void *custom_data,
     GPU_context_main_lock();
   }
 
+  const bool use_parallel_compilation = GPU_use_parallel_compilation();
+
   WM_system_gpu_context_activate(system_gpu_context);
   GPU_context_active_set(blender_gpu_context);
 
+  Vector<GPUMaterial *> next_batch;
+  Map<BatchHandle, Vector<GPUMaterial *>> batches;
+
   while (true) {
-    if (worker_status->stop != 0) {
+    if (worker_status->stop) {
       break;
     }
 
@@ -96,14 +104,44 @@ static void drw_deferred_shader_compilation_exec(void *custom_data,
     if (mat) {
       /* Avoid another thread freeing the material mid compilation. */
       GPU_material_acquire(mat);
+      MEM_freeN(link);
     }
     BLI_spin_unlock(&comp->list_lock);
 
     if (mat) {
-      /* Do the compilation. */
-      GPU_material_compile(mat);
-      GPU_material_release(mat);
-      MEM_freeN(link);
+      /* We have a new material that must be compiled;
+       * either compile it directly or add it to a parallel compilation batch. */
+      if (use_parallel_compilation) {
+        next_batch.append(mat);
+      }
+      else {
+        GPU_material_compile(mat);
+        GPU_material_release(mat);
+      }
+    }
+    else if (!next_batch.is_empty()) {
+      /* (Only reached when use_parallel_compilation == true.)
+       * We ran out of pending materials. Request the compilation of the current batch. */
+      BatchHandle batch_handle = GPU_material_batch_compile(next_batch);
+      batches.add(batch_handle, next_batch);
+      next_batch.clear();
+    }
+    else if (!batches.is_empty()) {
+      /* (Only reached when use_parallel_compilation == true.)
+       * Keep querying the requested batches until all of them are ready. */
+      Vector<BatchHandle> ready_handles;
+      for (BatchHandle handle : batches.keys()) {
+        if (GPU_material_batch_is_ready(handle)) {
+          ready_handles.append(handle);
+        }
+      }
+      for (BatchHandle handle : ready_handles) {
+        Vector<GPUMaterial *> batch = batches.pop(handle);
+        GPU_material_batch_finalize(handle, batch);
+        for (GPUMaterial *mat : batch) {
+          GPU_material_release(mat);
+        }
+      }
     }
     else {
       /* Check for Material Optimization job once there are no more
@@ -111,7 +149,7 @@ static void drw_deferred_shader_compilation_exec(void *custom_data,
       BLI_spin_lock(&comp->list_lock);
       /* Pop tail because it will be less likely to lock the main thread
        * if all GPUMaterials are to be freed (see DRW_deferred_shader_remove()). */
-      link = (LinkData *)BLI_poptail(&comp->optimize_queue);
+      LinkData *link = (LinkData *)BLI_poptail(&comp->optimize_queue);
       GPUMaterial *optimize_mat = link ? (GPUMaterial *)link->data : nullptr;
       if (optimize_mat) {
         /* Avoid another thread freeing the material during optimization. */
@@ -136,6 +174,16 @@ static void drw_deferred_shader_compilation_exec(void *custom_data,
     }
   }
 
+  /* We have to wait until all the requested batches are ready,
+   * even if worker_status->stop is true. */
+  for (BatchHandle handle : batches.keys()) {
+    Vector<GPUMaterial *> &batch = batches.lookup(handle);
+    GPU_material_batch_finalize(handle, batch);
+    for (GPUMaterial *mat : batch) {
+      GPU_material_release(mat);
+    }
+  }
+
   GPU_context_active_set(nullptr);
   WM_system_gpu_context_release(system_gpu_context);
   if (use_main_context_workaround) {
diff --git a/source/blender/gpu/GPU_material.hh b/source/blender/gpu/GPU_material.hh
index 43c6df717f0..01082089d35 100644
--- a/source/blender/gpu/GPU_material.hh
+++ b/source/blender/gpu/GPU_material.hh
@@ -260,6 +260,30 @@ void GPU_material_compile(GPUMaterial *mat);
 void GPU_material_free_single(GPUMaterial *material);
 void GPU_material_free(ListBase *gpumaterial);
 
+/**
+ * Request the creation of multiple `GPUMaterial`s at once, allowing the backend to use
+ * multithreaded compilation.
+ * Returns a handle that can be used to poll if all materials have been
+ * compiled, and to retrieve the compiled result.
+ * NOTE: This function is asynchronous on OpenGL, but it's blocking on Vulkan and Metal.
+ * WARNING: The material pointers and their pass->create_info should be valid until
+ * `GPU_material_batch_finalize` has returned.
+ */
+BatchHandle GPU_material_batch_compile(blender::Span<GPUMaterial *> mats);
+/**
+ * Returns true if all the materials from the batch have finished their compilation.
+ */
+bool GPU_material_batch_is_ready(BatchHandle handle);
+/**
+ * Assign the compiled shaders to their respective materials and flag their status.
+ * The materials list should have the same length and order as in the
+ * `GPU_material_batch_compile` call.
+ * If the compilation has not finished yet, this call will block the thread until all the
+ * shaders are ready.
+ * WARNING: The handle will be invalidated by this call; you can't process the same batch twice.
+ */
+void GPU_material_batch_finalize(BatchHandle &handle, blender::Span<GPUMaterial *> mats);
+
 void GPU_material_acquire(GPUMaterial *mat);
 void GPU_material_release(GPUMaterial *mat);
 
diff --git a/source/blender/gpu/intern/gpu_codegen.cc b/source/blender/gpu/intern/gpu_codegen.cc
index f186488761b..f09e2066daa 100644
--- a/source/blender/gpu/intern/gpu_codegen.cc
+++ b/source/blender/gpu/intern/gpu_codegen.cc
@@ -102,6 +102,8 @@ struct GPUPass {
   uint32_t hash;
   /** Did we already try to compile the attached GPUShader. */
   bool compiled;
+  /** If this pass is already being compiled (a GPUPass can be shared by multiple GPUMaterials). */
+  bool compilation_requested;
   /** Hint that an optimized variant of this pass should be created based on a complexity heuristic
    * during pass code generation. */
   bool should_optimize;
@@ -805,6 +807,7 @@ GPUPass *GPU_generate_pass(GPUMaterial *material,
   pass->engine = engine;
   pass->hash = codegen.hash_get();
   pass->compiled = false;
+  pass->compilation_requested = false;
   pass->cached = false;
   /* Only flag pass optimization hint if this is the first generated pass for a material.
    * Optimized passes cannot be optimized further, even if the heuristic is still not
@@ -881,17 +884,22 @@ static bool gpu_pass_shader_validate(GPUPass *pass, GPUShader *shader)
   return (active_samplers_len * 3 <= GPU_max_textures());
 }
 
-bool GPU_pass_compile(GPUPass *pass, const char *shname)
+GPUShaderCreateInfo *GPU_pass_begin_compilation(GPUPass *pass, const char *shname)
+{
+  if (!pass->compilation_requested) {
+    pass->compilation_requested = true;
+    pass->create_info->name_ = shname;
+    GPUShaderCreateInfo *info = reinterpret_cast<GPUShaderCreateInfo *>(
+        static_cast<ShaderCreateInfo *>(pass->create_info));
+    return info;
+  }
+  return nullptr;
+}
+
+bool GPU_pass_finalize_compilation(GPUPass *pass, GPUShader *shader)
 {
   bool success = true;
   if (!pass->compiled) {
-    GPUShaderCreateInfo *info = reinterpret_cast<GPUShaderCreateInfo *>(
-        static_cast<ShaderCreateInfo *>(pass->create_info));
-
-    pass->create_info->name_ = shname;
-
-    GPUShader *shader = GPU_shader_create_from_info(info);
-
     /* NOTE: Some drivers / gpu allows more active samplers than the opengl limit.
      * We need to make sure to count active samplers to avoid undefined behavior. */
     if (!gpu_pass_shader_validate(pass, shader)) {
@@ -908,6 +916,16 @@ bool GPU_pass_compile(GPUPass *pass, const char *shname)
   return success;
 }
 
+bool GPU_pass_compile(GPUPass *pass, const char *shname)
+{
+  bool success = true;
+  if (GPUShaderCreateInfo *info = GPU_pass_begin_compilation(pass, shname)) {
+    GPUShader *shader = GPU_shader_create_from_info(info);
+    success = GPU_pass_finalize_compilation(pass, shader);
+  }
+  return success;
+}
+
 GPUShader *GPU_pass_shader_get(GPUPass *pass)
 {
   return pass->shader;
diff --git a/source/blender/gpu/intern/gpu_codegen.hh b/source/blender/gpu/intern/gpu_codegen.hh
index 8889f76e88b..38bee7975db 100644
--- a/source/blender/gpu/intern/gpu_codegen.hh
+++ b/source/blender/gpu/intern/gpu_codegen.hh
@@ -31,6 +31,11 @@ void GPU_pass_acquire(GPUPass *pass);
 void GPU_pass_release(GPUPass *pass);
 bool GPU_pass_should_optimize(GPUPass *pass);
 
+/* Custom pass compilation. */
+
+GPUShaderCreateInfo *GPU_pass_begin_compilation(GPUPass *pass, const char *shname);
+bool GPU_pass_finalize_compilation(GPUPass *pass, GPUShader *shader);
+
 /* Module */
 
 void gpu_codegen_init();
diff --git a/source/blender/gpu/intern/gpu_material.cc b/source/blender/gpu/intern/gpu_material.cc
index 8adc1d82a0e..06948a27cc9 100644
--- a/source/blender/gpu/intern/gpu_material.cc
+++ b/source/blender/gpu/intern/gpu_material.cc
@@ -143,6 +143,8 @@ struct GPUMaterial {
 
   uint32_t refcount;
 
+  bool do_batch_compilation;
+
 #ifndef NDEBUG
   char name[64];
 #else
@@ -951,21 +953,8 @@ void GPU_material_release(GPUMaterial *mat)
   GPU_material_free_single(mat);
 }
 
-void GPU_material_compile(GPUMaterial *mat)
+static void gpu_material_finalize(GPUMaterial *mat, bool success)
 {
-  bool success;
-
-  BLI_assert(ELEM(mat->status, GPU_MAT_QUEUED, GPU_MAT_CREATED));
-  BLI_assert(mat->pass);
-
-/* NOTE: The shader may have already been compiled here since we are
- * sharing GPUShader across GPUMaterials. In this case it's a no-op. */
-#ifndef NDEBUG
-  success = GPU_pass_compile(mat->pass, mat->name);
-#else
-  success = GPU_pass_compile(mat->pass, __func__);
-#endif
-
   mat->flag |= GPU_MATFLAG_UPDATED;
 
   if (success) {
@@ -1017,6 +1006,64 @@ void GPU_material_compile(GPUMaterial *mat)
   }
 }
 
+void GPU_material_compile(GPUMaterial *mat)
+{
+  bool success;
+
+  BLI_assert(ELEM(mat->status, GPU_MAT_QUEUED, GPU_MAT_CREATED));
+  BLI_assert(mat->pass);
+
+/* NOTE: The shader may have already been compiled here since we are
+ * sharing GPUShader across GPUMaterials. In this case it's a no-op. */
+#ifndef NDEBUG
+  success = GPU_pass_compile(mat->pass, mat->name);
+#else
+  success = GPU_pass_compile(mat->pass, __func__);
+#endif
+
+  gpu_material_finalize(mat, success);
+}
+
+BatchHandle GPU_material_batch_compile(blender::Span<GPUMaterial *> mats)
+{
+  blender::Vector<const GPUShaderCreateInfo *> infos;
+  infos.reserve(mats.size());
+
+  for (GPUMaterial *mat : mats) {
+    BLI_assert(ELEM(mat->status, GPU_MAT_QUEUED, GPU_MAT_CREATED));
+    BLI_assert(mat->pass);
+#ifndef NDEBUG
+    const char *name = mat->name;
+#else
+    const char *name = __func__;
+#endif
+    mat->do_batch_compilation = false;
+    if (GPUShaderCreateInfo *info = GPU_pass_begin_compilation(mat->pass, name)) {
+      infos.append(info);
+      mat->do_batch_compilation = true;
+    }
+  }
+
+  return GPU_shader_batch_create_from_infos(infos);
+}
+
+bool GPU_material_batch_is_ready(BatchHandle handle)
+{
+  return GPU_shader_batch_is_ready(handle);
+}
+
+void GPU_material_batch_finalize(BatchHandle &handle, blender::Span<GPUMaterial *> mats)
+{
+  blender::Vector<GPUShader *> shaders = GPU_shader_batch_finalize(handle);
+
+  int i = 0;
+  for (GPUMaterial *mat : mats) {
+    bool success = true;
+    if (mat->do_batch_compilation) {
+      success = GPU_pass_finalize_compilation(mat->pass, shaders[i++]);
+    }
+    gpu_material_finalize(mat, success);
+  }
+}
+
 void GPU_material_optimize(GPUMaterial *mat)
 {
   /* If shader is flagged for skipping optimization or has already been successfully

From 91fa37fecb682bf9211188b8e89ec5b14bb1162c Mon Sep 17 00:00:00 2001
From: Aras Pranckevicius
Date: Fri, 7 Jun 2024 16:18:09 +0200
Subject: [PATCH 5/8] Fix: VSE timeline strip rounded corner outlines not pixel correct

PR #122576 added rounded corners to VSE timeline strips, but they were not
"snapped" to the pixel grid, so the outline that is normally 1px wide could
fall between pixels and get blurred out. Fix by rounding all SDF-related
coordinates inside the shader to the pixel grid.
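
To illustrate with concrete numbers (a standalone C++ sketch of the same
math; `inv_pixel` stands in for the shader's `context_data.inv_pixelx/y`,
and the values are made up):

  #include <cmath>
  #include <cstdio>

  int main()
  {
    /* A strip edge in view space and the view-to-pixel scale. */
    const float edge_view = 3.7f;
    const float inv_pixel = 2.8f;

    /* Unsnapped, the 1px outline is centered at x = 10.36: it straddles two
     * pixel columns and the rasterizer blends it into a blurry ~2px line. */
    const float unsnapped = edge_view * inv_pixel;

    /* Snapped to the pixel grid, the outline lands exactly on one column. */
    const float snapped = std::round(edge_view * inv_pixel);

    std::printf("unsnapped: %.2f px, snapped: %.2f px\n", unsnapped, snapped);
    return 0;
  }
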
Pull Request: https://projects.blender.org/blender/blender/pulls/122764
---
 .../gpu/shaders/gpu_shader_sequencer_strips_frag.glsl | 10 ++++++----
 .../gpu/shaders/gpu_shader_sequencer_strips_vert.glsl |  9 ++++-----
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/source/blender/gpu/shaders/gpu_shader_sequencer_strips_frag.glsl b/source/blender/gpu/shaders/gpu_shader_sequencer_strips_frag.glsl
index 89ea3f907f7..7ed02cd5aa9 100644
--- a/source/blender/gpu/shaders/gpu_shader_sequencer_strips_frag.glsl
+++ b/source/blender/gpu/shaders/gpu_shader_sequencer_strips_frag.glsl
@@ -44,11 +44,13 @@ void main()
   vec2 center = vec2(strip.right_handle + strip.left_handle, strip.top + strip.bottom) * 0.5;
 
   /* Transform strip rectangle into pixel coordinates, so that
-   * rounded corners have proper aspect ratio and can be expressed in pixels. */
+   * rounded corners have proper aspect ratio and can be expressed in pixels.
+   * Also snap to pixel grid coordinates, so that the outline/border ends up
+   * with clean, non-fractional pixel sizes. */
   vec2 view_to_pixel = vec2(context_data.inv_pixelx, context_data.inv_pixely);
-  size *= view_to_pixel;
-  center *= view_to_pixel;
-  vec2 pos = co * view_to_pixel;
+  size = round(size * view_to_pixel);
+  center = round(center * view_to_pixel);
+  vec2 pos = round(co * view_to_pixel);
 
   float radius = context_data.round_radius;
   if (radius > size.x) {
diff --git a/source/blender/gpu/shaders/gpu_shader_sequencer_strips_vert.glsl b/source/blender/gpu/shaders/gpu_shader_sequencer_strips_vert.glsl
index 767e17808df..ba93f1ce11e 100644
--- a/source/blender/gpu/shaders/gpu_shader_sequencer_strips_vert.glsl
+++ b/source/blender/gpu/shaders/gpu_shader_sequencer_strips_vert.glsl
@@ -9,11 +9,10 @@ void main()
   int vid = gl_VertexID;
   SeqStripDrawData strip = strip_data[id];
   vec4 rect = vec4(strip.left_handle, strip.bottom, strip.right_handle, strip.top);
-  /* Expand rasterized rectangle by 1px so that we can do outlines. */
-  rect.x -= context_data.pixelx;
-  rect.z += context_data.pixelx;
-  rect.y -= context_data.pixely;
-  rect.w += context_data.pixely;
+  /* Expand by 2px to fit possible outline and pixel grid rounding. */
+  vec2 expand = vec2(context_data.pixelx, context_data.pixely) * 2.0;
+  rect.xy -= expand;
+  rect.zw += expand;
 
   vec2 co;
   if (vid == 0) {

From 12eca8692f86c58076492f236eae2950860ec931 Mon Sep 17 00:00:00 2001
From: Miguel Pozo
Date: Fri, 7 Jun 2024 16:14:46 +0200
Subject: [PATCH 6/8] EEVEE: Parallel static shader compilation

Use the `GPU_shader_batch` API to compile the EEVEE static shaders in
parallel and without blocking Blender.
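
In short, the lifecycle this patch sets up is (a condensed sketch;
`infos`, `shaders_` and `compilation_handle_` as in the diffs below):

  /* ShaderModule constructor: request all static shaders up front, don't wait. */
  compilation_handle_ = GPU_shader_batch_create_from_infos(infos);

  /* Each redraw: is_ready(false) polls, is_ready(true) blocks (image renders).
   * GPU_shader_batch_finalize() zeroes the handle it receives by reference,
   * so `compilation_handle_ == 0` doubles as the "compilation done" flag. */
  if (shaders.is_ready(is_image_render())) {
    /* static_shader_get() is safe to call from here on. */
  }
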
Pull Request: https://projects.blender.org/blender/blender/pulls/122797 --- .../draw/engines/eevee_next/eevee_instance.cc | 27 ++++++++++++ .../draw/engines/eevee_next/eevee_instance.hh | 2 + .../draw/engines/eevee_next/eevee_shader.cc | 43 +++++++++++++++---- .../draw/engines/eevee_next/eevee_shader.hh | 3 ++ 4 files changed, 66 insertions(+), 9 deletions(-) diff --git a/source/blender/draw/engines/eevee_next/eevee_instance.cc b/source/blender/draw/engines/eevee_next/eevee_instance.cc index e51b9923408..db1cdfa4f60 100644 --- a/source/blender/draw/engines/eevee_next/eevee_instance.cc +++ b/source/blender/draw/engines/eevee_next/eevee_instance.cc @@ -65,6 +65,11 @@ void Instance::init(const int2 &output_res, info = ""; + shaders_are_ready_ = shaders.is_ready(is_image_render()); + if (!shaders_are_ready_) { + return; + } + if (assign_if_different(debug_mode, (eDebugMode)G.debug_value)) { sampling.reset(); } @@ -121,6 +126,8 @@ void Instance::init_light_bake(Depsgraph *depsgraph, draw::Manager *manager) debug_mode = (eDebugMode)G.debug_value; info = ""; + shaders.is_ready(true); + update_eval_members(); sampling.init(scene); @@ -175,6 +182,10 @@ void Instance::view_update() void Instance::begin_sync() { + if (!shaders_are_ready_) { + return; + } + /* Needs to be first for sun light parameters. */ world.sync(); @@ -218,6 +229,10 @@ void Instance::begin_sync() void Instance::object_sync(Object *ob) { + if (!shaders_are_ready_) { + return; + } + const bool is_renderable_type = ELEM(ob->type, OB_CURVES, OB_GPENCIL_LEGACY, @@ -303,6 +318,10 @@ void Instance::object_sync_render(void *instance_, void Instance::end_sync() { + if (!shaders_are_ready_) { + return; + } + velocity.end_sync(); volume.end_sync(); /* Needs to be before shadows. */ shadows.end_sync(); /* Needs to be before lights. */ @@ -515,6 +534,14 @@ void Instance::render_frame(RenderLayer *render_layer, const char *view_name) void Instance::draw_viewport() { + if (!shaders_are_ready_) { + DefaultFramebufferList *dfbl = DRW_viewport_framebuffer_list_get(); + GPU_framebuffer_clear_color_depth(dfbl->default_fb, float4(0.0f), 1.0f); + info += "Compiling EEVEE Engine Shaders\n"; + DRW_viewport_request_redraw(); + return; + } + render_sample(); velocity.step_swap(); diff --git a/source/blender/draw/engines/eevee_next/eevee_instance.hh b/source/blender/draw/engines/eevee_next/eevee_instance.hh index b552f42fdcd..ae48b945bf2 100644 --- a/source/blender/draw/engines/eevee_next/eevee_instance.hh +++ b/source/blender/draw/engines/eevee_next/eevee_instance.hh @@ -75,6 +75,8 @@ class Instance { uint64_t depsgraph_last_update_ = 0; bool overlays_enabled_ = false; + bool shaders_are_ready_ = true; + public: ShaderModule &shaders; SyncModule sync; diff --git a/source/blender/draw/engines/eevee_next/eevee_shader.cc b/source/blender/draw/engines/eevee_next/eevee_shader.cc index d5a1371b045..3797ded1f81 100644 --- a/source/blender/draw/engines/eevee_next/eevee_shader.cc +++ b/source/blender/draw/engines/eevee_next/eevee_shader.cc @@ -53,19 +53,27 @@ ShaderModule::ShaderModule() shader = nullptr; } -#ifndef NDEBUG - /* Ensure all shader are described. 
*/
+  Vector<const GPUShaderCreateInfo *> infos;
+  infos.reserve(MAX_SHADER_TYPE);
+
   for (auto i : IndexRange(MAX_SHADER_TYPE)) {
     const char *name = static_shader_create_info_name_get(eShaderType(i));
+    const GPUShaderCreateInfo *create_info = GPU_shader_create_info_get(name);
+    infos.append(create_info);
+
+#ifndef NDEBUG
     if (name == nullptr) {
       std::cerr << "EEVEE: Missing case for eShaderType(" << i
                 << ") in static_shader_create_info_name_get().";
       BLI_assert(0);
     }
-    const GPUShaderCreateInfo *create_info = GPU_shader_create_info_get(name);
     BLI_assert_msg(create_info != nullptr, "EEVEE: Missing create info for static shader.");
-  }
 #endif
+  }
+
+  if (GPU_use_parallel_compilation()) {
+    compilation_handle_ = GPU_shader_batch_create_from_infos(infos);
+  }
 }
 
 ShaderModule::~ShaderModule()
@@ -82,6 +90,22 @@ ShaderModule::~ShaderModule()
  *
  * \{ */
 
+bool ShaderModule::is_ready(bool block)
+{
+  if (compilation_handle_ == 0) {
+    return true;
+  }
+
+  if (block || GPU_shader_batch_is_ready(compilation_handle_)) {
+    Vector<GPUShader *> shaders = GPU_shader_batch_finalize(compilation_handle_);
+    for (int i : IndexRange(MAX_SHADER_TYPE)) {
+      shaders_[i] = shaders[i];
+    }
+  }
+
+  return compilation_handle_ == 0;
+}
+
 const char *ShaderModule::static_shader_create_info_name_get(eShaderType shader_type)
 {
   switch (shader_type) {
@@ -300,15 +324,16 @@ const char *ShaderModule::static_shader_create_info_name_get(eShaderType shader
 
 GPUShader *ShaderModule::static_shader_get(eShaderType shader_type)
 {
+  BLI_assert(is_ready());
   if (shaders_[shader_type] == nullptr) {
     const char *shader_name = static_shader_create_info_name_get(shader_type);
-
-    shaders_[shader_type] = GPU_shader_create_from_info_name(shader_name);
-
-    if (shaders_[shader_type] == nullptr) {
+    if (GPU_use_parallel_compilation()) {
       fprintf(stderr, "EEVEE: error: Could not compile static shader \"%s\"\n", shader_name);
+      BLI_assert(0);
+    }
+    else {
+      shaders_[shader_type] = GPU_shader_create_from_info_name(shader_name);
     }
-    BLI_assert(shaders_[shader_type] != nullptr);
   }
   return shaders_[shader_type];
 }
diff --git a/source/blender/draw/engines/eevee_next/eevee_shader.hh b/source/blender/draw/engines/eevee_next/eevee_shader.hh
index 9d8d03560c3..0c7db3621af 100644
--- a/source/blender/draw/engines/eevee_next/eevee_shader.hh
+++ b/source/blender/draw/engines/eevee_next/eevee_shader.hh
@@ -157,6 +157,7 @@ enum eShaderType {
 class ShaderModule {
  private:
   std::array<GPUShader *, MAX_SHADER_TYPE> shaders_;
+  BatchHandle compilation_handle_ = 0;
 
   /** Shared shader module across all engine instances. */
   static ShaderModule *g_shader_module;
@@ -165,6 +166,8 @@ class ShaderModule {
   ShaderModule();
   ~ShaderModule();
 
+  bool is_ready(bool block = false);
+
   GPUShader *static_shader_get(eShaderType shader_type);
   GPUMaterial *material_default_shader_get(eMaterialPipeline pipeline_type,
                                            eMaterialGeometry geometry_type);

From 6a3d9018cf7d759d55be6e06e96508efaa83c315 Mon Sep 17 00:00:00 2001
From: Miguel Pozo
Date: Fri, 7 Jun 2024 16:15:57 +0200
Subject: [PATCH 7/8] EEVEE: Compile probe passes in parallel

Right now probe passes are compiled one by one, since passes are only
requested when `materials.queued_shaders_count` is 0, and requesting a
pass increases that count.
This splits the logic into two functions: one that checks whether sync is
needed, and another that checks whether pass shaders are needed.
This allows compiling the shaders in parallel.
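
Schematically (a simplified sketch with illustrative names, not the exact
code from the diffs below):

  /* Before: a single check gated both probe sync and pass requests. Since
   * requesting a pass increments queued_shaders_count, the first request
   * blocked all further ones until its shader finished compiling. */
  bool do_probe_sync() { return update_probes && queued_shaders_count == 0; }

  /* After: pass requests use the weaker check, so every probe shader can be
   * queued (and compiled in parallel) right away... */
  bool needs_probe_passes() { return update_probes; }
  /* ...while the actual probe sync still waits for the queue to drain. */
  bool do_probe_sync() { return queued_shaders_count == 0 && needs_probe_passes(); }
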
Pull Request: https://projects.blender.org/blender/blender/pulls/122799
---
 .../draw/engines/eevee_next/eevee_instance.cc | 26 +++++++++----------
 .../draw/engines/eevee_next/eevee_instance.hh |  7 +++++
 .../draw/engines/eevee_next/eevee_material.cc |  5 ++--
 3 files changed, 22 insertions(+), 16 deletions(-)

diff --git a/source/blender/draw/engines/eevee_next/eevee_instance.cc b/source/blender/draw/engines/eevee_next/eevee_instance.cc
index db1cdfa4f60..9c4c3c6282b 100644
--- a/source/blender/draw/engines/eevee_next/eevee_instance.cc
+++ b/source/blender/draw/engines/eevee_next/eevee_instance.cc
@@ -371,26 +371,24 @@ void Instance::render_sync()
   DRW_curves_update();
 }
 
+bool Instance::needs_lightprobe_sphere_passes() const
+{
+  return sphere_probes.update_probes_this_sample_;
+}
+
 bool Instance::do_lightprobe_sphere_sync() const
 {
-  if (!sphere_probes.update_probes_this_sample_) {
-    return false;
-  }
-  if (materials.queued_shaders_count > 0) {
-    return false;
-  }
-  return true;
+  return (materials.queued_shaders_count == 0) && needs_lightprobe_sphere_passes();
+}
+
+bool Instance::needs_planar_probe_passes() const
+{
+  return planar_probes.update_probes_;
 }
 
 bool Instance::do_planar_probe_sync() const
 {
-  if (!planar_probes.update_probes_) {
-    return false;
-  }
-  if (materials.queued_shaders_count > 0) {
-    return false;
-  }
-  return true;
+  return (materials.queued_shaders_count == 0) && needs_planar_probe_passes();
 }
 
 /** \} */
diff --git a/source/blender/draw/engines/eevee_next/eevee_instance.hh b/source/blender/draw/engines/eevee_next/eevee_instance.hh
index ae48b945bf2..631035c3045 100644
--- a/source/blender/draw/engines/eevee_next/eevee_instance.hh
+++ b/source/blender/draw/engines/eevee_next/eevee_instance.hh
@@ -197,6 +197,13 @@ class Instance {
   bool do_lightprobe_sphere_sync() const;
   bool do_planar_probe_sync() const;
 
+  /**
+   * Return true when probe passes should be loaded.
+   * It can be true even if do_*_probe_sync() is false due to shaders still being compiled.
+   */
+  bool needs_lightprobe_sphere_passes() const;
+  bool needs_planar_probe_passes() const;
+
   /* Render. */
 
   void render_sync();
diff --git a/source/blender/draw/engines/eevee_next/eevee_material.cc b/source/blender/draw/engines/eevee_next/eevee_material.cc
index 7d83d0f495f..57817cf7acd 100644
--- a/source/blender/draw/engines/eevee_next/eevee_material.cc
+++ b/source/blender/draw/engines/eevee_next/eevee_material.cc
@@ -322,7 +322,8 @@ Material &MaterialModule::material_sync(Object *ob,
     mat.overlap_masking = MaterialPass();
     mat.capture = MaterialPass();
 
-    if (inst_.do_lightprobe_sphere_sync() && !(ob->visibility_flag & OB_HIDE_PROBE_CUBEMAP)) {
+    if (inst_.needs_lightprobe_sphere_passes() && !(ob->visibility_flag & OB_HIDE_PROBE_CUBEMAP))
+    {
       mat.lightprobe_sphere_prepass = material_pass_get(
           ob, blender_mat, MAT_PIPE_PREPASS_DEFERRED, geometry_type, MAT_PROBE_REFLECTION);
       mat.lightprobe_sphere_shading = material_pass_get(
@@ -333,7 +334,7 @@ Material &MaterialModule::material_sync(Object *ob,
     mat.lightprobe_sphere_shading = MaterialPass();
   }
 
-  if (inst_.do_planar_probe_sync() && !(ob->visibility_flag & OB_HIDE_PROBE_PLANAR)) {
+  if (inst_.needs_planar_probe_passes() && !(ob->visibility_flag & OB_HIDE_PROBE_PLANAR)) {
     mat.planar_probe_prepass = material_pass_get(
         ob, blender_mat, MAT_PIPE_PREPASS_PLANAR, geometry_type, MAT_PROBE_PLANAR);
     mat.planar_probe_shading = material_pass_get(

From a9ec92005a26bf99a872d2dd37c3943d358ff2df Mon Sep 17 00:00:00 2001
From: Miguel Pozo
Date: Fri, 7 Jun 2024 16:22:20 +0200
Subject: [PATCH 8/8] Fix: EEVEE: Add missing DRW_viewport_request_redraw() call

Required to keep the compilation process going.
---
 source/blender/draw/engines/eevee_next/eevee_instance.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/source/blender/draw/engines/eevee_next/eevee_instance.cc b/source/blender/draw/engines/eevee_next/eevee_instance.cc
index 9c4c3c6282b..9b3525c1854 100644
--- a/source/blender/draw/engines/eevee_next/eevee_instance.cc
+++ b/source/blender/draw/engines/eevee_next/eevee_instance.cc
@@ -554,6 +554,7 @@ void Instance::draw_viewport()
     std::stringstream ss;
     ss << "Compiling Shaders (" << materials.queued_shaders_count << " remaining)";
     info = ss.str();
+    DRW_viewport_request_redraw();
   }
   else if (materials.queued_optimize_shaders_count > 0) {
     std::stringstream ss;