Merge branch 'blender-v4.2-release'

Miguel Pozo 2024-06-07 16:25:37 +02:00
commit ca7f1785fc
19 changed files with 472 additions and 136 deletions

@ -51,6 +51,7 @@ set(OSL_EXTRA_ARGS
-DCMAKE_DEBUG_POSTFIX=_d
-DPython_ROOT=${LIBDIR}/python
-DPython_EXECUTABLE=${PYTHON_BINARY}
-Dlibdeflate_DIR=${LIBDIR}/deflate/lib/cmake/libdeflate
)
if(NOT APPLE)

@ -148,7 +148,7 @@ DenoiserType Denoiser::automatic_viewport_denoiser_type(const DeviceInfo &path_t
}
Denoiser::Denoiser(Device *denoiser_device, const DenoiseParams &params)
: denoiser_device_(denoiser_device), params_(params)
: denoiser_device_(denoiser_device), denoise_kernels_are_loaded_(false), params_(params)
{
DCHECK(denoiser_device_);
DCHECK(params.use);
@ -173,6 +173,11 @@ const DenoiseParams &Denoiser::get_params() const
bool Denoiser::load_kernels(Progress *progress)
{
/* If the kernels have been loaded successfully once, there is no need to load them again. */
if (denoise_kernels_are_loaded_) {
return denoise_kernels_are_loaded_;
}
if (progress) {
progress->set_status("Loading denoising kernels (may take a few minutes the first time)");
}
@ -195,6 +200,7 @@ bool Denoiser::load_kernels(Progress *progress)
VLOG_WORK << "Will denoise on " << denoiser_device_->info.description << " ("
<< denoiser_device_->info.id << ")";
denoise_kernels_are_loaded_ = true;
return true;
}

@ -115,6 +115,7 @@ class Denoiser {
virtual uint get_device_type_mask() const = 0;
Device *denoiser_device_;
bool denoise_kernels_are_loaded_;
DenoiseParams params_;
};

@ -65,6 +65,11 @@ void Instance::init(const int2 &output_res,
info = "";
shaders_are_ready_ = shaders.is_ready(is_image_render());
if (!shaders_are_ready_) {
return;
}
if (assign_if_different(debug_mode, (eDebugMode)G.debug_value)) {
sampling.reset();
}
@ -121,6 +126,8 @@ void Instance::init_light_bake(Depsgraph *depsgraph, draw::Manager *manager)
debug_mode = (eDebugMode)G.debug_value;
info = "";
shaders.is_ready(true);
update_eval_members();
sampling.init(scene);
@ -175,6 +182,10 @@ void Instance::view_update()
void Instance::begin_sync()
{
if (!shaders_are_ready_) {
return;
}
/* Needs to be first for sun light parameters. */
world.sync();
@ -218,6 +229,10 @@ void Instance::begin_sync()
void Instance::object_sync(Object *ob)
{
if (!shaders_are_ready_) {
return;
}
const bool is_renderable_type = ELEM(ob->type,
OB_CURVES,
OB_GPENCIL_LEGACY,
@ -303,6 +318,10 @@ void Instance::object_sync_render(void *instance_,
void Instance::end_sync()
{
if (!shaders_are_ready_) {
return;
}
velocity.end_sync();
volume.end_sync(); /* Needs to be before shadows. */
shadows.end_sync(); /* Needs to be before lights. */
@ -352,26 +371,24 @@ void Instance::render_sync()
DRW_curves_update();
}
bool Instance::needs_lightprobe_sphere_passes() const
{
return sphere_probes.update_probes_this_sample_;
}
bool Instance::do_lightprobe_sphere_sync() const
{
if (!sphere_probes.update_probes_this_sample_) {
return false;
}
if (materials.queued_shaders_count > 0) {
return false;
}
return true;
return (materials.queued_shaders_count == 0) && needs_lightprobe_sphere_passes();
}
bool Instance::needs_planar_probe_passes() const
{
return planar_probes.update_probes_;
}
bool Instance::do_planar_probe_sync() const
{
if (!planar_probes.update_probes_) {
return false;
}
if (materials.queued_shaders_count > 0) {
return false;
}
return true;
return (materials.queued_shaders_count == 0) && needs_planar_probe_passes();
}
/** \} */
@ -515,6 +532,14 @@ void Instance::render_frame(RenderLayer *render_layer, const char *view_name)
void Instance::draw_viewport()
{
if (!shaders_are_ready_) {
DefaultFramebufferList *dfbl = DRW_viewport_framebuffer_list_get();
GPU_framebuffer_clear_color_depth(dfbl->default_fb, float4(0.0f), 1.0f);
info += "Compiling EEVEE Engine Shaders\n";
DRW_viewport_request_redraw();
return;
}
render_sample();
velocity.step_swap();
@ -529,6 +554,7 @@ void Instance::draw_viewport()
std::stringstream ss;
ss << "Compiling Shaders (" << materials.queued_shaders_count << " remaining)";
info = ss.str();
DRW_viewport_request_redraw();
}
else if (materials.queued_optimize_shaders_count > 0) {
std::stringstream ss;

@ -75,6 +75,8 @@ class Instance {
uint64_t depsgraph_last_update_ = 0;
bool overlays_enabled_ = false;
bool shaders_are_ready_ = true;
public:
ShaderModule &shaders;
SyncModule sync;
@ -195,6 +197,13 @@ class Instance {
bool do_lightprobe_sphere_sync() const;
bool do_planar_probe_sync() const;
/**
* Return true when probe passes should be loaded.
* It can be true even when do_<type>_probe_sync() is false because shaders are still being compiled.
*/
bool needs_lightprobe_sphere_passes() const;
bool needs_planar_probe_passes() const;
/* Render. */
void render_sync();

@ -322,7 +322,8 @@ Material &MaterialModule::material_sync(Object *ob,
mat.overlap_masking = MaterialPass();
mat.capture = MaterialPass();
if (inst_.do_lightprobe_sphere_sync() && !(ob->visibility_flag & OB_HIDE_PROBE_CUBEMAP)) {
if (inst_.needs_lightprobe_sphere_passes() && !(ob->visibility_flag & OB_HIDE_PROBE_CUBEMAP))
{
mat.lightprobe_sphere_prepass = material_pass_get(
ob, blender_mat, MAT_PIPE_PREPASS_DEFERRED, geometry_type, MAT_PROBE_REFLECTION);
mat.lightprobe_sphere_shading = material_pass_get(
@ -333,7 +334,7 @@ Material &MaterialModule::material_sync(Object *ob,
mat.lightprobe_sphere_shading = MaterialPass();
}
if (inst_.do_planar_probe_sync() && !(ob->visibility_flag & OB_HIDE_PROBE_PLANAR)) {
if (inst_.needs_planar_probe_passes() && !(ob->visibility_flag & OB_HIDE_PROBE_PLANAR)) {
mat.planar_probe_prepass = material_pass_get(
ob, blender_mat, MAT_PIPE_PREPASS_PLANAR, geometry_type, MAT_PROBE_PLANAR);
mat.planar_probe_shading = material_pass_get(

@ -53,19 +53,27 @@ ShaderModule::ShaderModule()
shader = nullptr;
}
#ifndef NDEBUG
/* Ensure all shaders are described. */
Vector<const GPUShaderCreateInfo *> infos;
infos.reserve(MAX_SHADER_TYPE);
for (auto i : IndexRange(MAX_SHADER_TYPE)) {
const char *name = static_shader_create_info_name_get(eShaderType(i));
const GPUShaderCreateInfo *create_info = GPU_shader_create_info_get(name);
infos.append(create_info);
#ifndef NDEBUG
if (name == nullptr) {
std::cerr << "EEVEE: Missing case for eShaderType(" << i
<< ") in static_shader_create_info_name_get().";
BLI_assert(0);
}
const GPUShaderCreateInfo *create_info = GPU_shader_create_info_get(name);
BLI_assert_msg(create_info != nullptr, "EEVEE: Missing create info for static shader.");
}
#endif
}
if (GPU_use_parallel_compilation()) {
compilation_handle_ = GPU_shader_batch_create_from_infos(infos);
}
}
ShaderModule::~ShaderModule()
@ -82,6 +90,22 @@ ShaderModule::~ShaderModule()
*
* \{ */
bool ShaderModule::is_ready(bool block)
{
if (compilation_handle_ == 0) {
return true;
}
if (block || GPU_shader_batch_is_ready(compilation_handle_)) {
Vector<GPUShader *> shaders = GPU_shader_batch_finalize(compilation_handle_);
for (int i : IndexRange(MAX_SHADER_TYPE)) {
shaders_[i] = shaders[i];
}
}
return compilation_handle_ == 0;
}
const char *ShaderModule::static_shader_create_info_name_get(eShaderType shader_type)
{
switch (shader_type) {
@ -300,15 +324,16 @@ const char *ShaderModule::static_shader_create_info_name_get(eShaderType shader_
GPUShader *ShaderModule::static_shader_get(eShaderType shader_type)
{
BLI_assert(is_ready());
if (shaders_[shader_type] == nullptr) {
const char *shader_name = static_shader_create_info_name_get(shader_type);
shaders_[shader_type] = GPU_shader_create_from_info_name(shader_name);
if (shaders_[shader_type] == nullptr) {
if (GPU_use_parallel_compilation()) {
fprintf(stderr, "EEVEE: error: Could not compile static shader \"%s\"\n", shader_name);
BLI_assert(0);
}
else {
shaders_[shader_type] = GPU_shader_create_from_info_name(shader_name);
}
BLI_assert(shaders_[shader_type] != nullptr);
}
return shaders_[shader_type];
}

@ -157,6 +157,7 @@ enum eShaderType {
class ShaderModule {
private:
std::array<GPUShader *, MAX_SHADER_TYPE> shaders_;
BatchHandle compilation_handle_ = 0;
/** Shared shader module across all engine instances. */
static ShaderModule *g_shader_module;
@ -165,6 +166,8 @@ class ShaderModule {
ShaderModule();
~ShaderModule();
bool is_ready(bool block = false);
GPUShader *static_shader_get(eShaderType shader_type);
GPUMaterial *material_default_shader_get(eMaterialPipeline pipeline_type,
eMaterialGeometry geometry_type);

@ -12,6 +12,7 @@
#include "BLI_dynstr.h"
#include "BLI_listbase.h"
#include "BLI_map.hh"
#include "BLI_string_utils.hh"
#include "BLI_threads.h"
#include "BLI_time.h"
@ -66,6 +67,8 @@ struct DRWShaderCompiler {
static void drw_deferred_shader_compilation_exec(void *custom_data,
wmJobWorkerStatus *worker_status)
{
using namespace blender;
GPU_render_begin();
DRWShaderCompiler *comp = (DRWShaderCompiler *)custom_data;
void *system_gpu_context = comp->system_gpu_context;
@ -80,11 +83,16 @@ static void drw_deferred_shader_compilation_exec(void *custom_data,
GPU_context_main_lock();
}
const bool use_parallel_compilation = GPU_use_parallel_compilation();
WM_system_gpu_context_activate(system_gpu_context);
GPU_context_active_set(blender_gpu_context);
Vector<GPUMaterial *> next_batch;
Map<BatchHandle, Vector<GPUMaterial *>> batches;
while (true) {
if (worker_status->stop != 0) {
if (worker_status->stop) {
break;
}
@ -96,14 +104,44 @@ static void drw_deferred_shader_compilation_exec(void *custom_data,
if (mat) {
/* Avoid another thread freeing the material mid compilation. */
GPU_material_acquire(mat);
MEM_freeN(link);
}
BLI_spin_unlock(&comp->list_lock);
if (mat) {
/* Do the compilation. */
GPU_material_compile(mat);
GPU_material_release(mat);
MEM_freeN(link);
/* We have a new material that must be compiled.
* Either compile it directly or add it to a parallel compilation batch. */
if (use_parallel_compilation) {
next_batch.append(mat);
}
else {
GPU_material_compile(mat);
GPU_material_release(mat);
}
}
else if (!next_batch.is_empty()) {
/* (only if use_parallel_compilation == true)
* We ran out of pending materials. Request the compilation of the current batch. */
BatchHandle batch_handle = GPU_material_batch_compile(next_batch);
batches.add(batch_handle, next_batch);
next_batch.clear();
}
else if (!batches.is_empty()) {
/* (only if use_parallel_compilation == true)
* Keep querying the requested batches until all of them are ready. */
Vector<BatchHandle> ready_handles;
for (BatchHandle handle : batches.keys()) {
if (GPU_material_batch_is_ready(handle)) {
ready_handles.append(handle);
}
}
for (BatchHandle handle : ready_handles) {
Vector<GPUMaterial *> batch = batches.pop(handle);
GPU_material_batch_finalize(handle, batch);
for (GPUMaterial *mat : batch) {
GPU_material_release(mat);
}
}
}
else {
/* Check for Material Optimization job once there are no more
@ -111,7 +149,7 @@ static void drw_deferred_shader_compilation_exec(void *custom_data,
BLI_spin_lock(&comp->list_lock);
/* Pop tail because it will be less likely to lock the main thread
* if all GPUMaterials are to be freed (see DRW_deferred_shader_remove()). */
link = (LinkData *)BLI_poptail(&comp->optimize_queue);
LinkData *link = (LinkData *)BLI_poptail(&comp->optimize_queue);
GPUMaterial *optimize_mat = link ? (GPUMaterial *)link->data : nullptr;
if (optimize_mat) {
/* Avoid another thread freeing the material during optimization. */
@ -136,6 +174,16 @@ static void drw_deferred_shader_compilation_exec(void *custom_data,
}
}
/* We have to wait until all the requested batches are ready,
* even if worker_status->stop is true. */
for (BatchHandle handle : batches.keys()) {
Vector<GPUMaterial *> &batch = batches.lookup(handle);
GPU_material_batch_finalize(handle, batch);
for (GPUMaterial *mat : batch) {
GPU_material_release(mat);
}
}
GPU_context_active_set(nullptr);
WM_system_gpu_context_release(system_gpu_context);
if (use_main_context_workaround) {

@ -260,6 +260,30 @@ void GPU_material_compile(GPUMaterial *mat);
void GPU_material_free_single(GPUMaterial *material);
void GPU_material_free(ListBase *gpumaterial);
/**
* Request the creation of multiple `GPUMaterial`s at once, allowing the backend to use
* multithreaded compilation.
* Returns a handle that can be used to poll whether all materials have been
* compiled, and to retrieve the compiled result.
* NOTE: This function is asynchronous on OpenGL, but blocking on Vulkan and Metal.
* WARNING: The material pointers and their pass->create_info must remain valid until
* `GPU_material_batch_finalize` has returned.
*/
BatchHandle GPU_material_batch_compile(blender::Span<GPUMaterial *> mats);
/**
* Returns true if all the materials from the batch have finished their compilation.
*/
bool GPU_material_batch_is_ready(BatchHandle handle);
/**
* Assign the compiled shaders to their respective materials and flag their status.
* The materials list should have the same length and order as in the `GPU_material_batch_compile`
* call.
* If the compilation has not finished yet, this call will block the thread until all the
* shaders are ready.
* WARNING: The handle will be invalidated by this call, you can't process the same batch twice.
*/
void GPU_material_batch_finalize(BatchHandle &handle, blender::Span<GPUMaterial *> mats);
void GPU_material_acquire(GPUMaterial *mat);
void GPU_material_release(GPUMaterial *mat);
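As a rough illustration of the intended lifecycle of this batch API (the real consumer is the deferred compilation job in the draw manager hunk above), here is a minimal caller-side sketch; `gather_queued_materials()` and `do_other_work()` are hypothetical placeholders, not part of this commit:
/* Hedged sketch: request, poll, and finalize a material compilation batch. */
blender::Vector<GPUMaterial *> mats = gather_queued_materials(); /* Hypothetical helper. */
for (GPUMaterial *mat : mats) {
  GPU_material_acquire(mat); /* Keep the materials alive while the batch compiles. */
}
BatchHandle handle = GPU_material_batch_compile(mats);
while (!GPU_material_batch_is_ready(handle)) {
  do_other_work(); /* Hypothetical: poll instead of blocking. */
}
/* Blocks if compilation is still running; invalidates `handle`. */
GPU_material_batch_finalize(handle, mats);
for (GPUMaterial *mat : mats) {
  GPU_material_release(mat);
}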

@ -102,6 +102,8 @@ struct GPUPass {
uint32_t hash;
/** Whether we already tried to compile the attached GPUShader. */
bool compiled;
/** Whether this pass is already being compiled (a GPUPass can be shared by multiple GPUMaterials). */
bool compilation_requested;
/** Hint that an optimized variant of this pass should be created based on a complexity heuristic
* during pass code generation. */
bool should_optimize;
@ -805,6 +807,7 @@ GPUPass *GPU_generate_pass(GPUMaterial *material,
pass->engine = engine;
pass->hash = codegen.hash_get();
pass->compiled = false;
pass->compilation_requested = false;
pass->cached = false;
/* Only flag pass optimization hint if this is the first generated pass for a material.
* Optimized passes cannot be optimized further, even if the heuristic is still not
@ -881,17 +884,22 @@ static bool gpu_pass_shader_validate(GPUPass *pass, GPUShader *shader)
return (active_samplers_len * 3 <= GPU_max_textures());
}
bool GPU_pass_compile(GPUPass *pass, const char *shname)
GPUShaderCreateInfo *GPU_pass_begin_compilation(GPUPass *pass, const char *shname)
{
if (!pass->compilation_requested) {
pass->compilation_requested = true;
pass->create_info->name_ = shname;
GPUShaderCreateInfo *info = reinterpret_cast<GPUShaderCreateInfo *>(
static_cast<ShaderCreateInfo *>(pass->create_info));
return info;
}
return nullptr;
}
bool GPU_pass_finalize_compilation(GPUPass *pass, GPUShader *shader)
{
bool success = true;
if (!pass->compiled) {
GPUShaderCreateInfo *info = reinterpret_cast<GPUShaderCreateInfo *>(
static_cast<ShaderCreateInfo *>(pass->create_info));
pass->create_info->name_ = shname;
GPUShader *shader = GPU_shader_create_from_info(info);
/* NOTE: Some drivers / GPUs allow more active samplers than the OpenGL limit.
* We need to make sure to count active samplers to avoid undefined behavior. */
if (!gpu_pass_shader_validate(pass, shader)) {
@ -908,6 +916,16 @@ bool GPU_pass_compile(GPUPass *pass, const char *shname)
return success;
}
bool GPU_pass_compile(GPUPass *pass, const char *shname)
{
bool success = true;
if (GPUShaderCreateInfo *info = GPU_pass_begin_compilation(pass, shname)) {
GPUShader *shader = GPU_shader_create_from_info(info);
success = GPU_pass_finalize_compilation(pass, shader);
}
return success;
}
GPUShader *GPU_pass_shader_get(GPUPass *pass)
{
return pass->shader;

@ -31,6 +31,11 @@ void GPU_pass_acquire(GPUPass *pass);
void GPU_pass_release(GPUPass *pass);
bool GPU_pass_should_optimize(GPUPass *pass);
/* Custom pass compilation. */
GPUShaderCreateInfo *GPU_pass_begin_compilation(GPUPass *pass, const char *shname);
bool GPU_pass_finalize_compilation(GPUPass *pass, GPUShader *shader);
/* Module */
void gpu_codegen_init();

@ -143,6 +143,8 @@ struct GPUMaterial {
uint32_t refcount;
bool do_batch_compilation;
#ifndef NDEBUG
char name[64];
#else
@ -951,21 +953,8 @@ void GPU_material_release(GPUMaterial *mat)
GPU_material_free_single(mat);
}
void GPU_material_compile(GPUMaterial *mat)
static void gpu_material_finalize(GPUMaterial *mat, bool success)
{
bool success;
BLI_assert(ELEM(mat->status, GPU_MAT_QUEUED, GPU_MAT_CREATED));
BLI_assert(mat->pass);
/* NOTE: The shader may have already been compiled here since we are
* sharing GPUShader across GPUMaterials. In this case it's a no-op. */
#ifndef NDEBUG
success = GPU_pass_compile(mat->pass, mat->name);
#else
success = GPU_pass_compile(mat->pass, __func__);
#endif
mat->flag |= GPU_MATFLAG_UPDATED;
if (success) {
@ -1017,6 +1006,64 @@ void GPU_material_compile(GPUMaterial *mat)
}
}
void GPU_material_compile(GPUMaterial *mat)
{
bool success;
BLI_assert(ELEM(mat->status, GPU_MAT_QUEUED, GPU_MAT_CREATED));
BLI_assert(mat->pass);
/* NOTE: The shader may have already been compiled here since we are
* sharing GPUShader across GPUMaterials. In this case it's a no-op. */
#ifndef NDEBUG
success = GPU_pass_compile(mat->pass, mat->name);
#else
success = GPU_pass_compile(mat->pass, __func__);
#endif
gpu_material_finalize(mat, success);
}
BatchHandle GPU_material_batch_compile(blender::Span<GPUMaterial *> mats)
{
blender::Vector<GPUShaderCreateInfo *> infos;
infos.reserve(mats.size());
for (GPUMaterial *mat : mats) {
BLI_assert(ELEM(mat->status, GPU_MAT_QUEUED, GPU_MAT_CREATED));
BLI_assert(mat->pass);
#ifndef NDEBUG
const char *name = mat->name;
#else
const char *name = __func__;
#endif
mat->do_batch_compilation = false;
if (GPUShaderCreateInfo *info = GPU_pass_begin_compilation(mat->pass, name)) {
infos.append(info);
mat->do_batch_compilation = true;
}
}
return GPU_shader_batch_create_from_infos(infos);
}
bool GPU_material_batch_is_ready(BatchHandle handle)
{
return GPU_shader_batch_is_ready(handle);
}
void GPU_material_batch_finalize(BatchHandle &handle, blender::Span<GPUMaterial *> mats)
{
blender::Vector<GPUShader *> shaders = GPU_shader_batch_finalize(handle);
int i = 0;
for (GPUMaterial *mat : mats) {
bool success = true;
if (mat->do_batch_compilation) {
success = GPU_pass_finalize_compilation(mat->pass, shaders[i++]);
}
gpu_material_finalize(mat, success);
}
}
void GPU_material_optimize(GPUMaterial *mat)
{
/* If shader is flagged for skipping optimization or has already been successfully

@ -25,35 +25,57 @@
namespace blender::gpu {
class SubprocessShader {
GLuint comp_ = 0;
GLuint vert_ = 0;
GLuint geom_ = 0;
GLuint frag_ = 0;
GLuint program_ = 0;
bool success_ = false;
public:
SubprocessShader(const char *vert_src, const char *frag_src)
SubprocessShader(const char *comp_src,
const char *vert_src,
const char *geom_src,
const char *frag_src)
{
GLint status;
vert_ = glCreateShader(GL_VERTEX_SHADER);
glShaderSource(vert_, 1, &vert_src, nullptr);
glCompileShader(vert_);
glGetShaderiv(vert_, GL_COMPILE_STATUS, &status);
if (!status) {
return;
}
frag_ = glCreateShader(GL_FRAGMENT_SHADER);
glShaderSource(frag_, 1, &frag_src, nullptr);
glCompileShader(frag_);
glGetShaderiv(frag_, GL_COMPILE_STATUS, &status);
if (!status) {
return;
}
program_ = glCreateProgram();
glAttachShader(program_, vert_);
glAttachShader(program_, frag_);
auto compile_stage = [&](const char *src, GLenum stage) -> GLuint {
if (src == nullptr) {
/* Unused stage: report success so only real compilation failures trip the status checks. */
status = GL_TRUE;
return 0;
}
GLuint shader = glCreateShader(stage);
glShaderSource(shader, 1, &src, nullptr);
glCompileShader(shader);
glGetShaderiv(shader, GL_COMPILE_STATUS, &status);
glAttachShader(program_, shader);
return shader;
};
comp_ = compile_stage(comp_src, GL_COMPUTE_SHADER);
if (!status) {
return;
}
vert_ = compile_stage(vert_src, GL_VERTEX_SHADER);
if (!status) {
return;
}
geom_ = compile_stage(geom_src, GL_GEOMETRY_SHADER);
if (!status) {
return;
}
frag_ = compile_stage(frag_src, GL_FRAGMENT_SHADER);
if (!status) {
return;
}
glLinkProgram(program_);
glGetProgramiv(program_, GL_LINK_STATUS, &status);
if (!status) {
@ -65,7 +87,9 @@ class SubprocessShader {
~SubprocessShader()
{
glDeleteShader(comp_);
glDeleteShader(vert_);
glDeleteShader(geom_);
glDeleteShader(frag_);
glDeleteProgram(program_);
}
@ -78,8 +102,8 @@ class SubprocessShader {
if (success_) {
glGetProgramiv(program_, GL_PROGRAM_BINARY_LENGTH, &bin->size);
if (bin->size + sizeof(ShaderBinaryHeader) < compilation_subprocess_shared_memory_size) {
glGetProgramBinary(program_, bin->size, nullptr, &bin->format, &bin->data_start);
if (bin->size <= sizeof(ShaderBinaryHeader::data)) {
glGetProgramBinary(program_, bin->size, nullptr, &bin->format, bin->data);
}
}
@ -92,7 +116,7 @@ static bool validate_binary(void *binary)
{
ShaderBinaryHeader *bin = reinterpret_cast<ShaderBinaryHeader *>(binary);
GLuint program = glCreateProgram();
glProgramBinary(program, bin->format, &bin->data_start, bin->size);
glProgramBinary(program, bin->format, bin->data, bin->size);
GLint status;
glGetProgramiv(program, GL_LINK_STATUS, &status);
glDeleteProgram(program);
@ -165,15 +189,34 @@ void GPU_compilation_subprocess_run(const char *subprocess_name)
break;
}
const char *shaders = reinterpret_cast<const char *>(shared_mem.get_data());
const char *vert_src = shaders;
const char *frag_src = shaders + strlen(shaders) + 1;
ShaderSourceHeader *source = reinterpret_cast<ShaderSourceHeader *>(shared_mem.get_data());
const char *next_src = source->sources;
const char *comp_src = nullptr;
const char *vert_src = nullptr;
const char *geom_src = nullptr;
const char *frag_src = nullptr;
DefaultHash<StringRefNull> hasher;
uint64_t vert_hash = hasher(vert_src);
uint64_t frag_hash = hasher(frag_src);
std::string hash_str = std::to_string(vert_hash) + "_" + std::to_string(frag_hash);
std::string hash_str = "_";
auto get_src = [&]() {
const char *src = next_src;
next_src += strlen(src) + sizeof('\0');
hash_str += std::to_string(hasher(src)) + "_";
return src;
};
if (source->type == ShaderSourceHeader::Type::COMPUTE) {
comp_src = get_src();
}
else {
vert_src = get_src();
if (source->type == ShaderSourceHeader::Type::GRAPHICS_WITH_GEOMETRY_STAGE) {
geom_src = get_src();
}
frag_src = get_src();
}
std::string cache_path = cache_dir + SEP_STR + hash_str;
/* TODO: This should lock the files? */
@ -203,14 +246,14 @@ void GPU_compilation_subprocess_run(const char *subprocess_name)
}
}
SubprocessShader shader(vert_src, frag_src);
SubprocessShader shader(comp_src, vert_src, geom_src, frag_src);
ShaderBinaryHeader *binary = shader.get_binary(shared_mem.get_data());
end_semaphore.increment();
fstream file(cache_path, std::ios::binary | std::ios::out);
file.write(reinterpret_cast<char *>(shared_mem.get_data()),
binary->size + offsetof(ShaderBinaryHeader, data_start));
binary->size + offsetof(ShaderBinaryHeader, data));
}
GPU_exit();

@ -13,7 +13,19 @@
namespace blender::gpu {
/* The size of the memory pools shared by Blender and the compilation subprocesses. */
constexpr size_t compilation_subprocess_shared_memory_size = 1024 * 1024 * 5; /* 5mB */
constexpr size_t compilation_subprocess_shared_memory_size = 1024 * 1024 * 5; /* 5 MiB */
struct ShaderSourceHeader {
enum Type { COMPUTE, GRAPHICS, GRAPHICS_WITH_GEOMETRY_STAGE };
/* The type of program being compiled. */
Type type;
/* The source code for all the shader stages (each terminated by a null character).
* The stages follow the execution order (e.g. vert > geom > frag). */
char sources[compilation_subprocess_shared_memory_size - sizeof(type)];
};
static_assert(sizeof(ShaderSourceHeader) == compilation_subprocess_shared_memory_size,
"Size must match the shared memory size");
struct ShaderBinaryHeader {
/* Size of the shader binary data. */
@ -21,11 +33,13 @@ struct ShaderBinaryHeader {
/* Magic number that identifies the format of this shader binary (Driver-defined).
* This (and size) is set to 0 when the shader has failed to compile. */
uint32_t format;
/* When casting a shared memory pool into a ShaderBinaryHeader*, this is the first byte of the
* shader binary data. */
uint8_t data_start;
/* The serialized shader binary data. */
uint8_t data[compilation_subprocess_shared_memory_size - sizeof(size) - sizeof(format)];
};
static_assert(sizeof(ShaderBinaryHeader) == compilation_subprocess_shared_memory_size,
"Size must match the shared memory size");
} // namespace blender::gpu
#endif

@ -1472,6 +1472,30 @@ Vector<const char *> GLSources::sources_get() const
return result;
}
std::string GLSources::to_string() const
{
std::string result;
for (const GLSource &source : *this) {
if (source.source_ref) {
result.append(source.source_ref);
}
else {
result.append(source.source);
}
}
return result;
}
size_t GLSourcesBaked::size()
{
size_t result = 0;
result += comp.empty() ? 0 : comp.size() + sizeof('\0');
result += vert.empty() ? 0 : vert.size() + sizeof('\0');
result += geom.empty() ? 0 : geom.size() + sizeof('\0');
result += frag.empty() ? 0 : frag.size() + sizeof('\0');
return result;
}
/** \} */
/* -------------------------------------------------------------------- */
@ -1588,6 +1612,16 @@ GLuint GLShader::program_get()
return program_active_->program_id;
}
GLSourcesBaked GLShader::get_sources()
{
GLSourcesBaked result;
result.comp = compute_sources_.to_string();
result.vert = vertex_sources_.to_string();
result.geom = geometry_sources_.to_string();
result.frag = fragment_sources_.to_string();
return result;
}
/** \} */
#if BLI_SUBPROCESS_SUPPORT
@ -1620,12 +1654,37 @@ GLCompilerWorker::~GLCompilerWorker()
start_semaphore_->increment();
}
void GLCompilerWorker::compile(StringRefNull vert, StringRefNull frag)
void GLCompilerWorker::compile(const GLSourcesBaked &sources)
{
BLI_assert(state_ == AVAILABLE);
strcpy((char *)shared_mem_->get_data(), vert.c_str());
strcpy((char *)shared_mem_->get_data() + vert.size() + sizeof('\0'), frag.c_str());
ShaderSourceHeader *shared_src = reinterpret_cast<ShaderSourceHeader *>(shared_mem_->get_data());
char *next_src = shared_src->sources;
auto add_src = [&](const std::string &src) {
if (!src.empty()) {
strcpy(next_src, src.c_str());
next_src += src.size() + sizeof('\0');
}
};
add_src(sources.comp);
add_src(sources.vert);
add_src(sources.geom);
add_src(sources.frag);
BLI_assert(size_t(next_src) <= size_t(shared_src) + compilation_subprocess_shared_memory_size);
if (!sources.comp.empty()) {
BLI_assert(sources.vert.empty() && sources.geom.empty() && sources.frag.empty());
shared_src->type = ShaderSourceHeader::Type::COMPUTE;
}
else {
BLI_assert(sources.comp.empty() && !sources.vert.empty() && !sources.frag.empty());
shared_src->type = sources.geom.empty() ?
ShaderSourceHeader::Type::GRAPHICS :
ShaderSourceHeader::Type::GRAPHICS_WITH_GEOMETRY_STAGE;
}
start_semaphore_->increment();
@ -1668,7 +1727,7 @@ bool GLCompilerWorker::load_program_binary(GLint program)
state_ = COMPILATION_FINISHED;
if (binary->size > 0) {
glProgramBinary(program, binary->format, &binary->data_start, binary->size);
glProgramBinary(program, binary->format, binary->data, binary->size);
return true;
}
@ -1695,7 +1754,7 @@ GLShaderCompiler::~GLShaderCompiler()
}
}
GLCompilerWorker *GLShaderCompiler::get_compiler_worker(const char *vert, const char *frag)
GLCompilerWorker *GLShaderCompiler::get_compiler_worker(const GLSourcesBaked &sources)
{
GLCompilerWorker *result = nullptr;
for (GLCompilerWorker *compiler : workers_) {
@ -1709,7 +1768,7 @@ GLCompilerWorker *GLShaderCompiler::get_compiler_worker(const char *vert, const
workers_.append(result);
}
if (result) {
result->compile(vert, frag);
result->compile(sources);
}
return result;
}
@ -1739,31 +1798,21 @@ BatchHandle GLShaderCompiler::batch_compile(Span<const shader::ShaderCreateInfo
for (const shader::ShaderCreateInfo *info : infos) {
const_cast<ShaderCreateInfo *>(info)->finalize();
CompilationWork item = {};
batch.items.append({});
CompilationWork &item = batch.items.last();
item.info = info;
item.do_async_compilation = !info->vertex_source_.is_empty() &&
!info->fragment_source_.is_empty() &&
info->compute_source_.is_empty() &&
info->geometry_source_.is_empty();
if (item.do_async_compilation) {
item.shader = static_cast<GLShader *>(compile(*info, true));
for (const char *src : item.shader->vertex_sources_.sources_get()) {
item.vertex_src.append(src);
}
for (const char *src : item.shader->fragment_sources_.sources_get()) {
item.fragment_src.append(src);
}
item.shader = static_cast<GLShader *>(compile(*info, true));
item.sources = item.shader->get_sources();
size_t required_size = item.vertex_src.size() + item.fragment_src.size();
if (required_size < compilation_subprocess_shared_memory_size) {
item.worker = get_compiler_worker(item.vertex_src.c_str(), item.fragment_src.c_str());
}
else {
delete item.shader;
item.do_async_compilation = false;
}
size_t required_size = item.sources.size();
item.do_async_compilation = required_size <= sizeof(ShaderSourceHeader::sources);
if (item.do_async_compilation) {
item.worker = get_compiler_worker(item.sources);
}
else {
delete item.shader;
item.sources = {};
}
batch.items.append(item);
}
return handle;
}
@ -1791,7 +1840,7 @@ bool GLShaderCompiler::batch_is_ready(BatchHandle handle)
if (!item.worker) {
/* Try to acquire an available worker. */
item.worker = get_compiler_worker(item.vertex_src.c_str(), item.fragment_src.c_str());
item.worker = get_compiler_worker(item.sources);
}
else if (item.worker->is_ready()) {
/* Retrieve the binary compiled by the worker. */

@ -44,6 +44,21 @@ class GLSources : public Vector<GLSource> {
public:
GLSources &operator=(Span<const char *> other);
Vector<const char *> sources_get() const;
std::string to_string() const;
};
/**
* The full sources for each shader stage, baked into a single string from their respective
* GLSources. (Can be retrieved from GLShader::get_sources())
*/
struct GLSourcesBaked : NonCopyable {
std::string comp;
std::string vert;
std::string geom;
std::string frag;
/* Returns the size (in bytes) required to store the source of all the used stages. */
size_t size();
};
/**
@ -194,6 +209,8 @@ class GLShader : public Shader {
return program_active_->compute_shader != 0;
}
GLSourcesBaked get_sources();
private:
const char *glsl_patch_get(GLenum gl_stage);
@ -240,7 +257,7 @@ class GLCompilerWorker {
GLCompilerWorker();
~GLCompilerWorker();
void compile(StringRefNull vert, StringRefNull frag);
void compile(const GLSourcesBaked &sources);
bool is_ready();
bool load_program_binary(GLint program);
void release();
@ -255,14 +272,12 @@ class GLShaderCompiler : public ShaderCompiler {
Vector<GLCompilerWorker *> workers_;
struct CompilationWork {
GLCompilerWorker *worker = nullptr;
GLShader *shader = nullptr;
const shader::ShaderCreateInfo *info = nullptr;
GLShader *shader = nullptr;
GLSourcesBaked sources;
GLCompilerWorker *worker = nullptr;
bool do_async_compilation = false;
std::string vertex_src;
std::string fragment_src;
bool is_ready = false;
};
@ -274,7 +289,7 @@ class GLShaderCompiler : public ShaderCompiler {
BatchHandle next_batch_handle = 1;
Map<BatchHandle, Batch> batches;
GLCompilerWorker *get_compiler_worker(const char *vert, const char *frag);
GLCompilerWorker *get_compiler_worker(const GLSourcesBaked &sources);
bool worker_is_lost(GLCompilerWorker *&worker);
public:

@ -44,11 +44,13 @@ void main()
vec2 center = vec2(strip.right_handle + strip.left_handle, strip.top + strip.bottom) * 0.5;
/* Transform strip rectangle into pixel coordinates, so that
* rounded corners have proper aspect ratio and can be expressed in pixels. */
* rounded corners have proper aspect ratio and can be expressed in pixels.
* Also snap to pixel grid coordinates, so that the outline/border is drawn at
* clear, non-fractional pixel sizes. */
vec2 view_to_pixel = vec2(context_data.inv_pixelx, context_data.inv_pixely);
size *= view_to_pixel;
center *= view_to_pixel;
vec2 pos = co * view_to_pixel;
size = round(size * view_to_pixel);
center = round(center * view_to_pixel);
vec2 pos = round(co * view_to_pixel);
float radius = context_data.round_radius;
if (radius > size.x) {

@ -9,11 +9,10 @@ void main()
int vid = gl_VertexID;
SeqStripDrawData strip = strip_data[id];
vec4 rect = vec4(strip.left_handle, strip.bottom, strip.right_handle, strip.top);
/* Expand rasterized rectangle by 1px so that we can do outlines. */
rect.x -= context_data.pixelx;
rect.z += context_data.pixelx;
rect.y -= context_data.pixely;
rect.w += context_data.pixely;
/* Expand by 2px to fit possible outline and pixel grid rounding. */
vec2 expand = vec2(context_data.pixelx, context_data.pixely) * 2.0;
rect.xy -= expand;
rect.zw += expand;
vec2 co;
if (vid == 0) {