Merge branch 'blender-v4.2-release'

This commit is contained in:
Lukas Stockner 2024-06-05 20:25:50 +02:00
commit 5891a73785
34 changed files with 1603 additions and 181 deletions

@ -28,9 +28,7 @@ OIDNDenoiser::OIDNDenoiser(Device *denoiser_device, const DenoiseParams &params)
DCHECK_EQ(params.type, DENOISER_OPENIMAGEDENOISE);
#ifndef WITH_OPENIMAGEDENOISE
(void)progress;
set_error("Failed to denoise, build has no OpenImageDenoise support");
return nullptr;
#else
if (!openimagedenoise_supported()) {
set_error("OpenImageDenoiser is not supported on this CPU: missing SSE 4.1 support");

@ -114,6 +114,7 @@ const UserDef U_default = {
#else
.gpu_backend = GPU_BACKEND_OPENGL,
#endif
.max_shader_compilation_subprocesses = 0,
/** Initialized by: #BKE_studiolight_default. */
.light_param = {{0}},

@ -746,6 +746,8 @@ class USERPREF_PT_system_memory(SystemPanel, CenterAlignMixIn, Panel):
bl_label = "Memory & Limits"
def draw_centered(self, context, layout):
import sys
prefs = context.preferences
system = prefs.system
edit = prefs.edit
@ -772,6 +774,11 @@ class USERPREF_PT_system_memory(SystemPanel, CenterAlignMixIn, Panel):
col.prop(system, "vbo_time_out", text="VBO Time Out")
col.prop(system, "vbo_collection_rate", text="Garbage Collection Rate")
if sys.platform != "darwin":
layout.separator()
col = layout.column()
col.prop(system, "max_shader_compilation_subprocesses")
class USERPREF_PT_system_video_sequencer(SystemPanel, CenterAlignMixIn, Panel):
bl_label = "Video Sequencer"

@ -100,11 +100,6 @@ std::optional<std::string> BKE_appdir_resource_path_id_with_version(int folder_i
int version);
std::optional<std::string> BKE_appdir_resource_path_id(int folder_id, bool check_is_dir);
/**
* Check if this is an install with user files kept together
* with the Blender executable and its installation files.
*/
bool BKE_appdir_app_is_portable_install();
/**
* Return true if templates exist
*/

@ -391,8 +391,8 @@ static bool get_path_local_ex(char *targetpath,
targetpath_maxncpy,
check_is_dir,
path_base,
blender_version_decimal(version),
relfolder);
(version) ? blender_version_decimal(version) : relfolder,
(version) ? relfolder : nullptr);
}
static bool get_path_local(char *targetpath,
size_t targetpath_maxncpy,
@ -405,13 +405,6 @@ static bool get_path_local(char *targetpath,
targetpath, targetpath_maxncpy, folder_name, subfolder_name, version, check_is_dir);
}
bool BKE_appdir_app_is_portable_install()
{
/* Detect portable install by the existence of `config` folder. */
char dirpath[FILE_MAX];
return get_path_local(dirpath, sizeof(dirpath), "config", nullptr);
}
/**
* Returns the path of a folder from environment variables.
*
@ -465,15 +458,15 @@ static bool get_path_user_ex(char *targetpath,
{
char user_path[FILE_MAX];
/* Environment variable override. */
if (test_env_path(user_path, "BLENDER_USER_RESOURCES", check_is_dir)) {
/* Pass. */
}
/* Portable install, to store user files next to Blender executable. */
else if (get_path_local_ex(user_path, sizeof(user_path), "portable", nullptr, 0, true)) {
/* Pass. */
}
else {
/* for portable install, user path is always local */
if (BKE_appdir_app_is_portable_install()) {
return get_path_local_ex(
targetpath, targetpath_maxncpy, folder_name, subfolder_name, version, check_is_dir);
}
user_path[0] = '\0';
const char *user_base_path = GHOST_getUserDir(version, blender_version_decimal(version));

@ -840,21 +840,17 @@ void BKE_studiolight_init()
BLI_addtail(&studiolights, sl);
/* Go over the preset folder and add a studio-light for every image with its path. */
/* For portable installs (where USER and SYSTEM paths are the same),
* only go over LOCAL data-files once. */
/* Also reserve icon space for it. */
if (!BKE_appdir_app_is_portable_install()) {
studiolight_add_files_from_datafolder(BLENDER_USER_DATAFILES,
STUDIOLIGHT_LIGHTS_FOLDER,
STUDIOLIGHT_TYPE_STUDIO | STUDIOLIGHT_USER_DEFINED |
STUDIOLIGHT_SPECULAR_HIGHLIGHT_PASS);
studiolight_add_files_from_datafolder(BLENDER_USER_DATAFILES,
STUDIOLIGHT_WORLD_FOLDER,
STUDIOLIGHT_TYPE_WORLD | STUDIOLIGHT_USER_DEFINED);
studiolight_add_files_from_datafolder(BLENDER_USER_DATAFILES,
STUDIOLIGHT_MATCAP_FOLDER,
STUDIOLIGHT_TYPE_MATCAP | STUDIOLIGHT_USER_DEFINED);
}
studiolight_add_files_from_datafolder(BLENDER_USER_DATAFILES,
STUDIOLIGHT_LIGHTS_FOLDER,
STUDIOLIGHT_TYPE_STUDIO | STUDIOLIGHT_USER_DEFINED |
STUDIOLIGHT_SPECULAR_HIGHLIGHT_PASS);
studiolight_add_files_from_datafolder(BLENDER_USER_DATAFILES,
STUDIOLIGHT_WORLD_FOLDER,
STUDIOLIGHT_TYPE_WORLD | STUDIOLIGHT_USER_DEFINED);
studiolight_add_files_from_datafolder(BLENDER_USER_DATAFILES,
STUDIOLIGHT_MATCAP_FOLDER,
STUDIOLIGHT_TYPE_MATCAP | STUDIOLIGHT_USER_DEFINED);
studiolight_add_files_from_datafolder(BLENDER_SYSTEM_DATAFILES,
STUDIOLIGHT_LIGHTS_FOLDER,
STUDIOLIGHT_TYPE_STUDIO |

@ -0,0 +1,157 @@
/* SPDX-FileCopyrightText: 2024 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma once
/** \file
* \ingroup bli
*
* API for subprocess creation and inter-process communication.
* NOTE: The use of subprocesses is generally discouraged.
* It should only be used for parallelizing workloads that can only happen on a per-process level
* due to OS or driver limitations.
* WARNING: The Subprocess API is only supported on Windows and Linux.
* Its use should always be inside `#if BLI_SUBPROCESS_SUPPORT` preprocessor directives.
*/
#if defined(_WIN32) || defined(__linux__)
# define BLI_SUBPROCESS_SUPPORT 1
#else
# define BLI_SUBPROCESS_SUPPORT 0
#endif
#if BLI_SUBPROCESS_SUPPORT
# include "BLI_span.hh"
# include "BLI_string_ref.hh"
# include "BLI_sys_types.h"
# include "BLI_utility_mixins.hh"
# include <string>
# ifdef _WIN32
typedef void *HANDLE;
# else
# include <semaphore.h>
# endif
namespace blender {
/**
 * Creates a subprocess of the current Blender executable.
 * WARNING: This class doesn't handle subprocess destruction.
 * On Windows, subprocesses are closed automatically when the parent process finishes.
 * On Linux, subprocesses become children of init or systemd when the parent process finishes.
 */
class BlenderSubprocess : NonCopyable {
 private:
# ifdef _WIN32
  /* Process handle from `CreateProcessW` (null until `create()` succeeds). */
  HANDLE handle_ = nullptr;
# else
  /* Child PID from `fork()` (0 until `create()` succeeds, -1 after failure). */
  pid_t pid_ = 0;
# endif

 public:
  ~BlenderSubprocess();

  /**
   * Create a subprocess and pass the arguments to the main function.
   * NOTE: The subprocess path is not passed as `argv[0]`.
   * `args` only support alpha-numeric characters, underscores and hyphen-minus as a safety
   * measure.
   * WARNING: This function shouldn't be called again after it succeeds.
   */
  bool create(Span<StringRefNull> args);

  /**
   * Checks if the subprocess is still running.
   * It always returns false if creation failed.
   * It doesn't detect hung subprocesses.
   */
  bool is_running();
};
/**
 * Creates or gets access to a block of memory that can be read and written by more than one
 * process.
 * WARNING: It doesn't have any built-in safety measure to prevent concurrent writes or
 * read/writes. Synchronization should be handled with SharedSemaphores.
 */
class SharedMemory : NonCopyable {
 private:
  /* Name identifying the memory block across processes. */
  std::string name_;
# ifdef _WIN32
  /* File-mapping handle. */
  HANDLE handle_;
# else
  /* File descriptor returned by `shm_open`. */
  int handle_;
# endif
  /* Mapped address (null when creation or mapping failed). */
  void *data_;
  /* Size of the mapping in bytes (0 when `data_` is null). */
  size_t data_size_;
  /* Whether this instance controls the lifetime of the memory block. */
  bool is_owner_;

 public:
  /**
   * WARNING: The name should be a unique identifier across all processes (including
   * multiple Blender instances). You should include the PID of the "owner" process in the name to
   * prevent name collisions.
   * `is_owner` should only be true for the first process that creates a SharedMemory with a given
   * name.
   * On Linux, the memory will become invalid across all processes after the owner destructor has
   * run (Windows uses reference counting).
   */
  SharedMemory(std::string name, size_t size, bool is_owner);

  ~SharedMemory();

  /**
   * Get a pointer to the shared memory block.
   * WARNING: It can be null if creation failed, or invalid if the owner destructor has run.
   */
  void *get_data()
  {
    return data_;
  }

  /** Size in bytes of the shared memory block (0 if creation failed). */
  size_t get_size()
  {
    return data_size_;
  }
};
/**
 * Creates or gets access to a semaphore that can be used across multiple processes.
 */
class SharedSemaphore : NonCopyable {
 private:
  /* Name identifying the semaphore across processes. */
  std::string name_;
# if defined(_WIN32)
  HANDLE handle_;
# else
  sem_t *handle_;
# endif
  /* Whether this instance controls the lifetime of the semaphore. */
  bool is_owner_;

 public:
  /**
   * WARNING: The name should be a unique identifier across all processes (including
   * multiple Blender instances). You should include the PID of the "owner" process in the name to
   * prevent name collisions.
   * `is_owner` should only be true for the last process that needs to read it (It's ok if the
   * creator is not the owner).
   * On Linux, the semaphore will become invalid across all processes after the owner destructor
   * has run (Windows uses reference counting).
   */
  SharedSemaphore(std::string name, bool is_owner);

  ~SharedSemaphore();

  /* Increment the semaphore value. */
  void increment();

  /* Decrement the semaphore value (Blocks until the semaphore value is greater than 0). */
  void decrement();

  /**
   * Try to decrement the semaphore value. Returns true on success.
   * (Blocks until the semaphore value is greater than 0 or the wait time runs out).
   */
  bool try_decrement(int wait_ms = 0);
};
} // namespace blender
#endif

@ -40,6 +40,7 @@ set(SRC
intern/BLI_memiter.c
intern/BLI_mempool.c
intern/BLI_mmap.c
intern/BLI_subprocess.cc
intern/BLI_timer.c
intern/DLRB_tree.c
intern/array_store.cc
@ -365,6 +366,7 @@ set(SRC
BLI_string_utils.hh
BLI_struct_equality_utils.hh
BLI_sub_frame.hh
BLI_subprocess.hh
BLI_sys_types.h
BLI_system.h
BLI_task.h

@ -0,0 +1,420 @@
/* SPDX-FileCopyrightText: 2024 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#include "BLI_subprocess.hh"
#if BLI_SUBPROCESS_SUPPORT
/* Based on https://github.com/jarikomppa/ipc (Unlicense) */
# include "BLI_assert.h"
# include "BLI_path_util.h"
# include "BLI_string_utf8.h"
# include <iostream>
namespace blender {
/**
 * Only allow alpha-numeric characters, underscores and hyphen-minus in subprocess arguments, as
 * a safety measure (see `BlenderSubprocess::create`).
 * \return true when every character of every argument is allowed.
 */
static bool check_arguments_are_valid(Span<StringRefNull> args)
{
  for (StringRefNull arg : args) {
    for (const char c : arg) {
      /* Cast to `unsigned char`: passing a negative `char` (e.g. a UTF-8 continuation byte) to
       * `std::isalnum` is undefined behavior. */
      if (!std::isalnum(static_cast<unsigned char>(c)) && !ELEM(c, '_', '-')) {
        return false;
      }
    }
  }
  return true;
}
} // namespace blender
# ifdef _WIN32
# define WIN32_LEAN_AND_MEAN
# include <comdef.h>
# include <windows.h>
namespace blender {
static void print_last_error(const char *function, const char *msg)
{
DWORD error_code = GetLastError();
std::cerr << "ERROR (" << error_code << "): " << function << " : " << msg << std::endl;
}
/* Assert and log when `result` is false (i.e. the Win32 call failed). */
static void check(bool result, const char *function, const char *msg)
{
  if (result) {
    return;
  }
  print_last_error(function, msg);
  BLI_assert(false);
}
# define CHECK(result) check((result), __func__, #result)
# undef ERROR /* Defined in wingdi.h */
# define ERROR(msg) check(false, __func__, msg)
/**
 * Launch the current Blender executable again as a child process, forwarding `args` as its
 * command line. Returns false (and logs the failing Win32 call) on error.
 */
bool BlenderSubprocess::create(Span<StringRefNull> args)
{
  /* Per the header contract, `create()` must not be called again after it succeeded. */
  BLI_assert(handle_ == nullptr);

  if (!check_arguments_are_valid(args)) {
    BLI_assert(false);
    return false;
  }

  /* Path of the running executable, so the subprocess is the same Blender binary. */
  wchar_t path[FILE_MAX];
  if (!GetModuleFileNameW(nullptr, path, FILE_MAX)) {
    ERROR("GetModuleFileNameW");
    return false;
  }

  /* Windows takes a single space-separated command-line string, not an argv array. */
  std::string args_str;
  for (StringRefNull arg : args) {
    args_str += arg + " ";
  }

  /* UTF-8 to UTF-16: first query the required length, then convert. */
  const int length_wc = MultiByteToWideChar(
      CP_UTF8, 0, args_str.c_str(), args_str.length(), nullptr, 0);
  std::wstring w_args(length_wc, 0);
  CHECK(MultiByteToWideChar(
      CP_UTF8, 0, args_str.c_str(), args_str.length(), w_args.data(), length_wc));

  STARTUPINFOW startup_info = {0};
  startup_info.cb = sizeof(startup_info);
  PROCESS_INFORMATION process_info = {0};

  if (!CreateProcessW(path,
                      /** Use data() since lpCommandLine must be mutable. */
                      w_args.data(),
                      nullptr,
                      nullptr,
                      false,
                      0,
                      nullptr,
                      nullptr,
                      &startup_info,
                      &process_info))
  {
    ERROR("CreateProcessW");
    return false;
  }

  handle_ = process_info.hProcess;
  /* Only the process handle is kept; the thread handle is not needed. */
  CHECK(CloseHandle(process_info.hThread));

  return true;
}
BlenderSubprocess::~BlenderSubprocess()
{
  if (handle_ == nullptr) {
    return;
  }
  /* Closes our handle only; this does not terminate the subprocess itself. */
  CHECK(CloseHandle(handle_));
}
/* Poll the subprocess state. Always false when creation failed. */
bool BlenderSubprocess::is_running()
{
  if (handle_ == nullptr) {
    return false;
  }
  DWORD exit_code = 0;
  if (!GetExitCodeProcess(handle_, &exit_code)) {
    ERROR("GetExitCodeProcess");
    /* Assume the process is still running. */
    return true;
  }
  return exit_code == STILL_ACTIVE;
}
/**
 * Create (`is_owner == true`) or open a named file mapping and map it into the address space.
 * On any failure `data_` ends up null and `data_size_` 0.
 */
SharedMemory::SharedMemory(std::string name, size_t size, bool is_owner)
    : name_(name), is_owner_(is_owner)
{
  if (is_owner) {
    /* Split the 64-bit size into the high/low DWORDs the API expects; passing `size` directly as
     * the low DWORD would silently truncate mappings of 4GiB and more. */
    const DWORD size_hi = DWORD(uint64_t(size) >> 32);
    const DWORD size_lo = DWORD(size & 0xFFFFFFFF);
    handle_ = CreateFileMappingA(
        INVALID_HANDLE_VALUE, nullptr, PAGE_READWRITE, size_hi, size_lo, name.c_str());
    CHECK(handle_ /*Create*/);
  }
  else {
    handle_ = OpenFileMappingA(FILE_MAP_ALL_ACCESS, FALSE, name.c_str());
    CHECK(handle_ /*Open*/);
  }

  if (handle_) {
    data_ = MapViewOfFile(handle_, FILE_MAP_ALL_ACCESS, 0, 0, size);
    CHECK(data_);
  }
  else {
    data_ = nullptr;
  }

  data_size_ = data_ ? size : 0;
}
SharedMemory::~SharedMemory()
{
  /* Unmap the view before closing the mapping handle. */
  if (data_ != nullptr) {
    CHECK(UnmapViewOfFile(data_));
  }
  if (handle_ != nullptr) {
    CHECK(CloseHandle(handle_));
  }
}
SharedSemaphore::SharedSemaphore(std::string name, bool is_owner)
    : name_(name), is_owner_(is_owner)
{
  /* Creates the semaphore (initial count 0, maximum 1), or opens the existing one with the same
   * name. */
  HANDLE handle = CreateSemaphoreA(nullptr, 0, 1, name.c_str());
  CHECK(handle);
  handle_ = handle;
}
SharedSemaphore::~SharedSemaphore()
{
  if (handle_ == nullptr) {
    return;
  }
  CHECK(CloseHandle(handle_));
}
/* Increment the semaphore value (releases one waiter). */
void SharedSemaphore::increment()
{
  CHECK(ReleaseSemaphore(handle_, 1, nullptr));
}
/* Block until the semaphore value is greater than 0, then decrement it. */
void SharedSemaphore::decrement()
{
  CHECK(WaitForSingleObject(handle_, INFINITE) != WAIT_FAILED);
}
/* Bounded-wait decrement; returns true when the semaphore was acquired within `wait_ms`. */
bool SharedSemaphore::try_decrement(int wait_ms)
{
  DWORD result = WaitForSingleObject(handle_, wait_ms);
  CHECK(result != WAIT_FAILED);
  return result == WAIT_OBJECT_0;
}
} // namespace blender
# elif defined(__linux__)
# include "BLI_time.h"
# include "BLI_vector.hh"
# include <fcntl.h>
# include <linux/limits.h>
# include <stdlib.h>
# include <sys/mman.h>
# include <sys/stat.h>
# include <unistd.h>
# include <wait.h>
namespace blender {
/* Log the current `errno` together with the failing function and expression. */
static void print_last_error(const char *function, const char *msg)
{
  const int error_code = errno;
  const std::string error_msg = "ERROR (" + std::to_string(error_code) + "): " + function +
                                " : " + msg;
  perror(error_msg.c_str());
}
/* Assert and log when `result` is -1 (the POSIX error-return convention). */
static void check(int result, const char *function, const char *msg)
{
  if (result != -1) {
    return;
  }
  print_last_error(function, msg);
  BLI_assert(false);
}
# define CHECK(result) check((result), __func__, #result)
# define ERROR(msg) check(-1, __func__, msg)
/**
 * Fork and exec the current Blender executable with `args` as its command line.
 * Returns true in the parent on success; the child never returns (it either `execv`s or exits
 * with `errno`).
 */
bool BlenderSubprocess::create(Span<StringRefNull> args)
{
  if (!check_arguments_are_valid(args)) {
    BLI_assert(false);
    return false;
  }

  char path[PATH_MAX + 1];
  /* `readlink` returns `ssize_t`; keep the signed type so the -1 error check is exact instead of
   * relying on an unsigned wrap-around comparison. */
  const ssize_t len = readlink("/proc/self/exe", path, PATH_MAX);
  if (len == -1) {
    ERROR("readlink");
    return false;
  }
  /* readlink doesn't append a null terminator. */
  path[len] = '\0';

  /* Build the mutable, null-terminated argv array `execv` expects. */
  Vector<char *> char_args;
  for (StringRefNull arg : args) {
    char_args.append((char *)arg.data());
  }
  char_args.append(nullptr);

  pid_ = fork();
  if (pid_ == -1) {
    ERROR("fork");
    return false;
  }
  else if (pid_ > 0) {
    /* Parent process: keep the child PID for `is_running`. */
    return true;
  }

  /* Child process initialization. */
  execv(path, char_args.data());
  /* Only reached when `execv` failed. */
  ERROR("execv");
  exit(errno);
  return false;
}
/* Nothing to do: children are reparented when the parent exits (see the header note). */
BlenderSubprocess::~BlenderSubprocess() = default;
/* Poll the subprocess state, reaping it when it has exited. Always false when creation failed. */
bool BlenderSubprocess::is_running()
{
  /* `pid_` is 0 before `create()` ran and -1 when creation failed or the child was already
   * reaped. `waitpid(0, ...)` / `waitpid(-1, ...)` would wait on the whole process group or any
   * child, so guard against both values, not just -1. */
  if (pid_ <= 0) {
    return false;
  }
  pid_t result = waitpid(pid_, nullptr, WNOHANG);
  CHECK(result);
  if (result == pid_) {
    /* The child exited and has now been reaped; further `waitpid` calls would fail. */
    pid_ = -1;
    return false;
  }
  return true;
}
/**
 * Create (`is_owner == true`) or open a POSIX shared memory object and map it into the address
 * space. On any failure `data_` ends up null and `data_size_` 0.
 */
SharedMemory::SharedMemory(std::string name, size_t size, bool is_owner)
    : name_(name), is_owner_(is_owner)
{
  /* Owner-only read/write permissions. */
  constexpr mode_t user_mode = S_IRUSR | S_IWUSR;
  if (is_owner) {
    /* O_EXCL: fail instead of silently reusing a leftover object with the same name. */
    handle_ = shm_open(name.c_str(), O_CREAT | O_EXCL | O_RDWR, user_mode);
    CHECK(handle_);
    if (handle_ != -1) {
      /* A freshly created object has size 0; grow it to the requested size. */
      if (ftruncate(handle_, size) == -1) {
        ERROR("ftruncate");
        CHECK(close(handle_));
        handle_ = -1;
      }
    }
  }
  else {
    handle_ = shm_open(name.c_str(), O_RDWR, user_mode);
    CHECK(handle_);
  }

  if (handle_ != -1) {
    data_ = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, handle_, 0);
    if (data_ == MAP_FAILED) {
      ERROR("mmap");
      data_ = nullptr;
    }
    /* File descriptor can close after mmap. */
    CHECK(close(handle_));
  }
  else {
    data_ = nullptr;
  }

  data_size_ = data_ ? size : 0;
}
SharedMemory::~SharedMemory()
{
  if (data_ == nullptr) {
    return;
  }
  CHECK(munmap(data_, data_size_));
  if (is_owner_) {
    /* Remove the name so the object is freed once every process has unmapped it. */
    CHECK(shm_unlink(name_.c_str()));
  }
}
/**
 * Create or open a named POSIX semaphore with an initial value of 0.
 * On failure `handle_` is null and the error is logged.
 */
SharedSemaphore::SharedSemaphore(std::string name, bool is_owner)
    : name_(name), is_owner_(is_owner)
{
  /* Owner-only read/write permissions. */
  constexpr mode_t user_mode = S_IRUSR | S_IWUSR;
  handle_ = sem_open(name.c_str(), O_CREAT, user_mode, 0);
  /* `sem_open` reports failure with `SEM_FAILED` ((sem_t *)-1), not with a null pointer, so a
   * plain `!handle_` test can never detect an error. Normalize failures to null so the other
   * methods (which test `handle_` against null) stay consistent. */
  if (handle_ == SEM_FAILED) {
    ERROR("sem_open");
    handle_ = nullptr;
  }
}
SharedSemaphore::~SharedSemaphore()
{
  if (handle_ == nullptr) {
    return;
  }
  CHECK(sem_close(handle_));
  if (is_owner_) {
    /* Remove the name so the semaphore is freed once every process has closed it. */
    CHECK(sem_unlink(name_.c_str()));
  }
}
/* Increment the semaphore value (wakes one waiter). */
void SharedSemaphore::increment()
{
  CHECK(sem_post(handle_));
}
/* Block until the semaphore value is greater than 0, then decrement it. */
void SharedSemaphore::decrement()
{
  /* Retry while `sem_wait` is interrupted by a signal handler (EINTR). */
  while (sem_wait(handle_) == -1) {
    if (errno != EINTR) {
      ERROR("sem_wait");
      return;
    }
  }
}
/**
 * Try to decrement the semaphore value, waiting up to `wait_ms` milliseconds.
 * Returns true when the semaphore was acquired.
 */
bool SharedSemaphore::try_decrement(int wait_ms)
{
  if (wait_ms == 0) {
    int result = sem_trywait(handle_);
    if (result == 0) {
      return true;
    }
    else if (errno == EINVAL) {
      ERROR("sem_trywait");
    }
    return false;
  }

  timespec time;
  if (clock_gettime(CLOCK_REALTIME, &time) == -1) {
    ERROR("clock_gettime");
    /* Fallback: sleep for the requested time, then make one non-blocking attempt. */
    BLI_time_sleep_ms(wait_ms);
    return try_decrement(0);
  }

  /* Convert the relative wait into the absolute deadline `sem_timedwait` expects.
   * NOTE: 1 millisecond is 1e6 nanoseconds — the previous `10e6` (double) factor waited 10x
   * longer than requested — and `tv_nsec` must stay below 1e9 or `sem_timedwait` fails with
   * EINVAL, so carry any overflow into `tv_sec`. */
  time.tv_sec += wait_ms / 1000;
  time.tv_nsec += long(wait_ms % 1000) * 1000000;
  if (time.tv_nsec >= 1000000000) {
    time.tv_sec += 1;
    time.tv_nsec -= 1000000000;
  }

  while (true) {
    int result = sem_timedwait(handle_, &time);
    if (result == 0) {
      return true;
    }
    else if (errno != EINTR) {
      if (errno != ETIMEDOUT) {
        ERROR("sem_timedwait");
      }
      return false;
    }
    /* Try again if interrupted by handler. */
  }
}
} // namespace blender
# endif
#endif

@ -138,7 +138,10 @@ BsdfSample bxdf_ggx_sample_transmission(
/* Compute the GGX BxDF without the Fresnel term, multiplied by the cosine foreshortening term. */
BsdfEval bxdf_ggx_eval(vec3 N, vec3 L, vec3 V, float alpha, float eta, const bool do_reflection)
{
alpha = max(square(BSDF_ROUGHNESS_THRESHOLD), alpha);
/* This threshold was computed based on precision of NVidia compiler (see #118997).
* These drivers tend to produce NaNs in the computation of the NDF (`D`) if alpha is close to 0.
*/
alpha = max(1e-3, alpha);
float LV = dot(L, V);
float NV = dot(N, V);

@ -88,6 +88,7 @@ set(SRC
GPU_capabilities.hh
GPU_common.hh
GPU_common_types.hh
GPU_compilation_subprocess.hh
GPU_compute.hh
GPU_context.hh
GPU_debug.hh
@ -150,6 +151,7 @@ set(OPENGL_SRC
opengl/gl_backend.cc
opengl/gl_batch.cc
opengl/gl_compilation_subprocess.cc
opengl/gl_compute.cc
opengl/gl_context.cc
opengl/gl_debug.cc
@ -171,6 +173,7 @@ set(OPENGL_SRC
opengl/gl_backend.hh
opengl/gl_batch.hh
opengl/gl_compilation_subprocess.hh
opengl/gl_compute.hh
opengl/gl_context.hh
opengl/gl_debug.hh
@ -841,6 +844,10 @@ target_link_libraries(bf_gpu PUBLIC
bf_gpu_shaders
)
if(WITH_OPENGL_BACKEND AND UNIX)
target_link_libraries(bf_gpu PUBLIC rt)
endif()
if(WITH_OPENCOLORIO)
target_link_libraries(bf_gpu PUBLIC bf_ocio_shaders)
endif()

@ -40,6 +40,8 @@ const char *GPU_extension_get(int i);
int GPU_texture_size_with_limit(int res);
bool GPU_use_parallel_compilation();
bool GPU_mip_render_workaround();
bool GPU_depth_blitting_workaround();
bool GPU_use_main_context_workaround();

@ -0,0 +1,13 @@
/* SPDX-FileCopyrightText: 2024 Blender Authors
 *
 * SPDX-License-Identifier: GPL-2.0-or-later */

#pragma once

#include "BLI_subprocess.hh"

/* NOTE: `BLI_subprocess.hh` always defines `BLI_SUBPROCESS_SUPPORT` (as 0 or 1), so it must be
 * tested by value; `defined(BLI_SUBPROCESS_SUPPORT)` would be true on every platform. */
#if defined(WITH_OPENGL_BACKEND) && BLI_SUBPROCESS_SUPPORT

/* Entry point of a GPU shader-compilation subprocess, identified by `subprocess_name`. */
void GPU_compilation_subprocess_run(const char *subprocess_name);

#endif

@ -10,6 +10,8 @@
#pragma once
#include "BLI_span.hh"
#include "BLI_vector.hh"
#include "GPU_shader_builtin.hh"
namespace blender::gpu {
@ -59,6 +61,29 @@ const GPUShaderCreateInfo *GPU_shader_create_info_get(const char *info_name);
*/
bool GPU_shader_create_info_check_error(const GPUShaderCreateInfo *_info, char r_error[128]);
using BatchHandle = int64_t;
/**
* Request the creation of multiple shaders at once, allowing the backend to use multithreaded
* compilation. Returns a handle that can be used to poll if all shaders have been compiled, and to
* retrieve the compiled shaders.
* NOTE: This function is asynchronous on OpenGL, but it's blocking on Vulkan and Metal.
* WARNING: The GPUShaderCreateInfo pointers should be valid until `GPU_shader_batch_finalize` has
* returned.
*/
BatchHandle GPU_shader_batch_create_from_infos(blender::Span<const GPUShaderCreateInfo *> infos);
/**
* Returns true if all the shaders from the batch have finished their compilation.
*/
bool GPU_shader_batch_is_ready(BatchHandle handle);
/**
* Retrieve the compiled shaders, in the same order as the `GPUShaderCreateInfo`s.
* If the compilation has not finished yet, this call will block the thread until all the shaders
* are ready.
* Shaders with compilation errors are returned as null pointers.
* WARNING: The handle will be invalidated by this call, you can't request the same batch twice.
*/
blender::Vector<GPUShader *> GPU_shader_batch_finalize(BatchHandle &handle);
/** \} */
/* -------------------------------------------------------------------- */

@ -131,6 +131,11 @@ int GPU_max_samplers()
return GCaps.max_samplers;
}
/* True when the backend reports capacity for parallel shader compilation. */
bool GPU_use_parallel_compilation()
{
  const int max_compilations = GCaps.max_parallel_compilations;
  return max_compilations > 0;
}
bool GPU_mip_render_workaround()
{
return GCaps.mip_render_workaround;

@ -51,6 +51,8 @@ struct GPUCapabilities {
bool texture_view_support = true;
bool stencil_export_support = false;
int max_parallel_compilations = 0;
/* OpenGL related workarounds. */
bool mip_render_workaround = false;
bool depth_blitting_workaround = false;

@ -35,6 +35,8 @@ class Context {
StateManager *state_manager = nullptr;
Immediate *imm = nullptr;
ShaderCompiler *compiler = nullptr;
/**
* All 4 window frame-buffers.
* None of them are valid in an off-screen context.

@ -290,130 +290,7 @@ GPUShader *GPU_shader_create_from_info(const GPUShaderCreateInfo *_info)
{
using namespace blender::gpu::shader;
const ShaderCreateInfo &info = *reinterpret_cast<const ShaderCreateInfo *>(_info);
const_cast<ShaderCreateInfo &>(info).finalize();
GPU_debug_group_begin(GPU_DEBUG_SHADER_COMPILATION_GROUP);
const std::string error = info.check_error();
if (!error.empty()) {
std::cerr << error.c_str() << "\n";
BLI_assert(false);
}
Shader *shader = GPUBackend::get()->shader_alloc(info.name_.c_str());
shader->init(info);
shader->specialization_constants_init(info);
std::string defines = shader->defines_declare(info);
std::string resources = shader->resources_declare(info);
if (info.legacy_resource_location_ == false) {
defines += "#define USE_GPU_SHADER_CREATE_INFO\n";
}
Vector<const char *> typedefs;
if (!info.typedef_sources_.is_empty() || !info.typedef_source_generated.empty()) {
typedefs.append(gpu_shader_dependency_get_source("GPU_shader_shared_utils.hh").c_str());
}
if (!info.typedef_source_generated.empty()) {
typedefs.append(info.typedef_source_generated.c_str());
}
for (auto filename : info.typedef_sources_) {
typedefs.append(gpu_shader_dependency_get_source(filename).c_str());
}
if (!info.vertex_source_.is_empty()) {
auto code = gpu_shader_dependency_get_resolved_source(info.vertex_source_);
std::string interface = shader->vertex_interface_declare(info);
Vector<const char *> sources;
standard_defines(sources);
sources.append("#define GPU_VERTEX_SHADER\n");
if (!info.geometry_source_.is_empty()) {
sources.append("#define USE_GEOMETRY_SHADER\n");
}
sources.append(defines.c_str());
sources.extend(typedefs);
sources.append(resources.c_str());
sources.append(interface.c_str());
sources.extend(code);
sources.extend(info.dependencies_generated);
sources.append(info.vertex_source_generated.c_str());
shader->vertex_shader_from_glsl(sources);
}
if (!info.fragment_source_.is_empty()) {
auto code = gpu_shader_dependency_get_resolved_source(info.fragment_source_);
std::string interface = shader->fragment_interface_declare(info);
Vector<const char *> sources;
standard_defines(sources);
sources.append("#define GPU_FRAGMENT_SHADER\n");
if (!info.geometry_source_.is_empty()) {
sources.append("#define USE_GEOMETRY_SHADER\n");
}
sources.append(defines.c_str());
sources.extend(typedefs);
sources.append(resources.c_str());
sources.append(interface.c_str());
sources.extend(code);
sources.extend(info.dependencies_generated);
sources.append(info.fragment_source_generated.c_str());
shader->fragment_shader_from_glsl(sources);
}
if (!info.geometry_source_.is_empty()) {
auto code = gpu_shader_dependency_get_resolved_source(info.geometry_source_);
std::string layout = shader->geometry_layout_declare(info);
std::string interface = shader->geometry_interface_declare(info);
Vector<const char *> sources;
standard_defines(sources);
sources.append("#define GPU_GEOMETRY_SHADER\n");
sources.append(defines.c_str());
sources.extend(typedefs);
sources.append(resources.c_str());
sources.append(layout.c_str());
sources.append(interface.c_str());
sources.append(info.geometry_source_generated.c_str());
sources.extend(code);
shader->geometry_shader_from_glsl(sources);
}
if (!info.compute_source_.is_empty()) {
auto code = gpu_shader_dependency_get_resolved_source(info.compute_source_);
std::string layout = shader->compute_layout_declare(info);
Vector<const char *> sources;
standard_defines(sources);
sources.append("#define GPU_COMPUTE_SHADER\n");
sources.append(defines.c_str());
sources.extend(typedefs);
sources.append(resources.c_str());
sources.append(layout.c_str());
sources.extend(code);
sources.extend(info.dependencies_generated);
sources.append(info.compute_source_generated.c_str());
shader->compute_shader_from_glsl(sources);
}
if (info.tf_type_ != GPU_SHADER_TFB_NONE && info.tf_names_.size() > 0) {
shader->transform_feedback_names_set(info.tf_names_.as_span(), info.tf_type_);
}
if (!shader->finalize(&info)) {
delete shader;
GPU_debug_group_end();
return nullptr;
}
GPU_debug_group_end();
return wrap(shader);
return wrap(Context::get()->compiler->compile(info, false));
}
GPUShader *GPU_shader_create_from_python(const char *vertcode,
@ -450,6 +327,25 @@ GPUShader *GPU_shader_create_from_python(const char *vertcode,
return sh;
}
/* Forward the batch request to the active context's compiler.
 * NOTE(review): the cast assumes `GPUShaderCreateInfo` is the opaque public alias of
 * `ShaderCreateInfo` (the same reinterpret pattern `GPU_shader_create_from_info` uses). */
BatchHandle GPU_shader_batch_create_from_infos(Span<const GPUShaderCreateInfo *> infos)
{
  using namespace blender::gpu::shader;
  Span<const ShaderCreateInfo *> &infos_ = reinterpret_cast<Span<const ShaderCreateInfo *> &>(
      infos);
  return Context::get()->compiler->batch_compile(infos_);
}
/* Poll whether every shader in the batch has finished compiling. */
bool GPU_shader_batch_is_ready(BatchHandle handle)
{
  return Context::get()->compiler->batch_is_ready(handle);
}
/* Block until the batch is done and return the compiled shaders (see the header: null entries
 * for failed compilations; `handle` is invalidated by the underlying compiler). */
Vector<GPUShader *> GPU_shader_batch_finalize(BatchHandle &handle)
{
  Vector<Shader *> result = Context::get()->compiler->batch_finalize(handle);
  return reinterpret_cast<Vector<GPUShader *> &>(result);
}
void GPU_shader_compile_static()
{
printf("Compiling all static GPU shaders. This process takes a while.\n");
@ -880,4 +776,175 @@ void Shader::set_framebuffer_srgb_target(int use_srgb_to_linear)
/** \} */
/* -------------------------------------------------------------------- */
/** \name ShaderCompiler
* \{ */
/**
 * Assemble the full GLSL source for every stage declared in `info` and compile it into a new
 * `Shader`. Returns null (after ending the debug group) when the backend fails to finalize.
 * `is_batch_compilation` is forwarded to `Shader::init`.
 */
Shader *ShaderCompiler::compile(const shader::ShaderCreateInfo &info, bool is_batch_compilation)
{
  using namespace blender::gpu::shader;
  /* Callers pass a const reference, but finalization mutates the info in place. */
  const_cast<ShaderCreateInfo &>(info).finalize();

  GPU_debug_group_begin(GPU_DEBUG_SHADER_COMPILATION_GROUP);

  const std::string error = info.check_error();
  if (!error.empty()) {
    std::cerr << error.c_str() << "\n";
    BLI_assert(false);
  }

  Shader *shader = GPUBackend::get()->shader_alloc(info.name_.c_str());
  shader->init(info, is_batch_compilation);
  shader->specialization_constants_init(info);

  /* Preamble strings shared by all stages below.
   * NOTE(review): `typedefs` stores `c_str()` pointers — this assumes
   * `gpu_shader_dependency_get_source` returns a reference to persistent storage; confirm, as
   * the pointers would dangle otherwise. */
  std::string defines = shader->defines_declare(info);
  std::string resources = shader->resources_declare(info);

  if (info.legacy_resource_location_ == false) {
    defines += "#define USE_GPU_SHADER_CREATE_INFO\n";
  }

  Vector<const char *> typedefs;
  if (!info.typedef_sources_.is_empty() || !info.typedef_source_generated.empty()) {
    typedefs.append(gpu_shader_dependency_get_source("GPU_shader_shared_utils.hh").c_str());
  }
  if (!info.typedef_source_generated.empty()) {
    typedefs.append(info.typedef_source_generated.c_str());
  }
  for (auto filename : info.typedef_sources_) {
    typedefs.append(gpu_shader_dependency_get_source(filename).c_str());
  }

  /* Vertex stage. */
  if (!info.vertex_source_.is_empty()) {
    auto code = gpu_shader_dependency_get_resolved_source(info.vertex_source_);
    std::string interface = shader->vertex_interface_declare(info);

    Vector<const char *> sources;
    standard_defines(sources);
    sources.append("#define GPU_VERTEX_SHADER\n");
    if (!info.geometry_source_.is_empty()) {
      sources.append("#define USE_GEOMETRY_SHADER\n");
    }
    sources.append(defines.c_str());
    sources.extend(typedefs);
    sources.append(resources.c_str());
    sources.append(interface.c_str());
    sources.extend(code);
    sources.extend(info.dependencies_generated);
    sources.append(info.vertex_source_generated.c_str());

    shader->vertex_shader_from_glsl(sources);
  }

  /* Fragment stage. */
  if (!info.fragment_source_.is_empty()) {
    auto code = gpu_shader_dependency_get_resolved_source(info.fragment_source_);
    std::string interface = shader->fragment_interface_declare(info);

    Vector<const char *> sources;
    standard_defines(sources);
    sources.append("#define GPU_FRAGMENT_SHADER\n");
    if (!info.geometry_source_.is_empty()) {
      sources.append("#define USE_GEOMETRY_SHADER\n");
    }
    sources.append(defines.c_str());
    sources.extend(typedefs);
    sources.append(resources.c_str());
    sources.append(interface.c_str());
    sources.extend(code);
    sources.extend(info.dependencies_generated);
    sources.append(info.fragment_source_generated.c_str());

    shader->fragment_shader_from_glsl(sources);
  }

  /* Optional geometry stage. */
  if (!info.geometry_source_.is_empty()) {
    auto code = gpu_shader_dependency_get_resolved_source(info.geometry_source_);
    std::string layout = shader->geometry_layout_declare(info);
    std::string interface = shader->geometry_interface_declare(info);

    Vector<const char *> sources;
    standard_defines(sources);
    sources.append("#define GPU_GEOMETRY_SHADER\n");
    sources.append(defines.c_str());
    sources.extend(typedefs);
    sources.append(resources.c_str());
    sources.append(layout.c_str());
    sources.append(interface.c_str());
    sources.append(info.geometry_source_generated.c_str());
    sources.extend(code);

    shader->geometry_shader_from_glsl(sources);
  }

  /* Compute stage (mutually exclusive with the graphics stages in practice; not enforced here). */
  if (!info.compute_source_.is_empty()) {
    auto code = gpu_shader_dependency_get_resolved_source(info.compute_source_);
    std::string layout = shader->compute_layout_declare(info);

    Vector<const char *> sources;
    standard_defines(sources);
    sources.append("#define GPU_COMPUTE_SHADER\n");
    sources.append(defines.c_str());
    sources.extend(typedefs);
    sources.append(resources.c_str());
    sources.append(layout.c_str());
    sources.extend(code);
    sources.extend(info.dependencies_generated);
    sources.append(info.compute_source_generated.c_str());

    shader->compute_shader_from_glsl(sources);
  }

  if (info.tf_type_ != GPU_SHADER_TFB_NONE && info.tf_names_.size() > 0) {
    shader->transform_feedback_names_set(info.tf_names_.as_span(), info.tf_type_);
  }

  if (!shader->finalize(&info)) {
    delete shader;
    GPU_debug_group_end();
    return nullptr;
  }

  GPU_debug_group_end();

  return shader;
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name ShaderCompilerGeneric
* \{ */
/* A non-empty map here means some `batch_compile` request was never finalized. */
ShaderCompilerGeneric::~ShaderCompilerGeneric()
{
  /* Ensure all the requested batches have been retrieved. */
  BLI_assert(batches.is_empty());
}
/* Synchronous "batch": every shader is compiled before the handle is returned, so the batch is
 * marked ready immediately. */
BatchHandle ShaderCompilerGeneric::batch_compile(Span<const shader::ShaderCreateInfo *> &infos)
{
  const BatchHandle handle = next_batch_handle++;
  batches.add(handle, {{}, infos, true});
  Batch &batch = batches.lookup(handle);
  batch.shaders.reserve(infos.size());
  for (const shader::ShaderCreateInfo *create_info : infos) {
    batch.shaders.append(compile(*create_info, true));
  }
  return handle;
}
/* Generic batches finish inside `batch_compile`, so this reports the stored flag directly. */
bool ShaderCompilerGeneric::batch_is_ready(BatchHandle handle)
{
  return batches.lookup(handle).is_ready;
}
/* Transfer ownership of the compiled shaders to the caller. Popping removes the batch from the
 * map, and the handle is reset to 0 so it can't be reused. */
Vector<Shader *> ShaderCompilerGeneric::batch_finalize(BatchHandle &handle)
{
  Vector<Shader *> shaders = batches.pop(handle).shaders;
  handle = 0;
  return shaders;
}
/** \} */
} // namespace blender::gpu

@ -17,6 +17,7 @@
#include "BLI_map.hh"
#include <mutex>
#include <string>
namespace blender {
@ -77,7 +78,9 @@ class Shader {
Shader(const char *name);
virtual ~Shader();
virtual void init(const shader::ShaderCreateInfo &info) = 0;
/* `is_batch_compilation` is true when the shader is being compiled as part of a
* `GPU_shader_batch`. Backends that use the `ShaderCompilerGeneric` can ignore it. */
virtual void init(const shader::ShaderCreateInfo &info, bool is_batch_compilation) = 0;
virtual void vertex_shader_from_glsl(MutableSpan<const char *> sources) = 0;
virtual void geometry_shader_from_glsl(MutableSpan<const char *> sources) = 0;
@ -160,6 +163,43 @@ static inline const Shader *unwrap(const GPUShader *vert)
return reinterpret_cast<const Shader *>(vert);
}
class ShaderCompiler {
protected:
struct Sources {
std::string vert;
std::string geom;
std::string frag;
std::string comp;
};
public:
Shader *compile(const shader::ShaderCreateInfo &info, bool is_batch_compilation);
virtual BatchHandle batch_compile(Span<const shader::ShaderCreateInfo *> &infos) = 0;
virtual bool batch_is_ready(BatchHandle handle) = 0;
virtual Vector<Shader *> batch_finalize(BatchHandle &handle) = 0;
};
/* Generic (fully synchronous) implementation for backends that don't implement their own
* ShaderCompiler. Used by Vulkan and Metal. */
class ShaderCompilerGeneric : public ShaderCompiler {
private:
struct Batch {
Vector<Shader *> shaders;
Vector<const shader::ShaderCreateInfo *> infos;
bool is_ready = false;
};
BatchHandle next_batch_handle = 1;
Map<BatchHandle, Batch> batches;
public:
~ShaderCompilerGeneric();
virtual BatchHandle batch_compile(Span<const shader::ShaderCreateInfo *> &infos) override;
virtual bool batch_is_ready(BatchHandle handle) override;
virtual Vector<Shader *> batch_finalize(BatchHandle &handle) override;
};
enum class Severity {
Unknown,
Warning,

@ -267,6 +267,8 @@ MTLContext::MTLContext(void *ghost_window, void *ghost_context)
/* Initialize samplers. */
this->sampler_state_cache_init();
compiler = new ShaderCompilerGeneric();
}
MTLContext::~MTLContext()
@ -369,6 +371,8 @@ MTLContext::~MTLContext()
if (this->device) {
[this->device release];
}
delete compiler;
}
void MTLContext::begin_frame()

@ -277,7 +277,7 @@ class MTLShader : public Shader {
NSString *fragment_function_name_);
~MTLShader();
void init(const shader::ShaderCreateInfo & /*info*/) override {}
void init(const shader::ShaderCreateInfo & /*info*/, bool /*is_batch_compilation*/) override {}
/* Assign GLSL source. */
void vertex_shader_from_glsl(MutableSpan<const char *> sources) override;

@ -10,6 +10,9 @@
#if defined(WIN32)
# include "BLI_winstuff.h"
#endif
#include "BLI_subprocess.hh"
#include "BLI_threads.h"
#include "DNA_userdef_types.h"
#include "gpu_capabilities_private.hh"
#include "gpu_platform_private.hh"
@ -594,6 +597,13 @@ void GLBackend::capabilities_init()
detect_workarounds();
#if BLI_SUBPROCESS_SUPPORT
GCaps.max_parallel_compilations = std::min(int(U.max_shader_compilation_subprocesses),
BLI_system_thread_count());
#else
GCaps.max_parallel_compilations = 0;
#endif
/* Disable this feature entirely when not debugging. */
if ((G.debug & G_DEBUG_GPU) == 0) {
GLContext::debug_layer_support = false;

@ -39,6 +39,8 @@ class GLBackend : public GPUBackend {
renderdoc::api::Renderdoc renderdoc_;
#endif
GLShaderCompiler compiler_;
public:
GLBackend()
{
@ -64,6 +66,11 @@ class GLBackend : public GPUBackend {
return static_cast<GLBackend *>(GPUBackend::get());
}
GLShaderCompiler *get_compiler()
{
return &compiler_;
}
void samplers_update() override
{
GLTexture::samplers_update();

@ -0,0 +1,222 @@
/* SPDX-FileCopyrightText: 2024 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#include "gl_compilation_subprocess.hh"
#if BLI_SUBPROCESS_SUPPORT
# include "BKE_appdir.hh"
# include "BLI_fileops.hh"
# include "BLI_hash.hh"
# include "BLI_path_util.h"
# include "CLG_log.h"
# include "GHOST_C-api.h"
# include "GPU_context.hh"
# include "GPU_init_exit.hh"
# include <epoxy/gl.h>
# include <iostream>
# include <string>
# ifndef _WIN32
# include <unistd.h>
# endif
namespace blender::gpu {
/* RAII wrapper around a vertex+fragment GL program compiled inside the subprocess.
 * Any stage failure leaves `success_` false; partially created GL objects are
 * released by the destructor. */
class SubprocessShader {
  GLuint vert_ = 0;
  GLuint frag_ = 0;
  GLuint program_ = 0;
  bool success_ = false;

 public:
  SubprocessShader(const char *vert_src, const char *frag_src)
  {
    GLint status;

    vert_ = glCreateShader(GL_VERTEX_SHADER);
    glShaderSource(vert_, 1, &vert_src, nullptr);
    glCompileShader(vert_);
    glGetShaderiv(vert_, GL_COMPILE_STATUS, &status);
    if (!status) {
      return;
    }

    frag_ = glCreateShader(GL_FRAGMENT_SHADER);
    glShaderSource(frag_, 1, &frag_src, nullptr);
    glCompileShader(frag_);
    glGetShaderiv(frag_, GL_COMPILE_STATUS, &status);
    if (!status) {
      return;
    }

    program_ = glCreateProgram();
    glAttachShader(program_, vert_);
    glAttachShader(program_, frag_);
    glLinkProgram(program_);
    glGetProgramiv(program_, GL_LINK_STATUS, &status);
    if (!status) {
      return;
    }

    success_ = true;
  }

  ~SubprocessShader()
  {
    /* glDelete* silently ignores 0 handles, so partially constructed shaders are fine. */
    glDeleteShader(vert_);
    glDeleteShader(frag_);
    glDeleteProgram(program_);
  }

  /* Write the program binary (header + data) into `memory` (the shared pool).
   * On failure, or when the binary doesn't fit, `size` and `format` are both 0 so the
   * parent process treats it as a failed compilation. */
  ShaderBinaryHeader *get_binary(void *memory)
  {
    ShaderBinaryHeader *bin = reinterpret_cast<ShaderBinaryHeader *>(memory);
    bin->format = 0;
    bin->size = 0;
    if (success_) {
      glGetProgramiv(program_, GL_PROGRAM_BINARY_LENGTH, &bin->size);
      if (bin->size + sizeof(ShaderBinaryHeader) < compilation_subprocess_shared_memory_size) {
        glGetProgramBinary(program_, bin->size, nullptr, &bin->format, &bin->data_start);
      }
      else {
        /* The binary doesn't fit in the shared memory pool. Reset the size so callers don't
         * act on a length that has no matching data (the cache writer sizes its write with
         * `size`, which would otherwise read past the end of the shared memory). */
        bin->size = 0;
      }
    }
    return bin;
  }
};
/* Check if the binary is valid and can be loaded by the driver. */
static bool validate_binary(void *binary)
{
ShaderBinaryHeader *bin = reinterpret_cast<ShaderBinaryHeader *>(binary);
GLuint program = glCreateProgram();
glProgramBinary(program, bin->format, &bin->data_start, bin->size);
GLint status;
glGetProgramiv(program, GL_LINK_STATUS, &status);
glDeleteProgram(program);
return status;
}
} // namespace blender::gpu
/* Entry point of a shader-compilation subprocess (spawned by the parent Blender process
 * with `--compilation-subprocess <name>`).
 *
 * Protocol (all IPC objects are named after `subprocess_name`):
 * - Shared memory holds two consecutive null-terminated GLSL strings (vertex, fragment)
 *   on input, and a ShaderBinaryHeader + binary data on output.
 * - `_START` semaphore: parent signals a compilation request (or unblocks shutdown).
 * - `_END` semaphore: this process signals that the result is in shared memory.
 * - `_CLOSE` semaphore: parent requests termination.
 *
 * Results are also cached on disk (keyed by source hashes) to skip recompilation. */
void GPU_compilation_subprocess_run(const char *subprocess_name)
{
  using namespace blender;
  using namespace blender::gpu;

# ifndef _WIN32
  /** NOTE: Technically, the parent process could have crashed before this. */
  pid_t ppid = getppid();
# endif

  CLG_init();
  std::string name = subprocess_name;
  /* Open (not create) the pool that the parent process already allocated. */
  SharedMemory shared_mem(name, compilation_subprocess_shared_memory_size, false);
  if (!shared_mem.get_data()) {
    std::cerr << "Compilation Subprocess: Failed to open shared memory " << subprocess_name
              << "\n";
    return;
  }
  SharedSemaphore start_semaphore(name + "_START", true);
  SharedSemaphore end_semaphore(name + "_END", true);
  SharedSemaphore close_semaphore(name + "_CLOSE", true);

  /* A windowless GHOST system provides the GL context this process compiles with. */
  GHOST_SystemHandle ghost_system = GHOST_CreateSystemBackground();
  BLI_assert(ghost_system);
  GHOST_GPUSettings gpu_settings = {0};
  gpu_settings.context_type = GHOST_kDrawingContextTypeOpenGL;
  GHOST_ContextHandle ghost_context = GHOST_CreateGPUContext(ghost_system, gpu_settings);
  if (ghost_context == nullptr) {
    std::cerr << "Compilation Subprocess: Failed to initialize GHOST context for "
              << subprocess_name << "\n";
    GHOST_DisposeSystem(ghost_system);
    return;
  }
  GHOST_ActivateGPUContext(ghost_context);
  GPUContext *gpu_context = GPU_context_create(nullptr, ghost_context);
  GPU_init();

  /* The on-disk binary cache lives in the temp dir and is shared by all subprocesses. */
  BKE_tempdir_init(nullptr);
  std::string cache_dir = std::string(BKE_tempdir_base()) + "BLENDER_SHADER_CACHE" + SEP_STR;
  BLI_dir_create_recursive(cache_dir.c_str());

  while (true) {
    /* Process events to avoid crashes on Wayland.
     * See https://bugreports.qt.io/browse/QTBUG-81504 */
    GHOST_ProcessEvents(ghost_system, false);

# ifdef _WIN32
    start_semaphore.decrement();
# else
    /* On POSIX, poll with a timeout so an orphaned subprocess exits when its parent dies
     * (getppid() changes once the parent is gone). */
    bool lost_parent = false;
    while (!lost_parent && !start_semaphore.try_decrement(1000)) {
      lost_parent = getppid() != ppid;
    }
    if (lost_parent) {
      std::cerr << "Compilation Subprocess: Lost parent process\n";
      break;
    }
# endif

    if (close_semaphore.try_decrement()) {
      break;
    }

    /* The parent wrote two consecutive null-terminated GLSL strings into the pool. */
    const char *shaders = reinterpret_cast<const char *>(shared_mem.get_data());
    const char *vert_src = shaders;
    const char *frag_src = shaders + strlen(shaders) + 1;

    DefaultHash<StringRefNull> hasher;
    uint64_t vert_hash = hasher(vert_src);
    uint64_t frag_hash = hasher(frag_src);
    std::string hash_str = std::to_string(vert_hash) + "_" + std::to_string(frag_hash);
    std::string cache_path = cache_dir + SEP_STR + hash_str;

    /* TODO: This should lock the files? */
    if (BLI_exists(cache_path.c_str())) {
      /* Read cached binary. */
      fstream file(cache_path, std::ios::binary | std::ios::in | std::ios::ate);
      std::streamsize size = file.tellg();
      if (size <= compilation_subprocess_shared_memory_size) {
        file.seekg(0, std::ios::beg);
        file.read(reinterpret_cast<char *>(shared_mem.get_data()), size);
        /* Ensure it's valid. */
        if (validate_binary(shared_mem.get_data())) {
          end_semaphore.increment();
          continue;
        }
        else {
          /* Stale/incompatible cache entry (e.g. driver change): fall through and recompile. */
          std::cout << "Compilation Subprocess: Failed to load cached shader binary " << hash_str
                    << "\n";
        }
      }
      else {
        /* This should never happen, since shaders larger than the pool size should be discarded
         * and compiled in the main Blender process. */
        std::cerr << "Compilation Subprocess: Wrong size for cached shader binary " << hash_str
                  << "\n";
        BLI_assert_unreachable();
      }
    }

    /* Compile, publish the result to the parent, then persist it to the disk cache. */
    SubprocessShader shader(vert_src, frag_src);
    ShaderBinaryHeader *binary = shader.get_binary(shared_mem.get_data());
    end_semaphore.increment();

    fstream file(cache_path, std::ios::binary | std::ios::out);
    file.write(reinterpret_cast<char *>(shared_mem.get_data()),
               binary->size + offsetof(ShaderBinaryHeader, data_start));
  }

  GPU_exit();
  GPU_context_discard(gpu_context);
  GHOST_DisposeGPUContext(ghost_system, ghost_context);
  GHOST_DisposeSystem(ghost_system);
}
#endif

@ -0,0 +1,31 @@
/* SPDX-FileCopyrightText: 2024 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma once
#include "GPU_compilation_subprocess.hh"
#if BLI_SUBPROCESS_SUPPORT
# include "BLI_sys_types.h"
namespace blender::gpu {
/* The size of the memory pools shared by Blender and the compilation subprocesses.
 * Each pool must hold either the two GLSL sources (input) or the resulting program
 * binary plus its header (output). */
constexpr size_t compilation_subprocess_shared_memory_size = 1024 * 1024 * 5; /* 5 MiB. */

/* Header placed at the start of a shared memory pool when it contains a compiled
 * program binary. The binary data immediately follows `data_start`. */
struct ShaderBinaryHeader {
  /* Size of the shader binary data. */
  int32_t size;
  /* Magic number that identifies the format of this shader binary (Driver-defined).
   * This (and size) is set to 0 when the shader has failed to compile. */
  uint32_t format;
  /* When casting a shared memory pool into a ShaderBinaryHeader*, this is the first byte of the
   * shader binary data. */
  uint8_t data_start;
};
} // namespace blender::gpu
#endif

@ -84,6 +84,8 @@ GLContext::GLContext(void *ghost_window, GLSharedOrphanLists &shared_orphan_list
active_fb = back_left;
static_cast<GLStateManager *>(state_manager)->active_fb = static_cast<GLFrameBuffer *>(
active_fb);
compiler = GLBackend::get()->get_compiler();
}
GLContext::~GLContext()

@ -8,22 +8,34 @@
#include <iomanip>
#include "BKE_appdir.hh"
#include "BKE_global.hh"
#include "BLI_string.h"
#include "BLI_time.h"
#include "BLI_vector.hh"
#include "BLI_system.h"
#include BLI_SYSTEM_PID_H
#include "GPU_capabilities.hh"
#include "GPU_platform.hh"
#include "gpu_capabilities_private.hh"
#include "gpu_shader_dependency_private.hh"
#include "gl_debug.hh"
#include "gl_vertex_buffer.hh"
#include "gl_compilation_subprocess.hh"
#include "gl_shader.hh"
#include "gl_shader_interface.hh"
#include <sstream>
#include <stdio.h>
#ifdef WIN32
# define popen _popen
# define pclose _pclose
#endif
using namespace blender;
using namespace blender::gpu;
@ -51,8 +63,10 @@ GLShader::~GLShader()
#endif
}
void GLShader::init(const shader::ShaderCreateInfo &info)
void GLShader::init(const shader::ShaderCreateInfo &info, bool is_batch_compilation)
{
async_compilation_ = is_batch_compilation;
/* Extract the constants names from info and store them locally. */
for (const ShaderCreateInfo::SpecializationConstant &constant : info.specialization_constants_) {
specialization_constant_names_.append(constant.name.c_str());
@ -1093,14 +1107,8 @@ const char *GLShader::glsl_patch_get(GLenum gl_stage)
GLuint GLShader::create_shader_stage(GLenum gl_stage,
MutableSpan<const char *> sources,
const GLSources &gl_sources)
GLSources &gl_sources)
{
GLuint shader = glCreateShader(gl_stage);
if (shader == 0) {
fprintf(stderr, "GLShader: Error: Could not create shader object.\n");
return 0;
}
/* Patch the shader sources to include specialization constants. */
std::string constants_source;
Vector<const char *> recreated_sources;
@ -1117,6 +1125,12 @@ GLuint GLShader::create_shader_stage(GLenum gl_stage,
sources[SOURCES_INDEX_VERSION] = glsl_patch_get(gl_stage);
sources[SOURCES_INDEX_SPECIALIZATION_CONSTANTS] = constants_source.c_str();
if (async_compilation_) {
gl_sources[SOURCES_INDEX_VERSION].source = std::string(sources[SOURCES_INDEX_VERSION]);
gl_sources[SOURCES_INDEX_SPECIALIZATION_CONSTANTS].source = std::string(
sources[SOURCES_INDEX_SPECIALIZATION_CONSTANTS]);
}
if (DEBUG_LOG_SHADER_SRC_ON_ERROR) {
/* Store the generated source for printing in case the link fails. */
StringRefNull source_type;
@ -1141,6 +1155,17 @@ GLuint GLShader::create_shader_stage(GLenum gl_stage,
}
}
if (async_compilation_) {
/* Only build the sources. */
return 0;
}
GLuint shader = glCreateShader(gl_stage);
if (shader == 0) {
fprintf(stderr, "GLShader: Error: Could not create shader object.\n");
return 0;
}
glShaderSource(shader, sources.size(), sources.data(), nullptr);
glCompileShader(shader);
@ -1180,8 +1205,8 @@ GLuint GLShader::create_shader_stage(GLenum gl_stage,
void GLShader::update_program_and_sources(GLSources &stage_sources,
MutableSpan<const char *> sources)
{
const bool has_specialization_constants = !constants.types.is_empty();
if (has_specialization_constants && stage_sources.is_empty()) {
const bool store_sources = !constants.types.is_empty() || async_compilation_;
if (store_sources && stage_sources.is_empty()) {
stage_sources = sources;
}
@ -1231,10 +1256,23 @@ bool GLShader::finalize(const shader::ShaderCreateInfo *info)
geometry_shader_from_glsl(sources);
}
if (!program_link()) {
if (async_compilation_) {
return true;
}
program_link();
return post_finalize(info);
}
bool GLShader::post_finalize(const shader::ShaderCreateInfo *info)
{
if (!check_link_status()) {
return false;
}
/* Reset for specialization constants variations. */
async_compilation_ = false;
GLuint program_id = program_get();
if (info != nullptr && info->legacy_resource_location_ == false) {
interface = new GLShaderInterface(program_id, *info);
@ -1450,13 +1488,18 @@ GLShader::GLProgram::~GLProgram()
glDeleteProgram(program_id);
}
bool GLShader::program_link()
void GLShader::program_link()
{
BLI_assert(program_active_ != nullptr);
if (program_active_->program_id == 0) {
program_active_->program_id = glCreateProgram();
debug::object_label(GL_PROGRAM, program_active_->program_id, name);
}
if (async_compilation_) {
return;
}
GLuint program_id = program_active_->program_id;
if (program_active_->vert_shader) {
@ -1472,7 +1515,11 @@ bool GLShader::program_link()
glAttachShader(program_id, program_active_->compute_shader);
}
glLinkProgram(program_id);
}
bool GLShader::check_link_status()
{
GLuint program_id = program_active_->program_id;
GLint status;
glGetProgramiv(program_id, GL_LINK_STATUS, &status);
if (!status) {
@ -1542,3 +1589,256 @@ GLuint GLShader::program_get()
}
/** \} */
#if BLI_SUBPROCESS_SUPPORT
/* -------------------------------------------------------------------- */
/** \name Compiler workers
* \{ */
/* Spawn one compilation subprocess and create the IPC objects (shared memory and
 * START/END/CLOSE semaphores) used to talk to it. The IPC name is unique per parent
 * process (pid) and per worker (pipe_id).
 * NOTE(review): `pipe_id` is a plain static counter; presumably workers are only ever
 * created under GLShaderCompiler's mutex — confirm before constructing workers from
 * multiple threads. */
GLCompilerWorker::GLCompilerWorker()
{
  static size_t pipe_id = 0;
  pipe_id++;

  std::string name = "BLENDER_SHADER_COMPILER_" + std::to_string(getpid()) + "_" +
                     std::to_string(pipe_id);

  /* Parent side creates the shared memory (is_owner = true); the subprocess opens it. */
  shared_mem_ = std::make_unique<SharedMemory>(
      name, compilation_subprocess_shared_memory_size, true);
  start_semaphore_ = std::make_unique<SharedSemaphore>(name + "_START", false);
  end_semaphore_ = std::make_unique<SharedSemaphore>(name + "_END", false);
  close_semaphore_ = std::make_unique<SharedSemaphore>(name + "_CLOSE", false);
  subprocess_.create({"--compilation-subprocess", name.c_str()});
}
/* Request subprocess shutdown. The subprocess checks the CLOSE semaphore right after
 * being woken by START, so both must be flagged for it to exit its wait loop. */
GLCompilerWorker::~GLCompilerWorker()
{
  close_semaphore_->increment();
  /* Flag start so the subprocess can reach the close semaphore. */
  start_semaphore_->increment();
}
/* Send a compilation request: copy both GLSL sources into the shared memory pool
 * (two consecutive null-terminated strings) and wake the subprocess. */
void GLCompilerWorker::compile(StringRefNull vert, StringRefNull frag)
{
  BLI_assert(state_ == AVAILABLE);

  /* Copy the sources back to back, including their null terminators, which is the
   * layout the subprocess expects when splitting the pool back into two strings. */
  char *pool = static_cast<char *>(shared_mem_->get_data());
  memcpy(pool, vert.c_str(), vert.size() + 1);
  memcpy(pool + vert.size() + 1, frag.c_str(), frag.size() + 1);

  start_semaphore_->increment();
  state_ = COMPILATION_REQUESTED;
  compilation_start = BLI_time_now_seconds();
}
/* Non-blocking poll: returns true once the subprocess has signaled completion. */
bool GLCompilerWorker::is_ready()
{
  BLI_assert(ELEM(state_, COMPILATION_REQUESTED, COMPILATION_READY));

  /* Only consume the END semaphore once; afterwards the READY state is sticky. */
  if (state_ == COMPILATION_REQUESTED && end_semaphore_->try_decrement()) {
    state_ = COMPILATION_READY;
  }
  return state_ == COMPILATION_READY;
}
/* Detect a dead or unresponsive subprocess so the caller can fall back to local
 * compilation instead of waiting forever. */
bool GLCompilerWorker::is_lost()
{
  /* Use a timeout for hanged processes. */
  constexpr double max_timeout_seconds = 30.0;
  const double elapsed = BLI_time_now_seconds() - compilation_start;
  return !subprocess_.is_running() || elapsed > max_timeout_seconds;
}
/* Load the subprocess result into `program`. Blocks until the result is available.
 * Returns false when the subprocess reported a failed compilation (size == 0). */
bool GLCompilerWorker::load_program_binary(GLint program)
{
  BLI_assert(ELEM(state_, COMPILATION_REQUESTED, COMPILATION_READY));

  /* If the caller never polled is_ready(), wait for the END signal here. */
  if (state_ == COMPILATION_REQUESTED) {
    end_semaphore_->decrement();
    state_ = COMPILATION_READY;
  }

  ShaderBinaryHeader *header = reinterpret_cast<ShaderBinaryHeader *>(shared_mem_->get_data());
  state_ = COMPILATION_FINISHED;

  /* A zero size means the subprocess failed to produce a binary. */
  if (header->size <= 0) {
    return false;
  }
  glProgramBinary(program, header->format, &header->data_start, header->size);
  return true;
}
/* Mark this worker as idle so get_compiler_worker() can hand it a new request. */
void GLCompilerWorker::release()
{
  state_ = AVAILABLE;
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name GLShaderCompiler
* \{ */
/* All batches must have been finalized before the compiler is destroyed; worker
 * destruction signals each subprocess to shut down. */
GLShaderCompiler::~GLShaderCompiler()
{
  BLI_assert(batches.is_empty());
  for (GLCompilerWorker *worker : workers_) {
    delete worker;
  }
}
/* Acquire a worker and start compiling `vert`/`frag` on it. Reuses an idle worker when
 * possible, spawns a new one while under the parallel-compilation budget, and returns
 * null when all workers are busy and the budget is exhausted. */
GLCompilerWorker *GLShaderCompiler::get_compiler_worker(const char *vert, const char *frag)
{
  GLCompilerWorker *worker = nullptr;
  /* Prefer reusing an already spawned, idle worker. */
  for (GLCompilerWorker *candidate : workers_) {
    if (candidate->state_ == GLCompilerWorker::AVAILABLE) {
      worker = candidate;
      break;
    }
  }
  /* Otherwise spawn a new subprocess if the budget allows it. */
  if (worker == nullptr && workers_.size() < GCaps.max_parallel_compilations) {
    worker = new GLCompilerWorker();
    workers_.append(worker);
  }
  if (worker != nullptr) {
    worker->compile(vert, frag);
  }
  return worker;
}
/* Check whether `worker`'s subprocess died or hung. If so, destroy the worker, null
 * the caller's pointer, and return true. */
bool GLShaderCompiler::worker_is_lost(GLCompilerWorker *&worker)
{
  if (!worker->is_lost()) {
    return false;
  }
  std::cerr << "ERROR: Compilation subprocess lost\n";
  workers_.remove_first_occurrence_and_reorder(worker);
  delete worker;
  worker = nullptr;
  return true;
}
/* Register a batch of shaders for (mostly) asynchronous compilation in subprocesses.
 * Shaders that can't go through a subprocess (compute/geometry stages, or sources too
 * large for the shared memory pool) are compiled locally later in batch_is_ready(). */
BatchHandle GLShaderCompiler::batch_compile(Span<const shader::ShaderCreateInfo *> &infos)
{
  BLI_assert(GPU_use_parallel_compilation());

  std::scoped_lock lock(mutex_);

  BatchHandle handle = next_batch_handle++;
  batches.add(handle, {});
  Batch &batch = batches.lookup(handle);
  batch.items.reserve(infos.size());
  batch.is_ready = false;

  for (const shader::ShaderCreateInfo *info : infos) {
    const_cast<ShaderCreateInfo *>(info)->finalize();
    CompilationWork item = {};
    item.info = info;
    /* Subprocesses only handle plain vertex+fragment programs. */
    item.do_async_compilation = !info->vertex_source_.is_empty() &&
                                !info->fragment_source_.is_empty() &&
                                info->compute_source_.is_empty() &&
                                info->geometry_source_.is_empty();
    if (item.do_async_compilation) {
      /* Build the patched GLSL sources without compiling (async/batch mode). */
      item.shader = static_cast<GLShader *>(compile(*info, true));
      for (const char *src : item.shader->vertex_sources_.sources_get()) {
        item.vertex_src.append(src);
      }
      for (const char *src : item.shader->fragment_sources_.sources_get()) {
        item.fragment_src.append(src);
      }
      /* Both sources are copied into the shared memory pool as two consecutive
       * null-terminated strings, so the two terminators need space as well. */
      size_t required_size = item.vertex_src.size() + item.fragment_src.size() + 2;
      if (required_size <= compilation_subprocess_shared_memory_size) {
        /* May be null when all workers are busy; batch_is_ready() retries later. */
        item.worker = get_compiler_worker(item.vertex_src.c_str(), item.fragment_src.c_str());
      }
      else {
        /* Too large for the shared memory pool; fall back to local compilation and
         * don't keep a dangling pointer to the deleted shader. */
        delete item.shader;
        item.shader = nullptr;
        item.do_async_compilation = false;
      }
    }
    batch.items.append(item);
  }
  return handle;
}
/* Advance the compilation state machine of every item in the batch and report whether
 * all of them are done. Called repeatedly (polled); each call:
 * - compiles non-async items locally,
 * - assigns idle workers to items still waiting for one,
 * - collects finished worker results (falling back to local compilation on failure),
 * - demotes items whose worker died or hung to local compilation on the next call. */
bool GLShaderCompiler::batch_is_ready(BatchHandle handle)
{
  std::scoped_lock lock(mutex_);

  Batch &batch = batches.lookup(handle);
  if (batch.is_ready) {
    return true;
  }

  /* Assume ready; any still-pending item resets this below. */
  batch.is_ready = true;
  for (CompilationWork &item : batch.items) {
    if (item.is_ready) {
      continue;
    }

    if (!item.do_async_compilation) {
      /* Compile it locally. */
      item.shader = static_cast<GLShader *>(compile(*item.info, false));
      item.is_ready = true;
      continue;
    }

    if (!item.worker) {
      /* Try to acquire an available worker. */
      item.worker = get_compiler_worker(item.vertex_src.c_str(), item.fragment_src.c_str());
    }
    else if (item.worker->is_ready()) {
      /* Retrieve the binary compiled by the worker. */
      if (!item.worker->load_program_binary(item.shader->program_active_->program_id) ||
          !item.shader->post_finalize(item.info))
      {
        /* Compilation failed, try to compile it locally. */
        delete item.shader;
        item.shader = nullptr;
        item.do_async_compilation = false;
      }
      else {
        item.is_ready = true;
      }
      /* The worker is done either way; return it to the pool. */
      item.worker->release();
      item.worker = nullptr;
    }
    else if (worker_is_lost(item.worker)) {
      /* We lost the worker, try to compile it locally. */
      delete item.shader;
      item.shader = nullptr;
      item.do_async_compilation = false;
    }

    if (!item.is_ready) {
      batch.is_ready = false;
    }
  }

  return batch.is_ready;
}
/* Block until the whole batch has compiled, then hand the shaders to the caller and
 * invalidate the handle. */
Vector<Shader *> GLShaderCompiler::batch_finalize(BatchHandle &handle)
{
  /* Poll with a short sleep; batch_is_ready() also drives the compilation forward. */
  while (!batch_is_ready(handle)) {
    BLI_time_sleep_ms(1);
  }
  std::scoped_lock lock(mutex_);

  Batch finished = batches.pop(handle);
  handle = 0;

  Vector<Shader *> result;
  result.reserve(finished.items.size());
  for (CompilationWork &item : finished.items) {
    result.append(item.shader);
  }
  return result;
}
/** \} */
#endif

@ -13,10 +13,14 @@
#include <epoxy/gl.h>
#include "BLI_map.hh"
#include "BLI_subprocess.hh"
#include "BLI_utility_mixins.hh"
#include "gpu_shader_create_info.hh"
#include "gpu_shader_private.hh"
#include <functional>
namespace blender::gpu {
/**
@ -48,6 +52,7 @@ class GLSources : public Vector<GLSource> {
class GLShader : public Shader {
friend shader::ShaderCreateInfo;
friend shader::StageInterfaceInfo;
friend class GLShaderCompiler;
private:
struct GLProgram {
@ -85,6 +90,8 @@ class GLShader : public Shader {
*/
GLProgram *program_active_ = nullptr;
bool async_compilation_ = false;
/**
* When the shader uses Specialization Constants these attribute contains the sources to
* rebuild shader stages. When Specialization Constants aren't used they are empty to
@ -112,7 +119,8 @@ class GLShader : public Shader {
/**
* Link the active program.
*/
bool program_link();
void program_link();
bool check_link_status();
/**
* Return a GLProgram program id that reflects the current state of shader.constants.values.
@ -131,7 +139,7 @@ class GLShader : public Shader {
GLShader(const char *name);
~GLShader();
void init(const shader::ShaderCreateInfo &info) override;
void init(const shader::ShaderCreateInfo &info, bool is_batch_compilation) override;
/** Return true on success. */
void vertex_shader_from_glsl(MutableSpan<const char *> sources) override;
@ -139,6 +147,7 @@ class GLShader : public Shader {
void fragment_shader_from_glsl(MutableSpan<const char *> sources) override;
void compute_shader_from_glsl(MutableSpan<const char *> sources) override;
bool finalize(const shader::ShaderCreateInfo *info = nullptr) override;
bool post_finalize(const shader::ShaderCreateInfo *info = nullptr);
void warm_cache(int /*limit*/) override{};
std::string resources_declare(const shader::ShaderCreateInfo &info) const override;
@ -191,7 +200,7 @@ class GLShader : public Shader {
/** Create, compile and attach the shader stage to the shader program. */
GLuint create_shader_stage(GLenum gl_stage,
MutableSpan<const char *> sources,
const GLSources &gl_sources);
GLSources &gl_sources);
/**
* \brief features available on newer implementation such as native barycentric coordinates
@ -204,6 +213,84 @@ class GLShader : public Shader {
MEM_CXX_CLASS_ALLOC_FUNCS("GLShader");
};
#if BLI_SUBPROCESS_SUPPORT
class GLCompilerWorker {
friend class GLShaderCompiler;
private:
BlenderSubprocess subprocess_;
std::unique_ptr<SharedMemory> shared_mem_;
std::unique_ptr<SharedSemaphore> start_semaphore_;
std::unique_ptr<SharedSemaphore> end_semaphore_;
std::unique_ptr<SharedSemaphore> close_semaphore_;
enum eState {
/* The worker has been acquired and the compilation has been requested. */
COMPILATION_REQUESTED,
/* The shader binary result is ready to be read. */
COMPILATION_READY,
/* The binary result has been loaded into a program and the worker can be released. */
COMPILATION_FINISHED,
/* The worker is not currently in use and can be acquired. */
AVAILABLE
};
eState state_ = AVAILABLE;
double compilation_start = 0;
GLCompilerWorker();
~GLCompilerWorker();
void compile(StringRefNull vert, StringRefNull frag);
bool is_ready();
bool load_program_binary(GLint program);
void release();
/* Check if the process may have closed/crashed/hanged. */
bool is_lost();
};
class GLShaderCompiler : public ShaderCompiler {
private:
std::mutex mutex_;
Vector<GLCompilerWorker *> workers_;
struct CompilationWork {
GLCompilerWorker *worker = nullptr;
GLShader *shader = nullptr;
const shader::ShaderCreateInfo *info = nullptr;
bool do_async_compilation = false;
std::string vertex_src;
std::string fragment_src;
bool is_ready = false;
};
struct Batch {
Vector<CompilationWork> items;
bool is_ready = false;
};
BatchHandle next_batch_handle = 1;
Map<BatchHandle, Batch> batches;
GLCompilerWorker *get_compiler_worker(const char *vert, const char *frag);
bool worker_is_lost(GLCompilerWorker *&worker);
public:
~GLShaderCompiler();
virtual BatchHandle batch_compile(Span<const shader::ShaderCreateInfo *> &infos) override;
virtual bool batch_is_ready(BatchHandle handle) override;
virtual Vector<Shader *> batch_finalize(BatchHandle &handle) override;
};
#else
class GLShaderCompiler : public ShaderCompilerGeneric {};
#endif
class GLLogParser : public GPULogParser {
public:
const char *parse_line(const char *source_combined,

@ -36,6 +36,8 @@ VKContext::VKContext(void *ghost_window,
VKFrameBuffer *framebuffer = new VKFrameBuffer("back_left");
back_left = framebuffer;
active_fb = framebuffer;
compiler = new ShaderCompilerGeneric();
}
VKContext::~VKContext()
@ -51,6 +53,8 @@ VKContext::~VKContext()
delete imm;
imm = nullptr;
delete compiler;
}
void VKContext::sync_backbuffer()

@ -568,7 +568,7 @@ VKShader::VKShader(const char *name) : Shader(name)
context_ = VKContext::get();
}
void VKShader::init(const shader::ShaderCreateInfo &info)
void VKShader::init(const shader::ShaderCreateInfo &info, bool /*is_batch_compilation*/)
{
VKShaderInterface *vk_interface = new VKShaderInterface();
vk_interface->init(info);

@ -49,7 +49,7 @@ class VKShader : public Shader {
VKShader(const char *name);
virtual ~VKShader();
void init(const shader::ShaderCreateInfo &info) override;
void init(const shader::ShaderCreateInfo &info, bool is_batch_compilation) override;
void vertex_shader_from_glsl(MutableSpan<const char *> sources) override;
void geometry_shader_from_glsl(MutableSpan<const char *> sources) override;

@ -979,11 +979,12 @@ typedef struct UserDef {
/** #eGPUBackendType */
short gpu_backend;
/** Max number of parallel shader compilation subprocesses. */
short max_shader_compilation_subprocesses;
/** Number of samples for FPS display calculations. */
short playback_fps_samples;
char _pad7[2];
/** Private, defaults to 20 for 72 DPI setting. */
short widget_unit;
short anisotropic_filter;

@ -6196,6 +6196,15 @@ static void rna_def_userdef_system(BlenderRNA *brna)
"GPU Backend",
"GPU backend to use (requires restarting Blender for changes to take effect)");
prop = RNA_def_property(srna, "max_shader_compilation_subprocesses", PROP_INT, PROP_NONE);
RNA_def_property_range(prop, 0, INT16_MAX);
RNA_def_property_ui_text(prop,
"Max Shader Compilation Subprocesses",
"Max number of parallel shader compilation subprocesses, "
"clamped at the max threads supported by the CPU "
"(requires restarting Blender for changes to take effect). "
"Setting it to 0 disables subprocess shader compilation ");
/* Network. */
prop = RNA_def_property(srna, "use_online_access", PROP_BOOLEAN, PROP_NONE);

@ -71,6 +71,8 @@
#include "RNA_define.hh"
#include "GPU_compilation_subprocess.hh"
#ifdef WITH_FREESTYLE
# include "FRS_freestyle.h"
#endif
@ -328,6 +330,14 @@ int main(int argc,
# endif /* USE_WIN32_UNICODE_ARGS */
#endif /* WIN32 */
#if defined(WITH_OPENGL_BACKEND) && defined(BLI_SUBPROCESS_SUPPORT)
if (strcmp(argv[0], "--compilation-subprocess") == 0) {
BLI_assert(argc == 2);
GPU_compilation_subprocess_run(argv[1]);
return 0;
}
#endif
/* NOTE: Special exception for guarded allocator type switch:
* we need to perform switch from lock-free to fully
* guarded allocator before any allocation happened.