Merge branch 'blender-v4.2-release'

This commit is contained in:
Sergey Sharybin 2024-06-07 17:54:45 +02:00
commit 9c270d6a48
36 changed files with 123 additions and 72 deletions

@ -14,9 +14,9 @@
CCL_NAMESPACE_BEGIN
Device *device_cpu_create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
Device *device_cpu_create(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless)
{
return new CPUDevice(info, stats, profiler);
return new CPUDevice(info, stats, profiler, headless);
}
void device_cpu_info(vector<DeviceInfo> &devices)

@ -14,7 +14,7 @@ class DeviceInfo;
class Profiler;
class Stats;
Device *device_cpu_create(const DeviceInfo &info, Stats &stats, Profiler &profiler);
Device *device_cpu_create(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless);
void device_cpu_info(vector<DeviceInfo> &devices);

@ -55,8 +55,8 @@
CCL_NAMESPACE_BEGIN
CPUDevice::CPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_)
: Device(info_, stats_, profiler_), texture_info(this, "texture_info", MEM_GLOBAL)
CPUDevice::CPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool headless_)
: Device(info_, stats_, profiler_, headless_), texture_info(this, "texture_info", MEM_GLOBAL)
{
/* Pick any kernel, all of them are supposed to have same level of microarchitecture
* optimization. */

@ -54,7 +54,7 @@ class CPUDevice : public Device {
mutable unique_ptr<openpgl::cpp::Device> guiding_device;
#endif
CPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_);
CPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool headless_);
~CPUDevice();
virtual BVHLayoutMask get_bvh_layout_mask(uint /*kernel_features*/) const override;

@ -60,14 +60,15 @@ bool device_cuda_init()
#endif /* WITH_CUDA_DYNLOAD */
}
Device *device_cuda_create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
Device *device_cuda_create(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless)
{
#ifdef WITH_CUDA
return new CUDADevice(info, stats, profiler);
return new CUDADevice(info, stats, profiler, headless);
#else
(void)info;
(void)stats;
(void)profiler;
(void)headless;
LOG(FATAL) << "Request to create CUDA device without compiled-in support. Should never happen.";

@ -16,7 +16,10 @@ class Stats;
bool device_cuda_init();
Device *device_cuda_create(const DeviceInfo &info, Stats &stats, Profiler &profiler);
Device *device_cuda_create(const DeviceInfo &info,
Stats &stats,
Profiler &profiler,
bool headless);
void device_cuda_info(vector<DeviceInfo> &devices);

@ -53,8 +53,8 @@ void CUDADevice::set_error(const string &error)
}
}
CUDADevice::CUDADevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
: GPUDevice(info, stats, profiler)
CUDADevice::CUDADevice(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless)
: GPUDevice(info, stats, profiler, headless)
{
/* Verify that base class types can be used with specific backend types */
static_assert(sizeof(texMemObject) == sizeof(CUtexObject));
@ -965,6 +965,13 @@ bool CUDADevice::should_use_graphics_interop()
* possible, but from the empiric measurements it can be considerably slower than using naive
* pixels copy. */
if (headless) {
/* Avoid any call which might involve interaction with a graphics backend when we know that
* we don't have an active graphics context. This avoids a crash on certain platforms when
* calling cuGLGetDevices(). */
return false;
}
CUDAContextScope scope(this);
int num_all_devices = 0;

@ -43,7 +43,7 @@ class CUDADevice : public GPUDevice {
void set_error(const string &error) override;
CUDADevice(const DeviceInfo &info, Stats &stats, Profiler &profiler);
CUDADevice(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless);
virtual ~CUDADevice();

@ -62,52 +62,52 @@ void Device::build_bvh(BVH *bvh, Progress &progress, bool refit)
}
}
Device *Device::create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
Device *Device::create(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless)
{
if (!info.multi_devices.empty()) {
/* Always create a multi device when info contains multiple devices.
* This is done so that the type can still be e.g. DEVICE_CPU to indicate
* that it is a homogeneous collection of devices, which simplifies checks. */
return device_multi_create(info, stats, profiler);
return device_multi_create(info, stats, profiler, headless);
}
Device *device = NULL;
switch (info.type) {
case DEVICE_CPU:
device = device_cpu_create(info, stats, profiler);
device = device_cpu_create(info, stats, profiler, headless);
break;
#ifdef WITH_CUDA
case DEVICE_CUDA:
if (device_cuda_init()) {
device = device_cuda_create(info, stats, profiler);
device = device_cuda_create(info, stats, profiler, headless);
}
break;
#endif
#ifdef WITH_OPTIX
case DEVICE_OPTIX:
if (device_optix_init())
device = device_optix_create(info, stats, profiler);
device = device_optix_create(info, stats, profiler, headless);
break;
#endif
#ifdef WITH_HIP
case DEVICE_HIP:
if (device_hip_init())
device = device_hip_create(info, stats, profiler);
device = device_hip_create(info, stats, profiler, headless);
break;
#endif
#ifdef WITH_METAL
case DEVICE_METAL:
if (device_metal_init())
device = device_metal_create(info, stats, profiler);
device = device_metal_create(info, stats, profiler, headless);
break;
#endif
#ifdef WITH_ONEAPI
case DEVICE_ONEAPI:
device = device_oneapi_create(info, stats, profiler);
device = device_oneapi_create(info, stats, profiler, headless);
break;
#endif
@ -116,7 +116,7 @@ Device *Device::create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
}
if (device == NULL) {
device = device_dummy_create(info, stats, profiler);
device = device_dummy_create(info, stats, profiler, headless);
}
return device;

@ -139,8 +139,8 @@ class Device {
friend class device_sub_ptr;
protected:
Device(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_)
: info(info_), stats(stats_), profiler(profiler_)
Device(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool headless_)
: info(info_), stats(stats_), profiler(profiler_), headless(headless_)
{
}
@ -181,6 +181,7 @@ class Device {
/* statistics */
Stats &stats;
Profiler &profiler;
bool headless = true;
/* constant memory */
virtual void const_copy_to(const char *name, void *host, size_t size) = 0;
@ -287,7 +288,7 @@ class Device {
}
/* static */
static Device *create(const DeviceInfo &info, Stats &stats, Profiler &profiler);
static Device *create(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless);
static DeviceType type_from_string(const char *name);
static string string_from_type(DeviceType type);
@ -332,8 +333,8 @@ class Device {
/* Device, which is GPU, with some common functionality for GPU back-ends. */
class GPUDevice : public Device {
protected:
GPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_)
: Device(info_, stats_, profiler_),
GPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool headless_)
: Device(info_, stats_, profiler_, headless_),
texture_info(this, "texture_info", MEM_GLOBAL),
need_texture_info(false),
can_map_host(false),

@ -13,8 +13,8 @@ CCL_NAMESPACE_BEGIN
class DummyDevice : public Device {
public:
DummyDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_)
: Device(info_, stats_, profiler_)
DummyDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool headless_)
: Device(info_, stats_, profiler_, headless_)
{
error_msg = info.error_msg;
}
@ -39,9 +39,12 @@ class DummyDevice : public Device {
virtual void const_copy_to(const char *, void *, size_t) override {}
};
Device *device_dummy_create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
Device *device_dummy_create(const DeviceInfo &info,
Stats &stats,
Profiler &profiler,
bool headless)
{
return new DummyDevice(info, stats, profiler);
return new DummyDevice(info, stats, profiler, headless);
}
CCL_NAMESPACE_END

@ -14,6 +14,9 @@ class DeviceInfo;
class Profiler;
class Stats;
Device *device_dummy_create(const DeviceInfo &info, Stats &stats, Profiler &profiler);
Device *device_dummy_create(const DeviceInfo &info,
Stats &stats,
Profiler &profiler,
bool headless);
CCL_NAMESPACE_END

@ -70,19 +70,20 @@ bool device_hip_init()
#endif /* WITH_HIP_DYNLOAD */
}
Device *device_hip_create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
Device *device_hip_create(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless)
{
#ifdef WITH_HIPRT
if (info.use_hardware_raytracing)
return new HIPRTDevice(info, stats, profiler);
return new HIPRTDevice(info, stats, profiler, headless);
else
return new HIPDevice(info, stats, profiler);
return new HIPDevice(info, stats, profiler, headless);
#elif defined(WITH_HIP)
return new HIPDevice(info, stats, profiler);
return new HIPDevice(info, stats, profiler, headless);
#else
(void)info;
(void)stats;
(void)profiler;
(void)headless;
LOG(FATAL) << "Request to create HIP device without compiled-in support. Should never happen.";

@ -16,7 +16,7 @@ class Stats;
bool device_hip_init();
Device *device_hip_create(const DeviceInfo &info, Stats &stats, Profiler &profiler);
Device *device_hip_create(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless);
void device_hip_info(vector<DeviceInfo> &devices);

@ -53,8 +53,8 @@ void HIPDevice::set_error(const string &error)
}
}
HIPDevice::HIPDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
: GPUDevice(info, stats, profiler)
HIPDevice::HIPDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless)
: GPUDevice(info, stats, profiler, headless)
{
/* Verify that base class types can be used with specific backend types */
static_assert(sizeof(texMemObject) == sizeof(hipTextureObject_t));
@ -907,6 +907,12 @@ bool HIPDevice::should_use_graphics_interop()
* possible, but from the empiric measurements it can be considerably slower than using naive
* pixels copy. */
if (headless) {
/* Avoid any call which might involve interaction with a graphics backend when we know that
* we don't have an active graphics context. This avoids a potential crash in the driver. */
return false;
}
/* Disable graphics interop for now, because of driver bug in 21.40. See #92972 */
# if 0
HIPContextScope scope(this);

@ -43,7 +43,7 @@ class HIPDevice : public GPUDevice {
void set_error(const string &error) override;
HIPDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler);
HIPDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless);
virtual ~HIPDevice();

@ -57,8 +57,8 @@ BVHLayoutMask HIPRTDevice::get_bvh_layout_mask(const uint /* kernel_features */)
return BVH_LAYOUT_HIPRT;
}
HIPRTDevice::HIPRTDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
: HIPDevice(info, stats, profiler),
HIPRTDevice::HIPRTDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless)
: HIPDevice(info, stats, profiler, headless),
global_stack_buffer(this, "global_stack_buffer", MEM_DEVICE_ONLY),
hiprt_context(NULL),
scene(NULL),

@ -33,7 +33,7 @@ class HIPRTDevice : public HIPDevice {
public:
virtual BVHLayoutMask get_bvh_layout_mask(const uint kernel_features) const override;
HIPRTDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler);
HIPRTDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless);
virtual ~HIPRTDevice();
virtual unique_ptr<DeviceQueue> gpu_queue_create() override;

@ -16,7 +16,10 @@ class Stats;
bool device_metal_init();
Device *device_metal_create(const DeviceInfo &info, Stats &stats, Profiler &profiler);
Device *device_metal_create(const DeviceInfo &info,
Stats &stats,
Profiler &profiler,
bool headless);
void device_metal_info(vector<DeviceInfo> &devices);

@ -18,9 +18,12 @@ CCL_NAMESPACE_BEGIN
#ifdef WITH_METAL
Device *device_metal_create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
Device *device_metal_create(const DeviceInfo &info,
Stats &stats,
Profiler &profiler,
bool headless)
{
return new MetalDevice(info, stats, profiler);
return new MetalDevice(info, stats, profiler, headless);
}
bool device_metal_init()

@ -106,7 +106,7 @@ class MetalDevice : public Device {
void set_error(const string &error) override;
MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler);
MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless);
virtual ~MetalDevice();

@ -61,8 +61,8 @@ void MetalDevice::set_error(const string &error)
}
}
MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
: Device(info, stats, profiler), texture_info(this, "texture_info", MEM_GLOBAL)
MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless)
: Device(info, stats, profiler, headless), texture_info(this, "texture_info", MEM_GLOBAL)
{
@autoreleasepool {
{

@ -35,8 +35,8 @@ class MultiDevice : public Device {
device_ptr unique_key;
vector<vector<SubDevice *>> peer_islands;
MultiDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
: Device(info, stats, profiler), unique_key(1)
MultiDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless)
: Device(info, stats, profiler, headless), unique_key(1)
{
foreach (const DeviceInfo &subinfo, info.multi_devices) {
/* Always add CPU devices at the back since GPU devices can change
@ -53,7 +53,7 @@ class MultiDevice : public Device {
/* The pointer to 'sub->stats' will stay valid even after new devices
* are added, since 'devices' is a linked list. */
sub->device = Device::create(subinfo, sub->stats, profiler);
sub->device = Device::create(subinfo, sub->stats, profiler, headless);
}
/* Build a list of peer islands for the available render devices */
@ -467,9 +467,12 @@ class MultiDevice : public Device {
}
};
Device *device_multi_create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
Device *device_multi_create(const DeviceInfo &info,
Stats &stats,
Profiler &profiler,
bool headless)
{
return new MultiDevice(info, stats, profiler);
return new MultiDevice(info, stats, profiler, headless);
}
CCL_NAMESPACE_END

@ -14,6 +14,9 @@ class DeviceInfo;
class Profiler;
class Stats;
Device *device_multi_create(const DeviceInfo &info, Stats &stats, Profiler &profiler);
Device *device_multi_create(const DeviceInfo &info,
Stats &stats,
Profiler &profiler,
bool headless);
CCL_NAMESPACE_END

@ -76,10 +76,13 @@ bool device_oneapi_init()
#endif
}
Device *device_oneapi_create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
Device *device_oneapi_create(const DeviceInfo &info,
Stats &stats,
Profiler &profiler,
bool headless)
{
#ifdef WITH_ONEAPI
return new OneapiDevice(info, stats, profiler);
return new OneapiDevice(info, stats, profiler, headless);
#else
(void)info;
(void)stats;

@ -16,7 +16,10 @@ class Stats;
bool device_oneapi_init();
Device *device_oneapi_create(const DeviceInfo &info, Stats &stats, Profiler &profiler);
Device *device_oneapi_create(const DeviceInfo &info,
Stats &stats,
Profiler &profiler,
bool headless);
void device_oneapi_info(vector<DeviceInfo> &devices);

@ -47,8 +47,8 @@ static void queue_error_cb(const char *message, void *user_ptr)
}
}
OneapiDevice::OneapiDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
: GPUDevice(info, stats, profiler),
OneapiDevice::OneapiDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless)
: GPUDevice(info, stats, profiler, headless),
device_queue_(nullptr),
# ifdef WITH_EMBREE_GPU
embree_device(nullptr),

@ -48,7 +48,7 @@ class OneapiDevice : public GPUDevice {
public:
virtual BVHLayoutMask get_bvh_layout_mask(uint kernel_features) const override;
OneapiDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler);
OneapiDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless);
virtual ~OneapiDevice();
# ifdef WITH_EMBREE_GPU

@ -96,14 +96,18 @@ void device_optix_info(const vector<DeviceInfo> &cuda_devices, vector<DeviceInfo
#endif
}
Device *device_optix_create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
Device *device_optix_create(const DeviceInfo &info,
Stats &stats,
Profiler &profiler,
bool headless)
{
#ifdef WITH_OPTIX
return new OptiXDevice(info, stats, profiler);
return new OptiXDevice(info, stats, profiler, headless);
#else
(void)info;
(void)stats;
(void)profiler;
(void)headless;
LOG(FATAL) << "Request to create OptiX device without compiled-in support. Should never happen.";

@ -16,7 +16,10 @@ class Stats;
bool device_optix_init();
Device *device_optix_create(const DeviceInfo &info, Stats &stats, Profiler &profiler);
Device *device_optix_create(const DeviceInfo &info,
Stats &stats,
Profiler &profiler,
bool headless);
void device_optix_info(const vector<DeviceInfo> &cuda_devices, vector<DeviceInfo> &devices);

@ -50,8 +50,8 @@ static void execute_optix_task(TaskPool &pool, OptixTask task, OptixResult &fail
}
# endif
OptiXDevice::OptiXDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
: CUDADevice(info, stats, profiler),
OptiXDevice::OptiXDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless)
: CUDADevice(info, stats, profiler, headless),
sbt_data(this, "__sbt", MEM_READ_ONLY),
launch_params(this, "kernel_params", false)
{

@ -88,7 +88,7 @@ class OptiXDevice : public CUDADevice {
thread_mutex delayed_free_bvh_mutex;
public:
OptiXDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler);
OptiXDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless);
~OptiXDevice();
BVHLayoutMask get_bvh_layout_mask(uint /*kernel_features*/) const override;

@ -40,7 +40,8 @@ PathTrace::PathTrace(Device *device,
vector<DeviceInfo> cpu_devices;
device_cpu_info(cpu_devices);
cpu_device_.reset(device_cpu_create(cpu_devices[0], device->stats, device->profiler));
cpu_device_.reset(
device_cpu_create(cpu_devices[0], device->stats, device->profiler, device_->headless));
}
/* Create path tracing work in advance, so that it can be reused by incremental sampling as much

@ -606,12 +606,12 @@ DenoiserPipeline::DenoiserPipeline(DeviceInfo &denoiser_device_info, const Denoi
TaskScheduler::init();
/* Initialize device. */
device = Device::create(denoiser_device_info, stats, profiler);
device = Device::create(denoiser_device_info, stats, profiler, true);
device->load_kernels(KERNEL_FEATURE_DENOISING);
vector<DeviceInfo> cpu_devices;
device_cpu_info(cpu_devices);
cpu_device = device_cpu_create(cpu_devices[0], device->stats, device->profiler);
cpu_device = device_cpu_create(cpu_devices[0], device->stats, device->profiler, true);
denoiser = Denoiser::create(device, cpu_device, params);
denoiser->load_kernels(nullptr);

@ -42,7 +42,7 @@ Session::Session(const SessionParams &params_, const SceneParams &scene_params)
pause_ = false;
new_work_added_ = false;
device = Device::create(params.device, stats, profiler);
device = Device::create(params.device, stats, profiler, params_.headless);
if (device->have_error()) {
progress.set_error(device->error_message());
@ -54,7 +54,7 @@ Session::Session(const SessionParams &params_, const SceneParams &scene_params)
denoise_device = device;
}
else {
denoise_device = Device::create(params.denoise_device, stats, profiler);
denoise_device = Device::create(params.denoise_device, stats, profiler, params_.headless);
if (denoise_device->have_error()) {
progress.set_error(denoise_device->error_message());

@ -172,7 +172,7 @@ class RenderGraph : public testing::Test {
* the same raw configuration. */
ColorSpaceManager::init_fallback_config();
device_cpu = Device::create(device_info, stats, profiler);
device_cpu = Device::create(device_info, stats, profiler, true);
scene = new Scene(scene_params, device_cpu);
/* Initialize logging after the creation of the essential resources. This way the logging