Merge remote-tracking branch 'origin/blender-v2.93-release'

This commit is contained in:
Sybren A. Stüvel 2021-05-20 13:00:07 +02:00
commit 0745afeddb
14 changed files with 118 additions and 130 deletions

@ -17,6 +17,8 @@
#ifdef WITH_OPTIX
# include "device/device.h"
# include "bvh/bvh_optix.h"
CCL_NAMESPACE_BEGIN
@ -26,6 +28,7 @@ BVHOptiX::BVHOptiX(const BVHParams &params_,
const vector<Object *> &objects_,
Device *device)
: BVH(params_, geometry_, objects_),
device(device),
traversable_handle(0),
as_data(device, params_.top_level ? "optix tlas" : "optix blas", false),
motion_transform_data(device, "optix motion transform", false)
@ -34,7 +37,9 @@ BVHOptiX::BVHOptiX(const BVHParams &params_,
BVHOptiX::~BVHOptiX()
{
// Acceleration structure memory is freed via the 'as_data' destructor
// Acceleration structure memory is delayed freed on device, since deleting the
// BVH may happen while still being used for rendering.
device->release_optix_bvh(this);
}
CCL_NAMESPACE_END

@ -28,6 +28,7 @@ CCL_NAMESPACE_BEGIN
class BVHOptiX : public BVH {
public:
Device *device;
uint64_t traversable_handle;
device_only_memory<char> as_data;
device_only_memory<char> motion_transform_data;

@ -61,7 +61,6 @@ enum DeviceTypeMask {
};
enum DeviceKernelStatus {
DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL = 0,
DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE,
DEVICE_KERNEL_USING_FEATURE_KERNEL,
DEVICE_KERNEL_FEATURE_KERNEL_INVALID,
@ -427,6 +426,9 @@ class Device {
/* acceleration structure building */
virtual void build_bvh(BVH *bvh, Progress &progress, bool refit);
/* OptiX specific destructor. */
virtual void release_optix_bvh(BVH *){};
#ifdef WITH_NETWORK
/* networking */
void server_run();

@ -35,10 +35,54 @@ device_memory::device_memory(Device *device, const char *name, MemoryType type)
device_pointer(0),
host_pointer(0),
shared_pointer(0),
shared_counter(0)
shared_counter(0),
original_device_ptr(0),
original_device_size(0),
original_device(0),
need_realloc_(false),
modified(false)
{
}
device_memory::device_memory(device_memory &&other) noexcept
: data_type(other.data_type),
data_elements(other.data_elements),
data_size(other.data_size),
device_size(other.device_size),
data_width(other.data_width),
data_height(other.data_height),
data_depth(other.data_depth),
type(other.type),
name(other.name),
device(other.device),
device_pointer(other.device_pointer),
host_pointer(other.host_pointer),
shared_pointer(other.shared_pointer),
shared_counter(other.shared_counter),
original_device_ptr(other.original_device_ptr),
original_device_size(other.original_device_size),
original_device(other.original_device),
need_realloc_(other.need_realloc_),
modified(other.modified)
{
other.data_elements = 0;
other.data_size = 0;
other.device_size = 0;
other.data_width = 0;
other.data_height = 0;
other.data_depth = 0;
other.device = 0;
other.device_pointer = 0;
other.host_pointer = 0;
other.shared_pointer = 0;
other.shared_counter = 0;
other.original_device_ptr = 0;
other.original_device_size = 0;
other.original_device = 0;
other.need_realloc_ = false;
other.modified = false;
}
device_memory::~device_memory()
{
assert(shared_pointer == 0);

@ -238,6 +238,7 @@ class device_memory {
/* Only create through subclasses. */
device_memory(Device *device, const char *name, MemoryType type);
device_memory(device_memory &&other) noexcept;
/* No copying allowed. */
device_memory(const device_memory &) = delete;
@ -277,6 +278,10 @@ template<typename T> class device_only_memory : public device_memory {
data_elements = max(device_type_traits<T>::num_elements, 1);
}
device_only_memory(device_only_memory &&other) noexcept : device_memory(std::move(other))
{
}
virtual ~device_only_memory()
{
free();

@ -232,10 +232,6 @@ class MultiDevice : public Device {
foreach (SubDevice &sub, devices) {
DeviceKernelStatus subresult = sub.device->get_active_kernel_switch_state();
switch (subresult) {
case DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL:
result = subresult;
break;
case DEVICE_KERNEL_FEATURE_KERNEL_INVALID:
case DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE:
return subresult;

@ -193,6 +193,9 @@ class OptiXDevice : public CUDADevice {
device_only_memory<unsigned char> denoiser_state;
int denoiser_input_passes = 0;
vector<device_only_memory<char>> delayed_free_bvh_memory;
thread_mutex delayed_free_bvh_mutex;
public:
OptiXDevice(DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool background_)
: CUDADevice(info_, stats_, profiler_, background_),
@ -258,6 +261,8 @@ class OptiXDevice : public CUDADevice {
// Make CUDA context current
const CUDAContextScope scope(cuContext);
free_bvh_memory_delayed();
sbt_data.free();
texture_info.free();
launch_params.free();
@ -1297,6 +1302,8 @@ class OptiXDevice : public CUDADevice {
return;
}
free_bvh_memory_delayed();
BVHOptiX *const bvh_optix = static_cast<BVHOptiX *>(bvh);
progress.set_substatus("Building OptiX acceleration structure");
@ -1767,6 +1774,24 @@ class OptiXDevice : public CUDADevice {
}
}
void release_optix_bvh(BVH *bvh) override
{
thread_scoped_lock lock(delayed_free_bvh_mutex);
/* Do delayed free of BVH memory, since geometry holding BVH might be deleted
* while GPU is still rendering. */
BVHOptiX *const bvh_optix = static_cast<BVHOptiX *>(bvh);
delayed_free_bvh_memory.emplace_back(std::move(bvh_optix->as_data));
delayed_free_bvh_memory.emplace_back(std::move(bvh_optix->motion_transform_data));
bvh_optix->traversable_handle = 0;
}
void free_bvh_memory_delayed()
{
thread_scoped_lock lock(delayed_free_bvh_mutex);
delayed_free_bvh_memory.free_memory();
}
void const_copy_to(const char *name, void *host, size_t size) override
{
// Set constant memory for CUDA module

@ -269,7 +269,6 @@ class OpenCLDevice : public Device {
cl_device_id cdDevice;
cl_int ciErr;
int device_num;
bool use_preview_kernels;
class OpenCLProgram {
public:
@ -369,8 +368,7 @@ class OpenCLDevice : public Device {
/* Load the kernels and put the created kernels in the given
* `programs` parameter. */
void load_kernels(vector<OpenCLProgram *> &programs,
const DeviceRequestedFeatures &requested_features,
bool is_preview = false);
const DeviceRequestedFeatures &requested_features);
};
DeviceSplitKernel *split_kernel;
@ -382,7 +380,6 @@ class OpenCLDevice : public Device {
OpenCLProgram denoising_program;
OpenCLSplitPrograms kernel_programs;
OpenCLSplitPrograms preview_programs;
typedef map<string, device_vector<uchar> *> ConstMemMap;
typedef map<string, device_ptr> MemMap;
@ -412,7 +409,6 @@ class OpenCLDevice : public Device {
string device_md5_hash(string kernel_custom_build_options = "");
bool load_kernels(const DeviceRequestedFeatures &requested_features);
void load_required_kernels(const DeviceRequestedFeatures &requested_features);
void load_preview_kernels();
bool wait_for_availability(const DeviceRequestedFeatures &requested_features);
DeviceKernelStatus get_active_kernel_switch_state();
@ -422,8 +418,7 @@ class OpenCLDevice : public Device {
/* Get the program file name to compile (*.cl) for the given kernel */
const string get_opencl_program_filename(const string &kernel_name);
string get_build_options(const DeviceRequestedFeatures &requested_features,
const string &opencl_program_name,
bool preview_kernel = false);
const string &opencl_program_name);
/* Enable the default features to reduce recompilation events */
void enable_default_features(DeviceRequestedFeatures &features);

@ -107,8 +107,7 @@ void OpenCLDevice::enable_default_features(DeviceRequestedFeatures &features)
}
string OpenCLDevice::get_build_options(const DeviceRequestedFeatures &requested_features,
const string &opencl_program_name,
bool preview_kernel)
const string &opencl_program_name)
{
/* first check for non-split kernel programs */
if (opencl_program_name == "base" || opencl_program_name == "denoising") {
@ -185,13 +184,7 @@ string OpenCLDevice::get_build_options(const DeviceRequestedFeatures &requested_
enable_default_features(nofeatures);
/* Add program specific optimized compile directives */
if (preview_kernel) {
DeviceRequestedFeatures preview_features;
preview_features.use_hair = true;
build_options += "-D__KERNEL_AO_PREVIEW__ ";
build_options += preview_features.get_build_options();
}
else if (opencl_program_name == "split_do_volume" && !requested_features.use_volume) {
if (opencl_program_name == "split_do_volume" && !requested_features.use_volume) {
build_options += nofeatures.get_build_options();
}
else {
@ -238,9 +231,7 @@ OpenCLDevice::OpenCLSplitPrograms::~OpenCLSplitPrograms()
}
void OpenCLDevice::OpenCLSplitPrograms::load_kernels(
vector<OpenCLProgram *> &programs,
const DeviceRequestedFeatures &requested_features,
bool is_preview)
vector<OpenCLProgram *> &programs, const DeviceRequestedFeatures &requested_features)
{
if (!requested_features.use_baking) {
# define ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(kernel_name) \
@ -251,7 +242,7 @@ void OpenCLDevice::OpenCLSplitPrograms::load_kernels(
device, \
program_name_##kernel_name, \
"kernel_" #kernel_name ".cl", \
device->get_build_options(requested_features, program_name_##kernel_name, is_preview)); \
device->get_build_options(requested_features, program_name_##kernel_name)); \
program_##kernel_name.add_kernel(ustring("path_trace_" #kernel_name)); \
programs.push_back(&program_##kernel_name);
@ -259,7 +250,7 @@ void OpenCLDevice::OpenCLSplitPrograms::load_kernels(
ADD_SPLIT_KERNEL_PROGRAM(subsurface_scatter);
ADD_SPLIT_KERNEL_PROGRAM(direct_lighting);
ADD_SPLIT_KERNEL_PROGRAM(indirect_background);
if (requested_features.use_volume || is_preview) {
if (requested_features.use_volume) {
ADD_SPLIT_KERNEL_PROGRAM(do_volume);
}
ADD_SPLIT_KERNEL_PROGRAM(shader_eval);
@ -274,7 +265,7 @@ void OpenCLDevice::OpenCLSplitPrograms::load_kernels(
device,
"split_bundle",
"kernel_split_bundle.cl",
device->get_build_options(requested_features, "split_bundle", is_preview));
device->get_build_options(requested_features, "split_bundle"));
ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(data_init);
ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(state_buffer_size);
@ -403,7 +394,7 @@ class OpenCLSplitKernel : public DeviceSplitKernel {
device,
program_name,
device->get_opencl_program_filename(kernel_name),
device->get_build_options(requested_features, program_name, device->use_preview_kernels));
device->get_build_options(requested_features, program_name));
kernel->program.add_kernel(ustring("path_trace_" + kernel_name));
kernel->program.load();
@ -617,7 +608,6 @@ OpenCLDevice::OpenCLDevice(DeviceInfo &info, Stats &stats, Profiler &profiler, b
: Device(info, stats, profiler, background),
load_kernel_num_compiling(0),
kernel_programs(this),
preview_programs(this),
memory_manager(this),
texture_info(this, "__texture_info", MEM_GLOBAL)
{
@ -627,7 +617,6 @@ OpenCLDevice::OpenCLDevice(DeviceInfo &info, Stats &stats, Profiler &profiler, b
cqCommandQueue = NULL;
device_initialized = false;
textures_need_update = true;
use_preview_kernels = !background;
vector<OpenCLPlatformDevice> usable_devices;
OpenCLInfo::get_usable_devices(&usable_devices);
@ -683,9 +672,6 @@ OpenCLDevice::OpenCLDevice(DeviceInfo &info, Stats &stats, Profiler &profiler, b
device_initialized = true;
split_kernel = new OpenCLSplitKernel(this);
if (use_preview_kernels) {
load_preview_kernels();
}
}
OpenCLDevice::~OpenCLDevice()
@ -776,7 +762,7 @@ bool OpenCLDevice::load_kernels(const DeviceRequestedFeatures &requested_feature
load_required_kernels(requested_features);
vector<OpenCLProgram *> programs;
kernel_programs.load_kernels(programs, requested_features, false);
kernel_programs.load_kernels(programs, requested_features);
if (!requested_features.use_baking && requested_features.use_denoising) {
denoising_program = OpenCLProgram(
@ -854,19 +840,6 @@ void OpenCLDevice::load_required_kernels(const DeviceRequestedFeatures &requeste
}
}
void OpenCLDevice::load_preview_kernels()
{
DeviceRequestedFeatures no_features;
vector<OpenCLProgram *> programs;
preview_programs.load_kernels(programs, no_features, true);
foreach (OpenCLProgram *program, programs) {
if (!program->load()) {
load_required_kernel_task_pool.push(function_bind(&OpenCLProgram::compile, program));
}
}
}
bool OpenCLDevice::wait_for_availability(const DeviceRequestedFeatures &requested_features)
{
if (requested_features.use_baking) {
@ -874,61 +847,20 @@ bool OpenCLDevice::wait_for_availability(const DeviceRequestedFeatures &requeste
return true;
}
if (background) {
load_kernel_task_pool.wait_work();
use_preview_kernels = false;
}
else {
/* We use a device setting to determine to load preview kernels or not
* Better to check on device level than per kernel as mixing preview and
* non-preview kernels does not work due to different data types */
if (use_preview_kernels) {
use_preview_kernels = load_kernel_num_compiling.load() > 0;
}
}
return split_kernel->load_kernels(requested_features);
}
OpenCLDevice::OpenCLSplitPrograms *OpenCLDevice::get_split_programs()
{
return use_preview_kernels ? &preview_programs : &kernel_programs;
return &kernel_programs;
}
DeviceKernelStatus OpenCLDevice::get_active_kernel_switch_state()
{
/* Do not switch kernels for background renderings
* We do foreground rendering but use the preview kernels
* Check for the optimized kernels
*
* This works also the other way around, where we are using
* optimized kernels but new ones are being compiled due
* to other features that are needed */
if (background) {
/* The if-statements below would find the same result,
* But as the `finished` method uses a mutex we added
* this as an early exit */
return DEVICE_KERNEL_USING_FEATURE_KERNEL;
}
bool other_kernels_finished = load_kernel_num_compiling.load() == 0;
if (use_preview_kernels) {
if (other_kernels_finished) {
return DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE;
}
else {
return DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL;
}
}
else {
if (other_kernels_finished) {
return DEVICE_KERNEL_USING_FEATURE_KERNEL;
}
else {
return DEVICE_KERNEL_FEATURE_KERNEL_INVALID;
}
}
}
void OpenCLDevice::mem_alloc(device_memory &mem)
{
if (mem.name) {

@ -99,9 +99,6 @@ CCL_NAMESPACE_BEGIN
#define __AO__
#define __PASSES__
#define __HAIR__
/* Without these we get an AO render, used by OpenCL preview kernel. */
#ifndef __KERNEL_AO_PREVIEW__
#define __SVM__
#define __EMISSION__
#define __HOLDOUT__
@ -119,7 +116,6 @@ CCL_NAMESPACE_BEGIN
#define __CMJ__
#define __SHADOW_RECORD_ALL__
#define __BRANCHED_PATH__
#endif
/* Device specific features */
#ifdef __KERNEL_CPU__

@ -542,9 +542,6 @@ bool Scene::update(Progress &progress, bool &kernel_switch_needed)
DeviceKernelStatus kernel_switch_status = device->get_active_kernel_switch_state();
kernel_switch_needed = kernel_switch_status == DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE ||
kernel_switch_status == DEVICE_KERNEL_FEATURE_KERNEL_INVALID;
if (kernel_switch_status == DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL) {
progress.set_kernel_status("Compiling render kernels");
}
if (new_kernels_needed || kernel_switch_needed) {
progress.set_kernel_status("Compiling render kernels");
device->wait_for_availability(loaded_kernel_features);

@ -243,11 +243,6 @@ void Session::run_gpu()
}
}
/* Don't go in pause mode when image was rendered with preview kernels
* When feature kernels become available the session will be reset. */
else if (no_tiles && kernel_state == DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL) {
time_sleep(0.1);
}
else if (no_tiles && kernel_state == DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE) {
reset_gpu(tile_manager.params, params.samples);
}
@ -762,11 +757,6 @@ void Session::run_cpu()
}
}
/* Don't go in pause mode when preview kernels are used
* When feature kernels become available the session will be reset. */
else if (no_tiles && kernel_state == DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL) {
time_sleep(0.1);
}
else if (no_tiles && kernel_state == DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE) {
reset_cpu(tile_manager.params, params.samples);
}

@ -43,8 +43,8 @@ class vector : public std::vector<value_type, allocator_type> {
/* Try as hard as possible to use zero memory. */
void free_memory()
{
BaseClass::resize(0);
BaseClass::shrink_to_fit();
vector<value_type, allocator_type> empty;
BaseClass::swap(empty);
}
/* Some external API might demand working with std::vector. */

@ -296,7 +296,7 @@ def bake_action_iter(
pbone.keyframe_insert("rotation_axis_angle", index=-1, frame=f, group=name)
else: # euler, XYZ, ZXY etc
if euler_prev is not None:
euler = pbone.matrix_basis.to_euler(obj.rotation_mode, euler_prev)
euler = pbone.matrix_basis.to_euler(pbone.rotation_mode, euler_prev)
pbone.rotation_euler = euler
del euler
euler_prev = pbone.rotation_euler.copy()