forked from bartvdbraak/blender
T61463: Separate Baking kernels
Cycles OpenCL: Split baking kernels in own program Fix T61463. Before this patch baking was part of the base kernels. There are 3 baking kernels that and all 3 uses shader evaluation. Only for one of these kernels the functionality was wrapped in the __NO_BAKING__ compile directive. When you start baking this leads to long compile times. By separating in individual programs will reduce the compile times. Also wrapped all baking kernels with __NO_BAKING__ to reduce the compilation times. Impact on compilation time job | scene_name | previous | new | percentage --------+-----------------+----------+-------+------------ T61463 | empty | 10.63 | 7.27 | 32% T61463 | bmw | 17.91 | 14.24 | 20% T61463 | fishycat | 19.57 | 15.08 | 23% T61463 | barbershop | 54.10 | 48.18 | 11% T61463 | classroom | 17.55 | 14.42 | 18% T61463 | koro | 18.92 | 17.15 | 9% T61463 | pavillion | 17.43 | 14.23 | 18% T61463 | splash279 | 16.48 | 15.33 | 7% T61463 | volume_emission | 36.22 | 34.19 | 6% Impact on render time job | scene_name | previous | new | percentage --------+-----------------+----------+---------+------------ T61463 | empty | 21.06 | 20.54 | 2% T61463 | bmw | 198.44 | 189.59 | 4% T61463 | fishycat | 394.20 | 388.50 | 1% T61463 | barbershop | 1188.16 | 1185.49 | 0% T61463 | classroom | 341.08 | 339.27 | 1% T61463 | koro | 472.43 | 360.70 | 24% T61463 | pavillion | 905.77 | 902.14 | 0% T61463 | splash279 | 55.26 | 54.92 | 1% T61463 | volume_emission | 62.59 | 39.09 | 38% I don't have a grounded explanation why koro and volume_emission is this much faster; I have done several tests though... Maniphest Tasks: T61463 Differential Revision: https://developer.blender.org/D4376
This commit is contained in:
parent
e6f5632eb1
commit
667033e89e
@ -325,7 +325,11 @@ public:
|
||||
map<ustring, cl_kernel> kernels;
|
||||
};
|
||||
|
||||
OpenCLProgram base_program, denoising_program;
|
||||
OpenCLProgram base_program;
|
||||
OpenCLProgram bake_program;
|
||||
OpenCLProgram displace_program;
|
||||
OpenCLProgram background_program;
|
||||
OpenCLProgram denoising_program;
|
||||
|
||||
typedef map<string, device_vector<uchar>*> ConstMemMap;
|
||||
typedef map<string, device_ptr> MemMap;
|
||||
@ -571,7 +575,7 @@ protected:
|
||||
ustring key,
|
||||
thread_scoped_lock& cache_locker);
|
||||
|
||||
virtual string build_options_for_base_program(
|
||||
virtual string build_options_for_bake_program(
|
||||
const DeviceRequestedFeatures& /*requested_features*/);
|
||||
|
||||
private:
|
||||
|
@ -162,6 +162,9 @@ OpenCLDeviceBase::~OpenCLDeviceBase()
|
||||
}
|
||||
|
||||
base_program.release();
|
||||
bake_program.release();
|
||||
displace_program.release();
|
||||
background_program.release();
|
||||
if(cqCommandQueue)
|
||||
clReleaseCommandQueue(cqCommandQueue);
|
||||
if(cxContext)
|
||||
@ -225,14 +228,20 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea
|
||||
if(!opencl_version_check())
|
||||
return false;
|
||||
|
||||
base_program = OpenCLProgram(this, "base", "kernel.cl", build_options_for_base_program(requested_features));
|
||||
base_program = OpenCLProgram(this, "base", "kernel.cl", "");
|
||||
base_program.add_kernel(ustring("convert_to_byte"));
|
||||
base_program.add_kernel(ustring("convert_to_half_float"));
|
||||
base_program.add_kernel(ustring("displace"));
|
||||
base_program.add_kernel(ustring("background"));
|
||||
base_program.add_kernel(ustring("bake"));
|
||||
base_program.add_kernel(ustring("zero_buffer"));
|
||||
|
||||
bake_program = OpenCLProgram(this, "bake", "kernel_bake.cl", build_options_for_bake_program(requested_features));
|
||||
bake_program.add_kernel(ustring("bake"));
|
||||
|
||||
displace_program = OpenCLProgram(this, "displace", "kernel_displace.cl", build_options_for_bake_program(requested_features));
|
||||
displace_program.add_kernel(ustring("displace"));
|
||||
|
||||
background_program = OpenCLProgram(this, "background", "kernel_background.cl", build_options_for_bake_program(requested_features));
|
||||
background_program.add_kernel(ustring("background"));
|
||||
|
||||
denoising_program = OpenCLProgram(this, "denoising", "filter.cl", "");
|
||||
denoising_program.add_kernel(ustring("filter_divide_shadow"));
|
||||
denoising_program.add_kernel(ustring("filter_get_feature"));
|
||||
@ -248,12 +257,15 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea
|
||||
denoising_program.add_kernel(ustring("filter_finalize"));
|
||||
|
||||
vector<OpenCLProgram*> programs;
|
||||
programs.push_back(&base_program);
|
||||
programs.push_back(&denoising_program);
|
||||
programs.push_back(&bake_program);
|
||||
programs.push_back(&displace_program);
|
||||
programs.push_back(&background_program);
|
||||
/* Call actual class to fill the vector with its programs. */
|
||||
if(!add_kernel_programs(requested_features, programs)) {
|
||||
return false;
|
||||
}
|
||||
programs.push_back(&base_program);
|
||||
programs.push_back(&denoising_program);
|
||||
|
||||
/* Parallel compilation of Cycles kernels, this launches multiple
|
||||
* processes to workaround OpenCL frameworks serializing the calls
|
||||
@ -1152,13 +1164,13 @@ void OpenCLDeviceBase::shader(DeviceTask& task)
|
||||
cl_kernel kernel;
|
||||
|
||||
if(task.shader_eval_type >= SHADER_EVAL_BAKE) {
|
||||
kernel = base_program(ustring("bake"));
|
||||
kernel = bake_program(ustring("bake"));
|
||||
}
|
||||
else if(task.shader_eval_type == SHADER_EVAL_DISPLACE) {
|
||||
kernel = base_program(ustring("displace"));
|
||||
kernel = displace_program(ustring("displace"));
|
||||
}
|
||||
else {
|
||||
kernel = base_program(ustring("background"));
|
||||
kernel = background_program(ustring("background"));
|
||||
}
|
||||
|
||||
cl_uint start_arg_index =
|
||||
@ -1385,7 +1397,7 @@ void OpenCLDeviceBase::store_cached_kernel(
|
||||
cache_locker);
|
||||
}
|
||||
|
||||
string OpenCLDeviceBase::build_options_for_base_program(
|
||||
string OpenCLDeviceBase::build_options_for_bake_program(
|
||||
const DeviceRequestedFeatures& requested_features)
|
||||
{
|
||||
/* TODO(sergey): By default we compile all features, meaning
|
||||
|
@ -327,7 +327,7 @@ public:
|
||||
protected:
|
||||
/* ** Those guys are for workign around some compiler-specific bugs ** */
|
||||
|
||||
string build_options_for_base_program(
|
||||
string build_options_for_bake_program(
|
||||
const DeviceRequestedFeatures& requested_features)
|
||||
{
|
||||
return requested_features.get_build_options();
|
||||
|
@ -37,6 +37,9 @@ set(SRC_CUDA_KERNELS
|
||||
|
||||
set(SRC_OPENCL_KERNELS
|
||||
kernels/opencl/kernel.cl
|
||||
kernels/opencl/kernel_bake.cl
|
||||
kernels/opencl/kernel_displace.cl
|
||||
kernels/opencl/kernel_background.cl
|
||||
kernels/opencl/kernel_state_buffer_size.cl
|
||||
kernels/opencl/kernel_split.cl
|
||||
kernels/opencl/kernel_split_bundle.cl
|
||||
|
@ -81,78 +81,6 @@ __kernel void kernel_ocl_path_trace(
|
||||
|
||||
#else /* __COMPILE_ONLY_MEGAKERNEL__ */
|
||||
|
||||
__kernel void kernel_ocl_displace(
|
||||
ccl_constant KernelData *data,
|
||||
ccl_global uint4 *input,
|
||||
ccl_global float4 *output,
|
||||
|
||||
KERNEL_BUFFER_PARAMS,
|
||||
|
||||
int type, int sx, int sw, int offset, int sample)
|
||||
{
|
||||
KernelGlobals kglobals, *kg = &kglobals;
|
||||
|
||||
kg->data = data;
|
||||
|
||||
kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
|
||||
kernel_set_buffer_info(kg);
|
||||
|
||||
int x = sx + ccl_global_id(0);
|
||||
|
||||
if(x < sx + sw) {
|
||||
kernel_displace_evaluate(kg, input, output, x);
|
||||
}
|
||||
}
|
||||
__kernel void kernel_ocl_background(
|
||||
ccl_constant KernelData *data,
|
||||
ccl_global uint4 *input,
|
||||
ccl_global float4 *output,
|
||||
|
||||
KERNEL_BUFFER_PARAMS,
|
||||
|
||||
int type, int sx, int sw, int offset, int sample)
|
||||
{
|
||||
KernelGlobals kglobals, *kg = &kglobals;
|
||||
|
||||
kg->data = data;
|
||||
|
||||
kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
|
||||
kernel_set_buffer_info(kg);
|
||||
|
||||
int x = sx + ccl_global_id(0);
|
||||
|
||||
if(x < sx + sw) {
|
||||
kernel_background_evaluate(kg, input, output, x);
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void kernel_ocl_bake(
|
||||
ccl_constant KernelData *data,
|
||||
ccl_global uint4 *input,
|
||||
ccl_global float4 *output,
|
||||
|
||||
KERNEL_BUFFER_PARAMS,
|
||||
|
||||
int type, int filter, int sx, int sw, int offset, int sample)
|
||||
{
|
||||
KernelGlobals kglobals, *kg = &kglobals;
|
||||
|
||||
kg->data = data;
|
||||
|
||||
kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
|
||||
kernel_set_buffer_info(kg);
|
||||
|
||||
int x = sx + ccl_global_id(0);
|
||||
|
||||
if(x < sx + sw) {
|
||||
#ifdef __NO_BAKING__
|
||||
output[x] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
|
||||
#else
|
||||
kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, filter, x, offset, sample);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void kernel_ocl_convert_to_byte(
|
||||
ccl_constant KernelData *data,
|
||||
ccl_global uchar4 *rgba,
|
||||
|
39
intern/cycles/kernel/kernels/opencl/kernel_background.cl
Normal file
39
intern/cycles/kernel/kernels/opencl/kernel_background.cl
Normal file
@ -0,0 +1,39 @@
|
||||
|
||||
#include "kernel/kernel_compat_opencl.h"
|
||||
#include "kernel/kernel_math.h"
|
||||
#include "kernel/kernel_types.h"
|
||||
#include "kernel/kernel_globals.h"
|
||||
#include "kernel/kernel_color.h"
|
||||
#include "kernel/kernels/opencl/kernel_opencl_image.h"
|
||||
|
||||
#include "kernel/kernel_path.h"
|
||||
#include "kernel/kernel_path_branched.h"
|
||||
|
||||
#include "kernel/kernel_bake.h"
|
||||
|
||||
__kernel void kernel_ocl_background(
|
||||
ccl_constant KernelData *data,
|
||||
ccl_global uint4 *input,
|
||||
ccl_global float4 *output,
|
||||
|
||||
KERNEL_BUFFER_PARAMS,
|
||||
|
||||
int type, int sx, int sw, int offset, int sample)
|
||||
{
|
||||
KernelGlobals kglobals, *kg = &kglobals;
|
||||
|
||||
kg->data = data;
|
||||
|
||||
kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
|
||||
kernel_set_buffer_info(kg);
|
||||
|
||||
int x = sx + ccl_global_id(0);
|
||||
|
||||
if(x < sx + sw) {
|
||||
#ifdef __NO_BAKING__
|
||||
output[x] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
|
||||
#else
|
||||
kernel_background_evaluate(kg, input, output, x);
|
||||
#endif
|
||||
}
|
||||
}
|
38
intern/cycles/kernel/kernels/opencl/kernel_bake.cl
Normal file
38
intern/cycles/kernel/kernels/opencl/kernel_bake.cl
Normal file
@ -0,0 +1,38 @@
|
||||
#include "kernel/kernel_compat_opencl.h"
|
||||
#include "kernel/kernel_math.h"
|
||||
#include "kernel/kernel_types.h"
|
||||
#include "kernel/kernel_globals.h"
|
||||
#include "kernel/kernel_color.h"
|
||||
#include "kernel/kernels/opencl/kernel_opencl_image.h"
|
||||
|
||||
#include "kernel/kernel_path.h"
|
||||
#include "kernel/kernel_path_branched.h"
|
||||
|
||||
#include "kernel/kernel_bake.h"
|
||||
|
||||
__kernel void kernel_ocl_bake(
|
||||
ccl_constant KernelData *data,
|
||||
ccl_global uint4 *input,
|
||||
ccl_global float4 *output,
|
||||
|
||||
KERNEL_BUFFER_PARAMS,
|
||||
|
||||
int type, int filter, int sx, int sw, int offset, int sample)
|
||||
{
|
||||
KernelGlobals kglobals, *kg = &kglobals;
|
||||
|
||||
kg->data = data;
|
||||
|
||||
kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
|
||||
kernel_set_buffer_info(kg);
|
||||
|
||||
int x = sx + ccl_global_id(0);
|
||||
|
||||
if(x < sx + sw) {
|
||||
#ifdef __NO_BAKING__
|
||||
output[x] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
|
||||
#else
|
||||
kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, filter, x, offset, sample);
|
||||
#endif
|
||||
}
|
||||
}
|
40
intern/cycles/kernel/kernels/opencl/kernel_displace.cl
Normal file
40
intern/cycles/kernel/kernels/opencl/kernel_displace.cl
Normal file
@ -0,0 +1,40 @@
|
||||
|
||||
#include "kernel/kernel_compat_opencl.h"
|
||||
#include "kernel/kernel_math.h"
|
||||
#include "kernel/kernel_types.h"
|
||||
#include "kernel/kernel_globals.h"
|
||||
#include "kernel/kernel_color.h"
|
||||
#include "kernel/kernels/opencl/kernel_opencl_image.h"
|
||||
|
||||
#include "kernel/kernel_path.h"
|
||||
#include "kernel/kernel_path_branched.h"
|
||||
|
||||
#include "kernel/kernel_bake.h
|
||||
|
||||
__kernel void kernel_ocl_displace(
|
||||
ccl_constant KernelData *data,
|
||||
ccl_global uint4 *input,
|
||||
ccl_global float4 *output,
|
||||
|
||||
KERNEL_BUFFER_PARAMS,
|
||||
|
||||
int type, int sx, int sw, int offset, int sample)
|
||||
{
|
||||
KernelGlobals kglobals, *kg = &kglobals;
|
||||
|
||||
kg->data = data;
|
||||
|
||||
kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
|
||||
kernel_set_buffer_info(kg);
|
||||
|
||||
int x = sx + ccl_global_id(0);
|
||||
|
||||
if(x < sx + sw) {
|
||||
#ifdef __NO_BAKING__
|
||||
output[x] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
|
||||
#else
|
||||
kernel_displace_evaluate(kg, input, output, x);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user