Cycles OpenCL: Remove single program

Part of the cleanup of the OpenCL codebase.
Single program is not effective when using OpenCL: it is slower
to compile and slower during rendering (for example, when used in
`barbershop` or `victor`).

Reviewers: brecht, #cycles

Maniphest Tasks: T62267

Differential Revision: https://developer.blender.org/D4481
This commit is contained in:
Jeroen Bakker 2019-03-08 16:31:05 +01:00
parent 7ecbf9b409
commit 02a7e875d7
10 changed files with 47 additions and 152 deletions

@ -724,12 +724,6 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
update=devices_update_callback update=devices_update_callback
) )
cls.debug_opencl_kernel_single_program = BoolProperty(
name="Single Program",
default=False,
update=devices_update_callback,
)
cls.debug_use_opencl_debug = BoolProperty(name="Debug OpenCL", default=False) cls.debug_use_opencl_debug = BoolProperty(name="Debug OpenCL", default=False)
cls.debug_opencl_mem_limit = IntProperty(name="Memory limit", default=0, cls.debug_opencl_mem_limit = IntProperty(name="Memory limit", default=0,

@ -1635,7 +1635,6 @@ class CYCLES_RENDER_PT_debug(CyclesButtonsPanel, Panel):
col = layout.column() col = layout.column()
col.label('OpenCL Flags:') col.label('OpenCL Flags:')
col.prop(cscene, "debug_opencl_device_type", text="Device") col.prop(cscene, "debug_opencl_device_type", text="Device")
col.prop(cscene, "debug_opencl_kernel_single_program", text="Single Program")
col.prop(cscene, "debug_use_opencl_debug", text="Debug") col.prop(cscene, "debug_use_opencl_debug", text="Debug")
col.prop(cscene, "debug_opencl_mem_limit") col.prop(cscene, "debug_opencl_mem_limit")

@ -104,7 +104,6 @@ bool debug_flags_sync_from_scene(BL::Scene b_scene)
/* Synchronize other OpenCL flags. */ /* Synchronize other OpenCL flags. */
flags.opencl.debug = get_boolean(cscene, "debug_use_opencl_debug"); flags.opencl.debug = get_boolean(cscene, "debug_use_opencl_debug");
flags.opencl.mem_limit = ((size_t)get_int(cscene, "debug_opencl_mem_limit"))*1024*1024; flags.opencl.mem_limit = ((size_t)get_int(cscene, "debug_opencl_mem_limit"))*1024*1024;
flags.opencl.single_program = get_boolean(cscene, "debug_opencl_kernel_single_program");
return flags.opencl.device_type != opencl_device_type; return flags.opencl.device_type != opencl_device_type;
} }

@ -95,7 +95,6 @@ public:
cl_device_id device_id); cl_device_id device_id);
static void get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices, static void get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices,
bool force_all = false); bool force_all = false);
static bool use_single_program();
/* ** Some handy shortcuts to low level cl*GetInfo() functions. ** */ /* ** Some handy shortcuts to low level cl*GetInfo() functions. ** */
@ -371,9 +370,9 @@ public:
bool load_kernels(const DeviceRequestedFeatures& requested_features); bool load_kernels(const DeviceRequestedFeatures& requested_features);
/* Get the name of the opencl program for the given kernel */ /* Get the name of the opencl program for the given kernel */
const string get_opencl_program_name(bool single_program, const string& kernel_name); const string get_opencl_program_name(const string& kernel_name);
/* Get the program file name to compile (*.cl) for the given kernel */ /* Get the program file name to compile (*.cl) for the given kernel */
const string get_opencl_program_filename(bool single_program, const string& kernel_name); const string get_opencl_program_filename(const string& kernel_name);
string get_build_options(const DeviceRequestedFeatures& requested_features, const string& opencl_program_name); string get_build_options(const DeviceRequestedFeatures& requested_features, const string& opencl_program_name);
void mem_alloc(device_memory& mem); void mem_alloc(device_memory& mem);

@ -53,12 +53,8 @@ static const string fast_compiled_kernels =
"indirect_subsurface " "indirect_subsurface "
"buffer_update"; "buffer_update";
const string OpenCLDevice::get_opencl_program_name(bool single_program, const string& kernel_name) const string OpenCLDevice::get_opencl_program_name(const string& kernel_name)
{ {
if (single_program) {
return "split";
}
else {
if (fast_compiled_kernels.find(kernel_name) != std::string::npos) { if (fast_compiled_kernels.find(kernel_name) != std::string::npos) {
return "split_bundle"; return "split_bundle";
} }
@ -66,14 +62,9 @@ const string OpenCLDevice::get_opencl_program_name(bool single_program, const st
return "split_" + kernel_name; return "split_" + kernel_name;
} }
} }
}
const string OpenCLDevice::get_opencl_program_filename(bool single_program, const string& kernel_name) const string OpenCLDevice::get_opencl_program_filename(const string& kernel_name)
{ {
if (single_program) {
return "kernel_split.cl";
}
else {
if (fast_compiled_kernels.find(kernel_name) != std::string::npos) { if (fast_compiled_kernels.find(kernel_name) != std::string::npos) {
return "kernel_split_bundle.cl"; return "kernel_split_bundle.cl";
} }
@ -81,7 +72,6 @@ const string OpenCLDevice::get_opencl_program_filename(bool single_program, cons
return "kernel_" + kernel_name + ".cl"; return "kernel_" + kernel_name + ".cl";
} }
} }
}
string OpenCLDevice::get_build_options(const DeviceRequestedFeatures& requested_features, const string& opencl_program_name) string OpenCLDevice::get_build_options(const DeviceRequestedFeatures& requested_features, const string& opencl_program_name)
{ {
@ -280,12 +270,11 @@ public:
{ {
OpenCLSplitKernelFunction* kernel = new OpenCLSplitKernelFunction(device, cached_memory); OpenCLSplitKernelFunction* kernel = new OpenCLSplitKernelFunction(device, cached_memory);
bool single_program = OpenCLInfo::use_single_program(); const string program_name = device->get_opencl_program_name(kernel_name);
const string program_name = device->get_opencl_program_name(single_program, kernel_name);
kernel->program = kernel->program =
OpenCLDevice::OpenCLProgram(device, OpenCLDevice::OpenCLProgram(device,
program_name, program_name,
device->get_opencl_program_filename(single_program, kernel_name), device->get_opencl_program_filename(kernel_name),
device->get_build_options(requested_features, program_name)); device->get_build_options(requested_features, program_name));
kernel->program.add_kernel(ustring("path_trace_" + kernel_name)); kernel->program.add_kernel(ustring("path_trace_" + kernel_name));
@ -663,10 +652,8 @@ bool OpenCLDevice::load_kernels(const DeviceRequestedFeatures& requested_feature
programs.push_back(&background_program); programs.push_back(&background_program);
} }
bool single_program = OpenCLInfo::use_single_program(); #define ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(kernel_name) program_split.add_kernel(ustring("path_trace_"#kernel_name));
#define ADD_SPLIT_KERNEL_PROGRAM(kernel_name) \
#define ADD_SPLIT_KERNEL_SINGLE_PROGRAM(kernel_name) program_split.add_kernel(ustring("path_trace_"#kernel_name));
#define ADD_SPLIT_KERNEL_SPLIT_PROGRAM(kernel_name) \
const string program_name_##kernel_name = "split_"#kernel_name; \ const string program_name_##kernel_name = "split_"#kernel_name; \
program_##kernel_name = \ program_##kernel_name = \
OpenCLDevice::OpenCLProgram(this, \ OpenCLDevice::OpenCLProgram(this, \
@ -676,48 +663,18 @@ bool OpenCLDevice::load_kernels(const DeviceRequestedFeatures& requested_feature
program_##kernel_name.add_kernel(ustring("path_trace_"#kernel_name)); \ program_##kernel_name.add_kernel(ustring("path_trace_"#kernel_name)); \
programs.push_back(&program_##kernel_name); programs.push_back(&program_##kernel_name);
if (single_program) {
program_split = OpenCLDevice::OpenCLProgram(this,
"split" ,
"kernel_split.cl",
get_build_options(requested_features, "split"));
ADD_SPLIT_KERNEL_SINGLE_PROGRAM(state_buffer_size);
ADD_SPLIT_KERNEL_SINGLE_PROGRAM(data_init);
ADD_SPLIT_KERNEL_SINGLE_PROGRAM(path_init);
ADD_SPLIT_KERNEL_SINGLE_PROGRAM(scene_intersect);
ADD_SPLIT_KERNEL_SINGLE_PROGRAM(lamp_emission);
ADD_SPLIT_KERNEL_SINGLE_PROGRAM(do_volume);
ADD_SPLIT_KERNEL_SINGLE_PROGRAM(queue_enqueue);
ADD_SPLIT_KERNEL_SINGLE_PROGRAM(indirect_background);
ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_setup);
ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_sort);
ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_eval);
ADD_SPLIT_KERNEL_SINGLE_PROGRAM(holdout_emission_blurring_pathtermination_ao);
ADD_SPLIT_KERNEL_SINGLE_PROGRAM(subsurface_scatter);
ADD_SPLIT_KERNEL_SINGLE_PROGRAM(direct_lighting);
ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shadow_blocked_ao);
ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shadow_blocked_dl);
ADD_SPLIT_KERNEL_SINGLE_PROGRAM(enqueue_inactive);
ADD_SPLIT_KERNEL_SINGLE_PROGRAM(next_iteration_setup);
ADD_SPLIT_KERNEL_SINGLE_PROGRAM(indirect_subsurface);
ADD_SPLIT_KERNEL_SINGLE_PROGRAM(buffer_update);
programs.push_back(&program_split);
}
else {
/* Ordered with most complex kernels first, to reduce overall compile time. */ /* Ordered with most complex kernels first, to reduce overall compile time. */
ADD_SPLIT_KERNEL_SPLIT_PROGRAM(subsurface_scatter); ADD_SPLIT_KERNEL_PROGRAM(subsurface_scatter);
if (requested_features.use_volume) { if (requested_features.use_volume) {
ADD_SPLIT_KERNEL_SPLIT_PROGRAM(do_volume); ADD_SPLIT_KERNEL_PROGRAM(do_volume);
} }
ADD_SPLIT_KERNEL_SPLIT_PROGRAM(shadow_blocked_dl); ADD_SPLIT_KERNEL_PROGRAM(shadow_blocked_dl);
ADD_SPLIT_KERNEL_SPLIT_PROGRAM(shadow_blocked_ao); ADD_SPLIT_KERNEL_PROGRAM(shadow_blocked_ao);
ADD_SPLIT_KERNEL_SPLIT_PROGRAM(holdout_emission_blurring_pathtermination_ao); ADD_SPLIT_KERNEL_PROGRAM(holdout_emission_blurring_pathtermination_ao);
ADD_SPLIT_KERNEL_SPLIT_PROGRAM(lamp_emission); ADD_SPLIT_KERNEL_PROGRAM(lamp_emission);
ADD_SPLIT_KERNEL_SPLIT_PROGRAM(direct_lighting); ADD_SPLIT_KERNEL_PROGRAM(direct_lighting);
ADD_SPLIT_KERNEL_SPLIT_PROGRAM(indirect_background); ADD_SPLIT_KERNEL_PROGRAM(indirect_background);
ADD_SPLIT_KERNEL_SPLIT_PROGRAM(shader_eval); ADD_SPLIT_KERNEL_PROGRAM(shader_eval);
/* Quick kernels bundled in a single program to reduce overhead of starting /* Quick kernels bundled in a single program to reduce overhead of starting
* Blender processes. */ * Blender processes. */
@ -726,21 +683,21 @@ bool OpenCLDevice::load_kernels(const DeviceRequestedFeatures& requested_feature
"kernel_split_bundle.cl", "kernel_split_bundle.cl",
get_build_options(requested_features, "split_bundle")); get_build_options(requested_features, "split_bundle"));
ADD_SPLIT_KERNEL_SINGLE_PROGRAM(data_init); ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(data_init);
ADD_SPLIT_KERNEL_SINGLE_PROGRAM(state_buffer_size); ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(state_buffer_size);
ADD_SPLIT_KERNEL_SINGLE_PROGRAM(path_init); ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(path_init);
ADD_SPLIT_KERNEL_SINGLE_PROGRAM(scene_intersect); ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(scene_intersect);
ADD_SPLIT_KERNEL_SINGLE_PROGRAM(queue_enqueue); ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(queue_enqueue);
ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_setup); ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(shader_setup);
ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_sort); ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(shader_sort);
ADD_SPLIT_KERNEL_SINGLE_PROGRAM(enqueue_inactive); ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(enqueue_inactive);
ADD_SPLIT_KERNEL_SINGLE_PROGRAM(next_iteration_setup); ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(next_iteration_setup);
ADD_SPLIT_KERNEL_SINGLE_PROGRAM(indirect_subsurface); ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(indirect_subsurface);
ADD_SPLIT_KERNEL_SINGLE_PROGRAM(buffer_update); ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(buffer_update);
programs.push_back(&program_split); programs.push_back(&program_split);
}
#undef ADD_SPLIT_KERNEL_SPLIT_PROGRAM #undef ADD_SPLIT_KERNEL_PROGRAM
#undef ADD_SPLIT_KERNEL_SINGLE_PROGRAM #undef ADD_SPLIT_KERNEL_BUNDLE_PROGRAM
base_program = OpenCLProgram(this, "base", "kernel_base.cl", get_build_options(requested_features, "base")); base_program = OpenCLProgram(this, "base", "kernel_base.cl", get_build_options(requested_features, "base"));
base_program.add_kernel(ustring("convert_to_byte")); base_program.add_kernel(ustring("convert_to_byte"));

@ -691,11 +691,6 @@ bool OpenCLInfo::use_debug()
return DebugFlags().opencl.debug; return DebugFlags().opencl.debug;
} }
bool OpenCLInfo::use_single_program()
{
return DebugFlags().opencl.single_program;
}
bool OpenCLInfo::kernel_use_advanced_shading(const string& platform) bool OpenCLInfo::kernel_use_advanced_shading(const string& platform)
{ {
/* keep this in sync with kernel_types.h! */ /* keep this in sync with kernel_types.h! */

@ -41,7 +41,6 @@ set(SRC_OPENCL_KERNELS
kernels/opencl/kernel_displace.cl kernels/opencl/kernel_displace.cl
kernels/opencl/kernel_background.cl kernels/opencl/kernel_background.cl
kernels/opencl/kernel_state_buffer_size.cl kernels/opencl/kernel_state_buffer_size.cl
kernels/opencl/kernel_split.cl
kernels/opencl/kernel_split_bundle.cl kernels/opencl/kernel_split_bundle.cl
kernels/opencl/kernel_data_init.cl kernels/opencl/kernel_data_init.cl
kernels/opencl/kernel_path_init.cl kernels/opencl/kernel_path_init.cl

@ -1,41 +0,0 @@
/*
* Copyright 2011-2017 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "kernel/kernel_compat_opencl.h" // PRECOMPILED
#include "kernel/split/kernel_split_common.h" // PRECOMPILED
#include "kernel/kernels/opencl/kernel_state_buffer_size.cl"
#include "kernel/kernels/opencl/kernel_data_init.cl"
#include "kernel/kernels/opencl/kernel_path_init.cl"
#include "kernel/kernels/opencl/kernel_scene_intersect.cl"
#include "kernel/kernels/opencl/kernel_lamp_emission.cl"
#include "kernel/kernels/opencl/kernel_do_volume.cl"
#include "kernel/kernels/opencl/kernel_indirect_background.cl"
#include "kernel/kernels/opencl/kernel_queue_enqueue.cl"
#include "kernel/kernels/opencl/kernel_shader_setup.cl"
#include "kernel/kernels/opencl/kernel_shader_sort.cl"
#include "kernel/kernels/opencl/kernel_shader_eval.cl"
#include "kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl"
#include "kernel/kernels/opencl/kernel_subsurface_scatter.cl"
#include "kernel/kernels/opencl/kernel_direct_lighting.cl"
#include "kernel/kernels/opencl/kernel_shadow_blocked_ao.cl"
#include "kernel/kernels/opencl/kernel_shadow_blocked_dl.cl"
#include "kernel/kernels/opencl/kernel_enqueue_inactive.cl"
#include "kernel/kernels/opencl/kernel_next_iteration_setup.cl"
#include "kernel/kernels/opencl/kernel_indirect_subsurface.cl"
#include "kernel/kernels/opencl/kernel_buffer_update.cl"

@ -90,8 +90,7 @@ void DebugFlags::CUDA::reset()
DebugFlags::OpenCL::OpenCL() DebugFlags::OpenCL::OpenCL()
: device_type(DebugFlags::OpenCL::DEVICE_ALL), : device_type(DebugFlags::OpenCL::DEVICE_ALL),
debug(false), debug(false)
single_program(false)
{ {
reset(); reset();
} }
@ -123,7 +122,6 @@ void DebugFlags::OpenCL::reset()
} }
/* Initialize other flags from environment variables. */ /* Initialize other flags from environment variables. */
debug = (getenv("CYCLES_OPENCL_DEBUG") != NULL); debug = (getenv("CYCLES_OPENCL_DEBUG") != NULL);
single_program = (getenv("CYCLES_OPENCL_SINGLE_PROGRAM") != NULL);
} }
DebugFlags::DebugFlags() DebugFlags::DebugFlags()
@ -179,7 +177,6 @@ std::ostream& operator <<(std::ostream &os,
os << "OpenCL flags:\n" os << "OpenCL flags:\n"
<< " Device type : " << opencl_device_type << "\n" << " Device type : " << opencl_device_type << "\n"
<< " Debug : " << string_from_bool(debug_flags.opencl.debug) << "\n" << " Debug : " << string_from_bool(debug_flags.opencl.debug) << "\n"
<< " Single program : " << string_from_bool(debug_flags.opencl.single_program) << "\n"
<< " Memory limit : " << string_human_readable_size(debug_flags.opencl.mem_limit) << "\n"; << " Memory limit : " << string_human_readable_size(debug_flags.opencl.mem_limit) << "\n";
return os; return os;
} }

@ -126,9 +126,6 @@ public:
/* Use debug version of the kernel. */ /* Use debug version of the kernel. */
bool debug; bool debug;
/* Use single program */
bool single_program;
/* TODO(mai): Currently this is only for OpenCL, but we should have it implemented for all devices. */ /* TODO(mai): Currently this is only for OpenCL, but we should have it implemented for all devices. */
/* Artificial memory limit in bytes (0 if disabled). */ /* Artificial memory limit in bytes (0 if disabled). */
size_t mem_limit; size_t mem_limit;