From eb293f59f2eb9847b8fd593ac2dde2781ac8ace1 Mon Sep 17 00:00:00 2001 From: Mai Lavelle Date: Thu, 11 May 2017 19:23:49 -0400 Subject: [PATCH] Cycles: Pass all buffers to each kernel call for OpenCL Technically not passing all buffers used by a kernel is undefined behavior. We haven't had any issues with this so far on AMD or Nvidia, but it's known to be a problem with Intel and we received a report from AMD that this is a problem on newer hardware, so we need to make this change at some point. Unfortunately there a cost to being correct, about 5% for the benchmark scenes. For low sample counts it's even worse, I've seen up to 50% slowdown. For the latter case I think adjusting tile updating logic can help, but not sure what that would look like yet (it would be just a few lines change however). --- intern/cycles/device/opencl/opencl_split.cpp | 63 ++++++++++++++-- intern/cycles/kernel/CMakeLists.txt | 6 ++ .../kernels/opencl/kernel_buffer_update.cl | 13 ++-- .../kernels/opencl/kernel_direct_lighting.cl | 13 ++-- .../kernel/kernels/opencl/kernel_do_volume.cl | 10 ++- .../kernels/opencl/kernel_enqueue_inactive.cl | 13 ++-- ...ut_emission_blurring_pathtermination_ao.cl | 15 ++-- .../opencl/kernel_indirect_background.cl | 10 ++- .../opencl/kernel_indirect_subsurface.cl | 10 ++- .../kernels/opencl/kernel_lamp_emission.cl | 10 ++- .../opencl/kernel_next_iteration_setup.cl | 13 ++-- .../kernel/kernels/opencl/kernel_path_init.cl | 10 ++- .../kernels/opencl/kernel_queue_enqueue.cl | 13 ++-- .../kernels/opencl/kernel_scene_intersect.cl | 10 ++- .../kernels/opencl/kernel_shader_eval.cl | 10 ++- .../kernels/opencl/kernel_shader_setup.cl | 13 ++-- .../kernels/opencl/kernel_shader_sort.cl | 13 ++-- .../opencl/kernel_shadow_blocked_ao.cl | 10 ++- .../opencl/kernel_shadow_blocked_dl.cl | 10 ++- .../kernels/opencl/kernel_split_function.h | 72 +++++++++++++++++++ .../opencl/kernel_subsurface_scatter.cl | 10 ++- 21 files changed, 225 insertions(+), 122 deletions(-) create mode 100644 intern/cycles/kernel/kernels/opencl/kernel_split_function.h diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp index 76dcbd6fc9a..08b632ee9d3 100644 --- a/intern/cycles/device/opencl/opencl_split.cpp +++ b/intern/cycles/device/opencl/opencl_split.cpp @@ -176,17 +176,62 @@ protected: friend class OpenCLSplitKernelFunction; }; +struct CachedSplitMemory { + int id; + device_memory *split_data; + device_memory *ray_state; + device_ptr *rng_state; + device_memory *queue_index; + device_memory *use_queues_flag; + device_memory *work_pools; + device_ptr *buffer; +}; + class OpenCLSplitKernelFunction : public SplitKernelFunction { public: OpenCLDeviceSplitKernel* device; OpenCLDeviceBase::OpenCLProgram program; + CachedSplitMemory& cached_memory; + int cached_id; - OpenCLSplitKernelFunction(OpenCLDeviceSplitKernel* device) : device(device) {} - ~OpenCLSplitKernelFunction() { program.release(); } + OpenCLSplitKernelFunction(OpenCLDeviceSplitKernel* device, CachedSplitMemory& cached_memory) : + device(device), cached_memory(cached_memory), cached_id(cached_memory.id-1) + { + } + + ~OpenCLSplitKernelFunction() + { + program.release(); + } virtual bool enqueue(const KernelDimensions& dim, device_memory& kg, device_memory& data) { - device->kernel_set_args(program(), 0, kg, data); + if(cached_id != cached_memory.id) { + cl_uint start_arg_index = + device->kernel_set_args(program(), + 0, + kg, + data, + *cached_memory.split_data, + *cached_memory.ray_state, + *cached_memory.rng_state); + +/* TODO(sergey): Avoid map lookup here. */ +#define KERNEL_TEX(type, ttype, name) \ + device->set_kernel_arg_mem(program(), &start_arg_index, #name); +#include "kernel/kernel_textures.h" +#undef KERNEL_TEX + + start_arg_index += + device->kernel_set_args(program(), + start_arg_index, + *cached_memory.queue_index, + *cached_memory.use_queues_flag, + *cached_memory.work_pools, + *cached_memory.buffer); + + cached_id = cached_memory.id; + } device->ciErr = clEnqueueNDRangeKernel(device->cqCommandQueue, program(), @@ -213,6 +258,7 @@ public: class OpenCLSplitKernel : public DeviceSplitKernel { OpenCLDeviceSplitKernel *device; + CachedSplitMemory cached_memory; public: explicit OpenCLSplitKernel(OpenCLDeviceSplitKernel *device) : DeviceSplitKernel(device), device(device) { } @@ -220,7 +266,7 @@ public: virtual SplitKernelFunction* get_split_kernel_function(string kernel_name, const DeviceRequestedFeatures& requested_features) { - OpenCLSplitKernelFunction* kernel = new OpenCLSplitKernelFunction(device); + OpenCLSplitKernelFunction* kernel = new OpenCLSplitKernelFunction(device, cached_memory); bool single_program = OpenCLInfo::use_single_program(); kernel->program = @@ -349,6 +395,15 @@ public: return false; } + cached_memory.split_data = &split_data; + cached_memory.ray_state = &ray_state; + cached_memory.rng_state = &rtile.rng_state; + cached_memory.queue_index = &queue_index; + cached_memory.use_queues_flag = &use_queues_flag; + cached_memory.work_pools = &work_pool_wgs; + cached_memory.buffer = &rtile.buffer; + cached_memory.id++; + return true; } diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index b85067d4e66..23e9bd311c4 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -122,6 +122,10 @@ set(SRC_KERNELS_CUDA_HEADERS kernels/cuda/kernel_config.h ) +set(SRC_KERNELS_OPENCL_HEADERS + kernels/opencl/kernel_split_function.h +) + set(SRC_CLOSURE_HEADERS closure/alloc.h closure/bsdf.h @@ -452,6 +456,7 @@ add_library(cycles_kernel ${SRC_HEADERS} ${SRC_KERNELS_CPU_HEADERS} ${SRC_KERNELS_CUDA_HEADERS} + ${SRC_KERNELS_OPENCL_HEADERS} ${SRC_BVH_HEADERS} ${SRC_CLOSURE_HEADERS} ${SRC_FILTER_HEADERS} @@ -496,6 +501,7 @@ delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_enqueue_inact delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_next_iteration_setup.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_indirect_subsurface.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_buffer_update.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_split_function.h" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/filter.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/cuda/kernel.cu" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/cuda) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/cuda/kernel_split.cu" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/cuda) diff --git a/intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl b/intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl index db65c91baf7..dcea2630aef 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl @@ -18,10 +18,9 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_buffer_update.h" -__kernel void kernel_ocl_path_trace_buffer_update( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - ccl_local unsigned int local_queue_atomics; - kernel_buffer_update((KernelGlobals*)kg, &local_queue_atomics); -} +#define KERNEL_NAME buffer_update +#define LOCALS_TYPE unsigned int +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME +#undef LOCALS_TYPE + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl b/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl index eb34f750881..ed64ae01aae 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl @@ -18,10 +18,9 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_direct_lighting.h" -__kernel void kernel_ocl_path_trace_direct_lighting( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - ccl_local unsigned int local_queue_atomics; - kernel_direct_lighting((KernelGlobals*)kg, &local_queue_atomics); -} +#define KERNEL_NAME direct_lighting +#define LOCALS_TYPE unsigned int +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME +#undef LOCALS_TYPE + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_do_volume.cl b/intern/cycles/kernel/kernels/opencl/kernel_do_volume.cl index 83ef5f5f3f2..8afaa686e28 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_do_volume.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_do_volume.cl @@ -18,9 +18,7 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_do_volume.h" -__kernel void kernel_ocl_path_trace_do_volume( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - kernel_do_volume((KernelGlobals*)kg); -} +#define KERNEL_NAME do_volume +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_enqueue_inactive.cl b/intern/cycles/kernel/kernels/opencl/kernel_enqueue_inactive.cl index 940f3b890a4..e68d4104a91 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_enqueue_inactive.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_enqueue_inactive.cl @@ -18,10 +18,9 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_enqueue_inactive.h" -__kernel void kernel_ocl_path_trace_enqueue_inactive( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - ccl_local unsigned int local_queue_atomics; - kernel_enqueue_inactive((KernelGlobals*)kg, &local_queue_atomics); -} +#define KERNEL_NAME enqueue_inactive +#define LOCALS_TYPE unsigned int +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME +#undef LOCALS_TYPE + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl b/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl index d071b39aa6f..9e1e57beba6 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl @@ -18,12 +18,9 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h" -__kernel void kernel_ocl_path_trace_holdout_emission_blurring_pathtermination_ao( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - ccl_local BackgroundAOLocals locals; - kernel_holdout_emission_blurring_pathtermination_ao( - (KernelGlobals*)kg, - &locals); -} +#define KERNEL_NAME holdout_emission_blurring_pathtermination_ao +#define LOCALS_TYPE BackgroundAOLocals +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME +#undef LOCALS_TYPE + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl b/intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl index 8c213ff5cb2..192d01444ba 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl @@ -18,9 +18,7 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_indirect_background.h" -__kernel void kernel_ocl_path_trace_indirect_background( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - kernel_indirect_background((KernelGlobals*)kg); -} +#define KERNEL_NAME indirect_background +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_indirect_subsurface.cl b/intern/cycles/kernel/kernels/opencl/kernel_indirect_subsurface.cl index 998ebc4c0c3..84938b889e5 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_indirect_subsurface.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_indirect_subsurface.cl @@ -18,9 +18,7 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_indirect_subsurface.h" -__kernel void kernel_ocl_path_trace_indirect_subsurface( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - kernel_indirect_subsurface((KernelGlobals*)kg); -} +#define KERNEL_NAME indirect_subsurface +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl b/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl index 822d2287715..c314dc96c33 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl @@ -18,9 +18,7 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_lamp_emission.h" -__kernel void kernel_ocl_path_trace_lamp_emission( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - kernel_lamp_emission((KernelGlobals*)kg); -} +#define KERNEL_NAME lamp_emission +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl b/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl index 6d207253a40..8b1332bf013 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl @@ -18,10 +18,9 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_next_iteration_setup.h" -__kernel void kernel_ocl_path_trace_next_iteration_setup( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - ccl_local unsigned int local_queue_atomics; - kernel_next_iteration_setup((KernelGlobals*)kg, &local_queue_atomics); -} +#define KERNEL_NAME next_iteration_setup +#define LOCALS_TYPE unsigned int +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME +#undef LOCALS_TYPE + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl b/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl index bd9aa9538c8..fa210e747c0 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl @@ -18,9 +18,7 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_path_init.h" -__kernel void kernel_ocl_path_trace_path_init( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - kernel_path_init((KernelGlobals*)kg); -} +#define KERNEL_NAME path_init +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl b/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl index 9be154e3d75..68ee6f1d536 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl @@ -18,10 +18,9 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_queue_enqueue.h" -__kernel void kernel_ocl_path_trace_queue_enqueue( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - ccl_local QueueEnqueueLocals locals; - kernel_queue_enqueue((KernelGlobals*)kg, &locals); -} +#define KERNEL_NAME queue_enqueue +#define LOCALS_TYPE QueueEnqueueLocals +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME +#undef LOCALS_TYPE + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl b/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl index eb4fb4d153a..10d09377ba9 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl @@ -18,9 +18,7 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_scene_intersect.h" -__kernel void kernel_ocl_path_trace_scene_intersect( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - kernel_scene_intersect((KernelGlobals*)kg); -} +#define KERNEL_NAME scene_intersect +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl b/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl index 5bfb31b193a..40eaa561863 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl @@ -18,9 +18,7 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_shader_eval.h" -__kernel void kernel_ocl_path_trace_shader_eval( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - kernel_shader_eval((KernelGlobals*)kg); -} +#define KERNEL_NAME shader_eval +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shader_setup.cl b/intern/cycles/kernel/kernels/opencl/kernel_shader_setup.cl index 38bfd04ad4c..8c36100f762 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_shader_setup.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_shader_setup.cl @@ -18,10 +18,9 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_shader_setup.h" -__kernel void kernel_ocl_path_trace_shader_setup( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - ccl_local unsigned int local_queue_atomics; - kernel_shader_setup((KernelGlobals*)kg, &local_queue_atomics); -} +#define KERNEL_NAME shader_setup +#define LOCALS_TYPE unsigned int +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME +#undef LOCALS_TYPE + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shader_sort.cl b/intern/cycles/kernel/kernels/opencl/kernel_shader_sort.cl index 6f722915d45..bcacaa4a054 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_shader_sort.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_shader_sort.cl @@ -19,10 +19,9 @@ #include "kernel/split/kernel_shader_sort.h" __attribute__((reqd_work_group_size(64, 1, 1))) -__kernel void kernel_ocl_path_trace_shader_sort( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - ccl_local ShaderSortLocals locals; - kernel_shader_sort((KernelGlobals*)kg, &locals); -} +#define KERNEL_NAME shader_sort +#define LOCALS_TYPE ShaderSortLocals +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME +#undef LOCALS_TYPE + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl index 6a8ef81b32a..8de250a375c 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl @@ -18,9 +18,7 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_shadow_blocked_ao.h" -__kernel void kernel_ocl_path_trace_shadow_blocked_ao( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - kernel_shadow_blocked_ao((KernelGlobals*)kg); -} +#define KERNEL_NAME shadow_blocked_ao +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl index b255cc5ef8b..29da77022ed 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl @@ -18,9 +18,7 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_shadow_blocked_dl.h" -__kernel void kernel_ocl_path_trace_shadow_blocked_dl( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - kernel_shadow_blocked_dl((KernelGlobals*)kg); -} +#define KERNEL_NAME shadow_blocked_dl +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_split_function.h b/intern/cycles/kernel/kernels/opencl/kernel_split_function.h new file mode 100644 index 00000000000..f1e914a70d4 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_split_function.h @@ -0,0 +1,72 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define KERNEL_NAME_JOIN(a, b) a ## _ ## b +#define KERNEL_NAME_EVAL(a, b) KERNEL_NAME_JOIN(a, b) + +__kernel void KERNEL_NAME_EVAL(kernel_ocl_path_trace, KERNEL_NAME)( + ccl_global char *kg_global, + ccl_constant KernelData *data, + + ccl_global void *split_data_buffer, + ccl_global char *ray_state, + ccl_global uint *rng_state, + +#define KERNEL_TEX(type, ttype, name) \ + ccl_global type *name, +#include "kernel/kernel_textures.h" + + ccl_global int *queue_index, + ccl_global char *use_queues_flag, + ccl_global unsigned int *work_pools, + ccl_global float *buffer + ) +{ +#ifdef LOCALS_TYPE + ccl_local LOCALS_TYPE locals; +#endif + + KernelGlobals *kg = (KernelGlobals*)kg_global; + + if(ccl_local_id(0) + ccl_local_id(1) == 0) { + kg->data = data; + + kernel_split_params.rng_state = rng_state; + kernel_split_params.queue_index = queue_index; + kernel_split_params.use_queues_flag = use_queues_flag; + kernel_split_params.work_pools = work_pools; + kernel_split_params.buffer = buffer; + + split_data_init(kg, &kernel_split_state, ccl_global_size(0)*ccl_global_size(1), split_data_buffer, ray_state); + +#define KERNEL_TEX(type, ttype, name) \ + kg->name = name; +#include "kernel/kernel_textures.h" + } + + ccl_barrier(CCL_LOCAL_MEM_FENCE); + + KERNEL_NAME_EVAL(kernel, KERNEL_NAME)( + kg +#ifdef LOCALS_TYPE + , &locals +#endif + ); +} + +#undef KERNEL_NAME_JOIN +#undef KERNEL_NAME_EVAL + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl b/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl index 99b74a1802b..2b3be38df84 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl @@ -18,9 +18,7 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_subsurface_scatter.h" -__kernel void kernel_ocl_path_trace_subsurface_scatter( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - kernel_subsurface_scatter((KernelGlobals*)kg); -} +#define KERNEL_NAME subsurface_scatter +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME +