blender/intern/cycles/kernel/split/kernel_sum_all_radiance.h
Mai Lavelle 230c00d872 Cycles: OpenCL split kernel refactor
This does a few things at once:

- Refactors host side split kernel logic into a new device
  agnostic class `DeviceSplitKernel`.
- Removes tile splitting, a new work pool implementation takes its place and
  allows as many threads as will fit in memory regardless of tile size, which
  can give performance gains.
- Refactors split state buffers into one buffer, as well as reduces the
  number of arguments passed to kernels. Means there's less code to deal
  with overall.
- Moves kernel logic out of OpenCL kernel files so they can later be used by
  other device types.
- Replaces OpenCL-specific APIs with new generic versions
- Tiles can now be seen updating during rendering
2017-03-08 00:52:41 -05:00

58 lines
2.0 KiB
C

/*
* Copyright 2011-2015 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
CCL_NAMESPACE_BEGIN
/* Samples are processed in parallel, so the radiance produced for each of
 * them lands in a separate location. This kernel gathers those per-sample
 * results and writes them into the RenderTile's output buffer.
 *
 * For every float of the pass stride, the destination is overwritten when
 * start_sample is 0 and accumulated into otherwise.
 */
ccl_device void kernel_sum_all_radiance(KernelGlobals *kg)
{
	/* 2D global id, used as the position inside the w x h region. */
	const int local_x = ccl_global_id(0);
	const int local_y = ccl_global_id(1);

	/* Ids that fall outside the region have nothing to do. */
	if(local_x >= kernel_split_params.w || local_y >= kernel_split_params.h) {
		return;
	}

	const int stride = kernel_split_params.stride;
	const int pass_stride = kernel_data.film.pass_stride;

	/* Source: indexed with the region-relative position. */
	ccl_global float *src = kernel_split_state.per_sample_output_buffers;
	src += (local_x + local_y * stride) * pass_stride;

	/* Destination: indexed with the position shifted by the region origin
	 * (x, y) plus the buffer offset.
	 */
	const int buffer_x = local_x + kernel_split_params.x;
	const int buffer_y = local_y + kernel_split_params.y;
	ccl_global float *dst = kernel_split_params.buffer;
	dst += (kernel_split_params.offset + buffer_x + buffer_y * stride) * pass_stride;

	if(kernel_split_params.start_sample == 0) {
		/* First sample: replace the destination without reading it. */
		for(int i = 0; i < pass_stride; i++) {
			dst[i] = src[i];
		}
	}
	else {
		/* Later samples: add onto what the destination already holds. */
		for(int i = 0; i < pass_stride; i++) {
			dst[i] = dst[i] + src[i];
		}
	}
}
CCL_NAMESPACE_END