230c00d872
This does a few things at once: - Refactors host side split kernel logic into a new device agnostic class `DeviceSplitKernel`. - Removes tile splitting, a new work pool implementation takes its place and allows as many threads as will fit in memory regardless of tile size, which can give performance gains. - Refactors split state buffers into one buffer, as well as reduces the number of arguments passed to kernels. Means there's less code to deal with overall. - Moves kernel logic out of OpenCL kernel files so they can later be used by other device types. - Replaced OpenCL specific APIs with new generic versions - Tiles can now be seen updating during rendering
156 lines
4.2 KiB
C++
156 lines
4.2 KiB
C++
/*
|
|
* Copyright 2011-2013 Blender Foundation
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
#ifdef WITH_OPENCL
|
|
|
|
#include "opencl.h"
|
|
|
|
#include "buffers.h"
|
|
|
|
#include "kernel_types.h"
|
|
|
|
#include "util_md5.h"
|
|
#include "util_path.h"
|
|
#include "util_time.h"
|
|
|
|
CCL_NAMESPACE_BEGIN
|
|
|
|
class OpenCLDeviceMegaKernel : public OpenCLDeviceBase
|
|
{
|
|
public:
|
|
OpenCLProgram path_trace_program;
|
|
|
|
OpenCLDeviceMegaKernel(DeviceInfo& info, Stats &stats, bool background_)
|
|
: OpenCLDeviceBase(info, stats, background_),
|
|
path_trace_program(this, "megakernel", "kernel.cl", "-D__COMPILE_ONLY_MEGAKERNEL__ ")
|
|
{
|
|
}
|
|
|
|
virtual bool show_samples() const {
|
|
return true;
|
|
}
|
|
|
|
virtual bool load_kernels(const DeviceRequestedFeatures& /*requested_features*/,
|
|
vector<OpenCLProgram*> &programs)
|
|
{
|
|
path_trace_program.add_kernel(ustring("path_trace"));
|
|
programs.push_back(&path_trace_program);
|
|
return true;
|
|
}
|
|
|
|
~OpenCLDeviceMegaKernel()
|
|
{
|
|
task_pool.stop();
|
|
path_trace_program.release();
|
|
}
|
|
|
|
void path_trace(RenderTile& rtile, int sample)
|
|
{
|
|
/* Cast arguments to cl types. */
|
|
cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer);
|
|
cl_mem d_buffer = CL_MEM_PTR(rtile.buffer);
|
|
cl_mem d_rng_state = CL_MEM_PTR(rtile.rng_state);
|
|
cl_int d_x = rtile.x;
|
|
cl_int d_y = rtile.y;
|
|
cl_int d_w = rtile.w;
|
|
cl_int d_h = rtile.h;
|
|
cl_int d_offset = rtile.offset;
|
|
cl_int d_stride = rtile.stride;
|
|
|
|
/* Sample arguments. */
|
|
cl_int d_sample = sample;
|
|
|
|
cl_kernel ckPathTraceKernel = path_trace_program(ustring("path_trace"));
|
|
|
|
cl_uint start_arg_index =
|
|
kernel_set_args(ckPathTraceKernel,
|
|
0,
|
|
d_data,
|
|
d_buffer,
|
|
d_rng_state);
|
|
|
|
#define KERNEL_TEX(type, ttype, name) \
|
|
set_kernel_arg_mem(ckPathTraceKernel, &start_arg_index, #name);
|
|
#include "kernel_textures.h"
|
|
#undef KERNEL_TEX
|
|
|
|
start_arg_index += kernel_set_args(ckPathTraceKernel,
|
|
start_arg_index,
|
|
d_sample,
|
|
d_x,
|
|
d_y,
|
|
d_w,
|
|
d_h,
|
|
d_offset,
|
|
d_stride);
|
|
|
|
enqueue_kernel(ckPathTraceKernel, d_w, d_h);
|
|
}
|
|
|
|
void thread_run(DeviceTask *task)
|
|
{
|
|
if(task->type == DeviceTask::FILM_CONVERT) {
|
|
film_convert(*task, task->buffer, task->rgba_byte, task->rgba_half);
|
|
}
|
|
else if(task->type == DeviceTask::SHADER) {
|
|
shader(*task);
|
|
}
|
|
else if(task->type == DeviceTask::PATH_TRACE) {
|
|
RenderTile tile;
|
|
/* Keep rendering tiles until done. */
|
|
while(task->acquire_tile(this, tile)) {
|
|
int start_sample = tile.start_sample;
|
|
int end_sample = tile.start_sample + tile.num_samples;
|
|
|
|
for(int sample = start_sample; sample < end_sample; sample++) {
|
|
if(task->get_cancel()) {
|
|
if(task->need_finish_queue == false)
|
|
break;
|
|
}
|
|
|
|
path_trace(tile, sample);
|
|
|
|
tile.sample = sample + 1;
|
|
|
|
task->update_progress(&tile, tile.w*tile.h);
|
|
}
|
|
|
|
/* Complete kernel execution before release tile */
|
|
/* This helps in multi-device render;
|
|
* The device that reaches the critical-section function
|
|
* release_tile waits (stalling other devices from entering
|
|
* release_tile) for all kernels to complete. If device1 (a
|
|
* slow-render device) reaches release_tile first then it would
|
|
* stall device2 (a fast-render device) from proceeding to render
|
|
* next tile.
|
|
*/
|
|
clFinish(cqCommandQueue);
|
|
|
|
task->release_tile(tile);
|
|
}
|
|
}
|
|
}
|
|
};
|
|
|
|
Device *opencl_create_mega_device(DeviceInfo& info, Stats& stats, bool background)
|
|
{
|
|
return new OpenCLDeviceMegaKernel(info, stats, background);
|
|
}
|
|
|
|
CCL_NAMESPACE_END
|
|
|
|
#endif
|