2016-09-14 21:47:54 +00:00
|
|
|
/*
|
|
|
|
* Copyright 2011-2013 Blender Foundation
|
|
|
|
*
|
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
* You may obtain a copy of the License at
|
|
|
|
*
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
*
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
* limitations under the License.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifdef WITH_OPENCL
|
|
|
|
|
|
|
|
#include "opencl.h"
|
|
|
|
|
|
|
|
#include "buffers.h"
|
|
|
|
|
|
|
|
#include "kernel_types.h"
|
2017-03-08 10:02:54 +00:00
|
|
|
#include "kernel_split_data_types.h"
|
2016-09-14 21:47:54 +00:00
|
|
|
|
2017-02-22 13:10:02 +00:00
|
|
|
#include "device_split_kernel.h"
|
|
|
|
|
|
|
|
#include "util_logging.h"
|
2016-09-14 21:47:54 +00:00
|
|
|
#include "util_md5.h"
|
|
|
|
#include "util_path.h"
|
|
|
|
#include "util_time.h"
|
|
|
|
|
|
|
|
CCL_NAMESPACE_BEGIN
|
|
|
|
|
2017-02-22 13:10:02 +00:00
|
|
|
class OpenCLSplitKernel;
|
2016-09-14 21:47:54 +00:00
|
|
|
|
2017-02-22 13:10:02 +00:00
|
|
|
/* Assemble the OpenCL compiler options used for the split kernel programs.
 *
 * The result starts with "-D__SPLIT_KERNEL__ ", followed by whatever
 * requested_features.get_build_options() returns, and gets
 * " -D__COMPUTE_DEVICE_GPU__" appended when the device type reported by
 * clGetDeviceInfo() equals CL_DEVICE_TYPE_GPU.
 *
 * Side effect: the status of the device type query is stored in
 * device->ciErr.
 */
static string get_build_options(OpenCLDeviceBase *device, const DeviceRequestedFeatures& requested_features)
{
	string build_options = "-D__SPLIT_KERNEL__ ";
	build_options += requested_features.get_build_options();

	/* Set compute device build option. */
	/* Zero-initialized so that a failed query never leads to a read of an
	 * indeterminate value in builds where assert() is compiled out; with 0
	 * the GPU define is simply not added.
	 */
	cl_device_type device_type = 0;
	device->ciErr = clGetDeviceInfo(device->cdDevice,
	                                CL_DEVICE_TYPE,
	                                sizeof(cl_device_type),
	                                &device_type,
	                                NULL);
	assert(device->ciErr == CL_SUCCESS);
	if(device_type == CL_DEVICE_TYPE_GPU) {
		build_options += " -D__COMPUTE_DEVICE_GPU__";
	}

	return build_options;
}
|
2016-09-14 21:47:54 +00:00
|
|
|
|
|
|
|
/* OpenCLDeviceSplitKernel's declaration/definition. */
|
|
|
|
/* OpenCL device which renders path tracing tasks through a DeviceSplitKernel.
 *
 * Besides the split kernel driver, the device holds two OpenCL programs of
 * its own: one with the "path_trace_data_init" kernel and one with the
 * "path_trace_state_buffer_size" kernel.
 */
class OpenCLDeviceSplitKernel : public OpenCLDeviceBase
{
public:
	/* Split kernel driver; allocated in the constructor, deleted in the
	 * destructor.
	 */
	DeviceSplitKernel *split_kernel;
	/* Program built from kernel_data_init.cl. */
	OpenCLProgram program_data_init;
	/* Program built from kernel_state_buffer_size.cl. */
	OpenCLProgram program_state_buffer_size;

	OpenCLDeviceSplitKernel(DeviceInfo& info, Stats &stats, bool background_);

	/* Stop the task pool first, then release the OpenCL programs owned by
	 * this device and finally destroy the split kernel driver.
	 */
	~OpenCLDeviceSplitKernel()
	{
		task_pool.stop();

		/* Release kernels */
		program_data_init.release();
		/* Released explicitly as well, so both programs owned by this device
		 * are torn down at the same point as program_data_init.
		 */
		program_state_buffer_size.release();

		delete split_kernel;
	}

	/* Set up the two programs owned by this device and append them to
	 * `programs`, then let the split kernel driver load its own kernels.
	 *
	 * This method does not call load() on the two programs itself; they are
	 * only handed to the caller through `programs`.
	 *
	 * Returns the result of split_kernel->load_kernels().
	 */
	virtual bool load_kernels(const DeviceRequestedFeatures& requested_features,
	                          vector<OpenCLDeviceBase::OpenCLProgram*> &programs)
	{
		program_data_init = OpenCLDeviceBase::OpenCLProgram(this,
		                                                    "split_data_init",
		                                                    "kernel_data_init.cl",
		                                                    get_build_options(this, requested_features));
		program_data_init.add_kernel(ustring("path_trace_data_init"));
		programs.push_back(&program_data_init);

		program_state_buffer_size = OpenCLDeviceBase::OpenCLProgram(this,
		                                                            "split_state_buffer_size",
		                                                            "kernel_state_buffer_size.cl",
		                                                            get_build_options(this, requested_features));
		program_state_buffer_size.add_kernel(ustring("path_trace_state_buffer_size"));
		programs.push_back(&program_state_buffer_size);

		return split_kernel->load_kernels(requested_features);
	}

	/* Execute a single device task.
	 *
	 * FILM_CONVERT and SHADER tasks are forwarded to film_convert() and
	 * shader(). PATH_TRACE tasks render tiles through the split kernel until
	 * acquire_tile() stops handing out tiles. Any other task type is ignored.
	 */
	void thread_run(DeviceTask *task)
	{
		if(task->type == DeviceTask::FILM_CONVERT) {
			film_convert(*task, task->buffer, task->rgba_byte, task->rgba_half);
		}
		else if(task->type == DeviceTask::SHADER) {
			shader(*task);
		}
		else if(task->type == DeviceTask::PATH_TRACE) {
			RenderTile tile;

			/* Copy dummy KernelGlobals related to OpenCL from kernel_globals.h to
			 * fetch its size.
			 */
			typedef struct KernelGlobals {
				ccl_constant KernelData *data;
#define KERNEL_TEX(type, ttype, name) \
				ccl_global type *name;
#include "kernel_textures.h"
#undef KERNEL_TEX
				SplitData split_data;
				SplitParams split_param_data;
			} KernelGlobals;

			/* Allocate buffer for kernel globals */
			device_memory kgbuffer;
			kgbuffer.resize(sizeof(KernelGlobals));
			mem_alloc("kernel_globals", kgbuffer, MEM_READ_WRITE);

			/* Keep rendering tiles until done. */
			while(task->acquire_tile(this, tile)) {
				/* NOTE(review): the "__data" entry of const_mem_map is
				 * dereferenced without a check; presumably it is always
				 * present by the time a task runs -- confirm.
				 */
				split_kernel->path_trace(task,
				                         tile,
				                         kgbuffer,
				                         *const_mem_map["__data"]);

				/* Complete kernel execution before release tile. */
				/* This helps in multi-device render;
				 * The device that reaches the critical-section function
				 * release_tile waits (stalling other devices from entering
				 * release_tile) for all kernels to complete. If device1 (a
				 * slow-render device) reaches release_tile first then it would
				 * stall device2 (a fast-render device) from proceeding to render
				 * next tile.
				 */
				clFinish(cqCommandQueue);

				task->release_tile(tile);
			}

			mem_free(kgbuffer);
		}
	}

protected:
	/* ** Those guys are for working around some compiler-specific bugs ** */

	/* Build options for the base program: exactly the options derived from
	 * the requested features, without any split kernel specific defines.
	 */
	string build_options_for_base_program(
	        const DeviceRequestedFeatures& requested_features)
	{
		return requested_features.get_build_options();
	}

	friend class OpenCLSplitKernel;
	friend class OpenCLSplitKernelFunction;
};
|
2016-09-14 21:47:54 +00:00
|
|
|
|
2017-02-22 13:10:02 +00:00
|
|
|
class OpenCLSplitKernelFunction : public SplitKernelFunction {
|
|
|
|
public:
|
|
|
|
OpenCLDeviceSplitKernel* device;
|
|
|
|
OpenCLDeviceBase::OpenCLProgram program;
|
2016-09-14 21:47:54 +00:00
|
|
|
|
2017-02-22 13:10:02 +00:00
|
|
|
OpenCLSplitKernelFunction(OpenCLDeviceSplitKernel* device) : device(device) {}
|
|
|
|
~OpenCLSplitKernelFunction() { program.release(); }
|
2016-09-14 21:47:54 +00:00
|
|
|
|
2017-02-22 13:10:02 +00:00
|
|
|
virtual bool enqueue(const KernelDimensions& dim, device_memory& kg, device_memory& data)
|
2016-09-14 21:47:54 +00:00
|
|
|
{
|
2017-02-22 13:10:02 +00:00
|
|
|
device->kernel_set_args(program(), 0, kg, data);
|
|
|
|
|
|
|
|
device->ciErr = clEnqueueNDRangeKernel(device->cqCommandQueue,
|
|
|
|
program(),
|
|
|
|
2,
|
|
|
|
NULL,
|
|
|
|
dim.global_size,
|
|
|
|
dim.local_size,
|
|
|
|
0,
|
|
|
|
NULL,
|
|
|
|
NULL);
|
|
|
|
|
|
|
|
device->opencl_assert_err(device->ciErr, "clEnqueueNDRangeKernel");
|
|
|
|
|
|
|
|
if(device->ciErr != CL_SUCCESS) {
|
|
|
|
string message = string_printf("OpenCL error: %s in clEnqueueNDRangeKernel()",
|
|
|
|
clewErrorString(device->ciErr));
|
|
|
|
device->opencl_error(message);
|
|
|
|
return false;
|
2016-09-14 21:47:54 +00:00
|
|
|
}
|
|
|
|
|
2017-02-22 13:10:02 +00:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
};
|
2016-09-14 21:47:54 +00:00
|
|
|
|
2017-02-22 13:10:02 +00:00
|
|
|
/* OpenCL implementation of the DeviceSplitKernel interface.
 *
 * Creates one OpenCL program per split kernel function and enqueues the
 * data init and state buffer size kernels owned by the device.
 */
class OpenCLSplitKernel : public DeviceSplitKernel {
	/* Device this driver works for; not owned. */
	OpenCLDeviceSplitKernel *device;
public:
	explicit OpenCLSplitKernel(OpenCLDeviceSplitKernel *device) : DeviceSplitKernel(device), device(device) {
	}

	/* Create the kernel function for `kernel_name`.
	 *
	 * The program is named "split_<kernel_name>", built from
	 * "kernel_<kernel_name>.cl", and contains the kernel
	 * "path_trace_<kernel_name>". The program is loaded right away.
	 *
	 * Returns a newly allocated OpenCLSplitKernelFunction, or NULL when the
	 * program is not loaded afterwards (the function object is deleted in
	 * that case).
	 */
	virtual SplitKernelFunction* get_split_kernel_function(string kernel_name,
	                                                       const DeviceRequestedFeatures& requested_features)
	{
		OpenCLSplitKernelFunction* kernel = new OpenCLSplitKernelFunction(device);

		kernel->program = OpenCLDeviceBase::OpenCLProgram(device,
		                                                  "split_" + kernel_name,
		                                                  "kernel_" + kernel_name + ".cl",
		                                                  get_build_options(device, requested_features));
		kernel->program.add_kernel(ustring("path_trace_" + kernel_name));
		kernel->program.load();

		if(!kernel->program.is_loaded()) {
			delete kernel;
			return NULL;
		}

		return kernel;
	}

	/* Run the state buffer size kernel and return the value it wrote.
	 *
	 * A one-element uint buffer is allocated on the device, passed to the
	 * kernel as the last argument (after `kg`, `data` and the thread count),
	 * copied back and freed again.
	 *
	 * Returns 0 when enqueueing the kernel failed; the error is reported on
	 * the device in that case.
	 */
	virtual size_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads)
	{
		device_vector<uint> size_buffer;
		size_buffer.resize(1);
		device->mem_alloc(NULL, size_buffer, MEM_READ_WRITE);

		/* The kernel argument is passed as uint, hence the conversion. */
		uint threads = num_threads;
		device->kernel_set_args(device->program_state_buffer_size(), 0, kg, data, threads, size_buffer);

		size_t global_size = 64;
		device->ciErr = clEnqueueNDRangeKernel(device->cqCommandQueue,
		                                       device->program_state_buffer_size(),
		                                       1,
		                                       NULL,
		                                       &global_size,
		                                       NULL,
		                                       0,
		                                       NULL,
		                                       NULL);

		device->opencl_assert_err(device->ciErr, "clEnqueueNDRangeKernel");

		/* Copy back and free happen regardless of the enqueue status, so the
		 * device allocation is never left behind on the error path.
		 */
		device->mem_copy_from(size_buffer, 0, 1, 1, sizeof(uint));
		device->mem_free(size_buffer);

		if(device->ciErr != CL_SUCCESS) {
			string message = string_printf("OpenCL error: %s in clEnqueueNDRangeKernel()",
			                               clewErrorString(device->ciErr));
			device->opencl_error(message);
			return 0;
		}

		/* NOTE(review): read after mem_free(); presumably the host side
		 * storage of the vector stays valid -- confirm.
		 */
		return *size_buffer.get_data();
	}

	/* Set all arguments of the data init kernel and enqueue it as a
	 * two-dimensional range described by `dim`.
	 *
	 * Arguments are set in three consecutive groups: the fixed leading
	 * arguments, one argument per KERNEL_TEX entry of kernel_textures.h, and
	 * finally the sample range, tile and queue arguments. The order
	 * presumably has to match the kernel signature in kernel_data_init.cl.
	 *
	 * Returns true when the kernel got enqueued; otherwise the error is
	 * reported on the device and false is returned.
	 */
	virtual bool enqueue_split_kernel_data_init(const KernelDimensions& dim,
	                                            RenderTile& rtile,
	                                            int num_global_elements,
	                                            device_memory& kernel_globals,
	                                            device_memory& kernel_data,
	                                            device_memory& split_data,
	                                            device_memory& ray_state,
	                                            device_memory& queue_index,
	                                            device_memory& use_queues_flag,
	                                            device_memory& work_pool_wgs
	                                            )
	{
		/* Product of both global size dimensions, passed to the kernel as
		 * the queue size argument.
		 */
		cl_int dQueue_size = dim.global_size[0] * dim.global_size[1];

		/* Set the range of samples to be processed for every ray in
		 * path-regeneration logic.
		 */
		cl_int start_sample = rtile.start_sample;
		cl_int end_sample = rtile.start_sample + rtile.num_samples;

		cl_uint start_arg_index =
			device->kernel_set_args(device->program_data_init(),
			                        0,
			                        kernel_globals,
			                        kernel_data,
			                        split_data,
			                        num_global_elements,
			                        ray_state,
			                        rtile.rng_state);

		/* TODO(sergey): Avoid map lookup here. */
#define KERNEL_TEX(type, ttype, name) \
		device->set_kernel_arg_mem(device->program_data_init(), &start_arg_index, #name);
#include "kernel_textures.h"
#undef KERNEL_TEX

		start_arg_index +=
			device->kernel_set_args(device->program_data_init(),
			                        start_arg_index,
			                        start_sample,
			                        end_sample,
			                        rtile.x,
			                        rtile.y,
			                        rtile.w,
			                        rtile.h,
			                        rtile.offset,
			                        rtile.stride,
			                        queue_index,
			                        dQueue_size,
			                        use_queues_flag,
			                        work_pool_wgs,
			                        rtile.num_samples,
			                        rtile.buffer);

		/* Enqueue ckPathTraceKernel_data_init kernel. */
		device->ciErr = clEnqueueNDRangeKernel(device->cqCommandQueue,
		                                       device->program_data_init(),
		                                       2,
		                                       NULL,
		                                       dim.global_size,
		                                       dim.local_size,
		                                       0,
		                                       NULL,
		                                       NULL);

		device->opencl_assert_err(device->ciErr, "clEnqueueNDRangeKernel");

		if(device->ciErr != CL_SUCCESS) {
			string message = string_printf("OpenCL error: %s in clEnqueueNDRangeKernel()",
			                               clewErrorString(device->ciErr));
			device->opencl_error(message);
			return false;
		}

		return true;
	}

	/* Local work size used for the split kernels: always 64 x 1. */
	virtual int2 split_kernel_local_size()
	{
		return make_int2(64, 1);
	}

	/* Pick the global work size from the maximum allocation size of the
	 * device.
	 *
	 * Half of the maximum allocation size is handed to
	 * max_elements_for_max_buffer_size(); the resulting element count is
	 * spread over a roughly square range whose first dimension goes through
	 * round_down(..., 64).
	 */
	virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask */*task*/)
	{
		/* CL_DEVICE_MAX_MEM_ALLOC_SIZE is reported as cl_ulong. Querying it
		 * into a size_t fails on platforms where size_t is narrower than
		 * that and leaves the value uninitialized. Zero-initialized so a
		 * failed query gives a deterministic result.
		 */
		cl_ulong max_buffer_size = 0;
		clGetDeviceInfo(device->cdDevice, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &max_buffer_size, NULL);
		VLOG(1) << "Maximum device allocation size: "
		        << string_human_readable_number(max_buffer_size) << " bytes. ("
		        << string_human_readable_size(max_buffer_size) << ").";

		size_t num_elements = max_elements_for_max_buffer_size(kg, data, max_buffer_size / 2);
		/* NOTE(review): the first dimension becomes 0 when sqrt(num_elements)
		 * is below 64, assuming round_down() rounds to a multiple of its
		 * second argument -- confirm this cannot happen in practice.
		 */
		int2 global_size = make_int2(round_down((int)sqrt(num_elements), 64), (int)sqrt(num_elements));
		VLOG(1) << "Global size: " << global_size << ".";
		return global_size;
	}
};
|
2016-09-14 21:47:54 +00:00
|
|
|
|
2017-02-22 13:10:02 +00:00
|
|
|
/* Construct the device on top of OpenCLDeviceBase and create the split
 * kernel driver which works on this device.
 */
OpenCLDeviceSplitKernel::OpenCLDeviceSplitKernel(DeviceInfo& info, Stats &stats, bool background_)
: OpenCLDeviceBase(info, stats, background_)
{
	/* The driver is handed this device; it is deleted in the destructor. */
	this->split_kernel = new OpenCLSplitKernel(this);

	this->background = background_;
}
|
2016-09-14 21:47:54 +00:00
|
|
|
|
|
|
|
/* Factory for the split kernel flavor of the OpenCL device.
 *
 * Returns a newly allocated OpenCLDeviceSplitKernel.
 */
Device *opencl_create_split_device(DeviceInfo& info, Stats& stats, bool background)
{
	Device *split_device = new OpenCLDeviceSplitKernel(info, stats, background);
	return split_device;
}
|
|
|
|
|
|
|
|
CCL_NAMESPACE_END
|
|
|
|
|
|
|
|
#endif /* WITH_OPENCL */
|