forked from bartvdbraak/blender
31ed71cb6b
When the scene is updated, Cycles resets the renderer device, cancelling all existing tasks. The main thread would wait for all running tasks to finish before continuing. This is OK when tasks can actually cancel in a timely fashion. For OSL, however, this does not work, since the OSL shader group optimization takes quite a bit of time and cannot easily be cancelled once running (on my crappy machine in full debug mode: ~0.12 seconds for simple node trees). This would lead to very laggy UI behavior and make it difficult to accurately control elements such as sliders. This patch removes the wait condition from the device->task_cancel method. Instead, it just sets the do_cancel flag and returns. To avoid a backlog in the task pool of the device, the BlenderSession::sync function returns early while the reset is going on (tested in Session::resetting). Once all existing tasks have finished, the do_cancel flag is finally cleared again (checked in TaskPool::num_decrease). Care has to be taken to avoid race conditions on the do_cancel flag, since it can now be modified outside the TaskPool::cancel function itself. For this purpose the scope of the TaskPool::num_mutex locks has been extended: in most cases the mutex is now locked by the TaskPool itself before calling TaskScheduler methods, instead of only locking inside the num_increase/num_decrease functions themselves. The only occurrence of a lock outside of the TaskPool methods is in TaskScheduler::thread_run. This patch is most useful in combination with the OSL renderer mode, so it can probably wait until after the 2.64 release. SVM tasks tend to be cancelled quickly, so the effect is less noticeable.
304 lines
6.6 KiB
C++
304 lines
6.6 KiB
C++
/*
|
|
* Copyright 2011, Blender Foundation.
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation; either version 2
|
|
* of the License, or (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software Foundation,
|
|
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
*/
|
|
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
#include "device.h"
|
|
#include "device_intern.h"
|
|
|
|
#include "kernel.h"
|
|
#include "kernel_types.h"
|
|
|
|
#include "osl_shader.h"
|
|
|
|
#include "buffers.h"
|
|
|
|
#include "util_debug.h"
|
|
#include "util_foreach.h"
|
|
#include "util_function.h"
|
|
#include "util_opengl.h"
|
|
#include "util_progress.h"
|
|
#include "util_system.h"
|
|
#include "util_thread.h"
|
|
|
|
CCL_NAMESPACE_BEGIN
|
|
|
|
class CPUDevice : public Device
|
|
{
|
|
public:
|
|
TaskPool task_pool;
|
|
KernelGlobals *kg;
|
|
|
|
CPUDevice(int threads_num)
|
|
{
|
|
kg = kernel_globals_create();
|
|
|
|
/* do now to avoid thread issues */
|
|
system_cpu_support_optimized();
|
|
}
|
|
|
|
~CPUDevice()
|
|
{
|
|
task_pool.stop();
|
|
kernel_globals_free(kg);
|
|
}
|
|
|
|
bool support_advanced_shading()
|
|
{
|
|
return true;
|
|
}
|
|
|
|
void mem_alloc(device_memory& mem, MemoryType type)
|
|
{
|
|
mem.device_pointer = mem.data_pointer;
|
|
}
|
|
|
|
void mem_copy_to(device_memory& mem)
|
|
{
|
|
/* no-op */
|
|
}
|
|
|
|
void mem_copy_from(device_memory& mem, int y, int w, int h, int elem)
|
|
{
|
|
/* no-op */
|
|
}
|
|
|
|
void mem_zero(device_memory& mem)
|
|
{
|
|
memset((void*)mem.device_pointer, 0, mem.memory_size());
|
|
}
|
|
|
|
void mem_free(device_memory& mem)
|
|
{
|
|
mem.device_pointer = 0;
|
|
}
|
|
|
|
void const_copy_to(const char *name, void *host, size_t size)
|
|
{
|
|
kernel_const_copy(kg, name, host, size);
|
|
}
|
|
|
|
void tex_alloc(const char *name, device_memory& mem, bool interpolation, bool periodic)
|
|
{
|
|
kernel_tex_copy(kg, name, mem.data_pointer, mem.data_width, mem.data_height);
|
|
mem.device_pointer = mem.data_pointer;
|
|
}
|
|
|
|
void tex_free(device_memory& mem)
|
|
{
|
|
mem.device_pointer = 0;
|
|
}
|
|
|
|
void *osl_memory()
|
|
{
|
|
#ifdef WITH_OSL
|
|
return kernel_osl_memory(kg);
|
|
#else
|
|
return NULL;
|
|
#endif
|
|
}
|
|
|
|
void thread_run(DeviceTask *task)
|
|
{
|
|
if(task->type == DeviceTask::PATH_TRACE)
|
|
thread_path_trace(*task);
|
|
else if(task->type == DeviceTask::TONEMAP)
|
|
thread_tonemap(*task);
|
|
else if(task->type == DeviceTask::SHADER)
|
|
thread_shader(*task);
|
|
}
|
|
|
|
class CPUDeviceTask : public DeviceTask {
|
|
public:
|
|
CPUDeviceTask(CPUDevice *device, DeviceTask& task)
|
|
: DeviceTask(task)
|
|
{
|
|
run = function_bind(&CPUDevice::thread_run, device, this);
|
|
}
|
|
};
|
|
|
|
void thread_path_trace(DeviceTask& task)
|
|
{
|
|
if(task_pool.cancelled())
|
|
return;
|
|
|
|
#ifdef WITH_OSL
|
|
if(kernel_osl_use(kg))
|
|
OSLShader::thread_init(kg);
|
|
#endif
|
|
|
|
RenderTile tile;
|
|
|
|
while(task.acquire_tile(this, tile)) {
|
|
float *render_buffer = (float*)tile.buffer;
|
|
uint *rng_state = (uint*)tile.rng_state;
|
|
int start_sample = tile.start_sample;
|
|
int end_sample = tile.start_sample + tile.num_samples;
|
|
|
|
#ifdef WITH_OPTIMIZED_KERNEL
|
|
if(system_cpu_support_optimized()) {
|
|
for(int sample = start_sample; sample < end_sample; sample++) {
|
|
if (task.get_cancel() || task_pool.cancelled())
|
|
break;
|
|
|
|
for(int y = tile.y; y < tile.y + tile.h; y++) {
|
|
for(int x = tile.x; x < tile.x + tile.w; x++) {
|
|
kernel_cpu_optimized_path_trace(kg, render_buffer, rng_state,
|
|
sample, x, y, tile.offset, tile.stride);
|
|
}
|
|
}
|
|
|
|
tile.sample = sample + 1;
|
|
|
|
task.update_progress(tile);
|
|
}
|
|
}
|
|
else
|
|
#endif
|
|
{
|
|
for(int sample = start_sample; sample < end_sample; sample++) {
|
|
if (task.get_cancel() || task_pool.cancelled())
|
|
break;
|
|
|
|
for(int y = tile.y; y < tile.y + tile.h; y++) {
|
|
for(int x = tile.x; x < tile.x + tile.w; x++) {
|
|
kernel_cpu_path_trace(kg, render_buffer, rng_state,
|
|
sample, x, y, tile.offset, tile.stride);
|
|
}
|
|
}
|
|
|
|
tile.sample = sample + 1;
|
|
|
|
task.update_progress(tile);
|
|
}
|
|
}
|
|
|
|
task.release_tile(tile);
|
|
|
|
if(task_pool.cancelled())
|
|
break;
|
|
}
|
|
|
|
#ifdef WITH_OSL
|
|
if(kernel_osl_use(kg))
|
|
OSLShader::thread_free(kg);
|
|
#endif
|
|
}
|
|
|
|
void thread_tonemap(DeviceTask& task)
|
|
{
|
|
#ifdef WITH_OPTIMIZED_KERNEL
|
|
if(system_cpu_support_optimized()) {
|
|
for(int y = task.y; y < task.y + task.h; y++)
|
|
for(int x = task.x; x < task.x + task.w; x++)
|
|
kernel_cpu_optimized_tonemap(kg, (uchar4*)task.rgba, (float*)task.buffer,
|
|
task.sample, task.resolution, x, y, task.offset, task.stride);
|
|
}
|
|
else
|
|
#endif
|
|
{
|
|
for(int y = task.y; y < task.y + task.h; y++)
|
|
for(int x = task.x; x < task.x + task.w; x++)
|
|
kernel_cpu_tonemap(kg, (uchar4*)task.rgba, (float*)task.buffer,
|
|
task.sample, task.resolution, x, y, task.offset, task.stride);
|
|
}
|
|
}
|
|
|
|
void thread_shader(DeviceTask& task)
|
|
{
|
|
#ifdef WITH_OSL
|
|
if(kernel_osl_use(kg))
|
|
OSLShader::thread_init(kg);
|
|
#endif
|
|
|
|
#ifdef WITH_OPTIMIZED_KERNEL
|
|
if(system_cpu_support_optimized()) {
|
|
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
|
|
kernel_cpu_optimized_shader(kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
|
|
|
|
if(task_pool.cancelled())
|
|
break;
|
|
}
|
|
}
|
|
else
|
|
#endif
|
|
{
|
|
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
|
|
kernel_cpu_shader(kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
|
|
|
|
if(task_pool.cancelled())
|
|
break;
|
|
}
|
|
}
|
|
|
|
#ifdef WITH_OSL
|
|
if(kernel_osl_use(kg))
|
|
OSLShader::thread_free(kg);
|
|
#endif
|
|
}
|
|
|
|
void task_add(DeviceTask& task)
|
|
{
|
|
/* split task into smaller ones, more than number of threads for uneven
|
|
* workloads where some parts of the image render slower than others */
|
|
list<DeviceTask> tasks;
|
|
task.split(tasks, TaskScheduler::num_threads()+1);
|
|
|
|
foreach(DeviceTask& task, tasks)
|
|
task_pool.push(new CPUDeviceTask(this, task));
|
|
}
|
|
|
|
void task_wait()
|
|
{
|
|
task_pool.wait_work();
|
|
}
|
|
|
|
void task_cancel()
|
|
{
|
|
task_pool.cancel();
|
|
}
|
|
|
|
bool task_cancelled()
|
|
{
|
|
return task_pool.cancelled();
|
|
}
|
|
};
|
|
|
|
/* Allocate a new CPUDevice and return it as a generic Device pointer.
 * The info argument is not used; threads is passed to the constructor. */
Device *device_cpu_create(DeviceInfo& info, int threads)
{
	Device *device = new CPUDevice(threads);

	return device;
}
|
|
|
|
/* Describe the CPU device and put it at the front of the devices list. */
void device_cpu_info(vector<DeviceInfo>& devices)
{
	DeviceInfo cpu_info;

	/* identification */
	cpu_info.type = DEVICE_CPU;
	cpu_info.id = "CPU";
	cpu_info.num = 0;
	cpu_info.description = system_cpu_brand_string();

	/* capabilities */
	cpu_info.advanced_shading = true;
	cpu_info.pack_images = false;

	/* inserted at the beginning, ahead of any entries already present */
	devices.insert(devices.begin(), cpu_info);
}
|
|
|
|
CCL_NAMESPACE_END
|
|
|