blender/intern/cycles/device/device_cpu.cpp
Thomas Dinges 11707119de Cycles:
* Code cleanup, remove unused "resolution" variable from the DeviceTask class, was never used.
2013-05-14 21:18:20 +00:00

351 lines
8.1 KiB
C++

/*
* Copyright 2011, Blender Foundation.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include <stdlib.h>
#include <string.h>
#include "device.h"
#include "device_intern.h"
#include "kernel.h"
#include "kernel_compat_cpu.h"
#include "kernel_types.h"
#include "kernel_globals.h"
#include "osl_shader.h"
#include "osl_globals.h"
#include "buffers.h"
#include "util_debug.h"
#include "util_foreach.h"
#include "util_function.h"
#include "util_opengl.h"
#include "util_progress.h"
#include "util_system.h"
#include "util_thread.h"
CCL_NAMESPACE_BEGIN
class CPUDevice : public Device
{
public:
TaskPool task_pool;
KernelGlobals kernel_globals;
#ifdef WITH_OSL
OSLGlobals osl_globals;
#endif
CPUDevice(Stats &stats) : Device(stats)
{
#ifdef WITH_OSL
kernel_globals.osl = &osl_globals;
#endif
/* do now to avoid thread issues */
system_cpu_support_sse2();
system_cpu_support_sse3();
}
~CPUDevice()
{
task_pool.stop();
}
void mem_alloc(device_memory& mem, MemoryType type)
{
mem.device_pointer = mem.data_pointer;
stats.mem_alloc(mem.memory_size());
}
void mem_copy_to(device_memory& mem)
{
/* no-op */
}
void mem_copy_from(device_memory& mem, int y, int w, int h, int elem)
{
/* no-op */
}
void mem_zero(device_memory& mem)
{
memset((void*)mem.device_pointer, 0, mem.memory_size());
}
void mem_free(device_memory& mem)
{
mem.device_pointer = 0;
stats.mem_free(mem.memory_size());
}
void const_copy_to(const char *name, void *host, size_t size)
{
kernel_const_copy(&kernel_globals, name, host, size);
}
void tex_alloc(const char *name, device_memory& mem, bool interpolation, bool periodic)
{
kernel_tex_copy(&kernel_globals, name, mem.data_pointer, mem.data_width, mem.data_height);
mem.device_pointer = mem.data_pointer;
stats.mem_alloc(mem.memory_size());
}
void tex_free(device_memory& mem)
{
mem.device_pointer = 0;
stats.mem_free(mem.memory_size());
}
void *osl_memory()
{
#ifdef WITH_OSL
return &osl_globals;
#else
return NULL;
#endif
}
void thread_run(DeviceTask *task)
{
if(task->type == DeviceTask::PATH_TRACE)
thread_path_trace(*task);
else if(task->type == DeviceTask::TONEMAP)
thread_tonemap(*task);
else if(task->type == DeviceTask::SHADER)
thread_shader(*task);
}
class CPUDeviceTask : public DeviceTask {
public:
CPUDeviceTask(CPUDevice *device, DeviceTask& task)
: DeviceTask(task)
{
run = function_bind(&CPUDevice::thread_run, device, this);
}
};
void thread_path_trace(DeviceTask& task)
{
if(task_pool.cancelled()) {
if(task.need_finish_queue == false)
return;
}
KernelGlobals kg = kernel_globals;
#ifdef WITH_OSL
OSLShader::thread_init(&kg, &kernel_globals, &osl_globals);
#endif
RenderTile tile;
while(task.acquire_tile(this, tile)) {
float *render_buffer = (float*)tile.buffer;
uint *rng_state = (uint*)tile.rng_state;
int start_sample = tile.start_sample;
int end_sample = tile.start_sample + tile.num_samples;
#ifdef WITH_OPTIMIZED_KERNEL
if(system_cpu_support_sse3()) {
for(int sample = start_sample; sample < end_sample; sample++) {
if (task.get_cancel() || task_pool.cancelled()) {
if(task.need_finish_queue == false)
break;
}
for(int y = tile.y; y < tile.y + tile.h; y++) {
for(int x = tile.x; x < tile.x + tile.w; x++) {
kernel_cpu_sse3_path_trace(&kg, render_buffer, rng_state,
sample, x, y, tile.offset, tile.stride);
}
}
tile.sample = sample + 1;
task.update_progress(tile);
}
}
else if(system_cpu_support_sse2()) {
for(int sample = start_sample; sample < end_sample; sample++) {
if (task.get_cancel() || task_pool.cancelled()) {
if(task.need_finish_queue == false)
break;
}
for(int y = tile.y; y < tile.y + tile.h; y++) {
for(int x = tile.x; x < tile.x + tile.w; x++) {
kernel_cpu_sse2_path_trace(&kg, render_buffer, rng_state,
sample, x, y, tile.offset, tile.stride);
}
}
tile.sample = sample + 1;
task.update_progress(tile);
}
}
else
#endif
{
for(int sample = start_sample; sample < end_sample; sample++) {
if (task.get_cancel() || task_pool.cancelled()) {
if(task.need_finish_queue == false)
break;
}
for(int y = tile.y; y < tile.y + tile.h; y++) {
for(int x = tile.x; x < tile.x + tile.w; x++) {
kernel_cpu_path_trace(&kg, render_buffer, rng_state,
sample, x, y, tile.offset, tile.stride);
}
}
tile.sample = sample + 1;
task.update_progress(tile);
}
}
task.release_tile(tile);
if(task_pool.cancelled()) {
if(task.need_finish_queue == false)
break;
}
}
#ifdef WITH_OSL
OSLShader::thread_free(&kg);
#endif
}
void thread_tonemap(DeviceTask& task)
{
#ifdef WITH_OPTIMIZED_KERNEL
if(system_cpu_support_sse3()) {
for(int y = task.y; y < task.y + task.h; y++)
for(int x = task.x; x < task.x + task.w; x++)
kernel_cpu_sse3_tonemap(&kernel_globals, (uchar4*)task.rgba, (float*)task.buffer,
task.sample, x, y, task.offset, task.stride);
}
else if(system_cpu_support_sse2()) {
for(int y = task.y; y < task.y + task.h; y++)
for(int x = task.x; x < task.x + task.w; x++)
kernel_cpu_sse2_tonemap(&kernel_globals, (uchar4*)task.rgba, (float*)task.buffer,
task.sample, x, y, task.offset, task.stride);
}
else
#endif
{
for(int y = task.y; y < task.y + task.h; y++)
for(int x = task.x; x < task.x + task.w; x++)
kernel_cpu_tonemap(&kernel_globals, (uchar4*)task.rgba, (float*)task.buffer,
task.sample, x, y, task.offset, task.stride);
}
}
void thread_shader(DeviceTask& task)
{
KernelGlobals kg = kernel_globals;
#ifdef WITH_OSL
OSLShader::thread_init(&kg, &kernel_globals, &osl_globals);
#endif
#ifdef WITH_OPTIMIZED_KERNEL
if(system_cpu_support_sse3()) {
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
kernel_cpu_sse3_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
if(task_pool.cancelled())
break;
}
}
else if(system_cpu_support_sse2()) {
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
kernel_cpu_sse2_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
if(task_pool.cancelled())
break;
}
}
else
#endif
{
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
kernel_cpu_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
if(task_pool.cancelled())
break;
}
}
#ifdef WITH_OSL
OSLShader::thread_free(&kg);
#endif
}
void task_add(DeviceTask& task)
{
/* split task into smaller ones, more than number of threads for uneven
* workloads where some parts of the image render slower than others */
list<DeviceTask> tasks;
task.split(tasks, TaskScheduler::num_threads());
foreach(DeviceTask& task, tasks)
task_pool.push(new CPUDeviceTask(this, task));
}
void task_wait()
{
task_pool.wait_work();
}
void task_cancel()
{
task_pool.cancel();
}
};
/* Create a new CPU device that reports into `stats`. The `info` argument is
 * accepted but not used here. The caller receives the newly allocated
 * device. */
Device *device_cpu_create(DeviceInfo& info, Stats &stats)
{
	CPUDevice *cpu_device = new CPUDevice(stats);

	return cpu_device;
}
/* Describe the CPU device and insert the description at the front of
 * `devices`. */
void device_cpu_info(vector<DeviceInfo>& devices)
{
	DeviceInfo cpu;

	/* identification */
	cpu.type = DEVICE_CPU;
	cpu.id = "CPU";
	cpu.num = 0;
	cpu.description = system_cpu_brand_string();

	/* feature flags */
	cpu.pack_images = false;
	cpu.advanced_shading = true;

	devices.insert(devices.begin(), cpu);
}
CCL_NAMESPACE_END