2011-04-27 11:58:34 +00:00
|
|
|
/*
 * Copyright 2011, Blender Foundation.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */
|
|
|
|
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
|
|
|
|
|
|
|
#include "device.h"
|
|
|
|
#include "device_intern.h"
|
|
|
|
|
|
|
|
#include "kernel.h"
|
|
|
|
#include "kernel_types.h"
|
|
|
|
|
|
|
|
#include "osl_shader.h"
|
|
|
|
|
|
|
|
#include "util_debug.h"
|
|
|
|
#include "util_foreach.h"
|
|
|
|
#include "util_function.h"
|
|
|
|
#include "util_opengl.h"
|
|
|
|
#include "util_progress.h"
|
|
|
|
#include "util_system.h"
|
|
|
|
#include "util_thread.h"
|
|
|
|
|
|
|
|
CCL_NAMESPACE_BEGIN
|
|
|
|
|
|
|
|
class CPUDevice : public Device
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
vector<thread*> threads;
|
|
|
|
ThreadQueue<DeviceTask> tasks;
|
|
|
|
KernelGlobals *kg;
|
|
|
|
|
2011-08-24 10:44:04 +00:00
|
|
|
CPUDevice(int threads_num)
|
2011-04-27 11:58:34 +00:00
|
|
|
{
|
|
|
|
kg = kernel_globals_create();
|
2011-08-24 10:44:04 +00:00
|
|
|
|
2011-11-15 15:13:38 +00:00
|
|
|
/* do now to avoid thread issues */
|
|
|
|
system_cpu_support_optimized();
|
|
|
|
|
2011-08-24 10:44:04 +00:00
|
|
|
if(threads_num == 0)
|
|
|
|
threads_num = system_cpu_thread_count();
|
|
|
|
|
|
|
|
threads.resize(threads_num);
|
2011-04-27 11:58:34 +00:00
|
|
|
|
|
|
|
for(size_t i = 0; i < threads.size(); i++)
|
|
|
|
threads[i] = new thread(function_bind(&CPUDevice::thread_run, this, i));
|
|
|
|
}
|
|
|
|
|
|
|
|
~CPUDevice()
|
|
|
|
{
|
|
|
|
tasks.stop();
|
|
|
|
|
|
|
|
foreach(thread *t, threads) {
|
|
|
|
t->join();
|
|
|
|
delete t;
|
|
|
|
}
|
|
|
|
|
|
|
|
kernel_globals_free(kg);
|
|
|
|
}
|
|
|
|
|
2012-01-26 19:07:01 +00:00
|
|
|
bool support_advanced_shading()
|
2011-10-16 18:54:27 +00:00
|
|
|
{
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2011-04-27 11:58:34 +00:00
|
|
|
void mem_alloc(device_memory& mem, MemoryType type)
|
|
|
|
{
|
|
|
|
mem.device_pointer = mem.data_pointer;
|
|
|
|
}
|
|
|
|
|
|
|
|
void mem_copy_to(device_memory& mem)
|
|
|
|
{
|
|
|
|
/* no-op */
|
|
|
|
}
|
|
|
|
|
2012-01-09 16:58:01 +00:00
|
|
|
void mem_copy_from(device_memory& mem, int y, int w, int h, int elem)
|
2011-04-27 11:58:34 +00:00
|
|
|
{
|
|
|
|
/* no-op */
|
|
|
|
}
|
|
|
|
|
|
|
|
void mem_zero(device_memory& mem)
|
|
|
|
{
|
|
|
|
memset((void*)mem.device_pointer, 0, mem.memory_size());
|
|
|
|
}
|
|
|
|
|
|
|
|
void mem_free(device_memory& mem)
|
|
|
|
{
|
|
|
|
mem.device_pointer = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
void const_copy_to(const char *name, void *host, size_t size)
|
|
|
|
{
|
|
|
|
kernel_const_copy(kg, name, host, size);
|
|
|
|
}
|
|
|
|
|
|
|
|
void tex_alloc(const char *name, device_memory& mem, bool interpolation, bool periodic)
|
|
|
|
{
|
|
|
|
kernel_tex_copy(kg, name, mem.data_pointer, mem.data_width, mem.data_height);
|
|
|
|
mem.device_pointer = mem.data_pointer;
|
|
|
|
}
|
|
|
|
|
|
|
|
void tex_free(device_memory& mem)
|
|
|
|
{
|
|
|
|
mem.device_pointer = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
void *osl_memory()
|
|
|
|
{
|
|
|
|
#ifdef WITH_OSL
|
|
|
|
return kernel_osl_memory(kg);
|
|
|
|
#else
|
|
|
|
return NULL;
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
|
|
void thread_run(int t)
|
|
|
|
{
|
|
|
|
DeviceTask task;
|
|
|
|
|
|
|
|
while(tasks.worker_wait_pop(task)) {
|
|
|
|
if(task.type == DeviceTask::PATH_TRACE)
|
|
|
|
thread_path_trace(task);
|
|
|
|
else if(task.type == DeviceTask::TONEMAP)
|
|
|
|
thread_tonemap(task);
|
2011-12-31 15:18:13 +00:00
|
|
|
else if(task.type == DeviceTask::SHADER)
|
|
|
|
thread_shader(task);
|
2011-04-27 11:58:34 +00:00
|
|
|
|
|
|
|
tasks.worker_done();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void thread_path_trace(DeviceTask& task)
|
|
|
|
{
|
|
|
|
if(tasks.worker_cancel())
|
|
|
|
return;
|
|
|
|
|
|
|
|
#ifdef WITH_OSL
|
|
|
|
if(kernel_osl_use(kg))
|
|
|
|
OSLShader::thread_init(kg);
|
|
|
|
#endif
|
|
|
|
|
2011-11-15 15:13:38 +00:00
|
|
|
#ifdef WITH_OPTIMIZED_KERNEL
|
|
|
|
if(system_cpu_support_optimized()) {
|
|
|
|
for(int y = task.y; y < task.y + task.h; y++) {
|
|
|
|
for(int x = task.x; x < task.x + task.w; x++)
|
Cycles: Render Passes
Currently supported passes:
* Combined, Z, Normal, Object Index, Material Index, Emission, Environment,
Diffuse/Glossy/Transmission x Direct/Indirect/Color
Not supported yet:
* UV, Vector, Mist
Only enabled for CPU devices at the moment, will do GPU tweaks tommorrow,
also for environment importance sampling.
Documentation:
http://wiki.blender.org/index.php/Doc:2.6/Manual/Render/Cycles/Passes
2012-01-25 17:23:52 +00:00
|
|
|
kernel_cpu_optimized_path_trace(kg, (float*)task.buffer, (unsigned int*)task.rng_state,
|
2011-12-20 12:25:37 +00:00
|
|
|
task.sample, x, y, task.offset, task.stride);
|
2011-04-27 11:58:34 +00:00
|
|
|
|
2011-11-15 15:13:38 +00:00
|
|
|
if(tasks.worker_cancel())
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
#endif
|
|
|
|
{
|
|
|
|
for(int y = task.y; y < task.y + task.h; y++) {
|
|
|
|
for(int x = task.x; x < task.x + task.w; x++)
|
Cycles: Render Passes
Currently supported passes:
* Combined, Z, Normal, Object Index, Material Index, Emission, Environment,
Diffuse/Glossy/Transmission x Direct/Indirect/Color
Not supported yet:
* UV, Vector, Mist
Only enabled for CPU devices at the moment, will do GPU tweaks tommorrow,
also for environment importance sampling.
Documentation:
http://wiki.blender.org/index.php/Doc:2.6/Manual/Render/Cycles/Passes
2012-01-25 17:23:52 +00:00
|
|
|
kernel_cpu_path_trace(kg, (float*)task.buffer, (unsigned int*)task.rng_state,
|
2011-12-20 12:25:37 +00:00
|
|
|
task.sample, x, y, task.offset, task.stride);
|
2011-11-15 15:13:38 +00:00
|
|
|
|
|
|
|
if(tasks.worker_cancel())
|
|
|
|
break;
|
|
|
|
}
|
2011-04-27 11:58:34 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef WITH_OSL
|
|
|
|
if(kernel_osl_use(kg))
|
|
|
|
OSLShader::thread_free(kg);
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
|
|
void thread_tonemap(DeviceTask& task)
|
|
|
|
{
|
2011-11-15 15:13:38 +00:00
|
|
|
#ifdef WITH_OPTIMIZED_KERNEL
|
|
|
|
if(system_cpu_support_optimized()) {
|
|
|
|
for(int y = task.y; y < task.y + task.h; y++)
|
|
|
|
for(int x = task.x; x < task.x + task.w; x++)
|
Cycles: Render Passes
Currently supported passes:
* Combined, Z, Normal, Object Index, Material Index, Emission, Environment,
Diffuse/Glossy/Transmission x Direct/Indirect/Color
Not supported yet:
* UV, Vector, Mist
Only enabled for CPU devices at the moment, will do GPU tweaks tommorrow,
also for environment importance sampling.
Documentation:
http://wiki.blender.org/index.php/Doc:2.6/Manual/Render/Cycles/Passes
2012-01-25 17:23:52 +00:00
|
|
|
kernel_cpu_optimized_tonemap(kg, (uchar4*)task.rgba, (float*)task.buffer,
|
2011-12-20 12:25:37 +00:00
|
|
|
task.sample, task.resolution, x, y, task.offset, task.stride);
|
2011-11-15 15:13:38 +00:00
|
|
|
}
|
|
|
|
else
|
|
|
|
#endif
|
|
|
|
{
|
|
|
|
for(int y = task.y; y < task.y + task.h; y++)
|
|
|
|
for(int x = task.x; x < task.x + task.w; x++)
|
Cycles: Render Passes
Currently supported passes:
* Combined, Z, Normal, Object Index, Material Index, Emission, Environment,
Diffuse/Glossy/Transmission x Direct/Indirect/Color
Not supported yet:
* UV, Vector, Mist
Only enabled for CPU devices at the moment, will do GPU tweaks tommorrow,
also for environment importance sampling.
Documentation:
http://wiki.blender.org/index.php/Doc:2.6/Manual/Render/Cycles/Passes
2012-01-25 17:23:52 +00:00
|
|
|
kernel_cpu_tonemap(kg, (uchar4*)task.rgba, (float*)task.buffer,
|
2011-12-20 12:25:37 +00:00
|
|
|
task.sample, task.resolution, x, y, task.offset, task.stride);
|
2011-04-27 11:58:34 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-12-31 15:18:13 +00:00
|
|
|
void thread_shader(DeviceTask& task)
|
2011-04-27 11:58:34 +00:00
|
|
|
{
|
|
|
|
#ifdef WITH_OSL
|
|
|
|
if(kernel_osl_use(kg))
|
|
|
|
OSLShader::thread_init(kg);
|
|
|
|
#endif
|
|
|
|
|
2011-11-15 15:13:38 +00:00
|
|
|
#ifdef WITH_OPTIMIZED_KERNEL
|
|
|
|
if(system_cpu_support_optimized()) {
|
2011-12-31 15:18:13 +00:00
|
|
|
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
|
2012-01-20 17:49:17 +00:00
|
|
|
kernel_cpu_optimized_shader(kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
|
2011-11-15 15:13:38 +00:00
|
|
|
|
|
|
|
if(tasks.worker_cancel())
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
#endif
|
|
|
|
{
|
2011-12-31 15:18:13 +00:00
|
|
|
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
|
2012-01-20 17:49:17 +00:00
|
|
|
kernel_cpu_shader(kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
|
2011-04-27 11:58:34 +00:00
|
|
|
|
2011-11-15 15:13:38 +00:00
|
|
|
if(tasks.worker_cancel())
|
|
|
|
break;
|
|
|
|
}
|
2011-04-27 11:58:34 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef WITH_OSL
|
|
|
|
if(kernel_osl_use(kg))
|
|
|
|
OSLShader::thread_free(kg);
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
|
|
void task_add(DeviceTask& task)
|
|
|
|
{
|
2011-09-08 18:58:07 +00:00
|
|
|
/* split task into smaller ones, more than number of threads for uneven
|
|
|
|
workloads where some parts of the image render slower than others */
|
|
|
|
task.split(tasks, threads.size()*10);
|
2011-04-27 11:58:34 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void task_wait()
|
|
|
|
{
|
|
|
|
tasks.wait_done();
|
|
|
|
}
|
|
|
|
|
|
|
|
void task_cancel()
|
|
|
|
{
|
|
|
|
tasks.cancel();
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2012-01-04 18:06:32 +00:00
|
|
|
/* Creates a new CPU device. `threads` is forwarded to the CPUDevice
 * constructor; `info` is not used. The caller receives a pointer to a
 * heap-allocated object. */
Device *device_cpu_create(DeviceInfo& info, int threads)
{
	Device *cpu_device = new CPUDevice(threads);

	return cpu_device;
}
|
|
|
|
|
2012-01-04 18:06:32 +00:00
|
|
|
/* Describes the CPU device and puts the entry at the front of `devices`. */
void device_cpu_info(vector<DeviceInfo>& devices)
{
	DeviceInfo cpu_info;

	/* identification */
	cpu_info.id = "CPU";
	cpu_info.num = 0;
	cpu_info.type = DEVICE_CPU;

	/* human readable name, taken from the CPU brand string */
	cpu_info.description = system_cpu_brand_string();

	/* capabilities */
	cpu_info.advanced_shading = true;

	/* inserted at the beginning so the CPU entry comes before existing ones */
	devices.insert(devices.begin(), cpu_info);
}
|
|
|
|
|
2011-04-27 11:58:34 +00:00
|
|
|
CCL_NAMESPACE_END
|
|
|
|
|