Cycles code refactoring: change displace kernel into more generic shader

evaluate kernel, added background shader evaluate.
This commit is contained in:
Brecht Van Lommel 2011-12-31 15:18:13 +00:00
parent 1b4487e813
commit b5595298d3
13 changed files with 112 additions and 63 deletions

@ -38,7 +38,8 @@ CCL_NAMESPACE_BEGIN
DeviceTask::DeviceTask(Type type_)
: type(type_), x(0), y(0), w(0), h(0), rng_state(0), rgba(0), buffer(0),
sample(0), resolution(0),
displace_input(0), displace_offset(0), displace_x(0), displace_w(0)
shader_input(0), shader_output(0),
shader_eval_type(0), shader_x(0), shader_w(0)
{
}
@ -46,8 +47,8 @@ void DeviceTask::split_max_size(list<DeviceTask>& tasks, int max_size)
{
int num;
if(type == DISPLACE) {
num = (displace_w + max_size - 1)/max_size;
if(type == SHADER) {
num = (shader_w + max_size - 1)/max_size;
}
else {
max_size = max(1, max_size/w);
@ -68,17 +69,17 @@ void DeviceTask::split(ThreadQueue<DeviceTask>& queue, int num)
void DeviceTask::split(list<DeviceTask>& tasks, int num)
{
if(type == DISPLACE) {
num = min(displace_w, num);
if(type == SHADER) {
num = min(shader_w, num);
for(int i = 0; i < num; i++) {
int tx = displace_x + (displace_w/num)*i;
int tw = (i == num-1)? displace_w - i*(displace_w/num): displace_w/num;
int tx = shader_x + (shader_w/num)*i;
int tw = (i == num-1)? shader_w - i*(shader_w/num): shader_w/num;
DeviceTask task = *this;
task.displace_x = tx;
task.displace_w = tw;
task.shader_x = tx;
task.shader_w = tw;
tasks.push_back(task);
}

@ -52,7 +52,7 @@ enum MemoryType {
class DeviceTask {
public:
typedef enum { PATH_TRACE, TONEMAP, DISPLACE } Type;
typedef enum { PATH_TRACE, TONEMAP, SHADER } Type;
Type type;
int x, y, w, h;
@ -63,9 +63,10 @@ public:
int resolution;
int offset, stride;
device_ptr displace_input;
device_ptr displace_offset;
int displace_x, displace_w;
device_ptr shader_input;
device_ptr shader_output;
int shader_eval_type;
int shader_x, shader_w;
DeviceTask(Type type = PATH_TRACE);

@ -141,8 +141,8 @@ public:
thread_path_trace(task);
else if(task.type == DeviceTask::TONEMAP)
thread_tonemap(task);
else if(task.type == DeviceTask::DISPLACE)
thread_displace(task);
else if(task.type == DeviceTask::SHADER)
thread_shader(task);
tasks.worker_done();
}
@ -207,7 +207,7 @@ public:
}
}
void thread_displace(DeviceTask& task)
void thread_shader(DeviceTask& task)
{
#ifdef WITH_OSL
if(kernel_osl_use(kg))
@ -216,8 +216,8 @@ public:
#ifdef WITH_OPTIMIZED_KERNEL
if(system_cpu_support_optimized()) {
for(int x = task.displace_x; x < task.displace_x + task.displace_w; x++) {
kernel_cpu_optimized_displace(kg, (uint4*)task.displace_input, (float3*)task.displace_offset, x);
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
kernel_cpu_optimized_shader(kg, (uint4*)task.shader_input, (float3*)task.shader_output, task.shader_eval_type, x);
if(tasks.worker_cancel())
break;
@ -226,8 +226,8 @@ public:
else
#endif
{
for(int x = task.displace_x; x < task.displace_x + task.displace_w; x++) {
kernel_cpu_displace(kg, (uint4*)task.displace_input, (float3*)task.displace_offset, x);
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
kernel_cpu_shader(kg, (uint4*)task.shader_input, (float3*)task.shader_output, task.shader_eval_type, x);
if(tasks.worker_cancel())
break;

@ -615,16 +615,16 @@ public:
cuda_pop_context();
}
void displace(DeviceTask& task)
void shader(DeviceTask& task)
{
cuda_push_context();
CUfunction cuDisplace;
CUdeviceptr d_input = cuda_device_ptr(task.displace_input);
CUdeviceptr d_offset = cuda_device_ptr(task.displace_offset);
CUdeviceptr d_input = cuda_device_ptr(task.shader_input);
CUdeviceptr d_offset = cuda_device_ptr(task.shader_output);
/* get kernel function */
cuda_assert(cuModuleGetFunction(&cuDisplace, cuModule, "kernel_cuda_displace"))
cuda_assert(cuModuleGetFunction(&cuDisplace, cuModule, "kernel_cuda_shader"))
/* pass in parameters */
int offset = 0;
@ -635,11 +635,14 @@ public:
cuda_assert(cuParamSetv(cuDisplace, offset, &d_offset, sizeof(d_offset)))
offset += sizeof(d_offset);
int displace_x = task.displace_x;
offset = cuda_align_up(offset, __alignof(displace_x));
int shader_eval_type = task.shader_eval_type;
offset = cuda_align_up(offset, __alignof(shader_eval_type));
cuda_assert(cuParamSeti(cuDisplace, offset, task.displace_x))
offset += sizeof(task.displace_x);
cuda_assert(cuParamSeti(cuDisplace, offset, task.shader_eval_type))
offset += sizeof(task.shader_eval_type);
cuda_assert(cuParamSeti(cuDisplace, offset, task.shader_x))
offset += sizeof(task.shader_x);
cuda_assert(cuParamSetSize(cuDisplace, offset))
@ -649,7 +652,7 @@ public:
#else
int xthreads = 8;
#endif
int xblocks = (task.displace_w + xthreads - 1)/xthreads;
int xblocks = (task.shader_w + xthreads - 1)/xthreads;
cuda_assert(cuFuncSetCacheConfig(cuDisplace, CU_FUNC_CACHE_PREFER_L1))
cuda_assert(cuFuncSetBlockShape(cuDisplace, xthreads, 1, 1))
@ -828,8 +831,8 @@ public:
tonemap(task);
else if(task.type == DeviceTask::PATH_TRACE)
path_trace(task);
else if(task.type == DeviceTask::DISPLACE)
displace(task);
else if(task.type == DeviceTask::SHADER)
shader(task);
}
void task_wait()

@ -306,8 +306,8 @@ public:
if(task.buffer) subtask.buffer = sub.ptr_map[task.buffer];
if(task.rng_state) subtask.rng_state = sub.ptr_map[task.rng_state];
if(task.rgba) subtask.rgba = sub.ptr_map[task.rgba];
if(task.displace_input) subtask.displace_input = sub.ptr_map[task.displace_input];
if(task.displace_offset) subtask.displace_offset = sub.ptr_map[task.displace_offset];
if(task.shader_input) subtask.shader_input = sub.ptr_map[task.shader_input];
if(task.shader_output) subtask.shader_output = sub.ptr_map[task.shader_output];
sub.device->task_add(subtask);
}

@ -80,10 +80,10 @@ __kernel void kernel_ocl_tonemap(
kernel_film_tonemap(kg, rgba, buffer, sample, resolution, x, y, offset, stride);
}
/*__kernel void kernel_ocl_displace(__global uint4 *input, __global float3 *offset, int sx)
/*__kernel void kernel_ocl_shader(__global uint4 *input, __global float3 *output, int type, int sx)
{
int x = sx + get_global_id(0);
kernel_displace(input, offset, x);
kernel_shader_evaluate(input, output, (ShaderEvalType)type, x);
}*/

@ -216,11 +216,11 @@ void kernel_cpu_tonemap(KernelGlobals *kg, uchar4 *rgba, float4 *buffer, int sam
kernel_film_tonemap(kg, rgba, buffer, sample, resolution, x, y, offset, stride);
}
/* Displacement */
/* Shader Evaluation */
void kernel_cpu_displace(KernelGlobals *kg, uint4 *input, float3 *offset, int i)
void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float3 *output, int type, int i)
{
kernel_displace(kg, input, offset, i);
kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i);
}
CCL_NAMESPACE_END

@ -44,10 +44,10 @@ extern "C" __global__ void kernel_cuda_tonemap(uchar4 *rgba, float4 *buffer, int
kernel_film_tonemap(NULL, rgba, buffer, sample, resolution, x, y, offset, stride);
}
extern "C" __global__ void kernel_cuda_displace(uint4 *input, float3 *offset, int sx)
extern "C" __global__ void kernel_cuda_shader(uint4 *input, float3 *output, int type, int sx)
{
int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
kernel_displace(NULL, input, offset, x);
kernel_shader_evaluate(NULL, input, output, (ShaderEvalType)type, x);
}

@ -40,14 +40,16 @@ void kernel_cpu_path_trace(KernelGlobals *kg, float4 *buffer, unsigned int *rng_
int sample, int x, int y, int offset, int stride);
void kernel_cpu_tonemap(KernelGlobals *kg, uchar4 *rgba, float4 *buffer,
int sample, int resolution, int x, int y, int offset, int stride);
void kernel_cpu_displace(KernelGlobals *kg, uint4 *input, float3 *offset, int i);
void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float3 *output,
int type, int i);
#ifdef WITH_OPTIMIZED_KERNEL
void kernel_cpu_optimized_path_trace(KernelGlobals *kg, float4 *buffer, unsigned int *rng_state,
int sample, int x, int y, int offset, int stride);
void kernel_cpu_optimized_tonemap(KernelGlobals *kg, uchar4 *rgba, float4 *buffer,
int sample, int resolution, int x, int y, int offset, int stride);
void kernel_cpu_optimized_displace(KernelGlobals *kg, uint4 *input, float3 *offset, int i);
void kernel_cpu_optimized_shader(KernelGlobals *kg, uint4 *input, float3 *output,
int type, int i);
#endif
CCL_NAMESPACE_END

@ -18,17 +18,51 @@
CCL_NAMESPACE_BEGIN
__device void kernel_displace(KernelGlobals *kg, uint4 *input, float3 *offset, int i)
__device void kernel_shader_evaluate(KernelGlobals *kg, uint4 *input, float3 *output, ShaderEvalType type, int i)
{
/* setup shader data */
ShaderData sd;
uint4 in = input[i];
shader_setup_from_displace(kg, &sd, in.x, in.y, __int_as_float(in.z), __int_as_float(in.w));
float3 out;
/* evaluate */
float3 P = sd.P;
shader_eval_displacement(kg, &sd);
offset[i] = sd.P - P;
if(type == SHADER_EVAL_DISPLACE) {
/* setup shader data */
int object = in.x;
int prim = in.y;
float u = __int_as_float(in.z);
float v = __int_as_float(in.w);
shader_setup_from_displace(kg, &sd, object, prim, u, v);
/* evaluate */
float3 P = sd.P;
shader_eval_displacement(kg, &sd);
out = sd.P - P;
}
else { // SHADER_EVAL_BACKGROUND
/* setup ray */
Ray ray;
ray.P = make_float3(0.0f, 0.0f, 0.0f);
ray.D = make_float3(__int_as_float(in.x), __int_as_float(in.y), __int_as_float(in.z));
ray.t = 0.0f;
#ifdef __RAY_DIFFERENTIALS__
ray.dD.dx = make_float3(0.0f, 0.0f, 0.0f);
ray.dD.dy = make_float3(0.0f, 0.0f, 0.0f);
ray.dP.dx = make_float3(0.0f, 0.0f, 0.0f);
ray.dP.dy = make_float3(0.0f, 0.0f, 0.0f);
#endif
/* setup shader data */
shader_setup_from_background(kg, &sd, &ray);
/* evaluate */
int flag = 0; /* we can't know which type of BSDF this is for */
out = shader_eval_background(kg, &sd, flag);
}
/* write output */
output[i] = out;
}
CCL_NAMESPACE_END

@ -47,11 +47,11 @@ void kernel_cpu_optimized_tonemap(KernelGlobals *kg, uchar4 *rgba, float4 *buffe
kernel_film_tonemap(kg, rgba, buffer, sample, resolution, x, y, offset, stride);
}
/* Displacement */
/* Shader Evaluate */
void kernel_cpu_optimized_displace(KernelGlobals *kg, uint4 *input, float3 *offset, int i)
void kernel_cpu_optimized_shader(KernelGlobals *kg, uint4 *input, float3 *output, int type, int i)
{
kernel_displace(kg, input, offset, i);
kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i);
}
CCL_NAMESPACE_END

@ -78,6 +78,13 @@ CCL_NAMESPACE_BEGIN
//#define __MODIFY_TP__
//#define __QBVH__
/* Shader Evaluation */
enum ShaderEvalType {
SHADER_EVAL_DISPLACE,
SHADER_EVAL_BACKGROUND
};
/* Path Tracing */
enum PathTraceDimension {

@ -89,25 +89,26 @@ bool MeshManager::displace(Device *device, Scene *scene, Mesh *mesh, Progress& p
return false;
/* run device task */
device_vector<float3> d_offset;
d_offset.resize(d_input.size());
device_vector<float3> d_output;
d_output.resize(d_input.size());
device->mem_alloc(d_input, MEM_READ_ONLY);
device->mem_copy_to(d_input);
device->mem_alloc(d_offset, MEM_WRITE_ONLY);
device->mem_alloc(d_output, MEM_WRITE_ONLY);
DeviceTask task(DeviceTask::DISPLACE);
task.displace_input = d_input.device_pointer;
task.displace_offset = d_offset.device_pointer;
task.displace_x = 0;
task.displace_w = d_input.size();
DeviceTask task(DeviceTask::SHADER);
task.shader_input = d_input.device_pointer;
task.shader_output = d_output.device_pointer;
task.shader_eval_type = SHADER_EVAL_DISPLACE;
task.shader_x = 0;
task.shader_w = d_input.size();
device->task_add(task);
device->task_wait();
device->mem_copy_from(d_offset, 0, sizeof(float3)*d_offset.size());
device->mem_copy_from(d_output, 0, sizeof(float3)*d_output.size());
device->mem_free(d_input);
device->mem_free(d_offset);
device->mem_free(d_output);
if(progress.get_cancel())
return false;
@ -117,7 +118,7 @@ bool MeshManager::displace(Device *device, Scene *scene, Mesh *mesh, Progress& p
done.resize(mesh->verts.size(), false);
int k = 0;
float3 *offset = (float3*)d_offset.data_pointer;
float3 *offset = (float3*)d_output.data_pointer;
for(size_t i = 0; i < mesh->triangles.size(); i++) {
Mesh::Triangle t = mesh->triangles[i];