From b5595298d36a5023cc33ed41463fd6c032f2ec7b Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Sat, 31 Dec 2011 15:18:13 +0000 Subject: [PATCH] Cycles code refactoring: change displace kernel into more generic shader evaluate kernel, added background shader evaluate. --- intern/cycles/device/device.cpp | 19 ++++----- intern/cycles/device/device.h | 9 +++-- intern/cycles/device/device_cpu.cpp | 14 +++---- intern/cycles/device/device_cuda.cpp | 25 ++++++------ intern/cycles/device/device_multi.cpp | 4 +- intern/cycles/kernel/kernel.cl | 4 +- intern/cycles/kernel/kernel.cpp | 6 +-- intern/cycles/kernel/kernel.cu | 4 +- intern/cycles/kernel/kernel.h | 6 ++- intern/cycles/kernel/kernel_displace.h | 48 +++++++++++++++++++---- intern/cycles/kernel/kernel_optimized.cpp | 6 +-- intern/cycles/kernel/kernel_types.h | 7 ++++ intern/cycles/render/mesh_displace.cpp | 23 +++++------ 13 files changed, 112 insertions(+), 63 deletions(-) diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp index 6ebc359fdb3..55fc3bacbba 100644 --- a/intern/cycles/device/device.cpp +++ b/intern/cycles/device/device.cpp @@ -38,7 +38,8 @@ CCL_NAMESPACE_BEGIN DeviceTask::DeviceTask(Type type_) : type(type_), x(0), y(0), w(0), h(0), rng_state(0), rgba(0), buffer(0), sample(0), resolution(0), - displace_input(0), displace_offset(0), displace_x(0), displace_w(0) + shader_input(0), shader_output(0), + shader_eval_type(0), shader_x(0), shader_w(0) { } @@ -46,8 +47,8 @@ void DeviceTask::split_max_size(list& tasks, int max_size) { int num; - if(type == DISPLACE) { - num = (displace_w + max_size - 1)/max_size; + if(type == SHADER) { + num = (shader_w + max_size - 1)/max_size; } else { max_size = max(1, max_size/w); @@ -68,17 +69,17 @@ void DeviceTask::split(ThreadQueue& queue, int num) void DeviceTask::split(list& tasks, int num) { - if(type == DISPLACE) { - num = min(displace_w, num); + if(type == SHADER) { + num = min(shader_w, num); for(int i = 0; i < num; i++) { - int tx = displace_x + (displace_w/num)*i; - int tw = (i == num-1)? displace_w - i*(displace_w/num): displace_w/num; + int tx = shader_x + (shader_w/num)*i; + int tw = (i == num-1)? shader_w - i*(shader_w/num): shader_w/num; DeviceTask task = *this; - task.displace_x = tx; - task.displace_w = tw; + task.shader_x = tx; + task.shader_w = tw; tasks.push_back(task); } diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h index a6a81e7b326..af9bb694c1b 100644 --- a/intern/cycles/device/device.h +++ b/intern/cycles/device/device.h @@ -52,7 +52,7 @@ enum MemoryType { class DeviceTask { public: - typedef enum { PATH_TRACE, TONEMAP, DISPLACE } Type; + typedef enum { PATH_TRACE, TONEMAP, SHADER } Type; Type type; int x, y, w, h; @@ -63,9 +63,10 @@ public: int resolution; int offset, stride; - device_ptr displace_input; - device_ptr displace_offset; - int displace_x, displace_w; + device_ptr shader_input; + device_ptr shader_output; + int shader_eval_type; + int shader_x, shader_w; DeviceTask(Type type = PATH_TRACE); diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index a45a4fb69f6..145eab9ff59 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -141,8 +141,8 @@ public: thread_path_trace(task); else if(task.type == DeviceTask::TONEMAP) thread_tonemap(task); - else if(task.type == DeviceTask::DISPLACE) - thread_displace(task); + else if(task.type == DeviceTask::SHADER) + thread_shader(task); tasks.worker_done(); } @@ -207,7 +207,7 @@ public: } } - void thread_displace(DeviceTask& task) + void thread_shader(DeviceTask& task) { #ifdef WITH_OSL if(kernel_osl_use(kg)) @@ -216,8 +216,8 @@ public: #ifdef WITH_OPTIMIZED_KERNEL if(system_cpu_support_optimized()) { - for(int x = task.displace_x; x < task.displace_x + task.displace_w; x++) { - kernel_cpu_optimized_displace(kg, (uint4*)task.displace_input, (float3*)task.displace_offset, x); + for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) { + kernel_cpu_optimized_shader(kg, (uint4*)task.shader_input, (float3*)task.shader_output, task.shader_eval_type, x); if(tasks.worker_cancel()) break; @@ -226,8 +226,8 @@ public: else #endif { - for(int x = task.displace_x; x < task.displace_x + task.displace_w; x++) { - kernel_cpu_displace(kg, (uint4*)task.displace_input, (float3*)task.displace_offset, x); + for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) { + kernel_cpu_shader(kg, (uint4*)task.shader_input, (float3*)task.shader_output, task.shader_eval_type, x); if(tasks.worker_cancel()) break; diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 2a49d4fae4c..3c5aafd3f60 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -615,16 +615,16 @@ public: cuda_pop_context(); } - void displace(DeviceTask& task) + void shader(DeviceTask& task) { cuda_push_context(); CUfunction cuDisplace; - CUdeviceptr d_input = cuda_device_ptr(task.displace_input); - CUdeviceptr d_offset = cuda_device_ptr(task.displace_offset); + CUdeviceptr d_input = cuda_device_ptr(task.shader_input); + CUdeviceptr d_offset = cuda_device_ptr(task.shader_output); /* get kernel function */ - cuda_assert(cuModuleGetFunction(&cuDisplace, cuModule, "kernel_cuda_displace")) + cuda_assert(cuModuleGetFunction(&cuDisplace, cuModule, "kernel_cuda_shader")) /* pass in parameters */ int offset = 0; @@ -635,11 +635,14 @@ public: cuda_assert(cuParamSetv(cuDisplace, offset, &d_offset, sizeof(d_offset))) offset += sizeof(d_offset); - int displace_x = task.displace_x; - offset = cuda_align_up(offset, __alignof(displace_x)); + int shader_eval_type = task.shader_eval_type; + offset = cuda_align_up(offset, __alignof(shader_eval_type)); - cuda_assert(cuParamSeti(cuDisplace, offset, task.displace_x)) - offset += sizeof(task.displace_x); + cuda_assert(cuParamSeti(cuDisplace, offset, task.shader_eval_type)) + offset += sizeof(task.shader_eval_type); + + cuda_assert(cuParamSeti(cuDisplace, offset, task.shader_x)) + offset += sizeof(task.shader_x); cuda_assert(cuParamSetSize(cuDisplace, offset)) @@ -649,7 +652,7 @@ public: #else int xthreads = 8; #endif - int xblocks = (task.displace_w + xthreads - 1)/xthreads; + int xblocks = (task.shader_w + xthreads - 1)/xthreads; cuda_assert(cuFuncSetCacheConfig(cuDisplace, CU_FUNC_CACHE_PREFER_L1)) cuda_assert(cuFuncSetBlockShape(cuDisplace, xthreads, 1, 1)) @@ -828,8 +831,8 @@ public: tonemap(task); else if(task.type == DeviceTask::PATH_TRACE) path_trace(task); - else if(task.type == DeviceTask::DISPLACE) - displace(task); + else if(task.type == DeviceTask::SHADER) + shader(task); } void task_wait() diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp index fc5348ad168..7f24e5789cc 100644 --- a/intern/cycles/device/device_multi.cpp +++ b/intern/cycles/device/device_multi.cpp @@ -306,8 +306,8 @@ public: if(task.buffer) subtask.buffer = sub.ptr_map[task.buffer]; if(task.rng_state) subtask.rng_state = sub.ptr_map[task.rng_state]; if(task.rgba) subtask.rgba = sub.ptr_map[task.rgba]; - if(task.displace_input) subtask.displace_input = sub.ptr_map[task.displace_input]; - if(task.displace_offset) subtask.displace_offset = sub.ptr_map[task.displace_offset]; + if(task.shader_input) subtask.shader_input = sub.ptr_map[task.shader_input]; + if(task.shader_output) subtask.shader_output = sub.ptr_map[task.shader_output]; sub.device->task_add(subtask); } diff --git a/intern/cycles/kernel/kernel.cl b/intern/cycles/kernel/kernel.cl index 90eb7a2513f..479cf9b2e64 100644 --- a/intern/cycles/kernel/kernel.cl +++ b/intern/cycles/kernel/kernel.cl @@ -80,10 +80,10 @@ __kernel void kernel_ocl_tonemap( kernel_film_tonemap(kg, rgba, buffer, sample, resolution, x, y, offset, stride); } -/*__kernel void kernel_ocl_displace(__global uint4 *input, __global float3 *offset, int sx) +/*__kernel void kernel_ocl_shader(__global uint4 *input, __global float3 *output, int type, int sx) { int x = sx + get_global_id(0); - kernel_displace(input, offset, x); + kernel_shader_evaluate(input, output, (ShaderEvalType)type, x); }*/ diff --git a/intern/cycles/kernel/kernel.cpp b/intern/cycles/kernel/kernel.cpp index b4c3839dbd0..e66ddd86cd6 100644 --- a/intern/cycles/kernel/kernel.cpp +++ b/intern/cycles/kernel/kernel.cpp @@ -216,11 +216,11 @@ void kernel_cpu_tonemap(KernelGlobals *kg, uchar4 *rgba, float4 *buffer, int sam kernel_film_tonemap(kg, rgba, buffer, sample, resolution, x, y, offset, stride); } -/* Displacement */ +/* Shader Evaluation */ -void kernel_cpu_displace(KernelGlobals *kg, uint4 *input, float3 *offset, int i) +void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float3 *output, int type, int i) { - kernel_displace(kg, input, offset, i); + kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel.cu b/intern/cycles/kernel/kernel.cu index 71fc7ac3197..c97aeb67548 100644 --- a/intern/cycles/kernel/kernel.cu +++ b/intern/cycles/kernel/kernel.cu @@ -44,10 +44,10 @@ extern "C" __global__ void kernel_cuda_tonemap(uchar4 *rgba, float4 *buffer, int kernel_film_tonemap(NULL, rgba, buffer, sample, resolution, x, y, offset, stride); } -extern "C" __global__ void kernel_cuda_displace(uint4 *input, float3 *offset, int sx) +extern "C" __global__ void kernel_cuda_shader(uint4 *input, float3 *output, int type, int sx) { int x = sx + blockDim.x*blockIdx.x + threadIdx.x; - kernel_displace(NULL, input, offset, x); + kernel_shader_evaluate(NULL, input, output, (ShaderEvalType)type, x); } diff --git a/intern/cycles/kernel/kernel.h b/intern/cycles/kernel/kernel.h index 78247504b39..20d43c91169 100644 --- a/intern/cycles/kernel/kernel.h +++ b/intern/cycles/kernel/kernel.h @@ -40,14 +40,16 @@ void kernel_cpu_path_trace(KernelGlobals *kg, float4 *buffer, unsigned int *rng_ int sample, int x, int y, int offset, int stride); void kernel_cpu_tonemap(KernelGlobals *kg, uchar4 *rgba, float4 *buffer, int sample, int resolution, int x, int y, int offset, int stride); -void kernel_cpu_displace(KernelGlobals *kg, uint4 *input, float3 *offset, int i); +void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float3 *output, + int type, int i); #ifdef WITH_OPTIMIZED_KERNEL void kernel_cpu_optimized_path_trace(KernelGlobals *kg, float4 *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride); void kernel_cpu_optimized_tonemap(KernelGlobals *kg, uchar4 *rgba, float4 *buffer, int sample, int resolution, int x, int y, int offset, int stride); -void kernel_cpu_optimized_displace(KernelGlobals *kg, uint4 *input, float3 *offset, int i); +void kernel_cpu_optimized_shader(KernelGlobals *kg, uint4 *input, float3 *output, + int type, int i); #endif CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_displace.h b/intern/cycles/kernel/kernel_displace.h index ef6c3810a75..c39e5e43dbb 100644 --- a/intern/cycles/kernel/kernel_displace.h +++ b/intern/cycles/kernel/kernel_displace.h @@ -18,17 +18,51 @@ CCL_NAMESPACE_BEGIN -__device void kernel_displace(KernelGlobals *kg, uint4 *input, float3 *offset, int i) +__device void kernel_shader_evaluate(KernelGlobals *kg, uint4 *input, float3 *output, ShaderEvalType type, int i) { - /* setup shader data */ ShaderData sd; uint4 in = input[i]; - shader_setup_from_displace(kg, &sd, in.x, in.y, __int_as_float(in.z), __int_as_float(in.w)); + float3 out; - /* evaluate */ - float3 P = sd.P; - shader_eval_displacement(kg, &sd); - offset[i] = sd.P - P; + if(type == SHADER_EVAL_DISPLACE) { + /* setup shader data */ + int object = in.x; + int prim = in.y; + float u = __int_as_float(in.z); + float v = __int_as_float(in.w); + + shader_setup_from_displace(kg, &sd, object, prim, u, v); + + /* evaluate */ + float3 P = sd.P; + shader_eval_displacement(kg, &sd); + out = sd.P - P; + } + else { // SHADER_EVAL_BACKGROUND + /* setup ray */ + Ray ray; + + ray.P = make_float3(0.0f, 0.0f, 0.0f); + ray.D = make_float3(__int_as_float(in.x), __int_as_float(in.y), __int_as_float(in.z)); + ray.t = 0.0f; + +#ifdef __RAY_DIFFERENTIALS__ + ray.dD.dx = make_float3(0.0f, 0.0f, 0.0f); + ray.dD.dy = make_float3(0.0f, 0.0f, 0.0f); + ray.dP.dx = make_float3(0.0f, 0.0f, 0.0f); + ray.dP.dy = make_float3(0.0f, 0.0f, 0.0f); +#endif + + /* setup shader data */ + shader_setup_from_background(kg, &sd, &ray); + + /* evaluate */ + int flag = 0; /* we can't know which type of BSDF this is for */ + out = shader_eval_background(kg, &sd, flag); + } + + /* write output */ + output[i] = out; } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_optimized.cpp b/intern/cycles/kernel/kernel_optimized.cpp index ea43e01ab58..c437e06adfa 100644 --- a/intern/cycles/kernel/kernel_optimized.cpp +++ b/intern/cycles/kernel/kernel_optimized.cpp @@ -47,11 +47,11 @@ void kernel_cpu_optimized_tonemap(KernelGlobals *kg, uchar4 *rgba, float4 *buffe kernel_film_tonemap(kg, rgba, buffer, sample, resolution, x, y, offset, stride); } -/* Displacement */ +/* Shader Evaluate */ -void kernel_cpu_optimized_displace(KernelGlobals *kg, uint4 *input, float3 *offset, int i) +void kernel_cpu_optimized_shader(KernelGlobals *kg, uint4 *input, float3 *output, int type, int i) { - kernel_displace(kg, input, offset, i); + kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index 56db4d2b78a..2c03a34df1f 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -78,6 +78,13 @@ CCL_NAMESPACE_BEGIN //#define __MODIFY_TP__ //#define __QBVH__ +/* Shader Evaluation */ + +enum ShaderEvalType { + SHADER_EVAL_DISPLACE, + SHADER_EVAL_BACKGROUND +}; + /* Path Tracing */ enum PathTraceDimension { diff --git a/intern/cycles/render/mesh_displace.cpp b/intern/cycles/render/mesh_displace.cpp index e86bea59ec1..f0ddf4e8d7b 100644 --- a/intern/cycles/render/mesh_displace.cpp +++ b/intern/cycles/render/mesh_displace.cpp @@ -89,25 +89,26 @@ bool MeshManager::displace(Device *device, Scene *scene, Mesh *mesh, Progress& p return false; /* run device task */ - device_vector d_offset; - d_offset.resize(d_input.size()); + device_vector d_output; + d_output.resize(d_input.size()); device->mem_alloc(d_input, MEM_READ_ONLY); device->mem_copy_to(d_input); - device->mem_alloc(d_offset, MEM_WRITE_ONLY); + device->mem_alloc(d_output, MEM_WRITE_ONLY); - DeviceTask task(DeviceTask::DISPLACE); - task.displace_input = d_input.device_pointer; - task.displace_offset = d_offset.device_pointer; - task.displace_x = 0; - task.displace_w = d_input.size(); + DeviceTask task(DeviceTask::SHADER); + task.shader_input = d_input.device_pointer; + task.shader_output = d_output.device_pointer; + task.shader_eval_type = SHADER_EVAL_DISPLACE; + task.shader_x = 0; + task.shader_w = d_input.size(); device->task_add(task); device->task_wait(); - device->mem_copy_from(d_offset, 0, sizeof(float3)*d_offset.size()); + device->mem_copy_from(d_output, 0, sizeof(float3)*d_output.size()); device->mem_free(d_input); - device->mem_free(d_offset); + device->mem_free(d_output); if(progress.get_cancel()) return false; @@ -117,7 +118,7 @@ bool MeshManager::displace(Device *device, Scene *scene, Mesh *mesh, Progress& p done.resize(mesh->verts.size(), false); int k = 0; - float3 *offset = (float3*)d_offset.data_pointer; + float3 *offset = (float3*)d_output.data_pointer; for(size_t i = 0; i < mesh->triangles.size(); i++) { Mesh::Triangle t = mesh->triangles[i];