forked from bartvdbraak/blender
Cycles code refactoring: change displace kernel into more generic shader
evaluate kernel, added background shader evaluate.
This commit is contained in:
parent
1b4487e813
commit
b5595298d3
@ -38,7 +38,8 @@ CCL_NAMESPACE_BEGIN
|
||||
DeviceTask::DeviceTask(Type type_)
|
||||
: type(type_), x(0), y(0), w(0), h(0), rng_state(0), rgba(0), buffer(0),
|
||||
sample(0), resolution(0),
|
||||
displace_input(0), displace_offset(0), displace_x(0), displace_w(0)
|
||||
shader_input(0), shader_output(0),
|
||||
shader_eval_type(0), shader_x(0), shader_w(0)
|
||||
{
|
||||
}
|
||||
|
||||
@ -46,8 +47,8 @@ void DeviceTask::split_max_size(list<DeviceTask>& tasks, int max_size)
|
||||
{
|
||||
int num;
|
||||
|
||||
if(type == DISPLACE) {
|
||||
num = (displace_w + max_size - 1)/max_size;
|
||||
if(type == SHADER) {
|
||||
num = (shader_w + max_size - 1)/max_size;
|
||||
}
|
||||
else {
|
||||
max_size = max(1, max_size/w);
|
||||
@ -68,17 +69,17 @@ void DeviceTask::split(ThreadQueue<DeviceTask>& queue, int num)
|
||||
|
||||
void DeviceTask::split(list<DeviceTask>& tasks, int num)
|
||||
{
|
||||
if(type == DISPLACE) {
|
||||
num = min(displace_w, num);
|
||||
if(type == SHADER) {
|
||||
num = min(shader_w, num);
|
||||
|
||||
for(int i = 0; i < num; i++) {
|
||||
int tx = displace_x + (displace_w/num)*i;
|
||||
int tw = (i == num-1)? displace_w - i*(displace_w/num): displace_w/num;
|
||||
int tx = shader_x + (shader_w/num)*i;
|
||||
int tw = (i == num-1)? shader_w - i*(shader_w/num): shader_w/num;
|
||||
|
||||
DeviceTask task = *this;
|
||||
|
||||
task.displace_x = tx;
|
||||
task.displace_w = tw;
|
||||
task.shader_x = tx;
|
||||
task.shader_w = tw;
|
||||
|
||||
tasks.push_back(task);
|
||||
}
|
||||
|
@ -52,7 +52,7 @@ enum MemoryType {
|
||||
|
||||
class DeviceTask {
|
||||
public:
|
||||
typedef enum { PATH_TRACE, TONEMAP, DISPLACE } Type;
|
||||
typedef enum { PATH_TRACE, TONEMAP, SHADER } Type;
|
||||
Type type;
|
||||
|
||||
int x, y, w, h;
|
||||
@ -63,9 +63,10 @@ public:
|
||||
int resolution;
|
||||
int offset, stride;
|
||||
|
||||
device_ptr displace_input;
|
||||
device_ptr displace_offset;
|
||||
int displace_x, displace_w;
|
||||
device_ptr shader_input;
|
||||
device_ptr shader_output;
|
||||
int shader_eval_type;
|
||||
int shader_x, shader_w;
|
||||
|
||||
DeviceTask(Type type = PATH_TRACE);
|
||||
|
||||
|
@ -141,8 +141,8 @@ public:
|
||||
thread_path_trace(task);
|
||||
else if(task.type == DeviceTask::TONEMAP)
|
||||
thread_tonemap(task);
|
||||
else if(task.type == DeviceTask::DISPLACE)
|
||||
thread_displace(task);
|
||||
else if(task.type == DeviceTask::SHADER)
|
||||
thread_shader(task);
|
||||
|
||||
tasks.worker_done();
|
||||
}
|
||||
@ -207,7 +207,7 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
void thread_displace(DeviceTask& task)
|
||||
void thread_shader(DeviceTask& task)
|
||||
{
|
||||
#ifdef WITH_OSL
|
||||
if(kernel_osl_use(kg))
|
||||
@ -216,8 +216,8 @@ public:
|
||||
|
||||
#ifdef WITH_OPTIMIZED_KERNEL
|
||||
if(system_cpu_support_optimized()) {
|
||||
for(int x = task.displace_x; x < task.displace_x + task.displace_w; x++) {
|
||||
kernel_cpu_optimized_displace(kg, (uint4*)task.displace_input, (float3*)task.displace_offset, x);
|
||||
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
|
||||
kernel_cpu_optimized_shader(kg, (uint4*)task.shader_input, (float3*)task.shader_output, task.shader_eval_type, x);
|
||||
|
||||
if(tasks.worker_cancel())
|
||||
break;
|
||||
@ -226,8 +226,8 @@ public:
|
||||
else
|
||||
#endif
|
||||
{
|
||||
for(int x = task.displace_x; x < task.displace_x + task.displace_w; x++) {
|
||||
kernel_cpu_displace(kg, (uint4*)task.displace_input, (float3*)task.displace_offset, x);
|
||||
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
|
||||
kernel_cpu_shader(kg, (uint4*)task.shader_input, (float3*)task.shader_output, task.shader_eval_type, x);
|
||||
|
||||
if(tasks.worker_cancel())
|
||||
break;
|
||||
|
@ -615,16 +615,16 @@ public:
|
||||
cuda_pop_context();
|
||||
}
|
||||
|
||||
void displace(DeviceTask& task)
|
||||
void shader(DeviceTask& task)
|
||||
{
|
||||
cuda_push_context();
|
||||
|
||||
CUfunction cuDisplace;
|
||||
CUdeviceptr d_input = cuda_device_ptr(task.displace_input);
|
||||
CUdeviceptr d_offset = cuda_device_ptr(task.displace_offset);
|
||||
CUdeviceptr d_input = cuda_device_ptr(task.shader_input);
|
||||
CUdeviceptr d_offset = cuda_device_ptr(task.shader_output);
|
||||
|
||||
/* get kernel function */
|
||||
cuda_assert(cuModuleGetFunction(&cuDisplace, cuModule, "kernel_cuda_displace"))
|
||||
cuda_assert(cuModuleGetFunction(&cuDisplace, cuModule, "kernel_cuda_shader"))
|
||||
|
||||
/* pass in parameters */
|
||||
int offset = 0;
|
||||
@ -635,11 +635,14 @@ public:
|
||||
cuda_assert(cuParamSetv(cuDisplace, offset, &d_offset, sizeof(d_offset)))
|
||||
offset += sizeof(d_offset);
|
||||
|
||||
int displace_x = task.displace_x;
|
||||
offset = cuda_align_up(offset, __alignof(displace_x));
|
||||
int shader_eval_type = task.shader_eval_type;
|
||||
offset = cuda_align_up(offset, __alignof(shader_eval_type));
|
||||
|
||||
cuda_assert(cuParamSeti(cuDisplace, offset, task.displace_x))
|
||||
offset += sizeof(task.displace_x);
|
||||
cuda_assert(cuParamSeti(cuDisplace, offset, task.shader_eval_type))
|
||||
offset += sizeof(task.shader_eval_type);
|
||||
|
||||
cuda_assert(cuParamSeti(cuDisplace, offset, task.shader_x))
|
||||
offset += sizeof(task.shader_x);
|
||||
|
||||
cuda_assert(cuParamSetSize(cuDisplace, offset))
|
||||
|
||||
@ -649,7 +652,7 @@ public:
|
||||
#else
|
||||
int xthreads = 8;
|
||||
#endif
|
||||
int xblocks = (task.displace_w + xthreads - 1)/xthreads;
|
||||
int xblocks = (task.shader_w + xthreads - 1)/xthreads;
|
||||
|
||||
cuda_assert(cuFuncSetCacheConfig(cuDisplace, CU_FUNC_CACHE_PREFER_L1))
|
||||
cuda_assert(cuFuncSetBlockShape(cuDisplace, xthreads, 1, 1))
|
||||
@ -828,8 +831,8 @@ public:
|
||||
tonemap(task);
|
||||
else if(task.type == DeviceTask::PATH_TRACE)
|
||||
path_trace(task);
|
||||
else if(task.type == DeviceTask::DISPLACE)
|
||||
displace(task);
|
||||
else if(task.type == DeviceTask::SHADER)
|
||||
shader(task);
|
||||
}
|
||||
|
||||
void task_wait()
|
||||
|
@ -306,8 +306,8 @@ public:
|
||||
if(task.buffer) subtask.buffer = sub.ptr_map[task.buffer];
|
||||
if(task.rng_state) subtask.rng_state = sub.ptr_map[task.rng_state];
|
||||
if(task.rgba) subtask.rgba = sub.ptr_map[task.rgba];
|
||||
if(task.displace_input) subtask.displace_input = sub.ptr_map[task.displace_input];
|
||||
if(task.displace_offset) subtask.displace_offset = sub.ptr_map[task.displace_offset];
|
||||
if(task.shader_input) subtask.shader_input = sub.ptr_map[task.shader_input];
|
||||
if(task.shader_output) subtask.shader_output = sub.ptr_map[task.shader_output];
|
||||
|
||||
sub.device->task_add(subtask);
|
||||
}
|
||||
|
@ -80,10 +80,10 @@ __kernel void kernel_ocl_tonemap(
|
||||
kernel_film_tonemap(kg, rgba, buffer, sample, resolution, x, y, offset, stride);
|
||||
}
|
||||
|
||||
/*__kernel void kernel_ocl_displace(__global uint4 *input, __global float3 *offset, int sx)
|
||||
/*__kernel void kernel_ocl_shader(__global uint4 *input, __global float3 *output, int type, int sx)
|
||||
{
|
||||
int x = sx + get_global_id(0);
|
||||
|
||||
kernel_displace(input, offset, x);
|
||||
kernel_shader_evaluate(input, output, (ShaderEvalType)type, x);
|
||||
}*/
|
||||
|
||||
|
@ -216,11 +216,11 @@ void kernel_cpu_tonemap(KernelGlobals *kg, uchar4 *rgba, float4 *buffer, int sam
|
||||
kernel_film_tonemap(kg, rgba, buffer, sample, resolution, x, y, offset, stride);
|
||||
}
|
||||
|
||||
/* Displacement */
|
||||
/* Shader Evaluation */
|
||||
|
||||
void kernel_cpu_displace(KernelGlobals *kg, uint4 *input, float3 *offset, int i)
|
||||
void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float3 *output, int type, int i)
|
||||
{
|
||||
kernel_displace(kg, input, offset, i);
|
||||
kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i);
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
@ -44,10 +44,10 @@ extern "C" __global__ void kernel_cuda_tonemap(uchar4 *rgba, float4 *buffer, int
|
||||
kernel_film_tonemap(NULL, rgba, buffer, sample, resolution, x, y, offset, stride);
|
||||
}
|
||||
|
||||
extern "C" __global__ void kernel_cuda_displace(uint4 *input, float3 *offset, int sx)
|
||||
extern "C" __global__ void kernel_cuda_shader(uint4 *input, float3 *output, int type, int sx)
|
||||
{
|
||||
int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
|
||||
|
||||
kernel_displace(NULL, input, offset, x);
|
||||
kernel_shader_evaluate(NULL, input, output, (ShaderEvalType)type, x);
|
||||
}
|
||||
|
||||
|
@ -40,14 +40,16 @@ void kernel_cpu_path_trace(KernelGlobals *kg, float4 *buffer, unsigned int *rng_
|
||||
int sample, int x, int y, int offset, int stride);
|
||||
void kernel_cpu_tonemap(KernelGlobals *kg, uchar4 *rgba, float4 *buffer,
|
||||
int sample, int resolution, int x, int y, int offset, int stride);
|
||||
void kernel_cpu_displace(KernelGlobals *kg, uint4 *input, float3 *offset, int i);
|
||||
void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float3 *output,
|
||||
int type, int i);
|
||||
|
||||
#ifdef WITH_OPTIMIZED_KERNEL
|
||||
void kernel_cpu_optimized_path_trace(KernelGlobals *kg, float4 *buffer, unsigned int *rng_state,
|
||||
int sample, int x, int y, int offset, int stride);
|
||||
void kernel_cpu_optimized_tonemap(KernelGlobals *kg, uchar4 *rgba, float4 *buffer,
|
||||
int sample, int resolution, int x, int y, int offset, int stride);
|
||||
void kernel_cpu_optimized_displace(KernelGlobals *kg, uint4 *input, float3 *offset, int i);
|
||||
void kernel_cpu_optimized_shader(KernelGlobals *kg, uint4 *input, float3 *output,
|
||||
int type, int i);
|
||||
#endif
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
@ -18,17 +18,51 @@
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
__device void kernel_displace(KernelGlobals *kg, uint4 *input, float3 *offset, int i)
|
||||
__device void kernel_shader_evaluate(KernelGlobals *kg, uint4 *input, float3 *output, ShaderEvalType type, int i)
|
||||
{
|
||||
/* setup shader data */
|
||||
ShaderData sd;
|
||||
uint4 in = input[i];
|
||||
shader_setup_from_displace(kg, &sd, in.x, in.y, __int_as_float(in.z), __int_as_float(in.w));
|
||||
float3 out;
|
||||
|
||||
/* evaluate */
|
||||
float3 P = sd.P;
|
||||
shader_eval_displacement(kg, &sd);
|
||||
offset[i] = sd.P - P;
|
||||
if(type == SHADER_EVAL_DISPLACE) {
|
||||
/* setup shader data */
|
||||
int object = in.x;
|
||||
int prim = in.y;
|
||||
float u = __int_as_float(in.z);
|
||||
float v = __int_as_float(in.w);
|
||||
|
||||
shader_setup_from_displace(kg, &sd, object, prim, u, v);
|
||||
|
||||
/* evaluate */
|
||||
float3 P = sd.P;
|
||||
shader_eval_displacement(kg, &sd);
|
||||
out = sd.P - P;
|
||||
}
|
||||
else { // SHADER_EVAL_BACKGROUND
|
||||
/* setup ray */
|
||||
Ray ray;
|
||||
|
||||
ray.P = make_float3(0.0f, 0.0f, 0.0f);
|
||||
ray.D = make_float3(__int_as_float(in.x), __int_as_float(in.y), __int_as_float(in.z));
|
||||
ray.t = 0.0f;
|
||||
|
||||
#ifdef __RAY_DIFFERENTIALS__
|
||||
ray.dD.dx = make_float3(0.0f, 0.0f, 0.0f);
|
||||
ray.dD.dy = make_float3(0.0f, 0.0f, 0.0f);
|
||||
ray.dP.dx = make_float3(0.0f, 0.0f, 0.0f);
|
||||
ray.dP.dy = make_float3(0.0f, 0.0f, 0.0f);
|
||||
#endif
|
||||
|
||||
/* setup shader data */
|
||||
shader_setup_from_background(kg, &sd, &ray);
|
||||
|
||||
/* evaluate */
|
||||
int flag = 0; /* we can't know which type of BSDF this is for */
|
||||
out = shader_eval_background(kg, &sd, flag);
|
||||
}
|
||||
|
||||
/* write output */
|
||||
output[i] = out;
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
@ -47,11 +47,11 @@ void kernel_cpu_optimized_tonemap(KernelGlobals *kg, uchar4 *rgba, float4 *buffe
|
||||
kernel_film_tonemap(kg, rgba, buffer, sample, resolution, x, y, offset, stride);
|
||||
}
|
||||
|
||||
/* Displacement */
|
||||
/* Shader Evaluate */
|
||||
|
||||
void kernel_cpu_optimized_displace(KernelGlobals *kg, uint4 *input, float3 *offset, int i)
|
||||
void kernel_cpu_optimized_shader(KernelGlobals *kg, uint4 *input, float3 *output, int type, int i)
|
||||
{
|
||||
kernel_displace(kg, input, offset, i);
|
||||
kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i);
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
@ -78,6 +78,13 @@ CCL_NAMESPACE_BEGIN
|
||||
//#define __MODIFY_TP__
|
||||
//#define __QBVH__
|
||||
|
||||
/* Shader Evaluation */
|
||||
|
||||
enum ShaderEvalType {
|
||||
SHADER_EVAL_DISPLACE,
|
||||
SHADER_EVAL_BACKGROUND
|
||||
};
|
||||
|
||||
/* Path Tracing */
|
||||
|
||||
enum PathTraceDimension {
|
||||
|
@ -89,25 +89,26 @@ bool MeshManager::displace(Device *device, Scene *scene, Mesh *mesh, Progress& p
|
||||
return false;
|
||||
|
||||
/* run device task */
|
||||
device_vector<float3> d_offset;
|
||||
d_offset.resize(d_input.size());
|
||||
device_vector<float3> d_output;
|
||||
d_output.resize(d_input.size());
|
||||
|
||||
device->mem_alloc(d_input, MEM_READ_ONLY);
|
||||
device->mem_copy_to(d_input);
|
||||
device->mem_alloc(d_offset, MEM_WRITE_ONLY);
|
||||
device->mem_alloc(d_output, MEM_WRITE_ONLY);
|
||||
|
||||
DeviceTask task(DeviceTask::DISPLACE);
|
||||
task.displace_input = d_input.device_pointer;
|
||||
task.displace_offset = d_offset.device_pointer;
|
||||
task.displace_x = 0;
|
||||
task.displace_w = d_input.size();
|
||||
DeviceTask task(DeviceTask::SHADER);
|
||||
task.shader_input = d_input.device_pointer;
|
||||
task.shader_output = d_output.device_pointer;
|
||||
task.shader_eval_type = SHADER_EVAL_DISPLACE;
|
||||
task.shader_x = 0;
|
||||
task.shader_w = d_input.size();
|
||||
|
||||
device->task_add(task);
|
||||
device->task_wait();
|
||||
|
||||
device->mem_copy_from(d_offset, 0, sizeof(float3)*d_offset.size());
|
||||
device->mem_copy_from(d_output, 0, sizeof(float3)*d_output.size());
|
||||
device->mem_free(d_input);
|
||||
device->mem_free(d_offset);
|
||||
device->mem_free(d_output);
|
||||
|
||||
if(progress.get_cancel())
|
||||
return false;
|
||||
@ -117,7 +118,7 @@ bool MeshManager::displace(Device *device, Scene *scene, Mesh *mesh, Progress& p
|
||||
done.resize(mesh->verts.size(), false);
|
||||
int k = 0;
|
||||
|
||||
float3 *offset = (float3*)d_offset.data_pointer;
|
||||
float3 *offset = (float3*)d_output.data_pointer;
|
||||
|
||||
for(size_t i = 0; i < mesh->triangles.size(); i++) {
|
||||
Mesh::Triangle t = mesh->triangles[i];
|
||||
|
Loading…
Reference in New Issue
Block a user