Cycles: some steps to getting OpenCL backend to compile.

This commit is contained in:
Brecht Van Lommel 2011-05-20 12:26:01 +00:00
parent 2e66cb520c
commit 63d4bafff5
25 changed files with 454 additions and 352 deletions

@ -55,6 +55,7 @@ public:
cl_int ciErr; cl_int ciErr;
map<string, device_vector<uchar>*> const_mem_map; map<string, device_vector<uchar>*> const_mem_map;
map<string, device_memory*> mem_map; map<string, device_memory*> mem_map;
device_ptr null_mem;
const char *opencl_error_string(cl_int err) const char *opencl_error_string(cl_int err)
{ {
@ -125,10 +126,10 @@ public:
ciErr = clGetPlatformIDs(1, &cpPlatform, NULL); ciErr = clGetPlatformIDs(1, &cpPlatform, NULL);
opencl_assert(ciErr); opencl_assert(ciErr);
ciErr = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_GPU, 1, &cdDevice, NULL); ciErr = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_CPU, 1, &cdDevice, NULL);
opencl_assert(ciErr); opencl_assert(ciErr);
cxGPUContext = clCreateContext(0, 1, &cdDevice, NULL /*clLogMessagesToStdoutAPPLE */, NULL, &ciErr); cxGPUContext = clCreateContext(0, 1, &cdDevice, NULL, NULL, &ciErr);
opencl_assert(ciErr); opencl_assert(ciErr);
cqCommandQueue = clCreateCommandQueue(cxGPUContext, cdDevice, 0, &ciErr); cqCommandQueue = clCreateCommandQueue(cxGPUContext, cdDevice, 0, &ciErr);
@ -137,10 +138,16 @@ public:
/* compile kernel */ /* compile kernel */
string source = string_printf("#include \"kernel.cl\" // %lf\n", time_dt()); string source = string_printf("#include \"kernel.cl\" // %lf\n", time_dt());
size_t source_len = source.size(); size_t source_len = source.size();
string build_options = "-I ../kernel -I ../util -Werror -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END="; //" + path_get("kernel") + " -Werror";
//printf("path %s\n", path_get("kernel").c_str());
//clUnloadCompiler(); string build_options = "";
//string csource = "../blender/intern/cycles";
//build_options += "-I " + csource + "/kernel -I " + csource + "/util";
build_options += " -I " + path_get("kernel"); /* todo: escape path */
build_options += " -Werror";
build_options += " -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END=";
cpProgram = clCreateProgramWithSource(cxGPUContext, 1, (const char **)&source, &source_len, &ciErr); cpProgram = clCreateProgramWithSource(cxGPUContext, 1, (const char **)&source, &source_len, &ciErr);
@ -170,10 +177,15 @@ public:
opencl_assert(ciErr); opencl_assert(ciErr);
ckFilmConvertKernel = clCreateKernel(cpProgram, "kernel_ocl_tonemap", &ciErr); ckFilmConvertKernel = clCreateKernel(cpProgram, "kernel_ocl_tonemap", &ciErr);
opencl_assert(ciErr); opencl_assert(ciErr);
null_mem = (device_ptr)clCreateBuffer(cxGPUContext, CL_MEM_READ_ONLY, 1, NULL, &ciErr);
} }
~OpenCLDevice() ~OpenCLDevice()
{ {
clReleaseMemObject(CL_MEM_PTR(null_mem));
map<string, device_vector<uchar>*>::iterator mt; map<string, device_vector<uchar>*>::iterator mt;
for(mt = const_mem_map.begin(); mt != const_mem_map.end(); mt++) { for(mt = const_mem_map.begin(); mt != const_mem_map.end(); mt++) {
mem_free(*(mt->second)); mem_free(*(mt->second));
@ -261,6 +273,7 @@ public:
void tex_alloc(const char *name, device_memory& mem, bool interpolation, bool periodic) void tex_alloc(const char *name, device_memory& mem, bool interpolation, bool periodic)
{ {
mem_alloc(mem, MEM_READ_ONLY); mem_alloc(mem, MEM_READ_ONLY);
mem_copy_to(mem);
mem_map[name] = &mem; mem_map[name] = &mem;
} }
@ -295,6 +308,11 @@ public:
ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_data), (void*)&d_data); ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_data), (void*)&d_data);
ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_buffer), (void*)&d_buffer); ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_buffer), (void*)&d_buffer);
ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_rng_state), (void*)&d_rng_state); ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_rng_state), (void*)&d_rng_state);
#define KERNEL_TEX(type, ttype, name) \
ciErr |= set_kernel_arg_mem(ckPathTraceKernel, &narg, #name);
#include "kernel_textures.h"
ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_pass), (void*)&d_pass); ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_pass), (void*)&d_pass);
ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_x), (void*)&d_x); ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_x), (void*)&d_x);
ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_y), (void*)&d_y); ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_y), (void*)&d_y);
@ -314,10 +332,20 @@ public:
cl_int set_kernel_arg_mem(cl_kernel kernel, int *narg, const char *name) cl_int set_kernel_arg_mem(cl_kernel kernel, int *narg, const char *name)
{ {
device_memory *mem = mem_map[name]; cl_mem ptr;
cl_mem ptr = CL_MEM_PTR(mem->device_pointer); cl_int size, err = 0;
cl_int size = mem->data_width;
cl_int err = 0; if(mem_map.find(name) != mem_map.end()) {
device_memory *mem = mem_map[name];
ptr = CL_MEM_PTR(mem->device_pointer);
size = mem->data_width;
}
else {
/* work around NULL not working, even though the spec says otherwise */
ptr = CL_MEM_PTR(null_mem);
size = 1;
}
err |= clSetKernelArg(kernel, (*narg)++, sizeof(ptr), (void*)&ptr); err |= clSetKernelArg(kernel, (*narg)++, sizeof(ptr), (void*)&ptr);
opencl_assert(err); opencl_assert(err);
@ -347,9 +375,11 @@ public:
ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_data), (void*)&d_data); ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_data), (void*)&d_data);
ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_rgba), (void*)&d_rgba); ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_rgba), (void*)&d_rgba);
ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_buffer), (void*)&d_buffer); ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_buffer), (void*)&d_buffer);
ciErr |= set_kernel_arg_mem(ckFilmConvertKernel, &narg, "__response_curve_R");
ciErr |= set_kernel_arg_mem(ckFilmConvertKernel, &narg, "__response_curve_G"); #define KERNEL_TEX(type, ttype, name) \
ciErr |= set_kernel_arg_mem(ckFilmConvertKernel, &narg, "__response_curve_B"); ciErr |= set_kernel_arg_mem(ckFilmConvertKernel, &narg, #name);
#include "kernel_textures.h"
ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_pass), (void*)&d_pass); ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_pass), (void*)&d_pass);
ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_resolution), (void*)&d_resolution); ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_resolution), (void*)&d_resolution);
ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_x), (void*)&d_x); ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_x), (void*)&d_x);

@ -25,8 +25,11 @@ SET(headers
kernel_qbvh.h kernel_qbvh.h
kernel_random.h kernel_random.h
kernel_shader.h kernel_shader.h
kernel_textures.h
kernel_triangle.h kernel_triangle.h
kernel_types.h kernel_types.h)
SET(svm_headers
svm/bsdf.h svm/bsdf.h
svm/bsdf_ashikhmin_velvet.h svm/bsdf_ashikhmin_velvet.h
svm/bsdf_diffuse.h svm/bsdf_diffuse.h
@ -78,7 +81,7 @@ ELSE()
ENDIF() ENDIF()
IF(WITH_CYCLES_CUDA) IF(WITH_CYCLES_CUDA)
SET(cuda_sources kernel.cu ${headers}) SET(cuda_sources kernel.cu ${headers} ${svm_headers})
SET(cuda_cubins) SET(cuda_cubins)
FOREACH(arch ${CYCLES_CUDA_ARCH}) FOREACH(arch ${CYCLES_CUDA_ARCH})
@ -106,9 +109,23 @@ ENDIF()
INCLUDE_DIRECTORIES(. ../util osl svm) INCLUDE_DIRECTORIES(. ../util osl svm)
ADD_LIBRARY(cycles_kernel ${sources} ${headers}) ADD_LIBRARY(cycles_kernel ${sources} ${headers} ${svm_headers})
IF(WITH_CYCLES_CUDA) IF(WITH_CYCLES_CUDA)
ADD_DEPENDENCIES(cycles_kernel cycles_kernel_cuda) ADD_DEPENDENCIES(cycles_kernel cycles_kernel_cuda)
ENDIF() ENDIF()
# OPENCL kernel
IF(WITH_CYCLES_OPENCL)
SET(util_headers
../util/util_color.h
../util/util_math.h
../util/util_transform.h
../util/util_types.h)
INSTALL(FILES kernel.cl ${headers} DESTINATION ${CYCLES_INSTALL_PATH}/cycles/kernel)
INSTALL(FILES ${svm_headers} DESTINATION ${CYCLES_INSTALL_PATH}/cycles/kernel/svm)
INSTALL(FILES ${util_headers} DESTINATION ${CYCLES_INSTALL_PATH}/cycles/kernel)
ENDIF()

@ -23,71 +23,61 @@
#include "kernel_types.h" #include "kernel_types.h"
#include "kernel_globals.h" #include "kernel_globals.h"
typedef struct KernelGlobals {
__constant KernelData *data;
__global float *__response_curve_R;
int __response_curve_R_width;
__global float *__response_curve_G;
int __response_curve_G_width;
__global float *__response_curve_B;
int __response_curve_B_width;
} KernelGlobals;
#include "kernel_film.h" #include "kernel_film.h"
//#include "kernel_path.h" #include "kernel_path.h"
//#include "kernel_displace.h" //#include "kernel_displace.h"
__kernel void kernel_ocl_path_trace(__constant KernelData *data, __global float4 *buffer, __global uint *rng_state, int pass, int sx, int sy, int sw, int sh) __kernel void kernel_ocl_path_trace(
__constant KernelData *data,
__global float4 *buffer,
__global uint *rng_state,
#define KERNEL_TEX(type, ttype, name) \
__global type *name, \
int name##_width,
#include "kernel_textures.h"
int pass,
int sx, int sy, int sw, int sh)
{ {
KernelGlobals kglobals, *kg = &kglobals; KernelGlobals kglobals, *kg = &kglobals;
kg->data = data; kg->data = data;
int x = get_global_id(0); #define KERNEL_TEX(type, ttype, name) \
int y = get_global_id(1); kg->name = name; \
kg->name##_width = name##_width;
#include "kernel_textures.h"
int x = sx + get_global_id(0);
int y = sy + get_global_id(1);
int w = kernel_data.cam.width; int w = kernel_data.cam.width;
if(x < sx + sw && y < sy + sh) { if(x < sx + sw && y < sy + sh)
if(pass == 0) { kernel_path_trace(kg, buffer, rng_state, pass, x, y);
buffer[x + w*y].x = 0.5f;
buffer[x + w*y].y = 0.5f;
buffer[x + w*y].z = 0.5f;
}
else {
buffer[x + w*y].x += 0.5f;
buffer[x + w*y].y += 0.5f;
buffer[x + w*y].z += 0.5f;
}
//= make_float3(1.0f, 0.9f, 0.0f);
//kernel_path_trace(buffer, rng_state, pass, x, y);
}
} }
__kernel void kernel_ocl_tonemap( __kernel void kernel_ocl_tonemap(
__constant KernelData *data, __constant KernelData *data,
__global uchar4 *rgba, __global uchar4 *rgba,
__global float4 *buffer, __global float4 *buffer,
__global float *__response_curve_R,
int __response_curve_R_width, #define KERNEL_TEX(type, ttype, name) \
__global float *__response_curve_G, __global type *name, \
int __response_curve_G_width, int name##_width,
__global float *__response_curve_B, #include "kernel_textures.h"
int __response_curve_B_width,
int pass, int resolution, int pass, int resolution,
int sx, int sy, int sw, int sh) int sx, int sy, int sw, int sh)
{ {
KernelGlobals kglobals, *kg = &kglobals; KernelGlobals kglobals, *kg = &kglobals;
kg->data = data; kg->data = data;
kg->__response_curve_R = __response_curve_R;
kg->__response_curve_R_width = __response_curve_R_width; #define KERNEL_TEX(type, ttype, name) \
kg->__response_curve_G = __response_curve_G; kg->name = name; \
kg->__response_curve_G_width = __response_curve_G_width; kg->name##_width = name##_width;
kg->__response_curve_B = __response_curve_B; #include "kernel_textures.h"
kg->__response_curve_B_width = __response_curve_B_width;
int x = sx + get_global_id(0); int x = sx + get_global_id(0);
int y = sy + get_global_id(1); int y = sy + get_global_id(1);
@ -96,10 +86,10 @@ __kernel void kernel_ocl_tonemap(
kernel_film_tonemap(kg, rgba, buffer, pass, resolution, x, y); kernel_film_tonemap(kg, rgba, buffer, pass, resolution, x, y);
} }
__kernel void kernel_ocl_displace(__global uint4 *input, __global float3 *offset, int sx) /*__kernel void kernel_ocl_displace(__global uint4 *input, __global float3 *offset, int sx)
{ {
int x = sx + get_global_id(0); int x = sx + get_global_id(0);
kernel_displace(input, offset, x); kernel_displace(input, offset, x);
} }*/

@ -35,7 +35,7 @@ CCL_NAMESPACE_BEGIN
#define __device_inline __device__ __inline__ #define __device_inline __device__ __inline__
#define __global #define __global
#define __shared __shared__ #define __shared __shared__
#define __constant __constant__ #define __constant
/* No assert supported for CUDA */ /* No assert supported for CUDA */

@ -29,6 +29,8 @@ CCL_NAMESPACE_BEGIN
#define __device #define __device
#define __device_inline #define __device_inline
#define kernel_assert(cond)
__device float kernel_tex_interp_(__global float *data, int width, float x) __device float kernel_tex_interp_(__global float *data, int width, float x)
{ {
x = clamp(x, 0.0f, 1.0f)*width; x = clamp(x, 0.0f, 1.0f)*width;
@ -40,9 +42,20 @@ __device float kernel_tex_interp_(__global float *data, int width, float x)
return (1.0f - t)*data[index] + t*data[nindex]; return (1.0f - t)*data[index] + t*data[nindex];
} }
#define make_float3(x, y, z) ((float3)(x, y, z)) /* todo 1.1 */
#define __uint_as_float(x) as_float(x)
#define __float_as_uint(x) as_uint(x)
#define __int_as_float(x) as_float(x)
#define __float_as_int(x) as_int(x)
#define kernel_data (*kg->data) #define kernel_data (*kg->data)
#define kernel_tex_interp(t, x) \ #define kernel_tex_interp(t, x) \
kernel_tex_interp_(kg->t, kg->t##_width, x); kernel_tex_interp_(kg->t, kg->t##_width, x)
#define kernel_tex_fetch(t, index) \
kg->t[index]
#define NULL 0
CCL_NAMESPACE_END CCL_NAMESPACE_END

@ -18,190 +18,66 @@
/* Constant Globals */ /* Constant Globals */
#ifdef __KERNEL_CPU__
#ifdef WITH_OSL
#include "osl_globals.h"
#endif
CCL_NAMESPACE_BEGIN CCL_NAMESPACE_BEGIN
/* On the CPU, we pass along the struct KernelGlobals to nearly everywhere in /* On the CPU, we pass along the struct KernelGlobals to nearly everywhere in
the kernel, to access constant data. These are all stored as "textures", but the kernel, to access constant data. These are all stored as "textures", but
these are really just standard arrays. We can't use actually globals because these are really just standard arrays. We can't use actually globals because
multiple renders may be running inside the same process. */ multiple renders may be running inside the same process. */
typedef struct KernelGlobals {
#else
/* On the GPU, constant memory textures must be globals, so we can't put them
into a struct. As a result we don't actually use this struct and use actual
globals and simply pass along a NULL pointer everywhere, which we hope gets
optimized out. */
#ifdef __KERNEL_CUDA__
typedef struct KernelGlobals {} KernelGlobals;
#endif
#endif
/* globals */
__constant KernelData __data;
#ifndef __KERNEL_OPENCL__
/* bvh */
texture_float4 __bvh_nodes;
texture_float4 __tri_woop;
texture_uint __prim_index;
texture_uint __prim_object;
texture_uint __object_node;
/* objects */
texture_float4 __objects;
/* triangles */
texture_float4 __tri_normal;
texture_float4 __tri_vnormal;
texture_float4 __tri_vindex;
texture_float4 __tri_verts;
/* attributes */
texture_uint4 __attributes_map;
texture_float __attributes_float;
texture_float4 __attributes_float3;
/* lights */
texture_float4 __light_distribution;
texture_float4 __light_point;
/* shaders */
texture_uint4 __svm_nodes;
/* camera/film */
texture_float __filter_table;
texture_float __response_curve_R;
texture_float __response_curve_G;
texture_float __response_curve_B;
/* sobol */
texture_uint __sobol_directions;
/* image */
texture_image_uchar4 __tex_image_000;
texture_image_uchar4 __tex_image_001;
texture_image_uchar4 __tex_image_002;
texture_image_uchar4 __tex_image_003;
texture_image_uchar4 __tex_image_004;
texture_image_uchar4 __tex_image_005;
texture_image_uchar4 __tex_image_006;
texture_image_uchar4 __tex_image_007;
texture_image_uchar4 __tex_image_008;
texture_image_uchar4 __tex_image_009;
texture_image_uchar4 __tex_image_010;
texture_image_uchar4 __tex_image_011;
texture_image_uchar4 __tex_image_012;
texture_image_uchar4 __tex_image_013;
texture_image_uchar4 __tex_image_014;
texture_image_uchar4 __tex_image_015;
texture_image_uchar4 __tex_image_016;
texture_image_uchar4 __tex_image_017;
texture_image_uchar4 __tex_image_018;
texture_image_uchar4 __tex_image_019;
texture_image_uchar4 __tex_image_020;
texture_image_uchar4 __tex_image_021;
texture_image_uchar4 __tex_image_022;
texture_image_uchar4 __tex_image_023;
texture_image_uchar4 __tex_image_024;
texture_image_uchar4 __tex_image_025;
texture_image_uchar4 __tex_image_026;
texture_image_uchar4 __tex_image_027;
texture_image_uchar4 __tex_image_028;
texture_image_uchar4 __tex_image_029;
texture_image_uchar4 __tex_image_030;
texture_image_uchar4 __tex_image_031;
texture_image_uchar4 __tex_image_032;
texture_image_uchar4 __tex_image_033;
texture_image_uchar4 __tex_image_034;
texture_image_uchar4 __tex_image_035;
texture_image_uchar4 __tex_image_036;
texture_image_uchar4 __tex_image_037;
texture_image_uchar4 __tex_image_038;
texture_image_uchar4 __tex_image_039;
texture_image_uchar4 __tex_image_040;
texture_image_uchar4 __tex_image_041;
texture_image_uchar4 __tex_image_042;
texture_image_uchar4 __tex_image_043;
texture_image_uchar4 __tex_image_044;
texture_image_uchar4 __tex_image_045;
texture_image_uchar4 __tex_image_046;
texture_image_uchar4 __tex_image_047;
texture_image_uchar4 __tex_image_048;
texture_image_uchar4 __tex_image_049;
texture_image_uchar4 __tex_image_050;
texture_image_uchar4 __tex_image_051;
texture_image_uchar4 __tex_image_052;
texture_image_uchar4 __tex_image_053;
texture_image_uchar4 __tex_image_054;
texture_image_uchar4 __tex_image_055;
texture_image_uchar4 __tex_image_056;
texture_image_uchar4 __tex_image_057;
texture_image_uchar4 __tex_image_058;
texture_image_uchar4 __tex_image_059;
texture_image_uchar4 __tex_image_060;
texture_image_uchar4 __tex_image_061;
texture_image_uchar4 __tex_image_062;
texture_image_uchar4 __tex_image_063;
texture_image_uchar4 __tex_image_064;
texture_image_uchar4 __tex_image_065;
texture_image_uchar4 __tex_image_066;
texture_image_uchar4 __tex_image_067;
texture_image_uchar4 __tex_image_068;
texture_image_uchar4 __tex_image_069;
texture_image_uchar4 __tex_image_070;
texture_image_uchar4 __tex_image_071;
texture_image_uchar4 __tex_image_072;
texture_image_uchar4 __tex_image_073;
texture_image_uchar4 __tex_image_074;
texture_image_uchar4 __tex_image_075;
texture_image_uchar4 __tex_image_076;
texture_image_uchar4 __tex_image_077;
texture_image_uchar4 __tex_image_078;
texture_image_uchar4 __tex_image_079;
texture_image_uchar4 __tex_image_080;
texture_image_uchar4 __tex_image_081;
texture_image_uchar4 __tex_image_082;
texture_image_uchar4 __tex_image_083;
texture_image_uchar4 __tex_image_084;
texture_image_uchar4 __tex_image_085;
texture_image_uchar4 __tex_image_086;
texture_image_uchar4 __tex_image_087;
texture_image_uchar4 __tex_image_088;
texture_image_uchar4 __tex_image_089;
texture_image_uchar4 __tex_image_090;
texture_image_uchar4 __tex_image_091;
texture_image_uchar4 __tex_image_092;
texture_image_uchar4 __tex_image_093;
texture_image_uchar4 __tex_image_094;
texture_image_uchar4 __tex_image_095;
texture_image_uchar4 __tex_image_096;
texture_image_uchar4 __tex_image_097;
texture_image_uchar4 __tex_image_098;
texture_image_uchar4 __tex_image_099;
#endif
#ifdef __KERNEL_CPU__ #ifdef __KERNEL_CPU__
#ifdef WITH_OSL #ifdef WITH_OSL
//#include "osl_globals.h"
#endif
/* On the CPU, we also have the OSL globals here. Most data structures are shared typedef struct KernelGlobals {
with SVM, the difference is in the shaders and object/mesh attributes. */
OSLGlobals osl; #define KERNEL_TEX(type, ttype, name) ttype name;
#define KERNEL_IMAGE_TEX(type, ttype, name) ttype name;
#include "kernel_textures.h"
KernelData __data;
#ifdef WITH_OSL
/* On the CPU, we also have the OSL globals here. Most data structures are shared
with SVM, the difference is in the shaders and object/mesh attributes. */
//OSLGlobals osl;
#endif
} KernelGLobals;
#endif #endif
/* For CUDA, constant memory textures must be globals, so we can't put them
into a struct. As a result we don't actually use this struct and use actual
globals and simply pass along a NULL pointer everywhere, which we hope gets
optimized out. */
#ifdef __KERNEL_CUDA__
__constant__ KernelData __data;
typedef struct KernelGlobals {} KernelGlobals;
#define KERNEL_TEX(type, ttype, name) ttype name;
#define KERNEL_IMAGE_TEX(type, ttype, name) ttype name;
#include "kernel_textures.h"
#endif
/* OpenCL */
#ifdef __KERNEL_OPENCL__
typedef struct KernelGlobals {
__constant KernelData *data;
#define KERNEL_TEX(type, ttype, name) \
__global type *name; \
int name##_width;
#include "kernel_textures.h"
} KernelGlobals; } KernelGlobals;
#endif #endif
CCL_NAMESPACE_END CCL_NAMESPACE_END

@ -18,14 +18,14 @@
CCL_NAMESPACE_BEGIN CCL_NAMESPACE_BEGIN
struct LightSample { typedef struct LightSample {
float3 P; float3 P;
float3 Ng; float3 Ng;
int object; int object;
int prim; int prim;
int shader; int shader;
float weight; float weight;
}; } LightSample;
/* Point Light */ /* Point Light */

@ -0,0 +1,153 @@
#ifndef KERNEL_TEX
#define KERNEL_TEX(type, ttype, name)
#endif
#ifndef KERNEL_IMAGE_TEX
#define KERNEL_IMAGE_TEX(type, ttype, name)
#endif
/* bvh */
KERNEL_TEX(float4, texture_float4, __bvh_nodes)
KERNEL_TEX(float4, texture_float4, __tri_woop)
KERNEL_TEX(uint, texture_uint, __prim_index)
KERNEL_TEX(uint, texture_uint, __prim_object)
KERNEL_TEX(uint, texture_uint, __object_node)
/* objects */
KERNEL_TEX(float4, texture_float4, __objects)
/* triangles */
KERNEL_TEX(float4, texture_float4, __tri_normal)
KERNEL_TEX(float4, texture_float4, __tri_vnormal)
KERNEL_TEX(float4, texture_float4, __tri_vindex)
KERNEL_TEX(float4, texture_float4, __tri_verts)
/* attributes */
KERNEL_TEX(uint4, texture_uint4, __attributes_map)
KERNEL_TEX(float, texture_float, __attributes_float)
KERNEL_TEX(float4, texture_float4, __attributes_float3)
/* lights */
KERNEL_TEX(float4, texture_float4, __light_distribution)
KERNEL_TEX(float4, texture_float4, __light_point)
/* shaders */
KERNEL_TEX(uint4, texture_uint4, __svm_nodes)
/* camera/film */
KERNEL_TEX(float, texture_float, __filter_table)
KERNEL_TEX(float, texture_float, __response_curve_R)
KERNEL_TEX(float, texture_float, __response_curve_G)
KERNEL_TEX(float, texture_float, __response_curve_B)
/* sobol */
KERNEL_TEX(uint, texture_uint, __sobol_directions)
/* image */
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_000)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_001)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_002)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_003)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_004)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_005)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_006)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_007)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_008)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_009)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_010)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_011)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_012)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_013)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_014)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_015)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_016)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_017)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_018)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_019)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_020)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_021)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_022)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_023)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_024)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_025)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_026)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_027)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_028)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_029)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_030)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_031)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_032)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_033)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_034)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_035)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_036)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_037)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_038)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_039)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_040)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_041)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_042)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_043)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_044)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_045)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_046)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_047)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_048)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_049)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_050)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_051)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_052)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_053)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_054)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_055)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_056)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_057)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_058)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_059)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_060)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_061)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_062)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_063)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_064)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_065)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_066)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_067)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_068)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_069)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_070)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_071)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_072)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_073)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_074)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_075)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_076)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_077)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_078)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_079)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_080)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_081)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_082)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_083)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_084)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_085)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_086)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_087)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_088)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_089)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_090)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_091)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_092)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_093)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_094)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_095)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_096)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_097)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_098)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_099)
#undef KERNEL_TEX
#undef KERNEL_IMAGE_TEX

@ -22,11 +22,11 @@ CCL_NAMESPACE_BEGIN
__device_inline float3 triangle_point_MT(KernelGlobals *kg, int tri_index, float u, float v) __device_inline float3 triangle_point_MT(KernelGlobals *kg, int tri_index, float u, float v)
{ {
/* load triangle vertices */ /* load triangle vertices */
float3 tri_vindex = as_float3(kernel_tex_fetch(__tri_vindex, tri_index)); float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, tri_index));
float3 v0 = as_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x))); float3 v0 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x)));
float3 v1 = as_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y))); float3 v1 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y)));
float3 v2 = as_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z))); float3 v2 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z)));
/* compute point */ /* compute point */
float t = 1.0f - u - v; float t = 1.0f - u - v;
@ -50,11 +50,11 @@ __device_inline float3 triangle_normal_MT(KernelGlobals *kg, int tri_index, int
{ {
#if 0 #if 0
/* load triangle vertices */ /* load triangle vertices */
float3 tri_vindex = as_float3(kernel_tex_fetch(__tri_vindex, tri_index)); float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, tri_index));
float3 v0 = as_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x))); float3 v0 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x)));
float3 v1 = as_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y))); float3 v1 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y)));
float3 v2 = as_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z))); float3 v2 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z)));
/* compute normal */ /* compute normal */
return normalize(cross(v2 - v0, v1 - v0)); return normalize(cross(v2 - v0, v1 - v0));
@ -68,11 +68,11 @@ __device_inline float3 triangle_normal_MT(KernelGlobals *kg, int tri_index, int
__device_inline float3 triangle_smooth_normal(KernelGlobals *kg, int tri_index, float u, float v) __device_inline float3 triangle_smooth_normal(KernelGlobals *kg, int tri_index, float u, float v)
{ {
/* load triangle vertices */ /* load triangle vertices */
float3 tri_vindex = as_float3(kernel_tex_fetch(__tri_vindex, tri_index)); float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, tri_index));
float3 n0 = as_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.x))); float3 n0 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.x)));
float3 n1 = as_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.y))); float3 n1 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.y)));
float3 n2 = as_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.z))); float3 n2 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.z)));
return normalize((1.0f - u - v)*n2 + u*n0 + v*n1); return normalize((1.0f - u - v)*n2 + u*n0 + v*n1);
} }
@ -80,11 +80,11 @@ __device_inline float3 triangle_smooth_normal(KernelGlobals *kg, int tri_index,
__device_inline void triangle_dPdudv(KernelGlobals *kg, float3 *dPdu, float3 *dPdv, int tri) __device_inline void triangle_dPdudv(KernelGlobals *kg, float3 *dPdu, float3 *dPdv, int tri)
{ {
/* fetch triangle vertex coordinates */ /* fetch triangle vertex coordinates */
float3 tri_vindex = as_float3(kernel_tex_fetch(__tri_vindex, tri)); float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, tri));
float3 p0 = as_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x))); float3 p0 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x)));
float3 p1 = as_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y))); float3 p1 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y)));
float3 p2 = as_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z))); float3 p2 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z)));
/* compute derivatives of P w.r.t. uv */ /* compute derivatives of P w.r.t. uv */
*dPdu = (p0 - p2); *dPdu = (p0 - p2);
@ -102,7 +102,7 @@ __device float triangle_attribute_float(KernelGlobals *kg, const ShaderData *sd,
return kernel_tex_fetch(__attributes_float, offset + sd->prim); return kernel_tex_fetch(__attributes_float, offset + sd->prim);
} }
else if(elem == ATTR_ELEMENT_VERTEX) { else if(elem == ATTR_ELEMENT_VERTEX) {
float3 tri_vindex = as_float3(kernel_tex_fetch(__tri_vindex, sd->prim)); float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, sd->prim));
float f0 = kernel_tex_fetch(__attributes_float, offset + __float_as_int(tri_vindex.x)); float f0 = kernel_tex_fetch(__attributes_float, offset + __float_as_int(tri_vindex.x));
float f1 = kernel_tex_fetch(__attributes_float, offset + __float_as_int(tri_vindex.y)); float f1 = kernel_tex_fetch(__attributes_float, offset + __float_as_int(tri_vindex.y));
@ -142,14 +142,14 @@ __device float3 triangle_attribute_float3(KernelGlobals *kg, const ShaderData *s
if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f); if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f);
if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
return as_float3(kernel_tex_fetch(__attributes_float3, offset + sd->prim)); return float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + sd->prim));
} }
else if(elem == ATTR_ELEMENT_VERTEX) { else if(elem == ATTR_ELEMENT_VERTEX) {
float3 tri_vindex = as_float3(kernel_tex_fetch(__tri_vindex, sd->prim)); float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, sd->prim));
float3 f0 = as_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.x))); float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.x)));
float3 f1 = as_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.y))); float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.y)));
float3 f2 = as_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.z))); float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.z)));
#ifdef __RAY_DIFFERENTIALS__ #ifdef __RAY_DIFFERENTIALS__
if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2; if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2;
@ -160,9 +160,9 @@ __device float3 triangle_attribute_float3(KernelGlobals *kg, const ShaderData *s
} }
else if(elem == ATTR_ELEMENT_CORNER) { else if(elem == ATTR_ELEMENT_CORNER) {
int tri = offset + sd->prim*3; int tri = offset + sd->prim*3;
float3 f0 = as_float3(kernel_tex_fetch(__attributes_float3, tri + 0)); float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 0));
float3 f1 = as_float3(kernel_tex_fetch(__attributes_float3, tri + 1)); float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 1));
float3 f2 = as_float3(kernel_tex_fetch(__attributes_float3, tri + 2)); float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 2));
#ifdef __RAY_DIFFERENTIALS__ #ifdef __RAY_DIFFERENTIALS__
if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2; if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2;

@ -21,11 +21,7 @@
#include "kernel_math.h" #include "kernel_math.h"
#ifndef __KERNEL_OPENCL__ #include "svm/svm_types.h"
#include "svm_types.h"
#endif
CCL_NAMESPACE_BEGIN CCL_NAMESPACE_BEGIN
@ -239,9 +235,7 @@ typedef struct ShaderData {
/* SVM closure data. we always sample a single closure, to get fixed /* SVM closure data. we always sample a single closure, to get fixed
* memory usage, svm_closure_data contains closure parameters. */ * memory usage, svm_closure_data contains closure parameters. */
#ifndef __KERNEL_OPENCL__
ClosureType svm_closure; ClosureType svm_closure;
#endif
float3 svm_closure_weight; float3 svm_closure_weight;
float svm_closure_data[3]; /* CUDA gives compile error if out of bounds */ float svm_closure_data[3]; /* CUDA gives compile error if out of bounds */
@ -291,11 +285,15 @@ typedef struct KernelCamera {
float shutterclose; float shutterclose;
/* differentials */ /* differentials */
float3 dx, dy; float3 dx;
float pad1;
float3 dy;
float pad2;
/* clipping */ /* clipping */
float nearclip; float nearclip;
float cliplength; float cliplength;
float pad3, pad4;
/* more matrices */ /* more matrices */
Transform screentoworld; Transform screentoworld;
@ -321,13 +319,14 @@ typedef struct KernelBackground {
typedef struct KernelSunSky { typedef struct KernelSunSky {
/* sun direction in spherical and cartesian */ /* sun direction in spherical and cartesian */
float theta, phi; float theta, phi, pad3, pad4;
float3 dir; float3 dir;
float pad; float pad;
/* perez function parameters */ /* perez function parameters */
float zenith_Y, zenith_x, zenith_y; float zenith_Y, zenith_x, zenith_y, pad2;
float perez_Y[5], perez_x[5], perez_y[5]; float perez_Y[5], perez_x[5], perez_y[5];
float pad5;
} KernelSunSky; } KernelSunSky;
typedef struct KernelIntegrator { typedef struct KernelIntegrator {
@ -348,7 +347,7 @@ typedef struct KernelIntegrator {
float blur_caustics; float blur_caustics;
/* padding */ /* padding */
int pad; int pad[2];
} KernelIntegrator; } KernelIntegrator;
typedef struct KernelBVH { typedef struct KernelBVH {

@ -139,8 +139,8 @@ __device int bsdf_ashikhmin_velvet_sample(const ShaderData *sd, float randu, flo
// TODO: find a better approximation for the retroreflective bounce // TODO: find a better approximation for the retroreflective bounce
*domega_in_dx = (2 * dot(m_N, sd->dI.dx)) * m_N - sd->dI.dx; *domega_in_dx = (2 * dot(m_N, sd->dI.dx)) * m_N - sd->dI.dx;
*domega_in_dy = (2 * dot(m_N, sd->dI.dy)) * m_N - sd->dI.dy; *domega_in_dy = (2 * dot(m_N, sd->dI.dy)) * m_N - sd->dI.dy;
*domega_in_dx *= 125; *domega_in_dx *= 125.0f;
*domega_in_dy *= 125; *domega_in_dy *= 125.0f;
#endif #endif
} else } else
*pdf = 0.0f; *pdf = 0.0f;

@ -88,8 +88,8 @@ __device int bsdf_diffuse_sample(const ShaderData *sd, float randu, float randv,
// TODO: find a better approximation for the diffuse bounce // TODO: find a better approximation for the diffuse bounce
*domega_in_dx = (2 * dot(m_N, sd->dI.dx)) * m_N - sd->dI.dx; *domega_in_dx = (2 * dot(m_N, sd->dI.dx)) * m_N - sd->dI.dx;
*domega_in_dy = (2 * dot(m_N, sd->dI.dy)) * m_N - sd->dI.dy; *domega_in_dy = (2 * dot(m_N, sd->dI.dy)) * m_N - sd->dI.dy;
*domega_in_dx *= 125; *domega_in_dx *= 125.0f;
*domega_in_dy *= 125; *domega_in_dy *= 125.0f;
#endif #endif
} }
else else
@ -151,8 +151,8 @@ __device int bsdf_translucent_sample(const ShaderData *sd, float randu, float ra
// TODO: find a better approximation for the diffuse bounce // TODO: find a better approximation for the diffuse bounce
*domega_in_dx = (2 * dot(m_N, sd->dI.dx)) * m_N - sd->dI.dx; *domega_in_dx = (2 * dot(m_N, sd->dI.dx)) * m_N - sd->dI.dx;
*domega_in_dy = (2 * dot(m_N, sd->dI.dy)) * m_N - sd->dI.dy; *domega_in_dy = (2 * dot(m_N, sd->dI.dy)) * m_N - sd->dI.dy;
*domega_in_dx *= -125; *domega_in_dx *= -125.0f;
*domega_in_dy *= -125; *domega_in_dy *= -125.0f;
#endif #endif
} else } else
*pdf = 0; *pdf = 0;

@ -195,8 +195,8 @@ __device int bsdf_microfacet_ggx_sample(const ShaderData *sd, float randu, float
// derivatives a bit bigger. In theory this varies with the // derivatives a bit bigger. In theory this varies with the
// roughness but the exact relationship is complex and // roughness but the exact relationship is complex and
// requires more ops than are practical. // requires more ops than are practical.
*domega_in_dx *= 10; *domega_in_dx *= 10.0f;
*domega_in_dy *= 10; *domega_in_dy *= 10.0f;
#endif #endif
} }
} }
@ -246,8 +246,8 @@ __device int bsdf_microfacet_ggx_sample(const ShaderData *sd, float randu, float
// derivatives a bit bigger. In theory this varies with the // derivatives a bit bigger. In theory this varies with the
// roughness but the exact relationship is complex and // roughness but the exact relationship is complex and
// requires more ops than are practical. // requires more ops than are practical.
*domega_in_dx *= 10; *domega_in_dx *= 10.0f;
*domega_in_dy *= 10; *domega_in_dy *= 10.0f;
#endif #endif
} }
} }
@ -423,8 +423,8 @@ __device int bsdf_microfacet_beckmann_sample(const ShaderData *sd, float randu,
// derivatives a bit bigger. In theory this varies with the // derivatives a bit bigger. In theory this varies with the
// roughness but the exact relationship is complex and // roughness but the exact relationship is complex and
// requires more ops than are practical. // requires more ops than are practical.
*domega_in_dx *= 10; *domega_in_dx *= 10.0f;
*domega_in_dy *= 10; *domega_in_dy *= 10.0f;
#endif #endif
} }
} }
@ -478,8 +478,8 @@ __device int bsdf_microfacet_beckmann_sample(const ShaderData *sd, float randu,
// derivatives a bit bigger. In theory this varies with the // derivatives a bit bigger. In theory this varies with the
// roughness but the exact relationship is complex and // roughness but the exact relationship is complex and
// requires more ops than are practical. // requires more ops than are practical.
*domega_in_dx *= 10; *domega_in_dx *= 10.0f;
*domega_in_dy *= 10; *domega_in_dy *= 10.0f;
#endif #endif
} }
} }

@ -187,8 +187,8 @@ __device int bsdf_ward_sample(const ShaderData *sd, float randu, float randv, fl
// derivatives a bit bigger. In theory this varies with the // derivatives a bit bigger. In theory this varies with the
// roughness but the exact relationship is complex and // roughness but the exact relationship is complex and
// requires more ops than are practical. // requires more ops than are practical.
*domega_in_dx *= 10; *domega_in_dx *= 10.0f;
*domega_in_dy *= 10; *domega_in_dy *= 10.0f;
#endif #endif
} }
} }

@ -122,8 +122,8 @@ __device int bsdf_westin_backscatter_sample(const ShaderData *sd, float randu, f
// derivatives a bit bigger. In theory this varies with the // derivatives a bit bigger. In theory this varies with the
// exponent but the exact relationship is complex and // exponent but the exact relationship is complex and
// requires more ops than are practical. // requires more ops than are practical.
*domega_in_dx *= 10; *domega_in_dx *= 10.0f;
*domega_in_dy *= 10; *domega_in_dy *= 10.0f;
#endif #endif
} }
} }
@ -198,8 +198,8 @@ __device int bsdf_westin_sheen_sample(const ShaderData *sd, float randu, float r
// TODO: find a better approximation for the diffuse bounce // TODO: find a better approximation for the diffuse bounce
*domega_in_dx = (2 * dot(m_N, sd->dI.dx)) * m_N - sd->dI.dx; *domega_in_dx = (2 * dot(m_N, sd->dI.dx)) * m_N - sd->dI.dx;
*domega_in_dy = (2 * dot(m_N, sd->dI.dy)) * m_N - sd->dI.dy; *domega_in_dy = (2 * dot(m_N, sd->dI.dy)) * m_N - sd->dI.dy;
*domega_in_dx *= 125; *domega_in_dx *= 125.0f;
*domega_in_dy *= 125; *domega_in_dy *= 125.0f;
#endif #endif
} else } else
pdf = 0; pdf = 0;

@ -41,7 +41,7 @@ __device float svm_blend(float3 p, NodeBlendType type, NodeBlendAxis axis)
return r*r; return r*r;
} }
else if(type == NODE_BLEND_EASING) { else if(type == NODE_BLEND_EASING) {
float r = min(fmaxf((1.0f + x)/2.0f, 0.0f), 1.0f); float r = fminf(fmaxf((1.0f + x)/2.0f, 0.0f), 1.0f);
float t = r*r; float t = r*r;
return (3.0f*t - 2.0f*t*r); return (3.0f*t - 2.0f*t*r);

@ -34,8 +34,9 @@ __device void svm_node_set_bump(ShaderData *sd, float *stack, uint c_offset, uin
float3 surfgrad = (h_x - h_c)*Rx + (h_y - h_c)*Ry; float3 surfgrad = (h_x - h_c)*Rx + (h_y - h_c)*Ry;
surfgrad *= 0.1f; /* todo: remove this factor */ surfgrad *= 0.1f; /* todo: remove this factor */
sd->N = normalize(fabsf(det)*sd->N - signf(det)*surfgrad); float absdet = fabsf(det);
sd->N = normalize(absdet*sd->N - signf(det)*surfgrad);
#endif #endif
} }

@ -23,12 +23,13 @@ CCL_NAMESPACE_BEGIN
__device float svm_distorted_noise(float3 p, float size, NodeNoiseBasis basis, NodeNoiseBasis distortion_basis, float distortion) __device float svm_distorted_noise(float3 p, float size, NodeNoiseBasis basis, NodeNoiseBasis distortion_basis, float distortion)
{ {
float3 r; float3 r;
float3 offset = make_float3(13.5f, 13.5f, 13.5f);
p /= size; p /= size;
r.x = noise_basis(p + make_float3(13.5f, 13.5f, 13.5f), basis) * distortion; r.x = noise_basis(p + offset, basis) * distortion;
r.y = noise_basis(p, basis) * distortion; r.y = noise_basis(p, basis) * distortion;
r.z = noise_basis(p - make_float3(13.5f, 13.5f, 13.5f), basis) * distortion; r.z = noise_basis(p - offset, basis) * distortion;
return noise_basis(p + r, distortion_basis); /* distorted-domain noise */ return noise_basis(p + r, distortion_basis); /* distorted-domain noise */
} }

@ -31,6 +31,9 @@ __device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y)
also note that cuda has 128 textures limit, we use 100 now, since also note that cuda has 128 textures limit, we use 100 now, since
we still need some for other storage */ we still need some for other storage */
#ifdef __KERNEL_OPENCL__
r = make_float4(0.0f, 0.0f, 0.0f, 0.0f); /* todo */
#else
switch(id) { switch(id) {
case 0: r = kernel_tex_image_interp(__tex_image_000, x, y); break; case 0: r = kernel_tex_image_interp(__tex_image_000, x, y); break;
case 1: r = kernel_tex_image_interp(__tex_image_001, x, y); break; case 1: r = kernel_tex_image_interp(__tex_image_001, x, y); break;
@ -136,6 +139,7 @@ __device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y)
kernel_assert(0); kernel_assert(0);
return make_float4(0.0f, 0.0f, 0.0f, 0.0f); return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
} }
#endif
return r; return r;
} }
@ -151,8 +155,11 @@ __device void svm_node_tex_image(KernelGlobals *kg, ShaderData *sd, float *stack
float4 f = svm_image_texture(kg, id, co.x, co.y); float4 f = svm_image_texture(kg, id, co.x, co.y);
float3 r = make_float3(f.x, f.y, f.z); float3 r = make_float3(f.x, f.y, f.z);
if(srgb) if(srgb) {
r = color_srgb_to_scene_linear(r); r.x = color_srgb_to_scene_linear(r.x);
r.y = color_srgb_to_scene_linear(r.y);
r.z = color_srgb_to_scene_linear(r.z);
}
stack_store_float3(stack, out_offset, r); stack_store_float3(stack, out_offset, r);
} }
@ -170,8 +177,11 @@ __device void svm_node_tex_environment(KernelGlobals *kg, ShaderData *sd, float
float4 f = svm_image_texture(kg, id, u, v); float4 f = svm_image_texture(kg, id, u, v);
float3 r = make_float3(f.x, f.y, f.z); float3 r = make_float3(f.x, f.y, f.z);
if(srgb) if(srgb) {
r = color_srgb_to_scene_linear(r); r.x = color_srgb_to_scene_linear(r.x);
r.y = color_srgb_to_scene_linear(r.y);
r.z = color_srgb_to_scene_linear(r.z);
}
stack_store_float3(stack, out_offset, r); stack_store_float3(stack, out_offset, r);
} }

@ -41,7 +41,8 @@ __device float3 rgb_to_hsv(float3 rgb)
h = 0.0f; h = 0.0f;
} }
else { else {
c = (make_float3(cmax, cmax, cmax) - rgb)/cdelta; float3 cmax3 = make_float3(cmax, cmax, cmax);
c = (cmax3 - rgb)/cdelta;
if(rgb.x == cmax) h = c.z - c.y; if(rgb.x == cmax) h = c.z - c.y;
else if(rgb.y == cmax) h = 2.0f + c.x - c.z; else if(rgb.y == cmax) h = 2.0f + c.x - c.z;
@ -91,26 +92,33 @@ __device float3 hsv_to_rgb(float3 hsv)
return rgb; return rgb;
} }
__device float3 svm_lerp(const float3 a, const float3 b, float t)
{
return (a * (1.0f - t) + b * t);
}
__device float3 svm_mix_blend(float t, float3 col1, float3 col2) __device float3 svm_mix_blend(float t, float3 col1, float3 col2)
{ {
return lerp(col1, col2, t); return svm_lerp(col1, col2, t);
} }
__device float3 svm_mix_add(float t, float3 col1, float3 col2) __device float3 svm_mix_add(float t, float3 col1, float3 col2)
{ {
return lerp(col1, col1 + col2, t); return svm_lerp(col1, col1 + col2, t);
} }
__device float3 svm_mix_mul(float t, float3 col1, float3 col2) __device float3 svm_mix_mul(float t, float3 col1, float3 col2)
{ {
return lerp(col1, col1 * col2, t); return svm_lerp(col1, col1 * col2, t);
} }
__device float3 svm_mix_screen(float t, float3 col1, float3 col2) __device float3 svm_mix_screen(float t, float3 col1, float3 col2)
{ {
float tm = 1.0f - t; float tm = 1.0f - t;
float3 one = make_float3(1.0f, 1.0f, 1.0f);
float3 tm3 = make_float3(tm, tm, tm);
return make_float3(1.0f, 1.0f, 1.0f) - (make_float3(tm, tm, tm) + t*(make_float3(1.0f, 1.0f, 1.0f) - col2))*(make_float3(1.0f, 1.0f, 1.0f) - col1); return one - (tm3 + t*(one - col2))*(one - col1);
} }
__device float3 svm_mix_overlay(float t, float3 col1, float3 col2) __device float3 svm_mix_overlay(float t, float3 col1, float3 col2)
@ -139,7 +147,7 @@ __device float3 svm_mix_overlay(float t, float3 col1, float3 col2)
__device float3 svm_mix_sub(float t, float3 col1, float3 col2) __device float3 svm_mix_sub(float t, float3 col1, float3 col2)
{ {
return lerp(col1, col1 - col2, t); return svm_lerp(col1, col1 - col2, t);
} }
__device float3 svm_mix_div(float t, float3 col1, float3 col2) __device float3 svm_mix_div(float t, float3 col1, float3 col2)
@ -157,7 +165,7 @@ __device float3 svm_mix_div(float t, float3 col1, float3 col2)
__device float3 svm_mix_diff(float t, float3 col1, float3 col2) __device float3 svm_mix_diff(float t, float3 col1, float3 col2)
{ {
return lerp(col1, fabs(col1 - col2), t); return svm_lerp(col1, fabs(col1 - col2), t);
} }
__device float3 svm_mix_dark(float t, float3 col1, float3 col2) __device float3 svm_mix_dark(float t, float3 col1, float3 col2)
@ -255,7 +263,7 @@ __device float3 svm_mix_hue(float t, float3 col1, float3 col2)
hsv.x = hsv2.x; hsv.x = hsv2.x;
float3 tmp = hsv_to_rgb(hsv); float3 tmp = hsv_to_rgb(hsv);
outcol = lerp(outcol, tmp, t); outcol = svm_lerp(outcol, tmp, t);
} }
return outcol; return outcol;
@ -302,7 +310,7 @@ __device float3 svm_mix_color(float t, float3 col1, float3 col2)
hsv.y = hsv2.y; hsv.y = hsv2.y;
float3 tmp = hsv_to_rgb(hsv); float3 tmp = hsv_to_rgb(hsv);
outcol = lerp(outcol, tmp, t); outcol = svm_lerp(outcol, tmp, t);
} }
return outcol; return outcol;

@ -49,7 +49,7 @@ __device float sky_angle_between(float thetav, float phiv, float theta, float ph
return safe_acosf(cospsi); return safe_acosf(cospsi);
} }
__device float sky_perez_function(float lam[5], float theta, float gamma) __device float sky_perez_function(__constant float *lam, float theta, float gamma)
{ {
float ctheta = cosf(theta); float ctheta = cosf(theta);
float cgamma = cosf(gamma); float cgamma = cosf(gamma);

@ -69,7 +69,7 @@ __device void voronoi(float3 p, NodeDistanceMetric distance_metric, float e, flo
float3 pd = p - (vp + ip); float3 pd = p - (vp + ip);
float d = voronoi_distance(distance_metric, pd, e); float d = voronoi_distance(distance_metric, pd, e);
vp += make_float3((float)xx, (float)yy, (float)zz); vp += ip;
if(d < da[0]) { if(d < da[0]) {
da[3] = da[2]; da[3] = da[2];

@ -35,7 +35,7 @@ CCL_NAMESPACE_BEGIN
* happens i have no idea, but consecutive values are problematic, maybe it * happens i have no idea, but consecutive values are problematic, maybe it
* generates an incorrect jump table. */ * generates an incorrect jump table. */
enum NodeType { typedef enum NodeType {
NODE_END = 0, NODE_END = 0,
NODE_CLOSURE_BSDF = 100, NODE_CLOSURE_BSDF = 100,
NODE_CLOSURE_EMISSION = 200, NODE_CLOSURE_EMISSION = 200,
@ -82,23 +82,23 @@ enum NodeType {
NODE_ATTR_BUMP_DX = 4400, NODE_ATTR_BUMP_DX = 4400,
NODE_ATTR_BUMP_DY = 4500, NODE_ATTR_BUMP_DY = 4500,
NODE_TEX_ENVIRONMENT = 4600 NODE_TEX_ENVIRONMENT = 4600
}; } NodeType;
enum NodeAttributeType { typedef enum NodeAttributeType {
NODE_ATTR_FLOAT = 0, NODE_ATTR_FLOAT = 0,
NODE_ATTR_FLOAT3 NODE_ATTR_FLOAT3
}; } NodeAttributeType;
enum NodeGeometry { typedef enum NodeGeometry {
NODE_GEOM_P = 0, NODE_GEOM_P = 0,
NODE_GEOM_N, NODE_GEOM_N,
NODE_GEOM_T, NODE_GEOM_T,
NODE_GEOM_I, NODE_GEOM_I,
NODE_GEOM_Ng, NODE_GEOM_Ng,
NODE_GEOM_uv NODE_GEOM_uv
}; } NodeGeometry;
enum NodeLightPath { typedef enum NodeLightPath {
NODE_LP_camera = 0, NODE_LP_camera = 0,
NODE_LP_shadow, NODE_LP_shadow,
NODE_LP_diffuse, NODE_LP_diffuse,
@ -106,16 +106,16 @@ enum NodeLightPath {
NODE_LP_reflection, NODE_LP_reflection,
NODE_LP_transmission, NODE_LP_transmission,
NODE_LP_backfacing NODE_LP_backfacing
}; } NodeLightPath;
enum NodeTexCoord { typedef enum NodeTexCoord {
NODE_TEXCO_OBJECT, NODE_TEXCO_OBJECT,
NODE_TEXCO_CAMERA, NODE_TEXCO_CAMERA,
NODE_TEXCO_WINDOW, NODE_TEXCO_WINDOW,
NODE_TEXCO_REFLECTION NODE_TEXCO_REFLECTION
}; } NodeTexCoord;
enum NodeMix { typedef enum NodeMix {
NODE_MIX_BLEND = 0, NODE_MIX_BLEND = 0,
NODE_MIX_ADD, NODE_MIX_ADD,
NODE_MIX_MUL, NODE_MIX_MUL,
@ -134,9 +134,9 @@ enum NodeMix {
NODE_MIX_COLOR, NODE_MIX_COLOR,
NODE_MIX_SOFT, NODE_MIX_SOFT,
NODE_MIX_LINEAR NODE_MIX_LINEAR
}; } NodeMix;
enum NodeMath { typedef enum NodeMath {
NODE_MATH_ADD, NODE_MATH_ADD,
NODE_MATH_SUBTRACT, NODE_MATH_SUBTRACT,
NODE_MATH_MULTIPLY, NODE_MATH_MULTIPLY,
@ -154,24 +154,24 @@ enum NodeMath {
NODE_MATH_ROUND, NODE_MATH_ROUND,
NODE_MATH_LESS_THAN, NODE_MATH_LESS_THAN,
NODE_MATH_GREATER_THAN NODE_MATH_GREATER_THAN
}; } NodeMath;
enum NodeVectorMath { typedef enum NodeVectorMath {
NODE_VECTOR_MATH_ADD, NODE_VECTOR_MATH_ADD,
NODE_VECTOR_MATH_SUBTRACT, NODE_VECTOR_MATH_SUBTRACT,
NODE_VECTOR_MATH_AVERAGE, NODE_VECTOR_MATH_AVERAGE,
NODE_VECTOR_MATH_DOT_PRODUCT, NODE_VECTOR_MATH_DOT_PRODUCT,
NODE_VECTOR_MATH_CROSS_PRODUCT, NODE_VECTOR_MATH_CROSS_PRODUCT,
NODE_VECTOR_MATH_NORMALIZE NODE_VECTOR_MATH_NORMALIZE
}; } NodeVectorMath;
enum NodeConvert { typedef enum NodeConvert {
NODE_CONVERT_FV, NODE_CONVERT_FV,
NODE_CONVERT_CF, NODE_CONVERT_CF,
NODE_CONVERT_VF NODE_CONVERT_VF
}; } NodeConvert;
enum NodeDistanceMetric { typedef enum NodeDistanceMetric {
NODE_VORONOI_DISTANCE_SQUARED, NODE_VORONOI_DISTANCE_SQUARED,
NODE_VORONOI_ACTUAL_DISTANCE, NODE_VORONOI_ACTUAL_DISTANCE,
NODE_VORONOI_MANHATTAN, NODE_VORONOI_MANHATTAN,
@ -179,9 +179,9 @@ enum NodeDistanceMetric {
NODE_VORONOI_MINKOVSKY_H, NODE_VORONOI_MINKOVSKY_H,
NODE_VORONOI_MINKOVSKY_4, NODE_VORONOI_MINKOVSKY_4,
NODE_VORONOI_MINKOVSKY NODE_VORONOI_MINKOVSKY
}; } NodeDistanceMetric;
enum NodeNoiseBasis { typedef enum NodeNoiseBasis {
NODE_NOISE_PERLIN, NODE_NOISE_PERLIN,
NODE_NOISE_VORONOI_F1, NODE_NOISE_VORONOI_F1,
NODE_NOISE_VORONOI_F2, NODE_NOISE_VORONOI_F2,
@ -190,30 +190,30 @@ enum NodeNoiseBasis {
NODE_NOISE_VORONOI_F2_F1, NODE_NOISE_VORONOI_F2_F1,
NODE_NOISE_VORONOI_CRACKLE, NODE_NOISE_VORONOI_CRACKLE,
NODE_NOISE_CELL_NOISE NODE_NOISE_CELL_NOISE
}; } NodeNoiseBasis;
enum NodeWaveType { typedef enum NodeWaveType {
NODE_WAVE_SINE, NODE_WAVE_SINE,
NODE_WAVE_SAW, NODE_WAVE_SAW,
NODE_WAVE_TRI NODE_WAVE_TRI
}; } NodeWaveType;
enum NodeMusgraveType { typedef enum NodeMusgraveType {
NODE_MUSGRAVE_MULTIFRACTAL, NODE_MUSGRAVE_MULTIFRACTAL,
NODE_MUSGRAVE_FBM, NODE_MUSGRAVE_FBM,
NODE_MUSGRAVE_HYBRID_MULTIFRACTAL, NODE_MUSGRAVE_HYBRID_MULTIFRACTAL,
NODE_MUSGRAVE_RIDGED_MULTIFRACTAL, NODE_MUSGRAVE_RIDGED_MULTIFRACTAL,
NODE_MUSGRAVE_HETERO_TERRAIN NODE_MUSGRAVE_HETERO_TERRAIN
}; } NodeMusgraveType;
enum NodeWoodType { typedef enum NodeWoodType {
NODE_WOOD_BANDS, NODE_WOOD_BANDS,
NODE_WOOD_RINGS, NODE_WOOD_RINGS,
NODE_WOOD_BAND_NOISE, NODE_WOOD_BAND_NOISE,
NODE_WOOD_RING_NOISE NODE_WOOD_RING_NOISE
}; } NodeWoodType;
enum NodeBlendType { typedef enum NodeBlendType {
NODE_BLEND_LINEAR, NODE_BLEND_LINEAR,
NODE_BLEND_QUADRATIC, NODE_BLEND_QUADRATIC,
NODE_BLEND_EASING, NODE_BLEND_EASING,
@ -221,37 +221,37 @@ enum NodeBlendType {
NODE_BLEND_RADIAL, NODE_BLEND_RADIAL,
NODE_BLEND_QUADRATIC_SPHERE, NODE_BLEND_QUADRATIC_SPHERE,
NODE_BLEND_SPHERICAL NODE_BLEND_SPHERICAL
}; } NodeBlendType;
enum NodeBlendAxis { typedef enum NodeBlendAxis {
NODE_BLEND_HORIZONTAL, NODE_BLEND_HORIZONTAL,
NODE_BLEND_VERTICAL NODE_BLEND_VERTICAL
}; } NodeBlendAxis;
enum NodeMarbleType { typedef enum NodeMarbleType {
NODE_MARBLE_SOFT, NODE_MARBLE_SOFT,
NODE_MARBLE_SHARP, NODE_MARBLE_SHARP,
NODE_MARBLE_SHARPER NODE_MARBLE_SHARPER
}; } NodeMarbleType;
enum NodeStucciType { typedef enum NodeStucciType {
NODE_STUCCI_PLASTIC, NODE_STUCCI_PLASTIC,
NODE_STUCCI_WALL_IN, NODE_STUCCI_WALL_IN,
NODE_STUCCI_WALL_OUT NODE_STUCCI_WALL_OUT
}; } NodeStucciType;
enum NodeVoronoiColoring { typedef enum NodeVoronoiColoring {
NODE_VORONOI_INTENSITY, NODE_VORONOI_INTENSITY,
NODE_VORONOI_POSITION, NODE_VORONOI_POSITION,
NODE_VORONOI_POSITION_OUTLINE, NODE_VORONOI_POSITION_OUTLINE,
NODE_VORONOI_POSITION_OUTLINE_INTENSITY NODE_VORONOI_POSITION_OUTLINE_INTENSITY
}; } NodeVoronoiColoring;
enum ShaderType { typedef enum ShaderType {
SHADER_TYPE_SURFACE, SHADER_TYPE_SURFACE,
SHADER_TYPE_VOLUME, SHADER_TYPE_VOLUME,
SHADER_TYPE_DISPLACEMENT SHADER_TYPE_DISPLACEMENT
}; } ShaderType;
/* Closure */ /* Closure */

@ -40,6 +40,8 @@ __device float color_scene_linear_to_srgb(float c)
return 1.055f * pow(c, 1.0f/2.4f) - 0.055f; return 1.055f * pow(c, 1.0f/2.4f) - 0.055f;
} }
#ifndef __KERNEL_OPENCL__
__device float3 color_srgb_to_scene_linear(float3 c) __device float3 color_srgb_to_scene_linear(float3 c)
{ {
return make_float3( return make_float3(
@ -56,6 +58,8 @@ __device float3 color_scene_linear_to_srgb(float3 c)
color_scene_linear_to_srgb(c.z)); color_scene_linear_to_srgb(c.z));
} }
#endif
CCL_NAMESPACE_END CCL_NAMESPACE_END
#endif /* __UTIL_COLOR_H__ */ #endif /* __UTIL_COLOR_H__ */

@ -506,13 +506,13 @@ __device_inline float3 fabs(float3 a)
return make_float3(fabsf(a.x), fabsf(a.y), fabsf(a.z)); return make_float3(fabsf(a.x), fabsf(a.y), fabsf(a.z));
} }
__device_inline float3 as_float3(const float4& a) #endif
__device_inline float3 float4_to_float3(const float4 a)
{ {
return make_float3(a.x, a.y, a.z); return make_float3(a.x, a.y, a.z);
} }
#endif
#ifndef __KERNEL_GPU__ #ifndef __KERNEL_GPU__
__device_inline void print_float3(const char *label, const float3& a) __device_inline void print_float3(const char *label, const float3& a)