Cycles OpenCL: a few fixes to get things compiling after kernel changes,

for Apple OpenCL on OS X 10.8 and simple AO render.

Also, the environment variable CYCLES_OPENCL_TEST can now be set to CPU, GPU,
ACCELERATOR, DEFAULT or ALL to test particular devices.
This commit is contained in:
Brecht Van Lommel 2013-05-09 14:05:40 +00:00
parent d236b4d60f
commit d0ffbeec73
6 changed files with 49 additions and 26 deletions

@ -40,6 +40,26 @@ CCL_NAMESPACE_BEGIN
#define CL_MEM_PTR(p) ((cl_mem)(unsigned long)(p)) #define CL_MEM_PTR(p) ((cl_mem)(unsigned long)(p))
/* Select which OpenCL device types should be enumerated.
 *
 * The CYCLES_OPENCL_TEST environment variable may be set to ALL, DEFAULT,
 * CPU, GPU or ACCELERATOR to request one specific device type. Matching is
 * an exact, case-sensitive strcmp(). When the variable is unset, or holds
 * any other value, GPU and accelerator devices are returned. */
static cl_device_type opencl_device_type()
{
	const cl_device_type fallback = CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR;
	const char *requested = getenv("CYCLES_OPENCL_TEST");

	if(requested == NULL)
		return fallback;

	/* recognized values, checked in the listed order */
	const struct {
		const char *name;
		cl_device_type type;
	} choices[] = {
		{"ALL", CL_DEVICE_TYPE_ALL},
		{"DEFAULT", CL_DEVICE_TYPE_DEFAULT},
		{"CPU", CL_DEVICE_TYPE_CPU},
		{"GPU", CL_DEVICE_TYPE_GPU},
		{"ACCELERATOR", CL_DEVICE_TYPE_ACCELERATOR},
	};

	for(size_t i = 0; i < sizeof(choices)/sizeof(choices[0]); i++) {
		if(strcmp(requested, choices[i].name) == 0)
			return choices[i].type;
	}

	/* unrecognized value: behave as if the variable was not set */
	return fallback;
}
class OpenCLDevice : public Device class OpenCLDevice : public Device
{ {
public: public:
@ -181,7 +201,7 @@ public:
vector<cl_device_id> device_ids; vector<cl_device_id> device_ids;
cl_uint num_devices; cl_uint num_devices;
if(opencl_error(clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR, 0, NULL, &num_devices))) if(opencl_error(clGetDeviceIDs(cpPlatform, opencl_device_type(), 0, NULL, &num_devices)))
return; return;
if(info.num > num_devices) { if(info.num > num_devices) {
@ -194,7 +214,7 @@ public:
device_ids.resize(num_devices); device_ids.resize(num_devices);
if(opencl_error(clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR, num_devices, &device_ids[0], NULL))) if(opencl_error(clGetDeviceIDs(cpPlatform, opencl_device_type(), num_devices, &device_ids[0], NULL)))
return; return;
cdDevice = device_ids[info.num]; cdDevice = device_ids[info.num];
@ -306,10 +326,10 @@ public:
build_options += "-D__KERNEL_OPENCL_NVIDIA__ -cl-nv-maxrregcount=24 -cl-nv-verbose "; build_options += "-D__KERNEL_OPENCL_NVIDIA__ -cl-nv-maxrregcount=24 -cl-nv-verbose ";
else if(platform_name == "Apple") else if(platform_name == "Apple")
build_options += "-D__CL_NO_FLOAT3__ -D__KERNEL_OPENCL_APPLE__ "; build_options += "-D__KERNEL_OPENCL_APPLE__ -Wno-missing-prototypes";
else if(platform_name == "AMD Accelerated Parallel Processing") else if(platform_name == "AMD Accelerated Parallel Processing")
build_options += "-D__CL_NO_FLOAT3__ -D__KERNEL_OPENCL_AMD__ "; build_options += "-D__KERNEL_OPENCL_AMD__ ";
return build_options; return build_options;
} }
@ -754,12 +774,12 @@ void device_opencl_info(vector<DeviceInfo>& devices)
if(clGetPlatformIDs(num_platforms, &platform_ids[0], NULL) != CL_SUCCESS) if(clGetPlatformIDs(num_platforms, &platform_ids[0], NULL) != CL_SUCCESS)
return; return;
if(clGetDeviceIDs(platform_ids[0], CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR, 0, NULL, &num_devices) != CL_SUCCESS || num_devices == 0) if(clGetDeviceIDs(platform_ids[0], opencl_device_type(), 0, NULL, &num_devices) != CL_SUCCESS || num_devices == 0)
return; return;
device_ids.resize(num_devices); device_ids.resize(num_devices);
if(clGetDeviceIDs(platform_ids[0], CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR, num_devices, &device_ids[0], NULL) != CL_SUCCESS) if(clGetDeviceIDs(platform_ids[0], opencl_device_type(), num_devices, &device_ids[0], NULL) != CL_SUCCESS)
return; return;
/* add devices */ /* add devices */

@ -26,6 +26,10 @@
#define CCL_NAMESPACE_BEGIN #define CCL_NAMESPACE_BEGIN
#define CCL_NAMESPACE_END #define CCL_NAMESPACE_END
#ifdef __KERNEL_OPENCL_AMD__
#define __CL_NO_FLOAT3__
#endif
#ifdef __CL_NO_FLOAT3__ #ifdef __CL_NO_FLOAT3__
#define float3 float4 #define float3 float4
#endif #endif

@ -52,7 +52,7 @@ __device_noinline float3 direct_emissive_eval(KernelGlobals *kg, float rando,
shader_setup_from_sample(kg, &sd, ls->P, ls->Ng, I, ls->shader, ls->object, ls->prim, u, v, t, time, ls->prim); shader_setup_from_sample(kg, &sd, ls->P, ls->Ng, I, ls->shader, ls->object, ls->prim, u, v, t, time, ls->prim);
else else
#endif #endif
shader_setup_from_sample(kg, &sd, ls->P, ls->Ng, I, ls->shader, ls->object, ls->prim, u, v, t, time); shader_setup_from_sample(kg, &sd, ls->P, ls->Ng, I, ls->shader, ls->object, ls->prim, u, v, t, time, ~0);
ls->Ng = sd.Ng; ls->Ng = sd.Ng;

@ -430,11 +430,9 @@ __device void object_transform_light_sample(KernelGlobals *kg, LightSample *ls,
/* instance transform */ /* instance transform */
if(object >= 0) { if(object >= 0) {
#ifdef __OBJECT_MOTION__ #ifdef __OBJECT_MOTION__
Transform itfm; Transform tfm = object_fetch_transform_motion_test(kg, object, time, NULL);
Transform tfm = object_fetch_transform_motion_test(kg, object, time, &itfm);
#else #else
Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM); Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
#endif #endif
ls->P = transform_point(&tfm, ls->P); ls->P = transform_point(&tfm, ls->P);

@ -262,7 +262,7 @@ __device_inline void shader_setup_from_subsurface(KernelGlobals *kg, ShaderData
__device_noinline void shader_setup_from_sample(KernelGlobals *kg, ShaderData *sd, __device_noinline void shader_setup_from_sample(KernelGlobals *kg, ShaderData *sd,
const float3 P, const float3 Ng, const float3 I, const float3 P, const float3 Ng, const float3 I,
int shader, int object, int prim, float u, float v, float t, float time, int segment = ~0) int shader, int object, int prim, float u, float v, float t, float time, int segment)
{ {
/* vectors */ /* vectors */
sd->P = P; sd->P = P;
@ -393,7 +393,7 @@ __device void shader_setup_from_displace(KernelGlobals *kg, ShaderData *sd,
/* watch out: no instance transform currently */ /* watch out: no instance transform currently */
shader_setup_from_sample(kg, sd, P, Ng, I, shader, object, prim, u, v, 0.0f, TIME_INVALID); shader_setup_from_sample(kg, sd, P, Ng, I, shader, object, prim, u, v, 0.0f, TIME_INVALID, ~0);
} }
/* ShaderData setup from ray into background */ /* ShaderData setup from ray into background */
@ -769,8 +769,9 @@ __device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd,
#ifdef __SVM__ #ifdef __SVM__
svm_eval_nodes(kg, sd, SHADER_TYPE_SURFACE, randb, path_flag); svm_eval_nodes(kg, sd, SHADER_TYPE_SURFACE, randb, path_flag);
#else #else
bsdf_diffuse_setup(&sd->closure);
sd->closure.weight = make_float3(0.8f, 0.8f, 0.8f); sd->closure.weight = make_float3(0.8f, 0.8f, 0.8f);
sd->closure.N = sd->N;
sd->flag |= bsdf_diffuse_setup(&sd->closure);
#endif #endif
} }
} }

@ -109,19 +109,6 @@ __device_inline Transform transform_transpose(const Transform a)
return t; return t;
} }
/* Product of two transforms, treating each as four float4 rows (x, y, z, w).
 * Every entry of the result is the dot() of one row of a with one row of
 * the transposed b. */
__device_inline Transform operator*(const Transform a, const Transform b)
{
	/* transpose once up front so all sixteen entries use plain row-row dot() */
	const Transform bt = transform_transpose(b);
	Transform result;

	result.x = make_float4(dot(a.x, bt.x), dot(a.x, bt.y), dot(a.x, bt.z), dot(a.x, bt.w));
	result.y = make_float4(dot(a.y, bt.x), dot(a.y, bt.y), dot(a.y, bt.z), dot(a.y, bt.w));
	result.z = make_float4(dot(a.z, bt.x), dot(a.z, bt.y), dot(a.z, bt.z), dot(a.z, bt.w));
	result.w = make_float4(dot(a.w, bt.x), dot(a.w, bt.y), dot(a.w, bt.z), dot(a.w, bt.w));

	return result;
}
__device_inline Transform make_transform(float a, float b, float c, float d, __device_inline Transform make_transform(float a, float b, float c, float d,
float e, float f, float g, float h, float e, float f, float g, float h,
float i, float j, float k, float l, float i, float j, float k, float l,
@ -139,6 +126,19 @@ __device_inline Transform make_transform(float a, float b, float c, float d,
#ifndef __KERNEL_GPU__ #ifndef __KERNEL_GPU__
/* Product of two transforms, treating each as four float4 rows (x, y, z, w).
 * Every entry of the result is the dot() of one row of a with one row of
 * the transposed b. */
__device_inline Transform operator*(const Transform a, const Transform b)
{
	/* transpose once up front so all sixteen entries use plain row-row dot() */
	const Transform bt = transform_transpose(b);
	Transform result;

	result.x = make_float4(dot(a.x, bt.x), dot(a.x, bt.y), dot(a.x, bt.z), dot(a.x, bt.w));
	result.y = make_float4(dot(a.y, bt.x), dot(a.y, bt.y), dot(a.y, bt.z), dot(a.y, bt.w));
	result.z = make_float4(dot(a.z, bt.x), dot(a.z, bt.y), dot(a.z, bt.z), dot(a.z, bt.w));
	result.w = make_float4(dot(a.w, bt.x), dot(a.w, bt.y), dot(a.w, bt.z), dot(a.w, bt.w));

	return result;
}
__device_inline void print_transform(const char *label, const Transform& t) __device_inline void print_transform(const char *label, const Transform& t)
{ {
print_float4(label, t.x); print_float4(label, t.x);