forked from bartvdbraak/blender
Cycles OpenCL: a few fixes to get things compiling after kernel changes,
for Apple OpenCL on OS X 10.8 and simple AO render. Also, the environment variable CYCLES_OPENCL_TEST can now be set to CPU, GPU, ACCELERATOR, DEFAULT or ALL to test particular devices.
This commit is contained in:
parent
d236b4d60f
commit
d0ffbeec73
@ -40,6 +40,26 @@ CCL_NAMESPACE_BEGIN
|
|||||||
|
|
||||||
#define CL_MEM_PTR(p) ((cl_mem)(unsigned long)(p))
|
#define CL_MEM_PTR(p) ((cl_mem)(unsigned long)(p))
|
||||||
|
|
||||||
|
static cl_device_type opencl_device_type()
|
||||||
|
{
|
||||||
|
char *device = getenv("CYCLES_OPENCL_TEST");
|
||||||
|
|
||||||
|
if(device) {
|
||||||
|
if(strcmp(device, "ALL") == 0)
|
||||||
|
return CL_DEVICE_TYPE_ALL;
|
||||||
|
else if(strcmp(device, "DEFAULT") == 0)
|
||||||
|
return CL_DEVICE_TYPE_DEFAULT;
|
||||||
|
else if(strcmp(device, "CPU") == 0)
|
||||||
|
return CL_DEVICE_TYPE_CPU;
|
||||||
|
else if(strcmp(device, "GPU") == 0)
|
||||||
|
return CL_DEVICE_TYPE_GPU;
|
||||||
|
else if(strcmp(device, "ACCELERATOR") == 0)
|
||||||
|
return CL_DEVICE_TYPE_ACCELERATOR;
|
||||||
|
}
|
||||||
|
|
||||||
|
return CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR;
|
||||||
|
}
|
||||||
|
|
||||||
class OpenCLDevice : public Device
|
class OpenCLDevice : public Device
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
@ -181,7 +201,7 @@ public:
|
|||||||
vector<cl_device_id> device_ids;
|
vector<cl_device_id> device_ids;
|
||||||
cl_uint num_devices;
|
cl_uint num_devices;
|
||||||
|
|
||||||
if(opencl_error(clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR, 0, NULL, &num_devices)))
|
if(opencl_error(clGetDeviceIDs(cpPlatform, opencl_device_type(), 0, NULL, &num_devices)))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
if(info.num > num_devices) {
|
if(info.num > num_devices) {
|
||||||
@ -194,7 +214,7 @@ public:
|
|||||||
|
|
||||||
device_ids.resize(num_devices);
|
device_ids.resize(num_devices);
|
||||||
|
|
||||||
if(opencl_error(clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR, num_devices, &device_ids[0], NULL)))
|
if(opencl_error(clGetDeviceIDs(cpPlatform, opencl_device_type(), num_devices, &device_ids[0], NULL)))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
cdDevice = device_ids[info.num];
|
cdDevice = device_ids[info.num];
|
||||||
@ -306,10 +326,10 @@ public:
|
|||||||
build_options += "-D__KERNEL_OPENCL_NVIDIA__ -cl-nv-maxrregcount=24 -cl-nv-verbose ";
|
build_options += "-D__KERNEL_OPENCL_NVIDIA__ -cl-nv-maxrregcount=24 -cl-nv-verbose ";
|
||||||
|
|
||||||
else if(platform_name == "Apple")
|
else if(platform_name == "Apple")
|
||||||
build_options += "-D__CL_NO_FLOAT3__ -D__KERNEL_OPENCL_APPLE__ ";
|
build_options += "-D__KERNEL_OPENCL_APPLE__ -Wno-missing-prototypes";
|
||||||
|
|
||||||
else if(platform_name == "AMD Accelerated Parallel Processing")
|
else if(platform_name == "AMD Accelerated Parallel Processing")
|
||||||
build_options += "-D__CL_NO_FLOAT3__ -D__KERNEL_OPENCL_AMD__ ";
|
build_options += "-D__KERNEL_OPENCL_AMD__ ";
|
||||||
|
|
||||||
return build_options;
|
return build_options;
|
||||||
}
|
}
|
||||||
@ -754,12 +774,12 @@ void device_opencl_info(vector<DeviceInfo>& devices)
|
|||||||
if(clGetPlatformIDs(num_platforms, &platform_ids[0], NULL) != CL_SUCCESS)
|
if(clGetPlatformIDs(num_platforms, &platform_ids[0], NULL) != CL_SUCCESS)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
if(clGetDeviceIDs(platform_ids[0], CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR, 0, NULL, &num_devices) != CL_SUCCESS || num_devices == 0)
|
if(clGetDeviceIDs(platform_ids[0], opencl_device_type(), 0, NULL, &num_devices) != CL_SUCCESS || num_devices == 0)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
device_ids.resize(num_devices);
|
device_ids.resize(num_devices);
|
||||||
|
|
||||||
if(clGetDeviceIDs(platform_ids[0], CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR, num_devices, &device_ids[0], NULL) != CL_SUCCESS)
|
if(clGetDeviceIDs(platform_ids[0], opencl_device_type(), num_devices, &device_ids[0], NULL) != CL_SUCCESS)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* add devices */
|
/* add devices */
|
||||||
|
@ -26,6 +26,10 @@
|
|||||||
#define CCL_NAMESPACE_BEGIN
|
#define CCL_NAMESPACE_BEGIN
|
||||||
#define CCL_NAMESPACE_END
|
#define CCL_NAMESPACE_END
|
||||||
|
|
||||||
|
#ifdef __KERNEL_OPENCL_AMD__
|
||||||
|
#define __CL_NO_FLOAT3__
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef __CL_NO_FLOAT3__
|
#ifdef __CL_NO_FLOAT3__
|
||||||
#define float3 float4
|
#define float3 float4
|
||||||
#endif
|
#endif
|
||||||
|
@ -52,7 +52,7 @@ __device_noinline float3 direct_emissive_eval(KernelGlobals *kg, float rando,
|
|||||||
shader_setup_from_sample(kg, &sd, ls->P, ls->Ng, I, ls->shader, ls->object, ls->prim, u, v, t, time, ls->prim);
|
shader_setup_from_sample(kg, &sd, ls->P, ls->Ng, I, ls->shader, ls->object, ls->prim, u, v, t, time, ls->prim);
|
||||||
else
|
else
|
||||||
#endif
|
#endif
|
||||||
shader_setup_from_sample(kg, &sd, ls->P, ls->Ng, I, ls->shader, ls->object, ls->prim, u, v, t, time);
|
shader_setup_from_sample(kg, &sd, ls->P, ls->Ng, I, ls->shader, ls->object, ls->prim, u, v, t, time, ~0);
|
||||||
|
|
||||||
ls->Ng = sd.Ng;
|
ls->Ng = sd.Ng;
|
||||||
|
|
||||||
|
@ -430,11 +430,9 @@ __device void object_transform_light_sample(KernelGlobals *kg, LightSample *ls,
|
|||||||
/* instance transform */
|
/* instance transform */
|
||||||
if(object >= 0) {
|
if(object >= 0) {
|
||||||
#ifdef __OBJECT_MOTION__
|
#ifdef __OBJECT_MOTION__
|
||||||
Transform itfm;
|
Transform tfm = object_fetch_transform_motion_test(kg, object, time, NULL);
|
||||||
Transform tfm = object_fetch_transform_motion_test(kg, object, time, &itfm);
|
|
||||||
#else
|
#else
|
||||||
Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
|
Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
|
||||||
Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
ls->P = transform_point(&tfm, ls->P);
|
ls->P = transform_point(&tfm, ls->P);
|
||||||
|
@ -262,7 +262,7 @@ __device_inline void shader_setup_from_subsurface(KernelGlobals *kg, ShaderData
|
|||||||
|
|
||||||
__device_noinline void shader_setup_from_sample(KernelGlobals *kg, ShaderData *sd,
|
__device_noinline void shader_setup_from_sample(KernelGlobals *kg, ShaderData *sd,
|
||||||
const float3 P, const float3 Ng, const float3 I,
|
const float3 P, const float3 Ng, const float3 I,
|
||||||
int shader, int object, int prim, float u, float v, float t, float time, int segment = ~0)
|
int shader, int object, int prim, float u, float v, float t, float time, int segment)
|
||||||
{
|
{
|
||||||
/* vectors */
|
/* vectors */
|
||||||
sd->P = P;
|
sd->P = P;
|
||||||
@ -393,7 +393,7 @@ __device void shader_setup_from_displace(KernelGlobals *kg, ShaderData *sd,
|
|||||||
|
|
||||||
/* watch out: no instance transform currently */
|
/* watch out: no instance transform currently */
|
||||||
|
|
||||||
shader_setup_from_sample(kg, sd, P, Ng, I, shader, object, prim, u, v, 0.0f, TIME_INVALID);
|
shader_setup_from_sample(kg, sd, P, Ng, I, shader, object, prim, u, v, 0.0f, TIME_INVALID, ~0);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ShaderData setup from ray into background */
|
/* ShaderData setup from ray into background */
|
||||||
@ -769,8 +769,9 @@ __device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd,
|
|||||||
#ifdef __SVM__
|
#ifdef __SVM__
|
||||||
svm_eval_nodes(kg, sd, SHADER_TYPE_SURFACE, randb, path_flag);
|
svm_eval_nodes(kg, sd, SHADER_TYPE_SURFACE, randb, path_flag);
|
||||||
#else
|
#else
|
||||||
bsdf_diffuse_setup(&sd->closure);
|
|
||||||
sd->closure.weight = make_float3(0.8f, 0.8f, 0.8f);
|
sd->closure.weight = make_float3(0.8f, 0.8f, 0.8f);
|
||||||
|
sd->closure.N = sd->N;
|
||||||
|
sd->flag |= bsdf_diffuse_setup(&sd->closure);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -109,19 +109,6 @@ __device_inline Transform transform_transpose(const Transform a)
|
|||||||
return t;
|
return t;
|
||||||
}
|
}
|
||||||
|
|
||||||
__device_inline Transform operator*(const Transform a, const Transform b)
|
|
||||||
{
|
|
||||||
Transform c = transform_transpose(b);
|
|
||||||
Transform t;
|
|
||||||
|
|
||||||
t.x = make_float4(dot(a.x, c.x), dot(a.x, c.y), dot(a.x, c.z), dot(a.x, c.w));
|
|
||||||
t.y = make_float4(dot(a.y, c.x), dot(a.y, c.y), dot(a.y, c.z), dot(a.y, c.w));
|
|
||||||
t.z = make_float4(dot(a.z, c.x), dot(a.z, c.y), dot(a.z, c.z), dot(a.z, c.w));
|
|
||||||
t.w = make_float4(dot(a.w, c.x), dot(a.w, c.y), dot(a.w, c.z), dot(a.w, c.w));
|
|
||||||
|
|
||||||
return t;
|
|
||||||
}
|
|
||||||
|
|
||||||
__device_inline Transform make_transform(float a, float b, float c, float d,
|
__device_inline Transform make_transform(float a, float b, float c, float d,
|
||||||
float e, float f, float g, float h,
|
float e, float f, float g, float h,
|
||||||
float i, float j, float k, float l,
|
float i, float j, float k, float l,
|
||||||
@ -139,6 +126,19 @@ __device_inline Transform make_transform(float a, float b, float c, float d,
|
|||||||
|
|
||||||
#ifndef __KERNEL_GPU__
|
#ifndef __KERNEL_GPU__
|
||||||
|
|
||||||
|
__device_inline Transform operator*(const Transform a, const Transform b)
|
||||||
|
{
|
||||||
|
Transform c = transform_transpose(b);
|
||||||
|
Transform t;
|
||||||
|
|
||||||
|
t.x = make_float4(dot(a.x, c.x), dot(a.x, c.y), dot(a.x, c.z), dot(a.x, c.w));
|
||||||
|
t.y = make_float4(dot(a.y, c.x), dot(a.y, c.y), dot(a.y, c.z), dot(a.y, c.w));
|
||||||
|
t.z = make_float4(dot(a.z, c.x), dot(a.z, c.y), dot(a.z, c.z), dot(a.z, c.w));
|
||||||
|
t.w = make_float4(dot(a.w, c.x), dot(a.w, c.y), dot(a.w, c.z), dot(a.w, c.w));
|
||||||
|
|
||||||
|
return t;
|
||||||
|
}
|
||||||
|
|
||||||
__device_inline void print_transform(const char *label, const Transform& t)
|
__device_inline void print_transform(const char *label, const Transform& t)
|
||||||
{
|
{
|
||||||
print_float4(label, t.x);
|
print_float4(label, t.x);
|
||||||
|
Loading…
Reference in New Issue
Block a user