diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp index 3ab5d9ee69b..a9b062474f7 100644 --- a/intern/cycles/device/device_opencl.cpp +++ b/intern/cycles/device/device_opencl.cpp @@ -40,6 +40,26 @@ CCL_NAMESPACE_BEGIN #define CL_MEM_PTR(p) ((cl_mem)(unsigned long)(p)) +static cl_device_type opencl_device_type() +{ + char *device = getenv("CYCLES_OPENCL_TEST"); + + if(device) { + if(strcmp(device, "ALL") == 0) + return CL_DEVICE_TYPE_ALL; + else if(strcmp(device, "DEFAULT") == 0) + return CL_DEVICE_TYPE_DEFAULT; + else if(strcmp(device, "CPU") == 0) + return CL_DEVICE_TYPE_CPU; + else if(strcmp(device, "GPU") == 0) + return CL_DEVICE_TYPE_GPU; + else if(strcmp(device, "ACCELERATOR") == 0) + return CL_DEVICE_TYPE_ACCELERATOR; + } + + return CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR; +} + class OpenCLDevice : public Device { public: @@ -181,7 +201,7 @@ public: vector device_ids; cl_uint num_devices; - if(opencl_error(clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR, 0, NULL, &num_devices))) + if(opencl_error(clGetDeviceIDs(cpPlatform, opencl_device_type(), 0, NULL, &num_devices))) return; if(info.num > num_devices) { @@ -194,7 +214,7 @@ public: device_ids.resize(num_devices); - if(opencl_error(clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR, num_devices, &device_ids[0], NULL))) + if(opencl_error(clGetDeviceIDs(cpPlatform, opencl_device_type(), num_devices, &device_ids[0], NULL))) return; cdDevice = device_ids[info.num]; @@ -306,10 +326,10 @@ public: build_options += "-D__KERNEL_OPENCL_NVIDIA__ -cl-nv-maxrregcount=24 -cl-nv-verbose "; else if(platform_name == "Apple") - build_options += "-D__CL_NO_FLOAT3__ -D__KERNEL_OPENCL_APPLE__ "; + build_options += "-D__KERNEL_OPENCL_APPLE__ -Wno-missing-prototypes"; else if(platform_name == "AMD Accelerated Parallel Processing") - build_options += "-D__CL_NO_FLOAT3__ -D__KERNEL_OPENCL_AMD__ "; + build_options += "-D__KERNEL_OPENCL_AMD__ "; return build_options; } @@ -754,12 +774,12 @@ void device_opencl_info(vector& devices) if(clGetPlatformIDs(num_platforms, &platform_ids[0], NULL) != CL_SUCCESS) return; - if(clGetDeviceIDs(platform_ids[0], CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR, 0, NULL, &num_devices) != CL_SUCCESS || num_devices == 0) + if(clGetDeviceIDs(platform_ids[0], opencl_device_type(), 0, NULL, &num_devices) != CL_SUCCESS || num_devices == 0) return; device_ids.resize(num_devices); - if(clGetDeviceIDs(platform_ids[0], CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR, num_devices, &device_ids[0], NULL) != CL_SUCCESS) + if(clGetDeviceIDs(platform_ids[0], opencl_device_type(), num_devices, &device_ids[0], NULL) != CL_SUCCESS) return; /* add devices */ diff --git a/intern/cycles/kernel/kernel_compat_opencl.h b/intern/cycles/kernel/kernel_compat_opencl.h index 999820891b2..6c41bfa5521 100644 --- a/intern/cycles/kernel/kernel_compat_opencl.h +++ b/intern/cycles/kernel/kernel_compat_opencl.h @@ -26,6 +26,10 @@ #define CCL_NAMESPACE_BEGIN #define CCL_NAMESPACE_END +#ifdef __KERNEL_OPENCL_AMD__ +#define __CL_NO_FLOAT3__ +#endif + #ifdef __CL_NO_FLOAT3__ #define float3 float4 #endif diff --git a/intern/cycles/kernel/kernel_emission.h b/intern/cycles/kernel/kernel_emission.h index 6f7a3e71d27..bae1b4743e4 100644 --- a/intern/cycles/kernel/kernel_emission.h +++ b/intern/cycles/kernel/kernel_emission.h @@ -52,7 +52,7 @@ __device_noinline float3 direct_emissive_eval(KernelGlobals *kg, float rando, shader_setup_from_sample(kg, &sd, ls->P, ls->Ng, I, ls->shader, ls->object, ls->prim, u, v, t, time, ls->prim); else #endif - shader_setup_from_sample(kg, &sd, ls->P, ls->Ng, I, ls->shader, ls->object, ls->prim, u, v, t, time); + shader_setup_from_sample(kg, &sd, ls->P, ls->Ng, I, ls->shader, ls->object, ls->prim, u, v, t, time, ~0); ls->Ng = sd.Ng; diff --git a/intern/cycles/kernel/kernel_light.h b/intern/cycles/kernel/kernel_light.h index f6fbd3599ad..2eab293963a 100644 --- a/intern/cycles/kernel/kernel_light.h +++ b/intern/cycles/kernel/kernel_light.h @@ -430,11 +430,9 @@ __device void object_transform_light_sample(KernelGlobals *kg, LightSample *ls, /* instance transform */ if(object >= 0) { #ifdef __OBJECT_MOTION__ - Transform itfm; - Transform tfm = object_fetch_transform_motion_test(kg, object, time, &itfm); + Transform tfm = object_fetch_transform_motion_test(kg, object, time, NULL); #else Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM); - Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); #endif ls->P = transform_point(&tfm, ls->P); diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h index 21152efff4c..908c5a91768 100644 --- a/intern/cycles/kernel/kernel_shader.h +++ b/intern/cycles/kernel/kernel_shader.h @@ -262,7 +262,7 @@ __device_inline void shader_setup_from_subsurface(KernelGlobals *kg, ShaderData __device_noinline void shader_setup_from_sample(KernelGlobals *kg, ShaderData *sd, const float3 P, const float3 Ng, const float3 I, - int shader, int object, int prim, float u, float v, float t, float time, int segment = ~0) + int shader, int object, int prim, float u, float v, float t, float time, int segment) { /* vectors */ sd->P = P; @@ -393,7 +393,7 @@ __device void shader_setup_from_displace(KernelGlobals *kg, ShaderData *sd, /* watch out: no instance transform currently */ - shader_setup_from_sample(kg, sd, P, Ng, I, shader, object, prim, u, v, 0.0f, TIME_INVALID); + shader_setup_from_sample(kg, sd, P, Ng, I, shader, object, prim, u, v, 0.0f, TIME_INVALID, ~0); } /* ShaderData setup from ray into background */ @@ -769,8 +769,9 @@ __device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd, #ifdef __SVM__ svm_eval_nodes(kg, sd, SHADER_TYPE_SURFACE, randb, path_flag); #else - bsdf_diffuse_setup(&sd->closure); sd->closure.weight = make_float3(0.8f, 0.8f, 0.8f); + sd->closure.N = sd->N; + sd->flag |= bsdf_diffuse_setup(&sd->closure); #endif } } diff --git a/intern/cycles/util/util_transform.h b/intern/cycles/util/util_transform.h index 10120ed5fdb..33761827dd7 100644 --- a/intern/cycles/util/util_transform.h +++ b/intern/cycles/util/util_transform.h @@ -109,19 +109,6 @@ __device_inline Transform transform_transpose(const Transform a) return t; } -__device_inline Transform operator*(const Transform a, const Transform b) -{ - Transform c = transform_transpose(b); - Transform t; - - t.x = make_float4(dot(a.x, c.x), dot(a.x, c.y), dot(a.x, c.z), dot(a.x, c.w)); - t.y = make_float4(dot(a.y, c.x), dot(a.y, c.y), dot(a.y, c.z), dot(a.y, c.w)); - t.z = make_float4(dot(a.z, c.x), dot(a.z, c.y), dot(a.z, c.z), dot(a.z, c.w)); - t.w = make_float4(dot(a.w, c.x), dot(a.w, c.y), dot(a.w, c.z), dot(a.w, c.w)); - - return t; -} - __device_inline Transform make_transform(float a, float b, float c, float d, float e, float f, float g, float h, float i, float j, float k, float l, @@ -139,6 +126,19 @@ __device_inline Transform make_transform(float a, float b, float c, float d, #ifndef __KERNEL_GPU__ +__device_inline Transform operator*(const Transform a, const Transform b) +{ + Transform c = transform_transpose(b); + Transform t; + + t.x = make_float4(dot(a.x, c.x), dot(a.x, c.y), dot(a.x, c.z), dot(a.x, c.w)); + t.y = make_float4(dot(a.y, c.x), dot(a.y, c.y), dot(a.y, c.z), dot(a.y, c.w)); + t.z = make_float4(dot(a.z, c.x), dot(a.z, c.y), dot(a.z, c.z), dot(a.z, c.w)); + t.w = make_float4(dot(a.w, c.x), dot(a.w, c.y), dot(a.w, c.z), dot(a.w, c.w)); + + return t; +} + __device_inline void print_transform(const char *label, const Transform& t) { print_float4(label, t.x);