diff --git a/intern/cycles/kernel/svm/svm_mix.h b/intern/cycles/kernel/svm/svm_mix.h index 0eeb4cf9b05..4e834b7c500 100644 --- a/intern/cycles/kernel/svm/svm_mix.h +++ b/intern/cycles/kernel/svm/svm_mix.h @@ -247,24 +247,7 @@ ccl_device float3 svm_mix_soft(float t, float3 col1, float3 col2) ccl_device float3 svm_mix_linear(float t, float3 col1, float3 col2) { - float3 outcol = col1; - - if(col2.x > 0.5f) - outcol.x = col1.x + t*(2.0f*(col2.x - 0.5f)); - else - outcol.x = col1.x + t*(2.0f*(col2.x) - 1.0f); - - if(col2.y > 0.5f) - outcol.y = col1.y + t*(2.0f*(col2.y - 0.5f)); - else - outcol.y = col1.y + t*(2.0f*(col2.y) - 1.0f); - - if(col2.z > 0.5f) - outcol.z = col1.z + t*(2.0f*(col2.z - 0.5f)); - else - outcol.z = col1.z + t*(2.0f*(col2.z) - 1.0f); - - return outcol; + return col1 + t*(2.0f*col2 + make_float3(-1.0f, -1.0f, -1.0f)); } ccl_device float3 svm_mix_clamp(float3 col) diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h index f35129c4178..2e73639d2bb 100644 --- a/intern/cycles/util/util_math.h +++ b/intern/cycles/util/util_math.h @@ -904,7 +904,7 @@ ccl_device_inline void print_float4(const char *label, const float4& a) ccl_device_inline int3 min(int3 a, int3 b) { -#ifdef __KERNEL_SSE__ +#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__) return _mm_min_epi32(a.m128, b.m128); #else return make_int3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z)); @@ -913,7 +913,7 @@ ccl_device_inline int3 min(int3 a, int3 b) ccl_device_inline int3 max(int3 a, int3 b) { -#ifdef __KERNEL_SSE__ +#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__) return _mm_max_epi32(a.m128, b.m128); #else return make_int3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z)); @@ -978,7 +978,7 @@ ccl_device_inline int4 operator>>(const int4& a, int i) ccl_device_inline int4 min(int4 a, int4 b) { -#ifdef __KERNEL_SSE__ +#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__) return _mm_min_epi32(a.m128, b.m128); #else return make_int4(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w)); @@ -987,7 +987,7 @@ ccl_device_inline int4 min(int4 a, int4 b) ccl_device_inline int4 max(int4 a, int4 b) { -#ifdef __KERNEL_SSE__ +#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__) return _mm_max_epi32(a.m128, b.m128); #else return make_int4(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w)); @@ -1192,26 +1192,20 @@ ccl_device_inline float safe_sqrtf(float f) ccl_device float safe_asinf(float a) { - if(a <= -1.0f) - return -M_PI_2_F; - else if(a >= 1.0f) - return M_PI_2_F; - - return asinf(a); + return asinf(clamp(a, -1.0f, 1.0f)); } ccl_device float safe_acosf(float a) { - if(a <= -1.0f) - return M_PI_F; - else if(a >= 1.0f) - return 0.0f; - - return acosf(a); + return acosf(clamp(a, -1.0f, 1.0f)); } ccl_device float compatible_powf(float x, float y) { +#ifdef __KERNEL_GPU__ + if(y == 0.0f) /* x^0 -> 1, including 0^0 */ + return 1.0f; + /* GPU pow doesn't accept negative x, do manual checks here */ if(x < 0.0f) { if(fmodf(-y, 2.0f) == 0.0f) @@ -1221,19 +1215,15 @@ ccl_device float compatible_powf(float x, float y) } else if(x == 0.0f) return 0.0f; - +#endif return powf(x, y); } ccl_device float safe_powf(float a, float b) { - if(b == 0.0f) - return 1.0f; - if(a == 0.0f) - return 0.0f; if(a < 0.0f && b != float_to_int(b)) return 0.0f; - + return compatible_powf(a, b); }