forked from bartvdbraak/blender
Cycles: Implement SSE-optimized path of util_max_axis()
The idea here is to avoid if statements, which can cause branch mispredictions. This gives a small but measurable speedup of up to ~1%. Still nice :) Inspired by Maxym Dmytrychenko, thanks!
This commit is contained in:
parent
3e71006448
commit
af411d918e
@ -1629,6 +1629,14 @@ ccl_device_inline float2 map_to_sphere(const float3 co)
|
||||
|
||||
ccl_device_inline int util_max_axis(float3 vec)
|
||||
{
|
||||
#ifdef __KERNEL_SSE__
|
||||
__m128 a = shuffle<0,0,1,1>(vec.m128);
|
||||
__m128 b = shuffle<1,2,2,1>(vec.m128);
|
||||
__m128 c = _mm_cmpgt_ps(a, b);
|
||||
int mask = _mm_movemask_ps(c) & 0x7;
|
||||
static const char tab[8] = {2, 2, 2, 0, 1, 2, 1, 0};
|
||||
return tab[mask];
|
||||
#else
|
||||
if(vec.x > vec.y) {
|
||||
if(vec.x > vec.z)
|
||||
return 0;
|
||||
@ -1641,6 +1649,7 @@ ccl_device_inline int util_max_axis(float3 vec)
|
||||
else
|
||||
return 2;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
Loading…
Reference in New Issue
Block a user