diff --git a/intern/cycles/kernel/kernel_jitter.h b/intern/cycles/kernel/kernel_jitter.h
index f7270a14940..e59d8946950 100644
--- a/intern/cycles/kernel/kernel_jitter.h
+++ b/intern/cycles/kernel/kernel_jitter.h
@@ -38,43 +38,17 @@ ccl_device_inline int cmj_fast_mod_pow2(int a, int b)
 ccl_device_inline int cmj_fast_div_pow2(int a, int b)
 {
   kernel_assert(b > 1);
-#if defined(__KERNEL_SSE2__)
-#  ifdef _MSC_VER
-  unsigned long ctz;
-  _BitScanForward(&ctz, b);
-  return a >> ctz;
-#  else
-  return a >> __builtin_ctz(b);
-#  endif
-#elif defined(__KERNEL_CUDA__)
-  return a >> (__ffs(b) - 1);
-#else
-  return a / b;
-#endif
+  /* For a power-of-two b, dividing a non-negative a is a shift by log2(b). */
+  return a >> count_trailing_zeros(b);
 }
 
 ccl_device_inline uint cmj_w_mask(uint w)
 {
   kernel_assert(w > 1);
-#if defined(__KERNEL_SSE2__)
-#  ifdef _MSC_VER
-  unsigned long leading_zero;
-  _BitScanReverse(&leading_zero, w);
-  return ((1 << (1 + leading_zero)) - 1);
-#  else
-  return ((1 << (32 - __builtin_clz(w))) - 1);
-#  endif
-#elif defined(__KERNEL_CUDA__)
-  return ((1 << (32 - __clz(w))) - 1);
-#else
-  w |= w >> 1;
-  w |= w >> 2;
-  w |= w >> 4;
-  w |= w >> 8;
-  w |= w >> 16;
-
-  return w;
-#endif
+  /* Mask covering every bit up to the highest set bit of w. Shifting all-ones
+   * right is defined for any non-zero w; (1 << (32 - clz)) - 1 would shift by
+   * the full word width once bit 31 of w is set. */
+  return 0xFFFFFFFFu >> count_leading_zeros(w);
 }
 
 ccl_device_inline uint cmj_permute(uint i, uint l, uint p)
diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h
index dde0d31f467..9faf7149ce2 100644
--- a/intern/cycles/util/util_math.h
+++ b/intern/cycles/util/util_math.h
@@ -617,6 +617,65 @@ ccl_device float bits_to_01(uint bits)
   return bits * (1.0f / (float)0xFFFFFFFF);
 }
 
+/* Count the zero bits above the highest set bit of x.
+ * x must be non-zero: __builtin_clz(0) is undefined and _BitScanReverse()
+ * leaves its index output unspecified for a zero mask. */
+ccl_device_inline uint count_leading_zeros(uint x)
+{
+#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__)
+  return __clz(x);
+#elif defined(__KERNEL_OPENCL__)
+  return clz(x);
+#else
+  /* Checked on the host path only: OpenCL C provides no assert(). */
+  assert(x != 0);
+#  ifdef _MSC_VER
+  unsigned long leading_zero = 0;
+  _BitScanReverse(&leading_zero, x);
+  return (31 - leading_zero);
+#  else
+  return __builtin_clz(x);
+#  endif
+#endif
+}
+
+/* Count the zero bits below the lowest set bit of x; x must be non-zero. */
+ccl_device_inline uint count_trailing_zeros(uint x)
+{
+#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__)
+  return (__ffs(x) - 1);
+#elif defined(__KERNEL_OPENCL__)
+  /* x & -x keeps only the lowest set bit. */
+  return (31 - count_leading_zeros(x & -x));
+#else
+  /* Checked on the host path only: OpenCL C provides no assert(). */
+  assert(x != 0);
+#  ifdef _MSC_VER
+  unsigned long ctz = 0;
+  _BitScanForward(&ctz, x);
+  return ctz;
+#  else
+  return __builtin_ctz(x);
+#  endif
+#endif
+}
+
+/* Return the 1-based position of the lowest set bit of x, or 0 when x is 0. */
+ccl_device_inline uint find_first_set(uint x)
+{
+#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__)
+  return __ffs(x);
+#elif defined(__KERNEL_OPENCL__)
+  return (x != 0) ? (32 - count_leading_zeros(x & (-x))) : 0;
+#else
+#  ifdef _MSC_VER
+  return (x != 0) ? (32 - count_leading_zeros(x & (-x))) : 0;
+#  else
+  return __builtin_ffs(x);
+#  endif
+#endif
+}
+
 /* projections */
 ccl_device_inline float2 map_to_tube(const float3 co)
 {