From cd6129d1ff6142c153a99917aa794b668e3b7dd2 Mon Sep 17 00:00:00 2001 From: Sergey Sharybin Date: Mon, 6 Oct 2014 13:43:23 +0600 Subject: [PATCH] Cycles: Workaround dead-slow expf() on 64bit linux The single precision exponent function on 64bit Linux tends to be an order of magnitude slower than the double precision version, even with the single<->double precision conversion. Some feedback on the mailing lists suggests that logf() is slow as well, but I haven't confirmed this here in the studio yet. Depending on the shader setup, this gives a speedup of ~3% with the secret agent shot and up to around 15% with the bmw scene here. --- intern/cycles/device/device_cpu.cpp | 5 +++++ intern/cycles/kernel/kernel_compat_cpu.h | 7 +++++++ 2 files changed, 12 insertions(+) diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index 4623764d210..c9b8a5b726b 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -17,6 +17,11 @@ #include #include +/* So ImathMath is included before our kernel_cpu_compat. */ +#ifdef WITH_OSL +# include +#endif + #include "device.h" #include "device_intern.h" diff --git a/intern/cycles/kernel/kernel_compat_cpu.h b/intern/cycles/kernel/kernel_compat_cpu.h index c2aab93c87b..25531843993 100644 --- a/intern/cycles/kernel/kernel_compat_cpu.h +++ b/intern/cycles/kernel/kernel_compat_cpu.h @@ -25,6 +25,13 @@ #include "util_half.h" #include "util_types.h" +/* On 64bit linux single precision exponent is really slow comparing to the + * double precision version, even with float<->double conversion involved. + */ +#if !defined(__KERNEL_GPU__) && defined(__linux__) && defined(__x86_64__) +# define expf(x) ((float)exp((double)x)) +#endif + CCL_NAMESPACE_BEGIN /* Assertions inside the kernel only work for the CPU device, so we wrap it in