Cycles: Add utility macro ccl_ref

It is defined as `&` for CPU-side compilation and as empty for any GPU platform. The idea is to use this macro instead of an #ifdef block with a bunch of duplicated lines whose only purpose is to keep the CPU code efficient. Eventually we might switch to references on CUDA as well, but that would require some intensive testing.
2017-08-08 14:34:59 +02:00 · 2017-08-08 14:34:59 +02:00 · fd397a7d28
commit fd397a7d28
parent 01ee88563b
3 changed files with 6 additions and 0 deletions
--- a/intern/cycles/kernel/kernel_compat_cuda.h
+++ b/intern/cycles/kernel/kernel_compat_cuda.h
@@ -53,6 +53,10 @@
 #define ccl_may_alias
 #define ccl_addr_space
 #define ccl_restrict __restrict__
+/* TODO(sergey): In theory we might use references with CUDA, however
+ * the performance impact is yet to be investigated.
+ */
+#define ccl_ref
 #define ccl_align(n) __align__(n)

 #define ATTR_FALLTHROUGH
--- a/intern/cycles/kernel/kernel_compat_opencl.h
+++ b/intern/cycles/kernel/kernel_compat_opencl.h
@@ -42,6 +42,7 @@
 #define ccl_local_param __local
 #define ccl_private __private
 #define ccl_restrict restrict
+#define ccl_ref
 #define ccl_align(n) __attribute__((aligned(n)))

 #ifdef __SPLIT_KERNEL__
--- a/intern/cycles/util/util_defines.h
+++ b/intern/cycles/util/util_defines.h
@@ -35,6 +35,7 @@
 #  define ccl_local_param
 #  define ccl_private
 #  define ccl_restrict __restrict
+#  define ccl_ref &
 #  define __KERNEL_WITH_SSE_ALIGN__

 #  if defined(_WIN32) && !defined(FREE_WINDOWS)