From 040fa75d7b9d796d2818d731934caf77e4288b7f Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Tue, 9 Aug 2016 01:00:57 +0200 Subject: [PATCH] Fix Cycles CUDA adaptive kernel not working correctly after recent closure changes. --- intern/cycles/kernel/kernel_compat_cuda.h | 1 + intern/cycles/kernel/kernel_compat_opencl.h | 1 + intern/cycles/kernel/kernel_types.h | 13 ++++--------- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h index a039b414006..063220b542e 100644 --- a/intern/cycles/kernel/kernel_compat_cuda.h +++ b/intern/cycles/kernel/kernel_compat_cuda.h @@ -47,6 +47,7 @@ #define ccl_may_alias #define ccl_addr_space #define ccl_restrict __restrict__ +#define ccl_align(n) __align__(n) /* No assert supported for CUDA */ diff --git a/intern/cycles/kernel/kernel_compat_opencl.h b/intern/cycles/kernel/kernel_compat_opencl.h index 8505cb85576..2ae89dde7c4 100644 --- a/intern/cycles/kernel/kernel_compat_opencl.h +++ b/intern/cycles/kernel/kernel_compat_opencl.h @@ -40,6 +40,7 @@ #define ccl_local __local #define ccl_private __private #define ccl_restrict restrict +#define ccl_align(n) __attribute__((aligned(n))) #ifdef __SPLIT_KERNEL__ # define ccl_addr_space __global diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index 3923804ec96..f3b10c21b9d 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -656,23 +656,18 @@ typedef struct AttributeDescriptor { * ShaderClosure has a fixed size, and any extra space must be allocated * with closure_alloc_extra(). * - * float3 is 12 bytes on CUDA and 16 bytes on CPU/OpenCL, we set the data - * size to ensure ShaderClosure is 80 bytes total everywhere. */ + * We pad the struct to 80 bytes and ensure it is aligned to 16 bytes, which + * we assume to be the maximum required alignment for any struct. */ #define SHADER_CLOSURE_BASE \ float3 weight; \ ClosureType type; \ float sample_weight \ -typedef ccl_addr_space struct ShaderClosure { +typedef ccl_addr_space struct ccl_align(16) ShaderClosure { SHADER_CLOSURE_BASE; - /* pad to 80 bytes, data types are aligned to own size */ -#ifdef __KERNEL_CUDA__ - float data[15]; -#else - float data[14]; -#endif + float data[14]; /* pad to 80 bytes */ } ShaderClosure; /* Shader Context