diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h index 1e2af9de8b3..2e8ca48c413 100644 --- a/intern/cycles/kernel/kernel_compat_cuda.h +++ b/intern/cycles/kernel/kernel_compat_cuda.h @@ -50,7 +50,8 @@ #endif #define ccl_device_noinline __device__ __noinline__ #define ccl_global -#define ccl_constant +#define ccl_static_constant __constant__ +#define ccl_constant const #define ccl_local __shared__ #define ccl_local_param #define ccl_private diff --git a/intern/cycles/kernel/kernel_compat_opencl.h b/intern/cycles/kernel/kernel_compat_opencl.h index 36d6031d042..7f81523791b 100644 --- a/intern/cycles/kernel/kernel_compat_opencl.h +++ b/intern/cycles/kernel/kernel_compat_opencl.h @@ -36,6 +36,7 @@ #define ccl_device_forceinline ccl_device #define ccl_device_noinline ccl_device ccl_noinline #define ccl_may_alias +#define ccl_static_constant static __constant #define ccl_constant __constant #define ccl_global __global #define ccl_local __local diff --git a/intern/cycles/kernel/svm/svm_blackbody.h b/intern/cycles/kernel/svm/svm_blackbody.h index b750ad87b7f..51590b18505 100644 --- a/intern/cycles/kernel/svm/svm_blackbody.h +++ b/intern/cycles/kernel/svm/svm_blackbody.h @@ -41,8 +41,7 @@ ccl_device void svm_node_blackbody(KernelGlobals *kg, ShaderData *sd, float *sta float3 color_rgb = svm_math_blackbody_color(temperature); - if(stack_valid(col_offset)) - stack_store_float3(stack, col_offset, color_rgb); + stack_store_float3(stack, col_offset, color_rgb); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_math_util.h b/intern/cycles/kernel/svm/svm_math_util.h index f8aeeba4a21..1ce7777aac3 100644 --- a/intern/cycles/kernel/svm/svm_math_util.h +++ b/intern/cycles/kernel/svm/svm_math_util.h @@ -100,40 +100,42 @@ ccl_device float svm_math(NodeMath type, float Fac1, float Fac2) return Fac; } -ccl_device float3 svm_math_blackbody_color(float t) { - /* Calculate color in range 800..12000 using an approximation - * a/x+bx+c for R and G and ((at + b)t + c)t + d) for B - * Max absolute error for RGB is (0.00095, 0.00077, 0.00057), - * which is enough to get the same 8 bit/channel color. - */ +/* Calculate color in range 800..12000 using an approximation + * a/x+bx+c for R and G and ((at + b)t + c)t + d) for B + * Max absolute error for RGB is (0.00095, 0.00077, 0.00057), + * which is enough to get the same 8 bit/channel color. + */ - const float rc[6][3] = { - { 2.52432244e+03f, -1.06185848e-03f, 3.11067539e+00f }, - { 3.37763626e+03f, -4.34581697e-04f, 1.64843306e+00f }, - { 4.10671449e+03f, -8.61949938e-05f, 6.41423749e-01f }, - { 4.66849800e+03f, 2.85655028e-05f, 1.29075375e-01f }, - { 4.60124770e+03f, 2.89727618e-05f, 1.48001316e-01f }, - { 3.78765709e+03f, 9.36026367e-06f, 3.98995841e-01f }, - }; +ccl_static_constant float blackbody_table_r[6][3] = { + { 2.52432244e+03f, -1.06185848e-03f, 3.11067539e+00f }, + { 3.37763626e+03f, -4.34581697e-04f, 1.64843306e+00f }, + { 4.10671449e+03f, -8.61949938e-05f, 6.41423749e-01f }, + { 4.66849800e+03f, 2.85655028e-05f, 1.29075375e-01f }, + { 4.60124770e+03f, 2.89727618e-05f, 1.48001316e-01f }, + { 3.78765709e+03f, 9.36026367e-06f, 3.98995841e-01f }, +}; - const float gc[6][3] = { - { -7.50343014e+02f, 3.15679613e-04f, 4.73464526e-01f }, - { -1.00402363e+03f, 1.29189794e-04f, 9.08181524e-01f }, - { -1.22075471e+03f, 2.56245413e-05f, 1.20753416e+00f }, - { -1.42546105e+03f, -4.01730887e-05f, 1.44002695e+00f }, - { -1.18134453e+03f, -2.18913373e-05f, 1.30656109e+00f }, - { -5.00279505e+02f, -4.59745390e-06f, 1.09090465e+00f }, - }; +ccl_static_constant float blackbody_table_g[6][3] = { + { -7.50343014e+02f, 3.15679613e-04f, 4.73464526e-01f }, + { -1.00402363e+03f, 1.29189794e-04f, 9.08181524e-01f }, + { -1.22075471e+03f, 2.56245413e-05f, 1.20753416e+00f }, + { -1.42546105e+03f, -4.01730887e-05f, 1.44002695e+00f }, + { -1.18134453e+03f, -2.18913373e-05f, 1.30656109e+00f }, + { -5.00279505e+02f, -4.59745390e-06f, 1.09090465e+00f }, +}; - const float bc[6][4] = { - { 0.0f, 0.0f, 0.0f, 0.0f }, /* zeros should be optimized by compiler */ - { 0.0f, 0.0f, 0.0f, 0.0f }, - { 0.0f, 0.0f, 0.0f, 0.0f }, - { -2.02524603e-11f, 1.79435860e-07f, -2.60561875e-04f, -1.41761141e-02f }, - { -2.22463426e-13f, -1.55078698e-08f, 3.81675160e-04f, -7.30646033e-01f }, - { 6.72595954e-13f, -2.73059993e-08f, 4.24068546e-04f, -7.52204323e-01f }, - }; +ccl_static_constant float blackbody_table_b[6][4] = { + { 0.0f, 0.0f, 0.0f, 0.0f }, /* zeros should be optimized by compiler */ + { 0.0f, 0.0f, 0.0f, 0.0f }, + { 0.0f, 0.0f, 0.0f, 0.0f }, + { -2.02524603e-11f, 1.79435860e-07f, -2.60561875e-04f, -1.41761141e-02f }, + { -2.22463426e-13f, -1.55078698e-08f, 3.81675160e-04f, -7.30646033e-01f }, + { 6.72595954e-13f, -2.73059993e-08f, 4.24068546e-04f, -7.52204323e-01f }, +}; + +ccl_device float3 svm_math_blackbody_color(float t) +{ if(t >= 12000.0f) { return make_float3(0.826270103f, 0.994478524f, 1.56626022f); } @@ -148,10 +150,14 @@ ccl_device float3 svm_math_blackbody_color(float t) { (t >= 1449.0f)? 2: (t >= 1167.0f)? 1: 0; + ccl_constant float *r = blackbody_table_r[i]; + ccl_constant float *g = blackbody_table_g[i]; + ccl_constant float *b = blackbody_table_b[i]; + const float t_inv = 1.0f / t; - return make_float3(rc[i][0] * t_inv + rc[i][1] * t + rc[i][2], - gc[i][0] * t_inv + gc[i][1] * t + gc[i][2], - ((bc[i][0] * t + bc[i][1]) * t + bc[i][2]) * t + bc[i][3]); + return make_float3(r[0] * t_inv + r[1] * t + r[2], + g[0] * t_inv + g[1] * t + g[2], + ((b[0] * t + b[1]) * t + b[2]) * t + b[3]); } ccl_device_inline float3 svm_math_gamma_color(float3 color, float gamma) diff --git a/intern/cycles/kernel/svm/svm_wavelength.h b/intern/cycles/kernel/svm/svm_wavelength.h index 57030f3979d..855b356b397 100644 --- a/intern/cycles/kernel/svm/svm_wavelength.h +++ b/intern/cycles/kernel/svm/svm_wavelength.h @@ -34,44 +34,44 @@ CCL_NAMESPACE_BEGIN /* Wavelength to RGB */ +// CIE colour matching functions xBar, yBar, and zBar for +// wavelengths from 380 through 780 nanometers, every 5 +// nanometers. For a wavelength lambda in this range: +// cie_colour_match[(lambda - 380) / 5][0] = xBar +// cie_colour_match[(lambda - 380) / 5][1] = yBar +// cie_colour_match[(lambda - 380) / 5][2] = zBar +ccl_static_constant float cie_colour_match[81][3] = { + {0.0014f,0.0000f,0.0065f}, {0.0022f,0.0001f,0.0105f}, {0.0042f,0.0001f,0.0201f}, + {0.0076f,0.0002f,0.0362f}, {0.0143f,0.0004f,0.0679f}, {0.0232f,0.0006f,0.1102f}, + {0.0435f,0.0012f,0.2074f}, {0.0776f,0.0022f,0.3713f}, {0.1344f,0.0040f,0.6456f}, + {0.2148f,0.0073f,1.0391f}, {0.2839f,0.0116f,1.3856f}, {0.3285f,0.0168f,1.6230f}, + {0.3483f,0.0230f,1.7471f}, {0.3481f,0.0298f,1.7826f}, {0.3362f,0.0380f,1.7721f}, + {0.3187f,0.0480f,1.7441f}, {0.2908f,0.0600f,1.6692f}, {0.2511f,0.0739f,1.5281f}, + {0.1954f,0.0910f,1.2876f}, {0.1421f,0.1126f,1.0419f}, {0.0956f,0.1390f,0.8130f}, + {0.0580f,0.1693f,0.6162f}, {0.0320f,0.2080f,0.4652f}, {0.0147f,0.2586f,0.3533f}, + {0.0049f,0.3230f,0.2720f}, {0.0024f,0.4073f,0.2123f}, {0.0093f,0.5030f,0.1582f}, + {0.0291f,0.6082f,0.1117f}, {0.0633f,0.7100f,0.0782f}, {0.1096f,0.7932f,0.0573f}, + {0.1655f,0.8620f,0.0422f}, {0.2257f,0.9149f,0.0298f}, {0.2904f,0.9540f,0.0203f}, + {0.3597f,0.9803f,0.0134f}, {0.4334f,0.9950f,0.0087f}, {0.5121f,1.0000f,0.0057f}, + {0.5945f,0.9950f,0.0039f}, {0.6784f,0.9786f,0.0027f}, {0.7621f,0.9520f,0.0021f}, + {0.8425f,0.9154f,0.0018f}, {0.9163f,0.8700f,0.0017f}, {0.9786f,0.8163f,0.0014f}, + {1.0263f,0.7570f,0.0011f}, {1.0567f,0.6949f,0.0010f}, {1.0622f,0.6310f,0.0008f}, + {1.0456f,0.5668f,0.0006f}, {1.0026f,0.5030f,0.0003f}, {0.9384f,0.4412f,0.0002f}, + {0.8544f,0.3810f,0.0002f}, {0.7514f,0.3210f,0.0001f}, {0.6424f,0.2650f,0.0000f}, + {0.5419f,0.2170f,0.0000f}, {0.4479f,0.1750f,0.0000f}, {0.3608f,0.1382f,0.0000f}, + {0.2835f,0.1070f,0.0000f}, {0.2187f,0.0816f,0.0000f}, {0.1649f,0.0610f,0.0000f}, + {0.1212f,0.0446f,0.0000f}, {0.0874f,0.0320f,0.0000f}, {0.0636f,0.0232f,0.0000f}, + {0.0468f,0.0170f,0.0000f}, {0.0329f,0.0119f,0.0000f}, {0.0227f,0.0082f,0.0000f}, + {0.0158f,0.0057f,0.0000f}, {0.0114f,0.0041f,0.0000f}, {0.0081f,0.0029f,0.0000f}, + {0.0058f,0.0021f,0.0000f}, {0.0041f,0.0015f,0.0000f}, {0.0029f,0.0010f,0.0000f}, + {0.0020f,0.0007f,0.0000f}, {0.0014f,0.0005f,0.0000f}, {0.0010f,0.0004f,0.0000f}, + {0.0007f,0.0002f,0.0000f}, {0.0005f,0.0002f,0.0000f}, {0.0003f,0.0001f,0.0000f}, + {0.0002f,0.0001f,0.0000f}, {0.0002f,0.0001f,0.0000f}, {0.0001f,0.0000f,0.0000f}, + {0.0001f,0.0000f,0.0000f}, {0.0001f,0.0000f,0.0000f}, {0.0000f,0.0000f,0.0000f} +}; + ccl_device void svm_node_wavelength(ShaderData *sd, float *stack, uint wavelength, uint color_out) { - // CIE colour matching functions xBar, yBar, and zBar for - // wavelengths from 380 through 780 nanometers, every 5 - // nanometers. For a wavelength lambda in this range: - // cie_colour_match[(lambda - 380) / 5][0] = xBar - // cie_colour_match[(lambda - 380) / 5][1] = yBar - // cie_colour_match[(lambda - 380) / 5][2] = zBar - const float cie_colour_match[81][3] = { - {0.0014f,0.0000f,0.0065f}, {0.0022f,0.0001f,0.0105f}, {0.0042f,0.0001f,0.0201f}, - {0.0076f,0.0002f,0.0362f}, {0.0143f,0.0004f,0.0679f}, {0.0232f,0.0006f,0.1102f}, - {0.0435f,0.0012f,0.2074f}, {0.0776f,0.0022f,0.3713f}, {0.1344f,0.0040f,0.6456f}, - {0.2148f,0.0073f,1.0391f}, {0.2839f,0.0116f,1.3856f}, {0.3285f,0.0168f,1.6230f}, - {0.3483f,0.0230f,1.7471f}, {0.3481f,0.0298f,1.7826f}, {0.3362f,0.0380f,1.7721f}, - {0.3187f,0.0480f,1.7441f}, {0.2908f,0.0600f,1.6692f}, {0.2511f,0.0739f,1.5281f}, - {0.1954f,0.0910f,1.2876f}, {0.1421f,0.1126f,1.0419f}, {0.0956f,0.1390f,0.8130f}, - {0.0580f,0.1693f,0.6162f}, {0.0320f,0.2080f,0.4652f}, {0.0147f,0.2586f,0.3533f}, - {0.0049f,0.3230f,0.2720f}, {0.0024f,0.4073f,0.2123f}, {0.0093f,0.5030f,0.1582f}, - {0.0291f,0.6082f,0.1117f}, {0.0633f,0.7100f,0.0782f}, {0.1096f,0.7932f,0.0573f}, - {0.1655f,0.8620f,0.0422f}, {0.2257f,0.9149f,0.0298f}, {0.2904f,0.9540f,0.0203f}, - {0.3597f,0.9803f,0.0134f}, {0.4334f,0.9950f,0.0087f}, {0.5121f,1.0000f,0.0057f}, - {0.5945f,0.9950f,0.0039f}, {0.6784f,0.9786f,0.0027f}, {0.7621f,0.9520f,0.0021f}, - {0.8425f,0.9154f,0.0018f}, {0.9163f,0.8700f,0.0017f}, {0.9786f,0.8163f,0.0014f}, - {1.0263f,0.7570f,0.0011f}, {1.0567f,0.6949f,0.0010f}, {1.0622f,0.6310f,0.0008f}, - {1.0456f,0.5668f,0.0006f}, {1.0026f,0.5030f,0.0003f}, {0.9384f,0.4412f,0.0002f}, - {0.8544f,0.3810f,0.0002f}, {0.7514f,0.3210f,0.0001f}, {0.6424f,0.2650f,0.0000f}, - {0.5419f,0.2170f,0.0000f}, {0.4479f,0.1750f,0.0000f}, {0.3608f,0.1382f,0.0000f}, - {0.2835f,0.1070f,0.0000f}, {0.2187f,0.0816f,0.0000f}, {0.1649f,0.0610f,0.0000f}, - {0.1212f,0.0446f,0.0000f}, {0.0874f,0.0320f,0.0000f}, {0.0636f,0.0232f,0.0000f}, - {0.0468f,0.0170f,0.0000f}, {0.0329f,0.0119f,0.0000f}, {0.0227f,0.0082f,0.0000f}, - {0.0158f,0.0057f,0.0000f}, {0.0114f,0.0041f,0.0000f}, {0.0081f,0.0029f,0.0000f}, - {0.0058f,0.0021f,0.0000f}, {0.0041f,0.0015f,0.0000f}, {0.0029f,0.0010f,0.0000f}, - {0.0020f,0.0007f,0.0000f}, {0.0014f,0.0005f,0.0000f}, {0.0010f,0.0004f,0.0000f}, - {0.0007f,0.0002f,0.0000f}, {0.0005f,0.0002f,0.0000f}, {0.0003f,0.0001f,0.0000f}, - {0.0002f,0.0001f,0.0000f}, {0.0002f,0.0001f,0.0000f}, {0.0001f,0.0000f,0.0000f}, - {0.0001f,0.0000f,0.0000f}, {0.0001f,0.0000f,0.0000f}, {0.0000f,0.0000f,0.0000f} - }; - float lambda_nm = stack_load_float(stack, wavelength); float ii = (lambda_nm-380.0f) * (1.0f/5.0f); // scaled 0..80 int i = float_to_int(ii); @@ -82,7 +82,7 @@ ccl_device void svm_node_wavelength(ShaderData *sd, float *stack, uint wavelengt } else { ii -= i; - const float *c = cie_colour_match[i]; + ccl_constant float *c = cie_colour_match[i]; color = interp(make_float3(c[0], c[1], c[2]), make_float3(c[3], c[4], c[5]), ii); } @@ -92,8 +92,7 @@ ccl_device void svm_node_wavelength(ShaderData *sd, float *stack, uint wavelengt /* Clamp to zero if values are smaller */ color = max(color, make_float3(0.0f, 0.0f, 0.0f)); - if(stack_valid(color_out)) - stack_store_float3(stack, color_out, color); + stack_store_float3(stack, color_out, color); } CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_defines.h b/intern/cycles/util/util_defines.h index ae654092c87..98944a19022 100644 --- a/intern/cycles/util/util_defines.h +++ b/intern/cycles/util/util_defines.h @@ -30,7 +30,8 @@ # define ccl_device static inline # define ccl_device_noinline static # define ccl_global -# define ccl_constant +# define ccl_static_constant static const +# define ccl_constant const # define ccl_local # define ccl_local_param # define ccl_private