Fix T53001: more workarounds for crash in AMD compiler with recent drivers.

This commit is contained in:
Brecht Van Lommel 2017-10-05 16:41:41 +02:00
parent fb99ea79f8
commit 4537e85584
6 changed files with 83 additions and 76 deletions

@ -50,7 +50,8 @@
#endif
#define ccl_device_noinline __device__ __noinline__
#define ccl_global
#define ccl_constant
#define ccl_static_constant __constant__
#define ccl_constant const
#define ccl_local __shared__
#define ccl_local_param
#define ccl_private

@ -36,6 +36,7 @@
#define ccl_device_forceinline ccl_device
#define ccl_device_noinline ccl_device ccl_noinline
#define ccl_may_alias
#define ccl_static_constant static __constant
#define ccl_constant __constant
#define ccl_global __global
#define ccl_local __local

@ -41,8 +41,7 @@ ccl_device void svm_node_blackbody(KernelGlobals *kg, ShaderData *sd, float *sta
float3 color_rgb = svm_math_blackbody_color(temperature);
if(stack_valid(col_offset))
stack_store_float3(stack, col_offset, color_rgb);
stack_store_float3(stack, col_offset, color_rgb);
}
CCL_NAMESPACE_END

@ -100,40 +100,42 @@ ccl_device float svm_math(NodeMath type, float Fac1, float Fac2)
return Fac;
}
ccl_device float3 svm_math_blackbody_color(float t) {
/* Calculate color in range 800..12000 using an approximation
* a/t + bt + c for R and G and ((at + b)t + c)t + d for B
* Max absolute error for RGB is (0.00095, 0.00077, 0.00057),
* which is enough to get the same 8 bit/channel color.
*/
/* Calculate color in range 800..12000 using an approximation
* a/t + bt + c for R and G and ((at + b)t + c)t + d for B
* Max absolute error for RGB is (0.00095, 0.00077, 0.00057),
* which is enough to get the same 8 bit/channel color.
*/
const float rc[6][3] = {
{ 2.52432244e+03f, -1.06185848e-03f, 3.11067539e+00f },
{ 3.37763626e+03f, -4.34581697e-04f, 1.64843306e+00f },
{ 4.10671449e+03f, -8.61949938e-05f, 6.41423749e-01f },
{ 4.66849800e+03f, 2.85655028e-05f, 1.29075375e-01f },
{ 4.60124770e+03f, 2.89727618e-05f, 1.48001316e-01f },
{ 3.78765709e+03f, 9.36026367e-06f, 3.98995841e-01f },
};
/* Red channel coefficients for the blackbody approximation.
 * svm_math_blackbody_color() picks one row from the temperature t and
 * evaluates a row {a, b, c} as a/t + b*t + c. */
ccl_static_constant float blackbody_table_r[6][3] = {
{ 2.52432244e+03f, -1.06185848e-03f, 3.11067539e+00f },
{ 3.37763626e+03f, -4.34581697e-04f, 1.64843306e+00f },
{ 4.10671449e+03f, -8.61949938e-05f, 6.41423749e-01f },
{ 4.66849800e+03f, 2.85655028e-05f, 1.29075375e-01f },
{ 4.60124770e+03f, 2.89727618e-05f, 1.48001316e-01f },
{ 3.78765709e+03f, 9.36026367e-06f, 3.98995841e-01f },
};
const float gc[6][3] = {
{ -7.50343014e+02f, 3.15679613e-04f, 4.73464526e-01f },
{ -1.00402363e+03f, 1.29189794e-04f, 9.08181524e-01f },
{ -1.22075471e+03f, 2.56245413e-05f, 1.20753416e+00f },
{ -1.42546105e+03f, -4.01730887e-05f, 1.44002695e+00f },
{ -1.18134453e+03f, -2.18913373e-05f, 1.30656109e+00f },
{ -5.00279505e+02f, -4.59745390e-06f, 1.09090465e+00f },
};
/* Green channel coefficients for the blackbody approximation.
 * svm_math_blackbody_color() picks one row from the temperature t and
 * evaluates a row {a, b, c} as a/t + b*t + c. */
ccl_static_constant float blackbody_table_g[6][3] = {
{ -7.50343014e+02f, 3.15679613e-04f, 4.73464526e-01f },
{ -1.00402363e+03f, 1.29189794e-04f, 9.08181524e-01f },
{ -1.22075471e+03f, 2.56245413e-05f, 1.20753416e+00f },
{ -1.42546105e+03f, -4.01730887e-05f, 1.44002695e+00f },
{ -1.18134453e+03f, -2.18913373e-05f, 1.30656109e+00f },
{ -5.00279505e+02f, -4.59745390e-06f, 1.09090465e+00f },
};
const float bc[6][4] = {
{ 0.0f, 0.0f, 0.0f, 0.0f }, /* zeros should be optimized by compiler */
{ 0.0f, 0.0f, 0.0f, 0.0f },
{ 0.0f, 0.0f, 0.0f, 0.0f },
{ -2.02524603e-11f, 1.79435860e-07f, -2.60561875e-04f, -1.41761141e-02f },
{ -2.22463426e-13f, -1.55078698e-08f, 3.81675160e-04f, -7.30646033e-01f },
{ 6.72595954e-13f, -2.73059993e-08f, 4.24068546e-04f, -7.52204323e-01f },
};
/* Blue channel coefficients for the blackbody approximation.
 * svm_math_blackbody_color() picks one row from the temperature t and
 * evaluates a row {a, b, c, d} as ((a*t + b)*t + c)*t + d.
 * The first three rows are all zero, so that polynomial yields 0 for them. */
ccl_static_constant float blackbody_table_b[6][4] = {
{ 0.0f, 0.0f, 0.0f, 0.0f }, /* zeros should be optimized by compiler */
{ 0.0f, 0.0f, 0.0f, 0.0f },
{ 0.0f, 0.0f, 0.0f, 0.0f },
{ -2.02524603e-11f, 1.79435860e-07f, -2.60561875e-04f, -1.41761141e-02f },
{ -2.22463426e-13f, -1.55078698e-08f, 3.81675160e-04f, -7.30646033e-01f },
{ 6.72595954e-13f, -2.73059993e-08f, 4.24068546e-04f, -7.52204323e-01f },
};
ccl_device float3 svm_math_blackbody_color(float t)
{
if(t >= 12000.0f) {
return make_float3(0.826270103f, 0.994478524f, 1.56626022f);
}
@ -148,10 +150,14 @@ ccl_device float3 svm_math_blackbody_color(float t) {
(t >= 1449.0f)? 2:
(t >= 1167.0f)? 1: 0;
ccl_constant float *r = blackbody_table_r[i];
ccl_constant float *g = blackbody_table_g[i];
ccl_constant float *b = blackbody_table_b[i];
const float t_inv = 1.0f / t;
return make_float3(rc[i][0] * t_inv + rc[i][1] * t + rc[i][2],
gc[i][0] * t_inv + gc[i][1] * t + gc[i][2],
((bc[i][0] * t + bc[i][1]) * t + bc[i][2]) * t + bc[i][3]);
return make_float3(r[0] * t_inv + r[1] * t + r[2],
g[0] * t_inv + g[1] * t + g[2],
((b[0] * t + b[1]) * t + b[2]) * t + b[3]);
}
ccl_device_inline float3 svm_math_gamma_color(float3 color, float gamma)

@ -34,44 +34,44 @@ CCL_NAMESPACE_BEGIN
/* Wavelength to RGB */
// CIE colour matching functions xBar, yBar, and zBar for
// wavelengths from 380 through 780 nanometers, every 5
// nanometers. For a wavelength lambda in this range:
// cie_colour_match[(lambda - 380) / 5][0] = xBar
// cie_colour_match[(lambda - 380) / 5][1] = yBar
// cie_colour_match[(lambda - 380) / 5][2] = zBar
//
// The rows are laid out contiguously: svm_node_wavelength() takes a
// pointer c to row i and reads c[0..2] (row i) together with c[3..5]
// (row i + 1) to interpolate between two neighbouring samples.
ccl_static_constant float cie_colour_match[81][3] = {
{0.0014f,0.0000f,0.0065f}, {0.0022f,0.0001f,0.0105f}, {0.0042f,0.0001f,0.0201f},
{0.0076f,0.0002f,0.0362f}, {0.0143f,0.0004f,0.0679f}, {0.0232f,0.0006f,0.1102f},
{0.0435f,0.0012f,0.2074f}, {0.0776f,0.0022f,0.3713f}, {0.1344f,0.0040f,0.6456f},
{0.2148f,0.0073f,1.0391f}, {0.2839f,0.0116f,1.3856f}, {0.3285f,0.0168f,1.6230f},
{0.3483f,0.0230f,1.7471f}, {0.3481f,0.0298f,1.7826f}, {0.3362f,0.0380f,1.7721f},
{0.3187f,0.0480f,1.7441f}, {0.2908f,0.0600f,1.6692f}, {0.2511f,0.0739f,1.5281f},
{0.1954f,0.0910f,1.2876f}, {0.1421f,0.1126f,1.0419f}, {0.0956f,0.1390f,0.8130f},
{0.0580f,0.1693f,0.6162f}, {0.0320f,0.2080f,0.4652f}, {0.0147f,0.2586f,0.3533f},
{0.0049f,0.3230f,0.2720f}, {0.0024f,0.4073f,0.2123f}, {0.0093f,0.5030f,0.1582f},
{0.0291f,0.6082f,0.1117f}, {0.0633f,0.7100f,0.0782f}, {0.1096f,0.7932f,0.0573f},
{0.1655f,0.8620f,0.0422f}, {0.2257f,0.9149f,0.0298f}, {0.2904f,0.9540f,0.0203f},
{0.3597f,0.9803f,0.0134f}, {0.4334f,0.9950f,0.0087f}, {0.5121f,1.0000f,0.0057f},
{0.5945f,0.9950f,0.0039f}, {0.6784f,0.9786f,0.0027f}, {0.7621f,0.9520f,0.0021f},
{0.8425f,0.9154f,0.0018f}, {0.9163f,0.8700f,0.0017f}, {0.9786f,0.8163f,0.0014f},
{1.0263f,0.7570f,0.0011f}, {1.0567f,0.6949f,0.0010f}, {1.0622f,0.6310f,0.0008f},
{1.0456f,0.5668f,0.0006f}, {1.0026f,0.5030f,0.0003f}, {0.9384f,0.4412f,0.0002f},
{0.8544f,0.3810f,0.0002f}, {0.7514f,0.3210f,0.0001f}, {0.6424f,0.2650f,0.0000f},
{0.5419f,0.2170f,0.0000f}, {0.4479f,0.1750f,0.0000f}, {0.3608f,0.1382f,0.0000f},
{0.2835f,0.1070f,0.0000f}, {0.2187f,0.0816f,0.0000f}, {0.1649f,0.0610f,0.0000f},
{0.1212f,0.0446f,0.0000f}, {0.0874f,0.0320f,0.0000f}, {0.0636f,0.0232f,0.0000f},
{0.0468f,0.0170f,0.0000f}, {0.0329f,0.0119f,0.0000f}, {0.0227f,0.0082f,0.0000f},
{0.0158f,0.0057f,0.0000f}, {0.0114f,0.0041f,0.0000f}, {0.0081f,0.0029f,0.0000f},
{0.0058f,0.0021f,0.0000f}, {0.0041f,0.0015f,0.0000f}, {0.0029f,0.0010f,0.0000f},
{0.0020f,0.0007f,0.0000f}, {0.0014f,0.0005f,0.0000f}, {0.0010f,0.0004f,0.0000f},
{0.0007f,0.0002f,0.0000f}, {0.0005f,0.0002f,0.0000f}, {0.0003f,0.0001f,0.0000f},
{0.0002f,0.0001f,0.0000f}, {0.0002f,0.0001f,0.0000f}, {0.0001f,0.0000f,0.0000f},
{0.0001f,0.0000f,0.0000f}, {0.0001f,0.0000f,0.0000f}, {0.0000f,0.0000f,0.0000f}
};
ccl_device void svm_node_wavelength(ShaderData *sd, float *stack, uint wavelength, uint color_out)
{
// CIE colour matching functions xBar, yBar, and zBar for
// wavelengths from 380 through 780 nanometers, every 5
// nanometers. For a wavelength lambda in this range:
// cie_colour_match[(lambda - 380) / 5][0] = xBar
// cie_colour_match[(lambda - 380) / 5][1] = yBar
// cie_colour_match[(lambda - 380) / 5][2] = zBar
const float cie_colour_match[81][3] = {
{0.0014f,0.0000f,0.0065f}, {0.0022f,0.0001f,0.0105f}, {0.0042f,0.0001f,0.0201f},
{0.0076f,0.0002f,0.0362f}, {0.0143f,0.0004f,0.0679f}, {0.0232f,0.0006f,0.1102f},
{0.0435f,0.0012f,0.2074f}, {0.0776f,0.0022f,0.3713f}, {0.1344f,0.0040f,0.6456f},
{0.2148f,0.0073f,1.0391f}, {0.2839f,0.0116f,1.3856f}, {0.3285f,0.0168f,1.6230f},
{0.3483f,0.0230f,1.7471f}, {0.3481f,0.0298f,1.7826f}, {0.3362f,0.0380f,1.7721f},
{0.3187f,0.0480f,1.7441f}, {0.2908f,0.0600f,1.6692f}, {0.2511f,0.0739f,1.5281f},
{0.1954f,0.0910f,1.2876f}, {0.1421f,0.1126f,1.0419f}, {0.0956f,0.1390f,0.8130f},
{0.0580f,0.1693f,0.6162f}, {0.0320f,0.2080f,0.4652f}, {0.0147f,0.2586f,0.3533f},
{0.0049f,0.3230f,0.2720f}, {0.0024f,0.4073f,0.2123f}, {0.0093f,0.5030f,0.1582f},
{0.0291f,0.6082f,0.1117f}, {0.0633f,0.7100f,0.0782f}, {0.1096f,0.7932f,0.0573f},
{0.1655f,0.8620f,0.0422f}, {0.2257f,0.9149f,0.0298f}, {0.2904f,0.9540f,0.0203f},
{0.3597f,0.9803f,0.0134f}, {0.4334f,0.9950f,0.0087f}, {0.5121f,1.0000f,0.0057f},
{0.5945f,0.9950f,0.0039f}, {0.6784f,0.9786f,0.0027f}, {0.7621f,0.9520f,0.0021f},
{0.8425f,0.9154f,0.0018f}, {0.9163f,0.8700f,0.0017f}, {0.9786f,0.8163f,0.0014f},
{1.0263f,0.7570f,0.0011f}, {1.0567f,0.6949f,0.0010f}, {1.0622f,0.6310f,0.0008f},
{1.0456f,0.5668f,0.0006f}, {1.0026f,0.5030f,0.0003f}, {0.9384f,0.4412f,0.0002f},
{0.8544f,0.3810f,0.0002f}, {0.7514f,0.3210f,0.0001f}, {0.6424f,0.2650f,0.0000f},
{0.5419f,0.2170f,0.0000f}, {0.4479f,0.1750f,0.0000f}, {0.3608f,0.1382f,0.0000f},
{0.2835f,0.1070f,0.0000f}, {0.2187f,0.0816f,0.0000f}, {0.1649f,0.0610f,0.0000f},
{0.1212f,0.0446f,0.0000f}, {0.0874f,0.0320f,0.0000f}, {0.0636f,0.0232f,0.0000f},
{0.0468f,0.0170f,0.0000f}, {0.0329f,0.0119f,0.0000f}, {0.0227f,0.0082f,0.0000f},
{0.0158f,0.0057f,0.0000f}, {0.0114f,0.0041f,0.0000f}, {0.0081f,0.0029f,0.0000f},
{0.0058f,0.0021f,0.0000f}, {0.0041f,0.0015f,0.0000f}, {0.0029f,0.0010f,0.0000f},
{0.0020f,0.0007f,0.0000f}, {0.0014f,0.0005f,0.0000f}, {0.0010f,0.0004f,0.0000f},
{0.0007f,0.0002f,0.0000f}, {0.0005f,0.0002f,0.0000f}, {0.0003f,0.0001f,0.0000f},
{0.0002f,0.0001f,0.0000f}, {0.0002f,0.0001f,0.0000f}, {0.0001f,0.0000f,0.0000f},
{0.0001f,0.0000f,0.0000f}, {0.0001f,0.0000f,0.0000f}, {0.0000f,0.0000f,0.0000f}
};
float lambda_nm = stack_load_float(stack, wavelength);
float ii = (lambda_nm-380.0f) * (1.0f/5.0f); // scaled 0..80
int i = float_to_int(ii);
@ -82,7 +82,7 @@ ccl_device void svm_node_wavelength(ShaderData *sd, float *stack, uint wavelengt
}
else {
ii -= i;
const float *c = cie_colour_match[i];
ccl_constant float *c = cie_colour_match[i];
color = interp(make_float3(c[0], c[1], c[2]), make_float3(c[3], c[4], c[5]), ii);
}
@ -92,8 +92,7 @@ ccl_device void svm_node_wavelength(ShaderData *sd, float *stack, uint wavelengt
/* Clamp to zero if values are smaller */
color = max(color, make_float3(0.0f, 0.0f, 0.0f));
if(stack_valid(color_out))
stack_store_float3(stack, color_out, color);
stack_store_float3(stack, color_out, color);
}
CCL_NAMESPACE_END

@ -30,7 +30,8 @@
# define ccl_device static inline
# define ccl_device_noinline static
# define ccl_global
# define ccl_constant
# define ccl_static_constant static const
# define ccl_constant const
# define ccl_local
# define ccl_local_param
# define ccl_private