From 7b1c5712f888ea37bbccafd9ffd7a3a6a61e665f Mon Sep 17 00:00:00 2001 From: William Leeson Date: Wed, 27 Oct 2021 13:28:13 +0200 Subject: [PATCH] Cycles: Replace saturate with saturatef saturate is deprecated in favour of __saturatef. This replaces saturate with __saturatef on CUDA by creating a saturatef function which replaces all instances of saturate and is hooked up to the correct function on all platforms. Reviewed By: brecht Differential Revision: https://developer.blender.org/D13010 --- intern/cycles/kernel/closure/bsdf_microfacet.h | 18 +++++++++--------- .../kernel/closure/bsdf_microfacet_multi.h | 10 +++++----- intern/cycles/kernel/closure/bsdf_oren_nayar.h | 2 +- intern/cycles/kernel/closure/bsdf_toon.h | 8 ++++---- intern/cycles/kernel/film/passes.h | 2 +- intern/cycles/kernel/film/read.h | 6 +++--- intern/cycles/kernel/svm/bevel.h | 2 +- intern/cycles/kernel/svm/brick.h | 2 +- intern/cycles/kernel/svm/closure.h | 8 ++++---- intern/cycles/kernel/svm/color_util.h | 2 +- intern/cycles/kernel/svm/gradient.h | 2 +- intern/cycles/kernel/svm/hsv.h | 2 +- intern/cycles/kernel/svm/image.h | 6 +++--- intern/cycles/kernel/svm/musgrave.h | 8 ++++---- intern/cycles/kernel/svm/ramp.h | 4 ++-- intern/cycles/kernel/util/lookup_table.h | 4 ++-- intern/cycles/scene/constant_fold.cpp | 10 +++++----- intern/cycles/util/math.h | 7 ++++++- intern/cycles/util/math_float3.h | 2 +- 19 files changed, 55 insertions(+), 50 deletions(-) diff --git a/intern/cycles/kernel/closure/bsdf_microfacet.h b/intern/cycles/kernel/closure/bsdf_microfacet.h index 83242a73685..466ba3e229e 100644 --- a/intern/cycles/kernel/closure/bsdf_microfacet.h +++ b/intern/cycles/kernel/closure/bsdf_microfacet.h @@ -315,8 +315,8 @@ ccl_device int bsdf_microfacet_ggx_setup(ccl_private MicrofacetBsdf *bsdf) { bsdf->extra = NULL; - bsdf->alpha_x = saturate(bsdf->alpha_x); - bsdf->alpha_y = saturate(bsdf->alpha_y); + bsdf->alpha_x = saturatef(bsdf->alpha_x); + bsdf->alpha_y = saturatef(bsdf->alpha_y); 
bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_ID; @@ -336,8 +336,8 @@ ccl_device int bsdf_microfacet_ggx_fresnel_setup(ccl_private MicrofacetBsdf *bsd { bsdf->extra->cspec0 = saturate3(bsdf->extra->cspec0); - bsdf->alpha_x = saturate(bsdf->alpha_x); - bsdf->alpha_y = saturate(bsdf->alpha_y); + bsdf->alpha_x = saturatef(bsdf->alpha_x); + bsdf->alpha_y = saturatef(bsdf->alpha_y); bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID; @@ -351,7 +351,7 @@ ccl_device int bsdf_microfacet_ggx_clearcoat_setup(ccl_private MicrofacetBsdf *b { bsdf->extra->cspec0 = saturate3(bsdf->extra->cspec0); - bsdf->alpha_x = saturate(bsdf->alpha_x); + bsdf->alpha_x = saturatef(bsdf->alpha_x); bsdf->alpha_y = bsdf->alpha_x; bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID; @@ -365,7 +365,7 @@ ccl_device int bsdf_microfacet_ggx_refraction_setup(ccl_private MicrofacetBsdf * { bsdf->extra = NULL; - bsdf->alpha_x = saturate(bsdf->alpha_x); + bsdf->alpha_x = saturatef(bsdf->alpha_x); bsdf->alpha_y = bsdf->alpha_x; bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID; @@ -783,8 +783,8 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals kg, ccl_device int bsdf_microfacet_beckmann_setup(ccl_private MicrofacetBsdf *bsdf) { - bsdf->alpha_x = saturate(bsdf->alpha_x); - bsdf->alpha_y = saturate(bsdf->alpha_y); + bsdf->alpha_x = saturatef(bsdf->alpha_x); + bsdf->alpha_y = saturatef(bsdf->alpha_y); bsdf->type = CLOSURE_BSDF_MICROFACET_BECKMANN_ID; return SD_BSDF | SD_BSDF_HAS_EVAL; @@ -800,7 +800,7 @@ ccl_device int bsdf_microfacet_beckmann_isotropic_setup(ccl_private MicrofacetBs ccl_device int bsdf_microfacet_beckmann_refraction_setup(ccl_private MicrofacetBsdf *bsdf) { - bsdf->alpha_x = saturate(bsdf->alpha_x); + bsdf->alpha_x = saturatef(bsdf->alpha_x); bsdf->alpha_y = bsdf->alpha_x; bsdf->type = CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID; diff --git a/intern/cycles/kernel/closure/bsdf_microfacet_multi.h b/intern/cycles/kernel/closure/bsdf_microfacet_multi.h index 
77370fbec4e..5badbe9aa80 100644 --- a/intern/cycles/kernel/closure/bsdf_microfacet_multi.h +++ b/intern/cycles/kernel/closure/bsdf_microfacet_multi.h @@ -220,12 +220,12 @@ ccl_device_forceinline float mf_lambda(const float3 w, const float2 alpha) /* Height distribution CDF (based on page 4 of the supplemental implementation). */ ccl_device_forceinline float mf_invC1(const float h) { - return 2.0f * saturate(h) - 1.0f; + return 2.0f * saturatef(h) - 1.0f; } ccl_device_forceinline float mf_C1(const float h) { - return saturate(0.5f * (h + 1.0f)); + return saturatef(0.5f * (h + 1.0f)); } /* Masking function (based on page 16 of the supplemental implementation). */ @@ -284,7 +284,7 @@ ccl_device_forceinline float mf_ggx_albedo(float r) 0.027803f) * r + 0.00568739f; - return saturate(albedo); + return saturatef(albedo); } ccl_device_inline float mf_ggx_transmission_albedo(float a, float ior) @@ -292,7 +292,7 @@ ccl_device_inline float mf_ggx_transmission_albedo(float a, float ior) if (ior < 1.0f) { ior = 1.0f / ior; } - a = saturate(a); + a = saturatef(a); ior = clamp(ior, 1.0f, 3.0f); float I_1 = 0.0476898f * expf(-0.978352f * (ior - 0.65657f) * (ior - 0.65657f)) - 0.033756f * ior + 0.993261f; @@ -302,7 +302,7 @@ ccl_device_inline float mf_ggx_transmission_albedo(float a, float ior) float R_2 = ((((5.3725f * a - 24.9307f) * a + 22.7437f) * a - 3.40751f) * a + 0.0986325f) * a + 0.00493504f; - return saturate(1.0f + I_2 * R_2 * 0.0019127f - (1.0f - I_1) * (1.0f - R_1) * 9.3205f); + return saturatef(1.0f + I_2 * R_2 * 0.0019127f - (1.0f - I_1) * (1.0f - R_1) * 9.3205f); } ccl_device_forceinline float mf_ggx_pdf(const float3 wi, const float3 wo, const float alpha) diff --git a/intern/cycles/kernel/closure/bsdf_oren_nayar.h b/intern/cycles/kernel/closure/bsdf_oren_nayar.h index 00c2678f0a0..8827309a811 100644 --- a/intern/cycles/kernel/closure/bsdf_oren_nayar.h +++ b/intern/cycles/kernel/closure/bsdf_oren_nayar.h @@ -50,7 +50,7 @@ ccl_device int 
bsdf_oren_nayar_setup(ccl_private OrenNayarBsdf *bsdf) bsdf->type = CLOSURE_BSDF_OREN_NAYAR_ID; - sigma = saturate(sigma); + sigma = saturatef(sigma); float div = 1.0f / (M_PI_F + ((3.0f * M_PI_F - 4.0f) / 6.0f) * sigma); diff --git a/intern/cycles/kernel/closure/bsdf_toon.h b/intern/cycles/kernel/closure/bsdf_toon.h index 7f20a328b5e..20f3b8f0074 100644 --- a/intern/cycles/kernel/closure/bsdf_toon.h +++ b/intern/cycles/kernel/closure/bsdf_toon.h @@ -48,8 +48,8 @@ static_assert(sizeof(ShaderClosure) >= sizeof(ToonBsdf), "ToonBsdf is too large! ccl_device int bsdf_diffuse_toon_setup(ccl_private ToonBsdf *bsdf) { bsdf->type = CLOSURE_BSDF_DIFFUSE_TOON_ID; - bsdf->size = saturate(bsdf->size); - bsdf->smooth = saturate(bsdf->smooth); + bsdf->size = saturatef(bsdf->size); + bsdf->smooth = saturatef(bsdf->smooth); return SD_BSDF | SD_BSDF_HAS_EVAL; } @@ -146,8 +146,8 @@ ccl_device int bsdf_diffuse_toon_sample(ccl_private const ShaderClosure *sc, ccl_device int bsdf_glossy_toon_setup(ccl_private ToonBsdf *bsdf) { bsdf->type = CLOSURE_BSDF_GLOSSY_TOON_ID; - bsdf->size = saturate(bsdf->size); - bsdf->smooth = saturate(bsdf->smooth); + bsdf->size = saturatef(bsdf->size); + bsdf->smooth = saturatef(bsdf->smooth); return SD_BSDF | SD_BSDF_HAS_EVAL; } diff --git a/intern/cycles/kernel/film/passes.h b/intern/cycles/kernel/film/passes.h index 3a91d1653fe..22b4b779a17 100644 --- a/intern/cycles/kernel/film/passes.h +++ b/intern/cycles/kernel/film/passes.h @@ -312,7 +312,7 @@ ccl_device_inline void kernel_write_data_passes(KernelGlobals kg, const float mist_inv_depth = kernel_data.film.mist_inv_depth; const float depth = camera_distance(kg, sd->P); - float mist = saturate((depth - mist_start) * mist_inv_depth); + float mist = saturatef((depth - mist_start) * mist_inv_depth); /* Falloff */ const float mist_falloff = kernel_data.film.mist_falloff; diff --git a/intern/cycles/kernel/film/read.h b/intern/cycles/kernel/film/read.h index a87eff3832e..d308a9818e2 100644 --- 
a/intern/cycles/kernel/film/read.h +++ b/intern/cycles/kernel/film/read.h @@ -27,7 +27,7 @@ CCL_NAMESPACE_BEGIN * roulette. */ ccl_device_forceinline float film_transparency_to_alpha(float transparency) { - return saturate(1.0f - transparency); + return saturatef(1.0f - transparency); } ccl_device_inline float film_get_scale(ccl_global const KernelFilmConvert *ccl_restrict @@ -136,7 +136,7 @@ ccl_device_inline void film_get_pass_pixel_mist(ccl_global const KernelFilmConve /* Note that we accumulate 1 - mist in the kernel to avoid having to * track the mist values in the integrator state. */ - pixel[0] = saturate(1.0f - f * scale_exposure); + pixel[0] = saturatef(1.0f - f * scale_exposure); } ccl_device_inline void film_get_pass_pixel_sample_count( @@ -458,7 +458,7 @@ ccl_device_inline float4 film_calculate_shadow_catcher_matte_with_shadow( const float3 color_matte = make_float3(in_matte[0], in_matte[1], in_matte[2]) * scale_exposure; const float transparency = in_matte[3] * scale; - const float alpha = saturate(1.0f - transparency); + const float alpha = saturatef(1.0f - transparency); const float alpha_matte = (1.0f - alpha) * (1.0f - average(shadow_catcher)) + alpha; diff --git a/intern/cycles/kernel/svm/bevel.h b/intern/cycles/kernel/svm/bevel.h index 37c7caf1372..6799489514f 100644 --- a/intern/cycles/kernel/svm/bevel.h +++ b/intern/cycles/kernel/svm/bevel.h @@ -73,7 +73,7 @@ ccl_device_forceinline float svm_bevel_cubic_quintic_root_find(float xi) if (fabsf(f) < tolerance || f_ == 0.0f) break; - x = saturate(x - f / f_); + x = saturatef(x - f / f_); } return x; diff --git a/intern/cycles/kernel/svm/brick.h b/intern/cycles/kernel/svm/brick.h index 3c8729fa027..d8d01766106 100644 --- a/intern/cycles/kernel/svm/brick.h +++ b/intern/cycles/kernel/svm/brick.h @@ -56,7 +56,7 @@ ccl_device_noinline_cpu float2 svm_brick(float3 p, x = (p.x + offset) - brick_width * bricknum; y = p.y - row_height * rownum; - float tint = saturate((brick_noise((rownum << 16) + (bricknum & 
0xFFFF)) + bias)); + float tint = saturatef((brick_noise((rownum << 16) + (bricknum & 0xFFFF)) + bias)); float min_dist = min(min(x, y), min(brick_width - x, row_height - y)); float mortar; diff --git a/intern/cycles/kernel/svm/closure.h b/intern/cycles/kernel/svm/closure.h index 1dcfe003f74..71952e9e0f8 100644 --- a/intern/cycles/kernel/svm/closure.h +++ b/intern/cycles/kernel/svm/closure.h @@ -173,9 +173,9 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg, float fresnel = fresnel_dielectric_cos(cosNO, ior); // calculate weights of the diffuse and specular part - float diffuse_weight = (1.0f - saturate(metallic)) * (1.0f - saturate(transmission)); + float diffuse_weight = (1.0f - saturatef(metallic)) * (1.0f - saturatef(transmission)); - float final_transmission = saturate(transmission) * (1.0f - saturate(metallic)); + float final_transmission = saturatef(transmission) * (1.0f - saturatef(metallic)); float specular_weight = (1.0f - final_transmission); // get the base color @@ -746,7 +746,7 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg, if (bsdf) { bsdf->N = N; - bsdf->sigma = saturate(param1); + bsdf->sigma = saturatef(param1); sd->flag |= bsdf_ashikhmin_velvet_setup(bsdf); } break; @@ -1233,7 +1233,7 @@ ccl_device_noinline void svm_node_mix_closure(ccl_private ShaderData *sd, node.y, &weight_offset, &in_weight_offset, &weight1_offset, &weight2_offset); float weight = stack_load_float(stack, weight_offset); - weight = saturate(weight); + weight = saturatef(weight); float in_weight = (stack_valid(in_weight_offset)) ? 
stack_load_float(stack, in_weight_offset) : 1.0f; diff --git a/intern/cycles/kernel/svm/color_util.h b/intern/cycles/kernel/svm/color_util.h index 82024b61ba4..0c1a510e655 100644 --- a/intern/cycles/kernel/svm/color_util.h +++ b/intern/cycles/kernel/svm/color_util.h @@ -262,7 +262,7 @@ ccl_device float3 svm_mix_clamp(float3 col) ccl_device_noinline_cpu float3 svm_mix(NodeMix type, float fac, float3 c1, float3 c2) { - float t = saturate(fac); + float t = saturatef(fac); switch (type) { case NODE_MIX_BLEND: diff --git a/intern/cycles/kernel/svm/gradient.h b/intern/cycles/kernel/svm/gradient.h index 852196b73dc..42d8dbef792 100644 --- a/intern/cycles/kernel/svm/gradient.h +++ b/intern/cycles/kernel/svm/gradient.h @@ -73,7 +73,7 @@ ccl_device_noinline void svm_node_tex_gradient(ccl_private ShaderData *sd, float3 co = stack_load_float3(stack, co_offset); float f = svm_gradient(co, (NodeGradientType)type); - f = saturate(f); + f = saturatef(f); if (stack_valid(fac_offset)) stack_store_float(stack, fac_offset, f); diff --git a/intern/cycles/kernel/svm/hsv.h b/intern/cycles/kernel/svm/hsv.h index f6881fd4512..fdb266883fa 100644 --- a/intern/cycles/kernel/svm/hsv.h +++ b/intern/cycles/kernel/svm/hsv.h @@ -40,7 +40,7 @@ ccl_device_noinline void svm_node_hsv(KernelGlobals kg, /* Remember: `fmodf` doesn't work for negative numbers here. 
*/ color.x = fmodf(color.x + hue + 0.5f, 1.0f); - color.y = saturate(color.y * sat); + color.y = saturatef(color.y * sat); color.z *= val; color = hsv_to_rgb(color); diff --git a/intern/cycles/kernel/svm/image.h b/intern/cycles/kernel/svm/image.h index 6ddf98a6ef1..2ebd3d4eb87 100644 --- a/intern/cycles/kernel/svm/image.h +++ b/intern/cycles/kernel/svm/image.h @@ -167,17 +167,17 @@ ccl_device_noinline void svm_node_tex_image_box(KernelGlobals kg, /* in case of blending, test for mixes between two textures */ if (N.z < (1.0f - limit) * (N.y + N.x)) { weight.x = N.x / (N.x + N.y); - weight.x = saturate((weight.x - 0.5f * (1.0f - blend)) / blend); + weight.x = saturatef((weight.x - 0.5f * (1.0f - blend)) / blend); weight.y = 1.0f - weight.x; } else if (N.x < (1.0f - limit) * (N.y + N.z)) { weight.y = N.y / (N.y + N.z); - weight.y = saturate((weight.y - 0.5f * (1.0f - blend)) / blend); + weight.y = saturatef((weight.y - 0.5f * (1.0f - blend)) / blend); weight.z = 1.0f - weight.y; } else if (N.y < (1.0f - limit) * (N.x + N.z)) { weight.x = N.x / (N.x + N.z); - weight.x = saturate((weight.x - 0.5f * (1.0f - blend)) / blend); + weight.x = saturatef((weight.x - 0.5f * (1.0f - blend)) / blend); weight.z = 1.0f - weight.x; } else { diff --git a/intern/cycles/kernel/svm/musgrave.h b/intern/cycles/kernel/svm/musgrave.h index 4225c3d2d71..85e32eee638 100644 --- a/intern/cycles/kernel/svm/musgrave.h +++ b/intern/cycles/kernel/svm/musgrave.h @@ -180,7 +180,7 @@ ccl_device_noinline_cpu float noise_musgrave_ridged_multi_fractal_1d( for (int i = 1; i < float_to_int(octaves); i++) { p *= lacunarity; - weight = saturate(signal * gain); + weight = saturatef(signal * gain); signal = offset - fabsf(snoise_1d(p)); signal *= signal; signal *= weight; @@ -351,7 +351,7 @@ ccl_device_noinline_cpu float noise_musgrave_ridged_multi_fractal_2d( for (int i = 1; i < float_to_int(octaves); i++) { p *= lacunarity; - weight = saturate(signal * gain); + weight = saturatef(signal * gain); signal = 
offset - fabsf(snoise_2d(p)); signal *= signal; signal *= weight; @@ -522,7 +522,7 @@ ccl_device_noinline_cpu float noise_musgrave_ridged_multi_fractal_3d( for (int i = 1; i < float_to_int(octaves); i++) { p *= lacunarity; - weight = saturate(signal * gain); + weight = saturatef(signal * gain); signal = offset - fabsf(snoise_3d(p)); signal *= signal; signal *= weight; @@ -693,7 +693,7 @@ ccl_device_noinline_cpu float noise_musgrave_ridged_multi_fractal_4d( for (int i = 1; i < float_to_int(octaves); i++) { p *= lacunarity; - weight = saturate(signal * gain); + weight = saturatef(signal * gain); signal = offset - fabsf(snoise_4d(p)); signal *= signal; signal *= weight; diff --git a/intern/cycles/kernel/svm/ramp.h b/intern/cycles/kernel/svm/ramp.h index 1dc3383956d..61093e0bd82 100644 --- a/intern/cycles/kernel/svm/ramp.h +++ b/intern/cycles/kernel/svm/ramp.h @@ -44,7 +44,7 @@ ccl_device_inline float float_ramp_lookup( return t0 + dy * f * (table_size - 1); } - f = saturate(f) * (table_size - 1); + f = saturatef(f) * (table_size - 1); /* clamp int as well in case of NaN */ int i = clamp(float_to_int(f), 0, table_size - 1); @@ -76,7 +76,7 @@ ccl_device_inline float4 rgb_ramp_lookup( return t0 + dy * f * (table_size - 1); } - f = saturate(f) * (table_size - 1); + f = saturatef(f) * (table_size - 1); /* clamp int as well in case of NaN */ int i = clamp(float_to_int(f), 0, table_size - 1); diff --git a/intern/cycles/kernel/util/lookup_table.h b/intern/cycles/kernel/util/lookup_table.h index 2c26e668d7b..3ffbb4856da 100644 --- a/intern/cycles/kernel/util/lookup_table.h +++ b/intern/cycles/kernel/util/lookup_table.h @@ -22,7 +22,7 @@ CCL_NAMESPACE_BEGIN ccl_device float lookup_table_read(KernelGlobals kg, float x, int offset, int size) { - x = saturate(x) * (size - 1); + x = saturatef(x) * (size - 1); int index = min(float_to_int(x), size - 1); int nindex = min(index + 1, size - 1); @@ -39,7 +39,7 @@ ccl_device float lookup_table_read(KernelGlobals kg, float x, int offset, 
int si ccl_device float lookup_table_read_2D( KernelGlobals kg, float x, float y, int offset, int xsize, int ysize) { - y = saturate(y) * (ysize - 1); + y = saturatef(y) * (ysize - 1); int index = min(float_to_int(y), ysize - 1); int nindex = min(index + 1, ysize - 1); diff --git a/intern/cycles/scene/constant_fold.cpp b/intern/cycles/scene/constant_fold.cpp index ca065e3f678..a1fa34e7628 100644 --- a/intern/cycles/scene/constant_fold.cpp +++ b/intern/cycles/scene/constant_fold.cpp @@ -68,15 +68,15 @@ void ConstantFolder::make_constant(float3 value) const void ConstantFolder::make_constant_clamp(float value, bool clamp) const { - make_constant(clamp ? saturate(value) : value); + make_constant(clamp ? saturatef(value) : value); } void ConstantFolder::make_constant_clamp(float3 value, bool clamp) const { if (clamp) { - value.x = saturate(value.x); - value.y = saturate(value.y); - value.z = saturate(value.z); + value.x = saturatef(value.x); + value.y = saturatef(value.y); + value.z = saturatef(value.z); } make_constant(value); @@ -215,7 +215,7 @@ void ConstantFolder::fold_mix(NodeMix type, bool clamp) const ShaderInput *color1_in = node->input("Color1"); ShaderInput *color2_in = node->input("Color2"); - float fac = saturate(node->get_float(fac_in->socket_type)); + float fac = saturatef(node->get_float(fac_in->socket_type)); bool fac_is_zero = !fac_in->link && fac == 0.0f; bool fac_is_one = !fac_in->link && fac == 1.0f; diff --git a/intern/cycles/util/math.h b/intern/cycles/util/math.h index e7fc492733f..e4c7df6e44a 100644 --- a/intern/cycles/util/math.h +++ b/intern/cycles/util/math.h @@ -347,10 +347,15 @@ ccl_device_inline float smoothstep(float edge0, float edge1, float x) } #ifndef __KERNEL_CUDA__ -ccl_device_inline float saturate(float a) +ccl_device_inline float saturatef(float a) { return clamp(a, 0.0f, 1.0f); } +#else +ccl_device_inline float saturatef(float a) +{ + return __saturatef(a); +} #endif /* __KERNEL_CUDA__ */ ccl_device_inline int float_to_int(float 
f) diff --git a/intern/cycles/util/math_float3.h b/intern/cycles/util/math_float3.h index e780d7e0a7c..81550c5d03c 100644 --- a/intern/cycles/util/math_float3.h +++ b/intern/cycles/util/math_float3.h @@ -408,7 +408,7 @@ ccl_device_inline float3 project(const float3 v, const float3 v_proj) ccl_device_inline float3 saturate3(float3 a) { - return make_float3(saturate(a.x), saturate(a.y), saturate(a.z)); + return make_float3(saturatef(a.x), saturatef(a.y), saturatef(a.z)); } ccl_device_inline float3 normalize_len(const float3 a, ccl_private float *t)