Code cleanup: avoid some warnings due to implicit uint/int/float/double conversion.

This commit is contained in:
Brecht Van Lommel 2013-06-07 16:06:17 +00:00
parent c24be7ec6e
commit d835d2f4e6
23 changed files with 96 additions and 62 deletions

@ -379,7 +379,7 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine b_engine, BL::Use
params.background = background; params.background = background;
/* samples */ /* samples */
if(get_boolean(cscene, "progressive") == 0 && params.device.type == DEVICE_CPU){ if(get_boolean(cscene, "progressive") == 0 && params.device.type == DEVICE_CPU) {
if(background) { if(background) {
params.samples = get_int(cscene, "aa_samples"); params.samples = get_int(cscene, "aa_samples");
} }

@ -68,13 +68,13 @@ __device float bssrdf_reduced_albedo_Rd(float alpha_, float A, float ro)
{ {
float sq; float sq;
sq = sqrt(3.0f*(1.0f - alpha_)); sq = sqrtf(3.0f*(1.0f - alpha_));
return (alpha_/2.0f)*(1.0f + expf((-4.0f/3.0f)*A*sq))*expf(-sq) - ro; return (alpha_/2.0f)*(1.0f + expf((-4.0f/3.0f)*A*sq))*expf(-sq) - ro;
} }
__device float bssrdf_compute_reduced_albedo(float A, float ro) __device float bssrdf_compute_reduced_albedo(float A, float ro)
{ {
const float tolerance = 1e-8; const float tolerance = 1e-8f;
const int max_iteration_count = 20; const int max_iteration_count = 20;
float d, fsub, xn_1 = 0.0f, xn = 1.0f, fxn, fxn_1; float d, fsub, xn_1 = 0.0f, xn = 1.0f, fxn, fxn_1;
int i; int i;
@ -138,8 +138,8 @@ __device float bssrdf_original(const BSSRDFParams *ss, float r)
float rr = r*r; float rr = r*r;
float sr, sv, Rdr, Rdv; float sr, sv, Rdr, Rdv;
sr = sqrt(rr + ss->zr*ss->zr); sr = sqrtf(rr + ss->zr*ss->zr);
sv = sqrt(rr + ss->zv*ss->zv); sv = sqrtf(rr + ss->zv*ss->zv);
Rdr = ss->zr*(1.0f + ss->sigma_tr*sr)*expf(-ss->sigma_tr*sr)/(sr*sr*sr); Rdr = ss->zr*(1.0f + ss->sigma_tr*sr)*expf(-ss->sigma_tr*sr)/(sr*sr*sr);
Rdv = ss->zv*(1.0f + ss->sigma_tr*sv)*expf(-ss->sigma_tr*sv)/(sv*sv*sv); Rdv = ss->zv*(1.0f + ss->sigma_tr*sv)*expf(-ss->sigma_tr*sv)/(sv*sv*sv);

@ -171,8 +171,8 @@ __device_inline void bvh_node_intersect(KernelGlobals *kg,
/* decide which nodes to traverse next */ /* decide which nodes to traverse next */
#ifdef __VISIBILITY_FLAG__ #ifdef __VISIBILITY_FLAG__
/* this visibility test gives a 5% performance hit, how to solve? */ /* this visibility test gives a 5% performance hit, how to solve? */
*traverseChild0 = (c0max >= c0min) && (__float_as_int(cnodes.z) & visibility); *traverseChild0 = (c0max >= c0min) && (__float_as_uint(cnodes.z) & visibility);
*traverseChild1 = (c1max >= c1min) && (__float_as_int(cnodes.w) & visibility); *traverseChild1 = (c1max >= c1min) && (__float_as_uint(cnodes.w) & visibility);
#else #else
*traverseChild0 = (c0max >= c0min); *traverseChild0 = (c0max >= c0min);
*traverseChild1 = (c1max >= c1min); *traverseChild1 = (c1max >= c1min);

@ -88,7 +88,7 @@ template<typename T> struct texture_image {
float frac(float x, int *ix) float frac(float x, int *ix)
{ {
int i = (int)x - ((x < 0.0f)? 1: 0); int i = float_to_int(x) - ((x < 0.0f)? 1: 0);
*ix = i; *ix = i;
return x - (float)i; return x - (float)i;
} }

@ -28,8 +28,8 @@ __device void kernel_shader_evaluate(KernelGlobals *kg, uint4 *input, float4 *ou
/* setup shader data */ /* setup shader data */
int object = in.x; int object = in.x;
int prim = in.y; int prim = in.y;
float u = __int_as_float(in.z); float u = __uint_as_float(in.z);
float v = __int_as_float(in.w); float v = __uint_as_float(in.w);
shader_setup_from_displace(kg, &sd, object, prim, u, v); shader_setup_from_displace(kg, &sd, object, prim, u, v);
@ -41,8 +41,8 @@ __device void kernel_shader_evaluate(KernelGlobals *kg, uint4 *input, float4 *ou
else { // SHADER_EVAL_BACKGROUND else { // SHADER_EVAL_BACKGROUND
/* setup ray */ /* setup ray */
Ray ray; Ray ray;
float u = __int_as_float(in.x); float u = __uint_as_float(in.x);
float v = __int_as_float(in.y); float v = __uint_as_float(in.y);
ray.P = make_float3(0.0f, 0.0f, 0.0f); ray.P = make_float3(0.0f, 0.0f, 0.0f);
ray.D = equirectangular_to_direction(u, v); ray.D = equirectangular_to_direction(u, v);

@ -94,7 +94,7 @@ __device float lookup_table_read(KernelGlobals *kg, float x, int offset, int siz
{ {
x = clamp(x, 0.0f, 1.0f)*(size-1); x = clamp(x, 0.0f, 1.0f)*(size-1);
int index = min((int)x, size-1); int index = min(float_to_int(x), size-1);
int nindex = min(index+1, size-1); int nindex = min(index+1, size-1);
float t = x - index; float t = x - index;
@ -110,7 +110,7 @@ __device float lookup_table_read_2D(KernelGlobals *kg, float x, float y, int off
{ {
y = clamp(y, 0.0f, 1.0f)*(ysize-1); y = clamp(y, 0.0f, 1.0f)*(ysize-1);
int index = min((int)y, ysize-1); int index = min(float_to_int(y), ysize-1);
int nindex = min(index+1, ysize-1); int nindex = min(index+1, ysize-1);
float t = y - index; float t = y - index;

@ -124,8 +124,8 @@ __device float background_light_pdf(KernelGlobals *kg, float3 direction)
if(sin_theta == 0.0f) if(sin_theta == 0.0f)
return 0.0f; return 0.0f;
int index_u = clamp((int)(uv.x * res), 0, res - 1); int index_u = clamp(float_to_int(uv.x * res), 0, res - 1);
int index_v = clamp((int)(uv.y * res), 0, res - 1); int index_v = clamp(float_to_int(uv.y * res), 0, res - 1);
/* pdfs in V direction */ /* pdfs in V direction */
float2 cdf_last_u = kernel_tex_fetch(__light_background_conditional_cdf, index_v * (res + 1) + res); float2 cdf_last_u = kernel_tex_fetch(__light_background_conditional_cdf, index_v * (res + 1) + res);

@ -201,7 +201,7 @@ __device_inline uint object_particle_id(KernelGlobals *kg, int object)
int offset = object*OBJECT_SIZE + OBJECT_PROPERTIES; int offset = object*OBJECT_SIZE + OBJECT_PROPERTIES;
float4 f = kernel_tex_fetch(__objects, offset); float4 f = kernel_tex_fetch(__objects, offset);
return __float_as_int(f.w); return __float_as_uint(f.w);
} }
__device_inline float3 object_dupli_generated(KernelGlobals *kg, int object) __device_inline float3 object_dupli_generated(KernelGlobals *kg, int object)

@ -44,7 +44,7 @@
CCL_NAMESPACE_BEGIN CCL_NAMESPACE_BEGIN
typedef struct PathState { typedef struct PathState {
uint flag; int flag;
int bounce; int bounce;
int diffuse_bounce; int diffuse_bounce;

@ -72,7 +72,7 @@ __device_inline float stack_load_float(float *stack, uint a)
__device_inline float stack_load_float_default(float *stack, uint a, uint value) __device_inline float stack_load_float_default(float *stack, uint a, uint value)
{ {
return (a == (uint)SVM_STACK_INVALID)? __int_as_float(value): stack_load_float(stack, a); return (a == (uint)SVM_STACK_INVALID)? __uint_as_float(value): stack_load_float(stack, a);
} }
__device_inline void stack_store_float(float *stack, uint a, float f) __device_inline void stack_store_float(float *stack, uint a, float f)
@ -118,7 +118,7 @@ __device_inline uint4 read_node(KernelGlobals *kg, int *offset)
__device_inline float4 read_node_float(KernelGlobals *kg, int *offset) __device_inline float4 read_node_float(KernelGlobals *kg, int *offset)
{ {
uint4 node = kernel_tex_fetch(__svm_nodes, *offset); uint4 node = kernel_tex_fetch(__svm_nodes, *offset);
float4 f = make_float4(__int_as_float(node.x), __int_as_float(node.y), __int_as_float(node.z), __int_as_float(node.w)); float4 f = make_float4(__uint_as_float(node.x), __uint_as_float(node.y), __uint_as_float(node.z), __uint_as_float(node.w));
(*offset)++; (*offset)++;
return f; return f;
} }
@ -126,7 +126,7 @@ __device_inline float4 read_node_float(KernelGlobals *kg, int *offset)
__device_inline float4 fetch_node_float(KernelGlobals *kg, int offset) __device_inline float4 fetch_node_float(KernelGlobals *kg, int offset)
{ {
uint4 node = kernel_tex_fetch(__svm_nodes, offset); uint4 node = kernel_tex_fetch(__svm_nodes, offset);
return make_float4(__int_as_float(node.x), __int_as_float(node.y), __int_as_float(node.z), __int_as_float(node.w)); return make_float4(__uint_as_float(node.x), __uint_as_float(node.y), __uint_as_float(node.z), __uint_as_float(node.w));
} }
__device_inline void decode_node_uchar4(uint i, uint *x, uint *y, uint *z, uint *w) __device_inline void decode_node_uchar4(uint i, uint *x, uint *y, uint *z, uint *w)

@ -38,14 +38,14 @@ __device_noinline float2 svm_brick(float3 p, float scale, float mortar_size, flo
float offset = 0.0f; float offset = 0.0f;
float x, y; float x, y;
rownum = (int)floor(p.y / row_height); rownum = floor_to_int(p.y / row_height);
if(offset_frequency && squash_frequency) { if(offset_frequency && squash_frequency) {
brick_width *= ((int)(rownum) % squash_frequency ) ? 1.0f : squash_amount; /* squash */ brick_width *= ((int)(rownum) % squash_frequency ) ? 1.0f : squash_amount; /* squash */
offset = ((int)(rownum) % offset_frequency ) ? 0 : (brick_width*offset_amount); /* offset */ offset = ((int)(rownum) % offset_frequency ) ? 0 : (brick_width*offset_amount); /* offset */
} }
bricknum = (int)floor((p.x+offset) / brick_width); bricknum = floor_to_int((p.x+offset) / brick_width);
x = (p.x+offset) - brick_width*bricknum; x = (p.x+offset) - brick_width*bricknum;
y = p.y - row_height*rownum; y = p.y - row_height*rownum;

@ -29,9 +29,9 @@ __device_noinline float svm_checker(float3 p, float scale)
p.y = (p.y + 0.00001f)*0.9999f; p.y = (p.y + 0.00001f)*0.9999f;
p.z = (p.z + 0.00001f)*0.9999f; p.z = (p.z + 0.00001f)*0.9999f;
int xi = (int)fabsf(floorf(p.x)); int xi = float_to_int(fabsf(floorf(p.x)));
int yi = (int)fabsf(floorf(p.y)); int yi = float_to_int(fabsf(floorf(p.y)));
int zi = (int)fabsf(floorf(p.z)); int zi = float_to_int(fabsf(floorf(p.z)));
return ((xi % 2 == yi % 2) == (zi % 2))? 1.0f: 0.0f; return ((xi % 2 == yi % 2) == (zi % 2))? 1.0f: 0.0f;
} }

@ -119,8 +119,8 @@ __device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *st
float3 N = stack_valid(data_node.y)? stack_load_float3(stack, data_node.y): sd->N; float3 N = stack_valid(data_node.y)? stack_load_float3(stack, data_node.y): sd->N;
#endif #endif
float param1 = (stack_valid(param1_offset))? stack_load_float(stack, param1_offset): __int_as_float(node.z); float param1 = (stack_valid(param1_offset))? stack_load_float(stack, param1_offset): __uint_as_float(node.z);
float param2 = (stack_valid(param2_offset))? stack_load_float(stack, param2_offset): __int_as_float(node.w); float param2 = (stack_valid(param2_offset))? stack_load_float(stack, param2_offset): __uint_as_float(node.w);
switch(type) { switch(type) {
case CLOSURE_BSDF_DIFFUSE_ID: { case CLOSURE_BSDF_DIFFUSE_ID: {
@ -422,8 +422,8 @@ __device void svm_node_closure_volume(KernelGlobals *kg, ShaderData *sd, float *
float mix_weight = 1.0f; float mix_weight = 1.0f;
#endif #endif
float param1 = (stack_valid(param1_offset))? stack_load_float(stack, param1_offset): __int_as_float(node.z); float param1 = (stack_valid(param1_offset))? stack_load_float(stack, param1_offset): __uint_as_float(node.z);
//float param2 = (stack_valid(param2_offset))? stack_load_float(stack, param2_offset): __int_as_float(node.w); //float param2 = (stack_valid(param2_offset))? stack_load_float(stack, param2_offset): __uint_as_float(node.w);
switch(type) { switch(type) {
case CLOSURE_VOLUME_TRANSPARENT_ID: { case CLOSURE_VOLUME_TRANSPARENT_ID: {
@ -553,13 +553,13 @@ __device_inline void svm_node_closure_store_weight(ShaderData *sd, float3 weight
__device void svm_node_closure_set_weight(ShaderData *sd, uint r, uint g, uint b) __device void svm_node_closure_set_weight(ShaderData *sd, uint r, uint g, uint b)
{ {
float3 weight = make_float3(__int_as_float(r), __int_as_float(g), __int_as_float(b)); float3 weight = make_float3(__uint_as_float(r), __uint_as_float(g), __uint_as_float(b));
svm_node_closure_store_weight(sd, weight); svm_node_closure_store_weight(sd, weight);
} }
__device void svm_node_emission_set_weight_total(KernelGlobals *kg, ShaderData *sd, uint r, uint g, uint b) __device void svm_node_emission_set_weight_total(KernelGlobals *kg, ShaderData *sd, uint r, uint g, uint b)
{ {
float3 weight = make_float3(__int_as_float(r), __int_as_float(g), __int_as_float(b)); float3 weight = make_float3(__uint_as_float(r), __uint_as_float(g), __uint_as_float(b));
if(sd->object != ~0) if(sd->object != ~0)
weight /= object_surface_area(kg, sd->object); weight /= object_surface_area(kg, sd->object);

@ -53,7 +53,7 @@ __device void svm_node_convert(ShaderData *sd, float *stack, uint type, uint fro
} }
case NODE_CONVERT_VI: { case NODE_CONVERT_VI: {
float3 f = stack_load_float3(stack, from); float3 f = stack_load_float3(stack, from);
int i = (f.x + f.y + f.z)*(1.0f/3.0f); int i = (int)((f.x + f.y + f.z)*(1.0f/3.0f));
stack_store_int(stack, to, i); stack_store_int(stack, to, i);
break; break;
} }

@ -22,7 +22,7 @@ CCL_NAMESPACE_BEGIN
__device void svm_node_fresnel(ShaderData *sd, float *stack, uint ior_offset, uint ior_value, uint out_offset) __device void svm_node_fresnel(ShaderData *sd, float *stack, uint ior_offset, uint ior_value, uint out_offset)
{ {
float eta = (stack_valid(ior_offset))? stack_load_float(stack, ior_offset): __int_as_float(ior_value); float eta = (stack_valid(ior_offset))? stack_load_float(stack, ior_offset): __uint_as_float(ior_value);
eta = fmaxf(eta, 1.0f + 1e-5f); eta = fmaxf(eta, 1.0f + 1e-5f);
eta = (sd->flag & SD_BACKFACING)? 1.0f/eta: eta; eta = (sd->flag & SD_BACKFACING)? 1.0f/eta: eta;
@ -37,7 +37,7 @@ __device void svm_node_layer_weight(ShaderData *sd, float *stack, uint4 node)
{ {
uint blend_offset = node.y; uint blend_offset = node.y;
uint blend_value = node.z; uint blend_value = node.z;
float blend = (stack_valid(blend_offset))? stack_load_float(stack, blend_offset): __int_as_float(blend_value); float blend = (stack_valid(blend_offset))? stack_load_float(stack, blend_offset): __uint_as_float(blend_value);
uint type, out_offset; uint type, out_offset;
decode_node_uchar4(node.w, &type, &out_offset, NULL, NULL); decode_node_uchar4(node.w, &type, &out_offset, NULL, NULL);

@ -77,7 +77,7 @@ __device float3 hsv_to_rgb(float3 hsv)
h = 0.0f; h = 0.0f;
h *= 6.0f; h *= 6.0f;
i = floor(h); i = floorf(h);
f = h - i; f = h - i;
rgb = make_float3(f, f, f); rgb = make_float3(f, f, f);
p = v*(1.0f-s); p = v*(1.0f-s);
@ -112,7 +112,7 @@ __device void svm_node_hsv(KernelGlobals *kg, ShaderData *sd, float *stack, uint
/* remember: fmod doesn't work for negative numbers here */ /* remember: fmod doesn't work for negative numbers here */
color.x += hue + 0.5f; color.x += hue + 0.5f;
color.x = fmod(color.x, 1.0f); color.x = fmodf(color.x, 1.0f);
color.y *= sat; color.y *= sat;
color.z *= val; color.z *= val;

@ -32,10 +32,10 @@ __device_noinline float noise_musgrave_fBm(float3 p, NodeNoiseBasis basis, float
float rmd; float rmd;
float value = 0.0f; float value = 0.0f;
float pwr = 1.0f; float pwr = 1.0f;
float pwHL = pow(lacunarity, -H); float pwHL = powf(lacunarity, -H);
int i; int i;
for(i = 0; i < (int)octaves; i++) { for(i = 0; i < float_to_int(octaves); i++) {
value += snoise(p) * pwr; value += snoise(p) * pwr;
pwr *= pwHL; pwr *= pwHL;
p *= lacunarity; p *= lacunarity;
@ -60,10 +60,10 @@ __device_noinline float noise_musgrave_multi_fractal(float3 p, NodeNoiseBasis ba
float rmd; float rmd;
float value = 1.0f; float value = 1.0f;
float pwr = 1.0f; float pwr = 1.0f;
float pwHL = pow(lacunarity, -H); float pwHL = powf(lacunarity, -H);
int i; int i;
for(i = 0; i < (int)octaves; i++) { for(i = 0; i < float_to_int(octaves); i++) {
value *= (pwr * snoise(p) + 1.0f); value *= (pwr * snoise(p) + 1.0f);
pwr *= pwHL; pwr *= pwHL;
p *= lacunarity; p *= lacunarity;
@ -87,7 +87,7 @@ __device_noinline float noise_musgrave_multi_fractal(float3 p, NodeNoiseBasis ba
__device_noinline float noise_musgrave_hetero_terrain(float3 p, NodeNoiseBasis basis, float H, float lacunarity, float octaves, float offset) __device_noinline float noise_musgrave_hetero_terrain(float3 p, NodeNoiseBasis basis, float H, float lacunarity, float octaves, float offset)
{ {
float value, increment, rmd; float value, increment, rmd;
float pwHL = pow(lacunarity, -H); float pwHL = powf(lacunarity, -H);
float pwr = pwHL; float pwr = pwHL;
int i; int i;
@ -95,7 +95,7 @@ __device_noinline float noise_musgrave_hetero_terrain(float3 p, NodeNoiseBasis b
value = offset + snoise(p); value = offset + snoise(p);
p *= lacunarity; p *= lacunarity;
for(i = 1; i < (int)octaves; i++) { for(i = 1; i < float_to_int(octaves); i++) {
increment = (snoise(p) + offset) * pwr * value; increment = (snoise(p) + offset) * pwr * value;
value += increment; value += increment;
pwr *= pwHL; pwr *= pwHL;
@ -122,7 +122,7 @@ __device_noinline float noise_musgrave_hetero_terrain(float3 p, NodeNoiseBasis b
__device_noinline float noise_musgrave_hybrid_multi_fractal(float3 p, NodeNoiseBasis basis, float H, float lacunarity, float octaves, float offset, float gain) __device_noinline float noise_musgrave_hybrid_multi_fractal(float3 p, NodeNoiseBasis basis, float H, float lacunarity, float octaves, float offset, float gain)
{ {
float result, signal, weight, rmd; float result, signal, weight, rmd;
float pwHL = pow(lacunarity, -H); float pwHL = powf(lacunarity, -H);
float pwr = pwHL; float pwr = pwHL;
int i; int i;
@ -130,7 +130,7 @@ __device_noinline float noise_musgrave_hybrid_multi_fractal(float3 p, NodeNoiseB
weight = gain * result; weight = gain * result;
p *= lacunarity; p *= lacunarity;
for(i = 1; (weight > 0.001f) && (i < (int)octaves); i++) { for(i = 1; (weight > 0.001f) && (i < float_to_int(octaves)); i++) {
if(weight > 1.0f) if(weight > 1.0f)
weight = 1.0f; weight = 1.0f;
@ -159,7 +159,7 @@ __device_noinline float noise_musgrave_hybrid_multi_fractal(float3 p, NodeNoiseB
__device_noinline float noise_musgrave_ridged_multi_fractal(float3 p, NodeNoiseBasis basis, float H, float lacunarity, float octaves, float offset, float gain) __device_noinline float noise_musgrave_ridged_multi_fractal(float3 p, NodeNoiseBasis basis, float H, float lacunarity, float octaves, float offset, float gain)
{ {
float result, signal, weight; float result, signal, weight;
float pwHL = pow(lacunarity, -H); float pwHL = powf(lacunarity, -H);
float pwr = pwHL; float pwr = pwHL;
int i; int i;
@ -168,7 +168,7 @@ __device_noinline float noise_musgrave_ridged_multi_fractal(float3 p, NodeNoiseB
result = signal; result = signal;
weight = 1.0f; weight = 1.0f;
for(i = 1; i < (int)octaves; i++) { for(i = 1; i < float_to_int(octaves); i++) {
p *= lacunarity; p *= lacunarity;
weight = clamp(signal * gain, 0.0f, 1.0f); weight = clamp(signal * gain, 0.0f, 1.0f);
signal = offset - fabsf(snoise(p)); signal = offset - fabsf(snoise(p));

@ -34,7 +34,7 @@ CCL_NAMESPACE_BEGIN
__device int quick_floor(float x) __device int quick_floor(float x)
{ {
return (int)x - ((x < 0) ? 1 : 0); return float_to_int(x) - ((x < 0) ? 1 : 0);
} }
__device float bits_to_01(uint bits) __device float bits_to_01(uint bits)

@ -26,7 +26,7 @@ __device float4 rgb_ramp_lookup(KernelGlobals *kg, int offset, float f, bool int
f = clamp(f, 0.0f, 1.0f)*(RAMP_TABLE_SIZE-1); f = clamp(f, 0.0f, 1.0f)*(RAMP_TABLE_SIZE-1);
/* clamp int as well in case of NaN */ /* clamp int as well in case of NaN */
int i = clamp((int)f, 0, RAMP_TABLE_SIZE-1); int i = clamp(float_to_int(f), 0, RAMP_TABLE_SIZE-1);
float t = f - (float)i; float t = f - (float)i;
float4 a = fetch_node_float(kg, offset+i); float4 a = fetch_node_float(kg, offset+i);

@ -51,9 +51,9 @@ __device_noinline float4 voronoi_Fn(float3 p, float e, int n1, int n2)
/* returns distances in da and point coords in pa */ /* returns distances in da and point coords in pa */
int xx, yy, zz, xi, yi, zi; int xx, yy, zz, xi, yi, zi;
xi = (int)floorf(p.x); xi = floor_to_int(p.x);
yi = (int)floorf(p.y); yi = floor_to_int(p.y);
zi = (int)floorf(p.z); zi = floor_to_int(p.z);
da[0] = 1e10f; da[0] = 1e10f;
da[1] = 1e10f; da[1] = 1e10f;
@ -186,7 +186,7 @@ __device float noise_wave(NodeWaveBasis wave, float a)
} }
else if(wave == NODE_WAVE_SAW) { else if(wave == NODE_WAVE_SAW) {
float b = 2.0f*M_PI_F; float b = 2.0f*M_PI_F;
int n = (int)(a / b); int n = float_to_int(a / b);
a -= n*b; a -= n*b;
if(a < 0.0f) a += b; if(a < 0.0f) a += b;
@ -212,7 +212,7 @@ __device_noinline float noise_turbulence(float3 p, NodeNoiseBasis basis, float o
int i, n; int i, n;
octaves = clamp(octaves, 0.0f, 16.0f); octaves = clamp(octaves, 0.0f, 16.0f);
n = (int)octaves; n = float_to_int(octaves);
for(i = 0; i <= n; i++) { for(i = 0; i <= n; i++) {
float t = noise_basis(fscale*p, basis); float t = noise_basis(fscale*p, basis);

@ -22,14 +22,14 @@ CCL_NAMESPACE_BEGIN
__device void svm_node_value_f(KernelGlobals *kg, ShaderData *sd, float *stack, uint ivalue, uint out_offset) __device void svm_node_value_f(KernelGlobals *kg, ShaderData *sd, float *stack, uint ivalue, uint out_offset)
{ {
stack_store_float(stack, out_offset, __int_as_float(ivalue)); stack_store_float(stack, out_offset, __uint_as_float(ivalue));
} }
__device void svm_node_value_v(KernelGlobals *kg, ShaderData *sd, float *stack, uint out_offset, int *offset) __device void svm_node_value_v(KernelGlobals *kg, ShaderData *sd, float *stack, uint out_offset, int *offset)
{ {
/* read extra data */ /* read extra data */
uint4 node1 = read_node(kg, offset); uint4 node1 = read_node(kg, offset);
float3 p = make_float3(__int_as_float(node1.y), __int_as_float(node1.z), __int_as_float(node1.w)); float3 p = make_float3(__uint_as_float(node1.y), __uint_as_float(node1.z), __uint_as_float(node1.w));
stack_store_float3(stack, out_offset, p); stack_store_float3(stack, out_offset, p);
} }

@ -163,6 +163,25 @@ __device_inline float clamp(float a, float mn, float mx)
#endif #endif
/* Convert a float to an int.
 *
 * Without __KERNEL_SSE2__ this is a plain C cast. With it, the value is
 * loaded into an SSE register and converted with the scalar
 * convert-with-truncation intrinsic.
 *
 * NOTE(review): the two paths may not agree for NaN or for values outside
 * the int range - confirm callers clamp beforehand where that matters. */
__device_inline int float_to_int(float f)
{
#ifndef __KERNEL_SSE2__
	return (int)f;
#else
	const __m128 scalar = _mm_load_ss(&f);
	return _mm_cvtt_ss2si(scalar);
#endif
}
/* Round f down with floorf(), then convert the result to int through
 * float_to_int(). */
__device_inline int floor_to_int(float f)
{
	const float floored = floorf(f);
	return float_to_int(floored);
}
/* Round f up with ceilf(), then convert the result to int through
 * float_to_int(). */
__device_inline int ceil_to_int(float f)
{
	const float ceiled = ceilf(f);
	return float_to_int(ceiled);
}
__device_inline float signf(float f) __device_inline float signf(float f)
{ {
return (f < 0.0f)? -1.0f: 1.0f; return (f < 0.0f)? -1.0f: 1.0f;
@ -990,23 +1009,23 @@ __device_inline void print_int4(const char *label, const int4& a)
#ifndef __KERNEL_OPENCL__ #ifndef __KERNEL_OPENCL__
__device_inline unsigned int as_int(uint i) __device_inline int as_int(uint i)
{ {
union { unsigned int ui; int i; } u; union { uint ui; int i; } u;
u.ui = i; u.ui = i;
return u.i; return u.i;
} }
__device_inline unsigned int as_uint(int i) __device_inline uint as_uint(int i)
{ {
union { unsigned int ui; int i; } u; union { uint ui; int i; } u;
u.i = i; u.i = i;
return u.ui; return u.ui;
} }
__device_inline unsigned int as_uint(float f) __device_inline uint as_uint(float f)
{ {
union { unsigned int i; float f; } u; union { uint i; float f; } u;
u.f = f; u.f = f;
return u.i; return u.i;
} }

@ -70,6 +70,21 @@
#include <tmmintrin.h> /* SSE 3 */ #include <tmmintrin.h> /* SSE 3 */
#include <smmintrin.h> /* SSE 4 */ #include <smmintrin.h> /* SSE 4 */
#define __KERNEL_SSE2__
#define __KERNEL_SSE3__
#define __KERNEL_SSE4__
#else
#ifdef __x86_64__
#include <xmmintrin.h> /* SSE 1 */
#include <emmintrin.h> /* SSE 2 */
#define __KERNEL_SSE2__
#endif
#endif #endif
#ifndef _WIN32 #ifndef _WIN32