diff --git a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp index b4dbd46bd71..b324385134b 100644 --- a/intern/cycles/blender/blender_sync.cpp +++ b/intern/cycles/blender/blender_sync.cpp @@ -379,7 +379,7 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine b_engine, BL::Use params.background = background; /* samples */ - if(get_boolean(cscene, "progressive") == 0 && params.device.type == DEVICE_CPU){ + if(get_boolean(cscene, "progressive") == 0 && params.device.type == DEVICE_CPU) { if(background) { params.samples = get_int(cscene, "aa_samples"); } diff --git a/intern/cycles/kernel/closure/bssrdf.h b/intern/cycles/kernel/closure/bssrdf.h index 844e0433bae..486de4ca65f 100644 --- a/intern/cycles/kernel/closure/bssrdf.h +++ b/intern/cycles/kernel/closure/bssrdf.h @@ -68,13 +68,13 @@ __device float bssrdf_reduced_albedo_Rd(float alpha_, float A, float ro) { float sq; - sq = sqrt(3.0f*(1.0f - alpha_)); + sq = sqrtf(3.0f*(1.0f - alpha_)); return (alpha_/2.0f)*(1.0f + expf((-4.0f/3.0f)*A*sq))*expf(-sq) - ro; } __device float bssrdf_compute_reduced_albedo(float A, float ro) { - const float tolerance = 1e-8; + const float tolerance = 1e-8f; const int max_iteration_count = 20; float d, fsub, xn_1 = 0.0f, xn = 1.0f, fxn, fxn_1; int i; @@ -138,8 +138,8 @@ __device float bssrdf_original(const BSSRDFParams *ss, float r) float rr = r*r; float sr, sv, Rdr, Rdv; - sr = sqrt(rr + ss->zr*ss->zr); - sv = sqrt(rr + ss->zv*ss->zv); + sr = sqrtf(rr + ss->zr*ss->zr); + sv = sqrtf(rr + ss->zv*ss->zv); Rdr = ss->zr*(1.0f + ss->sigma_tr*sr)*expf(-ss->sigma_tr*sr)/(sr*sr*sr); Rdv = ss->zv*(1.0f + ss->sigma_tr*sv)*expf(-ss->sigma_tr*sv)/(sv*sv*sv); diff --git a/intern/cycles/kernel/kernel_bvh.h b/intern/cycles/kernel/kernel_bvh.h index aff9d586e7a..ae9677ed5cb 100644 --- a/intern/cycles/kernel/kernel_bvh.h +++ b/intern/cycles/kernel/kernel_bvh.h @@ -171,8 +171,8 @@ __device_inline void bvh_node_intersect(KernelGlobals *kg, /* decide which nodes 
to traverse next */ #ifdef __VISIBILITY_FLAG__ /* this visibility test gives a 5% performance hit, how to solve? */ - *traverseChild0 = (c0max >= c0min) && (__float_as_int(cnodes.z) & visibility); - *traverseChild1 = (c1max >= c1min) && (__float_as_int(cnodes.w) & visibility); + *traverseChild0 = (c0max >= c0min) && (__float_as_uint(cnodes.z) & visibility); + *traverseChild1 = (c1max >= c1min) && (__float_as_uint(cnodes.w) & visibility); #else *traverseChild0 = (c0max >= c0min); *traverseChild1 = (c1max >= c1min); diff --git a/intern/cycles/kernel/kernel_compat_cpu.h b/intern/cycles/kernel/kernel_compat_cpu.h index 9972a63bfbb..a32b33a727a 100644 --- a/intern/cycles/kernel/kernel_compat_cpu.h +++ b/intern/cycles/kernel/kernel_compat_cpu.h @@ -88,7 +88,7 @@ template struct texture_image { float frac(float x, int *ix) { - int i = (int)x - ((x < 0.0f)? 1: 0); + int i = float_to_int(x) - ((x < 0.0f)? 1: 0); *ix = i; return x - (float)i; } diff --git a/intern/cycles/kernel/kernel_displace.h b/intern/cycles/kernel/kernel_displace.h index 8b95e413b3f..c7fd03e7603 100644 --- a/intern/cycles/kernel/kernel_displace.h +++ b/intern/cycles/kernel/kernel_displace.h @@ -28,8 +28,8 @@ __device void kernel_shader_evaluate(KernelGlobals *kg, uint4 *input, float4 *ou /* setup shader data */ int object = in.x; int prim = in.y; - float u = __int_as_float(in.z); - float v = __int_as_float(in.w); + float u = __uint_as_float(in.z); + float v = __uint_as_float(in.w); shader_setup_from_displace(kg, &sd, object, prim, u, v); @@ -41,8 +41,8 @@ __device void kernel_shader_evaluate(KernelGlobals *kg, uint4 *input, float4 *ou else { // SHADER_EVAL_BACKGROUND /* setup ray */ Ray ray; - float u = __int_as_float(in.x); - float v = __int_as_float(in.y); + float u = __uint_as_float(in.x); + float v = __uint_as_float(in.y); ray.P = make_float3(0.0f, 0.0f, 0.0f); ray.D = equirectangular_to_direction(u, v); diff --git a/intern/cycles/kernel/kernel_globals.h b/intern/cycles/kernel/kernel_globals.h index 
abf1f5b4cb0..fbb5060c409 100644 --- a/intern/cycles/kernel/kernel_globals.h +++ b/intern/cycles/kernel/kernel_globals.h @@ -94,7 +94,7 @@ __device float lookup_table_read(KernelGlobals *kg, float x, int offset, int siz { x = clamp(x, 0.0f, 1.0f)*(size-1); - int index = min((int)x, size-1); + int index = min(float_to_int(x), size-1); int nindex = min(index+1, size-1); float t = x - index; @@ -110,7 +110,7 @@ __device float lookup_table_read_2D(KernelGlobals *kg, float x, float y, int off { y = clamp(y, 0.0f, 1.0f)*(ysize-1); - int index = min((int)y, ysize-1); + int index = min(float_to_int(y), ysize-1); int nindex = min(index+1, ysize-1); float t = y - index; diff --git a/intern/cycles/kernel/kernel_light.h b/intern/cycles/kernel/kernel_light.h index d4d78e413d2..9f198c6c595 100644 --- a/intern/cycles/kernel/kernel_light.h +++ b/intern/cycles/kernel/kernel_light.h @@ -124,8 +124,8 @@ __device float background_light_pdf(KernelGlobals *kg, float3 direction) if(sin_theta == 0.0f) return 0.0f; - int index_u = clamp((int)(uv.x * res), 0, res - 1); - int index_v = clamp((int)(uv.y * res), 0, res - 1); + int index_u = clamp(float_to_int(uv.x * res), 0, res - 1); + int index_v = clamp(float_to_int(uv.y * res), 0, res - 1); /* pdfs in V direction */ float2 cdf_last_u = kernel_tex_fetch(__light_background_conditional_cdf, index_v * (res + 1) + res); diff --git a/intern/cycles/kernel/kernel_object.h b/intern/cycles/kernel/kernel_object.h index 40aa4753daa..bb5ed50c995 100644 --- a/intern/cycles/kernel/kernel_object.h +++ b/intern/cycles/kernel/kernel_object.h @@ -201,7 +201,7 @@ __device_inline uint object_particle_id(KernelGlobals *kg, int object) int offset = object*OBJECT_SIZE + OBJECT_PROPERTIES; float4 f = kernel_tex_fetch(__objects, offset); - return __float_as_int(f.w); + return __float_as_uint(f.w); } __device_inline float3 object_dupli_generated(KernelGlobals *kg, int object) diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h index 
d2de9ba2b44..5915dfed08b 100644 --- a/intern/cycles/kernel/kernel_path.h +++ b/intern/cycles/kernel/kernel_path.h @@ -44,7 +44,7 @@ CCL_NAMESPACE_BEGIN typedef struct PathState { - uint flag; + int flag; int bounce; int diffuse_bounce; diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h index 8f1d2aa0e16..16b684e79a0 100644 --- a/intern/cycles/kernel/svm/svm.h +++ b/intern/cycles/kernel/svm/svm.h @@ -72,7 +72,7 @@ __device_inline float stack_load_float(float *stack, uint a) __device_inline float stack_load_float_default(float *stack, uint a, uint value) { - return (a == (uint)SVM_STACK_INVALID)? __int_as_float(value): stack_load_float(stack, a); + return (a == (uint)SVM_STACK_INVALID)? __uint_as_float(value): stack_load_float(stack, a); } __device_inline void stack_store_float(float *stack, uint a, float f) @@ -118,7 +118,7 @@ __device_inline uint4 read_node(KernelGlobals *kg, int *offset) __device_inline float4 read_node_float(KernelGlobals *kg, int *offset) { uint4 node = kernel_tex_fetch(__svm_nodes, *offset); - float4 f = make_float4(__int_as_float(node.x), __int_as_float(node.y), __int_as_float(node.z), __int_as_float(node.w)); + float4 f = make_float4(__uint_as_float(node.x), __uint_as_float(node.y), __uint_as_float(node.z), __uint_as_float(node.w)); (*offset)++; return f; } @@ -126,7 +126,7 @@ __device_inline float4 read_node_float(KernelGlobals *kg, int *offset) __device_inline float4 fetch_node_float(KernelGlobals *kg, int offset) { uint4 node = kernel_tex_fetch(__svm_nodes, offset); - return make_float4(__int_as_float(node.x), __int_as_float(node.y), __int_as_float(node.z), __int_as_float(node.w)); + return make_float4(__uint_as_float(node.x), __uint_as_float(node.y), __uint_as_float(node.z), __uint_as_float(node.w)); } __device_inline void decode_node_uchar4(uint i, uint *x, uint *y, uint *z, uint *w) diff --git a/intern/cycles/kernel/svm/svm_brick.h b/intern/cycles/kernel/svm/svm_brick.h index 49466c07a97..43dc1a2f295 100644 --- 
a/intern/cycles/kernel/svm/svm_brick.h +++ b/intern/cycles/kernel/svm/svm_brick.h @@ -38,14 +38,14 @@ __device_noinline float2 svm_brick(float3 p, float scale, float mortar_size, flo float offset = 0.0f; float x, y; - rownum = (int)floor(p.y / row_height); + rownum = floor_to_int(p.y / row_height); if(offset_frequency && squash_frequency) { brick_width *= ((int)(rownum) % squash_frequency ) ? 1.0f : squash_amount; /* squash */ offset = ((int)(rownum) % offset_frequency ) ? 0 : (brick_width*offset_amount); /* offset */ } - bricknum = (int)floor((p.x+offset) / brick_width); + bricknum = floor_to_int((p.x+offset) / brick_width); x = (p.x+offset) - brick_width*bricknum; y = p.y - row_height*rownum; diff --git a/intern/cycles/kernel/svm/svm_checker.h b/intern/cycles/kernel/svm/svm_checker.h index c5db0383bc5..ffac07e9cd5 100644 --- a/intern/cycles/kernel/svm/svm_checker.h +++ b/intern/cycles/kernel/svm/svm_checker.h @@ -29,9 +29,9 @@ __device_noinline float svm_checker(float3 p, float scale) p.y = (p.y + 0.00001f)*0.9999f; p.z = (p.z + 0.00001f)*0.9999f; - int xi = (int)fabsf(floorf(p.x)); - int yi = (int)fabsf(floorf(p.y)); - int zi = (int)fabsf(floorf(p.z)); + int xi = float_to_int(fabsf(floorf(p.x))); + int yi = float_to_int(fabsf(floorf(p.y))); + int zi = float_to_int(fabsf(floorf(p.z))); return ((xi % 2 == yi % 2) == (zi % 2))? 1.0f: 0.0f; } diff --git a/intern/cycles/kernel/svm/svm_closure.h b/intern/cycles/kernel/svm/svm_closure.h index bf9823aa53e..847195134e8 100644 --- a/intern/cycles/kernel/svm/svm_closure.h +++ b/intern/cycles/kernel/svm/svm_closure.h @@ -119,8 +119,8 @@ __device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *st float3 N = stack_valid(data_node.y)? stack_load_float3(stack, data_node.y): sd->N; #endif - float param1 = (stack_valid(param1_offset))? stack_load_float(stack, param1_offset): __int_as_float(node.z); - float param2 = (stack_valid(param2_offset))? 
stack_load_float(stack, param2_offset): __int_as_float(node.w); + float param1 = (stack_valid(param1_offset))? stack_load_float(stack, param1_offset): __uint_as_float(node.z); + float param2 = (stack_valid(param2_offset))? stack_load_float(stack, param2_offset): __uint_as_float(node.w); switch(type) { case CLOSURE_BSDF_DIFFUSE_ID: { @@ -422,8 +422,8 @@ __device void svm_node_closure_volume(KernelGlobals *kg, ShaderData *sd, float * float mix_weight = 1.0f; #endif - float param1 = (stack_valid(param1_offset))? stack_load_float(stack, param1_offset): __int_as_float(node.z); - //float param2 = (stack_valid(param2_offset))? stack_load_float(stack, param2_offset): __int_as_float(node.w); + float param1 = (stack_valid(param1_offset))? stack_load_float(stack, param1_offset): __uint_as_float(node.z); + //float param2 = (stack_valid(param2_offset))? stack_load_float(stack, param2_offset): __uint_as_float(node.w); switch(type) { case CLOSURE_VOLUME_TRANSPARENT_ID: { @@ -553,13 +553,13 @@ __device_inline void svm_node_closure_store_weight(ShaderData *sd, float3 weight __device void svm_node_closure_set_weight(ShaderData *sd, uint r, uint g, uint b) { - float3 weight = make_float3(__int_as_float(r), __int_as_float(g), __int_as_float(b)); + float3 weight = make_float3(__uint_as_float(r), __uint_as_float(g), __uint_as_float(b)); svm_node_closure_store_weight(sd, weight); } __device void svm_node_emission_set_weight_total(KernelGlobals *kg, ShaderData *sd, uint r, uint g, uint b) { - float3 weight = make_float3(__int_as_float(r), __int_as_float(g), __int_as_float(b)); + float3 weight = make_float3(__uint_as_float(r), __uint_as_float(g), __uint_as_float(b)); if(sd->object != ~0) weight /= object_surface_area(kg, sd->object); diff --git a/intern/cycles/kernel/svm/svm_convert.h b/intern/cycles/kernel/svm/svm_convert.h index f74915a4bc9..0050813e2c0 100644 --- a/intern/cycles/kernel/svm/svm_convert.h +++ b/intern/cycles/kernel/svm/svm_convert.h @@ -53,7 +53,7 @@ __device void 
svm_node_convert(ShaderData *sd, float *stack, uint type, uint fro } case NODE_CONVERT_VI: { float3 f = stack_load_float3(stack, from); - int i = (f.x + f.y + f.z)*(1.0f/3.0f); + int i = (int)((f.x + f.y + f.z)*(1.0f/3.0f)); stack_store_int(stack, to, i); break; } diff --git a/intern/cycles/kernel/svm/svm_fresnel.h b/intern/cycles/kernel/svm/svm_fresnel.h index d5b415a87ce..492e6070dfd 100644 --- a/intern/cycles/kernel/svm/svm_fresnel.h +++ b/intern/cycles/kernel/svm/svm_fresnel.h @@ -22,7 +22,7 @@ CCL_NAMESPACE_BEGIN __device void svm_node_fresnel(ShaderData *sd, float *stack, uint ior_offset, uint ior_value, uint out_offset) { - float eta = (stack_valid(ior_offset))? stack_load_float(stack, ior_offset): __int_as_float(ior_value); + float eta = (stack_valid(ior_offset))? stack_load_float(stack, ior_offset): __uint_as_float(ior_value); eta = fmaxf(eta, 1.0f + 1e-5f); eta = (sd->flag & SD_BACKFACING)? 1.0f/eta: eta; @@ -37,7 +37,7 @@ __device void svm_node_layer_weight(ShaderData *sd, float *stack, uint4 node) { uint blend_offset = node.y; uint blend_value = node.z; - float blend = (stack_valid(blend_offset))? stack_load_float(stack, blend_offset): __int_as_float(blend_value); + float blend = (stack_valid(blend_offset))? 
stack_load_float(stack, blend_offset): __uint_as_float(blend_value); uint type, out_offset; decode_node_uchar4(node.w, &type, &out_offset, NULL, NULL); diff --git a/intern/cycles/kernel/svm/svm_hsv.h b/intern/cycles/kernel/svm/svm_hsv.h index 26b6141ee3f..348f13f59f2 100644 --- a/intern/cycles/kernel/svm/svm_hsv.h +++ b/intern/cycles/kernel/svm/svm_hsv.h @@ -77,7 +77,7 @@ __device float3 hsv_to_rgb(float3 hsv) h = 0.0f; h *= 6.0f; - i = floor(h); + i = floorf(h); f = h - i; rgb = make_float3(f, f, f); p = v*(1.0f-s); @@ -112,7 +112,7 @@ __device void svm_node_hsv(KernelGlobals *kg, ShaderData *sd, float *stack, uint /* remember: fmod doesn't work for negative numbers here */ color.x += hue + 0.5f; - color.x = fmod(color.x, 1.0f); + color.x = fmodf(color.x, 1.0f); color.y *= sat; color.z *= val; diff --git a/intern/cycles/kernel/svm/svm_musgrave.h b/intern/cycles/kernel/svm/svm_musgrave.h index 425909e59f1..f0ad19a8061 100644 --- a/intern/cycles/kernel/svm/svm_musgrave.h +++ b/intern/cycles/kernel/svm/svm_musgrave.h @@ -32,10 +32,10 @@ __device_noinline float noise_musgrave_fBm(float3 p, NodeNoiseBasis basis, float float rmd; float value = 0.0f; float pwr = 1.0f; - float pwHL = pow(lacunarity, -H); + float pwHL = powf(lacunarity, -H); int i; - for(i = 0; i < (int)octaves; i++) { + for(i = 0; i < float_to_int(octaves); i++) { value += snoise(p) * pwr; pwr *= pwHL; p *= lacunarity; @@ -60,10 +60,10 @@ __device_noinline float noise_musgrave_multi_fractal(float3 p, NodeNoiseBasis ba float rmd; float value = 1.0f; float pwr = 1.0f; - float pwHL = pow(lacunarity, -H); + float pwHL = powf(lacunarity, -H); int i; - for(i = 0; i < (int)octaves; i++) { + for(i = 0; i < float_to_int(octaves); i++) { value *= (pwr * snoise(p) + 1.0f); pwr *= pwHL; p *= lacunarity; @@ -87,7 +87,7 @@ __device_noinline float noise_musgrave_multi_fractal(float3 p, NodeNoiseBasis ba __device_noinline float noise_musgrave_hetero_terrain(float3 p, NodeNoiseBasis basis, float H, float lacunarity, float 
octaves, float offset) { float value, increment, rmd; - float pwHL = pow(lacunarity, -H); + float pwHL = powf(lacunarity, -H); float pwr = pwHL; int i; @@ -95,7 +95,7 @@ __device_noinline float noise_musgrave_hetero_terrain(float3 p, NodeNoiseBasis b value = offset + snoise(p); p *= lacunarity; - for(i = 1; i < (int)octaves; i++) { + for(i = 1; i < float_to_int(octaves); i++) { increment = (snoise(p) + offset) * pwr * value; value += increment; pwr *= pwHL; @@ -122,7 +122,7 @@ __device_noinline float noise_musgrave_hetero_terrain(float3 p, NodeNoiseBasis b __device_noinline float noise_musgrave_hybrid_multi_fractal(float3 p, NodeNoiseBasis basis, float H, float lacunarity, float octaves, float offset, float gain) { float result, signal, weight, rmd; - float pwHL = pow(lacunarity, -H); + float pwHL = powf(lacunarity, -H); float pwr = pwHL; int i; @@ -130,7 +130,7 @@ __device_noinline float noise_musgrave_hybrid_multi_fractal(float3 p, NodeNoiseB weight = gain * result; p *= lacunarity; - for(i = 1; (weight > 0.001f) && (i < (int)octaves); i++) { + for(i = 1; (weight > 0.001f) && (i < float_to_int(octaves)); i++) { if(weight > 1.0f) weight = 1.0f; @@ -159,7 +159,7 @@ __device_noinline float noise_musgrave_hybrid_multi_fractal(float3 p, NodeNoiseB __device_noinline float noise_musgrave_ridged_multi_fractal(float3 p, NodeNoiseBasis basis, float H, float lacunarity, float octaves, float offset, float gain) { float result, signal, weight; - float pwHL = pow(lacunarity, -H); + float pwHL = powf(lacunarity, -H); float pwr = pwHL; int i; @@ -168,7 +168,7 @@ __device_noinline float noise_musgrave_ridged_multi_fractal(float3 p, NodeNoiseB result = signal; weight = 1.0f; - for(i = 1; i < (int)octaves; i++) { + for(i = 1; i < float_to_int(octaves); i++) { p *= lacunarity; weight = clamp(signal * gain, 0.0f, 1.0f); signal = offset - fabsf(snoise(p)); diff --git a/intern/cycles/kernel/svm/svm_noise.h b/intern/cycles/kernel/svm/svm_noise.h index 5ead6486dd6..a55c635b679 100644 --- 
a/intern/cycles/kernel/svm/svm_noise.h +++ b/intern/cycles/kernel/svm/svm_noise.h @@ -34,7 +34,7 @@ CCL_NAMESPACE_BEGIN __device int quick_floor(float x) { - return (int)x - ((x < 0) ? 1 : 0); + return float_to_int(x) - ((x < 0) ? 1 : 0); } __device float bits_to_01(uint bits) diff --git a/intern/cycles/kernel/svm/svm_ramp.h b/intern/cycles/kernel/svm/svm_ramp.h index d33a2dfdc74..24d6dc3c282 100644 --- a/intern/cycles/kernel/svm/svm_ramp.h +++ b/intern/cycles/kernel/svm/svm_ramp.h @@ -26,7 +26,7 @@ __device float4 rgb_ramp_lookup(KernelGlobals *kg, int offset, float f, bool int f = clamp(f, 0.0f, 1.0f)*(RAMP_TABLE_SIZE-1); /* clamp int as well in case of NaN */ - int i = clamp((int)f, 0, RAMP_TABLE_SIZE-1); + int i = clamp(float_to_int(f), 0, RAMP_TABLE_SIZE-1); float t = f - (float)i; float4 a = fetch_node_float(kg, offset+i); diff --git a/intern/cycles/kernel/svm/svm_texture.h b/intern/cycles/kernel/svm/svm_texture.h index a4f6691435c..ba5b772b3a1 100644 --- a/intern/cycles/kernel/svm/svm_texture.h +++ b/intern/cycles/kernel/svm/svm_texture.h @@ -51,9 +51,9 @@ __device_noinline float4 voronoi_Fn(float3 p, float e, int n1, int n2) /* returns distances in da and point coords in pa */ int xx, yy, zz, xi, yi, zi; - xi = (int)floorf(p.x); - yi = (int)floorf(p.y); - zi = (int)floorf(p.z); + xi = floor_to_int(p.x); + yi = floor_to_int(p.y); + zi = floor_to_int(p.z); da[0] = 1e10f; da[1] = 1e10f; @@ -186,7 +186,7 @@ __device float noise_wave(NodeWaveBasis wave, float a) } else if(wave == NODE_WAVE_SAW) { float b = 2.0f*M_PI_F; - int n = (int)(a / b); + int n = float_to_int(a / b); a -= n*b; if(a < 0.0f) a += b; @@ -212,7 +212,7 @@ __device_noinline float noise_turbulence(float3 p, NodeNoiseBasis basis, float o int i, n; octaves = clamp(octaves, 0.0f, 16.0f); - n = (int)octaves; + n = float_to_int(octaves); for(i = 0; i <= n; i++) { float t = noise_basis(fscale*p, basis); diff --git a/intern/cycles/kernel/svm/svm_value.h b/intern/cycles/kernel/svm/svm_value.h index 
80cb285f80c..86d98ee67d6 100644 --- a/intern/cycles/kernel/svm/svm_value.h +++ b/intern/cycles/kernel/svm/svm_value.h @@ -22,14 +22,14 @@ CCL_NAMESPACE_BEGIN __device void svm_node_value_f(KernelGlobals *kg, ShaderData *sd, float *stack, uint ivalue, uint out_offset) { - stack_store_float(stack, out_offset, __int_as_float(ivalue)); + stack_store_float(stack, out_offset, __uint_as_float(ivalue)); } __device void svm_node_value_v(KernelGlobals *kg, ShaderData *sd, float *stack, uint out_offset, int *offset) { /* read extra data */ uint4 node1 = read_node(kg, offset); - float3 p = make_float3(__int_as_float(node1.y), __int_as_float(node1.z), __int_as_float(node1.w)); + float3 p = make_float3(__uint_as_float(node1.y), __uint_as_float(node1.z), __uint_as_float(node1.w)); stack_store_float3(stack, out_offset, p); } diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h index 6fe1b2bcf54..68668d88d44 100644 --- a/intern/cycles/util/util_math.h +++ b/intern/cycles/util/util_math.h @@ -163,6 +163,25 @@ __device_inline float clamp(float a, float mn, float mx) #endif +__device_inline int float_to_int(float f) +{ +#ifdef __KERNEL_SSE2__ + return _mm_cvtt_ss2si(_mm_load_ss(&f)); +#else + return (int)f; +#endif +} + +__device_inline int floor_to_int(float f) +{ + return float_to_int(floorf(f)); +} + +__device_inline int ceil_to_int(float f) +{ + return float_to_int(ceilf(f)); +} + __device_inline float signf(float f) { return (f < 0.0f)? 
-1.0f: 1.0f; @@ -990,23 +1009,23 @@ __device_inline void print_int4(const char *label, const int4& a) #ifndef __KERNEL_OPENCL__ -__device_inline unsigned int as_int(uint i) +__device_inline int as_int(uint i) { - union { unsigned int ui; int i; } u; + union { uint ui; int i; } u; u.ui = i; return u.i; } -__device_inline unsigned int as_uint(int i) +__device_inline uint as_uint(int i) { - union { unsigned int ui; int i; } u; + union { uint ui; int i; } u; u.i = i; return u.ui; } -__device_inline unsigned int as_uint(float f) +__device_inline uint as_uint(float f) { - union { unsigned int i; float f; } u; + union { uint i; float f; } u; u.f = f; return u.i; } diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h index bb6de1197e7..fe1cb61ffa9 100644 --- a/intern/cycles/util/util_types.h +++ b/intern/cycles/util/util_types.h @@ -70,6 +70,21 @@ #include <pmmintrin.h> /* SSE 3 */ #include <smmintrin.h> /* SSE 4 */ +#define __KERNEL_SSE2__ +#define __KERNEL_SSE3__ +#define __KERNEL_SSE4__ + +#else + +#ifdef __x86_64__ + +#include <xmmintrin.h> /* SSE 1 */ +#include <emmintrin.h> /* SSE 2 */ + +#define __KERNEL_SSE2__ + +#endif + #endif #ifndef _WIN32