Code cleanup: avoid some warnings due to implicit uint/int/float/double conversion.

2013-06-07 16:06:17 +00:00 · 2013-06-07 16:06:17 +00:00 · d835d2f4e6
commit d835d2f4e6
parent c24be7ec6e
23 changed files with 96 additions and 62 deletions
--- a/intern/cycles/blender/blender_sync.cpp
+++ b/intern/cycles/blender/blender_sync.cpp
@@ -379,7 +379,7 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine b_engine, BL::Use
 	params.background = background;

 	/* samples */
-	if(get_boolean(cscene, "progressive") == 0 && params.device.type == DEVICE_CPU){
+	if(get_boolean(cscene, "progressive") == 0 && params.device.type == DEVICE_CPU) {
 		if(background) {
 			params.samples = get_int(cscene, "aa_samples");
 		}
--- a/intern/cycles/kernel/closure/bssrdf.h
+++ b/intern/cycles/kernel/closure/bssrdf.h
@@ -68,13 +68,13 @@ __device float bssrdf_reduced_albedo_Rd(float alpha_, float A, float ro)
 {
 	float sq;

-	sq = sqrt(3.0f*(1.0f - alpha_));
+	sq = sqrtf(3.0f*(1.0f - alpha_));
 	return (alpha_/2.0f)*(1.0f + expf((-4.0f/3.0f)*A*sq))*expf(-sq) - ro;
 }

 __device float bssrdf_compute_reduced_albedo(float A, float ro)
 {
-	const float tolerance = 1e-8;
+	const float tolerance = 1e-8f;
 	const int max_iteration_count = 20;
 	float d, fsub, xn_1 = 0.0f, xn = 1.0f, fxn, fxn_1;
 	int i;
@@ -138,8 +138,8 @@ __device float bssrdf_original(const BSSRDFParams *ss, float r)
 	float rr = r*r;
 	float sr, sv, Rdr, Rdv;

-	sr = sqrt(rr + ss->zr*ss->zr);
-	sv = sqrt(rr + ss->zv*ss->zv);
+	sr = sqrtf(rr + ss->zr*ss->zr);
+	sv = sqrtf(rr + ss->zv*ss->zv);

 	Rdr = ss->zr*(1.0f + ss->sigma_tr*sr)*expf(-ss->sigma_tr*sr)/(sr*sr*sr);
 	Rdv = ss->zv*(1.0f + ss->sigma_tr*sv)*expf(-ss->sigma_tr*sv)/(sv*sv*sv);
--- a/intern/cycles/kernel/kernel_bvh.h
+++ b/intern/cycles/kernel/kernel_bvh.h
@@ -171,8 +171,8 @@ __device_inline void bvh_node_intersect(KernelGlobals *kg,
 	/* decide which nodes to traverse next */
 #ifdef __VISIBILITY_FLAG__
 	/* this visibility test gives a 5% performance hit, how to solve? */
-	*traverseChild0 = (c0max >= c0min) && (__float_as_int(cnodes.z) & visibility);
-	*traverseChild1 = (c1max >= c1min) && (__float_as_int(cnodes.w) & visibility);
+	*traverseChild0 = (c0max >= c0min) && (__float_as_uint(cnodes.z) & visibility);
+	*traverseChild1 = (c1max >= c1min) && (__float_as_uint(cnodes.w) & visibility);
 #else
 	*traverseChild0 = (c0max >= c0min);
 	*traverseChild1 = (c1max >= c1min);
--- a/intern/cycles/kernel/kernel_compat_cpu.h
+++ b/intern/cycles/kernel/kernel_compat_cpu.h
@@ -88,7 +88,7 @@ template<typename T> struct texture_image  {

 	float frac(float x, int *ix)
 	{
-		int i = (int)x - ((x < 0.0f)? 1: 0);
+		int i = float_to_int(x) - ((x < 0.0f)? 1: 0);
 		*ix = i;
 		return x - (float)i;
 	}
--- a/intern/cycles/kernel/kernel_displace.h
+++ b/intern/cycles/kernel/kernel_displace.h
@@ -28,8 +28,8 @@ __device void kernel_shader_evaluate(KernelGlobals *kg, uint4 *input, float4 *ou
 		/* setup shader data */
 		int object = in.x;
 		int prim = in.y;
-		float u = __int_as_float(in.z);
-		float v = __int_as_float(in.w);
+		float u = __uint_as_float(in.z);
+		float v = __uint_as_float(in.w);

 		shader_setup_from_displace(kg, &sd, object, prim, u, v);

@@ -41,8 +41,8 @@ __device void kernel_shader_evaluate(KernelGlobals *kg, uint4 *input, float4 *ou
 	else { // SHADER_EVAL_BACKGROUND
 		/* setup ray */
 		Ray ray;
-		float u = __int_as_float(in.x);
-		float v = __int_as_float(in.y);
+		float u = __uint_as_float(in.x);
+		float v = __uint_as_float(in.y);

 		ray.P = make_float3(0.0f, 0.0f, 0.0f);
 		ray.D = equirectangular_to_direction(u, v);
--- a/intern/cycles/kernel/kernel_globals.h
+++ b/intern/cycles/kernel/kernel_globals.h
@@ -94,7 +94,7 @@ __device float lookup_table_read(KernelGlobals *kg, float x, int offset, int siz
 {
 	x = clamp(x, 0.0f, 1.0f)*(size-1);

-	int index = min((int)x, size-1);
+	int index = min(float_to_int(x), size-1);
 	int nindex = min(index+1, size-1);
 	float t = x - index;

@@ -110,7 +110,7 @@ __device float lookup_table_read_2D(KernelGlobals *kg, float x, float y, int off
 {
 	y = clamp(y, 0.0f, 1.0f)*(ysize-1);

-	int index = min((int)y, ysize-1);
+	int index = min(float_to_int(y), ysize-1);
 	int nindex = min(index+1, ysize-1);
 	float t = y - index;

--- a/intern/cycles/kernel/kernel_light.h
+++ b/intern/cycles/kernel/kernel_light.h
@@ -124,8 +124,8 @@ __device float background_light_pdf(KernelGlobals *kg, float3 direction)
 	if(sin_theta == 0.0f)
 		return 0.0f;

-	int index_u = clamp((int)(uv.x * res), 0, res - 1);
-	int index_v = clamp((int)(uv.y * res), 0, res - 1);
+	int index_u = clamp(float_to_int(uv.x * res), 0, res - 1);
+	int index_v = clamp(float_to_int(uv.y * res), 0, res - 1);

 	/* pdfs in V direction */
 	float2 cdf_last_u = kernel_tex_fetch(__light_background_conditional_cdf, index_v * (res + 1) + res);
--- a/intern/cycles/kernel/kernel_object.h
+++ b/intern/cycles/kernel/kernel_object.h
@@ -201,7 +201,7 @@ __device_inline uint object_particle_id(KernelGlobals *kg, int object)

 	int offset = object*OBJECT_SIZE + OBJECT_PROPERTIES;
 	float4 f = kernel_tex_fetch(__objects, offset);
-	return __float_as_int(f.w);
+	return __float_as_uint(f.w);
 }

 __device_inline float3 object_dupli_generated(KernelGlobals *kg, int object)
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -44,7 +44,7 @@
 CCL_NAMESPACE_BEGIN

 typedef struct PathState {
-	uint flag;
+	int flag;
 	int bounce;

 	int diffuse_bounce;
--- a/intern/cycles/kernel/svm/svm.h
+++ b/intern/cycles/kernel/svm/svm.h
@@ -72,7 +72,7 @@ __device_inline float stack_load_float(float *stack, uint a)

 __device_inline float stack_load_float_default(float *stack, uint a, uint value)
 {
-	return (a == (uint)SVM_STACK_INVALID)? __int_as_float(value): stack_load_float(stack, a);
+	return (a == (uint)SVM_STACK_INVALID)? __uint_as_float(value): stack_load_float(stack, a);
 }

 __device_inline void stack_store_float(float *stack, uint a, float f)
@@ -118,7 +118,7 @@ __device_inline uint4 read_node(KernelGlobals *kg, int *offset)
 __device_inline float4 read_node_float(KernelGlobals *kg, int *offset)
 {
 	uint4 node = kernel_tex_fetch(__svm_nodes, *offset);
-	float4 f = make_float4(__int_as_float(node.x), __int_as_float(node.y), __int_as_float(node.z), __int_as_float(node.w));
+	float4 f = make_float4(__uint_as_float(node.x), __uint_as_float(node.y), __uint_as_float(node.z), __uint_as_float(node.w));
 	(*offset)++;
 	return f;
 }
@@ -126,7 +126,7 @@ __device_inline float4 read_node_float(KernelGlobals *kg, int *offset)
 __device_inline float4 fetch_node_float(KernelGlobals *kg, int offset)
 {
 	uint4 node = kernel_tex_fetch(__svm_nodes, offset);
-	return make_float4(__int_as_float(node.x), __int_as_float(node.y), __int_as_float(node.z), __int_as_float(node.w));
+	return make_float4(__uint_as_float(node.x), __uint_as_float(node.y), __uint_as_float(node.z), __uint_as_float(node.w));
 }

 __device_inline void decode_node_uchar4(uint i, uint *x, uint *y, uint *z, uint *w)
--- a/intern/cycles/kernel/svm/svm_brick.h
+++ b/intern/cycles/kernel/svm/svm_brick.h
@@ -38,14 +38,14 @@ __device_noinline float2 svm_brick(float3 p, float scale, float mortar_size, flo
 	float offset = 0.0f;
 	float x, y;

-	rownum = (int)floor(p.y / row_height);
+	rownum = floor_to_int(p.y / row_height);
 	
 	if(offset_frequency && squash_frequency) {
 		brick_width *= ((int)(rownum) % squash_frequency ) ? 1.0f : squash_amount; /* squash */
 		offset = ((int)(rownum) % offset_frequency ) ? 0 : (brick_width*offset_amount); /* offset */
 	}

-	bricknum = (int)floor((p.x+offset) / brick_width);
+	bricknum = floor_to_int((p.x+offset) / brick_width);

 	x = (p.x+offset) - brick_width*bricknum;
 	y = p.y - row_height*rownum;
--- a/intern/cycles/kernel/svm/svm_checker.h
+++ b/intern/cycles/kernel/svm/svm_checker.h
@@ -29,9 +29,9 @@ __device_noinline float svm_checker(float3 p, float scale)
 	p.y = (p.y + 0.00001f)*0.9999f;
 	p.z = (p.z + 0.00001f)*0.9999f;

-	int xi = (int)fabsf(floorf(p.x));
-	int yi = (int)fabsf(floorf(p.y));
-	int zi = (int)fabsf(floorf(p.z));
+	int xi = float_to_int(fabsf(floorf(p.x)));
+	int yi = float_to_int(fabsf(floorf(p.y)));
+	int zi = float_to_int(fabsf(floorf(p.z)));

 	return ((xi % 2 == yi % 2) == (zi % 2))? 1.0f: 0.0f;
 }
--- a/intern/cycles/kernel/svm/svm_closure.h
+++ b/intern/cycles/kernel/svm/svm_closure.h
@@ -119,8 +119,8 @@ __device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *st
 	float3 N = stack_valid(data_node.y)? stack_load_float3(stack, data_node.y): sd->N; 
 #endif

-	float param1 = (stack_valid(param1_offset))? stack_load_float(stack, param1_offset): __int_as_float(node.z);
-	float param2 = (stack_valid(param2_offset))? stack_load_float(stack, param2_offset): __int_as_float(node.w);
+	float param1 = (stack_valid(param1_offset))? stack_load_float(stack, param1_offset): __uint_as_float(node.z);
+	float param2 = (stack_valid(param2_offset))? stack_load_float(stack, param2_offset): __uint_as_float(node.w);

 	switch(type) {
 		case CLOSURE_BSDF_DIFFUSE_ID: {
@@ -422,8 +422,8 @@ __device void svm_node_closure_volume(KernelGlobals *kg, ShaderData *sd, float *
 	float mix_weight = 1.0f;
 #endif

-	float param1 = (stack_valid(param1_offset))? stack_load_float(stack, param1_offset): __int_as_float(node.z);
-	//float param2 = (stack_valid(param2_offset))? stack_load_float(stack, param2_offset): __int_as_float(node.w);
+	float param1 = (stack_valid(param1_offset))? stack_load_float(stack, param1_offset): __uint_as_float(node.z);
+	//float param2 = (stack_valid(param2_offset))? stack_load_float(stack, param2_offset): __uint_as_float(node.w);

 	switch(type) {
 		case CLOSURE_VOLUME_TRANSPARENT_ID: {
@@ -553,13 +553,13 @@ __device_inline void svm_node_closure_store_weight(ShaderData *sd, float3 weight

 __device void svm_node_closure_set_weight(ShaderData *sd, uint r, uint g, uint b)
 {
-	float3 weight = make_float3(__int_as_float(r), __int_as_float(g), __int_as_float(b));
+	float3 weight = make_float3(__uint_as_float(r), __uint_as_float(g), __uint_as_float(b));
 	svm_node_closure_store_weight(sd, weight);
 }

 __device void svm_node_emission_set_weight_total(KernelGlobals *kg, ShaderData *sd, uint r, uint g, uint b)
 {
-	float3 weight = make_float3(__int_as_float(r), __int_as_float(g), __int_as_float(b));
+	float3 weight = make_float3(__uint_as_float(r), __uint_as_float(g), __uint_as_float(b));

 	if(sd->object != ~0)
 		weight /= object_surface_area(kg, sd->object);
--- a/intern/cycles/kernel/svm/svm_convert.h
+++ b/intern/cycles/kernel/svm/svm_convert.h
@@ -53,7 +53,7 @@ __device void svm_node_convert(ShaderData *sd, float *stack, uint type, uint fro
 		}
 		case NODE_CONVERT_VI: {
 			float3 f = stack_load_float3(stack, from);
-			int i = (f.x + f.y + f.z)*(1.0f/3.0f);
+			int i = (int)((f.x + f.y + f.z)*(1.0f/3.0f));
 			stack_store_int(stack, to, i);
 			break;
 		}
--- a/intern/cycles/kernel/svm/svm_fresnel.h
+++ b/intern/cycles/kernel/svm/svm_fresnel.h
@@ -22,7 +22,7 @@ CCL_NAMESPACE_BEGIN

 __device void svm_node_fresnel(ShaderData *sd, float *stack, uint ior_offset, uint ior_value, uint out_offset)
 {
-	float eta = (stack_valid(ior_offset))? stack_load_float(stack, ior_offset): __int_as_float(ior_value);
+	float eta = (stack_valid(ior_offset))? stack_load_float(stack, ior_offset): __uint_as_float(ior_value);
 	eta = fmaxf(eta, 1.0f + 1e-5f);
 	eta = (sd->flag & SD_BACKFACING)? 1.0f/eta: eta;

@@ -37,7 +37,7 @@ __device void svm_node_layer_weight(ShaderData *sd, float *stack, uint4 node)
 {
 	uint blend_offset = node.y;
 	uint blend_value = node.z;
-	float blend = (stack_valid(blend_offset))? stack_load_float(stack, blend_offset): __int_as_float(blend_value);
+	float blend = (stack_valid(blend_offset))? stack_load_float(stack, blend_offset): __uint_as_float(blend_value);

 	uint type, out_offset;
 	decode_node_uchar4(node.w, &type, &out_offset, NULL, NULL);
--- a/intern/cycles/kernel/svm/svm_hsv.h
+++ b/intern/cycles/kernel/svm/svm_hsv.h
@@ -77,7 +77,7 @@ __device float3 hsv_to_rgb(float3 hsv)
 			h = 0.0f;
 		
 		h *= 6.0f;
-		i = floor(h);
+		i = floorf(h);
 		f = h - i;
 		rgb = make_float3(f, f, f);
 		p = v*(1.0f-s);
@@ -112,7 +112,7 @@ __device void svm_node_hsv(KernelGlobals *kg, ShaderData *sd, float *stack, uint

 	/* remember: fmod doesn't work for negative numbers here */
 	color.x += hue + 0.5f;
-	color.x = fmod(color.x, 1.0f);
+	color.x = fmodf(color.x, 1.0f);
 	color.y *= sat;
 	color.z *= val;

--- a/intern/cycles/kernel/svm/svm_musgrave.h
+++ b/intern/cycles/kernel/svm/svm_musgrave.h
@@ -32,10 +32,10 @@ __device_noinline float noise_musgrave_fBm(float3 p, NodeNoiseBasis basis, float
 	float rmd;
 	float value = 0.0f;
 	float pwr = 1.0f;
-	float pwHL = pow(lacunarity, -H);
+	float pwHL = powf(lacunarity, -H);
 	int i;

-	for(i = 0; i < (int)octaves; i++) {
+	for(i = 0; i < float_to_int(octaves); i++) {
 		value += snoise(p) * pwr;
 		pwr *= pwHL;
 		p *= lacunarity;
@@ -60,10 +60,10 @@ __device_noinline float noise_musgrave_multi_fractal(float3 p, NodeNoiseBasis ba
 	float rmd;
 	float value = 1.0f;
 	float pwr = 1.0f;
-	float pwHL = pow(lacunarity, -H);
+	float pwHL = powf(lacunarity, -H);
 	int i;

-	for(i = 0; i < (int)octaves; i++) {
+	for(i = 0; i < float_to_int(octaves); i++) {
 		value *= (pwr * snoise(p) + 1.0f);
 		pwr *= pwHL;
 		p *= lacunarity;
@@ -87,7 +87,7 @@ __device_noinline float noise_musgrave_multi_fractal(float3 p, NodeNoiseBasis ba
 __device_noinline float noise_musgrave_hetero_terrain(float3 p, NodeNoiseBasis basis, float H, float lacunarity, float octaves, float offset)
 {
 	float value, increment, rmd;
-	float pwHL = pow(lacunarity, -H);
+	float pwHL = powf(lacunarity, -H);
 	float pwr = pwHL;
 	int i;

@@ -95,7 +95,7 @@ __device_noinline float noise_musgrave_hetero_terrain(float3 p, NodeNoiseBasis b
 	value = offset + snoise(p);
 	p *= lacunarity;

-	for(i = 1; i < (int)octaves; i++) {
+	for(i = 1; i < float_to_int(octaves); i++) {
 		increment = (snoise(p) + offset) * pwr * value;
 		value += increment;
 		pwr *= pwHL;
@@ -122,7 +122,7 @@ __device_noinline float noise_musgrave_hetero_terrain(float3 p, NodeNoiseBasis b
 __device_noinline float noise_musgrave_hybrid_multi_fractal(float3 p, NodeNoiseBasis basis, float H, float lacunarity, float octaves, float offset, float gain)
 {
 	float result, signal, weight, rmd;
-	float pwHL = pow(lacunarity, -H);
+	float pwHL = powf(lacunarity, -H);
 	float pwr = pwHL;
 	int i;

@@ -130,7 +130,7 @@ __device_noinline float noise_musgrave_hybrid_multi_fractal(float3 p, NodeNoiseB
 	weight = gain * result;
 	p *= lacunarity;

-	for(i = 1; (weight > 0.001f) && (i < (int)octaves); i++) {
+	for(i = 1; (weight > 0.001f) && (i < float_to_int(octaves)); i++) {
 		if(weight > 1.0f)
 			weight = 1.0f;

@@ -159,7 +159,7 @@ __device_noinline float noise_musgrave_hybrid_multi_fractal(float3 p, NodeNoiseB
 __device_noinline float noise_musgrave_ridged_multi_fractal(float3 p, NodeNoiseBasis basis, float H, float lacunarity, float octaves, float offset, float gain)
 {
 	float result, signal, weight;
-	float pwHL = pow(lacunarity, -H);
+	float pwHL = powf(lacunarity, -H);
 	float pwr = pwHL;
 	int i;

@@ -168,7 +168,7 @@ __device_noinline float noise_musgrave_ridged_multi_fractal(float3 p, NodeNoiseB
 	result = signal;
 	weight = 1.0f;

-	for(i = 1; i < (int)octaves; i++) {
+	for(i = 1; i < float_to_int(octaves); i++) {
 		p *= lacunarity;
 		weight = clamp(signal * gain, 0.0f, 1.0f);
 		signal = offset - fabsf(snoise(p));
--- a/intern/cycles/kernel/svm/svm_noise.h
+++ b/intern/cycles/kernel/svm/svm_noise.h
@@ -34,7 +34,7 @@ CCL_NAMESPACE_BEGIN

 __device int quick_floor(float x)
 {
-	return (int)x - ((x < 0) ? 1 : 0);
+	return float_to_int(x) - ((x < 0) ? 1 : 0);
 }

 __device float bits_to_01(uint bits)
--- a/intern/cycles/kernel/svm/svm_ramp.h
+++ b/intern/cycles/kernel/svm/svm_ramp.h
@@ -26,7 +26,7 @@ __device float4 rgb_ramp_lookup(KernelGlobals *kg, int offset, float f, bool int
 	f = clamp(f, 0.0f, 1.0f)*(RAMP_TABLE_SIZE-1);

 	/* clamp int as well in case of NaN */
-	int i = clamp((int)f, 0, RAMP_TABLE_SIZE-1);
+	int i = clamp(float_to_int(f), 0, RAMP_TABLE_SIZE-1);
 	float t = f - (float)i;

 	float4 a = fetch_node_float(kg, offset+i);
--- a/intern/cycles/kernel/svm/svm_texture.h
+++ b/intern/cycles/kernel/svm/svm_texture.h
@@ -51,9 +51,9 @@ __device_noinline float4 voronoi_Fn(float3 p, float e, int n1, int n2)
 	/* returns distances in da and point coords in pa */
 	int xx, yy, zz, xi, yi, zi;

-	xi = (int)floorf(p.x);
-	yi = (int)floorf(p.y);
-	zi = (int)floorf(p.z);
+	xi = floor_to_int(p.x);
+	yi = floor_to_int(p.y);
+	zi = floor_to_int(p.z);

 	da[0] = 1e10f;
 	da[1] = 1e10f;
@@ -186,7 +186,7 @@ __device float noise_wave(NodeWaveBasis wave, float a)
 	}
 	else if(wave == NODE_WAVE_SAW) {
 		float b = 2.0f*M_PI_F;
-		int n = (int)(a / b);
+		int n = float_to_int(a / b);
 		a -= n*b;
 		if(a < 0.0f) a += b;

@@ -212,7 +212,7 @@ __device_noinline float noise_turbulence(float3 p, NodeNoiseBasis basis, float o
 	int i, n;

 	octaves = clamp(octaves, 0.0f, 16.0f);
-	n = (int)octaves;
+	n = float_to_int(octaves);

 	for(i = 0; i <= n; i++) {
 		float t = noise_basis(fscale*p, basis);
--- a/intern/cycles/kernel/svm/svm_value.h
+++ b/intern/cycles/kernel/svm/svm_value.h
@@ -22,14 +22,14 @@ CCL_NAMESPACE_BEGIN

 __device void svm_node_value_f(KernelGlobals *kg, ShaderData *sd, float *stack, uint ivalue, uint out_offset)
 {
-	stack_store_float(stack, out_offset, __int_as_float(ivalue));
+	stack_store_float(stack, out_offset, __uint_as_float(ivalue));
 }

 __device void svm_node_value_v(KernelGlobals *kg, ShaderData *sd, float *stack, uint out_offset, int *offset)
 {
 	/* read extra data */
 	uint4 node1 = read_node(kg, offset);
-	float3 p = make_float3(__int_as_float(node1.y), __int_as_float(node1.z), __int_as_float(node1.w));
+	float3 p = make_float3(__uint_as_float(node1.y), __uint_as_float(node1.z), __uint_as_float(node1.w));

 	stack_store_float3(stack, out_offset, p);
 }
--- a/intern/cycles/util/util_math.h
+++ b/intern/cycles/util/util_math.h
@@ -163,6 +163,25 @@ __device_inline float clamp(float a, float mn, float mx)

 #endif

+__device_inline int float_to_int(float f)
+{
+#ifdef __KERNEL_SSE2__
+	return _mm_cvtt_ss2si(_mm_load_ss(&f));
+#else
+	return (int)f;
+#endif
+}
+
+__device_inline int floor_to_int(float f)
+{
+	return float_to_int(floorf(f));
+}
+
+__device_inline int ceil_to_int(float f)
+{
+	return float_to_int(ceilf(f));
+}
+
 __device_inline float signf(float f)
 {
 	return (f < 0.0f)? -1.0f: 1.0f;
@@ -990,23 +1009,23 @@ __device_inline void print_int4(const char *label, const int4& a)

 #ifndef __KERNEL_OPENCL__

-__device_inline unsigned int as_int(uint i)
+__device_inline int as_int(uint i)
 {
-	union { unsigned int ui; int i; } u;
+	union { uint ui; int i; } u;
 	u.ui = i;
 	return u.i;
 }

-__device_inline unsigned int as_uint(int i)
+__device_inline uint as_uint(int i)
 {
-	union { unsigned int ui; int i; } u;
+	union { uint ui; int i; } u;
 	u.i = i;
 	return u.ui;
 }

-__device_inline unsigned int as_uint(float f)
+__device_inline uint as_uint(float f)
 {
-	union { unsigned int i; float f; } u;
+	union { uint i; float f; } u;
 	u.f = f;
 	return u.i;
 }
--- a/intern/cycles/util/util_types.h
+++ b/intern/cycles/util/util_types.h
@@ -70,6 +70,21 @@
 #include <tmmintrin.h> /* SSE 3 */
 #include <smmintrin.h> /* SSE 4 */

+#define __KERNEL_SSE2__
+#define __KERNEL_SSE3__
+#define __KERNEL_SSE4__
+
+#else
+
+#ifdef __x86_64__
+
+#include <xmmintrin.h> /* SSE 1 */
+#include <emmintrin.h> /* SSE 2 */
+
+#define __KERNEL_SSE2__
+
+#endif
+
 #endif

 #ifndef _WIN32