Cycles: Code cleanup, split kernel

2015-05-27 00:13:32 +05:00 · 2015-05-27 00:13:32 +05:00 · 92022218c2
commit 92022218c2
parent da192fb3a7
9 changed files with 161 additions and 78 deletions
--- a/intern/cycles/kernel/split/kernel_background_buffer_update.h
+++ b/intern/cycles/kernel/split/kernel_background_buffer_update.h
@ -130,18 +130,24 @@ ccl_device char kernel_background_buffer_update(
 #ifdef __WORK_STEALING__
 	my_work = work_array[ray_index];
 	sample = get_my_sample(my_work, sw, sh, parallel_samples, ray_index) + start_sample;
-	get_pixel_tile_position(&pixel_x, &pixel_y, &tile_x, &tile_y, my_work, sw, sh, sx, sy, parallel_samples, ray_index);
+	get_pixel_tile_position(&pixel_x, &pixel_y,
 	                        &tile_x, &tile_y,
 	                        my_work,
 	                        sw, sh, sx, sy,
 	                        parallel_samples,
 	                        ray_index);
 	my_sample_tile = 0;
 	initial_per_sample_output_buffers = per_sample_output_buffers;
 	initial_rng = rng_state;
-#else // __WORK_STEALING__
+#else  /* __WORK_STEALING__ */
 	sample = work_array[ray_index];
 	int tile_index = ray_index / parallel_samples;
 	/* buffer and rng_state's stride is "stride". Find x and y using ray_index */
 	tile_x = tile_index % sw;
 	tile_y = tile_index / sw;
 	my_sample_tile = ray_index - (tile_index * parallel_samples);
-#endif
+#endif  /* __WORK_STEALING__ */
 	rng_state += (rng_state_offset_x + tile_x) + (rng_state_offset_y + tile_y) * rng_state_stride;
 	per_sample_output_buffers += (((tile_x + (tile_y * stride)) * parallel_samples) + my_sample_tile) * kernel_data.film.pass_stride;
@ -189,11 +195,12 @@ ccl_device char kernel_background_buffer_update(
 			/* If work is invalid, this means no more work is available and the thread may exit */
 			ASSIGN_RAY_STATE(ray_state, ray_index, RAY_INACTIVE);
 		}
-#else
+#else  /* __WORK_STEALING__ */
 		if((sample + parallel_samples) >= end_sample) {
 			ASSIGN_RAY_STATE(ray_state, ray_index, RAY_INACTIVE);
 		}
-#endif
+#endif  /* __WORK_STEALING__ */
 		if(IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) {
 #ifdef __WORK_STEALING__
 			work_array[ray_index] = my_work;
@ -208,20 +215,22 @@ ccl_device char kernel_background_buffer_update(
 			/* Remap per_sample_output_buffers according to the current work */
 			per_sample_output_buffers = initial_per_sample_output_buffers
 				+ (((tile_x + (tile_y * stride)) * parallel_samples) + my_sample_tile) * kernel_data.film.pass_stride;
-#else
+#else  /* __WORK_STEALING__ */
 			work_array[ray_index] = sample + parallel_samples;
 			sample = work_array[ray_index];
 			/* Get ray position from ray index */
 			pixel_x = sx + ((ray_index / parallel_samples) % sw);
 			pixel_y = sy + ((ray_index / parallel_samples) / sw);
-#endif
+#endif  /* __WORK_STEALING__ */
-			/* initialize random numbers and ray */
+			/* Initialize random numbers and ray. */
 			kernel_path_trace_setup(kg, rng_state, sample, pixel_x, pixel_y, rng, ray);
 			if(ray->t != 0.0f) {
-				/* Initialize throughput, L_transparent, Ray, PathState; These rays proceed with path-iteration*/
+				/* Initialize throughput, L_transparent, Ray, PathState;
 				 * These rays proceed with path-iteration.
 				 */
 				*throughput = make_float3(1.0f, 1.0f, 1.0f);
 				*L_transparent = 0.0f;
 				path_radiance_init(L, kernel_data.film.use_light_pass);
@ -232,9 +241,9 @@ ccl_device char kernel_background_buffer_update(
 				ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
 				enqueue_flag = 1;
 			} else {
-				/*These rays do not participate in path-iteration */
+				/* These rays do not participate in path-iteration. */
 				float4 L_rad = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-				/* accumulate result in output buffer */
+				/* Accumulate result in output buffer. */
 				kernel_write_pass_float4(per_sample_output_buffers, sample, L_rad);
 				path_rng_end(kg, rng_state, *rng);
--- a/intern/cycles/kernel/split/kernel_data_init.h
+++ b/intern/cycles/kernel/split/kernel_data_init.h
@ -287,9 +287,9 @@ ccl_device void kernel_data_init(
 		work_pool_wgs[group_index] = 0;
 	}
 	barrier(CLK_LOCAL_MEM_FENCE);
-#endif // __WORK_STEALING__
+#endif  /* __WORK_STEALING__ */
-	/* Initialize queue data and queue index */
+	/* Initialize queue data and queue index. */
 	if(thread_index < queuesize) {
 		/* Initialize active ray queue */
 		Queue_data[QUEUE_ACTIVE_AND_REGENERATED_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
@ -319,7 +319,9 @@ ccl_device void kernel_data_init(
 		int ray_index = x + y * (sw * parallel_samples);
-		/* This is the first assignment to ray_state; So we dont use ASSIGN_RAY_STATE macro */
+		/* This is the first assignment to ray_state;
 		 * So we don't use ASSIGN_RAY_STATE macro.
 		 */
 		ray_state[ray_index] = RAY_ACTIVE;
 		unsigned int my_sample;
@ -331,58 +333,76 @@ ccl_device void kernel_data_init(
 #ifdef __WORK_STEALING__
 		unsigned int my_work = 0;
-		/* get work */
+		/* Get work. */
 		get_next_work(work_pool_wgs, &my_work, sw, sh, num_samples, parallel_samples, ray_index);
-		/* Get the sample associated with the work */
+		/* Get the sample associated with the work. */
 		my_sample = get_my_sample(my_work, sw, sh, parallel_samples, ray_index) + start_sample;
 		my_sample_tile = 0;
-		/* Get pixel and tile position associated with the work */
+		/* Get pixel and tile position associated with the work. */
-		get_pixel_tile_position(&pixel_x, &pixel_y, &tile_x, &tile_y, my_work, sw, sh, sx, sy, parallel_samples, ray_index);
+		get_pixel_tile_position(&pixel_x, &pixel_y,
 		                        &tile_x, &tile_y,
 		                        my_work,
 		                        sw, sh, sx, sy,
 		                        parallel_samples,
 		                        ray_index);
 		work_array[ray_index] = my_work;
-#else // __WORK_STEALING__
+#else  /* __WORK_STEALING__ */
 		unsigned int tile_index = ray_index / parallel_samples;
 		tile_x = tile_index % sw;
 		tile_y = tile_index / sw;
 		my_sample_tile = ray_index - (tile_index * parallel_samples);
 		my_sample = my_sample_tile + start_sample;
-		/* Initialize work array */
+		/* Initialize work array. */
 		work_array[ray_index] = my_sample ;
-		/* Calculate pixel position of this ray */
+		/* Calculate pixel position of this ray. */
 		pixel_x = sx + tile_x;
 		pixel_y = sy + tile_y;
-#endif // __WORK_STEALING__
+#endif  /* __WORK_STEALING__ */
 		rng_state += (rng_state_offset_x + tile_x) + (rng_state_offset_y + tile_y) * rng_state_stride;
-		/* Initialise per_sample_output_buffers to all zeros */
+		/* Initialize per_sample_output_buffers to all zeros. */
 		per_sample_output_buffers += (((tile_x + (tile_y * stride)) * parallel_samples) + (my_sample_tile)) * kernel_data.film.pass_stride;
 		int per_sample_output_buffers_iterator = 0;
-		for(per_sample_output_buffers_iterator = 0; per_sample_output_buffers_iterator < kernel_data.film.pass_stride; per_sample_output_buffers_iterator++) {
+		for(per_sample_output_buffers_iterator = 0;
 		    per_sample_output_buffers_iterator < kernel_data.film.pass_stride;
 		    per_sample_output_buffers_iterator++)
 		{
 			per_sample_output_buffers[per_sample_output_buffers_iterator] = 0.0f;
 		}
-		/* initialize random numbers and ray */
+		/* Initialize random numbers and ray. */
-		kernel_path_trace_setup(kg, rng_state, my_sample, pixel_x, pixel_y, &rng_coop[ray_index], &Ray_coop[ray_index]);
+		kernel_path_trace_setup(kg,
 		                        rng_state,
 		                        my_sample,
 		                        pixel_x, pixel_y,
 		                        &rng_coop[ray_index],
 		                        &Ray_coop[ray_index]);
 		if(Ray_coop[ray_index].t != 0.0f) {
-			/* Initialize throuput, L_transparent, Ray, PathState; These rays proceed with path-iteration*/
+			/* Initialize throughput, L_transparent, Ray, PathState;
 			 * These rays proceed with path-iteration.
 			 */
 			throughput_coop[ray_index] = make_float3(1.0f, 1.0f, 1.0f);
 			L_transparent_coop[ray_index] = 0.0f;
 			path_radiance_init(&PathRadiance_coop[ray_index], kernel_data.film.use_light_pass);
-			path_state_init(kg, &PathState_coop[ray_index], &rng_coop[ray_index], my_sample, &Ray_coop[ray_index]);
+			path_state_init(kg,
 			                &PathState_coop[ray_index],
 			                &rng_coop[ray_index],
 			                my_sample,
 			                &Ray_coop[ray_index]);
 #ifdef __KERNEL_DEBUG__
 			debug_data_init(&debugdata_coop[ray_index]);
 #endif
 		} else {
-			/*These rays do not participate in path-iteration */
+			/* These rays do not participate in path-iteration. */
 			float4 L_rad = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-			/* accumulate result in output buffer */
+			/* Accumulate result in output buffer. */
 			kernel_write_pass_float4(per_sample_output_buffers, my_sample, L_rad);
 			path_rng_end(kg, rng_state, rng_coop[ray_index]);
@ -390,7 +410,7 @@ ccl_device void kernel_data_init(
 		}
 	}
-	/* Mark rest of the ray-state indices as RAY_INACTIVE */
+	/* Mark rest of the ray-state indices as RAY_INACTIVE. */
 	if(thread_index < (get_global_size(0) * get_global_size(1)) - (sh * (sw * parallel_samples))) {
 		/* First assignment, hence we dont use ASSIGN_RAY_STATE macro */
 		ray_state[((sw * parallel_samples) * sh) + thread_index] = RAY_INACTIVE;
--- a/intern/cycles/kernel/split/kernel_direct_lighting.h
+++ b/intern/cycles/kernel/split/kernel_direct_lighting.h
@ -63,7 +63,7 @@ ccl_device char kernel_direct_lighting(
 {
 	char enqueue_flag = 0;
 	if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
-		/* Load kernel globals structure and ShaderData structure */
+		/* Load kernel globals structure and ShaderData structure. */
 		KernelGlobals *kg = (KernelGlobals *)globals;
 		ShaderData *sd = (ShaderData *)shader_data;
 		ShaderData *sd_DL  = (ShaderData *)shader_DL;
@ -72,15 +72,22 @@ ccl_device char kernel_direct_lighting(
 		/* direct lighting */
 #ifdef __EMISSION__
-		if((kernel_data.integrator.use_direct_light && (ccl_fetch(sd, flag) & SD_BSDF_HAS_EVAL))) {
+		if((kernel_data.integrator.use_direct_light &&
-			/* sample illumination from lights to find path contribution */
+		    (ccl_fetch(sd, flag) & SD_BSDF_HAS_EVAL)))
 		{
 			/* Sample illumination from lights to find path contribution. */
 			ccl_global RNG* rng = &rng_coop[ray_index];
 			float light_t = path_state_rng_1D(kg, rng, state, PRNG_LIGHT);
 			float light_u, light_v;
 			path_state_rng_2D(kg, rng, state, PRNG_LIGHT_U, &light_u, &light_v);
 			LightSample ls;
-			light_sample(kg, light_t, light_u, light_v, ccl_fetch(sd, time), ccl_fetch(sd, P), state->bounce, &ls);
+			light_sample(kg,
 			             light_t, light_u, light_v,
 			             ccl_fetch(sd, time),
 			             ccl_fetch(sd, P),
 			             state->bounce,
 			             &ls);
 			Ray light_ray;
 #ifdef __OBJECT_MOTION__
@ -89,17 +96,21 @@ ccl_device char kernel_direct_lighting(
 			BsdfEval L_light;
 			bool is_lamp;
-			if(direct_emission(kg, sd, &ls, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce, sd_DL)) {
+			if(direct_emission(kg, sd, &ls, &light_ray, &L_light, &is_lamp,
-				/* write intermediate data to global memory to access from the next kernel */
+			                   state->bounce, state->transparent_bounce, sd_DL))
 			{
 				/* Write intermediate data to global memory to access from
 				 * the next kernel.
 				 */
 				LightRay_coop[ray_index] = light_ray;
 				BSDFEval_coop[ray_index] = L_light;
 				ISLamp_coop[ray_index] = is_lamp;
-				/// mark ray state for next shadow kernel
+				/* Mark ray state for next shadow kernel. */
 				ADD_RAY_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL);
 				enqueue_flag = 1;
 			}
 		}
-#endif
+#endif  /* __EMISSION__ */
 	}
 	return enqueue_flag;
 }
--- a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
+++ b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
@ -121,21 +121,30 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
 #ifdef __WORK_STEALING__
 		my_work = work_array[ray_index];
 		sample = get_my_sample(my_work, sw, sh, parallel_samples, ray_index) + start_sample;
-		get_pixel_tile_position(&pixel_x, &pixel_y, &tile_x, &tile_y, my_work, sw, sh, sx, sy, parallel_samples, ray_index);
+		get_pixel_tile_position(&pixel_x, &pixel_y,
 		                        &tile_x, &tile_y,
 		                        my_work,
 		                        sw, sh, sx, sy,
 		                        parallel_samples,
 		                        ray_index);
 		my_sample_tile = 0;
-#else // __WORK_STEALING__
+#else  /* __WORK_STEALING__ */
 		sample = work_array[ray_index];
-		/* buffer's stride is "stride"; Find x and y using ray_index */
+		/* Buffer's stride is "stride"; Find x and y using ray_index. */
 		int tile_index = ray_index / parallel_samples;
 		tile_x = tile_index % sw;
 		tile_y = tile_index / sw;
 		my_sample_tile = ray_index - (tile_index * parallel_samples);
-#endif // __WORK_STEALING__
+#endif  /* __WORK_STEALING__ */
-		per_sample_output_buffers += (((tile_x + (tile_y * stride)) * parallel_samples) + my_sample_tile) * kernel_data.film.pass_stride;
+		per_sample_output_buffers +=
 		    (((tile_x + (tile_y * stride)) * parallel_samples) + my_sample_tile) *
 		    kernel_data.film.pass_stride;
 		/* holdout */
 #ifdef __HOLDOUT__
-		if((ccl_fetch(sd, flag) & (SD_HOLDOUT|SD_HOLDOUT_MASK)) && (state->flag & PATH_RAY_CAMERA)) {
+		if((ccl_fetch(sd, flag) & (SD_HOLDOUT|SD_HOLDOUT_MASK)) &&
 		   (state->flag & PATH_RAY_CAMERA))
 		{
 			if(kernel_data.background.transparent) {
 				float3 holdout_weight;
@ -153,20 +162,24 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
 				*enqueue_flag = 1;
 			}
 		}
-#endif
+#endif  /* __HOLDOUT__ */
 	}
 	if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
 		PathRadiance *L = &PathRadiance_coop[ray_index];
-		/* holdout mask objects do not write data passes */
+		/* Holdout mask objects do not write data passes. */
-		kernel_write_data_passes(kg, per_sample_output_buffers, L, sd, sample, state, throughput);
+		kernel_write_data_passes(kg,
-
+		                         per_sample_output_buffers,
-		/* blurring of bsdf after bounces, for rays that have a small likelihood
+		                         L,
-		 * of following this particular path (diffuse, rough glossy) */
+		                         sd,
 		                         sample,
 		                         state,
 		                         throughput);
 		/* Blurring of bsdf after bounces, for rays that have a small likelihood
 		 * of following this particular path (diffuse, rough glossy).
 		 */
 		if(kernel_data.integrator.filter_glossy != FLT_MAX) {
 			float blur_pdf = kernel_data.integrator.filter_glossy*state->min_ray_pdf;
 			if(blur_pdf < 1.0f) {
 				float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f;
 				shader_bsdf_blur(kg, sd, blur_roughness);
@ -176,15 +189,21 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
 #ifdef __EMISSION__
 		/* emission */
 		if(ccl_fetch(sd, flag) & SD_EMISSION) {
-			/* todo: is isect.t wrong here for transparent surfaces? */
+			/* TODO(sergey): is isect.t wrong here for transparent surfaces? */
-			float3 emission = indirect_primitive_emission(kg, sd, Intersection_coop[ray_index].t, state->flag, state->ray_pdf);
+			float3 emission = indirect_primitive_emission(
 			        kg,
 			        sd,
 			        Intersection_coop[ray_index].t,
 			        state->flag,
 			        state->ray_pdf);
 			path_radiance_accum_emission(L, throughput, emission, state->bounce);
 		}
-#endif
+#endif  /* __EMISSION__ */
-		/* path termination. this is a strange place to put the termination, it's
+		/* Path termination. This is a strange place to put the termination, it's
 		 * mainly due to the mixed in MIS that we use. gives too many unneeded
-		 * shader evaluations, only need emission if we are going to terminate */
+		 * shader evaluations, only need emission if we are going to terminate.
 		 */
 		float probability = path_state_terminate_probability(kg, state, throughput);
 		if(probability == 0.0f) {
@ -195,7 +214,6 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
 		if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
 			if(probability != 1.0f) {
 				float terminate = path_state_rng_1D_for_decision(kg, rng, state, PRNG_TERMINATE);
 				if(terminate >= probability) {
 					ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
 					*enqueue_flag = 1;
@ -209,7 +227,9 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
 #ifdef __AO__
 	if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
 		/* ambient occlusion */
-		if(kernel_data.integrator.use_ambient_occlusion || (ccl_fetch(sd, flag) & SD_AO)) {
+		if(kernel_data.integrator.use_ambient_occlusion ||
 		   (ccl_fetch(sd, flag) & SD_AO))
 		{
 			/* todo: solve correlation */
 			float bsdf_u, bsdf_v;
 			path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
@ -240,5 +260,5 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
 			}
 		}
 	}
-#endif
+#endif  /* __AO__ */
 }
--- a/intern/cycles/kernel/split/kernel_lamp_emission.h
+++ b/intern/cycles/kernel/split/kernel_lamp_emission.h
@ -56,7 +56,9 @@ ccl_device void kernel_lamp_emission(
        int parallel_samples,                  /* Number of samples to be processed in parallel */
        int ray_index)
 {
-	if(IS_STATE(ray_state, ray_index, RAY_ACTIVE) || IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
+	if(IS_STATE(ray_state, ray_index, RAY_ACTIVE) ||
 	   IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND))
 	{
 		KernelGlobals *kg = (KernelGlobals *)globals;
 		ShaderData *sd = (ShaderData *)shader_data;
 		PathRadiance *L = &PathRadiance_coop[ray_index];
@ -84,7 +86,8 @@ ccl_device void kernel_lamp_emission(
 				path_radiance_accum_emission(L, throughput, emission, state.bounce);
 			}
 		}
-#endif
+#endif  /* __LAMP_MIS__ */
 		/* __VOLUME__ feature is disabled */
 #if 0
 #ifdef __VOLUME__
@ -149,7 +152,7 @@ ccl_device void kernel_lamp_emission(
 				}
 			}
 			else
-#endif
+#endif  /* __VOLUME_DECOUPLED__ */
 			{
 				/* integrate along volume segment with distance sampling */
 				ShaderData volume_sd;
@ -167,10 +170,10 @@ ccl_device void kernel_lamp_emission(
 					else
 						break;
 				}
-#endif
+#endif  /* __VOLUME_SCATTER__ */
-			}
+			}
-		}
+		}
-#endif
+#endif  /* __VOLUME__ */
 #endif
 	}
 }
--- a/intern/cycles/kernel/split/kernel_next_iteration_setup.h
+++ b/intern/cycles/kernel/split/kernel_next_iteration_setup.h
@ -81,14 +81,16 @@ ccl_device char kernel_next_iteration_setup(
 {
 	char enqueue_flag = 0;
-	/* Load kernel globals structure and ShaderData structure */
+	/* Load kernel globals structure and ShaderData structure. */
 	KernelGlobals *kg = (KernelGlobals *)globals;
 	ShaderData *sd = (ShaderData *)shader_data;
 	PathRadiance *L = 0x0;
 	ccl_global PathState *state = 0x0;
-	/* Path radiance update for AO/Direct_lighting's shadow blocked */
+	/* Path radiance update for AO/Direct_lighting's shadow blocked. */
-	if(IS_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL) || IS_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO)) {
+	if(IS_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL) ||
 	   IS_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO))
 	 {
 		state = &PathState_coop[ray_index];
 		L = &PathRadiance_coop[ray_index];
 		float3 _throughput = throughput_coop[ray_index];
@ -97,7 +99,12 @@ ccl_device char kernel_next_iteration_setup(
 			float3 shadow = LightRay_ao_coop[ray_index].P;
 			char update_path_radiance = LightRay_ao_coop[ray_index].t;
 			if(update_path_radiance) {
-				path_radiance_accum_ao(L, _throughput, AOAlpha_coop[ray_index], AOBSDF_coop[ray_index], shadow, state->bounce);
+				path_radiance_accum_ao(L,
 				                       _throughput,
 				                       AOAlpha_coop[ray_index],
 				                       AOBSDF_coop[ray_index],
 				                       shadow,
 				                       state->bounce);
 			}
 			REMOVE_RAY_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO);
 		}
@ -107,7 +114,13 @@ ccl_device char kernel_next_iteration_setup(
 			char update_path_radiance = LightRay_dl_coop[ray_index].t;
 			if(update_path_radiance) {
 				BsdfEval L_light = BSDFEval_coop[ray_index];
-				path_radiance_accum_light(L, _throughput, &L_light, shadow, 1.0f, state->bounce, ISLamp_coop[ray_index]);
+				path_radiance_accum_light(L,
 				                          _throughput,
 				                          &L_light,
 				                          shadow,
 				                          1.0f,
 				                          state->bounce,
 				                          ISLamp_coop[ray_index]);
 			}
 			REMOVE_RAY_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL);
 		}
--- a/intern/cycles/kernel/split/kernel_shader_eval.h
+++ b/intern/cycles/kernel/split/kernel_shader_eval.h
@ -63,7 +63,12 @@ ccl_device void kernel_shader_eval(
 		ccl_global PathState *state = &PathState_coop[ray_index];
 		Ray ray = Ray_coop[ray_index];
-		shader_setup_from_ray(kg, sd, isect, &ray, state->bounce, state->transparent_bounce);
+		shader_setup_from_ray(kg,
 		                      sd,
 		                      isect,
 		                      &ray,
 		                      state->bounce,
 		                      state->transparent_bounce);
 		float rbsdf = path_state_rng_1D_for_decision(kg, rng, state, PRNG_BSDF);
 		shader_eval_surface(kg, sd, rbsdf, state->flag, SHADER_CONTEXT_MAIN);
 	}
--- a/intern/cycles/kernel/split/kernel_split_common.h
+++ b/intern/cycles/kernel/split/kernel_split_common.h
@ -14,8 +14,8 @@
 * limitations under the License.
 */
-#ifndef  _KERNEL_SPLIT_H_
+#ifndef  __KERNEL_SPLIT_H__
-#define  _KERNEL_SPLIT_H_
+#define  __KERNEL_SPLIT_H__
 #include "kernel_compat_opencl.h"
 #include "kernel_math.h"
@ -59,4 +59,4 @@
 #include "kernel_queues.h"
 #include "kernel_work_stealing.h"
-#endif
+#endif  /* __KERNEL_SPLIT_H__ */
--- a/intern/cycles/kernel/split/kernel_sum_all_radiance.h
+++ b/intern/cycles/kernel/split/kernel_sum_all_radiance.h
@ -48,7 +48,9 @@ ccl_device void kernel_sum_all_radiance(
 		for(sample_iterator = 0; sample_iterator < parallel_samples; sample_iterator++) {
 			for(pass_stride_iterator = 0; pass_stride_iterator < num_floats; pass_stride_iterator++) {
-				*(buffer + pass_stride_iterator) = (start_sample == 0 && sample_iterator == 0) ? *(per_sample_output_buffer + pass_stride_iterator)
+				*(buffer + pass_stride_iterator) =
 				        (start_sample == 0 && sample_iterator == 0)
 				                ? *(per_sample_output_buffer + pass_stride_iterator)
 				                : *(buffer + pass_stride_iterator) + *(per_sample_output_buffer + pass_stride_iterator);
 			}
 			per_sample_output_buffer += sample_stride;