forked from bartvdbraak/blender
700722f686
Quite straightforward, main trick is happening in path_source_replace_includes(). Reviewers: brecht, dingto, lukasstockner97, juicyfruit Differential Revision: https://developer.blender.org/D1794
194 lines
7.5 KiB
C
194 lines
7.5 KiB
C
/*
 * Copyright 2011-2015 Blender Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
|
|
|
|
#ifndef __KERNEL_WORK_STEALING_H__
|
|
#define __KERNEL_WORK_STEALING_H__
|
|
|
|
/*
 * Utility functions for work stealing
 */
|
|
|
|
#ifdef __WORK_STEALING__
|
|
|
|
#ifdef __KERNEL_OPENCL__
|
|
# pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
|
|
#endif
|
|
|
|
/* Derive a work-group index along one axis from a ray index.
 *
 * ray_index is split using a row width of tile_dim_x * parallel_samples:
 * the remainder is the position used for axis 0 and the quotient is the
 * position used for axis 1. The selected position is then divided by the
 * local work size of that axis.
 *
 * dim selects the axis: 0 for x; any other value is asserted to be 1 (y).
 * tile_dim_y is part of the signature but does not enter the computation.
 */
uint get_group_id_with_ray_index(uint ray_index,
                                 uint tile_dim_x,
                                 uint tile_dim_y,
                                 uint parallel_samples,
                                 int dim)
{
  const uint row_width = tile_dim_x * parallel_samples;

  if(dim != 0) {
    /* Only two axes are supported. */
    kernel_assert(dim == 1);
    const uint row = ray_index / row_width;
    return row / get_local_size(1);
  }

  const uint column = ray_index % row_width;
  return column / get_local_size(0);
}
|
|
|
|
/* Amount of work belonging to the work group (grp_idx, grp_idy).
 *
 * Along each axis the group contributes get_local_size(axis) threads,
 * except for the last group of that axis, which contributes
 * tile_dim % get_local_size(axis) threads when that remainder is non-zero.
 *
 * Returns the product of both per-axis thread counts and num_samples.
 */
uint get_total_work(uint tile_dim_x,
                    uint tile_dim_y,
                    uint grp_idx,
                    uint grp_idy,
                    uint num_samples)
{
  /* Start from a full group and shrink it only for a partial last group. */
  uint span_x = get_local_size(0);
  if(grp_idx == (get_num_groups(0) - 1)) {
    const uint leftover_x = tile_dim_x % get_local_size(0);
    if(leftover_x != 0) {
      span_x = leftover_x;
    }
  }

  uint span_y = get_local_size(1);
  if(grp_idy == (get_num_groups(1) - 1)) {
    const uint leftover_y = tile_dim_y % get_local_size(1);
    if(leftover_y != 0) {
      span_y = leftover_y;
    }
  }

  return span_x * span_y * num_samples;
}
|
|
|
|
/* Returns 0 in case there is no next work available */
|
|
/* Returns 1 in case work assigned is valid */
|
|
int get_next_work(ccl_global uint *work_pool,
|
|
ccl_private uint *my_work,
|
|
uint tile_dim_x,
|
|
uint tile_dim_y,
|
|
uint num_samples,
|
|
uint parallel_samples,
|
|
uint ray_index)
|
|
{
|
|
uint grp_idx = get_group_id_with_ray_index(ray_index,
|
|
tile_dim_x,
|
|
tile_dim_y,
|
|
parallel_samples,
|
|
0);
|
|
uint grp_idy = get_group_id_with_ray_index(ray_index,
|
|
tile_dim_x,
|
|
tile_dim_y,
|
|
parallel_samples,
|
|
1);
|
|
uint total_work = get_total_work(tile_dim_x,
|
|
tile_dim_y,
|
|
grp_idx,
|
|
grp_idy,
|
|
num_samples);
|
|
uint group_index = grp_idy * get_num_groups(0) + grp_idx;
|
|
*my_work = atomic_inc(&work_pool[group_index]);
|
|
return (*my_work < total_work) ? 1 : 0;
|
|
}
|
|
|
|
/* This function assumes that the passed my_work is valid. */
|
|
/* Decode sample number w.r.t. assigned my_work. */
|
|
uint get_my_sample(uint my_work,
|
|
uint tile_dim_x,
|
|
uint tile_dim_y,
|
|
uint parallel_samples,
|
|
uint ray_index)
|
|
{
|
|
uint grp_idx = get_group_id_with_ray_index(ray_index,
|
|
tile_dim_x,
|
|
tile_dim_y,
|
|
parallel_samples,
|
|
0);
|
|
uint grp_idy = get_group_id_with_ray_index(ray_index,
|
|
tile_dim_x,
|
|
tile_dim_y,
|
|
parallel_samples,
|
|
1);
|
|
uint threads_within_tile_border_x =
|
|
(grp_idx == (get_num_groups(0) - 1)) ? tile_dim_x % get_local_size(0)
|
|
: get_local_size(0);
|
|
uint threads_within_tile_border_y =
|
|
(grp_idy == (get_num_groups(1) - 1)) ? tile_dim_y % get_local_size(1)
|
|
: get_local_size(1);
|
|
|
|
threads_within_tile_border_x =
|
|
(threads_within_tile_border_x == 0) ? get_local_size(0)
|
|
: threads_within_tile_border_x;
|
|
threads_within_tile_border_y =
|
|
(threads_within_tile_border_y == 0) ? get_local_size(1)
|
|
: threads_within_tile_border_y;
|
|
|
|
return my_work /
|
|
(threads_within_tile_border_x * threads_within_tile_border_y);
|
|
}
|
|
|
|
/* Decode pixel and tile position w.r.t. assigned my_work. */
|
|
void get_pixel_tile_position(ccl_private uint *pixel_x,
|
|
ccl_private uint *pixel_y,
|
|
ccl_private uint *tile_x,
|
|
ccl_private uint *tile_y,
|
|
uint my_work,
|
|
uint tile_dim_x,
|
|
uint tile_dim_y,
|
|
uint tile_offset_x,
|
|
uint tile_offset_y,
|
|
uint parallel_samples,
|
|
uint ray_index)
|
|
{
|
|
uint grp_idx = get_group_id_with_ray_index(ray_index,
|
|
tile_dim_x,
|
|
tile_dim_y,
|
|
parallel_samples,
|
|
0);
|
|
uint grp_idy = get_group_id_with_ray_index(ray_index,
|
|
tile_dim_x,
|
|
tile_dim_y,
|
|
parallel_samples,
|
|
1);
|
|
uint threads_within_tile_border_x =
|
|
(grp_idx == (get_num_groups(0) - 1)) ? tile_dim_x % get_local_size(0)
|
|
: get_local_size(0);
|
|
uint threads_within_tile_border_y =
|
|
(grp_idy == (get_num_groups(1) - 1)) ? tile_dim_y % get_local_size(1)
|
|
: get_local_size(1);
|
|
|
|
threads_within_tile_border_x =
|
|
(threads_within_tile_border_x == 0) ? get_local_size(0)
|
|
: threads_within_tile_border_x;
|
|
threads_within_tile_border_y =
|
|
(threads_within_tile_border_y == 0) ? get_local_size(1)
|
|
: threads_within_tile_border_y;
|
|
|
|
uint total_associated_pixels =
|
|
threads_within_tile_border_x * threads_within_tile_border_y;
|
|
uint work_group_pixel_index = my_work % total_associated_pixels;
|
|
uint work_group_pixel_x =
|
|
work_group_pixel_index % threads_within_tile_border_x;
|
|
uint work_group_pixel_y =
|
|
work_group_pixel_index / threads_within_tile_border_x;
|
|
|
|
*pixel_x =
|
|
tile_offset_x + (grp_idx * get_local_size(0)) + work_group_pixel_x;
|
|
*pixel_y =
|
|
tile_offset_y + (grp_idy * get_local_size(1)) + work_group_pixel_y;
|
|
*tile_x = *pixel_x - tile_offset_x;
|
|
*tile_y = *pixel_y - tile_offset_y;
|
|
}
|
|
|
|
#endif /* __WORK_STEALING__ */
|
|
|
|
#endif /* __KERNEL_WORK_STEALING_H__ */
|