Merge branch 'blender-v3.1-release'
This commit is contained in:
commit
a9a05d5597
@ -283,7 +283,10 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
|
|||||||
ccl_gpu_kernel_lambda_pass.kernel_index = kernel_index;
|
ccl_gpu_kernel_lambda_pass.kernel_index = kernel_index;
|
||||||
|
|
||||||
gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE,
|
gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE,
|
||||||
num_states, indices, num_indices, ccl_gpu_kernel_lambda_pass);
|
num_states,
|
||||||
|
indices,
|
||||||
|
num_indices,
|
||||||
|
ccl_gpu_kernel_lambda_pass);
|
||||||
}
|
}
|
||||||
|
|
||||||
ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
|
ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
|
||||||
@ -298,7 +301,10 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
|
|||||||
ccl_gpu_kernel_lambda_pass.kernel_index = kernel_index;
|
ccl_gpu_kernel_lambda_pass.kernel_index = kernel_index;
|
||||||
|
|
||||||
gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE,
|
gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE,
|
||||||
num_states, indices, num_indices, ccl_gpu_kernel_lambda_pass);
|
num_states,
|
||||||
|
indices,
|
||||||
|
num_indices,
|
||||||
|
ccl_gpu_kernel_lambda_pass);
|
||||||
}
|
}
|
||||||
|
|
||||||
ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
|
ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
|
||||||
@ -310,7 +316,10 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
|
|||||||
ccl_gpu_kernel_lambda(INTEGRATOR_STATE(state, path, queued_kernel) != 0);
|
ccl_gpu_kernel_lambda(INTEGRATOR_STATE(state, path, queued_kernel) != 0);
|
||||||
|
|
||||||
gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE,
|
gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE,
|
||||||
num_states, indices, num_indices, ccl_gpu_kernel_lambda_pass);
|
num_states,
|
||||||
|
indices,
|
||||||
|
num_indices,
|
||||||
|
ccl_gpu_kernel_lambda_pass);
|
||||||
}
|
}
|
||||||
|
|
||||||
ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
|
ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
|
||||||
@ -323,7 +332,10 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
|
|||||||
ccl_gpu_kernel_lambda(INTEGRATOR_STATE(state, path, queued_kernel) == 0);
|
ccl_gpu_kernel_lambda(INTEGRATOR_STATE(state, path, queued_kernel) == 0);
|
||||||
|
|
||||||
gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE,
|
gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE,
|
||||||
num_states, indices + indices_offset, num_indices, ccl_gpu_kernel_lambda_pass);
|
num_states,
|
||||||
|
indices + indices_offset,
|
||||||
|
num_indices,
|
||||||
|
ccl_gpu_kernel_lambda_pass);
|
||||||
}
|
}
|
||||||
|
|
||||||
ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
|
ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
|
||||||
@ -336,7 +348,10 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
|
|||||||
ccl_gpu_kernel_lambda(INTEGRATOR_STATE(state, shadow_path, queued_kernel) == 0);
|
ccl_gpu_kernel_lambda(INTEGRATOR_STATE(state, shadow_path, queued_kernel) == 0);
|
||||||
|
|
||||||
gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE,
|
gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE,
|
||||||
num_states, indices + indices_offset, num_indices, ccl_gpu_kernel_lambda_pass);
|
num_states,
|
||||||
|
indices + indices_offset,
|
||||||
|
num_indices,
|
||||||
|
ccl_gpu_kernel_lambda_pass);
|
||||||
}
|
}
|
||||||
|
|
||||||
ccl_gpu_kernel_threads(GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE)
|
ccl_gpu_kernel_threads(GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE)
|
||||||
@ -379,7 +394,10 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
|
|||||||
ccl_gpu_kernel_lambda_pass.num_active_paths = num_active_paths;
|
ccl_gpu_kernel_lambda_pass.num_active_paths = num_active_paths;
|
||||||
|
|
||||||
gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE,
|
gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE,
|
||||||
num_states, indices, num_indices, ccl_gpu_kernel_lambda_pass);
|
num_states,
|
||||||
|
indices,
|
||||||
|
num_indices,
|
||||||
|
ccl_gpu_kernel_lambda_pass);
|
||||||
}
|
}
|
||||||
|
|
||||||
ccl_gpu_kernel_threads(GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE)
|
ccl_gpu_kernel_threads(GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE)
|
||||||
@ -412,7 +430,10 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
|
|||||||
ccl_gpu_kernel_lambda_pass.num_active_paths = num_active_paths;
|
ccl_gpu_kernel_lambda_pass.num_active_paths = num_active_paths;
|
||||||
|
|
||||||
gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE,
|
gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE,
|
||||||
num_states, indices, num_indices, ccl_gpu_kernel_lambda_pass);
|
num_states,
|
||||||
|
indices,
|
||||||
|
num_indices,
|
||||||
|
ccl_gpu_kernel_lambda_pass);
|
||||||
}
|
}
|
||||||
|
|
||||||
ccl_gpu_kernel_threads(GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE)
|
ccl_gpu_kernel_threads(GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE)
|
||||||
|
@ -22,19 +22,20 @@ CCL_NAMESPACE_BEGIN
|
|||||||
template<uint blocksize, typename IsActiveOp>
|
template<uint blocksize, typename IsActiveOp>
|
||||||
__device__
|
__device__
|
||||||
#endif
|
#endif
|
||||||
void gpu_parallel_active_index_array_impl(const uint num_states,
|
void
|
||||||
ccl_global int *indices,
|
gpu_parallel_active_index_array_impl(const uint num_states,
|
||||||
ccl_global int *num_indices,
|
ccl_global int *indices,
|
||||||
|
ccl_global int *num_indices,
|
||||||
#ifdef __KERNEL_METAL__
|
#ifdef __KERNEL_METAL__
|
||||||
const uint is_active,
|
const uint is_active,
|
||||||
const uint blocksize,
|
const uint blocksize,
|
||||||
const int thread_index,
|
const int thread_index,
|
||||||
const uint state_index,
|
const uint state_index,
|
||||||
const int ccl_gpu_warp_size,
|
const int ccl_gpu_warp_size,
|
||||||
const int thread_warp,
|
const int thread_warp,
|
||||||
const int warp_index,
|
const int warp_index,
|
||||||
const int num_warps,
|
const int num_warps,
|
||||||
threadgroup int *warp_offset)
|
threadgroup int *warp_offset)
|
||||||
{
|
{
|
||||||
#else
|
#else
|
||||||
IsActiveOp is_active_op)
|
IsActiveOp is_active_op)
|
||||||
@ -65,7 +66,7 @@ void gpu_parallel_active_index_array_impl(const uint num_states,
|
|||||||
ccl_gpu_syncthreads();
|
ccl_gpu_syncthreads();
|
||||||
|
|
||||||
/* Last thread in block converts per-warp sizes to offsets, increments global size of
|
/* Last thread in block converts per-warp sizes to offsets, increments global size of
|
||||||
* index array and gets offset to write to. */
|
* index array and gets offset to write to. */
|
||||||
if (thread_index == blocksize - 1) {
|
if (thread_index == blocksize - 1) {
|
||||||
/* TODO: parallelize this. */
|
/* TODO: parallelize this. */
|
||||||
int offset = 0;
|
int offset = 0;
|
||||||
@ -91,15 +92,27 @@ void gpu_parallel_active_index_array_impl(const uint num_states,
|
|||||||
#ifdef __KERNEL_METAL__
|
#ifdef __KERNEL_METAL__
|
||||||
|
|
||||||
# define gpu_parallel_active_index_array(dummy, num_states, indices, num_indices, is_active_op) \
|
# define gpu_parallel_active_index_array(dummy, num_states, indices, num_indices, is_active_op) \
|
||||||
const uint is_active = (ccl_gpu_global_id_x() < num_states) ? is_active_op(ccl_gpu_global_id_x()) : 0; \
|
const uint is_active = (ccl_gpu_global_id_x() < num_states) ? \
|
||||||
gpu_parallel_active_index_array_impl(num_states, indices, num_indices, is_active, \
|
is_active_op(ccl_gpu_global_id_x()) : \
|
||||||
metal_local_size, metal_local_id, metal_global_id, simdgroup_size, simd_lane_index, \
|
0; \
|
||||||
simd_group_index, num_simd_groups, simdgroup_offset)
|
gpu_parallel_active_index_array_impl(num_states, \
|
||||||
|
indices, \
|
||||||
|
num_indices, \
|
||||||
|
is_active, \
|
||||||
|
metal_local_size, \
|
||||||
|
metal_local_id, \
|
||||||
|
metal_global_id, \
|
||||||
|
simdgroup_size, \
|
||||||
|
simd_lane_index, \
|
||||||
|
simd_group_index, \
|
||||||
|
num_simd_groups, \
|
||||||
|
simdgroup_offset)
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
# define gpu_parallel_active_index_array(blocksize, num_states, indices, num_indices, is_active_op) \
|
# define gpu_parallel_active_index_array( \
|
||||||
gpu_parallel_active_index_array_impl<blocksize>(num_states, indices, num_indices, is_active_op)
|
blocksize, num_states, indices, num_indices, is_active_op) \
|
||||||
|
gpu_parallel_active_index_array_impl<blocksize>(num_states, indices, num_indices, is_active_op)
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user