forked from bartvdbraak/blender
Fix T40228: cycles CUDA multi GPU + world MIS giving error.
This commit is contained in:
parent
2305e3289b
commit
865dfa8a7e
@ -762,6 +762,8 @@ public:
|
|||||||
if(task.get_cancel())
|
if(task.get_cancel())
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
int shader_w = min(shader_chunk_size, end - shader_x);
|
||||||
|
|
||||||
/* pass in parameters */
|
/* pass in parameters */
|
||||||
int offset = 0;
|
int offset = 0;
|
||||||
|
|
||||||
@ -780,13 +782,15 @@ public:
|
|||||||
cuda_assert(cuParamSeti(cuShader, offset, shader_x));
|
cuda_assert(cuParamSeti(cuShader, offset, shader_x));
|
||||||
offset += sizeof(shader_x);
|
offset += sizeof(shader_x);
|
||||||
|
|
||||||
|
cuda_assert(cuParamSeti(cuShader, offset, shader_w));
|
||||||
|
offset += sizeof(shader_w);
|
||||||
|
|
||||||
cuda_assert(cuParamSetSize(cuShader, offset));
|
cuda_assert(cuParamSetSize(cuShader, offset));
|
||||||
|
|
||||||
/* launch kernel */
|
/* launch kernel */
|
||||||
int threads_per_block;
|
int threads_per_block;
|
||||||
cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuShader));
|
cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuShader));
|
||||||
|
|
||||||
int shader_w = min(shader_chunk_size, end - shader_x);
|
|
||||||
int xblocks = (shader_w + threads_per_block - 1)/threads_per_block;
|
int xblocks = (shader_w + threads_per_block - 1)/threads_per_block;
|
||||||
|
|
||||||
cuda_assert(cuFuncSetCacheConfig(cuShader, CU_FUNC_CACHE_PREFER_L1));
|
cuda_assert(cuFuncSetCacheConfig(cuShader, CU_FUNC_CACHE_PREFER_L1));
|
||||||
|
@ -146,20 +146,22 @@ kernel_cuda_convert_to_half_float(uchar4 *rgba, float *buffer, float sample_scal
|
|||||||
|
|
||||||
extern "C" __global__ void
|
extern "C" __global__ void
|
||||||
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
|
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
|
||||||
kernel_cuda_shader(uint4 *input, float4 *output, int type, int sx)
|
kernel_cuda_shader(uint4 *input, float4 *output, int type, int sx, int sw)
|
||||||
{
|
{
|
||||||
int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
|
int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
|
||||||
|
|
||||||
kernel_shader_evaluate(NULL, input, output, (ShaderEvalType)type, x);
|
if(x < sx + sw)
|
||||||
|
kernel_shader_evaluate(NULL, input, output, (ShaderEvalType)type, x);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" __global__ void
|
extern "C" __global__ void
|
||||||
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
|
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
|
||||||
kernel_cuda_bake(uint4 *input, float4 *output, int type, int sx)
|
kernel_cuda_bake(uint4 *input, float4 *output, int type, int sx, int sw)
|
||||||
{
|
{
|
||||||
int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
|
int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
|
||||||
|
|
||||||
kernel_bake_evaluate(NULL, input, output, (ShaderEvalType)type, x);
|
if(x < sx + sw)
|
||||||
|
kernel_bake_evaluate(NULL, input, output, (ShaderEvalType)type, x);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
Reference in New Issue
Block a user