Code cleanup: style, require ; for cuda_assert, opencl_assert

This commit is contained in:
Campbell Barton 2014-05-04 03:49:56 +10:00
parent 95d885b3f4
commit 1618329b00
3 changed files with 137 additions and 127 deletions
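Note on the macro change: the assert macros in both device backends now end in a trailing `(void)0`, so every `cuda_assert(...)` / `opencl_assert(...)` call must be terminated with a semicolon, and the remaining hunks add the missing `;` (plus braces around unbraced if/else call sites). A minimal sketch of the idiom, using a hypothetical my_assert macro rather than the actual Cycles definitions:

#include <cstdio>

/* The macro body is a block followed by a trailing `(void)0` expression.
 * The `(void)0` only becomes a complete statement once the caller supplies
 * the terminating ';', so writing `my_assert(x)` without a semicolon no
 * longer compiles. */
#define my_assert(stmt) \
	{ \
		if(!(stmt)) { \
			fprintf(stderr, "assert failed: %s\n", #stmt); \
		} \
	} (void)0

int main()
{
	int value = 1;

	if(value == 1) {
		my_assert(value > 0);  /* ok: the required ';' terminates the macro */
	}
	else {
		my_assert(value == 0);
	}

	/* my_assert(value > 0)   <- without the ';' this would be a compile error */
	return 0;
}

Unlike the `do { ... } while(0)` wrapper, the `{ ... } (void)0` form is still not a single statement, which is consistent with this commit also adding braces around the previously unbraced if/else uses of cuda_assert.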

@@ -177,8 +177,14 @@ static void display_info(Progress& progress)
interactive = options.interactive? "On":"Off";
-str = string_printf("%s Time: %.2f Latency: %.4f Sample: %d Average: %.4f Interactive: %s",
-status.c_str(), total_time, latency, sample, sample_time, interactive.c_str());
+str = string_printf(
+"%s"
+" Time: %.2f"
+" Latency: %.4f"
+" Sample: %d"
+" Average: %.4f"
+" Interactive: %s",
+status.c_str(), total_time, latency, sample, sample_time, interactive.c_str());
view_display_info(str.c_str());

@@ -139,7 +139,7 @@ public:
/*cuda_abort();*/ \
cuda_error_documentation(); \
} \
-}
+} (void)0
bool cuda_error_(CUresult result, const string& stmt)
{
@@ -166,7 +166,7 @@ public:
void cuda_push_context()
{
-cuda_assert(cuCtxSetCurrent(cuContext))
+cuda_assert(cuCtxSetCurrent(cuContext));
}
void cuda_pop_context()
@@ -174,7 +174,7 @@ public:
cuda_assert(cuCtxSetCurrent(NULL));
}
-CUDADevice(DeviceInfo& info, Stats &stats, bool background_)
+CUDADevice(DeviceInfo& info, Stats &stats, bool background_)
: Device(info, stats, background_)
{
first_error = true;
@@ -212,8 +212,8 @@ public:
if(cuda_error_(result, "cuCtxCreate"))
return;
-cuda_assert(cuStreamCreate(&cuStream, 0))
-cuda_assert(cuEventCreate(&tileDone, 0x1))
+cuda_assert(cuStreamCreate(&cuStream, 0));
+cuda_assert(cuEventCreate(&tileDone, 0x1));
int major, minor;
cuDeviceComputeCapability(&major, &minor, cuDevId);
@@ -231,9 +231,9 @@ public:
{
task_pool.stop();
-cuda_assert(cuEventDestroy(tileDone))
-cuda_assert(cuStreamDestroy(cuStream))
-cuda_assert(cuCtxDestroy(cuContext))
+cuda_assert(cuEventDestroy(tileDone));
+cuda_assert(cuStreamDestroy(cuStream));
+cuda_assert(cuCtxDestroy(cuContext));
}
bool support_device(bool experimental, bool branched)
@@ -376,7 +376,7 @@ public:
cuda_push_context();
CUdeviceptr device_pointer;
size_t size = mem.memory_size();
-cuda_assert(cuMemAlloc(&device_pointer, size))
+cuda_assert(cuMemAlloc(&device_pointer, size));
mem.device_pointer = (device_ptr)device_pointer;
stats.mem_alloc(size);
cuda_pop_context();
@@ -386,7 +386,7 @@ public:
{
cuda_push_context();
if(mem.device_pointer)
-cuda_assert(cuMemcpyHtoD(cuda_device_ptr(mem.device_pointer), (void*)mem.data_pointer, mem.memory_size()))
+cuda_assert(cuMemcpyHtoD(cuda_device_ptr(mem.device_pointer), (void*)mem.data_pointer, mem.memory_size()));
cuda_pop_context();
}
@@ -398,7 +398,7 @@ public:
cuda_push_context();
if(mem.device_pointer) {
cuda_assert(cuMemcpyDtoH((uchar*)mem.data_pointer + offset,
-(CUdeviceptr)((uchar*)mem.device_pointer + offset), size))
+(CUdeviceptr)((uchar*)mem.device_pointer + offset), size));
}
else {
memset((char*)mem.data_pointer + offset, 0, size);
@@ -412,7 +412,7 @@ public:
cuda_push_context();
if(mem.device_pointer)
-cuda_assert(cuMemsetD8(cuda_device_ptr(mem.device_pointer), 0, mem.memory_size()))
+cuda_assert(cuMemsetD8(cuda_device_ptr(mem.device_pointer), 0, mem.memory_size()));
cuda_pop_context();
}
@@ -420,7 +420,7 @@ public:
{
if(mem.device_pointer) {
cuda_push_context();
-cuda_assert(cuMemFree(cuda_device_ptr(mem.device_pointer)))
+cuda_assert(cuMemFree(cuda_device_ptr(mem.device_pointer)));
cuda_pop_context();
mem.device_pointer = 0;
@@ -435,9 +435,9 @@ public:
size_t bytes;
cuda_push_context();
-cuda_assert(cuModuleGetGlobal(&mem, &bytes, cuModule, name))
+cuda_assert(cuModuleGetGlobal(&mem, &bytes, cuModule, name));
//assert(bytes == size);
-cuda_assert(cuMemcpyHtoD(mem, host, size))
+cuda_assert(cuMemcpyHtoD(mem, host, size));
cuda_pop_context();
}
@@ -464,7 +464,7 @@ public:
CUtexref texref = NULL;
cuda_push_context();
-cuda_assert(cuModuleGetTexRef(&texref, cuModule, name))
+cuda_assert(cuModuleGetTexRef(&texref, cuModule, name));
if(!texref) {
cuda_pop_context();
@@ -480,7 +480,7 @@ public:
desc.Format = format;
desc.NumChannels = mem.data_elements;
-cuda_assert(cuArrayCreate(&handle, &desc))
+cuda_assert(cuArrayCreate(&handle, &desc));
if(!handle) {
cuda_pop_context();
@@ -498,23 +498,23 @@ public:
param.WidthInBytes = param.srcPitch;
param.Height = mem.data_height;
-cuda_assert(cuMemcpy2D(&param))
+cuda_assert(cuMemcpy2D(&param));
}
else
-cuda_assert(cuMemcpyHtoA(handle, 0, (void*)mem.data_pointer, size))
+cuda_assert(cuMemcpyHtoA(handle, 0, (void*)mem.data_pointer, size));
-cuda_assert(cuTexRefSetArray(texref, handle, CU_TRSA_OVERRIDE_FORMAT))
+cuda_assert(cuTexRefSetArray(texref, handle, CU_TRSA_OVERRIDE_FORMAT));
if(interpolation == INTERPOLATION_CLOSEST) {
-cuda_assert(cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_POINT))
+cuda_assert(cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_POINT));
}
else if (interpolation == INTERPOLATION_LINEAR){
-cuda_assert(cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_LINEAR))
+cuda_assert(cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_LINEAR));
}
else {/* CUBIC and SMART are unsupported for CUDA */
-cuda_assert(cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_LINEAR))
+cuda_assert(cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_LINEAR));
}
-cuda_assert(cuTexRefSetFlags(texref, CU_TRSF_NORMALIZED_COORDINATES))
+cuda_assert(cuTexRefSetFlags(texref, CU_TRSF_NORMALIZED_COORDINATES));
mem.device_pointer = (device_ptr)handle;
@@ -528,20 +528,20 @@ public:
cuda_push_context();
-cuda_assert(cuTexRefSetAddress(NULL, texref, cuda_device_ptr(mem.device_pointer), size))
-cuda_assert(cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_POINT))
-cuda_assert(cuTexRefSetFlags(texref, CU_TRSF_READ_AS_INTEGER))
+cuda_assert(cuTexRefSetAddress(NULL, texref, cuda_device_ptr(mem.device_pointer), size));
+cuda_assert(cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_POINT));
+cuda_assert(cuTexRefSetFlags(texref, CU_TRSF_READ_AS_INTEGER));
}
if(periodic) {
-cuda_assert(cuTexRefSetAddressMode(texref, 0, CU_TR_ADDRESS_MODE_WRAP))
-cuda_assert(cuTexRefSetAddressMode(texref, 1, CU_TR_ADDRESS_MODE_WRAP))
+cuda_assert(cuTexRefSetAddressMode(texref, 0, CU_TR_ADDRESS_MODE_WRAP));
+cuda_assert(cuTexRefSetAddressMode(texref, 1, CU_TR_ADDRESS_MODE_WRAP));
}
else {
-cuda_assert(cuTexRefSetAddressMode(texref, 0, CU_TR_ADDRESS_MODE_CLAMP))
-cuda_assert(cuTexRefSetAddressMode(texref, 1, CU_TR_ADDRESS_MODE_CLAMP))
+cuda_assert(cuTexRefSetAddressMode(texref, 0, CU_TR_ADDRESS_MODE_CLAMP));
+cuda_assert(cuTexRefSetAddressMode(texref, 1, CU_TR_ADDRESS_MODE_CLAMP));
}
-cuda_assert(cuTexRefSetFormat(texref, format, mem.data_elements))
+cuda_assert(cuTexRefSetFormat(texref, format, mem.data_elements));
cuda_pop_context();
}
@@ -554,17 +554,17 @@ public:
CUdeviceptr cumem;
size_t cubytes;
-cuda_assert(cuModuleGetGlobal(&cumem, &cubytes, cuModule, name))
+cuda_assert(cuModuleGetGlobal(&cumem, &cubytes, cuModule, name));
if(cubytes == 8) {
/* 64 bit device pointer */
uint64_t ptr = mem.device_pointer;
-cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes))
+cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes));
}
else {
/* 32 bit device pointer */
uint32_t ptr = (uint32_t)mem.device_pointer;
-cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes))
+cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes));
}
cuda_pop_context();
@@ -605,10 +605,12 @@ public:
CUdeviceptr d_rng_state = cuda_device_ptr(rtile.rng_state);
/* get kernel function */
-if(branched && support_device(true, branched))
-cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_branched_path_trace"))
-else
-cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_path_trace"))
+if(branched && support_device(true, branched)) {
+cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_branched_path_trace"));
+}
+else {
+cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_path_trace"));
+}
if(have_error())
return;
@@ -616,43 +618,43 @@ public:
/* pass in parameters */
int offset = 0;
-cuda_assert(cuParamSetv(cuPathTrace, offset, &d_buffer, sizeof(d_buffer)))
+cuda_assert(cuParamSetv(cuPathTrace, offset, &d_buffer, sizeof(d_buffer)));
offset += sizeof(d_buffer);
-cuda_assert(cuParamSetv(cuPathTrace, offset, &d_rng_state, sizeof(d_rng_state)))
+cuda_assert(cuParamSetv(cuPathTrace, offset, &d_rng_state, sizeof(d_rng_state)));
offset += sizeof(d_rng_state);
offset = align_up(offset, __alignof(sample));
-cuda_assert(cuParamSeti(cuPathTrace, offset, sample))
+cuda_assert(cuParamSeti(cuPathTrace, offset, sample));
offset += sizeof(sample);
-cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.x))
+cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.x));
offset += sizeof(rtile.x);
-cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.y))
+cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.y));
offset += sizeof(rtile.y);
-cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.w))
+cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.w));
offset += sizeof(rtile.w);
-cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.h))
+cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.h));
offset += sizeof(rtile.h);
-cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.offset))
+cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.offset));
offset += sizeof(rtile.offset);
-cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.stride))
+cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.stride));
offset += sizeof(rtile.stride);
-cuda_assert(cuParamSetSize(cuPathTrace, offset))
+cuda_assert(cuParamSetSize(cuPathTrace, offset));
/* launch kernel */
int threads_per_block;
-cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuPathTrace))
+cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuPathTrace));
/*int num_registers;
-cuda_assert(cuFuncGetAttribute(&num_registers, CU_FUNC_ATTRIBUTE_NUM_REGS, cuPathTrace))
+cuda_assert(cuFuncGetAttribute(&num_registers, CU_FUNC_ATTRIBUTE_NUM_REGS, cuPathTrace));
printf("threads_per_block %d\n", threads_per_block);
printf("num_registers %d\n", num_registers);*/
@@ -662,16 +664,16 @@ public:
int xblocks = (rtile.w + xthreads - 1)/xthreads;
int yblocks = (rtile.h + ythreads - 1)/ythreads;
-cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1))
-cuda_assert(cuFuncSetBlockShape(cuPathTrace, xthreads, ythreads, 1))
+cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1));
+cuda_assert(cuFuncSetBlockShape(cuPathTrace, xthreads, ythreads, 1));
if(info.display_device) {
/* don't use async for device used for display, locks up UI too much */
-cuda_assert(cuLaunchGrid(cuPathTrace, xblocks, yblocks))
-cuda_assert(cuCtxSynchronize())
+cuda_assert(cuLaunchGrid(cuPathTrace, xblocks, yblocks));
+cuda_assert(cuCtxSynchronize());
}
else {
-cuda_assert(cuLaunchGridAsync(cuPathTrace, xblocks, yblocks, cuStream))
+cuda_assert(cuLaunchGridAsync(cuPathTrace, xblocks, yblocks, cuStream));
}
cuda_pop_context();
@@ -689,58 +691,60 @@ public:
CUdeviceptr d_buffer = cuda_device_ptr(buffer);
/* get kernel function */
-if(rgba_half)
-cuda_assert(cuModuleGetFunction(&cuFilmConvert, cuModule, "kernel_cuda_convert_to_half_float"))
-else
-cuda_assert(cuModuleGetFunction(&cuFilmConvert, cuModule, "kernel_cuda_convert_to_byte"))
+if(rgba_half) {
+cuda_assert(cuModuleGetFunction(&cuFilmConvert, cuModule, "kernel_cuda_convert_to_half_float"));
+}
+else {
+cuda_assert(cuModuleGetFunction(&cuFilmConvert, cuModule, "kernel_cuda_convert_to_byte"));
+}
/* pass in parameters */
int offset = 0;
-cuda_assert(cuParamSetv(cuFilmConvert, offset, &d_rgba, sizeof(d_rgba)))
+cuda_assert(cuParamSetv(cuFilmConvert, offset, &d_rgba, sizeof(d_rgba)));
offset += sizeof(d_rgba);
-cuda_assert(cuParamSetv(cuFilmConvert, offset, &d_buffer, sizeof(d_buffer)))
+cuda_assert(cuParamSetv(cuFilmConvert, offset, &d_buffer, sizeof(d_buffer)));
offset += sizeof(d_buffer);
float sample_scale = 1.0f/(task.sample + 1);
offset = align_up(offset, __alignof(sample_scale));
-cuda_assert(cuParamSetf(cuFilmConvert, offset, sample_scale))
+cuda_assert(cuParamSetf(cuFilmConvert, offset, sample_scale));
offset += sizeof(sample_scale);
-cuda_assert(cuParamSeti(cuFilmConvert, offset, task.x))
+cuda_assert(cuParamSeti(cuFilmConvert, offset, task.x));
offset += sizeof(task.x);
-cuda_assert(cuParamSeti(cuFilmConvert, offset, task.y))
+cuda_assert(cuParamSeti(cuFilmConvert, offset, task.y));
offset += sizeof(task.y);
-cuda_assert(cuParamSeti(cuFilmConvert, offset, task.w))
+cuda_assert(cuParamSeti(cuFilmConvert, offset, task.w));
offset += sizeof(task.w);
-cuda_assert(cuParamSeti(cuFilmConvert, offset, task.h))
+cuda_assert(cuParamSeti(cuFilmConvert, offset, task.h));
offset += sizeof(task.h);
-cuda_assert(cuParamSeti(cuFilmConvert, offset, task.offset))
+cuda_assert(cuParamSeti(cuFilmConvert, offset, task.offset));
offset += sizeof(task.offset);
-cuda_assert(cuParamSeti(cuFilmConvert, offset, task.stride))
+cuda_assert(cuParamSeti(cuFilmConvert, offset, task.stride));
offset += sizeof(task.stride);
-cuda_assert(cuParamSetSize(cuFilmConvert, offset))
+cuda_assert(cuParamSetSize(cuFilmConvert, offset));
/* launch kernel */
int threads_per_block;
-cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuFilmConvert))
+cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuFilmConvert));
int xthreads = (int)sqrt((float)threads_per_block);
int ythreads = (int)sqrt((float)threads_per_block);
int xblocks = (task.w + xthreads - 1)/xthreads;
int yblocks = (task.h + ythreads - 1)/ythreads;
-cuda_assert(cuFuncSetCacheConfig(cuFilmConvert, CU_FUNC_CACHE_PREFER_L1))
-cuda_assert(cuFuncSetBlockShape(cuFilmConvert, xthreads, ythreads, 1))
-cuda_assert(cuLaunchGrid(cuFilmConvert, xblocks, yblocks))
+cuda_assert(cuFuncSetCacheConfig(cuFilmConvert, CU_FUNC_CACHE_PREFER_L1));
+cuda_assert(cuFuncSetBlockShape(cuFilmConvert, xthreads, ythreads, 1));
+cuda_assert(cuLaunchGrid(cuFilmConvert, xblocks, yblocks));
unmap_pixels((rgba_byte)? rgba_byte: rgba_half);
@@ -759,7 +763,7 @@ public:
CUdeviceptr d_output = cuda_device_ptr(task.shader_output);
/* get kernel function */
-cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_shader"))
+cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_shader"));
/* do tasks in smaller chunks, so we can cancel it */
const int shader_chunk_size = 65536;
@@ -773,35 +777,35 @@ public:
/* pass in parameters */
int offset = 0;
-cuda_assert(cuParamSetv(cuShader, offset, &d_input, sizeof(d_input)))
+cuda_assert(cuParamSetv(cuShader, offset, &d_input, sizeof(d_input)));
offset += sizeof(d_input);
-cuda_assert(cuParamSetv(cuShader, offset, &d_output, sizeof(d_output)))
+cuda_assert(cuParamSetv(cuShader, offset, &d_output, sizeof(d_output)));
offset += sizeof(d_output);
int shader_eval_type = task.shader_eval_type;
offset = align_up(offset, __alignof(shader_eval_type));
-cuda_assert(cuParamSeti(cuShader, offset, task.shader_eval_type))
+cuda_assert(cuParamSeti(cuShader, offset, task.shader_eval_type));
offset += sizeof(task.shader_eval_type);
-cuda_assert(cuParamSeti(cuShader, offset, shader_x))
+cuda_assert(cuParamSeti(cuShader, offset, shader_x));
offset += sizeof(shader_x);
-cuda_assert(cuParamSetSize(cuShader, offset))
+cuda_assert(cuParamSetSize(cuShader, offset));
/* launch kernel */
int threads_per_block;
-cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuShader))
+cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuShader));
int shader_w = min(shader_chunk_size, end - shader_x);
int xblocks = (shader_w + threads_per_block - 1)/threads_per_block;
-cuda_assert(cuFuncSetCacheConfig(cuShader, CU_FUNC_CACHE_PREFER_L1))
-cuda_assert(cuFuncSetBlockShape(cuShader, threads_per_block, 1, 1))
-cuda_assert(cuLaunchGrid(cuShader, xblocks, 1))
+cuda_assert(cuFuncSetCacheConfig(cuShader, CU_FUNC_CACHE_PREFER_L1));
+cuda_assert(cuFuncSetBlockShape(cuShader, threads_per_block, 1, 1));
+cuda_assert(cuLaunchGrid(cuShader, xblocks, 1));
-cuda_assert(cuCtxSynchronize())
+cuda_assert(cuCtxSynchronize());
}
cuda_pop_context();
@@ -814,8 +818,8 @@ public:
CUdeviceptr buffer;
size_t bytes;
-cuda_assert(cuGraphicsMapResources(1, &pmem.cuPBOresource, 0))
-cuda_assert(cuGraphicsResourceGetMappedPointer(&buffer, &bytes, pmem.cuPBOresource))
+cuda_assert(cuGraphicsMapResources(1, &pmem.cuPBOresource, 0));
+cuda_assert(cuGraphicsResourceGetMappedPointer(&buffer, &bytes, pmem.cuPBOresource));
return buffer;
}
@@ -828,7 +832,7 @@ public:
if(!background) {
PixelMem pmem = pixel_mem_map[mem];
-cuda_assert(cuGraphicsUnmapResources(1, &pmem.cuPBOresource, 0))
+cuda_assert(cuGraphicsUnmapResources(1, &pmem.cuPBOresource, 0));
}
}
@@ -917,7 +921,7 @@ public:
cuda_push_context();
-cuda_assert(cuGraphicsUnregisterResource(pmem.cuPBOresource))
+cuda_assert(cuGraphicsUnregisterResource(pmem.cuPBOresource));
glDeleteBuffers(1, &pmem.cuPBO);
glDeleteTextures(1, &pmem.cuTexId);
@@ -1038,8 +1042,8 @@ public:
if(!info.display_device && sample == sync_sample) {
cuda_push_context();
-cuda_assert(cuEventRecord(tileDone, cuStream))
-cuda_assert(cuEventSynchronize(tileDone))
+cuda_assert(cuEventRecord(tileDone, cuStream));
+cuda_assert(cuEventSynchronize(tileDone));
/* Do some time keeping to find out if we need to sync less */
boost::posix_time::ptime current_time(boost::posix_time::microsec_clock::local_time());
@@ -1065,7 +1069,7 @@ public:
shader(*task);
cuda_push_context();
-cuda_assert(cuCtxSynchronize())
+cuda_assert(cuCtxSynchronize());
cuda_pop_context();
}
}
@@ -1086,7 +1090,7 @@ public:
film_convert(task, task.buffer, task.rgba_byte, task.rgba_half);
cuda_push_context();
-cuda_assert(cuCtxSynchronize())
+cuda_assert(cuCtxSynchronize());
cuda_pop_context();
}
else {

@@ -416,7 +416,7 @@ public:
error_msg = message; \
fprintf(stderr, "%s\n", message.c_str()); \
} \
-}
+} (void)0
void opencl_assert_err(cl_int err, const char* where)
{
@@ -846,7 +846,7 @@ public:
{
/* this is blocking */
size_t size = mem.memory_size();
-opencl_assert(clEnqueueWriteBuffer(cqCommandQueue, CL_MEM_PTR(mem.device_pointer), CL_TRUE, 0, size, (void*)mem.data_pointer, 0, NULL, NULL))
+opencl_assert(clEnqueueWriteBuffer(cqCommandQueue, CL_MEM_PTR(mem.device_pointer), CL_TRUE, 0, size, (void*)mem.data_pointer, 0, NULL, NULL));
}
void mem_copy_from(device_memory& mem, int y, int w, int h, int elem)
@@ -854,7 +854,7 @@ public:
size_t offset = elem*y*w;
size_t size = elem*w*h;
-opencl_assert(clEnqueueReadBuffer(cqCommandQueue, CL_MEM_PTR(mem.device_pointer), CL_TRUE, offset, size, (uchar*)mem.data_pointer + offset, 0, NULL, NULL))
+opencl_assert(clEnqueueReadBuffer(cqCommandQueue, CL_MEM_PTR(mem.device_pointer), CL_TRUE, offset, size, (uchar*)mem.data_pointer + offset, 0, NULL, NULL));
}
void mem_zero(device_memory& mem)
@@ -868,7 +868,7 @@ public:
void mem_free(device_memory& mem)
{
if(mem.device_pointer) {
-opencl_assert(clReleaseMemObject(CL_MEM_PTR(mem.device_pointer)))
+opencl_assert(clReleaseMemObject(CL_MEM_PTR(mem.device_pointer)));
mem.device_pointer = 0;
stats.mem_free(mem.memory_size());
@@ -944,8 +944,8 @@ public:
size_t global_size[2] = {global_size_round_up(local_size[0], w), global_size_round_up(local_size[1], h)};
/* run kernel */
-opencl_assert(clEnqueueNDRangeKernel(cqCommandQueue, kernel, 2, NULL, global_size, NULL, 0, NULL, NULL))
-opencl_assert(clFlush(cqCommandQueue))
+opencl_assert(clEnqueueNDRangeKernel(cqCommandQueue, kernel, 2, NULL, global_size, NULL, 0, NULL, NULL));
+opencl_assert(clFlush(cqCommandQueue));
}
void path_trace(RenderTile& rtile, int sample)
@@ -965,21 +965,21 @@ public:
/* sample arguments */
cl_uint narg = 0;
-opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_data), (void*)&d_data))
-opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_buffer), (void*)&d_buffer))
-opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_rng_state), (void*)&d_rng_state))
+opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_data), (void*)&d_data));
+opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_buffer), (void*)&d_buffer));
+opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_rng_state), (void*)&d_rng_state));
#define KERNEL_TEX(type, ttype, name) \
set_kernel_arg_mem(ckPathTraceKernel, &narg, #name);
#include "kernel_textures.h"
-opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_sample), (void*)&d_sample))
-opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_x), (void*)&d_x))
-opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_y), (void*)&d_y))
-opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_w), (void*)&d_w))
-opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_h), (void*)&d_h))
-opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_offset), (void*)&d_offset))
-opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_stride), (void*)&d_stride))
+opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_sample), (void*)&d_sample));
+opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_x), (void*)&d_x));
+opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_y), (void*)&d_y));
+opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_w), (void*)&d_w));
+opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_h), (void*)&d_h));
+opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_offset), (void*)&d_offset));
+opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_stride), (void*)&d_stride));
enqueue_kernel(ckPathTraceKernel, d_w, d_h);
}
@@ -1020,21 +1020,21 @@ public:
cl_kernel ckFilmConvertKernel = (rgba_byte)? ckFilmConvertByteKernel: ckFilmConvertHalfFloatKernel;
-opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_data), (void*)&d_data))
-opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_rgba), (void*)&d_rgba))
-opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_buffer), (void*)&d_buffer))
+opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_data), (void*)&d_data));
+opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_rgba), (void*)&d_rgba));
+opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_buffer), (void*)&d_buffer));
#define KERNEL_TEX(type, ttype, name) \
set_kernel_arg_mem(ckFilmConvertKernel, &narg, #name);
#include "kernel_textures.h"
-opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_sample_scale), (void*)&d_sample_scale))
-opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_x), (void*)&d_x))
-opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_y), (void*)&d_y))
-opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_w), (void*)&d_w))
-opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_h), (void*)&d_h))
-opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_offset), (void*)&d_offset))
-opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_stride), (void*)&d_stride))
+opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_sample_scale), (void*)&d_sample_scale));
+opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_x), (void*)&d_x));
+opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_y), (void*)&d_y));
+opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_w), (void*)&d_w));
+opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_h), (void*)&d_h));
+opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_offset), (void*)&d_offset));
+opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_stride), (void*)&d_stride));
@@ -1054,17 +1054,17 @@ public:
/* sample arguments */
cl_uint narg = 0;
-opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_data), (void*)&d_data))
-opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_input), (void*)&d_input))
-opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_output), (void*)&d_output))
+opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_data), (void*)&d_data));
+opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_input), (void*)&d_input));
+opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_output), (void*)&d_output));
#define KERNEL_TEX(type, ttype, name) \
set_kernel_arg_mem(ckShaderKernel, &narg, #name);
#include "kernel_textures.h"
-opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_shader_eval_type), (void*)&d_shader_eval_type))
-opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_shader_x), (void*)&d_shader_x))
-opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_shader_w), (void*)&d_shader_w))
+opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_shader_eval_type), (void*)&d_shader_eval_type));
+opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_shader_x), (void*)&d_shader_x));
+opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_shader_w), (void*)&d_shader_w));
enqueue_kernel(ckShaderKernel, task.shader_w, 1);
}