diff --git a/intern/cycles/app/cycles_standalone.cpp b/intern/cycles/app/cycles_standalone.cpp index 074d6404f07..9871273381e 100644 --- a/intern/cycles/app/cycles_standalone.cpp +++ b/intern/cycles/app/cycles_standalone.cpp @@ -177,8 +177,14 @@ static void display_info(Progress& progress) interactive = options.interactive? "On":"Off"; - str = string_printf("%s Time: %.2f Latency: %.4f Sample: %d Average: %.4f Interactive: %s", - status.c_str(), total_time, latency, sample, sample_time, interactive.c_str()); + str = string_printf( + "%s" + " Time: %.2f" + " Latency: %.4f" + " Sample: %d" + " Average: %.4f" + " Interactive: %s", + status.c_str(), total_time, latency, sample, sample_time, interactive.c_str()); view_display_info(str.c_str()); diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 0257153df4f..29b348d86ac 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -139,7 +139,7 @@ public: /*cuda_abort();*/ \ cuda_error_documentation(); \ } \ - } + } (void)0 bool cuda_error_(CUresult result, const string& stmt) { @@ -166,7 +166,7 @@ public: void cuda_push_context() { - cuda_assert(cuCtxSetCurrent(cuContext)) + cuda_assert(cuCtxSetCurrent(cuContext)); } void cuda_pop_context() @@ -174,7 +174,7 @@ public: cuda_assert(cuCtxSetCurrent(NULL)); } - CUDADevice(DeviceInfo& info, Stats &stats, bool background_) + CUDADevice(DeviceInfo& info, Stats &stats, bool background_) : Device(info, stats, background_) { first_error = true; @@ -212,8 +212,8 @@ public: if(cuda_error_(result, "cuCtxCreate")) return; - cuda_assert(cuStreamCreate(&cuStream, 0)) - cuda_assert(cuEventCreate(&tileDone, 0x1)) + cuda_assert(cuStreamCreate(&cuStream, 0)); + cuda_assert(cuEventCreate(&tileDone, 0x1)); int major, minor; cuDeviceComputeCapability(&major, &minor, cuDevId); @@ -231,9 +231,9 @@ public: { task_pool.stop(); - cuda_assert(cuEventDestroy(tileDone)) - cuda_assert(cuStreamDestroy(cuStream)) - cuda_assert(cuCtxDestroy(cuContext)) + cuda_assert(cuEventDestroy(tileDone)); + cuda_assert(cuStreamDestroy(cuStream)); + cuda_assert(cuCtxDestroy(cuContext)); } bool support_device(bool experimental, bool branched) @@ -376,7 +376,7 @@ public: cuda_push_context(); CUdeviceptr device_pointer; size_t size = mem.memory_size(); - cuda_assert(cuMemAlloc(&device_pointer, size)) + cuda_assert(cuMemAlloc(&device_pointer, size)); mem.device_pointer = (device_ptr)device_pointer; stats.mem_alloc(size); cuda_pop_context(); @@ -386,7 +386,7 @@ public: { cuda_push_context(); if(mem.device_pointer) - cuda_assert(cuMemcpyHtoD(cuda_device_ptr(mem.device_pointer), (void*)mem.data_pointer, mem.memory_size())) + cuda_assert(cuMemcpyHtoD(cuda_device_ptr(mem.device_pointer), (void*)mem.data_pointer, mem.memory_size())); cuda_pop_context(); } @@ -398,7 +398,7 @@ public: cuda_push_context(); if(mem.device_pointer) { cuda_assert(cuMemcpyDtoH((uchar*)mem.data_pointer + offset, - (CUdeviceptr)((uchar*)mem.device_pointer + offset), size)) + (CUdeviceptr)((uchar*)mem.device_pointer + offset), size)); } else { memset((char*)mem.data_pointer + offset, 0, size); @@ -412,7 +412,7 @@ public: cuda_push_context(); if(mem.device_pointer) - cuda_assert(cuMemsetD8(cuda_device_ptr(mem.device_pointer), 0, mem.memory_size())) + cuda_assert(cuMemsetD8(cuda_device_ptr(mem.device_pointer), 0, mem.memory_size())); cuda_pop_context(); } @@ -420,7 +420,7 @@ public: { if(mem.device_pointer) { cuda_push_context(); - cuda_assert(cuMemFree(cuda_device_ptr(mem.device_pointer))) + cuda_assert(cuMemFree(cuda_device_ptr(mem.device_pointer))); cuda_pop_context(); mem.device_pointer = 0; @@ -435,9 +435,9 @@ public: size_t bytes; cuda_push_context(); - cuda_assert(cuModuleGetGlobal(&mem, &bytes, cuModule, name)) + cuda_assert(cuModuleGetGlobal(&mem, &bytes, cuModule, name)); //assert(bytes == size); - cuda_assert(cuMemcpyHtoD(mem, host, size)) + cuda_assert(cuMemcpyHtoD(mem, host, size)); cuda_pop_context(); } @@ -464,7 +464,7 @@ public: CUtexref texref = NULL; cuda_push_context(); - cuda_assert(cuModuleGetTexRef(&texref, cuModule, name)) + cuda_assert(cuModuleGetTexRef(&texref, cuModule, name)); if(!texref) { cuda_pop_context(); @@ -480,7 +480,7 @@ public: desc.Format = format; desc.NumChannels = mem.data_elements; - cuda_assert(cuArrayCreate(&handle, &desc)) + cuda_assert(cuArrayCreate(&handle, &desc)); if(!handle) { cuda_pop_context(); @@ -498,23 +498,23 @@ public: param.WidthInBytes = param.srcPitch; param.Height = mem.data_height; - cuda_assert(cuMemcpy2D(¶m)) + cuda_assert(cuMemcpy2D(¶m)); } else - cuda_assert(cuMemcpyHtoA(handle, 0, (void*)mem.data_pointer, size)) + cuda_assert(cuMemcpyHtoA(handle, 0, (void*)mem.data_pointer, size)); - cuda_assert(cuTexRefSetArray(texref, handle, CU_TRSA_OVERRIDE_FORMAT)) + cuda_assert(cuTexRefSetArray(texref, handle, CU_TRSA_OVERRIDE_FORMAT)); if(interpolation == INTERPOLATION_CLOSEST) { - cuda_assert(cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_POINT)) + cuda_assert(cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_POINT)); } else if (interpolation == INTERPOLATION_LINEAR){ - cuda_assert(cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_LINEAR)) + cuda_assert(cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_LINEAR)); } else {/* CUBIC and SMART are unsupported for CUDA */ - cuda_assert(cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_LINEAR)) + cuda_assert(cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_LINEAR)); } - cuda_assert(cuTexRefSetFlags(texref, CU_TRSF_NORMALIZED_COORDINATES)) + cuda_assert(cuTexRefSetFlags(texref, CU_TRSF_NORMALIZED_COORDINATES)); mem.device_pointer = (device_ptr)handle; @@ -528,20 +528,20 @@ public: cuda_push_context(); - cuda_assert(cuTexRefSetAddress(NULL, texref, cuda_device_ptr(mem.device_pointer), size)) - cuda_assert(cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_POINT)) - cuda_assert(cuTexRefSetFlags(texref, CU_TRSF_READ_AS_INTEGER)) + cuda_assert(cuTexRefSetAddress(NULL, texref, cuda_device_ptr(mem.device_pointer), size)); + cuda_assert(cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_POINT)); + cuda_assert(cuTexRefSetFlags(texref, CU_TRSF_READ_AS_INTEGER)); } if(periodic) { - cuda_assert(cuTexRefSetAddressMode(texref, 0, CU_TR_ADDRESS_MODE_WRAP)) - cuda_assert(cuTexRefSetAddressMode(texref, 1, CU_TR_ADDRESS_MODE_WRAP)) + cuda_assert(cuTexRefSetAddressMode(texref, 0, CU_TR_ADDRESS_MODE_WRAP)); + cuda_assert(cuTexRefSetAddressMode(texref, 1, CU_TR_ADDRESS_MODE_WRAP)); } else { - cuda_assert(cuTexRefSetAddressMode(texref, 0, CU_TR_ADDRESS_MODE_CLAMP)) - cuda_assert(cuTexRefSetAddressMode(texref, 1, CU_TR_ADDRESS_MODE_CLAMP)) + cuda_assert(cuTexRefSetAddressMode(texref, 0, CU_TR_ADDRESS_MODE_CLAMP)); + cuda_assert(cuTexRefSetAddressMode(texref, 1, CU_TR_ADDRESS_MODE_CLAMP)); } - cuda_assert(cuTexRefSetFormat(texref, format, mem.data_elements)) + cuda_assert(cuTexRefSetFormat(texref, format, mem.data_elements)); cuda_pop_context(); } @@ -554,17 +554,17 @@ public: CUdeviceptr cumem; size_t cubytes; - cuda_assert(cuModuleGetGlobal(&cumem, &cubytes, cuModule, name)) + cuda_assert(cuModuleGetGlobal(&cumem, &cubytes, cuModule, name)); if(cubytes == 8) { /* 64 bit device pointer */ uint64_t ptr = mem.device_pointer; - cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes)) + cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes)); } else { /* 32 bit device pointer */ uint32_t ptr = (uint32_t)mem.device_pointer; - cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes)) + cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes)); } cuda_pop_context(); @@ -605,10 +605,12 @@ public: CUdeviceptr d_rng_state = cuda_device_ptr(rtile.rng_state); /* get kernel function */ - if(branched && support_device(true, branched)) - cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_branched_path_trace")) - else - cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_path_trace")) + if(branched && support_device(true, branched)) { + cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_branched_path_trace")); + } + else { + cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_path_trace")); + } if(have_error()) return; @@ -616,43 +618,43 @@ public: /* pass in parameters */ int offset = 0; - cuda_assert(cuParamSetv(cuPathTrace, offset, &d_buffer, sizeof(d_buffer))) + cuda_assert(cuParamSetv(cuPathTrace, offset, &d_buffer, sizeof(d_buffer))); offset += sizeof(d_buffer); - cuda_assert(cuParamSetv(cuPathTrace, offset, &d_rng_state, sizeof(d_rng_state))) + cuda_assert(cuParamSetv(cuPathTrace, offset, &d_rng_state, sizeof(d_rng_state))); offset += sizeof(d_rng_state); offset = align_up(offset, __alignof(sample)); - cuda_assert(cuParamSeti(cuPathTrace, offset, sample)) + cuda_assert(cuParamSeti(cuPathTrace, offset, sample)); offset += sizeof(sample); - cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.x)) + cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.x)); offset += sizeof(rtile.x); - cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.y)) + cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.y)); offset += sizeof(rtile.y); - cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.w)) + cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.w)); offset += sizeof(rtile.w); - cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.h)) + cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.h)); offset += sizeof(rtile.h); - cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.offset)) + cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.offset)); offset += sizeof(rtile.offset); - cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.stride)) + cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.stride)); offset += sizeof(rtile.stride); - cuda_assert(cuParamSetSize(cuPathTrace, offset)) + cuda_assert(cuParamSetSize(cuPathTrace, offset)); /* launch kernel */ int threads_per_block; - cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuPathTrace)) + cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuPathTrace)); /*int num_registers; - cuda_assert(cuFuncGetAttribute(&num_registers, CU_FUNC_ATTRIBUTE_NUM_REGS, cuPathTrace)) + cuda_assert(cuFuncGetAttribute(&num_registers, CU_FUNC_ATTRIBUTE_NUM_REGS, cuPathTrace)); printf("threads_per_block %d\n", threads_per_block); printf("num_registers %d\n", num_registers);*/ @@ -662,16 +664,16 @@ public: int xblocks = (rtile.w + xthreads - 1)/xthreads; int yblocks = (rtile.h + ythreads - 1)/ythreads; - cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1)) - cuda_assert(cuFuncSetBlockShape(cuPathTrace, xthreads, ythreads, 1)) + cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1)); + cuda_assert(cuFuncSetBlockShape(cuPathTrace, xthreads, ythreads, 1)); if(info.display_device) { /* don't use async for device used for display, locks up UI too much */ - cuda_assert(cuLaunchGrid(cuPathTrace, xblocks, yblocks)) - cuda_assert(cuCtxSynchronize()) + cuda_assert(cuLaunchGrid(cuPathTrace, xblocks, yblocks)); + cuda_assert(cuCtxSynchronize()); } else { - cuda_assert(cuLaunchGridAsync(cuPathTrace, xblocks, yblocks, cuStream)) + cuda_assert(cuLaunchGridAsync(cuPathTrace, xblocks, yblocks, cuStream)); } cuda_pop_context(); @@ -689,58 +691,60 @@ public: CUdeviceptr d_buffer = cuda_device_ptr(buffer); /* get kernel function */ - if(rgba_half) - cuda_assert(cuModuleGetFunction(&cuFilmConvert, cuModule, "kernel_cuda_convert_to_half_float")) - else - cuda_assert(cuModuleGetFunction(&cuFilmConvert, cuModule, "kernel_cuda_convert_to_byte")) + if(rgba_half) { + cuda_assert(cuModuleGetFunction(&cuFilmConvert, cuModule, "kernel_cuda_convert_to_half_float")); + } + else { + cuda_assert(cuModuleGetFunction(&cuFilmConvert, cuModule, "kernel_cuda_convert_to_byte")); + } /* pass in parameters */ int offset = 0; - cuda_assert(cuParamSetv(cuFilmConvert, offset, &d_rgba, sizeof(d_rgba))) + cuda_assert(cuParamSetv(cuFilmConvert, offset, &d_rgba, sizeof(d_rgba))); offset += sizeof(d_rgba); - cuda_assert(cuParamSetv(cuFilmConvert, offset, &d_buffer, sizeof(d_buffer))) + cuda_assert(cuParamSetv(cuFilmConvert, offset, &d_buffer, sizeof(d_buffer))); offset += sizeof(d_buffer); float sample_scale = 1.0f/(task.sample + 1); offset = align_up(offset, __alignof(sample_scale)); - cuda_assert(cuParamSetf(cuFilmConvert, offset, sample_scale)) + cuda_assert(cuParamSetf(cuFilmConvert, offset, sample_scale)); offset += sizeof(sample_scale); - cuda_assert(cuParamSeti(cuFilmConvert, offset, task.x)) + cuda_assert(cuParamSeti(cuFilmConvert, offset, task.x)); offset += sizeof(task.x); - cuda_assert(cuParamSeti(cuFilmConvert, offset, task.y)) + cuda_assert(cuParamSeti(cuFilmConvert, offset, task.y)); offset += sizeof(task.y); - cuda_assert(cuParamSeti(cuFilmConvert, offset, task.w)) + cuda_assert(cuParamSeti(cuFilmConvert, offset, task.w)); offset += sizeof(task.w); - cuda_assert(cuParamSeti(cuFilmConvert, offset, task.h)) + cuda_assert(cuParamSeti(cuFilmConvert, offset, task.h)); offset += sizeof(task.h); - cuda_assert(cuParamSeti(cuFilmConvert, offset, task.offset)) + cuda_assert(cuParamSeti(cuFilmConvert, offset, task.offset)); offset += sizeof(task.offset); - cuda_assert(cuParamSeti(cuFilmConvert, offset, task.stride)) + cuda_assert(cuParamSeti(cuFilmConvert, offset, task.stride)); offset += sizeof(task.stride); - cuda_assert(cuParamSetSize(cuFilmConvert, offset)) + cuda_assert(cuParamSetSize(cuFilmConvert, offset)); /* launch kernel */ int threads_per_block; - cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuFilmConvert)) + cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuFilmConvert)); int xthreads = (int)sqrt((float)threads_per_block); int ythreads = (int)sqrt((float)threads_per_block); int xblocks = (task.w + xthreads - 1)/xthreads; int yblocks = (task.h + ythreads - 1)/ythreads; - cuda_assert(cuFuncSetCacheConfig(cuFilmConvert, CU_FUNC_CACHE_PREFER_L1)) - cuda_assert(cuFuncSetBlockShape(cuFilmConvert, xthreads, ythreads, 1)) - cuda_assert(cuLaunchGrid(cuFilmConvert, xblocks, yblocks)) + cuda_assert(cuFuncSetCacheConfig(cuFilmConvert, CU_FUNC_CACHE_PREFER_L1)); + cuda_assert(cuFuncSetBlockShape(cuFilmConvert, xthreads, ythreads, 1)); + cuda_assert(cuLaunchGrid(cuFilmConvert, xblocks, yblocks)); unmap_pixels((rgba_byte)? rgba_byte: rgba_half); @@ -759,7 +763,7 @@ public: CUdeviceptr d_output = cuda_device_ptr(task.shader_output); /* get kernel function */ - cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_shader")) + cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_shader")); /* do tasks in smaller chunks, so we can cancel it */ const int shader_chunk_size = 65536; @@ -773,35 +777,35 @@ public: /* pass in parameters */ int offset = 0; - cuda_assert(cuParamSetv(cuShader, offset, &d_input, sizeof(d_input))) + cuda_assert(cuParamSetv(cuShader, offset, &d_input, sizeof(d_input))); offset += sizeof(d_input); - cuda_assert(cuParamSetv(cuShader, offset, &d_output, sizeof(d_output))) + cuda_assert(cuParamSetv(cuShader, offset, &d_output, sizeof(d_output))); offset += sizeof(d_output); int shader_eval_type = task.shader_eval_type; offset = align_up(offset, __alignof(shader_eval_type)); - cuda_assert(cuParamSeti(cuShader, offset, task.shader_eval_type)) + cuda_assert(cuParamSeti(cuShader, offset, task.shader_eval_type)); offset += sizeof(task.shader_eval_type); - cuda_assert(cuParamSeti(cuShader, offset, shader_x)) + cuda_assert(cuParamSeti(cuShader, offset, shader_x)); offset += sizeof(shader_x); - cuda_assert(cuParamSetSize(cuShader, offset)) + cuda_assert(cuParamSetSize(cuShader, offset)); /* launch kernel */ int threads_per_block; - cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuShader)) + cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuShader)); int shader_w = min(shader_chunk_size, end - shader_x); int xblocks = (shader_w + threads_per_block - 1)/threads_per_block; - cuda_assert(cuFuncSetCacheConfig(cuShader, CU_FUNC_CACHE_PREFER_L1)) - cuda_assert(cuFuncSetBlockShape(cuShader, threads_per_block, 1, 1)) - cuda_assert(cuLaunchGrid(cuShader, xblocks, 1)) + cuda_assert(cuFuncSetCacheConfig(cuShader, CU_FUNC_CACHE_PREFER_L1)); + cuda_assert(cuFuncSetBlockShape(cuShader, threads_per_block, 1, 1)); + cuda_assert(cuLaunchGrid(cuShader, xblocks, 1)); - cuda_assert(cuCtxSynchronize()) + cuda_assert(cuCtxSynchronize()); } cuda_pop_context(); @@ -814,8 +818,8 @@ public: CUdeviceptr buffer; size_t bytes; - cuda_assert(cuGraphicsMapResources(1, &pmem.cuPBOresource, 0)) - cuda_assert(cuGraphicsResourceGetMappedPointer(&buffer, &bytes, pmem.cuPBOresource)) + cuda_assert(cuGraphicsMapResources(1, &pmem.cuPBOresource, 0)); + cuda_assert(cuGraphicsResourceGetMappedPointer(&buffer, &bytes, pmem.cuPBOresource)); return buffer; } @@ -828,7 +832,7 @@ public: if(!background) { PixelMem pmem = pixel_mem_map[mem]; - cuda_assert(cuGraphicsUnmapResources(1, &pmem.cuPBOresource, 0)) + cuda_assert(cuGraphicsUnmapResources(1, &pmem.cuPBOresource, 0)); } } @@ -917,7 +921,7 @@ public: cuda_push_context(); - cuda_assert(cuGraphicsUnregisterResource(pmem.cuPBOresource)) + cuda_assert(cuGraphicsUnregisterResource(pmem.cuPBOresource)); glDeleteBuffers(1, &pmem.cuPBO); glDeleteTextures(1, &pmem.cuTexId); @@ -1038,8 +1042,8 @@ public: if(!info.display_device && sample == sync_sample) { cuda_push_context(); - cuda_assert(cuEventRecord(tileDone, cuStream)) - cuda_assert(cuEventSynchronize(tileDone)) + cuda_assert(cuEventRecord(tileDone, cuStream)); + cuda_assert(cuEventSynchronize(tileDone)); /* Do some time keeping to find out if we need to sync less */ boost::posix_time::ptime current_time(boost::posix_time::microsec_clock::local_time()); @@ -1065,7 +1069,7 @@ public: shader(*task); cuda_push_context(); - cuda_assert(cuCtxSynchronize()) + cuda_assert(cuCtxSynchronize()); cuda_pop_context(); } } @@ -1086,7 +1090,7 @@ public: film_convert(task, task.buffer, task.rgba_byte, task.rgba_half); cuda_push_context(); - cuda_assert(cuCtxSynchronize()) + cuda_assert(cuCtxSynchronize()); cuda_pop_context(); } else { diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp index 30762721931..2e759d1a36f 100644 --- a/intern/cycles/device/device_opencl.cpp +++ b/intern/cycles/device/device_opencl.cpp @@ -416,7 +416,7 @@ public: error_msg = message; \ fprintf(stderr, "%s\n", message.c_str()); \ } \ - } + } (void)0 void opencl_assert_err(cl_int err, const char* where) { @@ -846,7 +846,7 @@ public: { /* this is blocking */ size_t size = mem.memory_size(); - opencl_assert(clEnqueueWriteBuffer(cqCommandQueue, CL_MEM_PTR(mem.device_pointer), CL_TRUE, 0, size, (void*)mem.data_pointer, 0, NULL, NULL)) + opencl_assert(clEnqueueWriteBuffer(cqCommandQueue, CL_MEM_PTR(mem.device_pointer), CL_TRUE, 0, size, (void*)mem.data_pointer, 0, NULL, NULL)); } void mem_copy_from(device_memory& mem, int y, int w, int h, int elem) @@ -854,7 +854,7 @@ public: size_t offset = elem*y*w; size_t size = elem*w*h; - opencl_assert(clEnqueueReadBuffer(cqCommandQueue, CL_MEM_PTR(mem.device_pointer), CL_TRUE, offset, size, (uchar*)mem.data_pointer + offset, 0, NULL, NULL)) + opencl_assert(clEnqueueReadBuffer(cqCommandQueue, CL_MEM_PTR(mem.device_pointer), CL_TRUE, offset, size, (uchar*)mem.data_pointer + offset, 0, NULL, NULL)); } void mem_zero(device_memory& mem) @@ -868,7 +868,7 @@ public: void mem_free(device_memory& mem) { if(mem.device_pointer) { - opencl_assert(clReleaseMemObject(CL_MEM_PTR(mem.device_pointer))) + opencl_assert(clReleaseMemObject(CL_MEM_PTR(mem.device_pointer))); mem.device_pointer = 0; stats.mem_free(mem.memory_size()); @@ -944,8 +944,8 @@ public: size_t global_size[2] = {global_size_round_up(local_size[0], w), global_size_round_up(local_size[1], h)}; /* run kernel */ - opencl_assert(clEnqueueNDRangeKernel(cqCommandQueue, kernel, 2, NULL, global_size, NULL, 0, NULL, NULL)) - opencl_assert(clFlush(cqCommandQueue)) + opencl_assert(clEnqueueNDRangeKernel(cqCommandQueue, kernel, 2, NULL, global_size, NULL, 0, NULL, NULL)); + opencl_assert(clFlush(cqCommandQueue)); } void path_trace(RenderTile& rtile, int sample) @@ -965,21 +965,21 @@ public: /* sample arguments */ cl_uint narg = 0; - opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_data), (void*)&d_data)) - opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_buffer), (void*)&d_buffer)) - opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_rng_state), (void*)&d_rng_state)) + opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_data), (void*)&d_data)); + opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_buffer), (void*)&d_buffer)); + opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_rng_state), (void*)&d_rng_state)); #define KERNEL_TEX(type, ttype, name) \ set_kernel_arg_mem(ckPathTraceKernel, &narg, #name); #include "kernel_textures.h" - opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_sample), (void*)&d_sample)) - opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_x), (void*)&d_x)) - opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_y), (void*)&d_y)) - opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_w), (void*)&d_w)) - opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_h), (void*)&d_h)) - opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_offset), (void*)&d_offset)) - opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_stride), (void*)&d_stride)) + opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_sample), (void*)&d_sample)); + opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_x), (void*)&d_x)); + opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_y), (void*)&d_y)); + opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_w), (void*)&d_w)); + opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_h), (void*)&d_h)); + opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_offset), (void*)&d_offset)); + opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_stride), (void*)&d_stride)); enqueue_kernel(ckPathTraceKernel, d_w, d_h); } @@ -1020,21 +1020,21 @@ public: cl_kernel ckFilmConvertKernel = (rgba_byte)? ckFilmConvertByteKernel: ckFilmConvertHalfFloatKernel; - opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_data), (void*)&d_data)) - opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_rgba), (void*)&d_rgba)) - opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_buffer), (void*)&d_buffer)) + opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_data), (void*)&d_data)); + opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_rgba), (void*)&d_rgba)); + opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_buffer), (void*)&d_buffer)); #define KERNEL_TEX(type, ttype, name) \ set_kernel_arg_mem(ckFilmConvertKernel, &narg, #name); #include "kernel_textures.h" - opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_sample_scale), (void*)&d_sample_scale)) - opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_x), (void*)&d_x)) - opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_y), (void*)&d_y)) - opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_w), (void*)&d_w)) - opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_h), (void*)&d_h)) - opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_offset), (void*)&d_offset)) - opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_stride), (void*)&d_stride)) + opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_sample_scale), (void*)&d_sample_scale)); + opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_x), (void*)&d_x)); + opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_y), (void*)&d_y)); + opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_w), (void*)&d_w)); + opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_h), (void*)&d_h)); + opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_offset), (void*)&d_offset)); + opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_stride), (void*)&d_stride)); @@ -1054,17 +1054,17 @@ public: /* sample arguments */ cl_uint narg = 0; - opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_data), (void*)&d_data)) - opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_input), (void*)&d_input)) - opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_output), (void*)&d_output)) + opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_data), (void*)&d_data)); + opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_input), (void*)&d_input)); + opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_output), (void*)&d_output)); #define KERNEL_TEX(type, ttype, name) \ set_kernel_arg_mem(ckShaderKernel, &narg, #name); #include "kernel_textures.h" - opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_shader_eval_type), (void*)&d_shader_eval_type)) - opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_shader_x), (void*)&d_shader_x)) - opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_shader_w), (void*)&d_shader_w)) + opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_shader_eval_type), (void*)&d_shader_eval_type)); + opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_shader_x), (void*)&d_shader_x)); + opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_shader_w), (void*)&d_shader_w)); enqueue_kernel(ckShaderKernel, task.shader_w, 1); }