diff --git a/intern/cycles/device/cuda/device_cuda_impl.cpp b/intern/cycles/device/cuda/device_cuda_impl.cpp index b9bbeb9a25b..0be2c322dfa 100644 --- a/intern/cycles/device/cuda/device_cuda_impl.cpp +++ b/intern/cycles/device/cuda/device_cuda_impl.cpp @@ -1760,7 +1760,7 @@ void CUDADevice::denoise(RenderTile &rtile, DenoisingTask &denoising) denoising.render_buffer.samples = rtile.sample; denoising.buffer.gpu_temporary_mem = true; - denoising.run_denoising(&rtile); + denoising.run_denoising(rtile); } void CUDADevice::adaptive_sampling_filter(uint filter_sample, diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h index a5833369a17..115b05e3911 100644 --- a/intern/cycles/device/device.h +++ b/intern/cycles/device/device.h @@ -439,10 +439,10 @@ class Device { { return 0; } - virtual void map_neighbor_tiles(Device * /*sub_device*/, RenderTile * /*tiles*/) + virtual void map_neighbor_tiles(Device * /*sub_device*/, RenderTileNeighbors & /*neighbors*/) { } - virtual void unmap_neighbor_tiles(Device * /*sub_device*/, RenderTile * /*tiles*/) + virtual void unmap_neighbor_tiles(Device * /*sub_device*/, RenderTileNeighbors & /*neighbors*/) { } diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index 8f68e66a1b4..2e4761562a5 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -1040,7 +1040,7 @@ class CPUDevice : public Device { denoising.render_buffer.samples = tile.sample; denoising.buffer.gpu_temporary_mem = false; - denoising.run_denoising(&tile); + denoising.run_denoising(tile); } void thread_render(DeviceTask &task) diff --git a/intern/cycles/device/device_denoising.cpp b/intern/cycles/device/device_denoising.cpp index 89de80a5bcd..38c42d15cab 100644 --- a/intern/cycles/device/device_denoising.cpp +++ b/intern/cycles/device/device_denoising.cpp @@ -71,29 +71,30 @@ DenoisingTask::~DenoisingTask() tile_info_mem.free(); } -void DenoisingTask::set_render_buffer(RenderTile 
*rtiles) +void DenoisingTask::set_render_buffer(RenderTileNeighbors &neighbors) { - for (int i = 0; i < 9; i++) { - tile_info->offsets[i] = rtiles[i].offset; - tile_info->strides[i] = rtiles[i].stride; - tile_info->buffers[i] = rtiles[i].buffer; + for (int i = 0; i < RenderTileNeighbors::SIZE; i++) { + RenderTile &rtile = neighbors.tiles[i]; + tile_info->offsets[i] = rtile.offset; + tile_info->strides[i] = rtile.stride; + tile_info->buffers[i] = rtile.buffer; } - tile_info->x[0] = rtiles[3].x; - tile_info->x[1] = rtiles[4].x; - tile_info->x[2] = rtiles[5].x; - tile_info->x[3] = rtiles[5].x + rtiles[5].w; - tile_info->y[0] = rtiles[1].y; - tile_info->y[1] = rtiles[4].y; - tile_info->y[2] = rtiles[7].y; - tile_info->y[3] = rtiles[7].y + rtiles[7].h; + tile_info->x[0] = neighbors.tiles[3].x; + tile_info->x[1] = neighbors.tiles[4].x; + tile_info->x[2] = neighbors.tiles[5].x; + tile_info->x[3] = neighbors.tiles[5].x + neighbors.tiles[5].w; + tile_info->y[0] = neighbors.tiles[1].y; + tile_info->y[1] = neighbors.tiles[4].y; + tile_info->y[2] = neighbors.tiles[7].y; + tile_info->y[3] = neighbors.tiles[7].y + neighbors.tiles[7].h; - target_buffer.offset = rtiles[9].offset; - target_buffer.stride = rtiles[9].stride; - target_buffer.ptr = rtiles[9].buffer; + target_buffer.offset = neighbors.target.offset; + target_buffer.stride = neighbors.target.stride; + target_buffer.ptr = neighbors.target.buffer; - if (do_prefilter && rtiles[9].buffers) { + if (do_prefilter && neighbors.target.buffers) { target_buffer.denoising_output_offset = - rtiles[9].buffers->params.get_denoising_prefiltered_offset(); + neighbors.target.buffers->params.get_denoising_prefiltered_offset(); } else { target_buffer.denoising_output_offset = 0; @@ -320,12 +321,11 @@ void DenoisingTask::reconstruct() functions.solve(target_buffer.ptr); } -void DenoisingTask::run_denoising(RenderTile *tile) +void DenoisingTask::run_denoising(RenderTile &tile) { - RenderTile rtiles[10]; - rtiles[4] = *tile; - 
functions.map_neighbor_tiles(rtiles); - set_render_buffer(rtiles); + RenderTileNeighbors neighbors(tile); + functions.map_neighbor_tiles(neighbors); + set_render_buffer(neighbors); setup_denoising_buffer(); @@ -347,7 +347,7 @@ void DenoisingTask::run_denoising(RenderTile *tile) write_buffer(); } - functions.unmap_neighbor_tiles(rtiles); + functions.unmap_neighbor_tiles(neighbors); } CCL_NAMESPACE_END diff --git a/intern/cycles/device/device_denoising.h b/intern/cycles/device/device_denoising.h index 4c122e981eb..2c0dc23b44a 100644 --- a/intern/cycles/device/device_denoising.h +++ b/intern/cycles/device/device_denoising.h @@ -102,8 +102,8 @@ class DenoisingTask { device_ptr output_ptr)> detect_outliers; function write_feature; - function map_neighbor_tiles; - function unmap_neighbor_tiles; + function map_neighbor_tiles; + function unmap_neighbor_tiles; } functions; /* Stores state of the current Reconstruction operation, @@ -154,7 +154,7 @@ class DenoisingTask { DenoisingTask(Device *device, const DeviceTask &task); ~DenoisingTask(); - void run_denoising(RenderTile *tile); + void run_denoising(RenderTile &tile); struct DenoiseBuffers { int pass_stride; @@ -179,7 +179,7 @@ class DenoisingTask { protected: Device *device; - void set_render_buffer(RenderTile *rtiles); + void set_render_buffer(RenderTileNeighbors &neighbors); void setup_denoising_buffer(); void prefilter_shadowing(); void prefilter_features(); diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp index d38cbfccb6f..9ea8782d0f0 100644 --- a/intern/cycles/device/device_multi.cpp +++ b/intern/cycles/device/device_multi.cpp @@ -584,20 +584,22 @@ class MultiDevice : public Device { return -1; } - void map_neighbor_tiles(Device *sub_device, RenderTile *tiles) + void map_neighbor_tiles(Device *sub_device, RenderTileNeighbors &neighbors) { - for (int i = 0; i < 9; i++) { - if (!tiles[i].buffers) { + for (int i = 0; i < RenderTileNeighbors::SIZE; i++) { + RenderTile &tile = 
neighbors.tiles[i]; + + if (!tile.buffers) { continue; } - device_vector &mem = tiles[i].buffers->buffer; - tiles[i].buffer = mem.device_pointer; + device_vector &mem = tile.buffers->buffer; + tile.buffer = mem.device_pointer; if (mem.device == this && matching_rendering_and_denoising_devices) { /* Skip unnecessary copies in viewport mode (buffer covers the * whole image), but still need to fix up the tile device pointer. */ - map_tile(sub_device, tiles[i]); + map_tile(sub_device, tile); continue; } @@ -610,15 +612,15 @@ class MultiDevice : public Device { * also required for the case where a CPU thread is denoising * a tile rendered on the GPU. In that case we have to avoid * overwriting the buffer being de-noised by the CPU thread. */ - if (!tiles[i].buffers->map_neighbor_copied) { - tiles[i].buffers->map_neighbor_copied = true; + if (!tile.buffers->map_neighbor_copied) { + tile.buffers->map_neighbor_copied = true; mem.copy_from_device(); } if (mem.device == this) { /* Can re-use memory if tile is already allocated on the sub device. */ - map_tile(sub_device, tiles[i]); - mem.swap_device(sub_device, mem.device_size, tiles[i].buffer); + map_tile(sub_device, tile); + mem.swap_device(sub_device, mem.device_size, tile.buffer); } else { mem.swap_device(sub_device, 0, 0); @@ -626,40 +628,42 @@ class MultiDevice : public Device { mem.copy_to_device(); - tiles[i].buffer = mem.device_pointer; - tiles[i].device_size = mem.device_size; + tile.buffer = mem.device_pointer; + tile.device_size = mem.device_size; mem.restore_device(); } } } - void unmap_neighbor_tiles(Device *sub_device, RenderTile *tiles) + void unmap_neighbor_tiles(Device *sub_device, RenderTileNeighbors &neighbors) { - device_vector &mem = tiles[9].buffers->buffer; + RenderTile &target_tile = neighbors.target; + device_vector &mem = target_tile.buffers->buffer; if (mem.device == this && matching_rendering_and_denoising_devices) { return; } /* Copy denoised result back to the host. 
*/ - mem.swap_device(sub_device, tiles[9].device_size, tiles[9].buffer); + mem.swap_device(sub_device, target_tile.device_size, target_tile.buffer); mem.copy_from_device(); mem.restore_device(); /* Copy denoised result to the original device. */ mem.copy_to_device(); - for (int i = 0; i < 9; i++) { - if (!tiles[i].buffers) { + for (int i = 0; i < RenderTileNeighbors::SIZE; i++) { + RenderTile &tile = neighbors.tiles[i]; + if (!tile.buffers) { continue; } - device_vector &mem = tiles[i].buffers->buffer; + device_vector &mem = tile.buffers->buffer; if (mem.device != sub_device && mem.device != this) { /* Free up memory again if it was allocated for the copy above. */ - mem.swap_device(sub_device, tiles[i].device_size, tiles[i].buffer); + mem.swap_device(sub_device, tile.device_size, tile.buffer); sub_device->mem_free(mem); mem.restore_device(); } diff --git a/intern/cycles/device/device_optix.cpp b/intern/cycles/device/device_optix.cpp index 266222c74c5..35856f48213 100644 --- a/intern/cycles/device/device_optix.cpp +++ b/intern/cycles/device/device_optix.cpp @@ -801,19 +801,18 @@ class OptiXDevice : public CUDADevice { // 0 1 2 // 3 4 5 // 6 7 8 9 - RenderTile rtiles[10]; - rtiles[4] = rtile; - task.map_neighbor_tiles(rtiles, this); - rtile = rtiles[4]; // Tile may have been modified by mapping code + RenderTileNeighbors neighbors(rtile); + task.map_neighbor_tiles(neighbors, this); + RenderTile &center_tile = neighbors.tiles[RenderTileNeighbors::CENTER]; + RenderTile &target_tile = neighbors.target; + rtile = center_tile; // Tile may have been modified by mapping code // Calculate size of the tile to denoise (including overlap) - int4 rect = make_int4( - rtiles[4].x, rtiles[4].y, rtiles[4].x + rtiles[4].w, rtiles[4].y + rtiles[4].h); + int4 rect = center_tile.bounds(); // Overlap between tiles has to be at least 64 pixels // TODO(pmours): Query this value from OptiX rect = rect_expand(rect, 64); - int4 clip_rect = make_int4( - rtiles[3].x, rtiles[1].y, rtiles[5].x + 
rtiles[5].w, rtiles[7].y + rtiles[7].h); + int4 clip_rect = neighbors.bounds(); rect = rect_clip(rect, clip_rect); int2 rect_size = make_int2(rect.z - rect.x, rect.w - rect.y); int2 overlap_offset = make_int2(rtile.x - rect.x, rtile.y - rect.y); @@ -834,14 +833,14 @@ class OptiXDevice : public CUDADevice { device_only_memory input(this, "denoiser input"); device_vector tile_info_mem(this, "denoiser tile info", MEM_READ_WRITE); - if ((!rtiles[0].buffer || rtiles[0].buffer == rtile.buffer) && - (!rtiles[1].buffer || rtiles[1].buffer == rtile.buffer) && - (!rtiles[2].buffer || rtiles[2].buffer == rtile.buffer) && - (!rtiles[3].buffer || rtiles[3].buffer == rtile.buffer) && - (!rtiles[5].buffer || rtiles[5].buffer == rtile.buffer) && - (!rtiles[6].buffer || rtiles[6].buffer == rtile.buffer) && - (!rtiles[7].buffer || rtiles[7].buffer == rtile.buffer) && - (!rtiles[8].buffer || rtiles[8].buffer == rtile.buffer)) { + bool contiguous_memory = true; + for (int i = 0; i < RenderTileNeighbors::SIZE; i++) { + if (neighbors.tiles[i].buffer && neighbors.tiles[i].buffer != rtile.buffer) { + contiguous_memory = false; + } + } + + if (contiguous_memory) { // Tiles are in continous memory, so can just subtract overlap offset input_ptr -= (overlap_offset.x + overlap_offset.y * rtile.stride) * pixel_stride; // Stride covers the whole width of the image and not just a single tile @@ -856,19 +855,19 @@ class OptiXDevice : public CUDADevice { input_stride *= rect_size.x; TileInfo *tile_info = tile_info_mem.alloc(1); - for (int i = 0; i < 9; i++) { - tile_info->offsets[i] = rtiles[i].offset; - tile_info->strides[i] = rtiles[i].stride; - tile_info->buffers[i] = rtiles[i].buffer; + for (int i = 0; i < RenderTileNeighbors::SIZE; i++) { + tile_info->offsets[i] = neighbors.tiles[i].offset; + tile_info->strides[i] = neighbors.tiles[i].stride; + tile_info->buffers[i] = neighbors.tiles[i].buffer; } - tile_info->x[0] = rtiles[3].x; - tile_info->x[1] = rtiles[4].x; - tile_info->x[2] = rtiles[5].x; 
- tile_info->x[3] = rtiles[5].x + rtiles[5].w; - tile_info->y[0] = rtiles[1].y; - tile_info->y[1] = rtiles[4].y; - tile_info->y[2] = rtiles[7].y; - tile_info->y[3] = rtiles[7].y + rtiles[7].h; + tile_info->x[0] = neighbors.tiles[3].x; + tile_info->x[1] = neighbors.tiles[4].x; + tile_info->x[2] = neighbors.tiles[5].x; + tile_info->x[3] = neighbors.tiles[5].x + neighbors.tiles[5].w; + tile_info->y[0] = neighbors.tiles[1].y; + tile_info->y[1] = neighbors.tiles[4].y; + tile_info->y[2] = neighbors.tiles[7].y; + tile_info->y[3] = neighbors.tiles[7].y + neighbors.tiles[7].h; tile_info_mem.copy_to_device(); void *args[] = { @@ -977,10 +976,10 @@ class OptiXDevice : public CUDADevice { int2 output_offset = overlap_offset; overlap_offset = make_int2(0, 0); // Not supported by denoiser API, so apply manually # else - output_layers[0].data = rtiles[9].buffer + pixel_offset; - output_layers[0].width = rtiles[9].w; - output_layers[0].height = rtiles[9].h; - output_layers[0].rowStrideInBytes = rtiles[9].stride * pixel_stride; + output_layers[0].data = target_tile.buffer + pixel_offset; + output_layers[0].width = target_tile.w; + output_layers[0].height = target_tile.h; + output_layers[0].rowStrideInBytes = target_tile.stride * pixel_stride; output_layers[0].pixelStrideInBytes = pixel_stride; # endif output_layers[0].format = OPTIX_PIXEL_FORMAT_FLOAT3; @@ -1002,26 +1001,26 @@ class OptiXDevice : public CUDADevice { # if OPTIX_DENOISER_NO_PIXEL_STRIDE void *output_args[] = {&input_ptr, - &rtiles[9].buffer, + &target_tile.buffer, &output_offset.x, &output_offset.y, &rect_size.x, &rect_size.y, - &rtiles[9].x, - &rtiles[9].y, - &rtiles[9].w, - &rtiles[9].h, - &rtiles[9].offset, - &rtiles[9].stride, + &target_tile.x, + &target_tile.y, + &target_tile.w, + &target_tile.h, + &target_tile.offset, + &target_tile.stride, &task.pass_stride, &rtile.sample}; launch_filter_kernel( - "kernel_cuda_filter_convert_from_rgb", rtiles[9].w, rtiles[9].h, output_args); + 
"kernel_cuda_filter_convert_from_rgb", target_tile.w, target_tile.h, output_args); # endif check_result_cuda_ret(cuStreamSynchronize(0)); - task.unmap_neighbor_tiles(rtiles, this); + task.unmap_neighbor_tiles(neighbors, this); } else { // Run CUDA denoising kernels diff --git a/intern/cycles/device/device_task.h b/intern/cycles/device/device_task.h index 600973b8100..21da55d50d4 100644 --- a/intern/cycles/device/device_task.h +++ b/intern/cycles/device/device_task.h @@ -29,6 +29,7 @@ CCL_NAMESPACE_BEGIN class Device; class RenderBuffers; class RenderTile; +class RenderTileNeighbors; class Tile; enum DenoiserType { @@ -150,8 +151,8 @@ class DeviceTask { function update_tile_sample; function release_tile; function get_cancel; - function map_neighbor_tiles; - function unmap_neighbor_tiles; + function map_neighbor_tiles; + function unmap_neighbor_tiles; uint tile_types; DenoiseParams denoising; diff --git a/intern/cycles/device/opencl/device_opencl_impl.cpp b/intern/cycles/device/opencl/device_opencl_impl.cpp index 8c94815b193..e851749949d 100644 --- a/intern/cycles/device/opencl/device_opencl_impl.cpp +++ b/intern/cycles/device/opencl/device_opencl_impl.cpp @@ -1850,7 +1850,7 @@ void OpenCLDevice::denoise(RenderTile &rtile, DenoisingTask &denoising) denoising.render_buffer.samples = rtile.sample; denoising.buffer.gpu_temporary_mem = true; - denoising.run_denoising(&rtile); + denoising.run_denoising(rtile); } void OpenCLDevice::shader(DeviceTask &task) diff --git a/intern/cycles/render/buffers.h b/intern/cycles/render/buffers.h index 975bae2239c..06b6094e6c9 100644 --- a/intern/cycles/render/buffers.h +++ b/intern/cycles/render/buffers.h @@ -52,7 +52,7 @@ class BufferParams { /* passes */ vector passes; bool denoising_data_pass; - /* If only some light path types should be denoised, an additional pass is needed. */ + /* If only some light path types should be denoised, an additional pass is needed. 
*/ bool denoising_clean_pass; /* When we're prefiltering the passes during rendering, we need to keep both the * original and the prefiltered data around because neighboring tiles might still @@ -149,6 +149,50 @@ class RenderTile { RenderBuffers *buffers; RenderTile(); + + int4 bounds() const + { + return make_int4(x, /* xmin */ + y, /* ymin */ + x + w, /* xmax */ + y + h); /* ymax */ + } +}; + +/* Render Tile Neighbors + * Set of neighboring tiles used for denoising. Tile order: + * 0 1 2 + * 3 4 5 + * 6 7 8 */ + +class RenderTileNeighbors { + public: + static const int SIZE = 9; + static const int CENTER = 4; + + RenderTile tiles[SIZE]; + RenderTile target; + + RenderTileNeighbors(const RenderTile &center) + { + tiles[CENTER] = center; + } + + int4 bounds() const + { + return make_int4(tiles[3].x, /* xmin */ + tiles[1].y, /* ymin */ + tiles[5].x + tiles[5].w, /* xmax */ + tiles[7].y + tiles[7].h); /* ymax */ + } + + void set_bounds_from_center() + { + tiles[3].x = tiles[CENTER].x; + tiles[1].y = tiles[CENTER].y; + tiles[5].x = tiles[CENTER].x + tiles[CENTER].w; + tiles[7].y = tiles[CENTER].y + tiles[CENTER].h; + } }; CCL_NAMESPACE_END diff --git a/intern/cycles/render/denoising.cpp b/intern/cycles/render/denoising.cpp index 4055bc4773b..76408ca4849 100644 --- a/intern/cycles/render/denoising.cpp +++ b/intern/cycles/render/denoising.cpp @@ -271,42 +271,45 @@ bool DenoiseTask::acquire_tile(Device *device, Device *tile_device, RenderTile & * * However, since there is only one large memory, the denoised result has to be written to * a different buffer to avoid having to copy an entire horizontal slice of the image. */ -void DenoiseTask::map_neighboring_tiles(RenderTile *tiles, Device *tile_device) +void DenoiseTask::map_neighboring_tiles(RenderTileNeighbors &neighbors, Device *tile_device) { + RenderTile &center_tile = neighbors.tiles[RenderTileNeighbors::CENTER]; + RenderTile &target_tile = neighbors.target; + /* Fill tile information. 
*/ - for (int i = 0; i < 9; i++) { - if (i == 4) { + for (int i = 0; i < RenderTileNeighbors::SIZE; i++) { + if (i == RenderTileNeighbors::CENTER) { continue; } + RenderTile &tile = neighbors.tiles[i]; int dx = (i % 3) - 1; int dy = (i / 3) - 1; - tiles[i].x = clamp(tiles[4].x + dx * denoiser->tile_size.x, 0, image.width); - tiles[i].w = clamp(tiles[4].x + (dx + 1) * denoiser->tile_size.x, 0, image.width) - tiles[i].x; - tiles[i].y = clamp(tiles[4].y + dy * denoiser->tile_size.y, 0, image.height); - tiles[i].h = clamp(tiles[4].y + (dy + 1) * denoiser->tile_size.y, 0, image.height) - - tiles[i].y; + tile.x = clamp(center_tile.x + dx * denoiser->tile_size.x, 0, image.width); + tile.w = clamp(center_tile.x + (dx + 1) * denoiser->tile_size.x, 0, image.width) - tile.x; + tile.y = clamp(center_tile.y + dy * denoiser->tile_size.y, 0, image.height); + tile.h = clamp(center_tile.y + (dy + 1) * denoiser->tile_size.y, 0, image.height) - tile.y; - tiles[i].buffer = tiles[4].buffer; - tiles[i].offset = tiles[4].offset; - tiles[i].stride = image.width; + tile.buffer = center_tile.buffer; + tile.offset = center_tile.offset; + tile.stride = image.width; } /* Allocate output buffer. */ device_vector *output_mem = new device_vector( tile_device, "denoising_output", MEM_READ_WRITE); - output_mem->alloc(OUTPUT_NUM_CHANNELS * tiles[4].w * tiles[4].h); + output_mem->alloc(OUTPUT_NUM_CHANNELS * center_tile.w * center_tile.h); /* Fill output buffer with noisy image, assumed by kernel_filter_finalize * when skipping denoising of some pixels. 
*/ float *result = output_mem->data(); - float *in = &image.pixels[image.num_channels * (tiles[4].y * image.width + tiles[4].x)]; + float *in = &image.pixels[image.num_channels * (center_tile.y * image.width + center_tile.x)]; const DenoiseImageLayer &layer = image.layers[current_layer]; const int *input_to_image_channel = layer.input_to_image_channel.data(); - for (int y = 0; y < tiles[4].h; y++) { - for (int x = 0; x < tiles[4].w; x++, result += OUTPUT_NUM_CHANNELS) { + for (int y = 0; y < center_tile.h; y++) { + for (int x = 0; x < center_tile.w; x++, result += OUTPUT_NUM_CHANNELS) { for (int i = 0; i < OUTPUT_NUM_CHANNELS; i++) { result[i] = in[image.num_channels * x + input_to_image_channel[INPUT_NOISY_IMAGE + i]]; } @@ -317,35 +320,38 @@ void DenoiseTask::map_neighboring_tiles(RenderTile *tiles, Device *tile_device) output_mem->copy_to_device(); /* Fill output tile info. */ - tiles[9] = tiles[4]; - tiles[9].buffer = output_mem->device_pointer; - tiles[9].stride = tiles[9].w; - tiles[9].offset -= tiles[9].x + tiles[9].y * tiles[9].stride; + target_tile = center_tile; + target_tile.buffer = output_mem->device_pointer; + target_tile.stride = target_tile.w; + target_tile.offset -= target_tile.x + target_tile.y * target_tile.stride; thread_scoped_lock output_lock(output_mutex); - assert(output_pixels.count(tiles[4].tile_index) == 0); - output_pixels[tiles[9].tile_index] = output_mem; + assert(output_pixels.count(center_tile.tile_index) == 0); + output_pixels[target_tile.tile_index] = output_mem; } -void DenoiseTask::unmap_neighboring_tiles(RenderTile *tiles) +void DenoiseTask::unmap_neighboring_tiles(RenderTileNeighbors &neighbors) { + RenderTile &center_tile = neighbors.tiles[RenderTileNeighbors::CENTER]; + RenderTile &target_tile = neighbors.target; + thread_scoped_lock output_lock(output_mutex); - assert(output_pixels.count(tiles[4].tile_index) == 1); - device_vector *output_mem = output_pixels[tiles[9].tile_index]; - output_pixels.erase(tiles[4].tile_index); + 
assert(output_pixels.count(center_tile.tile_index) == 1); + device_vector *output_mem = output_pixels[target_tile.tile_index]; + output_pixels.erase(center_tile.tile_index); output_lock.unlock(); /* Copy denoised pixels from device. */ - output_mem->copy_from_device(0, OUTPUT_NUM_CHANNELS * tiles[9].w, tiles[9].h); + output_mem->copy_from_device(0, OUTPUT_NUM_CHANNELS * target_tile.w, target_tile.h); float *result = output_mem->data(); - float *out = &image.pixels[image.num_channels * (tiles[9].y * image.width + tiles[9].x)]; + float *out = &image.pixels[image.num_channels * (target_tile.y * image.width + target_tile.x)]; const DenoiseImageLayer &layer = image.layers[current_layer]; const int *output_to_image_channel = layer.output_to_image_channel.data(); - for (int y = 0; y < tiles[9].h; y++) { - for (int x = 0; x < tiles[9].w; x++, result += OUTPUT_NUM_CHANNELS) { + for (int y = 0; y < target_tile.h; y++) { + for (int x = 0; x < target_tile.w; x++, result += OUTPUT_NUM_CHANNELS) { for (int i = 0; i < OUTPUT_NUM_CHANNELS; i++) { out[image.num_channels * x + output_to_image_channel[i]] = result[i]; } diff --git a/intern/cycles/render/denoising.h b/intern/cycles/render/denoising.h index 5c6f913cb38..c1b4d0a5596 100644 --- a/intern/cycles/render/denoising.h +++ b/intern/cycles/render/denoising.h @@ -196,8 +196,8 @@ class DenoiseTask { /* Device task callbacks */ bool acquire_tile(Device *device, Device *tile_device, RenderTile &tile); - void map_neighboring_tiles(RenderTile *tiles, Device *tile_device); - void unmap_neighboring_tiles(RenderTile *tiles); + void map_neighboring_tiles(RenderTileNeighbors &neighbors, Device *tile_device); + void unmap_neighboring_tiles(RenderTileNeighbors &neighbors); void release_tile(); bool get_cancel(); }; diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp index d728b982ed5..9383750bd28 100644 --- a/intern/cycles/render/session.cpp +++ b/intern/cycles/render/session.cpp @@ -536,7 +536,7 @@ void 
Session::release_tile(RenderTile &rtile, const bool need_denoise) denoising_cond.notify_all(); } -void Session::map_neighbor_tiles(RenderTile *tiles, Device *tile_device) +void Session::map_neighbor_tiles(RenderTileNeighbors &neighbors, Device *tile_device) { thread_scoped_lock tile_lock(tile_mutex); @@ -546,75 +546,77 @@ void Session::map_neighbor_tiles(RenderTile *tiles, Device *tile_device) tile_manager.state.buffer.full_x + tile_manager.state.buffer.width, tile_manager.state.buffer.full_y + tile_manager.state.buffer.height); + RenderTile &center_tile = neighbors.tiles[RenderTileNeighbors::CENTER]; + if (!tile_manager.schedule_denoising) { /* Fix up tile slices with overlap. */ if (tile_manager.slice_overlap != 0) { - int y = max(tiles[4].y - tile_manager.slice_overlap, image_region.y); - tiles[4].h = min(tiles[4].y + tiles[4].h + tile_manager.slice_overlap, image_region.w) - y; - tiles[4].y = y; + int y = max(center_tile.y - tile_manager.slice_overlap, image_region.y); + center_tile.h = min(center_tile.y + center_tile.h + tile_manager.slice_overlap, + image_region.w) - + y; + center_tile.y = y; } /* Tiles are not being denoised individually, which means the entire image is processed. 
*/ - tiles[3].x = tiles[4].x; - tiles[1].y = tiles[4].y; - tiles[5].x = tiles[4].x + tiles[4].w; - tiles[7].y = tiles[4].y + tiles[4].h; + neighbors.set_bounds_from_center(); } else { - int center_idx = tiles[4].tile_index; + int center_idx = center_tile.tile_index; assert(tile_manager.state.tiles[center_idx].state == Tile::DENOISE); for (int dy = -1, i = 0; dy <= 1; dy++) { for (int dx = -1; dx <= 1; dx++, i++) { + RenderTile &rtile = neighbors.tiles[i]; int nindex = tile_manager.get_neighbor_index(center_idx, i); if (nindex >= 0) { Tile *tile = &tile_manager.state.tiles[nindex]; - tiles[i].x = image_region.x + tile->x; - tiles[i].y = image_region.y + tile->y; - tiles[i].w = tile->w; - tiles[i].h = tile->h; + rtile.x = image_region.x + tile->x; + rtile.y = image_region.y + tile->y; + rtile.w = tile->w; + rtile.h = tile->h; if (buffers) { - tile_manager.state.buffer.get_offset_stride(tiles[i].offset, tiles[i].stride); + tile_manager.state.buffer.get_offset_stride(rtile.offset, rtile.stride); - tiles[i].buffer = buffers->buffer.device_pointer; - tiles[i].buffers = buffers; + rtile.buffer = buffers->buffer.device_pointer; + rtile.buffers = buffers; } else { assert(tile->buffers); - tile->buffers->params.get_offset_stride(tiles[i].offset, tiles[i].stride); + tile->buffers->params.get_offset_stride(rtile.offset, rtile.stride); - tiles[i].buffer = tile->buffers->buffer.device_pointer; - tiles[i].buffers = tile->buffers; + rtile.buffer = tile->buffers->buffer.device_pointer; + rtile.buffers = tile->buffers; } } else { - int px = tiles[4].x + dx * params.tile_size.x; - int py = tiles[4].y + dy * params.tile_size.y; + int px = center_tile.x + dx * params.tile_size.x; + int py = center_tile.y + dy * params.tile_size.y; - tiles[i].x = clamp(px, image_region.x, image_region.z); - tiles[i].y = clamp(py, image_region.y, image_region.w); - tiles[i].w = tiles[i].h = 0; + rtile.x = clamp(px, image_region.x, image_region.z); + rtile.y = clamp(py, image_region.y, image_region.w); + 
rtile.w = rtile.h = 0; - tiles[i].buffer = (device_ptr)NULL; - tiles[i].buffers = NULL; + rtile.buffer = (device_ptr)NULL; + rtile.buffers = NULL; } } } } - assert(tiles[4].buffers); - device->map_neighbor_tiles(tile_device, tiles); + assert(center_tile.buffers); + device->map_neighbor_tiles(tile_device, neighbors); /* The denoised result is written back to the original tile. */ - tiles[9] = tiles[4]; + neighbors.target = center_tile; } -void Session::unmap_neighbor_tiles(RenderTile *tiles, Device *tile_device) +void Session::unmap_neighbor_tiles(RenderTileNeighbors &neighbors, Device *tile_device) { thread_scoped_lock tile_lock(tile_mutex); - device->unmap_neighbor_tiles(tile_device, tiles); + device->unmap_neighbor_tiles(tile_device, neighbors); } void Session::run_cpu() diff --git a/intern/cycles/render/session.h b/intern/cycles/render/session.h index 0141629762c..e3ac054ead3 100644 --- a/intern/cycles/render/session.h +++ b/intern/cycles/render/session.h @@ -198,8 +198,8 @@ class Session { void update_tile_sample(RenderTile &tile); void release_tile(RenderTile &tile, const bool need_denoise); - void map_neighbor_tiles(RenderTile *tiles, Device *tile_device); - void unmap_neighbor_tiles(RenderTile *tiles, Device *tile_device); + void map_neighbor_tiles(RenderTileNeighbors &neighbors, Device *tile_device); + void unmap_neighbor_tiles(RenderTileNeighbors &neighbors, Device *tile_device); bool device_use_gl;