forked from bartvdbraak/blender
Cleanup: reduce hardcoded numbers in denoising neighbor tiles code
This commit is contained in:
parent
e65c78cd43
commit
93791381fe
@ -1760,7 +1760,7 @@ void CUDADevice::denoise(RenderTile &rtile, DenoisingTask &denoising)
|
||||
denoising.render_buffer.samples = rtile.sample;
|
||||
denoising.buffer.gpu_temporary_mem = true;
|
||||
|
||||
denoising.run_denoising(&rtile);
|
||||
denoising.run_denoising(rtile);
|
||||
}
|
||||
|
||||
void CUDADevice::adaptive_sampling_filter(uint filter_sample,
|
||||
|
@ -439,10 +439,10 @@ class Device {
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
virtual void map_neighbor_tiles(Device * /*sub_device*/, RenderTile * /*tiles*/)
|
||||
virtual void map_neighbor_tiles(Device * /*sub_device*/, RenderTileNeighbors & /*neighbors*/)
|
||||
{
|
||||
}
|
||||
virtual void unmap_neighbor_tiles(Device * /*sub_device*/, RenderTile * /*tiles*/)
|
||||
virtual void unmap_neighbor_tiles(Device * /*sub_device*/, RenderTileNeighbors & /*neighbors*/)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -1040,7 +1040,7 @@ class CPUDevice : public Device {
|
||||
denoising.render_buffer.samples = tile.sample;
|
||||
denoising.buffer.gpu_temporary_mem = false;
|
||||
|
||||
denoising.run_denoising(&tile);
|
||||
denoising.run_denoising(tile);
|
||||
}
|
||||
|
||||
void thread_render(DeviceTask &task)
|
||||
|
@ -71,29 +71,30 @@ DenoisingTask::~DenoisingTask()
|
||||
tile_info_mem.free();
|
||||
}
|
||||
|
||||
void DenoisingTask::set_render_buffer(RenderTile *rtiles)
|
||||
void DenoisingTask::set_render_buffer(RenderTileNeighbors &neighbors)
|
||||
{
|
||||
for (int i = 0; i < 9; i++) {
|
||||
tile_info->offsets[i] = rtiles[i].offset;
|
||||
tile_info->strides[i] = rtiles[i].stride;
|
||||
tile_info->buffers[i] = rtiles[i].buffer;
|
||||
for (int i = 0; i < RenderTileNeighbors::SIZE; i++) {
|
||||
RenderTile &rtile = neighbors.tiles[i];
|
||||
tile_info->offsets[i] = rtile.offset;
|
||||
tile_info->strides[i] = rtile.stride;
|
||||
tile_info->buffers[i] = rtile.buffer;
|
||||
}
|
||||
tile_info->x[0] = rtiles[3].x;
|
||||
tile_info->x[1] = rtiles[4].x;
|
||||
tile_info->x[2] = rtiles[5].x;
|
||||
tile_info->x[3] = rtiles[5].x + rtiles[5].w;
|
||||
tile_info->y[0] = rtiles[1].y;
|
||||
tile_info->y[1] = rtiles[4].y;
|
||||
tile_info->y[2] = rtiles[7].y;
|
||||
tile_info->y[3] = rtiles[7].y + rtiles[7].h;
|
||||
tile_info->x[0] = neighbors.tiles[3].x;
|
||||
tile_info->x[1] = neighbors.tiles[4].x;
|
||||
tile_info->x[2] = neighbors.tiles[5].x;
|
||||
tile_info->x[3] = neighbors.tiles[5].x + neighbors.tiles[5].w;
|
||||
tile_info->y[0] = neighbors.tiles[1].y;
|
||||
tile_info->y[1] = neighbors.tiles[4].y;
|
||||
tile_info->y[2] = neighbors.tiles[7].y;
|
||||
tile_info->y[3] = neighbors.tiles[7].y + neighbors.tiles[7].h;
|
||||
|
||||
target_buffer.offset = rtiles[9].offset;
|
||||
target_buffer.stride = rtiles[9].stride;
|
||||
target_buffer.ptr = rtiles[9].buffer;
|
||||
target_buffer.offset = neighbors.target.offset;
|
||||
target_buffer.stride = neighbors.target.stride;
|
||||
target_buffer.ptr = neighbors.target.buffer;
|
||||
|
||||
if (do_prefilter && rtiles[9].buffers) {
|
||||
if (do_prefilter && neighbors.target.buffers) {
|
||||
target_buffer.denoising_output_offset =
|
||||
rtiles[9].buffers->params.get_denoising_prefiltered_offset();
|
||||
neighbors.target.buffers->params.get_denoising_prefiltered_offset();
|
||||
}
|
||||
else {
|
||||
target_buffer.denoising_output_offset = 0;
|
||||
@ -320,12 +321,11 @@ void DenoisingTask::reconstruct()
|
||||
functions.solve(target_buffer.ptr);
|
||||
}
|
||||
|
||||
void DenoisingTask::run_denoising(RenderTile *tile)
|
||||
void DenoisingTask::run_denoising(RenderTile &tile)
|
||||
{
|
||||
RenderTile rtiles[10];
|
||||
rtiles[4] = *tile;
|
||||
functions.map_neighbor_tiles(rtiles);
|
||||
set_render_buffer(rtiles);
|
||||
RenderTileNeighbors neighbors(tile);
|
||||
functions.map_neighbor_tiles(neighbors);
|
||||
set_render_buffer(neighbors);
|
||||
|
||||
setup_denoising_buffer();
|
||||
|
||||
@ -347,7 +347,7 @@ void DenoisingTask::run_denoising(RenderTile *tile)
|
||||
write_buffer();
|
||||
}
|
||||
|
||||
functions.unmap_neighbor_tiles(rtiles);
|
||||
functions.unmap_neighbor_tiles(neighbors);
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
@ -102,8 +102,8 @@ class DenoisingTask {
|
||||
device_ptr output_ptr)>
|
||||
detect_outliers;
|
||||
function<bool(int out_offset, device_ptr frop_ptr, device_ptr buffer_ptr)> write_feature;
|
||||
function<void(RenderTile *rtiles)> map_neighbor_tiles;
|
||||
function<void(RenderTile *rtiles)> unmap_neighbor_tiles;
|
||||
function<void(RenderTileNeighbors &neighbors)> map_neighbor_tiles;
|
||||
function<void(RenderTileNeighbors &neighbors)> unmap_neighbor_tiles;
|
||||
} functions;
|
||||
|
||||
/* Stores state of the current Reconstruction operation,
|
||||
@ -154,7 +154,7 @@ class DenoisingTask {
|
||||
DenoisingTask(Device *device, const DeviceTask &task);
|
||||
~DenoisingTask();
|
||||
|
||||
void run_denoising(RenderTile *tile);
|
||||
void run_denoising(RenderTile &tile);
|
||||
|
||||
struct DenoiseBuffers {
|
||||
int pass_stride;
|
||||
@ -179,7 +179,7 @@ class DenoisingTask {
|
||||
protected:
|
||||
Device *device;
|
||||
|
||||
void set_render_buffer(RenderTile *rtiles);
|
||||
void set_render_buffer(RenderTileNeighbors &neighbors);
|
||||
void setup_denoising_buffer();
|
||||
void prefilter_shadowing();
|
||||
void prefilter_features();
|
||||
|
@ -584,20 +584,22 @@ class MultiDevice : public Device {
|
||||
return -1;
|
||||
}
|
||||
|
||||
void map_neighbor_tiles(Device *sub_device, RenderTile *tiles)
|
||||
void map_neighbor_tiles(Device *sub_device, RenderTileNeighbors &neighbors)
|
||||
{
|
||||
for (int i = 0; i < 9; i++) {
|
||||
if (!tiles[i].buffers) {
|
||||
for (int i = 0; i < RenderTileNeighbors::SIZE; i++) {
|
||||
RenderTile &tile = neighbors.tiles[i];
|
||||
|
||||
if (!tile.buffers) {
|
||||
continue;
|
||||
}
|
||||
|
||||
device_vector<float> &mem = tiles[i].buffers->buffer;
|
||||
tiles[i].buffer = mem.device_pointer;
|
||||
device_vector<float> &mem = tile.buffers->buffer;
|
||||
tile.buffer = mem.device_pointer;
|
||||
|
||||
if (mem.device == this && matching_rendering_and_denoising_devices) {
|
||||
/* Skip unnecessary copies in viewport mode (buffer covers the
|
||||
* whole image), but still need to fix up the tile device pointer. */
|
||||
map_tile(sub_device, tiles[i]);
|
||||
map_tile(sub_device, tile);
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -610,15 +612,15 @@ class MultiDevice : public Device {
|
||||
* also required for the case where a CPU thread is denoising
|
||||
* a tile rendered on the GPU. In that case we have to avoid
|
||||
* overwriting the buffer being de-noised by the CPU thread. */
|
||||
if (!tiles[i].buffers->map_neighbor_copied) {
|
||||
tiles[i].buffers->map_neighbor_copied = true;
|
||||
if (!tile.buffers->map_neighbor_copied) {
|
||||
tile.buffers->map_neighbor_copied = true;
|
||||
mem.copy_from_device();
|
||||
}
|
||||
|
||||
if (mem.device == this) {
|
||||
/* Can re-use memory if tile is already allocated on the sub device. */
|
||||
map_tile(sub_device, tiles[i]);
|
||||
mem.swap_device(sub_device, mem.device_size, tiles[i].buffer);
|
||||
map_tile(sub_device, tile);
|
||||
mem.swap_device(sub_device, mem.device_size, tile.buffer);
|
||||
}
|
||||
else {
|
||||
mem.swap_device(sub_device, 0, 0);
|
||||
@ -626,40 +628,42 @@ class MultiDevice : public Device {
|
||||
|
||||
mem.copy_to_device();
|
||||
|
||||
tiles[i].buffer = mem.device_pointer;
|
||||
tiles[i].device_size = mem.device_size;
|
||||
tile.buffer = mem.device_pointer;
|
||||
tile.device_size = mem.device_size;
|
||||
|
||||
mem.restore_device();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void unmap_neighbor_tiles(Device *sub_device, RenderTile *tiles)
|
||||
void unmap_neighbor_tiles(Device *sub_device, RenderTileNeighbors &neighbors)
|
||||
{
|
||||
device_vector<float> &mem = tiles[9].buffers->buffer;
|
||||
RenderTile &target_tile = neighbors.target;
|
||||
device_vector<float> &mem = target_tile.buffers->buffer;
|
||||
|
||||
if (mem.device == this && matching_rendering_and_denoising_devices) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* Copy denoised result back to the host. */
|
||||
mem.swap_device(sub_device, tiles[9].device_size, tiles[9].buffer);
|
||||
mem.swap_device(sub_device, target_tile.device_size, target_tile.buffer);
|
||||
mem.copy_from_device();
|
||||
mem.restore_device();
|
||||
|
||||
/* Copy denoised result to the original device. */
|
||||
mem.copy_to_device();
|
||||
|
||||
for (int i = 0; i < 9; i++) {
|
||||
if (!tiles[i].buffers) {
|
||||
for (int i = 0; i < RenderTileNeighbors::SIZE; i++) {
|
||||
RenderTile &tile = neighbors.tiles[i];
|
||||
if (!tile.buffers) {
|
||||
continue;
|
||||
}
|
||||
|
||||
device_vector<float> &mem = tiles[i].buffers->buffer;
|
||||
device_vector<float> &mem = tile.buffers->buffer;
|
||||
|
||||
if (mem.device != sub_device && mem.device != this) {
|
||||
/* Free up memory again if it was allocated for the copy above. */
|
||||
mem.swap_device(sub_device, tiles[i].device_size, tiles[i].buffer);
|
||||
mem.swap_device(sub_device, tile.device_size, tile.buffer);
|
||||
sub_device->mem_free(mem);
|
||||
mem.restore_device();
|
||||
}
|
||||
|
@ -801,19 +801,18 @@ class OptiXDevice : public CUDADevice {
|
||||
// 0 1 2
|
||||
// 3 4 5
|
||||
// 6 7 8 9
|
||||
RenderTile rtiles[10];
|
||||
rtiles[4] = rtile;
|
||||
task.map_neighbor_tiles(rtiles, this);
|
||||
rtile = rtiles[4]; // Tile may have been modified by mapping code
|
||||
RenderTileNeighbors neighbors(rtile);
|
||||
task.map_neighbor_tiles(neighbors, this);
|
||||
RenderTile &center_tile = neighbors.tiles[RenderTileNeighbors::CENTER];
|
||||
RenderTile &target_tile = neighbors.target;
|
||||
rtile = center_tile; // Tile may have been modified by mapping code
|
||||
|
||||
// Calculate size of the tile to denoise (including overlap)
|
||||
int4 rect = make_int4(
|
||||
rtiles[4].x, rtiles[4].y, rtiles[4].x + rtiles[4].w, rtiles[4].y + rtiles[4].h);
|
||||
int4 rect = center_tile.bounds();
|
||||
// Overlap between tiles has to be at least 64 pixels
|
||||
// TODO(pmours): Query this value from OptiX
|
||||
rect = rect_expand(rect, 64);
|
||||
int4 clip_rect = make_int4(
|
||||
rtiles[3].x, rtiles[1].y, rtiles[5].x + rtiles[5].w, rtiles[7].y + rtiles[7].h);
|
||||
int4 clip_rect = neighbors.bounds();
|
||||
rect = rect_clip(rect, clip_rect);
|
||||
int2 rect_size = make_int2(rect.z - rect.x, rect.w - rect.y);
|
||||
int2 overlap_offset = make_int2(rtile.x - rect.x, rtile.y - rect.y);
|
||||
@ -834,14 +833,14 @@ class OptiXDevice : public CUDADevice {
|
||||
device_only_memory<float> input(this, "denoiser input");
|
||||
device_vector<TileInfo> tile_info_mem(this, "denoiser tile info", MEM_READ_WRITE);
|
||||
|
||||
if ((!rtiles[0].buffer || rtiles[0].buffer == rtile.buffer) &&
|
||||
(!rtiles[1].buffer || rtiles[1].buffer == rtile.buffer) &&
|
||||
(!rtiles[2].buffer || rtiles[2].buffer == rtile.buffer) &&
|
||||
(!rtiles[3].buffer || rtiles[3].buffer == rtile.buffer) &&
|
||||
(!rtiles[5].buffer || rtiles[5].buffer == rtile.buffer) &&
|
||||
(!rtiles[6].buffer || rtiles[6].buffer == rtile.buffer) &&
|
||||
(!rtiles[7].buffer || rtiles[7].buffer == rtile.buffer) &&
|
||||
(!rtiles[8].buffer || rtiles[8].buffer == rtile.buffer)) {
|
||||
bool contiguous_memory = true;
|
||||
for (int i = 0; i < RenderTileNeighbors::SIZE; i++) {
|
||||
if (neighbors.tiles[i].buffer && neighbors.tiles[i].buffer != rtile.buffer) {
|
||||
contiguous_memory = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (contiguous_memory) {
|
||||
// Tiles are in contiguous memory, so can just subtract overlap offset
|
||||
input_ptr -= (overlap_offset.x + overlap_offset.y * rtile.stride) * pixel_stride;
|
||||
// Stride covers the whole width of the image and not just a single tile
|
||||
@ -856,19 +855,19 @@ class OptiXDevice : public CUDADevice {
|
||||
input_stride *= rect_size.x;
|
||||
|
||||
TileInfo *tile_info = tile_info_mem.alloc(1);
|
||||
for (int i = 0; i < 9; i++) {
|
||||
tile_info->offsets[i] = rtiles[i].offset;
|
||||
tile_info->strides[i] = rtiles[i].stride;
|
||||
tile_info->buffers[i] = rtiles[i].buffer;
|
||||
for (int i = 0; i < RenderTileNeighbors::SIZE; i++) {
|
||||
tile_info->offsets[i] = neighbors.tiles[i].offset;
|
||||
tile_info->strides[i] = neighbors.tiles[i].stride;
|
||||
tile_info->buffers[i] = neighbors.tiles[i].buffer;
|
||||
}
|
||||
tile_info->x[0] = rtiles[3].x;
|
||||
tile_info->x[1] = rtiles[4].x;
|
||||
tile_info->x[2] = rtiles[5].x;
|
||||
tile_info->x[3] = rtiles[5].x + rtiles[5].w;
|
||||
tile_info->y[0] = rtiles[1].y;
|
||||
tile_info->y[1] = rtiles[4].y;
|
||||
tile_info->y[2] = rtiles[7].y;
|
||||
tile_info->y[3] = rtiles[7].y + rtiles[7].h;
|
||||
tile_info->x[0] = neighbors.tiles[3].x;
|
||||
tile_info->x[1] = neighbors.tiles[4].x;
|
||||
tile_info->x[2] = neighbors.tiles[5].x;
|
||||
tile_info->x[3] = neighbors.tiles[5].x + neighbors.tiles[5].w;
|
||||
tile_info->y[0] = neighbors.tiles[1].y;
|
||||
tile_info->y[1] = neighbors.tiles[4].y;
|
||||
tile_info->y[2] = neighbors.tiles[7].y;
|
||||
tile_info->y[3] = neighbors.tiles[7].y + neighbors.tiles[7].h;
|
||||
tile_info_mem.copy_to_device();
|
||||
|
||||
void *args[] = {
|
||||
@ -977,10 +976,10 @@ class OptiXDevice : public CUDADevice {
|
||||
int2 output_offset = overlap_offset;
|
||||
overlap_offset = make_int2(0, 0); // Not supported by denoiser API, so apply manually
|
||||
# else
|
||||
output_layers[0].data = rtiles[9].buffer + pixel_offset;
|
||||
output_layers[0].width = rtiles[9].w;
|
||||
output_layers[0].height = rtiles[9].h;
|
||||
output_layers[0].rowStrideInBytes = rtiles[9].stride * pixel_stride;
|
||||
output_layers[0].data = target_tile.buffer + pixel_offset;
|
||||
output_layers[0].width = target_tile.w;
|
||||
output_layers[0].height = target_tile.h;
|
||||
output_layers[0].rowStrideInBytes = target_tile.stride * pixel_stride;
|
||||
output_layers[0].pixelStrideInBytes = pixel_stride;
|
||||
# endif
|
||||
output_layers[0].format = OPTIX_PIXEL_FORMAT_FLOAT3;
|
||||
@ -1002,26 +1001,26 @@ class OptiXDevice : public CUDADevice {
|
||||
|
||||
# if OPTIX_DENOISER_NO_PIXEL_STRIDE
|
||||
void *output_args[] = {&input_ptr,
|
||||
&rtiles[9].buffer,
|
||||
&target_tile.buffer,
|
||||
&output_offset.x,
|
||||
&output_offset.y,
|
||||
&rect_size.x,
|
||||
&rect_size.y,
|
||||
&rtiles[9].x,
|
||||
&rtiles[9].y,
|
||||
&rtiles[9].w,
|
||||
&rtiles[9].h,
|
||||
&rtiles[9].offset,
|
||||
&rtiles[9].stride,
|
||||
&target_tile.x,
|
||||
&target_tile.y,
|
||||
&target_tile.w,
|
||||
&target_tile.h,
|
||||
&target_tile.offset,
|
||||
&target_tile.stride,
|
||||
&task.pass_stride,
|
||||
&rtile.sample};
|
||||
launch_filter_kernel(
|
||||
"kernel_cuda_filter_convert_from_rgb", rtiles[9].w, rtiles[9].h, output_args);
|
||||
"kernel_cuda_filter_convert_from_rgb", target_tile.w, target_tile.h, output_args);
|
||||
# endif
|
||||
|
||||
check_result_cuda_ret(cuStreamSynchronize(0));
|
||||
|
||||
task.unmap_neighbor_tiles(rtiles, this);
|
||||
task.unmap_neighbor_tiles(neighbors, this);
|
||||
}
|
||||
else {
|
||||
// Run CUDA denoising kernels
|
||||
|
@ -29,6 +29,7 @@ CCL_NAMESPACE_BEGIN
|
||||
class Device;
|
||||
class RenderBuffers;
|
||||
class RenderTile;
|
||||
class RenderTileNeighbors;
|
||||
class Tile;
|
||||
|
||||
enum DenoiserType {
|
||||
@ -150,8 +151,8 @@ class DeviceTask {
|
||||
function<void(RenderTile &)> update_tile_sample;
|
||||
function<void(RenderTile &)> release_tile;
|
||||
function<bool()> get_cancel;
|
||||
function<void(RenderTile *, Device *)> map_neighbor_tiles;
|
||||
function<void(RenderTile *, Device *)> unmap_neighbor_tiles;
|
||||
function<void(RenderTileNeighbors &, Device *)> map_neighbor_tiles;
|
||||
function<void(RenderTileNeighbors &, Device *)> unmap_neighbor_tiles;
|
||||
|
||||
uint tile_types;
|
||||
DenoiseParams denoising;
|
||||
|
@ -1850,7 +1850,7 @@ void OpenCLDevice::denoise(RenderTile &rtile, DenoisingTask &denoising)
|
||||
denoising.render_buffer.samples = rtile.sample;
|
||||
denoising.buffer.gpu_temporary_mem = true;
|
||||
|
||||
denoising.run_denoising(&rtile);
|
||||
denoising.run_denoising(rtile);
|
||||
}
|
||||
|
||||
void OpenCLDevice::shader(DeviceTask &task)
|
||||
|
@ -52,7 +52,7 @@ class BufferParams {
|
||||
/* passes */
|
||||
vector<Pass> passes;
|
||||
bool denoising_data_pass;
|
||||
/* If only some light path types should be denoised, an additional pass is needed. */
|
||||
/* If only some light path types should be denoised, an additional pass is needed. */
|
||||
bool denoising_clean_pass;
|
||||
/* When we're prefiltering the passes during rendering, we need to keep both the
|
||||
* original and the prefiltered data around because neighboring tiles might still
|
||||
@ -149,6 +149,50 @@ class RenderTile {
|
||||
RenderBuffers *buffers;
|
||||
|
||||
RenderTile();
|
||||
|
||||
/* Rectangle covered by this tile, packed as (xmin, ymin, xmax, ymax).
 * The max edges are computed as origin plus size. */
int4 bounds() const
{
  const int xmin = x;
  const int ymin = y;
  const int xmax = x + w;
  const int ymax = y + h;
  return make_int4(xmin, ymin, xmax, ymax);
}
|
||||
};
|
||||
|
||||
/* Render Tile Neighbors
|
||||
* Set of neighboring tiles used for denoising. Tile order:
|
||||
* 0 1 2
|
||||
* 3 4 5
|
||||
* 6 7 8 */
|
||||
|
||||
class RenderTileNeighbors {
|
||||
public:
|
||||
static const int SIZE = 9;
|
||||
static const int CENTER = 4;
|
||||
|
||||
RenderTile tiles[SIZE];
|
||||
RenderTile target;
|
||||
|
||||
RenderTileNeighbors(const RenderTile ¢er)
|
||||
{
|
||||
tiles[CENTER] = center;
|
||||
}
|
||||
|
||||
int4 bounds() const
|
||||
{
|
||||
return make_int4(tiles[3].x, /* xmin */
|
||||
tiles[1].y, /* ymin */
|
||||
tiles[5].x + tiles[5].w, /* xmax */
|
||||
tiles[7].y + tiles[7].h); /* ymax */
|
||||
}
|
||||
|
||||
void set_bounds_from_center()
|
||||
{
|
||||
tiles[3].x = tiles[CENTER].x;
|
||||
tiles[1].y = tiles[CENTER].y;
|
||||
tiles[5].x = tiles[CENTER].x + tiles[CENTER].w;
|
||||
tiles[7].y = tiles[CENTER].y + tiles[CENTER].h;
|
||||
}
|
||||
};
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
@ -271,42 +271,45 @@ bool DenoiseTask::acquire_tile(Device *device, Device *tile_device, RenderTile &
|
||||
*
|
||||
* However, since there is only one large memory, the denoised result has to be written to
|
||||
* a different buffer to avoid having to copy an entire horizontal slice of the image. */
|
||||
void DenoiseTask::map_neighboring_tiles(RenderTile *tiles, Device *tile_device)
|
||||
void DenoiseTask::map_neighboring_tiles(RenderTileNeighbors &neighbors, Device *tile_device)
|
||||
{
|
||||
RenderTile &center_tile = neighbors.tiles[RenderTileNeighbors::CENTER];
|
||||
RenderTile &target_tile = neighbors.target;
|
||||
|
||||
/* Fill tile information. */
|
||||
for (int i = 0; i < 9; i++) {
|
||||
if (i == 4) {
|
||||
for (int i = 0; i < RenderTileNeighbors::SIZE; i++) {
|
||||
if (i == RenderTileNeighbors::CENTER) {
|
||||
continue;
|
||||
}
|
||||
|
||||
RenderTile &tile = neighbors.tiles[i];
|
||||
int dx = (i % 3) - 1;
|
||||
int dy = (i / 3) - 1;
|
||||
tiles[i].x = clamp(tiles[4].x + dx * denoiser->tile_size.x, 0, image.width);
|
||||
tiles[i].w = clamp(tiles[4].x + (dx + 1) * denoiser->tile_size.x, 0, image.width) - tiles[i].x;
|
||||
tiles[i].y = clamp(tiles[4].y + dy * denoiser->tile_size.y, 0, image.height);
|
||||
tiles[i].h = clamp(tiles[4].y + (dy + 1) * denoiser->tile_size.y, 0, image.height) -
|
||||
tiles[i].y;
|
||||
tile.x = clamp(center_tile.x + dx * denoiser->tile_size.x, 0, image.width);
|
||||
tile.w = clamp(center_tile.x + (dx + 1) * denoiser->tile_size.x, 0, image.width) - tile.x;
|
||||
tile.y = clamp(center_tile.y + dy * denoiser->tile_size.y, 0, image.height);
|
||||
tile.h = clamp(center_tile.y + (dy + 1) * denoiser->tile_size.y, 0, image.height) - tile.y;
|
||||
|
||||
tiles[i].buffer = tiles[4].buffer;
|
||||
tiles[i].offset = tiles[4].offset;
|
||||
tiles[i].stride = image.width;
|
||||
tile.buffer = center_tile.buffer;
|
||||
tile.offset = center_tile.offset;
|
||||
tile.stride = image.width;
|
||||
}
|
||||
|
||||
/* Allocate output buffer. */
|
||||
device_vector<float> *output_mem = new device_vector<float>(
|
||||
tile_device, "denoising_output", MEM_READ_WRITE);
|
||||
output_mem->alloc(OUTPUT_NUM_CHANNELS * tiles[4].w * tiles[4].h);
|
||||
output_mem->alloc(OUTPUT_NUM_CHANNELS * center_tile.w * center_tile.h);
|
||||
|
||||
/* Fill output buffer with noisy image, assumed by kernel_filter_finalize
|
||||
* when skipping denoising of some pixels. */
|
||||
float *result = output_mem->data();
|
||||
float *in = &image.pixels[image.num_channels * (tiles[4].y * image.width + tiles[4].x)];
|
||||
float *in = &image.pixels[image.num_channels * (center_tile.y * image.width + center_tile.x)];
|
||||
|
||||
const DenoiseImageLayer &layer = image.layers[current_layer];
|
||||
const int *input_to_image_channel = layer.input_to_image_channel.data();
|
||||
|
||||
for (int y = 0; y < tiles[4].h; y++) {
|
||||
for (int x = 0; x < tiles[4].w; x++, result += OUTPUT_NUM_CHANNELS) {
|
||||
for (int y = 0; y < center_tile.h; y++) {
|
||||
for (int x = 0; x < center_tile.w; x++, result += OUTPUT_NUM_CHANNELS) {
|
||||
for (int i = 0; i < OUTPUT_NUM_CHANNELS; i++) {
|
||||
result[i] = in[image.num_channels * x + input_to_image_channel[INPUT_NOISY_IMAGE + i]];
|
||||
}
|
||||
@ -317,35 +320,38 @@ void DenoiseTask::map_neighboring_tiles(RenderTile *tiles, Device *tile_device)
|
||||
output_mem->copy_to_device();
|
||||
|
||||
/* Fill output tile info. */
|
||||
tiles[9] = tiles[4];
|
||||
tiles[9].buffer = output_mem->device_pointer;
|
||||
tiles[9].stride = tiles[9].w;
|
||||
tiles[9].offset -= tiles[9].x + tiles[9].y * tiles[9].stride;
|
||||
target_tile = center_tile;
|
||||
target_tile.buffer = output_mem->device_pointer;
|
||||
target_tile.stride = target_tile.w;
|
||||
target_tile.offset -= target_tile.x + target_tile.y * target_tile.stride;
|
||||
|
||||
thread_scoped_lock output_lock(output_mutex);
|
||||
assert(output_pixels.count(tiles[4].tile_index) == 0);
|
||||
output_pixels[tiles[9].tile_index] = output_mem;
|
||||
assert(output_pixels.count(center_tile.tile_index) == 0);
|
||||
output_pixels[target_tile.tile_index] = output_mem;
|
||||
}
|
||||
|
||||
void DenoiseTask::unmap_neighboring_tiles(RenderTile *tiles)
|
||||
void DenoiseTask::unmap_neighboring_tiles(RenderTileNeighbors &neighbors)
|
||||
{
|
||||
RenderTile &center_tile = neighbors.tiles[RenderTileNeighbors::CENTER];
|
||||
RenderTile &target_tile = neighbors.target;
|
||||
|
||||
thread_scoped_lock output_lock(output_mutex);
|
||||
assert(output_pixels.count(tiles[4].tile_index) == 1);
|
||||
device_vector<float> *output_mem = output_pixels[tiles[9].tile_index];
|
||||
output_pixels.erase(tiles[4].tile_index);
|
||||
assert(output_pixels.count(center_tile.tile_index) == 1);
|
||||
device_vector<float> *output_mem = output_pixels[target_tile.tile_index];
|
||||
output_pixels.erase(center_tile.tile_index);
|
||||
output_lock.unlock();
|
||||
|
||||
/* Copy denoised pixels from device. */
|
||||
output_mem->copy_from_device(0, OUTPUT_NUM_CHANNELS * tiles[9].w, tiles[9].h);
|
||||
output_mem->copy_from_device(0, OUTPUT_NUM_CHANNELS * target_tile.w, target_tile.h);
|
||||
|
||||
float *result = output_mem->data();
|
||||
float *out = &image.pixels[image.num_channels * (tiles[9].y * image.width + tiles[9].x)];
|
||||
float *out = &image.pixels[image.num_channels * (target_tile.y * image.width + target_tile.x)];
|
||||
|
||||
const DenoiseImageLayer &layer = image.layers[current_layer];
|
||||
const int *output_to_image_channel = layer.output_to_image_channel.data();
|
||||
|
||||
for (int y = 0; y < tiles[9].h; y++) {
|
||||
for (int x = 0; x < tiles[9].w; x++, result += OUTPUT_NUM_CHANNELS) {
|
||||
for (int y = 0; y < target_tile.h; y++) {
|
||||
for (int x = 0; x < target_tile.w; x++, result += OUTPUT_NUM_CHANNELS) {
|
||||
for (int i = 0; i < OUTPUT_NUM_CHANNELS; i++) {
|
||||
out[image.num_channels * x + output_to_image_channel[i]] = result[i];
|
||||
}
|
||||
|
@ -196,8 +196,8 @@ class DenoiseTask {
|
||||
|
||||
/* Device task callbacks */
|
||||
bool acquire_tile(Device *device, Device *tile_device, RenderTile &tile);
|
||||
void map_neighboring_tiles(RenderTile *tiles, Device *tile_device);
|
||||
void unmap_neighboring_tiles(RenderTile *tiles);
|
||||
void map_neighboring_tiles(RenderTileNeighbors &neighbors, Device *tile_device);
|
||||
void unmap_neighboring_tiles(RenderTileNeighbors &neighbors);
|
||||
void release_tile();
|
||||
bool get_cancel();
|
||||
};
|
||||
|
@ -536,7 +536,7 @@ void Session::release_tile(RenderTile &rtile, const bool need_denoise)
|
||||
denoising_cond.notify_all();
|
||||
}
|
||||
|
||||
void Session::map_neighbor_tiles(RenderTile *tiles, Device *tile_device)
|
||||
void Session::map_neighbor_tiles(RenderTileNeighbors &neighbors, Device *tile_device)
|
||||
{
|
||||
thread_scoped_lock tile_lock(tile_mutex);
|
||||
|
||||
@ -546,75 +546,77 @@ void Session::map_neighbor_tiles(RenderTile *tiles, Device *tile_device)
|
||||
tile_manager.state.buffer.full_x + tile_manager.state.buffer.width,
|
||||
tile_manager.state.buffer.full_y + tile_manager.state.buffer.height);
|
||||
|
||||
RenderTile &center_tile = neighbors.tiles[RenderTileNeighbors::CENTER];
|
||||
|
||||
if (!tile_manager.schedule_denoising) {
|
||||
/* Fix up tile slices with overlap. */
|
||||
if (tile_manager.slice_overlap != 0) {
|
||||
int y = max(tiles[4].y - tile_manager.slice_overlap, image_region.y);
|
||||
tiles[4].h = min(tiles[4].y + tiles[4].h + tile_manager.slice_overlap, image_region.w) - y;
|
||||
tiles[4].y = y;
|
||||
int y = max(center_tile.y - tile_manager.slice_overlap, image_region.y);
|
||||
center_tile.h = min(center_tile.y + center_tile.h + tile_manager.slice_overlap,
|
||||
image_region.w) -
|
||||
y;
|
||||
center_tile.y = y;
|
||||
}
|
||||
|
||||
/* Tiles are not being denoised individually, which means the entire image is processed. */
|
||||
tiles[3].x = tiles[4].x;
|
||||
tiles[1].y = tiles[4].y;
|
||||
tiles[5].x = tiles[4].x + tiles[4].w;
|
||||
tiles[7].y = tiles[4].y + tiles[4].h;
|
||||
neighbors.set_bounds_from_center();
|
||||
}
|
||||
else {
|
||||
int center_idx = tiles[4].tile_index;
|
||||
int center_idx = center_tile.tile_index;
|
||||
assert(tile_manager.state.tiles[center_idx].state == Tile::DENOISE);
|
||||
|
||||
for (int dy = -1, i = 0; dy <= 1; dy++) {
|
||||
for (int dx = -1; dx <= 1; dx++, i++) {
|
||||
RenderTile &rtile = neighbors.tiles[i];
|
||||
int nindex = tile_manager.get_neighbor_index(center_idx, i);
|
||||
if (nindex >= 0) {
|
||||
Tile *tile = &tile_manager.state.tiles[nindex];
|
||||
|
||||
tiles[i].x = image_region.x + tile->x;
|
||||
tiles[i].y = image_region.y + tile->y;
|
||||
tiles[i].w = tile->w;
|
||||
tiles[i].h = tile->h;
|
||||
rtile.x = image_region.x + tile->x;
|
||||
rtile.y = image_region.y + tile->y;
|
||||
rtile.w = tile->w;
|
||||
rtile.h = tile->h;
|
||||
|
||||
if (buffers) {
|
||||
tile_manager.state.buffer.get_offset_stride(tiles[i].offset, tiles[i].stride);
|
||||
tile_manager.state.buffer.get_offset_stride(rtile.offset, rtile.stride);
|
||||
|
||||
tiles[i].buffer = buffers->buffer.device_pointer;
|
||||
tiles[i].buffers = buffers;
|
||||
rtile.buffer = buffers->buffer.device_pointer;
|
||||
rtile.buffers = buffers;
|
||||
}
|
||||
else {
|
||||
assert(tile->buffers);
|
||||
tile->buffers->params.get_offset_stride(tiles[i].offset, tiles[i].stride);
|
||||
tile->buffers->params.get_offset_stride(rtile.offset, rtile.stride);
|
||||
|
||||
tiles[i].buffer = tile->buffers->buffer.device_pointer;
|
||||
tiles[i].buffers = tile->buffers;
|
||||
rtile.buffer = tile->buffers->buffer.device_pointer;
|
||||
rtile.buffers = tile->buffers;
|
||||
}
|
||||
}
|
||||
else {
|
||||
int px = tiles[4].x + dx * params.tile_size.x;
|
||||
int py = tiles[4].y + dy * params.tile_size.y;
|
||||
int px = center_tile.x + dx * params.tile_size.x;
|
||||
int py = center_tile.y + dy * params.tile_size.y;
|
||||
|
||||
tiles[i].x = clamp(px, image_region.x, image_region.z);
|
||||
tiles[i].y = clamp(py, image_region.y, image_region.w);
|
||||
tiles[i].w = tiles[i].h = 0;
|
||||
rtile.x = clamp(px, image_region.x, image_region.z);
|
||||
rtile.y = clamp(py, image_region.y, image_region.w);
|
||||
rtile.w = rtile.h = 0;
|
||||
|
||||
tiles[i].buffer = (device_ptr)NULL;
|
||||
tiles[i].buffers = NULL;
|
||||
rtile.buffer = (device_ptr)NULL;
|
||||
rtile.buffers = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
assert(tiles[4].buffers);
|
||||
device->map_neighbor_tiles(tile_device, tiles);
|
||||
assert(center_tile.buffers);
|
||||
device->map_neighbor_tiles(tile_device, neighbors);
|
||||
|
||||
/* The denoised result is written back to the original tile. */
|
||||
tiles[9] = tiles[4];
|
||||
neighbors.target = center_tile;
|
||||
}
|
||||
|
||||
void Session::unmap_neighbor_tiles(RenderTile *tiles, Device *tile_device)
|
||||
void Session::unmap_neighbor_tiles(RenderTileNeighbors &neighbors, Device *tile_device)
|
||||
{
|
||||
thread_scoped_lock tile_lock(tile_mutex);
|
||||
device->unmap_neighbor_tiles(tile_device, tiles);
|
||||
device->unmap_neighbor_tiles(tile_device, neighbors);
|
||||
}
|
||||
|
||||
void Session::run_cpu()
|
||||
|
@ -198,8 +198,8 @@ class Session {
|
||||
void update_tile_sample(RenderTile &tile);
|
||||
void release_tile(RenderTile &tile, const bool need_denoise);
|
||||
|
||||
void map_neighbor_tiles(RenderTile *tiles, Device *tile_device);
|
||||
void unmap_neighbor_tiles(RenderTile *tiles, Device *tile_device);
|
||||
void map_neighbor_tiles(RenderTileNeighbors &neighbors, Device *tile_device);
|
||||
void unmap_neighbor_tiles(RenderTileNeighbors &neighbors, Device *tile_device);
|
||||
|
||||
bool device_use_gl;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user