Fix adaptive sampling artifacts on tile boundaries

Implement an overscan support for tiles, so that adaptive sampling can
rely on the pixels neighbourhood.

Differential Revision: https://developer.blender.org/D12599
This commit is contained in:
Sergey Sharybin 2021-09-21 17:03:22 +02:00
parent 758f3f7456
commit 6e268a749f
14 changed files with 189 additions and 70 deletions

@ -927,6 +927,9 @@ BufferParams BlenderSync::get_buffer_params(
params.height = height;
}
params.window_width = params.width;
params.window_height = params.height;
return params;
}

@ -768,7 +768,13 @@ void OptiXDevice::denoise_color_read(DenoiseContext &context, const DenoisePass
destination.num_components = 3;
destination.pixel_stride = context.buffer_params.pass_stride;
pass_accessor.get_render_tile_pixels(context.render_buffers, context.buffer_params, destination);
BufferParams buffer_params = context.buffer_params;
buffer_params.window_x = 0;
buffer_params.window_y = 0;
buffer_params.window_width = buffer_params.width;
buffer_params.window_height = buffer_params.height;
pass_accessor.get_render_tile_pixels(context.render_buffers, buffer_params, destination);
}
bool OptiXDevice::denoise_filter_color_preprocess(DenoiseContext &context, const DenoisePass &pass)

@ -289,7 +289,13 @@ class OIDNDenoiseContext {
* pixels. */
const PassAccessorCPU pass_accessor(pass_access_info, 1.0f, num_samples_);
pass_accessor.get_render_tile_pixels(render_buffers_, buffer_params_, destination);
BufferParams buffer_params = buffer_params_;
buffer_params.window_x = 0;
buffer_params.window_y = 0;
buffer_params.window_width = buffer_params.width;
buffer_params.window_height = buffer_params.height;
pass_accessor.get_render_tile_pixels(render_buffers_, buffer_params, destination);
}
/* Read pass pixels using PassAccessor into a temporary buffer which is owned by the pass.. */

@ -99,17 +99,22 @@ inline void PassAccessorCPU::run_get_pass_kernel_processor_float(
{
DCHECK_EQ(destination.stride, 0) << "Custom stride for float destination is not implemented.";
const float *buffer_data = render_buffers->buffer.data();
const int64_t pass_stride = buffer_params.pass_stride;
const int64_t buffer_row_stride = buffer_params.stride * buffer_params.pass_stride;
const float *window_data = render_buffers->buffer.data() + buffer_params.window_x * pass_stride +
buffer_params.window_y * buffer_row_stride;
const int pixel_stride = destination.pixel_stride ? destination.pixel_stride :
destination.num_components;
tbb::parallel_for(0, buffer_params.height, [&](int64_t y) {
int64_t pixel_index = y * buffer_params.width;
for (int64_t x = 0; x < buffer_params.width; ++x, ++pixel_index) {
const int64_t input_pixel_offset = pixel_index * buffer_params.pass_stride;
const float *buffer = buffer_data + input_pixel_offset;
float *pixel = destination.pixels + (pixel_index + destination.offset) * pixel_stride;
tbb::parallel_for(0, buffer_params.window_height, [&](int64_t y) {
const float *buffer = window_data + y * buffer_row_stride;
float *pixel = destination.pixels +
(y * buffer_params.width + destination.offset) * pixel_stride;
for (int64_t x = 0; x < buffer_params.window_width;
++x, buffer += pass_stride, pixel += pixel_stride) {
processor(kfilm_convert, buffer, pixel);
}
});
@ -123,26 +128,28 @@ inline void PassAccessorCPU::run_get_pass_kernel_processor_half_rgba(
const Destination &destination,
const Processor &processor) const
{
const float *buffer_data = render_buffers->buffer.data();
const int64_t pass_stride = buffer_params.pass_stride;
const int64_t buffer_row_stride = buffer_params.stride * buffer_params.pass_stride;
const float *window_data = render_buffers->buffer.data() + buffer_params.window_x * pass_stride +
buffer_params.window_y * buffer_row_stride;
half4 *dst_start = destination.pixels_half_rgba + destination.offset;
const int destination_stride = destination.stride != 0 ? destination.stride :
buffer_params.width;
tbb::parallel_for(0, buffer_params.height, [&](int64_t y) {
int64_t pixel_index = y * buffer_params.width;
half4 *dst_row_start = dst_start + y * destination_stride;
for (int64_t x = 0; x < buffer_params.width; ++x, ++pixel_index) {
const int64_t input_pixel_offset = pixel_index * buffer_params.pass_stride;
const float *buffer = buffer_data + input_pixel_offset;
tbb::parallel_for(0, buffer_params.window_height, [&](int64_t y) {
const float *buffer = window_data + y * buffer_row_stride;
half4 *pixel = dst_start + y * destination_stride;
for (int64_t x = 0; x < buffer_params.window_width; ++x, buffer += pass_stride, ++pixel) {
float pixel[4];
processor(kfilm_convert, buffer, pixel);
float pixel_rgba[4];
processor(kfilm_convert, buffer, pixel_rgba);
film_apply_pass_pixel_overlays_rgba(kfilm_convert, buffer, pixel);
film_apply_pass_pixel_overlays_rgba(kfilm_convert, buffer, pixel_rgba);
half4 *pixel_half_rgba = dst_row_start + x;
float4_store_half(&pixel_half_rgba->x, make_float4(pixel[0], pixel[1], pixel[2], pixel[3]));
float4_store_half(&pixel->x,
make_float4(pixel_rgba[0], pixel_rgba[1], pixel_rgba[2], pixel_rgba[3]));
}
});
}

@ -43,10 +43,13 @@ void PassAccessorGPU::run_film_convert_kernels(DeviceKernel kernel,
KernelFilmConvert kfilm_convert;
init_kernel_film_convert(&kfilm_convert, buffer_params, destination);
const int work_size = buffer_params.width * buffer_params.height;
const int work_size = buffer_params.window_width * buffer_params.window_height;
const int destination_stride = destination.stride != 0 ? destination.stride :
buffer_params.width;
buffer_params.window_width;
const int offset = buffer_params.window_x * buffer_params.pass_stride +
buffer_params.window_y * buffer_params.stride * buffer_params.pass_stride;
if (destination.d_pixels) {
DCHECK_EQ(destination.stride, 0) << "Custom stride for float destination is not implemented.";
@ -55,8 +58,8 @@ void PassAccessorGPU::run_film_convert_kernels(DeviceKernel kernel,
const_cast<device_ptr *>(&destination.d_pixels),
const_cast<device_ptr *>(&render_buffers->buffer.device_pointer),
const_cast<int *>(&work_size),
const_cast<int *>(&buffer_params.width),
const_cast<int *>(&buffer_params.offset),
const_cast<int *>(&buffer_params.window_width),
const_cast<int *>(&offset),
const_cast<int *>(&buffer_params.stride),
const_cast<int *>(&destination.offset),
const_cast<int *>(&destination_stride)};
@ -70,8 +73,8 @@ void PassAccessorGPU::run_film_convert_kernels(DeviceKernel kernel,
const_cast<device_ptr *>(&destination.d_pixels_half_rgba),
const_cast<device_ptr *>(&render_buffers->buffer.device_pointer),
const_cast<int *>(&work_size),
const_cast<int *>(&buffer_params.width),
const_cast<int *>(&buffer_params.offset),
const_cast<int *>(&buffer_params.window_width),
const_cast<int *>(&offset),
const_cast<int *>(&buffer_params.stride),
const_cast<int *>(&destination.offset),
const_cast<int *>(&destination_stride)};

@ -282,6 +282,12 @@ static BufferParams scale_buffer_params(const BufferParams &params, int resoluti
scaled_params.width = max(1, params.width / resolution_divider);
scaled_params.height = max(1, params.height / resolution_divider);
scaled_params.window_x = params.window_x / resolution_divider;
scaled_params.window_y = params.window_y / resolution_divider;
scaled_params.window_width = params.window_width / resolution_divider;
scaled_params.window_height = params.window_height / resolution_divider;
scaled_params.full_x = params.full_x / resolution_divider;
scaled_params.full_y = params.full_y / resolution_divider;
scaled_params.full_width = params.full_width / resolution_divider;
@ -1005,12 +1011,12 @@ bool PathTrace::set_render_tile_pixels(PassAccessor &pass_accessor,
int2 PathTrace::get_render_tile_size() const
{
if (full_frame_state_.render_buffers) {
return make_int2(full_frame_state_.render_buffers->params.width,
full_frame_state_.render_buffers->params.height);
return make_int2(full_frame_state_.render_buffers->params.window_width,
full_frame_state_.render_buffers->params.window_height);
}
const Tile &tile = tile_manager_.get_current_tile();
return make_int2(tile.width, tile.height);
return make_int2(tile.window_width, tile.window_height);
}
int2 PathTrace::get_render_tile_offset() const
@ -1020,7 +1026,7 @@ int2 PathTrace::get_render_tile_offset() const
}
const Tile &tile = tile_manager_.get_current_tile();
return make_int2(tile.x, tile.y);
return make_int2(tile.x + tile.window_x, tile.y + tile.window_y);
}
int2 PathTrace::get_render_size() const

@ -191,8 +191,10 @@ PassAccessor::Destination PathTraceWork::get_display_destination_template(
PassAccessor::Destination destination(film_->get_display_pass());
const int2 display_texture_size = display->get_texture_size();
const int texture_x = effective_buffer_params_.full_x - effective_full_params_.full_x;
const int texture_y = effective_buffer_params_.full_y - effective_full_params_.full_y;
const int texture_x = effective_buffer_params_.full_x - effective_full_params_.full_x +
effective_buffer_params_.window_x;
const int texture_y = effective_buffer_params_.full_y - effective_full_params_.full_y +
effective_buffer_params_.window_y;
destination.offset = texture_y * display_texture_size.x + texture_x;
destination.stride = display_texture_size.x;

@ -737,13 +737,13 @@ void PathTraceWorkGPU::copy_to_display_naive(PathTraceDisplay *display,
{
const int full_x = effective_buffer_params_.full_x;
const int full_y = effective_buffer_params_.full_y;
const int width = effective_buffer_params_.width;
const int height = effective_buffer_params_.height;
const int final_width = buffers_->params.width;
const int final_height = buffers_->params.height;
const int width = effective_buffer_params_.window_width;
const int height = effective_buffer_params_.window_height;
const int final_width = buffers_->params.window_width;
const int final_height = buffers_->params.window_height;
const int texture_x = full_x - effective_full_params_.full_x;
const int texture_y = full_y - effective_full_params_.full_y;
const int texture_x = full_x - effective_full_params_.full_x + effective_buffer_params_.window_x;
const int texture_y = full_y - effective_full_params_.full_y + effective_buffer_params_.window_y;
/* Re-allocate display memory if needed, and make sure the device pointer is allocated.
*

@ -424,8 +424,12 @@ ccl_device_inline void kernel_gpu_film_convert_common(const KernelFilmConvert *k
return;
}
const uint64_t render_buffer_offset = (uint64_t)render_pixel_index * kfilm_convert->pass_stride;
ccl_global const float *buffer = render_buffer + render_buffer_offset;
const int x = render_pixel_index % width;
const int y = render_pixel_index / width;
ccl_global const float *buffer = render_buffer + offset + x * kfilm_convert->pass_stride +
y * stride * kfilm_convert->pass_stride;
ccl_global float *pixel = pixels +
(render_pixel_index + dst_offset) * kfilm_convert->pixel_stride;
@ -451,17 +455,17 @@ ccl_device_inline void kernel_gpu_film_convert_half_rgba_common_rgba(
return;
}
const uint64_t render_buffer_offset = (uint64_t)render_pixel_index * kfilm_convert->pass_stride;
ccl_global const float *buffer = render_buffer + render_buffer_offset;
const int x = render_pixel_index % width;
const int y = render_pixel_index / width;
ccl_global const float *buffer = render_buffer + offset + x * kfilm_convert->pass_stride +
y * stride * kfilm_convert->pass_stride;
float pixel[4];
processor(kfilm_convert, buffer, pixel);
film_apply_pass_pixel_overlays_rgba(kfilm_convert, buffer, pixel);
const int x = render_pixel_index % width;
const int y = render_pixel_index / width;
ccl_global half4 *out = ((ccl_global half4 *)rgba) + rgba_offset + y * rgba_stride + x;
float4_store_half((ccl_global half *)out, make_float4(pixel[0], pixel[1], pixel[2], pixel[3]));
}

@ -97,6 +97,11 @@ NODE_DEFINE(BufferParams)
SOCKET_INT(width, "Width", 0);
SOCKET_INT(height, "Height", 0);
SOCKET_INT(window_x, "Window X", 0);
SOCKET_INT(window_y, "Window Y", 0);
SOCKET_INT(window_width, "Window Width", 0);
SOCKET_INT(window_height, "Window Height", 0);
SOCKET_INT(full_x, "Full X", 0);
SOCKET_INT(full_y, "Full Y", 0);
SOCKET_INT(full_width, "Full Width", 0);
@ -233,13 +238,31 @@ void BufferParams::update_offset_stride()
bool BufferParams::modified(const BufferParams &other) const
{
if (!(width == other.width && height == other.height && full_x == other.full_x &&
full_y == other.full_y && full_width == other.full_width &&
full_height == other.full_height && offset == other.offset && stride == other.stride &&
pass_stride == other.pass_stride && layer == other.layer && view == other.view &&
exposure == other.exposure &&
use_approximate_shadow_catcher == other.use_approximate_shadow_catcher &&
use_transparent_background == other.use_transparent_background)) {
if (width != other.width || height != other.height) {
return true;
}
if (full_x != other.full_x || full_y != other.full_y || full_width != other.full_width ||
full_height != other.full_height) {
return true;
}
if (window_x != other.window_x || window_y != other.window_y ||
window_width != other.window_width || window_height != other.window_height) {
return true;
}
if (offset != other.offset || stride != other.stride || pass_stride != other.pass_stride) {
return true;
}
if (layer != other.layer || view != other.view) {
return false;
}
if (exposure != other.exposure ||
use_approximate_shadow_catcher != other.use_approximate_shadow_catcher ||
use_transparent_background != other.use_transparent_background) {
return true;
}

@ -82,6 +82,15 @@ class BufferParams : public Node {
int width = 0;
int height = 0;
/* Windows defines which part of the buffers is visible. The part outside of the window is
* considered an "overscan".
*
* Window X and Y are relative to the position of the buffer in the full buffer. */
int window_x = 0;
int window_y = 0;
int window_width = 0;
int window_height = 0;
/* Offset into and width/height of the full buffer. */
int full_x = 0;
int full_y = 0;

@ -286,12 +286,20 @@ RenderWork Session::run_update_for_next_iteration()
BufferParams tile_params = buffer_params_;
const Tile &tile = tile_manager_.get_current_tile();
tile_params.width = tile.width;
tile_params.height = tile.height;
tile_params.window_x = tile.window_x;
tile_params.window_y = tile.window_y;
tile_params.window_width = tile.window_width;
tile_params.window_height = tile.window_height;
tile_params.full_x = tile.x + buffer_params_.full_x;
tile_params.full_y = tile.y + buffer_params_.full_y;
tile_params.full_width = buffer_params_.full_width;
tile_params.full_height = buffer_params_.full_height;
tile_params.update_offset_stride();
path_trace_->reset(buffer_params_, tile_params);

@ -372,8 +372,17 @@ void TileManager::update(const BufferParams &params, const Scene *scene)
configure_image_spec_from_buffer(&write_state_.image_spec, buffer_params_, tile_size_);
const DenoiseParams denoise_params = scene->integrator->get_denoise_params();
const AdaptiveSampling adaptive_sampling = scene->integrator->get_adaptive_sampling();
node_to_image_spec_atttributes(
&write_state_.image_spec, &denoise_params, ATTR_DENOISE_SOCKET_PREFIX);
if (adaptive_sampling.use) {
overscan_ = 4;
}
else {
overscan_ = 0;
}
}
bool TileManager::done()
@ -399,19 +408,29 @@ Tile TileManager::get_tile_for_index(int index) const
/* TODO(sergey): Consider using hilbert spiral, or. maybe, even configurable. Not sure this
* brings a lot of value since this is only applicable to BIG tiles. */
const int tile_y = index / tile_state_.num_tiles_x;
const int tile_x = index - tile_y * tile_state_.num_tiles_x;
const int tile_index_y = index / tile_state_.num_tiles_x;
const int tile_index_x = index - tile_index_y * tile_state_.num_tiles_x;
const int tile_x = tile_index_x * tile_size_.x;
const int tile_y = tile_index_y * tile_size_.y;
Tile tile;
tile.x = tile_x * tile_size_.x;
tile.y = tile_y * tile_size_.y;
tile.width = tile_size_.x;
tile.height = tile_size_.y;
tile.x = tile_x - overscan_;
tile.y = tile_y - overscan_;
tile.width = tile_size_.x + 2 * overscan_;
tile.height = tile_size_.y + 2 * overscan_;
tile.x = max(tile.x, 0);
tile.y = max(tile.y, 0);
tile.width = min(tile.width, buffer_params_.width - tile.x);
tile.height = min(tile.height, buffer_params_.height - tile.y);
tile.window_x = tile_x - tile.x;
tile.window_y = tile_y - tile.y;
tile.window_width = min(tile_size_.x, buffer_params_.width - (tile.x + tile.window_x));
tile.window_height = min(tile_size_.y, buffer_params_.height - (tile.y + tile.window_y));
return tile;
}
@ -483,11 +502,22 @@ bool TileManager::write_tile(const RenderBuffers &tile_buffers)
DCHECK_EQ(tile_buffers.params.pass_stride, buffer_params_.pass_stride);
vector<float> pixel_storage;
const BufferParams &tile_params = tile_buffers.params;
const float *pixels = tile_buffers.buffer.data();
const int tile_x = tile_params.full_x - buffer_params_.full_x;
const int tile_y = tile_params.full_y - buffer_params_.full_y;
const int tile_x = tile_params.full_x - buffer_params_.full_x + tile_params.window_x;
const int tile_y = tile_params.full_y - buffer_params_.full_y + tile_params.window_y;
const int64_t pass_stride = tile_params.pass_stride;
const int64_t tile_row_stride = tile_params.width * pass_stride;
const int64_t xstride = pass_stride * sizeof(float);
const int64_t ystride = xstride * tile_params.width;
const int64_t zstride = ystride * tile_params.height;
const float *pixels = tile_buffers.buffer.data() + tile_params.window_x * pass_stride +
tile_params.window_y * tile_row_stride;
VLOG(3) << "Write tile at " << tile_x << ", " << tile_y;
@ -499,13 +529,16 @@ bool TileManager::write_tile(const RenderBuffers &tile_buffers)
* The only thing we have to ensure is that the tile_x and tile_y are a multiple of the
* image tile size, which happens in compute_render_tile_size. */
if (!write_state_.tile_out->write_tiles(tile_x,
tile_x + tile_params.width,
tile_x + tile_params.window_width,
tile_y,
tile_y + tile_params.height,
tile_y + tile_params.window_height,
0,
1,
TypeDesc::FLOAT,
pixels)) {
pixels,
xstride,
ystride,
zstride)) {
LOG(ERROR) << "Error writing tile " << write_state_.tile_out->geterror();
return false;
}
@ -531,12 +564,15 @@ void TileManager::finish_write_tiles()
++tile_index) {
const Tile tile = get_tile_for_index(tile_index);
VLOG(3) << "Write dummy tile at " << tile.x << ", " << tile.y;
const int tile_x = tile.x + tile.window_x;
const int tile_y = tile.y + tile.window_y;
write_state_.tile_out->write_tiles(tile.x,
tile.x + tile.width,
tile.y,
tile.y + tile.height,
VLOG(3) << "Write dummy tile at " << tile_x << ", " << tile_y;
write_state_.tile_out->write_tiles(tile_x,
tile_x + tile.window_width,
tile_y,
tile_y + tile.window_height,
0,
1,
TypeDesc::FLOAT,

@ -35,6 +35,9 @@ class Tile {
int x = 0, y = 0;
int width = 0, height = 0;
int window_x = 0, window_y = 0;
int window_width = 0, window_height = 0;
Tile()
{
}
@ -128,6 +131,9 @@ class TileManager {
int2 tile_size_ = make_int2(0, 0);
/* Number of extra pixels around the actual tile to render. */
int overscan_ = 0;
BufferParams buffer_params_;
/* Tile scheduling state. */