forked from bartvdbraak/blender
Merge branch 'blender-v3.0-release' to bring in D13042:
Fix performance decrease with Scrambling Distance on
This commit is contained in:
commit
c49d2cbe92
@ -258,7 +258,8 @@ void PathTraceWorkGPU::render_samples(RenderStatistics &statistics,
|
|||||||
* become busy after adding new tiles). This is especially important for the shadow catcher which
|
* become busy after adding new tiles). This is especially important for the shadow catcher which
|
||||||
* schedules work in halves of available number of paths. */
|
* schedules work in halves of available number of paths. */
|
||||||
work_tile_scheduler_.set_max_num_path_states(max_num_paths_ / 8);
|
work_tile_scheduler_.set_max_num_path_states(max_num_paths_ / 8);
|
||||||
|
work_tile_scheduler_.set_accelerated_rt((device_->get_bvh_layout_mask() & BVH_LAYOUT_OPTIX) !=
|
||||||
|
0);
|
||||||
work_tile_scheduler_.reset(effective_buffer_params_,
|
work_tile_scheduler_.reset(effective_buffer_params_,
|
||||||
start_sample,
|
start_sample,
|
||||||
samples_num,
|
samples_num,
|
||||||
|
@ -46,7 +46,8 @@ ccl_device_inline uint round_up_to_power_of_two(uint x)
|
|||||||
return next_power_of_two(x);
|
return next_power_of_two(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
TileSize tile_calculate_best_size(const int2 &image_size,
|
TileSize tile_calculate_best_size(const bool accel_rt,
|
||||||
|
const int2 &image_size,
|
||||||
const int num_samples,
|
const int num_samples,
|
||||||
const int max_num_path_states,
|
const int max_num_path_states,
|
||||||
const float scrambling_distance)
|
const float scrambling_distance)
|
||||||
@ -73,7 +74,7 @@ TileSize tile_calculate_best_size(const int2 &image_size,
|
|||||||
|
|
||||||
TileSize tile_size;
|
TileSize tile_size;
|
||||||
const int num_path_states_per_sample = max_num_path_states / num_samples;
|
const int num_path_states_per_sample = max_num_path_states / num_samples;
|
||||||
if (scrambling_distance < 0.9f) {
|
if (scrambling_distance < 0.9f && accel_rt) {
|
||||||
/* Prefer large tiles for scrambling distance, bounded by max num path states. */
|
/* Prefer large tiles for scrambling distance, bounded by max num path states. */
|
||||||
tile_size.width = min(image_size.x, max_num_path_states);
|
tile_size.width = min(image_size.x, max_num_path_states);
|
||||||
tile_size.height = min(image_size.y, max(max_num_path_states / tile_size.width, 1));
|
tile_size.height = min(image_size.y, max(max_num_path_states / tile_size.width, 1));
|
||||||
|
@ -49,7 +49,8 @@ std::ostream &operator<<(std::ostream &os, const TileSize &tile_size);
|
|||||||
* of active path states.
|
* of active path states.
|
||||||
* Will attempt to provide best guess to keep path tracing threads of a device as localized as
|
* Will attempt to provide best guess to keep path tracing threads of a device as localized as
|
||||||
* possible, and have as many threads active for every tile as possible. */
|
* possible, and have as many threads active for every tile as possible. */
|
||||||
TileSize tile_calculate_best_size(const int2 &image_size,
|
TileSize tile_calculate_best_size(const bool accel_rt,
|
||||||
|
const int2 &image_size,
|
||||||
const int num_samples,
|
const int num_samples,
|
||||||
const int max_num_path_states,
|
const int max_num_path_states,
|
||||||
const float scrambling_distance);
|
const float scrambling_distance);
|
||||||
|
@ -28,6 +28,11 @@ WorkTileScheduler::WorkTileScheduler()
|
|||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void WorkTileScheduler::set_accelerated_rt(bool accelerated_rt)
|
||||||
|
{
|
||||||
|
accelerated_rt_ = accelerated_rt;
|
||||||
|
}
|
||||||
|
|
||||||
void WorkTileScheduler::set_max_num_path_states(int max_num_path_states)
|
void WorkTileScheduler::set_max_num_path_states(int max_num_path_states)
|
||||||
{
|
{
|
||||||
max_num_path_states_ = max_num_path_states;
|
max_num_path_states_ = max_num_path_states;
|
||||||
@ -61,7 +66,7 @@ void WorkTileScheduler::reset(const BufferParams &buffer_params,
|
|||||||
void WorkTileScheduler::reset_scheduler_state()
|
void WorkTileScheduler::reset_scheduler_state()
|
||||||
{
|
{
|
||||||
tile_size_ = tile_calculate_best_size(
|
tile_size_ = tile_calculate_best_size(
|
||||||
image_size_px_, samples_num_, max_num_path_states_, scrambling_distance_);
|
accelerated_rt_, image_size_px_, samples_num_, max_num_path_states_, scrambling_distance_);
|
||||||
|
|
||||||
VLOG(3) << "Will schedule tiles of size " << tile_size_;
|
VLOG(3) << "Will schedule tiles of size " << tile_size_;
|
||||||
|
|
||||||
|
@ -31,6 +31,9 @@ class WorkTileScheduler {
|
|||||||
public:
|
public:
|
||||||
WorkTileScheduler();
|
WorkTileScheduler();
|
||||||
|
|
||||||
|
/* To indicate if there is accelerated RT support. */
|
||||||
|
void set_accelerated_rt(bool state);
|
||||||
|
|
||||||
/* Maximum path states which are allowed to be used by a single scheduled work tile.
|
/* Maximum path states which are allowed to be used by a single scheduled work tile.
|
||||||
*
|
*
|
||||||
* Affects the scheduled work size: the work size will be as big as possible, but will not exceed
|
* Affects the scheduled work size: the work size will be as big as possible, but will not exceed
|
||||||
@ -55,6 +58,9 @@ class WorkTileScheduler {
|
|||||||
protected:
|
protected:
|
||||||
void reset_scheduler_state();
|
void reset_scheduler_state();
|
||||||
|
|
||||||
|
/* Used to indicate if there is accelerated ray tracing. */
|
||||||
|
bool accelerated_rt_ = false;
|
||||||
|
|
||||||
/* Maximum allowed path states to be used.
|
/* Maximum allowed path states to be used.
|
||||||
*
|
*
|
||||||
* TODO(sergey): Naming can be improved. The fact that this is a limiting factor based on the
|
* TODO(sergey): Naming can be improved. The fact that this is a limiting factor based on the
|
||||||
|
@ -29,17 +29,20 @@ ccl_device_inline void get_work_pixel(ccl_global const KernelWorkTile *tile,
|
|||||||
ccl_private uint *y,
|
ccl_private uint *y,
|
||||||
ccl_private uint *sample)
|
ccl_private uint *sample)
|
||||||
{
|
{
|
||||||
#if 0
|
uint sample_offset, pixel_offset;
|
||||||
/* Keep threads for the same sample together. */
|
|
||||||
uint tile_pixels = tile->w * tile->h;
|
if (kernel_data.integrator.scrambling_distance < 0.9f) {
|
||||||
uint sample_offset = global_work_index / tile_pixels;
|
/* Keep threads for the same sample together. */
|
||||||
uint pixel_offset = global_work_index - sample_offset * tile_pixels;
|
uint tile_pixels = tile->w * tile->h;
|
||||||
#else
|
sample_offset = global_work_index / tile_pixels;
|
||||||
/* Keeping threads for the same pixel together.
|
pixel_offset = global_work_index - sample_offset * tile_pixels;
|
||||||
* Appears to improve performance by a few % on CUDA and OptiX. */
|
}
|
||||||
uint sample_offset = global_work_index % tile->num_samples;
|
else {
|
||||||
uint pixel_offset = global_work_index / tile->num_samples;
|
/* Keeping threads for the same pixel together.
|
||||||
#endif
|
* Appears to improve performance by a few % on CUDA and OptiX. */
|
||||||
|
sample_offset = global_work_index % tile->num_samples;
|
||||||
|
pixel_offset = global_work_index / tile->num_samples;
|
||||||
|
}
|
||||||
|
|
||||||
uint y_offset = pixel_offset / tile->w;
|
uint y_offset = pixel_offset / tile->w;
|
||||||
uint x_offset = pixel_offset - y_offset * tile->w;
|
uint x_offset = pixel_offset - y_offset * tile->w;
|
||||||
|
@ -24,26 +24,26 @@ CCL_NAMESPACE_BEGIN
|
|||||||
TEST(tile_calculate_best_size, Basic)
|
TEST(tile_calculate_best_size, Basic)
|
||||||
{
|
{
|
||||||
/* Make sure CPU-like case is handled properly. */
|
/* Make sure CPU-like case is handled properly. */
|
||||||
EXPECT_EQ(tile_calculate_best_size(make_int2(1920, 1080), 1, 1, 1.0f), TileSize(1, 1, 1));
|
EXPECT_EQ(tile_calculate_best_size(false, make_int2(1920, 1080), 1, 1, 1.0f), TileSize(1, 1, 1));
|
||||||
EXPECT_EQ(tile_calculate_best_size(make_int2(1920, 1080), 100, 1, 1.0f), TileSize(1, 1, 1));
|
EXPECT_EQ(tile_calculate_best_size(false, make_int2(1920, 1080), 100, 1, 1.0f), TileSize(1, 1, 1));
|
||||||
|
|
||||||
/* Enough path states to fit an entire image with all samples. */
|
/* Enough path states to fit an entire image with all samples. */
|
||||||
EXPECT_EQ(tile_calculate_best_size(make_int2(1920, 1080), 1, 1920 * 1080, 1.0f),
|
EXPECT_EQ(tile_calculate_best_size(false, make_int2(1920, 1080), 1, 1920 * 1080, 1.0f),
|
||||||
TileSize(1920, 1080, 1));
|
TileSize(1920, 1080, 1));
|
||||||
EXPECT_EQ(tile_calculate_best_size(make_int2(1920, 1080), 100, 1920 * 1080 * 100, 1.0f),
|
EXPECT_EQ(tile_calculate_best_size(false, make_int2(1920, 1080), 100, 1920 * 1080 * 100, 1.0f),
|
||||||
TileSize(1920, 1080, 100));
|
TileSize(1920, 1080, 100));
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(tile_calculate_best_size, Extreme)
|
TEST(tile_calculate_best_size, Extreme)
|
||||||
{
|
{
|
||||||
EXPECT_EQ(tile_calculate_best_size(make_int2(32, 32), 262144, 131072, 1.0f),
|
EXPECT_EQ(tile_calculate_best_size(false, make_int2(32, 32), 262144, 131072, 1.0f),
|
||||||
TileSize(1, 1, 512));
|
TileSize(1, 1, 512));
|
||||||
EXPECT_EQ(tile_calculate_best_size(make_int2(32, 32), 1048576, 131072, 1.0f),
|
EXPECT_EQ(tile_calculate_best_size(false, make_int2(32, 32), 1048576, 131072, 1.0f),
|
||||||
TileSize(1, 1, 1024));
|
TileSize(1, 1, 1024));
|
||||||
EXPECT_EQ(tile_calculate_best_size(make_int2(32, 32), 10485760, 131072, 1.0f),
|
EXPECT_EQ(tile_calculate_best_size(false, make_int2(32, 32), 10485760, 131072, 1.0f),
|
||||||
TileSize(1, 1, 4096));
|
TileSize(1, 1, 4096));
|
||||||
|
|
||||||
EXPECT_EQ(tile_calculate_best_size(make_int2(32, 32), 8192 * 8192 * 2, 1024, 1.0f),
|
EXPECT_EQ(tile_calculate_best_size(false, make_int2(32, 32), 8192 * 8192 * 2, 1024, 1.0f),
|
||||||
TileSize(1, 1, 1024));
|
TileSize(1, 1, 1024));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user