forked from bartvdbraak/blender
Merge branch 'blender-v3.0-release' to bring in D13042:
Fix performance decrease with Scrambling Distance on
This commit is contained in:
commit
c49d2cbe92
@ -258,7 +258,8 @@ void PathTraceWorkGPU::render_samples(RenderStatistics &statistics,
|
||||
* become busy after adding new tiles). This is especially important for the shadow catcher which
|
||||
* schedules work in halves of available number of paths. */
|
||||
work_tile_scheduler_.set_max_num_path_states(max_num_paths_ / 8);
|
||||
|
||||
work_tile_scheduler_.set_accelerated_rt((device_->get_bvh_layout_mask() & BVH_LAYOUT_OPTIX) !=
|
||||
0);
|
||||
work_tile_scheduler_.reset(effective_buffer_params_,
|
||||
start_sample,
|
||||
samples_num,
|
||||
|
@ -46,7 +46,8 @@ ccl_device_inline uint round_up_to_power_of_two(uint x)
|
||||
return next_power_of_two(x);
|
||||
}
|
||||
|
||||
TileSize tile_calculate_best_size(const int2 &image_size,
|
||||
TileSize tile_calculate_best_size(const bool accel_rt,
|
||||
const int2 &image_size,
|
||||
const int num_samples,
|
||||
const int max_num_path_states,
|
||||
const float scrambling_distance)
|
||||
@ -73,7 +74,7 @@ TileSize tile_calculate_best_size(const int2 &image_size,
|
||||
|
||||
TileSize tile_size;
|
||||
const int num_path_states_per_sample = max_num_path_states / num_samples;
|
||||
if (scrambling_distance < 0.9f) {
|
||||
if (scrambling_distance < 0.9f && accel_rt) {
|
||||
/* Prefer large tiles for scrambling distance, bounded by max num path states. */
|
||||
tile_size.width = min(image_size.x, max_num_path_states);
|
||||
tile_size.height = min(image_size.y, max(max_num_path_states / tile_size.width, 1));
|
||||
|
@ -49,7 +49,8 @@ std::ostream &operator<<(std::ostream &os, const TileSize &tile_size);
|
||||
* of active path states.
|
||||
* Will attempt to provide best guess to keep path tracing threads of a device as localized as
|
||||
* possible, and have as many threads active for every tile as possible. */
|
||||
TileSize tile_calculate_best_size(const int2 &image_size,
|
||||
TileSize tile_calculate_best_size(const bool accel_rt,
|
||||
const int2 &image_size,
|
||||
const int num_samples,
|
||||
const int max_num_path_states,
|
||||
const float scrambling_distance);
|
||||
|
@ -28,6 +28,11 @@ WorkTileScheduler::WorkTileScheduler()
|
||||
{
|
||||
}
|
||||
|
||||
/* Record whether accelerated ray tracing is available.
 *
 * The value is only stored in `accelerated_rt_` here; it is later passed as the first argument
 * to `tile_calculate_best_size()` from `reset_scheduler_state()`. */
void WorkTileScheduler::set_accelerated_rt(bool accelerated_rt)
|
||||
{
|
||||
accelerated_rt_ = accelerated_rt;
|
||||
}
|
||||
|
||||
void WorkTileScheduler::set_max_num_path_states(int max_num_path_states)
|
||||
{
|
||||
max_num_path_states_ = max_num_path_states;
|
||||
@ -61,7 +66,7 @@ void WorkTileScheduler::reset(const BufferParams &buffer_params,
|
||||
void WorkTileScheduler::reset_scheduler_state()
|
||||
{
|
||||
tile_size_ = tile_calculate_best_size(
|
||||
image_size_px_, samples_num_, max_num_path_states_, scrambling_distance_);
|
||||
accelerated_rt_, image_size_px_, samples_num_, max_num_path_states_, scrambling_distance_);
|
||||
|
||||
VLOG(3) << "Will schedule tiles of size " << tile_size_;
|
||||
|
||||
|
@ -31,6 +31,9 @@ class WorkTileScheduler {
|
||||
public:
|
||||
WorkTileScheduler();
|
||||
|
||||
/* Indicate whether accelerated ray tracing (RT) is supported. */
|
||||
void set_accelerated_rt(bool state);
|
||||
|
||||
/* Maximum path states which are allowed to be used by a single scheduled work tile.
|
||||
*
|
||||
* Affects the scheduled work size: the work size will be as big as possible, but will not exceed
|
||||
@ -55,6 +58,9 @@ class WorkTileScheduler {
|
||||
protected:
|
||||
void reset_scheduler_state();
|
||||
|
||||
/* Used to indicate if there is accelerated ray tracing. */
|
||||
bool accelerated_rt_ = false;
|
||||
|
||||
/* Maximum allowed path states to be used.
|
||||
*
|
||||
* TODO(sergey): Naming can be improved. The fact that this is a limiting factor based on the
|
||||
|
@ -29,17 +29,20 @@ ccl_device_inline void get_work_pixel(ccl_global const KernelWorkTile *tile,
|
||||
ccl_private uint *y,
|
||||
ccl_private uint *sample)
|
||||
{
|
||||
#if 0
|
||||
uint sample_offset, pixel_offset;
|
||||
|
||||
if (kernel_data.integrator.scrambling_distance < 0.9f) {
|
||||
/* Keep threads for the same sample together. */
|
||||
uint tile_pixels = tile->w * tile->h;
|
||||
uint sample_offset = global_work_index / tile_pixels;
|
||||
uint pixel_offset = global_work_index - sample_offset * tile_pixels;
|
||||
#else
|
||||
sample_offset = global_work_index / tile_pixels;
|
||||
pixel_offset = global_work_index - sample_offset * tile_pixels;
|
||||
}
|
||||
else {
|
||||
/* Keeping threads for the same pixel together.
|
||||
* Appears to improve performance by a few % on CUDA and OptiX. */
|
||||
uint sample_offset = global_work_index % tile->num_samples;
|
||||
uint pixel_offset = global_work_index / tile->num_samples;
|
||||
#endif
|
||||
sample_offset = global_work_index % tile->num_samples;
|
||||
pixel_offset = global_work_index / tile->num_samples;
|
||||
}
|
||||
|
||||
uint y_offset = pixel_offset / tile->w;
|
||||
uint x_offset = pixel_offset - y_offset * tile->w;
|
||||
|
@ -24,26 +24,26 @@ CCL_NAMESPACE_BEGIN
|
||||
TEST(tile_calculate_best_size, Basic)
|
||||
{
|
||||
/* Make sure CPU-like case is handled properly. */
|
||||
EXPECT_EQ(tile_calculate_best_size(make_int2(1920, 1080), 1, 1, 1.0f), TileSize(1, 1, 1));
|
||||
EXPECT_EQ(tile_calculate_best_size(make_int2(1920, 1080), 100, 1, 1.0f), TileSize(1, 1, 1));
|
||||
EXPECT_EQ(tile_calculate_best_size(false, make_int2(1920, 1080), 1, 1, 1.0f), TileSize(1, 1, 1));
|
||||
EXPECT_EQ(tile_calculate_best_size(false, make_int2(1920, 1080), 100, 1, 1.0f), TileSize(1, 1, 1));
|
||||
|
||||
/* Enough path states to fit an entire image with all samples. */
|
||||
EXPECT_EQ(tile_calculate_best_size(make_int2(1920, 1080), 1, 1920 * 1080, 1.0f),
|
||||
EXPECT_EQ(tile_calculate_best_size(false, make_int2(1920, 1080), 1, 1920 * 1080, 1.0f),
|
||||
TileSize(1920, 1080, 1));
|
||||
EXPECT_EQ(tile_calculate_best_size(make_int2(1920, 1080), 100, 1920 * 1080 * 100, 1.0f),
|
||||
EXPECT_EQ(tile_calculate_best_size(false, make_int2(1920, 1080), 100, 1920 * 1080 * 100, 1.0f),
|
||||
TileSize(1920, 1080, 100));
|
||||
}
|
||||
|
||||
TEST(tile_calculate_best_size, Extreme)
|
||||
{
|
||||
EXPECT_EQ(tile_calculate_best_size(make_int2(32, 32), 262144, 131072, 1.0f),
|
||||
EXPECT_EQ(tile_calculate_best_size(false, make_int2(32, 32), 262144, 131072, 1.0f),
|
||||
TileSize(1, 1, 512));
|
||||
EXPECT_EQ(tile_calculate_best_size(make_int2(32, 32), 1048576, 131072, 1.0f),
|
||||
EXPECT_EQ(tile_calculate_best_size(false, make_int2(32, 32), 1048576, 131072, 1.0f),
|
||||
TileSize(1, 1, 1024));
|
||||
EXPECT_EQ(tile_calculate_best_size(make_int2(32, 32), 10485760, 131072, 1.0f),
|
||||
EXPECT_EQ(tile_calculate_best_size(false, make_int2(32, 32), 10485760, 131072, 1.0f),
|
||||
TileSize(1, 1, 4096));
|
||||
|
||||
EXPECT_EQ(tile_calculate_best_size(make_int2(32, 32), 8192 * 8192 * 2, 1024, 1.0f),
|
||||
EXPECT_EQ(tile_calculate_best_size(false, make_int2(32, 32), 8192 * 8192 * 2, 1024, 1.0f),
|
||||
TileSize(1, 1, 1024));
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user