EEVEE-Next: Shadow: Limit view per shadow map projection

This limits the number of tilemaps per LOD that can be fed to avoid the
easy to hit "Too many shadow updates" (#119757).

This allows a maximum of 64 tilemaps to be updated at once at their lowest
requested LOD (so ~10.6667 point lights if every face of the punctual
shadow map is needed, but likely more in practice).

Unfortunately this is still quite low and will surely be hit quite soon
once directional shadows are added on top of it. One idea to work around
this would be to time-slice the update of some lights, but that opens a
whole can of worms that I'm not ready to open for now, so I created
#119890 for future reference.

Some notes: most lights seem to request around 3 LODs. It might help
to allow requesting at least 2 LODs when rendering, since volumes
might want a lower LOD to be available.

I added a very simplistic heuristic that also lowers the max tilemaps
when transforming, during animation playback, or when navigating the 3D
view, to improve the responsiveness of the engine. Note that this only
lowers the resolution to the minimum requested one, so it should be
good enough in most cases.

Pull Request: https://projects.blender.org/blender/blender/pulls/119889
This commit is contained in:
Clément Foucault 2024-03-26 20:33:31 +01:00 committed by Clément Foucault
parent 627ec2666b
commit 2a600b4a83
7 changed files with 283 additions and 43 deletions

@ -248,6 +248,17 @@ class Instance {
return DRW_state_is_playback();
}
bool is_transforming() const
{
  /* `G.moving` is only meaningful for interactive sessions. The caller must branch on
   * `is_image_render()` (and bail out) BEFORE calling this, which is why the assert
   * requires NOT being in an image render: as originally written (`is_image_render()`),
   * the assert fired on exactly the safe call path (see ShadowModule::max_view_per_tilemap). */
  BLI_assert_msg(!is_image_render(), "Need to be checked first otherwise this is unsafe");
  return (G.moving & (G_TRANSFORM_OBJ | G_TRANSFORM_EDIT)) != 0;
}
bool is_navigating() const
{
  /* Thin accessor: forwards to the draw-manager state query for viewport navigation. */
  const bool navigating = DRW_state_is_navigating();
  return navigating;
}
bool use_scene_lights() const
{
return (!v3d) ||

@ -1127,6 +1127,7 @@ void ShadowModule::end_sync()
/* Mark tiles that are redundant in the mipmap chain as unused. */
PassSimple::Sub &sub = pass.sub("MaskLod");
sub.shader_set(inst_.shaders.static_shader_get(SHADOW_PAGE_MASK));
sub.push_constant("max_view_per_tilemap", &max_view_per_tilemap_);
sub.bind_ssbo("tilemaps_buf", tilemap_pool.tilemaps_data);
sub.bind_ssbo("tiles_buf", tilemap_pool.tiles_data);
sub.dispatch(int3(1, 1, tilemap_pool.tilemaps_data.size()));
@ -1293,7 +1294,7 @@ float ShadowModule::tilemap_pixel_radius()
bool ShadowModule::shadow_update_finished()
{
if (inst_.is_viewport()) {
if (!inst_.is_image_render()) {
/* For viewport, only run the shadow update once per redraw.
* This avoids the stall from the read-back and freezes from long shadow update. */
return true;
@ -1314,6 +1315,37 @@ bool ShadowModule::shadow_update_finished()
return stats.page_rendered_count == stats.page_update_count;
}
/* Maximum number of views (LOD levels) a single shadow tilemap may render during one update
 * loop. Load-balances the limited `SHADOW_VIEW_MAX` view pool across all visible tilemaps. */
int ShadowModule::max_view_per_tilemap()
{
  if (inst_.is_image_render()) {
    /* No need to limit updates per light: image renders loop until every requested level of
     * every light has been rendered, so allow all LODs at once. */
    return SHADOW_TILEMAP_LOD;
  }
  /* For now very simple heuristic. Can be improved later by taking into consideration how many
   * tilemaps are updating, but we cannot know the ones updated by casters. */
  int potential_view_count = 0;
  for (auto i : IndexRange(tilemap_pool.tilemaps_data.size())) {
    if (tilemap_pool.tilemaps_data[i].projection_type == SHADOW_PROJECTION_CUBEFACE) {
      /* A cube-face tilemap can request up to one view per LOD level. */
      potential_view_count += SHADOW_TILEMAP_LOD;
    }
    else {
      potential_view_count += 1;
    }
  }
  /* Guard against division by zero when no tilemap is in the pool this redraw. */
  int max_view_count = divide_ceil_u(SHADOW_VIEW_MAX, math::max(potential_view_count, 1));
  /* For viewport interactivity, have a hard maximum. This allows smoother experience. */
  if (inst_.is_transforming() || inst_.is_navigating()) {
    max_view_count = math::min(2, max_view_count);
  }
  /* For animation playback, we always want the maximum performance. */
  if (inst_.is_playback()) {
    max_view_count = math::min(1, max_view_count);
  }
  return max_view_count;
}
void ShadowModule::set_view(View &view, GPUTexture *depth_tx)
{
if (enabled_ == false) {
@ -1329,6 +1361,7 @@ void ShadowModule::set_view(View &view, GPUTexture *depth_tx)
GPU_texture_get_mipmap_size(depth_tx, 0, target_size);
dispatch_depth_scan_size_ = math::divide_ceil(target_size, int3(SHADOW_DEPTH_SCAN_GROUP_SIZE));
max_view_per_tilemap_ = max_view_per_tilemap();
pixel_world_radius_ = screen_pixel_radius(view, int2(target_size));
data_.tilemap_projection_ratio = tilemap_pixel_radius() / pixel_world_radius_;

@ -259,6 +259,7 @@ class ShadowModule {
float pixel_world_radius_;
int2 usage_tag_fb_resolution_;
int usage_tag_fb_lod_ = 5;
int max_view_per_tilemap_ = 1;
/* Statistics that are read back to CPU after a few frame (to avoid stall). */
SwapChain<ShadowStatisticsBuf, 5> statistics_buf_;
@ -379,6 +380,9 @@ class ShadowModule {
float screen_pixel_radius(const View &view, const int2 &extent);
/** Compute approximate punctual shadow pixel world space radius, 1 unit away of the light. */
float tilemap_pixel_radius();
/* Returns the maximum number of view per shadow projection for a single update loop. */
int max_view_per_tilemap();
};
/** \} */

@ -3,63 +3,169 @@
* SPDX-License-Identifier: GPL-2.0-or-later */
/**
* Virtual shadow-mapping: Usage un-tagging
* Virtual shadow-mapping: Usage un-tagging.
*
* Remove used tag from masked tiles (LOD overlap).
* Remove used tag from masked tiles (LOD overlap) or for load balancing (reducing the number of
* views per shadow map).
*/
#pragma BLENDER_REQUIRE(eevee_shadow_tilemap_lib.glsl)
shared uint usage_grid[SHADOW_TILEMAP_RES / 2][SHADOW_TILEMAP_RES / 2];
/* Reuse the same enum values for these transient flag during the amend phase.
* They are never written to the tile data SSBO. */
#define SHADOW_TILE_AMENDED SHADOW_IS_RENDERED
/* Visibility value to write back. */
#define SHADOW_TILE_MASKED SHADOW_IS_ALLOCATED
shared uint tiles_local[SHADOW_TILEDATA_PER_TILEMAP];
shared uint levels_rendered;
/* Offset of a tile inside the LDS staging copy of the current tilemap.
 * Same layout as the SSBO but with `tiles_index` fixed to 0, since only this
 * workgroup's tilemap is staged in shared memory. */
int shadow_tile_offset_lds(ivec2 tile, int lod)
{
  return shadow_tile_offset(tile, 0, lod);
}
/* Deactivate threads that are not part of this LOD. Only lets through threads whose tile
 * coordinate fits inside the (halved per level) resolution of the given tilemap LOD. */
bool thread_mask(ivec2 tile_co, int lod)
{
  int lod_res = SHADOW_TILEMAP_RES >> lod;
  return all(lessThan(tile_co, ivec2(lod_res)));
}
void main()
{
ivec2 tile_co = ivec2(gl_GlobalInvocationID.xy);
uint tilemap_index = gl_GlobalInvocationID.z;
ShadowTileMapData tilemap = tilemaps_buf[tilemap_index];
/* NOTE: Barriers are ok since this branch is taken by all threads. */
if (tilemap.projection_type == SHADOW_PROJECTION_CUBEFACE) {
/* Load all data to LDS. Allows us to do some modification on the flag bits and only flush to
* main memory the usage bit. */
for (int lod = 0; lod <= SHADOW_TILEMAP_LOD; lod++) {
if (thread_mask(tile_co, lod)) {
int tile_offset = shadow_tile_offset(tile_co, tilemap.tiles_index, lod);
ShadowTileDataPacked tile_data = tiles_buf[tile_offset];
if ((tile_data & SHADOW_IS_USED) == 0) {
/* Do not consider this tile as going to be rendered if it is not used.
* Simplify checks later. This is a local modification. */
tile_data &= ~SHADOW_DO_UPDATE;
}
/* Clear these flags as they could contain any values. */
tile_data &= ~(SHADOW_TILE_AMENDED | SHADOW_TILE_MASKED);
int tile_lds = shadow_tile_offset_lds(tile_co, lod);
tiles_local[tile_lds] = tile_data;
}
}
#if 1 /* Can be disabled for debugging. */
/* For each level collect the number of used (or masked) tile that are covering the tile from
* the level underneath. If this adds up to 4 the underneath tile is flag unused as its data
* is not needed for rendering.
*
* This is because 2 receivers can tag used the same area of the shadow-map but with different
* LODs. */
bool is_used = false;
ivec2 tile_co = ivec2(gl_GlobalInvocationID.xy);
uint lod_size = uint(SHADOW_TILEMAP_RES);
for (int lod = 0; lod <= SHADOW_TILEMAP_LOD; lod++, lod_size >>= 1u) {
bool thread_active = all(lessThan(tile_co, ivec2(lod_size)));
* This is because 2 receivers can tag "used" the same area of the shadow-map but with
* different LODs. */
for (int lod = 1; lod <= SHADOW_TILEMAP_LOD; lod++) {
barrier();
if (thread_mask(tile_co, lod)) {
ivec2 tile_co_prev_lod = tile_co * 2;
int prev_lod = lod - 1;
ShadowTileData tile;
if (thread_active) {
int tile_offset = shadow_tile_offset(tile_co, tilemap.tiles_index, lod);
tile = shadow_tile_unpack(tiles_buf[tile_offset]);
#ifdef SHADOW_FORCE_LOD0
if (lod == 0) {
tiles_buf[tile_offset] |= SHADOW_IS_USED;
int tile_0 = shadow_tile_offset_lds(tile_co_prev_lod + ivec2(0, 0), prev_lod);
int tile_1 = shadow_tile_offset_lds(tile_co_prev_lod + ivec2(1, 0), prev_lod);
int tile_2 = shadow_tile_offset_lds(tile_co_prev_lod + ivec2(0, 1), prev_lod);
int tile_3 = shadow_tile_offset_lds(tile_co_prev_lod + ivec2(1, 1), prev_lod);
/* Is masked if all tiles from the previous level were tagged as used. */
bool is_masked = ((tiles_local[tile_0] & tiles_local[tile_1] & tiles_local[tile_2] &
tiles_local[tile_3]) &
SHADOW_IS_USED) != 0;
int tile_offset = shadow_tile_offset_lds(tile_co, lod);
if (is_masked) {
/* Consider this tile occluding lower levels. Use SHADOW_IS_USED flag for that. */
tiles_local[tile_offset] |= SHADOW_IS_USED;
/* Do not consider this tile when checking which tilemap level to render in next loop. */
tiles_local[tile_offset] &= ~SHADOW_DO_UPDATE;
/* Tag as modified so that we can amend it inside the `tiles_buf`. */
tiles_local[tile_offset] |= SHADOW_TILE_AMENDED;
/* Visibility value to write back. */
tiles_local[tile_offset] |= SHADOW_TILE_MASKED;
}
else {
tiles_buf[tile_offset] &= ~SHADOW_IS_USED;
}
#else
if (lod > 0 && usage_grid[tile_co.y][tile_co.x] == 4u) {
/* Remove the usage flag as this tile is completely covered by higher LOD tiles. */
tiles_buf[tile_offset] &= ~SHADOW_IS_USED;
/* Consider this tile occluding lower levels. */
tile.is_used = true;
}
#endif
/* Reset count for next level. */
usage_grid[tile_co.y / 2][tile_co.x / 2] = 0u;
}
}
#endif
barrier();
#if 1 /* Can be disabled for debugging. */
/* Count the number of LOD level to render for this tilemap and to clamp it to a maximum number
* of view per tilemap.
* This avoid flooding the 64 view limit per redraw with ~3-4 LOD levels per tilemaps leaving
* some lights unshadowed.
* The clamped LOD levels' tiles need to be merged to the highest LOD allowed. */
if (thread_active) {
if (tile.is_used) {
atomicAdd(usage_grid[tile_co.y / 2][tile_co.x / 2], 1u);
/* Construct bitmask of LODs that contain tiles to render (i.e: that will request a view). */
if (gl_LocalInvocationIndex == 0u) {
levels_rendered = 0u;
}
barrier();
for (int lod = 0; lod <= SHADOW_TILEMAP_LOD; lod++) {
/* TODO(fclem): Could maybe speedup using WaveAllBitOr. */
if (thread_mask(tile_co, lod)) {
int tile_offset = shadow_tile_offset_lds(tile_co, lod);
if ((tiles_local[tile_offset] & SHADOW_DO_UPDATE) != 0) {
atomicOr(levels_rendered, 1u << lod);
}
}
}
barrier();
/* If there is more LODs to update than the load balancing heuristic allows. */
if (bitCount(levels_rendered) > max_view_per_tilemap) {
/* Find the cutoff LOD that contain tiles to render. */
int max_lod = findMSB(levels_rendered);
/* Allow more than one level. */
for (int i = 1; i < max_view_per_tilemap; i++) {
max_lod = findMSB(levels_rendered & ~(~0u << max_lod));
}
/* Collapse all bits to highest level. */
for (int lod = 0; lod < max_lod; lod++) {
if (thread_mask(tile_co, lod)) {
int tile_offset = shadow_tile_offset_lds(tile_co, lod);
if ((tiles_local[tile_offset] & SHADOW_DO_UPDATE) != 0) {
/* This tile is now masked and not considered for rendering. */
tiles_local[tile_offset] |= SHADOW_TILE_MASKED | SHADOW_TILE_AMENDED;
/* Note that we can have multiple thread writting to this tile. */
int tile_bottom_offset = shadow_tile_offset_lds(tile_co >> (max_lod - lod), max_lod);
/* Tag the associated tile in max_lod to be used as it contains the shadowmap area
* covered by this collapsed tile. */
atomicOr(tiles_local[tile_bottom_offset], uint(SHADOW_TILE_AMENDED));
/* This tile could have been masked by the masking phase.
* Make sure the flag is unset. */
atomicAnd(tiles_local[tile_bottom_offset], ~uint(SHADOW_TILE_MASKED));
}
}
}
}
#endif
barrier();
/* Flush back visibility bits to the tile SSBO. */
for (int lod = 0; lod <= SHADOW_TILEMAP_LOD; lod++) {
if (thread_mask(tile_co, lod)) {
int tile_lds = shadow_tile_offset_lds(tile_co, lod);
if ((tiles_local[tile_lds] & SHADOW_TILE_AMENDED) != 0) {
int tile_offset = shadow_tile_offset(tile_co, tilemap.tiles_index, lod);
/* Note that we only flush the visibility so that cached pages can be reused. */
if ((tiles_local[tile_lds] & SHADOW_TILE_MASKED) != 0) {
tiles_buf[tile_offset] &= ~SHADOW_IS_USED;
}
else {
tiles_buf[tile_offset] |= SHADOW_IS_USED;
}
}
}
}

@ -130,6 +130,7 @@ GPU_SHADER_CREATE_INFO(eevee_shadow_tag_usage_volume)
GPU_SHADER_CREATE_INFO(eevee_shadow_page_mask)
.do_static_compilation(true)
.local_group_size(SHADOW_TILEMAP_RES, SHADOW_TILEMAP_RES)
.push_constant(Type::INT, "max_view_per_tilemap")
.storage_buf(0, Qualifier::READ, "ShadowTileMapData", "tilemaps_buf[]")
.storage_buf(1, Qualifier::READ_WRITE, SHADOW_TILE_DATA_PACKED, "tiles_buf[]")
.additional_info("eevee_shared")

@ -1164,7 +1164,7 @@ static void test_eevee_shadow_finalize()
}
DRAW_TEST(eevee_shadow_finalize)
static void test_eevee_shadow_page_mask()
static void test_eevee_shadow_page_mask_ex(int max_view_per_tilemap)
{
GPU_render_begin();
ShadowTileMapDataBuf tilemaps_data = {"tilemaps_data"};
@ -1195,19 +1195,22 @@ static void test_eevee_shadow_page_mask()
/* Init all LOD to true. */
for (auto i : IndexRange(SHADOW_TILEDATA_PER_TILEMAP)) {
tile.is_used = true;
tile.do_update = true;
tiles_data[i] = shadow_tile_pack(tile);
}
/* Init all of LOD0 to false. */
for (auto i : IndexRange(square_i(SHADOW_TILEMAP_RES))) {
tile.is_used = false;
tile.do_update = false;
tiles_data[i] = shadow_tile_pack(tile);
}
/* Bottom Left of the LOD0 to true. */
for (auto y : IndexRange((SHADOW_TILEMAP_RES / 2) + 1)) {
for (auto y : IndexRange((SHADOW_TILEMAP_RES / 2))) {
for (auto x : IndexRange((SHADOW_TILEMAP_RES / 2) + 1)) {
tile.is_used = true;
tile.do_update = true;
tiles_data[x + y * SHADOW_TILEMAP_RES] = shadow_tile_pack(tile);
}
}
@ -1215,6 +1218,7 @@ static void test_eevee_shadow_page_mask()
/* All Bottom of the LOD0 to true. */
for (auto x : IndexRange(SHADOW_TILEMAP_RES)) {
tile.is_used = true;
tile.do_update = true;
tiles_data[x] = shadow_tile_pack(tile);
}
@ -1223,6 +1227,7 @@ static void test_eevee_shadow_page_mask()
for (auto y : IndexRange((SHADOW_TILEMAP_RES / 8))) {
for (auto x : IndexRange((SHADOW_TILEMAP_RES / 8))) {
tile.is_used = false;
tile.do_update = false;
tiles_data[x + y * (SHADOW_TILEMAP_RES / 2) + lod0_len] = shadow_tile_pack(tile);
}
}
@ -1233,6 +1238,7 @@ static void test_eevee_shadow_page_mask()
int x = SHADOW_TILEMAP_RES / 4;
int y = SHADOW_TILEMAP_RES / 4;
tile.is_used = false;
tile.do_update = false;
tiles_data[x + y * (SHADOW_TILEMAP_RES / 2) + lod0_len] = shadow_tile_pack(tile);
}
@ -1245,6 +1251,7 @@ static void test_eevee_shadow_page_mask()
PassSimple pass("Test");
pass.shader_set(sh);
pass.push_constant("max_view_per_tilemap", max_view_per_tilemap);
pass.bind_ssbo("tilemaps_buf", tilemaps_data);
pass.bind_ssbo("tiles_buf", tiles_data);
pass.dispatch(int3(1, 1, tilemaps_data.size()));
@ -1273,7 +1280,7 @@ static void test_eevee_shadow_page_mask()
"xxxxxxxxxxxxxxxxx---------------"
"xxxxxxxxxxxxxxxxx---------------"
"xxxxxxxxxxxxxxxxx---------------"
"xxxxxxxxxxxxxxxxx---------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
@ -1306,6 +1313,23 @@ static void test_eevee_shadow_page_mask()
"xxxxxxxxxxxxxxxx"
"xxxxxxxxxxxxxxxx"
"xxxxxxxxxxxxxxxx";
StringRefNull expected_lod1_collapsed =
"xxxxxxxxxxxxxxxx"
"xxxxxxxxxxxxxxxx"
"xxxxxxxxxxxxxxxx"
"xxxxxxxxxxxxxxxx"
"xxxxxxxxxxxxxxxx"
"xxxxxxxxxxxxxxxx"
"xxxxxxxxxxxxxxxx"
"xxxxxxxxxxxxxxxx"
"xxxxxxxx-xxxxxxx"
"xxxxxxxxxxxxxxxx"
"xxxxxxxxxxxxxxxx"
"xxxxxxxxxxxxxxxx"
"xxxxxxxxxxxxxxxx"
"xxxxxxxxxxxxxxxx"
"xxxxxxxxxxxxxxxx"
"xxxxxxxxxxxxxxxx";
StringRefNull expected_lod2 =
"--------"
"--------"
@ -1315,6 +1339,15 @@ static void test_eevee_shadow_page_mask()
"--------"
"--------"
"--------";
StringRefNull expected_lod2_collapsed =
"xxxxxxxx"
"xxxxxxxx"
"xxxxxxxx"
"xxxxxxxx"
"xxxxxxxx"
"xxxxxxxx"
"xxxxxxxx"
"xxxxxxxx";
StringRefNull expected_lod3 =
"----"
"----"
@ -1333,9 +1366,40 @@ static void test_eevee_shadow_page_mask()
return result;
};
EXPECT_EQ(stringify_result(lod0_ofs, lod0_len), expected_lod0);
EXPECT_EQ(stringify_result(lod1_ofs, lod1_len), expected_lod1);
EXPECT_EQ(stringify_result(lod2_ofs, lod2_len), expected_lod2);
auto empty_result = [&](uint len) -> std::string {
std::string result = "";
for ([[maybe_unused]] const int i : IndexRange(len)) {
result += "-";
}
return result;
};
if (max_view_per_tilemap >= 3) {
EXPECT_EQ(stringify_result(lod0_ofs, lod0_len), expected_lod0);
}
else {
EXPECT_EQ(stringify_result(lod0_ofs, lod0_len), empty_result(lod0_len));
}
if (max_view_per_tilemap > 2) {
EXPECT_EQ(stringify_result(lod1_ofs, lod1_len), expected_lod1);
}
else if (max_view_per_tilemap == 2) {
EXPECT_EQ(stringify_result(lod1_ofs, lod1_len), expected_lod1_collapsed);
}
else {
EXPECT_EQ(stringify_result(lod1_ofs, lod1_len), empty_result(lod1_len));
}
if (max_view_per_tilemap > 1) {
EXPECT_EQ(stringify_result(lod2_ofs, lod2_len), expected_lod2);
}
else if (max_view_per_tilemap == 1) {
EXPECT_EQ(stringify_result(lod2_ofs, lod2_len), expected_lod2_collapsed);
}
else {
EXPECT_EQ(stringify_result(lod2_ofs, lod2_len), empty_result(lod2_len));
}
EXPECT_EQ(stringify_result(lod3_ofs, lod3_len), expected_lod3);
EXPECT_EQ(stringify_result(lod4_ofs, lod4_len), expected_lod4);
EXPECT_EQ(stringify_result(lod5_ofs, lod5_len), expected_lod5);
@ -1344,6 +1408,18 @@ static void test_eevee_shadow_page_mask()
DRW_shaders_free();
GPU_render_end();
}
/* Run the page-mask shader test under several view-count limits to cover both the
 * unclamped path and the LOD-collapsing (load balancing) path. */
static void test_eevee_shadow_page_mask()
{
  /* Limit far above any LOD count: no clamping, expect default behavior. */
  test_eevee_shadow_page_mask_ex(999);
  /* 3 views is enough for the LODs this setup renders: still expect default behavior. */
  test_eevee_shadow_page_mask_ex(3);
  /* Expect LOD0 merged into LOD1. */
  test_eevee_shadow_page_mask_ex(2);
  /* Expect LOD0 and LOD1 merged into LOD2. */
  test_eevee_shadow_page_mask_ex(1);
}
DRAW_TEST(eevee_shadow_page_mask)
static void test_eevee_surfel_list()

@ -2033,7 +2033,16 @@ inline float4 uintBitsToFloat(uint4 f)
#define bitfieldReverse reverse_bits
#define bitfieldExtract extract_bits
#define bitfieldInsert insert_bits
#define bitCount popcount
/* Metal's `popcount` returns the same type as its argument, while GLSL's `bitCount`
 * always returns `int`. Provide GLSL-compatible wrappers that cast the result. */
template<typename T> int bitCount(T x)
{
  return int(popcount(x));
}
/* Vector overload: component-wise popcount, converted to an int vector. */
template<typename T, int n> vec<int, n> bitCount(vec<T, n> x)
{
  return vec<int, n>(popcount(x));
}
template<typename T> int findLSB(T x)
{