Module: Mesa
Branch: staging/23.0
Commit: de650de6d8b9ce40996ed21e86fbd6d0e19076f7
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=de650de6d8b9ce40996ed21e86fbd6d0e19076f7
Author: Samuel Pitoiset <[email protected]>
Date: Mon Jan 16 10:13:12 2023 +0100

ac/nir: clear unused components before storing XFB outputs to LDS

Shader variables don't always exactly match intrinsics and they might contain unused slots.

Fixes a bunch of regressions with RADV_PERFTEST=ngg_streamout on RDNA2, and also fixes RDNA3 NGG streamout.

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/8099
Fixes: cd22bf90e79 ("ac/nir/ngg: refine nogs outputs handling")
Signed-off-by: Samuel Pitoiset <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20735>
(cherry picked from commit 84241b1f75aa34466d0939f656bcc5aa0658c0a3)

---

 .pick_status.json                 |  2 +-
 src/amd/common/ac_nir_lower_ngg.c | 15 +++++++++++++++
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/.pick_status.json b/.pick_status.json
index bc66eb2efa3..b22b4d60b43 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -157,7 +157,7 @@
         "description": "ac/nir: clear unused components before storing XFB outputs to LDS",
         "nominated": true,
         "nomination_type": 1,
-        "resolution": 0,
+        "resolution": 1,
         "main_sha": null,
         "because_sha": "cd22bf90e79b9d6a47f763d304e942550ab937a7"
     },
diff --git a/src/amd/common/ac_nir_lower_ngg.c b/src/amd/common/ac_nir_lower_ngg.c
index 5fe8c429484..4a0f3bc5df1 100644
--- a/src/amd/common/ac_nir_lower_ngg.c
+++ b/src/amd/common/ac_nir_lower_ngg.c
@@ -1684,6 +1684,13 @@ ngg_nogs_store_xfb_outputs_to_lds(nir_builder *b, lower_ngg_nogs_state *s)
          util_bitcount64(b->shader->info.outputs_written & BITFIELD64_MASK(slot));
 
       unsigned mask = xfb_mask[slot];
+
+      /* Clear unused components. */
+      for (unsigned i = 0; i < 4; i++) {
+         if (!s->outputs[slot][i])
+            mask &= ~BITFIELD_BIT(i);
+      }
+
       while (mask) {
          int start, count;
          u_bit_scan_consecutive_range(&mask, &start, &count);
@@ -1706,6 +1713,14 @@ ngg_nogs_store_xfb_outputs_to_lds(nir_builder *b, lower_ngg_nogs_state *s)
       unsigned mask_lo = xfb_mask_16bit_lo[slot];
       unsigned mask_hi = xfb_mask_16bit_hi[slot];
 
+      /* Clear unused components. */
+      for (unsigned i = 0; i < 4; i++) {
+         if (!s->outputs_16bit_lo[slot][i])
+            mask_lo &= ~BITFIELD_BIT(i);
+         if (!s->outputs_16bit_hi[slot][i])
+            mask_hi &= ~BITFIELD_BIT(i);
+      }
+
       nir_ssa_def **outputs_lo = s->outputs_16bit_lo[slot];
       nir_ssa_def **outputs_hi = s->outputs_16bit_hi[slot];
       nir_ssa_def *undef = nir_ssa_undef(b, 1, 16);
