Module: Mesa
Branch: staging/23.0
Commit: de650de6d8b9ce40996ed21e86fbd6d0e19076f7
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=de650de6d8b9ce40996ed21e86fbd6d0e19076f7

Author: Samuel Pitoiset <[email protected]>
Date:   Mon Jan 16 10:13:12 2023 +0100

ac/nir: clear unused components before storing XFB outputs to LDS

Shader variables don't always exactly match intrinsics and they might
contain unused slots.

Fixes a bunch of regressions with RADV_PERFTEST=ngg_streamout on RDNA2,
and also fixes RDNA3 NGG streamout.

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/8099
Fixes: cd22bf90e79 ("ac/nir/ngg: refine nogs outputs handling")
Signed-off-by: Samuel Pitoiset <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20735>
(cherry picked from commit 84241b1f75aa34466d0939f656bcc5aa0658c0a3)

---

 .pick_status.json                 |  2 +-
 src/amd/common/ac_nir_lower_ngg.c | 15 +++++++++++++++
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/.pick_status.json b/.pick_status.json
index bc66eb2efa3..b22b4d60b43 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -157,7 +157,7 @@
         "description": "ac/nir: clear unused components before storing XFB 
outputs to LDS",
         "nominated": true,
         "nomination_type": 1,
-        "resolution": 0,
+        "resolution": 1,
         "main_sha": null,
         "because_sha": "cd22bf90e79b9d6a47f763d304e942550ab937a7"
     },
diff --git a/src/amd/common/ac_nir_lower_ngg.c b/src/amd/common/ac_nir_lower_ngg.c
index 5fe8c429484..4a0f3bc5df1 100644
--- a/src/amd/common/ac_nir_lower_ngg.c
+++ b/src/amd/common/ac_nir_lower_ngg.c
@@ -1684,6 +1684,13 @@ ngg_nogs_store_xfb_outputs_to_lds(nir_builder *b, lower_ngg_nogs_state *s)
          util_bitcount64(b->shader->info.outputs_written & 
BITFIELD64_MASK(slot));
 
       unsigned mask = xfb_mask[slot];
+
+      /* Clear unused components. */
+      for (unsigned i = 0; i < 4; i++) {
+         if (!s->outputs[slot][i])
+            mask &= ~BITFIELD_BIT(i);
+      }
+
       while (mask) {
          int start, count;
          u_bit_scan_consecutive_range(&mask, &start, &count);
@@ -1706,6 +1713,14 @@ ngg_nogs_store_xfb_outputs_to_lds(nir_builder *b, lower_ngg_nogs_state *s)
       unsigned mask_lo = xfb_mask_16bit_lo[slot];
       unsigned mask_hi = xfb_mask_16bit_hi[slot];
 
+      /* Clear unused components. */
+      for (unsigned i = 0; i < 4; i++) {
+         if (!s->outputs_16bit_lo[slot][i])
+            mask_lo &= ~BITFIELD_BIT(i);
+         if (!s->outputs_16bit_hi[slot][i])
+            mask_hi &= ~BITFIELD_BIT(i);
+      }
+
       nir_ssa_def **outputs_lo = s->outputs_16bit_lo[slot];
       nir_ssa_def **outputs_hi = s->outputs_16bit_hi[slot];
       nir_ssa_def *undef = nir_ssa_undef(b, 1, 16);

Reply via email to