Module: Mesa
Branch: staging/23.0
Commit: c8991ee56fcff7075482ec5d2b491ac523cb5c51
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=c8991ee56fcff7075482ec5d2b491ac523cb5c51

Author: Lynne <[email protected]>
Date:   Wed Mar 15 19:17:22 2023 +0100

aco_validate: allow for wave32 in p_dual_src_export_gfx11

Fixes RADV_PERFTEST=pswave32

Fixes: bb90d29660b ("aco: add p_dual_src_export_gfx11 for dual source blending on GFX11")
Reviewed-by: Timur Kristóf <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21934>
(cherry picked from commit f5e5ec180cd04a49ca3c1f8e889c9e35e5a717ca)

---

 .pick_status.json                 | 2 +-
 src/amd/compiler/aco_validate.cpp | 5 ++---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index 1140789eafe..cd564bf79c9 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -5197,7 +5197,7 @@
         "description": "aco_validate: allow for wave32 in p_dual_src_export_gfx11",
         "nominated": true,
         "nomination_type": 1,
-        "resolution": 0,
+        "resolution": 1,
         "main_sha": null,
         "because_sha": "bb90d29660bb44326188809da2deec0675c8264a"
     },
diff --git a/src/amd/compiler/aco_validate.cpp b/src/amd/compiler/aco_validate.cpp
index 0494f4cc5a2..1e83c261111 100644
--- a/src/amd/compiler/aco_validate.cpp
+++ b/src/amd/compiler/aco_validate.cpp
@@ -534,9 +534,8 @@ validate_ir(Program* program)
                check(instr->definitions[2].getTemp().type() == RegType::vgpr &&
                         instr->definitions[2].getTemp().size() == 1,
                      "Third definition of p_dual_src_export_gfx11 must be a v1", instr.get());
-               check(instr->definitions[3].getTemp().type() == RegType::sgpr &&
-                        instr->definitions[3].getTemp().size() == 2,
-                     "Fourth definition of p_dual_src_export_gfx11 must be a s2", instr.get());
+               check(instr->definitions[3].regClass() == program->lane_mask,
+                     "Fourth definition of p_dual_src_export_gfx11 must be a lane mask", instr.get());
                check(instr->definitions[4].physReg() == vcc,
                      "Fifth definition of p_dual_src_export_gfx11 must be vcc", instr.get());
                check(instr->definitions[5].physReg() == scc,

Reply via email to