Module: Mesa
Branch: staging/23.0
Commit: c8991ee56fcff7075482ec5d2b491ac523cb5c51
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c8991ee56fcff7075482ec5d2b491ac523cb5c51
Author: Lynne <[email protected]>
Date: Wed Mar 15 19:17:22 2023 +0100

aco_validate: allow for wave32 in p_dual_src_export_gfx11

Fixes RADV_PERFTEST=pswave32

Fixes: bb90d29660b ("aco: add p_dual_src_export_gfx11 for dual source blending on GFX11")
Reviewed-by: Timur Kristóf <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21934>
(cherry picked from commit f5e5ec180cd04a49ca3c1f8e889c9e35e5a717ca)

---

 .pick_status.json | 2 +-
 src/amd/compiler/aco_validate.cpp | 5 ++---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index 1140789eafe..cd564bf79c9 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -5197,7 +5197,7 @@
         "description": "aco_validate: allow for wave32 in p_dual_src_export_gfx11",
         "nominated": true,
         "nomination_type": 1,
-        "resolution": 0,
+        "resolution": 1,
         "main_sha": null,
         "because_sha": "bb90d29660bb44326188809da2deec0675c8264a"
     },
diff --git a/src/amd/compiler/aco_validate.cpp b/src/amd/compiler/aco_validate.cpp
index 0494f4cc5a2..1e83c261111 100644
--- a/src/amd/compiler/aco_validate.cpp
+++ b/src/amd/compiler/aco_validate.cpp
@@ -534,9 +534,8 @@ validate_ir(Program* program)
             check(instr->definitions[2].getTemp().type() == RegType::vgpr &&
                      instr->definitions[2].getTemp().size() == 1,
                   "Third definition of p_dual_src_export_gfx11 must be a v1", instr.get());
-            check(instr->definitions[3].getTemp().type() == RegType::sgpr &&
-                     instr->definitions[3].getTemp().size() == 2,
-                  "Fourth definition of p_dual_src_export_gfx11 must be a s2", instr.get());
+            check(instr->definitions[3].regClass() == program->lane_mask,
+                  "Fourth definition of p_dual_src_export_gfx11 must be a lane mask", instr.get());
             check(instr->definitions[4].physReg() == vcc,
                   "Fifth definition of p_dual_src_export_gfx11 must be vcc", instr.get());
             check(instr->definitions[5].physReg() == scc,
