Module: Mesa
Branch: main
Commit: 967c52097e841700daf845557875b8ccd3e08cfc
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=967c52097e841700daf845557875b8ccd3e08cfc

Author: Rhys Perry <pendingchao...@gmail.com>
Date:   Tue Nov  7 17:45:59 2023 +0000

aco: work around LS VGPR initialization bug in RADV prologs

Signed-off-by: Rhys Perry <pendingchao...@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoi...@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26111>

---

 src/amd/compiler/aco_instruction_selection.cpp | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 79625b6cf1f..d77f166c391 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -12799,6 +12799,27 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_info* pinfo, ac_sh
             }
          }
 
+         /* If there are no HS threads, SPI mistakenly loads the LS VGPRs 
starting at VGPR 0. */
+         if (info->hw_stage == AC_HW_HULL_SHADER && 
options->has_ls_vgpr_init_bug) {
+            /* We don't want load_vb_descs() to write vcc. */
+            assert(program->dev.sgpr_limit <= vcc.reg());
+
+            bld.sop2(aco_opcode::s_bfe_u32, Definition(vcc, s1), 
Definition(scc, s1),
+                     get_arg_fixed(args, args->merged_wave_info), 
Operand::c32((8u << 16) | 8u));
+            bld.sop2(Builder::s_cselect, Definition(vcc, bld.lm), 
Operand::c32(-1), Operand::zero(),
+                     Operand(scc, s1));
+
+            /* These copies are ordered so that vertex_id=tcs_patch_id doesn't 
overwrite vertex_id
+             * before instance_id=vertex_id. */
+            ac_arg src_args[] = {args->vertex_id, args->tcs_rel_ids, 
args->tcs_patch_id};
+            ac_arg dst_args[] = {args->instance_id, args->vs_rel_patch_id, 
args->vertex_id};
+            for (unsigned i = 0; i < 3; i++) {
+               bld.vop2(aco_opcode::v_cndmask_b32, 
Definition(get_arg_reg(args, dst_args[i]), v1),
+                        get_arg_fixed(args, src_args[i]), get_arg_fixed(args, 
dst_args[i]),
+                        Operand(vcc, bld.lm));
+            }
+         }
+
          bool needs_instance_index =
             pinfo->instance_rate_inputs &
             ~(pinfo->zero_divisors | pinfo->nontrivial_divisors); /* divisor 
is 1 */

Reply via email to