Module: Mesa
Branch: main
Commit: 967c52097e841700daf845557875b8ccd3e08cfc
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=967c52097e841700daf845557875b8ccd3e08cfc
Author: Rhys Perry <pendingchao...@gmail.com>
Date:   Tue Nov 7 17:45:59 2023 +0000

aco: workaround LS VGPR initialization bug in RADV prologs

Signed-off-by: Rhys Perry <pendingchao...@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoi...@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26111>

---

 src/amd/compiler/aco_instruction_selection.cpp | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 79625b6cf1f..d77f166c391 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -12799,6 +12799,27 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_info* pinfo, ac_sh
       }
    }
 
+   /* If there are no HS threads, SPI mistakenly loads the LS VGPRs starting at VGPR 0. */
+   if (info->hw_stage == AC_HW_HULL_SHADER && options->has_ls_vgpr_init_bug) {
+      /* We don't want load_vb_descs() to write vcc. */
+      assert(program->dev.sgpr_limit <= vcc.reg());
+      bld.sop2(aco_opcode::s_bfe_u32, Definition(vcc, s1), Definition(scc, s1),
+               get_arg_fixed(args, args->merged_wave_info), Operand::c32((8u << 16) | 8u));
+      bld.sop2(Builder::s_cselect, Definition(vcc, bld.lm), Operand::c32(-1), Operand::zero(),
+               Operand(scc, s1));
+      /* These copies are ordered so that vertex_id=tcs_patch_id doesn't overwrite vertex_id
+       * before instance_id=vertex_id. */
+      ac_arg src_args[] = {args->vertex_id, args->tcs_rel_ids, args->tcs_patch_id};
+      ac_arg dst_args[] = {args->instance_id, args->vs_rel_patch_id, args->vertex_id};
+      for (unsigned i = 0; i < 3; i++) {
+         bld.vop2(aco_opcode::v_cndmask_b32, Definition(get_arg_reg(args, dst_args[i]), v1),
+                  get_arg_fixed(args, src_args[i]), get_arg_fixed(args, dst_args[i]),
+                  Operand(vcc, bld.lm));
+      }
+   }
+
    bool needs_instance_index =
       pinfo->instance_rate_inputs &
       ~(pinfo->zero_divisors | pinfo->nontrivial_divisors); /* divisor is 1 */