Module: Mesa
Branch: main
Commit: 1410735a625a680591349d553429598641e5531e
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=1410735a625a680591349d553429598641e5531e
Author: Rhys Perry <pendingchao...@gmail.com>
Date:   Fri Nov 17 11:21:51 2023 +0000

aco: implement msad_4x8

Signed-off-by: Rhys Perry <pendingchao...@gmail.com>
Reviewed-by: Georg Lehmann <dadschoo...@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26907>

---

 src/amd/compiler/aco_instruction_selection.cpp       | 7 ++++++-
 src/amd/compiler/aco_instruction_selection_setup.cpp | 1 +
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index bdf81dff358..be1468f1fd5 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -927,7 +927,7 @@ emit_vop3a_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, T
    Temp src[3] = {Temp(0, v1), Temp(0, v1), Temp(0, v1)};
    bool has_sgpr = false;
    for (unsigned i = 0; i < num_sources; i++) {
-      src[i] = get_alu_src(ctx, instr->src[swap_srcs ? 1 - i : i]);
+      src[i] = get_alu_src(ctx, instr->src[(swap_srcs && i < 2) ? 1 - i : i]);
       if (has_sgpr)
          src[i] = as_vgpr(ctx, src[i]);
       else
@@ -3425,6 +3425,11 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
       emit_vop3a_instruction(ctx, instr, aco_opcode::v_sad_u8, dst, false, 3u, false);
       break;
    }
+   case nir_op_msad_4x8: {
+      assert(dst.regClass() == v1);
+      emit_vop3a_instruction(ctx, instr, aco_opcode::v_msad_u8, dst, false, 3u, true);
+      break;
+   }
    case nir_op_fquantize2f16: {
       Temp src = get_alu_src(ctx, instr->src[0]);
       Temp f16;
diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp
index 72fffc87167..9004ef2e632 100644
--- a/src/amd/compiler/aco_instruction_selection_setup.cpp
+++ b/src/amd/compiler/aco_instruction_selection_setup.cpp
@@ -393,6 +393,7 @@ init_context(isel_context* ctx, nir_shader* shader)
                case nir_op_frexp_exp:
                case nir_op_cube_amd:
                case nir_op_sad_u8x4:
+               case nir_op_msad_4x8:
                case nir_op_udot_4x8_uadd:
                case nir_op_sdot_4x8_iadd:
                case nir_op_sudot_4x8_iadd: