Module: Mesa Branch: main Commit: c3dd1931d99121dd319d7802b543a5b0957516f9 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c3dd1931d99121dd319d7802b543a5b0957516f9
Author: Rhys Perry <[email protected]> Date: Wed Jan 4 14:52:34 2023 +0000 aco: allow Builder::Result to be dereferenced Signed-off-by: Rhys Perry <[email protected]> Reviewed-by: Georg Lehmann <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20251> --- src/amd/compiler/aco_builder_h.py | 8 ++ src/amd/compiler/aco_insert_exec_mask.cpp | 4 +- src/amd/compiler/aco_instruction_selection.cpp | 20 +++-- src/amd/compiler/aco_lower_to_hw_instr.cpp | 11 ++- src/amd/compiler/tests/helpers.cpp | 4 +- src/amd/compiler/tests/test_assembler.cpp | 94 ++++++++++++------------ src/amd/compiler/tests/test_hard_clause.cpp | 6 +- src/amd/compiler/tests/test_optimizer.cpp | 16 ++-- src/amd/compiler/tests/test_optimizer_postRA.cpp | 12 +-- src/amd/compiler/tests/test_sdwa.cpp | 14 ++-- 10 files changed, 96 insertions(+), 93 deletions(-) diff --git a/src/amd/compiler/aco_builder_h.py b/src/amd/compiler/aco_builder_h.py index 41912825be7..f970854d50f 100644 --- a/src/amd/compiler/aco_builder_h.py +++ b/src/amd/compiler/aco_builder_h.py @@ -160,6 +160,14 @@ public: aco_ptr<Instruction> get_ptr() const { return aco_ptr<Instruction>(instr); } + + Instruction * operator * () const { + return instr; + } + + Instruction * operator -> () const { + return instr; + } }; struct Op { diff --git a/src/amd/compiler/aco_insert_exec_mask.cpp b/src/amd/compiler/aco_insert_exec_mask.cpp index 09f2d63b4e3..feffd5acd71 100644 --- a/src/amd/compiler/aco_insert_exec_mask.cpp +++ b/src/amd/compiler/aco_insert_exec_mask.cpp @@ -816,7 +816,7 @@ add_branch_code(exec_ctx& ctx, Block* block) Builder::Result r = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2), Operand(exec, bld.lm), block->linear_succs[1], block->linear_succs[0]); - r.instr->branch().selection_control = sel_ctrl; + r->branch().selection_control = sel_ctrl; return; } @@ -832,7 +832,7 @@ add_branch_code(exec_ctx& ctx, Block* block) Builder::Result r = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2), Operand(exec, bld.lm), block->linear_succs[1], block->linear_succs[0]); - r.instr->branch().selection_control = sel_ctrl; + r->branch().selection_control = sel_ctrl; return; } diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index cf034bcb755..5c40c31a78a 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -265,7 +265,7 @@ emit_masked_swizzle(isel_context* ctx, Builder& bld, Temp src, unsigned mask) // DPP8 comes last, as it does not allow several modifiers like `abs` that are available with DPP16 Builder::Result ret = bld.vop1_dpp8(aco_opcode::v_mov_b32, bld.def(v1), src); for (unsigned i = 0; i < 8; i++) { - ret.instr->dpp8().lane_sel[i] = (((i & and_mask) | or_mask) ^ xor_mask) & 0x7; + ret->dpp8().lane_sel[i] = (((i & and_mask) | or_mask) ^ xor_mask) & 0x7; } return ret; } @@ -1016,7 +1016,7 @@ emit_idot_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, Te Builder bld(ctx->program, ctx->block); bld.is_precise = instr->exact; VOP3P_instruction& vop3p = - bld.vop3p(op, Definition(dst), src[0], src[1], src[2], 0x0, 0x7).instr->vop3p(); + bld.vop3p(op, Definition(dst), src[0], src[1], src[2], 0x0, 0x7)->vop3p(); vop3p.clamp = clamp; u_foreach_bit (i, neg_lo) vop3p.neg_lo[i] = true; @@ -1363,7 +1363,7 @@ uadd32_sat(Builder& bld, Definition dst, Temp src0, Temp src1) } else { add = bld.vop2_e64(aco_opcode::v_add_co_u32, dst, bld.def(bld.lm), src0, src1); } - add.instr->vop3().clamp = 1; + add->vop3().clamp = 1; return dst.getTemp(); } @@ -1382,7 +1382,7 @@ usub32_sat(Builder& bld, Definition dst, Temp src0, Temp src1) } else { sub = bld.vop2_e64(aco_opcode::v_sub_co_u32, dst, bld.def(bld.lm), src0, src1); } - sub.instr->vop3().clamp = 1; + sub->vop3().clamp = 1; return dst.getTemp(); } @@ -1978,8 +1978,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) carry1 = bld.tmp(bld.lm); bld.vop2_e64(aco_opcode::v_addc_co_u32, Definition(dst1), Definition(carry1), as_vgpr(ctx, src01), as_vgpr(ctx, src11), carry0) - .instr->vop3() - .clamp = 1; + ->vop3().clamp = 1; } else { Temp no_sat1 = bld.tmp(v1); carry1 = bld.vadd32(Definition(no_sat1), src01, src11, true, carry0).def(1).getTemp(); @@ -2221,8 +2220,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) carry1 = bld.tmp(bld.lm); bld.vop2_e64(aco_opcode::v_subb_co_u32, Definition(dst1), Definition(carry1), as_vgpr(ctx, src01), as_vgpr(ctx, src11), carry0) - .instr->vop3() - .clamp = 1; + ->vop3().clamp = 1; } else { Temp no_sat1 = bld.tmp(v1); carry1 = bld.vsub32(Definition(no_sat1), src01, src11, true, carry0).def(1).getTemp(); @@ -5152,7 +5150,7 @@ emit_single_mubuf_store(isel_context* ctx, Temp descriptor, Temp voffset, Temp s offen, swizzled, idxen, /* addr64 */ false, /* disable_wqm */ false, glc, /* dlc*/ false, slc); - r.instr->mubuf().sync = sync; + r->mubuf().sync = sync; } void @@ -5402,7 +5400,7 @@ emit_interp_instr(isel_context* ctx, unsigned idx, unsigned component, Temp src, bld.m0(prim_mask), idx, component); if (ctx->program->dev.has_16bank_lds) - interp_p1.instr->operands[0].setLateKill(true); + interp_p1->operands[0].setLateKill(true); bld.vintrp(aco_opcode::v_interp_p2_f32, Definition(dst), coord2, bld.m0(prim_mask), interp_p1, idx, component); @@ -6022,7 +6020,7 @@ visit_load_push_constant(isel_context* ctx, nir_intrinsic_instr* instr) default: unreachable("unimplemented or forbidden load_push_constant."); } - bld.smem(op, Definition(vec), ptr, index).instr->smem().prevent_overflow = true; + bld.smem(op, Definition(vec), ptr, index)->smem().prevent_overflow = true; if (!aligned) { Operand byte_offset = index_cv ? Operand::c32((offset + index_cv->u32) % 4) : Operand(index); diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp index 3c82fe2577a..97bace94960 100644 --- a/src/amd/compiler/aco_lower_to_hw_instr.cpp +++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp @@ -2383,7 +2383,7 @@ lower_to_hw_instr(Program* program) } } else { SDWA_instruction& sdwa = - bld.vop1_sdwa(aco_opcode::v_mov_b32, dst, op).instr->sdwa(); + bld.vop1_sdwa(aco_opcode::v_mov_b32, dst, op)->sdwa(); sdwa.sel[0] = SubdwordSel(bits / 8, offset / 8, signext); } } @@ -2421,7 +2421,7 @@ lower_to_hw_instr(Program* program) } else if (offset == 0 && (dst.regClass() == v1 || program->gfx_level <= GFX7)) { bld.vop3(aco_opcode::v_bfe_u32, dst, op, Operand::zero(), Operand::c32(bits)); } else if (has_sdwa && (op.regClass() != s1 || program->gfx_level >= GFX9)) { - bld.vop1_sdwa(aco_opcode::v_mov_b32, dst, op).instr->sdwa().dst_sel = + bld.vop1_sdwa(aco_opcode::v_mov_b32, dst, op)->sdwa().dst_sel = SubdwordSel(bits / 8, offset / 8, false); } else if (program->gfx_level >= GFX11) { uint8_t swiz[] = {4, 5, 6, 7}; @@ -2438,8 +2438,7 @@ lower_to_hw_instr(Program* program) } else { assert(dst.regClass() == v2b); bld.vop2_sdwa(aco_opcode::v_lshlrev_b32, dst, Operand::c32(offset), op) - .instr->sdwa() - .sel[1] = SubdwordSel::ubyte; + ->sdwa().sel[1] = SubdwordSel::ubyte; } break; } @@ -2580,7 +2579,7 @@ lower_to_hw_instr(Program* program) Builder::Result ret = bld.vop1_dpp8(aco_opcode::v_mov_b32, Definition(dst0, v1), src0); for (unsigned j = 0; j < 8; j++) { - ret.instr->dpp8().lane_sel[j] = j ^ 1; + ret->dpp8().lane_sel[j] = j ^ 1; } /* Swap even lanes between mrt0 and mrt1. */ @@ -2593,7 +2592,7 @@ lower_to_hw_instr(Program* program) ret = bld.vop1_dpp8(aco_opcode::v_mov_b32, Definition(dst0, v1), Operand(tmp.physReg(), v1)); for (unsigned j = 0; j < 8; j++) { - ret.instr->dpp8().lane_sel[j] = j ^ 1; + ret->dpp8().lane_sel[j] = j ^ 1; } mrt0[i] = Operand(dst0, v1); diff --git a/src/amd/compiler/tests/helpers.cpp b/src/amd/compiler/tests/helpers.cpp index bfd9b723a77..fae66aaf95d 100644 --- a/src/amd/compiler/tests/helpers.cpp +++ b/src/amd/compiler/tests/helpers.cpp @@ -287,11 +287,11 @@ Temp fabs(Temp src, Builder b) { if (src.bytes() == 2) { Builder::Result res = b.vop2_e64(aco_opcode::v_mul_f16, b.def(v2b), Operand::c16(0x3c00), src); - res.instr->vop3().abs[1] = true; + res->vop3().abs[1] = true; return res; } else { Builder::Result res = b.vop2_e64(aco_opcode::v_mul_f32, b.def(v1), Operand::c32(0x3f800000u), src); - res.instr->vop3().abs[1] = true; + res->vop3().abs[1] = true; return res; } } diff --git a/src/amd/compiler/tests/test_assembler.cpp b/src/amd/compiler/tests/test_assembler.cpp index eee7f658af6..e46ee13954e 100644 --- a/src/amd/compiler/tests/test_assembler.cpp +++ b/src/amd/compiler/tests/test_assembler.cpp @@ -407,10 +407,10 @@ BEGIN_TEST(assembler.gfx11.smem) bld.smem(aco_opcode::s_load_dword, dst, op_s2, Operand::c32(42), op_s1); //! s_buffer_load_b32 s4, s[32:35], s8 glc ; f4204110 10000000 - bld.smem(aco_opcode::s_buffer_load_dword, dst, op_s4, op_s1).instr->smem().glc = true; + bld.smem(aco_opcode::s_buffer_load_dword, dst, op_s4, op_s1)->smem().glc = true; //! s_buffer_load_b32 s4, s[32:35], s8 dlc ; f4202110 10000000 - bld.smem(aco_opcode::s_buffer_load_dword, dst, op_s4, op_s1).instr->smem().dlc = true; + bld.smem(aco_opcode::s_buffer_load_dword, dst, op_s4, op_s1)->smem().dlc = true; finish_assembler_test(); END_TEST @@ -448,45 +448,45 @@ BEGIN_TEST(assembler.gfx11.mubuf) bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, op_v1, op_s1, 0, true); //! buffer_load_b32 v42, v10, s[32:35], s30 idxen ; e0500000 1e882a0a - bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, op_v1, op_s1, 0, false).instr->mubuf().idxen = true; + bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, op_v1, op_s1, 0, false)->mubuf().idxen = true; //! buffer_load_b32 v42, v[20:21], s[32:35], s30 idxen offen ; e0500000 1ec82a14 - bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, op_v2, op_s1, 0, true).instr->mubuf().idxen = true; + bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, op_v2, op_s1, 0, true)->mubuf().idxen = true; //! buffer_load_b32 v42, off, s[32:35], s30 offset:84 ; e0500054 1e082a80 bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), op_s1, 84, false); /* Various flags */ //! buffer_load_b32 v42, off, s[32:35], 0 glc ; e0504000 80082a80 - bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false).instr->mubuf().glc = true; + bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)->mubuf().glc = true; //! buffer_load_b32 v42, off, s[32:35], 0 dlc ; e0502000 80082a80 - bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false).instr->mubuf().dlc = true; + bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)->mubuf().dlc = true; //! buffer_load_b32 v42, off, s[32:35], 0 slc ; e0501000 80082a80 - bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false).instr->mubuf().slc = true; + bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)->mubuf().slc = true; //! buffer_load_b32 v42, off, s[32:35], 0 tfe ; e0500000 80282a80 - bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false).instr->mubuf().tfe = true; + bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)->mubuf().tfe = true; /* LDS */ //! buffer_load_lds_b32 off, s[32:35], 0 ; e0c40000 80080080 - bld.mubuf(aco_opcode::buffer_load_dword, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false).instr->mubuf().lds = true; + bld.mubuf(aco_opcode::buffer_load_dword, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)->mubuf().lds = true; //! buffer_load_lds_i8 off, s[32:35], 0 ; e0b80000 80080080 - bld.mubuf(aco_opcode::buffer_load_sbyte, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false).instr->mubuf().lds = true; + bld.mubuf(aco_opcode::buffer_load_sbyte, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)->mubuf().lds = true; //! buffer_load_lds_i16 off, s[32:35], 0 ; e0c00000 80080080 - bld.mubuf(aco_opcode::buffer_load_sshort, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false).instr->mubuf().lds = true; + bld.mubuf(aco_opcode::buffer_load_sshort, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)->mubuf().lds = true; //! buffer_load_lds_u8 off, s[32:35], 0 ; e0b40000 80080080 - bld.mubuf(aco_opcode::buffer_load_ubyte, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false).instr->mubuf().lds = true; + bld.mubuf(aco_opcode::buffer_load_ubyte, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)->mubuf().lds = true; //! buffer_load_lds_u16 off, s[32:35], 0 ; e0bc0000 80080080 - bld.mubuf(aco_opcode::buffer_load_ushort, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false).instr->mubuf().lds = true; + bld.mubuf(aco_opcode::buffer_load_ushort, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)->mubuf().lds = true; //! buffer_load_lds_format_x off, s[32:35], 0 ; e0c80000 80080080 - bld.mubuf(aco_opcode::buffer_load_format_x, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false).instr->mubuf().lds = true; + bld.mubuf(aco_opcode::buffer_load_format_x, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)->mubuf().lds = true; /* Stores */ //! buffer_store_b32 v10, off, s[32:35], s30 ; e0680000 1e080a80 @@ -531,26 +531,26 @@ BEGIN_TEST(assembler.gfx11.mtbuf) bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, op_v1, op_s1, dfmt, nfmt, 0, true); //! tbuffer_load_format_x v42, v10, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] idxen ; e9900000 1e882a0a - bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, op_v1, op_s1, dfmt, nfmt, 0, false).instr->mtbuf().idxen = true; + bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, op_v1, op_s1, dfmt, nfmt, 0, false)->mtbuf().idxen = true; //! tbuffer_load_format_x v42, v[20:21], s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] idxen offen ; e9900000 1ec82a14 - bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, op_v2, op_s1, dfmt, nfmt, 0, true).instr->mtbuf().idxen = true; + bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, op_v2, op_s1, dfmt, nfmt, 0, true)->mtbuf().idxen = true; //! tbuffer_load_format_x v42, off, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] offset:84 ; e9900054 1e082a80 bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), op_s1, dfmt, nfmt, 84, false); /* Various flags */ //! tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] glc ; e9904000 80082a80 - bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt, nfmt, 0, false).instr->mtbuf().glc = true; + bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt, nfmt, 0, false)->mtbuf().glc = true; //! tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] dlc ; e9902000 80082a80 - bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt, nfmt, 0, false).instr->mtbuf().dlc = true; + bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt, nfmt, 0, false)->mtbuf().dlc = true; //! tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] slc ; e9901000 80082a80 - bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt, nfmt, 0, false).instr->mtbuf().slc = true; + bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt, nfmt, 0, false)->mtbuf().slc = true; //! tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] tfe ; e9900000 80282a80 - bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt, nfmt, 0, false).instr->mtbuf().tfe = true; + bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt, nfmt, 0, false)->mtbuf().tfe = true; /* Stores */ //! tbuffer_store_format_x v10, off, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] ; e9920000 1e080a80 @@ -591,46 +591,46 @@ BEGIN_TEST(assembler.gfx11.mimg) bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1); //! image_sample v[84:87], v[20:21], s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_2D ; f06c0f04 20105414 - bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v2).instr->mimg().dim = ac_image_2d; + bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v2)->mimg().dim = ac_image_2d; //! image_sample v42, v10, s[64:71], s[32:35] dmask:0x1 dim:SQ_RSRC_IMG_1D ; f06c0100 20102a0a - bld.mimg(aco_opcode::image_sample, dst_v1, op_s8, op_s4, Operand(v1), op_v1).instr->mimg().dmask = 0x1; + bld.mimg(aco_opcode::image_sample, dst_v1, op_s8, op_s4, Operand(v1), op_v1)->mimg().dmask = 0x1; /* Various flags */ //! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D dlc ; f06c2f00 2010540a - bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1).instr->mimg().dlc = true; + bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().dlc = true; //! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D glc ; f06c4f00 2010540a - bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1).instr->mimg().glc = true; + bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().glc = true; //! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D slc ; f06c1f00 2010540a - bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1).instr->mimg().slc = true; + bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().slc = true; //! image_sample v[84:88], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D tfe ; f06c0f00 2030540a - bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1).instr->mimg().tfe = true; + bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().tfe = true; //! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D lwe ; f06c0f00 2050540a - bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1).instr->mimg().lwe = true; + bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().lwe = true; //! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D r128 ; f06c8f00 2010540a - bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1).instr->mimg().r128 = true; + bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().r128 = true; //! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D a16 ; f06d0f00 2010540a - bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1).instr->mimg().a16 = true; + bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().a16 = true; //! image_sample v[84:85], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D d16 ; f06e0f00 2010540a - bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1).instr->mimg().d16 = true; + bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().d16 = true; /* NSA */ //! image_sample v[84:87], [v10, v40], s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_2D ; f06c0f05 2010540a 00000028 - bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1, Operand(bld.tmp(v1), PhysReg(256 + 40))).instr->mimg().dim = ac_image_2d; + bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1, Operand(bld.tmp(v1), PhysReg(256 + 40)))->mimg().dim = ac_image_2d; /* Stores */ //! image_store v[30:33], v10, s[64:71] dmask:0xf dim:SQ_RSRC_IMG_1D ; f0180f00 00101e0a bld.mimg(aco_opcode::image_store, op_s8, Operand(s4), op_v4, op_v1); //! image_atomic_add v10, v20, s[64:71] dmask:0xf dim:SQ_RSRC_IMG_2D ; f0300f04 00100a14 - bld.mimg(aco_opcode::image_atomic_add, Definition(op_v1.physReg(), v1), op_s8, Operand(s4), op_v1, op_v2).instr->mimg().dim = ac_image_2d; + bld.mimg(aco_opcode::image_atomic_add, Definition(op_v1.physReg(), v1), op_s8, Operand(s4), op_v1, op_v2)->mimg().dim = ac_image_2d; finish_assembler_test(); END_TEST @@ -681,13 +681,13 @@ BEGIN_TEST(assembler.gfx11.flat) /* Various flags */ //! flat_load_b32 v42, v[20:21] slc ; dc508000 2a7c0014 - bld.flat(aco_opcode::flat_load_dword, dst_v1, op_v2, Operand(s1)).instr->flat().slc = true; + bld.flat(aco_opcode::flat_load_dword, dst_v1, op_v2, Operand(s1))->flat().slc = true; //! flat_load_b32 v42, v[20:21] glc ; dc504000 2a7c0014 - bld.flat(aco_opcode::flat_load_dword, dst_v1, op_v2, Operand(s1)).instr->flat().glc = true; + bld.flat(aco_opcode::flat_load_dword, dst_v1, op_v2, Operand(s1))->flat().glc = true; //! flat_load_b32 v42, v[20:21] dlc ; dc502000 2a7c0014 - bld.flat(aco_opcode::flat_load_dword, dst_v1, op_v2, Operand(s1)).instr->flat().dlc = true; + bld.flat(aco_opcode::flat_load_dword, dst_v1, op_v2, Operand(s1))->flat().dlc = true; /* Stores */ //! flat_store_b32 v[20:21], v10 ; dc680000 007c0a14 @@ -717,7 +717,7 @@ BEGIN_TEST(assembler.gfx11.exp) bld.exp(aco_opcode::exp, op[1], op[0], op[3], op[2], 0xf, 3, false, true); //>> exp mrt3 v1, v0, v3, v2 row_en ; f800203f 02030001 - bld.exp(aco_opcode::exp, op[1], op[0], op[3], op[2], op_m0, 0xf, 3).instr->exp().row_en = true; + bld.exp(aco_opcode::exp, op[1], op[0], op[3], op[2], op_m0, 0xf, 3)->exp().row_en = true; finish_assembler_test(); END_TEST @@ -748,13 +748,13 @@ BEGIN_TEST(assembler.gfx11.vinterp) bld.vinterp_inreg(aco_opcode::v_interp_p2_f32_inreg, dst, op0, op1, op2, 0); //! v_interp_p10_f32 v42, -v10, v20, s30 ; cd00002a 207a290a - bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0).instr->vinterp_inreg().neg[0] = true; + bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0)->vinterp_inreg().neg[0] = true; //! v_interp_p10_f32 v42, v10, -v20, s30 ; cd00002a 407a290a - bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0).instr->vinterp_inreg().neg[1] = true; + bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0)->vinterp_inreg().neg[1] = true; //! v_interp_p10_f32 v42, v10, v20, -s30 ; cd00002a 807a290a - bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0).instr->vinterp_inreg().neg[2] = true; + bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0)->vinterp_inreg().neg[2] = true; //! v_interp_p10_f16_f32 v42, v10, v20, s30 op_sel:[1,0,0,0] ; cd02082a 007a290a bld.vinterp_inreg(aco_opcode::v_interp_p10_f16_f32_inreg, dst, op0, op1, op2, 0, 0x1); @@ -769,7 +769,7 @@ BEGIN_TEST(assembler.gfx11.vinterp) bld.vinterp_inreg(aco_opcode::v_interp_p2_rtz_f16_f32_inreg, dst, op0, op1, op2, 0, 0x8); //! v_interp_p10_f32 v42, v10, v20, s30 clamp ; cd00802a 007a290a - bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0).instr->vinterp_inreg().clamp = true; + bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0)->vinterp_inreg().clamp = true; finish_assembler_test(); END_TEST @@ -785,25 +785,25 @@ BEGIN_TEST(assembler.gfx11.ldsdir) op.setFixed(m0); //>> lds_direct_load v42 wait_vdst:15 ; ce1f002a - bld.ldsdir(aco_opcode::lds_direct_load, dst, op).instr->ldsdir().wait_vdst = 15; + bld.ldsdir(aco_opcode::lds_direct_load, dst, op)->ldsdir().wait_vdst = 15; //! lds_direct_load v42 wait_vdst:6 ; ce16002a - bld.ldsdir(aco_opcode::lds_direct_load, dst, op).instr->ldsdir().wait_vdst = 6; + bld.ldsdir(aco_opcode::lds_direct_load, dst, op)->ldsdir().wait_vdst = 6; //! lds_direct_load v42 ; ce10002a - bld.ldsdir(aco_opcode::lds_direct_load, dst, op).instr->ldsdir().wait_vdst = 0; + bld.ldsdir(aco_opcode::lds_direct_load, dst, op)->ldsdir().wait_vdst = 0; //! lds_param_load v42, attr56.x wait_vdst:8 ; ce08e02a - bld.ldsdir(aco_opcode::lds_param_load, dst, op, 56, 0).instr->ldsdir().wait_vdst = 8; + bld.ldsdir(aco_opcode::lds_param_load, dst, op, 56, 0)->ldsdir().wait_vdst = 8; //! lds_param_load v42, attr56.x ; ce00e02a - bld.ldsdir(aco_opcode::lds_param_load, dst, op, 56, 0).instr->ldsdir().wait_vdst = 0; + bld.ldsdir(aco_opcode::lds_param_load, dst, op, 56, 0)->ldsdir().wait_vdst = 0; //! lds_param_load v42, attr34.y ; ce00892a - bld.ldsdir(aco_opcode::lds_param_load, dst, op, 34, 1).instr->ldsdir().wait_vdst = 0; + bld.ldsdir(aco_opcode::lds_param_load, dst, op, 34, 1)->ldsdir().wait_vdst = 0; //! lds_param_load v42, attr12.z ; ce00322a - bld.ldsdir(aco_opcode::lds_param_load, dst, op, 12, 2).instr->ldsdir().wait_vdst = 0; + bld.ldsdir(aco_opcode::lds_param_load, dst, op, 12, 2)->ldsdir().wait_vdst = 0; finish_assembler_test(); END_TEST diff --git a/src/amd/compiler/tests/test_hard_clause.cpp b/src/amd/compiler/tests/test_hard_clause.cpp index d6b5a6be037..2a2f1e56956 100644 --- a/src/amd/compiler/tests/test_hard_clause.cpp +++ b/src/amd/compiler/tests/test_hard_clause.cpp @@ -32,8 +32,7 @@ static void create_mubuf(Temp desc=Temp(0, s8), unsigned vtx_binding=0) desc_op.setFixed(PhysReg(0)); bld.mubuf(aco_opcode::buffer_load_dword, Definition(PhysReg(256), v1), desc_op, Operand(PhysReg(256), v1), Operand::zero(), 0, false) - .instr->mubuf() - .vtx_binding = vtx_binding; + ->mubuf().vtx_binding = vtx_binding; } static void create_mubuf_store() @@ -49,8 +48,7 @@ static void create_mtbuf(Temp desc=Temp(0, s8), unsigned vtx_binding=0) bld.mtbuf(aco_opcode::tbuffer_load_format_x, Definition(PhysReg(256), v1), desc_op, Operand(PhysReg(256), v1), Operand::zero(), V_008F0C_BUF_DATA_FORMAT_32, V_008F0C_BUF_NUM_FORMAT_FLOAT, 0, false) - .instr->mtbuf() - .vtx_binding = vtx_binding; + ->mtbuf().vtx_binding = vtx_binding; } static void create_flat() diff --git a/src/amd/compiler/tests/test_optimizer.cpp b/src/amd/compiler/tests/test_optimizer.cpp index 86d8c2f5f05..e501fd076bd 100644 --- a/src/amd/compiler/tests/test_optimizer.cpp +++ b/src/amd/compiler/tests/test_optimizer.cpp @@ -698,7 +698,7 @@ BEGIN_TEST(optimize.add3) //! v1: %res1 = v_add_u32 %a, %tmp1 //! p_unit_test 1, %res1 tmp = bld.vop2_e64(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]); - tmp.instr->vop3().clamp = true; + tmp->vop3().clamp = true; writeout(1, bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[0], tmp)); //! v1: %tmp2 = v_add_u32 %b, %c @@ -706,7 +706,7 @@ BEGIN_TEST(optimize.add3) //! p_unit_test 2, %res2 tmp = bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]); tmp = bld.vop2_e64(aco_opcode::v_add_u32, bld.def(v1), inputs[0], tmp); - tmp.instr->vop3().clamp = true; + tmp->vop3().clamp = true; writeout(2, tmp); finish_opt_test(); @@ -1030,7 +1030,7 @@ BEGIN_TEST(optimizer.dpp) //! v1: %res3 = v_add_f32 -%a, %b row_mirror bound_ctrl:1 //! p_unit_test 3, %res3 auto tmp3 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror); - tmp3.instr->dpp16().neg[0] = true; + tmp3->dpp16().neg[0] = true; Temp res3 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), tmp3, b); writeout(3, res3); @@ -1038,7 +1038,7 @@ BEGIN_TEST(optimizer.dpp) //! p_unit_test 4, %res4 Temp tmp4 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror); auto res4 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1), tmp4, b); - res4.instr->vop3().neg[0] = true; + res4->vop3().neg[0] = true; writeout(4, res4); //! v1: %tmp5 = v_mov_b32 %a row_mirror bound_ctrl:1 @@ -1046,22 +1046,22 @@ BEGIN_TEST(optimizer.dpp) //! p_unit_test 5, %res5 Temp tmp5 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror); auto res5 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1), tmp5, b); - res5.instr->vop3().clamp = true; + res5->vop3().clamp = true; writeout(5, res5); //! v1: %res6 = v_add_f32 |%a|, %b row_mirror bound_ctrl:1 //! p_unit_test 6, %res6 auto tmp6 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror); - tmp6.instr->dpp16().neg[0] = true; + tmp6->dpp16().neg[0] = true; auto res6 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1), tmp6, b); - res6.instr->vop3().abs[0] = true; + res6->vop3().abs[0] = true; writeout(6, res6); //! v1: %res7 = v_subrev_f32 %a, |%b| row_mirror bound_ctrl:1 //! p_unit_test 7, %res7 Temp tmp7 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror); auto res7 = bld.vop2_e64(aco_opcode::v_sub_f32, bld.def(v1), b, tmp7); - res7.instr->vop3().abs[0] = true; + res7->vop3().abs[0] = true; writeout(7, res7); /* vcc */ diff --git a/src/amd/compiler/tests/test_optimizer_postRA.cpp b/src/amd/compiler/tests/test_optimizer_postRA.cpp index 1f4840995da..ac87f9c2ae9 100644 --- a/src/amd/compiler/tests/test_optimizer_postRA.cpp +++ b/src/amd/compiler/tests/test_optimizer_postRA.cpp @@ -375,7 +375,7 @@ BEGIN_TEST(optimizer_postRA.dpp) //! v1: %res3:v[2] = v_add_f32 -%a:v[0], %b:v[1] row_mirror bound_ctrl:1 //! p_unit_test 3, %res3:v[2] auto tmp3 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror); - tmp3.instr->dpp16().neg[0] = true; + tmp3->dpp16().neg[0] = true; Temp res3 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp3, reg_v2), b); writeout(3, Operand(res3, reg_v2)); @@ -383,7 +383,7 @@ BEGIN_TEST(optimizer_postRA.dpp) //! p_unit_test 4, %res4:v[2] Temp tmp4 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror); auto res4 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp4, reg_v2), b); - res4.instr->vop3().neg[0] = true; + res4->vop3().neg[0] = true; writeout(4, Operand(res4, reg_v2)); //! v1: %tmp5:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1 @@ -391,22 +391,22 @@ BEGIN_TEST(optimizer_postRA.dpp) //! p_unit_test 5, %res5:v[2] Temp tmp5 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror); auto res5 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp5, reg_v2), b); - res5.instr->vop3().clamp = true; + res5->vop3().clamp = true; writeout(5, Operand(res5, reg_v2)); //! v1: %res6:v[2] = v_add_f32 |%a:v[0]|, %b:v[1] row_mirror bound_ctrl:1 //! p_unit_test 6, %res6:v[2] auto tmp6 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror); - tmp6.instr->dpp16().neg[0] = true; + tmp6->dpp16().neg[0] = true; auto res6 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp6, reg_v2), b); - res6.instr->vop3().abs[0] = true; + res6->vop3().abs[0] = true; writeout(6, Operand(res6, reg_v2)); //! v1: %res7:v[2] = v_subrev_f32 %a:v[0], |%b:v[1]| row_mirror bound_ctrl:1 //! p_unit_test 7, %res7:v[2] Temp tmp7 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror); auto res7 = bld.vop2_e64(aco_opcode::v_sub_f32, bld.def(v1, reg_v2), b, Operand(tmp7, reg_v2)); - res7.instr->vop3().abs[0] = true; + res7->vop3().abs[0] = true; writeout(7, Operand(res7, reg_v2)); /* vcc */ diff --git a/src/amd/compiler/tests/test_sdwa.cpp b/src/amd/compiler/tests/test_sdwa.cpp index 9df87eb568c..3942869e0d2 100644 --- a/src/amd/compiler/tests/test_sdwa.cpp +++ b/src/amd/compiler/tests/test_sdwa.cpp @@ -34,12 +34,12 @@ BEGIN_TEST(validate.sdwa.allow) //>> Validation results: //! Validation passed - SDWA_instruction *sdwa = &bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]).instr->sdwa(); + SDWA_instruction *sdwa = &bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1])->sdwa(); sdwa->neg[0] = sdwa->neg[1] = sdwa->abs[0] = sdwa->abs[1] = true; - sdwa = &bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1b), inputs[0], inputs[1]).instr->sdwa(); + bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1b), inputs[0], inputs[1]); - sdwa = &bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]).instr->sdwa(); + sdwa = &bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1])->sdwa(); sdwa->sel[0] = SubdwordSel::sbyte2; sdwa->sel[1] = SubdwordSel::uword1; @@ -105,7 +105,7 @@ BEGIN_TEST(validate.sdwa.vopc) bld.vopc_sdwa(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), inputs[0], inputs[1]); //~gfx(9|10)! SDWA VOPC clamp only supported on GFX8: s2: %_:vcc = v_cmp_eq_f32 %vgpr0, %vgpr1 clamp src0_sel:dword src1_sel:dword - bld.vopc_sdwa(aco_opcode::v_cmp_eq_f32, bld.def(bld.lm, vcc), inputs[0], inputs[1]).instr->sdwa().clamp = true; + bld.vopc_sdwa(aco_opcode::v_cmp_eq_f32, bld.def(bld.lm, vcc), inputs[0], inputs[1])->sdwa().clamp = true; //! Validation failed @@ -123,7 +123,7 @@ BEGIN_TEST(validate.sdwa.omod) //~gfx8! SDWA omod only supported on GFX9+: v1: %_ = v_mul_f32 %vgpr0, %vgpr1 *2 dst_sel:dword src0_sel:dword src1_sel:dword //~gfx8! Validation failed //~gfx(9|10)! Validation passed - bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]).instr->sdwa().omod = 1; + bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1])->sdwa().omod = 1; finish_validator_test(); } @@ -385,7 +385,7 @@ BEGIN_TEST(optimize.sdwa.from_vop3) //! p_unit_test 0, %res0 Temp byte0_b = bld.pseudo(aco_opcode::p_extract, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(8u), Operand::zero()); - VOP3_instruction *mul = &bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], byte0_b).instr->vop3(); + VOP3_instruction *mul = &bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], byte0_b)->vop3(); mul->neg[0] = true; mul->abs[0] = true; writeout(0, mul->definitions[0].getTemp()); @@ -396,7 +396,7 @@ BEGIN_TEST(optimize.sdwa.from_vop3) //! p_unit_test 1, %res1 byte0_b = bld.pseudo(aco_opcode::p_extract, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(8u), Operand::zero()); - mul = &bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], byte0_b).instr->vop3(); + mul = &bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], byte0_b)->vop3(); mul->omod = 2; writeout(1, mul->definitions[0].getTemp());
