https://github.com/maryammo created https://github.com/llvm/llvm-project/pull/133272
This patch adds the following Dense Math Facility 16-bit half-precision floating-point calculation instructions: dmxvf16gerx2, dmxvf16gerx2pp, dmxvf16gerx2pn, dmxvf16gerx2np, dmxvf16gerx2nn, pmdmxvf16gerx2, pmdmxvf16gerx2pp, pmdmxvf16gerx2pn, pmdmxvf16gerx2np, pmdmxvf16gerx2nn, along with their corresponding intrinsics and tests. >From 109f52126c42231776d7cd2402827cedb036a240 Mon Sep 17 00:00:00 2001 From: Maryam Moghadas <marya...@ca.ibm.com> Date: Thu, 27 Mar 2025 16:30:41 +0000 Subject: [PATCH] [PowerPC] Add dense math half-precision floating-point outer-product accumulate to DMR instructions This patch adds the following Dense Math Facility 16-bit half-precision floating-point calculation instructions: dmxvf16gerx2, dmxvf16gerx2pp, dmxvf16gerx2pn, dmxvf16gerx2np, dmxvf16gerx2nn, pmdmxvf16gerx2, pmdmxvf16gerx2pp, pmdmxvf16gerx2pn, pmdmxvf16gerx2np, pmdmxvf16gerx2nn, along with their corresponding intrinsics and tests. --- llvm/include/llvm/IR/IntrinsicsPowerPC.td | 7 + llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td | 116 ++++- .../test/CodeGen/PowerPC/dmf-outer-product.ll | 483 ++++++++++++++++++ .../PowerPC/ppc-encoding-ISAFuture.txt | 30 ++ .../PowerPC/ppc64le-encoding-ISAFuture.txt | 30 ++ llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s | 50 ++ 6 files changed, 714 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td index b57102ef68f09..bcc3fc6f0fc13 100644 --- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -1755,6 +1755,13 @@ let TargetPrefix = "ppc" in { defm int_ppc_mma_pmdmxvbf16gerx2 : PowerPC_MMA_DMR_Intrinsic<[llvm_v256i1_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>; + + // MMA+ Half-precision Outer Product Intrinsic Definitions. 
+ defm int_ppc_mma_dmxvf16gerx2 : + PowerPC_MMA_DMR_Intrinsic<[llvm_v256i1_ty, llvm_v16i8_ty]>; + defm int_ppc_mma_pmdmxvf16gerx2 : + PowerPC_MMA_DMR_Intrinsic<[llvm_v256i1_ty, llvm_v16i8_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty]>; } // XL Compat intrinsics. diff --git a/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td b/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td index 8ea0924f09b43..331649bddafb7 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td +++ b/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td @@ -207,7 +207,7 @@ multiclass DMR_BF16_UM_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase, } } -multiclass DMR_UM_M284_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase, +multiclass DMR_BF16_UM_M284_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase, string asmstr> { defm NAME : DMR_BF16_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>; let Predicates = [MMA, PrefixInstrs, IsISAFuture] in { @@ -228,9 +228,30 @@ multiclass DMR_UM_M284_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase, } } +multiclass DMR_F16_UM_M284_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase, + string asmstr> { + defm NAME : DMR_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>; + let Predicates = [MMA, PrefixInstrs, IsISAFuture] in { + def PM#NAME : + MMIRR_XX3Form_X8Y4P2_XAp5B6< + opcode, !or(xo, 0x01), (outs dmr:$AT), + !con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK)), + !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"), + IIC_VecFP, []>, + RegConstraint<"@earlyclobber $AT">; + def PM#NAME#PP : + MMIRR_XX3Form_X8Y4P2_XAp5B6< + opcode, xo, (outs dmr:$AT), + !con((ins dmr:$ATi), !con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), + !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + } +} + multiclass DMR_NEG_UM_M284_XOXORf939a0<bits<6> opcode, bits<8> xo, dag IOL, string asmbase, string asmstr> { - defm NAME : DMR_UM_M284_XOEO<opcode, xo, IOL, asmbase, asmstr>; + defm 
NAME : DMR_BF16_UM_M284_XOEO<opcode, xo, IOL, asmbase, asmstr>; let Predicates = [MMA, IsISAFuture] in { def PN : XX3Form_AT3_XAp5B6< opcode, !xor(xo, 0xF9), (outs dmr:$AT), !con((ins dmr:$ATi), IOL), @@ -270,6 +291,48 @@ multiclass DMR_NEG_UM_M284_XOXORf939a0<bits<6> opcode, bits<8> xo, dag IOL, } } +multiclass DMR_NEG_UM_M284_XOXORd11188<bits<6> opcode, bits<8> xo, dag IOL, + string asmbase, string asmstr> { + defm NAME : DMR_F16_UM_M284_XOEO<opcode, xo, IOL, asmbase, asmstr>; + let Predicates = [MMA, IsISAFuture] in { + def PN : XX3Form_AT3_XAp5B6< + opcode, !xor(xo, 0xD1), (outs dmr:$AT), !con((ins dmr:$ATi), IOL), + !strconcat(asmbase#"pn ", asmstr), IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + def NP : XX3Form_AT3_XAp5B6< + opcode, !xor(xo, 0x11), (outs dmr:$AT), !con((ins dmr:$ATi), IOL), + !strconcat(asmbase#"np ", asmstr), IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + def NN : XX3Form_AT3_XAp5B6< + opcode, !xor(xo, 0x88), (outs dmr:$AT), !con((ins dmr:$ATi), IOL), + !strconcat(asmbase#"nn ", asmstr), IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + } + let Predicates = [MMA, PrefixInstrs, IsISAFuture] in { + def PM#NAME#PN : + MMIRR_XX3Form_X8Y4P2_XAp5B6< + opcode, !xor(xo, 0xD1), (outs dmr:$AT), + !con((ins dmr:$ATi), !con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), + !strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK, $PMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + def PM#NAME#NP : + MMIRR_XX3Form_X8Y4P2_XAp5B6< + opcode, !xor(xo, 0x11), (outs dmr:$AT), + !con((ins dmr:$ATi), !con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), + !strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK, $PMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + def PM#NAME#NN : + MMIRR_XX3Form_X8Y4P2_XAp5B6< + opcode, !xor(xo, 0x88), (outs dmr:$AT), + !con((ins dmr:$ATi), !con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), + 
!strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK, $PMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + } +} + let Predicates = [IsISAFuture] in { def DMXXEXTFDMR512 : XX3Form_AT3_XABp5_P1<60, 226, (outs vsrprc:$XAp, vsrprc:$XBp), @@ -347,6 +410,11 @@ let Predicates = [MMA, PrefixInstrs, IsISAFuture] in { defm DMXVBF16GERX2 : DMR_NEG_UM_M284_XOXORf939a0<59, 74, (ins vsrprc:$XAp, vsrc:$XB), "dmxvbf16gerx2", "$AT, $XAp, $XB">; +// DMXVF16GERX2, DMXVF16GERX2PP, DMXVF16GERX2PN, DMXVF16GERX2NP, DMXVF16GERX2NN +// PMDMXVF16GERX2, PMDMXVF16GERX2PP, PMDMXVF16GERX2PN, PMDMXVF16GERX2NP, PMDMXVF16GERX2NN +defm DMXVF16GERX2 : DMR_NEG_UM_M284_XOXORd11188<59, 66, (ins vsrprc:$XAp, vsrc:$XB), + "dmxvf16gerx2", "$AT, $XAp, $XB">; + // MMA+ Intrinsics let Predicates = [MMA, IsISAFuture] in { def : Pat<(v1024i1 (int_ppc_mma_dmxvi8gerx4 v256i1:$XAp, v16i8:$XB)), @@ -371,6 +439,21 @@ let Predicates = [MMA, IsISAFuture] in { def : Pat<(v1024i1 (int_ppc_mma_dmxvbf16gerx2nn v1024i1:$ATi, v256i1:$XAp, v16i8:$XB)), (DMXVBF16GERX2NN $ATi, $XAp, RCCp.BToVSRC)>; + + def : Pat<(v1024i1 (int_ppc_mma_dmxvf16gerx2 v256i1:$XAp, v16i8:$XB)), + (DMXVF16GERX2 $XAp, RCCp.BToVSRC)>; + + def : Pat<(v1024i1 (int_ppc_mma_dmxvf16gerx2pp v1024i1:$ATi, v256i1:$XAp, v16i8:$XB)), + (DMXVF16GERX2PP $ATi, $XAp, RCCp.BToVSRC)>; + + def : Pat<(v1024i1 (int_ppc_mma_dmxvf16gerx2pn v1024i1:$ATi, v256i1:$XAp, v16i8:$XB)), + (DMXVF16GERX2PN $ATi, $XAp, RCCp.BToVSRC)>; + + def : Pat<(v1024i1 (int_ppc_mma_dmxvf16gerx2np v1024i1:$ATi, v256i1:$XAp, v16i8:$XB)), + (DMXVF16GERX2NP $ATi, $XAp, RCCp.BToVSRC)>; + + def : Pat<(v1024i1 (int_ppc_mma_dmxvf16gerx2nn v1024i1:$ATi, v256i1:$XAp, v16i8:$XB)), + (DMXVF16GERX2NN $ATi, $XAp, RCCp.BToVSRC)>; } let Predicates = [MMA, PrefixInstrs, IsISAFuture] in { @@ -419,4 +502,33 @@ let Predicates = [MMA, PrefixInstrs, IsISAFuture] in { Msk2Imm:$PMSK)), (PMDMXVBF16GERX2NN $ATi, $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK, Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + + def : 
Pat<(v1024i1 (int_ppc_mma_pmdmxvf16gerx2 v256i1:$XAp, v16i8:$XB, Msk8Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)), + (PMDMXVF16GERX2 $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + + def : Pat<(v1024i1 (int_ppc_mma_pmdmxvf16gerx2pp v1024i1:$ATi, v256i1:$XAp, v16i8:$XB, + Msk8Imm:$XMSK, Msk4Imm:$YMSK, + Msk2Imm:$PMSK)), + (PMDMXVF16GERX2PP $ATi, $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + + def : Pat<(v1024i1 (int_ppc_mma_pmdmxvf16gerx2pn v1024i1:$ATi, v256i1:$XAp, v16i8:$XB, + Msk8Imm:$XMSK, Msk4Imm:$YMSK, + Msk2Imm:$PMSK)), + (PMDMXVF16GERX2PN $ATi, $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + + def : Pat<(v1024i1 (int_ppc_mma_pmdmxvf16gerx2np v1024i1:$ATi, v256i1:$XAp, v16i8:$XB, + Msk8Imm:$XMSK, Msk4Imm:$YMSK, + Msk2Imm:$PMSK)), + (PMDMXVF16GERX2NP $ATi, $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + + def : Pat<(v1024i1 (int_ppc_mma_pmdmxvf16gerx2nn v1024i1:$ATi, v256i1:$XAp, v16i8:$XB, + Msk8Imm:$XMSK, Msk4Imm:$YMSK, + Msk2Imm:$PMSK)), + (PMDMXVF16GERX2NN $ATi, $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; } diff --git a/llvm/test/CodeGen/PowerPC/dmf-outer-product.ll b/llvm/test/CodeGen/PowerPC/dmf-outer-product.ll index e3b43062f417c..9a02689002459 100644 --- a/llvm/test/CodeGen/PowerPC/dmf-outer-product.ll +++ b/llvm/test/CodeGen/PowerPC/dmf-outer-product.ll @@ -769,3 +769,486 @@ entry: store <1024 x i1> %call, ptr %resp, align 64 ret void } + +declare <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2(<256 x i1>, <16 x i8>) +define void @test_dmxvf16gerx2(ptr %vpp, ptr %vcp, ptr %resp) { +; CHECK-LABEL: test_dmxvf16gerx2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxv v2, 16(r3) +; CHECK-NEXT: lxv vs0, 0(r4) +; CHECK-NEXT: lxv v3, 0(r3) +; CHECK-NEXT: dmxvf16gerx2 dmr0, vsp34, vs0 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxvp vsp34, 96(r5) +; CHECK-NEXT: stxvp vsp36, 64(r5) +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, 
wacc_hi0, 1 +; CHECK-NEXT: stxvp vsp34, 32(r5) +; CHECK-NEXT: stxvp vsp36, 0(r5) +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: test_dmxvf16gerx2: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lxv v2, 0(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r4) +; CHECK-BE-NEXT: lxv v3, 16(r3) +; CHECK-BE-NEXT: dmxvf16gerx2 dmr0, vsp34, vs0 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1 +; CHECK-BE-NEXT: stxvp vsp36, 96(r5) +; CHECK-BE-NEXT: stxvp vsp34, 64(r5) +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxvp vsp36, 32(r5) +; CHECK-BE-NEXT: stxvp vsp34, 0(r5) +; CHECK-BE-NEXT: blr +entry: + %v1 = load <256 x i1>, ptr %vpp, align 32 + %v2 = load <16 x i8>, ptr %vcp, align 32 + %call = tail call <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2(<256 x i1> %v1, <16 x i8> %v2) + store <1024 x i1> %call, ptr %resp, align 64 + ret void +} + +declare <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2pp(<1024 x i1>, <256 x i1>, <16 x i8>) + +define void @test_dmxvf16gerx2pp(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) { +; CHECK-LABEL: test_dmxvf16gerx2pp: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxvp vsp34, 0(r3) +; CHECK-NEXT: lxvp vsp36, 32(r3) +; CHECK-NEXT: dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1 +; CHECK-NEXT: lxvp vsp34, 64(r3) +; CHECK-NEXT: lxvp vsp36, 96(r3) +; CHECK-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0 +; CHECK-NEXT: lxv v2, 16(r4) +; CHECK-NEXT: lxv vs0, 0(r5) +; CHECK-NEXT: lxv v3, 0(r4) +; CHECK-NEXT: dmxvf16gerx2pp dmr0, vsp34, vs0 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxvp vsp34, 96(r6) +; CHECK-NEXT: stxvp vsp36, 64(r6) +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1 +; CHECK-NEXT: stxvp vsp34, 32(r6) +; CHECK-NEXT: stxvp vsp36, 0(r6) +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: test_dmxvf16gerx2pp: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lxvp vsp34, 96(r3) +; CHECK-BE-NEXT: lxvp vsp36, 64(r3) +; CHECK-BE-NEXT: dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1 +; CHECK-BE-NEXT: lxvp vsp34, 32(r3) +; CHECK-BE-NEXT: 
lxvp vsp36, 0(r3) +; CHECK-BE-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0 +; CHECK-BE-NEXT: lxv v2, 0(r4) +; CHECK-BE-NEXT: lxv vs0, 0(r5) +; CHECK-BE-NEXT: lxv v3, 16(r4) +; CHECK-BE-NEXT: dmxvf16gerx2pp dmr0, vsp34, vs0 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1 +; CHECK-BE-NEXT: stxvp vsp36, 96(r6) +; CHECK-BE-NEXT: stxvp vsp34, 64(r6) +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxvp vsp36, 32(r6) +; CHECK-BE-NEXT: stxvp vsp34, 0(r6) +; CHECK-BE-NEXT: blr +entry: + %v.dmr = load <1024 x i1>, ptr %vop, align 64 + %v1 = load <256 x i1>, ptr %vpp, align 32 + %v2 = load <16 x i8>, ptr %vcp, align 32 + %call = tail call <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2pp(<1024 x i1> %v.dmr, <256 x i1> %v1, <16 x i8> %v2) + store <1024 x i1> %call, ptr %resp, align 64 + ret void +} + +declare <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2pn(<1024 x i1>, <256 x i1>, <16 x i8>) + +define void @test_dmxvf16gerx2pn(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) { +; CHECK-LABEL: test_dmxvf16gerx2pn: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxvp vsp34, 0(r3) +; CHECK-NEXT: lxvp vsp36, 32(r3) +; CHECK-NEXT: dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1 +; CHECK-NEXT: lxvp vsp34, 64(r3) +; CHECK-NEXT: lxvp vsp36, 96(r3) +; CHECK-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0 +; CHECK-NEXT: lxv v2, 16(r4) +; CHECK-NEXT: lxv vs0, 0(r5) +; CHECK-NEXT: lxv v3, 0(r4) +; CHECK-NEXT: dmxvf16gerx2pn dmr0, vsp34, vs0 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxvp vsp34, 96(r6) +; CHECK-NEXT: stxvp vsp36, 64(r6) +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1 +; CHECK-NEXT: stxvp vsp34, 32(r6) +; CHECK-NEXT: stxvp vsp36, 0(r6) +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: test_dmxvf16gerx2pn: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lxvp vsp34, 96(r3) +; CHECK-BE-NEXT: lxvp vsp36, 64(r3) +; CHECK-BE-NEXT: dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1 +; CHECK-BE-NEXT: lxvp vsp34, 32(r3) +; CHECK-BE-NEXT: lxvp vsp36, 0(r3) +; 
CHECK-BE-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0 +; CHECK-BE-NEXT: lxv v2, 0(r4) +; CHECK-BE-NEXT: lxv vs0, 0(r5) +; CHECK-BE-NEXT: lxv v3, 16(r4) +; CHECK-BE-NEXT: dmxvf16gerx2pn dmr0, vsp34, vs0 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1 +; CHECK-BE-NEXT: stxvp vsp36, 96(r6) +; CHECK-BE-NEXT: stxvp vsp34, 64(r6) +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxvp vsp36, 32(r6) +; CHECK-BE-NEXT: stxvp vsp34, 0(r6) +; CHECK-BE-NEXT: blr +entry: + %v.dmr = load <1024 x i1>, ptr %vop, align 64 + %v1 = load <256 x i1>, ptr %vpp, align 32 + %v2 = load <16 x i8>, ptr %vcp, align 32 + %call = tail call <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2pn(<1024 x i1> %v.dmr, <256 x i1> %v1, <16 x i8> %v2) + store <1024 x i1> %call, ptr %resp, align 64 + ret void +} + +declare <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2np(<1024 x i1>, <256 x i1>, <16 x i8>) + +define void @test_dmxvf16gerx2np(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) { +; CHECK-LABEL: test_dmxvf16gerx2np: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxvp vsp34, 0(r3) +; CHECK-NEXT: lxvp vsp36, 32(r3) +; CHECK-NEXT: dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1 +; CHECK-NEXT: lxvp vsp34, 64(r3) +; CHECK-NEXT: lxvp vsp36, 96(r3) +; CHECK-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0 +; CHECK-NEXT: lxv v2, 16(r4) +; CHECK-NEXT: lxv vs0, 0(r5) +; CHECK-NEXT: lxv v3, 0(r4) +; CHECK-NEXT: dmxvf16gerx2np dmr0, vsp34, vs0 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxvp vsp34, 96(r6) +; CHECK-NEXT: stxvp vsp36, 64(r6) +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1 +; CHECK-NEXT: stxvp vsp34, 32(r6) +; CHECK-NEXT: stxvp vsp36, 0(r6) +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: test_dmxvf16gerx2np: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lxvp vsp34, 96(r3) +; CHECK-BE-NEXT: lxvp vsp36, 64(r3) +; CHECK-BE-NEXT: dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1 +; CHECK-BE-NEXT: lxvp vsp34, 32(r3) +; CHECK-BE-NEXT: lxvp vsp36, 0(r3) +; CHECK-BE-NEXT: 
dmxxinstfdmr512 wacc0, vsp36, vsp34, 0 +; CHECK-BE-NEXT: lxv v2, 0(r4) +; CHECK-BE-NEXT: lxv vs0, 0(r5) +; CHECK-BE-NEXT: lxv v3, 16(r4) +; CHECK-BE-NEXT: dmxvf16gerx2np dmr0, vsp34, vs0 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1 +; CHECK-BE-NEXT: stxvp vsp36, 96(r6) +; CHECK-BE-NEXT: stxvp vsp34, 64(r6) +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxvp vsp36, 32(r6) +; CHECK-BE-NEXT: stxvp vsp34, 0(r6) +; CHECK-BE-NEXT: blr +entry: + %v.dmr = load <1024 x i1>, ptr %vop, align 64 + %v1 = load <256 x i1>, ptr %vpp, align 32 + %v2 = load <16 x i8>, ptr %vcp, align 32 + %call = tail call <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2np(<1024 x i1> %v.dmr, <256 x i1> %v1, <16 x i8> %v2) + store <1024 x i1> %call, ptr %resp, align 64 + ret void +} + +declare <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2nn(<1024 x i1>, <256 x i1>, <16 x i8>) + +define void @test_dmxvf16gerx2nn(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) { +; CHECK-LABEL: test_dmxvf16gerx2nn: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxvp vsp34, 0(r3) +; CHECK-NEXT: lxvp vsp36, 32(r3) +; CHECK-NEXT: dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1 +; CHECK-NEXT: lxvp vsp34, 64(r3) +; CHECK-NEXT: lxvp vsp36, 96(r3) +; CHECK-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0 +; CHECK-NEXT: lxv v2, 16(r4) +; CHECK-NEXT: lxv vs0, 0(r5) +; CHECK-NEXT: lxv v3, 0(r4) +; CHECK-NEXT: dmxvf16gerx2nn dmr0, vsp34, vs0 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxvp vsp34, 96(r6) +; CHECK-NEXT: stxvp vsp36, 64(r6) +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1 +; CHECK-NEXT: stxvp vsp34, 32(r6) +; CHECK-NEXT: stxvp vsp36, 0(r6) +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: test_dmxvf16gerx2nn: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lxvp vsp34, 96(r3) +; CHECK-BE-NEXT: lxvp vsp36, 64(r3) +; CHECK-BE-NEXT: dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1 +; CHECK-BE-NEXT: lxvp vsp34, 32(r3) +; CHECK-BE-NEXT: lxvp vsp36, 0(r3) +; CHECK-BE-NEXT: dmxxinstfdmr512 wacc0, 
vsp36, vsp34, 0 +; CHECK-BE-NEXT: lxv v2, 0(r4) +; CHECK-BE-NEXT: lxv vs0, 0(r5) +; CHECK-BE-NEXT: lxv v3, 16(r4) +; CHECK-BE-NEXT: dmxvf16gerx2nn dmr0, vsp34, vs0 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1 +; CHECK-BE-NEXT: stxvp vsp36, 96(r6) +; CHECK-BE-NEXT: stxvp vsp34, 64(r6) +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxvp vsp36, 32(r6) +; CHECK-BE-NEXT: stxvp vsp34, 0(r6) +; CHECK-BE-NEXT: blr +entry: + %v.dmr = load <1024 x i1>, ptr %vop, align 64 + %v1 = load <256 x i1>, ptr %vpp, align 32 + %v2 = load <16 x i8>, ptr %vcp, align 32 + %call = tail call <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2nn(<1024 x i1> %v.dmr, <256 x i1> %v1, <16 x i8> %v2) + store <1024 x i1> %call, ptr %resp, align 64 + ret void +} + +declare <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2(<256 x i1>, <16 x i8>, i32, i32, i32) + +define void @test_pmdmxvf16gerx2(ptr %vpp, ptr %vcp, ptr %resp) { +; CHECK-LABEL: test_pmdmxvf16gerx2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxv v2, 16(r3) +; CHECK-NEXT: lxv vs0, 0(r4) +; CHECK-NEXT: lxv v3, 0(r3) +; CHECK-NEXT: pmdmxvf16gerx2 dmr0, vsp34, vs0, 33, 5, 2 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxvp vsp34, 96(r5) +; CHECK-NEXT: stxvp vsp36, 64(r5) +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1 +; CHECK-NEXT: stxvp vsp34, 32(r5) +; CHECK-NEXT: stxvp vsp36, 0(r5) +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: test_pmdmxvf16gerx2: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lxv v2, 0(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r4) +; CHECK-BE-NEXT: lxv v3, 16(r3) +; CHECK-BE-NEXT: pmdmxvf16gerx2 dmr0, vsp34, vs0, 33, 5, 2 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1 +; CHECK-BE-NEXT: stxvp vsp36, 96(r5) +; CHECK-BE-NEXT: stxvp vsp34, 64(r5) +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxvp vsp36, 32(r5) +; CHECK-BE-NEXT: stxvp vsp34, 0(r5) +; CHECK-BE-NEXT: blr +entry: + %v1 = load <256 x i1>, ptr %vpp, align 32 + %v2 = load <16 x 
i8>, ptr %vcp, align 32 + %call = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2(<256 x i1> %v1, <16 x i8> %v2, i32 33, i32 5, i32 2) + store <1024 x i1> %call, ptr %resp, align 64 + ret void +} + +declare <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2pp(<1024 x i1>, <256 x i1>, <16 x i8>, i32, i32, i32) + +define void @test_pmdmxvf16gerx2pp(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) { +; CHECK-LABEL: test_pmdmxvf16gerx2pp: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxvp vsp34, 0(r3) +; CHECK-NEXT: lxvp vsp36, 32(r3) +; CHECK-NEXT: dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1 +; CHECK-NEXT: lxvp vsp34, 64(r3) +; CHECK-NEXT: lxvp vsp36, 96(r3) +; CHECK-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0 +; CHECK-NEXT: lxv v2, 16(r4) +; CHECK-NEXT: lxv vs0, 0(r5) +; CHECK-NEXT: lxv v3, 0(r4) +; CHECK-NEXT: pmdmxvf16gerx2pp dmr0, vsp34, vs0, 33, 5, 2 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxvp vsp34, 96(r6) +; CHECK-NEXT: stxvp vsp36, 64(r6) +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1 +; CHECK-NEXT: stxvp vsp34, 32(r6) +; CHECK-NEXT: stxvp vsp36, 0(r6) +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: test_pmdmxvf16gerx2pp: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lxvp vsp34, 96(r3) +; CHECK-BE-NEXT: lxvp vsp36, 64(r3) +; CHECK-BE-NEXT: dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1 +; CHECK-BE-NEXT: lxvp vsp34, 32(r3) +; CHECK-BE-NEXT: lxvp vsp36, 0(r3) +; CHECK-BE-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0 +; CHECK-BE-NEXT: lxv v2, 0(r4) +; CHECK-BE-NEXT: lxv vs0, 0(r5) +; CHECK-BE-NEXT: lxv v3, 16(r4) +; CHECK-BE-NEXT: pmdmxvf16gerx2pp dmr0, vsp34, vs0, 33, 5, 2 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1 +; CHECK-BE-NEXT: stxvp vsp36, 96(r6) +; CHECK-BE-NEXT: stxvp vsp34, 64(r6) +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxvp vsp36, 32(r6) +; CHECK-BE-NEXT: stxvp vsp34, 0(r6) +; CHECK-BE-NEXT: blr +entry: + %v.dmr = load <1024 x i1>, ptr %vop, align 64 + %v1 = load <256 x i1>, ptr %vpp, align 
32 + %v2 = load <16 x i8>, ptr %vcp, align 32 + %call = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2pp(<1024 x i1> %v.dmr, <256 x i1> %v1, <16 x i8> %v2, i32 33, i32 5, i32 2) + store <1024 x i1> %call, ptr %resp, align 64 + ret void +} + +declare <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2pn(<1024 x i1>, <256 x i1>, <16 x i8>, i32, i32, i32) + +define void @test_pmdmxvf16gerx2pn(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) { +; CHECK-LABEL: test_pmdmxvf16gerx2pn: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxvp vsp34, 0(r3) +; CHECK-NEXT: lxvp vsp36, 32(r3) +; CHECK-NEXT: dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1 +; CHECK-NEXT: lxvp vsp34, 64(r3) +; CHECK-NEXT: lxvp vsp36, 96(r3) +; CHECK-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0 +; CHECK-NEXT: lxv v2, 16(r4) +; CHECK-NEXT: lxv vs0, 0(r5) +; CHECK-NEXT: lxv v3, 0(r4) +; CHECK-NEXT: pmdmxvf16gerx2pn dmr0, vsp34, vs0, 33, 5, 2 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxvp vsp34, 96(r6) +; CHECK-NEXT: stxvp vsp36, 64(r6) +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1 +; CHECK-NEXT: stxvp vsp34, 32(r6) +; CHECK-NEXT: stxvp vsp36, 0(r6) +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: test_pmdmxvf16gerx2pn: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lxvp vsp34, 96(r3) +; CHECK-BE-NEXT: lxvp vsp36, 64(r3) +; CHECK-BE-NEXT: dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1 +; CHECK-BE-NEXT: lxvp vsp34, 32(r3) +; CHECK-BE-NEXT: lxvp vsp36, 0(r3) +; CHECK-BE-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0 +; CHECK-BE-NEXT: lxv v2, 0(r4) +; CHECK-BE-NEXT: lxv vs0, 0(r5) +; CHECK-BE-NEXT: lxv v3, 16(r4) +; CHECK-BE-NEXT: pmdmxvf16gerx2pn dmr0, vsp34, vs0, 33, 5, 2 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1 +; CHECK-BE-NEXT: stxvp vsp36, 96(r6) +; CHECK-BE-NEXT: stxvp vsp34, 64(r6) +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxvp vsp36, 32(r6) +; CHECK-BE-NEXT: stxvp vsp34, 0(r6) +; CHECK-BE-NEXT: blr +entry: + %v.dmr = load <1024 x i1>, ptr %vop, align 
64 + %v1 = load <256 x i1>, ptr %vpp, align 32 + %v2 = load <16 x i8>, ptr %vcp, align 32 + %call = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2pn(<1024 x i1> %v.dmr, <256 x i1> %v1, <16 x i8> %v2, i32 33, i32 5, i32 2) + store <1024 x i1> %call, ptr %resp, align 64 + ret void +} + +declare <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2np(<1024 x i1>, <256 x i1>, <16 x i8>, i32, i32, i32) + +define void @test_pmdmxvf16gerx2np(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) { +; CHECK-LABEL: test_pmdmxvf16gerx2np: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxvp vsp34, 0(r3) +; CHECK-NEXT: lxvp vsp36, 32(r3) +; CHECK-NEXT: dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1 +; CHECK-NEXT: lxvp vsp34, 64(r3) +; CHECK-NEXT: lxvp vsp36, 96(r3) +; CHECK-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0 +; CHECK-NEXT: lxv v2, 16(r4) +; CHECK-NEXT: lxv vs0, 0(r5) +; CHECK-NEXT: lxv v3, 0(r4) +; CHECK-NEXT: pmdmxvf16gerx2np dmr0, vsp34, vs0, 33, 5, 2 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxvp vsp34, 96(r6) +; CHECK-NEXT: stxvp vsp36, 64(r6) +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1 +; CHECK-NEXT: stxvp vsp34, 32(r6) +; CHECK-NEXT: stxvp vsp36, 0(r6) +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: test_pmdmxvf16gerx2np: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lxvp vsp34, 96(r3) +; CHECK-BE-NEXT: lxvp vsp36, 64(r3) +; CHECK-BE-NEXT: dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1 +; CHECK-BE-NEXT: lxvp vsp34, 32(r3) +; CHECK-BE-NEXT: lxvp vsp36, 0(r3) +; CHECK-BE-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0 +; CHECK-BE-NEXT: lxv v2, 0(r4) +; CHECK-BE-NEXT: lxv vs0, 0(r5) +; CHECK-BE-NEXT: lxv v3, 16(r4) +; CHECK-BE-NEXT: pmdmxvf16gerx2np dmr0, vsp34, vs0, 33, 5, 2 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1 +; CHECK-BE-NEXT: stxvp vsp36, 96(r6) +; CHECK-BE-NEXT: stxvp vsp34, 64(r6) +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxvp vsp36, 32(r6) +; CHECK-BE-NEXT: stxvp vsp34, 0(r6) +; CHECK-BE-NEXT: blr +entry: + 
%v.dmr = load <1024 x i1>, ptr %vop, align 64 + %v1 = load <256 x i1>, ptr %vpp, align 32 + %v2 = load <16 x i8>, ptr %vcp, align 32 + %call = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2np(<1024 x i1> %v.dmr, <256 x i1> %v1, <16 x i8> %v2, i32 33, i32 5, i32 2) + store <1024 x i1> %call, ptr %resp, align 64 + ret void +} + +declare <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2nn(<1024 x i1>, <256 x i1>, <16 x i8>, i32, i32, i32) + +define void @test_pmdmxvf16gerx2nn(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) { +; CHECK-LABEL: test_pmdmxvf16gerx2nn: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxvp vsp34, 0(r3) +; CHECK-NEXT: lxvp vsp36, 32(r3) +; CHECK-NEXT: dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1 +; CHECK-NEXT: lxvp vsp34, 64(r3) +; CHECK-NEXT: lxvp vsp36, 96(r3) +; CHECK-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0 +; CHECK-NEXT: lxv v2, 16(r4) +; CHECK-NEXT: lxv vs0, 0(r5) +; CHECK-NEXT: lxv v3, 0(r4) +; CHECK-NEXT: pmdmxvf16gerx2nn dmr0, vsp34, vs0, 33, 5, 2 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxvp vsp34, 96(r6) +; CHECK-NEXT: stxvp vsp36, 64(r6) +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1 +; CHECK-NEXT: stxvp vsp34, 32(r6) +; CHECK-NEXT: stxvp vsp36, 0(r6) +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: test_pmdmxvf16gerx2nn: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lxvp vsp34, 96(r3) +; CHECK-BE-NEXT: lxvp vsp36, 64(r3) +; CHECK-BE-NEXT: dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1 +; CHECK-BE-NEXT: lxvp vsp34, 32(r3) +; CHECK-BE-NEXT: lxvp vsp36, 0(r3) +; CHECK-BE-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0 +; CHECK-BE-NEXT: lxv v2, 0(r4) +; CHECK-BE-NEXT: lxv vs0, 0(r5) +; CHECK-BE-NEXT: lxv v3, 16(r4) +; CHECK-BE-NEXT: pmdmxvf16gerx2nn dmr0, vsp34, vs0, 33, 5, 2 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1 +; CHECK-BE-NEXT: stxvp vsp36, 96(r6) +; CHECK-BE-NEXT: stxvp vsp34, 64(r6) +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxvp vsp36, 32(r6) +; CHECK-BE-NEXT: stxvp 
vsp34, 0(r6) +; CHECK-BE-NEXT: blr +entry: + %v.dmr = load <1024 x i1>, ptr %vop, align 64 + %v1 = load <256 x i1>, ptr %vpp, align 32 + %v2 = load <16 x i8>, ptr %vcp, align 32 + %call = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2nn(<1024 x i1> %v.dmr, <256 x i1> %v1, <16 x i8> %v2, i32 33, i32 5, i32 2) + store <1024 x i1> %call, ptr %resp, align 64 + ret void +} diff --git a/llvm/test/MC/Disassembler/PowerPC/ppc-encoding-ISAFuture.txt b/llvm/test/MC/Disassembler/PowerPC/ppc-encoding-ISAFuture.txt index c3b5fa36641c6..78687deff0db2 100644 --- a/llvm/test/MC/Disassembler/PowerPC/ppc-encoding-ISAFuture.txt +++ b/llvm/test/MC/Disassembler/PowerPC/ppc-encoding-ISAFuture.txt @@ -123,3 +123,33 @@ #CHECK: pmdmxvbf16gerx2nn 1, 2, 4, 8, 4, 2 0x07,0x90,0x80,0x84,0xec,0x82,0x27,0x50 + +#CHECK: dmxvf16gerx2 1, 0, 2 +0xec,0x80,0x12,0x18 + +#CHECK: dmxvf16gerx2pp 1, 0, 2 +0xec,0x80,0x12,0x10 + +#CHECK: dmxvf16gerx2pn 1, 0, 2 +0xec,0x80,0x14,0x98 + +#CHECK: dmxvf16gerx2np 1, 0, 2 +0xec,0x80,0x12,0x98 + +#CHECK: dmxvf16gerx2nn 1, 0, 2 +0xec,0x80,0x16,0x50 + +#CHECK: pmdmxvf16gerx2 0, 2, 4, 12, 5, 3 +0x07,0x90,0xc0,0xc5,0xec,0x02,0x22,0x18 + +#CHECK: pmdmxvf16gerx2pp 0, 2, 4, 12, 5, 3 +0x07,0x90,0xc0,0xc5,0xec,0x02,0x22,0x10 + +#CHECK: pmdmxvf16gerx2pn 0, 2, 4, 12, 5, 3 +0x07,0x90,0xc0,0xc5,0xec,0x02,0x24,0x98 + +#CHECK: pmdmxvf16gerx2np 0, 2, 4, 12, 5, 3 +0x07,0x90,0xc0,0xc5,0xec,0x02,0x22,0x98 + +#CHECK: pmdmxvf16gerx2nn 0, 2, 4, 12, 5, 3 +0x07,0x90,0xc0,0xc5,0xec,0x02,0x26,0x50 diff --git a/llvm/test/MC/Disassembler/PowerPC/ppc64le-encoding-ISAFuture.txt b/llvm/test/MC/Disassembler/PowerPC/ppc64le-encoding-ISAFuture.txt index 2691e243191f9..43a6039ee7fdc 100644 --- a/llvm/test/MC/Disassembler/PowerPC/ppc64le-encoding-ISAFuture.txt +++ b/llvm/test/MC/Disassembler/PowerPC/ppc64le-encoding-ISAFuture.txt @@ -117,3 +117,33 @@ #CHECK: pmdmxvbf16gerx2nn 1, 2, 4, 8, 4, 2 0x84,0x80,0x90,0x07,0x50,0x27,0x82,0xec + +#CHECK: dmxvf16gerx2 1, 0, 2 +0x18,0x12,0x80,0xec + +#CHECK: 
dmxvf16gerx2pp 1, 0, 2 +0x10,0x12,0x80,0xec + +#CHECK: dmxvf16gerx2pn 1, 0, 2 +0x98,0x14,0x80,0xec + +#CHECK: dmxvf16gerx2np 1, 0, 2 +0x98,0x12,0x80,0xec + +#CHECK: dmxvf16gerx2nn 1, 0, 2 +0x50,0x16,0x80,0xec + +#CHECK: pmdmxvf16gerx2 0, 2, 4, 12, 5, 3 +0xc5,0xc0,0x90,0x07,0x18,0x22,0x02,0xec + +#CHECK: pmdmxvf16gerx2pp 0, 2, 4, 12, 5, 3 +0xc5,0xc0,0x90,0x07,0x10,0x22,0x02,0xec + +#CHECK: pmdmxvf16gerx2pn 0, 2, 4, 12, 5, 3 +0xc5,0xc0,0x90,0x07,0x98,0x24,0x02,0xec + +#CHECK: pmdmxvf16gerx2np 0, 2, 4, 12, 5, 3 +0xc5,0xc0,0x90,0x07,0x98,0x22,0x02,0xec + +#CHECK: pmdmxvf16gerx2nn 0, 2, 4, 12, 5, 3 +0xc5,0xc0,0x90,0x07,0x50,0x26,0x02,0xec diff --git a/llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s b/llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s index fe512e7e42382..f58f6f981b94c 100644 --- a/llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s +++ b/llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s @@ -176,3 +176,53 @@ #CHECK-BE-SAME: 0xec,0x82,0x27,0x50] #CHECK-LE: pmdmxvbf16gerx2nn 1, 2, 4, 8, 4, 2 # encoding: [0x84,0x80,0x90,0x07, #CHECK-LE-SAME: 0x50,0x27,0x82,0xec] + + dmxvf16gerx2 1, 0, 2 +#CHECK-BE: dmxvf16gerx2 1, 0, 2 # encoding: [0xec,0x80,0x12,0x18] +#CHECK-LE: dmxvf16gerx2 1, 0, 2 # encoding: [0x18,0x12,0x80,0xec] + + dmxvf16gerx2pp 1, 0, 2 +#CHECK-BE: dmxvf16gerx2pp 1, 0, 2 # encoding: [0xec,0x80,0x12,0x10] +#CHECK-LE: dmxvf16gerx2pp 1, 0, 2 # encoding: [0x10,0x12,0x80,0xec] + + dmxvf16gerx2pn 1, 0, 2 +#CHECK-BE: dmxvf16gerx2pn 1, 0, 2 # encoding: [0xec,0x80,0x14,0x98] +#CHECK-LE: dmxvf16gerx2pn 1, 0, 2 # encoding: [0x98,0x14,0x80,0xec] + + dmxvf16gerx2np 1, 0, 2 +#CHECK-BE: dmxvf16gerx2np 1, 0, 2 # encoding: [0xec,0x80,0x12,0x98] +#CHECK-LE: dmxvf16gerx2np 1, 0, 2 # encoding: [0x98,0x12,0x80,0xec] + + dmxvf16gerx2nn 1, 0, 2 +#CHECK-BE: dmxvf16gerx2nn 1, 0, 2 # encoding: [0xec,0x80,0x16,0x50] +#CHECK-LE: dmxvf16gerx2nn 1, 0, 2 # encoding: [0x50,0x16,0x80,0xec] + + pmdmxvf16gerx2 0, 2, 4, 12, 5, 3 +#CHECK-BE: pmdmxvf16gerx2 0, 2, 4, 12, 5, 3 # encoding: 
[0x07,0x90,0xc0,0xc5, +#CHECK-BE-SAME: 0xec,0x02,0x22,0x18] +#CHECK-LE: pmdmxvf16gerx2 0, 2, 4, 12, 5, 3 # encoding: [0xc5,0xc0,0x90,0x07, +#CHECK-LE-SAME: 0x18,0x22,0x02,0xec] + + pmdmxvf16gerx2pp 0, 2, 4, 12, 5, 3 +#CHECK-BE: pmdmxvf16gerx2pp 0, 2, 4, 12, 5, 3 # encoding: [0x07,0x90,0xc0,0xc5, +#CHECK-BE-SAME: 0xec,0x02,0x22,0x10] +#CHECK-LE: pmdmxvf16gerx2pp 0, 2, 4, 12, 5, 3 # encoding: [0xc5,0xc0,0x90,0x07, +#CHECK-LE-SAME: 0x10,0x22,0x02,0xec] + + pmdmxvf16gerx2pn 0, 2, 4, 12, 5, 3 +#CHECK-BE: pmdmxvf16gerx2pn 0, 2, 4, 12, 5, 3 # encoding: [0x07,0x90,0xc0,0xc5, +#CHECK-BE-SAME: 0xec,0x02,0x24,0x98] +#CHECK-LE: pmdmxvf16gerx2pn 0, 2, 4, 12, 5, 3 # encoding: [0xc5,0xc0,0x90,0x07, +#CHECK-LE-SAME: 0x98,0x24,0x02,0xec] + + pmdmxvf16gerx2np 0, 2, 4, 12, 5, 3 +#CHECK-BE: pmdmxvf16gerx2np 0, 2, 4, 12, 5, 3 # encoding: [0x07,0x90,0xc0,0xc5, +#CHECK-BE-SAME: 0xec,0x02,0x22,0x98] +#CHECK-LE: pmdmxvf16gerx2np 0, 2, 4, 12, 5, 3 # encoding: [0xc5,0xc0,0x90,0x07, +#CHECK-LE-SAME: 0x98,0x22,0x02,0xec] + + pmdmxvf16gerx2nn 0, 2, 4, 12, 5, 3 +#CHECK-BE: pmdmxvf16gerx2nn 0, 2, 4, 12, 5, 3 # encoding: [0x07,0x90,0xc0,0xc5, +#CHECK-BE-SAME: 0xec,0x02,0x26,0x50] +#CHECK-LE: pmdmxvf16gerx2nn 0, 2, 4, 12, 5, 3 # encoding: [0xc5,0xc0,0x90,0x07, +#CHECK-LE-SAME: 0x50,0x26,0x02,0xec] _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits