Author: RolandF77 Date: 2025-08-25T12:14:55-04:00 New Revision: d1cbe6ed747e606308b8c51321b988fda7dbf058
URL: https://github.com/llvm/llvm-project/commit/d1cbe6ed747e606308b8c51321b988fda7dbf058 DIFF: https://github.com/llvm/llvm-project/commit/d1cbe6ed747e606308b8c51321b988fda7dbf058.diff LOG: [PowerPC] Add DMF builtins for build and disassemble (#153097) Add support for the PPC Dense Math builtins mma_build_dmr and mma_disassemble_dmr. Added: Modified: clang/include/clang/Basic/BuiltinsPPC.def clang/lib/CodeGen/TargetBuiltins/PPC.cpp clang/test/CodeGen/PowerPC/builtins-ppc-dmf.c clang/test/CodeGen/PowerPC/ppc-dmf-mma-builtin-err.c llvm/include/llvm/IR/IntrinsicsPowerPC.td llvm/lib/Target/PowerPC/PPCISelLowering.cpp llvm/lib/Target/PowerPC/PPCISelLowering.h llvm/test/CodeGen/PowerPC/dmr-enable.ll Removed: ################################################################################ diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def index 79df84abd74f0..22926b6a7d095 100644 --- a/clang/include/clang/Basic/BuiltinsPPC.def +++ b/clang/include/clang/Basic/BuiltinsPPC.def @@ -1100,6 +1100,10 @@ UNALIASED_CUSTOM_BUILTIN(mma_dmmr, "vW1024*W1024*", false, "mma,isa-future-instructions") UNALIASED_CUSTOM_BUILTIN(mma_dmxor, "vW1024*W1024*", true, "mma,isa-future-instructions") +UNALIASED_CUSTOM_BUILTIN(mma_disassemble_dmr, "vv*W1024*", false, + "mma,isa-future-instructions") +UNALIASED_CUSTOM_BUILTIN(mma_build_dmr, "vW1024*VVVVVVVV", false, + "mma,isa-future-instructions") // MMA builtins with positive/negative multiply/accumulate. 
UNALIASED_CUSTOM_MMA_BUILTIN(mma_xvf16ger2, "vW512*VV", diff --git a/clang/lib/CodeGen/TargetBuiltins/PPC.cpp b/clang/lib/CodeGen/TargetBuiltins/PPC.cpp index 270e9fc976f23..ba65cf1ce9b90 100644 --- a/clang/lib/CodeGen/TargetBuiltins/PPC.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/PPC.cpp @@ -1152,10 +1152,13 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, CallOps.push_back(Acc); } if (BuiltinID == PPC::BI__builtin_mma_dmmr || - BuiltinID == PPC::BI__builtin_mma_dmxor) { + BuiltinID == PPC::BI__builtin_mma_dmxor || + BuiltinID == PPC::BI__builtin_mma_disassemble_dmr) { Address Addr = EmitPointerWithAlignment(E->getArg(1)); Ops[1] = Builder.CreateLoad(Addr); } + if (BuiltinID == PPC::BI__builtin_mma_disassemble_dmr) + return Builder.CreateAlignedStore(Ops[1], Ops[0], MaybeAlign()); for (unsigned i=1; i<Ops.size(); i++) CallOps.push_back(Ops[i]); llvm::Function *F = CGM.getIntrinsic(ID); diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-dmf.c b/clang/test/CodeGen/PowerPC/builtins-ppc-dmf.c index 4aafc09602228..c66f5e2a32919 100644 --- a/clang/test/CodeGen/PowerPC/builtins-ppc-dmf.c +++ b/clang/test/CodeGen/PowerPC/builtins-ppc-dmf.c @@ -93,18 +93,36 @@ void test_pmdmxvi8gerx4spp(unsigned char *vdmrp, unsigned char *vpp, vector unsi *((__dmr1024 *)resp) = vdmr; } -// CHECK-LABEL: @test_dmf_basic -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmsetdmrz() -// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1> [[TMP0]]) -// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr %res1, align 128 -// CHECK-NEXT: [[TMP2:%.*]] = load <1024 x i1>, ptr %res2, align 128 -// CHECK-NEXT: [[TMP3:%.*]] = load <1024 x i1>, ptr %p, align 128 -// CHECK-NEXT: [[TMP4:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1> [[TMP2]], <1024 x i1> [[TMP3]]) -// CHECK-NEXT: store <1024 x i1> [[TMP4]], ptr %res2, align 128 +// CHECK-LABEL: @test_dmf_basic( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] 
= tail call <1024 x i1> @llvm.ppc.mma.dmsetdmrz() +// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1> [[TMP0]]) +// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RES1:%.*]], align 128 +// CHECK-NEXT: [[TMP2:%.*]] = load <1024 x i1>, ptr [[RES2:%.*]], align 128 +// CHECK-NEXT: [[TMP3:%.*]] = load <1024 x i1>, ptr [[P:%.*]], align 128 +// CHECK-NEXT: [[TMP4:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1> [[TMP2]], <1024 x i1> [[TMP3]]) +// CHECK-NEXT: store <1024 x i1> [[TMP4]], ptr [[RES2]], align 128 +// CHECK-NEXT: ret void +// void test_dmf_basic(char *p, char *res1, char *res2) { __dmr1024 x[2]; __builtin_mma_dmsetdmrz(&x[0]); __builtin_mma_dmmr((__dmr1024*)res1, &x[0]); __builtin_mma_dmxor((__dmr1024*)res2, (__dmr1024*)p); } + +// CHECK-LABEL: @test_dmf_basic2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[V:%.*]], align 16, !tbaa [[TBAA8:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.build.dmr(<16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]]) +// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RES2:%.*]], align 128 +// CHECK-NEXT: [[TMP2:%.*]] = load <1024 x i1>, ptr [[P1:%.*]], align 128 +// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RES1:%.*]], align 128 +// CHECK-NEXT: ret void +// +void test_dmf_basic2(char *p1, char *res1, char *res2, + vector unsigned char *v) { + vector unsigned char vv = *v; + __builtin_mma_build_dmr((__dmr1024*)res2, vv, vv, vv, vv, vv, vv, vv, vv); + __builtin_mma_disassemble_dmr(res1, (__dmr1024*)p1); +} diff --git a/clang/test/CodeGen/PowerPC/ppc-dmf-mma-builtin-err.c b/clang/test/CodeGen/PowerPC/ppc-dmf-mma-builtin-err.c index 5a92d6e982511..ea2b99b0e5b20 100644 --- a/clang/test/CodeGen/PowerPC/ppc-dmf-mma-builtin-err.c +++ b/clang/test/CodeGen/PowerPC/ppc-dmf-mma-builtin-err.c @@ -16,6 +16,8 @@ void test_mma(unsigned 
char *vdmrp, unsigned char *vpp, vector unsigned char vc) __builtin_mma_dmsetdmrz(&vdmr); __builtin_mma_dmmr(&vdmr, (__dmr1024*)vpp); __builtin_mma_dmxor(&vdmr, (__dmr1024*)vpp); + __builtin_mma_build_dmr(&vdmr, vc, vc, vc, vc, vc, vc, vc, vc); + __builtin_mma_disassemble_dmr(vdmrp, &vdmr); // CHECK: error: '__builtin_mma_dmxvi8gerx4' needs target feature mma,paired-vector-memops // CHECK: error: '__builtin_mma_pmdmxvi8gerx4' needs target feature mma,paired-vector-memops @@ -26,4 +28,6 @@ void test_mma(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc) // CHECK: error: '__builtin_mma_dmsetdmrz' needs target feature mma,isa-future-instructions // CHECK: error: '__builtin_mma_dmmr' needs target feature mma,isa-future-instructions // CHECK: error: '__builtin_mma_dmxor' needs target feature mma,isa-future-instructions +// CHECK: error: '__builtin_mma_build_dmr' needs target feature mma,isa-future-instructions +// CHECK: error: '__builtin_mma_disassemble_dmr' needs target feature mma,isa-future-instructions } diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td index 94afa94bfb1ee..fb97230e0f8eb 100644 --- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -1708,6 +1708,16 @@ let TargetPrefix = "ppc" in { DefaultAttrsIntrinsic<[llvm_v1024i1_ty], [llvm_v1024i1_ty, llvm_v256i1_ty, llvm_i32_ty], [IntrNoMem]>; + def int_ppc_mma_disassemble_dmr : + DefaultAttrsIntrinsic<[], [llvm_ptr_ty, llvm_v1024i1_ty], + [IntrWriteMem, IntrArgMemOnly]>; + + def int_ppc_mma_build_dmr : + DefaultAttrsIntrinsic<[llvm_v1024i1_ty], [llvm_v16i8_ty, llvm_v16i8_ty, + llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, + llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + // MMA Reduced-Precision: Outer Product Intrinsic Definitions. 
defm int_ppc_mma_xvi4ger8 : PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 7022e9e9dae99..5039f5df7a128 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -11274,6 +11274,24 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getMergeValues(RetOps, dl); } + case Intrinsic::ppc_mma_build_dmr: { + SmallVector<SDValue, 8> Pairs; + SmallVector<SDValue, 8> Chains; + for (int i = 1; i < 9; i += 2) { + SDValue Hi = Op.getOperand(i); + SDValue Lo = Op.getOperand(i + 1); + if (Hi->getOpcode() == ISD::LOAD) + Chains.push_back(Hi.getValue(1)); + if (Lo->getOpcode() == ISD::LOAD) + Chains.push_back(Lo.getValue(1)); + Pairs.push_back( + DAG.getNode(PPCISD::PAIR_BUILD, dl, MVT::v256i1, {Hi, Lo})); + } + SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains); + SDValue Value = DMFInsert1024(Pairs, SDLoc(Op), DAG); + return DAG.getMergeValues({Value, TF}, dl); + } + case Intrinsic::ppc_mma_dmxxextfdmr512: { assert(Subtarget.isISAFuture() && "dmxxextfdmr512 requires ISA Future"); auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2)); @@ -11610,6 +11628,10 @@ SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op, Op.getOperand(0)), 0); } + case Intrinsic::ppc_mma_disassemble_dmr: { + return DAG.getStore(DAG.getEntryNode(), DL, Op.getOperand(ArgStart + 2), + Op.getOperand(ArgStart + 1), MachinePointerInfo()); + } default: break; } @@ -12099,6 +12121,24 @@ SDValue PPCTargetLowering::LowerDMFVectorLoad(SDValue Op, return DAG.getMergeValues({DmrPValue, TF}, dl); } +SDValue PPCTargetLowering::DMFInsert1024(const SmallVectorImpl<SDValue> &Pairs, + const SDLoc &dl, + SelectionDAG &DAG) const { + SDValue Lo(DAG.getMachineNode(PPC::DMXXINSTDMR512, dl, MVT::v512i1, Pairs[0], + Pairs[1]), + 0); + SDValue LoSub = DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32); + 
SDValue Hi(DAG.getMachineNode(PPC::DMXXINSTDMR512_HI, dl, MVT::v512i1, + Pairs[2], Pairs[3]), + 0); + SDValue HiSub = DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32); + SDValue RC = DAG.getTargetConstant(PPC::DMRRCRegClassID, dl, MVT::i32); + + return SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1, + {RC, Lo, LoSub, Hi, HiSub}), + 0); +} + SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 559d583096927..6d5172ec247f6 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -1345,6 +1345,8 @@ namespace llvm { SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDMFVectorLoad(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDMFVectorStore(SDValue Op, SelectionDAG &DAG) const; + SDValue DMFInsert1024(const SmallVectorImpl<SDValue> &Pairs, + const SDLoc &dl, SelectionDAG &DAG) const; SDValue LowerCallResult(SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg, diff --git a/llvm/test/CodeGen/PowerPC/dmr-enable.ll b/llvm/test/CodeGen/PowerPC/dmr-enable.ll index 1e3014405ac4e..a505ac4c2434a 100644 --- a/llvm/test/CodeGen/PowerPC/dmr-enable.ll +++ b/llvm/test/CodeGen/PowerPC/dmr-enable.ll @@ -367,6 +367,69 @@ entry: ret void } +define void @tbuild(ptr %p1, ptr %p2, ptr %res1, ptr %res2, ptr %v) { +; CHECK-LABEL: tbuild: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxv v3, 0(r7) +; CHECK-NEXT: vmr v2, v3 +; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp34, vsp34, 1 +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp34, 0 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxvp vsp34, 96(r6) +; CHECK-NEXT: stxvp vsp36, 64(r6) +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1 +; CHECK-NEXT: stxvp vsp34, 32(r6) +; CHECK-NEXT: stxvp vsp36, 0(r6) +; CHECK-NEXT: lxvp vsp34, 0(r3) +; CHECK-NEXT: lxvp vsp36, 
32(r3) +; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1 +; CHECK-NEXT: lxvp vsp34, 64(r3) +; CHECK-NEXT: lxvp vsp36, 96(r3) +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxvp vsp34, 96(r5) +; CHECK-NEXT: stxvp vsp36, 64(r5) +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1 +; CHECK-NEXT: stxvp vsp34, 32(r5) +; CHECK-NEXT: stxvp vsp36, 0(r5) +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: tbuild: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lxv v3, 0(r7) +; CHECK-BE-NEXT: vmr v2, v3 +; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp34, vsp34, 1 +; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp34, 0 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1 +; CHECK-BE-NEXT: stxvp vsp36, 96(r6) +; CHECK-BE-NEXT: stxvp vsp34, 64(r6) +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxvp vsp36, 32(r6) +; CHECK-BE-NEXT: stxvp vsp34, 0(r6) +; CHECK-BE-NEXT: lxvp vsp34, 96(r3) +; CHECK-BE-NEXT: lxvp vsp36, 64(r3) +; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1 +; CHECK-BE-NEXT: lxvp vsp34, 32(r3) +; CHECK-BE-NEXT: lxvp vsp36, 0(r3) +; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1 +; CHECK-BE-NEXT: stxvp vsp36, 96(r5) +; CHECK-BE-NEXT: stxvp vsp34, 64(r5) +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxvp vsp36, 32(r5) +; CHECK-BE-NEXT: stxvp vsp34, 0(r5) +; CHECK-BE-NEXT: blr +entry: + %0 = load <16 x i8>, ptr %v, align 16 + %1 = tail call <1024 x i1> @llvm.ppc.mma.build.dmr(<16 x i8> %0, <16 x i8> %0, <16 x i8> %0, <16 x i8> %0, <16 x i8> %0, <16 x i8> %0, <16 x i8> %0, <16 x i8> %0) + store <1024 x i1> %1, ptr %res2, align 128 + %2 = load <1024 x i1>, ptr %p1, align 128 + tail call void @llvm.ppc.mma.disassemble.dmr(ptr %res1, <1024 x i1> %2) + ret void +} + +declare <1024 x i1> @llvm.ppc.mma.build.dmr(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 
x i8>, <16 x i8>, <16 x i8>) +declare void @llvm.ppc.mma.disassemble.dmr(ptr, <1024 x i1>) declare <1024 x i1> @llvm.ppc.mma.dmsetdmrz() declare <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1>) declare <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1>, <1024 x i1>) _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits