https://github.com/mandlebug updated https://github.com/llvm/llvm-project/pull/193786
>From e680d93e23759c64da8bb43c634100af929c63ec Mon Sep 17 00:00:00 2001 From: Sean Fertile <[email protected]> Date: Mon, 20 Apr 2026 18:38:54 -0400 Subject: [PATCH 01/26] First draft of out of line pointer glue for AIX. * The toc restore after the call breaks scheduling after the DAG has been selected. Need to investigate further. * Add MIR to the test to check the intermediate representation also. * Not sure if the option is better as a target option for the whole compilation unit, or if it should be a feature attribute on the function making the call. --- llvm/include/llvm/CodeGen/CommandFlags.h | 2 + llvm/include/llvm/Target/TargetOptions.h | 8 +- llvm/lib/CodeGen/CommandFlags.cpp | 8 +- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 82 +++++++++++++++++-- llvm/lib/Target/PowerPC/PPCSubtarget.h | 4 + .../CodeGen/PowerPC/aix-no-inline-glue.ll | 41 ++++++++++ 6 files changed, 136 insertions(+), 9 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll diff --git a/llvm/include/llvm/CodeGen/CommandFlags.h b/llvm/include/llvm/CodeGen/CommandFlags.h index d10e8732c1562..b24c73408d861 100644 --- a/llvm/include/llvm/CodeGen/CommandFlags.h +++ b/llvm/include/llvm/CodeGen/CommandFlags.h @@ -154,6 +154,8 @@ LLVM_ABI bool getJMCInstrument(); LLVM_ABI bool getXCOFFReadOnlyPointers(); +LLVM_ABI bool getXCOFFInlineGlueCode(); + enum SaveStatsMode { None, Cwd, Obj }; LLVM_ABI SaveStatsMode getSaveStats(); diff --git a/llvm/include/llvm/Target/TargetOptions.h b/llvm/include/llvm/Target/TargetOptions.h index 89e29d5f102f6..dfe87fd40d5b8 100644 --- a/llvm/include/llvm/Target/TargetOptions.h +++ b/llvm/include/llvm/Target/TargetOptions.h @@ -140,7 +140,7 @@ class TargetOptions { DebugStrictDwarf(false), Hotpatch(false), PPCGenScalarMASSEntries(false), JMCInstrument(false), EnableCFIFixup(false), MisExpect(false), XCOFFReadOnlyPointers(false), - VerifyArgABICompliance(true) {} + VerifyArgABICompliance(true), XCOFFInlineGlueCode(true) {} /// 
DisableFramePointerElim - This returns true if frame pointer elimination /// optimization should be disabled for the given machine function. @@ -353,6 +353,12 @@ class TargetOptions { /// (lack) these extensions. unsigned VerifyArgABICompliance : 1; + /// When set to true, the code to form an indirect call sequence + /// is placed inline at the call site. When false an indirect call + /// is implemented with a branch to a trampoline which contains the + /// indriect call sequence. + unsigned XCOFFInlineGlueCode : 1; + /// Name of the stack usage file (i.e., .su file) if user passes /// -fstack-usage. If empty, it can be implied that -fstack-usage is not /// passed on the command line. diff --git a/llvm/lib/CodeGen/CommandFlags.cpp b/llvm/lib/CodeGen/CommandFlags.cpp index 403fd49c56984..1de4fb9a9414d 100644 --- a/llvm/lib/CodeGen/CommandFlags.cpp +++ b/llvm/lib/CodeGen/CommandFlags.cpp @@ -120,6 +120,7 @@ CGOPT(bool, DebugStrictDwarf) CGOPT(unsigned, AlignLoops) CGOPT(bool, JMCInstrument) CGOPT(bool, XCOFFReadOnlyPointers) +CGOPT(bool, XCOFFInlineGlueCode) CGOPT(codegen::SaveStatsMode, SaveStats) #define CGBINDOPT(NAME) \ @@ -534,6 +535,11 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() { cl::init(false)); CGBINDOPT(XCOFFReadOnlyPointers); + static cl::opt<bool> XCOFFInlineGlueCode( + "xcoff-inline-glue-code", + cl::desc("Emir inline glue code for indirect calls"), cl::init(true)); + CGBINDOPT(XCOFFInlineGlueCode); + static cl::opt<bool> DisableIntegratedAS( "no-integrated-as", cl::desc("Disable integrated assembler"), cl::init(false)); @@ -635,7 +641,7 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) { Options.LoopAlignment = getAlignLoops(); Options.JMCInstrument = getJMCInstrument(); Options.XCOFFReadOnlyPointers = getXCOFFReadOnlyPointers(); - + Options.XCOFFInlineGlueCode = getXCOFFInlineGlueCode(); Options.MCOptions = mc::InitMCTargetOptionsFromFlags(); Options.ThreadModel = getThreadModel(); diff --git 
a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index d21ccb59f9962..9bd2b0107b501 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -5359,8 +5359,11 @@ static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags, // immediately followed by a load of the TOC pointer from the stack save // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC // as it is not saved or used. - RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC - : PPCISD::BCTRL; + if (!TM.Options.XCOFFInlineGlueCode) + RetOpc = PPCISD::CALL; + else + RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC + : PPCISD::BCTRL; } else if (Subtarget.isUsingPCRelativeCalls()) { assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI."); RetOpc = PPCISD::CALL_NOTOC; @@ -5539,6 +5542,11 @@ static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee, // copies together, a TOC access in the caller could be scheduled between // the assignment of the callee TOC and the branch to the callee, which leads // to incorrect code. + // On AIX there is a feature ("out of line glue code") which uses a special + // trampoline function __ptrgl to do the indirect call. If this option is + // enabled we instead simply load the address of the descriptor into r11, + // with the arguments in the 'normal' registers and branch to the __ptrgl + // stub. // Start by loading the function address from the descriptor. 
SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart); @@ -5600,6 +5608,18 @@ static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee, prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl); } +static void prepareOutOfLineGlueCall(SelectionDAG &DAG, SDValue &Callee, + SDValue &Glue, SDValue &Chain, + SDValue CallSeqStart, const CallBase *CB, + const SDLoc &dl, bool hasNest, + const PPCSubtarget &Subtarget) { + const MCRegister PtrGlueReg = Subtarget.getGlueCodeDescriptorRegister(); + SDValue MoveToPhysicalReg = + DAG.getCopyToReg(Chain, dl, PtrGlueReg, Callee, Glue); + Chain = MoveToPhysicalReg.getValue(0); + Glue = MoveToPhysicalReg.getValue(1); +} + static void buildCallOperands(SmallVectorImpl<SDValue> &Ops, PPCTargetLowering::CallFlags CFlags, const SDLoc &dl, @@ -5611,13 +5631,29 @@ buildCallOperands(SmallVectorImpl<SDValue> &Ops, // MVT for a general purpose register. const MVT RegVT = Subtarget.getScalarIntVT(); + const TargetMachine &TM = Subtarget.getTargetMachine(); + // First operand is always the chain. Ops.push_back(Chain); // If it's a direct call pass the callee as the second operand. if (!CFlags.IsIndirect) Ops.push_back(Callee); - else { + else if (!TM.Options.XCOFFInlineGlueCode) { + // An indirect call with out of line glue code. We create a target + // external symbol for '.__ptrgl' as the callee. + auto &Context = DAG.getMachineFunction().getContext(); + MCSectionXCOFF *Sec = Context.getXCOFFSection( + ".__ptrgl", SectionKind::getMetadata(), + XCOFF::CsectProperties(XCOFF::XMC_PR, XCOFF::XTY_ER)); + MCSymbolXCOFF *CalleeSym = Sec->getQualNameSymbol(); + Callee = DAG.getTargetExternalSymbol(CalleeSym->getName().data(), + Callee.getValueType(), 0); + Ops.push_back(Callee); + // Add the register used to pass the descriptor address. 
+ Ops.push_back( + DAG.getRegister(Subtarget.getGlueCodeDescriptorRegister(), RegVT)); + } else { assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect."); // For the TOC based ABIs, we have saved the TOC pointer to the linkage area @@ -5689,8 +5725,10 @@ SDValue PPCTargetLowering::FinishCall( unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins, SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const { + const auto &TM = getTargetMachine(); + if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) || - Subtarget.isAIXABI()) + (Subtarget.isAIXABI() && !TM.Options.XCOFFInlineGlueCode)) setUsesTOCBasePtr(DAG); unsigned CallOpc = @@ -5700,8 +5738,12 @@ SDValue PPCTargetLowering::FinishCall( if (!CFlags.IsIndirect) Callee = transformCallee(Callee, DAG, dl, Subtarget); else if (Subtarget.usesFunctionDescriptors()) - prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB, - dl, CFlags.HasNest, Subtarget); + if (!TM.Options.XCOFFInlineGlueCode) + prepareOutOfLineGlueCall(DAG, Callee, Glue, Chain, CallSeqStart, CB, dl, + CFlags.HasNest, Subtarget); + else + prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB, + dl, CFlags.HasNest, Subtarget); else prepareIndirectCall(DAG, Callee, Glue, Chain, dl); @@ -5745,6 +5787,30 @@ SDValue PPCTargetLowering::FinishCall( ? NumBytes : 0; + if (!TM.Options.XCOFFInlineGlueCode) { + const Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4); + const MCRegister TOCReg = Subtarget.getTOCPointerRegister(); + const MCRegister StackPtrReg = Subtarget.getStackPointerRegister(); + const unsigned TOCSaveOffset = + Subtarget.getFrameLowering()->getTOCSaveOffset(); + const MVT RegVT = Subtarget.getScalarIntVT(); + + // Load the original toc value from the stack save slot. 
+ SDValue PtrOffset = DAG.getIntPtrConstant(TOCSaveOffset, dl); + SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT); + SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, PtrOffset); + SDValue TOCLoad = DAG.getLoad( + RegVT, dl, Chain, AddPtr, + MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset), + Alignment, MachineMemOperand::MONone); + + // TODO FIXME Causing scheduling overflow ... + // Copy back to the physical toc register. + // SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCLoad, Glue); + // Chain = TOCVal.getValue(0); + // Glue = TOCVal.getValue(1); + } + Chain = DAG.getCALLSEQ_END(Chain, NumBytes, BytesCalleePops, Glue, dl); Glue = Chain.getValue(1); @@ -7745,9 +7811,11 @@ SDValue PPCTargetLowering::LowerCall_AIX( if (!MemOpChains.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); + const auto &TM = getTargetMachine(); + // For indirect calls, we need to save the TOC base to the stack for // restoration after the call. - if (CFlags.IsIndirect) { + if (CFlags.IsIndirect && TM.Options.XCOFFInlineGlueCode) { assert(!CFlags.IsTailCall && "Indirect tail-calls not supported."); const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister(); const MCRegister StackPtrReg = Subtarget.getStackPointerRegister(); diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h index 7d933588025fe..eec0e141debd4 100644 --- a/llvm/lib/Target/PowerPC/PPCSubtarget.h +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h @@ -286,6 +286,10 @@ class PPCSubtarget : public PPCGenSubtargetInfo { return IsPPC64 ? PPC::X1 : PPC::R1; } + MCRegister getGlueCodeDescriptorRegister() const { + return IsPPC64 ? 
PPC::X11 : PPC::R11; + } + bool isXRaySupported() const override { return IsPPC64 && IsLittleEndian; } bool isPredictableSelectIsExpensive() const { diff --git a/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll b/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll new file mode 100644 index 0000000000000..375cb16b4a5e2 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll @@ -0,0 +1,41 @@ +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple powerpc-ibm-aix-xcoff +; RUN: --xcoff-inline-glue-code=false < %s | FileCheck --check-prefixes=CHECK,CHECK32 %s + +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple powerpc64-ibm-aix-xcoff +; RUN: --xcoff-inline-glue-code=false < %s | FileCheck --check-prefixes=CHECK,CHECK64 %s + +@a = dso_local global i32 55, align 4 +@d = dso_local local_unnamed_addr global double 3.141590e+00, align 8 +@fp = dso_local local_unnamed_addr global ptr null, align 8 + +define i32 @caller1(ptr noundef readonly captures(none) %fp) local_unnamed_addr { +entry: + %call = tail call i32 %fp(i32 signext 1, i32 signext 2, i32 signext 3) + ret i32 %call +} + +; CHECK-LABEL: .caller1 +; CHECK-DAG: mr 11, 3 +; CHECK-DAG: li 3, 1 +; CHECK-DAG: li 4, 2 +; CHECK-DAG: li 5, 3 +; CHECK: bl .__ptrgl[PR]A +; CHECK32-NEXT: ld 2 28(r1) +; CHECK64-NEXT: ld 2, 40(r1) + +define dso_local zeroext i1 @caller2() local_unnamed_addr { +entry: + %0 = load ptr, ptr @fp + %1 = load i32, ptr @a + %2 = load double, ptr @d + %call = tail call zeroext i1 %0(i32 noundef signext %1, double noundef %2, ptr noundef nonnull @a) + ret i1 %call +} + +; CHECK-LABEL: .caller2 +; CHECK: ld , L..C{{.*}}(2) # @fp +; CHECK: ld 11, 0([[REG]]) +; CHECK: lwa 3, 0(5) +; CHECK: bl .__ptrgl[PR] +; CHECK32-NEXT: ld 2, 28(r1) +; CHECK64-NEXT: ld 2, 40(r1) >From bac5d2026579f763f92e1ee8c334c6fd1c4e6c76 Mon Sep 17 00:00:00 2001 From: Sean Fertile <[email protected]> Date: Tue, 21 Apr 2026 10:09:38 -0400 Subject: [PATCH 02/26] Add Pseudo and instructions to handle a call and restore together. 
Add a new Pseudo and Instructions for handling a direct call followed by a toc restore load. * Still needs to add rounding mode version of calls. * Missing emission of the target external symbols linkage. * Need to add MIR test to verify operands. --- .../PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp | 2 ++ llvm/lib/Target/PowerPC/P10InstrResources.td | 2 +- llvm/lib/Target/PowerPC/P9InstrResources.td | 2 ++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 2 +- llvm/lib/Target/PowerPC/PPCInstr64Bit.td | 14 ++++++++++++++ llvm/lib/Target/PowerPC/PPCInstrInfo.td | 17 +++++++++++++++++ llvm/lib/Target/PowerPC/PPCScheduleP7.td | 2 +- 7 files changed, 38 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index b28304b07e1a3..fa92d45e07f27 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -154,6 +154,8 @@ bool PPCMCCodeEmitter::isNoTOCCallInstr(const MCInst &MI) const { case PPC::TCRETURNri: case PPC::BCTRL_LWZinto_toc: case PPC::BCTRL_LWZinto_toc_RM: + case PPC::BL_RESTORE: + case PPC::BL8_RESTORE: case PPC::TAILBCTR: case PPC::TAILB: case PPC::TAILBA: diff --git a/llvm/lib/Target/PowerPC/P10InstrResources.td b/llvm/lib/Target/PowerPC/P10InstrResources.td index 842174239cc4c..825ffd56da5e0 100644 --- a/llvm/lib/Target/PowerPC/P10InstrResources.td +++ b/llvm/lib/Target/PowerPC/P10InstrResources.td @@ -317,7 +317,7 @@ def : InstRW<[P10W_BR_2C, P10W_DISP_ANY, P10BR_Read], BCLR, BCLRn, BDNZLR, BDNZLR8, BDNZLRm, BDNZLRp, BDZLR, BDZLR8, BDZLRm, BDZLRp, gBCLR, BCLRL, BCLRLn, BDNZLRL, BDNZLRLm, BDNZLRLp, BDZLRL, BDZLRLm, BDZLRLp, gBCLRL, BL, BL8, BL8_NOP, BL8_NOP_RM, BL8_NOP_TLS, BL8_NOTOC, BL8_NOTOC_RM, BL8_NOTOC_TLS, BL8_RM, BL8_TLS, BL8_TLS_, BLR, BLR8, BLRL, BL_NOP, BL_NOP_RM, BL_RM, BL_TLS, - BLA, BLA8, BLA8_NOP, BLA8_NOP_RM, BLA8_RM, BLA_RM + BLA, BLA8, BLA8_NOP, BLA8_NOP_RM, BLA8_RM, 
BLA_RM, BL_RESTORE, BL8_RESTORE )>; // 2 Cycles Branch operations, 2 input operands diff --git a/llvm/lib/Target/PowerPC/P9InstrResources.td b/llvm/lib/Target/PowerPC/P9InstrResources.td index 106faf1f8e8d2..8c7637e3b93f1 100644 --- a/llvm/lib/Target/PowerPC/P9InstrResources.td +++ b/llvm/lib/Target/PowerPC/P9InstrResources.td @@ -1326,6 +1326,8 @@ def : InstRW<[P9_BR_2C, DISP_BR_1C], BCTRL8_LDinto_toc_RM, BCTRL_LWZinto_toc_RM, BCn, + BL_RESTORE, + BL8_RESTORE, CTRL_DEP )>; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 9bd2b0107b501..c61acf1fb40c7 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -5360,7 +5360,7 @@ static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags, // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC // as it is not saved or used. if (!TM.Options.XCOFFInlineGlueCode) - RetOpc = PPCISD::CALL; + RetOpc = PPCISD::BL_LOAD_TOC; else RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC : PPCISD::BCTRL; diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td index a973654c22c38..c6458d6caa327 100644 --- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td @@ -207,6 +207,16 @@ let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1, Requires<[IsPPC64]>; } +let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1, + Defs = [LR8, X2], Uses = [RM, X1], RST = 2, RA = 1, D = 40 in { + def BL8_RESTORE : IForm_and_DForm_1<18, 0, 1, 58, + (outs), (ins calltarget:$LI), + "bl $LI\n\tld 2, 40(1)", IIC_BrB, + []>, + Requires<[IsPPC64]>; + // TODO FIXME Add _RM version of call. 
+} + } // Interpretation64Bit // FIXME: Duplicating this for the asm parser should be unnecessary, but the @@ -259,6 +269,10 @@ def : Pat<(PPCcall_rm (i64 mcsym:$dst)), def : Pat<(PPCcall_nop_rm (i64 mcsym:$dst)), (BL8_NOP_RM mcsym:$dst)>; +def : Pat<(PPCbl_load_toc (i64 texternalsym:$dst)), + (BL8_RESTORE texternalsym:$dst)>; +// TODO FIXME add _RM version. + // Atomic operations // FIXME: some of these might be used with constant operands. This will result // in constant materialization instructions that may be redundant. We currently diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index e3240a72a113f..58a1784b9faa0 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -554,6 +554,11 @@ def PPCbctrl_load_toc : SDNode<"PPCISD::BCTRL_LOAD_TOC", [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; +def PPCbl_load_toc : SDNode<"PPCISD::BL_LOAD_TOC", + SDTypeProfile<0, 1, []>, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; + // The variants that implicitly define rounding mode for calls with // strictfp semantics. def PPCcall_rm : SDNode<"PPCISD::CALL_RM", SDT_PPCCall, @@ -1862,6 +1867,14 @@ let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1, } +let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1, + Defs = [LR, R2], Uses = [RM, R1], RST = 2, RA = 1, D = 20 in { +def BL_RESTORE : IForm_and_DForm_1<18, 0, 1, 32, + (outs), (ins calltarget:$LI), + "bl $LI\n\tlwz 2, 20(1)", IIC_BrB, + []>, Requires<[IsPPC32]>; +} + let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1, Defs = [LR, R2, RM], Uses = [CTR, RM], RST = 2 in { def BCTRL_LWZinto_toc_RM: @@ -3460,6 +3473,10 @@ def : Pat<(PPCcall_nop_rm (i32 mcsym:$dst)), def : Pat<(PPCcall_nop_rm (i32 texternalsym:$dst)), (BL_NOP_RM texternalsym:$dst)>; +def : Pat<(PPCbl_load_toc (i32 texternalsym:$dst)), + (BL_RESTORE texternalsym:$dst)>; +// TODO FIXME add _RM version of call. 
+ def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm), (TCRETURNdi tglobaladdr:$dst, imm:$imm)>; diff --git a/llvm/lib/Target/PowerPC/PPCScheduleP7.td b/llvm/lib/Target/PowerPC/PPCScheduleP7.td index bf7f2f7a9c999..09b811128150f 100644 --- a/llvm/lib/Target/PowerPC/PPCScheduleP7.td +++ b/llvm/lib/Target/PowerPC/PPCScheduleP7.td @@ -128,7 +128,7 @@ let SchedModel = P7Model in { BCCCTR, BCCCTR8, BCCCTRL, BCCCTRL8, BCCL, BCCLA, BCCLR, BCCLRL, BCCTR, BCCTR8, BCCTR8n, BCCTRL, BCCTRL8, BCCTRL8n, BCCTRLn, BCCTRn, gBC, gBCA, gBCAat, gBCCTR, gBCCTRL, gBCL, gBCLA, gBCLAat, gBCLR, gBCLRL, gBCLat, gBCat, - MFCTR, MFCTR8, MFLR, MFLR8 + MFCTR, MFCTR8, MFLR, MFLR8, BL_RESTORE, BL8_RESTORE )>; def : InstRW<[P7_BRU_4C], (instrs MTLR, MTLR8, MTCTR, MTCTR8, MTCTR8loop, MTCTRloop)>; >From 955ec5e107343b0f14182e0f574588d644a4d613 Mon Sep 17 00:00:00 2001 From: Sean Fertile <[email protected]> Date: Tue, 21 Apr 2026 11:05:40 -0400 Subject: [PATCH 03/26] Emit linkage for the __ptrgl external symbol. --- llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index a7389d9bc4fe8..a5009269e88f0 100644 --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -3041,6 +3041,15 @@ void PPCAIXAsmPrinter::emitGCOVRefs() { } void PPCAIXAsmPrinter::emitEndOfAsmFile(Module &M) { + // If we are using out of line pointer glue we have to emit the + // linkage for it. + if (OutContext.hasXCOFFSection( + ".__ptrgl", XCOFF::CsectProperties(XCOFF::XMC_PR, XCOFF::XTY_ER))) { + MCSymbol *PtrGlueSym = OutContext.getOrCreateSymbol(".__ptrgl[PR]"); + OutStreamer->emitXCOFFSymbolLinkageWithVisibility(PtrGlueSym, MCSA_Extern, + MCSA_Invalid); + } + // If there are no functions and there are no toc-data definitions in this // module, we will never need to reference the TOC base. 
if (M.empty() && TOCDataGlobalVars.empty()) >From f27b9fc744cc5dc9e209c2bc17bf0c1481c98b49 Mon Sep 17 00:00:00 2001 From: Sean Fertile <[email protected]> Date: Tue, 21 Apr 2026 11:30:21 -0400 Subject: [PATCH 04/26] Remove toc-restore code from ISEL. Folded into new instr instead. --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 24 --------------------- 1 file changed, 24 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index c61acf1fb40c7..185eab1a20963 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -5787,30 +5787,6 @@ SDValue PPCTargetLowering::FinishCall( ? NumBytes : 0; - if (!TM.Options.XCOFFInlineGlueCode) { - const Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4); - const MCRegister TOCReg = Subtarget.getTOCPointerRegister(); - const MCRegister StackPtrReg = Subtarget.getStackPointerRegister(); - const unsigned TOCSaveOffset = - Subtarget.getFrameLowering()->getTOCSaveOffset(); - const MVT RegVT = Subtarget.getScalarIntVT(); - - // Load the original toc value from the stack save slot. - SDValue PtrOffset = DAG.getIntPtrConstant(TOCSaveOffset, dl); - SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT); - SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, PtrOffset); - SDValue TOCLoad = DAG.getLoad( - RegVT, dl, Chain, AddPtr, - MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset), - Alignment, MachineMemOperand::MONone); - - // TODO FIXME Causing scheduling overflow ... - // Copy back to the physical toc register. 
- // SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCLoad, Glue); - // Chain = TOCVal.getValue(0); - // Glue = TOCVal.getValue(1); - } - Chain = DAG.getCALLSEQ_END(Chain, NumBytes, BytesCalleePops, Glue, dl); Glue = Chain.getValue(1); >From 46f1d5c5b64798994aa6c0363180ba7bf9eb6525 Mon Sep 17 00:00:00 2001 From: Sean Fertile <[email protected]> Date: Tue, 21 Apr 2026 12:54:00 -0400 Subject: [PATCH 05/26] Still use TOC base with out-of-line glue. --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 185eab1a20963..13f77dd3c9928 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -5728,7 +5728,7 @@ SDValue PPCTargetLowering::FinishCall( const auto &TM = getTargetMachine(); if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) || - (Subtarget.isAIXABI() && !TM.Options.XCOFFInlineGlueCode)) + Subtarget.isAIXABI()) setUsesTOCBasePtr(DAG); unsigned CallOpc = >From b4b9a349752f343e8925f21946205ea415aa7550 Mon Sep 17 00:00:00 2001 From: Sean Fertile <[email protected]> Date: Tue, 21 Apr 2026 13:31:08 -0400 Subject: [PATCH 06/26] Fix test now that it's runnable.
--- .../CodeGen/PowerPC/aix-no-inline-glue.ll | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll b/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll index 375cb16b4a5e2..2d609a289373a 100644 --- a/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll +++ b/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll @@ -1,7 +1,7 @@ -; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple powerpc-ibm-aix-xcoff +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple powerpc-ibm-aix-xcoff \ ; RUN: --xcoff-inline-glue-code=false < %s | FileCheck --check-prefixes=CHECK,CHECK32 %s -; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple powerpc64-ibm-aix-xcoff +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple powerpc64-ibm-aix-xcoff \ ; RUN: --xcoff-inline-glue-code=false < %s | FileCheck --check-prefixes=CHECK,CHECK64 %s @a = dso_local global i32 55, align 4 @@ -19,9 +19,9 @@ entry: ; CHECK-DAG: li 3, 1 ; CHECK-DAG: li 4, 2 ; CHECK-DAG: li 5, 3 -; CHECK: bl .__ptrgl[PR]A -; CHECK32-NEXT: ld 2 28(r1) -; CHECK64-NEXT: ld 2, 40(r1) +; CHECK: bl .__ptrgl[PR] +; CHECK32-NEXT: lwz 2, 20(1) +; CHECK64-NEXT: ld 2, 40(1) define dso_local zeroext i1 @caller2() local_unnamed_addr { entry: @@ -33,9 +33,11 @@ entry: } ; CHECK-LABEL: .caller2 -; CHECK: ld , L..C{{.*}}(2) # @fp -; CHECK: ld 11, 0([[REG]]) -; CHECK: lwa 3, 0(5) +; CHECK64: ld [[REG:[0-9]+]], L..C{{[0-9]+}}(2) # @fp +; CHECK32: lwz [[REG:[0-9]+]], L..C{{[0-9]+}}(2) # @fp +; CHECK32: lwz 11, 0([[REG]]) ; CHECK: bl .__ptrgl[PR] -; CHECK32-NEXT: ld 2, 28(r1) -; CHECK64-NEXT: ld 2, 40(r1) +; CHECK32-NEXT: lwz 2, 20(1) +; CHECK64-NEXT: ld 2, 40(1) + +; CHECK: .extern .__ptrgl[PR] >From 0dd4e746e3f19cc84a84006a8a5bf079e70b8b6c Mon Sep 17 00:00:00 2001 From: Sean Fertile <[email protected]> Date: Tue, 21 Apr 2026 14:04:15 -0400 Subject: [PATCH 07/26] Add MIR tests. 
--- .../CodeGen/PowerPC/aix-no-inline-glue.ll | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll b/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll index 2d609a289373a..9d2e2ae5f345b 100644 --- a/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll +++ b/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll @@ -4,6 +4,14 @@ ; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple powerpc64-ibm-aix-xcoff \ ; RUN: --xcoff-inline-glue-code=false < %s | FileCheck --check-prefixes=CHECK,CHECK64 %s +; RUN: llc -stop-after=finalize-isel -verify-machineinstrs -mcpu=pwr8 \ +; RUN: -mtriple powerpc-ibm-aix-xcoff --xcoff-inline-glue-code=false < %s | \ +; RUN: FileCheck --check-prefix=MIR32 %s + +; RUN: llc -stop-after=finalize-isel -verify-machineinstrs -mcpu=pwr8 \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff --xcoff-inline-glue-code=false < %s | \ +; RUN: FileCheck --check-prefix=MIR64 %s + @a = dso_local global i32 55, align 4 @d = dso_local local_unnamed_addr global double 3.141590e+00, align 8 @fp = dso_local local_unnamed_addr global ptr null, align 8 @@ -23,6 +31,20 @@ entry: ; CHECK32-NEXT: lwz 2, 20(1) ; CHECK64-NEXT: ld 2, 40(1) +; MIR32: name: caller1 +; MIR32: %0:gprc = COPY $r3 +; MIR32: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 +; MIR32: $r11 = COPY %0 +; MIR32: BL_RESTORE &".__ptrgl[PR]", csr_aix32, implicit-def dead $lr, implicit-def dead $r2, implicit $rm, implicit $r1, implicit $r11, implicit $r3, implicit $r4, implicit $r5, implicit $r2, implicit-def $r1, implicit-def $r3 +; MIR32: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 + +; MIR64: name: caller1 +; MIR64: %0:g8rc = COPY $x3 +; MIR64: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 +; MIR64: $x11 = COPY %0 +; MIR64: BL8_RESTORE &".__ptrgl[PR]", csr_ppc64, implicit-def dead $lr8, implicit-def dead $x2, implicit $rm, implicit $x1, implicit $x11, implicit $x3, implicit $x4, implicit $x5, implicit $x2, implicit-def 
$r1, implicit-def $x3 +; MIR64: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + define dso_local zeroext i1 @caller2() local_unnamed_addr { entry: %0 = load ptr, ptr @fp @@ -40,4 +62,20 @@ entry: ; CHECK32-NEXT: lwz 2, 20(1) ; CHECK64-NEXT: ld 2, 40(1) +; MIR32: name: caller2 +; MIR32: %0:gprc_and_gprc_nor0 = LWZtoc @fp, $r2 :: (load (s32) from got) +; MIR32: %1:gprc = LWZ 0, killed %0 :: (dereferenceable load (s32) from @fp, align 8) +; MIR32: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 +; MIR32: $r11 = COPY %1 +; MIR32: BL_RESTORE &".__ptrgl[PR]", csr_aix32, implicit-def dead $lr, implicit-def dead $r2, implicit $rm, implicit $r1, implicit $r11, implicit $r3, implicit $f1, implicit $r6, implicit $r2, implicit-def $r1, implicit-def $r3 +; MIR32: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 + +; MIR64: name: caller2 +; MIR64: %0:g8rc_and_g8rc_nox0 = LDtoc @fp, $x2 :: (load (s64) from got) +; MIR64: %1:g8rc = LD 0, killed %0 :: (dereferenceable load (s64) from @fp) +; MIR64: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 +; MIR64: $x11 = COPY %1 +; MIR64: BL8_RESTORE &".__ptrgl[PR]", csr_ppc64, implicit-def dead $lr8, implicit-def dead $x2, implicit $rm, implicit $x1, implicit $x11, implicit $x3, implicit $f1, implicit $x5, implicit $x2, implicit-def $r1, implicit-def $x3 +; MIR64: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + ; CHECK: .extern .__ptrgl[PR] >From 0bf213e668b5b7d3f7fa39aa00e30df9ec84da99 Mon Sep 17 00:00:00 2001 From: Sean Fertile <[email protected]> Date: Tue, 21 Apr 2026 14:49:07 -0400 Subject: [PATCH 08/26] Switch to using a subtarget feature instead of TargetMachine option. Thanks Tony Varghese for the suggestion and draft implementation.
--- llvm/include/llvm/CodeGen/CommandFlags.h | 2 -- llvm/include/llvm/Target/TargetOptions.h | 8 +------- llvm/lib/CodeGen/CommandFlags.cpp | 7 ------- llvm/lib/Target/PowerPC/PPC.td | 4 ++++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 8 ++++---- llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll | 8 ++++---- 6 files changed, 13 insertions(+), 24 deletions(-) diff --git a/llvm/include/llvm/CodeGen/CommandFlags.h b/llvm/include/llvm/CodeGen/CommandFlags.h index b24c73408d861..d10e8732c1562 100644 --- a/llvm/include/llvm/CodeGen/CommandFlags.h +++ b/llvm/include/llvm/CodeGen/CommandFlags.h @@ -154,8 +154,6 @@ LLVM_ABI bool getJMCInstrument(); LLVM_ABI bool getXCOFFReadOnlyPointers(); -LLVM_ABI bool getXCOFFInlineGlueCode(); - enum SaveStatsMode { None, Cwd, Obj }; LLVM_ABI SaveStatsMode getSaveStats(); diff --git a/llvm/include/llvm/Target/TargetOptions.h b/llvm/include/llvm/Target/TargetOptions.h index dfe87fd40d5b8..89e29d5f102f6 100644 --- a/llvm/include/llvm/Target/TargetOptions.h +++ b/llvm/include/llvm/Target/TargetOptions.h @@ -140,7 +140,7 @@ class TargetOptions { DebugStrictDwarf(false), Hotpatch(false), PPCGenScalarMASSEntries(false), JMCInstrument(false), EnableCFIFixup(false), MisExpect(false), XCOFFReadOnlyPointers(false), - VerifyArgABICompliance(true), XCOFFInlineGlueCode(true) {} + VerifyArgABICompliance(true) {} /// DisableFramePointerElim - This returns true if frame pointer elimination /// optimization should be disabled for the given machine function. @@ -353,12 +353,6 @@ class TargetOptions { /// (lack) these extensions. unsigned VerifyArgABICompliance : 1; - /// When set to true, the code to form an indirect call sequence - /// is placed inline at the call site. When false an indirect call - /// is implemented with a branch to a trampoline which contains the - /// indriect call sequence. - unsigned XCOFFInlineGlueCode : 1; - /// Name of the stack usage file (i.e., .su file) if user passes /// -fstack-usage. 
If empty, it can be implied that -fstack-usage is not /// passed on the command line. diff --git a/llvm/lib/CodeGen/CommandFlags.cpp b/llvm/lib/CodeGen/CommandFlags.cpp index 1de4fb9a9414d..9459797a3d074 100644 --- a/llvm/lib/CodeGen/CommandFlags.cpp +++ b/llvm/lib/CodeGen/CommandFlags.cpp @@ -120,7 +120,6 @@ CGOPT(bool, DebugStrictDwarf) CGOPT(unsigned, AlignLoops) CGOPT(bool, JMCInstrument) CGOPT(bool, XCOFFReadOnlyPointers) -CGOPT(bool, XCOFFInlineGlueCode) CGOPT(codegen::SaveStatsMode, SaveStats) #define CGBINDOPT(NAME) \ @@ -535,11 +534,6 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() { cl::init(false)); CGBINDOPT(XCOFFReadOnlyPointers); - static cl::opt<bool> XCOFFInlineGlueCode( - "xcoff-inline-glue-code", - cl::desc("Emir inline glue code for indirect calls"), cl::init(true)); - CGBINDOPT(XCOFFInlineGlueCode); - static cl::opt<bool> DisableIntegratedAS( "no-integrated-as", cl::desc("Disable integrated assembler"), cl::init(false)); @@ -641,7 +635,6 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) { Options.LoopAlignment = getAlignLoops(); Options.JMCInstrument = getJMCInstrument(); Options.XCOFFReadOnlyPointers = getXCOFFReadOnlyPointers(); - Options.XCOFFInlineGlueCode = getXCOFFInlineGlueCode(); Options.MCOptions = mc::InitMCTargetOptionsFromFlags(); Options.ThreadModel = getThreadModel(); diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td index 7b4bae60f7e74..b6bcec5305dd3 100644 --- a/llvm/lib/Target/PowerPC/PPC.td +++ b/llvm/lib/Target/PowerPC/PPC.td @@ -362,6 +362,10 @@ def FeaturePredictableSelectIsExpensive : def FeatureFastMFLR : SubtargetFeature<"fast-MFLR", "HasFastMFLR", "true", "MFLR is a fast instruction">; +def FeatureNoInlineGlue : + SubtargetFeature<"no-inline-glue", "NoInlineGlue", "true", + "Use external ._ptrgl for indirect calls">; + //===----------------------------------------------------------------------===// // PowerPC Instruction Predicate Definitions. 
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 13f77dd3c9928..44232a8d68523 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -5359,7 +5359,7 @@ static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags, // immediately followed by a load of the TOC pointer from the stack save // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC // as it is not saved or used. - if (!TM.Options.XCOFFInlineGlueCode) + if (Subtarget.noInlineGlue()) RetOpc = PPCISD::BL_LOAD_TOC; else RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC @@ -5639,7 +5639,7 @@ buildCallOperands(SmallVectorImpl<SDValue> &Ops, // If it's a direct call pass the callee as the second operand. if (!CFlags.IsIndirect) Ops.push_back(Callee); - else if (!TM.Options.XCOFFInlineGlueCode) { + else if (Subtarget.noInlineGlue()) { // An indirect call with out of line glue code. We create a target // external symbol for '.__ptrgl' as the callee. auto &Context = DAG.getMachineFunction().getContext(); @@ -5738,7 +5738,7 @@ SDValue PPCTargetLowering::FinishCall( if (!CFlags.IsIndirect) Callee = transformCallee(Callee, DAG, dl, Subtarget); else if (Subtarget.usesFunctionDescriptors()) - if (!TM.Options.XCOFFInlineGlueCode) + if (Subtarget.noInlineGlue()) prepareOutOfLineGlueCall(DAG, Callee, Glue, Chain, CallSeqStart, CB, dl, CFlags.HasNest, Subtarget); else @@ -7791,7 +7791,7 @@ SDValue PPCTargetLowering::LowerCall_AIX( // For indirect calls, we need to save the TOC base to the stack for // restoration after the call. 
- if (CFlags.IsIndirect && TM.Options.XCOFFInlineGlueCode) { + if (CFlags.IsIndirect && !Subtarget.noInlineGlue()) { assert(!CFlags.IsTailCall && "Indirect tail-calls not supported."); const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister(); const MCRegister StackPtrReg = Subtarget.getStackPointerRegister(); diff --git a/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll b/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll index 9d2e2ae5f345b..0e45a290c1dbf 100644 --- a/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll +++ b/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll @@ -1,15 +1,15 @@ ; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple powerpc-ibm-aix-xcoff \ -; RUN: --xcoff-inline-glue-code=false < %s | FileCheck --check-prefixes=CHECK,CHECK32 %s +; RUN: -mattr=+no-inline-glue < %s | FileCheck --check-prefixes=CHECK,CHECK32 %s ; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple powerpc64-ibm-aix-xcoff \ -; RUN: --xcoff-inline-glue-code=false < %s | FileCheck --check-prefixes=CHECK,CHECK64 %s +; RUN: -mattr=+no-inline-glue < %s | FileCheck --check-prefixes=CHECK,CHECK64 %s ; RUN: llc -stop-after=finalize-isel -verify-machineinstrs -mcpu=pwr8 \ -; RUN: -mtriple powerpc-ibm-aix-xcoff --xcoff-inline-glue-code=false < %s | \ +; RUN: -mtriple powerpc-ibm-aix-xcoff -mattr=+no-inline-glue < %s | \ ; RUN: FileCheck --check-prefix=MIR32 %s ; RUN: llc -stop-after=finalize-isel -verify-machineinstrs -mcpu=pwr8 \ -; RUN: -mtriple powerpc64-ibm-aix-xcoff --xcoff-inline-glue-code=false < %s | \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff -mattr=+no-inline-glue < %s | \ ; RUN: FileCheck --check-prefix=MIR64 %s @a = dso_local global i32 55, align 4 >From 8213a269b6cdb0665d959caf2f3f5e0e8b05b9ac Mon Sep 17 00:00:00 2001 From: Sean Fertile <[email protected]> Date: Wed, 22 Apr 2026 16:04:40 -0400 Subject: [PATCH 09/26] Fix _ptrgl spelling. The symbol has a single leading underscore. 
Also use the existing transformCallee function to convert to a target external symbol and prepend the '.'. --- llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 4 ++-- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 16 +++++----------- llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll | 14 +++++++------- 3 files changed, 14 insertions(+), 20 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index a5009269e88f0..b1fb08d5d22ed 100644 --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -3044,8 +3044,8 @@ void PPCAIXAsmPrinter::emitEndOfAsmFile(Module &M) { // If we are using out of line pointer glue we have to emit the // linkage for it. if (OutContext.hasXCOFFSection( - ".__ptrgl", XCOFF::CsectProperties(XCOFF::XMC_PR, XCOFF::XTY_ER))) { - MCSymbol *PtrGlueSym = OutContext.getOrCreateSymbol(".__ptrgl[PR]"); + "._ptrgl", XCOFF::CsectProperties(XCOFF::XMC_PR, XCOFF::XTY_ER))) { + MCSymbol *PtrGlueSym = OutContext.getOrCreateSymbol("._ptrgl[PR]"); OutStreamer->emitXCOFFSymbolLinkageWithVisibility(PtrGlueSym, MCSA_Extern, MCSA_Invalid); } diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 44232a8d68523..b914d4b8d70ae 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -5640,15 +5640,6 @@ buildCallOperands(SmallVectorImpl<SDValue> &Ops, if (!CFlags.IsIndirect) Ops.push_back(Callee); else if (Subtarget.noInlineGlue()) { - // An indirect call with out of line glue code. We create a target - // external symbol for '.__ptrgl' as the callee.
- auto &Context = DAG.getMachineFunction().getContext(); - MCSectionXCOFF *Sec = Context.getXCOFFSection( - ".__ptrgl", SectionKind::getMetadata(), - XCOFF::CsectProperties(XCOFF::XMC_PR, XCOFF::XTY_ER)); - MCSymbolXCOFF *CalleeSym = Sec->getQualNameSymbol(); - Callee = DAG.getTargetExternalSymbol(CalleeSym->getName().data(), - Callee.getValueType(), 0); Ops.push_back(Callee); // Add the register used to pass the descriptor address. Ops.push_back( @@ -5738,10 +5729,13 @@ SDValue PPCTargetLowering::FinishCall( if (!CFlags.IsIndirect) Callee = transformCallee(Callee, DAG, dl, Subtarget); else if (Subtarget.usesFunctionDescriptors()) - if (Subtarget.noInlineGlue()) + if (Subtarget.noInlineGlue()) { prepareOutOfLineGlueCall(DAG, Callee, Glue, Chain, CallSeqStart, CB, dl, CFlags.HasNest, Subtarget); - else + SDValue PtrGlueCallee = + DAG.getExternalSymbol("_ptrgl", getPointerTy(DAG.getDataLayout())); + Callee = transformCallee(PtrGlueCallee, DAG, dl, Subtarget); + } else prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB, dl, CFlags.HasNest, Subtarget); else diff --git a/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll b/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll index 0e45a290c1dbf..0d7a2d988ddc2 100644 --- a/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll +++ b/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll @@ -27,7 +27,7 @@ entry: ; CHECK-DAG: li 3, 1 ; CHECK-DAG: li 4, 2 ; CHECK-DAG: li 5, 3 -; CHECK: bl .__ptrgl[PR] +; CHECK: bl ._ptrgl[PR] ; CHECK32-NEXT: lwz 2, 20(1) ; CHECK64-NEXT: ld 2, 40(1) @@ -35,14 +35,14 @@ entry: ; MIR32: %0:gprc = COPY $r3 ; MIR32: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 ; MIR32: $r11 = COPY %0 -; MIR32: BL_RESTORE &".__ptrgl[PR]", csr_aix32, implicit-def dead $lr, implicit-def dead $r2, implicit $rm, implicit $r1, implicit $r11, implicit $r3, implicit $r4, implicit $r5, implicit $r2, implicit-def $r1, implicit-def $r3 +; MIR32: BL_RESTORE &"._ptrgl[PR]", csr_aix32, implicit-def dead $lr, 
implicit-def dead $r2, implicit $rm, implicit $r1, implicit $r11, implicit $r3, implicit $r4, implicit $r5, implicit $r2, implicit-def $r1, implicit-def $r3 ; MIR32: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 ; MIR64: name: caller1 ; MIR64: %0:g8rc = COPY $x3 ; MIR64: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 ; MIR64: $x11 = COPY %0 -; MIR64: BL8_RESTORE &".__ptrgl[PR]", csr_ppc64, implicit-def dead $lr8, implicit-def dead $x2, implicit $rm, implicit $x1, implicit $x11, implicit $x3, implicit $x4, implicit $x5, implicit $x2, implicit-def $r1, implicit-def $x3 +; MIR64: BL8_RESTORE &"._ptrgl[PR]", csr_ppc64, implicit-def dead $lr8, implicit-def dead $x2, implicit $rm, implicit $x1, implicit $x11, implicit $x3, implicit $x4, implicit $x5, implicit $x2, implicit-def $r1, implicit-def $x3 ; MIR64: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 define dso_local zeroext i1 @caller2() local_unnamed_addr { @@ -58,7 +58,7 @@ entry: ; CHECK64: ld [[REG:[0-9]+]], L..C{{[0-9]+}}(2) # @fp ; CHECK32: lwz [[REG:[0-9]+]], L..C{{[0-9]+}}(2) # @fp ; CHECK32: lwz 11, 0([[REG]]) -; CHECK: bl .__ptrgl[PR] +; CHECK: bl ._ptrgl[PR] ; CHECK32-NEXT: lwz 2, 20(1) ; CHECK64-NEXT: ld 2, 40(1) @@ -67,7 +67,7 @@ entry: ; MIR32: %1:gprc = LWZ 0, killed %0 :: (dereferenceable load (s32) from @fp, align 8) ; MIR32: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 ; MIR32: $r11 = COPY %1 -; MIR32: BL_RESTORE &".__ptrgl[PR]", csr_aix32, implicit-def dead $lr, implicit-def dead $r2, implicit $rm, implicit $r1, implicit $r11, implicit $r3, implicit $f1, implicit $r6, implicit $r2, implicit-def $r1, implicit-def $r3 +; MIR32: BL_RESTORE &"._ptrgl[PR]", csr_aix32, implicit-def dead $lr, implicit-def dead $r2, implicit $rm, implicit $r1, implicit $r11, implicit $r3, implicit $f1, implicit $r6, implicit $r2, implicit-def $r1, implicit-def $r3 ; MIR32: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 ; MIR64: name: caller2 @@ -75,7 +75,7 @@ 
entry: ; MIR64: %1:g8rc = LD 0, killed %0 :: (dereferenceable load (s64) from @fp) ; MIR64: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 ; MIR64: $x11 = COPY %1 -; MIR64: BL8_RESTORE &".__ptrgl[PR]", csr_ppc64, implicit-def dead $lr8, implicit-def dead $x2, implicit $rm, implicit $x1, implicit $x11, implicit $x3, implicit $f1, implicit $x5, implicit $x2, implicit-def $r1, implicit-def $x3 +; MIR64: BL8_RESTORE &"._ptrgl[PR]", csr_ppc64, implicit-def dead $lr8, implicit-def dead $x2, implicit $rm, implicit $x1, implicit $x11, implicit $x3, implicit $f1, implicit $x5, implicit $x2, implicit-def $r1, implicit-def $x3 ; MIR64: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 -; CHECK: .extern .__ptrgl[PR] +; CHECK: .extern ._ptrgl[PR] >From e7d8a5aa0f33ce8bb11f7b5b8046701ebf327eb8 Mon Sep 17 00:00:00 2001 From: Sean Fertile <[email protected]> Date: Thu, 23 Apr 2026 10:02:43 -0400 Subject: [PATCH 10/26] Remove unused target machine locals. --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index b914d4b8d70ae..0a21e148171ad 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -5631,8 +5631,6 @@ buildCallOperands(SmallVectorImpl<SDValue> &Ops, // MVT for a general purpose register. const MVT RegVT = Subtarget.getScalarIntVT(); - const TargetMachine &TM = Subtarget.getTargetMachine(); - // First operand is always the chain.
Ops.push_back(Chain); @@ -5716,8 +5714,6 @@ SDValue PPCTargetLowering::FinishCall( unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins, SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const { - const auto &TM = getTargetMachine(); - if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) || Subtarget.isAIXABI()) setUsesTOCBasePtr(DAG); @@ -7781,8 +7777,6 @@ SDValue PPCTargetLowering::LowerCall_AIX( if (!MemOpChains.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); - const auto &TM = getTargetMachine(); - // For indirect calls, we need to save the TOC base to the stack for // restoration after the call. if (CFlags.IsIndirect && !Subtarget.noInlineGlue()) { >From 783499cd51a4e401230253c5918e4e7cc34f6d73 Mon Sep 17 00:00:00 2001 From: Sean Fertile <[email protected]> Date: Thu, 23 Apr 2026 10:09:52 -0400 Subject: [PATCH 11/26] Use the existing mechanism for emitting external symbols. --- llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index b1fb08d5d22ed..d079fc09a105c 100644 --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -3041,15 +3041,6 @@ void PPCAIXAsmPrinter::emitGCOVRefs() { } void PPCAIXAsmPrinter::emitEndOfAsmFile(Module &M) { - // If we are using out of line pointer glue we have to emit the - // linkage for it. - if (OutContext.hasXCOFFSection( - "._ptrgl", XCOFF::CsectProperties(XCOFF::XMC_PR, XCOFF::XTY_ER))) { - MCSymbol *PtrGlueSym = OutContext.getOrCreateSymbol("._ptrgl[PR]"); - OutStreamer->emitXCOFFSymbolLinkageWithVisibility(PtrGlueSym, MCSA_Extern, - MCSA_Invalid); - } - // If there are no functions and there are no toc-data definitions in this // module, we will never need to reference the TOC base.
if (M.empty() && TOCDataGlobalVars.empty()) @@ -3272,6 +3263,8 @@ void PPCAIXAsmPrinter::emitInstruction(const MachineInstr *MI) { case PPC::BL: case PPC::BL8_NOP: case PPC::BL_NOP: { + case PPC::BL_RESTORE: + case PPC::BL8_RESTORE: const MachineOperand &MO = MI->getOperand(0); if (MO.isSymbol()) { auto *S = static_cast<MCSymbolXCOFF *>( @@ -3312,6 +3305,8 @@ void PPCAIXAsmPrinter::emitInstruction(const MachineInstr *MI) { bool PPCAIXAsmPrinter::doFinalization(Module &M) { for (MCSymbol *Sym : ExtSymSDNodeSymbols) OutStreamer->emitSymbolAttribute(Sym, MCSA_Extern); + + return PPCAsmPrinter::doFinalization(M); } >From cc8cd22bc281e81604a45e789b83710ee7dd2204 Mon Sep 17 00:00:00 2001 From: Sean Fertile <[email protected]> Date: Thu, 23 Apr 2026 10:48:38 -0400 Subject: [PATCH 12/26] Move comment. --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 0a21e148171ad..943af330eaf13 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -5542,12 +5542,6 @@ static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee, // copies together, a TOC access in the caller could be scheduled between // the assignment of the callee TOC and the branch to the callee, which leads // to incorrect code. - // On AIX there is a feature ("out of line glue code") which uses a special - // trampoline function __ptrgl to do the indirect call. If this option is - // enabled we instead simply load the address of the descriptor into r11, - // with the arguments in the 'normal' registers and branch to the __ptrgl - // stub. - // Start by loading the function address from the descriptor. 
SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart); auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors() @@ -5613,6 +5607,11 @@ static void prepareOutOfLineGlueCall(SelectionDAG &DAG, SDValue &Callee, SDValue CallSeqStart, const CallBase *CB, const SDLoc &dl, bool hasNest, const PPCSubtarget &Subtarget) { + // On AIX there is a feature ("out of line glue code") which uses a special + // trampoline function ._ptrgl to do the indirect call. If this option is + // enabled we instead simply load the address of the descriptor into gpr11, + // with the arguments in the 'normal' registers and branch to the ._ptrgl + // stub. const MCRegister PtrGlueReg = Subtarget.getGlueCodeDescriptorRegister(); SDValue MoveToPhysicalReg = DAG.getCopyToReg(Chain, dl, PtrGlueReg, Callee, Glue); >From 1a0d17900841104b02bc8e9d5b72689de91c958d Mon Sep 17 00:00:00 2001 From: Sean Fertile <[email protected]> Date: Thu, 23 Apr 2026 10:50:23 -0400 Subject: [PATCH 13/26] Restore whitespace that was unintentionally changed. --- llvm/lib/CodeGen/CommandFlags.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/CodeGen/CommandFlags.cpp b/llvm/lib/CodeGen/CommandFlags.cpp index 9459797a3d074..403fd49c56984 100644 --- a/llvm/lib/CodeGen/CommandFlags.cpp +++ b/llvm/lib/CodeGen/CommandFlags.cpp @@ -635,6 +635,7 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) { Options.LoopAlignment = getAlignLoops(); Options.JMCInstrument = getJMCInstrument(); Options.XCOFFReadOnlyPointers = getXCOFFReadOnlyPointers(); + Options.MCOptions = mc::InitMCTargetOptionsFromFlags(); Options.ThreadModel = getThreadModel(); >From 1af8959100315e4bc276c623ff5752bbe7a3fe80 Mon Sep 17 00:00:00 2001 From: Sean Fertile <[email protected]> Date: Thu, 23 Apr 2026 11:28:32 -0400 Subject: [PATCH 14/26] Add check that no-inline-glue is used on AIX.
--- llvm/lib/Target/PowerPC/PPC.td | 4 +++- llvm/lib/Target/PowerPC/PPCSubtarget.cpp | 4 ++++ llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll | 5 +++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td index b6bcec5305dd3..1e60dd5606be5 100644 --- a/llvm/lib/Target/PowerPC/PPC.td +++ b/llvm/lib/Target/PowerPC/PPC.td @@ -362,9 +362,11 @@ def FeaturePredictableSelectIsExpensive : def FeatureFastMFLR : SubtargetFeature<"fast-MFLR", "HasFastMFLR", "true", "MFLR is a fast instruction">; +// When enabled indirect calls will place the address of the descriptor +// into r11 and do a direct branch to the ._ptrgl routine. def FeatureNoInlineGlue : SubtargetFeature<"no-inline-glue", "NoInlineGlue", "true", - "Use external ._ptrgl for indirect calls">; + "Use ._ptrgl for indirect calls">; //===----------------------------------------------------------------------===// // PowerPC Instruction Predicate Definitions. diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp index 85e022a2ba6fc..3ea7d70b42ccf 100644 --- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp @@ -144,6 +144,10 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef TuneCPU, report_fatal_error("The aix-shared-lib-tls-model-opt attribute " "is only supported on AIX in 64-bit mode.\n", false); + + if (NoInlineGlue && !getTargetTriple().isOSAIX()) + report_fatal_error("no-inline-glue feature is only supported on AIX\n", + false); } bool PPCSubtarget::enableMachineScheduler() const { return true; } diff --git a/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll b/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll index 0d7a2d988ddc2..295cb85079c4b 100644 --- a/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll +++ b/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll @@ -12,6 +12,11 @@ ; RUN: -mtriple powerpc64-ibm-aix-xcoff -mattr=+no-inline-glue < %s | \ ; 
RUN: FileCheck --check-prefix=MIR64 %s +; RUN: not llc -verify-machineinstrs -mcpu=pwr8 -mtriple powerpc-unknown-linux \ +; RUN: -mattr=+no-inline-glue 2>&1 < %s | FileCheck --check-prefix=ERROR %s + +; ERROR: no-inline-glue feature is only supported on AIX + @a = dso_local global i32 55, align 4 @d = dso_local local_unnamed_addr global double 3.141590e+00, align 8 @fp = dso_local local_unnamed_addr global ptr null, align 8 >From 50a85a23df901c774bf9920f7acfaaa628794860 Mon Sep 17 00:00:00 2001 From: Sean Fertile <[email protected]> Date: Thu, 23 Apr 2026 11:59:00 -0400 Subject: [PATCH 15/26] Undo whitespace change. --- llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index d079fc09a105c..60a4897352121 100644 --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -3305,8 +3305,6 @@ void PPCAIXAsmPrinter::emitInstruction(const MachineInstr *MI) { bool PPCAIXAsmPrinter::doFinalization(Module &M) { for (MCSymbol *Sym : ExtSymSDNodeSymbols) OutStreamer->emitSymbolAttribute(Sym, MCSA_Extern); - - return PPCAsmPrinter::doFinalization(M); } >From 1c58b9a322c5c31350882eb85a3e7a786358a5c6 Mon Sep 17 00:00:00 2001 From: Sean Fertile <[email protected]> Date: Fri, 24 Apr 2026 09:59:59 -0400 Subject: [PATCH 16/26] Fix fallthrough errors on switch. 
--- llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index 60a4897352121..fdf7e35283021 100644 --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -3262,9 +3262,9 @@ void PPCAIXAsmPrinter::emitInstruction(const MachineInstr *MI) { case PPC::BL8: case PPC::BL: case PPC::BL8_NOP: - case PPC::BL_NOP: { + case PPC::BL_NOP: case PPC::BL_RESTORE: - case PPC::BL8_RESTORE: + case PPC::BL8_RESTORE: { const MachineOperand &MO = MI->getOperand(0); if (MO.isSymbol()) { auto *S = static_cast<MCSymbolXCOFF *>( >From 6b6f41a7566481efc03ae1a18bb172f05817bbbe Mon Sep 17 00:00:00 2001 From: Sean Fertile <[email protected]> Date: Mon, 27 Apr 2026 14:05:10 -0400 Subject: [PATCH 17/26] Add a lit test to check mixing of inline and out of line glue. --- .../CodeGen/PowerPC/aix-mixed-inline-glue.ll | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 llvm/test/CodeGen/PowerPC/aix-mixed-inline-glue.ll diff --git a/llvm/test/CodeGen/PowerPC/aix-mixed-inline-glue.ll b/llvm/test/CodeGen/PowerPC/aix-mixed-inline-glue.ll new file mode 100644 index 0000000000000..fb86e251e912d --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-mixed-inline-glue.ll @@ -0,0 +1,31 @@ +; RUN: llc -stop-after=finalize-isel -verify-machineinstrs -mcpu=pwr8 \ +; RUN: -mtriple powerpc-ibm-aix-xcoff < %s | FileCheck %s + +; RUN: llc -stop-after=finalize-isel -verify-machineinstrs -mcpu=pwr8 \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK64 + +define i32 @OutOfLine(ptr noundef readonly captures(none) %fp) #0 { +entry: + %call = tail call i32 %fp() + ret i32 %call +} + +define i32 @InLine(ptr noundef readonly captures(none) %fp) #1 { +entry: + %call = tail call i32 %fp() + ret i32 %call +} + +attributes #0 = {"target-features"="+no-inline-glue"} +attributes #1 = 
{"target-features"="-no-inline-glue"} + +; CHECK: name: OutOfLine +; CHECK: BL_RESTORE &"._ptrgl[PR]", csr_aix32, implicit-def dead $lr, implicit-def dead $r2, implicit $rm, implicit $r1, implicit $r11, implicit $r2, implicit-def $r1, implicit-def $r3 +; CHECK: name: InLine +; CHECK: BCTRL_LWZinto_toc 20, $r1, csr_aix32, implicit-def dead $lr, implicit-def dead $r2, implicit $ctr, implicit $rm, implicit $r11, implicit $r2, implicit-def $r1, implicit-def $r3 + +; CHECK64: name: OutOfLine +; CHECK64: BL8_RESTORE &"._ptrgl[PR]", csr_ppc64, implicit-def dead $lr8, implicit-def dead $x2, implicit $rm, implicit $x1, implicit $x11, implicit $x2, implicit-def $r1, implicit-def $x3 +; CHECK64: name: InLine +; CHECK64: BCTRL8_LDinto_toc 40, $x1, csr_ppc64, implicit-def dead $lr8, implicit-def dead $x2, implicit $ctr8, implicit $rm, implicit $x11, implicit $x2, implicit-def $r1, implicit-def $x3 >From 118e4898158af4a01262aa92f66d308d47bbaf59 Mon Sep 17 00:00:00 2001 From: Sean Fertile <[email protected]> Date: Mon, 27 Apr 2026 14:07:35 -0400 Subject: [PATCH 18/26] Remove local_unnamed_addr from lit test.
--- llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll b/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll index 295cb85079c4b..76006d161f6fc 100644 --- a/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll +++ b/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll @@ -18,10 +18,10 @@ ; ERROR: no-inline-glue feature is only supported on AIX @a = dso_local global i32 55, align 4 -@d = dso_local local_unnamed_addr global double 3.141590e+00, align 8 -@fp = dso_local local_unnamed_addr global ptr null, align 8 +@d = dso_local global double 3.141590e+00, align 8 +@fp = dso_local global ptr null, align 8 -define i32 @caller1(ptr noundef readonly captures(none) %fp) local_unnamed_addr { +define i32 @caller1(ptr noundef readonly captures(none) %fp) { entry: %call = tail call i32 %fp(i32 signext 1, i32 signext 2, i32 signext 3) ret i32 %call @@ -50,7 +50,7 @@ entry: ; MIR64: BL8_RESTORE &"._ptrgl[PR]", csr_ppc64, implicit-def dead $lr8, implicit-def dead $x2, implicit $rm, implicit $x1, implicit $x11, implicit $x3, implicit $x4, implicit $x5, implicit $x2, implicit-def $r1, implicit-def $x3 ; MIR64: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 -define dso_local zeroext i1 @caller2() local_unnamed_addr { +define dso_local zeroext i1 @caller2() { entry: %0 = load ptr, ptr @fp %1 = load i32, ptr @a >From 7465f040e68f8635ddd0e52e1b34241445949a00 Mon Sep 17 00:00:00 2001 From: Sean Fertile <[email protected]> Date: Wed, 29 Apr 2026 13:28:47 -0400 Subject: [PATCH 19/26] Add rounding mode setting version of new calls. 
--- llvm/lib/Target/PowerPC/P10InstrResources.td | 2 +- llvm/lib/Target/PowerPC/P9InstrResources.td | 2 ++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 3 +++ llvm/lib/Target/PowerPC/PPCInstr64Bit.td | 17 +++++++++++---- llvm/lib/Target/PowerPC/PPCInstrInfo.td | 21 ++++++++++++++++--- .../PowerPC/aix-no-inline-glue-strictfp.ll | 18 ++++++++++++++++ 6 files changed, 55 insertions(+), 8 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/aix-no-inline-glue-strictfp.ll diff --git a/llvm/lib/Target/PowerPC/P10InstrResources.td b/llvm/lib/Target/PowerPC/P10InstrResources.td index 825ffd56da5e0..495346855a4e9 100644 --- a/llvm/lib/Target/PowerPC/P10InstrResources.td +++ b/llvm/lib/Target/PowerPC/P10InstrResources.td @@ -317,7 +317,7 @@ def : InstRW<[P10W_BR_2C, P10W_DISP_ANY, P10BR_Read], BCLR, BCLRn, BDNZLR, BDNZLR8, BDNZLRm, BDNZLRp, BDZLR, BDZLR8, BDZLRm, BDZLRp, gBCLR, BCLRL, BCLRLn, BDNZLRL, BDNZLRLm, BDNZLRLp, BDZLRL, BDZLRLm, BDZLRLp, gBCLRL, BL, BL8, BL8_NOP, BL8_NOP_RM, BL8_NOP_TLS, BL8_NOTOC, BL8_NOTOC_RM, BL8_NOTOC_TLS, BL8_RM, BL8_TLS, BL8_TLS_, BLR, BLR8, BLRL, BL_NOP, BL_NOP_RM, BL_RM, BL_TLS, - BLA, BLA8, BLA8_NOP, BLA8_NOP_RM, BLA8_RM, BLA_RM, BL_RESTORE, BL8_RESTORE + BLA, BLA8, BLA8_NOP, BLA8_NOP_RM, BLA8_RM, BLA_RM, BL_RESTORE, BL8_RESTORE, BL_RESTORE_RM, BL8_RESTORE_RM )>; // 2 Cycles Branch operations, 2 input operands diff --git a/llvm/lib/Target/PowerPC/P9InstrResources.td b/llvm/lib/Target/PowerPC/P9InstrResources.td index 8c7637e3b93f1..64d458f806ca7 100644 --- a/llvm/lib/Target/PowerPC/P9InstrResources.td +++ b/llvm/lib/Target/PowerPC/P9InstrResources.td @@ -1328,6 +1328,8 @@ def : InstRW<[P9_BR_2C, DISP_BR_1C], BCn, BL_RESTORE, BL8_RESTORE, + BL_RESTORE_RM, + BL8_RESTORE_RM, CTRL_DEP )>; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 943af330eaf13..9dddad9c00a70 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -5392,6 
+5392,9 @@ static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags, case PPCISD::BCTRL: RetOpc = PPCISD::BCTRL_RM; break; + case PPCISD::BL_LOAD_TOC: + RetOpc = PPCISD::BL_LOAD_TOC_RM; + break; case PPCISD::CALL_NOTOC: RetOpc = PPCISD::CALL_NOTOC_RM; break; diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td index c6458d6caa327..003b96d6bd405 100644 --- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td @@ -214,7 +214,15 @@ let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1, "bl $LI\n\tld 2, 40(1)", IIC_BrB, []>, Requires<[IsPPC64]>; - // TODO FIXME Add _RM version of call. +} + +let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1, + Defs = [LR8, X2, RM], Uses = [RM, X1], RST = 2, RA = 1, D = 40 in { + def BL8_RESTORE_RM : IForm_and_DForm_1<18, 0, 1, 58, + (outs), (ins calltarget:$LI), + "bl $LI\n\tld 2, 40(1)", IIC_BrB, + []>, + Requires<[IsPPC64]>; } } // Interpretation64Bit @@ -263,15 +271,16 @@ def : Pat<(PPCcall (i64 mcsym:$dst)), (BL8 mcsym:$dst)>; def : Pat<(PPCcall_nop (i64 mcsym:$dst)), (BL8_NOP mcsym:$dst)>; +def : Pat<(PPCbl_load_toc (i64 texternalsym:$dst)), + (BL8_RESTORE texternalsym:$dst)>; def : Pat<(PPCcall_rm (i64 mcsym:$dst)), (BL8_RM mcsym:$dst)>; def : Pat<(PPCcall_nop_rm (i64 mcsym:$dst)), (BL8_NOP_RM mcsym:$dst)>; -def : Pat<(PPCbl_load_toc (i64 texternalsym:$dst)), - (BL8_RESTORE texternalsym:$dst)>; -// TODO FIXME add _RM version. +def : Pat<(PPCbl_load_toc_rm (i64 texternalsym:$dst)), + (BL8_RESTORE_RM texternalsym:$dst)>; // Atomic operations // FIXME: some of these might be used with constant operands. 
This will result diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index 58a1784b9faa0..e8e6dbabcf0ba 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -577,6 +577,10 @@ def PPCbctrl_load_toc_rm : SDNode<"PPCISD::BCTRL_LOAD_TOC_RM", SDTypeProfile<0, 1, []>, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; +def PPCbl_load_toc_rm : SDNode<"PPCISD::BL_LOAD_TOC_RM", + SDTypeProfile<0, 1, []>, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; // Return with a glue operand, matched by 'blr' def PPCretglue : SDNode<"PPCISD::RET_GLUE", SDTNone, @@ -1884,6 +1888,15 @@ let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1, } +let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1, + Defs = [LR, R2, RM], Uses = [RM, R1], RST = 2, RA = 1, D = 20 in { + def BL_RESTORE_RM : + IForm_and_DForm_1<18, 0, 1, 32, + (outs), (ins calltarget:$LI), + "bl $LI\n\tlwz 2, 20(1)", IIC_BrB, + []>, Requires<[IsPPC32]>; +} + let isCodeGenOnly = 1, hasSideEffects = 0 in { let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1, @@ -3464,6 +3477,9 @@ def : Pat<(PPCcall_nop (i32 mcsym:$dst)), def : Pat<(PPCcall_nop (i32 texternalsym:$dst)), (BL_NOP texternalsym:$dst)>; +def : Pat<(PPCbl_load_toc (i32 texternalsym:$dst)), + (BL_RESTORE texternalsym:$dst)>; + def : Pat<(PPCcall_rm (i32 mcsym:$dst)), (BL_RM mcsym:$dst)>; @@ -3473,9 +3489,8 @@ def : Pat<(PPCcall_nop_rm (i32 mcsym:$dst)), def : Pat<(PPCcall_nop_rm (i32 texternalsym:$dst)), (BL_NOP_RM texternalsym:$dst)>; -def : Pat<(PPCbl_load_toc (i32 texternalsym:$dst)), - (BL_RESTORE texternalsym:$dst)>; -// TODO FIXME add _RM version of call. 
+def : Pat<(PPCbl_load_toc_rm (i32 texternalsym:$dst)), + (BL_RESTORE_RM texternalsym:$dst)>; def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm), (TCRETURNdi tglobaladdr:$dst, imm:$imm)>; diff --git a/llvm/test/CodeGen/PowerPC/aix-no-inline-glue-strictfp.ll b/llvm/test/CodeGen/PowerPC/aix-no-inline-glue-strictfp.ll new file mode 100644 index 0000000000000..9f9fe910dd46d --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-no-inline-glue-strictfp.ll @@ -0,0 +1,18 @@ +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple powerpc-ibm-aix-xcoff \ +; RUN: -stop-after=finalize-isel -mattr=+no-inline-glue < %s | FileCheck %s + +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple powerpc64-ibm-aix-xcoff \ +; RUN: -stop-after=finalize-isel -mattr=+no-inline-glue < %s | \ +; RUN: FileCheck --check-prefix=CHECK64 %s + +define i32 @has_strictfp(ptr noundef readonly captures(none) %fp) #0 { +entry: + %call = tail call i32 %fp() strictfp + ret i32 %call +} + +attributes #0 = { strictfp } + +; CHECK: BL_RESTORE_RM &"._ptrgl[PR]", csr_aix32, implicit-def dead $lr, implicit-def dead $r2, implicit-def dead $rm, implicit $rm, implicit $r1, implicit $r11, implicit $r2, implicit-def $r1, implicit-def $r3 + +; CHECK64: BL8_RESTORE_RM &"._ptrgl[PR]", csr_ppc64, implicit-def dead $lr8, implicit-def dead $x2, implicit-def dead $rm, implicit $rm, implicit $x1, implicit $x11, implicit $x2, implicit-def $r1, implicit-def $x3 >From 1dfef9f52fe9ee4b9b738282f6e09bbe2189302c Mon Sep 17 00:00:00 2001 From: Sean Fertile <[email protected]> Date: Thu, 30 Apr 2026 09:04:49 -0400 Subject: [PATCH 20/26] Missed adding rounding mode calls to debug switch in code emitter. 
--- llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index fa92d45e07f27..1acb63b7bf1aa 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -155,7 +155,9 @@ bool PPCMCCodeEmitter::isNoTOCCallInstr(const MCInst &MI) const { case PPC::BCTRL_LWZinto_toc: case PPC::BCTRL_LWZinto_toc_RM: case PPC::BL_RESTORE: + case PPC::BL_RESTORE_RM: case PPC::BL8_RESTORE: + case PPC::BL8_RESTORE_RM: case PPC::TAILBCTR: case PPC::TAILB: case PPC::TAILBA: >From dba8b5cd0a0de81c06722eede7779e7268377735 Mon Sep 17 00:00:00 2001 From: Sean Fertile <[email protected]> Date: Fri, 1 May 2026 13:05:30 -0400 Subject: [PATCH 21/26] Changed scheduling info for P7. --- llvm/lib/Target/PowerPC/PPCScheduleP7.td | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/PowerPC/PPCScheduleP7.td b/llvm/lib/Target/PowerPC/PPCScheduleP7.td index 09b811128150f..56d1114bb0d1e 100644 --- a/llvm/lib/Target/PowerPC/PPCScheduleP7.td +++ b/llvm/lib/Target/PowerPC/PPCScheduleP7.td @@ -115,6 +115,9 @@ let SchedModel = P7Model in { def : InstRW<[P7_BRU_NONE, P7_DISP_BR], (instregex "^B(L)?(A)?(8)?(_NOP|_NOTOC)?(_TLS|_RM)?(_)?$")>; + def : InstRW<[P7_BRU_NONE, P7_DISP_BR, P7_LSU_2C, P7_DISP_LS], + (instregex "^BL(8)?_RESTORE(_RM)?$")>; + def : InstRW<[P7_BRU_3C, P7_DISP_BR], (instrs BDZLRLp, BDZLRm, BDZLRp, BDZLm, BDZLp, BDZm, BDZp, BDNZ, BDNZ8, BDNZA, BDNZAm, BDNZAp, BDNZL, BDNZLA, BDNZLAm, BDNZLAp, BDNZLR, @@ -128,7 +131,7 @@ let SchedModel = P7Model in { BCCCTR, BCCCTR8, BCCCTRL, BCCCTRL8, BCCL, BCCLA, BCCLR, BCCLRL, BCCTR, BCCTR8, BCCTR8n, BCCTRL, BCCTRL8, BCCTRL8n, BCCTRLn, BCCTRn, gBC, gBCA, gBCAat, gBCCTR, gBCCTRL, gBCL, gBCLA, gBCLAat, gBCLR, gBCLRL, gBCLat, gBCat, - MFCTR, MFCTR8, MFLR, MFLR8, BL_RESTORE, BL8_RESTORE 
+ MFCTR, MFCTR8, MFLR, MFLR8 )>; def : InstRW<[P7_BRU_4C], (instrs MTLR, MTLR8, MTCTR, MTCTR8, MTCTR8loop, MTCTRloop)>; >From efa019f5e413bc5a8562cc18575d8dc29ad55da1 Mon Sep 17 00:00:00 2001 From: Sean Fertile <[email protected]> Date: Tue, 5 May 2026 11:59:22 -0400 Subject: [PATCH 22/26] Add scheduling info for P8. --- llvm/lib/Target/PowerPC/PPCScheduleP8.td | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/llvm/lib/Target/PowerPC/PPCScheduleP8.td b/llvm/lib/Target/PowerPC/PPCScheduleP8.td index 3a2d9d9b3bc19..468584e79bac3 100644 --- a/llvm/lib/Target/PowerPC/PPCScheduleP8.td +++ b/llvm/lib/Target/PowerPC/PPCScheduleP8.td @@ -261,6 +261,10 @@ let SchedModel = P8Model in { (instregex "^SUBF(M|Z)?(E)?(IC)?(4|8)?$"), (instregex "^NEG(8)?(O)?$"))>; + // Special pseudo instruction that combines a direct call with a toc restore + def : InstRW<[P8_BR_2C, P8_ISSUE_BR, P8_LU_or_LS_FX_3C, P8_ISSUE_FXLD], (instrs + (instregex "^BL(8)?_RESTORE(_RM)$"))>; + // Instructions of PM pipeline def : InstRW<[P8_PM_2C, P8_ISSUE_VSX], (instrs >From 7cc636418a155311c671600f705ddd138f020c3e Mon Sep 17 00:00:00 2001 From: Sean Fertile <[email protected]> Date: Tue, 5 May 2026 14:07:34 -0400 Subject: [PATCH 23/26] Fix the P9 related scheduling info. --- llvm/lib/Target/PowerPC/P9InstrResources.td | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/PowerPC/P9InstrResources.td b/llvm/lib/Target/PowerPC/P9InstrResources.td index 64d458f806ca7..3b754a56b0535 100644 --- a/llvm/lib/Target/PowerPC/P9InstrResources.td +++ b/llvm/lib/Target/PowerPC/P9InstrResources.td @@ -1296,6 +1296,12 @@ def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C], // Branch Instructions +// Pseduo instruction that encapsulates a branch and a toc load. 
+def : InstRW<[P9_BR_2C, DISP_BR_1C, P9_LS_4C, IP_AGEN_1C, DISP_1C], + (instrs + (instregex "BL(8)?_RESTORE(_RM)?$") +)>; + // Two Cycle Branch def : InstRW<[P9_BR_2C, DISP_BR_1C], (instrs @@ -1326,10 +1332,6 @@ def : InstRW<[P9_BR_2C, DISP_BR_1C], BCTRL8_LDinto_toc_RM, BCTRL_LWZinto_toc_RM, BCn, - BL_RESTORE, - BL8_RESTORE, - BL_RESTORE_RM, - BL8_RESTORE_RM, CTRL_DEP )>; >From de49aba23ba59f06fd65d1ca4c947113d30c4ad8 Mon Sep 17 00:00:00 2001 From: Sean Fertile <[email protected]> Date: Wed, 6 May 2026 14:14:12 -0400 Subject: [PATCH 24/26] Update P10 information. --- llvm/lib/Target/PowerPC/P10InstrResources.td | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/PowerPC/P10InstrResources.td b/llvm/lib/Target/PowerPC/P10InstrResources.td index 495346855a4e9..13d3cb9f41c41 100644 --- a/llvm/lib/Target/PowerPC/P10InstrResources.td +++ b/llvm/lib/Target/PowerPC/P10InstrResources.td @@ -317,7 +317,7 @@ def : InstRW<[P10W_BR_2C, P10W_DISP_ANY, P10BR_Read], BCLR, BCLRn, BDNZLR, BDNZLR8, BDNZLRm, BDNZLRp, BDZLR, BDZLR8, BDZLRm, BDZLRp, gBCLR, BCLRL, BCLRLn, BDNZLRL, BDNZLRLm, BDNZLRLp, BDZLRL, BDZLRLm, BDZLRLp, gBCLRL, BL, BL8, BL8_NOP, BL8_NOP_RM, BL8_NOP_TLS, BL8_NOTOC, BL8_NOTOC_RM, BL8_NOTOC_TLS, BL8_RM, BL8_TLS, BL8_TLS_, BLR, BLR8, BLRL, BL_NOP, BL_NOP_RM, BL_RM, BL_TLS, - BLA, BLA8, BLA8_NOP, BLA8_NOP_RM, BLA8_RM, BLA_RM, BL_RESTORE, BL8_RESTORE, BL_RESTORE_RM, BL8_RESTORE_RM + BLA, BLA8, BLA8_NOP, BLA8_NOP_RM, BLA8_RM, BLA_RM )>; // 2 Cycles Branch operations, 2 input operands @@ -1233,6 +1233,16 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_PAIR, P10W_FX_3C, P10FX_Read], MTCRF, MTCRF8 )>; +// 2 Cycle Branch operation, 1 input operand followed by a +// 6 cycle Load operation, 0 input operands. 
+def : InstRW<[P10W_BR_2C, P10W_DISP_ANY, P10BR_Read, P10W_LD_6C, P10W_DISP_ANY], + (instrs + BL_RESTORE, + BL_RESTORE_RM, + BL8_RESTORE, + BL8_RESTORE_RM +)>; + // 6 Cycles Load operations, 0 input operands def : InstRW<[P10W_LD_6C, P10W_DISP_ANY], (instrs >From 8c4236d4aab4c81c7944f6823c757749eb154311 Mon Sep 17 00:00:00 2001 From: Sean Fertile <[email protected]> Date: Thu, 7 May 2026 10:23:38 -0400 Subject: [PATCH 25/26] Add driver option and rename the attribute. --- clang/include/clang/Options/Options.td | 5 +++ clang/lib/Driver/ToolChains/Arch/PPC.cpp | 9 +++++ clang/test/Driver/ppc-inline-glue.c | 37 +++++++++++++++++++ llvm/lib/Target/PowerPC/PPC.td | 2 +- llvm/lib/Target/PowerPC/PPCSubtarget.cpp | 2 +- .../CodeGen/PowerPC/aix-mixed-inline-glue.ll | 4 +- .../PowerPC/aix-no-inline-glue-strictfp.ll | 4 +- .../CodeGen/PowerPC/aix-no-inline-glue.ll | 12 +++--- 8 files changed, 63 insertions(+), 12 deletions(-) create mode 100644 clang/test/Driver/ppc-inline-glue.c diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td index bffb3dfb27485..b04d41d119541 100644 --- a/clang/include/clang/Options/Options.td +++ b/clang/include/clang/Options/Options.td @@ -5964,6 +5964,11 @@ def mxcoff_roptr : Flag<["-"], "mxcoff-roptr">, Group<m_Group>, Flags<[TargetSpecific]>, Visibility<[ClangOption, CC1Option]>, HelpText<"Place constant objects with relocatable address values in the RO data section and add -bforceimprw to the linker flags (AIX only)">; def mno_xcoff_roptr : Flag<["-"], "mno-xcoff-roptr">, Group<m_Group>, TargetSpecific; +def mno_inline_glue : Flag<["-"], "mno-inline-glue">, Group<m_Group>, + HelpText<"Use ._ptrgl routine for indirect calls (AIX only)">; +def minline_glue : Flag<["-"], "minline-glue">, Group<m_Group>, + HelpText<"Emit indirect calls inline (AIX only) (default)">; + let Flags = [TargetSpecific] in { def mvx : Flag<["-"], "mvx">, Group<m_Group>; diff --git a/clang/lib/Driver/ToolChains/Arch/PPC.cpp 
b/clang/lib/Driver/ToolChains/Arch/PPC.cpp index 17051980f34fb..0bf804266cd26 100644 --- a/clang/lib/Driver/ToolChains/Arch/PPC.cpp +++ b/clang/lib/Driver/ToolChains/Arch/PPC.cpp @@ -83,6 +83,15 @@ void ppc::getPPCTargetFeatures(const Driver &D, const llvm::Triple &Triple, true) && Triple.isOSAIX()) Features.push_back("+modern-aix-as"); + + if (Arg *A = Args.getLastArg(options::OPT_mno_inline_glue, + options::OPT_minline_glue)) { + if (!Triple.isOSAIX()) + D.Diag(diag::err_drv_unsupported_opt_for_target) + << A->getAsString(Args) << Triple.str(); + else if (A->getOption().matches(options::OPT_mno_inline_glue)) + Features.push_back("+use-ptrgl-helper"); + } } ppc::ReadGOTPtrMode ppc::getPPCReadGOTPtrMode(const Driver &D, const llvm::Triple &Triple, diff --git a/clang/test/Driver/ppc-inline-glue.c b/clang/test/Driver/ppc-inline-glue.c new file mode 100644 index 0000000000000..792631974fdca --- /dev/null +++ b/clang/test/Driver/ppc-inline-glue.c @@ -0,0 +1,37 @@ +// RUN: %clang -### --target=powerpc-ibm-aix-xcoff -mno-inline-glue %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=NO_INLINE_GLUE + +// RUN: %clang -### --target=powerpc64-ibm-aix-xcoff -mno-inline-glue %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=NO_INLINE_GLUE + +// RUN: %clang -### --target=powerpc64-ibm-aix-xcoff -minline-glue %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=INLINE_GLUE + +// RUN: %clang -### --target=powerpc64-ibm-aix-xcoff %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=INLINE_GLUE + +// RUN: %clang -### --target=powerpc64-ibm-aix-xcoff -mno-inline-glue -minline-glue %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=INLINE_GLUE + +// RUN: not %clang -### --target=powerpc64le-unknown-linux-gnu -mno-inline-glue \ +// RUN: %s 2>&1 | FileCheck %s --check-prefix=ERR + +// RUN: %clang -target powerpc-unkown-aix -mno-inline-glue %s -S -emit-llvm -o - | \ +// RUN: FileCheck %s + +// RUN: %clang -target powerpc-unkown-aix -mno-inline-glue -minline-glue %s -S -emit-llvm -o - | \ +// RUN: 
FileCheck %s --check-prefix=DIS + +// NO_INLINE_GLUE: "-target-feature" "+use-ptrgl-helper" +// INLINE_GLUE-NOT: "+use-ptrgl-helper" +// ERR: error: unsupported option '-mno-inline-glue' for target 'powerpc64le-unknown-linux-gnu' + +int test(void) { + return 0; +} + +// CHECK: test() #0 { +// CHECK: attributes #0 = { +// CHECK-ON-SAME: +use-ptrgl-helper + +// DIS-NOT: +use-ptrgl-helper diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td index 1e60dd5606be5..34bdb6a52ccb1 100644 --- a/llvm/lib/Target/PowerPC/PPC.td +++ b/llvm/lib/Target/PowerPC/PPC.td @@ -365,7 +365,7 @@ def FeatureFastMFLR : SubtargetFeature<"fast-MFLR", "HasFastMFLR", "true", // When enabled indirect calls will place the address of the descriptor // into r11 and do a direct branch to the ._ptrgl routine. def FeatureNoInlineGlue : - SubtargetFeature<"no-inline-glue", "NoInlineGlue", "true", + SubtargetFeature<"use-ptrgl-helper", "NoInlineGlue", "true", "Use ._ptrgl for indirect calls">; //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp index 3ea7d70b42ccf..265f8877c35ac 100644 --- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp @@ -146,7 +146,7 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef TuneCPU, false); if (NoInlineGlue && !getTargetTriple().isOSAIX()) - report_fatal_error("no-inline-glue feature is only supported on AIX\n", + report_fatal_error("use-ptrgl-helper feature is only supported on AIX\n", false); } diff --git a/llvm/test/CodeGen/PowerPC/aix-mixed-inline-glue.ll b/llvm/test/CodeGen/PowerPC/aix-mixed-inline-glue.ll index fb86e251e912d..4f795ac7a7c34 100644 --- a/llvm/test/CodeGen/PowerPC/aix-mixed-inline-glue.ll +++ b/llvm/test/CodeGen/PowerPC/aix-mixed-inline-glue.ll @@ -17,8 +17,8 @@ entry: ret i32 %call } -attributes #0 = {"target-features"="+no-inline-glue"} 
-attributes #1 = {"target-features"="-no-inline-glue"} +attributes #0 = {"target-features"="+use-ptrgl-helper"} +attributes #1 = {"target-features"="-use-ptrgl-helper"} ; CHECK: name: OutOfLine ; CHECK: BL_RESTORE &"._ptrgl[PR]", csr_aix32, implicit-def dead $lr, implicit-def dead $r2, implicit $rm, implicit $r1, implicit $r11, implicit $r2, implicit-def $r1, implicit-def $r3 diff --git a/llvm/test/CodeGen/PowerPC/aix-no-inline-glue-strictfp.ll b/llvm/test/CodeGen/PowerPC/aix-no-inline-glue-strictfp.ll index 9f9fe910dd46d..8de134a22cb83 100644 --- a/llvm/test/CodeGen/PowerPC/aix-no-inline-glue-strictfp.ll +++ b/llvm/test/CodeGen/PowerPC/aix-no-inline-glue-strictfp.ll @@ -1,8 +1,8 @@ ; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple powerpc-ibm-aix-xcoff \ -; RUN: -stop-after=finalize-isel -mattr=+no-inline-glue < %s | FileCheck %s +; RUN: -stop-after=finalize-isel -mattr=+use-ptrgl-helper < %s | FileCheck %s ; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple powerpc64-ibm-aix-xcoff \ -; RUN: -stop-after=finalize-isel -mattr=+no-inline-glue < %s | \ +; RUN: -stop-after=finalize-isel -mattr=+use-ptrgl-helper < %s | \ ; RUN: FileCheck --check-prefix=CHECK64 %s define i32 @has_strictfp(ptr noundef readonly captures(none) %fp) #0 { diff --git a/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll b/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll index 76006d161f6fc..3ba322dfa3a26 100644 --- a/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll +++ b/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll @@ -1,21 +1,21 @@ ; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple powerpc-ibm-aix-xcoff \ -; RUN: -mattr=+no-inline-glue < %s | FileCheck --check-prefixes=CHECK,CHECK32 %s +; RUN: -mattr=+use-ptrgl-helper < %s | FileCheck --check-prefixes=CHECK,CHECK32 %s ; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple powerpc64-ibm-aix-xcoff \ -; RUN: -mattr=+no-inline-glue < %s | FileCheck --check-prefixes=CHECK,CHECK64 %s +; RUN: -mattr=+use-ptrgl-helper < %s | FileCheck 
--check-prefixes=CHECK,CHECK64 %s ; RUN: llc -stop-after=finalize-isel -verify-machineinstrs -mcpu=pwr8 \ -; RUN: -mtriple powerpc-ibm-aix-xcoff -mattr=+no-inline-glue < %s | \ +; RUN: -mtriple powerpc-ibm-aix-xcoff -mattr=+use-ptrgl-helper < %s | \ ; RUN: FileCheck --check-prefix=MIR32 %s ; RUN: llc -stop-after=finalize-isel -verify-machineinstrs -mcpu=pwr8 \ -; RUN: -mtriple powerpc64-ibm-aix-xcoff -mattr=+no-inline-glue < %s | \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff -mattr=+use-ptrgl-helper < %s | \ ; RUN: FileCheck --check-prefix=MIR64 %s ; RUN: not llc -verify-machineinstrs -mcpu=pwr8 -mtriple powerpc-unknown-linux \ -; RUN: -mattr=+no-inline-glue 2>&1 < %s | FileCheck --check-prefix=ERROR %s +; RUN: -mattr=+use-ptrgl-helper 2>&1 < %s | FileCheck --check-prefix=ERROR %s -; ERROR: no-inline-glue feature is only supported on AIX +; ERROR: use-ptrgl-helper feature is only supported on AIX @a = dso_local global i32 55, align 4 @d = dso_local global double 3.141590e+00, align 8 >From 0ebc909c18c96b2872bdcb65c9defdb07aa04b5c Mon Sep 17 00:00:00 2001 From: Sean Fertile <[email protected]> Date: Thu, 7 May 2026 11:12:27 -0400 Subject: [PATCH 26/26] Rename new instructions to be more descriptive. Changed the names from _RESTORE to _LWZinto_toc and _LDinto_toc to match the existing BCTRL_ based instructions. Also add an IsAIX predicate guarding them. The existing scheduling regexes for P8 match the new instructions so I had to remove the specific new scheduling info for them. 
--- .../PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp | 8 ++--- llvm/lib/Target/PowerPC/P10InstrResources.td | 8 ++--- llvm/lib/Target/PowerPC/P9InstrResources.td | 5 ++- llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 6 ++-- llvm/lib/Target/PowerPC/PPCInstr64Bit.td | 30 ++++++++-------- llvm/lib/Target/PowerPC/PPCInstrInfo.td | 36 ++++++++++--------- llvm/lib/Target/PowerPC/PPCScheduleP7.td | 7 +++- llvm/lib/Target/PowerPC/PPCScheduleP8.td | 4 --- .../CodeGen/PowerPC/aix-mixed-inline-glue.ll | 4 +-- .../PowerPC/aix-no-inline-glue-strictfp.ll | 4 +-- .../CodeGen/PowerPC/aix-no-inline-glue.ll | 8 ++--- 11 files changed, 65 insertions(+), 55 deletions(-) diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index 1acb63b7bf1aa..64427e97f729c 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -154,10 +154,10 @@ bool PPCMCCodeEmitter::isNoTOCCallInstr(const MCInst &MI) const { case PPC::TCRETURNri: case PPC::BCTRL_LWZinto_toc: case PPC::BCTRL_LWZinto_toc_RM: - case PPC::BL_RESTORE: - case PPC::BL_RESTORE_RM: - case PPC::BL8_RESTORE: - case PPC::BL8_RESTORE_RM: + case PPC::BL_LWZinto_toc: + case PPC::BL_LWZinto_toc_RM: + case PPC::BL8_LDinto_toc: + case PPC::BL8_LDinto_toc_RM: case PPC::TAILBCTR: case PPC::TAILB: case PPC::TAILBA: diff --git a/llvm/lib/Target/PowerPC/P10InstrResources.td b/llvm/lib/Target/PowerPC/P10InstrResources.td index 13d3cb9f41c41..91c23622c99cd 100644 --- a/llvm/lib/Target/PowerPC/P10InstrResources.td +++ b/llvm/lib/Target/PowerPC/P10InstrResources.td @@ -1237,10 +1237,10 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_PAIR, P10W_FX_3C, P10FX_Read], // 6 cycle Load operation, 0 input operands. 
def : InstRW<[P10W_BR_2C, P10W_DISP_ANY, P10BR_Read, P10W_LD_6C, P10W_DISP_ANY], (instrs - BL_RESTORE, - BL_RESTORE_RM, - BL8_RESTORE, - BL8_RESTORE_RM + BL_LWZinto_toc, + BL_LWZinto_toc_RM, + BL8_LDinto_toc, + BL8_LDinto_toc_RM )>; // 6 Cycles Load operations, 0 input operands diff --git a/llvm/lib/Target/PowerPC/P9InstrResources.td b/llvm/lib/Target/PowerPC/P9InstrResources.td index 3b754a56b0535..b72671eefc7fd 100644 --- a/llvm/lib/Target/PowerPC/P9InstrResources.td +++ b/llvm/lib/Target/PowerPC/P9InstrResources.td @@ -1299,7 +1299,10 @@ def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C], // Pseduo instruction that encapsulates a branch and a toc load. def : InstRW<[P9_BR_2C, DISP_BR_1C, P9_LS_4C, IP_AGEN_1C, DISP_1C], (instrs - (instregex "BL(8)?_RESTORE(_RM)?$") + BL_LWZinto_toc, + BL_LWZinto_toc_RM, + BL8_LDinto_toc, + BL8_LDinto_toc_RM )>; // Two Cycle Branch diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index fdf7e35283021..25432c257eed8 100644 --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -3263,8 +3263,10 @@ void PPCAIXAsmPrinter::emitInstruction(const MachineInstr *MI) { case PPC::BL: case PPC::BL8_NOP: case PPC::BL_NOP: - case PPC::BL_RESTORE: - case PPC::BL8_RESTORE: { + case PPC::BL_LWZinto_toc: + case PPC::BL_LWZinto_toc_RM: + case PPC::BL8_LDinto_toc: + case PPC::BL8_LDinto_toc_RM: { const MachineOperand &MO = MI->getOperand(0); if (MO.isSymbol()) { auto *S = static_cast<MCSymbolXCOFF *>( diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td index 003b96d6bd405..25b193e45c279 100644 --- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td @@ -207,22 +207,24 @@ let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1, Requires<[IsPPC64]>; } -let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1, - Defs = [LR8, X2], Uses = [RM, X1], RST = 2, RA = 1, D = 40 in { - def 
BL8_RESTORE : IForm_and_DForm_1<18, 0, 1, 58, - (outs), (ins calltarget:$LI), - "bl $LI\n\tld 2, 40(1)", IIC_BrB, - []>, - Requires<[IsPPC64]>; -} - -let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1, - Defs = [LR8, X2, RM], Uses = [RM, X1], RST = 2, RA = 1, D = 40 in { - def BL8_RESTORE_RM : IForm_and_DForm_1<18, 0, 1, 58, +let Predicates = [IsAIX] in { + let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1, + Defs = [LR8, X2], Uses = [RM, X1], RST = 2, RA = 1, D = 40 in { + def BL8_LDinto_toc : IForm_and_DForm_1<18, 0, 1, 58, (outs), (ins calltarget:$LI), "bl $LI\n\tld 2, 40(1)", IIC_BrB, []>, Requires<[IsPPC64]>; + } + + let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1, + Defs = [LR8, X2, RM], Uses = [RM, X1], RST = 2, RA = 1, D = 40 in { + def BL8_LDinto_toc_RM : IForm_and_DForm_1<18, 0, 1, 58, + (outs), (ins calltarget:$LI), + "bl $LI\n\tld 2, 40(1)", IIC_BrB, + []>, + Requires<[IsPPC64]>; + } } } // Interpretation64Bit @@ -272,7 +274,7 @@ def : Pat<(PPCcall (i64 mcsym:$dst)), def : Pat<(PPCcall_nop (i64 mcsym:$dst)), (BL8_NOP mcsym:$dst)>; def : Pat<(PPCbl_load_toc (i64 texternalsym:$dst)), - (BL8_RESTORE texternalsym:$dst)>; + (BL8_LDinto_toc texternalsym:$dst)>; def : Pat<(PPCcall_rm (i64 mcsym:$dst)), (BL8_RM mcsym:$dst)>; @@ -280,7 +282,7 @@ def : Pat<(PPCcall_nop_rm (i64 mcsym:$dst)), (BL8_NOP_RM mcsym:$dst)>; def : Pat<(PPCbl_load_toc_rm (i64 texternalsym:$dst)), - (BL8_RESTORE_RM texternalsym:$dst)>; + (BL8_LDinto_toc_RM texternalsym:$dst)>; // Atomic operations // FIXME: some of these might be used with constant operands. 
This will result diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index e8e6dbabcf0ba..1eef821957691 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -1871,12 +1871,23 @@ let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1, } -let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1, - Defs = [LR, R2], Uses = [RM, R1], RST = 2, RA = 1, D = 20 in { -def BL_RESTORE : IForm_and_DForm_1<18, 0, 1, 32, - (outs), (ins calltarget:$LI), - "bl $LI\n\tlwz 2, 20(1)", IIC_BrB, - []>, Requires<[IsPPC32]>; +let Predicates = [IsAIX] in { + let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1, + Defs = [LR, R2], Uses = [RM, R1], RST = 2, RA = 1, D = 20 in { + def BL_LWZinto_toc : IForm_and_DForm_1<18, 0, 1, 32, + (outs), (ins calltarget:$LI), + "bl $LI\n\tlwz 2, 20(1)", IIC_BrB, + []>, Requires<[IsPPC32]>; + } + + let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1, + Defs = [LR, R2, RM], Uses = [RM, R1], RST = 2, RA = 1, D = 20 in { + def BL_LWZinto_toc_RM : + IForm_and_DForm_1<18, 0, 1, 32, + (outs), (ins calltarget:$LI), + "bl $LI\n\tlwz 2, 20(1)", IIC_BrB, + []>, Requires<[IsPPC32]>; + } } let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1, @@ -1888,15 +1899,6 @@ let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1, } -let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1, - Defs = [LR, R2, RM], Uses = [RM, R1], RST = 2, RA = 1, D = 20 in { - def BL_RESTORE_RM : - IForm_and_DForm_1<18, 0, 1, 32, - (outs), (ins calltarget:$LI), - "bl $LI\n\tlwz 2, 20(1)", IIC_BrB, - []>, Requires<[IsPPC32]>; -} - let isCodeGenOnly = 1, hasSideEffects = 0 in { let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1, @@ -3478,7 +3480,7 @@ def : Pat<(PPCcall_nop (i32 texternalsym:$dst)), (BL_NOP texternalsym:$dst)>; def : Pat<(PPCbl_load_toc (i32 texternalsym:$dst)), - (BL_RESTORE texternalsym:$dst)>; + (BL_LWZinto_toc texternalsym:$dst)>; def : Pat<(PPCcall_rm (i32 mcsym:$dst)), (BL_RM mcsym:$dst)>; @@ 
-3490,7 +3492,7 @@ def : Pat<(PPCcall_nop_rm (i32 texternalsym:$dst)), (BL_NOP_RM texternalsym:$dst)>; def : Pat<(PPCbl_load_toc_rm (i32 texternalsym:$dst)), - (BL_RESTORE_RM texternalsym:$dst)>; + (BL_LWZinto_toc_RM texternalsym:$dst)>; def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm), (TCRETURNdi tglobaladdr:$dst, imm:$imm)>; diff --git a/llvm/lib/Target/PowerPC/PPCScheduleP7.td b/llvm/lib/Target/PowerPC/PPCScheduleP7.td index 56d1114bb0d1e..ed8fadaaef74c 100644 --- a/llvm/lib/Target/PowerPC/PPCScheduleP7.td +++ b/llvm/lib/Target/PowerPC/PPCScheduleP7.td @@ -116,7 +116,12 @@ let SchedModel = P7Model in { (instregex "^B(L)?(A)?(8)?(_NOP|_NOTOC)?(_TLS|_RM)?(_)?$")>; def : InstRW<[P7_BRU_NONE, P7_DISP_BR, P7_LSU_2C, P7_DISP_LS], - (instregex "^BL(8)?_RESTORE(_RM)?$")>; + (instrs + BL_LWZinto_toc, + BL_LWZinto_toc_RM, + BL8_LDinto_toc, + BL8_LDinto_toc_RM + )>; def : InstRW<[P7_BRU_3C, P7_DISP_BR], (instrs BDZLRLp, BDZLRm, BDZLRp, BDZLm, BDZLp, BDZm, BDZp, diff --git a/llvm/lib/Target/PowerPC/PPCScheduleP8.td b/llvm/lib/Target/PowerPC/PPCScheduleP8.td index 468584e79bac3..3a2d9d9b3bc19 100644 --- a/llvm/lib/Target/PowerPC/PPCScheduleP8.td +++ b/llvm/lib/Target/PowerPC/PPCScheduleP8.td @@ -261,10 +261,6 @@ let SchedModel = P8Model in { (instregex "^SUBF(M|Z)?(E)?(IC)?(4|8)?$"), (instregex "^NEG(8)?(O)?$"))>; - // Special pseudo instruction that combines a direct call with a toc restore - def : InstRW<[P8_BR_2C, P8_ISSUE_BR, P8_LU_or_LS_FX_3C, P8_ISSUE_FXLD], (instrs - (instregex "^BL(8)?_RESTORE(_RM)$"))>; - // Instructions of PM pipeline def : InstRW<[P8_PM_2C, P8_ISSUE_VSX], (instrs diff --git a/llvm/test/CodeGen/PowerPC/aix-mixed-inline-glue.ll b/llvm/test/CodeGen/PowerPC/aix-mixed-inline-glue.ll index 4f795ac7a7c34..855137c93d7ca 100644 --- a/llvm/test/CodeGen/PowerPC/aix-mixed-inline-glue.ll +++ b/llvm/test/CodeGen/PowerPC/aix-mixed-inline-glue.ll @@ -21,11 +21,11 @@ attributes #0 = {"target-features"="+use-ptrgl-helper"} attributes #1 = 
{"target-features"="-use-ptrgl-helper"} ; CHECK: name: OutOfLine -; CHECK: BL_RESTORE &"._ptrgl[PR]", csr_aix32, implicit-def dead $lr, implicit-def dead $r2, implicit $rm, implicit $r1, implicit $r11, implicit $r2, implicit-def $r1, implicit-def $r3 +; CHECK: BL_LWZinto_toc &"._ptrgl[PR]", csr_aix32, implicit-def dead $lr, implicit-def dead $r2, implicit $rm, implicit $r1, implicit $r11, implicit $r2, implicit-def $r1, implicit-def $r3 ; CHECK: name: InLine ; CEHCK: BCTRL_LWZinto_toc 20, $r1, csr_aix32, implicit-def dead $lr, implicit-def dead $r2, implicit $ctr, implicit $rm, implicit $r11, implicit $r2, implicit-def $r1, implicit-def $r3 ; CHECK64: name: OutOfLine -; CHECK64: BL8_RESTORE &"._ptrgl[PR]", csr_ppc64, implicit-def dead $lr8, implicit-def dead $x2, implicit $rm, implicit $x1, implicit $x11, implicit $x2, implicit-def $r1, implicit-def $x3 +; CHECK64: BL8_LDinto_toc &"._ptrgl[PR]", csr_ppc64, implicit-def dead $lr8, implicit-def dead $x2, implicit $rm, implicit $x1, implicit $x11, implicit $x2, implicit-def $r1, implicit-def $x3 ; CHECK64: name: InLine ; CHECK64: BCTRL8_LDinto_toc 40, $x1, csr_ppc64, implicit-def dead $lr8, implicit-def dead $x2, implicit $ctr8, implicit $rm, implicit $x11, implicit $x2, implicit-def $r1, implicit-def $x3 diff --git a/llvm/test/CodeGen/PowerPC/aix-no-inline-glue-strictfp.ll b/llvm/test/CodeGen/PowerPC/aix-no-inline-glue-strictfp.ll index 8de134a22cb83..dd0e88431d9c2 100644 --- a/llvm/test/CodeGen/PowerPC/aix-no-inline-glue-strictfp.ll +++ b/llvm/test/CodeGen/PowerPC/aix-no-inline-glue-strictfp.ll @@ -13,6 +13,6 @@ entry: attributes #0 = { strictfp } -; CHECK: BL_RESTORE_RM &"._ptrgl[PR]", csr_aix32, implicit-def dead $lr, implicit-def dead $r2, implicit-def dead $rm, implicit $rm, implicit $r1, implicit $r11, implicit $r2, implicit-def $r1, implicit-def $r3 +; CHECK: BL_LWZinto_toc_RM &"._ptrgl[PR]", csr_aix32, implicit-def dead $lr, implicit-def dead $r2, implicit-def dead $rm, implicit $rm, implicit $r1, implicit 
$r11, implicit $r2, implicit-def $r1, implicit-def $r3 -; CHECK64: BL8_RESTORE_RM &"._ptrgl[PR]", csr_ppc64, implicit-def dead $lr8, implicit-def dead $x2, implicit-def dead $rm, implicit $rm, implicit $x1, implicit $x11, implicit $x2, implicit-def $r1, implicit-def $x3 +; CHECK64: BL8_LDinto_toc_RM &"._ptrgl[PR]", csr_ppc64, implicit-def dead $lr8, implicit-def dead $x2, implicit-def dead $rm, implicit $rm, implicit $x1, implicit $x11, implicit $x2, implicit-def $r1, implicit-def $x3 diff --git a/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll b/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll index 3ba322dfa3a26..83c390c1c2acd 100644 --- a/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll +++ b/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll @@ -40,14 +40,14 @@ entry: ; MIR32: %0:gprc = COPY $r3 ; MIR32: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 ; MIR32: $r11 = COPY %0 -; MIR32: BL_RESTORE &"._ptrgl[PR]", csr_aix32, implicit-def dead $lr, implicit-def dead $r2, implicit $rm, implicit $r1, implicit $r11, implicit $r3, implicit $r4, implicit $r5, implicit $r2, implicit-def $r1, implicit-def $r3 +; MIR32: BL_LWZinto_toc &"._ptrgl[PR]", csr_aix32, implicit-def dead $lr, implicit-def dead $r2, implicit $rm, implicit $r1, implicit $r11, implicit $r3, implicit $r4, implicit $r5, implicit $r2, implicit-def $r1, implicit-def $r3 ; MIR32: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 ; MIR64: name: caller1 ; MIR64: %0:g8rc = COPY $x3 ; MIR64: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 ; MIR64: $x11 = COPY %0 -; MIR64: BL8_RESTORE &"._ptrgl[PR]", csr_ppc64, implicit-def dead $lr8, implicit-def dead $x2, implicit $rm, implicit $x1, implicit $x11, implicit $x3, implicit $x4, implicit $x5, implicit $x2, implicit-def $r1, implicit-def $x3 +; MIR64: BL8_LDinto_toc &"._ptrgl[PR]", csr_ppc64, implicit-def dead $lr8, implicit-def dead $x2, implicit $rm, implicit $x1, implicit $x11, implicit $x3, implicit $x4, implicit $x5, implicit $x2, 
implicit-def $r1, implicit-def $x3 ; MIR64: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 define dso_local zeroext i1 @caller2() { @@ -72,7 +72,7 @@ entry: ; MIR32: %1:gprc = LWZ 0, killed %0 :: (dereferenceable load (s32) from @fp, align 8) ; MIR32: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 ; MIR32: $r11 = COPY %1 -; MIR32: BL_RESTORE &"._ptrgl[PR]", csr_aix32, implicit-def dead $lr, implicit-def dead $r2, implicit $rm, implicit $r1, implicit $r11, implicit $r3, implicit $f1, implicit $r6, implicit $r2, implicit-def $r1, implicit-def $r3 +; MIR32: BL_LWZinto_toc &"._ptrgl[PR]", csr_aix32, implicit-def dead $lr, implicit-def dead $r2, implicit $rm, implicit $r1, implicit $r11, implicit $r3, implicit $f1, implicit $r6, implicit $r2, implicit-def $r1, implicit-def $r3 ; MIR32: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 ; MIR64: name: caller2 @@ -80,7 +80,7 @@ entry: ; MIR64: %1:g8rc = LD 0, killed %0 :: (dereferenceable load (s64) from @fp) ; MIR64: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 ; MIR64: $x11 = COPY %1 -; MIR64: BL8_RESTORE &"._ptrgl[PR]", csr_ppc64, implicit-def dead $lr8, implicit-def dead $x2, implicit $rm, implicit $x1, implicit $x11, implicit $x3, implicit $f1, implicit $x5, implicit $x2, implicit-def $r1, implicit-def $x3 +; MIR64: BL8_LDinto_toc &"._ptrgl[PR]", csr_ppc64, implicit-def dead $lr8, implicit-def dead $x2, implicit $rm, implicit $x1, implicit $x11, implicit $x3, implicit $f1, implicit $x5, implicit $x2, implicit-def $r1, implicit-def $x3 ; MIR64: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 ; CHECK: .extern ._ptrgl[PR] _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
