https://github.com/mandlebug updated 
https://github.com/llvm/llvm-project/pull/193786

>From e680d93e23759c64da8bb43c634100af929c63ec Mon Sep 17 00:00:00 2001
From: Sean Fertile <[email protected]>
Date: Mon, 20 Apr 2026 18:38:54 -0400
Subject: [PATCH 01/26] First draft of out of line pointer glue for AIX.

* The toc restore after the call breaks scheduling after the DAG has
  been selected. Need to investigate further.
* Add MIR to the test to check the intermediate representation also.
* Not sure if the option is better as a target option for the whole
  compilation unit, or if it should be a feature attribute on the
  function making the call.
---
 llvm/include/llvm/CodeGen/CommandFlags.h      |  2 +
 llvm/include/llvm/Target/TargetOptions.h      |  8 +-
 llvm/lib/CodeGen/CommandFlags.cpp             |  8 +-
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   | 82 +++++++++++++++++--
 llvm/lib/Target/PowerPC/PPCSubtarget.h        |  4 +
 .../CodeGen/PowerPC/aix-no-inline-glue.ll     | 41 ++++++++++
 6 files changed, 136 insertions(+), 9 deletions(-)
 create mode 100644 llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll

diff --git a/llvm/include/llvm/CodeGen/CommandFlags.h 
b/llvm/include/llvm/CodeGen/CommandFlags.h
index d10e8732c1562..b24c73408d861 100644
--- a/llvm/include/llvm/CodeGen/CommandFlags.h
+++ b/llvm/include/llvm/CodeGen/CommandFlags.h
@@ -154,6 +154,8 @@ LLVM_ABI bool getJMCInstrument();
 
 LLVM_ABI bool getXCOFFReadOnlyPointers();
 
+LLVM_ABI bool getXCOFFInlineGlueCode();
+
 enum SaveStatsMode { None, Cwd, Obj };
 
 LLVM_ABI SaveStatsMode getSaveStats();
diff --git a/llvm/include/llvm/Target/TargetOptions.h 
b/llvm/include/llvm/Target/TargetOptions.h
index 89e29d5f102f6..dfe87fd40d5b8 100644
--- a/llvm/include/llvm/Target/TargetOptions.h
+++ b/llvm/include/llvm/Target/TargetOptions.h
@@ -140,7 +140,7 @@ class TargetOptions {
         DebugStrictDwarf(false), Hotpatch(false),
         PPCGenScalarMASSEntries(false), JMCInstrument(false),
         EnableCFIFixup(false), MisExpect(false), XCOFFReadOnlyPointers(false),
-        VerifyArgABICompliance(true) {}
+        VerifyArgABICompliance(true), XCOFFInlineGlueCode(true) {}
 
   /// DisableFramePointerElim - This returns true if frame pointer elimination
   /// optimization should be disabled for the given machine function.
@@ -353,6 +353,12 @@ class TargetOptions {
   /// (lack) these extensions.
   unsigned VerifyArgABICompliance : 1;
 
+  /// When set to true, the code to form an indirect call sequence
+  /// is placed inline at the call site. When false an indirect call
+  /// is implemented with a branch to a trampoline which contains the
+  /// indriect call sequence.
+  unsigned XCOFFInlineGlueCode : 1;
+
   /// Name of the stack usage file (i.e., .su file) if user passes
   /// -fstack-usage. If empty, it can be implied that -fstack-usage is not
   /// passed on the command line.
diff --git a/llvm/lib/CodeGen/CommandFlags.cpp 
b/llvm/lib/CodeGen/CommandFlags.cpp
index 403fd49c56984..1de4fb9a9414d 100644
--- a/llvm/lib/CodeGen/CommandFlags.cpp
+++ b/llvm/lib/CodeGen/CommandFlags.cpp
@@ -120,6 +120,7 @@ CGOPT(bool, DebugStrictDwarf)
 CGOPT(unsigned, AlignLoops)
 CGOPT(bool, JMCInstrument)
 CGOPT(bool, XCOFFReadOnlyPointers)
+CGOPT(bool, XCOFFInlineGlueCode)
 CGOPT(codegen::SaveStatsMode, SaveStats)
 
 #define CGBINDOPT(NAME)                                                        
\
@@ -534,6 +535,11 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
       cl::init(false));
   CGBINDOPT(XCOFFReadOnlyPointers);
 
+  static cl::opt<bool> XCOFFInlineGlueCode(
+      "xcoff-inline-glue-code",
+      cl::desc("Emir inline glue code for indirect calls"), cl::init(true));
+  CGBINDOPT(XCOFFInlineGlueCode);
+
   static cl::opt<bool> DisableIntegratedAS(
       "no-integrated-as", cl::desc("Disable integrated assembler"),
       cl::init(false));
@@ -635,7 +641,7 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple 
&TheTriple) {
   Options.LoopAlignment = getAlignLoops();
   Options.JMCInstrument = getJMCInstrument();
   Options.XCOFFReadOnlyPointers = getXCOFFReadOnlyPointers();
-
+  Options.XCOFFInlineGlueCode = getXCOFFInlineGlueCode();
   Options.MCOptions = mc::InitMCTargetOptionsFromFlags();
 
   Options.ThreadModel = getThreadModel();
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp 
b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index d21ccb59f9962..9bd2b0107b501 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -5359,8 +5359,11 @@ static unsigned 
getCallOpcode(PPCTargetLowering::CallFlags CFlags,
     // immediately followed by a load of the TOC pointer from the stack save
     // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
     // as it is not saved or used.
-    RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
-                                                 : PPCISD::BCTRL;
+    if (!TM.Options.XCOFFInlineGlueCode)
+      RetOpc = PPCISD::CALL;
+    else
+      RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
+                                                   : PPCISD::BCTRL;
   } else if (Subtarget.isUsingPCRelativeCalls()) {
     assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
     RetOpc = PPCISD::CALL_NOTOC;
@@ -5539,6 +5542,11 @@ static void prepareDescriptorIndirectCall(SelectionDAG 
&DAG, SDValue &Callee,
   // copies together, a TOC access in the caller could be scheduled between
   // the assignment of the callee TOC and the branch to the callee, which leads
   // to incorrect code.
+  // On AIX there is a feature ("out of line glue code") which uses a special
+  // trampoline function __ptrgl to do the indirect call. If this option is
+  // enabled we instead simply load the address of the descriptor into r11,
+  // with the arguments in the 'normal' registers and branch to the __ptrgl
+  // stub.
 
   // Start by loading the function address from the descriptor.
   SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
@@ -5600,6 +5608,18 @@ static void prepareDescriptorIndirectCall(SelectionDAG 
&DAG, SDValue &Callee,
   prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
 }
 
+static void prepareOutOfLineGlueCall(SelectionDAG &DAG, SDValue &Callee,
+                                     SDValue &Glue, SDValue &Chain,
+                                     SDValue CallSeqStart, const CallBase *CB,
+                                     const SDLoc &dl, bool hasNest,
+                                     const PPCSubtarget &Subtarget) {
+  const MCRegister PtrGlueReg = Subtarget.getGlueCodeDescriptorRegister();
+  SDValue MoveToPhysicalReg =
+      DAG.getCopyToReg(Chain, dl, PtrGlueReg, Callee, Glue);
+  Chain = MoveToPhysicalReg.getValue(0);
+  Glue = MoveToPhysicalReg.getValue(1);
+}
+
 static void
 buildCallOperands(SmallVectorImpl<SDValue> &Ops,
                   PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
@@ -5611,13 +5631,29 @@ buildCallOperands(SmallVectorImpl<SDValue> &Ops,
   // MVT for a general purpose register.
   const MVT RegVT = Subtarget.getScalarIntVT();
 
+  const TargetMachine &TM = Subtarget.getTargetMachine();
+
   // First operand is always the chain.
   Ops.push_back(Chain);
 
   // If it's a direct call pass the callee as the second operand.
   if (!CFlags.IsIndirect)
     Ops.push_back(Callee);
-  else {
+  else if (!TM.Options.XCOFFInlineGlueCode) {
+    // An indirect call with out of line glue code. We create a target
+    // external symbol for '.__ptrgl' as the callee.
+    auto &Context = DAG.getMachineFunction().getContext();
+    MCSectionXCOFF *Sec = Context.getXCOFFSection(
+        ".__ptrgl", SectionKind::getMetadata(),
+        XCOFF::CsectProperties(XCOFF::XMC_PR, XCOFF::XTY_ER));
+    MCSymbolXCOFF *CalleeSym = Sec->getQualNameSymbol();
+    Callee = DAG.getTargetExternalSymbol(CalleeSym->getName().data(),
+                                         Callee.getValueType(), 0);
+    Ops.push_back(Callee);
+    // Add the register used to pass the descriptor address.
+    Ops.push_back(
+        DAG.getRegister(Subtarget.getGlueCodeDescriptorRegister(), RegVT));
+  } else {
     assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
 
     // For the TOC based ABIs, we have saved the TOC pointer to the linkage 
area
@@ -5689,8 +5725,10 @@ SDValue PPCTargetLowering::FinishCall(
     unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
     SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
 
+  const auto &TM = getTargetMachine();
+
   if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
-      Subtarget.isAIXABI())
+      (Subtarget.isAIXABI() && !TM.Options.XCOFFInlineGlueCode))
     setUsesTOCBasePtr(DAG);
 
   unsigned CallOpc =
@@ -5700,8 +5738,12 @@ SDValue PPCTargetLowering::FinishCall(
   if (!CFlags.IsIndirect)
     Callee = transformCallee(Callee, DAG, dl, Subtarget);
   else if (Subtarget.usesFunctionDescriptors())
-    prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
-                                  dl, CFlags.HasNest, Subtarget);
+    if (!TM.Options.XCOFFInlineGlueCode)
+      prepareOutOfLineGlueCall(DAG, Callee, Glue, Chain, CallSeqStart, CB, dl,
+                               CFlags.HasNest, Subtarget);
+    else
+      prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
+                                    dl, CFlags.HasNest, Subtarget);
   else
     prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
 
@@ -5745,6 +5787,30 @@ SDValue PPCTargetLowering::FinishCall(
                             ? NumBytes
                             : 0;
 
+  if (!TM.Options.XCOFFInlineGlueCode) {
+    const Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
+    const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
+    const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
+    const unsigned TOCSaveOffset =
+        Subtarget.getFrameLowering()->getTOCSaveOffset();
+    const MVT RegVT = Subtarget.getScalarIntVT();
+
+    // Load the original toc value from the stack save slot.
+    SDValue PtrOffset = DAG.getIntPtrConstant(TOCSaveOffset, dl);
+    SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
+    SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, PtrOffset);
+    SDValue TOCLoad = DAG.getLoad(
+        RegVT, dl, Chain, AddPtr,
+        MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset),
+        Alignment, MachineMemOperand::MONone);
+
+    // TODO FIXME Causing scheduling overflow ...
+    // Copy back to the physical toc register.
+    // SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCLoad, Glue);
+    // Chain = TOCVal.getValue(0);
+    // Glue = TOCVal.getValue(1);
+  }
+
   Chain = DAG.getCALLSEQ_END(Chain, NumBytes, BytesCalleePops, Glue, dl);
   Glue = Chain.getValue(1);
 
@@ -7745,9 +7811,11 @@ SDValue PPCTargetLowering::LowerCall_AIX(
   if (!MemOpChains.empty())
     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
 
+  const auto &TM = getTargetMachine();
+
   // For indirect calls, we need to save the TOC base to the stack for
   // restoration after the call.
-  if (CFlags.IsIndirect) {
+  if (CFlags.IsIndirect && TM.Options.XCOFFInlineGlueCode) {
     assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
     const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
     const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h 
b/llvm/lib/Target/PowerPC/PPCSubtarget.h
index 7d933588025fe..eec0e141debd4 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -286,6 +286,10 @@ class PPCSubtarget : public PPCGenSubtargetInfo {
     return IsPPC64 ? PPC::X1 : PPC::R1;
   }
 
+  MCRegister getGlueCodeDescriptorRegister() const {
+    return IsPPC64 ? PPC::X11 : PPC::R11;
+  }
+
   bool isXRaySupported() const override { return IsPPC64 && IsLittleEndian; }
 
   bool isPredictableSelectIsExpensive() const {
diff --git a/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll 
b/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll
new file mode 100644
index 0000000000000..375cb16b4a5e2
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll
@@ -0,0 +1,41 @@
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple powerpc-ibm-aix-xcoff
+; RUN:   --xcoff-inline-glue-code=false < %s | FileCheck 
--check-prefixes=CHECK,CHECK32 %s
+
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple powerpc64-ibm-aix-xcoff
+; RUN:   --xcoff-inline-glue-code=false < %s | FileCheck 
--check-prefixes=CHECK,CHECK64 %s
+
+@a = dso_local global i32 55, align 4
+@d = dso_local local_unnamed_addr global double 3.141590e+00, align 8
+@fp = dso_local local_unnamed_addr global ptr null, align 8
+
+define i32 @caller1(ptr noundef readonly captures(none) %fp) 
local_unnamed_addr {
+entry:
+  %call = tail call i32 %fp(i32 signext 1, i32 signext 2, i32 signext 3)
+  ret i32 %call
+}
+
+; CHECK-LABEL: .caller1
+; CHECK-DAG:    mr 11, 3
+; CHECK-DAG:    li 3, 1
+; CHECK-DAG:    li 4, 2
+; CHECK-DAG:    li 5, 3
+; CHECK: bl .__ptrgl[PR]A
+; CHECK32-NEXT: ld 2  28(r1)
+; CHECK64-NEXT: ld 2, 40(r1)
+
+define dso_local zeroext i1 @caller2() local_unnamed_addr {
+entry:
+  %0 = load ptr, ptr @fp
+  %1 = load i32, ptr @a
+  %2 = load double, ptr @d
+  %call = tail call zeroext i1 %0(i32 noundef signext %1, double noundef %2, 
ptr noundef nonnull @a)
+  ret i1 %call
+}
+
+; CHECK-LABEL: .caller2
+; CHECK: ld , L..C{{.*}}(2)                          # @fp
+; CHECK: ld 11, 0([[REG]])
+; CHECK: lwa 3, 0(5)
+; CHECK: bl .__ptrgl[PR]
+; CHECK32-NEXT: ld 2, 28(r1)
+; CHECK64-NEXT: ld 2, 40(r1)

>From bac5d2026579f763f92e1ee8c334c6fd1c4e6c76 Mon Sep 17 00:00:00 2001
From: Sean Fertile <[email protected]>
Date: Tue, 21 Apr 2026 10:09:38 -0400
Subject: [PATCH 02/26] Add Pseudo and instructions to handle a call and
 restore together.

Add a new Pseudo and Instructions for handling a direct call
followed by a toc restore load.

* Still need to add the rounding mode (_RM) versions of the calls.
* Missing emission of the target external symbols linkage.
* Need to add MIR test to verify operands.
---
 .../PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp   |  2 ++
 llvm/lib/Target/PowerPC/P10InstrResources.td    |  2 +-
 llvm/lib/Target/PowerPC/P9InstrResources.td     |  2 ++
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp     |  2 +-
 llvm/lib/Target/PowerPC/PPCInstr64Bit.td        | 14 ++++++++++++++
 llvm/lib/Target/PowerPC/PPCInstrInfo.td         | 17 +++++++++++++++++
 llvm/lib/Target/PowerPC/PPCScheduleP7.td        |  2 +-
 7 files changed, 38 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp 
b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index b28304b07e1a3..fa92d45e07f27 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -154,6 +154,8 @@ bool PPCMCCodeEmitter::isNoTOCCallInstr(const MCInst &MI) 
const {
   case PPC::TCRETURNri:
   case PPC::BCTRL_LWZinto_toc:
   case PPC::BCTRL_LWZinto_toc_RM:
+  case PPC::BL_RESTORE:
+  case PPC::BL8_RESTORE:
   case PPC::TAILBCTR:
   case PPC::TAILB:
   case PPC::TAILBA:
diff --git a/llvm/lib/Target/PowerPC/P10InstrResources.td 
b/llvm/lib/Target/PowerPC/P10InstrResources.td
index 842174239cc4c..825ffd56da5e0 100644
--- a/llvm/lib/Target/PowerPC/P10InstrResources.td
+++ b/llvm/lib/Target/PowerPC/P10InstrResources.td
@@ -317,7 +317,7 @@ def : InstRW<[P10W_BR_2C, P10W_DISP_ANY, P10BR_Read],
     BCLR, BCLRn, BDNZLR, BDNZLR8, BDNZLRm, BDNZLRp, BDZLR, BDZLR8, BDZLRm, 
BDZLRp, gBCLR,
     BCLRL, BCLRLn, BDNZLRL, BDNZLRLm, BDNZLRLp, BDZLRL, BDZLRLm, BDZLRLp, 
gBCLRL,
     BL, BL8, BL8_NOP, BL8_NOP_RM, BL8_NOP_TLS, BL8_NOTOC, BL8_NOTOC_RM, 
BL8_NOTOC_TLS, BL8_RM, BL8_TLS, BL8_TLS_, BLR, BLR8, BLRL, BL_NOP, BL_NOP_RM, 
BL_RM, BL_TLS,
-    BLA, BLA8, BLA8_NOP, BLA8_NOP_RM, BLA8_RM, BLA_RM
+    BLA, BLA8, BLA8_NOP, BLA8_NOP_RM, BLA8_RM, BLA_RM, BL_RESTORE, BL8_RESTORE
 )>;
 
 // 2 Cycles Branch operations, 2 input operands
diff --git a/llvm/lib/Target/PowerPC/P9InstrResources.td 
b/llvm/lib/Target/PowerPC/P9InstrResources.td
index 106faf1f8e8d2..8c7637e3b93f1 100644
--- a/llvm/lib/Target/PowerPC/P9InstrResources.td
+++ b/llvm/lib/Target/PowerPC/P9InstrResources.td
@@ -1326,6 +1326,8 @@ def : InstRW<[P9_BR_2C, DISP_BR_1C],
   BCTRL8_LDinto_toc_RM,
   BCTRL_LWZinto_toc_RM,
   BCn,
+  BL_RESTORE,
+  BL8_RESTORE,
   CTRL_DEP
 )>;
 
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp 
b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 9bd2b0107b501..c61acf1fb40c7 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -5360,7 +5360,7 @@ static unsigned 
getCallOpcode(PPCTargetLowering::CallFlags CFlags,
     // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
     // as it is not saved or used.
     if (!TM.Options.XCOFFInlineGlueCode)
-      RetOpc = PPCISD::CALL;
+      RetOpc = PPCISD::BL_LOAD_TOC;
     else
       RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
                                                    : PPCISD::BCTRL;
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td 
b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index a973654c22c38..c6458d6caa327 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -207,6 +207,16 @@ let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
     Requires<[IsPPC64]>;
 }
 
+let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
+    Defs = [LR8, X2], Uses = [RM, X1], RST = 2, RA = 1, D = 40 in {
+  def BL8_RESTORE : IForm_and_DForm_1<18, 0, 1, 58,
+                      (outs), (ins calltarget:$LI),
+                      "bl $LI\n\tld 2, 40(1)", IIC_BrB,
+                      []>,
+                       Requires<[IsPPC64]>;
+  // TODO FIXME Add _RM version of call.
+}
+
 } // Interpretation64Bit
 
 // FIXME: Duplicating this for the asm parser should be unnecessary, but the
@@ -259,6 +269,10 @@ def : Pat<(PPCcall_rm (i64 mcsym:$dst)),
 def : Pat<(PPCcall_nop_rm (i64 mcsym:$dst)),
           (BL8_NOP_RM mcsym:$dst)>;
 
+def : Pat<(PPCbl_load_toc (i64 texternalsym:$dst)),
+          (BL8_RESTORE texternalsym:$dst)>;
+// TODO FIXME add _RM version.
+
 // Atomic operations
 // FIXME: some of these might be used with constant operands. This will result
 // in constant materialization instructions that may be redundant. We currently
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td 
b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index e3240a72a113f..58a1784b9faa0 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -554,6 +554,11 @@ def PPCbctrl_load_toc : SDNode<"PPCISD::BCTRL_LOAD_TOC",
                                [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                                 SDNPVariadic]>;
 
+def PPCbl_load_toc : SDNode<"PPCISD::BL_LOAD_TOC",
+                            SDTypeProfile<0, 1, []>,
+                            [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+                            SDNPVariadic]>;
+
 // The variants that implicitly define rounding mode for calls with
 // strictfp semantics.
 def PPCcall_rm  : SDNode<"PPCISD::CALL_RM", SDT_PPCCall,
@@ -1862,6 +1867,14 @@ let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
 
 }
 
+let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
+    Defs = [LR, R2], Uses = [RM, R1], RST = 2, RA = 1, D = 20 in {
+def BL_RESTORE : IForm_and_DForm_1<18, 0, 1, 32,
+                   (outs), (ins calltarget:$LI),
+                   "bl $LI\n\tlwz 2, 20(1)", IIC_BrB,
+                    []>, Requires<[IsPPC32]>;
+}
+
 let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
     Defs = [LR, R2, RM], Uses = [CTR, RM], RST = 2 in {
   def BCTRL_LWZinto_toc_RM:
@@ -3460,6 +3473,10 @@ def : Pat<(PPCcall_nop_rm (i32 mcsym:$dst)),
 def : Pat<(PPCcall_nop_rm (i32 texternalsym:$dst)),
           (BL_NOP_RM texternalsym:$dst)>;
 
+def : Pat<(PPCbl_load_toc (i32 texternalsym:$dst)),
+          (BL_RESTORE texternalsym:$dst)>;
+// TODO FIXME add _RM version of call.
+
 def : Pat<(PPCtc_return (i32 tglobaladdr:$dst),  imm:$imm),
           (TCRETURNdi tglobaladdr:$dst, imm:$imm)>;
 
diff --git a/llvm/lib/Target/PowerPC/PPCScheduleP7.td 
b/llvm/lib/Target/PowerPC/PPCScheduleP7.td
index bf7f2f7a9c999..09b811128150f 100644
--- a/llvm/lib/Target/PowerPC/PPCScheduleP7.td
+++ b/llvm/lib/Target/PowerPC/PPCScheduleP7.td
@@ -128,7 +128,7 @@ let SchedModel = P7Model in {
     BCCCTR, BCCCTR8, BCCCTRL, BCCCTRL8, BCCL, BCCLA, BCCLR, BCCLRL, BCCTR,
     BCCTR8, BCCTR8n, BCCTRL, BCCTRL8, BCCTRL8n, BCCTRLn, BCCTRn, gBC, gBCA,
     gBCAat, gBCCTR, gBCCTRL, gBCL, gBCLA, gBCLAat, gBCLR, gBCLRL, gBCLat, 
gBCat,
-    MFCTR, MFCTR8, MFLR, MFLR8
+    MFCTR, MFCTR8, MFLR, MFLR8, BL_RESTORE, BL8_RESTORE
   )>;
 
   def : InstRW<[P7_BRU_4C], (instrs MTLR, MTLR8, MTCTR, MTCTR8, MTCTR8loop, 
MTCTRloop)>;

>From 955ec5e107343b0f14182e0f574588d644a4d613 Mon Sep 17 00:00:00 2001
From: Sean Fertile <[email protected]>
Date: Tue, 21 Apr 2026 11:05:40 -0400
Subject: [PATCH 03/26] Emit linkage for the __ptrgl external symbol.

---
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp 
b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index a7389d9bc4fe8..a5009269e88f0 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -3041,6 +3041,15 @@ void PPCAIXAsmPrinter::emitGCOVRefs() {
 }
 
 void PPCAIXAsmPrinter::emitEndOfAsmFile(Module &M) {
+  // If we are using out of line pointer glue we have to emit the
+  // linkage for it.
+  if (OutContext.hasXCOFFSection(
+          ".__ptrgl", XCOFF::CsectProperties(XCOFF::XMC_PR, XCOFF::XTY_ER))) {
+    MCSymbol *PtrGlueSym = OutContext.getOrCreateSymbol(".__ptrgl[PR]");
+    OutStreamer->emitXCOFFSymbolLinkageWithVisibility(PtrGlueSym, MCSA_Extern,
+                                                      MCSA_Invalid);
+  }
+
   // If there are no functions and there are no toc-data definitions in this
   // module, we will never need to reference the TOC base.
   if (M.empty() && TOCDataGlobalVars.empty())

>From f27b9fc744cc5dc9e209c2bc17bf0c1481c98b49 Mon Sep 17 00:00:00 2001
From: Sean Fertile <[email protected]>
Date: Tue, 21 Apr 2026 11:30:21 -0400
Subject: [PATCH 04/26] Remove toc-restore code from ISEL. Folded into new
 instr instead.

---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 24 ---------------------
 1 file changed, 24 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp 
b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index c61acf1fb40c7..185eab1a20963 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -5787,30 +5787,6 @@ SDValue PPCTargetLowering::FinishCall(
                             ? NumBytes
                             : 0;
 
-  if (!TM.Options.XCOFFInlineGlueCode) {
-    const Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
-    const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
-    const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
-    const unsigned TOCSaveOffset =
-        Subtarget.getFrameLowering()->getTOCSaveOffset();
-    const MVT RegVT = Subtarget.getScalarIntVT();
-
-    // Load the original toc value from the stack save slot.
-    SDValue PtrOffset = DAG.getIntPtrConstant(TOCSaveOffset, dl);
-    SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
-    SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, PtrOffset);
-    SDValue TOCLoad = DAG.getLoad(
-        RegVT, dl, Chain, AddPtr,
-        MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset),
-        Alignment, MachineMemOperand::MONone);
-
-    // TODO FIXME Causing scheduling overflow ...
-    // Copy back to the physical toc register.
-    // SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCLoad, Glue);
-    // Chain = TOCVal.getValue(0);
-    // Glue = TOCVal.getValue(1);
-  }
-
   Chain = DAG.getCALLSEQ_END(Chain, NumBytes, BytesCalleePops, Glue, dl);
   Glue = Chain.getValue(1);
 

>From 46f1d5c5b64798994aa6c0363180ba7bf9eb6525 Mon Sep 17 00:00:00 2001
From: Sean Fertile <[email protected]>
Date: Tue, 21 Apr 2026 12:54:00 -0400
Subject: [PATCH 05/26] Still use TOC base with out-of-line glue.

---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp 
b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 185eab1a20963..13f77dd3c9928 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -5728,7 +5728,7 @@ SDValue PPCTargetLowering::FinishCall(
   const auto &TM = getTargetMachine();
 
   if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
-      (Subtarget.isAIXABI() && !TM.Options.XCOFFInlineGlueCode))
+      Subtarget.isAIXABI())
     setUsesTOCBasePtr(DAG);
 
   unsigned CallOpc =

>From b4b9a349752f343e8925f21946205ea415aa7550 Mon Sep 17 00:00:00 2001
From: Sean Fertile <[email protected]>
Date: Tue, 21 Apr 2026 13:31:08 -0400
Subject: [PATCH 06/26] Fix test now that it's runnable.

---
 .../CodeGen/PowerPC/aix-no-inline-glue.ll     | 22 ++++++++++---------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll 
b/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll
index 375cb16b4a5e2..2d609a289373a 100644
--- a/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll
@@ -1,7 +1,7 @@
-; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple powerpc-ibm-aix-xcoff
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple powerpc-ibm-aix-xcoff \
 ; RUN:   --xcoff-inline-glue-code=false < %s | FileCheck 
--check-prefixes=CHECK,CHECK32 %s
 
-; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple powerpc64-ibm-aix-xcoff
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple powerpc64-ibm-aix-xcoff \
 ; RUN:   --xcoff-inline-glue-code=false < %s | FileCheck 
--check-prefixes=CHECK,CHECK64 %s
 
 @a = dso_local global i32 55, align 4
@@ -19,9 +19,9 @@ entry:
 ; CHECK-DAG:    li 3, 1
 ; CHECK-DAG:    li 4, 2
 ; CHECK-DAG:    li 5, 3
-; CHECK: bl .__ptrgl[PR]A
-; CHECK32-NEXT: ld 2  28(r1)
-; CHECK64-NEXT: ld 2, 40(r1)
+; CHECK: bl .__ptrgl[PR]
+; CHECK32-NEXT: lwz 2, 20(1)
+; CHECK64-NEXT: ld 2, 40(1)
 
 define dso_local zeroext i1 @caller2() local_unnamed_addr {
 entry:
@@ -33,9 +33,11 @@ entry:
 }
 
 ; CHECK-LABEL: .caller2
-; CHECK: ld , L..C{{.*}}(2)                          # @fp
-; CHECK: ld 11, 0([[REG]])
-; CHECK: lwa 3, 0(5)
+; CHECK64: ld [[REG:[0-9]+]], L..C{{[0-9]+}}(2)  # @fp
+; CHECK32: lwz [[REG:[0-9]+]], L..C{{[0-9]+}}(2) # @fp
+; CHECK32: lwz 11, 0([[REG]])
 ; CHECK: bl .__ptrgl[PR]
-; CHECK32-NEXT: ld 2, 28(r1)
-; CHECK64-NEXT: ld 2, 40(r1)
+; CHECK32-NEXT: lwz 2, 20(1)
+; CHECK64-NEXT: ld 2, 40(1)
+
+; CHECK: .extern .__ptrgl[PR]

>From 0dd4e746e3f19cc84a84006a8a5bf079e70b8b6c Mon Sep 17 00:00:00 2001
From: Sean Fertile <[email protected]>
Date: Tue, 21 Apr 2026 14:04:15 -0400
Subject: [PATCH 07/26] Add MIR tests.

---
 .../CodeGen/PowerPC/aix-no-inline-glue.ll     | 38 +++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll 
b/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll
index 2d609a289373a..9d2e2ae5f345b 100644
--- a/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll
@@ -4,6 +4,14 @@
 ; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple powerpc64-ibm-aix-xcoff \
 ; RUN:   --xcoff-inline-glue-code=false < %s | FileCheck 
--check-prefixes=CHECK,CHECK64 %s
 
+; RUN: llc -stop-after=finalize-isel  -verify-machineinstrs -mcpu=pwr8 \
+; RUN:   -mtriple powerpc-ibm-aix-xcoff --xcoff-inline-glue-code=false < %s | \
+; RUN:   FileCheck --check-prefix=MIR32 %s
+
+; RUN: llc -stop-after=finalize-isel  -verify-machineinstrs -mcpu=pwr8 \
+; RUN:   -mtriple powerpc64-ibm-aix-xcoff --xcoff-inline-glue-code=false < %s 
| \
+; RUN:   FileCheck --check-prefix=MIR64 %s
+
 @a = dso_local global i32 55, align 4
 @d = dso_local local_unnamed_addr global double 3.141590e+00, align 8
 @fp = dso_local local_unnamed_addr global ptr null, align 8
@@ -23,6 +31,20 @@ entry:
 ; CHECK32-NEXT: lwz 2, 20(1)
 ; CHECK64-NEXT: ld 2, 40(1)
 
+; MIR32: name:            caller1
+; MIR32:   %0:gprc = COPY $r3
+; MIR32:   ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1
+; MIR32:   $r11 = COPY %0
+; MIR32:   BL_RESTORE &".__ptrgl[PR]", csr_aix32, implicit-def dead $lr, 
implicit-def dead $r2, implicit $rm, implicit $r1, implicit $r11, implicit $r3, 
implicit $r4, implicit $r5, implicit $r2, implicit-def $r1, implicit-def $r3
+; MIR32:  ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1
+
+; MIR64: name:            caller1
+; MIR64:   %0:g8rc = COPY $x3
+; MIR64:   ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
+; MIR64:   $x11 = COPY %0
+; MIR64:   BL8_RESTORE &".__ptrgl[PR]", csr_ppc64, implicit-def dead $lr8, 
implicit-def dead $x2, implicit $rm, implicit $x1, implicit $x11, implicit $x3, 
implicit $x4, implicit $x5, implicit $x2, implicit-def $r1, implicit-def $x3
+; MIR64:   ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
+
 define dso_local zeroext i1 @caller2() local_unnamed_addr {
 entry:
   %0 = load ptr, ptr @fp
@@ -40,4 +62,20 @@ entry:
 ; CHECK32-NEXT: lwz 2, 20(1)
 ; CHECK64-NEXT: ld 2, 40(1)
 
+; MIR32: name:            caller2
+; MIR32:   %0:gprc_and_gprc_nor0 = LWZtoc @fp, $r2 :: (load (s32) from got)
+; MIR32:   %1:gprc = LWZ 0, killed %0 :: (dereferenceable load (s32) from @fp, 
align 8)
+; MIR32:   ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1
+; MIR32:   $r11 = COPY %1
+; MIR32:   BL_RESTORE &".__ptrgl[PR]", csr_aix32, implicit-def dead $lr, 
implicit-def dead $r2, implicit $rm, implicit $r1, implicit $r11, implicit $r3, 
implicit $f1, implicit $r6, implicit $r2, implicit-def $r1, implicit-def $r3
+; MIR32:   ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1
+
+; MIR64: name:            caller2
+; MIR64:   %0:g8rc_and_g8rc_nox0 = LDtoc @fp, $x2 :: (load (s64) from got)
+; MIR64:   %1:g8rc = LD 0, killed %0 :: (dereferenceable load (s64) from @fp)
+; MIR64:   ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
+; MIR64:   $x11 = COPY %1
+; MIR64:   BL8_RESTORE &".__ptrgl[PR]", csr_ppc64, implicit-def dead $lr8, 
implicit-def dead $x2, implicit $rm, implicit $x1, implicit $x11, implicit $x3, 
implicit $f1, implicit $x5, implicit $x2, implicit-def $r1, implicit-def $x3
+; MIR64:   ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
+
 ; CHECK: .extern .__ptrgl[PR]

>From 0bf213e668b5b7d3f7fa39aa00e30df9ec84da99 Mon Sep 17 00:00:00 2001
From: Sean Fertile <[email protected]>
Date: Tue, 21 Apr 2026 14:49:07 -0400
Subject: [PATCH 08/26] Switch to using a subtarget feature instead of
 TargetMachine option.

Thanks Tony Varghese for the suggestion and draft implementation.
---
 llvm/include/llvm/CodeGen/CommandFlags.h        | 2 --
 llvm/include/llvm/Target/TargetOptions.h        | 8 +-------
 llvm/lib/CodeGen/CommandFlags.cpp               | 7 -------
 llvm/lib/Target/PowerPC/PPC.td                  | 4 ++++
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp     | 8 ++++----
 llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll | 8 ++++----
 6 files changed, 13 insertions(+), 24 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/CommandFlags.h 
b/llvm/include/llvm/CodeGen/CommandFlags.h
index b24c73408d861..d10e8732c1562 100644
--- a/llvm/include/llvm/CodeGen/CommandFlags.h
+++ b/llvm/include/llvm/CodeGen/CommandFlags.h
@@ -154,8 +154,6 @@ LLVM_ABI bool getJMCInstrument();
 
 LLVM_ABI bool getXCOFFReadOnlyPointers();
 
-LLVM_ABI bool getXCOFFInlineGlueCode();
-
 enum SaveStatsMode { None, Cwd, Obj };
 
 LLVM_ABI SaveStatsMode getSaveStats();
diff --git a/llvm/include/llvm/Target/TargetOptions.h 
b/llvm/include/llvm/Target/TargetOptions.h
index dfe87fd40d5b8..89e29d5f102f6 100644
--- a/llvm/include/llvm/Target/TargetOptions.h
+++ b/llvm/include/llvm/Target/TargetOptions.h
@@ -140,7 +140,7 @@ class TargetOptions {
         DebugStrictDwarf(false), Hotpatch(false),
         PPCGenScalarMASSEntries(false), JMCInstrument(false),
         EnableCFIFixup(false), MisExpect(false), XCOFFReadOnlyPointers(false),
-        VerifyArgABICompliance(true), XCOFFInlineGlueCode(true) {}
+        VerifyArgABICompliance(true) {}
 
   /// DisableFramePointerElim - This returns true if frame pointer elimination
   /// optimization should be disabled for the given machine function.
@@ -353,12 +353,6 @@ class TargetOptions {
   /// (lack) these extensions.
   unsigned VerifyArgABICompliance : 1;
 
-  /// When set to true, the code to form an indirect call sequence
-  /// is placed inline at the call site. When false an indirect call
-  /// is implemented with a branch to a trampoline which contains the
-  /// indriect call sequence.
-  unsigned XCOFFInlineGlueCode : 1;
-
   /// Name of the stack usage file (i.e., .su file) if user passes
   /// -fstack-usage. If empty, it can be implied that -fstack-usage is not
   /// passed on the command line.
diff --git a/llvm/lib/CodeGen/CommandFlags.cpp 
b/llvm/lib/CodeGen/CommandFlags.cpp
index 1de4fb9a9414d..9459797a3d074 100644
--- a/llvm/lib/CodeGen/CommandFlags.cpp
+++ b/llvm/lib/CodeGen/CommandFlags.cpp
@@ -120,7 +120,6 @@ CGOPT(bool, DebugStrictDwarf)
 CGOPT(unsigned, AlignLoops)
 CGOPT(bool, JMCInstrument)
 CGOPT(bool, XCOFFReadOnlyPointers)
-CGOPT(bool, XCOFFInlineGlueCode)
 CGOPT(codegen::SaveStatsMode, SaveStats)
 
 #define CGBINDOPT(NAME)                                                        
\
@@ -535,11 +534,6 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
       cl::init(false));
   CGBINDOPT(XCOFFReadOnlyPointers);
 
-  static cl::opt<bool> XCOFFInlineGlueCode(
-      "xcoff-inline-glue-code",
-      cl::desc("Emir inline glue code for indirect calls"), cl::init(true));
-  CGBINDOPT(XCOFFInlineGlueCode);
-
   static cl::opt<bool> DisableIntegratedAS(
       "no-integrated-as", cl::desc("Disable integrated assembler"),
       cl::init(false));
@@ -641,7 +635,6 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple 
&TheTriple) {
   Options.LoopAlignment = getAlignLoops();
   Options.JMCInstrument = getJMCInstrument();
   Options.XCOFFReadOnlyPointers = getXCOFFReadOnlyPointers();
-  Options.XCOFFInlineGlueCode = getXCOFFInlineGlueCode();
   Options.MCOptions = mc::InitMCTargetOptionsFromFlags();
 
   Options.ThreadModel = getThreadModel();
diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td
index 7b4bae60f7e74..b6bcec5305dd3 100644
--- a/llvm/lib/Target/PowerPC/PPC.td
+++ b/llvm/lib/Target/PowerPC/PPC.td
@@ -362,6 +362,10 @@ def FeaturePredictableSelectIsExpensive :
 def FeatureFastMFLR : SubtargetFeature<"fast-MFLR", "HasFastMFLR", "true",
                                        "MFLR is a fast instruction">;
 
+def FeatureNoInlineGlue :
+ SubtargetFeature<"no-inline-glue", "NoInlineGlue", "true",
+                  "Use external ._ptrgl for indirect calls">;
+
 
//===----------------------------------------------------------------------===//
 // PowerPC Instruction Predicate Definitions.
 
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp 
b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 13f77dd3c9928..44232a8d68523 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -5359,7 +5359,7 @@ static unsigned 
getCallOpcode(PPCTargetLowering::CallFlags CFlags,
     // immediately followed by a load of the TOC pointer from the stack save
     // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
     // as it is not saved or used.
-    if (!TM.Options.XCOFFInlineGlueCode)
+    if (Subtarget.noInlineGlue())
       RetOpc = PPCISD::BL_LOAD_TOC;
     else
       RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
@@ -5639,7 +5639,7 @@ buildCallOperands(SmallVectorImpl<SDValue> &Ops,
   // If it's a direct call pass the callee as the second operand.
   if (!CFlags.IsIndirect)
     Ops.push_back(Callee);
-  else if (!TM.Options.XCOFFInlineGlueCode) {
+  else if (Subtarget.noInlineGlue()) {
     // An indirect call with out of line glue code. We create a target
     // external symbol for '.__ptrgl' as the callee.
     auto &Context = DAG.getMachineFunction().getContext();
@@ -5738,7 +5738,7 @@ SDValue PPCTargetLowering::FinishCall(
   if (!CFlags.IsIndirect)
     Callee = transformCallee(Callee, DAG, dl, Subtarget);
   else if (Subtarget.usesFunctionDescriptors())
-    if (!TM.Options.XCOFFInlineGlueCode)
+    if (Subtarget.noInlineGlue())
       prepareOutOfLineGlueCall(DAG, Callee, Glue, Chain, CallSeqStart, CB, dl,
                                CFlags.HasNest, Subtarget);
     else
@@ -7791,7 +7791,7 @@ SDValue PPCTargetLowering::LowerCall_AIX(
 
   // For indirect calls, we need to save the TOC base to the stack for
   // restoration after the call.
-  if (CFlags.IsIndirect && TM.Options.XCOFFInlineGlueCode) {
+  if (CFlags.IsIndirect && !Subtarget.noInlineGlue()) {
     assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
     const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
     const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
diff --git a/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll 
b/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll
index 9d2e2ae5f345b..0e45a290c1dbf 100644
--- a/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll
@@ -1,15 +1,15 @@
 ; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple powerpc-ibm-aix-xcoff \
-; RUN:   --xcoff-inline-glue-code=false < %s | FileCheck 
--check-prefixes=CHECK,CHECK32 %s
+; RUN:   -mattr=+no-inline-glue < %s | FileCheck 
--check-prefixes=CHECK,CHECK32 %s
 
 ; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple powerpc64-ibm-aix-xcoff \
-; RUN:   --xcoff-inline-glue-code=false < %s | FileCheck 
--check-prefixes=CHECK,CHECK64 %s
+; RUN:   -mattr=+no-inline-glue < %s | FileCheck 
--check-prefixes=CHECK,CHECK64 %s
 
 ; RUN: llc -stop-after=finalize-isel  -verify-machineinstrs -mcpu=pwr8 \
-; RUN:   -mtriple powerpc-ibm-aix-xcoff --xcoff-inline-glue-code=false < %s | \
+; RUN:   -mtriple powerpc-ibm-aix-xcoff -mattr=+no-inline-glue < %s | \
 ; RUN:   FileCheck --check-prefix=MIR32 %s
 
 ; RUN: llc -stop-after=finalize-isel  -verify-machineinstrs -mcpu=pwr8 \
-; RUN:   -mtriple powerpc64-ibm-aix-xcoff --xcoff-inline-glue-code=false < %s 
| \
+; RUN:   -mtriple powerpc64-ibm-aix-xcoff -mattr=+no-inline-glue < %s | \
 ; RUN:   FileCheck --check-prefix=MIR64 %s
 
 @a = dso_local global i32 55, align 4

>From 8213a269b6cdb0665d959caf2f3f5e0e8b05b9ac Mon Sep 17 00:00:00 2001
From: Sean Fertile <[email protected]>
Date: Wed, 22 Apr 2026 16:04:40 -0400
Subject: [PATCH 09/26] Fix _ptrgl spelling.

The symbol has a single leading underscore. Also use the existing
transformCallee function to convert to a target external symbol and
prepend the '.'.
---
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp       |  4 ++--
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp     | 16 +++++-----------
 llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll | 14 +++++++-------
 3 files changed, 14 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp 
b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index a5009269e88f0..b1fb08d5d22ed 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -3044,8 +3044,8 @@ void PPCAIXAsmPrinter::emitEndOfAsmFile(Module &M) {
   // If we are using out of line pointer glue we have to emit the
   // linkage for it.
   if (OutContext.hasXCOFFSection(
-          ".__ptrgl", XCOFF::CsectProperties(XCOFF::XMC_PR, XCOFF::XTY_ER))) {
-    MCSymbol *PtrGlueSym = OutContext.getOrCreateSymbol(".__ptrgl[PR]");
+          "._ptrgl", XCOFF::CsectProperties(XCOFF::XMC_PR, XCOFF::XTY_ER))) {
+    MCSymbol *PtrGlueSym = OutContext.getOrCreateSymbol("._ptrgl[PR]");
     OutStreamer->emitXCOFFSymbolLinkageWithVisibility(PtrGlueSym, MCSA_Extern,
                                                       MCSA_Invalid);
   }
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp 
b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 44232a8d68523..b914d4b8d70ae 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -5640,15 +5640,6 @@ buildCallOperands(SmallVectorImpl<SDValue> &Ops,
   if (!CFlags.IsIndirect)
     Ops.push_back(Callee);
   else if (Subtarget.noInlineGlue()) {
-    // An indirect call with out of line glue code. We create a target
-    // external symbol for '.__ptrgl' as the callee.
-    auto &Context = DAG.getMachineFunction().getContext();
-    MCSectionXCOFF *Sec = Context.getXCOFFSection(
-        ".__ptrgl", SectionKind::getMetadata(),
-        XCOFF::CsectProperties(XCOFF::XMC_PR, XCOFF::XTY_ER));
-    MCSymbolXCOFF *CalleeSym = Sec->getQualNameSymbol();
-    Callee = DAG.getTargetExternalSymbol(CalleeSym->getName().data(),
-                                         Callee.getValueType(), 0);
     Ops.push_back(Callee);
     // Add the register used to pass the descriptor address.
     Ops.push_back(
@@ -5738,10 +5729,13 @@ SDValue PPCTargetLowering::FinishCall(
   if (!CFlags.IsIndirect)
     Callee = transformCallee(Callee, DAG, dl, Subtarget);
   else if (Subtarget.usesFunctionDescriptors())
-    if (Subtarget.noInlineGlue())
+    if (Subtarget.noInlineGlue()) {
       prepareOutOfLineGlueCall(DAG, Callee, Glue, Chain, CallSeqStart, CB, dl,
                                CFlags.HasNest, Subtarget);
-    else
+      SDValue PtrGlueCallee =
+          DAG.getExternalSymbol("_ptrgl", getPointerTy(DAG.getDataLayout()));
+      Callee = transformCallee(PtrGlueCallee, DAG, dl, Subtarget);
+    } else
       prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
                                     dl, CFlags.HasNest, Subtarget);
   else
diff --git a/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll 
b/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll
index 0e45a290c1dbf..0d7a2d988ddc2 100644
--- a/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll
@@ -27,7 +27,7 @@ entry:
 ; CHECK-DAG:    li 3, 1
 ; CHECK-DAG:    li 4, 2
 ; CHECK-DAG:    li 5, 3
-; CHECK: bl .__ptrgl[PR]
+; CHECK: bl ._ptrgl[PR]
 ; CHECK32-NEXT: lwz 2, 20(1)
 ; CHECK64-NEXT: ld 2, 40(1)
 
@@ -35,14 +35,14 @@ entry:
 ; MIR32:   %0:gprc = COPY $r3
 ; MIR32:   ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1
 ; MIR32:   $r11 = COPY %0
-; MIR32:   BL_RESTORE &".__ptrgl[PR]", csr_aix32, implicit-def dead $lr, 
implicit-def dead $r2, implicit $rm, implicit $r1, implicit $r11, implicit $r3, 
implicit $r4, implicit $r5, implicit $r2, implicit-def $r1, implicit-def $r3
+; MIR32:   BL_RESTORE &"._ptrgl[PR]", csr_aix32, implicit-def dead $lr, 
implicit-def dead $r2, implicit $rm, implicit $r1, implicit $r11, implicit $r3, 
implicit $r4, implicit $r5, implicit $r2, implicit-def $r1, implicit-def $r3
 ; MIR32:  ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1
 
 ; MIR64: name:            caller1
 ; MIR64:   %0:g8rc = COPY $x3
 ; MIR64:   ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
 ; MIR64:   $x11 = COPY %0
-; MIR64:   BL8_RESTORE &".__ptrgl[PR]", csr_ppc64, implicit-def dead $lr8, 
implicit-def dead $x2, implicit $rm, implicit $x1, implicit $x11, implicit $x3, 
implicit $x4, implicit $x5, implicit $x2, implicit-def $r1, implicit-def $x3
+; MIR64:   BL8_RESTORE &"._ptrgl[PR]", csr_ppc64, implicit-def dead $lr8, 
implicit-def dead $x2, implicit $rm, implicit $x1, implicit $x11, implicit $x3, 
implicit $x4, implicit $x5, implicit $x2, implicit-def $r1, implicit-def $x3
 ; MIR64:   ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
 
 define dso_local zeroext i1 @caller2() local_unnamed_addr {
@@ -58,7 +58,7 @@ entry:
 ; CHECK64: ld [[REG:[0-9]+]], L..C{{[0-9]+}}(2)  # @fp
 ; CHECK32: lwz [[REG:[0-9]+]], L..C{{[0-9]+}}(2) # @fp
 ; CHECK32: lwz 11, 0([[REG]])
-; CHECK: bl .__ptrgl[PR]
+; CHECK: bl ._ptrgl[PR]
 ; CHECK32-NEXT: lwz 2, 20(1)
 ; CHECK64-NEXT: ld 2, 40(1)
 
@@ -67,7 +67,7 @@ entry:
 ; MIR32:   %1:gprc = LWZ 0, killed %0 :: (dereferenceable load (s32) from @fp, 
align 8)
 ; MIR32:   ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1
 ; MIR32:   $r11 = COPY %1
-; MIR32:   BL_RESTORE &".__ptrgl[PR]", csr_aix32, implicit-def dead $lr, 
implicit-def dead $r2, implicit $rm, implicit $r1, implicit $r11, implicit $r3, 
implicit $f1, implicit $r6, implicit $r2, implicit-def $r1, implicit-def $r3
+; MIR32:   BL_RESTORE &"._ptrgl[PR]", csr_aix32, implicit-def dead $lr, 
implicit-def dead $r2, implicit $rm, implicit $r1, implicit $r11, implicit $r3, 
implicit $f1, implicit $r6, implicit $r2, implicit-def $r1, implicit-def $r3
 ; MIR32:   ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1
 
 ; MIR64: name:            caller2
@@ -75,7 +75,7 @@ entry:
 ; MIR64:   %1:g8rc = LD 0, killed %0 :: (dereferenceable load (s64) from @fp)
 ; MIR64:   ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
 ; MIR64:   $x11 = COPY %1
-; MIR64:   BL8_RESTORE &".__ptrgl[PR]", csr_ppc64, implicit-def dead $lr8, 
implicit-def dead $x2, implicit $rm, implicit $x1, implicit $x11, implicit $x3, 
implicit $f1, implicit $x5, implicit $x2, implicit-def $r1, implicit-def $x3
+; MIR64:   BL8_RESTORE &"._ptrgl[PR]", csr_ppc64, implicit-def dead $lr8, 
implicit-def dead $x2, implicit $rm, implicit $x1, implicit $x11, implicit $x3, 
implicit $f1, implicit $x5, implicit $x2, implicit-def $r1, implicit-def $x3
 ; MIR64:   ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
 
-; CHECK: .extern .__ptrgl[PR]
+; CHECK: .extern ._ptrgl[PR]

>From e7d8a5aa0f33ce8bb11f7b5b8046701ebf327eb8 Mon Sep 17 00:00:00 2001
From: Sean Fertile <[email protected]>
Date: Thu, 23 Apr 2026 10:02:43 -0400
Subject: [PATCH 10/26] Remove unused target machine locals.

---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp 
b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index b914d4b8d70ae..0a21e148171ad 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -5631,8 +5631,6 @@ buildCallOperands(SmallVectorImpl<SDValue> &Ops,
   // MVT for a general purpose register.
   const MVT RegVT = Subtarget.getScalarIntVT();
 
-  const TargetMachine &TM = Subtarget.getTargetMachine();
-
   // First operand is always the chain.
   Ops.push_back(Chain);
 
@@ -5716,8 +5714,6 @@ SDValue PPCTargetLowering::FinishCall(
     unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
     SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
 
-  const auto &TM = getTargetMachine();
-
   if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
       Subtarget.isAIXABI())
     setUsesTOCBasePtr(DAG);
@@ -7781,8 +7777,6 @@ SDValue PPCTargetLowering::LowerCall_AIX(
   if (!MemOpChains.empty())
     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
 
-  const auto &TM = getTargetMachine();
-
   // For indirect calls, we need to save the TOC base to the stack for
   // restoration after the call.
   if (CFlags.IsIndirect && !Subtarget.noInlineGlue()) {

>From 783499cd51a4e401230253c5918e4e7cc34f6d73 Mon Sep 17 00:00:00 2001
From: Sean Fertile <[email protected]>
Date: Thu, 23 Apr 2026 10:09:52 -0400
Subject: [PATCH 11/26] Use the existing mechanism for emitting external
 symbols.

---
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp 
b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index b1fb08d5d22ed..d079fc09a105c 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -3041,15 +3041,6 @@ void PPCAIXAsmPrinter::emitGCOVRefs() {
 }
 
 void PPCAIXAsmPrinter::emitEndOfAsmFile(Module &M) {
-  // If we are using out of line pointer glue we have to emit the
-  // linkage for it.
-  if (OutContext.hasXCOFFSection(
-          "._ptrgl", XCOFF::CsectProperties(XCOFF::XMC_PR, XCOFF::XTY_ER))) {
-    MCSymbol *PtrGlueSym = OutContext.getOrCreateSymbol("._ptrgl[PR]");
-    OutStreamer->emitXCOFFSymbolLinkageWithVisibility(PtrGlueSym, MCSA_Extern,
-                                                      MCSA_Invalid);
-  }
-
   // If there are no functions and there are no toc-data definitions in this
   // module, we will never need to reference the TOC base.
   if (M.empty() && TOCDataGlobalVars.empty())
@@ -3272,6 +3263,8 @@ void PPCAIXAsmPrinter::emitInstruction(const MachineInstr 
*MI) {
   case PPC::BL:
   case PPC::BL8_NOP:
   case PPC::BL_NOP: {
+  case PPC::BL_RESTORE:
+  case PPC::BL8_RESTORE:
     const MachineOperand &MO = MI->getOperand(0);
     if (MO.isSymbol()) {
       auto *S = static_cast<MCSymbolXCOFF *>(
@@ -3312,6 +3305,8 @@ void PPCAIXAsmPrinter::emitInstruction(const MachineInstr 
*MI) {
 bool PPCAIXAsmPrinter::doFinalization(Module &M) {
   for (MCSymbol *Sym : ExtSymSDNodeSymbols)
     OutStreamer->emitSymbolAttribute(Sym, MCSA_Extern);
+
+
   return PPCAsmPrinter::doFinalization(M);
 }
 

>From cc8cd22bc281e81604a45e789b83710ee7dd2204 Mon Sep 17 00:00:00 2001
From: Sean Fertile <[email protected]>
Date: Thu, 23 Apr 2026 10:48:38 -0400
Subject: [PATCH 12/26] Move comment.

---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp 
b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 0a21e148171ad..943af330eaf13 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -5542,12 +5542,6 @@ static void prepareDescriptorIndirectCall(SelectionDAG 
&DAG, SDValue &Callee,
   // copies together, a TOC access in the caller could be scheduled between
   // the assignment of the callee TOC and the branch to the callee, which leads
   // to incorrect code.
-  // On AIX there is a feature ("out of line glue code") which uses a special
-  // trampoline function __ptrgl to do the indirect call. If this option is
-  // enabled we instead simply load the address of the descriptor into r11,
-  // with the arguments in the 'normal' registers and branch to the __ptrgl
-  // stub.
-
   // Start by loading the function address from the descriptor.
   SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
   auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
@@ -5613,6 +5607,11 @@ static void prepareOutOfLineGlueCall(SelectionDAG &DAG, 
SDValue &Callee,
                                      SDValue CallSeqStart, const CallBase *CB,
                                      const SDLoc &dl, bool hasNest,
                                      const PPCSubtarget &Subtarget) {
+  // On AIX there is a feature ("out of line glue code") which uses a special
+  // trampoline function ._ptrgl to do the indirect call. If this option is
+  // enabled we instead simply load the address of the descriptor into gpr11,
+  // with the arguments in the 'normal' registers and branch to the ._ptrgl
+  // stub.
   const MCRegister PtrGlueReg = Subtarget.getGlueCodeDescriptorRegister();
   SDValue MoveToPhysicalReg =
       DAG.getCopyToReg(Chain, dl, PtrGlueReg, Callee, Glue);

>From 1a0d17900841104b02bc8e9d5b72689de91c958d Mon Sep 17 00:00:00 2001
From: Sean Fertile <[email protected]>
Date: Thu, 23 Apr 2026 10:50:23 -0400
Subject: [PATCH 13/26] Restore whitespace that was unintentionally changed.

---
 llvm/lib/CodeGen/CommandFlags.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/CodeGen/CommandFlags.cpp 
b/llvm/lib/CodeGen/CommandFlags.cpp
index 9459797a3d074..403fd49c56984 100644
--- a/llvm/lib/CodeGen/CommandFlags.cpp
+++ b/llvm/lib/CodeGen/CommandFlags.cpp
@@ -635,6 +635,7 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple 
&TheTriple) {
   Options.LoopAlignment = getAlignLoops();
   Options.JMCInstrument = getJMCInstrument();
   Options.XCOFFReadOnlyPointers = getXCOFFReadOnlyPointers();
+
   Options.MCOptions = mc::InitMCTargetOptionsFromFlags();
 
   Options.ThreadModel = getThreadModel();

>From 1af8959100315e4bc276c623ff5752bbe7a3fe80 Mon Sep 17 00:00:00 2001
From: Sean Fertile <[email protected]>
Date: Thu, 23 Apr 2026 11:28:32 -0400
Subject: [PATCH 14/26] Add check that no-inline-glue is used on AIX.

---
 llvm/lib/Target/PowerPC/PPC.td                  | 4 +++-
 llvm/lib/Target/PowerPC/PPCSubtarget.cpp        | 4 ++++
 llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll | 5 +++++
 3 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td
index b6bcec5305dd3..1e60dd5606be5 100644
--- a/llvm/lib/Target/PowerPC/PPC.td
+++ b/llvm/lib/Target/PowerPC/PPC.td
@@ -362,9 +362,11 @@ def FeaturePredictableSelectIsExpensive :
 def FeatureFastMFLR : SubtargetFeature<"fast-MFLR", "HasFastMFLR", "true",
                                        "MFLR is a fast instruction">;
 
+// When enabled indirect calls will place the address of the descriptor
+// into r11 and do a direct branch to the ._ptrgl routine.
 def FeatureNoInlineGlue :
  SubtargetFeature<"no-inline-glue", "NoInlineGlue", "true",
-                  "Use external ._ptrgl for indirect calls">;
+                  "Use ._ptrgl for indirect calls">;
 
 
//===----------------------------------------------------------------------===//
 // PowerPC Instruction Predicate Definitions.
diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp 
b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
index 85e022a2ba6fc..3ea7d70b42ccf 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -144,6 +144,10 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, 
StringRef TuneCPU,
     report_fatal_error("The aix-shared-lib-tls-model-opt attribute "
                        "is only supported on AIX in 64-bit mode.\n",
                        false);
+
+  if (NoInlineGlue && !getTargetTriple().isOSAIX())
+    report_fatal_error("no-inline-glue feature is only supported on AIX\n",
+                       false);
 }
 
 bool PPCSubtarget::enableMachineScheduler() const { return true; }
diff --git a/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll 
b/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll
index 0d7a2d988ddc2..295cb85079c4b 100644
--- a/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll
@@ -12,6 +12,11 @@
 ; RUN:   -mtriple powerpc64-ibm-aix-xcoff -mattr=+no-inline-glue < %s | \
 ; RUN:   FileCheck --check-prefix=MIR64 %s
 
+; RUN: not llc -verify-machineinstrs -mcpu=pwr8 -mtriple powerpc-unknown-linux 
\
+; RUN:   -mattr=+no-inline-glue 2>&1 < %s | FileCheck --check-prefix=ERROR %s
+
+; ERROR: no-inline-glue feature is only supported on AIX
+
 @a = dso_local global i32 55, align 4
 @d = dso_local local_unnamed_addr global double 3.141590e+00, align 8
 @fp = dso_local local_unnamed_addr global ptr null, align 8

>From 50a85a23df901c774bf9920f7acfaaa628794860 Mon Sep 17 00:00:00 2001
From: Sean Fertile <[email protected]>
Date: Thu, 23 Apr 2026 11:59:00 -0400
Subject: [PATCH 15/26] Undo whitespace change.

---
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp 
b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index d079fc09a105c..60a4897352121 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -3305,8 +3305,6 @@ void PPCAIXAsmPrinter::emitInstruction(const MachineInstr 
*MI) {
 bool PPCAIXAsmPrinter::doFinalization(Module &M) {
   for (MCSymbol *Sym : ExtSymSDNodeSymbols)
     OutStreamer->emitSymbolAttribute(Sym, MCSA_Extern);
-
-
   return PPCAsmPrinter::doFinalization(M);
 }
 

>From 1c58b9a322c5c31350882eb85a3e7a786358a5c6 Mon Sep 17 00:00:00 2001
From: Sean Fertile <[email protected]>
Date: Fri, 24 Apr 2026 09:59:59 -0400
Subject: [PATCH 16/26] Fix fallthrough errors on switch.

---
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp 
b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 60a4897352121..fdf7e35283021 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -3262,9 +3262,9 @@ void PPCAIXAsmPrinter::emitInstruction(const MachineInstr 
*MI) {
   case PPC::BL8:
   case PPC::BL:
   case PPC::BL8_NOP:
-  case PPC::BL_NOP: {
+  case PPC::BL_NOP:
   case PPC::BL_RESTORE:
-  case PPC::BL8_RESTORE:
+  case PPC::BL8_RESTORE: {
     const MachineOperand &MO = MI->getOperand(0);
     if (MO.isSymbol()) {
       auto *S = static_cast<MCSymbolXCOFF *>(

>From 6b6f41a7566481efc03ae1a18bb172f05817bbbe Mon Sep 17 00:00:00 2001
From: Sean Fertile <[email protected]>
Date: Mon, 27 Apr 2026 14:05:10 -0400
Subject: [PATCH 17/26] Add a lit test to check mixing of inline and out of
 line glue.

---
 .../CodeGen/PowerPC/aix-mixed-inline-glue.ll  | 31 +++++++++++++++++++
 1 file changed, 31 insertions(+)
 create mode 100644 llvm/test/CodeGen/PowerPC/aix-mixed-inline-glue.ll

diff --git a/llvm/test/CodeGen/PowerPC/aix-mixed-inline-glue.ll 
b/llvm/test/CodeGen/PowerPC/aix-mixed-inline-glue.ll
new file mode 100644
index 0000000000000..fb86e251e912d
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-mixed-inline-glue.ll
@@ -0,0 +1,31 @@
+; RUN: llc -stop-after=finalize-isel  -verify-machineinstrs -mcpu=pwr8 \
+; RUN:   -mtriple powerpc-ibm-aix-xcoff < %s | FileCheck %s
+
+; RUN: llc -stop-after=finalize-isel  -verify-machineinstrs -mcpu=pwr8 \
+; RUN:   -mtriple powerpc64-ibm-aix-xcoff < %s | \
+; RUN:   FileCheck %s --check-prefix=CHECK64
+
+define i32 @OutOfLine(ptr noundef readonly captures(none) %fp) #0 {
+entry:
+  %call = tail call i32 %fp()
+  ret i32 %call
+}
+
+define i32 @InLine(ptr noundef readonly captures(none) %fp) #1 {
+entry:
+  %call = tail call i32 %fp()
+  ret i32 %call
+}
+
+attributes #0 = {"target-features"="+no-inline-glue"}
+attributes #1 = {"target-features"="-no-inline-glue"}
+
+; CHECK: name:            OutOfLine
+; CHECK:  BL_RESTORE &"._ptrgl[PR]", csr_aix32, implicit-def dead $lr, 
implicit-def dead $r2, implicit $rm, implicit $r1, implicit $r11, implicit $r2, 
implicit-def $r1, implicit-def $r3
+; CHECK: name:            InLine
+; CHECK: BCTRL_LWZinto_toc 20, $r1, csr_aix32, implicit-def dead $lr, 
implicit-def dead $r2, implicit $ctr, implicit $rm, implicit $r11, implicit 
$r2, implicit-def $r1, implicit-def $r3
+
+; CHECK64: name:            OutOfLine
+; CHECK64:  BL8_RESTORE &"._ptrgl[PR]", csr_ppc64, implicit-def dead $lr8, 
implicit-def dead $x2, implicit $rm, implicit $x1, implicit $x11, implicit $x2, 
implicit-def $r1, implicit-def $x3
+; CHECK64: name:            InLine
+; CHECK64:   BCTRL8_LDinto_toc 40, $x1, csr_ppc64, implicit-def dead $lr8, 
implicit-def dead $x2, implicit $ctr8, implicit $rm, implicit $x11, implicit 
$x2, implicit-def $r1, implicit-def $x3

>From 118e4898158af4a01262aa92f66d308d47bbaf59 Mon Sep 17 00:00:00 2001
From: Sean Fertile <[email protected]>
Date: Mon, 27 Apr 2026 14:07:35 -0400
Subject: [PATCH 18/26] Remove local_unnamed_addr from lit test.

---
 llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll 
b/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll
index 295cb85079c4b..76006d161f6fc 100644
--- a/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll
@@ -18,10 +18,10 @@
 ; ERROR: no-inline-glue feature is only supported on AIX
 
 @a = dso_local global i32 55, align 4
-@d = dso_local local_unnamed_addr global double 3.141590e+00, align 8
-@fp = dso_local local_unnamed_addr global ptr null, align 8
+@d = dso_local global double 3.141590e+00, align 8
+@fp = dso_local global ptr null, align 8
 
-define i32 @caller1(ptr noundef readonly captures(none) %fp) 
local_unnamed_addr {
+define i32 @caller1(ptr noundef readonly captures(none) %fp) {
 entry:
   %call = tail call i32 %fp(i32 signext 1, i32 signext 2, i32 signext 3)
   ret i32 %call
@@ -50,7 +50,7 @@ entry:
 ; MIR64:   BL8_RESTORE &"._ptrgl[PR]", csr_ppc64, implicit-def dead $lr8, 
implicit-def dead $x2, implicit $rm, implicit $x1, implicit $x11, implicit $x3, 
implicit $x4, implicit $x5, implicit $x2, implicit-def $r1, implicit-def $x3
 ; MIR64:   ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
 
-define dso_local zeroext i1 @caller2() local_unnamed_addr {
+define dso_local zeroext i1 @caller2() {
 entry:
   %0 = load ptr, ptr @fp
   %1 = load i32, ptr @a

>From 7465f040e68f8635ddd0e52e1b34241445949a00 Mon Sep 17 00:00:00 2001
From: Sean Fertile <[email protected]>
Date: Wed, 29 Apr 2026 13:28:47 -0400
Subject: [PATCH 19/26] Add rounding mode setting version of new calls.

---
 llvm/lib/Target/PowerPC/P10InstrResources.td  |  2 +-
 llvm/lib/Target/PowerPC/P9InstrResources.td   |  2 ++
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   |  3 +++
 llvm/lib/Target/PowerPC/PPCInstr64Bit.td      | 17 +++++++++++----
 llvm/lib/Target/PowerPC/PPCInstrInfo.td       | 21 ++++++++++++++++---
 .../PowerPC/aix-no-inline-glue-strictfp.ll    | 18 ++++++++++++++++
 6 files changed, 55 insertions(+), 8 deletions(-)
 create mode 100644 llvm/test/CodeGen/PowerPC/aix-no-inline-glue-strictfp.ll

diff --git a/llvm/lib/Target/PowerPC/P10InstrResources.td 
b/llvm/lib/Target/PowerPC/P10InstrResources.td
index 825ffd56da5e0..495346855a4e9 100644
--- a/llvm/lib/Target/PowerPC/P10InstrResources.td
+++ b/llvm/lib/Target/PowerPC/P10InstrResources.td
@@ -317,7 +317,7 @@ def : InstRW<[P10W_BR_2C, P10W_DISP_ANY, P10BR_Read],
     BCLR, BCLRn, BDNZLR, BDNZLR8, BDNZLRm, BDNZLRp, BDZLR, BDZLR8, BDZLRm, 
BDZLRp, gBCLR,
     BCLRL, BCLRLn, BDNZLRL, BDNZLRLm, BDNZLRLp, BDZLRL, BDZLRLm, BDZLRLp, 
gBCLRL,
     BL, BL8, BL8_NOP, BL8_NOP_RM, BL8_NOP_TLS, BL8_NOTOC, BL8_NOTOC_RM, 
BL8_NOTOC_TLS, BL8_RM, BL8_TLS, BL8_TLS_, BLR, BLR8, BLRL, BL_NOP, BL_NOP_RM, 
BL_RM, BL_TLS,
-    BLA, BLA8, BLA8_NOP, BLA8_NOP_RM, BLA8_RM, BLA_RM, BL_RESTORE, BL8_RESTORE
+    BLA, BLA8, BLA8_NOP, BLA8_NOP_RM, BLA8_RM, BLA_RM, BL_RESTORE, 
BL8_RESTORE, BL_RESTORE_RM, BL8_RESTORE_RM
 )>;
 
 // 2 Cycles Branch operations, 2 input operands
diff --git a/llvm/lib/Target/PowerPC/P9InstrResources.td 
b/llvm/lib/Target/PowerPC/P9InstrResources.td
index 8c7637e3b93f1..64d458f806ca7 100644
--- a/llvm/lib/Target/PowerPC/P9InstrResources.td
+++ b/llvm/lib/Target/PowerPC/P9InstrResources.td
@@ -1328,6 +1328,8 @@ def : InstRW<[P9_BR_2C, DISP_BR_1C],
   BCn,
   BL_RESTORE,
   BL8_RESTORE,
+  BL_RESTORE_RM,
+  BL8_RESTORE_RM,
   CTRL_DEP
 )>;
 
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp 
b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 943af330eaf13..9dddad9c00a70 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -5392,6 +5392,9 @@ static unsigned 
getCallOpcode(PPCTargetLowering::CallFlags CFlags,
     case PPCISD::BCTRL:
       RetOpc = PPCISD::BCTRL_RM;
       break;
+    case PPCISD::BL_LOAD_TOC:
+      RetOpc = PPCISD::BL_LOAD_TOC_RM;
+      break;
     case PPCISD::CALL_NOTOC:
       RetOpc = PPCISD::CALL_NOTOC_RM;
       break;
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td 
b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index c6458d6caa327..003b96d6bd405 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -214,7 +214,15 @@ let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
                       "bl $LI\n\tld 2, 40(1)", IIC_BrB,
                       []>,
                        Requires<[IsPPC64]>;
-  // TODO FIXME Add _RM version of call.
+}
+
+let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
+    Defs = [LR8, X2, RM], Uses = [RM, X1], RST = 2, RA = 1, D = 40 in {
+  def BL8_RESTORE_RM : IForm_and_DForm_1<18, 0, 1, 58,
+                        (outs), (ins calltarget:$LI),
+                        "bl $LI\n\tld 2, 40(1)", IIC_BrB,
+                        []>,
+                         Requires<[IsPPC64]>;
 }
 
 } // Interpretation64Bit
@@ -263,15 +271,16 @@ def : Pat<(PPCcall (i64 mcsym:$dst)),
           (BL8 mcsym:$dst)>;
 def : Pat<(PPCcall_nop (i64 mcsym:$dst)),
           (BL8_NOP mcsym:$dst)>;
+def : Pat<(PPCbl_load_toc (i64 texternalsym:$dst)),
+          (BL8_RESTORE texternalsym:$dst)>;
 
 def : Pat<(PPCcall_rm (i64 mcsym:$dst)),
           (BL8_RM mcsym:$dst)>;
 def : Pat<(PPCcall_nop_rm (i64 mcsym:$dst)),
           (BL8_NOP_RM mcsym:$dst)>;
 
-def : Pat<(PPCbl_load_toc (i64 texternalsym:$dst)),
-          (BL8_RESTORE texternalsym:$dst)>;
-// TODO FIXME add _RM version.
+def : Pat<(PPCbl_load_toc_rm (i64 texternalsym:$dst)),
+          (BL8_RESTORE_RM texternalsym:$dst)>;
 
 // Atomic operations
 // FIXME: some of these might be used with constant operands. This will result
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td 
b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 58a1784b9faa0..e8e6dbabcf0ba 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -577,6 +577,10 @@ def PPCbctrl_load_toc_rm : 
SDNode<"PPCISD::BCTRL_LOAD_TOC_RM",
                                   SDTypeProfile<0, 1, []>,
                                   [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                                    SDNPVariadic]>;
+def PPCbl_load_toc_rm : SDNode<"PPCISD::BL_LOAD_TOC_RM",
+                            SDTypeProfile<0, 1, []>,
+                            [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+                            SDNPVariadic]>;
 
 // Return with a glue operand, matched by 'blr'
 def PPCretglue   : SDNode<"PPCISD::RET_GLUE", SDTNone,
@@ -1884,6 +1888,15 @@ let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
 
 }
 
+let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
+    Defs = [LR, R2, RM], Uses = [RM, R1], RST = 2, RA = 1, D = 20 in {
+  def BL_RESTORE_RM :
+    IForm_and_DForm_1<18, 0, 1, 32,
+      (outs), (ins calltarget:$LI),
+      "bl $LI\n\tlwz 2, 20(1)", IIC_BrB,
+      []>, Requires<[IsPPC32]>;
+}
+
 let isCodeGenOnly = 1, hasSideEffects = 0 in {
 
 let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1,
@@ -3464,6 +3477,9 @@ def : Pat<(PPCcall_nop (i32 mcsym:$dst)),
 def : Pat<(PPCcall_nop (i32 texternalsym:$dst)),
           (BL_NOP texternalsym:$dst)>;
 
+def : Pat<(PPCbl_load_toc (i32 texternalsym:$dst)),
+          (BL_RESTORE texternalsym:$dst)>;
+
 def : Pat<(PPCcall_rm (i32 mcsym:$dst)),
           (BL_RM mcsym:$dst)>;
 
@@ -3473,9 +3489,8 @@ def : Pat<(PPCcall_nop_rm (i32 mcsym:$dst)),
 def : Pat<(PPCcall_nop_rm (i32 texternalsym:$dst)),
           (BL_NOP_RM texternalsym:$dst)>;
 
-def : Pat<(PPCbl_load_toc (i32 texternalsym:$dst)),
-          (BL_RESTORE texternalsym:$dst)>;
-// TODO FIXME add _RM version of call.
+def : Pat<(PPCbl_load_toc_rm (i32 texternalsym:$dst)),
+          (BL_RESTORE_RM texternalsym:$dst)>;
 
 def : Pat<(PPCtc_return (i32 tglobaladdr:$dst),  imm:$imm),
           (TCRETURNdi tglobaladdr:$dst, imm:$imm)>;
diff --git a/llvm/test/CodeGen/PowerPC/aix-no-inline-glue-strictfp.ll 
b/llvm/test/CodeGen/PowerPC/aix-no-inline-glue-strictfp.ll
new file mode 100644
index 0000000000000..9f9fe910dd46d
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-no-inline-glue-strictfp.ll
@@ -0,0 +1,18 @@
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple powerpc-ibm-aix-xcoff \
+; RUN:   -stop-after=finalize-isel -mattr=+no-inline-glue < %s | FileCheck %s
+
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple powerpc64-ibm-aix-xcoff \
+; RUN:   -stop-after=finalize-isel -mattr=+no-inline-glue < %s | \
+; RUN:  FileCheck --check-prefix=CHECK64 %s
+
+define i32 @has_strictfp(ptr noundef readonly captures(none) %fp) #0 {
+entry:
+  %call = tail call i32 %fp() strictfp
+  ret i32 %call
+}
+
+attributes #0 = { strictfp }
+
+; CHECK: BL_RESTORE_RM &"._ptrgl[PR]", csr_aix32, implicit-def dead $lr, 
implicit-def dead $r2, implicit-def dead $rm, implicit $rm, implicit $r1, 
implicit $r11, implicit $r2, implicit-def $r1, implicit-def $r3
+
+; CHECK64: BL8_RESTORE_RM &"._ptrgl[PR]", csr_ppc64, implicit-def dead $lr8, 
implicit-def dead $x2, implicit-def dead $rm, implicit $rm, implicit $x1, 
implicit $x11, implicit $x2, implicit-def $r1, implicit-def $x3

>From 1dfef9f52fe9ee4b9b738282f6e09bbe2189302c Mon Sep 17 00:00:00 2001
From: Sean Fertile <[email protected]>
Date: Thu, 30 Apr 2026 09:04:49 -0400
Subject: [PATCH 20/26] Missed adding rounding mode calls to debug switch in
 code emitter.

---
 llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp 
b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index fa92d45e07f27..1acb63b7bf1aa 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -155,7 +155,9 @@ bool PPCMCCodeEmitter::isNoTOCCallInstr(const MCInst &MI) 
const {
   case PPC::BCTRL_LWZinto_toc:
   case PPC::BCTRL_LWZinto_toc_RM:
   case PPC::BL_RESTORE:
+  case PPC::BL_RESTORE_RM:
   case PPC::BL8_RESTORE:
+  case PPC::BL8_RESTORE_RM:
   case PPC::TAILBCTR:
   case PPC::TAILB:
   case PPC::TAILBA:

>From dba8b5cd0a0de81c06722eede7779e7268377735 Mon Sep 17 00:00:00 2001
From: Sean Fertile <[email protected]>
Date: Fri, 1 May 2026 13:05:30 -0400
Subject: [PATCH 21/26] Changed scheduling info for P7.

---
 llvm/lib/Target/PowerPC/PPCScheduleP7.td | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/PowerPC/PPCScheduleP7.td 
b/llvm/lib/Target/PowerPC/PPCScheduleP7.td
index 09b811128150f..56d1114bb0d1e 100644
--- a/llvm/lib/Target/PowerPC/PPCScheduleP7.td
+++ b/llvm/lib/Target/PowerPC/PPCScheduleP7.td
@@ -115,6 +115,9 @@ let SchedModel = P7Model in {
   def : InstRW<[P7_BRU_NONE, P7_DISP_BR],
     (instregex "^B(L)?(A)?(8)?(_NOP|_NOTOC)?(_TLS|_RM)?(_)?$")>;
 
+  def : InstRW<[P7_BRU_NONE, P7_DISP_BR, P7_LSU_2C, P7_DISP_LS],
+    (instregex "^BL(8)?_RESTORE(_RM)?$")>;
+
   def : InstRW<[P7_BRU_3C, P7_DISP_BR], (instrs
     BDZLRLp, BDZLRm, BDZLRp, BDZLm, BDZLp, BDZm, BDZp,
     BDNZ, BDNZ8, BDNZA, BDNZAm, BDNZAp, BDNZL, BDNZLA, BDNZLAm, BDNZLAp, 
BDNZLR,
@@ -128,7 +131,7 @@ let SchedModel = P7Model in {
     BCCCTR, BCCCTR8, BCCCTRL, BCCCTRL8, BCCL, BCCLA, BCCLR, BCCLRL, BCCTR,
     BCCTR8, BCCTR8n, BCCTRL, BCCTRL8, BCCTRL8n, BCCTRLn, BCCTRn, gBC, gBCA,
     gBCAat, gBCCTR, gBCCTRL, gBCL, gBCLA, gBCLAat, gBCLR, gBCLRL, gBCLat, 
gBCat,
-    MFCTR, MFCTR8, MFLR, MFLR8, BL_RESTORE, BL8_RESTORE
+    MFCTR, MFCTR8, MFLR, MFLR8
   )>;
 
   def : InstRW<[P7_BRU_4C], (instrs MTLR, MTLR8, MTCTR, MTCTR8, MTCTR8loop, 
MTCTRloop)>;

>From efa019f5e413bc5a8562cc18575d8dc29ad55da1 Mon Sep 17 00:00:00 2001
From: Sean Fertile <[email protected]>
Date: Tue, 5 May 2026 11:59:22 -0400
Subject: [PATCH 22/26] Add scheduling info for P8.

---
 llvm/lib/Target/PowerPC/PPCScheduleP8.td | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/llvm/lib/Target/PowerPC/PPCScheduleP8.td 
b/llvm/lib/Target/PowerPC/PPCScheduleP8.td
index 3a2d9d9b3bc19..468584e79bac3 100644
--- a/llvm/lib/Target/PowerPC/PPCScheduleP8.td
+++ b/llvm/lib/Target/PowerPC/PPCScheduleP8.td
@@ -261,6 +261,10 @@ let SchedModel = P8Model in {
     (instregex "^SUBF(M|Z)?(E)?(IC)?(4|8)?$"),
     (instregex "^NEG(8)?(O)?$"))>;
 
+  // Special pseudo instruction that combines a direct call with a toc restore
+  def : InstRW<[P8_BR_2C, P8_ISSUE_BR, P8_LU_or_LS_FX_3C, P8_ISSUE_FXLD], 
(instrs
+    (instregex "^BL(8)?_RESTORE(_RM)$"))>;
+
   // Instructions of PM pipeline
 
   def : InstRW<[P8_PM_2C, P8_ISSUE_VSX], (instrs

>From 7cc636418a155311c671600f705ddd138f020c3e Mon Sep 17 00:00:00 2001
From: Sean Fertile <[email protected]>
Date: Tue, 5 May 2026 14:07:34 -0400
Subject: [PATCH 23/26] Fix the P9 related scheduling info.

---
 llvm/lib/Target/PowerPC/P9InstrResources.td | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/P9InstrResources.td 
b/llvm/lib/Target/PowerPC/P9InstrResources.td
index 64d458f806ca7..3b754a56b0535 100644
--- a/llvm/lib/Target/PowerPC/P9InstrResources.td
+++ b/llvm/lib/Target/PowerPC/P9InstrResources.td
@@ -1296,6 +1296,12 @@ def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, 
DISP_1C],
 
 // Branch Instructions
 
+// Pseduo instruction that encapsulates a branch and a toc load.
+def : InstRW<[P9_BR_2C, DISP_BR_1C, P9_LS_4C, IP_AGEN_1C, DISP_1C],
+      (instrs
+    (instregex "BL(8)?_RESTORE(_RM)?$")
+)>;
+
 // Two Cycle Branch
 def : InstRW<[P9_BR_2C, DISP_BR_1C],
       (instrs
@@ -1326,10 +1332,6 @@ def : InstRW<[P9_BR_2C, DISP_BR_1C],
   BCTRL8_LDinto_toc_RM,
   BCTRL_LWZinto_toc_RM,
   BCn,
-  BL_RESTORE,
-  BL8_RESTORE,
-  BL_RESTORE_RM,
-  BL8_RESTORE_RM,
   CTRL_DEP
 )>;
 

>From de49aba23ba59f06fd65d1ca4c947113d30c4ad8 Mon Sep 17 00:00:00 2001
From: Sean Fertile <[email protected]>
Date: Wed, 6 May 2026 14:14:12 -0400
Subject: [PATCH 24/26] Update P10 information.

---
 llvm/lib/Target/PowerPC/P10InstrResources.td | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/PowerPC/P10InstrResources.td 
b/llvm/lib/Target/PowerPC/P10InstrResources.td
index 495346855a4e9..13d3cb9f41c41 100644
--- a/llvm/lib/Target/PowerPC/P10InstrResources.td
+++ b/llvm/lib/Target/PowerPC/P10InstrResources.td
@@ -317,7 +317,7 @@ def : InstRW<[P10W_BR_2C, P10W_DISP_ANY, P10BR_Read],
     BCLR, BCLRn, BDNZLR, BDNZLR8, BDNZLRm, BDNZLRp, BDZLR, BDZLR8, BDZLRm, 
BDZLRp, gBCLR,
     BCLRL, BCLRLn, BDNZLRL, BDNZLRLm, BDNZLRLp, BDZLRL, BDZLRLm, BDZLRLp, 
gBCLRL,
     BL, BL8, BL8_NOP, BL8_NOP_RM, BL8_NOP_TLS, BL8_NOTOC, BL8_NOTOC_RM, 
BL8_NOTOC_TLS, BL8_RM, BL8_TLS, BL8_TLS_, BLR, BLR8, BLRL, BL_NOP, BL_NOP_RM, 
BL_RM, BL_TLS,
-    BLA, BLA8, BLA8_NOP, BLA8_NOP_RM, BLA8_RM, BLA_RM, BL_RESTORE, 
BL8_RESTORE, BL_RESTORE_RM, BL8_RESTORE_RM
+    BLA, BLA8, BLA8_NOP, BLA8_NOP_RM, BLA8_RM, BLA_RM
 )>;
 
 // 2 Cycles Branch operations, 2 input operands
@@ -1233,6 +1233,16 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_PAIR, P10W_FX_3C, 
P10FX_Read],
     MTCRF, MTCRF8
 )>;
 
+// 2 Cycle Branch operation, 1 input operand followed by a
+// 6 cycle Load operation, 0 input operands.
+def : InstRW<[P10W_BR_2C, P10W_DISP_ANY, P10BR_Read, P10W_LD_6C, 
P10W_DISP_ANY],
+      (instrs
+    BL_RESTORE,
+    BL_RESTORE_RM,
+    BL8_RESTORE,
+    BL8_RESTORE_RM
+)>;
+
 // 6 Cycles Load operations, 0 input operands
 def : InstRW<[P10W_LD_6C, P10W_DISP_ANY],
       (instrs

>From 8c4236d4aab4c81c7944f6823c757749eb154311 Mon Sep 17 00:00:00 2001
From: Sean Fertile <[email protected]>
Date: Thu, 7 May 2026 10:23:38 -0400
Subject: [PATCH 25/26] Add driver option and rename the attribute.

---
 clang/include/clang/Options/Options.td        |  5 +++
 clang/lib/Driver/ToolChains/Arch/PPC.cpp      |  9 +++++
 clang/test/Driver/ppc-inline-glue.c           | 37 +++++++++++++++++++
 llvm/lib/Target/PowerPC/PPC.td                |  2 +-
 llvm/lib/Target/PowerPC/PPCSubtarget.cpp      |  2 +-
 .../CodeGen/PowerPC/aix-mixed-inline-glue.ll  |  4 +-
 .../PowerPC/aix-no-inline-glue-strictfp.ll    |  4 +-
 .../CodeGen/PowerPC/aix-no-inline-glue.ll     | 12 +++---
 8 files changed, 63 insertions(+), 12 deletions(-)
 create mode 100644 clang/test/Driver/ppc-inline-glue.c

diff --git a/clang/include/clang/Options/Options.td 
b/clang/include/clang/Options/Options.td
index bffb3dfb27485..b04d41d119541 100644
--- a/clang/include/clang/Options/Options.td
+++ b/clang/include/clang/Options/Options.td
@@ -5964,6 +5964,11 @@ def mxcoff_roptr : Flag<["-"], "mxcoff-roptr">, 
Group<m_Group>,
   Flags<[TargetSpecific]>, Visibility<[ClangOption, CC1Option]>,
   HelpText<"Place constant objects with relocatable address values in the RO 
data section and add -bforceimprw to the linker flags (AIX only)">;
 def mno_xcoff_roptr : Flag<["-"], "mno-xcoff-roptr">, Group<m_Group>, 
TargetSpecific;
+def mno_inline_glue : Flag<["-"], "mno-inline-glue">, Group<m_Group>,
+  HelpText<"Use ._ptrgl routine for indirect calls (AIX only)">;
+def minline_glue : Flag<["-"], "minline-glue">, Group<m_Group>,
+  HelpText<"Emit indirect calls inline (AIX only) (default)">;
+
 
 let Flags = [TargetSpecific] in {
 def mvx : Flag<["-"], "mvx">, Group<m_Group>;
diff --git a/clang/lib/Driver/ToolChains/Arch/PPC.cpp 
b/clang/lib/Driver/ToolChains/Arch/PPC.cpp
index 17051980f34fb..0bf804266cd26 100644
--- a/clang/lib/Driver/ToolChains/Arch/PPC.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/PPC.cpp
@@ -83,6 +83,15 @@ void ppc::getPPCTargetFeatures(const Driver &D, const 
llvm::Triple &Triple,
                    true) &&
       Triple.isOSAIX())
     Features.push_back("+modern-aix-as");
+
+  if (Arg *A = Args.getLastArg(options::OPT_mno_inline_glue,
+                               options::OPT_minline_glue)) {
+    if (!Triple.isOSAIX())
+      D.Diag(diag::err_drv_unsupported_opt_for_target)
+          << A->getAsString(Args) << Triple.str();
+    else if (A->getOption().matches(options::OPT_mno_inline_glue))
+      Features.push_back("+use-ptrgl-helper");
+  }
 }
 
 ppc::ReadGOTPtrMode ppc::getPPCReadGOTPtrMode(const Driver &D, const 
llvm::Triple &Triple,
diff --git a/clang/test/Driver/ppc-inline-glue.c 
b/clang/test/Driver/ppc-inline-glue.c
new file mode 100644
index 0000000000000..792631974fdca
--- /dev/null
+++ b/clang/test/Driver/ppc-inline-glue.c
@@ -0,0 +1,37 @@
+// RUN: %clang -### --target=powerpc-ibm-aix-xcoff -mno-inline-glue %s 2>&1 | \
+// RUN:     FileCheck %s --check-prefix=NO_INLINE_GLUE
+
+// RUN: %clang -### --target=powerpc64-ibm-aix-xcoff -mno-inline-glue %s 2>&1 
| \
+// RUN:     FileCheck %s --check-prefix=NO_INLINE_GLUE
+
+// RUN: %clang -### --target=powerpc64-ibm-aix-xcoff -minline-glue %s 2>&1 | \
+// RUN:     FileCheck %s --check-prefix=INLINE_GLUE
+
+// RUN: %clang -### --target=powerpc64-ibm-aix-xcoff %s 2>&1 | \
+// RUN:     FileCheck %s --check-prefix=INLINE_GLUE
+
+// RUN: %clang -### --target=powerpc64-ibm-aix-xcoff -mno-inline-glue 
-minline-glue %s 2>&1 | \
+// RUN:     FileCheck %s --check-prefix=INLINE_GLUE
+
+// RUN: not %clang -### --target=powerpc64le-unknown-linux-gnu 
-mno-inline-glue \
+// RUN:     %s 2>&1 | FileCheck %s --check-prefix=ERR
+
+// RUN: %clang -target powerpc-unknown-aix -mno-inline-glue %s -S -emit-llvm -o 
- | \
+// RUN:     FileCheck %s
+
+// RUN: %clang -target powerpc-unknown-aix -mno-inline-glue -minline-glue %s -S 
-emit-llvm -o - | \
+// RUN:     FileCheck %s --check-prefix=DIS
+
+// NO_INLINE_GLUE: "-target-feature" "+use-ptrgl-helper"
+// INLINE_GLUE-NOT: "+use-ptrgl-helper"
+// ERR: error: unsupported option '-mno-inline-glue' for target 
'powerpc64le-unknown-linux-gnu'
+
+int test(void) {
+  return 0;
+}
+
+// CHECK: test() #0 {
+// CHECK: attributes #0 = {
+// CHECK-SAME: +use-ptrgl-helper
+
+// DIS-NOT: +use-ptrgl-helper
diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td
index 1e60dd5606be5..34bdb6a52ccb1 100644
--- a/llvm/lib/Target/PowerPC/PPC.td
+++ b/llvm/lib/Target/PowerPC/PPC.td
@@ -365,7 +365,7 @@ def FeatureFastMFLR : SubtargetFeature<"fast-MFLR", 
"HasFastMFLR", "true",
 // When enabled indirect calls will place the address of the descriptor
 // into r11 and do a direct branch to the ._ptrgl routine.
 def FeatureNoInlineGlue :
- SubtargetFeature<"no-inline-glue", "NoInlineGlue", "true",
+ SubtargetFeature<"use-ptrgl-helper", "NoInlineGlue", "true",
                   "Use ._ptrgl for indirect calls">;
 
 
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp 
b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
index 3ea7d70b42ccf..265f8877c35ac 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -146,7 +146,7 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, 
StringRef TuneCPU,
                        false);
 
   if (NoInlineGlue && !getTargetTriple().isOSAIX())
-    report_fatal_error("no-inline-glue feature is only supported on AIX\n",
+    report_fatal_error("use-ptrgl-helper feature is only supported on AIX\n",
                        false);
 }
 
diff --git a/llvm/test/CodeGen/PowerPC/aix-mixed-inline-glue.ll 
b/llvm/test/CodeGen/PowerPC/aix-mixed-inline-glue.ll
index fb86e251e912d..4f795ac7a7c34 100644
--- a/llvm/test/CodeGen/PowerPC/aix-mixed-inline-glue.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-mixed-inline-glue.ll
@@ -17,8 +17,8 @@ entry:
   ret i32 %call
 }
 
-attributes #0 = {"target-features"="+no-inline-glue"}
-attributes #1 = {"target-features"="-no-inline-glue"}
+attributes #0 = {"target-features"="+use-ptrgl-helper"}
+attributes #1 = {"target-features"="-use-ptrgl-helper"}
 
 ; CHECK: name:            OutOfLine
 ; CHECK:  BL_RESTORE &"._ptrgl[PR]", csr_aix32, implicit-def dead $lr, 
implicit-def dead $r2, implicit $rm, implicit $r1, implicit $r11, implicit $r2, 
implicit-def $r1, implicit-def $r3
diff --git a/llvm/test/CodeGen/PowerPC/aix-no-inline-glue-strictfp.ll 
b/llvm/test/CodeGen/PowerPC/aix-no-inline-glue-strictfp.ll
index 9f9fe910dd46d..8de134a22cb83 100644
--- a/llvm/test/CodeGen/PowerPC/aix-no-inline-glue-strictfp.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-no-inline-glue-strictfp.ll
@@ -1,8 +1,8 @@
 ; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple powerpc-ibm-aix-xcoff \
-; RUN:   -stop-after=finalize-isel -mattr=+no-inline-glue < %s | FileCheck %s
+; RUN:   -stop-after=finalize-isel -mattr=+use-ptrgl-helper < %s | FileCheck %s
 
 ; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple powerpc64-ibm-aix-xcoff \
-; RUN:   -stop-after=finalize-isel -mattr=+no-inline-glue < %s | \
+; RUN:   -stop-after=finalize-isel -mattr=+use-ptrgl-helper < %s | \
 ; RUN:  FileCheck --check-prefix=CHECK64 %s
 
 define i32 @has_strictfp(ptr noundef readonly captures(none) %fp) #0 {
diff --git a/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll 
b/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll
index 76006d161f6fc..3ba322dfa3a26 100644
--- a/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll
@@ -1,21 +1,21 @@
 ; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple powerpc-ibm-aix-xcoff \
-; RUN:   -mattr=+no-inline-glue < %s | FileCheck 
--check-prefixes=CHECK,CHECK32 %s
+; RUN:   -mattr=+use-ptrgl-helper < %s | FileCheck 
--check-prefixes=CHECK,CHECK32 %s
 
 ; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple powerpc64-ibm-aix-xcoff \
-; RUN:   -mattr=+no-inline-glue < %s | FileCheck 
--check-prefixes=CHECK,CHECK64 %s
+; RUN:   -mattr=+use-ptrgl-helper < %s | FileCheck 
--check-prefixes=CHECK,CHECK64 %s
 
 ; RUN: llc -stop-after=finalize-isel  -verify-machineinstrs -mcpu=pwr8 \
-; RUN:   -mtriple powerpc-ibm-aix-xcoff -mattr=+no-inline-glue < %s | \
+; RUN:   -mtriple powerpc-ibm-aix-xcoff -mattr=+use-ptrgl-helper < %s | \
 ; RUN:   FileCheck --check-prefix=MIR32 %s
 
 ; RUN: llc -stop-after=finalize-isel  -verify-machineinstrs -mcpu=pwr8 \
-; RUN:   -mtriple powerpc64-ibm-aix-xcoff -mattr=+no-inline-glue < %s | \
+; RUN:   -mtriple powerpc64-ibm-aix-xcoff -mattr=+use-ptrgl-helper < %s | \
 ; RUN:   FileCheck --check-prefix=MIR64 %s
 
 ; RUN: not llc -verify-machineinstrs -mcpu=pwr8 -mtriple powerpc-unknown-linux 
\
-; RUN:   -mattr=+no-inline-glue 2>&1 < %s | FileCheck --check-prefix=ERROR %s
+; RUN:   -mattr=+use-ptrgl-helper 2>&1 < %s | FileCheck --check-prefix=ERROR %s
 
-; ERROR: no-inline-glue feature is only supported on AIX
+; ERROR: use-ptrgl-helper feature is only supported on AIX
 
 @a = dso_local global i32 55, align 4
 @d = dso_local global double 3.141590e+00, align 8

>From 0ebc909c18c96b2872bdcb65c9defdb07aa04b5c Mon Sep 17 00:00:00 2001
From: Sean Fertile <[email protected]>
Date: Thu, 7 May 2026 11:12:27 -0400
Subject: [PATCH 26/26] Rename new instructions to be more descriptive.

Changed the names from _RESTORE to _LWZinto_toc and _LDinto_toc
to match the existing BCTRL_ based instructions. Also add an IsAIX
predicate guarding them. The existing scheduling regexes for P8 match
the new instructions so I had to remove the specific new scheduling info
for them.
---
 .../PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp |  8 ++---
 llvm/lib/Target/PowerPC/P10InstrResources.td  |  8 ++---
 llvm/lib/Target/PowerPC/P9InstrResources.td   |  5 ++-
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp     |  6 ++--
 llvm/lib/Target/PowerPC/PPCInstr64Bit.td      | 30 ++++++++--------
 llvm/lib/Target/PowerPC/PPCInstrInfo.td       | 36 ++++++++++---------
 llvm/lib/Target/PowerPC/PPCScheduleP7.td      |  7 +++-
 llvm/lib/Target/PowerPC/PPCScheduleP8.td      |  4 ---
 .../CodeGen/PowerPC/aix-mixed-inline-glue.ll  |  4 +--
 .../PowerPC/aix-no-inline-glue-strictfp.ll    |  4 +--
 .../CodeGen/PowerPC/aix-no-inline-glue.ll     |  8 ++---
 11 files changed, 65 insertions(+), 55 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp 
b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index 1acb63b7bf1aa..64427e97f729c 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -154,10 +154,10 @@ bool PPCMCCodeEmitter::isNoTOCCallInstr(const MCInst &MI) 
const {
   case PPC::TCRETURNri:
   case PPC::BCTRL_LWZinto_toc:
   case PPC::BCTRL_LWZinto_toc_RM:
-  case PPC::BL_RESTORE:
-  case PPC::BL_RESTORE_RM:
-  case PPC::BL8_RESTORE:
-  case PPC::BL8_RESTORE_RM:
+  case PPC::BL_LWZinto_toc:
+  case PPC::BL_LWZinto_toc_RM:
+  case PPC::BL8_LDinto_toc:
+  case PPC::BL8_LDinto_toc_RM:
   case PPC::TAILBCTR:
   case PPC::TAILB:
   case PPC::TAILBA:
diff --git a/llvm/lib/Target/PowerPC/P10InstrResources.td 
b/llvm/lib/Target/PowerPC/P10InstrResources.td
index 13d3cb9f41c41..91c23622c99cd 100644
--- a/llvm/lib/Target/PowerPC/P10InstrResources.td
+++ b/llvm/lib/Target/PowerPC/P10InstrResources.td
@@ -1237,10 +1237,10 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_PAIR, P10W_FX_3C, 
P10FX_Read],
 // 6 cycle Load operation, 0 input operands.
 def : InstRW<[P10W_BR_2C, P10W_DISP_ANY, P10BR_Read, P10W_LD_6C, 
P10W_DISP_ANY],
       (instrs
-    BL_RESTORE,
-    BL_RESTORE_RM,
-    BL8_RESTORE,
-    BL8_RESTORE_RM
+    BL_LWZinto_toc,
+    BL_LWZinto_toc_RM,
+    BL8_LDinto_toc,
+    BL8_LDinto_toc_RM
 )>;
 
 // 6 Cycles Load operations, 0 input operands
diff --git a/llvm/lib/Target/PowerPC/P9InstrResources.td 
b/llvm/lib/Target/PowerPC/P9InstrResources.td
index 3b754a56b0535..b72671eefc7fd 100644
--- a/llvm/lib/Target/PowerPC/P9InstrResources.td
+++ b/llvm/lib/Target/PowerPC/P9InstrResources.td
@@ -1299,7 +1299,10 @@ def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, 
DISP_1C],
 // Pseduo instruction that encapsulates a branch and a toc load.
 def : InstRW<[P9_BR_2C, DISP_BR_1C, P9_LS_4C, IP_AGEN_1C, DISP_1C],
       (instrs
-    (instregex "BL(8)?_RESTORE(_RM)?$")
+  BL_LWZinto_toc,
+  BL_LWZinto_toc_RM,
+  BL8_LDinto_toc,
+  BL8_LDinto_toc_RM
 )>;
 
 // Two Cycle Branch
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp 
b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index fdf7e35283021..25432c257eed8 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -3263,8 +3263,10 @@ void PPCAIXAsmPrinter::emitInstruction(const 
MachineInstr *MI) {
   case PPC::BL:
   case PPC::BL8_NOP:
   case PPC::BL_NOP:
-  case PPC::BL_RESTORE:
-  case PPC::BL8_RESTORE: {
+  case PPC::BL_LWZinto_toc:
+  case PPC::BL_LWZinto_toc_RM:
+  case PPC::BL8_LDinto_toc:
+  case PPC::BL8_LDinto_toc_RM: {
     const MachineOperand &MO = MI->getOperand(0);
     if (MO.isSymbol()) {
       auto *S = static_cast<MCSymbolXCOFF *>(
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td 
b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index 003b96d6bd405..25b193e45c279 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -207,22 +207,24 @@ let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
     Requires<[IsPPC64]>;
 }
 
-let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
-    Defs = [LR8, X2], Uses = [RM, X1], RST = 2, RA = 1, D = 40 in {
-  def BL8_RESTORE : IForm_and_DForm_1<18, 0, 1, 58,
-                      (outs), (ins calltarget:$LI),
-                      "bl $LI\n\tld 2, 40(1)", IIC_BrB,
-                      []>,
-                       Requires<[IsPPC64]>;
-}
-
-let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
-    Defs = [LR8, X2, RM], Uses = [RM, X1], RST = 2, RA = 1, D = 40 in {
-  def BL8_RESTORE_RM : IForm_and_DForm_1<18, 0, 1, 58,
+let Predicates = [IsAIX] in {
+  let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
+      Defs = [LR8, X2], Uses = [RM, X1], RST = 2, RA = 1, D = 40 in {
+    def BL8_LDinto_toc : IForm_and_DForm_1<18, 0, 1, 58,
                         (outs), (ins calltarget:$LI),
                         "bl $LI\n\tld 2, 40(1)", IIC_BrB,
                         []>,
                          Requires<[IsPPC64]>;
+  }
+
+  let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
+      Defs = [LR8, X2, RM], Uses = [RM, X1], RST = 2, RA = 1, D = 40 in {
+    def BL8_LDinto_toc_RM : IForm_and_DForm_1<18, 0, 1, 58,
+                          (outs), (ins calltarget:$LI),
+                          "bl $LI\n\tld 2, 40(1)", IIC_BrB,
+                          []>,
+                           Requires<[IsPPC64]>;
+  }
 }
 
 } // Interpretation64Bit
@@ -272,7 +274,7 @@ def : Pat<(PPCcall (i64 mcsym:$dst)),
 def : Pat<(PPCcall_nop (i64 mcsym:$dst)),
           (BL8_NOP mcsym:$dst)>;
 def : Pat<(PPCbl_load_toc (i64 texternalsym:$dst)),
-          (BL8_RESTORE texternalsym:$dst)>;
+          (BL8_LDinto_toc texternalsym:$dst)>;
 
 def : Pat<(PPCcall_rm (i64 mcsym:$dst)),
           (BL8_RM mcsym:$dst)>;
@@ -280,7 +282,7 @@ def : Pat<(PPCcall_nop_rm (i64 mcsym:$dst)),
           (BL8_NOP_RM mcsym:$dst)>;
 
 def : Pat<(PPCbl_load_toc_rm (i64 texternalsym:$dst)),
-          (BL8_RESTORE_RM texternalsym:$dst)>;
+          (BL8_LDinto_toc_RM texternalsym:$dst)>;
 
 // Atomic operations
 // FIXME: some of these might be used with constant operands. This will result
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td 
b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index e8e6dbabcf0ba..1eef821957691 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -1871,12 +1871,23 @@ let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
 
 }
 
-let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
-    Defs = [LR, R2], Uses = [RM, R1], RST = 2, RA = 1, D = 20 in {
-def BL_RESTORE : IForm_and_DForm_1<18, 0, 1, 32,
-                   (outs), (ins calltarget:$LI),
-                   "bl $LI\n\tlwz 2, 20(1)", IIC_BrB,
-                    []>, Requires<[IsPPC32]>;
+let Predicates = [IsAIX] in {
+  let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
+      Defs = [LR, R2], Uses = [RM, R1], RST = 2, RA = 1, D = 20 in {
+  def BL_LWZinto_toc : IForm_and_DForm_1<18, 0, 1, 32,
+                     (outs), (ins calltarget:$LI),
+                     "bl $LI\n\tlwz 2, 20(1)", IIC_BrB,
+                      []>, Requires<[IsPPC32]>;
+  }
+
+  let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
+      Defs = [LR, R2, RM], Uses = [RM, R1], RST = 2, RA = 1, D = 20 in {
+    def BL_LWZinto_toc_RM :
+      IForm_and_DForm_1<18, 0, 1, 32,
+        (outs), (ins calltarget:$LI),
+        "bl $LI\n\tlwz 2, 20(1)", IIC_BrB,
+        []>, Requires<[IsPPC32]>;
+  }
 }
 
 let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
@@ -1888,15 +1899,6 @@ let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
 
 }
 
-let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
-    Defs = [LR, R2, RM], Uses = [RM, R1], RST = 2, RA = 1, D = 20 in {
-  def BL_RESTORE_RM :
-    IForm_and_DForm_1<18, 0, 1, 32,
-      (outs), (ins calltarget:$LI),
-      "bl $LI\n\tlwz 2, 20(1)", IIC_BrB,
-      []>, Requires<[IsPPC32]>;
-}
-
 let isCodeGenOnly = 1, hasSideEffects = 0 in {
 
 let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1,
@@ -3478,7 +3480,7 @@ def : Pat<(PPCcall_nop (i32 texternalsym:$dst)),
           (BL_NOP texternalsym:$dst)>;
 
 def : Pat<(PPCbl_load_toc (i32 texternalsym:$dst)),
-          (BL_RESTORE texternalsym:$dst)>;
+          (BL_LWZinto_toc texternalsym:$dst)>;
 
 def : Pat<(PPCcall_rm (i32 mcsym:$dst)),
           (BL_RM mcsym:$dst)>;
@@ -3490,7 +3492,7 @@ def : Pat<(PPCcall_nop_rm (i32 texternalsym:$dst)),
           (BL_NOP_RM texternalsym:$dst)>;
 
 def : Pat<(PPCbl_load_toc_rm (i32 texternalsym:$dst)),
-          (BL_RESTORE_RM texternalsym:$dst)>;
+          (BL_LWZinto_toc_RM texternalsym:$dst)>;
 
 def : Pat<(PPCtc_return (i32 tglobaladdr:$dst),  imm:$imm),
           (TCRETURNdi tglobaladdr:$dst, imm:$imm)>;
diff --git a/llvm/lib/Target/PowerPC/PPCScheduleP7.td 
b/llvm/lib/Target/PowerPC/PPCScheduleP7.td
index 56d1114bb0d1e..ed8fadaaef74c 100644
--- a/llvm/lib/Target/PowerPC/PPCScheduleP7.td
+++ b/llvm/lib/Target/PowerPC/PPCScheduleP7.td
@@ -116,7 +116,12 @@ let SchedModel = P7Model in {
     (instregex "^B(L)?(A)?(8)?(_NOP|_NOTOC)?(_TLS|_RM)?(_)?$")>;
 
   def : InstRW<[P7_BRU_NONE, P7_DISP_BR, P7_LSU_2C, P7_DISP_LS],
-    (instregex "^BL(8)?_RESTORE(_RM)?$")>;
+    (instrs
+    BL_LWZinto_toc,
+    BL_LWZinto_toc_RM,
+    BL8_LDinto_toc,
+    BL8_LDinto_toc_RM
+    )>;
 
   def : InstRW<[P7_BRU_3C, P7_DISP_BR], (instrs
     BDZLRLp, BDZLRm, BDZLRp, BDZLm, BDZLp, BDZm, BDZp,
diff --git a/llvm/lib/Target/PowerPC/PPCScheduleP8.td 
b/llvm/lib/Target/PowerPC/PPCScheduleP8.td
index 468584e79bac3..3a2d9d9b3bc19 100644
--- a/llvm/lib/Target/PowerPC/PPCScheduleP8.td
+++ b/llvm/lib/Target/PowerPC/PPCScheduleP8.td
@@ -261,10 +261,6 @@ let SchedModel = P8Model in {
     (instregex "^SUBF(M|Z)?(E)?(IC)?(4|8)?$"),
     (instregex "^NEG(8)?(O)?$"))>;
 
-  // Special pseudo instruction that combines a direct call with a toc restore
-  def : InstRW<[P8_BR_2C, P8_ISSUE_BR, P8_LU_or_LS_FX_3C, P8_ISSUE_FXLD], 
(instrs
-    (instregex "^BL(8)?_RESTORE(_RM)$"))>;
-
   // Instructions of PM pipeline
 
   def : InstRW<[P8_PM_2C, P8_ISSUE_VSX], (instrs
diff --git a/llvm/test/CodeGen/PowerPC/aix-mixed-inline-glue.ll 
b/llvm/test/CodeGen/PowerPC/aix-mixed-inline-glue.ll
index 4f795ac7a7c34..855137c93d7ca 100644
--- a/llvm/test/CodeGen/PowerPC/aix-mixed-inline-glue.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-mixed-inline-glue.ll
@@ -21,11 +21,11 @@ attributes #0 = {"target-features"="+use-ptrgl-helper"}
 attributes #1 = {"target-features"="-use-ptrgl-helper"}
 
 ; CHECK: name:            OutOfLine
-; CHECK:  BL_RESTORE &"._ptrgl[PR]", csr_aix32, implicit-def dead $lr, 
implicit-def dead $r2, implicit $rm, implicit $r1, implicit $r11, implicit $r2, 
implicit-def $r1, implicit-def $r3
+; CHECK:  BL_LWZinto_toc &"._ptrgl[PR]", csr_aix32, implicit-def dead $lr, 
implicit-def dead $r2, implicit $rm, implicit $r1, implicit $r11, implicit $r2, 
implicit-def $r1, implicit-def $r3
 ; CHECK: name:            InLine
 ; CEHCK: BCTRL_LWZinto_toc 20, $r1, csr_aix32, implicit-def dead $lr, 
implicit-def dead $r2, implicit $ctr, implicit $rm, implicit $r11, implicit 
$r2, implicit-def $r1, implicit-def $r3
 
 ; CHECK64: name:            OutOfLine
-; CHECK64:  BL8_RESTORE &"._ptrgl[PR]", csr_ppc64, implicit-def dead $lr8, 
implicit-def dead $x2, implicit $rm, implicit $x1, implicit $x11, implicit $x2, 
implicit-def $r1, implicit-def $x3
+; CHECK64:  BL8_LDinto_toc &"._ptrgl[PR]", csr_ppc64, implicit-def dead $lr8, 
implicit-def dead $x2, implicit $rm, implicit $x1, implicit $x11, implicit $x2, 
implicit-def $r1, implicit-def $x3
 ; CHECK64: name:            InLine
 ; CHECK64:   BCTRL8_LDinto_toc 40, $x1, csr_ppc64, implicit-def dead $lr8, 
implicit-def dead $x2, implicit $ctr8, implicit $rm, implicit $x11, implicit 
$x2, implicit-def $r1, implicit-def $x3
diff --git a/llvm/test/CodeGen/PowerPC/aix-no-inline-glue-strictfp.ll 
b/llvm/test/CodeGen/PowerPC/aix-no-inline-glue-strictfp.ll
index 8de134a22cb83..dd0e88431d9c2 100644
--- a/llvm/test/CodeGen/PowerPC/aix-no-inline-glue-strictfp.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-no-inline-glue-strictfp.ll
@@ -13,6 +13,6 @@ entry:
 
 attributes #0 = { strictfp }
 
-; CHECK: BL_RESTORE_RM &"._ptrgl[PR]", csr_aix32, implicit-def dead $lr, 
implicit-def dead $r2, implicit-def dead $rm, implicit $rm, implicit $r1, 
implicit $r11, implicit $r2, implicit-def $r1, implicit-def $r3
+; CHECK: BL_LWZinto_toc_RM &"._ptrgl[PR]", csr_aix32, implicit-def dead $lr, 
implicit-def dead $r2, implicit-def dead $rm, implicit $rm, implicit $r1, 
implicit $r11, implicit $r2, implicit-def $r1, implicit-def $r3
 
-; CHECK64: BL8_RESTORE_RM &"._ptrgl[PR]", csr_ppc64, implicit-def dead $lr8, 
implicit-def dead $x2, implicit-def dead $rm, implicit $rm, implicit $x1, 
implicit $x11, implicit $x2, implicit-def $r1, implicit-def $x3
+; CHECK64: BL8_LDinto_toc_RM &"._ptrgl[PR]", csr_ppc64, implicit-def dead 
$lr8, implicit-def dead $x2, implicit-def dead $rm, implicit $rm, implicit $x1, 
implicit $x11, implicit $x2, implicit-def $r1, implicit-def $x3
diff --git a/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll 
b/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll
index 3ba322dfa3a26..83c390c1c2acd 100644
--- a/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-no-inline-glue.ll
@@ -40,14 +40,14 @@ entry:
 ; MIR32:   %0:gprc = COPY $r3
 ; MIR32:   ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1
 ; MIR32:   $r11 = COPY %0
-; MIR32:   BL_RESTORE &"._ptrgl[PR]", csr_aix32, implicit-def dead $lr, 
implicit-def dead $r2, implicit $rm, implicit $r1, implicit $r11, implicit $r3, 
implicit $r4, implicit $r5, implicit $r2, implicit-def $r1, implicit-def $r3
+; MIR32:   BL_LWZinto_toc &"._ptrgl[PR]", csr_aix32, implicit-def dead $lr, 
implicit-def dead $r2, implicit $rm, implicit $r1, implicit $r11, implicit $r3, 
implicit $r4, implicit $r5, implicit $r2, implicit-def $r1, implicit-def $r3
 ; MIR32:  ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1
 
 ; MIR64: name:            caller1
 ; MIR64:   %0:g8rc = COPY $x3
 ; MIR64:   ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
 ; MIR64:   $x11 = COPY %0
-; MIR64:   BL8_RESTORE &"._ptrgl[PR]", csr_ppc64, implicit-def dead $lr8, 
implicit-def dead $x2, implicit $rm, implicit $x1, implicit $x11, implicit $x3, 
implicit $x4, implicit $x5, implicit $x2, implicit-def $r1, implicit-def $x3
+; MIR64:   BL8_LDinto_toc &"._ptrgl[PR]", csr_ppc64, implicit-def dead $lr8, 
implicit-def dead $x2, implicit $rm, implicit $x1, implicit $x11, implicit $x3, 
implicit $x4, implicit $x5, implicit $x2, implicit-def $r1, implicit-def $x3
 ; MIR64:   ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
 
 define dso_local zeroext i1 @caller2() {
@@ -72,7 +72,7 @@ entry:
 ; MIR32:   %1:gprc = LWZ 0, killed %0 :: (dereferenceable load (s32) from @fp, 
align 8)
 ; MIR32:   ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1
 ; MIR32:   $r11 = COPY %1
-; MIR32:   BL_RESTORE &"._ptrgl[PR]", csr_aix32, implicit-def dead $lr, 
implicit-def dead $r2, implicit $rm, implicit $r1, implicit $r11, implicit $r3, 
implicit $f1, implicit $r6, implicit $r2, implicit-def $r1, implicit-def $r3
+; MIR32:   BL_LWZinto_toc &"._ptrgl[PR]", csr_aix32, implicit-def dead $lr, 
implicit-def dead $r2, implicit $rm, implicit $r1, implicit $r11, implicit $r3, 
implicit $f1, implicit $r6, implicit $r2, implicit-def $r1, implicit-def $r3
 ; MIR32:   ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1
 
 ; MIR64: name:            caller2
@@ -80,7 +80,7 @@ entry:
 ; MIR64:   %1:g8rc = LD 0, killed %0 :: (dereferenceable load (s64) from @fp)
 ; MIR64:   ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
 ; MIR64:   $x11 = COPY %1
-; MIR64:   BL8_RESTORE &"._ptrgl[PR]", csr_ppc64, implicit-def dead $lr8, 
implicit-def dead $x2, implicit $rm, implicit $x1, implicit $x11, implicit $x3, 
implicit $f1, implicit $x5, implicit $x2, implicit-def $r1, implicit-def $x3
+; MIR64:   BL8_LDinto_toc &"._ptrgl[PR]", csr_ppc64, implicit-def dead $lr8, 
implicit-def dead $x2, implicit $rm, implicit $x1, implicit $x11, implicit $x3, 
implicit $f1, implicit $x5, implicit $x2, implicit-def $r1, implicit-def $x3
 ; MIR64:   ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
 
 ; CHECK: .extern ._ptrgl[PR]

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to