https://github.com/oskarwirga updated https://github.com/llvm/llvm-project/pull/188638
>From afc90aeb22c8e6a35ce14e6a379e1dc125d86e00 Mon Sep 17 00:00:00 2001 From: Oskar Wirga <[email protected]> Date: Mon, 2 Mar 2026 16:31:12 -0800 Subject: [PATCH 1/7] [Clang][LLVM] Disable NonLazyBind when pointer authentication is enabled NonLazyBind causes the compiler to emit inline GOT loads that bypass the linker's authentication stubs. On arm64e, calls must go through stubs that load from __auth_got and authenticate via braa; NonLazyBind skips this by loading directly from __got. Conditionally disable NonLazyBind in all callsites: - CGCall.cpp: general -fno-plt path - CGObjC.cpp: objc_retain, objc_release (Native ARC) - CGObjCMac.cpp: objc_msgSend, _setjmp, objc_loadClassref - PreISelIntrinsicLowering.cpp: ObjC runtime lowering The clang-side checks use PointerAuth.FunctionPointers.isEnabled(). The LLVM-side check uses Triple.isArm64e() because CodeGenOptions are not available at that level of the pipeline. --- clang/lib/CodeGen/CGCall.cpp | 5 ++- clang/lib/CodeGen/CGObjC.cpp | 18 +++++---- clang/lib/CodeGen/CGObjCMac.cpp | 37 ++++++++++++------- .../CodeGen/ptrauth-suppress-nonlazybind.c | 21 +++++++++++ llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp | 6 ++- .../AArch64/ptrauth-objc-arc.ll | 29 +++++++++++++++ 6 files changed, 93 insertions(+), 23 deletions(-) create mode 100644 clang/test/CodeGen/ptrauth-suppress-nonlazybind.c create mode 100644 llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/ptrauth-objc-arc.ll diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index b57802ebfced8..f67e34ebd8b46 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -2687,7 +2687,10 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, // is used. // FIXME: what if we just haven't processed the function definition // yet, or if it's an external definition like C99 inline? - if (CodeGenOpts.NoPLT) { + // Suppress NonLazyBind when ptrauth is enabled: inline GOT loads + // bypass authentication stubs. + if (CodeGenOpts.NoPLT && + !CodeGenOpts.PointerAuth.FunctionPointers.isEnabled()) { if (auto *Fn = dyn_cast<FunctionDecl>(TargetDecl)) { if (!Fn->isDefined() && !AttrOnCallSite) { FuncAttrs.addAttribute(llvm::Attribute::NonLazyBind); diff --git a/clang/lib/CodeGen/CGObjC.cpp b/clang/lib/CodeGen/CGObjC.cpp index 10aad2e26938d..0a105b761ac44 100644 --- a/clang/lib/CodeGen/CGObjC.cpp +++ b/clang/lib/CodeGen/CGObjC.cpp @@ -2299,10 +2299,12 @@ static llvm::Value *emitObjCValueOperation(CodeGenFunction &CGF, llvm::FunctionType::get(CGF.Int8PtrTy, CGF.Int8PtrTy, false); fn = CGF.CGM.CreateRuntimeFunction(fnType, fnName); - // We have Native ARC, so set nonlazybind attribute for performance - if (llvm::Function *f = dyn_cast<llvm::Function>(fn.getCallee())) - if (fnName == "objc_retain") - f->addFnAttr(llvm::Attribute::NonLazyBind); + // We have Native ARC, so set nonlazybind attribute for performance. + // Suppress when ptrauth is enabled (inline GOT loads bypass auth stubs). + if (!CGF.CGM.getCodeGenOpts().PointerAuth.FunctionPointers.isEnabled()) + if (llvm::Function *f = dyn_cast<llvm::Function>(fn.getCallee())) + if (fnName == "objc_retain") + f->addFnAttr(llvm::Attribute::NonLazyBind); } // Cast the argument to 'id'. @@ -2875,9 +2877,11 @@ void CodeGenFunction::EmitObjCRelease(llvm::Value *value, llvm::FunctionType::get(Builder.getVoidTy(), Int8PtrTy, false); fn = CGM.CreateRuntimeFunction(fnType, "objc_release"); setARCRuntimeFunctionLinkage(CGM, fn); - // We have Native ARC, so set nonlazybind attribute for performance - if (llvm::Function *f = dyn_cast<llvm::Function>(fn.getCallee())) - f->addFnAttr(llvm::Attribute::NonLazyBind); + // We have Native ARC, so set nonlazybind attribute for performance. + // Suppress when ptrauth is enabled (inline GOT loads bypass auth stubs). + if (!CGM.getCodeGenOpts().PointerAuth.FunctionPointers.isEnabled()) + if (llvm::Function *f = dyn_cast<llvm::Function>(fn.getCallee())) + f->addFnAttr(llvm::Attribute::NonLazyBind); } // Cast the argument to 'id'. diff --git a/clang/lib/CodeGen/CGObjCMac.cpp b/clang/lib/CodeGen/CGObjCMac.cpp index e6c244547cefd..296c7d5cd5309 100644 --- a/clang/lib/CodeGen/CGObjCMac.cpp +++ b/clang/lib/CodeGen/CGObjCMac.cpp @@ -63,12 +63,17 @@ class ObjCCommonTypesHelper { llvm::FunctionCallee getMessageSendFn() const { // Add the non-lazy-bind attribute, since objc_msgSend is likely to // be called a lot. + // Suppress when ptrauth is enabled (inline GOT loads bypass auth stubs). llvm::Type *params[] = {ObjectPtrTy, SelectorPtrTy}; + llvm::AttributeList Attrs; + if (!CGM.getCodeGenOpts().PointerAuth.FunctionPointers.isEnabled()) { + Attrs = llvm::AttributeList::get(CGM.getLLVMContext(), + llvm::AttributeList::FunctionIndex, + llvm::Attribute::NonLazyBind); + } return CGM.CreateRuntimeFunction( llvm::FunctionType::get(ObjectPtrTy, params, true), "objc_msgSend", - llvm::AttributeList::get(CGM.getLLVMContext(), - llvm::AttributeList::FunctionIndex, - llvm::Attribute::NonLazyBind)); + Attrs); } /// void objc_msgSend_stret (id, SEL, ...) @@ -551,11 +556,14 @@ class ObjCTypesHelper : public ObjCCommonTypesHelper { llvm::FunctionCallee getSetJmpFn() { // This is specifically the prototype for x86. llvm::Type *params[] = {CGM.DefaultPtrTy}; + llvm::AttributeList Attrs; + if (!CGM.getCodeGenOpts().PointerAuth.FunctionPointers.isEnabled()) { + Attrs = llvm::AttributeList::get(CGM.getLLVMContext(), + llvm::AttributeList::FunctionIndex, + llvm::Attribute::NonLazyBind); + } return CGM.CreateRuntimeFunction( - llvm::FunctionType::get(CGM.Int32Ty, params, false), "_setjmp", - llvm::AttributeList::get(CGM.getLLVMContext(), - llvm::AttributeList::FunctionIndex, - llvm::Attribute::NonLazyBind)); + llvm::FunctionType::get(CGM.Int32Ty, params, false), "_setjmp", Attrs); } public: @@ -700,13 +708,14 @@ class ObjCNonFragileABITypesHelper : public ObjCCommonTypesHelper { // classref except by calling this function. llvm::Type *params[] = {Int8PtrPtrTy}; llvm::LLVMContext &C = CGM.getLLVMContext(); - llvm::AttributeSet AS = llvm::AttributeSet::get( - C, { - llvm::Attribute::get(C, llvm::Attribute::NonLazyBind), - llvm::Attribute::getWithMemoryEffects( - C, llvm::MemoryEffects::none()), - llvm::Attribute::get(C, llvm::Attribute::NoUnwind), - }); + llvm::SmallVector<llvm::Attribute, 3> AttrVec; + if (!CGM.getCodeGenOpts().PointerAuth.FunctionPointers.isEnabled()) { + AttrVec.push_back(llvm::Attribute::get(C, llvm::Attribute::NonLazyBind)); + } + AttrVec.push_back( + llvm::Attribute::getWithMemoryEffects(C, llvm::MemoryEffects::none())); + AttrVec.push_back(llvm::Attribute::get(C, llvm::Attribute::NoUnwind)); + llvm::AttributeSet AS = llvm::AttributeSet::get(C, AttrVec); llvm::FunctionCallee F = CGM.CreateRuntimeFunction( llvm::FunctionType::get(ClassnfABIPtrTy, params, false), "objc_loadClassref", diff --git a/clang/test/CodeGen/ptrauth-suppress-nonlazybind.c b/clang/test/CodeGen/ptrauth-suppress-nonlazybind.c new file mode 100644 index 0000000000000..0cb37af01eb10 --- /dev/null +++ b/clang/test/CodeGen/ptrauth-suppress-nonlazybind.c @@ -0,0 +1,21 @@ +// RUN: %clang_cc1 -triple arm64e-apple-ios -fptrauth-calls -fno-plt -emit-llvm %s -o - | FileCheck %s --check-prefix=PTRAUTH +// RUN: %clang_cc1 -triple arm64-apple-ios -fno-plt -emit-llvm %s -o - | FileCheck %s --check-prefix=NOPTRAUTH +// +// When pointer authentication is enabled (-fptrauth-calls), NonLazyBind +// must NOT be applied. NonLazyBind causes inline GOT loads that bypass +// the linker's authentication stubs. On arm64e, calls must go through +// stubs that load from __auth_got and authenticate via braa. + +void external_function(void); + +void caller(void) { + external_function(); +} + +// With ptrauth enabled, the declaration should NOT have nonlazybind. +// PTRAUTH: declare{{.*}} void @external_function() +// PTRAUTH-NOT: nonlazybind + +// Without ptrauth, -fno-plt adds nonlazybind normally. +// NOPTRAUTH: ; Function Attrs:{{.*}}nonlazybind +// NOPTRAUTH: declare{{.*}} void @external_function() diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp index 0544995f979f7..c96a8c337c9f8 100644 --- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp +++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp @@ -37,6 +37,7 @@ #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/TargetParser/Triple.h" #include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" @@ -169,7 +170,10 @@ static bool lowerObjCCall(Function &F, RTLIB::LibcallImpl NewFn, if (setNonLazyBind && !Fn->isWeakForLinker()) { // If we have Native ARC, set nonlazybind attribute for these APIs for // performance. - Fn->addFnAttr(Attribute::NonLazyBind); + // Suppress on arm64e: inline GOT loads bypass auth stubs. + Triple TT(M->getTargetTriple()); + if (!TT.isArm64e()) + Fn->addFnAttr(Attribute::NonLazyBind); } } diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/ptrauth-objc-arc.ll b/llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/ptrauth-objc-arc.ll new file mode 100644 index 0000000000000..4c5c951054fdc --- /dev/null +++ b/llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/ptrauth-objc-arc.ll @@ -0,0 +1,29 @@ +; RUN: opt -mtriple=arm64e-apple-ios -passes=pre-isel-intrinsic-lowering -S -o - %s | FileCheck %s --check-prefix=ARM64E +; RUN: opt -mtriple=arm64-apple-ios -passes=pre-isel-intrinsic-lowering -S -o - %s | FileCheck %s --check-prefix=ARM64 +; +; Test that objc_retain and objc_release do not get nonlazybind on arm64e. + +define ptr @test_objc_retain(ptr %arg0) { +entry: + %0 = call ptr @llvm.objc.retain(ptr %arg0) + ret ptr %0 +} + +define void @test_objc_release(ptr %arg0) { +entry: + call void @llvm.objc.release(ptr %arg0) + ret void +} + +declare void @llvm.objc.release(ptr) +declare ptr @llvm.objc.retain(ptr) + +; arm64e: retain and release should NOT have nonlazybind. +; ARM64E-DAG: declare void @objc_release(ptr) +; ARM64E-DAG: declare ptr @objc_retain(ptr) +; ARM64E-NOT: nonlazybind + +; arm64: retain and release should have nonlazybind. +; ARM64-DAG: declare void @objc_release(ptr) [[NLB:#[0-9]+]] +; ARM64-DAG: declare ptr @objc_retain(ptr) [[NLB]] +; ARM64: attributes [[NLB]] = { nonlazybind } >From cf94a05c87ca46c7445b118ca3134f1a4ecdcf34 Mon Sep 17 00:00:00 2001 From: Oskar Wirga <[email protected]> Date: Fri, 27 Mar 2026 00:56:35 -0700 Subject: [PATCH 2/7] [Clang] Gate NonLazyBind suppression on arm64e triple, not PointerAuth.FunctionPointers Address reviewer feedback: - Check getTriple().isArm64e() instead of PointerAuth.FunctionPointers.isEnabled() in all Clang CodeGen sites. This is about how the arm64e ABI affects GOT, not about function pointer signing generically. - Remove redundant -fptrauth-calls from test (implicit for arm64e). - Rename tests to include "arm64e" in filenames. - Update CHECK prefixes from PTRAUTH/NOPTRAUTH to ARM64E/ARM64. --- clang/lib/CodeGen/CGCall.cpp | 7 +++---- clang/lib/CodeGen/CGObjC.cpp | 8 +++---- clang/lib/CodeGen/CGObjCMac.cpp | 8 +++---- .../CodeGen/arm64e-suppress-nonlazybind.c | 20 ++++++++++++++++++ .../CodeGen/ptrauth-suppress-nonlazybind.c | 21 ------------------- ...ptrauth-objc-arc.ll => arm64e-objc-arc.ll} | 0 6 files changed, 31 insertions(+), 33 deletions(-) create mode 100644 clang/test/CodeGen/arm64e-suppress-nonlazybind.c delete mode 100644 clang/test/CodeGen/ptrauth-suppress-nonlazybind.c rename llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/{ptrauth-objc-arc.ll => arm64e-objc-arc.ll} (100%) diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index f67e34ebd8b46..556625b10d7a8 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -2687,10 +2687,9 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, // is used. // FIXME: what if we just haven't processed the function definition // yet, or if it's an external definition like C99 inline? - // Suppress NonLazyBind when ptrauth is enabled: inline GOT loads - // bypass authentication stubs. - if (CodeGenOpts.NoPLT && - !CodeGenOpts.PointerAuth.FunctionPointers.isEnabled()) { + // Suppress NonLazyBind on arm64e: inline GOT loads bypass the + // linker's authentication stubs. + if (CodeGenOpts.NoPLT && !getTriple().isArm64e()) { if (auto *Fn = dyn_cast<FunctionDecl>(TargetDecl)) { if (!Fn->isDefined() && !AttrOnCallSite) { FuncAttrs.addAttribute(llvm::Attribute::NonLazyBind); diff --git a/clang/lib/CodeGen/CGObjC.cpp b/clang/lib/CodeGen/CGObjC.cpp index 0a105b761ac44..a238c3e8e31f7 100644 --- a/clang/lib/CodeGen/CGObjC.cpp +++ b/clang/lib/CodeGen/CGObjC.cpp @@ -2300,8 +2300,8 @@ static llvm::Value *emitObjCValueOperation(CodeGenFunction &CGF, fn = CGF.CGM.CreateRuntimeFunction(fnType, fnName); // We have Native ARC, so set nonlazybind attribute for performance. - // Suppress when ptrauth is enabled (inline GOT loads bypass auth stubs). - if (!CGF.CGM.getCodeGenOpts().PointerAuth.FunctionPointers.isEnabled()) + // Suppress on arm64e (inline GOT loads bypass auth stubs). + if (!CGF.CGM.getTriple().isArm64e()) if (llvm::Function *f = dyn_cast<llvm::Function>(fn.getCallee())) if (fnName == "objc_retain") f->addFnAttr(llvm::Attribute::NonLazyBind); @@ -2878,8 +2878,8 @@ void CodeGenFunction::EmitObjCRelease(llvm::Value *value, fn = CGM.CreateRuntimeFunction(fnType, "objc_release"); setARCRuntimeFunctionLinkage(CGM, fn); // We have Native ARC, so set nonlazybind attribute for performance. - // Suppress when ptrauth is enabled (inline GOT loads bypass auth stubs). - if (!CGM.getCodeGenOpts().PointerAuth.FunctionPointers.isEnabled()) + // Suppress on arm64e (inline GOT loads bypass auth stubs). + if (!CGM.getTriple().isArm64e()) if (llvm::Function *f = dyn_cast<llvm::Function>(fn.getCallee())) f->addFnAttr(llvm::Attribute::NonLazyBind); } diff --git a/clang/lib/CodeGen/CGObjCMac.cpp b/clang/lib/CodeGen/CGObjCMac.cpp index 296c7d5cd5309..e107dcc3d14a1 100644 --- a/clang/lib/CodeGen/CGObjCMac.cpp +++ b/clang/lib/CodeGen/CGObjCMac.cpp @@ -63,10 +63,10 @@ class ObjCCommonTypesHelper { llvm::FunctionCallee getMessageSendFn() const { // Add the non-lazy-bind attribute, since objc_msgSend is likely to // be called a lot. - // Suppress when ptrauth is enabled (inline GOT loads bypass auth stubs). + // Suppress on arm64e (inline GOT loads bypass auth stubs). llvm::Type *params[] = {ObjectPtrTy, SelectorPtrTy}; llvm::AttributeList Attrs; - if (!CGM.getCodeGenOpts().PointerAuth.FunctionPointers.isEnabled()) { + if (!CGM.getTriple().isArm64e()) { Attrs = llvm::AttributeList::get(CGM.getLLVMContext(), llvm::AttributeList::FunctionIndex, llvm::Attribute::NonLazyBind); @@ -557,7 +557,7 @@ class ObjCTypesHelper : public ObjCCommonTypesHelper { // This is specifically the prototype for x86. llvm::Type *params[] = {CGM.DefaultPtrTy}; llvm::AttributeList Attrs; - if (!CGM.getCodeGenOpts().PointerAuth.FunctionPointers.isEnabled()) { + if (!CGM.getTriple().isArm64e()) { Attrs = llvm::AttributeList::get(CGM.getLLVMContext(), llvm::AttributeList::FunctionIndex, llvm::Attribute::NonLazyBind); @@ -709,7 +709,7 @@ class ObjCNonFragileABITypesHelper : public ObjCCommonTypesHelper { llvm::Type *params[] = {Int8PtrPtrTy}; llvm::LLVMContext &C = CGM.getLLVMContext(); llvm::SmallVector<llvm::Attribute, 3> AttrVec; - if (!CGM.getCodeGenOpts().PointerAuth.FunctionPointers.isEnabled()) { + if (!CGM.getTriple().isArm64e()) { AttrVec.push_back(llvm::Attribute::get(C, llvm::Attribute::NonLazyBind)); } AttrVec.push_back( diff --git a/clang/test/CodeGen/arm64e-suppress-nonlazybind.c b/clang/test/CodeGen/arm64e-suppress-nonlazybind.c new file mode 100644 index 0000000000000..f9a8f3140facb --- /dev/null +++ b/clang/test/CodeGen/arm64e-suppress-nonlazybind.c @@ -0,0 +1,20 @@ +// RUN: %clang_cc1 -triple arm64e-apple-ios -fno-plt -emit-llvm %s -o - | FileCheck %s --check-prefix=ARM64E +// RUN: %clang_cc1 -triple arm64-apple-ios -fno-plt -emit-llvm %s -o - | FileCheck %s --check-prefix=ARM64 +// +// On arm64e, NonLazyBind must NOT be applied. NonLazyBind causes inline +// GOT loads that bypass the linker's authentication stubs. Calls must go +// through stubs that load from __auth_got and authenticate via braa. + +void external_function(void); + +void caller(void) { + external_function(); +} + +// arm64e: the declaration should NOT have nonlazybind. +// ARM64E: declare{{.*}} void @external_function() +// ARM64E-NOT: nonlazybind + +// arm64: -fno-plt adds nonlazybind normally. +// ARM64: ; Function Attrs:{{.*}}nonlazybind +// ARM64: declare{{.*}} void @external_function() diff --git a/clang/test/CodeGen/ptrauth-suppress-nonlazybind.c b/clang/test/CodeGen/ptrauth-suppress-nonlazybind.c deleted file mode 100644 index 0cb37af01eb10..0000000000000 --- a/clang/test/CodeGen/ptrauth-suppress-nonlazybind.c +++ /dev/null @@ -1,21 +0,0 @@ -// RUN: %clang_cc1 -triple arm64e-apple-ios -fptrauth-calls -fno-plt -emit-llvm %s -o - | FileCheck %s --check-prefix=PTRAUTH -// RUN: %clang_cc1 -triple arm64-apple-ios -fno-plt -emit-llvm %s -o - | FileCheck %s --check-prefix=NOPTRAUTH -// -// When pointer authentication is enabled (-fptrauth-calls), NonLazyBind -// must NOT be applied. NonLazyBind causes inline GOT loads that bypass -// the linker's authentication stubs. On arm64e, calls must go through -// stubs that load from __auth_got and authenticate via braa. - -void external_function(void); - -void caller(void) { - external_function(); -} - -// With ptrauth enabled, the declaration should NOT have nonlazybind. -// PTRAUTH: declare{{.*}} void @external_function() -// PTRAUTH-NOT: nonlazybind - -// Without ptrauth, -fno-plt adds nonlazybind normally. -// NOPTRAUTH: ; Function Attrs:{{.*}}nonlazybind -// NOPTRAUTH: declare{{.*}} void @external_function() diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/ptrauth-objc-arc.ll b/llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/arm64e-objc-arc.ll similarity index 100% rename from llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/ptrauth-objc-arc.ll rename to llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/arm64e-objc-arc.ll >From bde6719d9d8ea7b3c19c0a6ae7d271615d48cbb7 Mon Sep 17 00:00:00 2001 From: Oskar Wirga <[email protected]> Date: Wed, 1 Apr 2026 09:24:23 -0700 Subject: [PATCH 3/7] [AArch64] Drop NonLazyBind and RtLibUseGOT on arm64e in the backend Unauthenticated indirect branches (blr) bypass pointer authentication on arm64e. Drop NonLazyBind in GlobalISel CallLowering and RtLibUseGOT in both SelectionDAG and GlobalISel when targeting arm64e. This is defense-in-depth for the Clang-level suppression: even if NonLazyBind or RtLibUseGOT leaks into IR, the backend will not emit unsigned indirect calls on arm64e. --- llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 5 ++- .../Target/AArch64/AArch64ISelLowering.cpp | 5 ++- .../AArch64/GISel/AArch64CallLowering.cpp | 5 ++- llvm/test/CodeGen/AArch64/nonlazybind.ll | 38 +++++++++++++++++++ 4 files changed, 50 insertions(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp index 4256e9a42b889..55cf2fe41cf85 100644 --- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -158,7 +158,10 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB, } if (const Function *F = dyn_cast<Function>(CalleeV)) { - if (F->hasFnAttribute(Attribute::NonLazyBind)) { + // NonLazyBind is incompatible with arm64e: it emits an unauthenticated + // indirect branch (blr) which bypasses pointer authentication. + if (F->hasFnAttribute(Attribute::NonLazyBind) && + !MF.getTarget().getTargetTriple().isArm64e()) { LLT Ty = getLLTForType(*F->getType(), DL); Register Reg = MIRBuilder.buildGlobalValue(Ty, F).getReg(0); Info.Callee = MachineOperand::CreateReg(Reg, false); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 57c7f12d0236b..dbd6dfeb4989b 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -10432,9 +10432,12 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags); } } else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { + // Unauthenticated GOT loads are incompatible with arm64e: the + // resulting indirect branch (blr) bypasses pointer authentication. bool UseGot = (getTargetMachine().getCodeModel() == CodeModel::Large && Subtarget->isTargetMachO()) || - MF.getFunction().getParent()->getRtLibUseGOT(); + (MF.getFunction().getParent()->getRtLibUseGOT() && + !Subtarget->getTargetTriple().isArm64e()); const char *Sym = S->getSymbol(); if (UseGot) { Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, AArch64II::MO_GOT); diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp index fa7216d9564a8..4a7c769bc4a80 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp @@ -1412,7 +1412,10 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, else { // For an intrinsic call (e.g. memset), use GOT if "RtLibUseGOT" (-fno-plt) // is set. - if (Info.Callee.isSymbol() && F.getParent()->getRtLibUseGOT()) { + // Unauthenticated GOT loads are incompatible with arm64e: the + // resulting indirect branch (blr) bypasses pointer authentication. + if (Info.Callee.isSymbol() && F.getParent()->getRtLibUseGOT() && + !Subtarget.getTargetTriple().isArm64e()) { auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_GLOBAL_VALUE); DstOp(getLLTForType(*F.getType(), DL)).addDefToMIB(MRI, MIB); MIB.addExternalSymbol(Info.Callee.getSymbolName(), AArch64II::MO_GOT); diff --git a/llvm/test/CodeGen/AArch64/nonlazybind.ll b/llvm/test/CodeGen/AArch64/nonlazybind.ll index f5bb3a4ecbc9a..03f3be10b746b 100644 --- a/llvm/test/CodeGen/AArch64/nonlazybind.ll +++ b/llvm/test/CodeGen/AArch64/nonlazybind.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ; RUN: llc -mtriple=aarch64-apple-ios %s -o - -aarch64-macho-enable-nonlazybind | FileCheck %s --check-prefix=MACHO ; RUN: llc -mtriple=aarch64-apple-ios %s -o - | FileCheck %s --check-prefix=MACHO-NORMAL +; RUN: llc -mtriple=arm64e-apple-ios -global-isel=false %s -o - | FileCheck %s --check-prefix=ARM64E-SDAG +; RUN: llc -mtriple=arm64e-apple-ios -global-isel %s -o - | FileCheck %s --check-prefix=ARM64E-GI ; RUN: llc -mtriple=aarch64 -fast-isel %s -o - | FileCheck %s --check-prefixes=ELF,ELF-FI ; RUN: llc -mtriple=aarch64 -global-isel %s -o - | FileCheck %s --check-prefixes=ELF,ELF-GI ; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s --check-prefixes=ELF,ELF-SDAG @@ -52,6 +54,34 @@ define void @test_laziness(ptr %a) nounwind { ; MACHO-NORMAL-NEXT: ret ; MACHO-NORMAL-NEXT: .loh AdrpLdrGot Lloh0, Lloh1 ; +; ARM64E-SDAG-LABEL: test_laziness: +; ARM64E-SDAG: ; %bb.0: +; ARM64E-SDAG-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill +; ARM64E-SDAG-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; ARM64E-SDAG-NEXT: mov x19, x0 +; ARM64E-SDAG-NEXT: bl _external +; ARM64E-SDAG-NEXT: mov x0, x19 +; ARM64E-SDAG-NEXT: mov w1, #1 ; =0x1 +; ARM64E-SDAG-NEXT: mov w2, #1000 ; =0x3e8 +; ARM64E-SDAG-NEXT: bl _memset +; ARM64E-SDAG-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; ARM64E-SDAG-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload +; ARM64E-SDAG-NEXT: ret +; +; ARM64E-GI-LABEL: test_laziness: +; ARM64E-GI: ; %bb.0: +; ARM64E-GI-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill +; ARM64E-GI-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; ARM64E-GI-NEXT: mov x19, x0 +; ARM64E-GI-NEXT: bl _external +; ARM64E-GI-NEXT: mov x0, x19 +; ARM64E-GI-NEXT: mov w1, #1 ; =0x1 +; ARM64E-GI-NEXT: mov w2, #1000 ; =0x3e8 +; ARM64E-GI-NEXT: bl _memset +; ARM64E-GI-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; ARM64E-GI-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload +; ARM64E-GI-NEXT: ret +; ; ELF-LABEL: test_laziness: ; ELF: // %bb.0: ; ELF-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill @@ -86,6 +116,14 @@ define void @test_laziness_tail() nounwind { ; MACHO-NORMAL: ; %bb.0: ; MACHO-NORMAL-NEXT: b _external ; +; ARM64E-SDAG-LABEL: test_laziness_tail: +; ARM64E-SDAG: ; %bb.0: +; ARM64E-SDAG-NEXT: b _external +; +; ARM64E-GI-LABEL: test_laziness_tail: +; ARM64E-GI: ; %bb.0: +; ARM64E-GI-NEXT: b _external +; ; ELF-LABEL: test_laziness_tail: ; ELF: // %bb.0: ; ELF-NEXT: adrp x0, :got:external >From 9f955a1e3517f85c71f8300d8c39ff435767f0a4 Mon Sep 17 00:00:00 2001 From: Oskar Wirga <[email protected]> Date: Wed, 8 Apr 2026 19:23:02 -0400 Subject: [PATCH 4/7] nit: remove copy of target triple --- llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp index c96a8c337c9f8..53c0470bf56ed 100644 --- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp +++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp @@ -37,7 +37,6 @@ #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/TargetParser/Triple.h" #include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" @@ -171,8 +170,7 @@ static bool lowerObjCCall(Function &F, RTLIB::LibcallImpl NewFn, // If we have Native ARC, set nonlazybind attribute for these APIs for // performance. // Suppress on arm64e: inline GOT loads bypass auth stubs. - Triple TT(M->getTargetTriple()); - if (!TT.isArm64e()) + if (!M->getTargetTriple().isArm64e()) Fn->addFnAttr(Attribute::NonLazyBind); } } >From c4dfffcc496212a43f25a81caa9847a097702b81 Mon Sep 17 00:00:00 2001 From: Oskar Wirga <[email protected]> Date: Thu, 9 Apr 2026 14:05:08 -0400 Subject: [PATCH 5/7] Use RTLCI for objc_retain/release attrs - Added RTLCI getter to LibcallLoweringModuleAnalysisResult - Replaced setNonLazyBind with RTLCI pointer - use getFunctionTy to move target specific conditions out into RuntimeLibcalls.cpp --- llvm/include/llvm/CodeGen/LibcallLoweringInfo.h | 4 ++++ llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp | 17 ++++++++--------- llvm/lib/IR/RuntimeLibcalls.cpp | 12 ++++++++++++ 3 files changed, 24 insertions(+), 9 deletions(-) diff --git a/llvm/include/llvm/CodeGen/LibcallLoweringInfo.h b/llvm/include/llvm/CodeGen/LibcallLoweringInfo.h index c0d72ea4d4d38..41f2ce2afdc33 100644 --- a/llvm/include/llvm/CodeGen/LibcallLoweringInfo.h +++ b/llvm/include/llvm/CodeGen/LibcallLoweringInfo.h @@ -100,6 +100,10 @@ class LibcallLoweringModuleAnalysisResult { operator bool() const { return RTLCI != nullptr; } + const RTLIB::RuntimeLibcallsInfo *getRuntimeLibcallsInfo() const { + return RTLCI; + } + LLVM_ABI bool invalidate(Module &, const PreservedAnalyses &, ModuleAnalysisManager::Invalidator &); diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp index 53c0470bf56ed..a0f34bf6d6220 100644 --- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp +++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp @@ -148,7 +148,7 @@ static CallInst::TailCallKind getOverridingTailCallKind(const Function &F) { } static bool lowerObjCCall(Function &F, RTLIB::LibcallImpl NewFn, - bool setNonLazyBind = false) { + const RTLIB::RuntimeLibcallsInfo *RTLCI = nullptr) { assert(IntrinsicInst::mayLowerToFunctionCall(F.getIntrinsicID()) && "Pre-ISel intrinsics do lower into regular function calls"); if (F.use_empty()) @@ -166,12 +166,10 @@ static bool lowerObjCCall(Function &F, RTLIB::LibcallImpl NewFn, if (Function *Fn = dyn_cast<Function>(FCache.getCallee())) { Fn->setLinkage(F.getLinkage()); - if (setNonLazyBind && !Fn->isWeakForLinker()) { - // If we have Native ARC, set nonlazybind attribute for these APIs for - // performance. - // Suppress on arm64e: inline GOT loads bypass auth stubs. - if (!M->getTargetTriple().isArm64e()) - Fn->addFnAttr(Attribute::NonLazyBind); + if (RTLCI && !Fn->isWeakForLinker()) { + auto [FuncTy, Attrs] = RTLCI->getFunctionTy( + M->getContext(), M->getTargetTriple(), M->getDataLayout(), NewFn); + Fn->addFnAttrs(AttrBuilder(M->getContext(), Attrs.getFnAttrs())); } } @@ -678,6 +676,7 @@ static bool expandLoopTrap(Function &Intr) { bool PreISelIntrinsicLowering::lowerIntrinsics(Module &M) const { // Map unique constants to globals. DenseMap<Constant *, GlobalVariable *> CMap; + const auto *RTLCI = ModuleLibcalls.getRuntimeLibcallsInfo(); bool Changed = false; for (Function &F : M) { switch (F.getIntrinsicID()) { @@ -751,10 +750,10 @@ bool PreISelIntrinsicLowering::lowerIntrinsics(Module &M) const { Changed |= lowerObjCCall(F, RTLIB::impl_objc_moveWeak); break; case Intrinsic::objc_release: - Changed |= lowerObjCCall(F, RTLIB::impl_objc_release, true); + Changed |= lowerObjCCall(F, RTLIB::impl_objc_release, RTLCI); break; case Intrinsic::objc_retain: - Changed |= lowerObjCCall(F, RTLIB::impl_objc_retain, true); + Changed |= lowerObjCCall(F, RTLIB::impl_objc_retain, RTLCI); break; case Intrinsic::objc_retainAutorelease: Changed |= lowerObjCCall(F, RTLIB::impl_objc_retainAutorelease); diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp index 63da006fe5ca8..6ea57fc462f64 100644 --- a/llvm/lib/IR/RuntimeLibcalls.cpp +++ b/llvm/lib/IR/RuntimeLibcalls.cpp @@ -427,6 +427,18 @@ RuntimeLibcallsInfo::getFunctionTy(LLVMContext &Ctx, const Triple &TT, return {FunctionType::get(Type::getVoidTy(Ctx), ArgTys, false), Attrs}; } + case RTLIB::impl_objc_retain: + case RTLIB::impl_objc_release: { + // NonLazyBind improves performance via direct GOT loads. Suppress on + // arm64e: inline GOT loads bypass authenticated stubs. + if (TT.isArm64e()) + return {}; + AttrBuilder FuncAttrBuilder(Ctx); + FuncAttrBuilder.addAttribute(Attribute::NonLazyBind); + AttributeList Attrs; + Attrs = Attrs.addFnAttributes(Ctx, FuncAttrBuilder); + return {nullptr, Attrs}; + } default: return {}; } >From 5917e490ec14a9c02e1e78a44d38e172d5f8e972 Mon Sep 17 00:00:00 2001 From: Oskar Wirga <[email protected]> Date: Fri, 10 Apr 2026 12:00:49 -0400 Subject: [PATCH 6/7] Keep function attributes in the initializer list --- clang/lib/CodeGen/CGObjCMac.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/clang/lib/CodeGen/CGObjCMac.cpp b/clang/lib/CodeGen/CGObjCMac.cpp index e107dcc3d14a1..c6f0d62054c18 100644 --- a/clang/lib/CodeGen/CGObjCMac.cpp +++ b/clang/lib/CodeGen/CGObjCMac.cpp @@ -708,13 +708,12 @@ class ObjCNonFragileABITypesHelper : public ObjCCommonTypesHelper { // classref except by calling this function. llvm::Type *params[] = {Int8PtrPtrTy}; llvm::LLVMContext &C = CGM.getLLVMContext(); - llvm::SmallVector<llvm::Attribute, 3> AttrVec; - if (!CGM.getTriple().isArm64e()) { + llvm::SmallVector<llvm::Attribute, 3> AttrVec = { + llvm::Attribute::getWithMemoryEffects(C, llvm::MemoryEffects::none()), + llvm::Attribute::get(C, llvm::Attribute::NoUnwind), + }; + if (!CGM.getTriple().isArm64e()) AttrVec.push_back(llvm::Attribute::get(C, llvm::Attribute::NonLazyBind)); - } - AttrVec.push_back( - llvm::Attribute::getWithMemoryEffects(C, llvm::MemoryEffects::none())); - AttrVec.push_back(llvm::Attribute::get(C, llvm::Attribute::NoUnwind)); llvm::AttributeSet AS = llvm::AttributeSet::get(C, AttrVec); llvm::FunctionCallee F = CGM.CreateRuntimeFunction( llvm::FunctionType::get(ClassnfABIPtrTy, params, false), >From edef333077b79a6e910dbb052b60720eae08d14a Mon Sep 17 00:00:00 2001 From: Oskar Wirga <[email protected]> Date: Fri, 10 Apr 2026 17:18:17 -0400 Subject: [PATCH 7/7] Hard error on arm64e NonLazyBind in the backend Updated the test to match! --- llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 9 +++++++-- llvm/test/CodeGen/AArch64/nonlazybind.ll | 19 ++----------------- 2 files changed, 9 insertions(+), 19 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp index 55cf2fe41cf85..57032ea96ddae 100644 --- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Target/TargetMachine.h" @@ -158,14 +159,18 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB, } if (const Function *F = dyn_cast<Function>(CalleeV)) { - // NonLazyBind is incompatible with arm64e: it emits an unauthenticated - // indirect branch (blr) which bypasses pointer authentication. if (F->hasFnAttribute(Attribute::NonLazyBind) && !MF.getTarget().getTargetTriple().isArm64e()) { LLT Ty = getLLTForType(*F->getType(), DL); Register Reg = MIRBuilder.buildGlobalValue(Ty, F).getReg(0); Info.Callee = MachineOperand::CreateReg(Reg, false); } else { + // NonLazyBind is incompatible with arm64e: it emits an unauthenticated + // indirect branch (blr) which bypasses pointer authentication. + if (F->hasFnAttribute(Attribute::NonLazyBind)) + MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported( + MF.getFunction(), + "nonlazybind attribute is not compatible with arm64e")); Info.Callee = MachineOperand::CreateGA(F, 0); } } else if (isa<GlobalIFunc>(CalleeV) || isa<GlobalAlias>(CalleeV)) { diff --git a/llvm/test/CodeGen/AArch64/nonlazybind.ll b/llvm/test/CodeGen/AArch64/nonlazybind.ll index 03f3be10b746b..79677921f0b04 100644 --- a/llvm/test/CodeGen/AArch64/nonlazybind.ll +++ b/llvm/test/CodeGen/AArch64/nonlazybind.ll @@ -2,7 +2,7 @@ ; RUN: llc -mtriple=aarch64-apple-ios %s -o - -aarch64-macho-enable-nonlazybind | FileCheck %s --check-prefix=MACHO ; RUN: llc -mtriple=aarch64-apple-ios %s -o - | FileCheck %s --check-prefix=MACHO-NORMAL ; RUN: llc -mtriple=arm64e-apple-ios -global-isel=false %s -o - | FileCheck %s --check-prefix=ARM64E-SDAG -; RUN: llc -mtriple=arm64e-apple-ios -global-isel %s -o - | FileCheck %s --check-prefix=ARM64E-GI +; RUN: not llc -mtriple=arm64e-apple-ios -global-isel %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=ARM64E-GI-ERR ; RUN: llc -mtriple=aarch64 -fast-isel %s -o - | FileCheck %s --check-prefixes=ELF,ELF-FI ; RUN: llc -mtriple=aarch64 -global-isel %s -o - | FileCheck %s --check-prefixes=ELF,ELF-GI ; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s --check-prefixes=ELF,ELF-SDAG @@ -68,19 +68,7 @@ define void @test_laziness(ptr %a) nounwind { ; ARM64E-SDAG-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload ; ARM64E-SDAG-NEXT: ret ; -; ARM64E-GI-LABEL: test_laziness: -; ARM64E-GI: ; %bb.0: -; ARM64E-GI-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill -; ARM64E-GI-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill -; ARM64E-GI-NEXT: mov x19, x0 -; ARM64E-GI-NEXT: bl _external -; ARM64E-GI-NEXT: mov x0, x19 -; ARM64E-GI-NEXT: mov w1, #1 ; =0x1 -; ARM64E-GI-NEXT: mov w2, #1000 ; =0x3e8 -; ARM64E-GI-NEXT: bl _memset -; ARM64E-GI-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload -; ARM64E-GI-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload -; ARM64E-GI-NEXT: ret +; ARM64E-GI-ERR: error: {{.*}}: nonlazybind attribute is not compatible with arm64e ; ; ELF-LABEL: test_laziness: ; ELF: // %bb.0: @@ -120,9 +108,6 @@ define void @test_laziness_tail() nounwind { ; ARM64E-SDAG: ; %bb.0: ; ARM64E-SDAG-NEXT: b _external ; -; ARM64E-GI-LABEL: test_laziness_tail: -; ARM64E-GI: ; %bb.0: -; ARM64E-GI-NEXT: b _external ; ; ELF-LABEL: test_laziness_tail: ; ELF: // %bb.0: _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
