https://github.com/DataCorrupted created 
https://github.com/llvm/llvm-project/pull/170618

- Generation
- Dispatch

>From 81b0d800623f03b0c3fa9b4412e36dc585236ed8 Mon Sep 17 00:00:00 2001
From: Peter Rong <[email protected]>
Date: Wed, 3 Dec 2025 22:42:51 -0800
Subject: [PATCH] [ExposeDirectMethod] Nil check thunk generation

- Generation
- Dispatch
---
 clang/lib/CodeGen/CGDecl.cpp        |   4 +-
 clang/lib/CodeGen/CGObjC.cpp        |  17 +-
 clang/lib/CodeGen/CGObjCMac.cpp     | 243 +++++++++++++++++++++++++++-
 clang/lib/CodeGen/CodeGenFunction.h |   7 +
 4 files changed, 263 insertions(+), 8 deletions(-)

diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp
index 8b1cd83af2396..9f0e09eac8866 100644
--- a/clang/lib/CodeGen/CGDecl.cpp
+++ b/clang/lib/CodeGen/CGDecl.cpp
@@ -2757,7 +2757,9 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, 
ParamValue Arg,
   llvm::Value *ArgVal = (DoStore ? Arg.getDirectValue() : nullptr);
 
   LValue lv = MakeAddrLValue(DeclPtr, Ty);
-  if (IsScalar) {
+  // If this is a thunk, don't bother with ARC lifetime management.
+  // The true implementation will take care of that.
+  if (IsScalar && !CurFuncIsThunk) {
     Qualifiers qs = Ty.getQualifiers();
     if (Qualifiers::ObjCLifetime lt = qs.getObjCLifetime()) {
       // We honor __attribute__((ns_consumed)) for types with lifetime.
diff --git a/clang/lib/CodeGen/CGObjC.cpp b/clang/lib/CodeGen/CGObjC.cpp
index 10aad2e26938d..f1b8627fc119a 100644
--- a/clang/lib/CodeGen/CGObjC.cpp
+++ b/clang/lib/CodeGen/CGObjC.cpp
@@ -761,7 +761,18 @@ void CodeGenFunction::StartObjCMethod(const ObjCMethodDecl 
*OMD,
 
   const CGFunctionInfo &FI = CGM.getTypes().arrangeObjCMethodDeclaration(OMD);
   if (OMD->isDirectMethod()) {
-    Fn->setVisibility(llvm::Function::HiddenVisibility);
+    Fn->setVisibility(llvm::GlobalValue::HiddenVisibility);
+    if (CGM.shouldExposeSymbol(OMD)) {
+      // Find the decl that may have visibility set (property or method)
+      const NamedDecl *Decl = OMD;
+      if (const auto *PD = OMD->findPropertyDecl()) {
+        Decl = PD;
+      }
+      // and respect source level visibility setting
+      if (auto V = Decl->getExplicitVisibility(NamedDecl::VisibilityForValue)) 
{
+        Fn->setVisibility(CGM.GetLLVMVisibility(*V));
+      }
+    }
     CGM.SetLLVMFunctionAttributes(OMD, FI, Fn, /*IsThunk=*/false);
     CGM.SetLLVMFunctionAttributesForDefinition(OMD, Fn);
   } else {
@@ -781,10 +792,6 @@ void CodeGenFunction::StartObjCMethod(const ObjCMethodDecl 
*OMD,
                 OMD->getLocation(), StartLoc);
 
   if (OMD->isDirectMethod()) {
-    // This function is a direct call, it has to implement a nil check
-    // on entry.
-    //
-    // TODO: possibly have several entry points to elide the check
     CGM.getObjCRuntime().GenerateDirectMethodPrologue(*this, Fn, OMD, CD);
   }
 
diff --git a/clang/lib/CodeGen/CGObjCMac.cpp b/clang/lib/CodeGen/CGObjCMac.cpp
index 3f4b11c634ce4..741e5d85b5935 100644
--- a/clang/lib/CodeGen/CGObjCMac.cpp
+++ b/clang/lib/CodeGen/CGObjCMac.cpp
@@ -1066,6 +1066,15 @@ class CGObjCCommonMac : public CodeGen::CGObjCRuntime {
   DirectMethodInfo &GenerateDirectMethod(const ObjCMethodDecl *OMD,
                                        const ObjCContainerDecl *CD);
 
+  llvm::Function *GenerateObjCDirectThunk(const ObjCMethodDecl *OMD,
+                                          const ObjCContainerDecl *CD,
+                                          llvm::Function *Implementation);
+
+  llvm::Function *GetDirectMethodCallee(const ObjCMethodDecl *OMD,
+                                        const ObjCContainerDecl *CD,
+                                        bool ReceiverCanBeNull,
+                                        bool ClassObjectCanBeUnrealized);
+
   /// Generate class realization code: [self self]
   /// This is used for class methods to ensure the class is initialized.
   /// Returns the realized class object.
@@ -2094,6 +2103,9 @@ CodeGen::RValue CGObjCCommonMac::EmitMessageSend(
 
   bool ReceiverCanBeNull =
       canMessageReceiverBeNull(CGF, Method, IsSuper, ClassReceiver, Arg0);
+  bool ClassObjectCanBeUnrealized =
+      Method && Method->isClassMethod() &&
+      canClassObjectBeUnrealized(ClassReceiver, CGF);
 
   bool RequiresNullCheck = false;
   bool RequiresSelValue = true;
@@ -2101,7 +2113,11 @@ CodeGen::RValue CGObjCCommonMac::EmitMessageSend(
   llvm::FunctionCallee Fn = nullptr;
   if (Method && Method->isDirectMethod()) {
     assert(!IsSuper);
-    Fn = GenerateDirectMethod(Method, Method->getClassInterface());
+    // Use GetDirectMethodCallee to decide whether to use implementation or
+    // thunk.
+    Fn = GetDirectMethodCallee(Method, Method->getClassInterface(),
+                               ReceiverCanBeNull, ClassObjectCanBeUnrealized);
+
     // Direct methods will synthesize the proper `_cmd` internally,
     // so just don't bother with setting the `_cmd` argument.
     RequiresSelValue = false;
@@ -2138,6 +2154,23 @@ CodeGen::RValue CGObjCCommonMac::EmitMessageSend(
   if (!RequiresNullCheck && Method && Method->hasParamDestroyedInCallee())
     RequiresNullCheck = true;
 
+  if (CGM.shouldHaveNilCheckInline(Method)) {
+    // For variadic class methods, we need to inline precondition checks. That
+    // includes two things:
+    // 1. if this is a class method, we have to realize the class if we are not
+    // sure.
+    if (ClassReceiver && ClassObjectCanBeUnrealized) {
+      // Perform class realization using the helper function
+      Arg0 = GenerateClassRealization(CGF, Arg0, ClassReceiver);
+      ActualArgs[0] = CallArg(RValue::get(Arg0), ActualArgs[0].Ty);
+    }
+    // 2. inline the nil check if we are not sure if the receiver can be null.
+    // Luckily, `NullReturnState` already does that for corner cases like
+    // ns_consumed; we only need to override the flag, even if the return value
+    // is unused.
+    RequiresNullCheck |= ReceiverCanBeNull;
+  }
+
   NullReturnState nullReturn;
   if (RequiresNullCheck) {
     nullReturn.init(CGF, Arg0);
@@ -3912,6 +3945,8 @@ CGObjCCommonMac::GenerateDirectMethod(const 
ObjCMethodDecl *OMD,
   llvm::FunctionType *MethodTy =
       Types.GetFunctionType(Types.arrangeObjCMethodDeclaration(OMD));
 
+  bool ExposeSymbol = CGM.shouldExposeSymbol(OMD);
+
   if (OldFn) {
     Fn = llvm::Function::Create(MethodTy, llvm::GlobalValue::ExternalLinkage,
                                 "", &CGM.getModule());
@@ -3921,10 +3956,30 @@ CGObjCCommonMac::GenerateDirectMethod(const 
ObjCMethodDecl *OMD,
 
     // Replace the cached implementation in the map.
     I->second.Implementation = Fn;
+    llvm::Function *OldThunk = I->second.Thunk;
 
+    // If implementation was replaced, and old thunk exists, invalidate the old
+    // thunk
+    //
+    // TODO: ideally, a new thunk shouldn't be necessary if the different return
+    // types are just subclasses: at IR level they are all pointers, i.e. the
+    // NewThunk and the OldThunk are identical.
+    if (OldThunk) {
+      llvm::Function *NewThunk = GenerateObjCDirectThunk(OMD, CD, Fn);
+
+      // Replace all uses before erasing
+      NewThunk->takeName(OldThunk);
+      OldThunk->replaceAllUsesWith(NewThunk);
+      OldThunk->eraseFromParent();
+
+      I->second.Thunk = NewThunk;
+    }
   } else {
-    auto Name = getSymbolNameForMethod(OMD, /*include category*/ false);
+    // Generate symbol without \01 prefix when optimization enabled
+    auto Name = getSymbolNameForMethod(OMD, /*include category*/ false,
+                                       /*includePrefixByte*/ !ExposeSymbol);
 
+    // ALWAYS use ExternalLinkage for true implementation
     Fn = llvm::Function::Create(MethodTy, llvm::GlobalValue::ExternalLinkage,
                                 Name, &CGM.getModule());
     auto [It, inserted] = DirectMethodDefinitions.insert(std::make_pair(COMD, 
DirectMethodInfo(Fn)));
@@ -3936,6 +3991,190 @@ CGObjCCommonMac::GenerateDirectMethod(const 
ObjCMethodDecl *OMD,
   return I->second;
 }
 
+/// Start an Objective-C direct method thunk.
+///
+/// The thunk must use musttail to remain transparent to ARC - any
+/// ARC operations must happen in the caller, not in the thunk.
+void CodeGenFunction::StartObjCDirectThunk(const ObjCMethodDecl *OMD,
+                                           llvm::Function *Fn,
+                            const CGFunctionInfo &FI)
+                                          {
+  // Mark this as a thunk function to disable ARC parameter processing
+  // and other thunk-inappropriate behavior.
+  CurFuncIsThunk = true;
+
+  // Build argument list for StartFunction.
+  // We must include all parameters to match the thunk's LLVM function type.
+  // The thunk uses musttail to forward all arguments directly, so no ARC
+  // processing is needed in the prolog - the parameters are forwarded
+  // as-is without local copies.
+  FunctionArgList FunctionArgs;
+  FunctionArgs.push_back(OMD->getSelfDecl());
+  FunctionArgs.append(OMD->param_begin(), OMD->param_end());
+
+  // The Start/Finish thunk pattern is borrowed from CGVTables.cpp
+  // for C++ virtual method thunks, but adapted for ObjC direct methods.
+  //
+  // Like C++ thunks, we don't have an actual AST body for the thunk - we only
+  // have the method's parameter declarations. Therefore, we pass empty
+  // `GlobalDecl` to `StartFunction` ...
+  StartFunction(GlobalDecl(), OMD->getReturnType(), Fn, FI, FunctionArgs,
+                OMD->getLocation(), OMD->getLocation());
+
+  // and manually set the decl afterwards so other utilities / helpers in CGF
+  // can still access the AST (e.g. arrange function arguments)
+  CurCodeDecl = OMD;
+  CurFuncDecl = OMD;
+}
+
+/// Finish an Objective-C direct method thunk.
+void CodeGenFunction::FinishObjCDirectThunk() {
+  // Create a dummy block to return the value of the thunk.
+  //
+  // The non-nil branch already returned because of musttail.
+  // Only the nil branch will jump to this return block.
+  // If the nil check is not emitted (for class methods), this will be a dead
+  // block.
+  //
+  // Either way, the LLVM optimizer will simplify it later. This is just to
+  // make the CFG happy.
+  EmitBlock(createBasicBlock("dummy_ret_block"));
+
+  // Disable the final ARC autorelease.
+  // Thunk functions tail-call the actual implementation, so they don't need
+  // to worry about ARC.
+  AutoreleaseResult = false;
+
+  // Clear these to restore the invariants expected by
+  // StartFunction/FinishFunction.
+  CurCodeDecl = nullptr;
+  CurFuncDecl = nullptr;
+
+  FinishFunction();
+}
+
+llvm::Function *
+CGObjCCommonMac::GenerateObjCDirectThunk(const ObjCMethodDecl *OMD,
+                                         const ObjCContainerDecl *CD,
+                                         llvm::Function *Implementation) {
+
+  assert(CGM.shouldHaveNilCheckThunk(OMD) &&
+         "Should only generate thunk when optimization enabled");
+  assert(Implementation && "Implementation must exist");
+
+  llvm::FunctionType *ThunkTy = Implementation->getFunctionType();
+  std::string ThunkName = Implementation->getName().str() + "_thunk";
+
+  // Create thunk with linkonce_odr linkage (allows deduplication)
+  llvm::Function *Thunk =
+      llvm::Function::Create(ThunkTy, llvm::GlobalValue::LinkOnceODRLinkage,
+                             ThunkName, &CGM.getModule());
+
+  // Thunks should always have hidden visibility; other link units will have
+  // their own version of the (identical) thunk. If they make a cross link-unit
+  // call, they are either calling through their thunk or directly dispatching
+  // to the true implementation, so making the thunk visible is meaningless.
+  Thunk->setVisibility(llvm::GlobalValue::HiddenVisibility);
+  Thunk->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
+
+  // Start the ObjC direct thunk (sets up state and calls StartFunction)
+  const CGFunctionInfo &FI = CGM.getTypes().arrangeObjCMethodDeclaration(OMD);
+
+  // Create a CodeGenFunction to generate the thunk body
+  CodeGenFunction CGF(CGM);
+  CGF.StartObjCDirectThunk(OMD, Thunk, FI);
+
+  // Copy function-level attributes from implementation to make musttail happy
+  llvm::AttributeList ImplAttrs = Implementation->getAttributes();
+  Thunk->setAttributes(ImplAttrs);
+
+  // - [self self] for class methods (class realization)
+  // - if (self == nil) branch to nil block with zero return
+  // - continuation block for non-nil case
+  GenerateDirectMethodsPreconditionCheck(CGF, Thunk, OMD, CD);
+
+  // Now emit the musttail call to the true implementation
+  // Collect all arguments for forwarding
+  SmallVector<llvm::Value *, 8> Args;
+  for (auto &Arg : Thunk->args())
+    Args.push_back(&Arg);
+
+  // Create musttail call to the implementation
+  llvm::CallInst *Call = CGF.Builder.CreateCall(Implementation, Args);
+  Call->setTailCallKind(llvm::CallInst::TCK_MustTail);
+
+  // Apply call-site attributes using ConstructAttributeList
+  // When sret is used, the call must have matching sret attributes on the
+  // first parameter for musttail to work correctly. This mirrors what C++
+  // thunks do in EmitMustTailThunk.
+  unsigned CallingConv;
+  llvm::AttributeList Attrs;
+  CGM.ConstructAttributeList(Implementation->getName(), FI, GlobalDecl(OMD),
+                             Attrs, CallingConv, /*AttrOnCallSite=*/true,
+                             /*IsThunk=*/false);
+  Call->setAttributes(Attrs);
+  Call->setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv));
+
+  // Immediately return the call result (musttail requirement)
+  if (FI.getReturnInfo().isIndirect()) {
+    // SRet case: return void
+    CGF.Builder.CreateRetVoid();
+  } else {
+    if (ThunkTy->getReturnType()->isVoidTy())
+      CGF.Builder.CreateRetVoid();
+    else
+      CGF.Builder.CreateRet(Call);
+  }
+
+  // Finish the ObjC direct thunk (creates dummy block and calls FinishFunction)
+  CGF.FinishObjCDirectThunk();
+  return Thunk;
+}
+
+llvm::Function *CGObjCCommonMac::GetDirectMethodCallee(
+    const ObjCMethodDecl *OMD, const ObjCContainerDecl *CD,
+    bool ReceiverCanBeNull, bool ClassObjectCanBeUnrealized) {
+
+  // Get from cache or populate the function declaration lazily
+  DirectMethodInfo &Info = GenerateDirectMethod(OMD, CD);
+
+  // If optimization not enabled, always use implementation (which includes the
+  // nil check)
+  if (!CGM.shouldExposeSymbol(OMD)) {
+    return Info.Implementation;
+  }
+
+  // Variadic methods have no thunk; the caller needs to inline the nil check.
+  if (CGM.shouldHaveNilCheckInline(OMD)) {
+    return Info.Implementation;
+  }
+
+  // Thunk is lazily generated.
+  auto getOrCreateThunk = [&]() {
+    if (!Info.Thunk)
+      Info.Thunk = GenerateObjCDirectThunk(OMD, CD, Info.Implementation);
+    return Info.Thunk;
+  };
+
+  assert(CGM.shouldHaveNilCheckThunk(OMD) &&
+         "a method either has nil check thunk or have thunk inlined when "
+         "exposing its symbol");
+
+  if (OMD->isInstanceMethod()) {
+    // If we can prove the instance method's receiver is not null, return the
+    // true implementation
+    return ReceiverCanBeNull ? getOrCreateThunk() : Info.Implementation;
+  }
+  if (OMD->isClassMethod()) {
+    // For class methods, the receiver must be non-null and realized before we
+    // dispatch to the true implementation
+    return (ReceiverCanBeNull || ClassObjectCanBeUnrealized)
+               ? getOrCreateThunk()
+               : Info.Implementation;
+  }
+  llvm_unreachable("OMD should either be a class method or instance method");
+}
+
 llvm::Value *
 CGObjCCommonMac::GenerateClassRealization(CodeGenFunction &CGF,
                                           llvm::Value *classObject,
diff --git a/clang/lib/CodeGen/CodeGenFunction.h 
b/clang/lib/CodeGen/CodeGenFunction.h
index 8c4c1c8c2dc95..f507146b37cc5 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -2422,6 +2422,13 @@ class CodeGenFunction : public CodeGenTypeCache {
 
   void FinishThunk();
 
+  /// Start an Objective-C direct method thunk.
+  void StartObjCDirectThunk(const ObjCMethodDecl *OMD, llvm::Function *Fn,
+                            const CGFunctionInfo &FI);
+
+  /// Finish an Objective-C direct method thunk.
+  void FinishObjCDirectThunk();
+
   /// Emit a musttail call for a thunk with a potentially adjusted this 
pointer.
   void EmitMustTailThunk(GlobalDecl GD, llvm::Value *AdjustedThisPtr,
                          llvm::FunctionCallee Callee);

_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to