https://github.com/DataCorrupted created https://github.com/llvm/llvm-project/pull/170618
- Generation - Dispatch >From 81b0d800623f03b0c3fa9b4412e36dc585236ed8 Mon Sep 17 00:00:00 2001 From: Peter Rong <[email protected]> Date: Wed, 3 Dec 2025 22:42:51 -0800 Subject: [PATCH] [ExposeDirectMethod] Nil check thunk generation - Generation - Dispatch --- clang/lib/CodeGen/CGDecl.cpp | 4 +- clang/lib/CodeGen/CGObjC.cpp | 17 +- clang/lib/CodeGen/CGObjCMac.cpp | 243 +++++++++++++++++++++++++++- clang/lib/CodeGen/CodeGenFunction.h | 7 + 4 files changed, 263 insertions(+), 8 deletions(-) diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp index 8b1cd83af2396..9f0e09eac8866 100644 --- a/clang/lib/CodeGen/CGDecl.cpp +++ b/clang/lib/CodeGen/CGDecl.cpp @@ -2757,7 +2757,9 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg, llvm::Value *ArgVal = (DoStore ? Arg.getDirectValue() : nullptr); LValue lv = MakeAddrLValue(DeclPtr, Ty); - if (IsScalar) { + // If this is a thunk, don't bother with ARC lifetime management. + // The true implementation will take care of that. + if (IsScalar && !CurFuncIsThunk) { Qualifiers qs = Ty.getQualifiers(); if (Qualifiers::ObjCLifetime lt = qs.getObjCLifetime()) { // We honor __attribute__((ns_consumed)) for types with lifetime. 
diff --git a/clang/lib/CodeGen/CGObjC.cpp b/clang/lib/CodeGen/CGObjC.cpp index 10aad2e26938d..f1b8627fc119a 100644 --- a/clang/lib/CodeGen/CGObjC.cpp +++ b/clang/lib/CodeGen/CGObjC.cpp @@ -761,7 +761,18 @@ void CodeGenFunction::StartObjCMethod(const ObjCMethodDecl *OMD, const CGFunctionInfo &FI = CGM.getTypes().arrangeObjCMethodDeclaration(OMD); if (OMD->isDirectMethod()) { - Fn->setVisibility(llvm::Function::HiddenVisibility); + Fn->setVisibility(llvm::GlobalValue::HiddenVisibility); + if (CGM.shouldExposeSymbol(OMD)) { + // Find the decl that may have visibility set (property or method) + const NamedDecl *Decl = OMD; + if (const auto *PD = OMD->findPropertyDecl()) { + Decl = PD; + } + // and respect source level visibility setting + if (auto V = Decl->getExplicitVisibility(NamedDecl::VisibilityForValue)) { + Fn->setVisibility(CGM.GetLLVMVisibility(*V)); + } + } CGM.SetLLVMFunctionAttributes(OMD, FI, Fn, /*IsThunk=*/false); CGM.SetLLVMFunctionAttributesForDefinition(OMD, Fn); } else { @@ -781,10 +792,6 @@ void CodeGenFunction::StartObjCMethod(const ObjCMethodDecl *OMD, OMD->getLocation(), StartLoc); if (OMD->isDirectMethod()) { - // This function is a direct call, it has to implement a nil check - // on entry. 
- // - // TODO: possibly have several entry points to elide the check CGM.getObjCRuntime().GenerateDirectMethodPrologue(*this, Fn, OMD, CD); } diff --git a/clang/lib/CodeGen/CGObjCMac.cpp b/clang/lib/CodeGen/CGObjCMac.cpp index 3f4b11c634ce4..741e5d85b5935 100644 --- a/clang/lib/CodeGen/CGObjCMac.cpp +++ b/clang/lib/CodeGen/CGObjCMac.cpp @@ -1066,6 +1066,15 @@ class CGObjCCommonMac : public CodeGen::CGObjCRuntime { DirectMethodInfo &GenerateDirectMethod(const ObjCMethodDecl *OMD, const ObjCContainerDecl *CD); + llvm::Function *GenerateObjCDirectThunk(const ObjCMethodDecl *OMD, + const ObjCContainerDecl *CD, + llvm::Function *Implementation); + + llvm::Function *GetDirectMethodCallee(const ObjCMethodDecl *OMD, + const ObjCContainerDecl *CD, + bool ReceiverCanBeNull, + bool ClassObjectCanBeUnrealized); + /// Generate class realization code: [self self] /// This is used for class methods to ensure the class is initialized. /// Returns the realized class object. @@ -2094,6 +2103,9 @@ CodeGen::RValue CGObjCCommonMac::EmitMessageSend( bool ReceiverCanBeNull = canMessageReceiverBeNull(CGF, Method, IsSuper, ClassReceiver, Arg0); + bool ClassObjectCanBeUnrealized = + Method && Method->isClassMethod() && + canClassObjectBeUnrealized(ClassReceiver, CGF); bool RequiresNullCheck = false; bool RequiresSelValue = true; @@ -2101,7 +2113,11 @@ CodeGen::RValue CGObjCCommonMac::EmitMessageSend( llvm::FunctionCallee Fn = nullptr; if (Method && Method->isDirectMethod()) { assert(!IsSuper); - Fn = GenerateDirectMethod(Method, Method->getClassInterface()); + // Use GetDirectMethodCallee to decide whether to use implementation or + // thunk. + Fn = GetDirectMethodCallee(Method, Method->getClassInterface(), + ReceiverCanBeNull, ClassObjectCanBeUnrealized); + // Direct methods will synthesize the proper `_cmd` internally, // so just don't bother with setting the `_cmd` argument. 
RequiresSelValue = false; @@ -2138,6 +2154,23 @@ CodeGen::RValue CGObjCCommonMac::EmitMessageSend( if (!RequiresNullCheck && Method && Method->hasParamDestroyedInCallee()) RequiresNullCheck = true; + if (CGM.shouldHaveNilCheckInline(Method)) { + // For variadic class methods, we need to inline precondition checks. That + // includes two things: + // 1. if this is a class method, we have to realize the class if we are not + // sure. + if (ClassReceiver && ClassObjectCanBeUnrealized) { + // Perform class realization using the helper function + Arg0 = GenerateClassRealization(CGF, Arg0, ClassReceiver); + ActualArgs[0] = CallArg(RValue::get(Arg0), ActualArgs[0].Ty); + } + // 2. inline the nil check if we are not sure if the receiver can be null. + // Luckily, `NullReturnState` already does that for corner cases like + // ns_consumed, we only need to override the flag, even if return value is + // unused. + RequiresNullCheck |= ReceiverCanBeNull; + } + NullReturnState nullReturn; if (RequiresNullCheck) { nullReturn.init(CGF, Arg0); @@ -3912,6 +3945,8 @@ CGObjCCommonMac::GenerateDirectMethod(const ObjCMethodDecl *OMD, llvm::FunctionType *MethodTy = Types.GetFunctionType(Types.arrangeObjCMethodDeclaration(OMD)); + bool ExposeSymbol = CGM.shouldExposeSymbol(OMD); + if (OldFn) { Fn = llvm::Function::Create(MethodTy, llvm::GlobalValue::ExternalLinkage, "", &CGM.getModule()); @@ -3921,10 +3956,30 @@ CGObjCCommonMac::GenerateDirectMethod(const ObjCMethodDecl *OMD, // Replace the cached implementation in the map. I->second.Implementation = Fn; + llvm::Function *OldThunk = I->second.Thunk; + // If implementation was replaced, and old thunk exists, invalidate the old + // thunk + // + // TODO: ideally, new thunk shouldn't be necessary, if the different return + // types are just subclasses, at IR level they are just pointers, i.e. the + // NewThunk and the OldThunk are identical. 
+ if (OldThunk) { + llvm::Function *NewThunk = GenerateObjCDirectThunk(OMD, CD, Fn); + + // Replace all uses before erasing + NewThunk->takeName(OldThunk); + OldThunk->replaceAllUsesWith(NewThunk); + OldThunk->eraseFromParent(); + + I->second.Thunk = NewThunk; + } } else { - auto Name = getSymbolNameForMethod(OMD, /*include category*/ false); + // Generate symbol without \01 prefix when optimization enabled + auto Name = getSymbolNameForMethod(OMD, /*include category*/ false, + /*includePrefixByte*/ !ExposeSymbol); + // ALWAYS use ExternalLinkage for true implementation Fn = llvm::Function::Create(MethodTy, llvm::GlobalValue::ExternalLinkage, Name, &CGM.getModule()); auto [It, inserted] = DirectMethodDefinitions.insert(std::make_pair(COMD, DirectMethodInfo(Fn))); @@ -3936,6 +3991,190 @@ CGObjCCommonMac::GenerateDirectMethod(const ObjCMethodDecl *OMD, return I->second; } +/// Start an Objective-C direct method thunk. +/// +/// The thunk must use musttail to remain transparent to ARC - any +/// ARC operations must happen in the caller, not in the thunk. +void CodeGenFunction::StartObjCDirectThunk(const ObjCMethodDecl *OMD, + llvm::Function *Fn, + const CGFunctionInfo &FI) + { + // Mark this as a thunk function to disable ARC parameter processing + // and other thunk-inappropriate behavior. + CurFuncIsThunk = true; + + // Build argument list for StartFunction. + // We must include all parameters to match the thunk's LLVM function type. + // The thunk uses musttail to forward all arguments directly, so ARC + // processing in the prolog is harmless - the parameters are forwarded + // as-is without local copies. + FunctionArgList FunctionArgs; + FunctionArgs.push_back(OMD->getSelfDecl()); + FunctionArgs.append(OMD->param_begin(), OMD->param_end()); + + // The Start/Finish thunk pattern is borrowed from CGVTables.cpp + // for C++ virtual method thunks, but adapted for ObjC direct methods. 
+ // + // Like C++ thunks, we don't have an actual AST body for the thunk - we only + // have the method's parameter declarations. Therefore, we pass an empty + // `GlobalDecl` to `StartFunction` ... + StartFunction(GlobalDecl(), OMD->getReturnType(), Fn, FI, FunctionArgs, + OMD->getLocation(), OMD->getLocation()); + + // and manually set the decl afterwards so other utilities / helpers in CGF + // can still access the AST (e.g. arrange function arguments) + CurCodeDecl = OMD; + CurFuncDecl = OMD; +} + +/// Finish an Objective-C direct method thunk. +void CodeGenFunction::FinishObjCDirectThunk() { + // Create a dummy block to return the value of the thunk. + // + // The non-nil branch already returned because of musttail. + // Only the nil branch will jump to this return block. + // If the nil check is not emitted (for class methods), this will be a dead + // block. + // + // Either way, the LLVM optimizer will simplify it later. This is just to make + // the CFG happy. + EmitBlock(createBasicBlock("dummy_ret_block")); + + // Disable the final ARC autorelease. + // Thunk functions tail-call the actual implementation, so they don't need + // to worry about ARC. + AutoreleaseResult = false; + + // Clear these to restore the invariants expected by + // StartFunction/FinishFunction. 
+ CurCodeDecl = nullptr; + CurFuncDecl = nullptr; + + FinishFunction(); +} + +llvm::Function * +CGObjCCommonMac::GenerateObjCDirectThunk(const ObjCMethodDecl *OMD, + const ObjCContainerDecl *CD, + llvm::Function *Implementation) { + + assert(CGM.shouldHaveNilCheckThunk(OMD) && + "Should only generate thunk when optimization enabled"); + assert(Implementation && "Implementation must exist"); + + llvm::FunctionType *ThunkTy = Implementation->getFunctionType(); + std::string ThunkName = Implementation->getName().str() + "_thunk"; + + // Create thunk with linkonce_odr linkage (allows deduplication) + llvm::Function *Thunk = + llvm::Function::Create(ThunkTy, llvm::GlobalValue::LinkOnceODRLinkage, + ThunkName, &CGM.getModule()); + + // Thunks should always have hidden visibility; other link units will have + // their own version of the (identical) thunk. If they make a cross-link-unit + // call, they are either calling through their thunk or directly dispatching + // to the true implementation, so making the thunk visible is meaningless. 
+ Thunk->setVisibility(llvm::GlobalValue::HiddenVisibility); + Thunk->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); + + // Start the ObjC direct thunk (sets up state and calls StartFunction) + const CGFunctionInfo &FI = CGM.getTypes().arrangeObjCMethodDeclaration(OMD); + + // Create a CodeGenFunction to generate the thunk body + CodeGenFunction CGF(CGM); + CGF.StartObjCDirectThunk(OMD, Thunk, FI); + + // Copy function-level attributes from implementation to make musttail happy + llvm::AttributeList ImplAttrs = Implementation->getAttributes(); + Thunk->setAttributes(ImplAttrs); + + // - [self self] for class methods (class realization) + // - if (self == nil) branch to nil block with zero return + // - continuation block for non-nil case + GenerateDirectMethodsPreconditionCheck(CGF, Thunk, OMD, CD); + + // Now emit the musttail call to the true implementation + // Collect all arguments for forwarding + SmallVector<llvm::Value *, 8> Args; + for (auto &Arg : Thunk->args()) + Args.push_back(&Arg); + + // Create musttail call to the implementation + llvm::CallInst *Call = CGF.Builder.CreateCall(Implementation, Args); + Call->setTailCallKind(llvm::CallInst::TCK_MustTail); + + // Apply call-site attributes using ConstructAttributeList + // When sret is used, the call must have matching sret attributes on the first + // parameter for musttail to work correctly. This mirrors what C++ thunks do + // in EmitMustTailThunk. 
+ unsigned CallingConv; + llvm::AttributeList Attrs; + CGM.ConstructAttributeList(Implementation->getName(), FI, GlobalDecl(OMD), + Attrs, CallingConv, /*AttrOnCallSite=*/true, + /*IsThunk=*/false); + Call->setAttributes(Attrs); + Call->setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv)); + + // Immediately return the call result (musttail requirement) + if (FI.getReturnInfo().isIndirect()) { + // SRet case: return void + CGF.Builder.CreateRetVoid(); + } else { + if (ThunkTy->getReturnType()->isVoidTy()) + CGF.Builder.CreateRetVoid(); + else + CGF.Builder.CreateRet(Call); + } + + // Finish the ObjC direct thunk (creates dummy block and calls FinishFunction) + CGF.FinishObjCDirectThunk(); + return Thunk; +} + +llvm::Function *CGObjCCommonMac::GetDirectMethodCallee( + const ObjCMethodDecl *OMD, const ObjCContainerDecl *CD, + bool ReceiverCanBeNull, bool ClassObjectCanBeUnrealized) { + + // Get from cache or populate the function declaration lazily + DirectMethodInfo &Info = GenerateDirectMethod(OMD, CD); + + // If optimization not enabled, always use implementation (which includes the + // nil check) + if (!CGM.shouldExposeSymbol(OMD)) { + return Info.Implementation; + } + + // Variadic methods have no thunk; the caller must inline the nil check + if (CGM.shouldHaveNilCheckInline(OMD)) { + return Info.Implementation; + } + + // Thunk is lazily generated. + auto getOrCreateThunk = [&]() { + if (!Info.Thunk) + Info.Thunk = GenerateObjCDirectThunk(OMD, CD, Info.Implementation); + return Info.Thunk; + }; + + assert(CGM.shouldHaveNilCheckThunk(OMD) && + "a method either has nil check thunk or have thunk inlined when " + "exposing its symbol"); + + if (OMD->isInstanceMethod()) { + // If we can prove the receiver is not null, return the true + // implementation + return ReceiverCanBeNull ? 
getOrCreateThunk() : Info.Implementation; + } + if (OMD->isClassMethod()) { + // For class methods, the receiver must be non-null and realized before we + // dispatch to the true implementation + return (ReceiverCanBeNull || ClassObjectCanBeUnrealized) + ? getOrCreateThunk() + : Info.Implementation; + } + llvm_unreachable("OMD should either be a class method or instance method"); +} + llvm::Value * CGObjCCommonMac::GenerateClassRealization(CodeGenFunction &CGF, llvm::Value *classObject, diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 8c4c1c8c2dc95..f507146b37cc5 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -2422,6 +2422,13 @@ class CodeGenFunction : public CodeGenTypeCache { void FinishThunk(); + /// Start an Objective-C direct method thunk. + void StartObjCDirectThunk(const ObjCMethodDecl *OMD, llvm::Function *Fn, + const CGFunctionInfo &FI); + + /// Finish an Objective-C direct method thunk. + void FinishObjCDirectThunk(); + /// Emit a musttail call for a thunk with a potentially adjusted this pointer. void EmitMustTailThunk(GlobalDecl GD, llvm::Value *AdjustedThisPtr, llvm::FunctionCallee Callee); _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
