https://github.com/hassnaaHamdi updated https://github.com/llvm/llvm-project/pull/159685
>From 5a6c69b498d59edee3147c5de8c9c7b40f48c4b0 Mon Sep 17 00:00:00 2001 From: Hassnaa Hamdi <[email protected]> Date: Tue, 23 Sep 2025 20:47:55 +0000 Subject: [PATCH 1/5] [WPD]: Add devirtualization pass to the pass pipeline. - Build ExportSummary locally when they are not given. --- llvm/include/llvm/Passes/PassBuilder.h | 4 ++ .../llvm/Transforms/IPO/WholeProgramDevirt.h | 7 ++- llvm/lib/Passes/PassBuilderPipelines.cpp | 19 ++++++++ .../lib/Transforms/IPO/WholeProgramDevirt.cpp | 46 +++++++++++++------ 4 files changed, 59 insertions(+), 17 deletions(-) diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h index 8538a8b2afe14..f81ac4814ca3c 100644 --- a/llvm/include/llvm/Passes/PassBuilder.h +++ b/llvm/include/llvm/Passes/PassBuilder.h @@ -99,6 +99,10 @@ class PipelineTuningOptions { // analyses after various module->function or cgscc->function adaptors in the // default pipelines. bool EagerlyInvalidateAnalyses; + + // Tuning option to enable/disable speculative devirtualization. + // Its default value is false. + bool DevirtualizeSpeculatively; }; /// This class provides access to building LLVM's passes. 
diff --git a/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h b/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h index 7a03405b4f462..2e33a4098be1b 100644 --- a/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h +++ b/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h @@ -226,11 +226,14 @@ struct WholeProgramDevirtPass : public PassInfoMixin<WholeProgramDevirtPass> { ModuleSummaryIndex *ExportSummary; const ModuleSummaryIndex *ImportSummary; bool UseCommandLine = false; + bool DevirtSpeculatively = false; WholeProgramDevirtPass() : ExportSummary(nullptr), ImportSummary(nullptr), UseCommandLine(true) {} WholeProgramDevirtPass(ModuleSummaryIndex *ExportSummary, - const ModuleSummaryIndex *ImportSummary) - : ExportSummary(ExportSummary), ImportSummary(ImportSummary) { + const ModuleSummaryIndex *ImportSummary, + bool DevirtSpeculatively = false) + : ExportSummary(ExportSummary), ImportSummary(ImportSummary), + DevirtSpeculatively(DevirtSpeculatively) { assert(!(ExportSummary && ImportSummary)); } LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &); diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index bd03ac090721c..030f4fbc7c963 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -325,6 +325,7 @@ PipelineTuningOptions::PipelineTuningOptions() { MergeFunctions = EnableMergeFunctions; InlinerThreshold = -1; EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses; + DevirtualizeSpeculatively = false; } namespace llvm { @@ -1641,6 +1642,24 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, if (!LTOPreLink) MPM.addPass(RelLookupTableConverterPass()); + if (PTO.DevirtualizeSpeculatively && LTOPhase == ThinOrFullLTOPhase::None) { + MPM.addPass(WholeProgramDevirtPass( + /*ExportSummary*/ nullptr, + /*ImportSummary*/ nullptr, + /*DevirtSpeculatively*/ PTO.DevirtualizeSpeculatively)); + 
MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, + lowertypetests::DropTestKind::Assume)); + if (EnableModuleInliner) { + MPM.addPass(ModuleInlinerPass(getInlineParamsFromOptLevel(Level), + UseInlineAdvisor, + ThinOrFullLTOPhase::None)); + } else { + MPM.addPass(ModuleInlinerWrapperPass( + getInlineParamsFromOptLevel(Level), + /* MandatoryFirst */ true, + InlineContext{ThinOrFullLTOPhase::None, InlinePass::CGSCCInliner})); + } + } return MPM; } diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp index 2dd0fde6b34d6..80848d976304d 100644 --- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp +++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -636,9 +636,11 @@ struct DevirtModule { std::map<CallInst *, unsigned> NumUnsafeUsesForTypeTest; PatternList FunctionsToSkip; + const bool DevirtSpeculatively; DevirtModule(Module &M, ModuleAnalysisManager &MAM, ModuleSummaryIndex *ExportSummary, - const ModuleSummaryIndex *ImportSummary) + const ModuleSummaryIndex *ImportSummary, + bool DevirtSpeculatively) : M(M), MAM(MAM), FAM(MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager()), ExportSummary(ExportSummary), ImportSummary(ImportSummary), @@ -651,7 +653,8 @@ struct DevirtModule { RemarksEnabled(areRemarksEnabled()), OREGetter([&](Function &F) -> OptimizationRemarkEmitter & { return FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); - }) { + }), + DevirtSpeculatively(DevirtSpeculatively) { assert(!(ExportSummary && ImportSummary)); FunctionsToSkip.init(SkipFunctionNames); } @@ -765,7 +768,8 @@ struct DevirtModule { // Lower the module using the action and summary passed as command line // arguments. For testing purposes only. 
- static bool runForTesting(Module &M, ModuleAnalysisManager &MAM); + static bool runForTesting(Module &M, ModuleAnalysisManager &MAM, + bool DevirtSpeculatively); }; struct DevirtIndex { @@ -808,11 +812,22 @@ struct DevirtIndex { PreservedAnalyses WholeProgramDevirtPass::run(Module &M, ModuleAnalysisManager &MAM) { if (UseCommandLine) { - if (!DevirtModule::runForTesting(M, MAM)) + if (!DevirtModule::runForTesting(M, MAM, ClDevirtualizeSpeculatively)) return PreservedAnalyses::all(); return PreservedAnalyses::none(); } - if (!DevirtModule(M, MAM, ExportSummary, ImportSummary).run()) + + std::optional<ModuleSummaryIndex> Index; + if (!ExportSummary && !ImportSummary && DevirtSpeculatively) { + // Build the ExportSummary from the module. + assert(!ExportSummary && + "ExportSummary is expected to be empty in non-LTO mode"); + ProfileSummaryInfo PSI(M); + Index.emplace(buildModuleSummaryIndex(M, nullptr, &PSI)); + ExportSummary = Index.has_value() ? &Index.value() : nullptr; + } + if (!DevirtModule(M, MAM, ExportSummary, ImportSummary, DevirtSpeculatively) + .run()) return PreservedAnalyses::all(); return PreservedAnalyses::none(); } @@ -1010,7 +1025,8 @@ static Error checkCombinedSummaryForTesting(ModuleSummaryIndex *Summary) { return ErrorSuccess(); } -bool DevirtModule::runForTesting(Module &M, ModuleAnalysisManager &MAM) { +bool DevirtModule::runForTesting(Module &M, ModuleAnalysisManager &MAM, + bool DevirtSpeculatively) { std::unique_ptr<ModuleSummaryIndex> Summary = std::make_unique<ModuleSummaryIndex>(/*HaveGVs=*/false); @@ -1039,7 +1055,8 @@ bool DevirtModule::runForTesting(Module &M, ModuleAnalysisManager &MAM) { ClSummaryAction == PassSummaryAction::Export ? Summary.get() : nullptr, ClSummaryAction == PassSummaryAction::Import ? 
Summary.get() - : nullptr) + : nullptr, + DevirtSpeculatively) .run(); if (!ClWriteSummary.empty()) { @@ -1103,10 +1120,10 @@ bool DevirtModule::tryFindVirtualCallTargets( if (!TM.Bits->GV->isConstant()) return false; - // Without ClDevirtualizeSpeculatively, we cannot perform whole program + // Without DevirtSpeculatively, we cannot perform whole program // devirtualization analysis on a vtable with public LTO visibility. - if (!ClDevirtualizeSpeculatively && TM.Bits->GV->getVCallVisibility() == - GlobalObject::VCallVisibilityPublic) + if (!DevirtSpeculatively && TM.Bits->GV->getVCallVisibility() == + GlobalObject::VCallVisibilityPublic) return false; Function *Fn = nullptr; @@ -1127,7 +1144,7 @@ bool DevirtModule::tryFindVirtualCallTargets( // In most cases empty functions will be overridden by the // implementation of the derived class, so we can skip them. - if (ClDevirtualizeSpeculatively && Fn->getReturnType()->isVoidTy() && + if (DevirtSpeculatively && Fn->getReturnType()->isVoidTy() && Fn->getInstructionCount() <= 1) continue; @@ -1250,8 +1267,7 @@ void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo, // add support to compare the virtual function pointer to the // devirtualized target. In case of a mismatch, fall back to indirect // call. - if (DevirtCheckMode == WPDCheckMode::Fallback || - ClDevirtualizeSpeculatively) { + if (DevirtCheckMode == WPDCheckMode::Fallback || DevirtSpeculatively) { MDNode *Weights = MDBuilder(M.getContext()).createLikelyBranchWeights(); // Version the indirect call site. If the called value is equal to the // given callee, 'NewInst' will be executed, otherwise the original call @@ -2375,7 +2391,7 @@ bool DevirtModule::run() { Function *PublicTypeTestFunc = nullptr; // If we are in speculative devirtualization mode, we can work on the public // type test intrinsics. 
- if (ClDevirtualizeSpeculatively) + if (DevirtSpeculatively) PublicTypeTestFunc = Intrinsic::getDeclarationIfExists(&M, Intrinsic::public_type_test); Function *TypeTestFunc = @@ -2511,7 +2527,7 @@ bool DevirtModule::run() { // Out of speculative devirtualization mode, Try to apply virtual constant // propagation or branch funneling. // TODO: This should eventually be enabled for non-public type tests. - if (!SingleImplDevirt && !ClDevirtualizeSpeculatively) { + if (!SingleImplDevirt && !DevirtSpeculatively) { DidVirtualConstProp |= tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first); >From 7472477c85bdc524cbd256041baa9474afc81c07 Mon Sep 17 00:00:00 2001 From: Hassnaa Hamdi <[email protected]> Date: Wed, 24 Sep 2025 07:44:55 +0000 Subject: [PATCH 2/5] [Clang]: Enable speculative devirtualization --- clang/docs/UsersManual.rst | 9 +++++++++ clang/include/clang/Basic/CodeGenOptions.def | 2 ++ clang/include/clang/Driver/Options.td | 12 +++++++++--- clang/lib/CodeGen/BackendUtil.cpp | 1 + clang/lib/CodeGen/CGClass.cpp | 14 ++++++++------ clang/lib/CodeGen/CGVTables.cpp | 6 ++++-- clang/lib/CodeGen/ItaniumCXXABI.cpp | 13 ++++++++----- clang/lib/Driver/ToolChains/Clang.cpp | 7 +++++++ clang/test/CodeGenCXX/type-metadata.cpp | 8 ++++++++ clang/test/Driver/clang_f_opts.c | 2 -- 10 files changed, 56 insertions(+), 18 deletions(-) diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index fb22ad3c90af4..7f3de0f336947 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -2319,6 +2319,13 @@ are listed below. This enables better devirtualization. Turned off by default, because it is still experimental. +.. option:: -fdevirtualize-speculatively + + Enable speculative devirtualization optimization, such as single-implementation + devirtualization. This optimization is used out of LTO mode for now. + Turned off by default. + TODO: Enable for LTO mode. + .. 
option:: -fwhole-program-vtables Enable whole-program vtable optimizations, such as single-implementation @@ -5207,6 +5214,8 @@ Execute ``clang-cl /?`` to see a list of supported options: -fstandalone-debug Emit full debug info for all types used by the program -fstrict-aliasing Enable optimizations based on strict aliasing rules -fsyntax-only Run the preprocessor, parser and semantic analysis stages + -fdevirtualize-speculatively + Enables speculative devirtualization optimization. -fwhole-program-vtables Enables whole-program vtable optimization. Requires -flto -gcodeview-ghash Emit type record hashes in a .debug$H section -gcodeview Generate CodeView debug information diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index 90e1f8d1eb5e9..94a55302c8016 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -362,6 +362,8 @@ VALUE_CODEGENOPT(WarnStackSize , 32, UINT_MAX, Benign) ///< Set via -fwarn-s CODEGENOPT(NoStackArgProbe, 1, 0, Benign) ///< Set when -mno-stack-arg-probe is used CODEGENOPT(EmitLLVMUseLists, 1, 0, Benign) ///< Control whether to serialize use-lists. +CODEGENOPT(DevirtualizeSpeculatively, 1, 0, Benign) ///< Whether to apply the speculative + /// devirtualization optimization. CODEGENOPT(WholeProgramVTables, 1, 0, Benign) ///< Whether to apply whole-program /// vtable optimization. diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index cb5cb888c6da7..d0807fa64d961 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4475,6 +4475,13 @@ defm new_infallible : BoolFOption<"new-infallible", BothFlags<[], [ClangOption, CC1Option], " treating throwing global C++ operator new as always returning valid memory " "(annotates with __attribute__((returns_nonnull)) and throw()). 
This is detectable in source.">>; +defm devirtualize_speculatively + : BoolFOption<"devirtualize-speculatively", + CodeGenOpts<"DevirtualizeSpeculatively">, DefaultFalse, + PosFlag<SetTrue, [], [], + "Enables speculative devirtualization optimization.">, + NegFlag<SetFalse>, + BothFlags<[], [ClangOption, CLOption, CC1Option]>>; defm whole_program_vtables : BoolFOption<"whole-program-vtables", CodeGenOpts<"WholeProgramVTables">, DefaultFalse, PosFlag<SetTrue, [], [ClangOption, CC1Option], @@ -7070,9 +7077,8 @@ defm variable_expansion_in_unroller : BooleanFFlag<"variable-expansion-in-unroll Group<clang_ignored_gcc_optimization_f_Group>; defm web : BooleanFFlag<"web">, Group<clang_ignored_gcc_optimization_f_Group>; defm whole_program : BooleanFFlag<"whole-program">, Group<clang_ignored_gcc_optimization_f_Group>; -defm devirtualize : BooleanFFlag<"devirtualize">, Group<clang_ignored_gcc_optimization_f_Group>; -defm devirtualize_speculatively : BooleanFFlag<"devirtualize-speculatively">, - Group<clang_ignored_gcc_optimization_f_Group>; +defm devirtualize : BooleanFFlag<"devirtualize">, + Group<clang_ignored_gcc_optimization_f_Group>; // Generic gfortran options. def A_DASH : Joined<["-"], "A-">, Group<gfortran_Group>; diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 3c313149ca1fc..5168682d4691f 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -938,6 +938,7 @@ void EmitAssemblyHelper::RunOptimizationPipeline( // non-integrated assemblers don't recognize .cgprofile section. 
PTO.CallGraphProfile = !CodeGenOpts.DisableIntegratedAS; PTO.UnifiedLTO = CodeGenOpts.UnifiedLTO; + PTO.DevirtualizeSpeculatively = CodeGenOpts.DevirtualizeSpeculatively; LoopAnalysisManager LAM; FunctionAnalysisManager FAM; diff --git a/clang/lib/CodeGen/CGClass.cpp b/clang/lib/CodeGen/CGClass.cpp index f782b0cd17da4..6736126f7d316 100644 --- a/clang/lib/CodeGen/CGClass.cpp +++ b/clang/lib/CodeGen/CGClass.cpp @@ -2827,10 +2827,11 @@ void CodeGenFunction::EmitTypeMetadataCodeForVCall(const CXXRecordDecl *RD, SourceLocation Loc) { if (SanOpts.has(SanitizerKind::CFIVCall)) EmitVTablePtrCheckForCall(RD, VTable, CodeGenFunction::CFITCK_VCall, Loc); - else if (CGM.getCodeGenOpts().WholeProgramVTables && - // Don't insert type test assumes if we are forcing public - // visibility. - !CGM.AlwaysHasLTOVisibilityPublic(RD)) { + else if ((CGM.getCodeGenOpts().WholeProgramVTables && + // Don't insert type test assumes if we are forcing public + // visibility. + !CGM.AlwaysHasLTOVisibilityPublic(RD)) || + CGM.getCodeGenOpts().DevirtualizeSpeculatively) { CanQualType Ty = CGM.getContext().getCanonicalTagType(RD); llvm::Metadata *MD = CGM.CreateMetadataIdentifierForType(Ty); llvm::Value *TypeId = @@ -2988,8 +2989,9 @@ void CodeGenFunction::EmitVTablePtrCheck(const CXXRecordDecl *RD, } bool CodeGenFunction::ShouldEmitVTableTypeCheckedLoad(const CXXRecordDecl *RD) { - if (!CGM.getCodeGenOpts().WholeProgramVTables || - !CGM.HasHiddenLTOVisibility(RD)) + if ((!CGM.getCodeGenOpts().WholeProgramVTables || + !CGM.HasHiddenLTOVisibility(RD)) && + !CGM.getCodeGenOpts().DevirtualizeSpeculatively) return false; if (CGM.getCodeGenOpts().VirtualFunctionElimination) diff --git a/clang/lib/CodeGen/CGVTables.cpp b/clang/lib/CodeGen/CGVTables.cpp index e14e883a55ac5..959ba2031acf4 100644 --- a/clang/lib/CodeGen/CGVTables.cpp +++ b/clang/lib/CodeGen/CGVTables.cpp @@ -1358,10 +1358,12 @@ llvm::GlobalObject::VCallVisibility CodeGenModule::GetVCallVisibilityLevel( void 
CodeGenModule::EmitVTableTypeMetadata(const CXXRecordDecl *RD, llvm::GlobalVariable *VTable, const VTableLayout &VTLayout) { - // Emit type metadata on vtables with LTO or IR instrumentation. + // Emit type metadata on vtables with LTO or IR instrumentation or + // speculative devirtualization. // In IR instrumentation, the type metadata is used to find out vtable // definitions (for type profiling) among all global variables. - if (!getCodeGenOpts().LTOUnit && !getCodeGenOpts().hasProfileIRInstr()) + if (!getCodeGenOpts().LTOUnit && !getCodeGenOpts().hasProfileIRInstr() && + !getCodeGenOpts().DevirtualizeSpeculatively) return; CharUnits ComponentWidth = GetTargetTypeStoreSize(getVTableComponentType()); diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index 65c47633bc5c4..41aa84fa8c07d 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -717,9 +717,10 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer( bool ShouldEmitVFEInfo = CGM.getCodeGenOpts().VirtualFunctionElimination && CGM.HasHiddenLTOVisibility(RD); bool ShouldEmitWPDInfo = - CGM.getCodeGenOpts().WholeProgramVTables && - // Don't insert type tests if we are forcing public visibility. - !CGM.AlwaysHasLTOVisibilityPublic(RD); + (CGM.getCodeGenOpts().WholeProgramVTables && + // Don't insert type tests if we are forcing public visibility. + !CGM.AlwaysHasLTOVisibilityPublic(RD)) || + CGM.getCodeGenOpts().DevirtualizeSpeculatively; llvm::Value *VirtualFn = nullptr; { @@ -2114,13 +2115,15 @@ void ItaniumCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT, // definitions to ensure we associate derived classes with base classes // defined in headers but with a strong definition only in a shared library. 
if (!VTable->isDeclarationForLinker() || - CGM.getCodeGenOpts().WholeProgramVTables) { + CGM.getCodeGenOpts().WholeProgramVTables || + CGM.getCodeGenOpts().DevirtualizeSpeculatively) { CGM.EmitVTableTypeMetadata(RD, VTable, VTLayout); // For available_externally definitions, add the vtable to // @llvm.compiler.used so that it isn't deleted before whole program // analysis. if (VTable->isDeclarationForLinker()) { - assert(CGM.getCodeGenOpts().WholeProgramVTables); + assert(CGM.getCodeGenOpts().WholeProgramVTables || + CGM.getCodeGenOpts().DevirtualizeSpeculatively); CGM.addCompilerUsedGlobal(VTable); } } diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 4e8f63ea49480..bf2639d4da4ec 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -7731,6 +7731,13 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, addOpenMPHostOffloadingArgs(C, JA, Args, CmdArgs); + // Temporarily disable this for LTO if it's not explicitly enabled. + // TODO: enable it by default for LTO also. 
+ if (Args.hasFlag(options::OPT_fdevirtualize_speculatively, + options::OPT_fno_devirtualize_speculatively, + /*Default value*/ false)) + CmdArgs.push_back("-fdevirtualize-speculatively"); + bool VirtualFunctionElimination = Args.hasFlag(options::OPT_fvirtual_function_elimination, options::OPT_fno_virtual_function_elimination, false); diff --git a/clang/test/CodeGenCXX/type-metadata.cpp b/clang/test/CodeGenCXX/type-metadata.cpp index 1cb2fed8db3e6..61d36204942dc 100644 --- a/clang/test/CodeGenCXX/type-metadata.cpp +++ b/clang/test/CodeGenCXX/type-metadata.cpp @@ -14,6 +14,9 @@ // RUN: %clang_cc1 -O2 -flto -flto-unit -triple x86_64-unknown-linux -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=ITANIUM-OPT --check-prefix=ITANIUM-OPT-LAYOUT %s // RUN: %clang_cc1 -flto -flto-unit -triple x86_64-pc-windows-msvc -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=VTABLE-OPT --check-prefix=MS --check-prefix=MS-TYPEMETADATA --check-prefix=TT-MS %s +// Test for the speculative devirtualization feature in nonlto mode: +// RUN: %clang_cc1 -triple x86_64-unknown-linux -fdevirtualize-speculatively -emit-llvm -o - %s | FileCheck --check-prefix=VTABLE-OPT --check-prefix=TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT %s + // Tests for cfi + whole-program-vtables: // RUN: %clang_cc1 -flto -flto-unit -triple x86_64-unknown-linux -fvisibility=hidden -fsanitize=cfi-vcall -fsanitize-trap=cfi-vcall -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=CFI --check-prefix=CFI-VT --check-prefix=ITANIUM-HIDDEN --check-prefix=ITANIUM-COMMON-MD --check-prefix=TC-ITANIUM --check-prefix=ITANIUM-NO-RV-MD %s // RUN: %clang_cc1 -flto -flto-unit -triple x86_64-pc-windows-msvc -fsanitize=cfi-vcall -fsanitize-trap=cfi-vcall -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=CFI --check-prefix=CFI-VT --check-prefix=MS --check-prefix=MS-TYPEMETADATA --check-prefix=TC-MS %s @@ -178,6 +181,7 @@ void af(A *a) { // TT-ITANIUM-HIDDEN: 
[[P:%[^ ]*]] = call i1 @llvm.type.test(ptr [[VT:%[^ ]*]], metadata !"_ZTS1A") // TT-ITANIUM-DEFAULT: [[P:%[^ ]*]] = call i1 @llvm.public.type.test(ptr [[VT:%[^ ]*]], metadata !"_ZTS1A") // TT-MS: [[P:%[^ ]*]] = call i1 @llvm.type.test(ptr [[VT:%[^ ]*]], metadata !"?AUA@@") + // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: [[P:%[^ ]*]] = call i1 @llvm.public.type.test(ptr [[VT:%[^ ]*]], metadata !"_ZTS1A") // TC-ITANIUM: [[PAIR:%[^ ]*]] = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata !"_ZTS1A") // TC-ITANIUM-RV: [[PAIR:%[^ ]*]] = call { ptr, i1 } @llvm.type.checked.load.relative(ptr {{%[^ ]*}}, i32 0, metadata !"_ZTS1A") // TC-MS: [[PAIR:%[^ ]*]] = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata !"?AUA@@") @@ -212,6 +216,7 @@ void df1(D *d) { // TT-ITANIUM-HIDDEN: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE:[0-9]+]]) // TT-ITANIUM-DEFAULT: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE:[0-9]+]]) // TT-MS: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata !"?AUA@@") + // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE:[0-9]+]]) // TC-ITANIUM: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata ![[DTYPE:[0-9]+]]) // TC-ITANIUM-RV: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load.relative(ptr {{%[^ ]*}}, i32 0, metadata ![[DTYPE:[0-9]+]]) // TC-MS: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata !"?AUA@@") @@ -224,6 +229,7 @@ void dg1(D *d) { // TT-ITANIUM-HIDDEN: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata !"_ZTS1B") // TT-ITANIUM-DEFAULT: {{%[^ ]*}} = call i1 @llvm.public.type.test(ptr {{%[^ ]*}}, metadata !"_ZTS1B") // TT-MS: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata !"?AUB@@") + // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: {{%[^ ]*}} = call i1 @llvm.public.type.test(ptr 
{{%[^ ]*}}, metadata !"_ZTS1B") // TC-ITANIUM: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 8, metadata !"_ZTS1B") // TC-ITANIUM-RV: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load.relative(ptr {{%[^ ]*}}, i32 4, metadata !"_ZTS1B") // TC-MS: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata !"?AUB@@") @@ -236,6 +242,7 @@ void dh1(D *d) { // TT-ITANIUM-HIDDEN: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE]]) // TT-ITANIUM-DEFAULT: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE]]) // TT-MS: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE:[0-9]+]]) + // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE]]) // TC-ITANIUM: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 16, metadata ![[DTYPE]]) // TC-ITANIUM-RV: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load.relative(ptr {{%[^ ]*}}, i32 8, metadata ![[DTYPE]]) // TC-MS: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 8, metadata ![[DTYPE:[0-9]+]]) @@ -297,6 +304,7 @@ void f(D *d) { // TT-ITANIUM-HIDDEN: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata !"_ZTSN5test21DE") // TT-ITANIUM-DEFAULT: {{%[^ ]*}} = call i1 @llvm.public.type.test(ptr {{%[^ ]*}}, metadata !"_ZTSN5test21DE") // TT-MS: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata !"?AUA@test2@@") + // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: {{%[^ ]*}} = call i1 @llvm.public.type.test(ptr {{%[^ ]*}}, metadata !"_ZTSN5test21DE") // TC-ITANIUM: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 8, metadata !"_ZTSN5test21DE") // TC-ITANIUM-RV: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load.relative(ptr {{%[^ ]*}}, i32 4, metadata !"_ZTSN5test21DE") // TC-MS: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata 
!"?AUA@test2@@") diff --git a/clang/test/Driver/clang_f_opts.c b/clang/test/Driver/clang_f_opts.c index 765f9d6ae3212..e5a23270ea732 100644 --- a/clang/test/Driver/clang_f_opts.c +++ b/clang/test/Driver/clang_f_opts.c @@ -377,7 +377,6 @@ // RUN: -ftree-ter \ // RUN: -ftree-vrp \ // RUN: -fno-devirtualize \ -// RUN: -fno-devirtualize-speculatively \ // RUN: -fslp-vectorize-aggressive \ // RUN: -fno-slp-vectorize-aggressive \ // RUN: %s 2>&1 | FileCheck --check-prefix=CHECK-WARNING %s @@ -436,7 +435,6 @@ // CHECK-WARNING-DAG: optimization flag '-ftree-ter' is not supported // CHECK-WARNING-DAG: optimization flag '-ftree-vrp' is not supported // CHECK-WARNING-DAG: optimization flag '-fno-devirtualize' is not supported -// CHECK-WARNING-DAG: optimization flag '-fno-devirtualize-speculatively' is not supported // CHECK-WARNING-DAG: the flag '-fslp-vectorize-aggressive' has been deprecated and will be ignored // CHECK-WARNING-DAG: the flag '-fno-slp-vectorize-aggressive' has been deprecated and will be ignored >From 1840074777f52ef4cb541cd778ee52e36636a7d5 Mon Sep 17 00:00:00 2001 From: Hassnaa Hamdi <[email protected]> Date: Wed, 19 Nov 2025 03:49:34 +0000 Subject: [PATCH 3/5] Resolve review comments: - Improve documentation and comments - Update release notes. 
--- clang/docs/ReleaseNotes.rst | 1 + clang/docs/UsersManual.rst | 53 ++++++++++++++++++++---- clang/lib/CodeGen/CGClass.cpp | 4 +- clang/lib/CodeGen/ItaniumCXXABI.cpp | 10 +++-- clang/lib/Driver/ToolChains/Clang.cpp | 2 - clang/test/CodeGenCXX/type-metadata.cpp | 8 ---- llvm/lib/Passes/PassBuilderPipelines.cpp | 8 ++++ 7 files changed, 64 insertions(+), 22 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index add1582344a0e..e265bed8cbc63 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -305,6 +305,7 @@ Modified Compiler Flags ----------------------- - The `-gkey-instructions` compiler flag is now enabled by default when DWARF is emitted for plain C/C++ and optimizations are enabled. (#GH149509) - The `-fconstexpr-steps` compiler flag now accepts value `0` to opt out of this limit. (#GH160440) +- The `-fdevirtualize-speculatively` compiler flag is now supported to enable speculative devirtualization of virtual function calls. It is disabled by default. (#GH159685) Removed Compiler Flags ------------------------- diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index 7f3de0f336947..cfe93d550efb8 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -2319,13 +2319,6 @@ are listed below. This enables better devirtualization. Turned off by default, because it is still experimental. -.. option:: -fdevirtualize-speculatively - - Enable speculative devirtualization optimization, such as single-implementation - devirtualization. This optimization is used out of LTO mode for now. - Turned off by default. - TODO: Enable for LTO mode. - .. option:: -fwhole-program-vtables Enable whole-program vtable optimizations, such as single-implementation @@ -2350,6 +2343,52 @@ are listed below. pure ThinLTO, as all split regular LTO modules are merged and LTO linked with regular LTO. +..
option:: -fdevirtualize-speculatively + + Enable speculative devirtualization optimization where a virtual call + can be transformed into a direct call under the assumption that its + object is of a particular type. A runtime check is inserted to validate + the assumption before making the direct call, and if the check fails, + the original virtual call is made instead. This optimization can enable + more inlining opportunities and better optimization of the direct call. + This is different from other whole program devirtualization optimizations + that rely on global analysis and hidden visibility of the objects to prove + that the object is always of a particular type at a virtual call site. + This optimization doesn't require global analysis or hidden visibility. + This optimization doesn't devirtualize all virtual calls, but only + when there's a single implementation of the virtual function. + There could be a single implementation of the virtual function + either because the function is not overridden in any derived class, + or because there is a single instantiated object that is using the function. + + Example of IR before the optimization: + .. code-block:: llvm + %vtable = load ptr, ptr %BV, align 8, !tbaa !6 + %0 = tail call i1 @llvm.public.type.test(ptr %vtable, metadata !"_ZTS4Base") + tail call void @llvm.assume(i1 %0) + %1 = load ptr, ptr %vtable, align 8 + tail call void %1(ptr noundef nonnull align 8 dereferenceable(8) %BV) + ret void + + IR after the optimization: + ..
code-block:: llvm + %vtable = load ptr, ptr %BV, align 8, !tbaa !12 + %0 = load ptr, ptr %vtable, align 8 + %1 = icmp eq ptr %0, @_ZN4Base17virtual_function1Ev + br i1 %1, label %if.true.direct_targ, label %if.false.orig_indirect, !prof !15 + if.true.direct_targ: ; preds = %entry + tail call void @_ZN4Base17virtual_function1Ev(ptr noundef nonnull align 8 dereferenceable(8) %BV) + br label %if.end.icp + if.false.orig_indirect: ; preds = %entry + tail call void %0(ptr noundef nonnull align 8 dereferenceable(8) %BV) + br label %if.end.icp + + if.end.icp: ; preds = %if.false.orig_indirect, %if.true.direct_targ + ret void + This feature is temporarily ignored at the LLVM side when LTO is enabled. + TODO: Update the comment when the LLVM side supports it. + This feature is turned off by default. + .. option:: -f[no-]unique-source-file-names When enabled, allows the compiler to assume that each object file diff --git a/clang/lib/CodeGen/CGClass.cpp b/clang/lib/CodeGen/CGClass.cpp index 6736126f7d316..5fd240fa1d115 100644 --- a/clang/lib/CodeGen/CGClass.cpp +++ b/clang/lib/CodeGen/CGClass.cpp @@ -2827,9 +2827,9 @@ void CodeGenFunction::EmitTypeMetadataCodeForVCall(const CXXRecordDecl *RD, SourceLocation Loc) { if (SanOpts.has(SanitizerKind::CFIVCall)) EmitVTablePtrCheckForCall(RD, VTable, CodeGenFunction::CFITCK_VCall, Loc); + // Emit the type test assumes for the features of WPD (only when LTO + // visibility is NOT public) and speculative devirtualization. else if ((CGM.getCodeGenOpts().WholeProgramVTables && - // Don't insert type test assumes if we are forcing public - // visibility. 
!CGM.AlwaysHasLTOVisibilityPublic(RD)) || CGM.getCodeGenOpts().DevirtualizeSpeculatively) { CanQualType Ty = CGM.getContext().getCanonicalTagType(RD); diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index 41aa84fa8c07d..24ff12c18d69a 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -716,6 +716,9 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer( bool ShouldEmitVFEInfo = CGM.getCodeGenOpts().VirtualFunctionElimination && CGM.HasHiddenLTOVisibility(RD); + // TODO: Update this name not to be restricted to WPD only + // as we now emit the vtable info for speculative devirtualization as + // well. bool ShouldEmitWPDInfo = (CGM.getCodeGenOpts().WholeProgramVTables && // Don't insert type tests if we are forcing public visibility. @@ -2111,9 +2114,10 @@ void ItaniumCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT, // Always emit type metadata on non-available_externally definitions, and on // available_externally definitions if we are performing whole program - // devirtualization. For WPD we need the type metadata on all vtable - // definitions to ensure we associate derived classes with base classes - // defined in headers but with a strong definition only in a shared library. + // devirtualization or speculative devirtualization. We need the type metadata + // on all vtable definitions to ensure we associate derived classes with base + // classes defined in headers but with a strong definition only in a shared + // library.
if (!VTable->isDeclarationForLinker() || CGM.getCodeGenOpts().WholeProgramVTables || CGM.getCodeGenOpts().DevirtualizeSpeculatively) { diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index bf2639d4da4ec..a0523e1dd92d2 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -7731,8 +7731,6 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, addOpenMPHostOffloadingArgs(C, JA, Args, CmdArgs); - // Temporarily disable this for LTO if it's not explicitly enabled. - // TODO: enable it by default for LTO also. if (Args.hasFlag(options::OPT_fdevirtualize_speculatively, options::OPT_fno_devirtualize_speculatively, /*Default value*/ false)) diff --git a/clang/test/CodeGenCXX/type-metadata.cpp b/clang/test/CodeGenCXX/type-metadata.cpp index 61d36204942dc..1cb2fed8db3e6 100644 --- a/clang/test/CodeGenCXX/type-metadata.cpp +++ b/clang/test/CodeGenCXX/type-metadata.cpp @@ -14,9 +14,6 @@ // RUN: %clang_cc1 -O2 -flto -flto-unit -triple x86_64-unknown-linux -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=ITANIUM-OPT --check-prefix=ITANIUM-OPT-LAYOUT %s // RUN: %clang_cc1 -flto -flto-unit -triple x86_64-pc-windows-msvc -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=VTABLE-OPT --check-prefix=MS --check-prefix=MS-TYPEMETADATA --check-prefix=TT-MS %s -// Test for the speculative devirtualization feature in nonlto mode: -// RUN: %clang_cc1 -triple x86_64-unknown-linux -fdevirtualize-speculatively -emit-llvm -o - %s | FileCheck --check-prefix=VTABLE-OPT --check-prefix=TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT %s - // Tests for cfi + whole-program-vtables: // RUN: %clang_cc1 -flto -flto-unit -triple x86_64-unknown-linux -fvisibility=hidden -fsanitize=cfi-vcall -fsanitize-trap=cfi-vcall -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=CFI --check-prefix=CFI-VT --check-prefix=ITANIUM-HIDDEN --check-prefix=ITANIUM-COMMON-MD 
--check-prefix=TC-ITANIUM --check-prefix=ITANIUM-NO-RV-MD %s // RUN: %clang_cc1 -flto -flto-unit -triple x86_64-pc-windows-msvc -fsanitize=cfi-vcall -fsanitize-trap=cfi-vcall -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=CFI --check-prefix=CFI-VT --check-prefix=MS --check-prefix=MS-TYPEMETADATA --check-prefix=TC-MS %s @@ -181,7 +178,6 @@ void af(A *a) { // TT-ITANIUM-HIDDEN: [[P:%[^ ]*]] = call i1 @llvm.type.test(ptr [[VT:%[^ ]*]], metadata !"_ZTS1A") // TT-ITANIUM-DEFAULT: [[P:%[^ ]*]] = call i1 @llvm.public.type.test(ptr [[VT:%[^ ]*]], metadata !"_ZTS1A") // TT-MS: [[P:%[^ ]*]] = call i1 @llvm.type.test(ptr [[VT:%[^ ]*]], metadata !"?AUA@@") - // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: [[P:%[^ ]*]] = call i1 @llvm.public.type.test(ptr [[VT:%[^ ]*]], metadata !"_ZTS1A") // TC-ITANIUM: [[PAIR:%[^ ]*]] = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata !"_ZTS1A") // TC-ITANIUM-RV: [[PAIR:%[^ ]*]] = call { ptr, i1 } @llvm.type.checked.load.relative(ptr {{%[^ ]*}}, i32 0, metadata !"_ZTS1A") // TC-MS: [[PAIR:%[^ ]*]] = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata !"?AUA@@") @@ -216,7 +212,6 @@ void df1(D *d) { // TT-ITANIUM-HIDDEN: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE:[0-9]+]]) // TT-ITANIUM-DEFAULT: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE:[0-9]+]]) // TT-MS: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata !"?AUA@@") - // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE:[0-9]+]]) // TC-ITANIUM: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata ![[DTYPE:[0-9]+]]) // TC-ITANIUM-RV: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load.relative(ptr {{%[^ ]*}}, i32 0, metadata ![[DTYPE:[0-9]+]]) // TC-MS: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata !"?AUA@@") @@ -229,7 
+224,6 @@ void dg1(D *d) { // TT-ITANIUM-HIDDEN: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata !"_ZTS1B") // TT-ITANIUM-DEFAULT: {{%[^ ]*}} = call i1 @llvm.public.type.test(ptr {{%[^ ]*}}, metadata !"_ZTS1B") // TT-MS: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata !"?AUB@@") - // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: {{%[^ ]*}} = call i1 @llvm.public.type.test(ptr {{%[^ ]*}}, metadata !"_ZTS1B") // TC-ITANIUM: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 8, metadata !"_ZTS1B") // TC-ITANIUM-RV: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load.relative(ptr {{%[^ ]*}}, i32 4, metadata !"_ZTS1B") // TC-MS: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata !"?AUB@@") @@ -242,7 +236,6 @@ void dh1(D *d) { // TT-ITANIUM-HIDDEN: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE]]) // TT-ITANIUM-DEFAULT: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE]]) // TT-MS: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE:[0-9]+]]) - // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE]]) // TC-ITANIUM: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 16, metadata ![[DTYPE]]) // TC-ITANIUM-RV: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load.relative(ptr {{%[^ ]*}}, i32 8, metadata ![[DTYPE]]) // TC-MS: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 8, metadata ![[DTYPE:[0-9]+]]) @@ -304,7 +297,6 @@ void f(D *d) { // TT-ITANIUM-HIDDEN: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata !"_ZTSN5test21DE") // TT-ITANIUM-DEFAULT: {{%[^ ]*}} = call i1 @llvm.public.type.test(ptr {{%[^ ]*}}, metadata !"_ZTSN5test21DE") // TT-MS: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata !"?AUA@test2@@") - // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: {{%[^ ]*}} = call i1 
@llvm.public.type.test(ptr {{%[^ ]*}}, metadata !"_ZTSN5test21DE") // TC-ITANIUM: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 8, metadata !"_ZTSN5test21DE") // TC-ITANIUM-RV: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load.relative(ptr {{%[^ ]*}}, i32 4, metadata !"_ZTSN5test21DE") // TC-MS: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata !"?AUA@test2@@") diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 030f4fbc7c963..973c5c606e3eb 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -1642,6 +1642,8 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, if (!LTOPreLink) MPM.addPass(RelLookupTableConverterPass()); + // Add devirtualization pass only when LTO is not enabled, as otherwise + // the pass is already enabled in the LTO pipeline. if (PTO.DevirtualizeSpeculatively && LTOPhase == ThinOrFullLTOPhase::None) { MPM.addPass(WholeProgramDevirtPass( /*ExportSummary*/ nullptr, @@ -1649,6 +1651,12 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, /*DevirtSpeculatively*/ PTO.DevirtualizeSpeculatively)); MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, lowertypetests::DropTestKind::Assume)); + // Given that the devirtualization creates more opportunities for inlining, + // we run the Inliner again here to maximize the optimization gain we + // get from devirtualization. + // Also, we can't run devirtualization before inlining because the + // devirtualization depends on the passes optimizing/eliminating vtable GVs + // and those passes are only effective after inlining. 
if (EnableModuleInliner) { MPM.addPass(ModuleInlinerPass(getInlineParamsFromOptLevel(Level), UseInlineAdvisor, >From c67f13b5b71579ea7dcd4a59e2f2fc61e8f20711 Mon Sep 17 00:00:00 2001 From: Hassnaa Hamdi <[email protected]> Date: Wed, 19 Nov 2025 04:11:44 +0000 Subject: [PATCH 4/5] Add tests for emitting MD and ensuring that MD is not dropped on the way to the WPD at backend --- .../speculative-devirt-metadata.cpp | 77 ++++++++++++++++++ clang/test/CodeGenCXX/speculative-devirt.cpp | 78 +++++++++++++++++++ 2 files changed, 155 insertions(+) create mode 100644 clang/test/CodeGenCXX/speculative-devirt-metadata.cpp create mode 100644 clang/test/CodeGenCXX/speculative-devirt.cpp diff --git a/clang/test/CodeGenCXX/speculative-devirt-metadata.cpp b/clang/test/CodeGenCXX/speculative-devirt-metadata.cpp new file mode 100644 index 0000000000000..a20d71e086ed3 --- /dev/null +++ b/clang/test/CodeGenCXX/speculative-devirt-metadata.cpp @@ -0,0 +1,77 @@ +// Test that Clang emits vtable metadata when speculative devirtualization is enabled. 
+// RUN: %clang_cc1 -triple x86_64-unknown-linux -fdevirtualize-speculatively -emit-llvm -o - %s | FileCheck --check-prefix=VTABLE-OPT --check-prefix=TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT %s + +struct A { + A(); + virtual void f(); +}; + +struct B : virtual A { + B(); + virtual void g(); + virtual void h(); +}; + +namespace { + +struct D : B { + D(); + virtual void f(); + virtual void h(); +}; + +} + +A::A() {} +B::B() {} +D::D() {} + +void A::f() { +} + +void B::g() { +} + +void D::f() { +} + +void D::h() { +} + +void af(A *a) { + // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: [[P:%[^ ]*]] = call i1 @llvm.public.type.test(ptr [[VT:%[^ ]*]], metadata !"_ZTS1A") + // VTABLE-OPT: call void @llvm.assume(i1 [[P]]) + a->f(); +} + +void dg1(D *d) { + // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: [[P:%[^ ]*]] = call i1 @llvm.public.type.test(ptr [[VT:%[^ ]*]], metadata !"_ZTS1B") + // VTABLE-OPT: call void @llvm.assume(i1 [[P]]) + d->g(); +} +void df1(D *d) { + // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: [[P:%[^ ]*]] = call i1 @llvm.type.test(ptr [[VT:%[^ ]*]], metadata !11) + // VTABLE-OPT: call void @llvm.assume(i1 [[P]]) + d->f(); +} + +void dh1(D *d) { + // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: [[P:%[^ ]*]] = call i1 @llvm.type.test(ptr [[VT:%[^ ]*]], metadata !11) + // VTABLE-OPT: call void @llvm.assume(i1 [[P]]) + d->h(); +} + + +D d; + +void foo() { + dg1(&d); + df1(&d); + dh1(&d); + + + struct FA : A { + void f() {} + } fa; + af(&fa); +} diff --git a/clang/test/CodeGenCXX/speculative-devirt.cpp b/clang/test/CodeGenCXX/speculative-devirt.cpp new file mode 100644 index 0000000000000..f1a69fd90573a --- /dev/null +++ b/clang/test/CodeGenCXX/speculative-devirt.cpp @@ -0,0 +1,78 @@ +// Test that the vtable metadata that are emitted by Clang when speculative devirtualization +// is enabled can be used by the WholeProgramDevirt pass without being dropped on the way. 
+// RUN: %clang_cc1 -O3 -triple x86_64-unknown-linux -fdevirtualize-speculatively -mllvm -print-before=wholeprogramdevirt -S %s 2>&1 | FileCheck --check-prefix=VTABLE-OPT --check-prefix=TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT %s + +struct A { + A(); + virtual void f(); +}; + +struct B : virtual A { + B(); + virtual void g(); + virtual void h(); +}; + +namespace { + +struct D : B { + D(); + virtual void f(); + virtual void h(); +}; + +} + +A::A() {} +B::B() {} +D::D() {} + +void A::f() { +} + +void B::g() { +} + +void D::f() { +} + +void D::h() { +} + +void af(A *a) { + // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: [[P:%[^ ]*]] = tail call i1 @llvm.public.type.test(ptr [[VT:%[^ ]*]], metadata !"_ZTS1A") + // VTABLE-OPT: call void @llvm.assume(i1 [[P]]) + a->f(); +} + +void dg1(D *d) { + // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: [[P:%[^ ]*]] = tail call i1 @llvm.public.type.test(ptr [[VT:%[^ ]*]], metadata !"_ZTS1B") + // VTABLE-OPT: call void @llvm.assume(i1 [[P]]) + d->g(); +} +void df1(D *d) { + // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: [[P:%[^ ]*]] = tail call i1 @llvm.type.test(ptr [[VT:%[^ ]*]], metadata !11) + // VTABLE-OPT: call void @llvm.assume(i1 [[P]]) + d->f(); +} + +void dh1(D *d) { + // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: [[P:%[^ ]*]] = tail call i1 @llvm.type.test(ptr [[VT:%[^ ]*]], metadata !11) + // VTABLE-OPT: call void @llvm.assume(i1 [[P]]) + d->h(); +} + + +D d; + +void foo() { + dg1(&d); + df1(&d); + dh1(&d); + + + struct FA : A { + void f() {} + } fa; + af(&fa); +} >From 96b5bfa9a7aac886f5a65ead5b0431a838b355cd Mon Sep 17 00:00:00 2001 From: Hassnaa Hamdi <[email protected]> Date: Thu, 27 Nov 2025 01:46:24 +0000 Subject: [PATCH 5/5] resolve review comments --- clang/docs/UsersManual.rst | 10 +-- .../speculative-devirt-metadata.cpp | 19 ++--- clang/test/CodeGenCXX/speculative-devirt.cpp | 78 ------------------- .../WholeProgramDevirt/devirt-metadata.ll | 64 +++++++++++++++ 4 files changed, 79 insertions(+), 92 
deletions(-) delete mode 100644 clang/test/CodeGenCXX/speculative-devirt.cpp create mode 100644 llvm/test/Transforms/WholeProgramDevirt/devirt-metadata.ll diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index cfe93d550efb8..28ca7ac5d22ad 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -2351,15 +2351,15 @@ are listed below. the assumption before making the direct call, and if the check fails, the original virtual call is made instead. This optimization can enable more inlining opportunities and better optimization of the direct call. - This is different from other whole program devirtualization optimizations + This is different from whole program devirtualization optimization that rely on global analysis and hidden visibility of the objects to prove that the object is always of a particular type at a virtual call site. This optimization doesn't require global analysis or hidden visibility. This optimization doesn't devirtualize all virtual calls, but only - when there's a single implementation of the virtual function. - There could be a single implementaiton of the virtual function + when there's a single implementation of the virtual function in the module. + There could be a single implementation of the virtual function either because the function is not overridden in any derived class, - or because there is a sinlge instantiated object that is using the funciton. + or because there is a single instantiated object that is using the function. Ex of IR before the optimization: .. code-block:: llvm @@ -2386,7 +2386,7 @@ are listed below. if.end.icp: ; preds = %if.false.orig_indirect, %if.true.direct_targ ret void This feature is temporarily ignored at the LLVM side when LTO is enabled. - TODO: Update the comment when the LLVM side supports it. + TODO: Update the comment when the LLVM side supports this feature for LTO. This feature is turned off by default. .. 
option:: -f[no-]unique-source-file-names diff --git a/clang/test/CodeGenCXX/speculative-devirt-metadata.cpp b/clang/test/CodeGenCXX/speculative-devirt-metadata.cpp index a20d71e086ed3..20d2ab9f46fe5 100644 --- a/clang/test/CodeGenCXX/speculative-devirt-metadata.cpp +++ b/clang/test/CodeGenCXX/speculative-devirt-metadata.cpp @@ -1,5 +1,5 @@ // Test that Clang emits vtable metadata when speculative devirtualization is enabled. -// RUN: %clang_cc1 -triple x86_64-unknown-linux -fdevirtualize-speculatively -emit-llvm -o - %s | FileCheck --check-prefix=VTABLE-OPT --check-prefix=TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux -fdevirtualize-speculatively -emit-llvm -o - %s | FileCheck --check-prefix=CHECK %s struct A { A(); @@ -39,25 +39,26 @@ void D::h() { } void af(A *a) { - // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: [[P:%[^ ]*]] = call i1 @llvm.public.type.test(ptr [[VT:%[^ ]*]], metadata !"_ZTS1A") - // VTABLE-OPT: call void @llvm.assume(i1 [[P]]) + // CHECK: [[P:%[^ ]*]] = call i1 @llvm.public.type.test(ptr [[VT:%[^ ]*]], metadata !"_ZTS1A") + // CHECK-NEXT: call void @llvm.assume(i1 [[P]]) a->f(); } void dg1(D *d) { - // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: [[P:%[^ ]*]] = call i1 @llvm.public.type.test(ptr [[VT:%[^ ]*]], metadata !"_ZTS1B") - // VTABLE-OPT: call void @llvm.assume(i1 [[P]]) + // CHECK: [[P:%[^ ]*]] = call i1 @llvm.public.type.test(ptr [[VT:%[^ ]*]], metadata !"_ZTS1B") + // CHECK-NEXT: call void @llvm.assume(i1 [[P]]) d->g(); } + void df1(D *d) { - // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: [[P:%[^ ]*]] = call i1 @llvm.type.test(ptr [[VT:%[^ ]*]], metadata !11) - // VTABLE-OPT: call void @llvm.assume(i1 [[P]]) + // CHECK: [[P:%[^ ]*]] = call i1 @llvm.type.test(ptr [[VT:%[^ ]*]], metadata !11) + // CHECK-NEXT: call void @llvm.assume(i1 [[P]]) d->f(); } void dh1(D *d) { - // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: [[P:%[^ ]*]] = call i1 @llvm.type.test(ptr [[VT:%[^ ]*]], metadata 
!11) - // VTABLE-OPT: call void @llvm.assume(i1 [[P]]) + // CHECK: [[P:%[^ ]*]] = call i1 @llvm.type.test(ptr [[VT:%[^ ]*]], metadata !11) + // CHECK-NEXT: call void @llvm.assume(i1 [[P]]) d->h(); } diff --git a/clang/test/CodeGenCXX/speculative-devirt.cpp b/clang/test/CodeGenCXX/speculative-devirt.cpp deleted file mode 100644 index f1a69fd90573a..0000000000000 --- a/clang/test/CodeGenCXX/speculative-devirt.cpp +++ /dev/null @@ -1,78 +0,0 @@ -// Test that the vtable metadata that are emitted by Clang when speculative devirtualization -// is enabled can be used by the WholeProgramDevirt pass without being dropped on the way. -// RUN: %clang_cc1 -O3 -triple x86_64-unknown-linux -fdevirtualize-speculatively -mllvm -print-before=wholeprogramdevirt -S %s 2>&1 | FileCheck --check-prefix=VTABLE-OPT --check-prefix=TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT %s - -struct A { - A(); - virtual void f(); -}; - -struct B : virtual A { - B(); - virtual void g(); - virtual void h(); -}; - -namespace { - -struct D : B { - D(); - virtual void f(); - virtual void h(); -}; - -} - -A::A() {} -B::B() {} -D::D() {} - -void A::f() { -} - -void B::g() { -} - -void D::f() { -} - -void D::h() { -} - -void af(A *a) { - // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: [[P:%[^ ]*]] = tail call i1 @llvm.public.type.test(ptr [[VT:%[^ ]*]], metadata !"_ZTS1A") - // VTABLE-OPT: call void @llvm.assume(i1 [[P]]) - a->f(); -} - -void dg1(D *d) { - // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: [[P:%[^ ]*]] = tail call i1 @llvm.public.type.test(ptr [[VT:%[^ ]*]], metadata !"_ZTS1B") - // VTABLE-OPT: call void @llvm.assume(i1 [[P]]) - d->g(); -} -void df1(D *d) { - // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: [[P:%[^ ]*]] = tail call i1 @llvm.type.test(ptr [[VT:%[^ ]*]], metadata !11) - // VTABLE-OPT: call void @llvm.assume(i1 [[P]]) - d->f(); -} - -void dh1(D *d) { - // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: [[P:%[^ ]*]] = tail call i1 @llvm.type.test(ptr [[VT:%[^ ]*]], metadata !11) - // 
VTABLE-OPT: call void @llvm.assume(i1 [[P]]) - d->h(); -} - - -D d; - -void foo() { - dg1(&d); - df1(&d); - dh1(&d); - - - struct FA : A { - void f() {} - } fa; - af(&fa); -} diff --git a/llvm/test/Transforms/WholeProgramDevirt/devirt-metadata.ll b/llvm/test/Transforms/WholeProgramDevirt/devirt-metadata.ll new file mode 100644 index 0000000000000..d8781d5686b53 --- /dev/null +++ b/llvm/test/Transforms/WholeProgramDevirt/devirt-metadata.ll @@ -0,0 +1,64 @@ +; Test that the needed intrinsics for devirtualization are preserved and not dropped by other +; optimizations. + +; RUN: opt -S -O3 %s 2>&1 | FileCheck %s + +target datalayout = "e-p:64:64" +target triple = "x86_64-unknown-linux-gnu" + +@vt1 = constant [1 x ptr] [ptr @vf], !type !8 +@vt2 = constant [1 x ptr] [ptr @vf2], !type !12 + +define i1 @vf(ptr %this) #0 !dbg !7 { + ret i1 true +} + +define i1 @vf2(ptr %this) !dbg !11 { + ret i1 false +} + +define void @call(ptr %obj) #1 !dbg !5 { + %vtable = load ptr, ptr %obj + ; CHECK: [[P:%[^ ]*]] = tail call i1 @llvm.public.type.test(ptr [[VT:%[^ ]*]], metadata !"typeid") + ; CHECK-NEXT: call void @llvm.assume(i1 [[P]]) + %p = call i1 @llvm.public.type.test(ptr %vtable, metadata !"typeid") + call void @llvm.assume(i1 %p) + %fptr = load ptr, ptr %vtable + call i1 %fptr(ptr %obj), !dbg !6 + ret void +} + +define void @call1(ptr %obj) #1 !dbg !9 { + %vtable = load ptr, ptr %obj + ; CHECK: [[P:%[^ ]*]] = tail call i1 @llvm.type.test(ptr [[VT:%[^ ]*]], metadata !"typeid1") + ; CHECK-NEXT: call void @llvm.assume(i1 [[P]]) + %p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid1") + call void @llvm.assume(i1 %p) + %fptr = load ptr, ptr %vtable, align 8 + %1 = call i1 %fptr(ptr %obj), !dbg !10 + ret void +} + +declare i1 @llvm.type.test(ptr, metadata) +declare i1 @llvm.public.type.test(ptr, metadata) +declare void @llvm.assume(i1) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3} +!llvm.ident = !{!4} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, 
file: !1, producer: "clang version 4.0.0 (trunk 278098)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug) +!1 = !DIFile(filename: "devirt-single.cc", directory: ".") +!2 = !{i32 2, !"Dwarf Version", i32 4} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{!"clang version 4.0.0 (trunk 278098)"} +!5 = distinct !DISubprogram(name: "call", linkageName: "_Z4callPv", scope: !1, file: !1, line: 29, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0) +!6 = !DILocation(line: 30, column: 32, scope: !5) +!7 = distinct !DISubprogram(name: "vf", linkageName: "_ZN3vt12vfEb", scope: !1, file: !1, line: 13, isLocal: false, isDefinition: true, scopeLine: 13, flags: DIFlagPrototyped, isOptimized: false, unit: !0) +!8 = !{i32 0, !"typeid"} + +!9 = distinct !DISubprogram(name: "call1", linkageName: "_Z5call1Pv", scope: !1, file: !1, line: 31, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0) +!10 = !DILocation(line: 35, column: 32, scope: !9) +!11 = distinct !DISubprogram(name: "vf2", linkageName: "_ZN3vt13vf2Eb", scope: !1, file: !1, line: 23, isLocal: false, isDefinition: true, scopeLine: 23, flags: DIFlagPrototyped, isOptimized: false, unit: !0) +!12 = !{i32 0, !"typeid1"} + _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
