https://github.com/tonykuttai updated https://github.com/llvm/llvm-project/pull/187986
>From 9dd21307e01e1173007aad98651d38fae1cc1e2d Mon Sep 17 00:00:00 2001 From: Tony Varghese <[email protected]> Date: Fri, 19 Jun 2026 15:46:49 +0530 Subject: [PATCH 1/2] [Clang][AIX] Add -mloadtime-comment-vars support to preserve variables in the final object file. --- clang/docs/LanguageExtensions.rst | 66 ++++++++++ clang/include/clang/Basic/CodeGenOptions.h | 3 + clang/include/clang/Options/Options.td | 7 ++ clang/lib/CodeGen/CodeGenModule.cpp | 119 ++++++++++++++++++ clang/lib/CodeGen/CodeGenModule.h | 18 +++ clang/lib/Driver/ToolChains/Clang.cpp | 9 ++ .../CodeGen/PowerPC/loadtime-comment-mixed.c | 12 ++ clang/test/CodeGen/loadtime-comment-vars.c | 61 +++++++++ clang/test/Driver/mloadtime-comment-vars.c | 11 ++ .../lower-comment-string.ll | 21 ++-- 10 files changed, 319 insertions(+), 8 deletions(-) create mode 100644 clang/test/CodeGen/PowerPC/loadtime-comment-mixed.c create mode 100644 clang/test/CodeGen/loadtime-comment-vars.c create mode 100644 clang/test/Driver/mloadtime-comment-vars.c diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index d79d82a175c68..04e2f14b53984 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -6873,6 +6873,72 @@ When ``#pragma comment(copyright, ...)`` appears in a C++20 module interface unit, the copyright string is embedded only in the object file compiled from that interface unit. Importing TUs do not re-emit the string. +Preserving Identifying Variables with -mloadtime-comment-vars +-------------------------------------------------------------- + +The ``-mloadtime-comment-vars=`` flag accepts a comma-separated list of +global variable names that should be preserved in the final object file as +loadtime identifying strings. This is an AIX-specific feature and is ignored +on other targets. 
+ +This flag complements ``#pragma comment(copyright, ...)`` for codebases that +already use the traditional UNIX convention of embedding identifying strings +directly in source variables rather than via a pragma. + +Syntax: + +.. code-block:: console + + -mloadtime-comment-vars=<var1>[,<var2>,...] + +Valid variable types: + +A variable named in the list must meet both of these conditions to be +preserved: + +- Its type must be a character pointer (``char *``, ``const char *``) or a + character array (``char[]``). +- It must have an initializer. + +Variables that fail either check -- for example, an ``int`` or a ``struct`` -- +are silently skipped. Variables that appear in the list but are not defined in +the translation unit are also ignored. + +Example: + +.. code-block:: c + + static char *sccsid = "@(#) MyApp Version 1.0"; + static char version[] = "@(#) Built 2026-05-24"; + + void foo() {} + +Compiled with: + +.. code-block:: console + + clang -target powerpc64-ibm-aix \ + -mloadtime-comment-vars=sccsid,version \ + -c source.c -o source.o + +Both ``sccsid`` and ``version`` survive optimization and are retained in the +object file. + +.. code-block:: console + + $ what source.o + source.o: + MyApp Version 1.0 + Built 2026-05-24 + +Interaction with ``#pragma comment(copyright, ...)`` : + +The two mechanisms can be used together in the same translation unit. The +pragma produces a dedicated ``__loadtime_comment_str`` symbol placed in the +``__loadtime_comment`` section, while ``-mloadtime-comment-vars`` preserves +the named source variables in place using ``.ref`` directives. Both sets of +strings appear in the final object file independently. 
+ Evaluating Object Size ====================== diff --git a/clang/include/clang/Basic/CodeGenOptions.h b/clang/include/clang/Basic/CodeGenOptions.h index 97d68877467fd..03bf2f730d631 100644 --- a/clang/include/clang/Basic/CodeGenOptions.h +++ b/clang/include/clang/Basic/CodeGenOptions.h @@ -342,6 +342,9 @@ class CodeGenOptions : public CodeGenOptionsBase { /// A list of linker options to embed in the object file. std::vector<std::string> LinkerOptions; + /// List of global variable names to preserve as loadtime comment variables. + std::vector<std::string> LoadTimeCommentVars; + /// Name of the profile file to use as output for -fprofile-instr-generate, /// -fprofile-generate, and -fcs-profile-generate. std::string InstrProfileOutput; diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td index 5028684731b2d..71fd39ccf4ea2 100644 --- a/clang/include/clang/Options/Options.td +++ b/clang/include/clang/Options/Options.td @@ -4792,6 +4792,13 @@ def fvisibility_global_new_delete_EQ : Joined<["-"], "fvisibility-global-new-del Visibility<[ClangOption, CC1Option]>, HelpText<"The visibility for global C++ operator new and delete declarations. 
If 'source' is specified the visibility is not adjusted">, MarshallingInfoVisibilityGlobalNewDelete<LangOpts<"GlobalAllocationFunctionVisibility">, "ForceDefault">; +def mloadtime_comment_vars_EQ + : CommaJoined<["-"], "mloadtime-comment-vars=">, + Group<m_Group>, + Visibility<[ClangOption, CC1Option]>, + HelpText<"Comma-separated list of global variable names to treat as " + "loadtime variables">, + MarshallingInfoStringVector<CodeGenOpts<"LoadTimeCommentVars">>; def mdefault_visibility_export_mapping_EQ : Joined<["-"], "mdefault-visibility-export-mapping=">, Values<"none,explicit,all">, NormalizedValuesScope<"LangOptions::DefaultVisiblityExportMapping">, diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index cc0d0341a2dd0..d06dee73acfc5 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -1077,6 +1077,13 @@ void CodeGenModule::Release() { Module *Primary = getContext().getCurrentNamedModule(); if (CXX20ModuleInits && Primary && !Primary->isHeaderLikeModule()) EmitModuleInitializers(Primary); + + // Queue loadtime comment variable candidates into the deferred emission + // list before EmitDeferred() runs, so their initializers (which may + // reference other globals, e.g. static const char *p = a;) are emitted + // through the normal infrastructure with correct ordering. + QueueLoadTimeCommentVarEmission(); + EmitDeferred(); DeferredDecls.insert_range(EmittedDeferredDecls); EmittedDeferredDecls.clear(); @@ -1758,6 +1765,9 @@ void CodeGenModule::Release() { EmitBackendOptionsMetadata(getCodeGenOpts()); + // Mark loadtime comment variables specified via -mloadtime-comment-vars. + ProcessLoadTimeCommentVars(); + // If there is device offloading code embed it in the host now. 
EmbedObject(&getModule(), CodeGenOpts, *getFileSystem(), getDiags()); @@ -4337,6 +4347,115 @@ bool CodeGenModule::MayBeEmittedEagerly(const ValueDecl *Global) { return true; } +/// Check if a variable declaration is suitable to be treated as a loadtime +/// comment variable. Valid variables must be character pointers or character +/// arrays with an initializer. +bool CodeGenModule::isValidLoadTimeCommentVariable(const VarDecl *D) const { + // Must be a valid declaration and must have an initializer (the string). + if (!D || !D->hasInit()) + return false; + + QualType Ty = D->getType(); + + // 1. Handle Pointers (e.g., char *sccsid, const char *copyright). + if (const PointerType *PT = Ty->getAs<PointerType>()) { + if (PT->getPointeeType()->isAnyCharacterType()) + return true; + } + + // 2. Handle Arrays (e.g., char version[]) + if (const ArrayType *AT = getContext().getAsArrayType(Ty)) { + if (AT->getElementType()->isAnyCharacterType()) + return true; + } + + return false; // Reject ints, structs, etc. +} + +/// Check if a variable is eligible to be treated as a loadtime comment +/// variable. This requires: (1) the variable name is in the requested list +/// and (2) the variable type is valid (char pointer or array with initializer). +bool CodeGenModule::isLoadTimeCommentCandidateVariable( + const VarDecl *VD, const std::vector<std::string> &LoadTimeCommentVars) { + if (!llvm::is_contained(LoadTimeCommentVars, VD->getName())) + return false; + return isValidLoadTimeCommentVariable(VD); +} + +/// QueueLoadTimeCommentVarEmission: Called before EmitDeferred(). +/// Move loadtime comment variable candidates from DeferredDecls into +/// DeferredDeclsToEmit so that the normal deferred emission machinery +/// defines them — including any globals their initializers reference +/// (e.g. static const char *p = a;). 
+void CodeGenModule::QueueLoadTimeCommentVarEmission() { + if (!getTriple().isOSAIX()) + return; + + const auto &LoadTimeCommentVars = getCodeGenOpts().LoadTimeCommentVars; + if (LoadTimeCommentVars.empty()) + return; + + TranslationUnitDecl *TU = getContext().getTranslationUnitDecl(); + for (auto *D : TU->decls()) { + auto *VD = dyn_cast<VarDecl>(D); + if (!VD) + continue; + if (!isLoadTimeCommentCandidateVariable(VD, LoadTimeCommentVars)) + continue; + + // Move the decl from DeferredDecls -> DeferredDeclsToEmit so EmitDeferred + // will define it. If it is already being emitted (e.g. it is referenced + // somewhere), this is a harmless duplicate that EmitDeferred ignores. + GlobalDecl GD(VD); + StringRef MangledName = getMangledName(GD); + auto DDI = DeferredDecls.find(MangledName); + if (DDI != DeferredDecls.end()) { + addDeferredDeclToEmit(DDI->second); + DeferredDecls.erase(DDI); + } + } +} + +/// ProcessLoadTimeCommentVars: Called after EmitDeferred(). +/// Attach loadtime_comment metadata and add each variable to +/// llvm.compiler.used. By this point the deferred emission loop has already +/// defined the globals, so we only need to look them up and annotate them. Only +/// valid on AIX targets. +void CodeGenModule::ProcessLoadTimeCommentVars() { + if (!getTriple().isOSAIX()) + return; + + const auto &LoadTimeCommentVars = getCodeGenOpts().LoadTimeCommentVars; + if (LoadTimeCommentVars.empty()) + return; + + auto &C = getLLVMContext(); + TranslationUnitDecl *TU = getContext().getTranslationUnitDecl(); + + for (auto *D : TU->decls()) { + auto *VD = dyn_cast<VarDecl>(D); + if (!VD) + continue; + if (!isLoadTimeCommentCandidateVariable(VD, LoadTimeCommentVars)) + continue; + + // Look up the LLVM global that EmitDeferred() should have defined. 
+ llvm::GlobalValue *GV = GetGlobalValue(getMangledName(GlobalDecl(VD))); + if (!GV || GV->isDeclaration()) + continue; + + auto *GVar = dyn_cast<llvm::GlobalVariable>(GV); + if (!GVar) + continue; + + // Mark with loadtime_comment metadata for LowerCommentStringPass. + GVar->setMetadata("loadtime_comment", llvm::MDNode::get(C, {})); + + // Prevent the optimizer from removing the global variable. + llvm::appendToCompilerUsed(getModule(), {GVar}); + } +} + ConstantAddress CodeGenModule::GetAddrOfMSGuidDecl(const MSGuidDecl *GD) { StringRef Name = getMangledName(GD); diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index badb740f0ba32..66814adf7f6a9 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -2171,6 +2171,24 @@ class CodeGenModule : public CodeGenTypeCache { /// Emit deactivation symbols for any PFP fields whose offset is taken with /// offsetof. void emitPFPFieldsWithEvaluatedOffset(); + + /// Check if a variable declaration is suitable to be treated as a loadtime + /// comment variable (must be a character pointer or array with initializer). + bool isValidLoadTimeCommentVariable(const VarDecl *D) const; + + /// Check if a variable is eligible to be treated as a loadtime comment + /// variable (must be in the requested list and have a valid char type). + bool isLoadTimeCommentCandidateVariable( + const VarDecl *VD, const std::vector<std::string> &LoadTimeCommentVars); + + /// Queue loadtime comment variable candidates into the deferred + /// emission list before EmitDeferred() so their initializers are emitted + /// through the normal infrastructure with correct ordering. + void QueueLoadTimeCommentVarEmission(); + + /// Attach loadtime_comment metadata and add variables to + /// llvm.compiler.used after EmitDeferred() has defined them. 
+ void ProcessLoadTimeCommentVars(); }; } // end namespace CodeGen diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 0cbb1f18809f7..fb8efa3dde077 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -6221,6 +6221,15 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, else if (UnwindTables) CmdArgs.push_back("-funwind-tables=1"); + // Forward loadtime-comment vars option to cc1 only on AIX targets. + if (Arg *A = Args.getLastArg(options::OPT_mloadtime_comment_vars_EQ)) { + if (Triple.isOSAIX()) + A->render(Args, CmdArgs); + else + D.Diag(diag::warn_drv_unsupported_option_for_target) + << A->getAsString(Args) << TripleStr; + } + // Sframe unwind tables are independent of the other types. Although also // defined for aarch64, only x86_64 support is implemented at the moment. if (Arg *A = Args.getLastArg(options::OPT_gsframe)) { diff --git a/clang/test/CodeGen/PowerPC/loadtime-comment-mixed.c b/clang/test/CodeGen/PowerPC/loadtime-comment-mixed.c new file mode 100644 index 0000000000000..64c6ec66b6160 --- /dev/null +++ b/clang/test/CodeGen/PowerPC/loadtime-comment-mixed.c @@ -0,0 +1,12 @@ +// RUN: %clang_cc1 -O2 -triple powerpc-ibm-aix -mloadtime-comment-vars=sccsid -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s +// RUN: %clang_cc1 -O2 -triple powerpc64-ibm-aix -mloadtime-comment-vars=sccsid -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s + +#pragma comment(copyright, "@(#) pragma path") + +static char *sccsid = "@(#) option path"; + +void f(void) {} + +// CHECK: @[[PRAGMA:__loadtime_comment_str_[0-9a-f]+]] = weak_odr hidden unnamed_addr constant [17 x i8] c"@(#) pragma path\00", section "__loadtime_comment", align 1, !loadtime_comment ![[MD:[0-9]+]] +// CHECK: @sccsid = internal global ptr @.str, align {{[0-9]+}}, !loadtime_comment ![[MD]] +// CHECK: @llvm.compiler.used = appending global [2 x ptr] [ptr @[[PRAGMA]], ptr @sccsid], section 
"llvm.metadata" diff --git a/clang/test/CodeGen/loadtime-comment-vars.c b/clang/test/CodeGen/loadtime-comment-vars.c new file mode 100644 index 0000000000000..d54f848ca2eea --- /dev/null +++ b/clang/test/CodeGen/loadtime-comment-vars.c @@ -0,0 +1,61 @@ +// RUN: %clang_cc1 -O2 -triple powerpc-ibm-aix -mloadtime-comment-vars=sccsid,version,build_number,same_copyright,active,not_defined_here -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s +// RUN: %clang_cc1 -O2 -triple powerpc64-ibm-aix -mloadtime-comment-vars=sccsid,version,build_number,same_copyright,active,not_defined_here -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s + +// RUN: %clang_cc1 -O2 -triple x86_64-linux-gnu -mloadtime-comment-vars=sccsid,version -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s --check-prefix=NONAIX + +// 1. String pointer +static char *sccsid = "@(#) sccsid Version 1.0"; + +// 2. String array +static char version[] = "@(#) Copyright Version 2.0"; + +// 3. Const string (Not in CLI list, should NOT be emitted) +static const char *copyright = "@(#) Copyright 2026"; + +// 4. Integer (In CLI list but invalid type, should NOT be emitted) +static int build_number = 12345; + +// 5. Struct (not in CLI list and invalid type, NOT emitted) +struct build_info { + int major; + int minor; +} static build_data = {1, 0}; + +// 6. Deferred: pointer whose initializer references another static global. +// Both the pointer AND the string it points to must be emitted. +static const char dummy[] = "dummy copyright deferred"; +static const char *same_copyright = dummy; + +// 7. Variable already referenced (eager emission path) +static char *active = "@(#) active string"; +void bar() { (void)active; } + +// 8. 
Variable listed but only declared (extern) +extern char *not_defined_here; + +void foo() {} + +// CHECK-DAG: @active = internal global ptr @.str, align {{[0-9]+}}, !loadtime_comment ![[MD:[0-9]+]] +// CHECK: @.str = private unnamed_addr constant [19 x i8] c"@(#) active string\00", align {{[0-9]+}} +// CHECK-DAG: @sccsid = internal global ptr @.str.1, align {{[0-9]+}}, !loadtime_comment ![[MD]] +// CHECK: @.str.1 = private unnamed_addr constant [24 x i8] c"@(#) sccsid Version 1.0\00", align {{[0-9]+}} +// CHECK-DAG: @version = internal global [27 x i8] c"@(#) Copyright Version 2.0\00", align {{[0-9]+}}, !loadtime_comment ![[MD]] +// CHECK-DAG: @same_copyright = internal global ptr @dummy, align {{[0-9]+}}, !loadtime_comment ![[MD]] +// CHECK: @dummy = internal constant [25 x i8] c"dummy copyright deferred\00" +// CHECK: @llvm.compiler.used = appending global [4 x ptr] +// CHECK-SAME: ptr @sccsid +// CHECK-SAME: ptr @version +// CHECK-SAME: ptr @same_copyright +// CHECK-SAME: ptr @active +// CHECK-SAME: section "llvm.metadata" + +// Ensure unrequested/invalid variables are not emitted +// CHECK-NOT: @copyright +// CHECK-NOT: @build_number +// CHECK-NOT: @build_data +// CHECK-NOT: @not_defined_here + +// NONAIX-NOT: loadtime_comment +// NONAIX-NOT: @sccsid +// NONAIX-NOT: @version + diff --git a/clang/test/Driver/mloadtime-comment-vars.c b/clang/test/Driver/mloadtime-comment-vars.c new file mode 100644 index 0000000000000..a443c85aec1f7 --- /dev/null +++ b/clang/test/Driver/mloadtime-comment-vars.c @@ -0,0 +1,11 @@ +// RUN: %clang -### -target powerpc-ibm-aix -mloadtime-comment-vars=sccsid,version %s 2>&1 | FileCheck %s +// RUN: %clang -### -target x86_64-linux-gnu -mloadtime-comment-vars=sccsid,version %s 2>&1 | FileCheck %s --check-prefix=NONAIX + +// CHECK: "-cc1" +// CHECK-SAME: "-mloadtime-comment-vars=sccsid,version" + +// NONAIX: warning: ignoring '-mloadtime-comment-vars=sccsid,version' option as it is not currently supported for target 
'x86_64-unknown-linux-gnu' +// NONAIX: "-cc1" +// NONAIX-NOT: "-mloadtime-comment-vars=sccsid,version" + +int main(void) { return 0; } diff --git a/llvm/test/Transforms/LowerCommentString/lower-comment-string.ll b/llvm/test/Transforms/LowerCommentString/lower-comment-string.ll index dcae2e3b99d26..ff09388f9c71b 100644 --- a/llvm/test/Transforms/LowerCommentString/lower-comment-string.ll +++ b/llvm/test/Transforms/LowerCommentString/lower-comment-string.ll @@ -9,7 +9,9 @@ target triple = "powerpc-ibm-aix" @__loadtime_comment_str_f20696a95b638f0b = weak_odr hidden unnamed_addr constant [24 x i8] c"@(#) Copyright TU1 v1.0\00", section "__loadtime_comment", align 1, !loadtime_comment !0 -@llvm.compiler.used = appending global [1 x ptr] [ptr @__loadtime_comment_str_f20696a95b638f0b], section "llvm.metadata" +@.loadtime_comment_vars.str = private unnamed_addr constant [22 x i8] c"loadtime_comment vars\00", align 1 +@loadtime_comment_vars_gv = internal global ptr @.loadtime_comment_vars.str, align 8, !loadtime_comment !0 +@llvm.compiler.used = appending global [2 x ptr] [ptr @__loadtime_comment_str_f20696a95b638f0b, ptr @loadtime_comment_vars_gv], section "llvm.metadata" define void @f0() { entry: @@ -23,16 +25,19 @@ entry: !0 = !{} ; ---- Globals -------------------------------------------- ; CHECK: @[[LOADTIME_COMMENT_STR:__loadtime_comment_str_[0-9a-f]+]] = weak_odr hidden unnamed_addr constant [24 x i8] c"@(#) Copyright TU1 v1.0\00", section "__loadtime_comment", align 1, !loadtime_comment !0 -; CHECK-NEXT: @llvm.compiler.used = appending global [1 x ptr] [ptr @[[LOADTIME_COMMENT_STR]]], section "llvm.metadata" +; CHECK: @.loadtime_comment_vars.str = private unnamed_addr constant [22 x i8] c"loadtime_comment vars\00", align 1 +; CHECK: @loadtime_comment_vars_gv = internal global ptr @.loadtime_comment_vars.str, align {{[0-9]+}}, !loadtime_comment !0 +; CHECK-NEXT: @llvm.compiler.used = appending global [2 x ptr] [ptr @[[LOADTIME_COMMENT_STR]], ptr 
@loadtime_comment_vars_gv], section "llvm.metadata" -; Function has an implicit ref MD pointing at the string: -; CHECK-O0: define void @f0() !implicit.ref ![[MD:[0-9]+]] -; CHECK-ON: define void @f0() local_unnamed_addr #0 !implicit.ref ![[MD:[0-9]+]] - -; CHECK-O0: define i32 @main() !implicit.ref ![[MD]] -; CHECK-ON: define noundef i32 @main() local_unnamed_addr #0 !implicit.ref ![[MD]] +; Function has implicit refs to both loadtime comment globals. +; CHECK-O0: define void @f0() !implicit.ref ![[MD:[0-9]+]] !implicit.ref ![[MD2:[0-9]+]] +; CHECK-ON: define void @f0() local_unnamed_addr #0 !implicit.ref ![[MD:[0-9]+]] !implicit.ref ![[MD2:[0-9]+]] +; CHECK-O0: define i32 @main() !implicit.ref ![[MD]] !implicit.ref ![[MD2]] +; CHECK-ON: define noundef i32 @main() local_unnamed_addr #0 !implicit.ref ![[MD]] !implicit.ref ![[MD2]] ; Verify metadata content ; CHECK-O0: ![[MD]] = !{ptr @[[LOADTIME_COMMENT_STR]]} ; CHECK-ON: ![[MD]] = !{ptr @[[LOADTIME_COMMENT_STR]]} +; CHECK-O0: ![[MD2]] = !{ptr @loadtime_comment_vars_gv} +; CHECK-ON: ![[MD2]] = !{ptr @loadtime_comment_vars_gv} >From 5dc32ee4937c38d63aac4c6df131d7147c37e537 Mon Sep 17 00:00:00 2001 From: Tony Varghese <[email protected]> Date: Mon, 22 Jun 2026 14:30:46 +0530 Subject: [PATCH 2/2] [Clang][AIX] Handle -mloadtime-comment-vars in global var emission --- clang/docs/LanguageExtensions.rst | 17 +- clang/lib/CodeGen/CodeGenModule.cpp | 150 +++++++----------- clang/lib/CodeGen/CodeGenModule.h | 15 +- .../CodeGen/loadtime-comment-vars-cxx.cpp | 85 ++++++++++ clang/test/CodeGen/loadtime-comment-vars.c | 10 +- clang/test/Driver/mloadtime-comment-vars.c | 4 + 6 files changed, 176 insertions(+), 105 deletions(-) create mode 100644 clang/test/CodeGen/loadtime-comment-vars-cxx.cpp diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index 04e2f14b53984..236ae14bbcfb4 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -6878,8 +6878,9 @@ Preserving 
Identifying Variables with -mloadtime-comment-vars The ``-mloadtime-comment-vars=`` flag accepts a comma-separated list of global variable names that should be preserved in the final object file as -loadtime identifying strings. This is an AIX-specific feature and is ignored -on other targets. +loadtime identifying strings. This is an AIX-specific feature; on other +targets the compiler emits a warning and the flag is not forwarded to +``-cc1``. This flag complements ``#pragma comment(copyright, ...)`` for codebases that already use the traditional UNIX convention of embedding identifying strings @@ -6891,6 +6892,18 @@ Syntax: -mloadtime-comment-vars=<var1>[,<var2>,...] +Name matching: + +- In C, names are matched as plain identifiers (for example, ``sccsid``). +- In C++, names containing ``::`` are treated as source-qualified names and + matched against the declaration's qualified source name (for example, + ``N::x`` or ``A::x``). +- In C++, names without ``::`` are treated as unqualified names and matched by + plain identifier. This may match more than one declaration when names are + reused across scopes. +- To target a single declaration in C++, prefer qualified names. Unqualified + matches can preserve additional globals and increase object size. + Valid variable types: A variable named in the list must meet both of these conditions to be diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index d06dee73acfc5..6adf5723ae8c1 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -1078,12 +1078,6 @@ void CodeGenModule::Release() { if (CXX20ModuleInits && Primary && !Primary->isHeaderLikeModule()) EmitModuleInitializers(Primary); - // Queue loadtime comment variable candidates into the deferred emission - // list before EmitDeferred() runs, so their initializers (which may - // reference other globals, e.g. 
static const char *p = a;) are emitted - // through the normal infrastructure with correct ordering. - QueueLoadTimeCommentVarEmission(); - EmitDeferred(); DeferredDecls.insert_range(EmittedDeferredDecls); EmittedDeferredDecls.clear(); @@ -1765,9 +1759,6 @@ void CodeGenModule::Release() { EmitBackendOptionsMetadata(getCodeGenOpts()); - // Mark loadtime comment variables specified via -mloadtime-comment-vars. - ProcessLoadTimeCommentVars(); - // If there is device offloading code embed it in the host now. EmbedObject(&getModule(), CodeGenOpts, *getFileSystem(), getDiags()); @@ -4285,7 +4276,12 @@ bool CodeGenModule::MustBeEmitted(const ValueDecl *Global) { (VD->getStorageDuration() == SD_Static || VD->getStorageDuration() == SD_Thread)) || (CodeGenOpts.KeepStaticConsts && VD->getStorageDuration() == SD_Static && - VD->getType().isConstQualified()))) + VD->getType().isConstQualified()) || + // Keep requested loadtime-comment variables in the normal + // emission path so EmitGlobalVarDefinition can annotate the definition. + (getTriple().isOSAIX() && !CodeGenOpts.LoadTimeCommentVars.empty() && + isLoadTimeCommentCandidateVariable(VD, + CodeGenOpts.LoadTimeCommentVars)))) return true; return getContext().DeclMustBeEmitted(Global); @@ -4347,23 +4343,19 @@ bool CodeGenModule::MayBeEmittedEagerly(const ValueDecl *Global) { return true; } -/// Check if a variable declaration is suitable to be treated as a loadtime -/// comment variable. Valid variables must be character pointers or character -/// arrays with an initializer. +/// Return true if a variable is a supported loadtime-comment declaration: +/// character pointer/array with an initializer. bool CodeGenModule::isValidLoadTimeCommentVariable(const VarDecl *D) const { - // Must be a valid declaration and must have an initializer (the string). if (!D || !D->hasInit()) return false; QualType Ty = D->getType(); - // 1. Handle Pointers (e.g., char *sccsid, const char *copyright). 
if (const PointerType *PT = Ty->getAs<PointerType>()) { if (PT->getPointeeType()->isAnyCharacterType()) return true; } - // 2. Handle Arrays (e.g., char version[]) if (const ArrayType *AT = getContext().getAsArrayType(Ty)) { if (AT->getElementType()->isAnyCharacterType()) return true; @@ -4372,88 +4364,57 @@ bool CodeGenModule::isValidLoadTimeCommentVariable(const VarDecl *D) const { return false; // Reject ints, structs, etc. } -/// Check if a variable is eligible to be treated as a loadtime comment -/// variable. This requires: (1) the variable name is in the requested list -/// and (2) the variable type is valid (char pointer or array with initializer). -bool CodeGenModule::isLoadTimeCommentCandidateVariable( - const VarDecl *VD, const std::vector<std::string> &LoadTimeCommentVars) { - if (!llvm::is_contained(LoadTimeCommentVars, VD->getName())) +/// Return true if a variable name matches any entry in LoadTimeCommentVars. +/// +/// - A token containing "::" is treated as a source-qualified name. +/// - A token without "::" is treated as an unqualified identifier and may +/// match declarations in multiple scopes. +/// +/// For qualified matching, leading "::" is ignored on both sides, so "::x" +/// and "x" both select a file-scope variable. +bool CodeGenModule::matchesLoadTimeCommentVarName( + const VarDecl *VD, + const std::vector<std::string> &LoadTimeCommentVars) const { + if (!VD) return false; - return isValidLoadTimeCommentVariable(VD); -} -/// QueueLoadTimeCommentVarEmission: Called before EmitDeferred(). -/// Move loadtime comment variable candidates from DeferredDecls into -/// DeferredDeclsToEmit so that the normal deferred emission machinery -/// defines them — including any globals their initializers reference -/// (e.g. static const char *p = a;). 
-void CodeGenModule::QueueLoadTimeCommentVarEmission() { - if (!getTriple().isOSAIX()) - return; - - const auto &LoadTimeCommentVars = getCodeGenOpts().LoadTimeCommentVars; - if (LoadTimeCommentVars.empty()) - return; + StringRef Unqualified = VD->getName(); + std::optional<std::string> Qualified; - TranslationUnitDecl *TU = getContext().getTranslationUnitDecl(); - for (auto *D : TU->decls()) { - auto *VD = dyn_cast<VarDecl>(D); - if (!VD) + for (const std::string &RequestedName : LoadTimeCommentVars) { + StringRef Requested(RequestedName); + if (Requested.empty()) continue; - if (!isLoadTimeCommentCandidateVariable(VD, LoadTimeCommentVars)) - continue; - - // Move the decl from DeferredDecls -> DeferredDeclsToEmit so EmitDeferred - // will define it. If it is already being emitted (e.g. it is referenced - // somewhere), this is a harmless duplicate that EmitDeferred ignores. - GlobalDecl GD(VD); - StringRef MangledName = getMangledName(GD); - auto DDI = DeferredDecls.find(MangledName); - if (DDI != DeferredDecls.end()) { - addDeferredDeclToEmit(DDI->second); - DeferredDecls.erase(DDI); - } - } -} - -/// ProcessLoadTimeCommentVars: Called after EmitDeferred(). -/// Attach loadtime_comment metadata and add each variable to -/// llvm.compiler.used. By this point the deferred emission loop has already -/// defined the globals, so we only need to look them up and annotate them. Only -/// valid on AIX targets. -void CodeGenModule::ProcessLoadTimeCommentVars() { - if (!getTriple().isOSAIX()) - return; - const auto &LoadTimeCommentVars = getCodeGenOpts().LoadTimeCommentVars; - if (LoadTimeCommentVars.empty()) - return; - - auto &C = getLLVMContext(); - TranslationUnitDecl *TU = getContext().getTranslationUnitDecl(); - - for (auto *D : TU->decls()) { - auto *VD = dyn_cast<VarDecl>(D); - if (!VD) - continue; - if (!isLoadTimeCommentCandidateVariable(VD, LoadTimeCommentVars)) - continue; - - // Look up the LLVM global that EmitDeferred() should have defined. 
- llvm::GlobalValue *GV = GetGlobalValue(getMangledName(GlobalDecl(VD))); - if (!GV || GV->isDeclaration()) + if (Requested.contains("::")) { + if (!Qualified) { + Qualified = VD->getQualifiedNameAsString(); + // Normalize file-scope names by dropping a leading "::". + if (StringRef(*Qualified).starts_with("::")) + Qualified->erase(0, 2); + } + Requested.consume_front("::"); + if (Requested == *Qualified) + return true; continue; + } - auto *GVar = dyn_cast<llvm::GlobalVariable>(GV); - if (!GVar) - continue; + if (Requested == Unqualified) + return true; + } - // Mark with loadtime_comment metadata for LowerCommentStringPass. - GVar->setMetadata("loadtime_comment", llvm::MDNode::get(C, {})); + return false; +} - // Prevent the optimizer from removing the global variable. - llvm::appendToCompilerUsed(getModule(), {GVar}); - } +/// Check if a variable is eligible to be treated as a loadtime comment +/// variable. This requires: (1) the variable name is in the requested list +/// and (2) the variable type is valid (char pointer or array with initializer). +bool CodeGenModule::isLoadTimeCommentCandidateVariable( + const VarDecl *VD, + const std::vector<std::string> &LoadTimeCommentVars) const { + if (!isValidLoadTimeCommentVariable(VD)) + return false; + return matchesLoadTimeCommentVarName(VD, LoadTimeCommentVars); } ConstantAddress CodeGenModule::GetAddrOfMSGuidDecl(const MSGuidDecl *GD) { @@ -6646,6 +6607,17 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D, if (D->hasAttr<AnnotateAttr>()) AddGlobalAnnotations(D, GV); + if (getTriple().isOSAIX()) { + const auto &LoadTimeCommentVars = getCodeGenOpts().LoadTimeCommentVars; + if (!LoadTimeCommentVars.empty() && + isLoadTimeCommentCandidateVariable(D, LoadTimeCommentVars)) { + auto &C = getLLVMContext(); + // Mark for LowerCommentStringPass and keep the symbol alive. 
+ GV->setMetadata("loadtime_comment", llvm::MDNode::get(C, {})); + llvm::appendToCompilerUsed(getModule(), {GV}); + } + } + // Set the llvm linkage type as appropriate. llvm::GlobalValue::LinkageTypes Linkage = getLLVMLinkageVarDefinition(D); diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index 66814adf7f6a9..592198a6238ce 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -2179,16 +2179,13 @@ class CodeGenModule : public CodeGenTypeCache { /// Check if a variable is eligible to be treated as a loadtime comment /// variable (must be in the requested list and have a valid char type). bool isLoadTimeCommentCandidateVariable( - const VarDecl *VD, const std::vector<std::string> &LoadTimeCommentVars); + const VarDecl *VD, + const std::vector<std::string> &LoadTimeCommentVars) const; - /// Queue loadtime comment variable candidates into the deferred - /// emission list before EmitDeferred() so their initializers are emitted - /// through the normal infrastructure with correct ordering. - void QueueLoadTimeCommentVarEmission(); - - /// Attach loadtime_comment metadata and add variables to - /// llvm.compiler.used after EmitDeferred() has defined them. - void ProcessLoadTimeCommentVars(); + /// Check if a variable name matches any entry in LoadTimeCommentVars. + bool matchesLoadTimeCommentVarName( + const VarDecl *VD, + const std::vector<std::string> &LoadTimeCommentVars) const; }; } // end namespace CodeGen diff --git a/clang/test/CodeGen/loadtime-comment-vars-cxx.cpp b/clang/test/CodeGen/loadtime-comment-vars-cxx.cpp new file mode 100644 index 0000000000000..57a0f84d4e170 --- /dev/null +++ b/clang/test/CodeGen/loadtime-comment-vars-cxx.cpp @@ -0,0 +1,85 @@ +// RUN: %clang_cc1 -std=c++17 -O2 -triple powerpc64-ibm-aix \ +// RUN: -mloadtime-comment-vars=x,N::q,A::x,N::ptr,B::ver,C::info \ +// RUN: -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s + + +// 1. 
Unqualified name "x" — matches both ::x (file scope) and N::x (namespace) +char x[] = "@(#) global x"; + +namespace N { +char x[] = "@(#) ns x"; + +// 2. Qualified name "N::q" — selects only this declaration +char q[] = "@(#) ns q"; + + +// 3. Deferred pointer-chain inside a namespace. +// N::ptr points to N::base (another static). MustBeEmitted forces N::ptr +// through EmitGlobalVarDefinition; the initializer reference to N::base +// causes N::base to be emitted as a side-effect. +static const char base[] = "base deferred ns"; +static const char *ptr = base; +} // namespace N + + +// 4. Qualified name "A::x" — class static member (const char *) +struct A { + static const char *x; +}; +const char *A::x = "@(#) class x"; + + +// 5. Deferred pointer-chain for a class static member. +// B::ver points to a separate static array base_b. +struct B { + static const char *ver; +}; +static const char base_b[] = "base for B::ver"; +const char *B::ver = base_b; + +// 6. Qualified name in list but only declared, never defined — must be skipped. +struct C { static const char *info; }; +// C::info has no definition in this TU. + + +// 7. Invalid type — an int must NOT be tagged. NOTE(review): not_string is not named in the -mloadtime-comment-vars list on the RUN line, so this case passes without exercising the type check; add not_string to that list to cover it. +int not_string = 7; + +void f() {} + +// --- Checks ---------------------------------------------------------------- + +// Unqualified "x" matches both ::x and N::x. +// CHECK-DAG: @x = global [14 x i8] c"@(#) global x\00", align {{[0-9]+}}, !loadtime_comment ![[MD:[0-9]+]] +// CHECK-DAG: @_ZN1N1xE = global [10 x i8] c"@(#) ns x\00", align {{[0-9]+}}, !loadtime_comment ![[MD]] + +// Qualified "N::q" selects the specific namespace member. +// CHECK-DAG: @_ZN1N1qE = global [10 x i8] c"@(#) ns q\00", align {{[0-9]+}}, !loadtime_comment ![[MD]] + +// Qualified "A::x" selects the class static member (pointer to literal). 
+// CHECK-DAG: @[[AX:_ZN1A1xE]] = {{.*}}global ptr @[[AXSTR:.*]], align {{[0-9]+}}, !loadtime_comment ![[MD]] +// CHECK-DAG: @[[AXSTR]] = private unnamed_addr constant [13 x i8] c"@(#) class x\00", align {{[0-9]+}} + +// Deferred: N::ptr points to N::base — both must be emitted. +// CHECK-DAG: @_ZN1NL3ptrE = internal global ptr @_ZN1NL4baseE, align {{[0-9]+}}, !loadtime_comment ![[MD]] +// CHECK-DAG: @_ZN1NL4baseE = internal constant [17 x i8] c"base deferred ns\00", align {{[0-9]+}} + +// Deferred: B::ver points to base_b — both must be emitted. +// CHECK-DAG: @_ZN1B3verE = global ptr @_ZL6base_b, align {{[0-9]+}}, !loadtime_comment ![[MD]] +// CHECK-DAG: @_ZL6base_b = internal constant [16 x i8] c"base for B::ver\00", align {{[0-9]+}} + +// Invalid type must not be tagged. +// CHECK-NOT: @not_string{{.*}}!loadtime_comment + +// C::info is declared but not defined — must not appear at all. +// CHECK-NOT: @_ZN1C4infoE + +// All six selected globals are preserved in llvm.compiler.used. +// CHECK: @llvm.compiler.used = appending global [6 x ptr] +// CHECK-SAME: @x +// CHECK-SAME: @_ZN1N1xE +// CHECK-SAME: @_ZN1N1qE +// CHECK-SAME: @_ZN1NL3ptrE +// CHECK-SAME: @[[AX]] +// CHECK-SAME: @_ZN1B3verE +// CHECK-SAME: section "llvm.metadata" diff --git a/clang/test/CodeGen/loadtime-comment-vars.c b/clang/test/CodeGen/loadtime-comment-vars.c index d54f848ca2eea..057c39f4f8380 100644 --- a/clang/test/CodeGen/loadtime-comment-vars.c +++ b/clang/test/CodeGen/loadtime-comment-vars.c @@ -35,13 +35,13 @@ extern char *not_defined_here; void foo() {} -// CHECK-DAG: @active = internal global ptr @.str, align {{[0-9]+}}, !loadtime_comment ![[MD:[0-9]+]] -// CHECK: @.str = private unnamed_addr constant [19 x i8] c"@(#) active string\00", align {{[0-9]+}} -// CHECK-DAG: @sccsid = internal global ptr @.str.1, align {{[0-9]+}}, !loadtime_comment ![[MD]] -// CHECK: @.str.1 = private unnamed_addr constant [24 x i8] c"@(#) sccsid Version 1.0\00", align {{[0-9]+}} +// CHECK-DAG: 
@[[ACTIVE:active]] = internal global ptr @[[ACTIVE_STR:.str(\.[0-9]+)?]], align {{[0-9]+}}, !loadtime_comment ![[MD:[0-9]+]] +// CHECK-DAG: @[[ACTIVE_STR]] = private unnamed_addr constant [19 x i8] c"@(#) active string\00", align {{[0-9]+}} +// CHECK-DAG: @sccsid = internal global ptr @[[SCCSID_STR:.str(\.[0-9]+)?]], align {{[0-9]+}}, !loadtime_comment ![[MD]] +// CHECK-DAG: @[[SCCSID_STR]] = private unnamed_addr constant [24 x i8] c"@(#) sccsid Version 1.0\00", align {{[0-9]+}} // CHECK-DAG: @version = internal global [27 x i8] c"@(#) Copyright Version 2.0\00", align {{[0-9]+}}, !loadtime_comment ![[MD]] // CHECK-DAG: @same_copyright = internal global ptr @dummy, align {{[0-9]+}}, !loadtime_comment ![[MD]] -// CHECK: @dummy = internal constant [25 x i8] c"dummy copyright deferred\00" +// CHECK-DAG: @dummy = internal constant [25 x i8] c"dummy copyright deferred\00" // CHECK: @llvm.compiler.used = appending global [4 x ptr] // CHECK-SAME: ptr @sccsid // CHECK-SAME: ptr @version diff --git a/clang/test/Driver/mloadtime-comment-vars.c b/clang/test/Driver/mloadtime-comment-vars.c index a443c85aec1f7..4c5cfc586dab2 100644 --- a/clang/test/Driver/mloadtime-comment-vars.c +++ b/clang/test/Driver/mloadtime-comment-vars.c @@ -1,9 +1,13 @@ // RUN: %clang -### -target powerpc-ibm-aix -mloadtime-comment-vars=sccsid,version %s 2>&1 | FileCheck %s +// RUN: %clang -### -target powerpc64-ibm-aix -mloadtime-comment-vars=::x,N::x,A::x %s 2>&1 | FileCheck %s --check-prefix=SCOPE // RUN: %clang -### -target x86_64-linux-gnu -mloadtime-comment-vars=sccsid,version %s 2>&1 | FileCheck %s --check-prefix=NONAIX // CHECK: "-cc1" // CHECK-SAME: "-mloadtime-comment-vars=sccsid,version" +// SCOPE: "-cc1" +// SCOPE-SAME: "-mloadtime-comment-vars=::x,N::x,A::x" + // NONAIX: warning: ignoring '-mloadtime-comment-vars=sccsid,version' option as it is not currently supported for target 'x86_64-unknown-linux-gnu' // NONAIX: "-cc1" // NONAIX-NOT: "-mloadtime-comment-vars=sccsid,version" 
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
