https://github.com/tonykuttai updated https://github.com/llvm/llvm-project/pull/187986
>From ce71ece410c0c3b5b8164435b153f70dfbed12a7 Mon Sep 17 00:00:00 2001 From: Tony Varghese <[email protected]> Date: Fri, 19 Jun 2026 15:46:49 +0530 Subject: [PATCH] [Clang][AIX] Add -mloadtime-comment-vars support to preserve variables in the final object file. --- clang/docs/LanguageExtensions.rst | 66 ++++++++++ clang/include/clang/Basic/CodeGenOptions.h | 3 + clang/include/clang/Options/Options.td | 7 ++ clang/lib/CodeGen/CodeGenModule.cpp | 116 ++++++++++++++++++ clang/lib/CodeGen/CodeGenModule.h | 18 +++ clang/lib/Driver/ToolChains/Clang.cpp | 10 ++ .../CodeGen/PowerPC/loadtime-comment-mixed.c | 12 ++ clang/test/CodeGen/loadtime-comment-vars.c | 61 +++++++++ clang/test/Driver/mloadtime-comment-vars.c | 11 ++ .../lower-comment-string.ll | 21 ++-- 10 files changed, 317 insertions(+), 8 deletions(-) create mode 100644 clang/test/CodeGen/PowerPC/loadtime-comment-mixed.c create mode 100644 clang/test/CodeGen/loadtime-comment-vars.c create mode 100644 clang/test/Driver/mloadtime-comment-vars.c diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index d79d82a175c68..04e2f14b53984 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -6873,6 +6873,72 @@ When ``#pragma comment(copyright, ...)`` appears in a C++20 module interface unit, the copyright string is embedded only in the object file compiled from that interface unit. Importing TUs do not re-emit the string. +Preserving Identifying Variables with -mloadtime-comment-vars +-------------------------------------------------------------- + +The ``-mloadtime-comment-vars=`` flag accepts a comma-separated list of +global variable names that should be preserved in the final object file as +loadtime identifying strings. This is an AIX-specific feature and is ignored +on other targets. 
+ +This flag complements ``#pragma comment(copyright, ...)`` for codebases that +already use the traditional UNIX convention of embedding identifying strings +directly in source variables rather than via a pragma. + +Syntax: + +.. code-block:: console + + -mloadtime-comment-vars=<var1>[,<var2>,...] + +Valid variable types: + +A variable named in the list must meet both of these conditions to be +preserved: + +- Its type must be a character pointer (``char *``, ``const char *``) or a + character array (``char[]``). +- It must have an initializer. + +Variables that fail either check -- for example, an ``int`` or a ``struct`` -- +are silently skipped. Variables that appear in the list but are not defined in +the translation unit are also ignored. + +Example: + +.. code-block:: c + + static char *sccsid = "@(#) MyApp Version 1.0"; + static char version[] = "@(#) Built 2026-05-24"; + + void foo() {} + +Compiled with: + +.. code-block:: console + + clang -target powerpc64-ibm-aix \ + -mloadtime-comment-vars=sccsid,version \ + -c source.c -o source.o + +Both ``sccsid`` and ``version`` survive optimization and are retained in the +object file. + +.. code-block:: console + + $ what source.o + source.o: + MyApp Version 1.0 + Built 2026-05-24 + +Interaction with ``#pragma comment(copyright, ...)``: + +The two mechanisms can be used together in the same translation unit. The +pragma produces a dedicated ``__loadtime_comment_str`` symbol placed in the +``__loadtime_comment`` section, while ``-mloadtime-comment-vars`` preserves +the named source variables in place using ``.ref`` directives. Both sets of +strings appear in the final object file independently. 
+ Evaluating Object Size ====================== diff --git a/clang/include/clang/Basic/CodeGenOptions.h b/clang/include/clang/Basic/CodeGenOptions.h index 97d68877467fd..03bf2f730d631 100644 --- a/clang/include/clang/Basic/CodeGenOptions.h +++ b/clang/include/clang/Basic/CodeGenOptions.h @@ -342,6 +342,9 @@ class CodeGenOptions : public CodeGenOptionsBase { /// A list of linker options to embed in the object file. std::vector<std::string> LinkerOptions; + /// List of global variable names to preserve as loadtime comment variables. + std::vector<std::string> LoadTimeCommentVars; + /// Name of the profile file to use as output for -fprofile-instr-generate, /// -fprofile-generate, and -fcs-profile-generate. std::string InstrProfileOutput; diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td index 5028684731b2d..71fd39ccf4ea2 100644 --- a/clang/include/clang/Options/Options.td +++ b/clang/include/clang/Options/Options.td @@ -4792,6 +4792,13 @@ def fvisibility_global_new_delete_EQ : Joined<["-"], "fvisibility-global-new-del Visibility<[ClangOption, CC1Option]>, HelpText<"The visibility for global C++ operator new and delete declarations. 
If 'source' is specified the visibility is not adjusted">, MarshallingInfoVisibilityGlobalNewDelete<LangOpts<"GlobalAllocationFunctionVisibility">, "ForceDefault">; +def mloadtime_comment_vars_EQ + : CommaJoined<["-"], "mloadtime-comment-vars=">, + Group<m_Group>, + Visibility<[ClangOption, CC1Option]>, + HelpText<"Comma-separated list of global variable names to treat as " + "loadtime variables">, + MarshallingInfoStringVector<CodeGenOpts<"LoadTimeCommentVars">>; def mdefault_visibility_export_mapping_EQ : Joined<["-"], "mdefault-visibility-export-mapping=">, Values<"none,explicit,all">, NormalizedValuesScope<"LangOptions::DefaultVisiblityExportMapping">, diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index cc0d0341a2dd0..d89d68e16d176 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -1077,6 +1077,11 @@ void CodeGenModule::Release() { Module *Primary = getContext().getCurrentNamedModule(); if (CXX20ModuleInits && Primary && !Primary->isHeaderLikeModule()) EmitModuleInitializers(Primary); + // Queue loadtime comment variable candidates into the deferred emission + // list before EmitDeferred() runs, so their initializers (which may + // reference other globals, e.g. static const char *p = a;) are emitted + // through the normal infrastructure with correct ordering. + QueueLoadTimeCommentVarEmission(); EmitDeferred(); DeferredDecls.insert_range(EmittedDeferredDecls); EmittedDeferredDecls.clear(); @@ -1758,6 +1763,9 @@ void CodeGenModule::Release() { EmitBackendOptionsMetadata(getCodeGenOpts()); + // Mark loadtime comment variables specified via -mloadtime-comment-vars. + ProcessLoadTimeCommentVars(); + // If there is device offloading code embed it in the host now. 
EmbedObject(&getModule(), CodeGenOpts, *getFileSystem(), getDiags()); @@ -4337,6 +4345,114 @@ bool CodeGenModule::MayBeEmittedEagerly(const ValueDecl *Global) { return true; } +/// Check if a variable declaration is suitable to be treated as a loadtime +/// comment variable. Valid variables must be character pointers or character +/// arrays with an initializer. +bool CodeGenModule::isValidLoadTimeCommentVariable(const VarDecl *D) const { + // Must be a valid declaration and must have an initializer (the string). + if (!D || !D->hasInit()) + return false; + + QualType Ty = D->getType(); + + // 1. Handle Pointers (e.g., char *sccsid, const char *copyright). + if (const PointerType *PT = Ty->getAs<PointerType>()) { + if (PT->getPointeeType()->isAnyCharacterType()) + return true; + } + + // 2. Handle Arrays (e.g., char version[]) + // Use ASTContext::getAsArrayType to safely unwrap constant arrays. + if (const ArrayType *AT = getContext().getAsArrayType(Ty)) { + if (AT->getElementType()->isAnyCharacterType()) + return true; + } + + return false; // Reject ints, structs, etc. +} + +/// Check if a variable is eligible to be treated as a loadtime comment +/// variable. This requires: (1) the variable name is in the requested list +/// and (2) the variable type is valid (char pointer or array with initializer). +bool CodeGenModule::isLoadTimeCommentCandidateVariable( + const VarDecl *VD, const std::vector<std::string> &LoadTimeCommentVars) { + if (!llvm::is_contained(LoadTimeCommentVars, VD->getName())) + return false; + return isValidLoadTimeCommentVariable(VD); +} + +/// Phase 1 (called before EmitDeferred): move loadtime comment variable +/// candidates from DeferredDecls into DeferredDeclsToEmit so that the normal +/// deferred emission machinery defines them — including any globals their +/// initializers reference (e.g. static const char *p = a;). 
+void CodeGenModule::QueueLoadTimeCommentVarEmission() { + if (!getTriple().isOSAIX()) + return; + + const auto &LoadTimeCommentVars = getCodeGenOpts().LoadTimeCommentVars; + if (LoadTimeCommentVars.empty()) + return; + + TranslationUnitDecl *TU = getContext().getTranslationUnitDecl(); + for (auto *D : TU->decls()) { + auto *VD = dyn_cast<VarDecl>(D); + if (!VD) + continue; + if (!isLoadTimeCommentCandidateVariable(VD, LoadTimeCommentVars)) + continue; + + // Move the decl from DeferredDecls -> DeferredDeclsToEmit so EmitDeferred + // will define it. If it is already being emitted (e.g. it is referenced + // somewhere), this is a harmless duplicate that EmitDeferred ignores. + GlobalDecl GD(VD); + StringRef MangledName = getMangledName(GD); + auto DDI = DeferredDecls.find(MangledName); + if (DDI != DeferredDecls.end()) { + addDeferredDeclToEmit(DDI->second); + DeferredDecls.erase(DDI); + } + } +} + +/// Phase 2 (called after EmitDeferred): attach loadtime_comment metadata and +/// add each variable to llvm.compiler.used. By this point the deferred +/// emission loop has already defined the globals, so we only need to look them +/// up and annotate them. Only valid on AIX targets. +void CodeGenModule::ProcessLoadTimeCommentVars() { + if (!getTriple().isOSAIX()) + return; + + const auto &LoadTimeCommentVars = getCodeGenOpts().LoadTimeCommentVars; + if (LoadTimeCommentVars.empty()) + return; + + auto &C = getLLVMContext(); + TranslationUnitDecl *TU = getContext().getTranslationUnitDecl(); + + for (auto *D : TU->decls()) { + auto *VD = dyn_cast<VarDecl>(D); + if (!VD) + continue; + if (!isLoadTimeCommentCandidateVariable(VD, LoadTimeCommentVars)) + continue; + + // Look up the LLVM global that EmitDeferred() should have defined. 
+ llvm::GlobalValue *GV = GetGlobalValue(getMangledName(GlobalDecl(VD))); + if (!GV || GV->isDeclaration()) + continue; + + auto *GVar = dyn_cast<llvm::GlobalVariable>(GV); + if (!GVar) + continue; + + // Mark with loadtime_comment metadata for LowerCommentStringPass. + GVar->setMetadata("loadtime_comment", llvm::MDNode::get(C, {})); + + // Prevent the optimizer from removing the global variable. + llvm::appendToCompilerUsed(getModule(), {GVar}); + } +} + ConstantAddress CodeGenModule::GetAddrOfMSGuidDecl(const MSGuidDecl *GD) { StringRef Name = getMangledName(GD); diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index badb740f0ba32..eba21bee5026a 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -2171,6 +2171,24 @@ class CodeGenModule : public CodeGenTypeCache { /// Emit deactivation symbols for any PFP fields whose offset is taken with /// offsetof. void emitPFPFieldsWithEvaluatedOffset(); + + /// Check if a variable declaration is suitable to be treated as a loadtime + /// comment variable (must be a character pointer or array with initializer). + bool isValidLoadTimeCommentVariable(const VarDecl *D) const; + + /// Check if a variable is eligible to be treated as a loadtime comment + /// variable (must be in the requested list and have a valid char type). + bool isLoadTimeCommentCandidateVariable( + const VarDecl *VD, const std::vector<std::string> &LoadTimeCommentVars); + + /// Phase 1: queue loadtime comment variable candidates into the deferred + /// emission list before EmitDeferred() so their initializers are emitted + /// through the normal infrastructure with correct ordering. + void QueueLoadTimeCommentVarEmission(); + + /// Phase 2: attach loadtime_comment metadata and add variables to + /// llvm.compiler.used after EmitDeferred() has defined them. 
+ void ProcessLoadTimeCommentVars(); }; } // end namespace CodeGen diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index e3288c81d4c95..2e4daa3820dba 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -6221,6 +6221,16 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, else if (UnwindTables) CmdArgs.push_back("-funwind-tables=1"); + // Forward loadtime-comment vars option to cc1 only on AIX targets. + // For other targets the option is ignored with an explicit diagnostic. + if (Arg *A = Args.getLastArg(options::OPT_mloadtime_comment_vars_EQ)) { + if (Triple.isOSAIX()) + A->render(Args, CmdArgs); + else + D.Diag(diag::warn_drv_unsupported_option_for_target) + << A->getAsString(Args) << TripleStr; + } + // Sframe unwind tables are independent of the other types. Although also // defined for aarch64, only x86_64 support is implemented at the moment. if (Arg *A = Args.getLastArg(options::OPT_gsframe)) { diff --git a/clang/test/CodeGen/PowerPC/loadtime-comment-mixed.c b/clang/test/CodeGen/PowerPC/loadtime-comment-mixed.c new file mode 100644 index 0000000000000..64c6ec66b6160 --- /dev/null +++ b/clang/test/CodeGen/PowerPC/loadtime-comment-mixed.c @@ -0,0 +1,12 @@ +// RUN: %clang_cc1 -O2 -triple powerpc-ibm-aix -mloadtime-comment-vars=sccsid -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s +// RUN: %clang_cc1 -O2 -triple powerpc64-ibm-aix -mloadtime-comment-vars=sccsid -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s + +#pragma comment(copyright, "@(#) pragma path") + +static char *sccsid = "@(#) option path"; + +void f(void) {} + +// CHECK: @[[PRAGMA:__loadtime_comment_str_[0-9a-f]+]] = weak_odr hidden unnamed_addr constant [17 x i8] c"@(#) pragma path\00", section "__loadtime_comment", align 1, !loadtime_comment ![[MD:[0-9]+]] +// CHECK: @sccsid = internal global ptr @.str, align {{[0-9]+}}, !loadtime_comment ![[MD]] +// CHECK: @llvm.compiler.used = 
appending global [2 x ptr] [ptr @[[PRAGMA]], ptr @sccsid], section "llvm.metadata" diff --git a/clang/test/CodeGen/loadtime-comment-vars.c b/clang/test/CodeGen/loadtime-comment-vars.c new file mode 100644 index 0000000000000..d54f848ca2eea --- /dev/null +++ b/clang/test/CodeGen/loadtime-comment-vars.c @@ -0,0 +1,61 @@ +// RUN: %clang_cc1 -O2 -triple powerpc-ibm-aix -mloadtime-comment-vars=sccsid,version,build_number,same_copyright,active,not_defined_here -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s +// RUN: %clang_cc1 -O2 -triple powerpc64-ibm-aix -mloadtime-comment-vars=sccsid,version,build_number,same_copyright,active,not_defined_here -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s + +// RUN: %clang_cc1 -O2 -triple x86_64-linux-gnu -mloadtime-comment-vars=sccsid,version -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s --check-prefix=NONAIX + +// 1. String pointer +static char *sccsid = "@(#) sccsid Version 1.0"; + +// 2. String array +static char version[] = "@(#) Copyright Version 2.0"; + +// 3. Const string (Not in CLI list, should NOT be emitted) +static const char *copyright = "@(#) Copyright 2026"; + +// 4. Integer (In CLI list but invalid type, should NOT be emitted) +static int build_number = 12345; + +// 5. Struct (not in CLI list and invalid type, NOT emitted) +struct build_info { + int major; + int minor; +} static build_data = {1, 0}; + +// 6. Deferred: pointer whose initializer references another static global. +// Both the pointer AND the string it points to must be emitted. +static const char dummy[] = "dummy copyright deferred"; +static const char *same_copyright = dummy; + +// 7. Variable already referenced (eager emission path) +static char *active = "@(#) active string"; +void bar() { (void)active; } + +// 8. 
Variable listed but only declared (extern) +extern char *not_defined_here; + +void foo() {} + +// CHECK-DAG: @active = internal global ptr @.str, align {{[0-9]+}}, !loadtime_comment ![[MD:[0-9]+]] +// CHECK: @.str = private unnamed_addr constant [19 x i8] c"@(#) active string\00", align {{[0-9]+}} +// CHECK-DAG: @sccsid = internal global ptr @.str.1, align {{[0-9]+}}, !loadtime_comment ![[MD]] +// CHECK: @.str.1 = private unnamed_addr constant [24 x i8] c"@(#) sccsid Version 1.0\00", align {{[0-9]+}} +// CHECK-DAG: @version = internal global [27 x i8] c"@(#) Copyright Version 2.0\00", align {{[0-9]+}}, !loadtime_comment ![[MD]] +// CHECK-DAG: @same_copyright = internal global ptr @dummy, align {{[0-9]+}}, !loadtime_comment ![[MD]] +// CHECK: @dummy = internal constant [25 x i8] c"dummy copyright deferred\00" +// CHECK: @llvm.compiler.used = appending global [4 x ptr] +// CHECK-SAME: ptr @sccsid +// CHECK-SAME: ptr @version +// CHECK-SAME: ptr @same_copyright +// CHECK-SAME: ptr @active +// CHECK-SAME: section "llvm.metadata" + +// Ensure unrequested/invalid variables are not emitted +// CHECK-NOT: @copyright +// CHECK-NOT: @build_number +// CHECK-NOT: @build_data +// CHECK-NOT: @not_defined_here + +// NONAIX-NOT: loadtime_comment +// NONAIX-NOT: @sccsid +// NONAIX-NOT: @version + diff --git a/clang/test/Driver/mloadtime-comment-vars.c b/clang/test/Driver/mloadtime-comment-vars.c new file mode 100644 index 0000000000000..a443c85aec1f7 --- /dev/null +++ b/clang/test/Driver/mloadtime-comment-vars.c @@ -0,0 +1,11 @@ +// RUN: %clang -### -target powerpc-ibm-aix -mloadtime-comment-vars=sccsid,version %s 2>&1 | FileCheck %s +// RUN: %clang -### -target x86_64-linux-gnu -mloadtime-comment-vars=sccsid,version %s 2>&1 | FileCheck %s --check-prefix=NONAIX + +// CHECK: "-cc1" +// CHECK-SAME: "-mloadtime-comment-vars=sccsid,version" + +// NONAIX: warning: ignoring '-mloadtime-comment-vars=sccsid,version' option as it is not currently supported for target 
'x86_64-unknown-linux-gnu' +// NONAIX: "-cc1" +// NONAIX-NOT: "-mloadtime-comment-vars=sccsid,version" + +int main(void) { return 0; } diff --git a/llvm/test/Transforms/LowerCommentString/lower-comment-string.ll b/llvm/test/Transforms/LowerCommentString/lower-comment-string.ll index dcae2e3b99d26..ff09388f9c71b 100644 --- a/llvm/test/Transforms/LowerCommentString/lower-comment-string.ll +++ b/llvm/test/Transforms/LowerCommentString/lower-comment-string.ll @@ -9,7 +9,9 @@ target triple = "powerpc-ibm-aix" @__loadtime_comment_str_f20696a95b638f0b = weak_odr hidden unnamed_addr constant [24 x i8] c"@(#) Copyright TU1 v1.0\00", section "__loadtime_comment", align 1, !loadtime_comment !0 -@llvm.compiler.used = appending global [1 x ptr] [ptr @__loadtime_comment_str_f20696a95b638f0b], section "llvm.metadata" +@.loadtime_comment_vars.str = private unnamed_addr constant [22 x i8] c"loadtime_comment vars\00", align 1 +@loadtime_comment_vars_gv = internal global ptr @.loadtime_comment_vars.str, align 8, !loadtime_comment !0 +@llvm.compiler.used = appending global [2 x ptr] [ptr @__loadtime_comment_str_f20696a95b638f0b, ptr @loadtime_comment_vars_gv], section "llvm.metadata" define void @f0() { entry: @@ -23,16 +25,19 @@ entry: !0 = !{} ; ---- Globals -------------------------------------------- ; CHECK: @[[LOADTIME_COMMENT_STR:__loadtime_comment_str_[0-9a-f]+]] = weak_odr hidden unnamed_addr constant [24 x i8] c"@(#) Copyright TU1 v1.0\00", section "__loadtime_comment", align 1, !loadtime_comment !0 -; CHECK-NEXT: @llvm.compiler.used = appending global [1 x ptr] [ptr @[[LOADTIME_COMMENT_STR]]], section "llvm.metadata" +; CHECK: @.loadtime_comment_vars.str = private unnamed_addr constant [22 x i8] c"loadtime_comment vars\00", align 1 +; CHECK: @loadtime_comment_vars_gv = internal global ptr @.loadtime_comment_vars.str, align {{[0-9]+}}, !loadtime_comment !0 +; CHECK-NEXT: @llvm.compiler.used = appending global [2 x ptr] [ptr @[[LOADTIME_COMMENT_STR]], ptr 
@loadtime_comment_vars_gv], section "llvm.metadata" -; Function has an implicit ref MD pointing at the string: -; CHECK-O0: define void @f0() !implicit.ref ![[MD:[0-9]+]] -; CHECK-ON: define void @f0() local_unnamed_addr #0 !implicit.ref ![[MD:[0-9]+]] - -; CHECK-O0: define i32 @main() !implicit.ref ![[MD]] -; CHECK-ON: define noundef i32 @main() local_unnamed_addr #0 !implicit.ref ![[MD]] +; Function has implicit refs to both loadtime comment globals. +; CHECK-O0: define void @f0() !implicit.ref ![[MD:[0-9]+]] !implicit.ref ![[MD2:[0-9]+]] +; CHECK-ON: define void @f0() local_unnamed_addr #0 !implicit.ref ![[MD:[0-9]+]] !implicit.ref ![[MD2:[0-9]+]] +; CHECK-O0: define i32 @main() !implicit.ref ![[MD]] !implicit.ref ![[MD2]] +; CHECK-ON: define noundef i32 @main() local_unnamed_addr #0 !implicit.ref ![[MD]] !implicit.ref ![[MD2]] ; Verify metadata content ; CHECK-O0: ![[MD]] = !{ptr @[[LOADTIME_COMMENT_STR]]} ; CHECK-ON: ![[MD]] = !{ptr @[[LOADTIME_COMMENT_STR]]} +; CHECK-O0: ![[MD2]] = !{ptr @loadtime_comment_vars_gv} +; CHECK-ON: ![[MD2]] = !{ptr @loadtime_comment_vars_gv} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
