https://github.com/tonykuttai updated https://github.com/llvm/llvm-project/pull/187986
>From aaf3e32ad107731f28260d0709f86887246f7cae Mon Sep 17 00:00:00 2001 From: Tony Varghese <[email protected]> Date: Sat, 23 May 2026 14:54:05 -0400 Subject: [PATCH 1/2] [PowerPC][AIX] Add -mloadtime-comment-vars support --- clang/include/clang/Basic/CodeGenOptions.h | 3 + clang/include/clang/Options/Options.td | 7 + clang/lib/CodeGen/CodeGenModule.cpp | 78 ++++++++++ clang/lib/CodeGen/CodeGenModule.h | 8 + clang/lib/Driver/ToolChains/Clang.cpp | 5 + clang/test/CodeGen/loadtime-comment-vars.c | 28 ++++ .../Utils/LowerCommentStringPass.cpp | 146 +++++++++++------- .../loadtime-comment-vars.ll | 26 ++++ 8 files changed, 245 insertions(+), 56 deletions(-) create mode 100644 clang/test/CodeGen/loadtime-comment-vars.c create mode 100644 llvm/test/Transforms/LowerCommentString/loadtime-comment-vars.ll diff --git a/clang/include/clang/Basic/CodeGenOptions.h b/clang/include/clang/Basic/CodeGenOptions.h index e43112b4bb98b..54b2fd2077d7b 100644 --- a/clang/include/clang/Basic/CodeGenOptions.h +++ b/clang/include/clang/Basic/CodeGenOptions.h @@ -334,6 +334,9 @@ class CodeGenOptions : public CodeGenOptionsBase { /// A list of linker options to embed in the object file. std::vector<std::string> LinkerOptions; + /// List of global variable names to preserve as loadtime comment variables. + std::vector<std::string> LoadTimeCommentVars; + /// Name of the profile file to use as output for -fprofile-instr-generate, /// -fprofile-generate, and -fcs-profile-generate. std::string InstrProfileOutput; diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td index 753e3ac1b74a5..ae800711a2612 100644 --- a/clang/include/clang/Options/Options.td +++ b/clang/include/clang/Options/Options.td @@ -4758,6 +4758,13 @@ def fvisibility_global_new_delete_EQ : Joined<["-"], "fvisibility-global-new-del Visibility<[ClangOption, CC1Option]>, HelpText<"The visibility for global C++ operator new and delete declarations. 
If 'source' is specified the visibility is not adjusted">, MarshallingInfoVisibilityGlobalNewDelete<LangOpts<"GlobalAllocationFunctionVisibility">, "ForceDefault">; +def mloadtime_comment_vars_EQ + : CommaJoined<["-"], "mloadtime-comment-vars=">, + Group<m_Group>, + Visibility<[ClangOption, CC1Option]>, + HelpText<"Comma-separated list of global variable names to treat as " + "loadtime variables">, + MarshallingInfoStringVector<CodeGenOpts<"LoadTimeCommentVars">>; def mdefault_visibility_export_mapping_EQ : Joined<["-"], "mdefault-visibility-export-mapping=">, Values<"none,explicit,all">, NormalizedValuesScope<"LangOptions::DefaultVisiblityExportMapping">, diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 8630e000c59d0..2866abe8077cc 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -1738,6 +1738,9 @@ void CodeGenModule::Release() { EmitLoadTimeComment(); + // Emit loadtime comment variables specified via -mloadtime-comment-vars. + EmitLoadTimeCommentVars(); + // If there is device offloading code embed it in the host now. EmbedObject(&getModule(), CodeGenOpts, *getFileSystem(), getDiags()); @@ -4237,6 +4240,81 @@ void CodeGenModule::EmitLoadTimeComment() { } } +/// Check if a variable declaration is suitable to be treated as a loadtime +/// comment variable. Valid variables must be character pointers or character +/// arrays with an initializer. +bool CodeGenModule::isValidLoadTimeCommentVariable(const VarDecl *D) const { + // Must be a valid declaration and must have an initializer (the string) + if (!D || !D->hasInit()) + return false; + + QualType Ty = D->getType(); + + // 1. Handle Pointers (e.g., char *sccsid, const char *copyright) + if (const PointerType *PT = Ty->getAs<PointerType>()) { + if (PT->getPointeeType()->isAnyCharacterType()) + return true; + } + + // 2. 
Handle Arrays (e.g., char version[]) + // We use ASTContext::getAsArrayType to safely unwrap constant arrays + if (const ArrayType *AT = getContext().getAsArrayType(Ty)) { + if (AT->getElementType()->isAnyCharacterType()) + return true; + } + + return false; // Reject ints, structs, etc. +} + +/// Emit global variables specified via -mloadtime-comment-vars as loadtime +/// comment variables. These variables are tagged with metadata and marked as +/// used to prevent garbage collection. Only valid on AIX. +void CodeGenModule::EmitLoadTimeCommentVars() { + // Only supported on AIX + if (!getTriple().isOSAIX()) + return; + + const auto &LoadTimeCommentVars = getCodeGenOpts().LoadTimeCommentVars; + if (LoadTimeCommentVars.empty()) + return; + + TranslationUnitDecl *TU = getContext().getTranslationUnitDecl(); + // Iterate through all top-level declarations + for (auto *D : TU->decls()) { + if (VarDecl *VD = dyn_cast<VarDecl>(D)) { + + // Check if the variable name is in the loadtime comment vars list + if (!llvm::is_contained(LoadTimeCommentVars, VD->getName())) + continue; + + if (!isValidLoadTimeCommentVariable(VD)) + continue; + + // Get or create the GlobalValue in the IR + llvm::Constant *Addr = GetAddrOfGlobalVar(VD); + + // Strip pointer casts safely + if (auto *GV = + dyn_cast<llvm::GlobalVariable>(Addr->stripPointerCasts())) { + + // Force Clang to emit the definition if it skipped it + if (GV->isDeclaration()) + EmitGlobalDefinition(VD); + + if (!GV->isDeclaration()) { + // Tag with metadata for backend processing + auto &C = getLLVMContext(); + llvm::Metadata *Ops[] = {llvm::MDString::get(C, VD->getName())}; + GV->setMetadata("copyright.variable", llvm::MDNode::get(C, Ops)); + + // Prevent linker and optimization passes from removing this variable + addUsedGlobal(GV); + } + } + } + } +} + bool CodeGenModule::MayBeEmittedEagerly(const ValueDecl *Global) { // In OpenMP 5.0 variables and function may be marked as // device_type(host/nohost) and we should not 
emit them eagerly unless we sure diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index 44b816084316a..a041184c7828c 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -2172,6 +2172,14 @@ class CodeGenModule : public CodeGenTypeCache { /// Emit the load-time comment metadata (e.g., from /// #pragma comment(copyright, ...)) for the translation unit. void EmitLoadTimeComment(); + + /// Check if a variable declaration is suitable to be treated as a loadtime + /// comment variable (must be a character pointer or array with initializer). + bool isValidLoadTimeCommentVariable(const VarDecl *D) const; + + /// Emit global variables specified via -mloadtime-comment-vars as loadtime + /// comment variables, tagging them with metadata and preventing removal. + void EmitLoadTimeCommentVars(); }; } // end namespace CodeGen diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 8d8e00bbaf7d0..4d3aebcc1a2a8 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -6180,6 +6180,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, else if (UnwindTables) CmdArgs.push_back("-funwind-tables=1"); + // Forward loadtime-comment vars option to cc1. + if (Arg *A = Args.getLastArg(options::OPT_mloadtime_comment_vars_EQ)) { + A->render(Args, CmdArgs); + } + // Sframe unwind tables are independent of the other types. Although also // defined for aarch64, only x86_64 support is implemented at the moment. 
if (Arg *A = Args.getLastArg(options::OPT_gsframe)) { diff --git a/clang/test/CodeGen/loadtime-comment-vars.c b/clang/test/CodeGen/loadtime-comment-vars.c new file mode 100644 index 0000000000000..ef32ba494ed80 --- /dev/null +++ b/clang/test/CodeGen/loadtime-comment-vars.c @@ -0,0 +1,28 @@ +// RUN: %clang_cc1 -triple powerpc-ibm-aix -mloadtime-comment-vars=sccsid,version,build_number -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple powerpc64-ibm-aix -mloadtime-comment-vars=sccsid,version,build_number -emit-llvm -o - %s | FileCheck %s + +// String pointer (Should be emitted and tagged) +static char *sccsid = "@(#) Object sid Version 1.0"; + +// String array (Should be emitted and tagged) +static char version[] = "Object scc Version 2.0"; + +// Const string (Not in CLI list, should NOT be emitted) +static const char *copyright = "Copyright 2026"; + +// Integer (In CLI list but invalid type, should NOT be emitted) +static int build_number = 12345; + +void foo() {} + +// CHECK: @sccsid = internal global ptr @.str, align {{[0-9]+}}, !copyright.variable ![[MD_SCC:[0-9]+]] +// CHECK-NEXT: @.str = private unnamed_addr constant [28 x i8] c"@(#) Object sid Version 1.0\00", align 1 +// CHECK: @version = internal global [23 x i8] c"Object scc Version 2.0\00", align {{[0-9]+}}, !copyright.variable ![[MD_VER:[0-9]+]] + +// Ensure the unrequested/invalid variables are optimized away +// CHECK-NOT: @copyright +// CHECK-NOT: @build_number + +// Ensure the metadata tags contain the correct strings +// CHECK: ![[MD_SCC]] = !{!"sccsid"} +// CHECK: ![[MD_VER]] = !{!"version"} diff --git a/llvm/lib/Transforms/Utils/LowerCommentStringPass.cpp b/llvm/lib/Transforms/Utils/LowerCommentStringPass.cpp index cc9bee494d597..240df69a7ad21 100644 --- a/llvm/lib/Transforms/Utils/LowerCommentStringPass.cpp +++ b/llvm/lib/Transforms/Utils/LowerCommentStringPass.cpp @@ -83,68 +83,102 @@ PreservedAnalyses LowerCommentStringPass::run(Module &M, LLVMContext &Ctx = M.getContext(); - // 
Single-metadata: !comment_string.loadtime = !{!0} - // Each operand node is expected to have one MDString operand. + // This pass processes two types of copyright/identifying information: + // 1. A single TU-wide copyright string from #pragma comment(copyright, "...") + // 2. Multiple user-specified variables from -mloadtime-comment-vars=... + // + // Both need implicit references from every function to survive DCE and LTO. + // Collect all copyright globals, then create implicit references + // from every function definition to each global. This forces the backend + // to treat them as reachable and preserve them in the final object file. + SmallVector<GlobalValue *, 4> CopyrightGlobals; + + // ========================================================================= + // Process #pragma comment(copyright, "...") - at most one per TU + // ========================================================================= + // Frontend emits module-level metadata: + // !comment_string.loadtime = !{!0} + // !0 = !{!"Copyright text here"} + // + // We materialize this as a global string in the __loadtime_comment section, + // which linkers recognize and include in the object file's loadtime + // comment area. NamedMDNode *MD = M.getNamedMetadata("comment_string.loadtime"); - if (!MD || MD->getNumOperands() == 0) - return PreservedAnalyses::all(); - - // At this point we are guaranteed that one TU contains a single copyright - // metadata entry. Create TU-local string global for that metadata entry. - MDNode *MdNode = MD->getOperand(0); - if (!MdNode || MdNode->getNumOperands() == 0) - return PreservedAnalyses::all(); - - auto *MdString = dyn_cast_or_null<MDString>(MdNode->getOperand(0)); - if (!MdString) - return PreservedAnalyses::all(); - - StringRef Text = MdString->getString(); - if (Text.empty()) - return PreservedAnalyses::all(); - - // 1. Create a single null-terminated string global. 
- Constant *StrInit = ConstantDataArray::getString(Ctx, Text, /*AddNull=*/true); - - // The global variable should be internal, constant, and TU-local. - // This avoids duplicate symbol issues across TUs. - auto *StrGV = new GlobalVariable(M, StrInit->getType(), - /*isConstant=*/true, - GlobalValue::InternalLinkage, StrInit, - /*Name=*/"__loadtime_comment_str"); - // Set unnamed_addr to allow the linker to merge identical strings. - StrGV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); - StrGV->setAlignment(Align(1)); - // Place in the "__loadtime_comment" section. - // The GV is constant, so we expect a read-only section. - StrGV->setSection("__loadtime_comment"); - - // 2. Add the string to llvm.compiler.used to prevent LLVM optimization/LTO - // passes from removing it. - appendToCompilerUsed(M, {StrGV}); - - // 3. Attach !implicit.ref metadata to every defined function. - // Create a metadata node pointing to the copyright string: - // !N = !{ptr @__loadtime_comment_str} - Metadata *Ops[] = {ConstantAsMetadata::get(StrGV)}; - MDNode *ImplicitRefMD = MDNode::get(Ctx, Ops); - - auto AddImplicitRef = [&](Function &F) { + if (MD && MD->getNumOperands() > 0) { + MDNode *MdNode = MD->getOperand(0); + if (MdNode && MdNode->getNumOperands() > 0) { + auto *MdString = dyn_cast_or_null<MDString>(MdNode->getOperand(0)); + if (MdString && !MdString->getString().empty()) { + StringRef Text = MdString->getString(); + // Create a null-terminated string constant in the special section + Constant *StrInit = + ConstantDataArray::getString(Ctx, Text, /*AddNull*/ true); + // The global variable should be internal, constant, and TU-local. + // This avoids duplicate symbol issues across TUs. 
+ auto *StrGV = new GlobalVariable(M, StrInit->getType(), + /*isConstant=*/true, + GlobalValue::InternalLinkage, StrInit, + /*Name=*/"__loadtime_comment_str"); + StrGV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); + StrGV->setAlignment(Align(1)); + // Backend recognizes this section and emits it to .loadtime_comment + StrGV->setSection("__loadtime_comment"); + // Prevent removal by optimizer passes (but not sufficient for linker) + appendToCompilerUsed(M, {StrGV}); + // Add to list - will get implicit refs from all functions below + CopyrightGlobals.push_back(StrGV); + } + } + // Clean up the metadata - we've consumed it + MD->eraseFromParent(); + } + + // ========================================================================= + // Process -mloadtime-comment-vars=sccsid,version,... (CLI flag) + // ========================================================================= + // Frontend marks qualifying variables with metadata: + // @sccsid = internal global ptr @.str, !copyright.variable !{!"sccsid"} + // + // These are user-defined globals (char*/char[]) that should be preserved. + // Unlike pragma strings, these already exist in the IR - we just need to + // ensure they survive to the object file by adding implicit references. + for (GlobalVariable &GV : M.globals()) { + if (GV.getMetadata("copyright.variable")) { + CopyrightGlobals.push_back(&GV); + } + } + + // ========================================================================= + // Create implicit references from every function to each global + // ========================================================================= + // Each implicit.ref node references exactly ONE global. Multiple nodes + // can be attached to a single function (e.g., !implicit.ref !1, !implicit.ref + // !2) + auto AddImplicitRef = [&](Function &F, GlobalValue *GV) { if (F.isDeclaration()) return; - // Attach the !implicit.ref metadata to the function. 
- F.setMetadata(LLVMContext::MD_implicit_ref, ImplicitRefMD); - LLVM_DEBUG(dbgs() << "[copyright] attached implicit.ref to function: " - << F.getName() << "\n"); + // Create metadata: !N = !{ptr @global_variable} + Metadata *Ops[] = {ConstantAsMetadata::get(GV)}; + MDNode *NewMD = MDNode::get(Ctx, Ops); + // Attach to function - addMetadata allows multiple + // !implicit.ref nodes per function, one for each copyright global + F.addMetadata(LLVMContext::MD_implicit_ref, *NewMD); + + LLVM_DEBUG(dbgs() << "[copyright] attached implicit.ref to function: " + << F.getName() << " for global: " << GV->getName() + << "\n"); }; - // Process all functions in the module and add !implicit.ref to the function. - for (Function &F : M) - AddImplicitRef(F); + // Apply implicit references: for each global, mark all functions as users + if (!CopyrightGlobals.empty()) { + for (GlobalValue *GV : CopyrightGlobals) { + for (Function &F : M) + AddImplicitRef(F, GV); + } + } - // Cleanup the processed metadata. 
- MD->eraseFromParent(); - LLVM_DEBUG(dbgs() << "[copyright] created string and anchor for module\n"); + LLVM_DEBUG(dbgs() << "[copyright] processed " << CopyrightGlobals.size() + << " copyright globals\n"); return PreservedAnalyses::all(); } diff --git a/llvm/test/Transforms/LowerCommentString/loadtime-comment-vars.ll b/llvm/test/Transforms/LowerCommentString/loadtime-comment-vars.ll new file mode 100644 index 0000000000000..a845749c53cfb --- /dev/null +++ b/llvm/test/Transforms/LowerCommentString/loadtime-comment-vars.ll @@ -0,0 +1,26 @@ +; RUN: opt -passes=lower-comment-string -S < %s | FileCheck %s + +target triple = "powerpc64-ibm-aix" + +@sccsid = internal global ptr @.str, align 8, !copyright.variable !0 +@.str = private unnamed_addr constant [24 x i8] c"@(#) sccsid Version 1.0\00", align 1 +@version = internal global [22 x i8] c"Copyright Version 2.0\00", align 1, !copyright.variable !1 + +; CHECK: define void @foo() !implicit.ref ![[REF1:[0-9]+]] !implicit.ref ![[REF2:[0-9]+]] { +define void @foo() { +entry: + ret void +} + +; CHECK: define void @bar() !implicit.ref ![[REF1]] !implicit.ref ![[REF2]] { +define void @bar() { +entry: + ret void +} + +!0 = !{!"sccsid"} +!1 = !{!"version"} + +; Verify that the generated implicit.ref metadata nodes point to the correct global variables. 
+; CHECK: ![[REF1]] = !{ptr @sccsid} +; CHECK: ![[REF2]] = !{ptr @version} >From a992929e4b913173088e96c4bca746db3c8cf59a Mon Sep 17 00:00:00 2001 From: Tony Varghese <[email protected]> Date: Sun, 24 May 2026 00:30:37 +0530 Subject: [PATCH 2/2] Apply changes from code browser Apply changes from code browser --- .../Utils/LowerCommentStringPass.cpp | 85 +++++++++++++------ 1 file changed, 57 insertions(+), 28 deletions(-) diff --git a/llvm/lib/Transforms/Utils/LowerCommentStringPass.cpp b/llvm/lib/Transforms/Utils/LowerCommentStringPass.cpp index 240df69a7ad21..f0354e8d7704f 100644 --- a/llvm/lib/Transforms/Utils/LowerCommentStringPass.cpp +++ b/llvm/lib/Transforms/Utils/LowerCommentStringPass.cpp @@ -4,40 +4,69 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -//===---------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// +// This pass processes copyright and identifying variable metadata for AIX, +// handling two distinct mechanisms: +// +// 1. #pragma comment(copyright, "...") - TU-wide copyright strings +// 2. -mloadtime-comment-vars=<names> - User-specified global variables +// +// Both types of information must be preserved in the final object file and +// survive optimization passes including DCE and LTO. +// +// === Mechanism 1: #pragma comment(copyright, "...") === // -// This pass lowers the module-level comment string metadata emitted by Clang: +// Clang emits module-level metadata for copyright pragmas: // // !comment_string.loadtime = !{!"Copyright ..."} // -// into concrete, translation-unit-local globals. -// This Pass is enabled only for AIX. -// For each module (translation unit), the pass performs the following: +// This pass materializes the metadata into a concrete global variable: // // 1. 
Creates a null-terminated, internal constant string global -// (`__loadtime_comment_str`) containing the copyright text with -// section attribute "__loadtime_comment". The backend places this -// in the .text section of the object file. -// -// 2. Marks the string in `llvm.compiler.used` so it cannot be dropped by -// optimization or LTO. -// -// 3. Attaches `!implicit.ref` metadata referencing the string to every -// defined function in the module. The PowerPC AIX backend recognizes -// this metadata and emits a `.ref` directive from the function to the -// string, creating a concrete relocation that prevents the linker from -// discarding the string (as long as the referencing symbol is kept). -// -// Input IR: -// !comment_string.loadtime = !{!"Copyright"} -// Output IR: -// @__loadtime_comment_str = internal constant [N x i8] c"Copyright\00", -// section "__loadtime_comment" -// @llvm.compiler.used = appending global [1 x ptr] [ptr -// @__loadtime_comment_str] -// -// define i32 @func() !implicit.ref !5 { ... } -// !5 = !{ptr @__loadtime_comment_str} +// `__loadtime_comment_str` containing the copyright text with section +// attribute "__loadtime_comment". The backend emits this to a special +// section in the object file. +// +// 2. Marks the global in `llvm.compiler.used` to prevent removal by +// optimization passes. +// +// 3. Attaches `!implicit.ref` metadata to every defined function, +// referencing the global. The PowerPC AIX backend emits a `.ref` +// directive for each reference, creating relocations that prevent the +// linker from discarding the string. +// +// === Mechanism 2: -mloadtime-comment-vars=<names> === +// +// Clang tags user-specified global variables (e.g., char *sccsid, char +// version[]) with metadata: +// +// @sccsid = internal global ptr @.str, !copyright.variable !{!"sccsid"} +// +// This pass: +// +// 1. Identifies globals tagged with `!copyright.variable` metadata. +// +// 2. 
Attaches `!implicit.ref` metadata to every defined function, +// referencing each tagged global. This ensures the variables survive +// optimization and linking. +// +// === Output Example === +// +// Input IR: +// !comment_string.loadtime = !{!"Copyright 2026"} +// @sccsid = internal global ptr @.str, !copyright.variable !{!"sccsid"} +// +// Output IR: +// @__loadtime_comment_str = internal constant [15 x i8] c"Copyright 2026\00", +// section "__loadtime_comment" +// @llvm.compiler.used = appending global [1 x ptr] +// [ptr @__loadtime_comment_str] +// @sccsid = internal global ptr @.str, !copyright.variable !{!"sccsid"} +// +// define i32 @func() !implicit.ref !1 !implicit.ref !2 { ... } +// !1 = !{ptr @__loadtime_comment_str} +// !2 = !{ptr @sccsid} // //===----------------------------------------------------------------------===// _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
