https://github.com/ilya-biryukov created https://github.com/llvm/llvm-project/pull/151666
Thanks to Justin Stitt for bringing this up, providing test cases and a direction for the fix! >From ac646c60e0b323eeab7dfa6e1b8965fa7fa78796 Mon Sep 17 00:00:00 2001 From: Ilya Biryukov <[email protected]> Date: Thu, 31 Jul 2025 16:35:50 +0200 Subject: [PATCH 1/2] [Clang] Dump minimization hints for namespaces Unlike other declarations, these cover two ranges: - from `namespace/inline namespace` to the opening `{`, - the closing `}`. This allows to mark the declarations inside the namespace itself independently. --- clang/lib/Frontend/FrontendAction.cpp | 86 +++++++++++++++---- .../test/Frontend/dump-minimization-hints.cpp | 49 ++++++++++- 2 files changed, 118 insertions(+), 17 deletions(-) diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp index 1d82fc775b28a..3aa5eecfbc8e5 100644 --- a/clang/lib/Frontend/FrontendAction.cpp +++ b/clang/lib/Frontend/FrontendAction.cpp @@ -9,6 +9,7 @@ #include "clang/Frontend/FrontendAction.h" #include "clang/AST/ASTConsumer.h" #include "clang/AST/ASTContext.h" +#include "clang/AST/Decl.h" #include "clang/AST/DeclGroup.h" #include "clang/Basic/Builtins.h" #include "clang/Basic/DiagnosticOptions.h" @@ -38,6 +39,7 @@ #include "clang/Serialization/ASTDeserializationListener.h" #include "clang/Serialization/ASTReader.h" #include "clang/Serialization/GlobalModuleIndex.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/BuryPointer.h" @@ -87,12 +89,25 @@ class DeserializedDeclsSourceRangePrinter : public ASTConsumer, // reducing the granularity and making the output less useful. return; } - if (auto *DC = D->getDeclContext(); !DC || !DC->isFileContext()) { + auto *DC = D->getLexicalDeclContext(); + if (!DC || !DC->isFileContext()) { // We choose to work at namespace level to reduce complexity and the // number of cases we care about. return; } + PendingDecls.push_back(D); + if (auto *NS = dyn_cast<NamespaceDecl>(DC)) { + // Add any namespaces we have not seen before. + // Note that we filter out namespaces from DeclRead as it includes too + // all redeclarations and we only want the ones that had other used + // declarations. + while (NS && ProcessedNamespaces.insert(NS).second) { + PendingDecls.push_back(NS); + + NS = dyn_cast<NamespaceDecl>(NS->getLexicalParent()); + } + } } struct Position { @@ -141,23 +156,25 @@ class DeserializedDeclsSourceRangePrinter : public ASTConsumer, OptionalFileEntryRef Ref; }; llvm::DenseMap<const FileEntry *, FileData> FileToRanges; + for (const Decl *D : PendingDecls) { - CharSourceRange R = SM.getExpansionRange(D->getSourceRange()); - if (!R.isValid()) - continue; + for (CharSourceRange R : getRangesToMark(D)) { + if (!R.isValid()) + continue; - auto *F = SM.getFileEntryForID(SM.getFileID(R.getBegin())); - if (F != SM.getFileEntryForID(SM.getFileID(R.getEnd()))) { - // Such cases are rare and difficult to handle. - continue; - } + auto *F = SM.getFileEntryForID(SM.getFileID(R.getBegin())); + if (F != SM.getFileEntryForID(SM.getFileID(R.getEnd()))) { + // Such cases are rare and difficult to handle. + continue; + } - auto &Data = FileToRanges[F]; - if (!Data.Ref) - Data.Ref = SM.getFileEntryRefForID(SM.getFileID(R.getBegin())); - Data.FromTo.push_back( - {Position::GetBeginSpelling(SM, R), - Position::GetEndSpelling(SM, R, D->getLangOpts())}); + auto &Data = FileToRanges[F]; + if (!Data.Ref) + Data.Ref = SM.getFileEntryRefForID(SM.getFileID(R.getBegin())); + Data.FromTo.push_back( + {Position::GetBeginSpelling(SM, R), + Position::GetEndSpelling(SM, R, D->getLangOpts())}); + } } // To simplify output, merge consecutive and intersecting ranges. @@ -188,10 +205,49 @@ class DeserializedDeclsSourceRangePrinter : public ASTConsumer, private: std::vector<const Decl *> PendingDecls; + llvm::DenseSet<const NamespaceDecl *> ProcessedNamespaces; bool IsCollectingDecls = true; const SourceManager &SM; std::unique_ptr<llvm::raw_ostream> OS; + llvm::SmallVector<CharSourceRange, 2> getRangesToMark(const Decl *D) { + auto *NS = dyn_cast<NamespaceDecl>(D); + if (!NS) + return {SM.getExpansionRange(D->getSourceRange())}; + + SourceLocation LBraceLoc; + if (NS->isAnonymousNamespace()) { + LBraceLoc = NS->getLocation(); + } else { + // Start with the location of the identifier. + SourceLocation TokenBeforeLBrace = NS->getLocation(); + if (NS->hasAttrs()) { + for (auto *A : NS->getAttrs()) { + // But attributes may go after it. + if (SM.isBeforeInTranslationUnit(TokenBeforeLBrace, + A->getRange().getEnd())) { + // Give up, the attributes are often coming from macros and we + // cannot skip them reliably. + return {}; + } + } + } + auto &LangOpts = D->getLangOpts(); + // Now skip one token, the next should be the lbrace. + Token Tok; + if (Lexer::getRawToken(TokenBeforeLBrace, Tok, SM, LangOpts, true) || + Lexer::getRawToken(Tok.getEndLoc(), Tok, SM, LangOpts, true) || + Tok.getKind() != tok::l_brace) { + // On error or if we did not find the token we expected, avoid marking + // everything inside the namespace as used. + return {}; + } + LBraceLoc = Tok.getLocation(); + } + return {SM.getExpansionRange(SourceRange(NS->getBeginLoc(), LBraceLoc)), + SM.getExpansionRange(NS->getRBraceLoc())}; + } + void printJson(llvm::ArrayRef<RequiredRanges> Result) { *OS << "{\n"; *OS << R"( "required_ranges": [)" << "\n"; diff --git a/clang/test/Frontend/dump-minimization-hints.cpp b/clang/test/Frontend/dump-minimization-hints.cpp index 273fd7f4ecd63..4c5dfbc343cd4 100644 --- a/clang/test/Frontend/dump-minimization-hints.cpp +++ b/clang/test/Frontend/dump-minimization-hints.cpp @@ -59,6 +59,36 @@ // RANGE-NEXT: "line": 23, // RANGE-NEXT: "column": 2 // RANGE-NEXT: } +// RANGE-NEXT: }, +// RANGE-NEXT: { +// RANGE-NEXT: "from": { +// RANGE-NEXT: "line": 31, +// RANGE-NEXT: "column": 1 +// RANGE-NEXT: }, +// RANGE-NEXT: "to": { +// RANGE-NEXT: "line": 31, +// RANGE-NEXT: "column": 27 +// RANGE-NEXT: } +// RANGE-NEXT: }, +// RANGE-NEXT: { +// RANGE-NEXT: "from": { +// RANGE-NEXT: "line": 32, +// RANGE-NEXT: "column": 3 +// RANGE-NEXT: }, +// RANGE-NEXT: "to": { +// RANGE-NEXT: "line": 32, +// RANGE-NEXT: "column": 12 +// RANGE-NEXT: } +// RANGE-NEXT: }, +// RANGE-NEXT: { +// RANGE-NEXT: "from": { +// RANGE-NEXT: "line": 34, +// RANGE-NEXT: "column": 1 +// RANGE-NEXT: }, +// RANGE-NEXT: "to": { +// RANGE-NEXT: "line": 34, +// RANGE-NEXT: "column": 2 +// RANGE-NEXT: } // RANGE-NEXT: } // RANGE-NEXT: ] // RANGE-NEXT: } @@ -88,7 +118,7 @@ int multiply(int a, int b) { return a * b; } -inline int unused_by_foo() {} // line 17 +inline void unused_by_foo() {} // line 17 inline void recursively_used_by_foo() {} // line 19 inline int used_by_foo() { // line 20 @@ -98,6 +128,20 @@ inline int used_by_foo() { // line 20 struct UnusedByFoo {}; +namespace ns_unused_by_foo { + void x(); +} + +namespace ns_used_by_foo { // line 31 + void x(); // line 32 + void unused_y(); +} // line 34 + +// Does not have any declarations that are used, so +// will not be marked as used. +namespace ns_used_by_foo { + void unused_z(); +} //--- foo.cpp #include "foo.h" int global_value = 5; @@ -107,5 +151,6 @@ int main() { int doubled_value = multiply(current_value, 2); int final_result = doubled_value + global_value; - return used_by_foo(); + used_by_foo(); + ns_used_by_foo::x(); } >From 616ef7343a7c387773cae36ee63be10b05bf673b Mon Sep 17 00:00:00 2001 From: Ilya Biryukov <[email protected]> Date: Fri, 1 Aug 2025 09:14:46 +0200 Subject: [PATCH 2/2] [Clang] Handle C++20 export declarations in -dump-minimization-hints Thanks to Justin Stitt for bringing this up, providing test cases and a direction for the fix! --- clang/lib/Frontend/FrontendAction.cpp | 82 ++++++++---- .../dump-minimization-hints-cpp20-modules.cpp | 117 ++++++++++++++++++ 2 files changed, 172 insertions(+), 27 deletions(-) create mode 100644 clang/test/Frontend/dump-minimization-hints-cpp20-modules.cpp diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp index 3aa5eecfbc8e5..4ceb78ee81bad 100644 --- a/clang/lib/Frontend/FrontendAction.cpp +++ b/clang/lib/Frontend/FrontendAction.cpp @@ -84,29 +84,28 @@ class DeserializedDeclsSourceRangePrinter : public ASTConsumer, if (!IsCollectingDecls) return; if (!D || isa<TranslationUnitDecl>(D) || isa<LinkageSpecDecl>(D) || - isa<NamespaceDecl>(D)) { + isa<NamespaceDecl>(D) || isa<ExportDecl>(D)) { // These decls cover a lot of nested declarations that might not be used, // reducing the granularity and making the output less useful. return; } - auto *DC = D->getLexicalDeclContext(); - if (!DC || !DC->isFileContext()) { - // We choose to work at namespace level to reduce complexity and the - // number of cases we care about. + if (isa<ParmVarDecl>(D)) { + // Parameters are covered by their functions. return; } + auto *DC = D->getLexicalDeclContext(); + if (!DC || !shouldIncludeDeclsIn(DC)) + return; PendingDecls.push_back(D); - if (auto *NS = dyn_cast<NamespaceDecl>(DC)) { - // Add any namespaces we have not seen before. - // Note that we filter out namespaces from DeclRead as it includes too - // all redeclarations and we only want the ones that had other used - // declarations. - while (NS && ProcessedNamespaces.insert(NS).second) { - PendingDecls.push_back(NS); - - NS = dyn_cast<NamespaceDecl>(NS->getLexicalParent()); - } + for (; (isa<ExportDecl>(DC) || isa<NamespaceDecl>(DC)) && + ProcessedDeclContexts.insert(DC).second; + DC = DC->getLexicalParent()) { + // Add any interesting decl contexts that we have not seen before. + // Note that we filter them out from DeclRead as that would include all + // redeclarations of namespaces, potentially those that do not have any + // imported declarations. + PendingDecls.push_back(cast<Decl>(DC)); } } @@ -205,12 +204,37 @@ class DeserializedDeclsSourceRangePrinter : public ASTConsumer, private: std::vector<const Decl *> PendingDecls; - llvm::DenseSet<const NamespaceDecl *> ProcessedNamespaces; + llvm::DenseSet<const DeclContext *> ProcessedDeclContexts; bool IsCollectingDecls = true; const SourceManager &SM; std::unique_ptr<llvm::raw_ostream> OS; + static bool shouldIncludeDeclsIn(const DeclContext* DC) { + assert(DC && "DC is null"); + // We choose to work at namespace level to reduce complexity and the number + // of cases we care about. + // We still need to carefully handle composite declarations like + // `ExportDecl`. + for (; DC; DC = DC->getLexicalParent()) { + if (DC->isFileContext()) + return true; + if (isa<ExportDecl>(DC)) + continue; // Depends on the parent. + return false; + } + llvm_unreachable("DeclConext chain must end with a translation unit"); + } llvm::SmallVector<CharSourceRange, 2> getRangesToMark(const Decl *D) { + if (auto *ED = dyn_cast<ExportDecl>(D)) { + if (!ED->hasBraces()) + return {SM.getExpansionRange(ED->getExportLoc())}; + + return {SM.getExpansionRange(SourceRange( + ED->getExportLoc(), + lexForLBrace(ED->getExportLoc(), D->getLangOpts()))), + SM.getExpansionRange(ED->getRBraceLoc())}; + } + auto *NS = dyn_cast<NamespaceDecl>(D); if (!NS) return {SM.getExpansionRange(D->getSourceRange())}; @@ -232,17 +256,7 @@ class DeserializedDeclsSourceRangePrinter : public ASTConsumer, } } } - auto &LangOpts = D->getLangOpts(); - // Now skip one token, the next should be the lbrace. - Token Tok; - if (Lexer::getRawToken(TokenBeforeLBrace, Tok, SM, LangOpts, true) || - Lexer::getRawToken(Tok.getEndLoc(), Tok, SM, LangOpts, true) || - Tok.getKind() != tok::l_brace) { - // On error or if we did not find the token we expected, avoid marking - // everything inside the namespace as used. - return {}; - } - LBraceLoc = Tok.getLocation(); + LBraceLoc = lexForLBrace(TokenBeforeLBrace, D->getLangOpts()); } return {SM.getExpansionRange(SourceRange(NS->getBeginLoc(), LBraceLoc)), SM.getExpansionRange(NS->getRBraceLoc())}; @@ -283,6 +297,20 @@ class DeserializedDeclsSourceRangePrinter : public ASTConsumer, *OS << " ]\n"; *OS << "}\n"; } + + SourceLocation lexForLBrace(SourceLocation TokenBeforeLBrace, + const LangOptions &LangOpts) { + // Now skip one token, the next should be the lbrace. + Token Tok; + if (Lexer::getRawToken(TokenBeforeLBrace, Tok, SM, LangOpts, true) || + Lexer::getRawToken(Tok.getEndLoc(), Tok, SM, LangOpts, true) || + Tok.getKind() != tok::l_brace) { + // On error or if we did not find the token we expected, avoid marking + // everything inside the namespace as used. + return SourceLocation(); + } + return Tok.getLocation(); + } }; /// Dumps deserialized declarations. diff --git a/clang/test/Frontend/dump-minimization-hints-cpp20-modules.cpp b/clang/test/Frontend/dump-minimization-hints-cpp20-modules.cpp new file mode 100644 index 0000000000000..978af4cc7c0ad --- /dev/null +++ b/clang/test/Frontend/dump-minimization-hints-cpp20-modules.cpp @@ -0,0 +1,117 @@ +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// RUN: %clang_cc1 -std=c++20 %t/foo.cppm -emit-module-interface -o %t/foo.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/use.cpp -dump-minimization-hints=%t/decls +// RUN: cat %t/decls +// RUN: cat %t/decls | FileCheck -check-prefix=RANGE %s +// RANGE:{ +// RANGE-NEXT:"required_ranges": [ +// RANGE-NEXT: { +// RANGE-NEXT: "file": "/usr/local/google/home/ibiryukov/code/llvm-project/build/tools/clang/test/Frontend/Output/dump-minimization-hints-cpp20-modules.cpp.tmp/foo.cppm", +// RANGE-NEXT: "range": [ +// RANGE-NEXT: { +// RANGE-NEXT: "from": { +// RANGE-NEXT: "line": 3, +// RANGE-NEXT: "column": 1 +// RANGE-NEXT: }, +// RANGE-NEXT: "to": { +// RANGE-NEXT: "line": 3, +// RANGE-NEXT: "column": 22 +// RANGE-NEXT: } +// RANGE-NEXT: }, +// RANGE-NEXT: { +// RANGE-NEXT: "from": { +// RANGE-NEXT: "line": 4, +// RANGE-NEXT: "column": 3 +// RANGE-NEXT: }, +// RANGE-NEXT: "to": { +// RANGE-NEXT: "line": 4, +// RANGE-NEXT: "column": 9 +// RANGE-NEXT: } +// RANGE-NEXT: }, +// RANGE-NEXT: { +// RANGE-NEXT: "from": { +// RANGE-NEXT: "line": 4, +// RANGE-NEXT: "column": 10 +// RANGE-NEXT: }, +// RANGE-NEXT: "to": { +// RANGE-NEXT: "line": 4, +// RANGE-NEXT: "column": 43 +// RANGE-NEXT: } +// RANGE-NEXT: }, +// RANGE-NEXT: { +// RANGE-NEXT: "from": { +// RANGE-NEXT: "line": 6, +// RANGE-NEXT: "column": 1 +// RANGE-NEXT: }, +// RANGE-NEXT: "to": { +// RANGE-NEXT: "line": 6, +// RANGE-NEXT: "column": 2 +// RANGE-NEXT: } +// RANGE-NEXT: }, +// RANGE-NEXT: { +// RANGE-NEXT: "from": { +// RANGE-NEXT: "line": 8, +// RANGE-NEXT: "column": 1 +// RANGE-NEXT: }, +// RANGE-NEXT: "to": { +// RANGE-NEXT: "line": 8, +// RANGE-NEXT: "column": 7 +// RANGE-NEXT: } +// RANGE-NEXT: }, +// RANGE-NEXT: { +// RANGE-NEXT: "from": { +// RANGE-NEXT: "line": 8, +// RANGE-NEXT: "column": 8 +// RANGE-NEXT: }, +// RANGE-NEXT: "to": { +// RANGE-NEXT: "line": 8, +// RANGE-NEXT: "column": 25 +// RANGE-NEXT: } +// RANGE-NEXT: }, +// RANGE-NEXT: { +// RANGE-NEXT: "from": { +// RANGE-NEXT: "line": 9, +// RANGE-NEXT: "column": 3 +// RANGE-NEXT: }, +// RANGE-NEXT: "to": { +// RANGE-NEXT: "line": 9, +// RANGE-NEXT: "column": 36 +// RANGE-NEXT: } +// RANGE-NEXT: }, +// RANGE-NEXT: { +// RANGE-NEXT: "from": { +// RANGE-NEXT: "line": 11, +// RANGE-NEXT: "column": 1 +// RANGE-NEXT: }, +// RANGE-NEXT: "to": { +// RANGE-NEXT: "line": 11, +// RANGE-NEXT: "column": 2 +// RANGE-NEXT: } +// RANGE-NEXT: } +// RANGE-NEXT: ] +// RANGE-NEXT: } +// RANGE-NEXT:] +// RANGE-NEXT:} + +//--- foo.cppm +export module foo; + +namespace piecemeal { // line 3 + export int used(int n) { return n + 1; } + export int unused(int n) { return n + 2; } +} + +export namespace whole { // line 8 + int used(int n) { return n + 1; } + int unused(int n) { return n + 3; } +} // line 11 + +//--- use.cpp +import foo; + +int main() { + piecemeal::used(4); // only of the functions used from each namespace. + whole::used(4); +} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
