https://github.com/DEBADRIBASAK created https://github.com/llvm/llvm-project/pull/166568
This PR adds the implementation for printing missing origin stats for lifetime analysis. **Purpose:** This capability is added to track the expression types with missing origin. While retrieving the origins from origin manager, some expressions show missing origins. Currently these are created on the fly using getOrCreate function. For analysing the coverage of the check, it will be necessary to see what kind of expressions have a missing origin. It prints the counts in this form: `StmtClassName<Type> : count` **Approach:** 1. The signature of the runLifetimeAnalysis function is changed to return the LifetimeAnalysis object which will be used to get the origin manager which can be used for finding the count of missing origins. 2. The count of missing origins is kept in origin manager while the CFG is visited as part of the analysis. Example output: For the file llvm-project/llvm/lib/Demangle/Demangle.cpp: ``` *** LifetimeSafety Missing Origin Stats (expression_type : count) : DeclRefExpr<std::string_view> : 52 StringLiteral<const char[3]> : 2 DeclRefExpr<char *> : 15 CallExpr<char *> : 4 StringLiteral<const char[2]> : 1 Total missing origins: 74 **************************************** ``` >From fd8693c51af7d9f91be0926f4150e77c39e2dba4 Mon Sep 17 00:00:00 2001 From: Debadri Basak <[email protected]> Date: Wed, 5 Nov 2025 14:07:42 +0000 Subject: [PATCH 1/2] Adding the lifetime stats collection logic to AnalysisBasedWarnings --- .../Analyses/LifetimeSafety/LifetimeSafety.h | 15 +++++++--- .../Analyses/LifetimeSafety/Origins.h | 6 ++++ .../clang/Sema/AnalysisBasedWarnings.h | 9 ++++++ .../LifetimeSafety/LifetimeSafety.cpp | 13 +++++--- clang/lib/Analysis/LifetimeSafety/Origins.cpp | 18 +++++++++++ clang/lib/Sema/AnalysisBasedWarnings.cpp | 30 +++++++++++++++++-- 6 files changed, 81 insertions(+), 10 deletions(-) diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h index 91ffbb169f947..eb532bc8be3a7 100644 --- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h @@ -23,7 +23,11 @@ #include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" #include "clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h" #include "clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h" +#include "clang/Analysis/Analyses/LifetimeSafety/Origins.h" #include "clang/Analysis/AnalysisDeclContext.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/Support/raw_ostream.h" +#include <string> namespace clang::lifetimes { @@ -44,10 +48,6 @@ class LifetimeSafetyReporter { Confidence Confidence) {} }; -/// The main entry point for the analysis. -void runLifetimeSafetyAnalysis(AnalysisDeclContext &AC, - LifetimeSafetyReporter *Reporter); - namespace internal { /// An object to hold the factories for immutable collections, ensuring /// that all created states share the same underlying memory management. @@ -60,6 +60,7 @@ struct LifetimeFactory { /// Running the lifetime safety analysis and querying its results. It /// encapsulates the various dataflow analyses. class LifetimeSafetyAnalysis { + public: LifetimeSafetyAnalysis(AnalysisDeclContext &AC, LifetimeSafetyReporter *Reporter); @@ -82,6 +83,12 @@ class LifetimeSafetyAnalysis { std::unique_ptr<LoanPropagationAnalysis> LoanPropagation; }; } // namespace internal + +/// The main entry point for the analysis. +std::unique_ptr<internal::LifetimeSafetyAnalysis> +runLifetimeSafetyAnalysis(AnalysisDeclContext &AC, + LifetimeSafetyReporter *Reporter); + } // namespace clang::lifetimes #endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h index ba138b078b379..3f8c8a4d7ce9b 100644 --- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h @@ -16,7 +16,10 @@ #include "clang/AST/Decl.h" #include "clang/AST/Expr.h" +#include "clang/AST/TypeBase.h" #include "clang/Analysis/Analyses/LifetimeSafety/Utils.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/Support/raw_ostream.h" namespace clang::lifetimes::internal { @@ -76,6 +79,8 @@ class OriginManager { void dump(OriginID OID, llvm::raw_ostream &OS) const; + const llvm::StringMap<int> getMissingOrigins() const; + private: OriginID getNextOriginID() { return NextOriginID++; } @@ -85,6 +90,7 @@ class OriginManager { llvm::SmallVector<Origin> AllOrigins; llvm::DenseMap<const clang::ValueDecl *, OriginID> DeclToOriginID; llvm::DenseMap<const clang::Expr *, OriginID> ExprToOriginID; + llvm::StringMap<unsigned> ExprTypeToMissingOriginCount; }; } // namespace clang::lifetimes::internal diff --git a/clang/include/clang/Sema/AnalysisBasedWarnings.h b/clang/include/clang/Sema/AnalysisBasedWarnings.h index 4103c3f006a8f..604039ef61cb7 100644 --- a/clang/include/clang/Sema/AnalysisBasedWarnings.h +++ b/clang/include/clang/Sema/AnalysisBasedWarnings.h @@ -14,7 +14,10 @@ #define LLVM_CLANG_SEMA_ANALYSISBASEDWARNINGS_H #include "clang/AST/Decl.h" +#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" +#include "clang/Analysis/AnalysisDeclContext.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringMap.h" #include <memory> namespace clang { @@ -95,6 +98,9 @@ class AnalysisBasedWarnings { /// a single function. unsigned MaxUninitAnalysisBlockVisitsPerFunction; + /// Map from expressions missing origin in OriginManager to their counts. + llvm::StringMap<unsigned> MissingOriginCount; + /// @} public: @@ -116,6 +122,9 @@ class AnalysisBasedWarnings { Policy &getPolicyOverrides() { return PolicyOverrides; } void PrintStats() const; + + void FindMissingOrigins(AnalysisDeclContext &AC, + clang::lifetimes::internal::FactManager &FactMgr); }; } // namespace sema diff --git a/clang/lib/Analysis/LifetimeSafety/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety/LifetimeSafety.cpp index 00c7ed90503e7..d183ce976f946 100644 --- a/clang/lib/Analysis/LifetimeSafety/LifetimeSafety.cpp +++ b/clang/lib/Analysis/LifetimeSafety/LifetimeSafety.cpp @@ -23,9 +23,11 @@ #include "clang/Analysis/AnalysisDeclContext.h" #include "clang/Analysis/CFG.h" #include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/StringMap.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TimeProfiler.h" +#include "llvm/Support/raw_ostream.h" #include <memory> namespace clang::lifetimes { @@ -69,9 +71,12 @@ void LifetimeSafetyAnalysis::run() { } } // namespace internal -void runLifetimeSafetyAnalysis(AnalysisDeclContext &AC, - LifetimeSafetyReporter *Reporter) { - internal::LifetimeSafetyAnalysis Analysis(AC, Reporter); - Analysis.run(); +std::unique_ptr<internal::LifetimeSafetyAnalysis> +runLifetimeSafetyAnalysis(AnalysisDeclContext &AC, + LifetimeSafetyReporter *Reporter) { + std::unique_ptr<internal::LifetimeSafetyAnalysis> Analysis = + std::make_unique<internal::LifetimeSafetyAnalysis>(AC, Reporter); + Analysis->run(); + return Analysis; } } // namespace clang::lifetimes diff --git a/clang/lib/Analysis/LifetimeSafety/Origins.cpp b/clang/lib/Analysis/LifetimeSafety/Origins.cpp index ea51a75324e06..453abf48261c2 100644 --- a/clang/lib/Analysis/LifetimeSafety/Origins.cpp +++ b/clang/lib/Analysis/LifetimeSafety/Origins.cpp @@ -7,6 +7,9 @@ //===----------------------------------------------------------------------===// #include "clang/Analysis/Analyses/LifetimeSafety/Origins.h" +#include "clang/AST/Expr.h" +#include "clang/AST/TypeBase.h" +#include "llvm/ADT/StringMap.h" namespace clang::lifetimes::internal { @@ -22,6 +25,10 @@ void OriginManager::dump(OriginID OID, llvm::raw_ostream &OS) const { OS << ")"; } +const llvm::StringMap<unsigned> OriginManager::getMissingOrigins() const { + return ExprTypeToMissingOriginCount; +} + Origin &OriginManager::addOrigin(OriginID ID, const clang::ValueDecl &D) { AllOrigins.emplace_back(ID, &D); return AllOrigins.back(); @@ -37,6 +44,17 @@ OriginID OriginManager::get(const Expr &E) { auto It = ExprToOriginID.find(&E); if (It != ExprToOriginID.end()) return It->second; + + // if the expression has no specific origin, increment the missing origin + // counter. + std::string ExprStr(E.getStmtClassName()); + ExprStr = ExprStr + "<" + E.getType().getAsString() + ">"; + auto CountIt = ExprTypeToMissingOriginCount.find(ExprStr); + if (CountIt == ExprTypeToMissingOriginCount.end()) { + ExprTypeToMissingOriginCount[ExprStr] = 1; + } else { + CountIt->second++; + } // If the expression itself has no specific origin, and it's a reference // to a declaration, its origin is that of the declaration it refers to. // For pointer types, where we don't pre-emptively create an origin for the diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp index 140b709dbb651..9160939a85735 100644 --- a/clang/lib/Sema/AnalysisBasedWarnings.cpp +++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp @@ -29,7 +29,9 @@ #include "clang/Analysis/Analyses/CFGReachabilityAnalysis.h" #include "clang/Analysis/Analyses/CalledOnceCheck.h" #include "clang/Analysis/Analyses/Consumed.h" +#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" #include "clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h" +#include "clang/Analysis/Analyses/LifetimeSafety/Origins.h" #include "clang/Analysis/Analyses/ReachableCode.h" #include "clang/Analysis/Analyses/ThreadSafety.h" #include "clang/Analysis/Analyses/UninitializedValues.h" @@ -52,7 +54,9 @@ #include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include <algorithm> #include <deque> #include <iterator> @@ -3065,7 +3069,11 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings( if (EnableLifetimeSafetyAnalysis && S.getLangOpts().CPlusPlus) { if (AC.getCFG()) { lifetimes::LifetimeSafetyReporterImpl LifetimeSafetyReporter(S); - lifetimes::runLifetimeSafetyAnalysis(AC, &LifetimeSafetyReporter); + std::unique_ptr<clang::lifetimes::internal::LifetimeSafetyAnalysis> + Analysis = + lifetimes::runLifetimeSafetyAnalysis(AC, &LifetimeSafetyReporter); + if (S.CollectStats) + FindMissingOrigins(AC, Analysis->getFactManager()); } } // Check for violations of "called once" parameter properties. @@ -3131,9 +3139,27 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings( } } +void clang::sema::AnalysisBasedWarnings::FindMissingOrigins( + AnalysisDeclContext &AC, lifetimes::internal::FactManager &FactMgr) { + if (AC.getCFG()) { + for (const auto &[expr, count] : + FactMgr.getOriginMgr().getMissingOrigins()) { + MissingOriginCount[expr] += count; + } + } +} + void clang::sema::AnalysisBasedWarnings::PrintStats() const { + llvm::errs() << "\n*** LifetimeSafety Missing Origin Stats " + "(expression_type : count) :\n"; + unsigned totalMissingOrigins = 0; + for (const auto &[expr, count] : MissingOriginCount) { + llvm::errs() << expr << " : " << count << '\n'; + totalMissingOrigins += count; + } + llvm::errs() << "Total missing origins: " << totalMissingOrigins << "\n"; + llvm::errs() << "****************************************\n"; llvm::errs() << "\n*** Analysis Based Warnings Stats:\n"; - unsigned NumCFGsBuilt = NumFunctionsAnalyzed - NumFunctionsWithBadCFGs; unsigned AvgCFGBlocksPerFunction = !NumCFGsBuilt ? 0 : NumCFGBlocks/NumCFGsBuilt; >From 3ff81d6435d7da0b4c86459fd71344cf2008acca Mon Sep 17 00:00:00 2001 From: Debadri Basak <[email protected]> Date: Wed, 5 Nov 2025 14:22:48 +0000 Subject: [PATCH 2/2] Correcting the signature of getMissingOrigins --- .../include/clang/Analysis/Analyses/LifetimeSafety/Origins.h | 2 +- clang/lib/Sema/AnalysisBasedWarnings.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h index 3f8c8a4d7ce9b..26686a63e9204 100644 --- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h @@ -79,7 +79,7 @@ class OriginManager { void dump(OriginID OID, llvm::raw_ostream &OS) const; - const llvm::StringMap<int> getMissingOrigins() const; + const llvm::StringMap<unsigned> getMissingOrigins() const; private: OriginID getNextOriginID() { return NextOriginID++; } diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp index 9160939a85735..77d2013ff3a93 100644 --- a/clang/lib/Sema/AnalysisBasedWarnings.cpp +++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp @@ -3151,14 +3151,14 @@ void clang::sema::AnalysisBasedWarnings::FindMissingOrigins( void clang::sema::AnalysisBasedWarnings::PrintStats() const { llvm::errs() << "\n*** LifetimeSafety Missing Origin Stats " - "(expression_type : count) :\n"; + "(expression_type : count) :\n\n"; unsigned totalMissingOrigins = 0; for (const auto &[expr, count] : MissingOriginCount) { llvm::errs() << expr << " : " << count << '\n'; totalMissingOrigins += count; } llvm::errs() << "Total missing origins: " << totalMissingOrigins << "\n"; - llvm::errs() << "****************************************\n"; + llvm::errs() << "\n****************************************\n"; llvm::errs() << "\n*** Analysis Based Warnings Stats:\n"; unsigned NumCFGsBuilt = NumFunctionsAnalyzed - NumFunctionsWithBadCFGs; unsigned AvgCFGBlocksPerFunction = _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
