https://github.com/melver updated https://github.com/llvm/llvm-project/pull/142955
>From f2685208390325e663b48e52606b2f7deed5fb5d Mon Sep 17 00:00:00 2001 From: Marco Elver <el...@google.com> Date: Wed, 21 May 2025 23:49:48 +0200 Subject: [PATCH 1/2] Thread Safety Analysis: Basic capability alias-analysis Add basic alias analysis for capabilities by reusing LocalVariableMap, which tracks currently valid definitions of variables (where possible). Aliases created through complex control flow are not tracked. For example, the analysis will no longer generate false positives for cases such as: void testNestedAccess(Container *c) { Foo *ptr = &c->foo; ptr->mu.Lock(); c->foo.data = 42; // OK - no false positive ptr->mu.Unlock(); } void testNestedAcquire(Container *c) EXCLUSIVE_LOCK_FUNCTION(&c->foo.mu) { Foo *buf = &c->foo; buf->mu.Lock(); // OK - no false positive } This implementation would satisfy the basic needs of addressing the concerns for Linux kernel application [1]. Link: https://lore.kernel.org/all/CANpmjNPquO=W1JAh1FNQb8pMQjgeZAKCPQUAd7qUg=5pjJ6x=q...@mail.gmail.com/ [1] --- .../Analysis/Analyses/ThreadSafetyCommon.h | 24 +- clang/lib/Analysis/ThreadSafety.cpp | 110 ++++++-- clang/lib/Analysis/ThreadSafetyCommon.cpp | 135 +++++++++- clang/test/Sema/warn-thread-safety-analysis.c | 6 +- .../SemaCXX/warn-thread-safety-analysis.cpp | 244 +++++++++++++++++- 5 files changed, 494 insertions(+), 25 deletions(-) diff --git a/clang/include/clang/Analysis/Analyses/ThreadSafetyCommon.h b/clang/include/clang/Analysis/Analyses/ThreadSafetyCommon.h index 6c97905a2d7f9..ff963281e1baa 100644 --- a/clang/include/clang/Analysis/Analyses/ThreadSafetyCommon.h +++ b/clang/include/clang/Analysis/Analyses/ThreadSafetyCommon.h @@ -31,10 +31,12 @@ #include "clang/Analysis/CFG.h" #include "clang/Basic/LLVM.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/PointerUnion.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Casting.h" +#include <functional> #include <sstream> #include <string> #include <utility> @@ -386,6 +388,11 @@ class SExprBuilder { SelfVar->setKind(til::Variable::VK_SFun); } + // Create placeholder for this: we don't know the VarDecl on construction yet. + til::LiteralPtr *createThisPlaceholder() { + return new (Arena) til::LiteralPtr(nullptr); + } + // Translate a clang expression in an attribute to a til::SExpr. // Constructs the context from D, DeclExp, and SelfDecl. CapabilityExpr translateAttrExpr(const Expr *AttrExp, const NamedDecl *D, @@ -394,8 +401,8 @@ class SExprBuilder { CapabilityExpr translateAttrExpr(const Expr *AttrExp, CallingContext *Ctx); - // Translate a variable reference. - til::LiteralPtr *createVariable(const VarDecl *VD); + // Translate a VarDecl to its canonical TIL expression. + til::SExpr *translateVariable(const VarDecl *VD, CallingContext *Ctx); // Translate a clang statement or expression to a TIL expression. // Also performs substitution of variables; Ctx provides the context. @@ -412,6 +419,10 @@ class SExprBuilder { const til::SCFG *getCFG() const { return Scfg; } til::SCFG *getCFG() { return Scfg; } + void setLookupLocalVarExpr(std::function<const Expr *(const NamedDecl *)> F) { + LookupLocalVarExpr = std::move(F); + } + private: // We implement the CFGVisitor API friend class CFGWalker; @@ -445,6 +456,7 @@ class SExprBuilder { const AbstractConditionalOperator *C, CallingContext *Ctx); til::SExpr *translateDeclStmt(const DeclStmt *S, CallingContext *Ctx); + til::SExpr *translateStmtExpr(const StmtExpr *SE, CallingContext *Ctx); // Map from statements in the clang CFG to SExprs in the til::SCFG. using StatementMap = llvm::DenseMap<const Stmt *, til::SExpr *>; @@ -501,6 +513,9 @@ class SExprBuilder { void mergeEntryMapBackEdge(); void mergePhiNodesBackEdge(const CFGBlock *Blk); + // Returns true if a variable is assumed to be reassigned. + bool isVariableReassigned(const VarDecl *VD); + private: // Set to true when parsing capability expressions, which get translated // inaccurately in order to hack around smart pointers etc. @@ -531,6 +546,11 @@ class SExprBuilder { std::vector<til::Phi *> IncompleteArgs; til::BasicBlock *CurrentBB = nullptr; BlockInfo *CurrentBlockInfo = nullptr; + + // Context-dependent lookup of currently valid definitions of local variables. + std::function<const Expr *(const NamedDecl *)> LookupLocalVarExpr; + // Set caching local variables that are reassigned. + std::optional<llvm::DenseSet<const VarDecl *>> ReassignedLocalVariables; }; #ifndef NDEBUG diff --git a/clang/lib/Analysis/ThreadSafety.cpp b/clang/lib/Analysis/ThreadSafety.cpp index 80e7c8eff671a..46e0578eb885a 100644 --- a/clang/lib/Analysis/ThreadSafety.cpp +++ b/clang/lib/Analysis/ThreadSafety.cpp @@ -39,6 +39,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/ImmutableMap.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Allocator.h" @@ -428,6 +429,7 @@ class LocalVariableMap { Context::Factory ContextFactory; std::vector<VarDefinition> VarDefinitions; std::vector<std::pair<const Stmt *, Context>> SavedContexts; + bool Imprecise = false; public: LocalVariableMap() { @@ -522,6 +524,9 @@ class LocalVariableMap { void traverseCFG(CFG *CFGraph, const PostOrderCFGView *SortedGraph, std::vector<CFGBlockInfo> &BlockInfo); + /// Returns true if the queries to this instance may be imprecise. + bool isImprecise() const { return Imprecise; } + protected: friend class VarMapBuilder; @@ -609,6 +614,7 @@ class VarMapBuilder : public ConstStmtVisitor<VarMapBuilder> { void VisitDeclStmt(const DeclStmt *S); void VisitBinaryOperator(const BinaryOperator *BO); + void VisitCallExpr(const CallExpr *CE); }; } // namespace @@ -654,6 +660,45 @@ void VarMapBuilder::VisitBinaryOperator(const BinaryOperator *BO) { } } +// Invalidates local variable definitions if variable escaped. +void VarMapBuilder::VisitCallExpr(const CallExpr *CE) { + const FunctionDecl *FD = CE->getDirectCallee(); + if (!FD) + return; + + for (unsigned Idx = 0; Idx < CE->getNumArgs(); ++Idx) { + if (Idx >= FD->getNumParams()) + break; + + const Expr *Arg = CE->getArg(Idx)->IgnoreParenImpCasts(); + const ParmVarDecl *PVD = FD->getParamDecl(Idx); + QualType ParamType = PVD->getType(); + + // Potential reassignment if passed by non-const reference / pointer. + const ValueDecl *VDec = nullptr; + if (ParamType->isReferenceType() && + !ParamType->getPointeeType().isConstQualified()) { + if (const auto *DRE = dyn_cast<DeclRefExpr>(Arg)) + VDec = DRE->getDecl(); + } else if (ParamType->isPointerType() && + !ParamType->getPointeeType().isConstQualified()) { + Arg = Arg->IgnoreParenCasts(); + if (const auto *UO = dyn_cast<UnaryOperator>(Arg)) { + if (UO->getOpcode() == UO_AddrOf) { + const Expr *SubE = UO->getSubExpr()->IgnoreParenCasts(); + if (const auto *DRE = dyn_cast<DeclRefExpr>(SubE)) + VDec = DRE->getDecl(); + } + } + } + + if (VDec && Ctx.lookup(VDec)) { + Ctx = VMap->clearDefinition(VDec, Ctx); + VMap->saveContext(CE, Ctx); + } + } +} + // Computes the intersection of two contexts. The intersection is the // set of variables which have the same definition in both contexts; // variables with different definitions are discarded. @@ -663,10 +708,14 @@ LocalVariableMap::intersectContexts(Context C1, Context C2) { for (const auto &P : C1) { const NamedDecl *Dec = P.first; const unsigned *i2 = C2.lookup(Dec); - if (!i2) // variable doesn't exist on second path + if (!i2) { + // The variable doesn't exist on second path. Result = removeDefinition(Dec, Result); - else if (*i2 != P.second) // variable exists, but has different definition + } else if (*i2 != P.second) { + // The variable exists, but has different definition. Result = clearDefinition(Dec, Result); + Imprecise = true; + } } return Result; } @@ -1141,11 +1190,10 @@ class ThreadSafetyAnalyzer { void warnIfMutexNotHeld(const FactSet &FSet, const NamedDecl *D, const Expr *Exp, AccessKind AK, Expr *MutexExp, - ProtectedOperationKind POK, til::LiteralPtr *Self, + ProtectedOperationKind POK, til::SExpr *Self, SourceLocation Loc); void warnIfMutexHeld(const FactSet &FSet, const NamedDecl *D, const Expr *Exp, - Expr *MutexExp, til::LiteralPtr *Self, - SourceLocation Loc); + Expr *MutexExp, til::SExpr *Self, SourceLocation Loc); void checkAccess(const FactSet &FSet, const Expr *Exp, AccessKind AK, ProtectedOperationKind POK); @@ -1543,6 +1591,14 @@ void ThreadSafetyAnalyzer::getEdgeLockset(FactSet& Result, const CFGBlockInfo *PredBlockInfo = &BlockInfo[PredBlock->getBlockID()]; const LocalVarContext &LVarCtx = PredBlockInfo->ExitContext; + // Temporarily set the lookup context for SExprBuilder. + SxBuilder.setLookupLocalVarExpr([&](const NamedDecl *D) { + auto Ctx = LVarCtx; + return LocalVarMap.isImprecise() ? nullptr : LocalVarMap.lookupExpr(D, Ctx); + }); + auto Cleanup = llvm::make_scope_exit( + [this] { SxBuilder.setLookupLocalVarExpr(nullptr); }); + const auto *Exp = getTrylockCallExpr(Cond, LVarCtx, Negate); if (!Exp) return; @@ -1599,7 +1655,7 @@ class BuildLockset : public ConstStmtVisitor<BuildLockset> { } void handleCall(const Expr *Exp, const NamedDecl *D, - til::LiteralPtr *Self = nullptr, + til::SExpr *Self = nullptr, SourceLocation Loc = SourceLocation()); void examineArguments(const FunctionDecl *FD, CallExpr::const_arg_iterator ArgBegin, @@ -1611,7 +1667,18 @@ class BuildLockset : public ConstStmtVisitor<BuildLockset> { const FactSet &FunctionExitFSet) : ConstStmtVisitor<BuildLockset>(), Analyzer(Anlzr), FSet(Info.EntrySet), FunctionExitFSet(FunctionExitFSet), LVarCtx(Info.EntryContext), - CtxIndex(Info.EntryIndex) {} + CtxIndex(Info.EntryIndex) { + Analyzer->SxBuilder.setLookupLocalVarExpr([this](const NamedDecl *D) { + auto Ctx = LVarCtx; + // Do not resolve aliases if the LocalVarMap is imprecise for this + // function, to retain stable variable names at every point. + return Analyzer->LocalVarMap.isImprecise() + ? nullptr + : Analyzer->LocalVarMap.lookupExpr(D, Ctx); + }); + } + + ~BuildLockset() { Analyzer->SxBuilder.setLookupLocalVarExpr(nullptr); } void VisitUnaryOperator(const UnaryOperator *UO); void VisitBinaryOperator(const BinaryOperator *BO); @@ -1629,7 +1696,7 @@ class BuildLockset : public ConstStmtVisitor<BuildLockset> { /// of at least the passed in AccessKind. void ThreadSafetyAnalyzer::warnIfMutexNotHeld( const FactSet &FSet, const NamedDecl *D, const Expr *Exp, AccessKind AK, - Expr *MutexExp, ProtectedOperationKind POK, til::LiteralPtr *Self, + Expr *MutexExp, ProtectedOperationKind POK, til::SExpr *Self, SourceLocation Loc) { LockKind LK = getLockKindFromAccessKind(AK); CapabilityExpr Cp = SxBuilder.translateAttrExpr(MutexExp, D, Exp, Self); @@ -1688,8 +1755,7 @@ void ThreadSafetyAnalyzer::warnIfMutexNotHeld( /// Warn if the LSet contains the given lock. void ThreadSafetyAnalyzer::warnIfMutexHeld(const FactSet &FSet, const NamedDecl *D, const Expr *Exp, - Expr *MutexExp, - til::LiteralPtr *Self, + Expr *MutexExp, til::SExpr *Self, SourceLocation Loc) { CapabilityExpr Cp = SxBuilder.translateAttrExpr(MutexExp, D, Exp, Self); if (Cp.isInvalid()) { @@ -1857,7 +1923,7 @@ void ThreadSafetyAnalyzer::checkPtAccess(const FactSet &FSet, const Expr *Exp, /// of an implicitly called cleanup function. /// \param Loc If \p Exp = nullptr, the location. void BuildLockset::handleCall(const Expr *Exp, const NamedDecl *D, - til::LiteralPtr *Self, SourceLocation Loc) { + til::SExpr *Self, SourceLocation Loc) { CapExprSet ExclusiveLocksToAdd, SharedLocksToAdd; CapExprSet ExclusiveLocksToRemove, SharedLocksToRemove, GenericLocksToRemove; CapExprSet ScopedReqsAndExcludes; @@ -1869,7 +1935,7 @@ void BuildLockset::handleCall(const Expr *Exp, const NamedDecl *D, const auto *TagT = Exp->getType()->getAs<TagType>(); if (D->hasAttrs() && TagT && Exp->isPRValue()) { til::LiteralPtr *Placeholder = - Analyzer->SxBuilder.createVariable(nullptr); + Analyzer->SxBuilder.createThisPlaceholder(); [[maybe_unused]] auto inserted = Analyzer->ConstructedObjects.insert({Exp, Placeholder}); assert(inserted.second && "Are we visiting the same expression again?"); @@ -2158,6 +2224,9 @@ void BuildLockset::examineArguments(const FunctionDecl *FD, } void BuildLockset::VisitCallExpr(const CallExpr *Exp) { + // adjust the context + LVarCtx = Analyzer->LocalVarMap.getNextContext(CtxIndex, Exp, LVarCtx); + if (const auto *CE = dyn_cast<CXXMemberCallExpr>(Exp)) { const auto *ME = dyn_cast<MemberExpr>(CE->getCallee()); // ME can be null when calling a method pointer @@ -2545,7 +2614,8 @@ void ThreadSafetyAnalyzer::runAnalysis(AnalysisDeclContext &AC) { } if (UnderlyingLocks.empty()) continue; - CapabilityExpr Cp(SxBuilder.createVariable(Param), StringRef(), + CapabilityExpr Cp(SxBuilder.translateVariable(Param, nullptr), + StringRef(), /*Neg=*/false, /*Reentrant=*/false); auto ScopedEntry = std::make_unique<ScopedLockableFactEntry>( Cp, Param->getLocation(), FactEntry::Declared); @@ -2662,17 +2732,19 @@ void ThreadSafetyAnalyzer::runAnalysis(AnalysisDeclContext &AC) { if (!DD->hasAttrs()) break; - LocksetBuilder.handleCall(nullptr, DD, - SxBuilder.createVariable(AD.getVarDecl()), - AD.getTriggerStmt()->getEndLoc()); + LocksetBuilder.handleCall( + nullptr, DD, + SxBuilder.translateVariable(AD.getVarDecl(), nullptr), + AD.getTriggerStmt()->getEndLoc()); break; } case CFGElement::CleanupFunction: { const CFGCleanupFunction &CF = BI.castAs<CFGCleanupFunction>(); - LocksetBuilder.handleCall(/*Exp=*/nullptr, CF.getFunctionDecl(), - SxBuilder.createVariable(CF.getVarDecl()), - CF.getVarDecl()->getLocation()); + LocksetBuilder.handleCall( + /*Exp=*/nullptr, CF.getFunctionDecl(), + SxBuilder.translateVariable(CF.getVarDecl(), nullptr), + CF.getVarDecl()->getLocation()); break; } diff --git a/clang/lib/Analysis/ThreadSafetyCommon.cpp b/clang/lib/Analysis/ThreadSafetyCommon.cpp index ddbd0a9ca904b..0f540f21ad396 100644 --- a/clang/lib/Analysis/ThreadSafetyCommon.cpp +++ b/clang/lib/Analysis/ThreadSafetyCommon.cpp @@ -19,6 +19,7 @@ #include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/OperationKinds.h" +#include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/Stmt.h" #include "clang/AST/Type.h" #include "clang/Analysis/Analyses/ThreadSafetyTIL.h" @@ -241,7 +242,31 @@ CapabilityExpr SExprBuilder::translateAttrExpr(const Expr *AttrExp, return CapabilityExpr(E, AttrExp->getType(), Neg); } -til::LiteralPtr *SExprBuilder::createVariable(const VarDecl *VD) { +til::SExpr *SExprBuilder::translateVariable(const VarDecl *VD, + CallingContext *Ctx) { + assert(VD); + + QualType Ty = VD->getType(); + if (Ty->isPointerType()) { + // Substitute local variable aliases with a canonical definition. + if (VD->hasInit() && !isVariableReassigned(VD)) { + // Fast and CFG-independent mode: Substitute local pointer variables with + // their initializers if they are explicitly const or never reassigned. + return translate(VD->getInit(), Ctx); + } else if (LookupLocalVarExpr) { + // Attempt to resolve an alias through the more complex local variable map + // lookup. This will fail with complex control-flow graphs (where we + // revert to no alias resolution to retain stable variable names). + if (const Expr *E = LookupLocalVarExpr(VD)) { + // Guard against recursion on self-assignment (e.g. `T *x = x;`). + if (const auto *DRE = dyn_cast<DeclRefExpr>(E->IgnoreParenCasts())) + if (DRE->getDecl()->getCanonicalDecl() == VD->getCanonicalDecl()) + return new (Arena) til::LiteralPtr(VD); + return translate(E, Ctx); + } + } + } + return new (Arena) til::LiteralPtr(VD); } @@ -313,6 +338,8 @@ til::SExpr *SExprBuilder::translate(const Stmt *S, CallingContext *Ctx) { case Stmt::DeclStmtClass: return translateDeclStmt(cast<DeclStmt>(S), Ctx); + case Stmt::StmtExprClass: + return translateStmtExpr(cast<StmtExpr>(S), Ctx); default: break; } @@ -353,6 +380,9 @@ til::SExpr *SExprBuilder::translateDeclRefExpr(const DeclRefExpr *DRE, : cast<ObjCMethodDecl>(D)->getCanonicalDecl()->getParamDecl(I); } + if (const auto *VarD = dyn_cast<VarDecl>(VD)) + return translateVariable(VarD, Ctx); + // For non-local variables, treat it as a reference to a named object. return new (Arena) til::LiteralPtr(VD); } @@ -691,6 +721,15 @@ SExprBuilder::translateDeclStmt(const DeclStmt *S, CallingContext *Ctx) { return nullptr; } +til::SExpr *SExprBuilder::translateStmtExpr(const StmtExpr *SE, + CallingContext *Ctx) { + // The value of a statement expression is the value of the last statement, + // which must be an expression. + const CompoundStmt *CS = SE->getSubStmt(); + return CS->body_empty() ? new (Arena) til::Undefined(SE) + : translate(CS->body_back(), Ctx); +} + // If (E) is non-trivial, then add it to the current basic block, and // update the statement map so that S refers to E. Returns a new variable // that refers to E. @@ -1012,6 +1051,100 @@ void SExprBuilder::exitCFG(const CFGBlock *Last) { IncompleteArgs.clear(); } +bool SExprBuilder::isVariableReassigned(const VarDecl *VD) { + struct ReassignmentFinder : RecursiveASTVisitor<ReassignmentFinder> { + bool VisitDeclRefExpr(DeclRefExpr *DRE) { + // Check for self-reference in the initializer. This is not strictly a + // reassignment, but undefined behaviour. + if (DRE->getDecl()->getCanonicalDecl() == InVarDecl) + ReassignedLocalVariables.insert(InVarDecl); + return true; + } + + bool TraverseVarDecl(VarDecl *VD) { + if (!VD->hasInit() || !VD->isLocalVarDecl()) + return true; + // Traverse the initializer to check for self-initialization. + InVarDecl = VD; + TraverseStmt(VD->getInit()); + InVarDecl = nullptr; + return true; + } + + bool VisitUnaryOperator(UnaryOperator *UO) { + if (UO->getOpcode() != UO_AddrOf) + return true; + // If the address of a variable is taken, it has "escaped", assume it may + // be reassigned. + const Expr *SubExpr = UO->getSubExpr()->IgnoreParenImpCasts(); + if (const auto *DRE = dyn_cast<DeclRefExpr>(SubExpr)) + if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) + if (VD->isLocalVarDecl()) + ReassignedLocalVariables.insert(VD->getCanonicalDecl()); + return true; + } + + bool VisitBinaryOperator(BinaryOperator *BO) { + if (InVarDecl) + return true; + if (!BO->isAssignmentOp()) + return true; + // If a variable appears as LHS of assignment, then it's a reassignment. + const auto *LHS = BO->getLHS()->IgnoreParenImpCasts(); + if (const auto *DRE = dyn_cast<DeclRefExpr>(LHS)) + if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) + if (VD->isLocalVarDecl()) + ReassignedLocalVariables.insert(VD->getCanonicalDecl()); + return true; + } + + bool VisitCallExpr(CallExpr *CE) { + const FunctionDecl *FD = CE->getDirectCallee(); + if (!FD) + return true; + for (unsigned Idx = 0; Idx < CE->getNumArgs(); ++Idx) { + if (Idx >= FD->getNumParams()) + break; + const Expr *Arg = CE->getArg(Idx)->IgnoreParenImpCasts(); + const ParmVarDecl *PVD = FD->getParamDecl(Idx); + QualType ParamType = PVD->getType(); + // Potential reassignment if passed by non-const reference. + if (ParamType->isReferenceType() && + !ParamType->getPointeeType().isConstQualified()) { + if (const auto *DRE = dyn_cast<DeclRefExpr>(Arg)) + if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) + if (VD->isLocalVarDecl()) + ReassignedLocalVariables.insert(VD->getCanonicalDecl()); + } + } + return true; + } + + const VarDecl *InVarDecl = nullptr; + llvm::DenseSet<const VarDecl *> ReassignedLocalVariables; + }; + + if (VD->getType().isConstQualified()) + return false; // Assume undefined-behavior freedom. + if (!VD->isLocalVarDecl()) + return true; // Not a local variable (assume reassigned). + auto *FD = dyn_cast<FunctionDecl>(VD->getDeclContext()); + if (!FD) + return true; // Assume reassigned. + + if (LLVM_UNLIKELY(!ReassignedLocalVariables.has_value())) { + // Lazy init ReassignedLocalVariables set. + ReassignmentFinder Visitor; + // const_cast ok: FunctionDecl not modified. + Visitor.TraverseDecl(const_cast<FunctionDecl *>(FD)); + ReassignedLocalVariables = std::move(Visitor.ReassignedLocalVariables); + } + + // Use the canonical declaration to ensure consistent lookup in the cache. + VD = VD->getCanonicalDecl(); + return ReassignedLocalVariables.value().contains(VD); +} + #ifndef NDEBUG namespace { diff --git a/clang/test/Sema/warn-thread-safety-analysis.c b/clang/test/Sema/warn-thread-safety-analysis.c index b43f97af9162e..549cb1231baa6 100644 --- a/clang/test/Sema/warn-thread-safety-analysis.c +++ b/clang/test/Sema/warn-thread-safety-analysis.c @@ -184,9 +184,13 @@ int main(void) { /// Cleanup functions { struct Mutex* const __attribute__((cleanup(unlock_scope))) scope = &mu1; - mutex_exclusive_lock(scope); // Note that we have to lock through scope, because no alias analysis! + mutex_exclusive_lock(scope); // Lock through scope works. // Cleanup happens automatically -> no warning. } + { + struct Mutex* const __attribute__((unused, cleanup(unlock_scope))) scope = &mu1; + mutex_exclusive_lock(&mu1); // With basic alias analysis lock through mu1 also works. + } foo_.a_value = 0; // expected-warning {{writing variable 'a_value' requires holding mutex 'mu_' exclusively}} *foo_.a_ptr = 1; // expected-warning {{writing the value pointed to by 'a_ptr' requires holding mutex 'bar.other_mu' exclusively}} diff --git a/clang/test/SemaCXX/warn-thread-safety-analysis.cpp b/clang/test/SemaCXX/warn-thread-safety-analysis.cpp index d82e2484ace1e..ae8ee9b366a40 100644 --- a/clang/test/SemaCXX/warn-thread-safety-analysis.cpp +++ b/clang/test/SemaCXX/warn-thread-safety-analysis.cpp @@ -1556,9 +1556,9 @@ void main() { Child *c; Base *b = c; - b->func1(); // expected-warning {{calling function 'func1' requires holding mutex 'b->mu_' exclusively}} + b->func1(); // expected-warning {{calling function 'func1' requires holding mutex 'c->mu_' exclusively}} b->mu_.Lock(); - b->func2(); // expected-warning {{cannot call function 'func2' while mutex 'b->mu_' is held}} + b->func2(); // expected-warning {{cannot call function 'func2' while mutex 'c->mu_' is held}} b->mu_.Unlock(); c->func1(); // expected-warning {{calling function 'func1' requires holding mutex 'c->mu_' exclusively}} @@ -7238,3 +7238,243 @@ class TestNegativeWithReentrantMutex { }; } // namespace Reentrancy + +// Tests for tracking aliases of capabilities. +namespace CapabilityAliases { +struct Foo { + Mutex mu; + int data GUARDED_BY(mu); +}; + +Foo *returnsFoo(); +void locksRequired(Foo *foo) EXCLUSIVE_LOCKS_REQUIRED(foo->mu); +void escapeAlias(int a, Foo *&ptr); +void escapeAlias(int b, Foo **ptr); +void passByConstRef(Foo* const& ptr); + +void testBasicPointerAlias(Foo *f) { + Foo *ptr = f; + ptr->mu.Lock(); // lock through alias + f->data = 42; // access through original + ptr->mu.Unlock(); // unlock through alias +} + +void testBasicPointerAliasNoInit(Foo *f) { + Foo *ptr; + + ptr = nullptr; + ptr = f; + ptr->mu.Lock(); + f->data = 42; + ptr->mu.Unlock(); + ptr = nullptr; +} + +void testBasicPointerAliasLoop() { + for (;;) { + Foo *f = returnsFoo(); + Foo *ptr = f; + if (!ptr) + break; + ptr->mu.Lock(); + f->data = 42; + ptr->mu.Unlock(); + } +} + +void testPointerAliasNoEscape1(Foo *f) { + Foo *ptr = f; + testBasicPointerAlias(ptr); // pass alias by value + + ptr->mu.Lock(); + f->data = 42; + ptr->mu.Unlock(); +} + +void testPointerAliasNoEscape2(Foo *f) { + Foo *ptr = f; + passByConstRef(ptr); // pass alias by const ref + + ptr->mu.Lock(); + f->data = 42; + ptr->mu.Unlock(); +} + +void testPointerAliasNoEscape3() { + Foo *ptr = returnsFoo(); + ptr->mu.Lock(); + locksRequired(ptr); + ptr->mu.Unlock(); +} + +void testPointerAliasEscape1(Foo *f) { + Foo *ptr = f; + escapeAlias(0, ptr); + + ptr->mu.Lock(); + f->data = 42; // expected-warning{{writing variable 'data' requires holding mutex 'f->mu' exclusively}} \ + // expected-note{{found near match 'ptr->mu'}} + ptr->mu.Unlock(); +} + +void testPointerAliasEscape2(Foo *f) { + Foo *ptr = f; + escapeAlias(0, &ptr); + + ptr->mu.Lock(); + f->data = 42; // expected-warning{{writing variable 'data' requires holding mutex 'f->mu' exclusively}} \ + // expected-note{{found near match 'ptr->mu'}} + ptr->mu.Unlock(); +} + +void testPointerAliasEscape3(Foo *f) { + Foo *ptr; + + ptr = f; + escapeAlias(0, &ptr); + + ptr->mu.Lock(); + f->data = 42; // expected-warning{{writing variable 'data' requires holding mutex 'f->mu' exclusively}} \ + // expected-note{{found near match 'ptr->mu'}} + ptr->mu.Unlock(); +} + +void testPointerAliasEscapeAndReset(Foo *f) { + Foo *ptr; + + ptr = f; + escapeAlias(0, &ptr); + ptr = f; + + ptr->mu.Lock(); + f->data = 42; + ptr->mu.Unlock(); +} + +void testPointerAliasTryLock1() { + Foo *ptr = returnsFoo(); + if (ptr->mu.TryLock()) { + locksRequired(ptr); + ptr->mu.Unlock(); + } +} + +void testPointerAliasTryLock2() { + Foo *ptr; + ptr = returnsFoo(); + Foo *ptr2 = ptr; + if (ptr->mu.TryLock()) { + locksRequired(ptr); + ptr2->mu.Unlock(); + } +} + +void testReassignment() { + Foo f1, f2; + Foo *ptr = &f1; + ptr->mu.Lock(); + f1.data = 42; + ptr->mu.Unlock(); + + ptr = &f2; + ptr->mu.Lock(); + f2.data = 42; + f1.data = 42; // expected-warning{{writing variable 'data' requires holding mutex 'f1.mu'}} \ + // expected-note{{found near match 'f2.mu'}} + ptr->mu.Unlock(); +} + +// Nested field access through pointer +struct Container { + Foo foo; +}; + +void testNestedAccess(Container *c) { + Foo *ptr = &c->foo; // pointer to nested field + ptr->mu.Lock(); + c->foo.data = 42; + ptr->mu.Unlock(); +} + +void testNestedAcquire(Container *c) EXCLUSIVE_LOCK_FUNCTION(&c->foo.mu) { + Foo *buf = &c->foo; + buf->mu.Lock(); +} + +struct ContainerOfPtr { + Foo *foo_ptr; +}; + +void testIndirectAccess(ContainerOfPtr *fc) { + Foo *ptr = fc->foo_ptr; // get pointer + ptr->mu.Lock(); + fc->foo_ptr->data = 42; // access via original + ptr->mu.Unlock(); +} + +void testControlFlowDoWhile(Foo *f, int x) { + Foo *ptr = f; + + f->mu.Lock(); + if (x) { + // complex merge + do { } while (x--); + } + ptr->data = 42; + ptr->mu.Unlock(); +} + +// FIXME: No alias tracking through complex control flow. +void testComplexControlFlow(Foo *f1, Foo *f2, bool cond) { + Foo *ptr; + if (cond) { + ptr = f1; + } else { + ptr = f2; + } + ptr->mu.Lock(); + if (cond) { + f1->data = 42; // expected-warning{{writing variable 'data' requires holding mutex 'f1->mu' exclusively}} \ + // expected-note{{found near match 'ptr->mu'}} + } else { + f2->data = 42; // expected-warning{{writing variable 'data' requires holding mutex 'f2->mu' exclusively}} \ + // expected-note{{found near match 'ptr->mu'}} + } + ptr->mu.Unlock(); +} + +void testLockFunction(Foo *f) EXCLUSIVE_LOCK_FUNCTION(&f->mu) { + Mutex *mu = &f->mu; + mu->Lock(); +} + +void testUnlockFunction(Foo *f) UNLOCK_FUNCTION(&f->mu) { + Mutex *mu = &f->mu; + mu->Unlock(); +} + +// This is an idiom to deal with "pointer to returned object has a lock held, +// but you must unlock it later" where the statement expression would be hidden +// behind a macro. +void lockWithinStatementExpr() { + Foo *f = ({ auto x = returnsFoo(); x->mu.Lock(); x; }); + f->data = 42; + f->mu.Unlock(); +} + +// Semantically UB, but let's not crash the compiler with this (should be +// handled by -Wuninitialized). +void testSelfInit() { + Mutex *mu = mu; // don't do this at home + mu->Lock(); + mu->Unlock(); +} + +void testSelfAssign() { + Foo *f = returnsFoo(); + f = f; + f->mu.Lock(); + f->data = 42; + f->mu.Unlock(); +} +} // namespace CapabilityAliases >From 27aab7f059870c6078b80bdef71ea887b10d4184 Mon Sep 17 00:00:00 2001 From: Marco Elver <el...@google.com> Date: Wed, 6 Aug 2025 13:15:43 +0200 Subject: [PATCH 2/2] Thread Safety Analysis: Fix loop-invariant alias tracking in LocalVariableMap Fixes LocalVariableMap to correctly handle loop-invariant aliases. This allows us to remove the isVariableReassigned() and rely entirely on LocalVariableMap as the single source of truth for local alias tracking. The old implementation failed because the merge logic compared raw VarDefinition IDs. The analysis's technique for handling back-edges (in createReferenceContext()) generates new 'reference' definitions for loop-scoped variables. This superficial ID comparison caused an alias to be invalidated at both back-edge merges (in intersectBackEdge()) and at subsequent forward-merges with non-loop paths (in intersectContexts()). Fix it by adding the getCanonicalDefinitionID() helper that resolves any definition ID down to its non-reference base. As a result, a variable's definition is correctly preserved across control-flow merges as long as its underlying canonical definition remains the same. --- .../Analysis/Analyses/ThreadSafetyCommon.h | 6 -- clang/lib/Analysis/ThreadSafety.cpp | 49 +++++---- clang/lib/Analysis/ThreadSafetyCommon.cpp | 101 +----------------- .../SemaCXX/warn-thread-safety-analysis.cpp | 50 +++++++++ 4 files changed, 80 insertions(+), 126 deletions(-) diff --git a/clang/include/clang/Analysis/Analyses/ThreadSafetyCommon.h b/clang/include/clang/Analysis/Analyses/ThreadSafetyCommon.h index ff963281e1baa..e152b17bde868 100644 --- a/clang/include/clang/Analysis/Analyses/ThreadSafetyCommon.h +++ b/clang/include/clang/Analysis/Analyses/ThreadSafetyCommon.h @@ -31,7 +31,6 @@ #include "clang/Analysis/CFG.h" #include "clang/Basic/LLVM.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/PointerUnion.h" #include "llvm/ADT/SmallVector.h" @@ -513,9 +512,6 @@ class SExprBuilder { void mergeEntryMapBackEdge(); void mergePhiNodesBackEdge(const CFGBlock *Blk); - // Returns true if a variable is assumed to be reassigned. - bool isVariableReassigned(const VarDecl *VD); - private: // Set to true when parsing capability expressions, which get translated // inaccurately in order to hack around smart pointers etc. @@ -549,8 +545,6 @@ class SExprBuilder { // Context-dependent lookup of currently valid definitions of local variables. std::function<const Expr *(const NamedDecl *)> LookupLocalVarExpr; - // Set caching local variables that are reassigned. - std::optional<llvm::DenseSet<const VarDecl *>> ReassignedLocalVariables; }; #ifndef NDEBUG diff --git a/clang/lib/Analysis/ThreadSafety.cpp b/clang/lib/Analysis/ThreadSafety.cpp index 46e0578eb885a..d2fabf0ba19dc 100644 --- a/clang/lib/Analysis/ThreadSafety.cpp +++ b/clang/lib/Analysis/ThreadSafety.cpp @@ -429,7 +429,6 @@ class LocalVariableMap { Context::Factory ContextFactory; std::vector<VarDefinition> VarDefinitions; std::vector<std::pair<const Stmt *, Context>> SavedContexts; - bool Imprecise = false; public: LocalVariableMap() { @@ -524,12 +523,16 @@ class LocalVariableMap { void traverseCFG(CFG *CFGraph, const PostOrderCFGView *SortedGraph, std::vector<CFGBlockInfo> &BlockInfo); - /// Returns true if the queries to this instance may be imprecise. - bool isImprecise() const { return Imprecise; } - protected: friend class VarMapBuilder; + // Resolve any definition ID down to its non-reference base ID. + unsigned getCanonicalDefinitionID(unsigned ID) { + while (ID > 0 && VarDefinitions[ID].isReference()) + ID = VarDefinitions[ID].Ref; + return ID; + } + // Get the current context index unsigned getContextIndex() { return SavedContexts.size()-1; } @@ -707,14 +710,15 @@ LocalVariableMap::intersectContexts(Context C1, Context C2) { Context Result = C1; for (const auto &P : C1) { const NamedDecl *Dec = P.first; - const unsigned *i2 = C2.lookup(Dec); - if (!i2) { + const unsigned *I2 = C2.lookup(Dec); + if (!I2) { // The variable doesn't exist on second path. Result = removeDefinition(Dec, Result); - } else if (*i2 != P.second) { - // The variable exists, but has different definition. + } else if (getCanonicalDefinitionID(P.second) != + getCanonicalDefinitionID(*I2)) { + // If canonical definitions mismatch the underlying definitions are + // different, invalidate. Result = clearDefinition(Dec, Result); - Imprecise = true; } } return Result; @@ -735,13 +739,22 @@ LocalVariableMap::Context LocalVariableMap::createReferenceContext(Context C) { // createReferenceContext. void LocalVariableMap::intersectBackEdge(Context C1, Context C2) { for (const auto &P : C1) { - unsigned i1 = P.second; - VarDefinition *VDef = &VarDefinitions[i1]; + const unsigned I1 = P.second; + VarDefinition *VDef = &VarDefinitions[I1]; assert(VDef->isReference()); - const unsigned *i2 = C2.lookup(P.first); - if (!i2 || (*i2 != i1)) - VDef->Ref = 0; // Mark this variable as undefined + const unsigned *I2 = C2.lookup(P.first); + if (!I2) { + // Variable does not exist at the end of the loop, invalidate. + VDef->Ref = 0; + continue; + } + + // Compare the canonical IDs. This correctly handles chains of references + // and determines if the variable is truly loop-invariant. + if (getCanonicalDefinitionID(VDef->Ref) != getCanonicalDefinitionID(*I2)) { + VDef->Ref = 0; // Mark this variable as undefined + } } } @@ -1594,7 +1607,7 @@ void ThreadSafetyAnalyzer::getEdgeLockset(FactSet& Result, // Temporarily set the lookup context for SExprBuilder. SxBuilder.setLookupLocalVarExpr([&](const NamedDecl *D) { auto Ctx = LVarCtx; - return LocalVarMap.isImprecise() ? nullptr : LocalVarMap.lookupExpr(D, Ctx); + return LocalVarMap.lookupExpr(D, Ctx); }); auto Cleanup = llvm::make_scope_exit( [this] { SxBuilder.setLookupLocalVarExpr(nullptr); }); @@ -1670,11 +1683,7 @@ class BuildLockset : public ConstStmtVisitor<BuildLockset> { CtxIndex(Info.EntryIndex) { Analyzer->SxBuilder.setLookupLocalVarExpr([this](const NamedDecl *D) { auto Ctx = LVarCtx; - // Do not resolve aliases if the LocalVarMap is imprecise for this - // function, to retain stable variable names at every point. - return Analyzer->LocalVarMap.isImprecise() - ? nullptr - : Analyzer->LocalVarMap.lookupExpr(D, Ctx); + return Analyzer->LocalVarMap.lookupExpr(D, Ctx); }); } diff --git a/clang/lib/Analysis/ThreadSafetyCommon.cpp b/clang/lib/Analysis/ThreadSafetyCommon.cpp index 0f540f21ad396..404f82040220f 100644 --- a/clang/lib/Analysis/ThreadSafetyCommon.cpp +++ b/clang/lib/Analysis/ThreadSafetyCommon.cpp @@ -19,7 +19,6 @@ #include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/OperationKinds.h" -#include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/Stmt.h" #include "clang/AST/Type.h" #include "clang/Analysis/Analyses/ThreadSafetyTIL.h" @@ -249,11 +248,7 @@ til::SExpr *SExprBuilder::translateVariable(const VarDecl *VD, QualType Ty = VD->getType(); if (Ty->isPointerType()) { // Substitute local variable aliases with a canonical definition. - if (VD->hasInit() && !isVariableReassigned(VD)) { - // Fast and CFG-independent mode: Substitute local pointer variables with - // their initializers if they are explicitly const or never reassigned. - return translate(VD->getInit(), Ctx); - } else if (LookupLocalVarExpr) { + if (LookupLocalVarExpr) { // Attempt to resolve an alias through the more complex local variable map // lookup. This will fail with complex control-flow graphs (where we // revert to no alias resolution to retain stable variable names). @@ -1051,100 +1046,6 @@ void SExprBuilder::exitCFG(const CFGBlock *Last) { IncompleteArgs.clear(); } -bool SExprBuilder::isVariableReassigned(const VarDecl *VD) { - struct ReassignmentFinder : RecursiveASTVisitor<ReassignmentFinder> { - bool VisitDeclRefExpr(DeclRefExpr *DRE) { - // Check for self-reference in the initializer. This is not strictly a - // reassignment, but undefined behaviour. - if (DRE->getDecl()->getCanonicalDecl() == InVarDecl) - ReassignedLocalVariables.insert(InVarDecl); - return true; - } - - bool TraverseVarDecl(VarDecl *VD) { - if (!VD->hasInit() || !VD->isLocalVarDecl()) - return true; - // Traverse the initializer to check for self-initialization. - InVarDecl = VD; - TraverseStmt(VD->getInit()); - InVarDecl = nullptr; - return true; - } - - bool VisitUnaryOperator(UnaryOperator *UO) { - if (UO->getOpcode() != UO_AddrOf) - return true; - // If the address of a variable is taken, it has "escaped", assume it may - // be reassigned. - const Expr *SubExpr = UO->getSubExpr()->IgnoreParenImpCasts(); - if (const auto *DRE = dyn_cast<DeclRefExpr>(SubExpr)) - if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) - if (VD->isLocalVarDecl()) - ReassignedLocalVariables.insert(VD->getCanonicalDecl()); - return true; - } - - bool VisitBinaryOperator(BinaryOperator *BO) { - if (InVarDecl) - return true; - if (!BO->isAssignmentOp()) - return true; - // If a variable appears as LHS of assignment, then it's a reassignment. - const auto *LHS = BO->getLHS()->IgnoreParenImpCasts(); - if (const auto *DRE = dyn_cast<DeclRefExpr>(LHS)) - if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) - if (VD->isLocalVarDecl()) - ReassignedLocalVariables.insert(VD->getCanonicalDecl()); - return true; - } - - bool VisitCallExpr(CallExpr *CE) { - const FunctionDecl *FD = CE->getDirectCallee(); - if (!FD) - return true; - for (unsigned Idx = 0; Idx < CE->getNumArgs(); ++Idx) { - if (Idx >= FD->getNumParams()) - break; - const Expr *Arg = CE->getArg(Idx)->IgnoreParenImpCasts(); - const ParmVarDecl *PVD = FD->getParamDecl(Idx); - QualType ParamType = PVD->getType(); - // Potential reassignment if passed by non-const reference. - if (ParamType->isReferenceType() && - !ParamType->getPointeeType().isConstQualified()) { - if (const auto *DRE = dyn_cast<DeclRefExpr>(Arg)) - if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) - if (VD->isLocalVarDecl()) - ReassignedLocalVariables.insert(VD->getCanonicalDecl()); - } - } - return true; - } - - const VarDecl *InVarDecl = nullptr; - llvm::DenseSet<const VarDecl *> ReassignedLocalVariables; - }; - - if (VD->getType().isConstQualified()) - return false; // Assume undefined-behavior freedom. - if (!VD->isLocalVarDecl()) - return true; // Not a local variable (assume reassigned). - auto *FD = dyn_cast<FunctionDecl>(VD->getDeclContext()); - if (!FD) - return true; // Assume reassigned. - - if (LLVM_UNLIKELY(!ReassignedLocalVariables.has_value())) { - // Lazy init ReassignedLocalVariables set. - ReassignmentFinder Visitor; - // const_cast ok: FunctionDecl not modified. - Visitor.TraverseDecl(const_cast<FunctionDecl *>(FD)); - ReassignedLocalVariables = std::move(Visitor.ReassignedLocalVariables); - } - - // Use the canonical declaration to ensure consistent lookup in the cache. - VD = VD->getCanonicalDecl(); - return ReassignedLocalVariables.value().contains(VD); -} - #ifndef NDEBUG namespace { diff --git a/clang/test/SemaCXX/warn-thread-safety-analysis.cpp b/clang/test/SemaCXX/warn-thread-safety-analysis.cpp index ae8ee9b366a40..700b11e402e0e 100644 --- a/clang/test/SemaCXX/warn-thread-safety-analysis.cpp +++ b/clang/test/SemaCXX/warn-thread-safety-analysis.cpp @@ -7477,4 +7477,54 @@ void testSelfAssign() { f->data = 42; f->mu.Unlock(); } + +void testNestedLoopInvariant(Container *c, int n) { + Foo *ptr = &c->foo; + ptr->mu.Lock(); + + for (int i = 0; i < n; ++i) { + for (int j = 0; j < n; ++j) { + } + } + + c->foo.data = 42; // ok: alias still valid + ptr->mu.Unlock(); +} + +void testLoopWithBreak(Foo *f, bool cond) { + Foo *ptr = f; + ptr->mu.Lock(); + for (int i = 0; i < 10; ++i) { + if (cond) { + break; // merge point is after the loop + } + } + f->data = 42; // ok + ptr->mu.Unlock(); +} + +void testLoopWithContinue(Foo *f, bool cond) { + Foo *ptr = f; + ptr->mu.Lock(); + for (int i = 0; i < 10; ++i) { + if (cond) { + continue; // tests merge at the top of loop. + } + } + f->data = 42; // ok + ptr->mu.Unlock(); +} + +void testLoopConditionalReassignment(Foo *f1, Foo *f2, bool cond) { + Foo *ptr = f1; + ptr->mu.Lock(); // expected-note{{mutex acquired here}} + + for (int i = 0; i < 10; ++i) { + if (cond) { + ptr = f2; // alias is reassigned on some path inside the loop. + } + } + f1->data = 42; + ptr->mu.Unlock(); // expected-warning{{releasing mutex 'ptr->mu' that was not held}} +} // expected-warning{{mutex 'f1->mu' is still held at the end of function}} } // namespace CapabilityAliases _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits