https://github.com/balazs-benics-sonarsource updated https://github.com/llvm/llvm-project/pull/144327
From a5ae39c1b8a5abc39f8948a724b354839e708e4b Mon Sep 17 00:00:00 2001 From: Balazs Benics <balazs.ben...@sonarsource.com> Date: Mon, 16 Jun 2025 12:14:06 +0200 Subject: [PATCH 1/3] [analyzer] Enforce not making overly complicated symbols Out of the worst 500 entry points, 45 were improved by at least 10%. Out of these 45, 5 were improved by more than 50%. Out of these 45, 2 were improved by more than 80%. For example, for the `DelugeFirmware/src/OSLikeStuff/fault_handler/fault_handler.c` TU: - `printPointers` entry point was improved from 31.1 seconds to 1.1 second (28x). - `handle_cpu_fault` entry point was improved from 15.5 seconds to 3 seconds (5x). We had in total 3'037'085 entry points in the test pool. Out of these 390'156 were measured to run over a second. TODO: Add the plot here about RT. CPP-6182 --- .../Core/PathSensitive/SValBuilder.h | 24 +-- .../Core/PathSensitive/SymExpr.h | 25 +-- .../Core/PathSensitive/SymbolManager.h | 166 ++++++++++++------ clang/lib/StaticAnalyzer/Checkers/Taint.cpp | 2 +- .../Checkers/TrustNonnullChecker.cpp | 2 +- clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp | 2 +- .../Core/RangeConstraintManager.cpp | 38 ++-- .../Core/RangedConstraintManager.cpp | 15 +- clang/lib/StaticAnalyzer/Core/SValBuilder.cpp | 89 +++++----- .../StaticAnalyzer/Core/SimpleSValBuilder.cpp | 29 +-- .../lib/StaticAnalyzer/Core/SymbolManager.cpp | 6 + .../ensure-capped-symbol-complexity.cpp | 53 ++++++ 12 files changed, 298 insertions(+), 153 deletions(-) create mode 100644 clang/test/Analysis/ensure-capped-symbol-complexity.cpp diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h index 2911554de9d97..0458a6125db9a 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h @@ -57,6 +57,8 @@ class SValBuilder { protected: ASTContext &Context; + const AnalyzerOptions &AnOpts; + /// Manager of APSInt values. BasicValueFactory BasicVals; @@ -68,8 +70,6 @@ class SValBuilder { ProgramStateManager &StateMgr; - const AnalyzerOptions &AnOpts; - /// The scalar type to use for array indices. const QualType ArrayIndexTy; @@ -317,21 +317,21 @@ class SValBuilder { return nonloc::LocAsInteger(BasicVals.getPersistentSValWithData(loc, bits)); } - nonloc::SymbolVal makeNonLoc(const SymExpr *lhs, BinaryOperator::Opcode op, - APSIntPtr rhs, QualType type); + DefinedOrUnknownSVal makeNonLoc(const SymExpr *lhs, BinaryOperator::Opcode op, + APSIntPtr rhs, QualType type); - nonloc::SymbolVal makeNonLoc(APSIntPtr rhs, BinaryOperator::Opcode op, - const SymExpr *lhs, QualType type); + DefinedOrUnknownSVal makeNonLoc(APSIntPtr rhs, BinaryOperator::Opcode op, + const SymExpr *lhs, QualType type); - nonloc::SymbolVal makeNonLoc(const SymExpr *lhs, BinaryOperator::Opcode op, - const SymExpr *rhs, QualType type); + DefinedOrUnknownSVal makeNonLoc(const SymExpr *lhs, BinaryOperator::Opcode op, + const SymExpr *rhs, QualType type); - NonLoc makeNonLoc(const SymExpr *operand, UnaryOperator::Opcode op, - QualType type); + DefinedOrUnknownSVal makeNonLoc(const SymExpr *operand, + UnaryOperator::Opcode op, QualType type); /// Create a NonLoc value for cast. - nonloc::SymbolVal makeNonLoc(const SymExpr *operand, QualType fromTy, - QualType toTy); + DefinedOrUnknownSVal makeNonLoc(const SymExpr *operand, QualType fromTy, + QualType toTy); nonloc::ConcreteInt makeTruthVal(bool b, QualType type) { return nonloc::ConcreteInt(BasicVals.getTruthValue(b, type)); diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h index aca14cf813c4b..11d0a22a31c46 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h @@ -51,9 +51,11 @@ class SymExpr : public llvm::FoldingSetNode { /// Note, however, that it can't be used in Profile because SymbolManager /// needs to compute Profile before allocating SymExpr. const SymbolID Sym; + const unsigned Complexity; protected: - SymExpr(Kind k, SymbolID Sym) : K(k), Sym(Sym) {} + SymExpr(Kind k, SymbolID Sym, unsigned Complexity) + : K(k), Sym(Sym), Complexity(Complexity) {} static bool isValidTypeForSymbol(QualType T) { // FIXME: Depending on whether we choose to deprecate structural symbols, @@ -61,8 +63,6 @@ class SymExpr : public llvm::FoldingSetNode { return !T.isNull() && !T->isVoidType(); } - mutable unsigned Complexity = 0; - public: virtual ~SymExpr() = default; @@ -108,7 +108,7 @@ class SymExpr : public llvm::FoldingSetNode { return llvm::make_range(symbol_iterator(this), symbol_iterator()); } - virtual unsigned computeComplexity() const = 0; + unsigned complexity() const { return Complexity; } /// Find the region from which this symbol originates. /// @@ -136,10 +136,15 @@ using SymbolRefSmallVectorTy = SmallVector<SymbolRef, 2>; /// A symbol representing data which can be stored in a memory location /// (region). class SymbolData : public SymExpr { + friend class SymbolManager; void anchor() override; protected: - SymbolData(Kind k, SymbolID sym) : SymExpr(k, sym) { assert(classof(this)); } + SymbolData(Kind k, SymbolID sym) : SymExpr(k, sym, computeComplexity()) { + assert(classof(this)); + } + + static unsigned computeComplexity(...) { return 1; } public: ~SymbolData() override = default; @@ -147,14 +152,10 @@ class SymbolData : public SymExpr { /// Get a string representation of the kind of the region. virtual StringRef getKindStr() const = 0; - unsigned computeComplexity() const override { - return 1; - }; - // Implement isa<T> support. - static inline bool classof(const SymExpr *SE) { - Kind k = SE->getKind(); - return k >= BEGIN_SYMBOLS && k <= END_SYMBOLS; + static bool classof(const SymExpr *SE) { return classof(SE->getKind()); } + static constexpr bool classof(Kind K) { + return K >= BEGIN_SYMBOLS && K <= END_SYMBOLS; } }; diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h index 86774ad5043dd..5239663788fb4 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h @@ -18,6 +18,7 @@ #include "clang/AST/Type.h" #include "clang/Analysis/AnalysisDeclContext.h" #include "clang/Basic/LLVM.h" +#include "clang/StaticAnalyzer/Core/AnalyzerOptions.h" #include "clang/StaticAnalyzer/Core/PathSensitive/APSIntPtr.h" #include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h" #include "clang/StaticAnalyzer/Core/PathSensitive/StoreRef.h" @@ -72,9 +73,9 @@ class SymbolRegionValue : public SymbolData { QualType getType() const override; // Implement isa<T> support. - static bool classof(const SymExpr *SE) { - return SE->getKind() == SymbolRegionValueKind; - } + static constexpr Kind ClassKind = SymbolRegionValueKind; + static bool classof(const SymExpr *SE) { return classof(SE->getKind()); } + static constexpr bool classof(Kind K) { return K == ClassKind; } }; /// A symbol representing the result of an expression in the case when we do @@ -128,9 +129,9 @@ class SymbolConjured : public SymbolData { } // Implement isa<T> support. - static bool classof(const SymExpr *SE) { - return SE->getKind() == SymbolConjuredKind; - } + static constexpr Kind ClassKind = SymbolConjuredKind; + static bool classof(const SymExpr *SE) { return classof(SE->getKind()); } + static constexpr bool classof(Kind K) { return K == ClassKind; } }; /// A symbol representing the value of a MemRegion whose parent region has @@ -172,9 +173,11 @@ class SymbolDerived : public SymbolData { } // Implement isa<T> support. - static bool classof(const SymExpr *SE) { - return SE->getKind() == SymbolDerivedKind; + static constexpr Kind ClassKind = SymbolDerivedKind; + static constexpr bool classof(const SymExpr *SE) { + return classof(SE->getKind()); } + static constexpr bool classof(Kind K) { return K == ClassKind; } }; /// SymbolExtent - Represents the extent (size in bytes) of a bounded region. @@ -209,9 +212,9 @@ class SymbolExtent : public SymbolData { } // Implement isa<T> support. - static bool classof(const SymExpr *SE) { - return SE->getKind() == SymbolExtentKind; - } + static constexpr Kind ClassKind = SymbolExtentKind; + static bool classof(const SymExpr *SE) { return classof(SE->getKind()); } + static constexpr bool classof(Kind K) { return K == SymbolExtentKind; } }; /// SymbolMetadata - Represents path-dependent metadata about a specific region. @@ -278,13 +281,14 @@ class SymbolMetadata : public SymbolData { } // Implement isa<T> support. - static bool classof(const SymExpr *SE) { - return SE->getKind() == SymbolMetadataKind; - } + static constexpr Kind ClassKind = SymbolMetadataKind; + static bool classof(const SymExpr *SE) { return classof(SE->getKind()); } + static constexpr bool classof(Kind K) { return K == ClassKind; } }; /// Represents a cast expression. class SymbolCast : public SymExpr { + friend class SymbolManager; const SymExpr *Operand; /// Type of the operand. @@ -295,20 +299,19 @@ class SymbolCast : public SymExpr { friend class SymExprAllocator; SymbolCast(SymbolID Sym, const SymExpr *In, QualType From, QualType To) - : SymExpr(SymbolCastKind, Sym), Operand(In), FromTy(From), ToTy(To) { + : SymExpr(SymbolCastKind, Sym, computeComplexity(In, From, To)), + Operand(In), FromTy(From), ToTy(To) { assert(In); assert(isValidTypeForSymbol(From)); // FIXME: GenericTaintChecker creates symbols of void type. // Otherwise, 'To' should also be a valid type. } -public: - unsigned computeComplexity() const override { - if (Complexity == 0) - Complexity = 1 + Operand->computeComplexity(); - return Complexity; + static unsigned computeComplexity(const SymExpr *In, QualType, QualType) { + return In->complexity() + 1; } +public: QualType getType() const override { return ToTy; } LLVM_ATTRIBUTE_RETURNS_NONNULL @@ -329,13 +332,14 @@ class SymbolCast : public SymExpr { } // Implement isa<T> support. - static bool classof(const SymExpr *SE) { - return SE->getKind() == SymbolCastKind; - } + static constexpr Kind ClassKind = SymbolCastKind; + static bool classof(const SymExpr *SE) { return classof(SE->getKind()); } + static constexpr bool classof(Kind K) { return K == ClassKind; } }; /// Represents a symbolic expression involving a unary operator. class UnarySymExpr : public SymExpr { + friend class SymbolManager; const SymExpr *Operand; UnaryOperator::Opcode Op; QualType T; @@ -343,7 +347,8 @@ class UnarySymExpr : public SymExpr { friend class SymExprAllocator; UnarySymExpr(SymbolID Sym, const SymExpr *In, UnaryOperator::Opcode Op, QualType T) - : SymExpr(UnarySymExprKind, Sym), Operand(In), Op(Op), T(T) { + : SymExpr(UnarySymExprKind, Sym, computeComplexity(In, Op, T)), + Operand(In), Op(Op), T(T) { // Note, some unary operators are modeled as a binary operator. E.g. ++x is // modeled as x + 1. assert((Op == UO_Minus || Op == UO_Not) && "non-supported unary expression"); @@ -354,13 +359,12 @@ class UnarySymExpr : public SymExpr { assert(!Loc::isLocType(T) && "unary symbol should be nonloc"); } -public: - unsigned computeComplexity() const override { - if (Complexity == 0) - Complexity = 1 + Operand->computeComplexity(); - return Complexity; + static unsigned computeComplexity(const SymExpr *In, UnaryOperator::Opcode, + QualType) { + return In->complexity() + 1; } +public: const SymExpr *getOperand() const { return Operand; } UnaryOperator::Opcode getOpcode() const { return Op; } QualType getType() const override { return T; } @@ -380,9 +384,9 @@ class UnarySymExpr : public SymExpr { } // Implement isa<T> support. - static bool classof(const SymExpr *SE) { - return SE->getKind() == UnarySymExprKind; - } + static constexpr Kind ClassKind = UnarySymExprKind; + static bool classof(const SymExpr *SE) { return classof(SE->getKind()); } + static constexpr bool classof(Kind K) { return K == ClassKind; } }; /// Represents a symbolic expression involving a binary operator @@ -391,8 +395,9 @@ class BinarySymExpr : public SymExpr { QualType T; protected: - BinarySymExpr(SymbolID Sym, Kind k, BinaryOperator::Opcode op, QualType t) - : SymExpr(k, Sym), Op(op), T(t) { + BinarySymExpr(SymbolID Sym, Kind k, BinaryOperator::Opcode op, QualType t, + unsigned Complexity) + : SymExpr(k, Sym, Complexity), Op(op), T(t) { assert(classof(this)); // Binary expressions are results of arithmetic. Pointer arithmetic is not // handled by binary expressions, but it is instead handled by applying @@ -408,14 +413,14 @@ class BinarySymExpr : public SymExpr { BinaryOperator::Opcode getOpcode() const { return Op; } // Implement isa<T> support. - static bool classof(const SymExpr *SE) { - Kind k = SE->getKind(); - return k >= BEGIN_BINARYSYMEXPRS && k <= END_BINARYSYMEXPRS; + static bool classof(const SymExpr *SE) { return classof(SE->getKind()); } + static constexpr bool classof(Kind K) { + return K >= BEGIN_BINARYSYMEXPRS && K <= END_BINARYSYMEXPRS; } protected: static unsigned computeOperandComplexity(const SymExpr *Value) { - return Value->computeComplexity(); + return Value->complexity(); } static unsigned computeOperandComplexity(const llvm::APSInt &Value) { return 1; @@ -430,19 +435,28 @@ class BinarySymExpr : public SymExpr { }; /// Template implementation for all binary symbolic expressions -template <class LHSTYPE, class RHSTYPE, SymExpr::Kind ClassKind> +template <class LHSTYPE, class RHSTYPE, SymExpr::Kind ClassK> class BinarySymExprImpl : public BinarySymExpr { + friend class SymbolManager; LHSTYPE LHS; RHSTYPE RHS; friend class SymExprAllocator; BinarySymExprImpl(SymbolID Sym, LHSTYPE lhs, BinaryOperator::Opcode op, RHSTYPE rhs, QualType t) - : BinarySymExpr(Sym, ClassKind, op, t), LHS(lhs), RHS(rhs) { + : BinarySymExpr(Sym, ClassKind, op, t, + computeComplexity(lhs, op, rhs, t)), + LHS(lhs), RHS(rhs) { assert(getPointer(lhs)); assert(getPointer(rhs)); } + static unsigned computeComplexity(LHSTYPE lhs, BinaryOperator::Opcode, + RHSTYPE rhs, QualType) { + // FIXME: Should we add 1 to complexity? + return computeOperandComplexity(lhs) + computeOperandComplexity(rhs); + } + public: void dumpToStream(raw_ostream &os) const override { dumpToStreamImpl(os, LHS); @@ -453,13 +467,6 @@ class BinarySymExprImpl : public BinarySymExpr { LHSTYPE getLHS() const { return LHS; } RHSTYPE getRHS() const { return RHS; } - unsigned computeComplexity() const override { - if (Complexity == 0) - Complexity = - computeOperandComplexity(RHS) + computeOperandComplexity(LHS); - return Complexity; - } - static void Profile(llvm::FoldingSetNodeID &ID, LHSTYPE lhs, BinaryOperator::Opcode op, RHSTYPE rhs, QualType t) { ID.AddInteger((unsigned)ClassKind); @@ -474,7 +481,9 @@ class BinarySymExprImpl : public BinarySymExpr { } // Implement isa<T> support. - static bool classof(const SymExpr *SE) { return SE->getKind() == ClassKind; } + static constexpr Kind ClassKind = ClassK; + static bool classof(const SymExpr *SE) { return classof(SE->getKind()); } + static constexpr bool classof(Kind K) { return K == ClassKind; } }; /// Represents a symbolic expression like 'x' + 3. @@ -489,6 +498,33 @@ using IntSymExpr = BinarySymExprImpl<APSIntPtr, const SymExpr *, using SymSymExpr = BinarySymExprImpl<const SymExpr *, const SymExpr *, SymExpr::Kind::SymSymExprKind>; +struct MaybeSymExpr { + MaybeSymExpr() = default; + explicit MaybeSymExpr(SymbolRef Sym) : Sym(Sym) {} + bool isValid() const { return Sym; } + bool isInvalid() const { return !isValid(); } + SymbolRef operator->() const { return Sym; } + + SymbolRef getOrNull() const { return Sym; } + template <typename SymT> const SymT *getOrNull() const { + return llvm::dyn_cast_if_present<SymT>(Sym); + } + + DefinedOrUnknownSVal getOrUnknown() const { + if (isInvalid()) + return UnknownVal(); + return nonloc::SymbolVal(Sym); + } + + nonloc::SymbolVal getOrAssert() const { + assert(Sym); + return nonloc::SymbolVal(Sym); + } + +private: + SymbolRef Sym = nullptr; +}; + class SymExprAllocator { SymbolID NextSymbolID = 0; llvm::BumpPtrAllocator &Alloc; @@ -518,27 +554,27 @@ class SymbolManager { SymExprAllocator Alloc; BasicValueFactory &BV; ASTContext &Ctx; + const unsigned MaxCompComplexity; public: SymbolManager(ASTContext &ctx, BasicValueFactory &bv, - llvm::BumpPtrAllocator &bpalloc) - : SymbolDependencies(16), Alloc(bpalloc), BV(bv), Ctx(ctx) {} + llvm::BumpPtrAllocator &bpalloc, const AnalyzerOptions &Opts) + : SymbolDependencies(16), Alloc(bpalloc), BV(bv), Ctx(ctx), + MaxCompComplexity(Opts.MaxSymbolComplexity) { + assert(MaxCompComplexity > 0 && "Zero max complexity doesn't make sense"); + } static bool canSymbolicate(QualType T); /// Create or retrieve a SymExpr of type \p SymExprT for the given arguments. /// Use the arguments to check for an existing SymExpr and return it, /// otherwise, create a new one and keep a pointer to it to avoid duplicates. - template <typename SymExprT, typename... Args> - const SymExprT *acquire(Args &&...args); + template <typename SymExprT, typename... Args> auto acquire(Args &&...args); const SymbolConjured *conjureSymbol(ConstCFGElementRef Elem, const LocationContext *LCtx, QualType T, unsigned VisitCount, - const void *SymbolTag = nullptr) { - - return acquire<SymbolConjured>(Elem, LCtx, T, VisitCount, SymbolTag); - } + const void *SymbolTag = nullptr); QualType getType(const SymExpr *SE) const { return SE->getType(); @@ -672,7 +708,16 @@ class SymbolVisitor { }; template <typename T, typename... Args> -const T *SymbolManager::acquire(Args &&...args) { +auto SymbolManager::acquire(Args &&...args) { + constexpr bool IsSymbolData = SymbolData::classof(T::ClassKind); + if constexpr (IsSymbolData) { + assert(T::computeComplexity(args...) == 1); + } else { + if (T::computeComplexity(args...) > MaxCompComplexity) { + return MaybeSymExpr(); + } + } + llvm::FoldingSetNodeID profile; T::Profile(profile, args...); void *InsertPos; @@ -681,7 +726,12 @@ const T *SymbolManager::acquire(Args &&...args) { SD = Alloc.make<T>(std::forward<Args>(args)...); DataSet.InsertNode(SD, InsertPos); } - return cast<T>(SD); + + if constexpr (IsSymbolData) { + return cast<T>(SD); + } else { + return MaybeSymExpr(SD); + } } } // namespace ento diff --git a/clang/lib/StaticAnalyzer/Checkers/Taint.cpp b/clang/lib/StaticAnalyzer/Checkers/Taint.cpp index e55d064253b84..f0dc889f15e7a 100644 --- a/clang/lib/StaticAnalyzer/Checkers/Taint.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/Taint.cpp @@ -267,7 +267,7 @@ std::vector<SymbolRef> taint::getTaintedSymbolsImpl(ProgramStateRef State, // HACK:https://discourse.llvm.org/t/rfc-make-istainted-and-complex-symbols-friends/79570 if (const auto &Opts = State->getAnalysisManager().getAnalyzerOptions(); - Sym->computeComplexity() > Opts.MaxTaintedSymbolComplexity) { + Sym->complexity() > Opts.MaxTaintedSymbolComplexity) { return {}; } diff --git a/clang/lib/StaticAnalyzer/Checkers/TrustNonnullChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/TrustNonnullChecker.cpp index e2f8bd541c967..ab0e3d8f56d86 100644 --- a/clang/lib/StaticAnalyzer/Checkers/TrustNonnullChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/TrustNonnullChecker.cpp @@ -66,7 +66,7 @@ class TrustNonnullChecker : public Checker<check::PostCall, SVal Cond, bool Assumption) const { const SymbolRef CondS = Cond.getAsSymbol(); - if (!CondS || CondS->computeComplexity() > ComplexityThreshold) + if (!CondS || CondS->complexity() > ComplexityThreshold) return State; for (SymbolRef Antecedent : CondS->symbols()) { diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp index fa8e669b6bb2f..3486485dcd686 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp @@ -67,7 +67,7 @@ void ExprEngine::VisitBinaryOperator(const BinaryOperator* B, if (RightV.isUnknown()) { unsigned Count = currBldrCtx->blockCount(); RightV = svalBuilder.conjureSymbolVal(nullptr, getCFGElementRef(), LCtx, - Count); + RHS->getType(), Count); } // Simulate the effects of a "store": bind the value of the RHS // to the L-Value represented by the LHS. diff --git a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp index ab45e678bafd5..599c787f64a3a 100644 --- a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp @@ -1264,7 +1264,9 @@ class SymbolicRangeInferrer private: SymbolicRangeInferrer(RangeSet::Factory &F, ProgramStateRef S) - : ValueFactory(F.getValueFactory()), RangeFactory(F), State(S) {} + : SVB(S->getStateManager().getSValBuilder()), + SymMgr(S->getSymbolManager()), ValueFactory(F.getValueFactory()), + RangeFactory(F), State(S) {} /// Infer range information from the given integer constant. /// @@ -1471,8 +1473,10 @@ class SymbolicRangeInferrer return getRangeForNegatedExpr( [SSE, State = this->State]() -> SymbolRef { if (SSE->getOpcode() == BO_Sub) - return State->getSymbolManager().acquire<SymSymExpr>( - SSE->getRHS(), BO_Sub, SSE->getLHS(), SSE->getType()); + return State->getSymbolManager() + .acquire<SymSymExpr>(SSE->getRHS(), BO_Sub, SSE->getLHS(), + SSE->getType()) + .getOrNull(); return nullptr; }, SSE->getType()); @@ -1481,8 +1485,9 @@ class SymbolicRangeInferrer std::optional<RangeSet> getRangeForNegatedSym(SymbolRef Sym) { return getRangeForNegatedExpr( [Sym, State = this->State]() { - return State->getSymbolManager().acquire<UnarySymExpr>( - Sym, UO_Minus, Sym->getType()); + return State->getSymbolManager() + .acquire<UnarySymExpr>(Sym, UO_Minus, Sym->getType()) + .getOrNull(); }, Sym->getType()); } @@ -1495,8 +1500,13 @@ class SymbolicRangeInferrer if (!IsCommutative) return std::nullopt; - SymbolRef Commuted = State->getSymbolManager().acquire<SymSymExpr>( - SSE->getRHS(), Op, SSE->getLHS(), SSE->getType()); + SymbolRef Commuted = State->getSymbolManager() + .acquire<SymSymExpr>(SSE->getRHS(), Op, + SSE->getLHS(), SSE->getType()) + .getOrNull(); + if (!Commuted) + return std::nullopt; + if (const RangeSet *Range = getConstraint(State, Commuted)) return *Range; return std::nullopt; @@ -1525,8 +1535,6 @@ class SymbolicRangeInferrer const SymExpr *RHS = SSE->getRHS(); QualType T = SSE->getType(); - SymbolManager &SymMgr = State->getSymbolManager(); - // We use this variable to store the last queried operator (`QueriedOP`) // for which the `getCmpOpState` returned with `Unknown`. If there are two // different OPs that returned `Unknown` then we have to query the special @@ -1540,8 +1548,10 @@ class SymbolicRangeInferrer // Let's find an expression e.g. (x < y). BinaryOperatorKind QueriedOP = OperatorRelationsTable::getOpFromIndex(i); - const SymSymExpr *SymSym = - SymMgr.acquire<SymSymExpr>(LHS, QueriedOP, RHS, T); + SymbolRef SymSym = + SymMgr.acquire<SymSymExpr>(LHS, QueriedOP, RHS, T).getOrNull(); + if (!SymSym) + continue; const RangeSet *QueriedRangeSet = getConstraint(State, SymSym); // If ranges were not previously found, @@ -1549,7 +1559,9 @@ class SymbolicRangeInferrer if (!QueriedRangeSet) { const BinaryOperatorKind ROP = BinaryOperator::reverseComparisonOp(QueriedOP); - SymSym = SymMgr.acquire<SymSymExpr>(RHS, ROP, LHS, T); + SymSym = SymMgr.acquire<SymSymExpr>(RHS, ROP, LHS, T).getOrNull(); + if (!SymSym) + continue; QueriedRangeSet = getConstraint(State, SymSym); } @@ -1622,6 +1634,8 @@ class SymbolicRangeInferrer return RangeSet(RangeFactory, Zero); } + SValBuilder &SVB; + SymbolManager &SymMgr; BasicValueFactory &ValueFactory; RangeSet::Factory &RangeFactory; ProgramStateRef State; diff --git a/clang/lib/StaticAnalyzer/Core/RangedConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/RangedConstraintManager.cpp index 94dcdaf327689..8b86b48ccf8a9 100644 --- a/clang/lib/StaticAnalyzer/Core/RangedConstraintManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/RangedConstraintManager.cpp @@ -62,8 +62,11 @@ ProgramStateRef RangedConstraintManager::assumeSym(ProgramStateRef State, SymbolManager &SymMgr = getSymbolManager(); QualType DiffTy = SymMgr.getContext().getPointerDiffType(); - SymbolRef Subtraction = SymMgr.acquire<SymSymExpr>( - SSE->getRHS(), BO_Sub, SSE->getLHS(), DiffTy); + SymbolRef Subtraction = SymMgr + .acquire<SymSymExpr>(SSE->getRHS(), BO_Sub, + SSE->getLHS(), DiffTy) + .getOrNull(); + assert(Subtraction && "Just swapped the operands"); const llvm::APSInt &Zero = getBasicVals().getValue(0, DiffTy); Op = BinaryOperator::reverseComparisonOp(Op); @@ -76,8 +79,12 @@ ProgramStateRef RangedConstraintManager::assumeSym(ProgramStateRef State, SymbolManager &SymMgr = getSymbolManager(); QualType ExprType = SSE->getType(); - SymbolRef CanonicalEquality = SymMgr.acquire<SymSymExpr>( - SSE->getLHS(), BO_EQ, SSE->getRHS(), ExprType); + SymbolRef CanonicalEquality = + SymMgr + .acquire<SymSymExpr>(SSE->getLHS(), BO_EQ, SSE->getRHS(), + ExprType) + .getOrNull(); + assert(CanonicalEquality && "Just swapped the operands"); bool WasEqual = SSE->getOpcode() == BO_EQ; bool IsExpectedEqual = WasEqual == Assumption; diff --git a/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp b/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp index 2276c452cce76..18d6b907fa899 100644 --- a/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp +++ b/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp @@ -51,11 +51,11 @@ void SValBuilder::anchor() {} SValBuilder::SValBuilder(llvm::BumpPtrAllocator &alloc, ASTContext &context, ProgramStateManager &stateMgr) - : Context(context), BasicVals(context, alloc), - SymMgr(context, BasicVals, alloc), MemMgr(context, alloc), - StateMgr(stateMgr), + : Context(context), AnOpts( stateMgr.getOwningEngine().getAnalysisManager().getAnalyzerOptions()), + BasicVals(context, alloc), SymMgr(context, BasicVals, alloc, AnOpts), + MemMgr(context, alloc), StateMgr(stateMgr), ArrayIndexTy(context.LongLongTy), ArrayIndexWidth(context.getTypeSize(ArrayIndexTy)) {} @@ -74,44 +74,50 @@ DefinedOrUnknownSVal SValBuilder::makeZeroVal(QualType type) { return UnknownVal(); } -nonloc::SymbolVal SValBuilder::makeNonLoc(const SymExpr *lhs, - BinaryOperator::Opcode op, - APSIntPtr rhs, QualType type) { +DefinedOrUnknownSVal SValBuilder::makeNonLoc(const SymExpr *lhs, + BinaryOperator::Opcode op, + APSIntPtr rhs, QualType type) { + // The Environment ensures we always get a persistent APSInt in + // BasicValueFactory, so we don't need to get the APSInt from + // BasicValueFactory again. assert(lhs); assert(!Loc::isLocType(type)); - return nonloc::SymbolVal(SymMgr.acquire<SymIntExpr>(lhs, op, rhs, type)); + return SymMgr.acquire<SymIntExpr>(lhs, op, rhs, type).getOrUnknown(); } -nonloc::SymbolVal SValBuilder::makeNonLoc(APSIntPtr lhs, - BinaryOperator::Opcode op, - const SymExpr *rhs, QualType type) { +DefinedOrUnknownSVal SValBuilder::makeNonLoc(APSIntPtr lhs, + BinaryOperator::Opcode op, + const SymExpr *rhs, + QualType type) { assert(rhs); assert(!Loc::isLocType(type)); - return nonloc::SymbolVal(SymMgr.acquire<IntSymExpr>(lhs, op, rhs, type)); + return SymMgr.acquire<IntSymExpr>(lhs, op, rhs, type).getOrUnknown(); } -nonloc::SymbolVal SValBuilder::makeNonLoc(const SymExpr *lhs, - BinaryOperator::Opcode op, - const SymExpr *rhs, QualType type) { +DefinedOrUnknownSVal SValBuilder::makeNonLoc(const SymExpr *lhs, + BinaryOperator::Opcode op, + const SymExpr *rhs, + QualType type) { assert(lhs && rhs); assert(!Loc::isLocType(type)); - return nonloc::SymbolVal(SymMgr.acquire<SymSymExpr>(lhs, op, rhs, type)); + return SymMgr.acquire<SymSymExpr>(lhs, op, rhs, type).getOrUnknown(); } -NonLoc SValBuilder::makeNonLoc(const SymExpr *operand, UnaryOperator::Opcode op, - QualType type) { +DefinedOrUnknownSVal SValBuilder::makeNonLoc(const SymExpr *operand, + UnaryOperator::Opcode op, + QualType type) { assert(operand); assert(!Loc::isLocType(type)); - return nonloc::SymbolVal(SymMgr.acquire<UnarySymExpr>(operand, op, type)); + return SymMgr.acquire<UnarySymExpr>(operand, op, type).getOrUnknown(); } -nonloc::SymbolVal SValBuilder::makeNonLoc(const SymExpr *operand, - QualType fromTy, QualType toTy) { +DefinedOrUnknownSVal SValBuilder::makeNonLoc(const SymExpr *operand, + QualType fromTy, QualType toTy) { assert(operand); assert(!Loc::isLocType(toTy)); if (fromTy == toTy) return nonloc::SymbolVal(operand); - return nonloc::SymbolVal(SymMgr.acquire<SymbolCast>(operand, fromTy, toTy)); + return SymMgr.acquire<SymbolCast>(operand, fromTy, toTy).getOrUnknown(); } SVal SValBuilder::convertToArrayIndex(SVal val) { @@ -432,24 +438,19 @@ SVal SValBuilder::makeSymExprValNN(BinaryOperator::Opcode Op, QualType ResultTy) { SymbolRef symLHS = LHS.getAsSymbol(); SymbolRef symRHS = RHS.getAsSymbol(); + auto lInt = LHS.getAs<nonloc::ConcreteInt>(); + auto rInt = RHS.getAs<nonloc::ConcreteInt>(); // TODO: When the Max Complexity is reached, we should conjure a symbol // instead of generating an Unknown value and propagate the taint info to it. - const unsigned MaxComp = AnOpts.MaxSymbolComplexity; - - if (symLHS && symRHS && - (symLHS->computeComplexity() + symRHS->computeComplexity()) < MaxComp) + if (symLHS && symRHS) return makeNonLoc(symLHS, Op, symRHS, ResultTy); - if (symLHS && symLHS->computeComplexity() < MaxComp) - if (std::optional<nonloc::ConcreteInt> rInt = - RHS.getAs<nonloc::ConcreteInt>()) - return makeNonLoc(symLHS, Op, rInt->getValue(), ResultTy); + if (symLHS && rInt) + return makeNonLoc(symLHS, Op, rInt->getValue(), ResultTy); - if (symRHS && symRHS->computeComplexity() < MaxComp) - if (std::optional<nonloc::ConcreteInt> lInt = - LHS.getAs<nonloc::ConcreteInt>()) - return makeNonLoc(lInt->getValue(), Op, symRHS, ResultTy); + if (lInt && symRHS) + return makeNonLoc(lInt->getValue(), Op, symRHS, ResultTy); return UnknownVal(); } @@ -614,10 +615,12 @@ SVal SValBuilder::evalIntegralCast(ProgramStateRef state, SVal val, // Check the range of the symbol being casted against the maximum value of the // target type. QualType CmpTy = getConditionType(); - NonLoc CompVal = evalBinOpNN(state, BO_LE, *AsNonLoc, ToTypeMaxVal, CmpTy) - .castAs<NonLoc>(); + auto CompVal = + evalBinOpNN(state, BO_LE, *AsNonLoc, ToTypeMaxVal, CmpTy).getAs<NonLoc>(); + if (!CompVal) + return UnknownVal(); ProgramStateRef IsNotTruncated, IsTruncated; - std::tie(IsNotTruncated, IsTruncated) = state->assume(CompVal); + std::tie(IsNotTruncated, IsTruncated) = state->assume(*CompVal); if (!IsNotTruncated && IsTruncated) { // Symbol is truncated so we evaluate it as a cast. return makeNonLoc(AsSymbol, originalTy, castTy); @@ -1048,6 +1051,10 @@ class EvalCastVisitor : public SValVisitor<EvalCastVisitor, SVal> { // (llong)(ulong)(uint x) -> (llong)(uint x) (sizeof(ulong) == // sizeof(uint)) + auto getAsSymValOrV = [V](DefinedOrUnknownSVal Val) { + return Val.getAs<nonloc::SymbolVal>().value_or(V); + }; + SymbolRef SE = V.getSymbol(); QualType T = Context.getCanonicalType(SE->getType()); @@ -1055,14 +1062,14 @@ class EvalCastVisitor : public SValVisitor<EvalCastVisitor, SVal> { return V; if (!isa<SymbolCast>(SE)) - return VB.makeNonLoc(SE, T, CastTy); + return getAsSymValOrV(VB.makeNonLoc(SE, T, CastTy)); SymbolRef RootSym = cast<SymbolCast>(SE)->getOperand(); QualType RT = RootSym->getType().getCanonicalType(); // FIXME support simplification from non-integers. if (!RT->isIntegralOrEnumerationType()) - return VB.makeNonLoc(SE, T, CastTy); + return getAsSymValOrV(VB.makeNonLoc(SE, T, CastTy)); BasicValueFactory &BVF = VB.getBasicValueFactory(); APSIntType CTy = BVF.getAPSIntType(CastTy); @@ -1075,7 +1082,7 @@ class EvalCastVisitor : public SValVisitor<EvalCastVisitor, SVal> { const bool isSameType = (RT == CastTy); if (isSameType) return nonloc::SymbolVal(RootSym); - return VB.makeNonLoc(RootSym, RT, CastTy); + return getAsSymValOrV(VB.makeNonLoc(RootSym, RT, CastTy)); } APSIntType RTy = BVF.getAPSIntType(RT); @@ -1084,9 +1091,9 @@ class EvalCastVisitor : public SValVisitor<EvalCastVisitor, SVal> { const bool UR = RTy.isUnsigned(); if (((WT > WR) && (UR || !UT)) || ((WT == WR) && (UT == UR))) - return VB.makeNonLoc(RootSym, RT, CastTy); + return getAsSymValOrV(VB.makeNonLoc(RootSym, RT, CastTy)); - return VB.makeNonLoc(SE, T, CastTy); + return getAsSymValOrV(VB.makeNonLoc(SE, T, CastTy)); } }; } // end anonymous namespace diff --git a/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp b/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp index 84a9c43d3572e..07458001ecb0f 100644 --- a/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp +++ b/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp @@ -290,10 +290,10 @@ static std::pair<SymbolRef, APSIntPtr> decomposeSymbol(SymbolRef Sym, // Simplify "(LSym + LInt) Op (RSym + RInt)" assuming all values are of the // same signed integral type and no overflows occur (which should be checked // by the caller). -static NonLoc doRearrangeUnchecked(ProgramStateRef State, - BinaryOperator::Opcode Op, - SymbolRef LSym, llvm::APSInt LInt, - SymbolRef RSym, llvm::APSInt RInt) { +static std::optional<NonLoc> +doRearrangeUnchecked(ProgramStateRef State, BinaryOperator::Opcode Op, + SymbolRef LSym, llvm::APSInt LInt, SymbolRef RSym, + llvm::APSInt RInt) { SValBuilder &SVB = State->getStateManager().getSValBuilder(); BasicValueFactory &BV = SVB.getBasicValueFactory(); SymbolManager &SymMgr = SVB.getSymbolManager(); @@ -320,7 +320,7 @@ static NonLoc doRearrangeUnchecked(ProgramStateRef State, nonloc::ConcreteInt(BV.getValue(RInt)), ResultTy) .castAs<NonLoc>(); - SymbolRef ResultSym = nullptr; + MaybeSymExpr ResultSym; BinaryOperator::Opcode ResultOp; llvm::APSInt ResultInt; if (BinaryOperator::isComparisonOp(Op)) { @@ -346,12 +346,20 @@ static NonLoc doRearrangeUnchecked(ProgramStateRef State, ResultOp = BO_Sub; } else if (ResultInt == 0) { // Shortcut: Simplify "$x + 0" to "$x". - return nonloc::SymbolVal(ResultSym); + if (ResultSym.isInvalid()) + return std::nullopt; + return ResultSym.getOrAssert(); } } + + if (ResultSym.isInvalid()) + return std::nullopt; + APSIntPtr PersistentResultInt = BV.getValue(ResultInt); - return nonloc::SymbolVal(SymMgr.acquire<SymIntExpr>( - ResultSym, ResultOp, PersistentResultInt, ResultTy)); + return SymMgr + .acquire<SymIntExpr>(ResultSym.getOrNull(), ResultOp, PersistentResultInt, + ResultTy) + .getOrAssert(); } // Rearrange if symbol type matches the result type and if the operator is a @@ -421,9 +429,8 @@ static std::optional<NonLoc> tryRearrange(ProgramStateRef State, } SVal SimpleSValBuilder::evalBinOpNN(ProgramStateRef state, - BinaryOperator::Opcode op, - NonLoc lhs, NonLoc rhs, - QualType resultTy) { + BinaryOperator::Opcode op, NonLoc lhs, + NonLoc rhs, QualType resultTy) { NonLoc InputLHS = lhs; NonLoc InputRHS = rhs; diff --git a/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp b/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp index d03f47fa301e6..6f4244811261e 100644 --- a/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp @@ -238,6 +238,12 @@ bool SymbolManager::canSymbolicate(QualType T) { return false; } +const SymbolConjured *SymbolManager::conjureSymbol( + ConstCFGElementRef Elem, const LocationContext *LCtx, QualType T, + unsigned VisitCount, const void *SymbolTag /*=nullptr*/) { + return acquire<SymbolConjured>(Elem, LCtx, T, VisitCount, SymbolTag); +} + void SymbolManager::addSymbolDependency(const SymbolRef Primary, const SymbolRef Dependent) { auto &dependencies = SymbolDependencies[Primary]; diff --git a/clang/test/Analysis/ensure-capped-symbol-complexity.cpp b/clang/test/Analysis/ensure-capped-symbol-complexity.cpp new file mode 100644 index 0000000000000..f3b5e633fee79 --- /dev/null +++ b/clang/test/Analysis/ensure-capped-symbol-complexity.cpp @@ -0,0 +1,53 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=debug.ExprInspection %s -verify + +// RUN: %clang_analyze_cc1 -analyzer-checker=debug.ConfigDumper 2>&1 | FileCheck %s --match-full-lines +// CHECK: max-symbol-complexity = 35 + +void clang_analyzer_dump(int v); + +void pumpSymbolComplexity() { + extern int *p; + *p = (*p + 1) & 1023; // 2 + *p = (*p + 1) & 1023; // 4 + *p = (*p + 1) & 1023; // 6 + *p = (*p + 1) & 1023; // 8 + *p = (*p + 1) & 1023; // 10 + *p = (*p + 1) & 1023; // 12 + *p = (*p + 1) & 1023; // 14 + *p = (*p + 1) & 1023; // 16 + *p = (*p + 1) & 1023; // 18 + *p = (*p + 1) & 1023; // 20 + *p = (*p + 1) & 1023; // 22 + *p = (*p + 1) & 1023; // 24 + *p = (*p + 1) & 1023; // 26 + *p = (*p + 1) & 1023; // 28 + *p = (*p + 1) & 1023; // 30 + *p = (*p + 1) & 1023; // 32 + *p = (*p + 1) & 1023; // 34 + + // The complexity of "*p" is below 35, so it's accurate. + clang_analyzer_dump(*p); + // expected-warning-re@-1 {{{{^\({34}reg}}}} + + // We would increase the complexity over the threshold, thus it'll get simplified. + *p = (*p + 1) & 1023; // Would be 36, which is over 35. + + // This dump used to print a hugely complicated symbol, over 800 complexity, taking really long to simplify. + clang_analyzer_dump(*p); + // expected-warning-re@-1 {{{{^}}conj_${{[0-9]+}}{int, LC{{[0-9]+}}, S{{[0-9]+}}, #{{[0-9]+}}} [debug.ExprInspection]{{$}}}} +} + +void hugelyOverComplicatedSymbol() { +#define TEN_TIMES(x) x x x x x x x x x x +#define HUNDRED_TIMES(x) TEN_TIMES(TEN_TIMES(x)) + extern int *p; + HUNDRED_TIMES(*p = (*p + 1) & 1023;) + HUNDRED_TIMES(*p = (*p + 1) & 1023;) + HUNDRED_TIMES(*p = (*p + 1) & 1023;) + HUNDRED_TIMES(*p = (*p + 1) & 1023;) + // This dump used to print a hugely complicated symbol, over 800 complexity, taking really long to simplify. + clang_analyzer_dump(*p); + // expected-warning-re@-1 {{{{^}}((((((((conj_${{[0-9]+}}{int, LC{{[0-9]+}}, S{{[0-9]+}}, #{{[0-9]+}}}) + 1) & 1023) + 1) & 1023) + 1) & 1023) + 1) & 1023 [debug.ExprInspection]{{$}}}} +#undef HUNDRED_TIMES +#undef TEN_TIMES +} From f8a9d4cff130567366cc6b8ab076fa52b040690e Mon Sep 17 00:00:00 2001 From: Balazs Benics <balazs.ben...@sonarsource.com> Date: Tue, 24 Jun 2025 09:28:03 +0200 Subject: [PATCH 2/3] Add SymExpr::SymbolTooComplex --- .../Core/PathSensitive/SValBuilder.h | 20 +- .../Core/PathSensitive/SymbolManager.h | 188 ++++++++++++------ .../Core/PathSensitive/Symbols.def | 1 + .../Core/RangeConstraintManager.cpp | 29 +-- .../Core/RangedConstraintManager.cpp | 15 +- clang/lib/StaticAnalyzer/Core/SValBuilder.cpp | 56 +++--- .../StaticAnalyzer/Core/SimpleSValBuilder.cpp | 29 +-- .../lib/StaticAnalyzer/Core/SymbolManager.cpp | 7 + .../ensure-capped-symbol-complexity.cpp | 9 +- 9 files changed, 203 insertions(+), 151 deletions(-) diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h index 0458a6125db9a..fae78c564e0ab 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h @@ -317,21 +317,21 @@ class SValBuilder { return nonloc::LocAsInteger(BasicVals.getPersistentSValWithData(loc, bits)); } - DefinedOrUnknownSVal makeNonLoc(const SymExpr *lhs, BinaryOperator::Opcode op, - APSIntPtr rhs, QualType type); + nonloc::SymbolVal makeNonLoc(const SymExpr *lhs, BinaryOperator::Opcode op, + APSIntPtr rhs, QualType type); - DefinedOrUnknownSVal makeNonLoc(APSIntPtr rhs, BinaryOperator::Opcode op, - const SymExpr *lhs, QualType type); + nonloc::SymbolVal makeNonLoc(APSIntPtr rhs, BinaryOperator::Opcode op, + const SymExpr *lhs, QualType type); - DefinedOrUnknownSVal makeNonLoc(const SymExpr *lhs, BinaryOperator::Opcode op, - const SymExpr *rhs, QualType type); + nonloc::SymbolVal makeNonLoc(const SymExpr *lhs, BinaryOperator::Opcode op, + const SymExpr *rhs, QualType type); - DefinedOrUnknownSVal makeNonLoc(const SymExpr *operand, - UnaryOperator::Opcode op, QualType type); + nonloc::SymbolVal makeNonLoc(const SymExpr *operand, UnaryOperator::Opcode op, + QualType type); /// Create a NonLoc value for cast. - DefinedOrUnknownSVal makeNonLoc(const SymExpr *operand, QualType fromTy, - QualType toTy); + nonloc::SymbolVal makeNonLoc(const SymExpr *operand, QualType fromTy, + QualType toTy); nonloc::ConcreteInt makeTruthVal(bool b, QualType type) { return nonloc::ConcreteInt(BasicVals.getTruthValue(b, type)); diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h index 5239663788fb4..b2e58a95fd2ec 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h @@ -15,6 +15,7 @@ #define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SYMBOLMANAGER_H #include "clang/AST/Expr.h" +#include "clang/AST/OperationKinds.h" #include "clang/AST/Type.h" #include "clang/Analysis/AnalysisDeclContext.h" #include "clang/Basic/LLVM.h" @@ -27,9 +28,11 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/ImmutableSet.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Support/Allocator.h" #include <cassert> +#include <type_traits> namespace clang { @@ -47,7 +50,7 @@ class SymbolRegionValue : public SymbolData { friend class SymExprAllocator; SymbolRegionValue(SymbolID sym, const TypedValueRegion *r) - : SymbolData(SymbolRegionValueKind, sym), R(r) { + : SymbolData(ClassKind, sym), R(r) { assert(r); assert(isValidTypeForSymbol(r->getValueType())); } @@ -57,7 +60,7 @@ class SymbolRegionValue : public SymbolData { const TypedValueRegion *getRegion() const { return R; } static void Profile(llvm::FoldingSetNodeID& profile, const TypedValueRegion* R) { - profile.AddInteger((unsigned) SymbolRegionValueKind); + profile.AddInteger((unsigned)ClassKind); profile.AddPointer(R); } @@ -91,8 +94,8 @@ class SymbolConjured : public SymbolData { SymbolConjured(SymbolID sym, ConstCFGElementRef elem, const LocationContext *lctx, QualType t, unsigned count, const void *symbolTag) - : SymbolData(SymbolConjuredKind, sym), Elem(elem), T(t), Count(count), - LCtx(lctx), SymbolTag(symbolTag) { + : SymbolData(ClassKind, sym), Elem(elem), T(t), Count(count), LCtx(lctx), + SymbolTag(symbolTag) { assert(lctx); assert(isValidTypeForSymbol(t)); } @@ -116,7 +119,7 @@ class SymbolConjured : public SymbolData { static void Profile(llvm::FoldingSetNodeID &profile, ConstCFGElementRef Elem, const LocationContext *LCtx, QualType T, unsigned Count, const void *SymbolTag) { - profile.AddInteger((unsigned)SymbolConjuredKind); + profile.AddInteger((unsigned)ClassKind); profile.Add(Elem); profile.AddPointer(LCtx); profile.Add(T); @@ -134,6 +137,101 @@ class SymbolConjured : public SymbolData { static constexpr bool classof(Kind K) { return K == ClassKind; } }; +/// A symbol representing the result of an expression that became too +/// complicated. In other words, its complexity would have surpassed the +/// MaxSymbolComplexity threshold. +/// TODO: When the MaxSymbolComplexity is reached, we should propagate the taint +/// info to it. +class SymbolTooComplex final : public SymbolData { + // Pointer to either "SymExpr" or "APSInt". + const void *LHS; + const void *RHS = nullptr; // optional + QualType Ty; + using OpKindType = std::make_unsigned_t< + std::common_type_t<BinaryOperatorKind, UnaryOperatorKind>>; + OpKindType Op = 0; + + friend class SymExprAllocator; + + static const void *getPointer(APSIntPtr Value) { return Value.get(); } + static const void *getPointer(const SymExpr *Value) { return Value; } + + template <typename LHSTYPE, typename RHSTYPE> + using assertTypesForOperands = std::enable_if_t< + llvm::is_one_of<LHSTYPE, const SymExpr *, APSIntPtr>::value && + llvm::is_one_of<RHSTYPE, const SymExpr *, APSIntPtr>::value>; + + // Constructing from BinarySymExprImpl ctor arguments. + template <typename LHSTYPE, typename RHSTYPE, + typename = assertTypesForOperands<LHSTYPE, RHSTYPE>> + SymbolTooComplex(SymbolID Sym, LHSTYPE LHS, BinaryOperatorKind Op, + RHSTYPE RHS, QualType Ty) + : SymbolData(ClassKind, Sym), LHS(getPointer(LHS)), RHS(getPointer(RHS)), + Ty(Ty), Op(static_cast<OpKindType>(Op)) { + assert(this->LHS); + assert(this->RHS); + } + + // Constructing from UnarySymExpr ctor arguments. + SymbolTooComplex(SymbolID Sym, const SymExpr *Operand, UnaryOperatorKind Op, + QualType Ty) + : SymbolData(ClassKind, Sym), LHS(Operand), Ty(Ty), + Op(static_cast<OpKindType>(Op)) { + assert(LHS); + assert(!RHS); + } + + // Constructing from SymbolCast ctor arguments. + SymbolTooComplex(SymbolID Sym, const SymExpr *Operand, QualType, + QualType ToTy) + : SymbolData(ClassKind, Sym), LHS(Operand), Ty(ToTy), Op(~0) {} + +public: + QualType getType() const override { return Ty; } + + StringRef getKindStr() const override; + + void dumpToStream(raw_ostream &os) const override; + + static void Profile(llvm::FoldingSetNodeID &profile, const void *LHS, + OpKindType Op, const void *RHS, QualType Ty) { + profile.AddInteger((unsigned)ClassKind); + profile.AddPointer(LHS); + profile.AddPointer(RHS); + profile.Add(Ty); + profile.AddInteger(Op); + } + + // Profile from BinarySymExprImpl ctor arguments. + template <typename LHSTYPE, typename RHSTYPE, + typename = assertTypesForOperands<LHSTYPE, RHSTYPE>> + static void Profile(llvm::FoldingSetNodeID &profile, LHSTYPE LHS, + OpKindType Op, RHSTYPE RHS, QualType Ty) { + Profile(profile, getPointer(LHS), Op, getPointer(RHS), Ty); + } + + // Profile from UnarySymExpr ctor arguments. + static void Profile(llvm::FoldingSetNodeID &profile, const SymExpr *Operand, + UnaryOperatorKind Op, QualType Ty) { + Profile(profile, Operand, Op, /*RHS=*/nullptr, Ty); + } + + // Profile from SymbolCast ctor arguments. + static void Profile(llvm::FoldingSetNodeID &profile, const SymExpr *Operand, + QualType, QualType ToTy) { + Profile(profile, Operand, /*Op=*/~0, /*RHS=*/nullptr, ToTy); + } + + void Profile(llvm::FoldingSetNodeID &profile) override { + Profile(profile, LHS, Op, RHS, Ty); + } + + // Implement isa<T> support. + static constexpr Kind ClassKind = SymbolTooComplexKind; + static bool classof(const SymExpr *SE) { return classof(SE->getKind()); } + static constexpr bool classof(Kind K) { return K == ClassKind; } +}; + /// A symbol representing the value of a MemRegion whose parent region has /// symbolic value. class SymbolDerived : public SymbolData { @@ -142,7 +240,7 @@ class SymbolDerived : public SymbolData { friend class SymExprAllocator; SymbolDerived(SymbolID sym, SymbolRef parent, const TypedValueRegion *r) - : SymbolData(SymbolDerivedKind, sym), parentSymbol(parent), R(r) { + : SymbolData(ClassKind, sym), parentSymbol(parent), R(r) { assert(parent); assert(r); assert(isValidTypeForSymbol(r->getValueType())); @@ -163,7 +261,7 @@ class SymbolDerived : public SymbolData { static void Profile(llvm::FoldingSetNodeID& profile, SymbolRef parent, const TypedValueRegion *r) { - profile.AddInteger((unsigned) SymbolDerivedKind); + profile.AddInteger((unsigned)ClassKind); profile.AddPointer(r); profile.AddPointer(parent); } @@ -188,7 +286,7 @@ class SymbolExtent : public SymbolData { friend class SymExprAllocator; SymbolExtent(SymbolID sym, const SubRegion *r) - : SymbolData(SymbolExtentKind, sym), R(r) { + : SymbolData(ClassKind, sym), R(r) { assert(r); } @@ -203,7 +301,7 @@ class SymbolExtent : public SymbolData { void dumpToStream(raw_ostream &os) const override; static void Profile(llvm::FoldingSetNodeID& profile, const SubRegion *R) { - profile.AddInteger((unsigned) SymbolExtentKind); + profile.AddInteger((unsigned)ClassKind); profile.AddPointer(R); } @@ -214,7 +312,7 @@ class SymbolExtent : public SymbolData { // Implement isa<T> support. static constexpr Kind ClassKind = SymbolExtentKind; static bool classof(const SymExpr *SE) { return classof(SE->getKind()); } - static constexpr bool classof(Kind K) { return K == SymbolExtentKind; } + static constexpr bool classof(Kind K) { return K == ClassKind; } }; /// SymbolMetadata - Represents path-dependent metadata about a specific region. @@ -232,16 +330,16 @@ class SymbolMetadata : public SymbolData { const void *Tag; friend class SymExprAllocator; - SymbolMetadata(SymbolID sym, const MemRegion* r, const Stmt *s, QualType t, + SymbolMetadata(SymbolID sym, const MemRegion *r, const Stmt *s, QualType t, const LocationContext *LCtx, unsigned count, const void *tag) - : SymbolData(SymbolMetadataKind, sym), R(r), S(s), T(t), LCtx(LCtx), - Count(count), Tag(tag) { - assert(r); - assert(s); - assert(isValidTypeForSymbol(t)); - assert(LCtx); - assert(tag); - } + : SymbolData(ClassKind, sym), R(r), S(s), T(t), LCtx(LCtx), Count(count), + Tag(tag) { + assert(r); + assert(s); + assert(isValidTypeForSymbol(t)); + assert(LCtx); + assert(tag); + } public: LLVM_ATTRIBUTE_RETURNS_NONNULL @@ -267,7 +365,7 @@ class SymbolMetadata : public SymbolData { static void Profile(llvm::FoldingSetNodeID &profile, const MemRegion *R, const Stmt *S, QualType T, const LocationContext *LCtx, unsigned Count, const void *Tag) { - profile.AddInteger((unsigned)SymbolMetadataKind); + profile.AddInteger((unsigned)ClassKind); profile.AddPointer(R); profile.AddPointer(S); profile.Add(T); @@ -299,8 +397,8 @@ class SymbolCast : public SymExpr { friend class SymExprAllocator; SymbolCast(SymbolID Sym, const SymExpr *In, QualType From, QualType To) - : SymExpr(SymbolCastKind, Sym, computeComplexity(In, From, To)), - Operand(In), FromTy(From), ToTy(To) { + : SymExpr(ClassKind, Sym, computeComplexity(In, From, To)), Operand(In), + FromTy(From), ToTy(To) { assert(In); assert(isValidTypeForSymbol(From)); // FIXME: GenericTaintChecker creates symbols of void type. @@ -321,7 +419,7 @@ class SymbolCast : public SymExpr { static void Profile(llvm::FoldingSetNodeID& ID, const SymExpr *In, QualType From, QualType To) { - ID.AddInteger((unsigned) SymbolCastKind); + ID.AddInteger((unsigned)ClassKind); ID.AddPointer(In); ID.Add(From); ID.Add(To); @@ -347,8 +445,8 @@ class UnarySymExpr : public SymExpr { friend class SymExprAllocator; UnarySymExpr(SymbolID Sym, const SymExpr *In, UnaryOperator::Opcode Op, QualType T) - : SymExpr(UnarySymExprKind, Sym, computeComplexity(In, Op, T)), - Operand(In), Op(Op), T(T) { + : SymExpr(ClassKind, Sym, computeComplexity(In, Op, T)), Operand(In), + Op(Op), T(T) { // Note, some unary operators are modeled as a binary operator. E.g. ++x is // modeled as x + 1. assert((Op == UO_Minus || Op == UO_Not) && "non-supported unary expression"); @@ -373,7 +471,7 @@ class UnarySymExpr : public SymExpr { static void Profile(llvm::FoldingSetNodeID &ID, const SymExpr *In, UnaryOperator::Opcode Op, QualType T) { - ID.AddInteger((unsigned)UnarySymExprKind); + ID.AddInteger((unsigned)ClassKind); ID.AddPointer(In); ID.AddInteger(Op); ID.Add(T); @@ -498,33 +596,6 @@ using IntSymExpr = BinarySymExprImpl<APSIntPtr, const SymExpr *, using SymSymExpr = BinarySymExprImpl<const SymExpr *, const SymExpr *, SymExpr::Kind::SymSymExprKind>; -struct MaybeSymExpr { - MaybeSymExpr() = default; - explicit MaybeSymExpr(SymbolRef Sym) : Sym(Sym) {} - bool isValid() const { return Sym; } - bool isInvalid() const { return !isValid(); } - SymbolRef operator->() const { return Sym; } - - SymbolRef getOrNull() const { return Sym; } - template <typename SymT> const SymT *getOrNull() const { - return llvm::dyn_cast_if_present<SymT>(Sym); - } - - DefinedOrUnknownSVal getOrUnknown() const { - if (isInvalid()) - return UnknownVal(); - return nonloc::SymbolVal(Sym); - } - - nonloc::SymbolVal getOrAssert() const { - assert(Sym); - return nonloc::SymbolVal(Sym); - } - -private: - SymbolRef Sym = nullptr; -}; - class SymExprAllocator { SymbolID NextSymbolID = 0; llvm::BumpPtrAllocator &Alloc; @@ -569,7 +640,8 @@ class SymbolManager { /// Create or retrieve a SymExpr of type \p SymExprT for the given arguments. /// Use the arguments to check for an existing SymExpr and return it, /// otherwise, create a new one and keep a pointer to it to avoid duplicates. - template <typename SymExprT, typename... Args> auto acquire(Args &&...args); + template <typename SymExprT, typename... Args> + LLVM_ATTRIBUTE_RETURNS_NONNULL const auto *acquire(Args &&...args); const SymbolConjured *conjureSymbol(ConstCFGElementRef Elem, const LocationContext *LCtx, QualType T, @@ -707,14 +779,15 @@ class SymbolVisitor { virtual bool VisitMemRegion(const MemRegion *) { return true; } }; +// Returns a pointer to T if T is a SymbolData, otherwise SymExpr. template <typename T, typename... Args> -auto SymbolManager::acquire(Args &&...args) { +const auto *SymbolManager::acquire(Args &&...args) { constexpr bool IsSymbolData = SymbolData::classof(T::ClassKind); if constexpr (IsSymbolData) { assert(T::computeComplexity(args...) == 1); } else { if (T::computeComplexity(args...) > MaxCompComplexity) { - return MaybeSymExpr(); + return static_cast<const SymExpr *>(acquire<SymbolTooComplex>(args...)); } } @@ -725,12 +798,13 @@ auto SymbolManager::acquire(Args &&...args) { if (!SD) { SD = Alloc.make<T>(std::forward<Args>(args)...); DataSet.InsertNode(SD, InsertPos); + assert(SD->complexity() <= MaxCompComplexity); } if constexpr (IsSymbolData) { return cast<T>(SD); } else { - return MaybeSymExpr(SD); + return SD; } } diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/Symbols.def b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/Symbols.def index b93f8e2501559..edf71553b4b1c 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/Symbols.def +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/Symbols.def @@ -45,6 +45,7 @@ SYMBOL(SymbolCast, SymExpr) ABSTRACT_SYMBOL(SymbolData, SymExpr) SYMBOL(SymbolConjured, SymbolData) + SYMBOL(SymbolTooComplex, SymbolData) SYMBOL(SymbolDerived, SymbolData) SYMBOL(SymbolExtent, SymbolData) SYMBOL(SymbolMetadata, SymbolData) diff --git a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp index 599c787f64a3a..0e84a7046ad08 100644 --- a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp @@ -1473,10 +1473,8 @@ class SymbolicRangeInferrer return getRangeForNegatedExpr( [SSE, State = this->State]() -> SymbolRef { if (SSE->getOpcode() == BO_Sub) - return State->getSymbolManager() - .acquire<SymSymExpr>(SSE->getRHS(), BO_Sub, SSE->getLHS(), - SSE->getType()) - .getOrNull(); + return State->getSymbolManager().acquire<SymSymExpr>( + SSE->getRHS(), BO_Sub, SSE->getLHS(), SSE->getType()); return nullptr; }, SSE->getType()); @@ -1485,9 +1483,8 @@ class SymbolicRangeInferrer std::optional<RangeSet> getRangeForNegatedSym(SymbolRef Sym) { return getRangeForNegatedExpr( [Sym, State = this->State]() { - return State->getSymbolManager() - .acquire<UnarySymExpr>(Sym, UO_Minus, Sym->getType()) - .getOrNull(); + return State->getSymbolManager().acquire<UnarySymExpr>( + Sym, UO_Minus, Sym->getType()); }, Sym->getType()); } @@ -1500,13 +1497,8 @@ class SymbolicRangeInferrer if (!IsCommutative) return std::nullopt; - SymbolRef Commuted = State->getSymbolManager() - .acquire<SymSymExpr>(SSE->getRHS(), Op, - SSE->getLHS(), SSE->getType()) - .getOrNull(); - if (!Commuted) - return std::nullopt; - + SymbolRef Commuted = State->getSymbolManager().acquire<SymSymExpr>( + SSE->getRHS(), Op, SSE->getLHS(), SSE->getType()); if (const RangeSet *Range = getConstraint(State, Commuted)) return *Range; return std::nullopt; @@ -1548,10 +1540,7 @@ class SymbolicRangeInferrer // Let's find an expression e.g. (x < y). BinaryOperatorKind QueriedOP = OperatorRelationsTable::getOpFromIndex(i); - SymbolRef SymSym = - SymMgr.acquire<SymSymExpr>(LHS, QueriedOP, RHS, T).getOrNull(); - if (!SymSym) - continue; + SymbolRef SymSym = SymMgr.acquire<SymSymExpr>(LHS, QueriedOP, RHS, T); const RangeSet *QueriedRangeSet = getConstraint(State, SymSym); // If ranges were not previously found, @@ -1559,9 +1548,7 @@ class SymbolicRangeInferrer if (!QueriedRangeSet) { const BinaryOperatorKind ROP = BinaryOperator::reverseComparisonOp(QueriedOP); - SymSym = SymMgr.acquire<SymSymExpr>(RHS, ROP, LHS, T).getOrNull(); - if (!SymSym) - continue; + SymSym = SymMgr.acquire<SymSymExpr>(RHS, ROP, LHS, T); QueriedRangeSet = getConstraint(State, SymSym); } diff --git a/clang/lib/StaticAnalyzer/Core/RangedConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/RangedConstraintManager.cpp index 8b86b48ccf8a9..94dcdaf327689 100644 --- a/clang/lib/StaticAnalyzer/Core/RangedConstraintManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/RangedConstraintManager.cpp @@ -62,11 +62,8 @@ ProgramStateRef RangedConstraintManager::assumeSym(ProgramStateRef State, SymbolManager &SymMgr = getSymbolManager(); QualType DiffTy = SymMgr.getContext().getPointerDiffType(); - SymbolRef Subtraction = SymMgr - .acquire<SymSymExpr>(SSE->getRHS(), BO_Sub, - SSE->getLHS(), DiffTy) - .getOrNull(); - assert(Subtraction && "Just swapped the operands"); + SymbolRef Subtraction = SymMgr.acquire<SymSymExpr>( + SSE->getRHS(), BO_Sub, SSE->getLHS(), DiffTy); const llvm::APSInt &Zero = getBasicVals().getValue(0, DiffTy); Op = BinaryOperator::reverseComparisonOp(Op); @@ -79,12 +76,8 @@ ProgramStateRef RangedConstraintManager::assumeSym(ProgramStateRef State, SymbolManager &SymMgr = getSymbolManager(); QualType ExprType = SSE->getType(); - SymbolRef CanonicalEquality = - SymMgr - .acquire<SymSymExpr>(SSE->getLHS(), BO_EQ, SSE->getRHS(), - ExprType) - .getOrNull(); - assert(CanonicalEquality && "Just swapped the operands"); + SymbolRef CanonicalEquality = SymMgr.acquire<SymSymExpr>( + SSE->getLHS(), BO_EQ, SSE->getRHS(), ExprType); bool WasEqual = SSE->getOpcode() == BO_EQ; bool IsExpectedEqual = WasEqual == Assumption; diff --git a/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp b/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp index 18d6b907fa899..a0f959d8adc1a 100644 --- a/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp +++ b/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp @@ -74,50 +74,48 @@ DefinedOrUnknownSVal SValBuilder::makeZeroVal(QualType type) { return UnknownVal(); } -DefinedOrUnknownSVal SValBuilder::makeNonLoc(const SymExpr *lhs, - BinaryOperator::Opcode op, - APSIntPtr rhs, QualType type) { +nonloc::SymbolVal SValBuilder::makeNonLoc(const SymExpr *lhs, + BinaryOperator::Opcode op, + APSIntPtr rhs, QualType type) { // The Environment ensures we always get a persistent APSInt in // BasicValueFactory, so we don't need to get the APSInt from // BasicValueFactory again. assert(lhs); assert(!Loc::isLocType(type)); - return SymMgr.acquire<SymIntExpr>(lhs, op, rhs, type).getOrUnknown(); + return nonloc::SymbolVal(SymMgr.acquire<SymIntExpr>(lhs, op, rhs, type)); } -DefinedOrUnknownSVal SValBuilder::makeNonLoc(APSIntPtr lhs, - BinaryOperator::Opcode op, - const SymExpr *rhs, - QualType type) { +nonloc::SymbolVal SValBuilder::makeNonLoc(APSIntPtr lhs, + BinaryOperator::Opcode op, + const SymExpr *rhs, QualType type) { assert(rhs); assert(!Loc::isLocType(type)); - return SymMgr.acquire<IntSymExpr>(lhs, op, rhs, type).getOrUnknown(); + return nonloc::SymbolVal(SymMgr.acquire<IntSymExpr>(lhs, op, rhs, type)); } -DefinedOrUnknownSVal SValBuilder::makeNonLoc(const SymExpr *lhs, - BinaryOperator::Opcode op, - const SymExpr *rhs, - QualType type) { +nonloc::SymbolVal SValBuilder::makeNonLoc(const SymExpr *lhs, + BinaryOperator::Opcode op, + const SymExpr *rhs, QualType type) { assert(lhs && rhs); assert(!Loc::isLocType(type)); - return SymMgr.acquire<SymSymExpr>(lhs, op, rhs, type).getOrUnknown(); + return nonloc::SymbolVal(SymMgr.acquire<SymSymExpr>(lhs, op, rhs, type)); } -DefinedOrUnknownSVal SValBuilder::makeNonLoc(const SymExpr *operand, - UnaryOperator::Opcode op, - QualType type) { +nonloc::SymbolVal SValBuilder::makeNonLoc(const SymExpr *operand, + UnaryOperator::Opcode op, + QualType type) { assert(operand); assert(!Loc::isLocType(type)); - return SymMgr.acquire<UnarySymExpr>(operand, op, type).getOrUnknown(); + return nonloc::SymbolVal(SymMgr.acquire<UnarySymExpr>(operand, op, type)); } -DefinedOrUnknownSVal SValBuilder::makeNonLoc(const SymExpr *operand, - QualType fromTy, QualType toTy) { +nonloc::SymbolVal SValBuilder::makeNonLoc(const SymExpr *operand, + QualType fromTy, QualType toTy) { assert(operand); assert(!Loc::isLocType(toTy)); if (fromTy == toTy) return nonloc::SymbolVal(operand); - return SymMgr.acquire<SymbolCast>(operand, fromTy, toTy).getOrUnknown(); + return nonloc::SymbolVal(SymMgr.acquire<SymbolCast>(operand, fromTy, toTy)); } SVal SValBuilder::convertToArrayIndex(SVal val) { @@ -441,8 +439,6 @@ SVal SValBuilder::makeSymExprValNN(BinaryOperator::Opcode Op, auto lInt = LHS.getAs<nonloc::ConcreteInt>(); auto rInt = RHS.getAs<nonloc::ConcreteInt>(); - // TODO: When the Max Complexity is reached, we should conjure a symbol - // instead of generating an Unknown value and propagate the taint info to it. if (symLHS && symRHS) return makeNonLoc(symLHS, Op, symRHS, ResultTy); @@ -1051,10 +1047,6 @@ class EvalCastVisitor : public SValVisitor<EvalCastVisitor, SVal> { // (llong)(ulong)(uint x) -> (llong)(uint x) (sizeof(ulong) == // sizeof(uint)) - auto getAsSymValOrV = [V](DefinedOrUnknownSVal Val) { - return Val.getAs<nonloc::SymbolVal>().value_or(V); - }; - SymbolRef SE = V.getSymbol(); QualType T = Context.getCanonicalType(SE->getType()); @@ -1062,14 +1054,14 @@ class EvalCastVisitor : public SValVisitor<EvalCastVisitor, SVal> { return V; if (!isa<SymbolCast>(SE)) - return getAsSymValOrV(VB.makeNonLoc(SE, T, CastTy)); + return VB.makeNonLoc(SE, T, CastTy); SymbolRef RootSym = cast<SymbolCast>(SE)->getOperand(); QualType RT = RootSym->getType().getCanonicalType(); // FIXME support simplification from non-integers. if (!RT->isIntegralOrEnumerationType()) - return getAsSymValOrV(VB.makeNonLoc(SE, T, CastTy)); + return VB.makeNonLoc(SE, T, CastTy); BasicValueFactory &BVF = VB.getBasicValueFactory(); APSIntType CTy = BVF.getAPSIntType(CastTy); @@ -1082,7 +1074,7 @@ class EvalCastVisitor : public SValVisitor<EvalCastVisitor, SVal> { const bool isSameType = (RT == CastTy); if (isSameType) return nonloc::SymbolVal(RootSym); - return getAsSymValOrV(VB.makeNonLoc(RootSym, RT, CastTy)); + return VB.makeNonLoc(RootSym, RT, CastTy); } APSIntType RTy = BVF.getAPSIntType(RT); @@ -1091,9 +1083,9 @@ class EvalCastVisitor : public SValVisitor<EvalCastVisitor, SVal> { const bool UR = RTy.isUnsigned(); if (((WT > WR) && (UR || !UT)) || ((WT == WR) && (UT == UR))) - return getAsSymValOrV(VB.makeNonLoc(RootSym, RT, CastTy)); + return VB.makeNonLoc(RootSym, RT, CastTy); - return getAsSymValOrV(VB.makeNonLoc(SE, T, CastTy)); + return VB.makeNonLoc(SE, T, CastTy); } }; } // end anonymous namespace diff --git a/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp b/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp index 07458001ecb0f..ef8c3b6b4bdac 100644 --- a/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp +++ b/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp @@ -290,10 +290,10 @@ static std::pair<SymbolRef, APSIntPtr> decomposeSymbol(SymbolRef Sym, // Simplify "(LSym + LInt) Op (RSym + RInt)" assuming all values are of the // same signed integral type and no overflows occur (which should be checked // by the caller). -static std::optional<NonLoc> -doRearrangeUnchecked(ProgramStateRef State, BinaryOperator::Opcode Op, - SymbolRef LSym, llvm::APSInt LInt, SymbolRef RSym, - llvm::APSInt RInt) { +static NonLoc doRearrangeUnchecked(ProgramStateRef State, + BinaryOperator::Opcode Op, SymbolRef LSym, + llvm::APSInt LInt, SymbolRef RSym, + llvm::APSInt RInt) { SValBuilder &SVB = State->getStateManager().getSValBuilder(); BasicValueFactory &BV = SVB.getBasicValueFactory(); SymbolManager &SymMgr = SVB.getSymbolManager(); @@ -320,7 +320,7 @@ doRearrangeUnchecked(ProgramStateRef State, BinaryOperator::Opcode Op, nonloc::ConcreteInt(BV.getValue(RInt)), ResultTy) .castAs<NonLoc>(); - MaybeSymExpr ResultSym; + SymbolRef ResultSym = nullptr; BinaryOperator::Opcode ResultOp; llvm::APSInt ResultInt; if (BinaryOperator::isComparisonOp(Op)) { @@ -346,20 +346,12 @@ doRearrangeUnchecked(ProgramStateRef State, BinaryOperator::Opcode Op, ResultOp = BO_Sub; } else if (ResultInt == 0) { // Shortcut: Simplify "$x + 0" to "$x". - if (ResultSym.isInvalid()) - return std::nullopt; - return ResultSym.getOrAssert(); + return nonloc::SymbolVal(ResultSym); } } - - if (ResultSym.isInvalid()) - return std::nullopt; - APSIntPtr PersistentResultInt = BV.getValue(ResultInt); - return SymMgr - .acquire<SymIntExpr>(ResultSym.getOrNull(), ResultOp, PersistentResultInt, - ResultTy) - .getOrAssert(); + return nonloc::SymbolVal(SymMgr.acquire<SymIntExpr>( + ResultSym, ResultOp, PersistentResultInt, ResultTy)); } // Rearrange if symbol type matches the result type and if the operator is a @@ -429,8 +421,9 @@ static std::optional<NonLoc> tryRearrange(ProgramStateRef State, } SVal SimpleSValBuilder::evalBinOpNN(ProgramStateRef state, - BinaryOperator::Opcode op, NonLoc lhs, - NonLoc rhs, QualType resultTy) { + BinaryOperator::Opcode op, + NonLoc lhs, NonLoc rhs, + QualType resultTy) { NonLoc InputLHS = lhs; NonLoc InputRHS = rhs; diff --git a/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp b/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp index 6f4244811261e..ed79076829264 100644 --- a/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp @@ -32,6 +32,7 @@ using namespace ento; void SymExpr::anchor() {} StringRef SymbolConjured::getKindStr() const { return "conj_$"; } +StringRef SymbolTooComplex::getKindStr() const { return "complex_$"; } StringRef SymbolDerived::getKindStr() const { return "derived_$"; } StringRef SymbolExtent::getKindStr() const { return "extent_$"; } StringRef SymbolMetadata::getKindStr() const { return "meta_$"; } @@ -128,6 +129,10 @@ void SymbolConjured::dumpToStream(raw_ostream &os) const { os << ", #" << Count << '}'; } +void SymbolTooComplex::dumpToStream(raw_ostream &os) const { + os << getKindStr() << getSymbolID(); +} + void SymbolDerived::dumpToStream(raw_ostream &os) const { os << getKindStr() << getSymbolID() << '{' << getParentSymbol() << ',' << getRegion() << '}'; @@ -176,6 +181,7 @@ void SymExpr::symbol_iterator::expand() { switch (SE->getKind()) { case SymExpr::SymbolRegionValueKind: case SymExpr::SymbolConjuredKind: + case SymExpr::SymbolTooComplexKind: case SymExpr::SymbolDerivedKind: case SymExpr::SymbolExtentKind: case SymExpr::SymbolMetadataKind: @@ -352,6 +358,7 @@ bool SymbolReaper::isLive(SymbolRef sym) { KnownLive = isReadableRegion(cast<SymbolRegionValue>(sym)->getRegion()); break; case SymExpr::SymbolConjuredKind: + case SymExpr::SymbolTooComplexKind: KnownLive = false; break; case SymExpr::SymbolDerivedKind: diff --git a/clang/test/Analysis/ensure-capped-symbol-complexity.cpp b/clang/test/Analysis/ensure-capped-symbol-complexity.cpp index f3b5e633fee79..bd076f2709422 100644 --- a/clang/test/Analysis/ensure-capped-symbol-complexity.cpp +++ b/clang/test/Analysis/ensure-capped-symbol-complexity.cpp @@ -34,7 +34,7 @@ void pumpSymbolComplexity() { // This dump used to print a hugely complicated symbol, over 800 complexity, taking really long to simplify. clang_analyzer_dump(*p); - // expected-warning-re@-1 {{{{^}}conj_${{[0-9]+}}{int, LC{{[0-9]+}}, S{{[0-9]+}}, #{{[0-9]+}}} [debug.ExprInspection]{{$}}}} + // expected-warning-re@-1 {{{{^}}(complex_${{[0-9]+}}) & 1023}} [debug.ExprInspection]{{$}}}} } void hugelyOverComplicatedSymbol() { @@ -45,9 +45,14 @@ void hugelyOverComplicatedSymbol() { HUNDRED_TIMES(*p = (*p + 1) & 1023;) HUNDRED_TIMES(*p = (*p + 1) & 1023;) HUNDRED_TIMES(*p = (*p + 1) & 1023;) + *p = (*p + 1) & 1023; + *p = (*p + 1) & 1023; + *p = (*p + 1) & 1023; + *p = (*p + 1) & 1023; + // This dump used to print a hugely complicated symbol, over 800 complexity, taking really long to simplify. clang_analyzer_dump(*p); - // expected-warning-re@-1 {{{{^}}((((((((conj_${{[0-9]+}}{int, LC{{[0-9]+}}, S{{[0-9]+}}, #{{[0-9]+}}}) + 1) & 1023) + 1) & 1023) + 1) & 1023) + 1) & 1023 [debug.ExprInspection]{{$}}}} + // expected-warning-re@-1 {{{{^}}(((complex_${{[0-9]+}}) & 1023) + 1) & 1023 [debug.ExprInspection]{{$}}}} #undef HUNDRED_TIMES #undef TEN_TIMES } From b9edbc75bc87e4e263cb629f088ae9977a9cac03 Mon Sep 17 00:00:00 2001 From: Balazs Benics <balazs.ben...@sonarsource.com> Date: Tue, 24 Jun 2025 14:58:05 +0200 Subject: [PATCH 3/3] Rename SymbolTooComplex to SymbolOverlyComplex --- .../Core/PathSensitive/SymbolManager.h | 12 ++++++------ .../StaticAnalyzer/Core/PathSensitive/Symbols.def | 2 +- clang/lib/StaticAnalyzer/Core/SymbolManager.cpp | 8 ++++---- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h index b2e58a95fd2ec..3ed94c7fe1b37 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h @@ -142,7 +142,7 @@ class SymbolConjured : public SymbolData { /// MaxSymbolComplexity threshold. /// TODO: When the MaxSymbolComplexity is reached, we should propagate the taint /// info to it. -class SymbolTooComplex final : public SymbolData { +class SymbolOverlyComplex final : public SymbolData { // Pointer to either "SymExpr" or "APSInt". const void *LHS; const void *RHS = nullptr; // optional @@ -164,7 +164,7 @@ class SymbolTooComplex final : public SymbolData { // Constructing from BinarySymExprImpl ctor arguments. template <typename LHSTYPE, typename RHSTYPE, typename = assertTypesForOperands<LHSTYPE, RHSTYPE>> - SymbolTooComplex(SymbolID Sym, LHSTYPE LHS, BinaryOperatorKind Op, + SymbolOverlyComplex(SymbolID Sym, LHSTYPE LHS, BinaryOperatorKind Op, RHSTYPE RHS, QualType Ty) : SymbolData(ClassKind, Sym), LHS(getPointer(LHS)), RHS(getPointer(RHS)), Ty(Ty), Op(static_cast<OpKindType>(Op)) { @@ -173,7 +173,7 @@ class SymbolTooComplex final : public SymbolData { } // Constructing from UnarySymExpr ctor arguments. - SymbolTooComplex(SymbolID Sym, const SymExpr *Operand, UnaryOperatorKind Op, + SymbolOverlyComplex(SymbolID Sym, const SymExpr *Operand, UnaryOperatorKind Op, QualType Ty) : SymbolData(ClassKind, Sym), LHS(Operand), Ty(Ty), Op(static_cast<OpKindType>(Op)) { @@ -182,7 +182,7 @@ class SymbolTooComplex final : public SymbolData { } // Constructing from SymbolCast ctor arguments. - SymbolTooComplex(SymbolID Sym, const SymExpr *Operand, QualType, + SymbolOverlyComplex(SymbolID Sym, const SymExpr *Operand, QualType, QualType ToTy) : SymbolData(ClassKind, Sym), LHS(Operand), Ty(ToTy), Op(~0) {} @@ -227,7 +227,7 @@ class SymbolTooComplex final : public SymbolData { } // Implement isa<T> support. - static constexpr Kind ClassKind = SymbolTooComplexKind; + static constexpr Kind ClassKind = SymbolOverlyComplexKind; static bool classof(const SymExpr *SE) { return classof(SE->getKind()); } static constexpr bool classof(Kind K) { return K == ClassKind; } }; @@ -787,7 +787,7 @@ const auto *SymbolManager::acquire(Args &&...args) { assert(T::computeComplexity(args...) == 1); } else { if (T::computeComplexity(args...) > MaxCompComplexity) { - return static_cast<const SymExpr *>(acquire<SymbolTooComplex>(args...)); + return static_cast<const SymExpr *>(acquire<SymbolOverlyComplex>(args...)); } } diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/Symbols.def b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/Symbols.def index edf71553b4b1c..1c96e05f05b7d 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/Symbols.def +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/Symbols.def @@ -45,7 +45,7 @@ SYMBOL(SymbolCast, SymExpr) ABSTRACT_SYMBOL(SymbolData, SymExpr) SYMBOL(SymbolConjured, SymbolData) - SYMBOL(SymbolTooComplex, SymbolData) + SYMBOL(SymbolOverlyComplex, SymbolData) SYMBOL(SymbolDerived, SymbolData) SYMBOL(SymbolExtent, SymbolData) SYMBOL(SymbolMetadata, SymbolData) diff --git a/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp b/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp index ed79076829264..2149413189dcd 100644 --- a/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp @@ -32,7 +32,7 @@ using namespace ento; void SymExpr::anchor() {} StringRef SymbolConjured::getKindStr() const { return "conj_$"; } -StringRef SymbolTooComplex::getKindStr() const { return "complex_$"; } +StringRef SymbolOverlyComplex::getKindStr() const { return "complex_$"; } StringRef SymbolDerived::getKindStr() const { return "derived_$"; } StringRef SymbolExtent::getKindStr() const { return "extent_$"; } StringRef SymbolMetadata::getKindStr() const { return "meta_$"; } @@ -129,7 +129,7 @@ void SymbolConjured::dumpToStream(raw_ostream &os) const { os << ", #" << Count << '}'; } -void SymbolTooComplex::dumpToStream(raw_ostream &os) const { +void SymbolOverlyComplex::dumpToStream(raw_ostream &os) const { os << getKindStr() << getSymbolID(); } @@ -181,7 +181,7 @@ void SymExpr::symbol_iterator::expand() { switch (SE->getKind()) { case SymExpr::SymbolRegionValueKind: case SymExpr::SymbolConjuredKind: - case SymExpr::SymbolTooComplexKind: + case SymExpr::SymbolOverlyComplexKind: case SymExpr::SymbolDerivedKind: case SymExpr::SymbolExtentKind: case SymExpr::SymbolMetadataKind: @@ -358,7 +358,7 @@ bool SymbolReaper::isLive(SymbolRef sym) { KnownLive = isReadableRegion(cast<SymbolRegionValue>(sym)->getRegion()); break; case SymExpr::SymbolConjuredKind: - case SymExpr::SymbolTooComplexKind: + case SymExpr::SymbolOverlyComplexKind: KnownLive = false; break; case SymExpr::SymbolDerivedKind: _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits