Author: Samira Bakon Date: 2026-01-15T13:38:50-05:00 New Revision: 09e66be933f819b8287e9cee32887dfaa1d8d084
URL: https://github.com/llvm/llvm-project/commit/09e66be933f819b8287e9cee32887dfaa1d8d084 DIFF: https://github.com/llvm/llvm-project/commit/09e66be933f819b8287e9cee32887dfaa1d8d084.diff LOG: [clang][dataflow] Manage stacks of formulas directly instead of recur… (#175980) …sing during (de)serialization. Stack overflows have been seen in practice for very large formulas. Added: Modified: clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp clang/lib/Analysis/FlowSensitive/FormulaSerialization.cpp clang/unittests/Analysis/FlowSensitive/FormulaTest.cpp Removed: ################################################################################ diff --git a/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp b/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp index 4196d6821c184..6e3a270e6bed6 100644 --- a/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp +++ b/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp @@ -18,6 +18,8 @@ #include "clang/Analysis/FlowSensitive/Logger.h" #include "clang/Analysis/FlowSensitive/SimplifyConstraints.h" #include "clang/Analysis/FlowSensitive/Value.h" +#include "clang/Basic/LLVM.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SetVector.h" #include "llvm/Support/CommandLine.h" @@ -27,6 +29,7 @@ #include "llvm/Support/raw_ostream.h" #include <cassert> #include <memory> +#include <stack> #include <string> #include <utility> #include <vector> @@ -261,23 +264,33 @@ void DataflowAnalysisContext::addTransitiveFlowConditionConstraints( static void getReferencedAtoms(const Formula &F, llvm::DenseSet<dataflow::Atom> &Refs) { - switch (F.kind()) { - case Formula::AtomRef: - Refs.insert(F.getAtom()); - break; - case Formula::Literal: - break; - case Formula::Not: - getReferencedAtoms(*F.operands()[0], Refs); - break; - case Formula::And: - case Formula::Or: - case Formula::Implies: - case Formula::Equal: - ArrayRef<const Formula *> Operands = F.operands(); - getReferencedAtoms(*Operands[0], Refs); - getReferencedAtoms(*Operands[1], Refs); - break; + // Avoid recursion to avoid stack overflows from very large formulas. + // The shape of the tree structure for very large formulas is such that there + // are at most 2 children from any node, but there may be many generations. + std::stack<const Formula *> WorkList; + WorkList.push(&F); + + while (!WorkList.empty()) { + const Formula *Current = WorkList.top(); + WorkList.pop(); + switch (Current->kind()) { + case Formula::AtomRef: + Refs.insert(Current->getAtom()); + break; + case Formula::Literal: + break; + case Formula::Not: + WorkList.push(Current->operands()[0]); + break; + case Formula::And: + case Formula::Or: + case Formula::Implies: + case Formula::Equal: + ArrayRef<const Formula *> Operands = Current->operands(); + WorkList.push(Operands[0]); + WorkList.push(Operands[1]); + break; + } } } diff --git a/clang/lib/Analysis/FlowSensitive/FormulaSerialization.cpp b/clang/lib/Analysis/FlowSensitive/FormulaSerialization.cpp index df15a1d6eaadb..4fe2231b9ff65 100644 --- a/clang/lib/Analysis/FlowSensitive/FormulaSerialization.cpp +++ b/clang/lib/Analysis/FlowSensitive/FormulaSerialization.cpp @@ -11,12 +11,13 @@ #include "clang/Analysis/FlowSensitive/Formula.h" #include "clang/Basic/LLVM.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" -#include "llvm/Support/Allocator.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" #include <cassert> +#include <cstddef> +#include <stack> +#include <vector> namespace clang::dataflow { @@ -42,104 +43,162 @@ static char compactSigil(Formula::Kind K) { llvm_unreachable("unhandled formula kind"); } +// Avoids recursion to avoid stack overflows from very large formulas. void serializeFormula(const Formula &F, llvm::raw_ostream &OS) { - switch (Formula::numOperands(F.kind())) { - case 0: - switch (F.kind()) { - case Formula::AtomRef: - OS << F.getAtom(); + std::stack<const Formula *> WorkList; + WorkList.push(&F); + while (!WorkList.empty()) { + const Formula *Current = WorkList.top(); + WorkList.pop(); + switch (Formula::numOperands(Current->kind())) { + case 0: + switch (Current->kind()) { + case Formula::AtomRef: + OS << Current->getAtom(); + break; + case Formula::Literal: + OS << (Current->literal() ? 'T' : 'F'); + break; + default: + llvm_unreachable("unhandled formula kind"); + } break; - case Formula::Literal: - OS << (F.literal() ? 'T' : 'F'); + case 1: + OS << compactSigil(Current->kind()); + WorkList.push(Current->operands()[0]); + break; + case 2: + OS << compactSigil(Current->kind()); + WorkList.push(Current->operands()[1]); + WorkList.push(Current->operands()[0]); break; default: - llvm_unreachable("unhandled formula kind"); + llvm_unreachable("unhandled formula arity"); } - break; - case 1: - OS << compactSigil(F.kind()); - serializeFormula(*F.operands()[0], OS); - break; - case 2: - OS << compactSigil(F.kind()); - serializeFormula(*F.operands()[0], OS); - serializeFormula(*F.operands()[1], OS); - break; - default: - llvm_unreachable("unhandled formula arity"); } } -static llvm::Expected<const Formula *> -parsePrefix(llvm::StringRef &Str, Arena &A, - llvm::DenseMap<unsigned, Atom> &AtomMap) { - if (Str.empty()) - return llvm::createStringError(llvm::inconvertibleErrorCode(), - "unexpected end of input"); - - char Prefix = Str[0]; - Str = Str.drop_front(); +struct Operation { + Operation(Formula::Kind Kind) : Kind(Kind) {} + const Formula::Kind Kind; + const unsigned ExpectedNumOperands = Formula::numOperands(Kind); + std::vector<const Formula *> Operands; +}; - switch (Prefix) { - case 'T': - return &A.makeLiteral(true); - case 'F': - return &A.makeLiteral(false); - case 'V': { - unsigned AtomID; - if (Str.consumeInteger(10, AtomID)) - return llvm::createStringError(llvm::inconvertibleErrorCode(), - "expected atom id"); - auto [It, Inserted] = AtomMap.try_emplace(AtomID, Atom()); - if (Inserted) - It->second = A.makeAtom(); - return &A.makeAtomRef(It->second); - } - case '!': { - auto OperandOrErr = parsePrefix(Str, A, AtomMap); - if (!OperandOrErr) - return OperandOrErr.takeError(); - return &A.makeNot(**OperandOrErr); - } - case '&': - case '|': - case '>': - case '=': { - auto LeftOrErr = parsePrefix(Str, A, AtomMap); - if (!LeftOrErr) - return LeftOrErr.takeError(); - - auto RightOrErr = parsePrefix(Str, A, AtomMap); - if (!RightOrErr) - return RightOrErr.takeError(); +// Avoids recursion to avoid stack overflows from very large formulas. +static llvm::Expected<const Formula *> +parseFormulaInternal(llvm::StringRef &Str, Arena &A, + llvm::DenseMap<unsigned, Atom> &AtomMap) { + std::stack<Operation> ActiveOperations; - const Formula &LHS = **LeftOrErr; - const Formula &RHS = **RightOrErr; + while (true) { + if (ActiveOperations.empty() || + ActiveOperations.top().ExpectedNumOperands > + ActiveOperations.top().Operands.size()) { + if (Str.empty()) { + return llvm::createStringError(llvm::inconvertibleErrorCode(), + "unexpected end of input"); + } + char Prefix = Str[0]; + Str = Str.drop_front(); - switch (Prefix) { - case '&': - return &A.makeAnd(LHS, RHS); - case '|': - return &A.makeOr(LHS, RHS); - case '>': - return &A.makeImplies(LHS, RHS); - case '=': - return &A.makeEquals(LHS, RHS); - default: - llvm_unreachable("unexpected binary op"); + switch (Prefix) { + // Terminals + case 'T': + case 'F': + case 'V': { + const Formula *TerminalFormula; + switch (Prefix) { + case 'T': + TerminalFormula = &A.makeLiteral(true); + break; + case 'F': + TerminalFormula = &A.makeLiteral(false); + break; + case 'V': { + unsigned AtomID; + if (Str.consumeInteger(10, AtomID)) + return llvm::createStringError(llvm::inconvertibleErrorCode(), + "expected atom id"); + auto [It, Inserted] = AtomMap.try_emplace(AtomID, Atom()); + if (Inserted) + It->second = A.makeAtom(); + TerminalFormula = &A.makeAtomRef(It->second); + break; + } + default: + llvm_unreachable("unexpected terminal character"); + } + if (ActiveOperations.empty()) { + return TerminalFormula; + } + Operation *Op = &ActiveOperations.top(); + Op->Operands.push_back(TerminalFormula); + } break; + case '!': + ActiveOperations.emplace(Formula::Kind::Not); + break; + case '&': + ActiveOperations.emplace(Formula::Kind::And); + break; + case '|': + ActiveOperations.emplace(Formula::Kind::Or); + break; + case '>': + ActiveOperations.emplace(Formula::Kind::Implies); + break; + case '=': + ActiveOperations.emplace(Formula::Kind::Equal); + break; + default: + return llvm::createStringError(llvm::inconvertibleErrorCode(), + "unexpected prefix character: %c", + Prefix); + } + } else if (!ActiveOperations.empty() && + ActiveOperations.top().ExpectedNumOperands == + ActiveOperations.top().Operands.size()) { + Operation *Op = &ActiveOperations.top(); + const Formula *OpFormula = nullptr; + switch (Op->Kind) { + case Formula::Kind::Not: + OpFormula = &A.makeNot(*Op->Operands[0]); + break; + case Formula::Kind::And: + OpFormula = &A.makeAnd(*Op->Operands[0], *Op->Operands[1]); + break; + case Formula::Kind::Or: + OpFormula = &A.makeOr(*Op->Operands[0], *Op->Operands[1]); + break; + case Formula::Kind::Implies: + OpFormula = &A.makeImplies(*Op->Operands[0], *Op->Operands[1]); + break; + case Formula::Kind::Equal: + OpFormula = &A.makeEquals(*Op->Operands[0], *Op->Operands[1]); + break; + default: + return llvm::createStringError(llvm::inconvertibleErrorCode(), + "only unary and binary operations are " + "expected, but got Formula::Kind %u", + Op->Kind); + } + ActiveOperations.pop(); + if (ActiveOperations.empty()) + return OpFormula; + Op = &ActiveOperations.top(); + Op->Operands.push_back(OpFormula); + } else { + llvm_unreachable( + "we should never have added more operands than expected"); } } - default: - return llvm::createStringError(llvm::inconvertibleErrorCode(), - "unexpected prefix character: %c", Prefix); - } } llvm::Expected<const Formula *> parseFormula(llvm::StringRef Str, Arena &A, llvm::DenseMap<unsigned, Atom> &AtomMap) { size_t OriginalSize = Str.size(); - llvm::Expected<const Formula *> F = parsePrefix(Str, A, AtomMap); + llvm::Expected<const Formula *> F = parseFormulaInternal(Str, A, AtomMap); if (!F) return F.takeError(); if (!Str.empty()) diff --git a/clang/unittests/Analysis/FlowSensitive/FormulaTest.cpp b/clang/unittests/Analysis/FlowSensitive/FormulaTest.cpp index db9c02854c7fa..7a663fa5ae79f 100644 --- a/clang/unittests/Analysis/FlowSensitive/FormulaTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/FormulaTest.cpp @@ -9,10 +9,13 @@ #include "clang/Analysis/FlowSensitive/Formula.h" #include "clang/Analysis/FlowSensitive/Arena.h" #include "clang/Analysis/FlowSensitive/FormulaSerialization.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Testing/Support/Error.h" #include "gmock/gmock.h" #include "gtest/gtest.h" +#include <string> namespace { @@ -82,6 +85,12 @@ TEST_F(SerializeFormulaTest, NestedBinaryUnary) { EXPECT_EQ(Out, "=|V0V1V1"); } +TEST_F(SerializeFormulaTest, NestedBinaryUnaryNots) { + serializeFormula(A.makeEquals(A.makeOr(A1, A.makeNot(A2)), A.makeNot(A2)), + OS); + EXPECT_EQ(Out, "=|V0!V1!V1"); +} + TEST_F(SerializeFormulaTest, NestedBinaryBinary) { serializeFormula(A.makeEquals(A.makeOr(A1, A2), A.makeAnd(A1, A2)), OS); EXPECT_EQ(Out, "=|V0V1&V0V1"); @@ -94,7 +103,7 @@ class ParseFormulaTest : public ::testing::Test { AtomMap[1] = Atom2; } - // Convenience wrapper for `testParseFormula`. + // Convenience wrapper for `parseFormula`. llvm::Expected<const Formula *> testParseFormula(llvm::StringRef Str) { return parseFormula(Str, A, AtomMap); } @@ -154,6 +163,12 @@ TEST_F(ParseFormulaTest, NestedBinaryUnary) { HasValue(&A.makeEquals(A.makeOr(A1, A2), A2))); } +TEST_F(ParseFormulaTest, NestedBinaryUnaryNots) { + EXPECT_THAT_EXPECTED( + testParseFormula("=|V0!V1!V1"), + HasValue(&A.makeEquals(A.makeOr(A1, A.makeNot(A2)), A.makeNot(A2)))); +} + TEST_F(ParseFormulaTest, NestedBinaryBinary) { EXPECT_THAT_EXPECTED( testParseFormula("=|V0V1&V0V1"), @@ -194,6 +209,8 @@ TEST_F(ParseFormulaTest, MalformedFormulaFails) { EXPECT_THAT_EXPECTED(testParseFormula("|V0"), Failed()); EXPECT_THAT_EXPECTED(testParseFormula(">V0"), Failed()); EXPECT_THAT_EXPECTED(testParseFormula("=V0"), Failed()); + // Too many operands + EXPECT_THAT_EXPECTED(testParseFormula("=V0V1V2"), Failed()); } } // namespace _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
