llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang Author: Jan Voung (jvoung) <details> <summary>Changes</summary> Currently, the Clang Dataflow Framework only does Widen on backedges from structured loops. Missing some Widen calls (e.g., when there are backedges from gotos) could cause some analyses to iterate ~forever (until the max visits limit is hit). This adds a simple search for backedges, and triggers Widen on the additional backedge nodes. Fixes [issue 179083. ](https://github.com/llvm/llvm-project/issues/179083) --- Full diff: https://github.com/llvm/llvm-project/pull/179546.diff 7 Files Affected: - (added) clang/include/clang/Analysis/CFGBackEdges.h (+27) - (added) clang/lib/Analysis/CFGBackEdges.cpp (+68) - (modified) clang/lib/Analysis/CMakeLists.txt (+1) - (modified) clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp (+55-9) - (added) clang/unittests/Analysis/CFGBackEdgesTest.cpp (+239) - (modified) clang/unittests/Analysis/CMakeLists.txt (+1) - (modified) clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp (+53) ``````````diff diff --git a/clang/include/clang/Analysis/CFGBackEdges.h b/clang/include/clang/Analysis/CFGBackEdges.h new file mode 100644 index 0000000000000..46c86657502e2 --- /dev/null +++ b/clang/include/clang/Analysis/CFGBackEdges.h @@ -0,0 +1,27 @@ +//===- CFGBackEdges.h - Finds back edges in Clang CFGs -*- C++ ----------*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_ANALYSIS_CFG_BACKEDGES_H +#define LLVM_CLANG_ANALYSIS_CFG_BACKEDGES_H + +#include "clang/Analysis/CFG.h" +#include "llvm/ADT/DenseMap.h" + +namespace clang { + +/// Finds and returns back edges in Clang CFGs. The CFG already has some +/// backedge information for structured loops (\c CFGBlock::getLoopTarget). +/// However, unstructured back edges from \c goto statements are not included. +/// This helps find back edges, whether the CFG is reducible or not. +/// This includes CFGBlock::getLoopTarget nodes, but one can filter those out. +llvm::DenseMap<const CFGBlock *, const CFGBlock *> +findCFGBackEdges(const CFG &CFG); + +} // namespace clang + +#endif // LLVM_CLANG_ANALYSIS_CFG_BACKEDGES_H diff --git a/clang/lib/Analysis/CFGBackEdges.cpp b/clang/lib/Analysis/CFGBackEdges.cpp new file mode 100644 index 0000000000000..303b2e44d8787 --- /dev/null +++ b/clang/lib/Analysis/CFGBackEdges.cpp @@ -0,0 +1,68 @@ +//===- CFGBackEdges.cpp - Finds back edges in Clang CFGs ------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include <stack> +#include <utility> +#include <vector> + +#include "clang/Analysis/CFG.h" +#include "clang/Analysis/CFGBackEdges.h" +#include "llvm/ADT/DenseMap.h" + +namespace clang { + +namespace { +struct VisitClockTimes { + // Timestamp for when the node was visited / discovered. + int Pre = -1; + // Timestamp for when we finished visiting a node's successors. + int Post = -1; +}; +} // namespace + +llvm::DenseMap<const CFGBlock *, const CFGBlock *> +findCFGBackEdges(const clang::CFG &CFG) { + // Do a simple textbook DFS with pre and post numberings to find back edges. + llvm::DenseMap<const CFGBlock *, const CFGBlock *> BackEdges; + + std::vector<VisitClockTimes> VisitState; + VisitState.resize(CFG.getNumBlockIDs()); + std::stack<std::pair<const CFGBlock *, CFGBlock::const_succ_iterator>> + DFSStack; + int Clock = 0; + const CFGBlock &Entry = CFG.getEntry(); + VisitState[Entry.getBlockID()].Pre = Clock++; + DFSStack.push({&Entry, Entry.succ_begin()}); + + while (!DFSStack.empty()) { + auto &[Block, SuccIt] = DFSStack.top(); + if (SuccIt == Block->succ_end()) { + VisitState[Block->getBlockID()].Post = Clock++; + DFSStack.pop(); + continue; + } + + const CFGBlock::AdjacentBlock &AdjacentSucc = *SuccIt++; + const CFGBlock *Succ = AdjacentSucc.getReachableBlock(); + // Skip unreachable blocks. + if (Succ == nullptr) + continue; + + VisitClockTimes &SuccVisitState = VisitState[Succ->getBlockID()]; + if (SuccVisitState.Pre != -1) { + if (SuccVisitState.Post == -1) + BackEdges.insert({Block, Succ}); + } else { + SuccVisitState.Pre = Clock++; + DFSStack.push({Succ, Succ->succ_begin()}); + } + } + return BackEdges; +} + +} // namespace clang diff --git a/clang/lib/Analysis/CMakeLists.txt b/clang/lib/Analysis/CMakeLists.txt index 65f160e965d47..fef688424978d 100644 --- a/clang/lib/Analysis/CMakeLists.txt +++ b/clang/lib/Analysis/CMakeLists.txt @@ -9,6 +9,7 @@ add_clang_library(clangAnalysis BodyFarm.cpp CalledOnceCheck.cpp CFG.cpp + CFGBackEdges.cpp CFGReachabilityAnalysis.cpp CFGStmtMap.cpp CallGraph.cpp diff --git a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp index 1113bbe7f4d9c..37daf1b254001 100644 --- a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp +++ b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp @@ -19,18 +19,23 @@ #include "clang/AST/ASTDumper.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/OperationKinds.h" +#include "clang/AST/Stmt.h" #include "clang/AST/StmtCXX.h" #include "clang/AST/StmtVisitor.h" #include "clang/Analysis/Analyses/PostOrderCFGView.h" #include "clang/Analysis/CFG.h" +#include "clang/Analysis/CFGBackEdges.h" #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" #include "clang/Analysis/FlowSensitive/DataflowLattice.h" #include "clang/Analysis/FlowSensitive/DataflowWorklist.h" #include "clang/Analysis/FlowSensitive/Transfer.h" #include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h" #include "clang/Analysis/FlowSensitive/Value.h" +#include "clang/Basic/LLVM.h" #include "clang/Support/Compiler.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Error.h" @@ -64,14 +69,23 @@ static int blockIndexInPredecessor(const CFGBlock &Pred, return BlockPos - Pred.succ_begin(); } -// A "backedge" node is a block introduced in the CFG exclusively to indicate a -// loop backedge. They are exactly identified by the presence of a non-null -// pointer to the entry block of the loop condition. Note that this is not -// necessarily the block with the loop statement as terminator, because -// short-circuit operators will result in multiple blocks encoding the loop -// condition, only one of which will contain the loop statement as terminator. -static bool isBackedgeNode(const CFGBlock &B) { - return B.getLoopTarget() != nullptr; +// Given a backedge from B1 to B2, B1 is a "backedge node" in a CFG. +// It can be: +// - A block introduced in the CFG exclusively to indicate a structured loop's +// backedge. They are exactly identified by the presence of a non-null +// pointer to the entry block of the loop condition. Note that this is not +// necessarily the block with the loop statement as terminator, because +// short-circuit operators will result in multiple blocks encoding the loop +// condition, only one of which will contain the loop statement as terminator. +// - A block that is part of a backedge in a CFG with unstructured loops +// (e.g., a CFG with a `goto` statement). Note that this is not necessarily +// the block with the goto statement as terminator. The choice depends on how +// blocks and edges are ordered. +static bool isBackedgeNode( + const CFGBlock &B, + const llvm::SmallDenseSet<const CFGBlock *> &NonStructLoopBackedgeNodes) { + return B.getLoopTarget() != nullptr || + NonStructLoopBackedgeNodes.contains(&B); } namespace { @@ -484,6 +498,36 @@ transferCFGBlock(const CFGBlock &Block, AnalysisContext &AC, return State; } +// Returns true if the CFG contains any goto statements (direct or indirect). +static bool hasGotoInCFG(const clang::CFG &CFG) { + for (const CFGBlock *Block : CFG) { + const Stmt *Term = Block->getTerminatorStmt(); + if (Term == nullptr) + continue; + if (isa<GotoStmt>(Term) || isa<IndirectGotoStmt>(Term)) + return true; + } + return false; +} + +// Returns a set of CFG blocks that is the source of a backedge and is not +// tracked as part of a structured loop (with `CFGBlock::getLoopTarget`). +static llvm::SmallDenseSet<const CFGBlock *> +findNonStructuredLoopBackedgeNodes(const clang::CFG &CFG) { + llvm::SmallDenseSet<const CFGBlock *> NonStructLoopBackedgeNodes; + // We should only need this if the function has gotos. + if (!hasGotoInCFG(CFG)) + return NonStructLoopBackedgeNodes; + + llvm::DenseMap<const CFGBlock *, const CFGBlock *> Backedges = + findCFGBackEdges(CFG); + for (const auto &[From, To] : Backedges) { + if (From->getLoopTarget() == nullptr) + NonStructLoopBackedgeNodes.insert(From); + } + return NonStructLoopBackedgeNodes; +} + llvm::Expected<std::vector<std::optional<TypeErasedDataflowAnalysisState>>> runTypeErasedDataflowAnalysis( const AdornedCFG &ACFG, TypeErasedDataflowAnalysis &Analysis, @@ -503,6 +547,8 @@ runTypeErasedDataflowAnalysis( const clang::CFG &CFG = ACFG.getCFG(); PostOrderCFGView POV(&CFG); ForwardDataflowWorklist Worklist(CFG, &POV); + llvm::SmallDenseSet<const CFGBlock *> NonStructLoopBackedgeNodes = + findNonStructuredLoopBackedgeNodes(CFG); std::vector<std::optional<TypeErasedDataflowAnalysisState>> BlockStates( CFG.size()); @@ -537,7 +583,7 @@ runTypeErasedDataflowAnalysis( llvm::errs() << "Old Env:\n"; OldBlockState->Env.dump(); }); - if (isBackedgeNode(*Block)) { + if (isBackedgeNode(*Block, NonStructLoopBackedgeNodes)) { LatticeJoinEffect Effect1 = Analysis.widenTypeErased( NewBlockState.Lattice, OldBlockState->Lattice); LatticeJoinEffect Effect2 = diff --git a/clang/unittests/Analysis/CFGBackEdgesTest.cpp b/clang/unittests/Analysis/CFGBackEdgesTest.cpp new file mode 100644 index 0000000000000..6ab8ccce4cdd0 --- /dev/null +++ b/clang/unittests/Analysis/CFGBackEdgesTest.cpp @@ -0,0 +1,239 @@ +//===- unittests/Analysis/CFGBackEdgesTest.cpp - CFG backedges tests ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/CFGBackEdges.h" +#include "CFGBuildResult.h" +#include "clang/AST/Stmt.h" +#include "clang/Analysis/CFG.h" +#include "clang/Basic/LLVM.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +namespace clang { +namespace analysis { +namespace { + +using ::testing::IsNull; +using ::testing::NotNull; +using ::testing::SizeIs; + +TEST(CFGBackEdgesTest, NoBackedgesLinear) { + const char *Code = R"cc( + int f(int x) { + l1: + x++; + l2: + x++; + return x; + })cc"; + BuildResult Result = BuildCFG(Code); + EXPECT_EQ(BuildResult::BuiltCFG, Result.getStatus()); + CFG *Cfg = Result.getCFG(); + + auto BackEdges = findCFGBackEdges(*Cfg); + EXPECT_TRUE(BackEdges.empty()); +} + +TEST(CFGBackEdgesTest, NoBackedgesOnlyCrossEdge) { + const char *Code = R"cc( + int f(int x) { + if (x > 0) + x++; + else + x--; + return x; + })cc"; + BuildResult Result = BuildCFG(Code); + EXPECT_EQ(BuildResult::BuiltCFG, Result.getStatus()); + CFG *Cfg = Result.getCFG(); + + auto BackEdges = findCFGBackEdges(*Cfg); + EXPECT_TRUE(BackEdges.empty()); +} + +TEST(CFGBackEdgesTest, NoBackedgesWithUnreachableSuccessorForSwitch) { + const char *Code = R"cc( + enum class Kind { A, B }; + + void f(Kind kind) { + switch(kind) { + case Kind::A: return 0; + case Kind::B: break; + } + return 1; + })cc"; + BuildResult Result = BuildCFG(Code); + EXPECT_EQ(BuildResult::BuiltCFG, Result.getStatus()); + CFG *Cfg = Result.getCFG(); + + auto BackEdges = findCFGBackEdges(*Cfg); + EXPECT_TRUE(BackEdges.empty()); +} + +TEST(CFGBackEdgesTest, ForLoop) { + const char *Code = R"cc( + void f(int n) { + for (int i = 0; i < n; ++i) {} + })cc"; + BuildResult Result = BuildCFG(Code); + EXPECT_EQ(BuildResult::BuiltCFG, Result.getStatus()); + CFG *Cfg = Result.getCFG(); + + // Finds one backedge, which is the one looping back to the loop header + // (has a loop target). + auto BackEdges = findCFGBackEdges(*Cfg); + EXPECT_THAT(BackEdges, SizeIs(1)); + EXPECT_THAT(BackEdges.begin()->first->getLoopTarget(), NotNull()); +} + +TEST(CFGBackEdgesTest, WhileLoop) { + const char *Code = R"cc( + void f(int n) { + int i = 0; + while (i < n) { ++i; } + })cc"; + BuildResult Result = BuildCFG(Code); + EXPECT_EQ(BuildResult::BuiltCFG, Result.getStatus()); + CFG *Cfg = Result.getCFG(); + + auto BackEdges = findCFGBackEdges(*Cfg); + EXPECT_THAT(BackEdges, SizeIs(1)); + EXPECT_THAT(BackEdges.begin()->first->getLoopTarget(), NotNull()); +} + +TEST(CFGBackEdgesTest, DoWhileLoop) { + const char *Code = R"cc( + void f(int n) { + int i = 0; + do { ++i; } while (i < n); + })cc"; + BuildResult Result = BuildCFG(Code); + EXPECT_EQ(BuildResult::BuiltCFG, Result.getStatus()); + CFG *Cfg = Result.getCFG(); + + auto BackEdges = findCFGBackEdges(*Cfg); + EXPECT_THAT(BackEdges, SizeIs(1)); + EXPECT_THAT(BackEdges.begin()->first->getLoopTarget(), NotNull()); +} + +TEST(CFGBackEdgesTest, GotoLoop) { + const char *Code = R"cc( + void f(int n) { + int i = 0; + loop: + if (i < n) { + ++i; + goto loop; + } + })cc"; + BuildResult Result = BuildCFG(Code); + EXPECT_EQ(BuildResult::BuiltCFG, Result.getStatus()); + CFG *Cfg = Result.getCFG(); + + // Finds one backedge, but since it's an unstructured loop, the loop target is + // null. Instead, the node has a goto terminator. + auto BackEdges = findCFGBackEdges(*Cfg); + EXPECT_THAT(BackEdges, SizeIs(1)); + EXPECT_THAT(BackEdges.begin()->first->getLoopTarget(), IsNull()); + EXPECT_TRUE(isa<GotoStmt>(BackEdges.begin()->first->getTerminatorStmt())); +} + +TEST(CFGBackEdgesTest, WhileWithContinueLoop) { + const char *Code = R"cc( + void f(int n) { + int i = 0; + while (i < n) { + ++i; + if (i == 5) continue; + if (i == 10) break; + i *= 2; + } + })cc"; + BuildResult Result = BuildCFG(Code); + EXPECT_EQ(BuildResult::BuiltCFG, Result.getStatus()); + CFG *Cfg = Result.getCFG(); + + auto BackEdges = findCFGBackEdges(*Cfg); + EXPECT_THAT(BackEdges, SizeIs(testing::Gt(0))); + for (const auto &[From, To] : BackEdges) + EXPECT_THAT(From->getLoopTarget(), NotNull()); +} + +TEST(CFGBackEdgesTest, NestedForLoop) { + const char *Code = R"cc( + void f(int n) { + for (int i = 0; i < n; ++i) { + for (int j = i; j < n; ++j) {} + } + })cc"; + BuildResult Result = BuildCFG(Code); + EXPECT_EQ(BuildResult::BuiltCFG, Result.getStatus()); + CFG *Cfg = Result.getCFG(); + + // Finds one backedge, which is the one looping back to the loop header + // (has a loop target). + auto BackEdges = findCFGBackEdges(*Cfg); + EXPECT_THAT(BackEdges, SizeIs(2)); + auto it = BackEdges.begin(); + EXPECT_THAT(it->first->getLoopTarget(), NotNull()); + ++it; + EXPECT_THAT(it->first->getLoopTarget(), NotNull()); +} + +TEST(CFGBackEdgesTest, IrreducibleCFG) { + const char *Code = R"cc( + void f(int cond) { + if (cond) goto L1; + L0: + goto L1; + L1: + goto L0; + })cc"; + BuildResult Result = BuildCFG(Code); + EXPECT_EQ(BuildResult::BuiltCFG, Result.getStatus()); + CFG *Cfg = Result.getCFG(); + + auto BackEdges = findCFGBackEdges(*Cfg); + // In an irreducible CFG, we still expect to find a back edge. + EXPECT_THAT(BackEdges, SizeIs(1)); + EXPECT_TRUE(isa<GotoStmt>(BackEdges.begin()->first->getTerminatorStmt())); +} + +TEST(CFGBackEdgesTest, FirstBackedgeIsNotGoto) { + const char *Code = R"cc( + void f(int x, int y) { + if (x > y) { + } else { + L1: + --x; + if (x == 0) return; + } + goto L1; + })cc"; + BuildResult Result = BuildCFG(Code); + EXPECT_EQ(BuildResult::BuiltCFG, Result.getStatus()); + CFG *Cfg = Result.getCFG(); + + auto BackEdges = findCFGBackEdges(*Cfg); + EXPECT_THAT(BackEdges, SizeIs(1)); + // We might find a backedge where the source block doesn't terminate with + // a `goto`, due to the DFS search order. For example: + // + // B_entry: `if (x > y)` + // \--then--> B1: `<empty>` + // --> B2: `goto L1` + // --> B3: `--x; if (x == 0)` + // \--then--> B4 `return` --> B_exit. + // \--else--> B2: ... (the `if`'s else is a backedge from B3 to B2!) + // \--else--> B3: ... + EXPECT_FALSE(isa<GotoStmt>(BackEdges.begin()->first->getTerminatorStmt())); +} + +} // namespace +} // namespace analysis +} // namespace clang diff --git a/clang/unittests/Analysis/CMakeLists.txt b/clang/unittests/Analysis/CMakeLists.txt index 97e768b11db69..7cefa2caf3c91 100644 --- a/clang/unittests/Analysis/CMakeLists.txt +++ b/clang/unittests/Analysis/CMakeLists.txt @@ -1,4 +1,5 @@ add_clang_unittest(ClangAnalysisTests + CFGBackEdgesTest.cpp CFGDominatorTree.cpp CFGTest.cpp CloneDetectionTest.cpp diff --git a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp index d1dd4ff3ea33e..976ed3d82d378 100644 --- a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp @@ -1145,6 +1145,34 @@ TEST_F(WideningTest, DistinctValuesWithDifferentPropertiesWidenedToTop) { }); } +TEST_F(WideningTest, + DistinctValuesWithDifferentPropertiesWidenedToTopGotoInsteadOfWhile) { + std::string Code = R"cc( + void target(bool Cond) { + int *Foo; + int i = 0; + Foo = nullptr; + start: + if (Cond) { + Foo = &i; + goto start; + } + (void)0; + /*[[p]]*/ + } + )cc"; + runDataflow( + Code, + [](const llvm::StringMap<DataflowAnalysisState<NoopLattice>> &Results, + ASTContext &ASTCtx) { + const Environment &Env = getEnvironmentAtAnnotation(Results, "p"); + const auto &FooVal = getValueForDecl<Value>(ASTCtx, Env, "Foo"); + ASSERT_THAT(FooVal.getProperty("is_null"), NotNull()); + EXPECT_TRUE(areEquivalentValues(*FooVal.getProperty("is_null"), + Env.makeTopBoolValue())); + }); +} + class FlowConditionTest : public Test { protected: template <typename Matcher> @@ -1275,6 +1303,31 @@ TEST_F(FlowConditionTest, WhileStmtWithAssignmentInCondition) { }); } +TEST_F(FlowConditionTest, GotoLoopWithAssignmentInCondition) { + std::string Code = R"cc( + void target(bool Foo) { + // This test checks whether the analysis preserves the connection between + // the value of `Foo` and the assignment expression, despite widening. + // The equality operator generates a fresh boolean variable on each + // interpretation, which forces use of widening. + start: + if ((Foo = (3 == 4))) { + (void)0; + /*[[p]]*/ + goto start; + } + } + )cc"; + runDataflow( + Code, + [](const llvm::StringMap<DataflowAnalysisState<NoopLattice>> &Results, + ASTContext &ASTCtx) { + const Environment &Env = getEnvironmentAtAnnotation(Results, "p"); + auto &FooVal = getValueForDecl<BoolValue>(ASTCtx, Env, "Foo").formula(); + EXPECT_TRUE(Env.proves(FooVal)); + }); +} + TEST_F(FlowConditionTest, Conjunction) { std::string Code = R"( void target(bool Foo, bool Bar) { `````````` </details> https://github.com/llvm/llvm-project/pull/179546 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
