[clang] [llvm] [NFC][analyzer] Extract bounds checking library (PR #202372)

Donát Nagy via cfe-commits Thu, 02 Jul 2026 08:18:15 -0700

================
@@ -0,0 +1,211 @@
+//===- BoundsChecking.h - Bounds checking related APIs ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file defines APIs for performing a bounds check (i.e. comparing a
+//  symbolic Offset value to zero and a symbolic Extent value) and composing
+//  descriptions that explain its results.
+//
+//  This is intended as a replacement for `ProgramState::assumeInBound` to
+//  avoid its incorrect logic and compensate for deficiencies of other parts of
+//  the analyzer.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_BOUNDSCHECKING_H
+#define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_BOUNDSCHECKING_H
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+#include "llvm/Support/FormatVariadic.h"
+#include <optional>
+
+namespace clang {
+namespace ento {
+
+/// If `E` is an array subscript expression with a base that is "clean" (= not
+/// modified by pointer arithmetic = the beginning of a memory region), return
+/// it as a pointer to ArraySubscriptExpr; otherwise return nullptr.
+/// This helper function is used by two separate heuristics that are only valid
+/// in these "clean" cases.
+const ArraySubscriptExpr *getAsCleanArraySubscriptExpr(const Expr *E,
+                                                       const CheckerContext &C);
+
+class SizeUnit {
+  QualType AsType;
+  int64_t AsCharUnits;
+
+  SizeUnit() : AsType(), AsCharUnits(1) {}
+
+public:
+  SizeUnit(QualType T, const ASTContext &ACtx)
+      : AsType(T), AsCharUnits(ACtx.getTypeSizeInChars(T).getQuantity()) {
+    assert(!T.isNull());
+  }
+
+  static SizeUnit bytes() { return SizeUnit(); }
+
+  bool isBytes() const { return AsType.isNull(); }
+
+  /// If `E` is a "clean" array subscript expression, return the type of the
+  /// accessed element; otherwise return 'Bytes' because that's the best (or
+  /// least bad) option for the assumption messages that use this.
+  static SizeUnit forExpr(const Expr *E, const CheckerContext &C) {
+    const auto *ASE = getAsCleanArraySubscriptExpr(E, C);
+    if (!ASE)
+      return bytes();
+
+    return SizeUnit(ASE->getType(), C.getASTContext());
+  }
+
+  /// Return the element type that is "natural" for reporting out-of-bounds
+  /// memory access to 'Location'.
+  /// FIXME: It is unfortunate that this heuristic differs from the heuristic
+  /// used for reporting assumption (`SizeUnit::forExpr`).
+  static SizeUnit forSVal(SVal Location, const ASTContext &ACtx) {
+    const auto *EReg = Location.getAsRegion()->getAs<ElementRegion>();
+    assert(EReg && "this checker only handles element access");
+    return SizeUnit(EReg->getElementType(), ACtx);
+  }
+
+  int64_t asCharUnits() const { return AsCharUnits; }
+
+  std::string asExtentDesc() const {
+    if (isBytes())
+      return "the extent of";
+    return llvm::formatv("the number of '{0}' elements in",
+                         AsType.getAsString());
+  }
+
+  std::string asElementName() const {
+    if (isBytes())
+      return "byte";
+    return llvm::formatv("'{0}' element", AsType.getAsString());
+  }
+
+  std::string getOffsetName() const {
+    return isBytes() ? "byte offset" : "index";
+  }
+
+  /// Try to divide `Val1` and `Val2` (in place) by `this->asCharUnits()` and
+  /// return true if it can be performed without remainder. The values `Val1`
+  /// and `Val2` may be nullopt and in that case the corresponding division is
+  /// considered to be successful.
+  bool tryConvertValuesFromBytes(std::optional<int64_t> &Val1,
+                                 std::optional<int64_t> &Val2) const;
+};
+
+struct Messages {
+  std::string Short, Full;
+};
+
+enum class BadOffsetKind { Negative, Overflowing, Indeterminate };
+
+constexpr llvm::StringLiteral Adjectives[] = {"a negative", "an overflowing",
+                                              "a negative or overflowing"};
+inline StringRef asAdjective(BadOffsetKind Problem) {
+  return Adjectives[static_cast<int>(Problem)];
+}
+
+constexpr llvm::StringLiteral Prepositions[] = {"preceding", "after the end of",
+                                                "around"};
+inline StringRef asPreposition(BadOffsetKind Problem) {
+  return Prepositions[static_cast<int>(Problem)];
+}
+
+struct CheckFlags {
+  bool CheckUnderflow;
+  bool OffsetObviouslyNonnegative;
+  bool AcceptPastTheEnd;
+};
+
+class BoundsCheckResult {
+public:
+  enum class Kind { Underflow, Overflow, TaintBug, Paradox, Valid };
----------------
NagyDonat wrote:


`Paradox` was renamed to `CorruptedState` in 
https://github.com/llvm/llvm-project/pull/202372/commits/95f609faf093fb141f2b7afae764c367b691d3e5

`Underflow` and `Overflow` were merged into a single `Invalid` enumerator in 
https://github.com/llvm/llvm-project/pull/202372/commits/07426529998d6313ca881befc62ce06f3df03add

`BadOffsetKind` was removed in 
https://github.com/llvm/llvm-project/pull/202372/commits/c168873510c1a2c19573841296bf99af6ecda06c

I think these resolve all the issues raised in this comment.

https://github.com/llvm/llvm-project/pull/202372
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [NFC][analyzer] Extract bounds checking library (PR #202372)

Reply via email to