================
@@ -0,0 +1,210 @@
+//===- BoundsChecking.h - Bounds checking related APIs ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file defines APIs for performing a bounds check (i.e. comparing a
+//  symbolic Offset value to zero and a symbolic Extent value) and composing
+//  descriptions that explain its results.
+//
+//  This fulfills a similar role as `ProgramState::assumeInBound`, but uses
+//  more accurate logic and heuristic workarounds to account for the quirks of
+//  signed/unsigned conversions and the lack of cast modeling in the analyzer.
+//
+//  As of now, this logic only supports the needs of `security.ArrayBound`, but
+//  in the future it will be generalized and applied in all checkers that
+//  perform bounds checking (to bring them out of `alpha` stage).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_STATICANALYZER_CHECKERS_BOUNDSCHECKING_H
+#define LLVM_CLANG_STATICANALYZER_CHECKERS_BOUNDSCHECKING_H
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+#include "llvm/Support/FormatVariadic.h"
+#include <optional>
+
+namespace clang::ento::bounds {
+
+/// If `E` is an array subscript expression whose base is "clean" (i.e. not
+/// modified by pointer arithmetic, so it denotes the beginning of a memory
+/// region), return it as a pointer to ArraySubscriptExpr; otherwise return
+/// nullptr.
+/// This helper function is shared by two separate heuristics that are only
+/// valid in these "clean" cases.
+const ArraySubscriptExpr *getAsCleanArraySubscriptExpr(const Expr *E,
+                                                       const CheckerContext &C);
+
+/// The unit in which offsets and extents are described: either plain bytes or
+/// elements of a concrete type. A null `AsType` encodes the 'bytes' unit.
+class SizeUnit {
+  QualType AsType;
+  int64_t AsCharUnits;
+
+  // Builds the 'bytes' unit (null type, size 1); reachable only via `bytes()`.
+  SizeUnit() : AsType(), AsCharUnits(1) {}
+
+public:
+  /// Build a unit that measures in elements of type \p T (which must not be
+  /// null); the size of one element is queried from \p ACtx.
+  SizeUnit(QualType T, const ASTContext &ACtx)
+      : AsType(T), AsCharUnits(ACtx.getTypeSizeInChars(T).getQuantity()) {
+    assert(!T.isNull());
+  }
+
+  /// Return the 'bytes' unit.
+  static SizeUnit bytes() { return SizeUnit(); }
+
+  /// Return true if this is the 'bytes' unit (i.e. no element type is set).
+  bool isBytes() const { return AsType.isNull(); }
+
+  /// If `E` is a "clean" array subscript expression, return the type of the
+  /// accessed element; otherwise return 'Bytes' because that's the best (or
+  /// least bad) option for the assumption messages that use this.
+  static SizeUnit forExpr(const Expr *E, const CheckerContext &C) {
+    const auto *ASE = getAsCleanArraySubscriptExpr(E, C);
+    return ASE ? SizeUnit(ASE->getType(), C.getASTContext()) : bytes();
+  }
+
+  /// Return the element type that is "natural" for reporting out-of-bounds
+  /// memory access to 'Location'. Falls back to 'Bytes' when 'Location' is
+  /// not backed by a typed value region.
+  /// FIXME: It is unfortunate that this heuristic differs from the heuristic
+  /// used for reporting assumption (`SizeUnit::forExpr`).
+  static SizeUnit forSVal(SVal Location, const ASTContext &ACtx) {
+    // `getAsRegion()` yields nullptr when `Location` does not wrap a memory
+    // region, so it must be checked before calling `getAs` on it.
+    const MemRegion *Reg = Location.getAsRegion();
+    const auto *TVR = Reg ? Reg->getAs<TypedValueRegion>() : nullptr;
+    return TVR ? SizeUnit(TVR->getValueType(), ACtx) : bytes();
+  }
+
+  /// Size of one unit in CharUnits (1 for the 'bytes' unit).
+  int64_t asCharUnits() const { return AsCharUnits; }
+
+  /// Phrase that introduces the extent of a region in a message.
+  std::string asExtentDesc() const {
+    if (isBytes())
+      return "the extent of";
+    return llvm::formatv("the number of '{0}' elements in",
+                         AsType.getAsString());
+  }
+
+  /// Singular name of one unit, for use in a message.
+  std::string asElementName() const {
+    if (isBytes())
+      return "byte";
+    return llvm::formatv("'{0}' element", AsType.getAsString());
+  }
+
+  /// Name of an offset that is measured in this unit.
+  std::string getOffsetName() const {
+    return isBytes() ? "byte offset" : "index";
+  }
+
+  /// Try to divide `Val1` and `Val2` (in place) by `this->asCharUnits()` and
+  /// return true if it can be performed without remainder. The values \p Val1
+  /// and \p Val2 may be nullopt and in that case the corresponding division is
+  /// considered to be successful.
+  bool tryConvertValuesFromBytes(std::optional<int64_t> &Val1,
+                                 std::optional<int64_t> &Val2) const;
+};
+
+struct Messages { // Two strings bundled together; returned by `getTaintMsgs`.
+  std::string Short; // presumably the brief form of the message; TODO confirm
+  std::string Full; // presumably the detailed form of the message; TODO confirm
+};
+
+struct CheckFlags { // Single-bit options taken by `checkBounds`.
+  unsigned CheckUnderflow : 1; // presumably: also test offset < 0; TODO confirm
+  unsigned OffsetObviouslyNonnegative : 1; // presumably a caller hint; TODO confirm
+  unsigned AcceptPastTheEnd : 1; // presumably: offset == extent is OK; TODO confirm
+};
+
+class CheckResult;
+
+/// Bookkeeping about a bounds check: the checked offset, whether underflow
+/// was still feasible, and the extent if it turned out to be relevant.
+class CheckInfo {
+protected:
+  // Becomes true once the check observes that the earlier knowledge about the
+  // offset did not rule out underflow.
+  bool UnderflowFeasible = false;
+  // Offset (in CharUnits) measured from the beginning of the accessed region.
+  const NonLoc Offset;
+  // Extent (in CharUnits) of the accessed region; stays `nullopt` when the
+  // extent is irrelevant because earlier knowledge about the offset and the
+  // extent already ruled out overflow.
+  std::optional<NonLoc> Extent;
+
+  // Only `checkBounds` and derived classes may create or update instances.
+  explicit CheckInfo(NonLoc Offs) : Offset(Offs) {}
+
+  void recordUnderflowFeasible() { UnderflowFeasible = true; }
+  void recordRelevantExtent(NonLoc E) { Extent.emplace(E); }
+  void discardExtentInformation() { Extent.reset(); }
+
+public:
+  /// True if underflow was feasible or a relevant extent was recorded.
+  bool hasAssumption() const {
+    return UnderflowFeasible || Extent.has_value();
+  }
+
+  friend CheckResult checkBounds(ProgramStateRef State, SValBuilder &SVB,
+                                 NonLoc Offset, std::optional<NonLoc> Extent,
+                                 CheckFlags Flags);
+};
+
+class CheckResult : public CheckInfo {
+public:
+  enum class Kind { Valid, Invalid, TaintBug, CorruptedState };
+
+private:
+  Kind K = Kind::Valid;
+  ProgramStateRef State = nullptr;
+
+  CheckResult(CheckInfo CI, Kind K_, ProgramStateRef S)
+      : CheckInfo(CI), K(K_), State(S) {}
+
+public:
+  friend CheckResult checkBounds(ProgramStateRef State, SValBuilder &SVB,
+                                 NonLoc Offset, std::optional<NonLoc> Extent,
+                                 CheckFlags Flags);
+
+  ProgramStateRef getState() const { return State; }
+
+  Kind getKind() const { return K; }
+
+  Messages getTaintMsgs(std::string RegName, const char *OffsetName) const;
----------------
Xazax-hun wrote:

This public API is very confusing to me. What sort of messages are these? Is 
this one or multiple messages? What is `RegName`? Is it `RegionName`? Do not 
abbreviate. Why is one taken as a `std::string` and the other one as a `const 
char *`? Why not both as a `StringRef`? 

Does this have any preconditions? Can I call this if this was a valid access? 
When do I need to call this one vs the non-taint one? When the offset is 
tainted? Or when the base pointer is tainted?

Also I wonder if taint is the right abstraction here. I wonder if the callers 
just care whether we want to do "strict" checking, as in provably in bounds, vs 
"permissive" checking, as in provably out of bounds. And it is a checker's 
decision based on taint or other info whether they want to be strict or 
permissive.

Mentioning taint here sounds like a violation of the separation of concerns to 
me. 

https://github.com/llvm/llvm-project/pull/202372
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to