================
@@ -0,0 +1,210 @@
+//===- BoundsChecking.h - Bounds checking related APIs ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines APIs for performing a bounds check (i.e. comparing a
+// symbolic Offset value to zero and a symbolic Extent value) and composing
+// descriptions that explain its results.
+//
+// This fulfills a similar role as `ProgramState::assumeInBound`, but uses
+// more accurate logic and heuristic workarounds to account for the quirks of
+// signed/unsigned conversions and the lack of cast modeling in the analyzer.
+//
+// As of now, this logic only supports the needs of `security.ArrayBound`, but
+// in the future it will be generalized and applied in all checkers that
+// perform bounds checking (to bring them out of `alpha` stage).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_STATICANALYZER_CHECKERS_BOUNDSCHECKING_H
+#define LLVM_CLANG_STATICANALYZER_CHECKERS_BOUNDSCHECKING_H
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+#include "llvm/Support/FormatVariadic.h"
+#include <optional>
+
+namespace clang::ento::bounds {
+
+/// If `E` is an array subscript expression with a "clean" base (i.e. the base
+/// is not modified by pointer arithmetic, so it is the beginning of a memory
+/// region), return it as a pointer to ArraySubscriptExpr; otherwise return
+/// nullptr.
+/// This helper function is used by two separate heuristics that are only valid
+/// in these "clean" cases.
+const ArraySubscriptExpr *getAsCleanArraySubscriptExpr(const Expr *E,
+                                                       const CheckerContext &C);
+
+/// The unit in which offsets and extents are expressed in bounds checking
+/// messages: either plain bytes or elements of a concrete type.
+class SizeUnit {
+  /// Element type of the unit; a null QualType denotes the 'bytes' unit.
+  QualType AsType;
+  /// Size of one unit in CharUnits (1 for the 'bytes' unit).
+  /// NOTE(review): this is whatever `getTypeSizeInChars` reports; if a
+  /// zero-sized type can reach this point, code dividing by it must cope.
+  int64_t AsCharUnits;
+
+  /// Construct the 'bytes' unit; only reachable through `bytes()`.
+  SizeUnit() : AsType(), AsCharUnits(1) {}
+
+public:
+  /// Construct a unit that measures in elements of the non-null type \p T,
+  /// whose size is queried from \p ACtx.
+  SizeUnit(QualType T, const ASTContext &ACtx)
+      : AsType(T), AsCharUnits(ACtx.getTypeSizeInChars(T).getQuantity()) {
+    assert(!T.isNull() &&
+           "use SizeUnit::bytes() when there is no element type");
+  }
+
+  /// Return the unit that measures in plain bytes.
+  static SizeUnit bytes() { return SizeUnit(); }
+
+  /// Return true if this is the 'bytes' unit (i.e. there is no element type).
+  bool isBytes() const { return AsType.isNull(); }
+
+  /// If `E` is a "clean" array subscript expression, return the type of the
+  /// accessed element; otherwise return 'Bytes' because that's the best (or
+  /// least bad) option for the assumption messages that use this.
+  static SizeUnit forExpr(const Expr *E, const CheckerContext &C) {
+    const auto *ASE = getAsCleanArraySubscriptExpr(E, C);
+    return ASE ? SizeUnit(ASE->getType(), C.getASTContext()) : bytes();
+  }
+
+  /// Return the element type that is "natural" for reporting out-of-bounds
+  /// memory access to 'Location'; fall back to 'Bytes' when 'Location' is not
+  /// a typed value region.
+  /// FIXME: It is unfortunate that this heuristic differs from the heuristic
+  /// used for reporting assumption (`SizeUnit::forExpr`).
+  static SizeUnit forSVal(SVal Location, const ASTContext &ACtx) {
+    // `getAsRegion()` yields nullptr for values that are not backed by a
+    // memory region, so check it before calling `getAs` on the result.
+    const auto *Region = Location.getAsRegion();
+    const auto *TVR = Region ? Region->getAs<TypedValueRegion>() : nullptr;
+    return TVR ? SizeUnit(TVR->getValueType(), ACtx) : bytes();
+  }
+
+  /// Return the size of one unit in CharUnits.
+  int64_t asCharUnits() const { return AsCharUnits; }
+
+  /// Return "the extent of" for bytes, otherwise "the number of '<type>'
+  /// elements in".
+  std::string asExtentDesc() const {
+    if (isBytes())
+      return "the extent of";
+    return llvm::formatv("the number of '{0}' elements in",
+                         AsType.getAsString());
+  }
+
+  /// Return "byte" for bytes, otherwise "'<type>' element".
+  std::string asElementName() const {
+    if (isBytes())
+      return "byte";
+    return llvm::formatv("'{0}' element", AsType.getAsString());
+  }
+
+  /// Return "byte offset" for bytes, otherwise "index".
+  std::string getOffsetName() const {
+    return isBytes() ? "byte offset" : "index";
+  }
+
+  /// Try to divide `Val1` and `Val2` (in place) by `this->asCharUnits()` and
+  /// return true if it can be performed without remainder. The values \p Val1
+  /// and \p Val2 may be nullopt and in that case the corresponding division is
+  /// considered to be successful.
+  bool tryConvertValuesFromBytes(std::optional<int64_t> &Val1,
+                                 std::optional<int64_t> &Val2) const;
+};
+
+struct Messages { // Two std::string texts bundled together.
+ std::string Short; // NOTE(review): presumably the brief text -- confirm.
+ std::string Full; // NOTE(review): presumably the detailed text -- confirm.
+};
+
+struct CheckFlags { // Three 1-bit options; no default initializers here.
+ unsigned CheckUnderflow : 1; // NOTE(review): presumably enables the underflow check -- confirm.
+ unsigned OffsetObviouslyNonnegative : 1; // NOTE(review): presumably offset known to be >= 0 -- confirm.
+ unsigned AcceptPastTheEnd : 1; // NOTE(review): presumably allows offset == extent -- confirm.
+};
+
+class CheckResult;
+
+/// Holds what a bounds check recorded about the offset and the extent. The
+/// mutators are protected; `checkBounds` is declared as a friend.
+class CheckInfo {
+protected:
+  /// Starts out false and is switched to true once underflow turns out not to
+  /// be excluded by what was already known about the offset.
+  bool UnderflowFeasible = false;
+  /// Offset (in CharUnits) measured from the start of the accessed region.
+  const NonLoc Offset;
+  /// Extent (in CharUnits) of the accessed region. It is `nullopt` when the
+  /// extent does not matter because earlier knowledge about the offset and
+  /// the extent already excluded overflow.
+  std::optional<NonLoc> Extent;
+
+public:
+  /// True if underflow was recorded as feasible or a relevant extent is
+  /// stored.
+  bool hasAssumption() const {
+    return UnderflowFeasible || Extent.has_value();
+  }
+
+  friend CheckResult checkBounds(ProgramStateRef State, SValBuilder &SVB,
+                                 NonLoc Offset, std::optional<NonLoc> Extent,
+                                 CheckFlags Flags);
+
+protected:
+  /// Start with the given offset, no feasible underflow and no extent.
+  explicit CheckInfo(NonLoc Offs) : Offset(Offs) {}
+
+  /// Mark underflow as feasible.
+  void recordUnderflowFeasible() { UnderflowFeasible = true; }
+
+  /// Store \p E as the relevant extent.
+  void recordRelevantExtent(NonLoc E) { Extent = E; }
+
+  /// Forget any stored extent.
+  void discardExtentInformation() { Extent.reset(); }
+};
+
+class CheckResult : public CheckInfo {
+public:
+ enum class Kind { Valid, Invalid, TaintBug, CorruptedState };
----------------
Xazax-hun wrote:
I don't like the `CorruptedState` either. It implies that the analyzer might
run into some UB internally, which is not the case. We can very easily find
contradictions in user code:
```
if (i > 10) {
}
if (i < 10) {
}
```
The first branch triggers a state split, and in one of the paths the second
check is impossible, so we never take that branch. I suspect that this
bifurcation behavior can also lead to similar scenarios where seemingly
impossible things happen while indexing, but in fact we just took a path that
could never happen at runtime and the analyzer did not have that info earlier
to rule the path out.
Or do you really think this is a bug in the analyzer engine and cannot be
triggered by reasonable code that just simply does not expose enough info
statically so we can rule out paths quickly enough?
https://github.com/llvm/llvm-project/pull/202372
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits