donat.nagy updated this revision to Diff 522544.
donat.nagy edited the summary of this revision.
donat.nagy added a comment.

I managed to debug the issue that plagued my commit (see `test_multidim_zero()` 
for details, I think I'll fix it in a followup change); and I have uploaded a 
new version of this patch with testcases that highlight the improvements 
introduced by it.

@steakhal If you need any additional clarification, feel free to ask me.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D150446/new/

https://reviews.llvm.org/D150446

Files:
  clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
  clang/test/Analysis/out-of-bounds.c

Index: clang/test/Analysis/out-of-bounds.c
===================================================================
--- clang/test/Analysis/out-of-bounds.c
+++ clang/test/Analysis/out-of-bounds.c
@@ -174,3 +174,49 @@
   clang_analyzer_eval(x <= 99); // expected-warning{{TRUE}}
 }
 
+void use_ptr(int *arg) {
+  *arg = 42; //no-warning
+}
+
+void test_indirect_use(void) {
+  int arr[10];
+  use_ptr(&arr[100]); // expected-warning{{Out of bound memory access}}
+}
+
+extern int matrix[5][5];
+
+int test_multidim_zero(void) {
+  // TODO: Currently this does not warn because MemRegion::stripCasts() uses an
+  // overzealuos heuristic and thinks that matrix[0] just represents a cast
+  // expression and not a real indexing operation.
+  return matrix[0][10]; // expected - warning{{Out of bound memory access}}
+}
+
+int test_multidim_generic(void) {
+  // This is the generic case that does not trigger the stripCasts() issue:
+  return matrix[1][10]; // expected-warning{{Out of bound memory access}}
+}
+
+struct s {
+  int field;
+} arr[10] = {0};
+
+int test_field(void) {
+  return arr[20].field; // expected-warning{{Out of bound memory access}}
+}
+
+typedef struct {
+  int id;
+  char name[256];
+} user_t;
+
+char test_in_unknown_space(user_t *userptr) {
+  return userptr->name[-1]; // expected-warning{{Out of bound memory access}}
+}
+
+void test_undersized_region(void) {
+  // TODO: Currently the checker does not look for this kind of error.
+  char arr[2];
+  long *p = (long *)arr;
+  p[0] = 3; // expected - warning{{Out of bound memory access}}
+}
Index: clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
+++ clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
@@ -30,8 +30,7 @@
 using namespace taint;
 
 namespace {
-class ArrayBoundCheckerV2 :
-    public Checker<check::Location> {
+class ArrayBoundCheckerV2 : public Checker<check::PreStmt<ArraySubscriptExpr>> {
   mutable std::unique_ptr<BuiltinBug> BT;
   mutable std::unique_ptr<BugType> TaintBT;
 
@@ -45,8 +44,7 @@
   static bool isFromCtypeMacro(const Stmt *S, ASTContext &AC);
 
 public:
-  void checkLocation(SVal l, bool isLoad, const Stmt *S,
-                     CheckerContext &C) const;
+  void checkPreStmt(const ArraySubscriptExpr *ASE, CheckerContext &C) const;
 };
 
 // FIXME: Eventually replace RegionRawOffset with this class.
@@ -63,7 +61,8 @@
   const SubRegion *getRegion() const { return baseRegion; }
 
   static std::optional<RegionRawOffsetV2>
-  computeOffset(ProgramStateRef State, SValBuilder &SVB, SVal Location);
+  computeOffset(ProgramStateRef State, SValBuilder &SVB,
+                const LocationContext *LCtx, const ArraySubscriptExpr *ASE);
 
   void dump() const;
   void dumpToStream(raw_ostream &os) const;
@@ -86,8 +85,8 @@
           APSIntType(extent.getValue()).convert(SIE->getRHS());
       switch (SIE->getOpcode()) {
       case BO_Mul:
-        // The constant should never be 0 here, since it the result of scaling
-        // based on the size of a type which is never 0.
+        // In a SymIntExpr multiplication the constant cannot be 0, because
+        // then the enginge would've replaced it with a constant 0 value.
         if ((extent.getValue() % constant) != 0)
           return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent);
         else
@@ -140,44 +139,41 @@
   return {nullptr, nullptr};
 }
 
-void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
-                                        const Stmt* LoadS,
-                                        CheckerContext &checkerContext) const {
-
+void ArrayBoundCheckerV2::checkPreStmt(const ArraySubscriptExpr *ASE,
+                                       CheckerContext &C) const {
   // NOTE: Instead of using ProgramState::assumeInBound(), we are prototyping
   // some new logic here that reasons directly about memory region extents.
   // Once that logic is more mature, we can bring it back to assumeInBound()
   // for all clients to use.
-  //
-  // The algorithm we are using here for bounds checking is to see if the
-  // memory access is within the extent of the base region.  Since we
-  // have some flexibility in defining the base region, we can achieve
-  // various levels of conservatism in our buffer overflow checking.
 
   // The header ctype.h (from e.g. glibc) implements the isXXXXX() macros as
   //   #define isXXXXX(arg) (LOOKUP_TABLE[arg] & BITMASK_FOR_XXXXX)
   // and incomplete analysis of these leads to false positives. As even
   // accurate reports would be confusing for the users, just disable reports
   // from these macros:
-  if (isFromCtypeMacro(LoadS, checkerContext.getASTContext()))
+  if (isFromCtypeMacro(ASE, C.getASTContext()))
     return;
 
-  ProgramStateRef state = checkerContext.getState();
+  ProgramStateRef state = C.getState();
+
+  SValBuilder &svalBuilder = C.getSValBuilder();
+
+  const LocationContext *LCtx = C.getLocationContext();
 
-  SValBuilder &svalBuilder = checkerContext.getSValBuilder();
   const std::optional<RegionRawOffsetV2> &RawOffset =
-      RegionRawOffsetV2::computeOffset(state, svalBuilder, location);
+      RegionRawOffsetV2::computeOffset(state, svalBuilder, LCtx, ASE);
 
   if (!RawOffset)
     return;
 
   NonLoc ByteOffset = RawOffset->getByteOffset();
+  const SubRegion *Reg = RawOffset->getRegion();
 
   // CHECK LOWER BOUND
-  const MemSpaceRegion *SR = RawOffset->getRegion()->getMemorySpace();
-  if (!llvm::isa<UnknownSpaceRegion>(SR)) {
-    // A pointer to UnknownSpaceRegion may point to the middle of
-    // an allocated region.
+  const MemSpaceRegion *Space = Reg->getMemorySpace();
+  if (!(isa<SymbolicRegion>(Reg) && isa<UnknownSpaceRegion>(Space))) {
+    // Symbolic regions in unknown spaces are used for modelling unknown
+    // pointers, so they may point to the middle of an array.
 
     auto [state_precedesLowerBound, state_withinLowerBound] =
         compareValueToThreshold(state, ByteOffset,
@@ -185,7 +181,7 @@
 
     if (state_precedesLowerBound && !state_withinLowerBound) {
       // We know that the index definitely precedes the lower bound.
-      reportOOB(checkerContext, state_precedesLowerBound, OOB_Precedes);
+      reportOOB(C, state_precedesLowerBound, OOB_Precedes);
       return;
     }
 
@@ -194,8 +190,7 @@
   }
 
   // CHECK UPPER BOUND
-  DefinedOrUnknownSVal Size =
-      getDynamicExtent(state, RawOffset->getRegion(), svalBuilder);
+  DefinedOrUnknownSVal Size = getDynamicExtent(state, Reg, svalBuilder);
   if (auto KnownSize = Size.getAs<NonLoc>()) {
     auto [state_withinUpperBound, state_exceedsUpperBound] =
         compareValueToThreshold(state, ByteOffset, *KnownSize, svalBuilder);
@@ -203,12 +198,12 @@
     if (state_exceedsUpperBound) {
       if (!state_withinUpperBound) {
         // We know that the index definitely exceeds the upper bound.
-        reportOOB(checkerContext, state_exceedsUpperBound, OOB_Excedes);
+        reportOOB(C, state_exceedsUpperBound, OOB_Excedes);
         return;
       }
       if (isTainted(state, ByteOffset)) {
         // Both cases are possible, but the index is tainted, so report.
-        reportTaintOOB(checkerContext, state_exceedsUpperBound, ByteOffset);
+        reportTaintOOB(C, state_exceedsUpperBound, ByteOffset);
         return;
       }
     }
@@ -217,7 +212,7 @@
       state = state_withinUpperBound;
   }
 
-  checkerContext.addTransition(state);
+  C.addTransition(state);
 }
 
 void ArrayBoundCheckerV2::reportTaintOOB(CheckerContext &checkerContext,
@@ -303,53 +298,50 @@
 }
 #endif
 
-/// For a given Location that can be represented as a symbolic expression
-/// Arr[Idx] (or perhaps Arr[Idx1][Idx2] etc.), return the parent memory block
-/// Arr and the distance of Location from the beginning of Arr (expressed in a
-/// NonLoc that specifies the number of CharUnits). Returns nullopt when these
-/// cannot be determined.
+/// For a given ArraySubscriptExpr 'Ptr[Idx]', calculate a (Region, Offset)
+/// pair where Region is the memory area that may be legitimately accessed via
+/// Ptr and Offset is the distance of Ptr[Idx] from the beginning of Region
+/// (expressed as a NonLoc that specifies the number of CharUnits). Returns
+/// nullopt when these cannot be determined.
 std::optional<RegionRawOffsetV2>
 RegionRawOffsetV2::computeOffset(ProgramStateRef State, SValBuilder &SVB,
-                                 SVal Location) {
-  QualType T = SVB.getArrayIndexType();
-  auto Calc = [&SVB, State, T](BinaryOperatorKind Op, NonLoc LHS, NonLoc RHS) {
-    // We will use this utility to add and multiply values.
-    return SVB.evalBinOpNN(State, Op, LHS, RHS, T).getAs<NonLoc>();
-  };
-
-  const MemRegion *Region = Location.getAsRegion();
-  NonLoc Offset = SVB.makeZeroArrayIndex();
-
-  while (Region) {
-    if (const auto *ERegion = dyn_cast<ElementRegion>(Region)) {
-      if (const auto Index = ERegion->getIndex().getAs<NonLoc>()) {
-        QualType ElemType = ERegion->getElementType();
-        // If the element is an incomplete type, go no further.
-        if (ElemType->isIncompleteType())
-          return std::nullopt;
-
-        // Perform Offset += Index * sizeof(ElemType); then continue the offset
-        // calculations with SuperRegion:
-        NonLoc Size = SVB.makeArrayIndex(
-            SVB.getContext().getTypeSizeInChars(ElemType).getQuantity());
-        if (auto Delta = Calc(BO_Mul, *Index, Size)) {
-          if (auto NewOffset = Calc(BO_Add, Offset, *Delta)) {
-            Offset = *NewOffset;
-            Region = ERegion->getSuperRegion();
-            continue;
-          }
-        }
+                                 const LocationContext *LCtx,
+                                 const ArraySubscriptExpr *ASE) {
+  QualType ElemType = ASE->getType();
+  assert(!ElemType->isIncompleteType());
+
+  QualType IdxType = SVB.getArrayIndexType();
+
+  if (auto Idx = State->getSVal(ASE->getIdx(), LCtx).getAs<NonLoc>()) {
+    const MemRegion *R = State->getSVal(ASE->getBase(), LCtx).getAsRegion();
+    const ElementRegion *ER;
+    while ((ER = dyn_cast_or_null<ElementRegion>(R)) &&
+           ER->getElementType() == ElemType) {
+      // Special case to strip ElementRegion layers that represent pointer
+      // arithmetics. For example in code like
+      // char Text[] = "Hello world!", *Word = Text+6;
+      // the area *Word is represented as Element{<Text>, 6 S64B, char} which
+      // is nominally a single byte area, but logically we want to handle
+      // Word[X] equivalently to Text[X + 6]. On the other hand if we have
+      // int Matrix[10][10];
+      // we want to warn when we see Matrix[0][20] because this is logically
+      // invalid and technically invokes undefined behavior when it takes the
+      // 20th element of the int[10] value Matrix[0].
+      R = ER->getSuperRegion();
+      Idx = SVB.evalBinOpNN(State, BO_Add, *Idx, ER->getIndex(), IdxType)
+                .getAs<NonLoc>();
+      if (!Idx)
+        return std::nullopt;
+    }
+
+    if (const auto *SRegion = dyn_cast_or_null<SubRegion>(R)) {
+      NonLoc ESize = SVB.makeArrayIndex(
+          SVB.getContext().getTypeSizeInChars(ElemType).getQuantity());
+      SVal Offset = SVB.evalBinOpNN(State, BO_Mul, ESize, *Idx, IdxType);
+      if (const auto OffsetNL = Offset.getAs<NonLoc>()) {
+        return RegionRawOffsetV2(SRegion, *OffsetNL);
       }
-    } else if (const auto *SRegion = dyn_cast<SubRegion>(Region)) {
-      // NOTE: The dyn_cast<>() is expected to succeed, it'd be very surprising
-      // to see a MemSpaceRegion at this point.
-      // FIXME: We may return with {<Region>, 0} even if we didn't handle any
-      // ElementRegion layers. I think that this behavior was introduced
-      // accidentally by 8a4c760c204546aba566e302f299f7ed2e00e287 in 2011, so
-      // it may be useful to review it in the future.
-      return RegionRawOffsetV2(SRegion, Offset);
     }
-    return std::nullopt;
   }
   return std::nullopt;
 }
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to