ASDenysPetrov created this revision.
ASDenysPetrov added reviewers: NoQ, aaron.ballman, steakhal, martong, 
vsavchenko.
Herald added subscribers: manas, dkrupp, donat.nagy, Szelethus, 
mikhail.ramalho, a.sidorin, rnkovacs, szepet, baloghadamsoftware, xazax.hun.
ASDenysPetrov requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Add support of multi-dimensional arrays in 
`RegionStoreManager::getBindingForElement`. Handle nested ElementRegion's 
getting offsets and checking for being in bounds. Get values from the nested 
initialization lists using obtained offsets.

Example:

  const int arr[3][2] = {{1, 2}, {3, 4}};
  arr[0][0];  // 1
  arr[0][1];  // 2
  arr[0][2];  // UB
  arr[1][0];  // 3
  arr[1][1];  // 4
  arr[1][-1]; // UB
  arr[2][0];  // 0
  arr[2][1];  // 0
  arr[-2][0]; // UB

P.S. this patch also includes D111542 <https://reviews.llvm.org/D111542> fix 
(without test cases, which I'll add separately).


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D111654

Files:
  clang/include/clang/AST/Type.h
  clang/lib/AST/Type.cpp
  clang/lib/StaticAnalyzer/Core/RegionStore.cpp
  clang/test/Analysis/initialization.c
  clang/test/Analysis/initialization.cpp

Index: clang/test/Analysis/initialization.cpp
===================================================================
--- clang/test/Analysis/initialization.cpp
+++ clang/test/Analysis/initialization.cpp
@@ -15,8 +15,7 @@
 int const arr[2][2] = {};
 void arr2init() {
   int i = 1;
-  // FIXME: Should recognize that it is 0.
-  clang_analyzer_eval(arr[i][0]); // expected-warning{{UNKNOWN}}
+  clang_analyzer_eval(arr[i][0]); // expected-warning{{FALSE}}
 }
 
 int const glob_arr1[3] = {};
@@ -54,77 +53,55 @@
   return glob_arr3[0]; // no-warning (garbage or undefined)
 }
 
-// TODO: Support multidimensional array.
 int const glob_arr4[4][2] = {};
 void glob_array_index2() {
-  // FIXME: Should be TRUE.
-  clang_analyzer_eval(glob_arr4[1][0] == 0); // expected-warning{{UNKNOWN}}
-  // FIXME: Should be TRUE.
-  clang_analyzer_eval(glob_arr4[1][1] == 0); // expected-warning{{UNKNOWN}}
+  clang_analyzer_eval(glob_arr4[0][0] == 0); // expected-warning{{TRUE}}
+  clang_analyzer_eval(glob_arr4[1][0] == 0); // expected-warning{{TRUE}}
+  clang_analyzer_eval(glob_arr4[1][1] == 0); // expected-warning{{TRUE}}
 }
 
-// TODO: Support multidimensional array.
 void glob_invalid_index3() {
   int idx = -42;
-  // FIXME: Should warn {{garbage or undefined}}.
-  auto x = glob_arr4[1][idx]; // no-warning
+  auto x = glob_arr4[1][idx]; // expected-warning{{garbage or undefined}}
 }
 
-// TODO: Support multidimensional array.
 void glob_invalid_index4() {
   const int *ptr = glob_arr4[1];
   int idx = -42;
-  // FIXME: Should warn {{garbage or undefined}}.
-  auto x = ptr[idx]; // no-warning
+  auto x = ptr[idx]; // expected-warning{{garbage or undefined}}
 }
 
-// TODO: Support multidimensional array.
 int const glob_arr5[4][2] = {{1}, 3, 4, 5};
 void glob_array_index3() {
-  // FIXME: Should be TRUE.
-  clang_analyzer_eval(glob_arr5[0][0] == 1); // expected-warning{{UNKNOWN}}
-  // FIXME: Should be TRUE.
-  clang_analyzer_eval(glob_arr5[0][1] == 0); // expected-warning{{UNKNOWN}}
-  // FIXME: Should be TRUE.
-  clang_analyzer_eval(glob_arr5[1][0] == 3); // expected-warning{{UNKNOWN}}
-  // FIXME: Should be TRUE.
-  clang_analyzer_eval(glob_arr5[1][1] == 4); // expected-warning{{UNKNOWN}}
-  // FIXME: Should be TRUE.
-  clang_analyzer_eval(glob_arr5[2][0] == 5); // expected-warning{{UNKNOWN}}
-  // FIXME: Should be TRUE.
-  clang_analyzer_eval(glob_arr5[2][1] == 0); // expected-warning{{UNKNOWN}}
-  // FIXME: Should be TRUE.
-  clang_analyzer_eval(glob_arr5[3][0] == 0); // expected-warning{{UNKNOWN}}
-  // FIXME: Should be TRUE.
-  clang_analyzer_eval(glob_arr5[3][1] == 0); // expected-warning{{UNKNOWN}}
+  clang_analyzer_eval(glob_arr5[0][0] == 1); // expected-warning{{TRUE}}
+  clang_analyzer_eval(glob_arr5[0][1] == 0); // expected-warning{{TRUE}}
+  clang_analyzer_eval(glob_arr5[1][0] == 3); // expected-warning{{TRUE}}
+  clang_analyzer_eval(glob_arr5[1][1] == 4); // expected-warning{{TRUE}}
+  clang_analyzer_eval(glob_arr5[2][0] == 5); // expected-warning{{TRUE}}
+  clang_analyzer_eval(glob_arr5[2][1] == 0); // expected-warning{{TRUE}}
+  clang_analyzer_eval(glob_arr5[3][0] == 0); // expected-warning{{TRUE}}
+  clang_analyzer_eval(glob_arr5[3][1] == 0); // expected-warning{{TRUE}}
 }
 
-// TODO: Support multidimensional array.
 void glob_ptr_index2() {
+  // FIXME: Fix cast.
   int const *ptr = glob_arr5[1];
   // FIXME: Should be TRUE.
-  clang_analyzer_eval(ptr[0] == 3); // expected-warning{{UNKNOWN}}
+  clang_analyzer_eval(ptr[0] == 3); // expected-warning{{UNDEFINED}}
   // FIXME: Should be TRUE.
-  clang_analyzer_eval(ptr[1] == 4); // expected-warning{{UNKNOWN}}
-  // FIXME: Should be UNDEFINED.
-  clang_analyzer_eval(ptr[2] == 5); // expected-warning{{UNKNOWN}}
-  // FIXME: Should be UNDEFINED.
-  clang_analyzer_eval(ptr[3] == 0); // expected-warning{{UNKNOWN}}
-  // FIXME: Should be UNDEFINED.
-  clang_analyzer_eval(ptr[4] == 0); // expected-warning{{UNKNOWN}}
+  clang_analyzer_eval(ptr[1] == 4); // expected-warning{{UNDEFINED}}
+  clang_analyzer_eval(ptr[2] == 0); // expected-warning{{UNDEFINED}}
+  clang_analyzer_eval(ptr[3] == 0); // expected-warning{{UNDEFINED}}
+  clang_analyzer_eval(ptr[4] == 0); // expected-warning{{UNDEFINED}}
 }
 
-// TODO: Support multidimensional array.
 void glob_invalid_index5() {
   int idx = -42;
-  // FIXME: Should warn {{garbage or undefined}}.
-  auto x = glob_arr5[1][idx]; // no-warning
+  auto x = glob_arr5[1][idx]; // expected-warning{{garbage or undefined}}
 }
 
-// TODO: Support multidimensional array.
 void glob_invalid_index6() {
   int const *ptr = &glob_arr5[1][0];
   int idx = 42;
-  // FIXME: Should warn {{garbage or undefined}}.
-  auto x = ptr[idx]; // // no-warning
+  auto x = ptr[idx]; // // expected-warning{{garbage or undefined}}
 }
Index: clang/test/Analysis/initialization.c
===================================================================
--- clang/test/Analysis/initialization.c
+++ clang/test/Analysis/initialization.c
@@ -58,42 +58,33 @@
   int res = ptr[x]; // expected-warning{{garbage or undefined}}
 }
 
-// TODO: Support multidimensional array.
 const int glob_arr2[3][3] = {[0][0] = 1, [1][1] = 5, [2][0] = 7};
 void glob_arr_index3() {
-  // FIXME: These all should be TRUE.
-  clang_analyzer_eval(glob_arr2[0][0] == 1); // expected-warning{{UNKNOWN}}
-  clang_analyzer_eval(glob_arr2[0][1] == 0); // expected-warning{{UNKNOWN}}
-  clang_analyzer_eval(glob_arr2[0][2] == 0); // expected-warning{{UNKNOWN}}
-  clang_analyzer_eval(glob_arr2[1][0] == 0); // expected-warning{{UNKNOWN}}
-  clang_analyzer_eval(glob_arr2[1][1] == 5); // expected-warning{{UNKNOWN}}
-  clang_analyzer_eval(glob_arr2[1][2] == 0); // expected-warning{{UNKNOWN}}
-  clang_analyzer_eval(glob_arr2[2][0] == 7); // expected-warning{{UNKNOWN}}
-  clang_analyzer_eval(glob_arr2[2][1] == 0); // expected-warning{{UNKNOWN}}
-  clang_analyzer_eval(glob_arr2[2][2] == 0); // expected-warning{{UNKNOWN}}
+  clang_analyzer_eval(glob_arr2[0][0] == 1); // expected-warning{{TRUE}}
+  clang_analyzer_eval(glob_arr2[0][1] == 0); // expected-warning{{TRUE}}
+  clang_analyzer_eval(glob_arr2[0][2] == 0); // expected-warning{{TRUE}}
+  clang_analyzer_eval(glob_arr2[1][0] == 0); // expected-warning{{TRUE}}
+  clang_analyzer_eval(glob_arr2[1][1] == 5); // expected-warning{{TRUE}}
+  clang_analyzer_eval(glob_arr2[1][2] == 0); // expected-warning{{TRUE}}
+  clang_analyzer_eval(glob_arr2[2][0] == 7); // expected-warning{{TRUE}}
+  clang_analyzer_eval(glob_arr2[2][1] == 0); // expected-warning{{TRUE}}
+  clang_analyzer_eval(glob_arr2[2][2] == 0); // expected-warning{{TRUE}}
 }
 
-// TODO: Support multidimensional array.
 void negative_index() {
   int x = 2, y = -2;
-  // FIXME: Should be UNDEFINED.
-  clang_analyzer_eval(glob_arr2[x][y] == 5); // expected-warning{{UNKNOWN}}
+  clang_analyzer_eval(glob_arr2[x][y] == 5); // expected-warning{{UNDEFINED}}
   x = 3;
   y = -3;
-  // FIXME: Should be UNDEFINED.
-  clang_analyzer_eval(glob_arr2[x][y] == 7); // expected-warning{{UNKNOWN}}
+  clang_analyzer_eval(glob_arr2[x][y] == 7); // expected-warning{{UNDEFINED}}
 }
 
-// TODO: Support multidimensional array.
 void glob_invalid_index3() {
   int x = -1, y = -1;
-  // FIXME: Should warn {{garbage or undefined}}.
-  int res = glob_arr2[x][y]; // no-warning
+  int res = glob_arr2[x][y]; // expected-warning{{garbage or undefined}}
 }
 
-// TODO: Support multidimensional array.
 void glob_invalid_index4() {
   int x = 3, y = 2;
-  // FIXME: Should warn {{garbage or undefined}}.
-  int res = glob_arr2[x][y]; // no-warning
+  int res = glob_arr2[x][y]; // expected-warning{{garbage or undefined}}
 }
Index: clang/lib/StaticAnalyzer/Core/RegionStore.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Core/RegionStore.cpp
+++ clang/lib/StaticAnalyzer/Core/RegionStore.cpp
@@ -1657,60 +1657,98 @@
       char c = (i >= length) ? '\0' : Str->getCodeUnit(i);
       return svalBuilder.makeIntVal(c, T);
     }
-  } else if (const VarRegion *VR = dyn_cast<VarRegion>(superR)) {
-    // Check if the containing array has an initialized value that we can trust.
-    // We can trust a const value or a value of a global initializer in main().
-    const VarDecl *VD = VR->getDecl();
-    if (VD->getType().isConstQualified() ||
-        R->getElementType().isConstQualified() ||
-        (B.isMainAnalysis() && VD->hasGlobalStorage())) {
-      if (const Expr *Init = VD->getAnyInitializer()) {
-        if (const auto *InitList = dyn_cast<InitListExpr>(Init)) {
-          // The array index has to be known.
-          if (auto CI = R->getIndex().getAs<nonloc::ConcreteInt>()) {
-            // If it is not an array, return Undef.
-            QualType T = VD->getType();
-            const ConstantArrayType *CAT = Ctx.getAsConstantArrayType(T);
-            if (!CAT)
-              return UndefinedVal();
-
-            // Support one-dimensional array.
-            // C++20 [expr.add] 7.6.6.4 (excerpt):
-            //   If P points to an array element i of an array object x with n
-            //   elements, where i < 0 or i > n, the behavior is undefined.
-            //   Dereferencing is not allowed on the "one past the last
-            //   element", when i == n.
-            // Example:
-            //   const int arr[4] = {1, 2};
-            //   const int *ptr = arr;
-            //   int x0 = ptr[0]; // 1
-            //   int x1 = ptr[1]; // 2
-            //   int x2 = ptr[2]; // 0
-            //   int x3 = ptr[3]; // 0
-            //   int x4 = ptr[4]; // UB
-            // TODO: Support multidimensional array.
-            if (!isa<ConstantArrayType>(CAT->getElementType())) {
-              // One-dimensional array.
-              const llvm::APSInt &Idx = CI->getValue();
-              const auto I = static_cast<uint64_t>(Idx.getExtValue());
-              // Use `getZExtValue` because array extent can not be negative.
-              const uint64_t Extent = CAT->getSize().getZExtValue();
-              // Check for `Idx < 0`, NOT for `I < 0`, because `Idx` CAN be
-              // negative, but `I` can NOT.
-              if (Idx < 0 || I >= Extent)
-                return UndefinedVal();
+  } else if (isa<ElementRegion>(superR) || isa<VarRegion>(superR)) {
+    // Assume we are treating a n-dimensional array.
+    // Get offsets from the expression, like `arr[4][2][1];`
+    // `SValOffsets` should be {1, 2, 4};
+    const VarRegion *VR = dyn_cast<VarRegion>(superR);
+    SmallVector<SVal, 2> SValOffsets;
+    SValOffsets.push_back(R->getIndex());
+    if (const ElementRegion *ER = dyn_cast<ElementRegion>(superR)) {
+      const ElementRegion *LastER = nullptr;
+      do {
+        SValOffsets.push_back(ER->getIndex());
+        LastER = ER;
+        ER = dyn_cast<ElementRegion>(ER->getSuperRegion());
+      } while (ER);
+      VR = dyn_cast<VarRegion>(LastER->getSuperRegion());
+    }
 
-              // C++20 [expr.add] 9.4.17.5 (excerpt):
-              //   i-th array element is value-initialized for each k < i ≤ n,
-              //   where k is an expression-list size and n is an array extent.
-              if (I >= InitList->getNumInits())
-                return svalBuilder.makeZeroVal(R->getElementType());
+    if (VR) {
+      const VarDecl *VD = VR->getDecl()->getMostRecentDecl();
+      if (VD->getType().isConstQualified() ||
+          R->getElementType().isConstQualified() ||
+          (B.isMainAnalysis() && VD->hasGlobalStorage())) {
+        QualType T = VD->getType();
+        if (const ConstantArrayType *CAT = Ctx.getAsConstantArrayType(T)) {
+          // The number of offsets should equal to the numbers of extents,
+          // otherwise wrong type punning occured. For instance:
+          //  int arr[1][2][3];
+          //  auto ptr = (int(*)[42])arr;
+          //  auto x = ptr[4][2]; // UB
+          SmallVector<uint64_t, 2> Extents = CAT->getAllExtents();
+          if (SValOffsets.size() != Extents.size())
+            return UndefinedVal();
+
+          // Check offsets for being out of bounds.
+          // C++20 [expr.add] 7.6.6.4 (excerpt):
+          //   If P points to an array element i of an array object x with n
+          //   elements, where i < 0 or i > n, the behavior is undefined.
+          //   Dereferencing is not allowed on the "one past the last
+          //   element", when i == n.
+          // Example:
+          //  const int arr[3][2] = {{1, 2}, {3, 4}};
+          //  arr[0][0];  // 1
+          //  arr[0][1];  // 2
+          //  arr[0][2];  // UB
+          //  arr[1][0];  // 3
+          //  arr[1][1];  // 4
+          //  arr[1][-1]; // UB
+          //  arr[2][0];  // 0
+          //  arr[2][1];  // 0
+          //  arr[-2][0]; // UB
+          SmallVector<uint64_t, 2> ConcreteOffsets;
+          ConcreteOffsets.resize(SValOffsets.size());
+          auto ExtentIt = Extents.begin();
+          auto OffsetIt = ConcreteOffsets.begin();
+          // Reversion `SValOffsets` to make it consistent with `Extents`.
+          for (SVal &V : llvm::reverse(SValOffsets)) {
+            if (auto CI = V.getAs<nonloc::ConcreteInt>()) {
+              const llvm::APSInt &Offset = CI->getValue();
+              const auto I = static_cast<uint64_t>(Offset.getExtValue());
+              // Check for `Offset < 0`, NOT for `I < 0`, because `Offset` CAN
+              // be negative, but `I` can NOT (because it's an uint64_t).
+              if (Offset < 0 || I >= *(ExtentIt++))
+                return UndefinedVal();
+              // Store index in a reversive order.
+              *(OffsetIt++) = I;
+            } else
+              // Symbolic index presented. Return Unknown value.
+              // FIXME: We also need to take ElementRegions with symbolic
+              // indexes into account.
+              return UnknownVal();
+          }
 
-              // Return a constant value, if it is presented.
-              // FIXME: Support other SVals.
-              const Expr *E = InitList->getInit(I);
-              if (Optional<SVal> V = svalBuilder.getConstantVal(E))
-                return *V;
+          // Here we are sure that all the indexes are in bounds.
+          if (const Expr *Init = VD->getAnyInitializer()) {
+            if (const auto *InitList = dyn_cast<InitListExpr>(Init)) {
+              for (auto I : ConcreteOffsets) {
+                // C++20 [expr.add] 9.4.17.5 (excerpt):
+                //   i-th array element is value-initialized for each k < i ≤ n,
+                //   where k is an expression-list size and n is an array
+                //   extent.
+                if (I >= InitList->getNumInits())
+                  return svalBuilder.makeZeroVal(R->getElementType());
+
+                const Expr *E = InitList->getInit(I);
+                // Go to the nested initializer list.
+                if (const auto *IL = dyn_cast<InitListExpr>(E))
+                  InitList = IL;
+                // Return a constant value, if it is presented.
+                // FIXME: Support other SVals.
+                else if (Optional<SVal> V = svalBuilder.getConstantVal(E))
+                  return *V;
+              }
             }
           }
         }
Index: clang/lib/AST/Type.cpp
===================================================================
--- clang/lib/AST/Type.cpp
+++ clang/lib/AST/Type.cpp
@@ -138,6 +138,18 @@
   ArrayTypeBits.SizeModifier = sm;
 }
 
+/// Return an array with extents of the declared array type.
+///
+/// E.g. for `const int x[1][2][3];` returns {1,2,3}.
+SmallVector<uint64_t, 2> ConstantArrayType::getAllExtents() const {
+  SmallVector<uint64_t, 2> Extents;
+  const ConstantArrayType *CAT = this;
+  do {
+    Extents.push_back(CAT->getSize().getZExtValue());
+  } while ((CAT = dyn_cast<ConstantArrayType>(CAT->getElementType())));
+  return Extents;
+}
+
 unsigned ConstantArrayType::getNumAddressingBits(const ASTContext &Context,
                                                  QualType ElementType,
                                                const llvm::APInt &NumElements) {
Index: clang/include/clang/AST/Type.h
===================================================================
--- clang/include/clang/AST/Type.h
+++ clang/include/clang/AST/Type.h
@@ -2950,6 +2950,7 @@
 
 public:
   const llvm::APInt &getSize() const { return Size; }
+  SmallVector<uint64_t, 2> getAllExtents() const;
   const Expr *getSizeExpr() const {
     return ConstantArrayTypeBits.HasStoredSizeExpr
                ? *getTrailingObjects<const Expr *>()
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to