https://github.com/NagyDonat updated 
https://github.com/llvm/llvm-project/pull/158639

From db0723ca737ec4613d186ff1137c7405c480baf3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.n...@ericsson.com>
Date: Mon, 15 Sep 2025 15:48:12 +0200
Subject: [PATCH 1/2] [analyzer] Show element count in ArrayBound underflow
 reports

The underflow reports of checker security.ArrayBound already displayed
the (negative) byte offset of the accessed location; but those numbers
were sometimes a bit hard to decipher, so I'm extending the message to
also display this offset as a multiple of the size of the accessed
element.

This logic is currently inactive when the byte offset is not an integer
multiple of the size of the accessed element -- primarily because it
would be a bit cumbersome to report the division and the remainder.

This change only affects the messages; the checker will report the same
issues before and after this commit.
---
 .../Checkers/ArrayBoundChecker.cpp            | 48 ++++++++++++-------
 .../test/Analysis/ArrayBound/verbose-tests.c  | 36 ++++++++++++--
 2 files changed, 64 insertions(+), 20 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp 
b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
index d35031b5c22df..c3c9eec3ad2fd 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
@@ -389,15 +389,26 @@ static std::optional<int64_t> 
getConcreteValue(std::optional<NonLoc> SV) {
 }
 
 static Messages getPrecedesMsgs(const MemSpaceRegion *Space,
-                                const SubRegion *Region, NonLoc Offset) {
-  std::string RegName = getRegionName(Space, Region), OffsetStr = "";
+                                const SubRegion *Region, NonLoc Offset,
+                                QualType ElemType, int64_t ElemSize) {
+  std::string RegName = getRegionName(Space, Region);
 
-  if (auto ConcreteOffset = getConcreteValue(Offset))
+  std::string OffsetStr = "", ElemInfoStr = "";
+  if (std::optional<int64_t> ConcreteOffset = getConcreteValue(Offset)) {
     OffsetStr = formatv(" {0}", ConcreteOffset);
+    if (*ConcreteOffset % ElemSize == 0) {
+      int64_t Count = *ConcreteOffset / ElemSize;
+      if (Count != -1)
+        ElemInfoStr =
+            formatv(" = {0} * sizeof({1})", Count, ElemType.getAsString());
+      else
+        ElemInfoStr = formatv(" = -sizeof({0})", ElemType.getAsString());
+    }
+  }
 
-  return {
-      formatv("Out of bound access to memory preceding {0}", RegName),
-      formatv("Access of {0} at negative byte offset{1}", RegName, OffsetStr)};
+  return {formatv("Out of bound access to memory preceding {0}", RegName),
+          formatv("Access of {0} at negative byte offset{1}{2}", RegName,
+                  OffsetStr, ElemInfoStr)};
 }
 
 /// Try to divide `Val1` and `Val2` (in place) by `Divisor` and return true if
@@ -419,20 +430,15 @@ static bool tryDividePair(std::optional<int64_t> &Val1,
   return true;
 }
 
-static Messages getExceedsMsgs(ASTContext &ACtx, const MemSpaceRegion *Space,
+static Messages getExceedsMsgs(const MemSpaceRegion *Space,
                                const SubRegion *Region, NonLoc Offset,
-                               NonLoc Extent, SVal Location,
-                               bool AlsoMentionUnderflow) {
+                               NonLoc Extent, bool AlsoMentionUnderflow,
+                               QualType ElemType, int64_t ElemSize) {
   std::string RegName = getRegionName(Space, Region);
-  const auto *EReg = Location.getAsRegion()->getAs<ElementRegion>();
-  assert(EReg && "this checker only handles element access");
-  QualType ElemType = EReg->getElementType();
 
   std::optional<int64_t> OffsetN = getConcreteValue(Offset);
   std::optional<int64_t> ExtentN = getConcreteValue(Extent);
 
-  int64_t ElemSize = ACtx.getTypeSizeInChars(ElemType).getQuantity();
-
   bool UseByteOffsets = !tryDividePair(OffsetN, ExtentN, ElemSize);
   const char *OffsetOrIndex = UseByteOffsets ? "byte offset" : "index";
 
@@ -585,6 +591,13 @@ void ArrayBoundChecker::performCheck(const Expr *E, 
CheckerContext &C) const {
   if (!RawOffset)
     return;
 
+  const auto *EReg = Location.getAsRegion()->getAs<ElementRegion>();
+  assert(EReg && "this checker only handles element access");
+  QualType ElemType = EReg->getElementType();
+
+  int64_t ElemSize =
+      C.getASTContext().getTypeSizeInChars(ElemType).getQuantity();
+
   auto [Reg, ByteOffset] = *RawOffset;
 
   // The state updates will be reported as a single note tag, which will be
@@ -635,7 +648,8 @@ void ArrayBoundChecker::performCheck(const Expr *E, 
CheckerContext &C) const {
       } else {
         if (!WithinLowerBound) {
           // ...and it cannot be valid (>= 0), so report an error.
-          Messages Msgs = getPrecedesMsgs(Space, Reg, ByteOffset);
+          Messages Msgs =
+              getPrecedesMsgs(Space, Reg, ByteOffset, ElemType, ElemSize);
           reportOOB(C, PrecedesLowerBound, Msgs, ByteOffset, std::nullopt);
           return;
         }
@@ -678,8 +692,8 @@ void ArrayBoundChecker::performCheck(const Expr *E, 
CheckerContext &C) const {
         }
 
         Messages Msgs =
-            getExceedsMsgs(C.getASTContext(), Space, Reg, ByteOffset,
-                           *KnownSize, Location, AlsoMentionUnderflow);
+            getExceedsMsgs(Space, Reg, ByteOffset, *KnownSize,
+                           AlsoMentionUnderflow, ElemType, ElemSize);
         reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize);
         return;
       }
diff --git a/clang/test/Analysis/ArrayBound/verbose-tests.c 
b/clang/test/Analysis/ArrayBound/verbose-tests.c
index 84d238ed1a2a4..9b6e33dce8a60 100644
--- a/clang/test/Analysis/ArrayBound/verbose-tests.c
+++ b/clang/test/Analysis/ArrayBound/verbose-tests.c
@@ -11,7 +11,7 @@ int TenElements[10];
 void arrayUnderflow(void) {
   TenElements[-3] = 5;
   // expected-warning@-1 {{Out of bound access to memory preceding 
'TenElements'}}
-  // expected-note@-2 {{Access of 'TenElements' at negative byte offset -12}}
+  // expected-note@-2 {{Access of 'TenElements' at negative byte offset -12 = 
-3 * sizeof(int)}}
 }
 
 int underflowWithDeref(void) {
@@ -19,9 +19,39 @@ int underflowWithDeref(void) {
   --p;
   return *p;
   // expected-warning@-1 {{Out of bound access to memory preceding 
'TenElements'}}
-  // expected-note@-2 {{Access of 'TenElements' at negative byte offset -4}}
+  // expected-note@-2 {{Access of 'TenElements' at negative byte offset -4 = 
-sizeof(int)}}
 }
 
+char underflowReportedAsChar(void) {
+  // The "= -... * sizeof(type)" part uses the type of the accessed element
+  // (here 'char'), not the type that appears in the declaration of the
+  // original array (which would be 'int').
+  return ((char *)TenElements)[-1];
+  // expected-warning@-1 {{Out of bound access to memory preceding 
'TenElements'}}
+  // expected-note@-2 {{Access of 'TenElements' at negative byte offset -1 = 
-sizeof(char)}}
+}
+
+struct TwoInts {
+  int a, b;
+};
+
+struct TwoInts underflowReportedAsStruct(void) {
+  // Another case where the accessed type is used for reporting the offset.
+  return *(struct TwoInts*)(TenElements - 4);
+  // expected-warning@-1 {{Out of bound access to memory preceding 
'TenElements'}}
+  // expected-note@-2 {{Access of 'TenElements' at negative byte offset -16 = 
-2 * sizeof(struct TwoInts)}}
+}
+
+struct TwoInts underflowOnlyByteOffset(void) {
+  // In this case the negative byte offset is not a multiple of the size of the
+  // accessed element, so the part "= -... * sizeof(type)" is omitted at the
+  // end of the message.
+  return *(struct TwoInts*)(TenElements - 3);
+  // expected-warning@-1 {{Out of bound access to memory preceding 
'TenElements'}}
+  // expected-note-re@-2 {{Access of 'TenElements' at negative byte offset 
-12{{$}}}}
+}
+
+
 int rng(void);
 int getIndex(void) {
   switch (rng()) {
@@ -40,7 +70,7 @@ void gh86959(void) {
   while (rng())
     TenElements[getIndex()] = 10;
   // expected-warning@-1 {{Out of bound access to memory preceding 
'TenElements'}}
-  // expected-note@-2 {{Access of 'TenElements' at negative byte offset -688}}
+  // expected-note@-2 {{Access of 'TenElements' at negative byte offset -688 = 
-172 * sizeof(int)}}
 }
 
 int scanf(const char *restrict fmt, ...);

From 78be93922ee5c3ac4309e5d90981f8c3cdb85123 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.n...@ericsson.com>
Date: Tue, 16 Sep 2025 15:39:28 +0200
Subject: [PATCH 2/2] Reimplement with different approach

This unifies the code of `getPrecedesMsg` and `getExceedsMsg` and avoids
the `-count * sizeof(type)` multiplications that would evaluate to
nonsense due to C type conversion footguns.
---
 .../Checkers/ArrayBoundChecker.cpp            | 80 +++++++++----------
 .../ArrayBound/assumption-reporting.c         |  8 +-
 .../test/Analysis/ArrayBound/verbose-tests.c  | 20 ++---
 3 files changed, 51 insertions(+), 57 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp 
b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
index c3c9eec3ad2fd..bcd88f77c47e6 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
@@ -130,6 +130,18 @@ struct Messages {
   std::string Short, Full;
 };
 
+enum class BadOffsetKind {Negative, Overflowing, Indeterminate};
+
+constexpr llvm::StringLiteral Adjectives[] = {"a negative", "an overflowing", 
"a negative or overflowing"};
+StringRef asAdjective(BadOffsetKind Problem) {
+  return Adjectives[static_cast<int>(Problem)];
+}
+
+constexpr llvm::StringLiteral Prepositions[] = {"preceding", "after the end 
of", "around"};
+StringRef asPreposition(BadOffsetKind Problem) {
+  return Prepositions[static_cast<int>(Problem)];
+}
+
 // NOTE: The `ArraySubscriptExpr` and `UnaryOperator` callbacks are `PostStmt`
 // instead of `PreStmt` because the current implementation passes the whole
 // expression to `CheckerContext::getSVal()` which only works after the
@@ -388,29 +400,6 @@ static std::optional<int64_t> 
getConcreteValue(std::optional<NonLoc> SV) {
   return SV ? getConcreteValue(*SV) : std::nullopt;
 }
 
-static Messages getPrecedesMsgs(const MemSpaceRegion *Space,
-                                const SubRegion *Region, NonLoc Offset,
-                                QualType ElemType, int64_t ElemSize) {
-  std::string RegName = getRegionName(Space, Region);
-
-  std::string OffsetStr = "", ElemInfoStr = "";
-  if (std::optional<int64_t> ConcreteOffset = getConcreteValue(Offset)) {
-    OffsetStr = formatv(" {0}", ConcreteOffset);
-    if (*ConcreteOffset % ElemSize == 0) {
-      int64_t Count = *ConcreteOffset / ElemSize;
-      if (Count != -1)
-        ElemInfoStr =
-            formatv(" = {0} * sizeof({1})", Count, ElemType.getAsString());
-      else
-        ElemInfoStr = formatv(" = -sizeof({0})", ElemType.getAsString());
-    }
-  }
-
-  return {formatv("Out of bound access to memory preceding {0}", RegName),
-          formatv("Access of {0} at negative byte offset{1}{2}", RegName,
-                  OffsetStr, ElemInfoStr)};
-}
-
 /// Try to divide `Val1` and `Val2` (in place) by `Divisor` and return true if
 /// it can be performed (`Divisor` is nonzero and there is no remainder). The
 /// values `Val1` and `Val2` may be nullopt and in that case the corresponding
@@ -430,30 +419,41 @@ static bool tryDividePair(std::optional<int64_t> &Val1,
   return true;
 }
 
-static Messages getExceedsMsgs(const MemSpaceRegion *Space,
+static Messages getNonTaintMsgs(ASTContext &ACtx, const MemSpaceRegion *Space,
                                const SubRegion *Region, NonLoc Offset,
-                               NonLoc Extent, bool AlsoMentionUnderflow,
-                               QualType ElemType, int64_t ElemSize) {
+                               std::optional<NonLoc> Extent, SVal Location,
+                               BadOffsetKind Problem) {
   std::string RegName = getRegionName(Space, Region);
+  const auto *EReg = Location.getAsRegion()->getAs<ElementRegion>();
+  assert(EReg && "this checker only handles element access");
+  QualType ElemType = EReg->getElementType();
 
   std::optional<int64_t> OffsetN = getConcreteValue(Offset);
   std::optional<int64_t> ExtentN = getConcreteValue(Extent);
 
+  int64_t ElemSize = ACtx.getTypeSizeInChars(ElemType).getQuantity();
+
   bool UseByteOffsets = !tryDividePair(OffsetN, ExtentN, ElemSize);
   const char *OffsetOrIndex = UseByteOffsets ? "byte offset" : "index";
 
   SmallString<256> Buf;
   llvm::raw_svector_ostream Out(Buf);
   Out << "Access of ";
-  if (!ExtentN && !UseByteOffsets)
+  if (OffsetN && !ExtentN && !UseByteOffsets) {
+    // If the offset is reported as an index, then the report must mention the
+    // element type (because it is not always clear from the code). It's more
+    // natural to mention the element type later where the extent is described,
+    // but if the extent is unknown/irrelevant, then the element type can be
+    // inserted into the message at this point.
     Out << "'" << ElemType.getAsString() << "' element in ";
+  }
   Out << RegName << " at ";
-  if (AlsoMentionUnderflow) {
-    Out << "a negative or overflowing " << OffsetOrIndex;
-  } else if (OffsetN) {
+  if (OffsetN) {
+    if (Problem == BadOffsetKind::Negative)
+      Out << "negative ";
     Out << OffsetOrIndex << " " << *OffsetN;
   } else {
-    Out << "an overflowing " << OffsetOrIndex;
+    Out << asAdjective(Problem) << " " << OffsetOrIndex;
   }
   if (ExtentN) {
     Out << ", while it holds only ";
@@ -471,7 +471,7 @@ static Messages getExceedsMsgs(const MemSpaceRegion *Space,
   }
 
   return {formatv("Out of bound access to memory {0} {1}",
-                  AlsoMentionUnderflow ? "around" : "after the end of",
+                  asPreposition(Problem),
                   RegName),
           std::string(Buf)};
 }
@@ -591,13 +591,6 @@ void ArrayBoundChecker::performCheck(const Expr *E, 
CheckerContext &C) const {
   if (!RawOffset)
     return;
 
-  const auto *EReg = Location.getAsRegion()->getAs<ElementRegion>();
-  assert(EReg && "this checker only handles element access");
-  QualType ElemType = EReg->getElementType();
-
-  int64_t ElemSize =
-      C.getASTContext().getTypeSizeInChars(ElemType).getQuantity();
-
   auto [Reg, ByteOffset] = *RawOffset;
 
   // The state updates will be reported as a single note tag, which will be
@@ -648,8 +641,8 @@ void ArrayBoundChecker::performCheck(const Expr *E, 
CheckerContext &C) const {
       } else {
         if (!WithinLowerBound) {
           // ...and it cannot be valid (>= 0), so report an error.
-          Messages Msgs =
-              getPrecedesMsgs(Space, Reg, ByteOffset, ElemType, ElemSize);
+          Messages Msgs = getNonTaintMsgs(C.getASTContext(), Space, Reg,
+              ByteOffset, /*Extent=*/std::nullopt, Location, 
BadOffsetKind::Negative);
           reportOOB(C, PrecedesLowerBound, Msgs, ByteOffset, std::nullopt);
           return;
         }
@@ -691,9 +684,10 @@ void ArrayBoundChecker::performCheck(const Expr *E, 
CheckerContext &C) const {
           return;
         }
 
+        BadOffsetKind Problem = AlsoMentionUnderflow ? 
BadOffsetKind::Indeterminate : BadOffsetKind::Overflowing;
         Messages Msgs =
-            getExceedsMsgs(Space, Reg, ByteOffset, *KnownSize,
-                           AlsoMentionUnderflow, ElemType, ElemSize);
+            getNonTaintMsgs(C.getASTContext(), Space, Reg, ByteOffset,
+                           *KnownSize, Location, Problem);
         reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize);
         return;
       }
diff --git a/clang/test/Analysis/ArrayBound/assumption-reporting.c 
b/clang/test/Analysis/ArrayBound/assumption-reporting.c
index 535e623baa815..bffd5d9bc35b5 100644
--- a/clang/test/Analysis/ArrayBound/assumption-reporting.c
+++ b/clang/test/Analysis/ArrayBound/assumption-reporting.c
@@ -70,7 +70,7 @@ int assumingUpper(int arg) {
   // expected-note@-1 {{Assuming index is less than 10, the number of 'int' 
elements in 'TenElements'}}
   int b = TenElements[arg - 10];
   // expected-warning@-1 {{Out of bound access to memory preceding 
'TenElements'}}
-  // expected-note@-2 {{Access of 'TenElements' at negative byte offset}}
+  // expected-note@-2 {{Access of 'TenElements' at a negative index}}
   return a + b;
 }
 
@@ -99,7 +99,7 @@ int assumingUpperUnsigned(unsigned arg) {
   // expected-note@-1 {{Assuming index is less than 10, the number of 'int' 
elements in 'TenElements'}}
   int b = TenElements[(int)arg - 10];
   // expected-warning@-1 {{Out of bound access to memory preceding 
'TenElements'}}
-  // expected-note@-2 {{Access of 'TenElements' at negative byte offset}}
+  // expected-note@-2 {{Access of 'TenElements' at a negative index}}
   return a + b;
 }
 
@@ -111,7 +111,7 @@ int assumingNothing(unsigned arg) {
   int a = TenElements[arg]; // no note here, we already know that 'arg' is in 
bounds
   int b = TenElements[(int)arg - 10];
   // expected-warning@-1 {{Out of bound access to memory preceding 
'TenElements'}}
-  // expected-note@-2 {{Access of 'TenElements' at negative byte offset}}
+  // expected-note@-2 {{Access of 'TenElements' at a negative index}}
   return a + b;
 }
 
@@ -145,7 +145,7 @@ int assumingConvertedToIntP(struct foo f, int arg) {
   // expected-note@-1 {{Assuming byte offset is less than 5, the extent of 
'f.b'}}
   int c = TenElements[arg-2];
   // expected-warning@-1 {{Out of bound access to memory preceding 
'TenElements'}}
-  // expected-note@-2 {{Access of 'TenElements' at negative byte offset}}
+  // expected-note@-2 {{Access of 'TenElements' at a negative index}}
   return a + b + c;
 }
 
diff --git a/clang/test/Analysis/ArrayBound/verbose-tests.c 
b/clang/test/Analysis/ArrayBound/verbose-tests.c
index 9b6e33dce8a60..e3416886d13e5 100644
--- a/clang/test/Analysis/ArrayBound/verbose-tests.c
+++ b/clang/test/Analysis/ArrayBound/verbose-tests.c
@@ -11,7 +11,7 @@ int TenElements[10];
 void arrayUnderflow(void) {
   TenElements[-3] = 5;
   // expected-warning@-1 {{Out of bound access to memory preceding 
'TenElements'}}
-  // expected-note@-2 {{Access of 'TenElements' at negative byte offset -12 = 
-3 * sizeof(int)}}
+  // expected-note@-2 {{Access of 'int' element in 'TenElements' at negative 
index -3}}
 }
 
 int underflowWithDeref(void) {
@@ -19,16 +19,16 @@ int underflowWithDeref(void) {
   --p;
   return *p;
   // expected-warning@-1 {{Out of bound access to memory preceding 
'TenElements'}}
-  // expected-note@-2 {{Access of 'TenElements' at negative byte offset -4 = 
-sizeof(int)}}
+  // expected-note@-2 {{Access of 'int' element in 'TenElements' at negative 
index -1}}
 }
 
 char underflowReportedAsChar(void) {
-  // The "= -... * sizeof(type)" part uses the type of the accessed element
-  // (here 'char'), not the type that appears in the declaration of the
-  // original array (which would be 'int').
+  // Underflow is reported with the type of the accessed element (here 'char'),
+  // not the type that appears in the declaration of the original array (which
+  // would be 'int').
   return ((char *)TenElements)[-1];
   // expected-warning@-1 {{Out of bound access to memory preceding 
'TenElements'}}
-  // expected-note@-2 {{Access of 'TenElements' at negative byte offset -1 = 
-sizeof(char)}}
+  // expected-note@-2 {{Access of 'char' element in 'TenElements' at negative 
index -1}}
 }
 
 struct TwoInts {
@@ -39,7 +39,7 @@ struct TwoInts underflowReportedAsStruct(void) {
   // Another case where the accessed type is used for reporting the offset.
   return *(struct TwoInts*)(TenElements - 4);
   // expected-warning@-1 {{Out of bound access to memory preceding 
'TenElements'}}
-  // expected-note@-2 {{Access of 'TenElements' at negative byte offset -16 = 
-2 * sizeof(struct TwoInts)}}
+  // expected-note@-2 {{Access of 'struct TwoInts' element in 'TenElements' at 
negative index -2}}
 }
 
 struct TwoInts underflowOnlyByteOffset(void) {
@@ -48,7 +48,7 @@ struct TwoInts underflowOnlyByteOffset(void) {
   // end of the message.
   return *(struct TwoInts*)(TenElements - 3);
   // expected-warning@-1 {{Out of bound access to memory preceding 
'TenElements'}}
-  // expected-note-re@-2 {{Access of 'TenElements' at negative byte offset 
-12{{$}}}}
+  // expected-note@-2 {{Access of 'TenElements' at negative byte offset -12}}
 }
 
 
@@ -70,10 +70,10 @@ void gh86959(void) {
   while (rng())
     TenElements[getIndex()] = 10;
   // expected-warning@-1 {{Out of bound access to memory preceding 
'TenElements'}}
-  // expected-note@-2 {{Access of 'TenElements' at negative byte offset -688 = 
-172 * sizeof(int)}}
+  // expected-note@-2 {{Access of 'int' element in 'TenElements' at negative 
index -172}}
 }
 
-int scanf(const char *restrict fmt, ...);
+int scanf(const char *fmt, ...);
 
 void taintedIndex(void) {
   int index;

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to