https://github.com/NagyDonat updated https://github.com/llvm/llvm-project/pull/158639
From db0723ca737ec4613d186ff1137c7405c480baf3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.n...@ericsson.com> Date: Mon, 15 Sep 2025 15:48:12 +0200 Subject: [PATCH 1/2] [analyzer] Show element count in ArrayBound underflow reports The underflow reports of checker security.ArrayBound already displayed the (negative) byte offset of the accessed location; but those numbers were sometimes a bit hard to decipher, so I'm extending the message to also display this offset as a multiple of the size of the accessed element. This logic is currently inactive when the byte offset is not an integer multiple of the size of the accessed element -- primarily because it would be a bit cumbersome to report the division and the remainder. This change only affects the messages; the checker will report the same issues before and after this commit. --- .../Checkers/ArrayBoundChecker.cpp | 48 ++++++++++++------- .../test/Analysis/ArrayBound/verbose-tests.c | 36 ++++++++++++-- 2 files changed, 64 insertions(+), 20 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp index d35031b5c22df..c3c9eec3ad2fd 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp @@ -389,15 +389,26 @@ static std::optional<int64_t> getConcreteValue(std::optional<NonLoc> SV) { } static Messages getPrecedesMsgs(const MemSpaceRegion *Space, - const SubRegion *Region, NonLoc Offset) { - std::string RegName = getRegionName(Space, Region), OffsetStr = ""; + const SubRegion *Region, NonLoc Offset, + QualType ElemType, int64_t ElemSize) { + std::string RegName = getRegionName(Space, Region); - if (auto ConcreteOffset = getConcreteValue(Offset)) + std::string OffsetStr = "", ElemInfoStr = ""; + if (std::optional<int64_t> ConcreteOffset = getConcreteValue(Offset)) { OffsetStr = formatv(" {0}", ConcreteOffset); + if (*ConcreteOffset % ElemSize == 0) { + int64_t Count = *ConcreteOffset / ElemSize; + if (Count != -1) + ElemInfoStr = + formatv(" = {0} * sizeof({1})", Count, ElemType.getAsString()); + else + ElemInfoStr = formatv(" = -sizeof({0})", ElemType.getAsString()); + } + } - return { - formatv("Out of bound access to memory preceding {0}", RegName), - formatv("Access of {0} at negative byte offset{1}", RegName, OffsetStr)}; + return {formatv("Out of bound access to memory preceding {0}", RegName), + formatv("Access of {0} at negative byte offset{1}{2}", RegName, + OffsetStr, ElemInfoStr)}; } /// Try to divide `Val1` and `Val2` (in place) by `Divisor` and return true if @@ -419,20 +430,15 @@ static bool tryDividePair(std::optional<int64_t> &Val1, return true; } -static Messages getExceedsMsgs(ASTContext &ACtx, const MemSpaceRegion *Space, +static Messages getExceedsMsgs(const MemSpaceRegion *Space, const SubRegion *Region, NonLoc Offset, - NonLoc Extent, SVal Location, - bool AlsoMentionUnderflow) { + NonLoc Extent, bool AlsoMentionUnderflow, + QualType ElemType, int64_t ElemSize) { std::string RegName = getRegionName(Space, Region); - const auto *EReg = Location.getAsRegion()->getAs<ElementRegion>(); - assert(EReg && "this checker only handles element access"); - QualType ElemType = EReg->getElementType(); std::optional<int64_t> OffsetN = getConcreteValue(Offset); std::optional<int64_t> ExtentN = getConcreteValue(Extent); - int64_t ElemSize = ACtx.getTypeSizeInChars(ElemType).getQuantity(); - bool UseByteOffsets = !tryDividePair(OffsetN, ExtentN, ElemSize); const char *OffsetOrIndex = UseByteOffsets ? "byte offset" : "index"; @@ -585,6 +591,13 @@ void ArrayBoundChecker::performCheck(const Expr *E, CheckerContext &C) const { if (!RawOffset) return; + const auto *EReg = Location.getAsRegion()->getAs<ElementRegion>(); + assert(EReg && "this checker only handles element access"); + QualType ElemType = EReg->getElementType(); + + int64_t ElemSize = + C.getASTContext().getTypeSizeInChars(ElemType).getQuantity(); + auto [Reg, ByteOffset] = *RawOffset; // The state updates will be reported as a single note tag, which will be @@ -635,7 +648,8 @@ void ArrayBoundChecker::performCheck(const Expr *E, CheckerContext &C) const { } else { if (!WithinLowerBound) { // ...and it cannot be valid (>= 0), so report an error. - Messages Msgs = getPrecedesMsgs(Space, Reg, ByteOffset); + Messages Msgs = + getPrecedesMsgs(Space, Reg, ByteOffset, ElemType, ElemSize); reportOOB(C, PrecedesLowerBound, Msgs, ByteOffset, std::nullopt); return; } @@ -678,8 +692,8 @@ void ArrayBoundChecker::performCheck(const Expr *E, CheckerContext &C) const { } Messages Msgs = - getExceedsMsgs(C.getASTContext(), Space, Reg, ByteOffset, - *KnownSize, Location, AlsoMentionUnderflow); + getExceedsMsgs(Space, Reg, ByteOffset, *KnownSize, + AlsoMentionUnderflow, ElemType, ElemSize); reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize); return; } diff --git a/clang/test/Analysis/ArrayBound/verbose-tests.c b/clang/test/Analysis/ArrayBound/verbose-tests.c index 84d238ed1a2a4..9b6e33dce8a60 100644 --- a/clang/test/Analysis/ArrayBound/verbose-tests.c +++ b/clang/test/Analysis/ArrayBound/verbose-tests.c @@ -11,7 +11,7 @@ int TenElements[10]; void arrayUnderflow(void) { TenElements[-3] = 5; // expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}} - // expected-note@-2 {{Access of 'TenElements' at negative byte offset -12}} + // expected-note@-2 {{Access of 'TenElements' at negative byte offset -12 = -3 * sizeof(int)}} } int underflowWithDeref(void) { @@ -19,9 +19,39 @@ int underflowWithDeref(void) { --p; return *p; // expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}} - // expected-note@-2 {{Access of 'TenElements' at negative byte offset -4}} + // expected-note@-2 {{Access of 'TenElements' at negative byte offset -4 = -sizeof(int)}} } +char underflowReportedAsChar(void) { + // The "= -... * sizeof(type)" part uses the type of the accessed element + // (here 'char'), not the type that appears in the declaration of the + // original array (which would be 'int'). + return ((char *)TenElements)[-1]; + // expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}} + // expected-note@-2 {{Access of 'TenElements' at negative byte offset -1 = -sizeof(char)}} +} + +struct TwoInts { + int a, b; +}; + +struct TwoInts underflowReportedAsStruct(void) { + // Another case where the accessed type is used for reporting the offset. + return *(struct TwoInts*)(TenElements - 4); + // expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}} + // expected-note@-2 {{Access of 'TenElements' at negative byte offset -16 = -2 * sizeof(struct TwoInts)}} +} + +struct TwoInts underflowOnlyByteOffset(void) { + // In this case the negative byte offset is not a multiple of the size of the + // accessed element, so the part "= -... * sizeof(type)" is omitted at the + // end of the message. + return *(struct TwoInts*)(TenElements - 3); + // expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}} + // expected-note-re@-2 {{Access of 'TenElements' at negative byte offset -12{{$}}}} +} + + int rng(void); int getIndex(void) { switch (rng()) { @@ -40,7 +70,7 @@ void gh86959(void) { while (rng()) TenElements[getIndex()] = 10; // expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}} - // expected-note@-2 {{Access of 'TenElements' at negative byte offset -688}} + // expected-note@-2 {{Access of 'TenElements' at negative byte offset -688 = -172 * sizeof(int)}} } int scanf(const char *restrict fmt, ...); From 78be93922ee5c3ac4309e5d90981f8c3cdb85123 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.n...@ericsson.com> Date: Tue, 16 Sep 2025 15:39:28 +0200 Subject: [PATCH 2/2] Reimplement with different approach This unifies the code of `getPrecedesMsg` and `getExceedsMsg` and avoids the `-count * sizeof(type)` multiplications that would evaluate to nonsense due to C type conversion footguns. --- .../Checkers/ArrayBoundChecker.cpp | 80 +++++++++---------- .../ArrayBound/assumption-reporting.c | 8 +- .../test/Analysis/ArrayBound/verbose-tests.c | 20 ++--- 3 files changed, 51 insertions(+), 57 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp index c3c9eec3ad2fd..bcd88f77c47e6 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp @@ -130,6 +130,18 @@ struct Messages { std::string Short, Full; }; +enum class BadOffsetKind {Negative, Overflowing, Indeterminate}; + +constexpr llvm::StringLiteral Adjectives[] = {"a negative", "an overflowing", "a negative or overflowing"}; +StringRef asAdjective(BadOffsetKind Problem) { + return Adjectives[static_cast<int>(Problem)]; +} + +constexpr llvm::StringLiteral Prepositions[] = {"preceding", "after the end of", "around"}; +StringRef asPreposition(BadOffsetKind Problem) { + return Prepositions[static_cast<int>(Problem)]; +} + // NOTE: The `ArraySubscriptExpr` and `UnaryOperator` callbacks are `PostStmt` // instead of `PreStmt` because the current implementation passes the whole // expression to `CheckerContext::getSVal()` which only works after the @@ -388,29 +400,6 @@ static std::optional<int64_t> getConcreteValue(std::optional<NonLoc> SV) { return SV ? getConcreteValue(*SV) : std::nullopt; } -static Messages getPrecedesMsgs(const MemSpaceRegion *Space, - const SubRegion *Region, NonLoc Offset, - QualType ElemType, int64_t ElemSize) { - std::string RegName = getRegionName(Space, Region); - - std::string OffsetStr = "", ElemInfoStr = ""; - if (std::optional<int64_t> ConcreteOffset = getConcreteValue(Offset)) { - OffsetStr = formatv(" {0}", ConcreteOffset); - if (*ConcreteOffset % ElemSize == 0) { - int64_t Count = *ConcreteOffset / ElemSize; - if (Count != -1) - ElemInfoStr = - formatv(" = {0} * sizeof({1})", Count, ElemType.getAsString()); - else - ElemInfoStr = formatv(" = -sizeof({0})", ElemType.getAsString()); - } - } - - return {formatv("Out of bound access to memory preceding {0}", RegName), - formatv("Access of {0} at negative byte offset{1}{2}", RegName, - OffsetStr, ElemInfoStr)}; -} - /// Try to divide `Val1` and `Val2` (in place) by `Divisor` and return true if /// it can be performed (`Divisor` is nonzero and there is no remainder). The /// values `Val1` and `Val2` may be nullopt and in that case the corresponding @@ -430,30 +419,41 @@ static bool tryDividePair(std::optional<int64_t> &Val1, return true; } -static Messages getExceedsMsgs(const MemSpaceRegion *Space, +static Messages getNonTaintMsgs(ASTContext &ACtx, const MemSpaceRegion *Space, const SubRegion *Region, NonLoc Offset, - NonLoc Extent, bool AlsoMentionUnderflow, - QualType ElemType, int64_t ElemSize) { + std::optional<NonLoc> Extent, SVal Location, + BadOffsetKind Problem) { std::string RegName = getRegionName(Space, Region); + const auto *EReg = Location.getAsRegion()->getAs<ElementRegion>(); + assert(EReg && "this checker only handles element access"); + QualType ElemType = EReg->getElementType(); std::optional<int64_t> OffsetN = getConcreteValue(Offset); std::optional<int64_t> ExtentN = getConcreteValue(Extent); + int64_t ElemSize = ACtx.getTypeSizeInChars(ElemType).getQuantity(); + bool UseByteOffsets = !tryDividePair(OffsetN, ExtentN, ElemSize); const char *OffsetOrIndex = UseByteOffsets ? "byte offset" : "index"; SmallString<256> Buf; llvm::raw_svector_ostream Out(Buf); Out << "Access of "; - if (!ExtentN && !UseByteOffsets) + if (OffsetN && !ExtentN && !UseByteOffsets) { + // If the offset is reported as an index, then the report must mention the + // element type (because it is not always clear from the code). It's more + // natural to mention the element type later where the extent is described, + // but if the extent is unknown/irrelevant, then the element type can be + // inserted into the message at this point. Out << "'" << ElemType.getAsString() << "' element in "; + } Out << RegName << " at "; - if (AlsoMentionUnderflow) { - Out << "a negative or overflowing " << OffsetOrIndex; - } else if (OffsetN) { + if (OffsetN) { + if (Problem == BadOffsetKind::Negative) + Out << "negative "; Out << OffsetOrIndex << " " << *OffsetN; } else { - Out << "an overflowing " << OffsetOrIndex; + Out << asAdjective(Problem) << " " << OffsetOrIndex; } if (ExtentN) { Out << ", while it holds only "; @@ -471,7 +471,7 @@ static Messages getExceedsMsgs(const MemSpaceRegion *Space, } return {formatv("Out of bound access to memory {0} {1}", - AlsoMentionUnderflow ? "around" : "after the end of", + asPreposition(Problem), RegName), std::string(Buf)}; } @@ -591,13 +591,6 @@ void ArrayBoundChecker::performCheck(const Expr *E, CheckerContext &C) const { if (!RawOffset) return; - const auto *EReg = Location.getAsRegion()->getAs<ElementRegion>(); - assert(EReg && "this checker only handles element access"); - QualType ElemType = EReg->getElementType(); - - int64_t ElemSize = - C.getASTContext().getTypeSizeInChars(ElemType).getQuantity(); - auto [Reg, ByteOffset] = *RawOffset; // The state updates will be reported as a single note tag, which will be @@ -648,8 +641,8 @@ void ArrayBoundChecker::performCheck(const Expr *E, CheckerContext &C) const { } else { if (!WithinLowerBound) { // ...and it cannot be valid (>= 0), so report an error. - Messages Msgs = - getPrecedesMsgs(Space, Reg, ByteOffset, ElemType, ElemSize); + Messages Msgs = getNonTaintMsgs(C.getASTContext(), Space, Reg, + ByteOffset, /*Extent=*/std::nullopt, Location, BadOffsetKind::Negative); reportOOB(C, PrecedesLowerBound, Msgs, ByteOffset, std::nullopt); return; } @@ -691,9 +684,10 @@ void ArrayBoundChecker::performCheck(const Expr *E, CheckerContext &C) const { return; } + BadOffsetKind Problem = AlsoMentionUnderflow ? BadOffsetKind::Indeterminate : BadOffsetKind::Overflowing; Messages Msgs = - getExceedsMsgs(Space, Reg, ByteOffset, *KnownSize, - AlsoMentionUnderflow, ElemType, ElemSize); + getNonTaintMsgs(C.getASTContext(), Space, Reg, ByteOffset, + *KnownSize, Location, Problem); reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize); return; } diff --git a/clang/test/Analysis/ArrayBound/assumption-reporting.c b/clang/test/Analysis/ArrayBound/assumption-reporting.c index 535e623baa815..bffd5d9bc35b5 100644 --- a/clang/test/Analysis/ArrayBound/assumption-reporting.c +++ b/clang/test/Analysis/ArrayBound/assumption-reporting.c @@ -70,7 +70,7 @@ int assumingUpper(int arg) { // expected-note@-1 {{Assuming index is less than 10, the number of 'int' elements in 'TenElements'}} int b = TenElements[arg - 10]; // expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}} - // expected-note@-2 {{Access of 'TenElements' at negative byte offset}} + // expected-note@-2 {{Access of 'TenElements' at a negative index}} return a + b; } @@ -99,7 +99,7 @@ int assumingUpperUnsigned(unsigned arg) { // expected-note@-1 {{Assuming index is less than 10, the number of 'int' elements in 'TenElements'}} int b = TenElements[(int)arg - 10]; // expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}} - // expected-note@-2 {{Access of 'TenElements' at negative byte offset}} + // expected-note@-2 {{Access of 'TenElements' at a negative index}} return a + b; } @@ -111,7 +111,7 @@ int assumingNothing(unsigned arg) { int a = TenElements[arg]; // no note here, we already know that 'arg' is in bounds int b = TenElements[(int)arg - 10]; // expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}} - // expected-note@-2 {{Access of 'TenElements' at negative byte offset}} + // expected-note@-2 {{Access of 'TenElements' at a negative index}} return a + b; } @@ -145,7 +145,7 @@ int assumingConvertedToIntP(struct foo f, int arg) { // expected-note@-1 {{Assuming byte offset is less than 5, the extent of 'f.b'}} int c = TenElements[arg-2]; // expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}} - // expected-note@-2 {{Access of 'TenElements' at negative byte offset}} + // expected-note@-2 {{Access of 'TenElements' at a negative index}} return a + b + c; } diff --git a/clang/test/Analysis/ArrayBound/verbose-tests.c b/clang/test/Analysis/ArrayBound/verbose-tests.c index 9b6e33dce8a60..e3416886d13e5 100644 --- a/clang/test/Analysis/ArrayBound/verbose-tests.c +++ b/clang/test/Analysis/ArrayBound/verbose-tests.c @@ -11,7 +11,7 @@ int TenElements[10]; void arrayUnderflow(void) { TenElements[-3] = 5; // expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}} - // expected-note@-2 {{Access of 'TenElements' at negative byte offset -12 = -3 * sizeof(int)}} + // expected-note@-2 {{Access of 'int' element in 'TenElements' at negative index -3}} } int underflowWithDeref(void) { @@ -19,16 +19,16 @@ int underflowWithDeref(void) { --p; return *p; // expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}} - // expected-note@-2 {{Access of 'TenElements' at negative byte offset -4 = -sizeof(int)}} + // expected-note@-2 {{Access of 'int' element in 'TenElements' at negative index -1}} } char underflowReportedAsChar(void) { - // The "= -... * sizeof(type)" part uses the type of the accessed element - // (here 'char'), not the type that appears in the declaration of the - // original array (which would be 'int'). + // Underflow is reported with the type of the accessed element (here 'char'), + // not the type that appears in the declaration of the original array (which + // would be 'int'). return ((char *)TenElements)[-1]; // expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}} - // expected-note@-2 {{Access of 'TenElements' at negative byte offset -1 = -sizeof(char)}} + // expected-note@-2 {{Access of 'char' element in 'TenElements' at negative index -1}} } struct TwoInts { @@ -39,7 +39,7 @@ struct TwoInts underflowReportedAsStruct(void) { // Another case where the accessed type is used for reporting the offset. return *(struct TwoInts*)(TenElements - 4); // expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}} - // expected-note@-2 {{Access of 'TenElements' at negative byte offset -16 = -2 * sizeof(struct TwoInts)}} + // expected-note@-2 {{Access of 'struct TwoInts' element in 'TenElements' at negative index -2}} } struct TwoInts underflowOnlyByteOffset(void) { @@ -48,7 +48,7 @@ struct TwoInts underflowOnlyByteOffset(void) { // end of the message. return *(struct TwoInts*)(TenElements - 3); // expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}} - // expected-note-re@-2 {{Access of 'TenElements' at negative byte offset -12{{$}}}} + // expected-note@-2 {{Access of 'TenElements' at negative byte offset -12}} } @@ -70,10 +70,10 @@ void gh86959(void) { while (rng()) TenElements[getIndex()] = 10; // expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}} - // expected-note@-2 {{Access of 'TenElements' at negative byte offset -688 = -172 * sizeof(int)}} + // expected-note@-2 {{Access of 'int' element in 'TenElements' at negative index -172}} } -int scanf(const char *restrict fmt, ...); +int scanf(const char *fmt, ...); void taintedIndex(void) { int index; _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits