https://github.com/balazske created 
https://github.com/llvm/llvm-project/pull/159795

Handle cases like `strlen(string_constant + 3)` in `CStringChecker` by 
returning the original string length minus offset.

From 40b60643888e2b82437c01abd17ed149fe81c34f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bal=C3=A1zs=20K=C3=A9ri?= <balazs.k...@ericsson.com>
Date: Thu, 18 Sep 2025 15:20:37 +0200
Subject: [PATCH] [clang][analyzer] Support strlen with offset to string
 literal in CStringChecker

Handle cases like `strlen(string_constant + 3)` in `CStringChecker` by
returning the original string length minus offset.
---
 .../Checkers/CStringChecker.cpp               | 75 +++++++++++++------
 clang/test/Analysis/string.c                  | 39 ++++++++++
 2 files changed, 90 insertions(+), 24 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp 
b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
index 36f316df0c3ff..dd35173a6b109 100644
--- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
@@ -251,6 +251,8 @@ class CStringChecker
                                         const Expr *Ex,
                                         const MemRegion *MR,
                                         bool hypothetical);
+  static const StringLiteral *getStringLiteralFromRegion(const MemRegion *MR);
+
   SVal getCStringLength(CheckerContext &C,
                         ProgramStateRef &state,
                         const Expr *Ex,
@@ -979,6 +981,22 @@ SVal 
CStringChecker::getCStringLengthForRegion(CheckerContext &C,
   return strLength;
 }
 
+const StringLiteral *
+CStringChecker::getStringLiteralFromRegion(const MemRegion *MR) {
+  switch (MR->getKind()) {
+  case MemRegion::StringRegionKind:
+    return cast<StringRegion>(MR)->getStringLiteral();
+  case MemRegion::NonParamVarRegionKind:
+    if (const VarDecl *Decl = cast<NonParamVarRegion>(MR)->getDecl();
+        Decl->getType().isConstQualified() && Decl->hasGlobalStorage())
+      return dyn_cast_or_null<StringLiteral>(Decl->getInit());
+    return nullptr;
+  default:
+    return nullptr;
+  }
+  return nullptr;
+}
+
 SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef 
&state,
                                       const Expr *Ex, SVal Buf,
                                       bool hypothetical) const {
@@ -1009,30 +1027,19 @@ SVal CStringChecker::getCStringLength(CheckerContext 
&C, ProgramStateRef &state,
   // its length. For anything we can't figure out, just return UnknownVal.
   MR = MR->StripCasts();
 
-  switch (MR->getKind()) {
-  case MemRegion::StringRegionKind: {
-    // Modifying the contents of string regions is undefined [C99 6.4.5p6],
-    // so we can assume that the byte length is the correct C string length.
-    SValBuilder &svalBuilder = C.getSValBuilder();
-    QualType sizeTy = svalBuilder.getContext().getSizeType();
-    const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral();
-    return svalBuilder.makeIntVal(strLit->getLength(), sizeTy);
-  }
-  case MemRegion::NonParamVarRegionKind: {
+  if (const StringLiteral *StrLit = getStringLiteralFromRegion(MR)) {
     // If we have a global constant with a string literal initializer,
     // compute the initializer's length.
-    const VarDecl *Decl = cast<NonParamVarRegion>(MR)->getDecl();
-    if (Decl->getType().isConstQualified() && Decl->hasGlobalStorage()) {
-      if (const Expr *Init = Decl->getInit()) {
-        if (auto *StrLit = dyn_cast<StringLiteral>(Init)) {
-          SValBuilder &SvalBuilder = C.getSValBuilder();
-          QualType SizeTy = SvalBuilder.getContext().getSizeType();
-          return SvalBuilder.makeIntVal(StrLit->getLength(), SizeTy);
-        }
-      }
-    }
-    [[fallthrough]];
+    // Modifying the contents of string regions is undefined [C99 6.4.5p6],
+    // so we can assume that the byte length is the correct C string length.
+    // FIXME: Embedded null characters are not handled.
+    SValBuilder &SVB = C.getSValBuilder();
+    return SVB.makeIntVal(StrLit->getLength(), SVB.getContext().getSizeType());
   }
+
+  switch (MR->getKind()) {
+  case MemRegion::StringRegionKind:
+  case MemRegion::NonParamVarRegionKind:
   case MemRegion::SymbolicRegionKind:
   case MemRegion::AllocaRegionKind:
   case MemRegion::ParamVarRegionKind:
@@ -1042,10 +1049,29 @@ SVal CStringChecker::getCStringLength(CheckerContext 
&C, ProgramStateRef &state,
   case MemRegion::CompoundLiteralRegionKind:
     // FIXME: Can we track this? Is it necessary?
     return UnknownVal();
-  case MemRegion::ElementRegionKind:
-    // FIXME: How can we handle this? It's not good enough to subtract the
-    // offset from the base string length; consider "123\x00567" and &a[5].
+  case MemRegion::ElementRegionKind: {
+    // If an offset into the string literal is used, use the original length
+    // minus the offset.
+    // FIXME: Embedded null characters are not handled.
+    const ElementRegion *ER = cast<ElementRegion>(MR);
+    const SubRegion *SuperReg =
+        cast<SubRegion>(ER->getSuperRegion()->StripCasts());
+    const StringLiteral *StrLit = getStringLiteralFromRegion(SuperReg);
+    if (!StrLit)
+      return UnknownVal();
+    SValBuilder &SVB = C.getSValBuilder();
+    NonLoc Idx = ER->getIndex();
+    NonLoc LengthVal =
+        SVB.makeIntVal(StrLit->getLength(), SVB.getContext().getSizeType())
+            .castAs<NonLoc>();
+    if (state->assume(SVB.evalBinOpNN(state, BO_LE, Idx, LengthVal,
+                                      SVB.getConditionType())
+                          .castAs<DefinedOrUnknownSVal>(),
+                      true))
+      return SVB.evalBinOp(state, BO_Sub, LengthVal, Idx,
+                           SVB.getContext().getSizeType());
     return UnknownVal();
+  }
   default:
     // Other regions (mostly non-data) can't have a reliable C string length.
     // In this case, an error is emitted and UndefinedVal is returned.
@@ -1070,6 +1096,7 @@ SVal CStringChecker::getCStringLength(CheckerContext &C, 
ProgramStateRef &state,
 
 const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C,
   ProgramStateRef &state, const Expr *expr, SVal val) const {
+  // FIXME: use getStringLiteralFromRegion (and remove unused parameters)?
 
   // Get the memory region pointed to by the val.
   const MemRegion *bufRegion = val.getAsRegion();
diff --git a/clang/test/Analysis/string.c b/clang/test/Analysis/string.c
index cdd36275568e3..c0216d758c377 100644
--- a/clang/test/Analysis/string.c
+++ b/clang/test/Analysis/string.c
@@ -82,16 +82,21 @@ size_t strlen(const char *s);
 
 void strlen_constant0(void) {
   clang_analyzer_eval(strlen("123") == 3); // expected-warning{{TRUE}}
+  clang_analyzer_eval(strlen(&("123"[1])) == 2); // expected-warning{{TRUE}}
 }
 
 void strlen_constant1(void) {
   const char *a = "123";
   clang_analyzer_eval(strlen(a) == 3); // expected-warning{{TRUE}}
+  clang_analyzer_eval(strlen(a + 1) == 2); // expected-warning{{TRUE}}
+  clang_analyzer_eval(strlen(a + 3) == 0); // expected-warning{{TRUE}}
+  clang_analyzer_eval(strlen(a + 4)); // expected-warning{{UNKNOWN}}
 }
 
 void strlen_constant2(char x) {
   char a[] = "123";
   clang_analyzer_eval(strlen(a) == 3); // expected-warning{{TRUE}}
+  clang_analyzer_eval(strlen(a + 1) == 2); // expected-warning{{UNKNOWN}}
 
   a[0] = x;
   clang_analyzer_eval(strlen(a) == 3); // expected-warning{{UNKNOWN}}
@@ -105,10 +110,12 @@ char global_non_const_arr[] = "op";
 
 void strlen_global_constant_ptr(void) {
   clang_analyzer_eval(strlen(global_str_ptr) == 4); // expected-warning{{TRUE}}
+  clang_analyzer_eval(strlen(global_str_ptr + 1) == 3); // 
expected-warning{{TRUE}}
 }
 
 void strlen_global_constant_arr(void) {
   clang_analyzer_eval(strlen(global_str_arr) == 4); // expected-warning{{TRUE}}
+  clang_analyzer_eval(strlen(global_str_arr + 1) == 3); // 
expected-warning{{TRUE}}
 }
 
 void strlen_global_non_const_ptr(void) {
@@ -235,6 +242,19 @@ void testStrlenCallee(void) {
   clang_analyzer_eval(lenBefore == lenAfter); // expected-warning{{UNKNOWN}}
 }
 
+void strlen_symbolic_offset(unsigned x) {
+  const char *str = "abcd";
+  if (x > 3)
+    return;
+  // FIXME: these should be known
+  clang_analyzer_eval(strlen(str + x) > 1); // expected-warning{{UNKNOWN}}
+  clang_analyzer_eval(strlen(str + x) > 2); // expected-warning{{UNKNOWN}}
+  clang_analyzer_eval(strlen(str + x) <= 4); // expected-warning{{UNKNOWN}}
+  clang_analyzer_eval(strlen(str + x) <= 3); // expected-warning{{UNKNOWN}}
+  if (x != 1)
+    return;
+  clang_analyzer_eval(strlen(str + x) == 3); // expected-warning{{TRUE}}
+}
 
 //===----------------------------------------------------------------------===
 // strnlen()
@@ -244,32 +264,38 @@ size_t strnlen(const char *s, size_t maxlen);
 
 void strnlen_constant0(void) {
   clang_analyzer_eval(strnlen("123", 10) == 3); // expected-warning{{TRUE}}
+  clang_analyzer_eval(strnlen(&("123"[1]), 10) == 2); // 
expected-warning{{TRUE}}
 }
 
 void strnlen_constant1(void) {
   const char *a = "123";
   clang_analyzer_eval(strnlen(a, 10) == 3); // expected-warning{{TRUE}}
+  clang_analyzer_eval(strnlen(a + 1, 10) == 2); // expected-warning{{TRUE}}
 }
 
 void strnlen_constant2(char x) {
   char a[] = "123";
   clang_analyzer_eval(strnlen(a, 10) == 3); // expected-warning{{TRUE}}
+  clang_analyzer_eval(strnlen(a + 1, 10) == 2); // expected-warning{{UNKNOWN}}
   a[0] = x;
   clang_analyzer_eval(strnlen(a, 10) == 3); // expected-warning{{UNKNOWN}}
 }
 
 void strnlen_constant4(void) {
   clang_analyzer_eval(strnlen("123456", 3) == 3); // expected-warning{{TRUE}}
+  clang_analyzer_eval(strnlen(&("123456"[1]), 3) == 3); // 
expected-warning{{TRUE}}
 }
 
 void strnlen_constant5(void) {
   const char *a = "123456";
   clang_analyzer_eval(strnlen(a, 3) == 3); // expected-warning{{TRUE}}
+  clang_analyzer_eval(strnlen(a + 1, 3) == 3); // expected-warning{{TRUE}}
 }
 
 void strnlen_constant6(char x) {
   char a[] = "123456";
   clang_analyzer_eval(strnlen(a, 3) == 3); // expected-warning{{TRUE}}
+  clang_analyzer_eval(strnlen(a + 1, 3) == 3); // expected-warning{{UNKNOWN}}
   a[0] = x;
   clang_analyzer_eval(strnlen(a, 3) == 3); // expected-warning{{UNKNOWN}}
 }
@@ -326,6 +352,19 @@ void strnlen_at_actual(size_t limit) {
   }
 }
 
+void strnlen_at_actual_1(size_t limit) {
+  const char *str = "abc";
+  size_t len = strnlen(str + 1, limit);
+  clang_analyzer_eval(len <= 2); // expected-warning{{TRUE}}
+  // This is due to eager assertion in strnlen.
+  if (limit == 0) {
+    clang_analyzer_eval(len == 0); // expected-warning{{TRUE}}
+  } else {
+    clang_analyzer_eval(len == 2); // expected-warning{{UNKNOWN}}
+    clang_analyzer_eval(len < 2); // expected-warning{{UNKNOWN}}
+  }
+}
+
 //===----------------------------------------------------------------------===
 // strcpy()
 //===----------------------------------------------------------------------===

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to