https://github.com/Pppp1116 updated 
https://github.com/llvm/llvm-project/pull/190411

>From d2c9ca009621b364d62d014f65655d45d3ff6c40 Mon Sep 17 00:00:00 2001
From: Pppp1116 <[email protected]>
Date: Fri, 3 Apr 2026 22:30:38 +0100
Subject: [PATCH] Improve C string null terminator diagnostics

---
 .../clang/Basic/DiagnosticSemaKinds.td        |  16 ++
 clang/lib/Sema/AnalysisBasedWarnings.cpp      | 224 ++++++++++++++++++
 clang/lib/Sema/SemaChecking.cpp               | 108 +++++++++
 .../test/Sema/warn-cstring-null-terminator.c  |  59 +++++
 4 files changed, 407 insertions(+)
 create mode 100644 clang/test/Sema/warn-cstring-null-terminator.c

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index eddf9c50033e1..81206d174fe7f 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -965,6 +965,22 @@ def warn_fortify_strlen_overflow: Warning<
   " but the source string has length %2 (including NUL byte)">,
   InGroup<FortifySource>;
 
+def warn_fortify_missing_null_terminator_space : Warning<
+  "%select{allocation size|copy size}0 does not include space for null "
+  "terminator; consider '%1 + 1'">, InGroup<FortifySource>;
+
+def warn_fortify_allocation_maybe_small_for_cstring : Warning<
+  "allocation of '%0' bytes may be insufficient for null-terminated string; "
+  "consider '%0 + 1'">, InGroup<FortifySource>;
+
+def warn_fortify_realloc_maybe_removes_null_space : Warning<
+  "realloc may remove space required for null terminator; expected at least "
+  "%0 + 1">, InGroup<FortifySource>;
+
+def warn_fortify_literal_copy_too_large : Warning<
+  "copying %0 bytes into buffer of size %1 (including null terminator)">,
+  InGroup<FortifySource>;
+
 def subst_format_overflow : TextSubstitution<
   "'%0' will always overflow; destination buffer has size %1,"
   " but format string expands to at least %2">;
diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp 
b/clang/lib/Sema/AnalysisBasedWarnings.cpp
index 37ed7488bb927..c6f16cc32a9a2 100644
--- a/clang/lib/Sema/AnalysisBasedWarnings.cpp
+++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp
@@ -51,6 +51,7 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/STLFunctionalExtras.h"
@@ -58,6 +59,7 @@
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/TimeProfiler.h"
 #include <algorithm>
 #include <deque>
@@ -2851,6 +2853,221 @@ void 
sema::AnalysisBasedWarnings::issueWarningsForRegisteredVarDecl(
       S, AC, std::make_pair(SecondRange.begin(), SecondRange.end()));
 }
 
+namespace {
+
+static StringRef getSimpleLibraryFunctionName(const FunctionDecl *FD) {
+  if (!FD)
+    return {};
+  IdentifierInfo *II = FD->getIdentifier();
+  if (!II)
+    return {};
+
+  StringRef Name = II->getName();
+  Name.consume_front("__builtin_");
+  return Name;
+}
+
+static bool isCharacterPointerType(QualType Ty) {
+  const auto *PT = Ty->getAs<PointerType>();
+  return PT && PT->getPointeeType().getUnqualifiedType()->isCharType();
+}
+
+static const VarDecl *getCharacterPointerVar(const Expr *E) {
+  const auto *DRE = dyn_cast<DeclRefExpr>(E->IgnoreParenCasts());
+  const auto *VD = DRE ? dyn_cast<VarDecl>(DRE->getDecl()) : nullptr;
+  if (!VD || !isCharacterPointerType(VD->getType()))
+    return nullptr;
+  return VD;
+}
+
+static const Expr *getDirectStrlenArgument(const Expr *E) {
+  const auto *CE = dyn_cast<CallExpr>(E->IgnoreParenCasts());
+  if (!CE || CE->getNumArgs() != 1)
+    return nullptr;
+
+  if (getSimpleLibraryFunctionName(CE->getDirectCallee()) != "strlen")
+    return nullptr;
+  return CE->getArg(0)->IgnoreParenCasts();
+}
+
+static bool isLiteralOne(const Expr *E) {
+  const auto *IL = dyn_cast<IntegerLiteral>(E->IgnoreParenCasts());
+  return IL && IL->getValue() == 1;
+}
+
+static const Expr *getDirectStrlenArgumentWithOptionalPlusOne(const Expr *E) {
+  E = E->IgnoreParenCasts();
+  if (const Expr *Arg = getDirectStrlenArgument(E))
+    return Arg;
+
+  const auto *BO = dyn_cast<BinaryOperator>(E);
+  if (!BO || BO->getOpcode() != BO_Add)
+    return nullptr;
+
+  const Expr *LHS = BO->getLHS()->IgnoreParenCasts();
+  const Expr *RHS = BO->getRHS()->IgnoreParenCasts();
+  if (isLiteralOne(LHS))
+    return getDirectStrlenArgument(RHS);
+  if (isLiteralOne(RHS))
+    return getDirectStrlenArgument(LHS);
+  return nullptr;
+}
+
+static bool areSameSimpleDeclRef(const Expr *E1, const Expr *E2) {
+  const auto *D1 = dyn_cast<DeclRefExpr>(E1->IgnoreParenCasts());
+  const auto *D2 = dyn_cast<DeclRefExpr>(E2->IgnoreParenCasts());
+  return D1 && D2 && D1->getDecl() == D2->getDecl();
+}
+
+static std::string getExprAsString(const Expr *E, const PrintingPolicy 
&Policy) {
+  SmallString<64> Text;
+  llvm::raw_svector_ostream OS(Text);
+  E->printPretty(OS, nullptr, Policy);
+  return std::string(OS.str());
+}
+
+static FixItHint getPlusOneFixIt(const Expr *E, const PrintingPolicy &Policy) {
+  const Expr *SimpleExpr = E->IgnoreParenCasts();
+  if (!isa<DeclRefExpr>(SimpleExpr))
+    return {};
+
+  SourceRange Range = E->getSourceRange();
+  if (Range.getBegin().isMacroID() || Range.getEnd().isMacroID())
+    return {};
+
+  SmallString<32> Replacement;
+  llvm::raw_svector_ostream OS(Replacement);
+  E->printPretty(OS, nullptr, Policy);
+  OS << " + 1";
+  return FixItHint::CreateReplacement(Range, OS.str());
+}
+
+struct TrackedCStringAllocation {
+  const Expr *SizeExpr = nullptr;
+  const CallExpr *AllocCall = nullptr;
+  bool IsRealloc = false;
+  bool Warned = false;
+};
+
+class CStringAllocationVisitor : public StmtVisitor<CStringAllocationVisitor> {
+  Sema &S;
+  llvm::DenseMap<const VarDecl *, TrackedCStringAllocation> Tracked;
+  llvm::DenseSet<const VarDecl *> KnownStringBuffers;
+
+  void VisitChildren(Stmt *Node) {
+    for (Stmt *Child : Node->children())
+      if (Child)
+        Visit(Child);
+  }
+
+  void DiagnoseTrackedAllocation(const VarDecl *VD) {
+    auto It = Tracked.find(VD);
+    if (It == Tracked.end() || It->second.Warned)
+      return;
+
+    TrackedCStringAllocation &Info = It->second;
+    std::string SizeText = getExprAsString(Info.SizeExpr, 
S.getPrintingPolicy());
+    S.Diag(Info.AllocCall->getBeginLoc(),
+           Info.IsRealloc ? diag::warn_fortify_realloc_maybe_removes_null_space
+                          : 
diag::warn_fortify_allocation_maybe_small_for_cstring)
+        << SizeText << getPlusOneFixIt(Info.SizeExpr, S.getPrintingPolicy());
+    Info.Warned = true;
+  }
+
+  void MarkAsStringBuffer(const Expr *E) {
+    const VarDecl *VD = getCharacterPointerVar(E);
+    if (!VD)
+      return;
+
+    KnownStringBuffers.insert(VD);
+    DiagnoseTrackedAllocation(VD);
+  }
+
+  std::optional<TrackedCStringAllocation>
+  getTrackedAllocation(const Expr *E, const VarDecl *AssignedVar) const {
+    const auto *CE = dyn_cast<CallExpr>(E->IgnoreParenCasts());
+    if (!CE)
+      return std::nullopt;
+
+    StringRef Name = getSimpleLibraryFunctionName(CE->getDirectCallee());
+    if (Name == "malloc" && CE->getNumArgs() == 1) {
+      const Expr *SizeArg = CE->getArg(0)->IgnoreParenCasts();
+      if (isa<DeclRefExpr>(SizeArg))
+        return TrackedCStringAllocation{SizeArg, CE, false, false};
+      return std::nullopt;
+    }
+
+    if (Name != "realloc" || CE->getNumArgs() != 2 || !AssignedVar)
+      return std::nullopt;
+
+    const Expr *SizeArg = CE->getArg(1)->IgnoreParenCasts();
+    const VarDecl *ReallocVar = getCharacterPointerVar(CE->getArg(0));
+    if (ReallocVar == AssignedVar && isa<DeclRefExpr>(SizeArg))
+      return TrackedCStringAllocation{SizeArg, CE, true, false};
+    return std::nullopt;
+  }
+
+  void UpdateTrackedAllocation(const VarDecl *VD, const Expr *Init) {
+    if (!VD || !Init || !isCharacterPointerType(VD->getType()))
+      return;
+
+    if (std::optional<TrackedCStringAllocation> Info =
+            getTrackedAllocation(Init, VD)) {
+      Tracked[VD] = *Info;
+      if (KnownStringBuffers.contains(VD))
+        DiagnoseTrackedAllocation(VD);
+      return;
+    }
+
+    Tracked.erase(VD);
+  }
+
+public:
+  explicit CStringAllocationVisitor(Sema &S) : S(S) {}
+
+  void VisitStmt(Stmt *Node) { VisitChildren(Node); }
+
+  void VisitLambdaExpr(LambdaExpr *) {}
+
+  void VisitBlockExpr(BlockExpr *) {}
+
+  void VisitDeclStmt(DeclStmt *DS) {
+    for (Decl *D : DS->decls())
+      if (auto *VD = dyn_cast<VarDecl>(D))
+        UpdateTrackedAllocation(VD, VD->getInit());
+    VisitChildren(DS);
+  }
+
+  void VisitBinaryOperator(BinaryOperator *BO) {
+    if (BO->getOpcode() == BO_Assign)
+      if (const auto *VD = getCharacterPointerVar(BO->getLHS()))
+        UpdateTrackedAllocation(VD, BO->getRHS());
+    VisitChildren(BO);
+  }
+
+  void VisitCallExpr(CallExpr *CE) {
+    StringRef Name = getSimpleLibraryFunctionName(CE->getDirectCallee());
+    if (Name == "strlen" && CE->getNumArgs() == 1) {
+      MarkAsStringBuffer(CE->getArg(0));
+    } else if ((Name == "strcpy" || Name == "strcat") && CE->getNumArgs() == 
2) {
+      MarkAsStringBuffer(CE->getArg(0));
+    } else if (Name == "memcpy" && CE->getNumArgs() == 3) {
+      if (const Expr *StrlenArg =
+              getDirectStrlenArgumentWithOptionalPlusOne(CE->getArg(2));
+          StrlenArg && areSameSimpleDeclRef(StrlenArg, CE->getArg(1))) {
+        MarkAsStringBuffer(CE->getArg(0));
+      }
+    }
+    VisitChildren(CE);
+  }
+};
+
+static void CheckCStringBufferAllocations(Sema &S, const Stmt *Body) {
+  CStringAllocationVisitor(S).Visit(const_cast<Stmt *>(Body));
+}
+
+} // namespace
+
 // An AST Visitor that calls a callback function on each callable DEFINITION
 // that is NOT in a dependent context:
 class CallableVisitor : public DynamicRecursiveASTVisitor {
@@ -3022,6 +3239,13 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings(
   const Stmt *Body = D->getBody();
   assert(Body);
 
+  if (!Diags.isIgnored(diag::warn_fortify_allocation_maybe_small_for_cstring,
+                       D->getBeginLoc()) ||
+      !Diags.isIgnored(diag::warn_fortify_realloc_maybe_removes_null_space,
+                       D->getBeginLoc())) {
+    CheckCStringBufferAllocations(S, Body);
+  }
+
   // Construct the analysis context with the specified CFG build options.
   AnalysisDeclContext AC(/* AnalysisDeclContextManager */ nullptr, D);
 
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index de8b965144971..d88aa1213ac78 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -1141,6 +1141,97 @@ static bool ProcessFormatStringLiteral(const Expr 
*FormatExpr,
   return false;
 }
 
+static StringRef getSimpleLibraryFunctionName(const FunctionDecl *FD) {
+  if (!FD)
+    return {};
+  IdentifierInfo *II = FD->getIdentifier();
+  if (!II)
+    return {};
+
+  StringRef Name = II->getName();
+  Name.consume_front("__builtin_");
+  return Name;
+}
+
+static const Expr *getDirectStrlenCallArgument(const Expr *E) {
+  const auto *CE = dyn_cast<CallExpr>(E->IgnoreParenCasts());
+  if (!CE || CE->getNumArgs() != 1)
+    return nullptr;
+
+  if (getSimpleLibraryFunctionName(CE->getDirectCallee()) != "strlen")
+    return nullptr;
+  return CE->getArg(0)->IgnoreParenCasts();
+}
+
+static bool areSameSimpleDeclRef(const Expr *E1, const Expr *E2) {
+  const auto *D1 = dyn_cast<DeclRefExpr>(E1->IgnoreParenCasts());
+  const auto *D2 = dyn_cast<DeclRefExpr>(E2->IgnoreParenCasts());
+  return D1 && D2 && D1->getDecl() == D2->getDecl();
+}
+
+static std::string getExprAsString(const Expr *E, const PrintingPolicy 
&Policy) {
+  SmallString<64> Text;
+  llvm::raw_svector_ostream OS(Text);
+  E->printPretty(OS, nullptr, Policy);
+  return std::string(OS.str());
+}
+
+static FixItHint getPlusOneFixIt(const Expr *E, const PrintingPolicy &Policy) {
+  const Expr *SimpleExpr = E->IgnoreParenCasts();
+  if (!isa<DeclRefExpr, CallExpr>(SimpleExpr))
+    return {};
+
+  SourceRange Range = E->getSourceRange();
+  if (Range.getBegin().isMacroID() || Range.getEnd().isMacroID())
+    return {};
+
+  SmallString<64> Replacement;
+  llvm::raw_svector_ostream OS(Replacement);
+  E->printPretty(OS, nullptr, Policy);
+  OS << " + 1";
+  return FixItHint::CreateReplacement(Range, OS.str());
+}
+
+static void DiagnoseMissingNullTerminatorSpace(Sema &S, SourceLocation Loc,
+                                               const Expr *SizeExpr,
+                                               unsigned MissingKind) {
+  std::string SizeText = getExprAsString(SizeExpr, S.getPrintingPolicy());
+  S.Diag(Loc, diag::warn_fortify_missing_null_terminator_space)
+      << MissingKind << SizeText
+      << getPlusOneFixIt(SizeExpr, S.getPrintingPolicy());
+}
+
+static void CheckCStringNullTerminatorSizeArguments(Sema &S,
+                                                    const FunctionDecl *FD,
+                                                    const CallExpr *TheCall) {
+  if (TheCall->isValueDependent() || TheCall->isTypeDependent() ||
+      S.isConstantEvaluatedContext() ||
+      S.Diags.isIgnored(diag::warn_fortify_missing_null_terminator_space,
+                        TheCall->getBeginLoc()))
+    return;
+
+  StringRef Name = getSimpleLibraryFunctionName(FD);
+  if (Name == "malloc") {
+    if (TheCall->getNumArgs() != 1)
+      return;
+    const Expr *SizeArg = TheCall->getArg(0)->IgnoreParenCasts();
+    if (getDirectStrlenCallArgument(SizeArg))
+      DiagnoseMissingNullTerminatorSpace(S, TheCall->getBeginLoc(), SizeArg,
+                                         /*MissingKind=*/0);
+    return;
+  }
+
+  if (Name != "memcpy" || TheCall->getNumArgs() != 3)
+    return;
+
+  const Expr *SrcArg = TheCall->getArg(1)->IgnoreParenCasts();
+  const Expr *SizeArg = TheCall->getArg(2)->IgnoreParenCasts();
+  const Expr *StrlenArg = getDirectStrlenCallArgument(SizeArg);
+  if (StrlenArg && areSameSimpleDeclRef(StrlenArg, SrcArg))
+    DiagnoseMissingNullTerminatorSpace(S, TheCall->getBeginLoc(), SizeArg,
+                                       /*MissingKind=*/1);
+}
+
 void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
                                                CallExpr *TheCall) {
   if (TheCall->isValueDependent() || TheCall->isTypeDependent() ||
@@ -1247,6 +1338,7 @@ void 
Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
   std::optional<llvm::APSInt> DestinationSize;
   unsigned DiagID = 0;
   bool IsChkVariant = false;
+  bool UseLiteralCopyOverflowDiag = false;
 
   auto GetFunctionName = [&]() {
     std::string FunctionNameStr =
@@ -1276,6 +1368,10 @@ void 
Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
     DiagID = diag::warn_fortify_strlen_overflow;
     SourceSize = ComputeStrLenArgument(1);
     DestinationSize = ComputeSizeArgument(0);
+    UseLiteralCopyOverflowDiag =
+        (BuiltinID == Builtin::BI__builtin_strcpy ||
+         BuiltinID == Builtin::BIstrcpy) &&
+        isa<StringLiteral>(TheCall->getArg(1)->IgnoreParenCasts());
     break;
   }
 
@@ -1286,6 +1382,9 @@ void 
Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
     SourceSize = ComputeStrLenArgument(1);
     DestinationSize = ComputeExplicitObjectSizeArgument(2);
     IsChkVariant = true;
+    UseLiteralCopyOverflowDiag =
+        BuiltinID == Builtin::BI__builtin___strcpy_chk &&
+        isa<StringLiteral>(TheCall->getArg(1)->IgnoreParenCasts());
     break;
   }
 
@@ -1470,6 +1569,14 @@ void 
Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
   SmallString<16> SourceStr;
   DestinationSize->toString(DestinationStr, /*Radix=*/10);
   SourceSize->toString(SourceStr, /*Radix=*/10);
+
+  if (UseLiteralCopyOverflowDiag) {
+    DiagRuntimeBehavior(TheCall->getBeginLoc(), TheCall,
+                        PDiag(diag::warn_fortify_literal_copy_too_large)
+                            << SourceStr << DestinationStr);
+    return;
+  }
+
   DiagRuntimeBehavior(TheCall->getBeginLoc(), TheCall,
                       PDiag(DiagID)
                           << FunctionName << DestinationStr << SourceStr);
@@ -4449,6 +4556,7 @@ bool Sema::CheckFunctionCall(FunctionDecl *FDecl, 
CallExpr *TheCall,
   CheckAbsoluteValueFunction(TheCall, FDecl);
   CheckMaxUnsignedZero(TheCall, FDecl);
   CheckInfNaNFunction(TheCall, FDecl);
+  CheckCStringNullTerminatorSizeArguments(*this, FDecl, TheCall);
 
   if (getLangOpts().ObjC)
     ObjC().DiagnoseCStringFormatDirectiveInCFAPI(FDecl, Args, NumArgs);
diff --git a/clang/test/Sema/warn-cstring-null-terminator.c 
b/clang/test/Sema/warn-cstring-null-terminator.c
new file mode 100644
index 0000000000000..779e71c376e38
--- /dev/null
+++ b/clang/test/Sema/warn-cstring-null-terminator.c
@@ -0,0 +1,59 @@
+// RUN: %clang_cc1 -fsyntax-only -Wfortify-source -verify %s
+
+typedef __SIZE_TYPE__ size_t;
+
+void *malloc(size_t);
+void *realloc(void *, size_t);
+void *memcpy(void *, const void *, size_t);
+char *strcpy(char *, const char *);
+size_t strlen(const char *);
+
+void direct_malloc_strlen(const char *src) {
+  char *p = malloc(strlen(src)); // expected-warning{{allocation size does not 
include space for null terminator; consider 'strlen(src) + 1'}}
+  char *ok = malloc(strlen(src) + 1);
+  (void)p;
+  (void)ok;
+}
+
+void direct_memcpy_strlen(char *dst, const char *src) {
+  memcpy(dst, src, strlen(src)); // expected-warning{{copy size does not 
include space for null terminator; consider 'strlen(src) + 1'}}
+  memcpy(dst, src, strlen(src) + 1);
+}
+
+void literal_strcpy_overflow(void) {
+  char buf[4];
+  char ok[5];
+  strcpy(buf, "abcd"); // expected-warning{{copying 5 bytes into buffer of 
size 4 (including null terminator)}}
+  strcpy(ok, "abcd");
+}
+
+void malloc_len_then_strcpy(const char *src, size_t len) {
+  char *s = malloc(len); // expected-warning{{allocation of 'len' bytes may be 
insufficient for null-terminated string; consider 'len + 1'}}
+  strcpy(s, src);
+}
+
+void malloc_len_non_string(char *dst, const char *src, size_t len) {
+  char *buf = malloc(len);
+  memcpy(buf, src, len);
+  memcpy(dst, buf, len);
+}
+
+void malloc_complex_size(const char *src, size_t len) {
+  char *s = malloc(len * 2);
+  strcpy(s, src);
+}
+
+void realloc_len_then_strcpy(char *s, const char *src, size_t len) {
+  s = realloc(s, len); // expected-warning{{realloc may remove space required 
for null terminator; expected at least len + 1}}
+  strcpy(s, src);
+}
+
+void realloc_ok(char *s, const char *src, size_t len) {
+  s = realloc(s, len + 1);
+  strcpy(s, src);
+}
+
+void malloc_len_then_memcpy_string(const char *src, size_t len) {
+  char *s = malloc(len); // expected-warning{{allocation of 'len' bytes may be 
insufficient for null-terminated string; consider 'len + 1'}}
+  memcpy(s, src, strlen(src) + 1);
+}

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to