https://github.com/Pppp1116 created https://github.com/llvm/llvm-project/pull/190411
Improve a few common C string diagnostics around missing space for the null terminator. Warn on malloc(strlen(x)), memcpy(..., strlen(src)), simple malloc(len)/realloc(len) string-buffer cases, and obvious strcpy overflows into fixed arrays. Built with `ninja -C build clangBasic clangSema`. >From d2c9ca009621b364d62d014f65655d45d3ff6c40 Mon Sep 17 00:00:00 2001 From: Pppp1116 <[email protected]> Date: Fri, 3 Apr 2026 22:30:38 +0100 Subject: [PATCH] Improve C string null terminator diagnostics --- .../clang/Basic/DiagnosticSemaKinds.td | 16 ++ clang/lib/Sema/AnalysisBasedWarnings.cpp | 224 ++++++++++++++++++ clang/lib/Sema/SemaChecking.cpp | 108 +++++++++ .../test/Sema/warn-cstring-null-terminator.c | 59 +++++ 4 files changed, 407 insertions(+) create mode 100644 clang/test/Sema/warn-cstring-null-terminator.c diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index eddf9c50033e1..81206d174fe7f 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -965,6 +965,22 @@ def warn_fortify_strlen_overflow: Warning< " but the source string has length %2 (including NUL byte)">, InGroup<FortifySource>; +def warn_fortify_missing_null_terminator_space : Warning< + "%select{allocation size|copy size}0 does not include space for null " + "terminator; consider '%1 + 1'">, InGroup<FortifySource>; + +def warn_fortify_allocation_maybe_small_for_cstring : Warning< + "allocation of '%0' bytes may be insufficient for null-terminated string; " + "consider '%0 + 1'">, InGroup<FortifySource>; + +def warn_fortify_realloc_maybe_removes_null_space : Warning< + "realloc may remove space required for null terminator; expected at least " + "%0 + 1">, InGroup<FortifySource>; + +def warn_fortify_literal_copy_too_large : Warning< + "copying %0 bytes into buffer of size %1 (including null terminator)">, + InGroup<FortifySource>; + def subst_format_overflow : TextSubstitution< "'%0' will always overflow; destination buffer has size %1," " but format string expands to at least %2">; diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp index 37ed7488bb927..c6f16cc32a9a2 100644 --- a/clang/lib/Sema/AnalysisBasedWarnings.cpp +++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp @@ -51,6 +51,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/STLFunctionalExtras.h" @@ -58,6 +59,7 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TimeProfiler.h" #include <algorithm> #include <deque> @@ -2851,6 +2853,221 @@ void sema::AnalysisBasedWarnings::issueWarningsForRegisteredVarDecl( S, AC, std::make_pair(SecondRange.begin(), SecondRange.end())); } +namespace { + +static StringRef getSimpleLibraryFunctionName(const FunctionDecl *FD) { + if (!FD) + return {}; + IdentifierInfo *II = FD->getIdentifier(); + if (!II) + return {}; + + StringRef Name = II->getName(); + Name.consume_front("__builtin_"); + return Name; +} + +static bool isCharacterPointerType(QualType Ty) { + const auto *PT = Ty->getAs<PointerType>(); + return PT && PT->getPointeeType().getUnqualifiedType()->isCharType(); +} + +static const VarDecl *getCharacterPointerVar(const Expr *E) { + const auto *DRE = dyn_cast<DeclRefExpr>(E->IgnoreParenCasts()); + const auto *VD = DRE ? dyn_cast<VarDecl>(DRE->getDecl()) : nullptr; + if (!VD || !isCharacterPointerType(VD->getType())) + return nullptr; + return VD; +} + +static const Expr *getDirectStrlenArgument(const Expr *E) { + const auto *CE = dyn_cast<CallExpr>(E->IgnoreParenCasts()); + if (!CE || CE->getNumArgs() != 1) + return nullptr; + + if (getSimpleLibraryFunctionName(CE->getDirectCallee()) != "strlen") + return nullptr; + return CE->getArg(0)->IgnoreParenCasts(); +} + +static bool isLiteralOne(const Expr *E) { + const auto *IL = dyn_cast<IntegerLiteral>(E->IgnoreParenCasts()); + return IL && IL->getValue() == 1; +} + +static const Expr *getDirectStrlenArgumentWithOptionalPlusOne(const Expr *E) { + E = E->IgnoreParenCasts(); + if (const Expr *Arg = getDirectStrlenArgument(E)) + return Arg; + + const auto *BO = dyn_cast<BinaryOperator>(E); + if (!BO || BO->getOpcode() != BO_Add) + return nullptr; + + const Expr *LHS = BO->getLHS()->IgnoreParenCasts(); + const Expr *RHS = BO->getRHS()->IgnoreParenCasts(); + if (isLiteralOne(LHS)) + return getDirectStrlenArgument(RHS); + if (isLiteralOne(RHS)) + return getDirectStrlenArgument(LHS); + return nullptr; +} + +static bool areSameSimpleDeclRef(const Expr *E1, const Expr *E2) { + const auto *D1 = dyn_cast<DeclRefExpr>(E1->IgnoreParenCasts()); + const auto *D2 = dyn_cast<DeclRefExpr>(E2->IgnoreParenCasts()); + return D1 && D2 && D1->getDecl() == D2->getDecl(); +} + +static std::string getExprAsString(const Expr *E, const PrintingPolicy &Policy) { + SmallString<64> Text; + llvm::raw_svector_ostream OS(Text); + E->printPretty(OS, nullptr, Policy); + return std::string(OS.str()); +} + +static FixItHint getPlusOneFixIt(const Expr *E, const PrintingPolicy &Policy) { + const Expr *SimpleExpr = E->IgnoreParenCasts(); + if (!isa<DeclRefExpr>(SimpleExpr)) + return {}; + + SourceRange Range = E->getSourceRange(); + if (Range.getBegin().isMacroID() || Range.getEnd().isMacroID()) + return {}; + + SmallString<32> Replacement; + llvm::raw_svector_ostream OS(Replacement); + E->printPretty(OS, nullptr, Policy); + OS << " + 1"; + return FixItHint::CreateReplacement(Range, OS.str()); +} + +struct TrackedCStringAllocation { + const Expr *SizeExpr = nullptr; + const CallExpr *AllocCall = nullptr; + bool IsRealloc = false; + bool Warned = false; +}; + +class CStringAllocationVisitor : public StmtVisitor<CStringAllocationVisitor> { + Sema &S; + llvm::DenseMap<const VarDecl *, TrackedCStringAllocation> Tracked; + llvm::DenseSet<const VarDecl *> KnownStringBuffers; + + void VisitChildren(Stmt *Node) { + for (Stmt *Child : Node->children()) + if (Child) + Visit(Child); + } + + void DiagnoseTrackedAllocation(const VarDecl *VD) { + auto It = Tracked.find(VD); + if (It == Tracked.end() || It->second.Warned) + return; + + TrackedCStringAllocation &Info = It->second; + std::string SizeText = getExprAsString(Info.SizeExpr, S.getPrintingPolicy()); + S.Diag(Info.AllocCall->getBeginLoc(), + Info.IsRealloc ? diag::warn_fortify_realloc_maybe_removes_null_space + : diag::warn_fortify_allocation_maybe_small_for_cstring) + << SizeText << getPlusOneFixIt(Info.SizeExpr, S.getPrintingPolicy()); + Info.Warned = true; + } + + void MarkAsStringBuffer(const Expr *E) { + const VarDecl *VD = getCharacterPointerVar(E); + if (!VD) + return; + + KnownStringBuffers.insert(VD); + DiagnoseTrackedAllocation(VD); + } + + std::optional<TrackedCStringAllocation> + getTrackedAllocation(const Expr *E, const VarDecl *AssignedVar) const { + const auto *CE = dyn_cast<CallExpr>(E->IgnoreParenCasts()); + if (!CE) + return std::nullopt; + + StringRef Name = getSimpleLibraryFunctionName(CE->getDirectCallee()); + if (Name == "malloc" && CE->getNumArgs() == 1) { + const Expr *SizeArg = CE->getArg(0)->IgnoreParenCasts(); + if (isa<DeclRefExpr>(SizeArg)) + return TrackedCStringAllocation{SizeArg, CE, false, false}; + return std::nullopt; + } + + if (Name != "realloc" || CE->getNumArgs() != 2 || !AssignedVar) + return std::nullopt; + + const Expr *SizeArg = CE->getArg(1)->IgnoreParenCasts(); + const VarDecl *ReallocVar = getCharacterPointerVar(CE->getArg(0)); + if (ReallocVar == AssignedVar && isa<DeclRefExpr>(SizeArg)) + return TrackedCStringAllocation{SizeArg, CE, true, false}; + return std::nullopt; + } + + void UpdateTrackedAllocation(const VarDecl *VD, const Expr *Init) { + if (!VD || !Init || !isCharacterPointerType(VD->getType())) + return; + + if (std::optional<TrackedCStringAllocation> Info = + getTrackedAllocation(Init, VD)) { + Tracked[VD] = *Info; + if (KnownStringBuffers.contains(VD)) + DiagnoseTrackedAllocation(VD); + return; + } + + Tracked.erase(VD); + } + +public: + explicit CStringAllocationVisitor(Sema &S) : S(S) {} + + void VisitStmt(Stmt *Node) { VisitChildren(Node); } + + void VisitLambdaExpr(LambdaExpr *) {} + + void VisitBlockExpr(BlockExpr *) {} + + void VisitDeclStmt(DeclStmt *DS) { + for (Decl *D : DS->decls()) + if (auto *VD = dyn_cast<VarDecl>(D)) + UpdateTrackedAllocation(VD, VD->getInit()); + VisitChildren(DS); + } + + void VisitBinaryOperator(BinaryOperator *BO) { + if (BO->getOpcode() == BO_Assign) + if (const auto *VD = getCharacterPointerVar(BO->getLHS())) + UpdateTrackedAllocation(VD, BO->getRHS()); + VisitChildren(BO); + } + + void VisitCallExpr(CallExpr *CE) { + StringRef Name = getSimpleLibraryFunctionName(CE->getDirectCallee()); + if (Name == "strlen" && CE->getNumArgs() == 1) { + MarkAsStringBuffer(CE->getArg(0)); + } else if ((Name == "strcpy" || Name == "strcat") && CE->getNumArgs() == 2) { + MarkAsStringBuffer(CE->getArg(0)); + } else if (Name == "memcpy" && CE->getNumArgs() == 3) { + if (const Expr *StrlenArg = + getDirectStrlenArgumentWithOptionalPlusOne(CE->getArg(2)); + StrlenArg && areSameSimpleDeclRef(StrlenArg, CE->getArg(1))) { + MarkAsStringBuffer(CE->getArg(0)); + } + } + VisitChildren(CE); + } +}; + +static void CheckCStringBufferAllocations(Sema &S, const Stmt *Body) { + CStringAllocationVisitor(S).Visit(const_cast<Stmt *>(Body)); +} + +} // namespace + // An AST Visitor that calls a callback function on each callable DEFINITION // that is NOT in a dependent context: class CallableVisitor : public DynamicRecursiveASTVisitor { @@ -3022,6 +3239,13 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings( const Stmt *Body = D->getBody(); assert(Body); + if (!Diags.isIgnored(diag::warn_fortify_allocation_maybe_small_for_cstring, + D->getBeginLoc()) || + !Diags.isIgnored(diag::warn_fortify_realloc_maybe_removes_null_space, + D->getBeginLoc())) { + CheckCStringBufferAllocations(S, Body); + } + // Construct the analysis context with the specified CFG build options. AnalysisDeclContext AC(/* AnalysisDeclContextManager */ nullptr, D); diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index de8b965144971..d88aa1213ac78 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -1141,6 +1141,97 @@ static bool ProcessFormatStringLiteral(const Expr *FormatExpr, return false; } +static StringRef getSimpleLibraryFunctionName(const FunctionDecl *FD) { + if (!FD) + return {}; + IdentifierInfo *II = FD->getIdentifier(); + if (!II) + return {}; + + StringRef Name = II->getName(); + Name.consume_front("__builtin_"); + return Name; +} + +static const Expr *getDirectStrlenCallArgument(const Expr *E) { + const auto *CE = dyn_cast<CallExpr>(E->IgnoreParenCasts()); + if (!CE || CE->getNumArgs() != 1) + return nullptr; + + if (getSimpleLibraryFunctionName(CE->getDirectCallee()) != "strlen") + return nullptr; + return CE->getArg(0)->IgnoreParenCasts(); +} + +static bool areSameSimpleDeclRef(const Expr *E1, const Expr *E2) { + const auto *D1 = dyn_cast<DeclRefExpr>(E1->IgnoreParenCasts()); + const auto *D2 = dyn_cast<DeclRefExpr>(E2->IgnoreParenCasts()); + return D1 && D2 && D1->getDecl() == D2->getDecl(); +} + +static std::string getExprAsString(const Expr *E, const PrintingPolicy &Policy) { + SmallString<64> Text; + llvm::raw_svector_ostream OS(Text); + E->printPretty(OS, nullptr, Policy); + return std::string(OS.str()); +} + +static FixItHint getPlusOneFixIt(const Expr *E, const PrintingPolicy &Policy) { + const Expr *SimpleExpr = E->IgnoreParenCasts(); + if (!isa<DeclRefExpr, CallExpr>(SimpleExpr)) + return {}; + + SourceRange Range = E->getSourceRange(); + if (Range.getBegin().isMacroID() || Range.getEnd().isMacroID()) + return {}; + + SmallString<64> Replacement; + llvm::raw_svector_ostream OS(Replacement); + E->printPretty(OS, nullptr, Policy); + OS << " + 1"; + return FixItHint::CreateReplacement(Range, OS.str()); +} + +static void DiagnoseMissingNullTerminatorSpace(Sema &S, SourceLocation Loc, + const Expr *SizeExpr, + unsigned MissingKind) { + std::string SizeText = getExprAsString(SizeExpr, S.getPrintingPolicy()); + S.Diag(Loc, diag::warn_fortify_missing_null_terminator_space) + << MissingKind << SizeText + << getPlusOneFixIt(SizeExpr, S.getPrintingPolicy()); +} + +static void CheckCStringNullTerminatorSizeArguments(Sema &S, + const FunctionDecl *FD, + const CallExpr *TheCall) { + if (TheCall->isValueDependent() || TheCall->isTypeDependent() || + S.isConstantEvaluatedContext() || + S.Diags.isIgnored(diag::warn_fortify_missing_null_terminator_space, + TheCall->getBeginLoc())) + return; + + StringRef Name = getSimpleLibraryFunctionName(FD); + if (Name == "malloc") { + if (TheCall->getNumArgs() != 1) + return; + const Expr *SizeArg = TheCall->getArg(0)->IgnoreParenCasts(); + if (getDirectStrlenCallArgument(SizeArg)) + DiagnoseMissingNullTerminatorSpace(S, TheCall->getBeginLoc(), SizeArg, + /*MissingKind=*/0); + return; + } + + if (Name != "memcpy" || TheCall->getNumArgs() != 3) + return; + + const Expr *SrcArg = TheCall->getArg(1)->IgnoreParenCasts(); + const Expr *SizeArg = TheCall->getArg(2)->IgnoreParenCasts(); + const Expr *StrlenArg = getDirectStrlenCallArgument(SizeArg); + if (StrlenArg && areSameSimpleDeclRef(StrlenArg, SrcArg)) + DiagnoseMissingNullTerminatorSpace(S, TheCall->getBeginLoc(), SizeArg, + /*MissingKind=*/1); +} + void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD, CallExpr *TheCall) { if (TheCall->isValueDependent() || TheCall->isTypeDependent() || @@ -1247,6 +1338,7 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD, std::optional<llvm::APSInt> DestinationSize; unsigned DiagID = 0; bool IsChkVariant = false; + bool UseLiteralCopyOverflowDiag = false; auto GetFunctionName = [&]() { std::string FunctionNameStr = @@ -1276,6 +1368,10 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD, DiagID = diag::warn_fortify_strlen_overflow; SourceSize = ComputeStrLenArgument(1); DestinationSize = ComputeSizeArgument(0); + UseLiteralCopyOverflowDiag = + (BuiltinID == Builtin::BI__builtin_strcpy || + BuiltinID == Builtin::BIstrcpy) && + isa<StringLiteral>(TheCall->getArg(1)->IgnoreParenCasts()); break; } @@ -1286,6 +1382,9 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD, SourceSize = ComputeStrLenArgument(1); DestinationSize = ComputeExplicitObjectSizeArgument(2); IsChkVariant = true; + UseLiteralCopyOverflowDiag = + BuiltinID == Builtin::BI__builtin___strcpy_chk && + isa<StringLiteral>(TheCall->getArg(1)->IgnoreParenCasts()); break; } @@ -1470,6 +1569,14 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD, SmallString<16> SourceStr; DestinationSize->toString(DestinationStr, /*Radix=*/10); SourceSize->toString(SourceStr, /*Radix=*/10); + + if (UseLiteralCopyOverflowDiag) { + DiagRuntimeBehavior(TheCall->getBeginLoc(), TheCall, + PDiag(diag::warn_fortify_literal_copy_too_large) + << SourceStr << DestinationStr); + return; + } + DiagRuntimeBehavior(TheCall->getBeginLoc(), TheCall, PDiag(DiagID) << FunctionName << DestinationStr << SourceStr); @@ -4449,6 +4556,7 @@ bool Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall, CheckAbsoluteValueFunction(TheCall, FDecl); CheckMaxUnsignedZero(TheCall, FDecl); CheckInfNaNFunction(TheCall, FDecl); + CheckCStringNullTerminatorSizeArguments(*this, FDecl, TheCall); if (getLangOpts().ObjC) ObjC().DiagnoseCStringFormatDirectiveInCFAPI(FDecl, Args, NumArgs); diff --git a/clang/test/Sema/warn-cstring-null-terminator.c b/clang/test/Sema/warn-cstring-null-terminator.c new file mode 100644 index 0000000000000..779e71c376e38 --- /dev/null +++ b/clang/test/Sema/warn-cstring-null-terminator.c @@ -0,0 +1,59 @@ +// RUN: %clang_cc1 -fsyntax-only -Wfortify-source -verify %s + +typedef __SIZE_TYPE__ size_t; + +void *malloc(size_t); +void *realloc(void *, size_t); +void *memcpy(void *, const void *, size_t); +char *strcpy(char *, const char *); +size_t strlen(const char *); + +void direct_malloc_strlen(const char *src) { + char *p = malloc(strlen(src)); // expected-warning{{allocation size does not include space for null terminator; consider 'strlen(src) + 1'}} + char *ok = malloc(strlen(src) + 1); + (void)p; + (void)ok; +} + +void direct_memcpy_strlen(char *dst, const char *src) { + memcpy(dst, src, strlen(src)); // expected-warning{{copy size does not include space for null terminator; consider 'strlen(src) + 1'}} + memcpy(dst, src, strlen(src) + 1); +} + +void literal_strcpy_overflow(void) { + char buf[4]; + char ok[5]; + strcpy(buf, "abcd"); // expected-warning{{copying 5 bytes into buffer of size 4 (including null terminator)}} + strcpy(ok, "abcd"); +} + +void malloc_len_then_strcpy(const char *src, size_t len) { + char *s = malloc(len); // expected-warning{{allocation of 'len' bytes may be insufficient for null-terminated string; consider 'len + 1'}} + strcpy(s, src); +} + +void malloc_len_non_string(char *dst, const char *src, size_t len) { + char *buf = malloc(len); + memcpy(buf, src, len); + memcpy(dst, buf, len); +} + +void malloc_complex_size(const char *src, size_t len) { + char *s = malloc(len * 2); + strcpy(s, src); +} + +void realloc_len_then_strcpy(char *s, const char *src, size_t len) { + s = realloc(s, len); // expected-warning{{realloc may remove space required for null terminator; expected at least len + 1}} + strcpy(s, src); +} + +void realloc_ok(char *s, const char *src, size_t len) { + s = realloc(s, len + 1); + strcpy(s, src); +} + +void malloc_len_then_memcpy_string(const char *src, size_t len) { + char *s = malloc(len); // expected-warning{{allocation of 'len' bytes may be insufficient for null-terminated string; consider 'len + 1'}} + memcpy(s, src, strlen(src) + 1); +} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
