https://github.com/Pppp1116 created 
https://github.com/llvm/llvm-project/pull/190453

Adds a small `-Wformat-libc` subgroup under `-Wformat` for two libc-specific 
checks:
- warn on `%s` / `%[` without a field width when the destination is a 
fixed-size character array
- warn on invalid constant `fopen` mode strings

Validation:
- fresh build of 
`tools/clang/lib/Basic/CMakeFiles/obj.clangBasic.dir/DiagnosticIDs.cpp.o`
- fresh build of 
`tools/clang/lib/Sema/CMakeFiles/obj.clangSema.dir/SemaChecking.cpp.o`


>From d683878c5dacfc3b3d62040d32b970d9fc5327c8 Mon Sep 17 00:00:00 2001
From: Pppp1116 <[email protected]>
Date: Sat, 4 Apr 2026 10:50:07 +0100
Subject: [PATCH] Warn on unbounded scanf and invalid fopen modes

---
 clang/include/clang/Basic/DiagnosticGroups.td |   4 +-
 .../clang/Basic/DiagnosticSemaKinds.td        |   7 +
 clang/lib/Sema/SemaChecking.cpp               | 202 +++++++++++++++++-
 clang/test/Sema/warn-format-libc.c            |  47 ++++
 4 files changed, 258 insertions(+), 2 deletions(-)
 create mode 100644 clang/test/Sema/warn-format-libc.c

diff --git a/clang/include/clang/Basic/DiagnosticGroups.td 
b/clang/include/clang/Basic/DiagnosticGroups.td
index dc7280c66ea79..f458e8d8111e8 100644
--- a/clang/include/clang/Basic/DiagnosticGroups.td
+++ b/clang/include/clang/Basic/DiagnosticGroups.td
@@ -1316,6 +1316,7 @@ def FormatY2K : DiagGroup<"format-y2k">;
 def FormatPedantic : DiagGroup<"format-pedantic">;
 def FormatSignedness : DiagGroup<"format-signedness">;
 def FormatTypeConfusion : DiagGroup<"format-type-confusion">;
+def FormatLibc : DiagGroup<"format-libc">;
 def MissingFormatAttribute : DiagGroup<"missing-format-attribute">;
 
 def FormatOverflowNonKprintf: DiagGroup<"format-overflow-non-kprintf">;
@@ -1326,7 +1327,8 @@ def FormatTruncation: DiagGroup<"format-truncation", 
[FormatTruncationNonKprintf
 def Format : DiagGroup<"format",
                        [FormatExtraArgs, FormatZeroLength, NonNull,
                         FormatSecurity, FormatY2K, FormatInvalidSpecifier,
-                        FormatInsufficientArgs, FormatOverflow, 
FormatTruncation]>,
+                        FormatInsufficientArgs, FormatOverflow,
+                        FormatTruncation, FormatLibc]>,
              DiagCategory<"Format String Issue">;
 def FormatNonLiteral : DiagGroup<"format-nonliteral", 
[MissingFormatAttribute]>;
 def Format2 : DiagGroup<"format=2",
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index eddf9c50033e1..3c30bb8628713 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -10688,6 +10688,10 @@ def warn_missing_format_string : Warning<
 def warn_scanf_nonzero_width : Warning<
   "zero field width in scanf format string is unused">,
   InGroup<Format>;
+def warn_scanf_no_field_width : Warning<
+  "'%%%0' conversion used without a field width, destination buffer has %1 "
+  "element%s2">,
+  InGroup<FormatLibc>, DefaultIgnore;
 def warn_format_conversion_argument_type_mismatch : Warning<
   "format specifies type %0 but the argument has "
   "%select{type|underlying type}2 %1">,
@@ -10792,6 +10796,9 @@ def warn_printf_narg_not_supported : Warning<
 def warn_scanf_scanlist_incomplete : Warning<
   "no closing ']' for '%%[' in scanf format string">,
   InGroup<Format>;
+def warn_fopen_invalid_mode : Warning<
+  "invalid fopen mode string '%0'">,
+  InGroup<FormatLibc>, DefaultIgnore;
 def warn_format_bool_as_character : Warning<
   "using '%0' format specifier, but argument has boolean value">,
   InGroup<Format>;
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index de8b965144971..9dc7fa4a7bbb4 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -1141,6 +1141,140 @@ static bool ProcessFormatStringLiteral(const Expr 
*FormatExpr,
   return false;
 }
 
+static std::optional<uint64_t>
+getScanfDestinationSizeInElements(const Expr *Arg, ASTContext &Context) {
+  const Expr *BaseArg = Arg->IgnoreParenImpCasts();
+  if (const auto *ArrayTy = Context.getAsConstantArrayType(BaseArg->getType());
+      ArrayTy && ArrayTy->getSizeModifier() == ArraySizeModifier::Normal &&
+      ArrayTy->getElementType()->isAnyCharacterType()) {
+    return ArrayTy->getZExtSize();
+  }
+
+  const auto *PointerTy = Arg->getType()->getAs<PointerType>();
+  if (!PointerTy || !PointerTy->getPointeeType()->isAnyCharacterType())
+    return std::nullopt;
+
+  std::optional<uint64_t> ObjectSize = Arg->tryEvaluateObjectSize(Context, 0);
+  if (!ObjectSize)
+    return std::nullopt;
+
+  CharUnits ElementSize = 
Context.getTypeSizeInChars(PointerTy->getPointeeType());
+  if (ElementSize.isZero())
+    return std::nullopt;
+
+  uint64_t ElementSizeBytes = ElementSize.getQuantity();
+  if (*ObjectSize % ElementSizeBytes != 0)
+    return std::nullopt;
+
+  return *ObjectSize / ElementSizeBytes;
+}
+
+static bool isValidFOpenModeString(StringRef Mode) {
+  if (Mode.empty())
+    return false;
+
+  char MainMode = Mode.front();
+  if (MainMode != 'r' && MainMode != 'w' && MainMode != 'a')
+    return false;
+
+  bool SeenPlus = false;
+  bool SeenBinary = false;
+  bool SeenExclusive = false;
+  bool SeenCloseOnExec = false;
+  bool SeenMMap = false;
+  bool SeenModeC = false;
+  bool SeenModeN = false;
+  bool SeenText = false;
+  bool SeenCommit = false;
+  bool SeenSequential = false;
+  bool SeenShortLived = false;
+  bool SeenTemporary = false;
+  bool SeenNoInherit = false;
+
+  for (size_t I = 1; I < Mode.size(); ++I) {
+    StringRef Remaining = Mode.substr(I);
+    if (Remaining.consume_front(",")) {
+      Remaining = Remaining.ltrim();
+      if (!Remaining.consume_front("ccs="))
+        return false;
+      StringRef Encoding = Remaining;
+      return !Encoding.empty() && Encoding.find(',') == StringRef::npos;
+    }
+
+    switch (Mode[I]) {
+    case '+':
+      if (SeenPlus)
+        return false;
+      SeenPlus = true;
+      break;
+    case 'b':
+      if (SeenBinary)
+        return false;
+      SeenBinary = true;
+      break;
+    case 'x':
+      if (SeenExclusive || MainMode == 'r')
+        return false;
+      SeenExclusive = true;
+      break;
+    case 'c':
+      if (SeenModeC)
+        return false;
+      SeenModeC = true;
+      break;
+    case 'n':
+      if (SeenModeN)
+        return false;
+      SeenModeN = true;
+      break;
+    case 'e':
+      if (SeenCloseOnExec)
+        return false;
+      SeenCloseOnExec = true;
+      break;
+    case 'm':
+      if (SeenMMap)
+        return false;
+      SeenMMap = true;
+      break;
+    case 't':
+      if (SeenText)
+        return false;
+      SeenText = true;
+      break;
+    case 'R':
+      if (SeenCommit)
+        return false;
+      SeenCommit = true;
+      break;
+    case 'S':
+      if (SeenSequential)
+        return false;
+      SeenSequential = true;
+      break;
+    case 'T':
+      if (SeenTemporary)
+        return false;
+      SeenTemporary = true;
+      break;
+    case 'D':
+      if (SeenShortLived)
+        return false;
+      SeenShortLived = true;
+      break;
+    case 'N':
+      if (SeenNoInherit)
+        return false;
+      SeenNoInherit = true;
+      break;
+    default:
+      return false;
+    }
+  }
+
+  return true;
+}
+
 void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
                                                CallExpr *TheCall) {
   if (TheCall->isValueDependent() || TheCall->isTypeDependent() ||
@@ -1243,6 +1377,14 @@ void 
Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
     return std::nullopt;
   };
 
+  auto ComputeStringArgument =
+      [&](unsigned Index) -> std::optional<std::string> {
+    std::optional<unsigned> IndexOptional = TranslateIndex(Index);
+    if (!IndexOptional)
+      return std::nullopt;
+    return TheCall->getArg(*IndexOptional)->tryEvaluateString(getASTContext());
+  };
+
   std::optional<llvm::APSInt> SourceSize;
   std::optional<llvm::APSInt> DestinationSize;
   unsigned DiagID = 0;
@@ -1289,6 +1431,21 @@ void 
Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
     break;
   }
 
+  case Builtin::BIfopen: {
+    if (Diags.isIgnored(diag::warn_fopen_invalid_mode, TheCall->getBeginLoc()))
+      return;
+
+    std::optional<unsigned> ModeIndex = TranslateIndex(1);
+    std::optional<std::string> Mode = ComputeStringArgument(1);
+    if (!ModeIndex || !Mode || isValidFOpenModeString(*Mode))
+      return;
+
+    Expr *ModeArg = TheCall->getArg(*ModeIndex);
+    Diag(ModeArg->getBeginLoc(), diag::warn_fopen_invalid_mode)
+        << *Mode << ModeArg->getSourceRange();
+    return;
+  }
+
   case Builtin::BIscanf:
   case Builtin::BIfscanf:
   case Builtin::BIsscanf: {
@@ -9452,6 +9609,10 @@ class CheckScanfHandler : public CheckFormatHandler {
           unsigned specifierLen) override;
 
   void HandleIncompleteScanList(const char *start, const char *end) override;
+
+  void DiagnoseMissingFieldWidth(const analyze_scanf::ScanfSpecifier &FS,
+                                 const Expr *Arg, const char *startSpecifier,
+                                 unsigned specifierLen);
 };
 
 } // namespace
@@ -9476,6 +9637,43 @@ bool 
CheckScanfHandler::HandleInvalidScanfConversionSpecifier(
                                           CS.getStart(), CS.getLength());
 }
 
+void CheckScanfHandler::DiagnoseMissingFieldWidth(
+    const analyze_scanf::ScanfSpecifier &FS, const Expr *Arg,
+    const char *startSpecifier, unsigned specifierLen) {
+  using namespace analyze_format_string;
+
+  const ConversionSpecifier &CS = FS.getConversionSpecifier();
+  if (CS.getKind() != ConversionSpecifier::sArg &&
+      CS.getKind() != ConversionSpecifier::ScanListArg)
+    return;
+
+  if (FS.getFieldWidth().getHowSpecified() != OptionalAmount::NotSpecified)
+    return;
+
+  std::optional<uint64_t> BufferElements =
+      getScanfDestinationSizeInElements(Arg, S.Context);
+  if (!BufferElements)
+    return;
+
+  ArrayRef<FixItHint> FixIts;
+  SmallVector<FixItHint, 1> LocalFixIts;
+  if (*BufferElements > 1) {
+    const LengthModifier &LM = FS.getLengthModifier();
+    const char *InsertionPoint = LM.getLength() ? LM.getStart() : 
CS.getStart();
+    LocalFixIts.push_back(FixItHint::CreateInsertion(
+        getLocationOfByte(InsertionPoint), std::to_string(*BufferElements - 
1)));
+    FixIts = LocalFixIts;
+  }
+
+  EmitFormatDiagnostic(S.PDiag(diag::warn_scanf_no_field_width)
+                           << CS.toString() << *BufferElements
+                           << (*BufferElements != 1),
+                       getLocationOfByte(CS.getStart()),
+                       /*IsStringLocation*/ true,
+                       getSpecifierRange(startSpecifier, specifierLen),
+                       FixIts);
+}
+
 bool CheckScanfHandler::HandleScanfSpecifier(
                                        const analyze_scanf::ScanfSpecifier &FS,
                                        const char *startSpecifier,
@@ -9562,8 +9760,10 @@ bool CheckScanfHandler::HandleScanfSpecifier(
   analyze_format_string::ArgType::MatchKind Match =
       AT.matchesType(S.Context, Ex->getType());
   Match = handleFormatSignedness(Match, S.getDiagnostics(), Ex->getExprLoc());
-  if (Match == analyze_format_string::ArgType::Match)
+  if (Match == analyze_format_string::ArgType::Match) {
+    DiagnoseMissingFieldWidth(FS, Ex, startSpecifier, specifierLen);
     return true;
+  }
   bool Pedantic = Match == analyze_format_string::ArgType::NoMatchPedantic;
   bool Signedness = Match == analyze_format_string::ArgType::NoMatchSignedness;
 
diff --git a/clang/test/Sema/warn-format-libc.c 
b/clang/test/Sema/warn-format-libc.c
new file mode 100644
index 0000000000000..06bb0fe3da309
--- /dev/null
+++ b/clang/test/Sema/warn-format-libc.c
@@ -0,0 +1,47 @@
+// RUN: %clang_cc1 -std=c11 -fsyntax-only -Wformat -verify=expected %s
+// RUN: %clang_cc1 -std=c11 -fsyntax-only -Wall -verify=expected %s
+// RUN: %clang_cc1 -std=c11 -fsyntax-only -Wformat-libc -verify=expected %s
+// RUN: %clang_cc1 -std=c11 -fsyntax-only -Wformat -Wno-format-libc 
-verify=disabled %s
+// RUN: %clang_cc1 -std=c11 -fsyntax-only -verify=plain %s
+// disabled-no-diagnostics
+// plain-no-diagnostics
+
+typedef struct FILE FILE;
+
+int scanf(const char *restrict format, ...);
+FILE *fopen(const char *restrict path, const char *restrict mode);
+
+int wrapped_scanf(const char *format, ...)
+    __attribute__((format(scanf, 1, 2)));
+FILE *wrapped_fopen(const char *path, const char *mode)
+    __attribute__((diagnose_as_builtin(fopen, 1, 2)));
+
+void test_scanf(void) {
+  char buf[8];
+  char one[1];
+
+  scanf("%s", buf); // expected-warning {{'%s' conversion used without a field 
width, destination buffer has 8 elements}}
+  scanf("%[a-z]", buf); // expected-warning {{'%[' conversion used without a 
field width, destination buffer has 8 elements}}
+  wrapped_scanf("%s", buf); // expected-warning {{'%s' conversion used without 
a field width, destination buffer has 8 elements}}
+  scanf("%s", one); // expected-warning {{'%s' conversion used without a field 
width, destination buffer has 1 element}}
+
+  scanf("%7s", buf);
+  scanf("%7[a-z]", buf);
+}
+
+void test_scanf_unknown(char *ptr) {
+  scanf("%s", ptr);
+}
+
+void test_fopen(const char *path, const char *mode) {
+  fopen(path, "r");
+  fopen(path, "w+b");
+  fopen(path, "rb+cmxe");
+  fopen(path, "w+, ccs=UTF-8");
+  fopen(path, mode);
+
+  fopen(path, "oo"); // expected-warning {{invalid fopen mode string 'oo'}}
+  fopen(path, "rx"); // expected-warning {{invalid fopen mode string 'rx'}}
+  fopen(path, "r++"); // expected-warning {{invalid fopen mode string 'r++'}}
+  wrapped_fopen(path, ""); // expected-warning {{invalid fopen mode string ''}}
+}

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to