https://github.com/Pppp1116 created https://github.com/llvm/llvm-project/pull/190453
Adds a small `-Wformat-libc` subgroup under `-Wformat` for two libc-specific checks: - warn on `%s` / `%[` without a field width when the destination is a fixed-size character array - warn on invalid constant `fopen` mode strings Validation: - fresh build of `tools/clang/lib/Basic/CMakeFiles/obj.clangBasic.dir/DiagnosticIDs.cpp.o` - fresh build of `tools/clang/lib/Sema/CMakeFiles/obj.clangSema.dir/SemaChecking.cpp.o` >From d683878c5dacfc3b3d62040d32b970d9fc5327c8 Mon Sep 17 00:00:00 2001 From: Pppp1116 <[email protected]> Date: Sat, 4 Apr 2026 10:50:07 +0100 Subject: [PATCH] Warn on unbounded scanf and invalid fopen modes --- clang/include/clang/Basic/DiagnosticGroups.td | 4 +- .../clang/Basic/DiagnosticSemaKinds.td | 7 + clang/lib/Sema/SemaChecking.cpp | 202 +++++++++++++++++- clang/test/Sema/warn-format-libc.c | 47 ++++ 4 files changed, 258 insertions(+), 2 deletions(-) create mode 100644 clang/test/Sema/warn-format-libc.c diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index dc7280c66ea79..f458e8d8111e8 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -1316,6 +1316,7 @@ def FormatY2K : DiagGroup<"format-y2k">; def FormatPedantic : DiagGroup<"format-pedantic">; def FormatSignedness : DiagGroup<"format-signedness">; def FormatTypeConfusion : DiagGroup<"format-type-confusion">; +def FormatLibc : DiagGroup<"format-libc">; def MissingFormatAttribute : DiagGroup<"missing-format-attribute">; def FormatOverflowNonKprintf: DiagGroup<"format-overflow-non-kprintf">; @@ -1326,7 +1327,8 @@ def FormatTruncation: DiagGroup<"format-truncation", [FormatTruncationNonKprintf def Format : DiagGroup<"format", [FormatExtraArgs, FormatZeroLength, NonNull, FormatSecurity, FormatY2K, FormatInvalidSpecifier, - FormatInsufficientArgs, FormatOverflow, FormatTruncation]>, + FormatInsufficientArgs, FormatOverflow, + FormatTruncation, FormatLibc]>, DiagCategory<"Format String Issue">; def FormatNonLiteral : DiagGroup<"format-nonliteral", [MissingFormatAttribute]>; def Format2 : DiagGroup<"format=2", diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index eddf9c50033e1..3c30bb8628713 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -10688,6 +10688,10 @@ def warn_missing_format_string : Warning< def warn_scanf_nonzero_width : Warning< "zero field width in scanf format string is unused">, InGroup<Format>; +def warn_scanf_no_field_width : Warning< + "'%%%0' conversion used without a field width, destination buffer has %1 " + "element%s2">, + InGroup<FormatLibc>, DefaultIgnore; def warn_format_conversion_argument_type_mismatch : Warning< "format specifies type %0 but the argument has " "%select{type|underlying type}2 %1">, @@ -10792,6 +10796,9 @@ def warn_printf_narg_not_supported : Warning< def warn_scanf_scanlist_incomplete : Warning< "no closing ']' for '%%[' in scanf format string">, InGroup<Format>; +def warn_fopen_invalid_mode : Warning< + "invalid fopen mode string '%0'">, + InGroup<FormatLibc>, DefaultIgnore; def warn_format_bool_as_character : Warning< "using '%0' format specifier, but argument has boolean value">, InGroup<Format>; diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index de8b965144971..9dc7fa4a7bbb4 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -1141,6 +1141,140 @@ static bool ProcessFormatStringLiteral(const Expr *FormatExpr, return false; } +static std::optional<uint64_t> +getScanfDestinationSizeInElements(const Expr *Arg, ASTContext &Context) { + const Expr *BaseArg = Arg->IgnoreParenImpCasts(); + if (const auto *ArrayTy = Context.getAsConstantArrayType(BaseArg->getType()); + ArrayTy && ArrayTy->getSizeModifier() == ArraySizeModifier::Normal && + ArrayTy->getElementType()->isAnyCharacterType()) { + return ArrayTy->getZExtSize(); + } + + const auto *PointerTy = Arg->getType()->getAs<PointerType>(); + if (!PointerTy || !PointerTy->getPointeeType()->isAnyCharacterType()) + return std::nullopt; + + std::optional<uint64_t> ObjectSize = Arg->tryEvaluateObjectSize(Context, 0); + if (!ObjectSize) + return std::nullopt; + + CharUnits ElementSize = Context.getTypeSizeInChars(PointerTy->getPointeeType()); + if (ElementSize.isZero()) + return std::nullopt; + + uint64_t ElementSizeBytes = ElementSize.getQuantity(); + if (*ObjectSize % ElementSizeBytes != 0) + return std::nullopt; + + return *ObjectSize / ElementSizeBytes; +} + +static bool isValidFOpenModeString(StringRef Mode) { + if (Mode.empty()) + return false; + + char MainMode = Mode.front(); + if (MainMode != 'r' && MainMode != 'w' && MainMode != 'a') + return false; + + bool SeenPlus = false; + bool SeenBinary = false; + bool SeenExclusive = false; + bool SeenCloseOnExec = false; + bool SeenMMap = false; + bool SeenModeC = false; + bool SeenModeN = false; + bool SeenText = false; + bool SeenCommit = false; + bool SeenSequential = false; + bool SeenShortLived = false; + bool SeenTemporary = false; + bool SeenNoInherit = false; + + for (size_t I = 1; I < Mode.size(); ++I) { + StringRef Remaining = Mode.substr(I); + if (Remaining.consume_front(",")) { + Remaining = Remaining.ltrim(); + if (!Remaining.consume_front("ccs=")) + return false; + StringRef Encoding = Remaining; + return !Encoding.empty() && Encoding.find(',') == StringRef::npos; + } + + switch (Mode[I]) { + case '+': + if (SeenPlus) + return false; + SeenPlus = true; + break; + case 'b': + if (SeenBinary) + return false; + SeenBinary = true; + break; + case 'x': + if (SeenExclusive || MainMode == 'r') + return false; + SeenExclusive = true; + break; + case 'c': + if (SeenModeC) + return false; + SeenModeC = true; + break; + case 'n': + if (SeenModeN) + return false; + SeenModeN = true; + break; + case 'e': + if (SeenCloseOnExec) + return false; + SeenCloseOnExec = true; + break; + case 'm': + if (SeenMMap) + return false; + SeenMMap = true; + break; + case 't': + if (SeenText) + return false; + SeenText = true; + break; + case 'R': + if (SeenCommit) + return false; + SeenCommit = true; + break; + case 'S': + if (SeenSequential) + return false; + SeenSequential = true; + break; + case 'T': + if (SeenTemporary) + return false; + SeenTemporary = true; + break; + case 'D': + if (SeenShortLived) + return false; + SeenShortLived = true; + break; + case 'N': + if (SeenNoInherit) + return false; + SeenNoInherit = true; + break; + default: + return false; + } + } + + return true; +} + void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD, CallExpr *TheCall) { if (TheCall->isValueDependent() || TheCall->isTypeDependent() || @@ -1243,6 +1377,14 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD, return std::nullopt; }; + auto ComputeStringArgument = + [&](unsigned Index) -> std::optional<std::string> { + std::optional<unsigned> IndexOptional = TranslateIndex(Index); + if (!IndexOptional) + return std::nullopt; + return TheCall->getArg(*IndexOptional)->tryEvaluateString(getASTContext()); + }; + std::optional<llvm::APSInt> SourceSize; std::optional<llvm::APSInt> DestinationSize; unsigned DiagID = 0; @@ -1289,6 +1431,21 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD, break; } + case Builtin::BIfopen: { + if (Diags.isIgnored(diag::warn_fopen_invalid_mode, TheCall->getBeginLoc())) + return; + + std::optional<unsigned> ModeIndex = TranslateIndex(1); + std::optional<std::string> Mode = ComputeStringArgument(1); + if (!ModeIndex || !Mode || isValidFOpenModeString(*Mode)) + return; + + Expr *ModeArg = TheCall->getArg(*ModeIndex); + Diag(ModeArg->getBeginLoc(), diag::warn_fopen_invalid_mode) + << *Mode << ModeArg->getSourceRange(); + return; + } + case Builtin::BIscanf: case Builtin::BIfscanf: case Builtin::BIsscanf: { @@ -9452,6 +9609,10 @@ class CheckScanfHandler : public CheckFormatHandler { unsigned specifierLen) override; void HandleIncompleteScanList(const char *start, const char *end) override; + + void DiagnoseMissingFieldWidth(const analyze_scanf::ScanfSpecifier &FS, + const Expr *Arg, const char *startSpecifier, + unsigned specifierLen); }; } // namespace @@ -9476,6 +9637,43 @@ bool CheckScanfHandler::HandleInvalidScanfConversionSpecifier( CS.getStart(), CS.getLength()); } +void CheckScanfHandler::DiagnoseMissingFieldWidth( + const analyze_scanf::ScanfSpecifier &FS, const Expr *Arg, + const char *startSpecifier, unsigned specifierLen) { + using namespace analyze_format_string; + + const ConversionSpecifier &CS = FS.getConversionSpecifier(); + if (CS.getKind() != ConversionSpecifier::sArg && + CS.getKind() != ConversionSpecifier::ScanListArg) + return; + + if (FS.getFieldWidth().getHowSpecified() != OptionalAmount::NotSpecified) + return; + + std::optional<uint64_t> BufferElements = + getScanfDestinationSizeInElements(Arg, S.Context); + if (!BufferElements) + return; + + ArrayRef<FixItHint> FixIts; + SmallVector<FixItHint, 1> LocalFixIts; + if (*BufferElements > 1) { + const LengthModifier &LM = FS.getLengthModifier(); + const char *InsertionPoint = LM.getLength() ? LM.getStart() : CS.getStart(); + LocalFixIts.push_back(FixItHint::CreateInsertion( + getLocationOfByte(InsertionPoint), std::to_string(*BufferElements - 1))); + FixIts = LocalFixIts; + } + + EmitFormatDiagnostic(S.PDiag(diag::warn_scanf_no_field_width) + << CS.toString() << *BufferElements + << (*BufferElements != 1), + getLocationOfByte(CS.getStart()), + /*IsStringLocation*/ true, + getSpecifierRange(startSpecifier, specifierLen), + FixIts); +} + bool CheckScanfHandler::HandleScanfSpecifier( const analyze_scanf::ScanfSpecifier &FS, const char *startSpecifier, @@ -9562,8 +9760,10 @@ bool CheckScanfHandler::HandleScanfSpecifier( analyze_format_string::ArgType::MatchKind Match = AT.matchesType(S.Context, Ex->getType()); Match = handleFormatSignedness(Match, S.getDiagnostics(), Ex->getExprLoc()); - if (Match == analyze_format_string::ArgType::Match) + if (Match == analyze_format_string::ArgType::Match) { + DiagnoseMissingFieldWidth(FS, Ex, startSpecifier, specifierLen); return true; + } bool Pedantic = Match == analyze_format_string::ArgType::NoMatchPedantic; bool Signedness = Match == analyze_format_string::ArgType::NoMatchSignedness; diff --git a/clang/test/Sema/warn-format-libc.c b/clang/test/Sema/warn-format-libc.c new file mode 100644 index 0000000000000..06bb0fe3da309 --- /dev/null +++ b/clang/test/Sema/warn-format-libc.c @@ -0,0 +1,47 @@ +// RUN: %clang_cc1 -std=c11 -fsyntax-only -Wformat -verify=expected %s +// RUN: %clang_cc1 -std=c11 -fsyntax-only -Wall -verify=expected %s +// RUN: %clang_cc1 -std=c11 -fsyntax-only -Wformat-libc -verify=expected %s +// RUN: %clang_cc1 -std=c11 -fsyntax-only -Wformat -Wno-format-libc -verify=disabled %s +// RUN: %clang_cc1 -std=c11 -fsyntax-only -verify=plain %s +// disabled-no-diagnostics +// plain-no-diagnostics + +typedef struct FILE FILE; + +int scanf(const char *restrict format, ...); +FILE *fopen(const char *restrict path, const char *restrict mode); + +int wrapped_scanf(const char *format, ...) + __attribute__((format(scanf, 1, 2))); +FILE *wrapped_fopen(const char *path, const char *mode) + __attribute__((diagnose_as_builtin(fopen, 1, 2))); + +void test_scanf(void) { + char buf[8]; + char one[1]; + + scanf("%s", buf); // expected-warning {{'%s' conversion used without a field width, destination buffer has 8 elements}} + scanf("%[a-z]", buf); // expected-warning {{'%[' conversion used without a field width, destination buffer has 8 elements}} + wrapped_scanf("%s", buf); // expected-warning {{'%s' conversion used without a field width, destination buffer has 8 elements}} + scanf("%s", one); // expected-warning {{'%s' conversion used without a field width, destination buffer has 1 element}} + + scanf("%7s", buf); + scanf("%7[a-z]", buf); +} + +void test_scanf_unknown(char *ptr) { + scanf("%s", ptr); +} + +void test_fopen(const char *path, const char *mode) { + fopen(path, "r"); + fopen(path, "w+b"); + fopen(path, "rb+cmxe"); + fopen(path, "w+, ccs=UTF-8"); + fopen(path, mode); + + fopen(path, "oo"); // expected-warning {{invalid fopen mode string 'oo'}} + fopen(path, "rx"); // expected-warning {{invalid fopen mode string 'rx'}} + fopen(path, "r++"); // expected-warning {{invalid fopen mode string 'r++'}} + wrapped_fopen(path, ""); // expected-warning {{invalid fopen mode string ''}} +} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
