https://github.com/nuclearcat updated https://github.com/llvm/llvm-project/pull/198130
>From dec8ad56a9e40f2e79eb7e782f8d86684d5d9b34 Mon Sep 17 00:00:00 2001 From: Denys Fedoryshchenko <[email protected]> Date: Wed, 13 May 2026 21:59:33 +0300 Subject: [PATCH] [clang] Warn on umask() argument bits outside 0777 Add a -Wfortify-source warning when the constant-evaluated argument to umask(mode_t) has bits set outside 0777. Those bits are silently discarded by the kernel, so setting them is almost always a typo (0xFFFF, 7777-as-decimal, etc.). Match the corresponding bionic libc diagnose_if check. The Sema-side dispatch is gated to identify the libc declaration of umask and reject user-supplied lookalikes: at least one redeclaration of the resolved function must come from a system header. That is where libc declares umask. Matching on the `mode_t` typedef name is not used because libcs disagree on the spelling: glibc's <sys/types.h> writes the prototype as `__mode_t umask(__mode_t)` via an internal typedef, bionic and musl use `mode_t` directly, and so on. The system-header origin is the portable libc-identity signal; coarser shape checks (name + extern "C" + non-variadic + 1 integer arg + integer return) fill in the remaining filtering. The negative tests cover: * `int umask(int)` in user code, * `typedef unsigned mode_t; extern mode_t umask(mode_t);` in user code (matches the libc typedef name but not the system-header origin). A positive test mirrors glibc's `__mode_t umask(__mode_t)` shape from a `#pragma GCC system_header`-marked Inputs/ header to lock the gate against regressions on the spelling. No Static Analyzer summary is added: modeling umask's argument as a WithinRange precondition would prune feasible caller branches such as `umask(m); if (m > 0777) ...`, even though umask is defined behavior for any input (the kernel masks off the high bits). The Sema check covers the constant-typo case that bionic's diagnose_if targets. Signed-off-by: Denys Fedoryshchenko <[email protected]> --- clang/docs/ReleaseNotes.rst | 5 +++ .../clang/Basic/DiagnosticSemaKinds.td | 5 +++ clang/include/clang/Sema/Sema.h | 4 ++ clang/lib/Sema/SemaChecking.cpp | 42 +++++++++++++++++++ clang/lib/Sema/SemaExpr.cpp | 1 + .../Inputs/warn-fortify-source-umask-glibc.h | 19 +++++++++ .../Sema/Inputs/warn-fortify-source-umask.h | 12 ++++++ .../Sema/warn-fortify-source-umask-glibc.c | 16 +++++++ .../Sema/warn-fortify-source-umask-non-libc.c | 14 +++++++ .../warn-fortify-source-umask-non-system.c | 17 ++++++++ .../warn-fortify-source-umask-record-member.c | 15 +++++++ clang/test/Sema/warn-fortify-source.c | 16 +++++++ 12 files changed, 166 insertions(+) create mode 100644 clang/test/Sema/Inputs/warn-fortify-source-umask-glibc.h create mode 100644 clang/test/Sema/Inputs/warn-fortify-source-umask.h create mode 100644 clang/test/Sema/warn-fortify-source-umask-glibc.c create mode 100644 clang/test/Sema/warn-fortify-source-umask-non-libc.c create mode 100644 clang/test/Sema/warn-fortify-source-umask-non-system.c create mode 100644 clang/test/Sema/warn-fortify-source-umask-record-member.c diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index cf16e40d026c3..771a3781a8fcc 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -551,6 +551,11 @@ Improvements to Clang's diagnostics - Clang now emits error when attribute is missing closing ``]]`` followed by ``;;``. (#GH187223) +- ``-Wfortify-source`` now warns when the constant-evaluated argument to + ``umask`` has bits set outside ``0777``. Those bits are silently discarded + by the kernel, so setting them is almost always a typo (matching the + bionic libc ``diagnose_if`` check). + Improvements to Clang's time-trace ---------------------------------- diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index dbe6cb2c3a41c..bbaeee080e2dd 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -971,6 +971,11 @@ def warn_fortify_strlen_overflow: Warning< " but the source string has length %2 (including NUL byte)">, InGroup<FortifySource>; +def warn_fortify_umask_unused_bits : Warning< + "'umask' argument sets non-file-permission bits (0%0); those bits are " + "ignored">, + InGroup<FortifySource>; + def subst_format_overflow : TextSubstitution< "'%0' will always overflow; destination buffer has size %1," " but format string expands to at least %2">; diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index e71794b2d92c9..c39835399d887 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -2972,6 +2972,10 @@ class Sema final : public SemaBase { void checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD, CallExpr *TheCall); + /// Argument-value fortify checks for libc functions that are not builtins, + /// dispatched by name (e.g. umask). Diagnostics belong to -Wfortify-source. + void checkFortifiedLibcArgument(FunctionDecl *FD, CallExpr *TheCall); + /// Check the arguments to '__builtin_va_start', '__builtin_ms_va_start', /// or '__builtin_c23_va_start' for validity. Emit an error and return true /// on failure; return false on success. diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index cc834bbee23c4..f1edb9db58449 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -1490,6 +1490,48 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD, << FunctionName << DestinationStr << SourceStr); } +void Sema::checkFortifiedLibcArgument(FunctionDecl *FD, CallExpr *TheCall) { + if (TheCall->isValueDependent() || TheCall->isTypeDependent()) + return; + if (!FD->isExternC()) + return; + const IdentifierInfo *II = FD->getIdentifier(); + if (!II) + return; + + // umask(mode_t): warn when the constant-evaluated argument has bits set + // outside the file-permission mask (0777). Those bits are ignored. + // Require a system-header declaration to avoid warning on user-defined + // lookalikes; a hand-declared umask() (no <sys/stat.h>) is not diagnosed. + auto AnyDeclInSystemHeader = [&](const FunctionDecl *F) { + SourceManager &SM = getSourceManager(); + for (const FunctionDecl *R : F->redecls()) + if (SM.isInSystemHeader(R->getLocation())) + return true; + return false; + }; + if (II->isStr("umask") && TheCall->getNumArgs() == 1 && + AnyDeclInSystemHeader(FD)) { + Expr *Arg = TheCall->getArg(0); + if (!Arg->getType()->isIntegerType()) + return; + Expr::EvalResult R; + if (!Arg->EvaluateAsInt(R, getASTContext())) + return; + // Operate on the raw two's-complement bit pattern so that negative + // literals (which convert to large unsigned mode_t values) are caught. + llvm::APInt RawValue = R.Val.getInt(); + llvm::APInt Mask(RawValue.getBitWidth(), 0777); + llvm::APInt Extra = RawValue & ~Mask; + if (Extra == 0) + return; + SmallString<16> ExtraStr; + Extra.toString(ExtraStr, /*Radix=*/8, /*Signed=*/false); + Diag(TheCall->getBeginLoc(), diag::warn_fortify_umask_unused_bits) + << ExtraStr; + } +} + static bool BuiltinSEHScopeCheck(Sema &SemaRef, CallExpr *TheCall, Scope::ScopeFlags NeededScopeFlags, unsigned DiagID) { diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 521a8516ac179..c3d8996ff5b75 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -7368,6 +7368,7 @@ ExprResult Sema::BuildResolvedCallExpr(Expr *Fn, NamedDecl *NDecl, return ExprError(); checkFortifiedBuiltinMemoryFunction(FDecl, TheCall); + checkFortifiedLibcArgument(FDecl, TheCall); if (BuiltinID) return CheckBuiltinFunctionCall(FDecl, BuiltinID, TheCall); diff --git a/clang/test/Sema/Inputs/warn-fortify-source-umask-glibc.h b/clang/test/Sema/Inputs/warn-fortify-source-umask-glibc.h new file mode 100644 index 0000000000000..40071c2982c8a --- /dev/null +++ b/clang/test/Sema/Inputs/warn-fortify-source-umask-glibc.h @@ -0,0 +1,19 @@ +#pragma GCC system_header + +// Mimic glibc's typedef chain: the prototype is written in terms of +// __mode_t, an internal typedef that itself aliases unsigned int. +// `mode_t` is then defined as an alias of __mode_t. This shape would +// reject any check that walks one layer of typedef sugar and matches +// on the typedef name `mode_t`. + +#ifdef __cplusplus +extern "C" { +#endif + +typedef unsigned int __mode_t; +typedef __mode_t mode_t; +extern __mode_t umask(__mode_t __mask); + +#ifdef __cplusplus +} +#endif diff --git a/clang/test/Sema/Inputs/warn-fortify-source-umask.h b/clang/test/Sema/Inputs/warn-fortify-source-umask.h new file mode 100644 index 0000000000000..620ea723a072d --- /dev/null +++ b/clang/test/Sema/Inputs/warn-fortify-source-umask.h @@ -0,0 +1,12 @@ +#pragma GCC system_header + +#ifdef __cplusplus +extern "C" { +#endif + +typedef unsigned mode_t; +mode_t umask(mode_t cmask); + +#ifdef __cplusplus +} +#endif diff --git a/clang/test/Sema/warn-fortify-source-umask-glibc.c b/clang/test/Sema/warn-fortify-source-umask-glibc.c new file mode 100644 index 0000000000000..6ac0a5a3c69c1 --- /dev/null +++ b/clang/test/Sema/warn-fortify-source-umask-glibc.c @@ -0,0 +1,16 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu %s -verify + +// Verify the fortify gate accepts a glibc-style declaration of umask +// whose prototype is written in terms of the internal __mode_t typedef +// (not mode_t directly). Earlier versions of this check matched on the +// `mode_t` typedef name and silently failed to fire on glibc. + +#include "Inputs/warn-fortify-source-umask-glibc.h" + +void call_umask_glibc(mode_t runtime_mode) { + umask(0); + umask(0644); + umask(01000); // expected-warning {{'umask' argument sets non-file-permission bits (01000); those bits are ignored}} + umask(0xFFFF); // expected-warning {{'umask' argument sets non-file-permission bits (0177000); those bits are ignored}} + umask(runtime_mode); // no warning, not a constant +} diff --git a/clang/test/Sema/warn-fortify-source-umask-non-libc.c b/clang/test/Sema/warn-fortify-source-umask-non-libc.c new file mode 100644 index 0000000000000..e464024aaf243 --- /dev/null +++ b/clang/test/Sema/warn-fortify-source-umask-non-libc.c @@ -0,0 +1,14 @@ +// RUN: %clang_cc1 -triple x86_64-apple-macosx10.14.0 %s -verify +// expected-no-diagnostics + +// User-defined umask declared in user code (not a system header). The +// fortify gate requires the resolved function to have at least one +// declaration in a system header, so no -Wfortify-source warning fires. + +extern int umask(int); + +void call_user_umask(void) { + (void)umask(01000); + (void)umask(0xFFFF); + (void)umask(7777); +} diff --git a/clang/test/Sema/warn-fortify-source-umask-non-system.c b/clang/test/Sema/warn-fortify-source-umask-non-system.c new file mode 100644 index 0000000000000..42e53a6dc86ca --- /dev/null +++ b/clang/test/Sema/warn-fortify-source-umask-non-system.c @@ -0,0 +1,17 @@ +// RUN: %clang_cc1 -triple x86_64-apple-macosx10.14.0 %s -verify +// expected-no-diagnostics + +// User-defined umask whose signature spells the typedef `mode_t` +// (matching libc on systems where the prototype isn't written in terms +// of a libc-internal alias), but the declaration is in user code rather +// than a system header. The fortify gate requires a system-header +// origin, so no warning fires. + +typedef unsigned mode_t; +extern mode_t umask(mode_t cmask); + +void call_user_mode_umask(void) { + (void)umask(01000); + (void)umask(0xFFFF); + (void)umask(7777); +} diff --git a/clang/test/Sema/warn-fortify-source-umask-record-member.c b/clang/test/Sema/warn-fortify-source-umask-record-member.c new file mode 100644 index 0000000000000..ca1a36d1b3bc2 --- /dev/null +++ b/clang/test/Sema/warn-fortify-source-umask-record-member.c @@ -0,0 +1,15 @@ +// RUN: %clang_cc1 -triple x86_64-apple-macosx10.14.0 %s -verify + +typedef unsigned mode_t; + +// A struct member with function type is rejected at ActOnField and becomes +// a FieldDecl, not a record-context FunctionDecl, so the call below never +// reaches the fortify gate. Regression test that this error-recovery path +// does not crash (would otherwise risk the assert in Decl.cpp isDeclExternC). +struct S { + mode_t umask(mode_t); // expected-error {{field 'umask' declared as a function}} +}; + +void call_member_umask(struct S *s) { + (void)s->umask(0xFFFF); +} diff --git a/clang/test/Sema/warn-fortify-source.c b/clang/test/Sema/warn-fortify-source.c index d0b519a516545..be101c36de3b5 100644 --- a/clang/test/Sema/warn-fortify-source.c +++ b/clang/test/Sema/warn-fortify-source.c @@ -28,6 +28,10 @@ void bzero(void *dst, size_t n); } #endif +// Declare umask via a system header so the libc gate (system-header origin) +// matches without needing the real <sys/stat.h> on every test host. +#include "Inputs/warn-fortify-source-umask.h" + void call_memcpy(void) { char dst[10]; char src[20]; @@ -242,6 +246,18 @@ void call_sprintf(void) { sprintf(buf, "5%.1e", 9.f); // expected-warning {{'sprintf' will always overflow; destination buffer has size 6, but format string expands to at least 8}} } +void call_umask(mode_t runtime_mode) { + umask(0); + umask(022); + umask(0644); + umask(0777); + umask(01000); // expected-warning {{'umask' argument sets non-file-permission bits (01000); those bits are ignored}} + umask(0xFFFF); // expected-warning {{'umask' argument sets non-file-permission bits (0177000); those bits are ignored}} + umask(7777); // expected-warning {{'umask' argument sets non-file-permission bits (017000); those bits are ignored}} + umask(-1); // expected-warning {{'umask' argument sets non-file-permission bits (}} + umask(runtime_mode); // no warning, not a constant +} + #ifdef __cplusplus template <class> struct S { void mf() const { _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
