https://github.com/nuclearcat updated https://github.com/llvm/llvm-project/pull/198130
>From 24c947d396afca7de4fe4efff0596a153b2d951c Mon Sep 17 00:00:00 2001 From: Denys Fedoryshchenko <[email protected]> Date: Wed, 13 May 2026 21:59:33 +0300 Subject: [PATCH] [clang] Warn on umask() argument bits outside 0777 Add a -Wfortify-source warning when the constant-evaluated argument to umask(mode_t) has bits set outside 0777. Those bits are silently discarded by the kernel, so setting them is almost always a typo (0xFFFF, 7777-as-decimal, etc.). Match the corresponding bionic libc diagnose_if check. umask is modeled as a LibBuiltin in Builtins.td so the Sema check recognizes it by builtin identity (Builtin::BIumask) instead of by name plus system-header origin. mode_t is target- and libc-dependent (unsigned int on Linux, unsigned short on some BSDs/Darwin), so there is no portable prototype to match. The builtin is therefore given an empty prototype (as pthread_create does) and marked IgnoreSignature. With no prototype, no implicit builtin declaration is formed, so the libc's own umask is never treated as an incompatible redeclaration -- on 16-bit-mode_t targets (Darwin, the BSDs) such a redeclaration would otherwise strip the builtin id and silently disable the check, even for <sys/stat.h>. IgnoreSignature then attaches the builtin id to any file-scope, C-linkage declaration of umask by name, regardless of how the libc spells mode_t. This recognizes umask the way other fortify checks recognize their functions via getBuiltinID, and (unlike the earlier system-header gate) keeps working when umask is forward-declared without <sys/stat.h>. A file-local (internal linkage) umask, or a C++ umask without C language linkage, keeps a zero builtin id and is not diagnosed. The accepted tradeoff, mirroring the read/write LibBuiltin experience, is that any file-scope, C-linkage function named umask taking an integer is treated as the libc umask and checked. An unrelated user function of that name could in principle get a false positive, but only on a constant argument with bits set above 0777; umask is a rare enough name that the blast radius is small. No Static Analyzer summary is added: modeling umask's argument as a WithinRange precondition would prune feasible caller branches such as umask(m); if (m > 0777) ..., even though umask is defined behavior for any input (the kernel masks off the high bits). The Sema check covers the constant-typo case that bionic's diagnose_if targets. --- clang/docs/ReleaseNotes.rst | 5 +++ clang/include/clang/Basic/BuiltinHeaders.def | 1 + clang/include/clang/Basic/Builtins.td | 8 +++++ .../clang/Basic/DiagnosticSemaKinds.td | 5 +++ clang/include/clang/Sema/Sema.h | 4 +++ clang/lib/Sema/SemaChecking.cpp | 36 +++++++++++++++++++ clang/lib/Sema/SemaExpr.cpp | 1 + .../Inputs/warn-fortify-source-umask-darwin.h | 17 +++++++++ .../Inputs/warn-fortify-source-umask-glibc.h | 19 ++++++++++ .../Sema/Inputs/warn-fortify-source-umask.h | 12 +++++++ .../Sema/warn-fortify-source-umask-darwin.c | 19 ++++++++++ .../warn-fortify-source-umask-forward-decl.c | 19 ++++++++++ .../Sema/warn-fortify-source-umask-glibc.c | 16 +++++++++ .../warn-fortify-source-umask-not-builtin.c | 21 +++++++++++ .../warn-fortify-source-umask-record-member.c | 15 ++++++++ .../Sema/warn-fortify-source-umask-redecl.c | 16 +++++++++ clang/test/Sema/warn-fortify-source.c | 16 +++++++++ 17 files changed, 230 insertions(+) create mode 100644 clang/test/Sema/Inputs/warn-fortify-source-umask-darwin.h create mode 100644 clang/test/Sema/Inputs/warn-fortify-source-umask-glibc.h create mode 100644 clang/test/Sema/Inputs/warn-fortify-source-umask.h create mode 100644 clang/test/Sema/warn-fortify-source-umask-darwin.c create mode 100644 clang/test/Sema/warn-fortify-source-umask-forward-decl.c create mode 100644 clang/test/Sema/warn-fortify-source-umask-glibc.c create mode 100644 clang/test/Sema/warn-fortify-source-umask-not-builtin.c create mode 100644 clang/test/Sema/warn-fortify-source-umask-record-member.c create mode 100644 clang/test/Sema/warn-fortify-source-umask-redecl.c diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 8a00b235860e4..9c75c212c487b 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -625,6 +625,11 @@ Improvements to Clang's diagnostics - Diagnostics for the C++11 range-based for statement now report the correct iterator type in notes for invalid iterator types. +- ``-Wfortify-source`` now warns when the constant-evaluated argument to + ``umask`` has bits set outside ``0777``. Those bits are silently discarded + by the kernel, so setting them is almost always a typo (matching the + bionic libc ``diagnose_if`` check). + Improvements to Clang's time-trace ---------------------------------- diff --git a/clang/include/clang/Basic/BuiltinHeaders.def b/clang/include/clang/Basic/BuiltinHeaders.def index 23889a22769ed..b18e470a8bd25 100644 --- a/clang/include/clang/Basic/BuiltinHeaders.def +++ b/clang/include/clang/Basic/BuiltinHeaders.def @@ -38,6 +38,7 @@ HEADER(STDIO_H, "stdio.h") HEADER(STDLIB_H, "stdlib.h") HEADER(STRINGS_H, "strings.h") HEADER(STRING_H, "string.h") +HEADER(SYS_STAT_H, "sys/stat.h") HEADER(UNISTD_H, "unistd.h") HEADER(UTILITY, "utility") HEADER(WCHAR_H, "wchar.h") diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index e7fb43be8794e..9d65b224c899f 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -3800,6 +3800,14 @@ def VFork : LibBuiltin<"unistd.h"> { let Prototype = "pid_t()"; } +// POSIX sys/stat.h + +def Umask : LibBuiltin<"sys/stat.h"> { + let Spellings = ["umask"]; + let Attributes = [IgnoreSignature]; + let Prototype = ""; +} + // POSIX pthread.h def PthreadCreate : GNULibBuiltin<"pthread.h"> { diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index d3e2d616a8b80..c1808d9ddcb7c 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -971,6 +971,11 @@ def warn_fortify_strlen_overflow: Warning< " but the source string has length %2 (including NUL byte)">, InGroup<FortifySource>; +def warn_fortify_umask_unused_bits : Warning< + "'umask' argument sets non-file-permission bits (0%0); those bits are " + "ignored">, + InGroup<FortifySource>; + def subst_format_overflow : TextSubstitution< "'%0' will always overflow; destination buffer has size %1," " but format string expands to at least %2">; diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index ff474fdd99562..500026374e68e 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -2972,6 +2972,10 @@ class Sema final : public SemaBase { void checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD, CallExpr *TheCall); + /// Argument-value fortify checks for libc functions that are not builtins, + /// dispatched by name (e.g. umask). Diagnostics belong to -Wfortify-source. + void checkFortifiedLibcArgument(FunctionDecl *FD, CallExpr *TheCall); + /// Check the arguments to '__builtin_va_start', '__builtin_ms_va_start', /// or '__builtin_c23_va_start' for validity. Emit an error and return true /// on failure; return false on success. diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index b8a3f48a32f24..6c56715d34204 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -1490,6 +1490,42 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD, << FunctionName << DestinationStr << SourceStr); } +void Sema::checkFortifiedLibcArgument(FunctionDecl *FD, CallExpr *TheCall) { + if (TheCall->isValueDependent() || TheCall->isTypeDependent()) + return; + + // Recognize the libc function by builtin identity rather than by name and + // system-header origin. umask is a LibBuiltin marked IgnoreSignature, so the + // builtin id is attached to any file-scope, C-linkage declaration of umask + // regardless of the libc's mode_t spelling -- including a hand-written + // forward declaration without <sys/stat.h>. A static/local lookalike or a + // C++ (non-extern-"C") declaration keeps a zero builtin id and is ignored. + if (FD->getBuiltinID() != Builtin::BIumask) + return; + + // umask(mode_t): warn when the constant-evaluated argument has bits set + // outside the file-permission mask (0777). Those bits are ignored. + if (TheCall->getNumArgs() != 1) + return; + Expr *Arg = TheCall->getArg(0); + if (!Arg->getType()->isIntegerType()) + return; + Expr::EvalResult R; + if (!Arg->EvaluateAsInt(R, getASTContext())) + return; + // Operate on the raw two's-complement bit pattern so that negative literals + // (which convert to large unsigned mode_t values) are caught. + llvm::APInt RawValue = R.Val.getInt(); + llvm::APInt Mask(RawValue.getBitWidth(), 0777); + llvm::APInt Extra = RawValue & ~Mask; + if (Extra == 0) + return; + SmallString<16> ExtraStr; + Extra.toString(ExtraStr, /*Radix=*/8, /*Signed=*/false); + Diag(TheCall->getBeginLoc(), diag::warn_fortify_umask_unused_bits) + << ExtraStr; +} + static bool BuiltinSEHScopeCheck(Sema &SemaRef, CallExpr *TheCall, Scope::ScopeFlags NeededScopeFlags, unsigned DiagID) { diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index ad6e7183cb3a4..2a33333233341 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -7382,6 +7382,7 @@ ExprResult Sema::BuildResolvedCallExpr(Expr *Fn, NamedDecl *NDecl, return ExprError(); checkFortifiedBuiltinMemoryFunction(FDecl, TheCall); + checkFortifiedLibcArgument(FDecl, TheCall); if (BuiltinID) return CheckBuiltinFunctionCall(FDecl, BuiltinID, TheCall); diff --git a/clang/test/Sema/Inputs/warn-fortify-source-umask-darwin.h b/clang/test/Sema/Inputs/warn-fortify-source-umask-darwin.h new file mode 100644 index 0000000000000..878706c4ca3d1 --- /dev/null +++ b/clang/test/Sema/Inputs/warn-fortify-source-umask-darwin.h @@ -0,0 +1,17 @@ +#pragma GCC system_header + +// Darwin and the BSDs spell mode_t as a 16-bit type (__uint16_t), unlike +// glibc's unsigned int. umask is recognized by builtin name with no prototype +// to match, so the width the libc picks for mode_t does not matter. + +#ifdef __cplusplus +extern "C" { +#endif + +typedef unsigned short __uint16_t; +typedef __uint16_t mode_t; +mode_t umask(mode_t); + +#ifdef __cplusplus +} +#endif diff --git a/clang/test/Sema/Inputs/warn-fortify-source-umask-glibc.h b/clang/test/Sema/Inputs/warn-fortify-source-umask-glibc.h new file mode 100644 index 0000000000000..40071c2982c8a --- /dev/null +++ b/clang/test/Sema/Inputs/warn-fortify-source-umask-glibc.h @@ -0,0 +1,19 @@ +#pragma GCC system_header + +// Mimic glibc's typedef chain: the prototype is written in terms of +// __mode_t, an internal typedef that itself aliases unsigned int. +// `mode_t` is then defined as an alias of __mode_t. This shape would +// reject any check that walks one layer of typedef sugar and matches +// on the typedef name `mode_t`. + +#ifdef __cplusplus +extern "C" { +#endif + +typedef unsigned int __mode_t; +typedef __mode_t mode_t; +extern __mode_t umask(__mode_t __mask); + +#ifdef __cplusplus +} +#endif diff --git a/clang/test/Sema/Inputs/warn-fortify-source-umask.h b/clang/test/Sema/Inputs/warn-fortify-source-umask.h new file mode 100644 index 0000000000000..620ea723a072d --- /dev/null +++ b/clang/test/Sema/Inputs/warn-fortify-source-umask.h @@ -0,0 +1,12 @@ +#pragma GCC system_header + +#ifdef __cplusplus +extern "C" { +#endif + +typedef unsigned mode_t; +mode_t umask(mode_t cmask); + +#ifdef __cplusplus +} +#endif diff --git a/clang/test/Sema/warn-fortify-source-umask-darwin.c b/clang/test/Sema/warn-fortify-source-umask-darwin.c new file mode 100644 index 0000000000000..22df37517d6e8 --- /dev/null +++ b/clang/test/Sema/warn-fortify-source-umask-darwin.c @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 -triple arm64-apple-macosx11.0 %s -verify + +// Regression test for the macOS/BSD case. There mode_t is a 16-bit type, so an +// earlier attempt that gave umask a fixed unsigned-int builtin prototype turned +// the libc's own <sys/stat.h> declaration into an incompatible library +// redeclaration and silently dropped its builtin identity -- the fortify check +// went dead, even for the system header. umask now has no prototype to match +// (it is recognized purely by name), so the check fires regardless of how wide +// the libc makes mode_t. + +#include "Inputs/warn-fortify-source-umask-darwin.h" + +void call_umask_darwin(mode_t runtime_mode) { + umask(0); + umask(0644); + umask(01000); // expected-warning {{'umask' argument sets non-file-permission bits (01000); those bits are ignored}} + umask(0xFFFF); // expected-warning {{'umask' argument sets non-file-permission bits (0177000); those bits are ignored}} + umask(runtime_mode); // no warning, not a constant +} diff --git a/clang/test/Sema/warn-fortify-source-umask-forward-decl.c b/clang/test/Sema/warn-fortify-source-umask-forward-decl.c new file mode 100644 index 0000000000000..f3df651d07cb0 --- /dev/null +++ b/clang/test/Sema/warn-fortify-source-umask-forward-decl.c @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu %s -verify + +// umask forward-declared in user code without including <sys/stat.h>. Because +// umask is a LibBuiltin recognized by name (not by system-header origin), the +// fortify check still fires here -- this is the case the earlier system-header +// gate went silent on. umask has no prototype to match against, so the +// declaration is never reported as an incompatible library redeclaration +// regardless of how this target spells mode_t. + +typedef unsigned mode_t; +mode_t umask(mode_t); + +void call_user_umask(mode_t runtime_mode) { + umask(0); + umask(0644); + umask(01000); // expected-warning {{'umask' argument sets non-file-permission bits (01000); those bits are ignored}} + umask(0xFFFF); // expected-warning {{'umask' argument sets non-file-permission bits (0177000); those bits are ignored}} + umask(runtime_mode); // no warning, not a constant +} diff --git a/clang/test/Sema/warn-fortify-source-umask-glibc.c b/clang/test/Sema/warn-fortify-source-umask-glibc.c new file mode 100644 index 0000000000000..6d5d4dbab79c5 --- /dev/null +++ b/clang/test/Sema/warn-fortify-source-umask-glibc.c @@ -0,0 +1,16 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu %s -verify + +// Verify the builtin-based fortify check fires on a glibc-style declaration +// of umask whose prototype is written in terms of the internal __mode_t +// typedef (= unsigned int) rather than mode_t directly. Recognition is by +// builtin name, so the libc's particular mode_t spelling does not matter. + +#include "Inputs/warn-fortify-source-umask-glibc.h" + +void call_umask_glibc(mode_t runtime_mode) { + umask(0); + umask(0644); + umask(01000); // expected-warning {{'umask' argument sets non-file-permission bits (01000); those bits are ignored}} + umask(0xFFFF); // expected-warning {{'umask' argument sets non-file-permission bits (0177000); those bits are ignored}} + umask(runtime_mode); // no warning, not a constant +} diff --git a/clang/test/Sema/warn-fortify-source-umask-not-builtin.c b/clang/test/Sema/warn-fortify-source-umask-not-builtin.c new file mode 100644 index 0000000000000..57e3706a5f1b1 --- /dev/null +++ b/clang/test/Sema/warn-fortify-source-umask-not-builtin.c @@ -0,0 +1,21 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -x c %s -verify +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -x c++ %s -verify +// expected-no-diagnostics + +// Declarations that are not the POSIX umask keep a zero builtin id, so the +// fortify check is skipped even for out-of-range constants: +// * a file-local umask with internal linkage; +// * in C++, a umask without C language linkage (e.g. a user's own overload). + +static int umask(int m) { return m; } + +void call_static_umask(void) { + (void)umask(0xFFFF); +} + +#ifdef __cplusplus +namespace user { +int umask(int); +void call(void) { (void)umask(0xFFFF); } +} // namespace user +#endif diff --git a/clang/test/Sema/warn-fortify-source-umask-record-member.c b/clang/test/Sema/warn-fortify-source-umask-record-member.c new file mode 100644 index 0000000000000..ca1a36d1b3bc2 --- /dev/null +++ b/clang/test/Sema/warn-fortify-source-umask-record-member.c @@ -0,0 +1,15 @@ +// RUN: %clang_cc1 -triple x86_64-apple-macosx10.14.0 %s -verify + +typedef unsigned mode_t; + +// A struct member with function type is rejected at ActOnField and becomes +// a FieldDecl, not a record-context FunctionDecl, so the call below never +// reaches the fortify gate. Regression test that this error-recovery path +// does not crash (would otherwise risk the assert in Decl.cpp isDeclExternC). +struct S { + mode_t umask(mode_t); // expected-error {{field 'umask' declared as a function}} +}; + +void call_member_umask(struct S *s) { + (void)s->umask(0xFFFF); +} diff --git a/clang/test/Sema/warn-fortify-source-umask-redecl.c b/clang/test/Sema/warn-fortify-source-umask-redecl.c new file mode 100644 index 0000000000000..c125a6a17936e --- /dev/null +++ b/clang/test/Sema/warn-fortify-source-umask-redecl.c @@ -0,0 +1,16 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu %s -verify + +// A user declaration of umask whose integer type differs from the historical +// unsigned-int spelling -- here a plain int, as in a K&R-era or sloppy +// prototype -- is still recognized as the libc umask and checked. Because +// umask is a LibBuiltin with no prototype to match, the declaration is not +// flagged as an incompatible library redeclaration (it would have been when +// the builtin carried a fixed unsigned-int prototype), and the fortify check +// still fires on an out-of-range constant. + +extern int umask(int); + +void call_user_umask(void) { + umask(0644); + umask(0xFFFF); // expected-warning {{'umask' argument sets non-file-permission bits (0177000); those bits are ignored}} +} diff --git a/clang/test/Sema/warn-fortify-source.c b/clang/test/Sema/warn-fortify-source.c index d0b519a516545..530fa8104bd56 100644 --- a/clang/test/Sema/warn-fortify-source.c +++ b/clang/test/Sema/warn-fortify-source.c @@ -28,6 +28,10 @@ void bzero(void *dst, size_t n); } #endif +// umask is recognized as a builtin by name; this header just supplies a +// realistic system-header declaration and the mode_t typedef used below. +#include "Inputs/warn-fortify-source-umask.h" + void call_memcpy(void) { char dst[10]; char src[20]; @@ -242,6 +246,18 @@ void call_sprintf(void) { sprintf(buf, "5%.1e", 9.f); // expected-warning {{'sprintf' will always overflow; destination buffer has size 6, but format string expands to at least 8}} } +void call_umask(mode_t runtime_mode) { + umask(0); + umask(022); + umask(0644); + umask(0777); + umask(01000); // expected-warning {{'umask' argument sets non-file-permission bits (01000); those bits are ignored}} + umask(0xFFFF); // expected-warning {{'umask' argument sets non-file-permission bits (0177000); those bits are ignored}} + umask(7777); // expected-warning {{'umask' argument sets non-file-permission bits (017000); those bits are ignored}} + umask(-1); // expected-warning {{'umask' argument sets non-file-permission bits (}} + umask(runtime_mode); // no warning, not a constant +} + #ifdef __cplusplus template <class> struct S { void mf() const { _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
