On 2026-03-26, Jori Koolstra <[email protected]> wrote:
> To get an operable version of an O_PATH file descriptor, it is possible
> to use openat(fd, ".", O_DIRECTORY) for directories, but other files
> currently require going through open("/proc/<pid>/fd/<nr>"), which
> depends on a functioning procfs.
> 
> This patch adds the OPENAT2_EMPTY_PATH flag to openat2(2). If passed,
> LOOKUP_EMPTY is set at path resolve time.
> 
> Note: This implies that you cannot rely anymore on disabling procfs from
> being mounted (e.g. inside a container without procfs mounted and with
> CAP_SYS_ADMIN dropped) to prevent O_PATH fds from being re-opened
> read-write.

Actually, this flag doesn't need to be openat2(2)-only -- all existing
kernels reject a pathname of "" with ENOENT. This means it is fine for
O_EMPTYPATH to act as a no-op on older kernels: no program will get an
unexpected result from setting it.

In my view, adding it to openat(2) is preferable because it means that
systemd et al. can use it (they currently block openat2(2) with
seccomp). This is what I did in the original openat2(2) patchset[1].

[1]: https://lore.kernel.org/lkml/[email protected]/

> Signed-off-by: Jori Koolstra <[email protected]>
> ---
>  fs/fcntl.c                   |  4 ++--
>  fs/open.c                    | 11 +++++------
>  include/linux/fcntl.h        |  5 ++++-
>  include/uapi/linux/openat2.h |  4 ++++
>  4 files changed, 15 insertions(+), 9 deletions(-)
> 
> diff --git a/fs/fcntl.c b/fs/fcntl.c
> index beab8080badf..d9ae3c71edfe 100644
> --- a/fs/fcntl.c
> +++ b/fs/fcntl.c
> @@ -1169,8 +1169,8 @@ static int __init fcntl_init(void)
>        * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
>        * is defined as O_NONBLOCK on some platforms and not on others.
>        */
> -     BUILD_BUG_ON(20 - 1 /* for O_RDONLY being 0 */ !=
> -             HWEIGHT32(
> +     BUILD_BUG_ON(21 - 1 /* for O_RDONLY being 0 */ !=
> +             HWEIGHT64(
>                       (VALID_OPEN_FLAGS & ~(O_NONBLOCK | O_NDELAY)) |
>                       __FMODE_EXEC));
>  
> diff --git a/fs/open.c b/fs/open.c
> index 91f1139591ab..e019ddecc73c 100644
> --- a/fs/open.c
> +++ b/fs/open.c
> @@ -1160,12 +1160,12 @@ struct file *kernel_file_open(const struct path *path, int flags,
>  EXPORT_SYMBOL_GPL(kernel_file_open);
>  
>  #define WILL_CREATE(flags)   (flags & (O_CREAT | __O_TMPFILE))
> -#define O_PATH_FLAGS         (O_DIRECTORY | O_NOFOLLOW | O_PATH | O_CLOEXEC)
> +#define O_PATH_FLAGS         (O_DIRECTORY | O_NOFOLLOW | O_PATH | O_CLOEXEC | OPENAT2_EMPTY_PATH)
>  
>  inline struct open_how build_open_how(int flags, umode_t mode)
>  {
>       struct open_how how = {
> -             .flags = flags & VALID_OPEN_FLAGS,
> +             .flags = ((unsigned int) flags) & VALID_OPEN_FLAGS,
>               .mode = mode & S_IALLUGO,
>       };
>  
> @@ -1185,9 +1185,6 @@ inline int build_open_flags(const struct open_how *how, struct open_flags *op)
>       int lookup_flags = 0;
>       int acc_mode = ACC_MODE(flags);
>  
> -     BUILD_BUG_ON_MSG(upper_32_bits(VALID_OPEN_FLAGS),
> -                      "struct open_flags doesn't yet handle flags > 32 bits");
> -
>       /*
>        * Strip flags that aren't relevant in determining struct open_flags.
>        */
> @@ -1281,6 +1278,8 @@ inline int build_open_flags(const struct open_how *how, struct open_flags *op)
>               lookup_flags |= LOOKUP_DIRECTORY;
>       if (!(flags & O_NOFOLLOW))
>               lookup_flags |= LOOKUP_FOLLOW;
> +     if (flags & OPENAT2_EMPTY_PATH)
> +             lookup_flags |= LOOKUP_EMPTY;
>  
>       if (how->resolve & RESOLVE_NO_XDEV)
>               lookup_flags |= LOOKUP_NO_XDEV;
> @@ -1362,7 +1361,7 @@ static int do_sys_openat2(int dfd, const char __user *filename,
>       if (unlikely(err))
>               return err;
>  
> -     CLASS(filename, name)(filename);
> +     CLASS(filename_flags, name)(filename, op.lookup_flags);
>       return FD_ADD(how->flags, do_file_open(dfd, name, &op));
>  }
>  
> diff --git a/include/linux/fcntl.h b/include/linux/fcntl.h
> index a332e79b3207..d1bb87ff70e3 100644
> --- a/include/linux/fcntl.h
> +++ b/include/linux/fcntl.h
> @@ -7,10 +7,13 @@
>  
>  /* List of all valid flags for the open/openat flags argument: */
>  #define VALID_OPEN_FLAGS \
> +      /* lower 32-bit flags */ \
>       (O_RDONLY | O_WRONLY | O_RDWR | O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC | \
>        O_APPEND | O_NDELAY | O_NONBLOCK | __O_SYNC | O_DSYNC | \
>        FASYNC | O_DIRECT | O_LARGEFILE | O_DIRECTORY | O_NOFOLLOW | \
> -      O_NOATIME | O_CLOEXEC | O_PATH | __O_TMPFILE)
> +      O_NOATIME | O_CLOEXEC | O_PATH | __O_TMPFILE | \
> +      /* upper 32-bit flags (openat2(2) only) */ \
> +      OPENAT2_EMPTY_PATH)
>  
>  /* List of all valid flags for the how->resolve argument: */
>  #define VALID_RESOLVE_FLAGS \
> diff --git a/include/uapi/linux/openat2.h b/include/uapi/linux/openat2.h
> index a5feb7604948..c34f32e6fa96 100644
> --- a/include/uapi/linux/openat2.h
> +++ b/include/uapi/linux/openat2.h
> @@ -40,4 +40,8 @@ struct open_how {
>                                       return -EAGAIN if that's not
>                                       possible. */
>  
> +/* openat2(2) exclusive flags are defined in the upper 32 bits of
> +   open_how->flags  */
> +#define OPENAT2_EMPTY_PATH   0x100000000 /* (1ULL << 32) */
> +
>  #endif /* _UAPI_LINUX_OPENAT2_H */
> -- 
> 2.53.0
> 

-- 
Aleksa Sarai
https://www.cyphar.com/

Attachment: signature.asc
Description: PGP signature

Reply via email to