On 2026-03-26, Jori Koolstra <[email protected]> wrote: > To get an operable version of an O_PATH file descriptor, it is possible > to use openat(fd, ".", O_DIRECTORY) for directories, but other files > currently require going through open("/proc/<pid>/fd/<nr>"), which > depends on a functioning procfs. > > This patch adds the OPENAT2_EMPTY_PATH flag to openat2(2). If passed, > LOOKUP_EMPTY is set at path resolve time. > > Note: This implies that you cannot rely anymore on disabling procfs from > being mounted (e.g. inside a container without procfs mounted and with > CAP_SYS_ADMIN dropped) to prevent O_PATH fds from being re-opened > read-write.
Actually, this flag doesn't need to be openat2(2) only -- all existing kernels will reject a pathname of "" with ENOENT. This means it is fine for O_EMPTYPATH to act as a no-op on older kernels (no program will get an unexpected result from O_EMPTYPATH). In my view, adding it to openat(2) is preferable because it means that systemd et al. can use it (they currently block openat2(2) with seccomp). This is what I did in the original openat2(2) patchset[1]. [1]: https://lore.kernel.org/lkml/[email protected]/ > Signed-off-by: Jori Koolstra <[email protected]> > --- > fs/fcntl.c | 4 ++-- > fs/open.c | 11 +++++------ > include/linux/fcntl.h | 5 ++++- > include/uapi/linux/openat2.h | 4 ++++ > 4 files changed, 15 insertions(+), 9 deletions(-) > > diff --git a/fs/fcntl.c b/fs/fcntl.c > index beab8080badf..d9ae3c71edfe 100644 > --- a/fs/fcntl.c > +++ b/fs/fcntl.c > @@ -1169,8 +1169,8 @@ static int __init fcntl_init(void) > * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY > * is defined as O_NONBLOCK on some platforms and not on others. 
> */ > - BUILD_BUG_ON(20 - 1 /* for O_RDONLY being 0 */ != > - HWEIGHT32( > + BUILD_BUG_ON(21 - 1 /* for O_RDONLY being 0 */ != > + HWEIGHT64( > (VALID_OPEN_FLAGS & ~(O_NONBLOCK | O_NDELAY)) | > __FMODE_EXEC)); > > diff --git a/fs/open.c b/fs/open.c > index 91f1139591ab..e019ddecc73c 100644 > --- a/fs/open.c > +++ b/fs/open.c > @@ -1160,12 +1160,12 @@ struct file *kernel_file_open(const struct path > *path, int flags, > EXPORT_SYMBOL_GPL(kernel_file_open); > > #define WILL_CREATE(flags) (flags & (O_CREAT | __O_TMPFILE)) > -#define O_PATH_FLAGS (O_DIRECTORY | O_NOFOLLOW | O_PATH | O_CLOEXEC) > +#define O_PATH_FLAGS (O_DIRECTORY | O_NOFOLLOW | O_PATH | O_CLOEXEC > | OPENAT2_EMPTY_PATH) > > inline struct open_how build_open_how(int flags, umode_t mode) > { > struct open_how how = { > - .flags = flags & VALID_OPEN_FLAGS, > + .flags = ((unsigned int) flags) & VALID_OPEN_FLAGS, > .mode = mode & S_IALLUGO, > }; > > @@ -1185,9 +1185,6 @@ inline int build_open_flags(const struct open_how *how, > struct open_flags *op) > int lookup_flags = 0; > int acc_mode = ACC_MODE(flags); > > - BUILD_BUG_ON_MSG(upper_32_bits(VALID_OPEN_FLAGS), > - "struct open_flags doesn't yet handle flags > 32 > bits"); > - > /* > * Strip flags that aren't relevant in determining struct open_flags. 
> */ > @@ -1281,6 +1278,8 @@ inline int build_open_flags(const struct open_how *how, > struct open_flags *op) > lookup_flags |= LOOKUP_DIRECTORY; > if (!(flags & O_NOFOLLOW)) > lookup_flags |= LOOKUP_FOLLOW; > + if (flags & OPENAT2_EMPTY_PATH) > + lookup_flags |= LOOKUP_EMPTY; > > if (how->resolve & RESOLVE_NO_XDEV) > lookup_flags |= LOOKUP_NO_XDEV; > @@ -1362,7 +1361,7 @@ static int do_sys_openat2(int dfd, const char __user > *filename, > if (unlikely(err)) > return err; > > - CLASS(filename, name)(filename); > + CLASS(filename_flags, name)(filename, op.lookup_flags); > return FD_ADD(how->flags, do_file_open(dfd, name, &op)); > } > > diff --git a/include/linux/fcntl.h b/include/linux/fcntl.h > index a332e79b3207..d1bb87ff70e3 100644 > --- a/include/linux/fcntl.h > +++ b/include/linux/fcntl.h > @@ -7,10 +7,13 @@ > > /* List of all valid flags for the open/openat flags argument: */ > #define VALID_OPEN_FLAGS \ > + /* lower 32-bit flags */ \ > (O_RDONLY | O_WRONLY | O_RDWR | O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC | > \ > O_APPEND | O_NDELAY | O_NONBLOCK | __O_SYNC | O_DSYNC | \ > FASYNC | O_DIRECT | O_LARGEFILE | O_DIRECTORY | O_NOFOLLOW | \ > - O_NOATIME | O_CLOEXEC | O_PATH | __O_TMPFILE) > + O_NOATIME | O_CLOEXEC | O_PATH | __O_TMPFILE | \ > + /* upper 32-bit flags (openat2(2) only) */ \ > + OPENAT2_EMPTY_PATH) > > /* List of all valid flags for the how->resolve argument: */ > #define VALID_RESOLVE_FLAGS \ > diff --git a/include/uapi/linux/openat2.h b/include/uapi/linux/openat2.h > index a5feb7604948..c34f32e6fa96 100644 > --- a/include/uapi/linux/openat2.h > +++ b/include/uapi/linux/openat2.h > @@ -40,4 +40,8 @@ struct open_how { > return -EAGAIN if that's not > possible. */ > > +/* openat2(2) exclusive flags are defined in the upper 32 bits of > + open_how->flags */ > +#define OPENAT2_EMPTY_PATH 0x100000000 /* (1ULL << 32) */ > + > #endif /* _UAPI_LINUX_OPENAT2_H */ > -- > 2.53.0 > -- Aleksa Sarai https://www.cyphar.com/
signature.asc
Description: PGP signature

