Re: [Qemu-devel] [for-2.10 PATCH v4] 9pfs: local: fix fchmodat_nofollow() limitations
Tested-by: Zhi Yong Wu Regards, Zhi Yong Wu At 2017-08-10 00:40:57, "Greg Kurz" wrote: >This function has to ensure it doesn't follow a symlink that could be used >to escape the virtfs directory. This could be easily achieved if fchmodat() >on linux honored the AT_SYMLINK_NOFOLLOW flag as described in POSIX, but >it doesn't. There was a tentative to implement a new fchmodat2() syscall >with the correct semantics: > >https://patchwork.kernel.org/patch/9596301/ > >but it didn't gain much momentum. Also it was suggested to look at an O_PATH >based solution in the first place. > >The current implementation covers most use-cases, but it notably fails if: >- the target path has access rights equal to 0000 (openat() returns EPERM), > => once you've done chmod(0000) on a file, you can never chmod() again >- the target path is UNIX domain socket (openat() returns ENXIO) > => bind() of UNIX domain sockets fails if the file is on 9pfs > >The solution is to use O_PATH: openat() now succeeds in both cases, and we >can ensure the path isn't a symlink with fstat(). The associated entry in >"/proc/self/fd" can hence be safely passed to the regular chmod() syscall. > >The previous behavior is kept for older systems that don't have O_PATH. 
> >Signed-off-by: Greg Kurz >Reviewed-by: Eric Blake >--- >v4: - fixed #if condition >- moved out: label above #endif >- fixed typo in changelog >- added Eric's r-b > >v3: - O_PATH in a separate block of code >- added a reference to the fchmodat2() tentative in the changelog > >v2: - renamed OPENAT_DIR_O_PATH to O_PATH_9P_UTIL and use it as a replacement > for O_PATH to avoid build breaks on O_PATH-less systems >- keep current behavior for O_PATH-less systems >- added comments >- TODO in 2.11: add _nofollow suffix to openat_dir() and openat_file() >--- > hw/9pfs/9p-local.c | 43 --- > hw/9pfs/9p-util.h | 24 +++- > 2 files changed, 51 insertions(+), 16 deletions(-) > >diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c >index 6e478f4765ef..d9ef57d343c9 100644 >--- a/hw/9pfs/9p-local.c >+++ b/hw/9pfs/9p-local.c >@@ -333,17 +333,27 @@ update_map_file: > > static int fchmodat_nofollow(int dirfd, const char *name, mode_t mode) > { >+struct stat stbuf; > int fd, ret; > > /* FIXME: this should be handled with fchmodat(AT_SYMLINK_NOFOLLOW). >- * Unfortunately, the linux kernel doesn't implement it yet. As an >- * alternative, let's open the file and use fchmod() instead. This >- * may fail depending on the permissions of the file, but it is the >- * best we can do to avoid TOCTTOU. We first try to open read-only >- * in case name points to a directory. If that fails, we try write-only >- * in case name doesn't point to a directory. >+ * Unfortunately, the linux kernel doesn't implement it yet. > */ >-fd = openat_file(dirfd, name, O_RDONLY, 0); >+ >+ /* First, we clear non-racing symlinks out of the way. */ >+if (fstatat(dirfd, name, &stbuf, AT_SYMLINK_NOFOLLOW)) { >+return -1; >+} >+if (S_ISLNK(stbuf.st_mode)) { >+errno = ELOOP; >+return -1; >+} >+ >+/* Access modes are ignored when O_PATH is supported. We try O_RDONLY and >+ * O_WRONLY for old-systems that don't support O_PATH. 
>+ */ >+fd = openat_file(dirfd, name, O_RDONLY | O_PATH_9P_UTIL, 0); >+#if O_PATH_9P_UTIL == 0 > if (fd == -1) { > /* In case the file is writable-only and isn't a directory. */ > if (errno == EACCES) { >@@ -357,6 +367,25 @@ static int fchmodat_nofollow(int dirfd, const char *name, >mode_t mode) > return -1; > } > ret = fchmod(fd, mode); >+#else >+/* Now we handle racing symlinks. */ >+ret = fstat(fd, &stbuf); >+if (ret) { >+goto out; >+} >+if (S_ISLNK(stbuf.st_mode)) { >+errno = ELOOP; >+ret = -1; >+goto out; >+} >+ >+{ >+char *proc_path = g_strdup_printf("/proc/self/fd/%d", fd); >+ret = chmod(proc_path, mode); >+g_free(proc_path); >+} >+out: >+#endif > close_preserve_errno(fd); > return ret; > } >diff --git a/hw/9pfs/9p-util.h b/hw/9pfs/9p-util.h >index 91299a24b8af..dc0d2e29aa3b 100644 >--- a/hw/9pfs/9p-util.h >+++ b/hw/9pfs/9p-util.h >@@ -13,6 +13,12 @@ > #ifndef QEMU_9P_UTIL_H > #define QEMU_9P_UTIL_H > >+#ifdef O_PATH >+#define O_PATH_9P_UTIL O_PATH >+#else >+#define O_PATH_9P_UTIL 0 >+#endif >+ > static inline void close_preserve_errno(int fd) > { > int serrno = errno; >@@ -22,13 +28,8 @@ static inline void close_preserve_errno(int fd) > > static inline int openat_dir(int dirfd, const char *name) > { >-#ifdef O_PATH >-#define OPENAT_DIR_O_PATH O_PATH >-#else >-#define OPENAT_DIR_O_PATH 0 >-#endif > return openat(dirfd, name, >- O_DIRECTORY | O_RDONLY | O_NOFOLLOW |
Re: [Qemu-devel] [for-2.10 PATCH v4] 9pfs: local: fix fchmodat_nofollow() limitations
On Wed, 09 Aug 2017 18:40:57 +0200 Greg Kurz wrote: > This function has to ensure it doesn't follow a symlink that could be used > to escape the virtfs directory. This could be easily achieved if fchmodat() > on linux honored the AT_SYMLINK_NOFOLLOW flag as described in POSIX, but > it doesn't. There was a tentative to implement a new fchmodat2() syscall > with the correct semantics: > > https://patchwork.kernel.org/patch/9596301/ > > but it didn't gain much momentum. Also it was suggested to look at an O_PATH > based solution in the first place. > > The current implementation covers most use-cases, but it notably fails if: > - the target path has access rights equal to 0000 (openat() returns EPERM), > => once you've done chmod(0000) on a file, you can never chmod() again > - the target path is UNIX domain socket (openat() returns ENXIO) > => bind() of UNIX domain sockets fails if the file is on 9pfs > > The solution is to use O_PATH: openat() now succeeds in both cases, and we > can ensure the path isn't a symlink with fstat(). The associated entry in > "/proc/self/fd" can hence be safely passed to the regular chmod() syscall. > > The previous behavior is kept for older systems that don't have O_PATH. 
> > Signed-off-by: Greg Kurz > Reviewed-by: Eric Blake > --- > v4: - fixed #if condition > - moved out: label above #endif > - fixed typo in changelog > - added Eric's r-b > > v3: - O_PATH in a separate block of code > - added a reference to the fchmodat2() tentative in the changelog > > v2: - renamed OPENAT_DIR_O_PATH to O_PATH_9P_UTIL and use it as a replacement > for O_PATH to avoid build breaks on O_PATH-less systems > - keep current behavior for O_PATH-less systems > - added comments > - TODO in 2.11: add _nofollow suffix to openat_dir() and openat_file() > --- > hw/9pfs/9p-local.c | 43 --- > hw/9pfs/9p-util.h | 24 +++- > 2 files changed, 51 insertions(+), 16 deletions(-) > > diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c > index 6e478f4765ef..d9ef57d343c9 100644 > --- a/hw/9pfs/9p-local.c > +++ b/hw/9pfs/9p-local.c > @@ -333,17 +333,27 @@ update_map_file: > > static int fchmodat_nofollow(int dirfd, const char *name, mode_t mode) > { > +struct stat stbuf; > int fd, ret; > > /* FIXME: this should be handled with fchmodat(AT_SYMLINK_NOFOLLOW). > - * Unfortunately, the linux kernel doesn't implement it yet. As an > - * alternative, let's open the file and use fchmod() instead. This > - * may fail depending on the permissions of the file, but it is the > - * best we can do to avoid TOCTTOU. We first try to open read-only > - * in case name points to a directory. If that fails, we try write-only > - * in case name doesn't point to a directory. > + * Unfortunately, the linux kernel doesn't implement it yet. > */ > -fd = openat_file(dirfd, name, O_RDONLY, 0); > + > + /* First, we clear non-racing symlinks out of the way. */ > +if (fstatat(dirfd, name, &stbuf, AT_SYMLINK_NOFOLLOW)) { > +return -1; > +} > +if (S_ISLNK(stbuf.st_mode)) { > +errno = ELOOP; > +return -1; > +} > + > +/* Access modes are ignored when O_PATH is supported. We try O_RDONLY and > + * O_WRONLY for old-systems that don't support O_PATH. 
> + */ > +fd = openat_file(dirfd, name, O_RDONLY | O_PATH_9P_UTIL, 0); > +#if O_PATH_9P_UTIL == 0 > if (fd == -1) { > /* In case the file is writable-only and isn't a directory. */ > if (errno == EACCES) { > @@ -357,6 +367,25 @@ static int fchmodat_nofollow(int dirfd, const char > *name, mode_t mode) > return -1; > } > ret = fchmod(fd, mode); > +#else Oops, missing fd == -1 check... > +/* Now we handle racing symlinks. */ > +ret = fstat(fd, &stbuf); > +if (ret) { > +goto out; > +} > +if (S_ISLNK(stbuf.st_mode)) { > +errno = ELOOP; > +ret = -1; > +goto out; > +} > + > +{ > +char *proc_path = g_strdup_printf("/proc/self/fd/%d", fd); > +ret = chmod(proc_path, mode); > +g_free(proc_path); > +} > +out: > +#endif > close_preserve_errno(fd); > return ret; > } > diff --git a/hw/9pfs/9p-util.h b/hw/9pfs/9p-util.h > index 91299a24b8af..dc0d2e29aa3b 100644 > --- a/hw/9pfs/9p-util.h > +++ b/hw/9pfs/9p-util.h > @@ -13,6 +13,12 @@ > #ifndef QEMU_9P_UTIL_H > #define QEMU_9P_UTIL_H > > +#ifdef O_PATH > +#define O_PATH_9P_UTIL O_PATH > +#else > +#define O_PATH_9P_UTIL 0 > +#endif > + > static inline void close_preserve_errno(int fd) > { > int serrno = errno; > @@ -22,13 +28,8 @@ static inline void close_preserve_errno(int fd) > > static inline int openat_dir(int dirfd, const char *name) > { > -#ifdef O_PATH > -#define OPENAT_DIR_O_PATH O_PATH > -#else > -#define OPENAT_DIR_O_PATH 0 >
[Qemu-devel] [for-2.10 PATCH v4] 9pfs: local: fix fchmodat_nofollow() limitations
This function has to ensure it doesn't follow a symlink that could be used to escape the virtfs directory. This could be easily achieved if fchmodat() on linux honored the AT_SYMLINK_NOFOLLOW flag as described in POSIX, but it doesn't. There was a tentative to implement a new fchmodat2() syscall with the correct semantics: https://patchwork.kernel.org/patch/9596301/ but it didn't gain much momentum. Also it was suggested to look at an O_PATH based solution in the first place. The current implementation covers most use-cases, but it notably fails if: - the target path has access rights equal to 0000 (openat() returns EPERM), => once you've done chmod(0000) on a file, you can never chmod() again - the target path is UNIX domain socket (openat() returns ENXIO) => bind() of UNIX domain sockets fails if the file is on 9pfs The solution is to use O_PATH: openat() now succeeds in both cases, and we can ensure the path isn't a symlink with fstat(). The associated entry in "/proc/self/fd" can hence be safely passed to the regular chmod() syscall. The previous behavior is kept for older systems that don't have O_PATH. 
Signed-off-by: Greg Kurz Reviewed-by: Eric Blake --- v4: - fixed #if condition - moved out: label above #endif - fixed typo in changelog - added Eric's r-b v3: - O_PATH in a separate block of code - added a reference to the fchmodat2() tentative in the changelog v2: - renamed OPENAT_DIR_O_PATH to O_PATH_9P_UTIL and use it as a replacement for O_PATH to avoid build breaks on O_PATH-less systems - keep current behavior for O_PATH-less systems - added comments - TODO in 2.11: add _nofollow suffix to openat_dir() and openat_file() --- hw/9pfs/9p-local.c | 43 --- hw/9pfs/9p-util.h | 24 +++- 2 files changed, 51 insertions(+), 16 deletions(-) diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c index 6e478f4765ef..d9ef57d343c9 100644 --- a/hw/9pfs/9p-local.c +++ b/hw/9pfs/9p-local.c @@ -333,17 +333,27 @@ update_map_file: static int fchmodat_nofollow(int dirfd, const char *name, mode_t mode) { +struct stat stbuf; int fd, ret; /* FIXME: this should be handled with fchmodat(AT_SYMLINK_NOFOLLOW). - * Unfortunately, the linux kernel doesn't implement it yet. As an - * alternative, let's open the file and use fchmod() instead. This - * may fail depending on the permissions of the file, but it is the - * best we can do to avoid TOCTTOU. We first try to open read-only - * in case name points to a directory. If that fails, we try write-only - * in case name doesn't point to a directory. + * Unfortunately, the linux kernel doesn't implement it yet. */ -fd = openat_file(dirfd, name, O_RDONLY, 0); + + /* First, we clear non-racing symlinks out of the way. */ +if (fstatat(dirfd, name, &stbuf, AT_SYMLINK_NOFOLLOW)) { +return -1; +} +if (S_ISLNK(stbuf.st_mode)) { +errno = ELOOP; +return -1; +} + +/* Access modes are ignored when O_PATH is supported. We try O_RDONLY and + * O_WRONLY for old-systems that don't support O_PATH. + */ +fd = openat_file(dirfd, name, O_RDONLY | O_PATH_9P_UTIL, 0); +#if O_PATH_9P_UTIL == 0 if (fd == -1) { /* In case the file is writable-only and isn't a directory. 
*/ if (errno == EACCES) { @@ -357,6 +367,25 @@ static int fchmodat_nofollow(int dirfd, const char *name, mode_t mode) return -1; } ret = fchmod(fd, mode); +#else +/* Now we handle racing symlinks. */ +ret = fstat(fd, &stbuf); +if (ret) { +goto out; +} +if (S_ISLNK(stbuf.st_mode)) { +errno = ELOOP; +ret = -1; +goto out; +} + +{ +char *proc_path = g_strdup_printf("/proc/self/fd/%d", fd); +ret = chmod(proc_path, mode); +g_free(proc_path); +} +out: +#endif close_preserve_errno(fd); return ret; } diff --git a/hw/9pfs/9p-util.h b/hw/9pfs/9p-util.h index 91299a24b8af..dc0d2e29aa3b 100644 --- a/hw/9pfs/9p-util.h +++ b/hw/9pfs/9p-util.h @@ -13,6 +13,12 @@ #ifndef QEMU_9P_UTIL_H #define QEMU_9P_UTIL_H +#ifdef O_PATH +#define O_PATH_9P_UTIL O_PATH +#else +#define O_PATH_9P_UTIL 0 +#endif + static inline void close_preserve_errno(int fd) { int serrno = errno; @@ -22,13 +28,8 @@ static inline void close_preserve_errno(int fd) static inline int openat_dir(int dirfd, const char *name) { -#ifdef O_PATH -#define OPENAT_DIR_O_PATH O_PATH -#else -#define OPENAT_DIR_O_PATH 0 -#endif return openat(dirfd, name, - O_DIRECTORY | O_RDONLY | O_NOFOLLOW | OPENAT_DIR_O_PATH); + O_DIRECTORY | O_RDONLY | O_NOFOLLOW | O_PATH_9P_UTIL); } static inline int openat_file(int dirfd, const char *name, int flags, @@ -43,9 +44,14 @@ static inline int openat_file(int dirfd, const char *name, int flags, } serrno =