Author: mjg
Date: Thu Feb 20 16:58:19 2020
New Revision: 358172
URL: https://svnweb.freebsd.org/changeset/base/358172

Log:
  vfs: add realpathat syscall
  
  realpath(3) is used a lot, e.g., by clang, and is a major source of getcwd
  and fstatat calls. This can be done more efficiently in the kernel.
  
  This works by performing a regular lookup while saving the name and found
  parent directory. If the terminal vnode is a directory we can resolve it using
  usual means. Otherwise we can use the name saved by lookup and resolve the
  parent.
  
  See the review for sample syscall counts.
  
  Reviewed by:  kib
  Differential Revision:        https://reviews.freebsd.org/D23574

Modified:
  head/lib/libc/stdlib/realpath.c
  head/libexec/rtld-elf/rtld-libc/Makefile.inc
  head/sys/bsm/audit_kevents.h
  head/sys/compat/freebsd32/syscalls.master
  head/sys/kern/syscalls.master
  head/sys/kern/vfs_cache.c
  head/sys/security/audit/audit_bsm.c
  head/sys/sys/param.h
  head/usr.bin/truss/syscalls.c

Modified: head/lib/libc/stdlib/realpath.c
==============================================================================
--- head/lib/libc/stdlib/realpath.c     Thu Feb 20 16:38:30 2020        (r358171)
+++ head/lib/libc/stdlib/realpath.c     Thu Feb 20 16:58:19 2020        (r358172)
@@ -42,14 +42,19 @@ __FBSDID("$FreeBSD$");
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
+#include <fcntl.h>
 #include "un-namespace.h"
+#include "libc_private.h"
 
+extern int __realpathat(int fd, const char *path, char *buf, size_t size,
+    int flags);
+
 /*
  * Find the real name of path, by removing all ".", ".." and symlink
  * components.  Returns (resolved) on success, or (NULL) on failure,
  * in which case the path which caused trouble is left in (resolved).
  */
-static char *
+static char * __noinline
 realpath1(const char *path, char *resolved)
 {
        struct stat sb;
@@ -222,6 +227,10 @@ realpath(const char * __restrict path, char * __restri
                m = resolved = malloc(PATH_MAX);
                if (resolved == NULL)
                        return (NULL);
+       }
+       if (__getosreldate() >= 1300080) {
+               if (__realpathat(AT_FDCWD, path, resolved, PATH_MAX, 0) == 0)
+                       return (resolved);
        }
        res = realpath1(path, resolved);
        if (res == NULL)

Modified: head/libexec/rtld-elf/rtld-libc/Makefile.inc
==============================================================================
--- head/libexec/rtld-elf/rtld-libc/Makefile.inc        Thu Feb 20 16:38:30 2020        (r358171)
+++ head/libexec/rtld-elf/rtld-libc/Makefile.inc        Thu Feb 20 16:58:19 2020        (r358172)
@@ -49,7 +49,7 @@ _libc_other_objects= sigsetjmp lstat stat fstat fstata
     sysarch __sysctl issetugid __getcwd utrace \
     thr_self thr_kill pread mmap lseek _exit _fstat _fstatat _fstatfs \
     getdirentries _getdirentries _close _fcntl _open _openat _read \
-    _sigprocmask _write readlink _setjmp setjmp setjmperr
+    _sigprocmask _write readlink __realpathat _setjmp setjmp setjmperr
 
 
 # Finally add additional architecture-dependent libc dependencies

Modified: head/sys/bsm/audit_kevents.h
==============================================================================
--- head/sys/bsm/audit_kevents.h        Thu Feb 20 16:38:30 2020        (r358171)
+++ head/sys/bsm/audit_kevents.h        Thu Feb 20 16:58:19 2020        (r358172)
@@ -657,6 +657,7 @@
 #define        AUE_LGETUUID            43261   /* CADETS. */
 #define        AUE_EXECVEAT            43262   /* FreeBSD/Linux. */
 #define        AUE_SHMRENAME           43263   /* FreeBSD-specific. */
+#define        AUE_REALPATHAT          43264   /* FreeBSD-specific. */
 
 /*
  * Darwin BSM uses a number of AUE_O_* definitions, which are aliased to the

Modified: head/sys/compat/freebsd32/syscalls.master
==============================================================================
--- head/sys/compat/freebsd32/syscalls.master   Thu Feb 20 16:38:30 2020        (r358171)
+++ head/sys/compat/freebsd32/syscalls.master   Thu Feb 20 16:58:19 2020        (r358172)
@@ -1160,5 +1160,7 @@
 572    AUE_SHMRENAME   NOPROTO { int shm_rename(const char *path_from, \
                                    const char *path_to, int flags); }
 573    AUE_NULL        NOPROTO { int sigfastblock(int cmd, uint32_t *ptr); }
+574    AUE_REALPATHAT  NOPROTO { int __realpathat(int fd, const char *path, \
+                                   char *buf, size_t size, int flags); }
 
 ; vim: syntax=off

Modified: head/sys/kern/syscalls.master
==============================================================================
--- head/sys/kern/syscalls.master       Thu Feb 20 16:38:30 2020        (r358171)
+++ head/sys/kern/syscalls.master       Thu Feb 20 16:58:19 2020        (r358172)
@@ -3218,6 +3218,15 @@
                    _Inout_opt_ uint32_t *ptr
                );
        }
+574    AUE_REALPATHAT  STD {
+               int __realpathat(
+                   int fd,
+                   _In_z_ const char *path,
+                   _Out_writes_z_(size) char *buf,
+                   size_t size,
+                   int flags
+               );
+       }
 
 ; Please copy any additions and changes to the following compatability tables:
 ; sys/compat/freebsd32/syscalls.master

Modified: head/sys/kern/vfs_cache.c
==============================================================================
--- head/sys/kern/vfs_cache.c   Thu Feb 20 16:38:30 2020        (r358171)
+++ head/sys/kern/vfs_cache.c   Thu Feb 20 16:58:19 2020        (r358172)
@@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/capsicum.h>
 #include <sys/counter.h>
 #include <sys/filedesc.h>
 #include <sys/fnv_hash.h>
@@ -387,8 +388,12 @@ STATNODE_COUNTER(shrinking_skipped,
     "Number of times shrinking was already in progress");
 
 static void cache_zap_locked(struct namecache *ncp, bool neg_locked);
-static int vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir,
+static int vn_fullpath_hardlink(struct thread *td, struct nameidata *ndp, char **retbuf,
+    char **freebuf, size_t *buflen);
+static int vn_fullpath_any(struct thread *td, struct vnode *vp, struct vnode *rdir,
     char *buf, char **retbuf, size_t *buflen);
+static int vn_fullpath_dir(struct thread *td, struct vnode *vp, struct vnode *rdir,
+    char *buf, char **retbuf, size_t *len, bool slash_prefixed, size_t addend);
 
 static MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries");
 
@@ -2201,7 +2206,7 @@ vn_getcwd(struct thread *td, char *buf, char **retbuf,
        rdir = fdp->fd_rdir;
        vrefact(rdir);
        FILEDESC_SUNLOCK(fdp);
-       error = vn_fullpath1(td, cdir, rdir, buf, retbuf, buflen);
+       error = vn_fullpath_any(td, cdir, rdir, buf, retbuf, buflen);
        vrele(rdir);
        vrele(cdir);
 
@@ -2212,6 +2217,37 @@ vn_getcwd(struct thread *td, char *buf, char **retbuf,
        return (error);
 }
 
+static int
+kern___realpathat(struct thread *td, int fd, const char *path, char *buf,
+    size_t size, int flags, enum uio_seg pathseg)
+{
+       struct nameidata nd;
+       char *retbuf, *freebuf;
+       int error;
+
+       if (flags != 0)
+               return (EINVAL);
+       NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | SAVENAME | WANTPARENT | AUDITVNODE1,
+           pathseg, path, fd, &cap_fstat_rights, td);
+       if ((error = namei(&nd)) != 0)
+               return (error);
+       error = vn_fullpath_hardlink(td, &nd, &retbuf, &freebuf, &size);
+       if (error == 0) {
+               error = copyout(retbuf, buf, size);
+               free(freebuf, M_TEMP);
+       }
+       NDFREE(&nd, 0);
+       return (error);
+}
+
+int
+sys___realpathat(struct thread *td, struct __realpathat_args *uap)
+{
+
+       return (kern___realpathat(td, uap->fd, uap->path, uap->buf, uap->size,
+           uap->flags, UIO_USERSPACE));
+}
+
 /*
  * Retrieve the full filesystem path that correspond to a vnode from the name
  * cache (if available)
@@ -2235,7 +2271,7 @@ vn_fullpath(struct thread *td, struct vnode *vn, char 
        rdir = fdp->fd_rdir;
        vrefact(rdir);
        FILEDESC_SUNLOCK(fdp);
-       error = vn_fullpath1(td, vn, rdir, buf, retbuf, &buflen);
+       error = vn_fullpath_any(td, vn, rdir, buf, retbuf, &buflen);
        vrele(rdir);
 
        if (!error)
@@ -2263,7 +2299,7 @@ vn_fullpath_global(struct thread *td, struct vnode *vn
                return (EINVAL);
        buflen = MAXPATHLEN;
        buf = malloc(buflen, M_TEMP, M_WAITOK);
-       error = vn_fullpath1(td, vn, rootvnode, buf, retbuf, &buflen);
+       error = vn_fullpath_any(td, vn, rootvnode, buf, retbuf, &buflen);
        if (!error)
                *freebuf = buf;
        else
@@ -2334,40 +2370,40 @@ vn_vptocnp(struct vnode **vp, struct ucred *cred, char
 }
 
 /*
- * The magic behind vn_getcwd() and vn_fullpath().
+ * Resolve a directory to a pathname.
+ *
+ * The name of the directory can always be found in the namecache or fetched
+ * from the filesystem. There is also guaranteed to be only one parent, meaning
+ * we can just follow vnodes up until we find the root.
+ *
+ * The vnode must be referenced.
  */
 static int
-vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir,
-    char *buf, char **retbuf, size_t *len)
+vn_fullpath_dir(struct thread *td, struct vnode *vp, struct vnode *rdir,
+    char *buf, char **retbuf, size_t *len, bool slash_prefixed, size_t addend)
 {
-       int error, slash_prefixed;
 #ifdef KDTRACE_HOOKS
        struct vnode *startvp = vp;
 #endif
        struct vnode *vp1;
        size_t buflen;
+       int error;
 
+       VNPASS(vp->v_type == VDIR || VN_IS_DOOMED(vp), vp);
+       VNPASS(vp->v_usecount > 0, vp);
+
        buflen = *len;
 
-       buflen--;
-       buf[buflen] = '\0';
+       if (!slash_prefixed) {
+               MPASS(*len >= 2);
+               buflen--;
+               buf[buflen] = '\0';
+       }
+
        error = 0;
-       slash_prefixed = 0;
 
        SDT_PROBE1(vfs, namecache, fullpath, entry, vp);
        counter_u64_add(numfullpathcalls, 1);
-       vref(vp);
-       if (vp->v_type != VDIR) {
-               error = vn_vptocnp(&vp, td->td_ucred, buf, &buflen);
-               if (error)
-                       return (error);
-               if (buflen == 0) {
-                       vrele(vp);
-                       return (ENOMEM);
-               }
-               buf[--buflen] = '/';
-               slash_prefixed = 1;
-       }
        while (vp != rdir && vp != rootvnode) {
                /*
                 * The vp vnode must be already fully constructed,
@@ -2420,7 +2456,7 @@ vn_fullpath1(struct thread *td, struct vnode *vp, stru
                        break;
                }
                buf[--buflen] = '/';
-               slash_prefixed = 1;
+               slash_prefixed = true;
        }
        if (error)
                return (error);
@@ -2437,10 +2473,126 @@ vn_fullpath1(struct thread *td, struct vnode *vp, stru
        counter_u64_add(numfullpathfound, 1);
        vrele(vp);
 
-       SDT_PROBE3(vfs, namecache, fullpath, return, 0, startvp, buf + buflen);
        *retbuf = buf + buflen;
+       SDT_PROBE3(vfs, namecache, fullpath, return, 0, startvp, *retbuf);
        *len -= buflen;
+       *len += addend;
        return (0);
+}
+
+/*
+ * Resolve an arbitrary vnode to a pathname.
+ *
+ * Note 2 caveats:
+ * - hardlinks are not tracked, thus if the vnode is not a directory this can
+ *   resolve to a different path than the one used to find it
+ * - namecache is not mandatory, meaning names are not guaranteed to be added
+ *   (in which case resolving fails)
+ */
+static int
+vn_fullpath_any(struct thread *td, struct vnode *vp, struct vnode *rdir,
+    char *buf, char **retbuf, size_t *buflen)
+{
+       size_t orig_buflen;
+       bool slash_prefixed;
+       int error;
+
+       if (*buflen < 2)
+               return (EINVAL);
+
+       orig_buflen = *buflen;
+
+       vref(vp);
+       slash_prefixed = false;
+       if (vp->v_type != VDIR) {
+               *buflen -= 1;
+               buf[*buflen] = '\0';
+               error = vn_vptocnp(&vp, td->td_ucred, buf, buflen);
+               if (error)
+                       return (error);
+               if (*buflen == 0) {
+                       vrele(vp);
+                       return (ENOMEM);
+               }
+               *buflen -= 1;
+               buf[*buflen] = '/';
+               slash_prefixed = true;
+       }
+
+       return (vn_fullpath_dir(td, vp, rdir, buf, retbuf, buflen, slash_prefixed,
+           orig_buflen - *buflen));
+}
+
+/*
+ * Resolve an arbitrary vnode to a pathname (taking care of hardlinks).
+ *
+ * Since the namecache does not track hardlinks, the caller is expected to first
+ * look up the target vnode with SAVENAME | WANTPARENT flags passed to namei.
+ *
+ * Then we have 2 cases:
+ * - if the found vnode is a directory, the path can be constructed just by
+ *   following names up the chain
+ * - otherwise we populate the buffer with the saved name and start resolving
+ *   from the parent
+ */
+static int
+vn_fullpath_hardlink(struct thread *td, struct nameidata *ndp, char **retbuf,
+    char **freebuf, size_t *buflen)
+{
+       char *buf, *tmpbuf;
+       struct filedesc *fdp;
+       struct vnode *rdir;
+       struct componentname *cnp;
+       struct vnode *vp;
+       size_t addend;
+       int error;
+       bool slash_prefixed;
+
+       if (*buflen < 2)
+               return (EINVAL);
+       if (*buflen > MAXPATHLEN)
+               *buflen = MAXPATHLEN;
+
+       slash_prefixed = false;
+
+       buf = malloc(*buflen, M_TEMP, M_WAITOK);
+       fdp = td->td_proc->p_fd;
+       FILEDESC_SLOCK(fdp);
+       rdir = fdp->fd_rdir;
+       vrefact(rdir);
+       FILEDESC_SUNLOCK(fdp);
+
+       addend = 0;
+       vp = ndp->ni_vp;
+       if (vp->v_type != VDIR) {
+               cnp = &ndp->ni_cnd;
+               addend = cnp->cn_namelen + 2;
+               if (*buflen < addend) {
+                       error = ENOMEM;
+                       goto out_bad;
+               }
+               *buflen -= addend;
+               tmpbuf = buf + *buflen;
+               tmpbuf[0] = '/';
+               memcpy(&tmpbuf[1], cnp->cn_nameptr, cnp->cn_namelen);
+               tmpbuf[addend - 1] = '\0';
+               slash_prefixed = true;
+               vp = ndp->ni_dvp;
+       }
+
+       vref(vp);
+       error = vn_fullpath_dir(td, vp, rdir, buf, retbuf, buflen, slash_prefixed, addend);
+       if (error != 0)
+               goto out_bad;
+
+       vrele(rdir);
+       *freebuf = buf;
+
+       return (0);
+out_bad:
+       vrele(rdir);
+       free(buf, M_TEMP);
+       return (error);
 }
 
 struct vnode *

Modified: head/sys/security/audit/audit_bsm.c
==============================================================================
--- head/sys/security/audit/audit_bsm.c Thu Feb 20 16:38:30 2020        (r358171)
+++ head/sys/security/audit/audit_bsm.c Thu Feb 20 16:58:19 2020        (r358172)
@@ -830,6 +830,7 @@ kaudit_to_bsm(struct kaudit_record *kar, struct au_rec
        case AUE_UNLINK:
        case AUE_UNLINKAT:
        case AUE_UTIMES:
+       case AUE_REALPATHAT:
                ATFD1_TOKENS(1);
                UPATH1_VNODE1_TOKENS;
                break;

Modified: head/sys/sys/param.h
==============================================================================
--- head/sys/sys/param.h        Thu Feb 20 16:38:30 2020        (r358171)
+++ head/sys/sys/param.h        Thu Feb 20 16:58:19 2020        (r358172)
@@ -60,7 +60,7 @@
  *             in the range 5 to 9.
  */
 #undef __FreeBSD_version
-#define __FreeBSD_version 1300079      /* Master, propagated to newvers */
+#define __FreeBSD_version 1300080      /* Master, propagated to newvers */
 
 /*
  * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,

Modified: head/usr.bin/truss/syscalls.c
==============================================================================
--- head/usr.bin/truss/syscalls.c       Thu Feb 20 16:38:30 2020        (r358171)
+++ head/usr.bin/truss/syscalls.c       Thu Feb 20 16:58:19 2020        (r358172)
@@ -115,6 +115,9 @@ static struct syscall decoded_syscalls[] = {
          .args = { { Int, 0 }, { Int, 1 }, { CapRights | OUT, 2 } } },
        { .name = "__getcwd", .ret_type = 1, .nargs = 2,
          .args = { { Name | OUT, 0 }, { Int, 1 } } },
+       { .name = "__realpathat", .ret_type = 1, .nargs = 5,
+         .args = { { Atfd, 0 }, { Name | IN, 1 }, { Name | OUT, 2 },
+                   { Sizet, 3 }, { Int, 4} } },
        { .name = "_umtx_op", .ret_type = 1, .nargs = 5,
          .args = { { Ptr, 0 }, { Umtxop, 1 }, { LongHex, 2 }, { Ptr, 3 },
                    { Ptr, 4 } } },
_______________________________________________
[email protected] mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "[email protected]"

Reply via email to