Author: kevans
Date: Wed Sep 25 17:32:43 2019
New Revision: 352695
URL: https://svnweb.freebsd.org/changeset/base/352695

Log:
  [1/3] Add mostly Linux-compatible file sealing support
  
  File sealing applies protections against certain actions
  (currently: write, growth, shrink) at the inode level. New fileops are added
  to accommodate seals - EINVAL is returned by fcntl(2) if they are not
  implemented.
  
  Reviewed by:  markj, kib
  Differential Revision:        https://reviews.freebsd.org/D21391

Modified:
  head/sys/kern/kern_descrip.c
  head/sys/kern/uipc_shm.c
  head/sys/sys/fcntl.h
  head/sys/sys/file.h
  head/sys/sys/mman.h

Modified: head/sys/kern/kern_descrip.c
==============================================================================
--- head/sys/kern/kern_descrip.c        Wed Sep 25 17:30:28 2019        (r352694)
+++ head/sys/kern/kern_descrip.c        Wed Sep 25 17:32:43 2019        (r352695)
@@ -489,7 +489,7 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_
        struct filedescent *fde;
        struct proc *p;
        struct vnode *vp;
-       int error, flg, tmp;
+       int error, flg, seals, tmp;
        uint64_t bsize;
        off_t foffset;
 
@@ -753,6 +753,25 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_
                vp = fp->f_vnode;
                error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK, flp,
                    F_POSIX);
+               fdrop(fp, td);
+               break;
+
+       case F_ADD_SEALS:
+               error = fget_unlocked(fdp, fd, &cap_no_rights, &fp, NULL);
+               if (error != 0)
+                       break;
+               error = fo_add_seals(fp, arg);
+               fdrop(fp, td);
+               break;
+
+       case F_GET_SEALS:
+               error = fget_unlocked(fdp, fd, &cap_no_rights, &fp, NULL);
+               if (error != 0)
+                       break;
+               if (fo_get_seals(fp, &seals) == 0)
+                       td->td_retval[0] = seals;
+               else
+                       error = EINVAL;
                fdrop(fp, td);
                break;
 

Modified: head/sys/kern/uipc_shm.c
==============================================================================
--- head/sys/kern/uipc_shm.c    Wed Sep 25 17:30:28 2019        (r352694)
+++ head/sys/kern/uipc_shm.c    Wed Sep 25 17:32:43 2019        (r352695)
@@ -119,6 +119,8 @@ static void shm_init(void *arg);
 static void    shm_insert(char *path, Fnv32_t fnv, struct shmfd *shmfd);
 static struct shmfd *shm_lookup(char *path, Fnv32_t fnv);
 static int     shm_remove(char *path, Fnv32_t fnv, struct ucred *ucred);
+static int     shm_dotruncate_locked(struct shmfd *shmfd, off_t length,
+    void *rl_cookie);
 
 static fo_rdwr_t       shm_read;
 static fo_rdwr_t       shm_write;
@@ -131,6 +133,8 @@ static fo_chown_t   shm_chown;
 static fo_seek_t       shm_seek;
 static fo_fill_kinfo_t shm_fill_kinfo;
 static fo_mmap_t       shm_mmap;
+static fo_get_seals_t  shm_get_seals;
+static fo_add_seals_t  shm_add_seals;
 
 /* File descriptor operations. */
 struct fileops shm_ops = {
@@ -148,6 +152,8 @@ struct fileops shm_ops = {
        .fo_seek = shm_seek,
        .fo_fill_kinfo = shm_fill_kinfo,
        .fo_mmap = shm_mmap,
+       .fo_get_seals = shm_get_seals,
+       .fo_add_seals = shm_add_seals,
        .fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE
 };
 
@@ -316,8 +322,10 @@ shm_write(struct file *fp, struct uio *uio, struct ucr
                rl_cookie = rangelock_wlock(&shmfd->shm_rl, uio->uio_offset,
                    uio->uio_offset + uio->uio_resid, &shmfd->shm_mtx);
        }
-
-       error = uiomove_object(shmfd->shm_object, shmfd->shm_size, uio);
+       if ((shmfd->shm_seals & F_SEAL_WRITE) != 0)
+               error = EPERM;
+       else
+               error = uiomove_object(shmfd->shm_object, shmfd->shm_size, uio);
        rangelock_unlock(&shmfd->shm_rl, rl_cookie, &shmfd->shm_mtx);
        foffset_unlock_uio(fp, uio, flags);
        return (error);
@@ -412,8 +420,8 @@ shm_close(struct file *fp, struct thread *td)
        return (0);
 }
 
-int
-shm_dotruncate(struct shmfd *shmfd, off_t length)
+static int
+shm_dotruncate_locked(struct shmfd *shmfd, off_t length, void *rl_cookie)
 {
        vm_object_t object;
        vm_page_t m;
@@ -423,23 +431,23 @@ shm_dotruncate(struct shmfd *shmfd, off_t length)
 
        KASSERT(length >= 0, ("shm_dotruncate: length < 0"));
        object = shmfd->shm_object;
-       VM_OBJECT_WLOCK(object);
-       if (length == shmfd->shm_size) {
-               VM_OBJECT_WUNLOCK(object);
+       VM_OBJECT_ASSERT_WLOCKED(object);
+       rangelock_cookie_assert(rl_cookie, RA_WLOCKED);
+       if (length == shmfd->shm_size)
                return (0);
-       }
        nobjsize = OFF_TO_IDX(length + PAGE_MASK);
 
        /* Are we shrinking?  If so, trim the end. */
        if (length < shmfd->shm_size) {
+               if ((shmfd->shm_seals & F_SEAL_SHRINK) != 0)
+                       return (EPERM);
+
                /*
                 * Disallow any requests to shrink the size if this
                 * object is mapped into the kernel.
                 */
-               if (shmfd->shm_kmappings > 0) {
-                       VM_OBJECT_WUNLOCK(object);
+               if (shmfd->shm_kmappings > 0)
                        return (EBUSY);
-               }
 
                /*
                 * Zero the truncated part of the last page.
@@ -499,12 +507,13 @@ retry:
                swap_release_by_cred(delta, object->cred);
                object->charge -= delta;
        } else {
+               if ((shmfd->shm_seals & F_SEAL_GROW) != 0)
+                       return (EPERM);
+
                /* Try to reserve additional swap space. */
                delta = IDX_TO_OFF(nobjsize - object->size);
-               if (!swap_reserve_by_cred(delta, object->cred)) {
-                       VM_OBJECT_WUNLOCK(object);
+               if (!swap_reserve_by_cred(delta, object->cred))
                        return (ENOMEM);
-               }
                object->charge += delta;
        }
        shmfd->shm_size = length;
@@ -513,10 +522,24 @@ retry:
        shmfd->shm_mtime = shmfd->shm_ctime;
        mtx_unlock(&shm_timestamp_lock);
        object->size = nobjsize;
-       VM_OBJECT_WUNLOCK(object);
        return (0);
 }
 
+int
+shm_dotruncate(struct shmfd *shmfd, off_t length)
+{
+       void *rl_cookie;
+       int error;
+
+       rl_cookie = rangelock_wlock(&shmfd->shm_rl, 0, OFF_MAX,
+           &shmfd->shm_mtx);
+       VM_OBJECT_WLOCK(shmfd->shm_object);
+       error = shm_dotruncate_locked(shmfd, length, rl_cookie);
+       VM_OBJECT_WUNLOCK(shmfd->shm_object);
+       rangelock_unlock(&shmfd->shm_rl, rl_cookie, &shmfd->shm_mtx);
+       return (error);
+}
+
 /*
  * shmfd object management including creation and reference counting
  * routines.
@@ -878,10 +901,13 @@ shm_mmap(struct file *fp, vm_map_t map, vm_offset_t *a
        vm_prot_t maxprot;
        int error;
        bool writecnt;
+       void *rl_cookie;
 
        shmfd = fp->f_data;
        maxprot = VM_PROT_NONE;
 
+       rl_cookie = rangelock_rlock(&shmfd->shm_rl, 0, objsize,
+           &shmfd->shm_mtx);
        /* FREAD should always be set. */
        if ((fp->f_flag & FREAD) != 0)
                maxprot |= VM_PROT_EXECUTE | VM_PROT_READ;
@@ -890,9 +916,16 @@ shm_mmap(struct file *fp, vm_map_t map, vm_offset_t *a
 
        writecnt = (flags & MAP_SHARED) != 0 && (prot & VM_PROT_WRITE) != 0;
 
+       if (writecnt && (shmfd->shm_seals & F_SEAL_WRITE) != 0) {
+               error = EPERM;
+               goto out;
+       }
+
        /* Don't permit shared writable mappings on read-only descriptors. */
-       if (writecnt && (maxprot & VM_PROT_WRITE) == 0)
-               return (EACCES);
+       if (writecnt && (maxprot & VM_PROT_WRITE) == 0) {
+               error = EACCES;
+               goto out;
+       }
        maxprot &= cap_maxprot;
 
        /* See comment in vn_mmap(). */
@@ -900,13 +933,15 @@ shm_mmap(struct file *fp, vm_map_t map, vm_offset_t *a
 #ifdef _LP64
            objsize > OFF_MAX ||
 #endif
-           foff < 0 || foff > OFF_MAX - objsize)
-               return (EINVAL);
+           foff < 0 || foff > OFF_MAX - objsize) {
+               error = EINVAL;
+               goto out;
+       }
 
 #ifdef MAC
        error = mac_posixshm_check_mmap(td->td_ucred, shmfd, prot, flags);
        if (error != 0)
-               return (error);
+               goto out;
 #endif
        
        mtx_lock(&shm_timestamp_lock);
@@ -924,6 +959,8 @@ shm_mmap(struct file *fp, vm_map_t map, vm_offset_t *a
                            objsize);
                vm_object_deallocate(shmfd->shm_object);
        }
+out:
+       rangelock_unlock(&shmfd->shm_rl, rl_cookie, &shmfd->shm_mtx);
        return (error);
 }
 
@@ -1129,6 +1166,57 @@ shm_fill_kinfo(struct file *fp, struct kinfo_file *kif
        res = shm_fill_kinfo_locked(fp->f_data, kif, false);
        sx_sunlock(&shm_dict_lock);
        return (res);
+}
+
+static int
+shm_add_seals(struct file *fp, int seals)
+{
+       struct shmfd *shmfd;
+       void *rl_cookie;
+       vm_ooffset_t writemappings;
+       int error, nseals;
+
+       error = 0;
+       shmfd = fp->f_data;
+       rl_cookie = rangelock_wlock(&shmfd->shm_rl, 0, OFF_MAX,
+           &shmfd->shm_mtx);
+
+       /* Even already-set seals should result in EPERM. */
+       if ((shmfd->shm_seals & F_SEAL_SEAL) != 0) {
+               error = EPERM;
+               goto out;
+       }
+       nseals = seals & ~shmfd->shm_seals;
+       if ((nseals & F_SEAL_WRITE) != 0) {
+               /*
+                * The rangelock above prevents writable mappings from being
+                * added after we've started applying seals.  The RLOCK here
+                * is to avoid torn reads on ILP32 arches as unmapping/reducing
+                * writemappings will be done without a rangelock.
+                */
+               VM_OBJECT_RLOCK(shmfd->shm_object);
+               writemappings = shmfd->shm_object->un_pager.swp.writemappings;
+               VM_OBJECT_RUNLOCK(shmfd->shm_object);
+               /* kmappings are also writable */
+               if (writemappings > 0) {
+                       error = EBUSY;
+                       goto out;
+               }
+       }
+       shmfd->shm_seals |= nseals;
+out:
+       rangelock_unlock(&shmfd->shm_rl, rl_cookie, &shmfd->shm_mtx);
+       return (error);
+}
+
+static int
+shm_get_seals(struct file *fp, int *seals)
+{
+       struct shmfd *shmfd;
+
+       shmfd = fp->f_data;
+       *seals = shmfd->shm_seals;
+       return (0);
 }
 
 static int

Modified: head/sys/sys/fcntl.h
==============================================================================
--- head/sys/sys/fcntl.h        Wed Sep 25 17:30:28 2019        (r352694)
+++ head/sys/sys/fcntl.h        Wed Sep 25 17:32:43 2019        (r352695)
@@ -248,7 +248,15 @@ typedef    __pid_t         pid_t;
 #endif
 #if __BSD_VISIBLE
 #define        F_DUP2FD_CLOEXEC 18             /* Like F_DUP2FD, but FD_CLOEXEC is set */
-#endif
+#define        F_ADD_SEALS     19
+#define        F_GET_SEALS     20
+
+/* Seals (F_ADD_SEALS, F_GET_SEALS). */
+#define        F_SEAL_SEAL     0x0001          /* Prevent adding sealings */
+#define        F_SEAL_SHRINK   0x0002          /* May not shrink */
+#define        F_SEAL_GROW     0x0004          /* May not grow */
+#define        F_SEAL_WRITE    0x0008          /* May not write */
+#endif /* __BSD_VISIBLE */
 
 /* file descriptor flags (F_GETFD, F_SETFD) */
 #define        FD_CLOEXEC      1               /* close-on-exec flag */

Modified: head/sys/sys/file.h
==============================================================================
--- head/sys/sys/file.h Wed Sep 25 17:30:28 2019        (r352694)
+++ head/sys/sys/file.h Wed Sep 25 17:32:43 2019        (r352695)
@@ -123,6 +123,8 @@ typedef int fo_mmap_t(struct file *fp, vm_map_t map, v
                    vm_size_t size, vm_prot_t prot, vm_prot_t cap_maxprot,
                    int flags, vm_ooffset_t foff, struct thread *td);
 typedef int fo_aio_queue_t(struct file *fp, struct kaiocb *job);
+typedef int fo_add_seals_t(struct file *fp, int flags);
+typedef int fo_get_seals_t(struct file *fp, int *flags);
 typedef        int fo_flags_t;
 
 struct fileops {
@@ -141,6 +143,8 @@ struct fileops {
        fo_fill_kinfo_t *fo_fill_kinfo;
        fo_mmap_t       *fo_mmap;
        fo_aio_queue_t  *fo_aio_queue;
+       fo_add_seals_t  *fo_add_seals;
+       fo_get_seals_t  *fo_get_seals;
        fo_flags_t      fo_flags;       /* DFLAG_* below */
 };
 
@@ -424,6 +428,24 @@ fo_aio_queue(struct file *fp, struct kaiocb *job)
 {
 
        return ((*fp->f_ops->fo_aio_queue)(fp, job));
+}
+
+static __inline int
+fo_add_seals(struct file *fp, int seals)
+{
+
+       if (fp->f_ops->fo_add_seals == NULL)
+               return (EINVAL);
+       return ((*fp->f_ops->fo_add_seals)(fp, seals));
+}
+
+static __inline int
+fo_get_seals(struct file *fp, int *seals)
+{
+
+       if (fp->f_ops->fo_get_seals == NULL)
+               return (EINVAL);
+       return ((*fp->f_ops->fo_get_seals)(fp, seals));
 }
 
 #endif /* _KERNEL */

Modified: head/sys/sys/mman.h
==============================================================================
--- head/sys/sys/mman.h Wed Sep 25 17:30:28 2019        (r352694)
+++ head/sys/sys/mman.h Wed Sep 25 17:32:43 2019        (r352695)
@@ -238,6 +238,8 @@ struct shmfd {
 
        struct rangelock shm_rl;
        struct mtx      shm_mtx;
+
+       int             shm_seals;
 };
 #endif
 
_______________________________________________
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to