Struct file again.

f_flag isn’t modified often, so it’s modifacation can be atomic.
f_msgcount and f_rxfer, f_wxfer, f_seek, f_rbytes, f_wbytes can be protected by 
rwlock. 
f_offset protection is actual for vnodes only.
FIF_MARK and FIF_DEFER flags are used only by unpc garbage collector. This 
flags can
be moved out from f_iflags, for example to f_unpc_flags, and use their own 
protection.
FIF_HASLOCK checked only in vn_closefile(), but this flag doesn’t indicate 
actual vnode lock
state, because VOP_ADVLOCK()’s return value is not checked. May be it can be 
replaced by
 new vn_islocked() function, which will check actual vnode lock possibility and 
lock state?
only FIF_LARVAL remains in f_iflags, this flag sets only once, so it’s 
modification can be atomic.
f_count may be modified and checked under rwlock, but I think atomic ops are 
better on smp,
afaik, with uniprocessor kernel simple increment/decrement over volatile 
variable will be enough.

This modification doesn’t break pstat, all related FIF_* flags can be set in 
kinfo_file struct.  

f_offset protection can be done like in patch below. it is just 
proof-of-concept. I think f_offset 
protection stuff can be moved to external struct, which will be stored in hash 
with fp address as key.

FIF_MARK and FIF_DEFER stuff can be moved to external struct too, I suppose.

Index: compat/common/compat_dir.c
===================================================================
RCS file: /cvs/src/sys/compat/common/compat_dir.c,v
retrieving revision 1.11
diff -u -p -r1.11 compat_dir.c
--- compat/common/compat_dir.c  16 Dec 2014 21:25:28 -0000      1.11
+++ compat/common/compat_dir.c  9 Apr 2015 10:40:55 -0000
@@ -51,7 +51,6 @@ readdir_with_callback(struct file *fp, o
        struct iovec aiov;
        int eofflag = 0;
        int error, len, reclen;
-       off_t newoff = *off;
        struct vnode *vp;
        struct vattr va;
                
@@ -84,10 +83,16 @@ again:
        auio.uio_segflg = UIO_SYSSPACE;
        auio.uio_procp = curproc;
        auio.uio_resid = buflen;
-       auio.uio_offset = newoff;
-
+       if (&fp->f_offset == off)
+               foffset_lock(fp, &auio.uio_offset);
+       else
+               auio.uio_offset = *off;
        error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag);
-       *off = auio.uio_offset;
+       if (&fp->f_offset == off)
+               foffset_unlock(fp, &auio.uio_offset);
+       else
+               *off = auio.uio_offset;
+
        if (error)
                goto out;
 
Index: kern/kern_descrip.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_descrip.c,v
retrieving revision 1.116
diff -u -p -r1.116 kern_descrip.c
--- kern/kern_descrip.c 19 Jan 2015 01:19:17 -0000      1.116
+++ kern/kern_descrip.c 9 Apr 2015 10:41:10 -0000
@@ -61,6 +61,7 @@
 #include <sys/event.h>
 #include <sys/pool.h>
 #include <sys/ktrace.h>
+#include <sys/rwlock.h>
 
 #include <sys/pipe.h>
 
@@ -451,9 +452,9 @@ restart:
                        if (fl.l_start == 0 && fl.l_len < 0) {
                                /* lockf(3) compliance hack */
                                fl.l_len = -fl.l_len;
-                               fl.l_start = fp->f_offset - fl.l_len;
+                               fl.l_start = foffset_get(fp) - fl.l_len;
                        } else
-                               fl.l_start += fp->f_offset;
+                               fl.l_start += foffset_get(fp);
                }
                switch (fl.l_type) {
 
@@ -514,9 +515,9 @@ restart:
                        if (fl.l_start == 0 && fl.l_len < 0) {
                                /* lockf(3) compliance hack */
                                fl.l_len = -fl.l_len;
-                               fl.l_start = fp->f_offset - fl.l_len;
+                               fl.l_start = foffset_get(fp) - fl.l_len;
                        } else
-                               fl.l_start += fp->f_offset;
+                               fl.l_start += foffset_get(fp);
                }
                if (fl.l_type != F_RDLCK &&
                    fl.l_type != F_WRLCK &&
@@ -869,6 +870,7 @@ restart:
         */
        nfiles++;
        fp = pool_get(&file_pool, PR_WAITOK|PR_ZERO);
+       rw_init(&fp->f_offset_lck, "f_offset_lck");
        fp->f_iflags = FIF_LARVAL;
        if ((fq = p->p_fd->fd_ofiles[0]) != NULL) {
                LIST_INSERT_AFTER(fq, fp, f_list);
@@ -1125,6 +1127,47 @@ fdrop(struct file *fp, struct proc *p)
        pool_put(&file_pool, fp);
 
        return (error);
+}
+
+off_t
+foffset_get(struct file *fp)
+{
+       off_t offset;
+
+       rw_enter_read(&fp->f_offset_lck);
+       offset = fp->f_offset;
+       rw_exit_read(&fp->f_offset_lck);
+       
+       return offset;
+}
+
+void
+foffset_lock(struct file *fp, off_t *foffset)
+{
+       KASSERT(foffset != NULL);
+       rw_enter_write(&fp->f_offset_lck);
+       while (fp->f_offset_lckf & FOFFSET_LOCKED) {
+               fp->f_offset_lckf |= FOFFSET_LOCK_WAITING;
+               rw_exit_write(&fp->f_offset_lck);
+               tsleep(&fp->f_offset_lckf, PUSER, "f_offset_lck", 0);
+               rw_enter_write(&fp->f_offset_lck);
+       }
+       fp->f_offset_lckf |= FOFFSET_LOCKED;
+       *foffset = fp->f_offset;
+       rw_exit_write(&fp->f_offset_lck);
+}
+
+void
+foffset_unlock(struct file *fp, off_t *foffset)
+{
+       rw_enter_write(&fp->f_offset_lck);
+       if (foffset != NULL)
+               fp->f_offset = *foffset;
+       KASSERT((fp->f_offset_lckf & FOFFSET_LOCKED) != 0);
+       if (fp->f_offset_lckf & FOFFSET_LOCK_WAITING)
+               wakeup(&fp->f_offset_lckf);
+       fp->f_offset_lckf=0;
+       rw_exit_write(&fp->f_offset_lck);
 }
 
 /*
Index: kern/kern_sysctl.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sysctl.c,v
retrieving revision 1.283
diff -u -p -r1.283 kern_sysctl.c
--- kern/kern_sysctl.c  11 Feb 2015 05:09:33 -0000      1.283
+++ kern/kern_sysctl.c  9 Apr 2015 10:41:10 -0000
@@ -1027,7 +1027,7 @@ fill_file(struct kinfo_file *kf, struct 
                kf->f_usecount = 0;
 
                if (suser(p, 0) == 0 || p->p_ucred->cr_uid == 
fp->f_cred->cr_uid) {
-                       kf->f_offset = fp->f_offset;
+                       kf->f_offset = foffset_get(fp);
                        kf->f_rxfer = fp->f_rxfer;
                        kf->f_rwfer = fp->f_wxfer;
                        kf->f_seek = fp->f_seek;
Index: kern/vfs_syscalls.c
===================================================================
RCS file: /cvs/src/sys/kern/vfs_syscalls.c,v
retrieving revision 1.216
diff -u -p -r1.216 vfs_syscalls.c
--- kern/vfs_syscalls.c 16 Dec 2014 18:30:04 -0000      1.216
+++ kern/vfs_syscalls.c 9 Apr 2015 10:41:10 -0000
@@ -1547,30 +1547,37 @@ sys_lseek(struct proc *p, void *v, regis
                special = 0;
        offarg = SCARG(uap, offset);
 
+       foffset_lock(fp, &newoff);
+
        switch (SCARG(uap, whence)) {
        case SEEK_CUR:
-               newoff = fp->f_offset + offarg;
+               newoff += offarg;
                break;
        case SEEK_END:
                error = VOP_GETATTR(vp, &vattr, cred, p);
-               if (error)
+               if (error) {
+                       foffset_unlock(fp, NULL);
                        goto bad;
+               }
                newoff = offarg + (off_t)vattr.va_size;
                break;
        case SEEK_SET:
                newoff = offarg;
                break;
        default:
+               foffset_unlock(fp, NULL);
                error = EINVAL;
                goto bad;
        }
        if (!special) {
                if (newoff < 0) {
+                       foffset_unlock(fp, NULL);
                        error = EINVAL;
                        goto bad;
                }
        }
-       *(off_t *)retval = fp->f_offset = newoff;
+       foffset_unlock(fp, &newoff);
+       *(off_t *)retval = newoff;
        fp->f_seek++;
        error = 0;
  bad:
@@ -2690,6 +2697,7 @@ sys_getdents(struct proc *p, void *v, re
        struct uio auio;
        struct iovec aiov;
        size_t buflen;
+       off_t foffset;
        int error, eofflag;
 
        buflen = SCARG(uap, buflen);
@@ -2702,12 +2710,15 @@ sys_getdents(struct proc *p, void *v, re
                error = EBADF;
                goto bad;
        }
-       if (fp->f_offset < 0) {
+       foffset_lock(fp, &foffset);
+       if (foffset < 0) {
+               foffset_unlock(fp, NULL);
                error = EINVAL;
                goto bad;
        }
        vp = fp->f_data;
        if (vp->v_type != VDIR) {
+               foffset_unlock(fp, NULL);
                error = EINVAL;
                goto bad;
        }
@@ -2720,9 +2731,9 @@ sys_getdents(struct proc *p, void *v, re
        auio.uio_procp = p;
        auio.uio_resid = buflen;
        vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
-       auio.uio_offset = fp->f_offset;
+       auio.uio_offset = foffset;
        error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag);
-       fp->f_offset = auio.uio_offset;
+       foffset_unlock(fp, &auio.uio_offset);
        VOP_UNLOCK(vp, 0, p);
        if (error)
                goto bad;
Index: kern/vfs_vnops.c
===================================================================
RCS file: /cvs/src/sys/kern/vfs_vnops.c,v
retrieving revision 1.80
diff -u -p -r1.80 vfs_vnops.c
--- kern/vfs_vnops.c    16 Dec 2014 18:30:04 -0000      1.80
+++ kern/vfs_vnops.c    9 Apr 2015 10:41:10 -0000
@@ -332,17 +332,30 @@ vn_read(struct file *fp, off_t *poff, st
        int error = 0;
        size_t count = uio->uio_resid;
        struct proc *p = uio->uio_procp;
+       off_t offset;
 
        /* no wrap around of offsets except on character devices */
-       if (vp->v_type != VCHR && count > LLONG_MAX - *poff)
+       if(&fp->f_offset == poff)
+               foffset_lock(fp, &offset);
+       else
+               offset=*poff;
+
+       if (vp->v_type != VCHR && count > LLONG_MAX - offset) {
+               if (&fp->f_offset == poff)
+                       foffset_unlock(fp, NULL);
                return (EINVAL);
+       }
 
        vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
-       uio->uio_offset = *poff;
+       uio->uio_offset = offset;
        if (vp->v_type != VDIR)
                error = VOP_READ(vp, uio,
                    (fp->f_flag & FNONBLOCK) ? IO_NDELAY : 0, cred);
-       *poff += count - uio->uio_resid;
+       offset += count - uio->uio_resid;
+       if(&fp->f_offset == poff)
+               foffset_unlock(fp, &offset);
+       else
+               *poff = offset;
        VOP_UNLOCK(vp, 0, p);
        return (error);
 }
@@ -357,24 +370,35 @@ vn_write(struct file *fp, off_t *poff, s
        struct proc *p = uio->uio_procp;
        int error, ioflag = IO_UNIT;
        size_t count;
+       off_t offset;
 
        /* note: pwrite/pwritev are unaffected by O_APPEND */
-       if (vp->v_type == VREG && (fp->f_flag & O_APPEND) &&
-           poff == &fp->f_offset)
-               ioflag |= IO_APPEND;
+       if (&fp->f_offset == poff) {
+               if (vp->v_type == VREG && (fp->f_flag & O_APPEND))
+                       ioflag |= IO_APPEND;
+               foffset_lock(fp, &offset);
+       } else {
+               offset = *poff;
+       }
+
        if (fp->f_flag & FNONBLOCK)
                ioflag |= IO_NDELAY;
        if ((fp->f_flag & FFSYNC) ||
            (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS)))
                ioflag |= IO_SYNC;
        vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
-       uio->uio_offset = *poff;
+       uio->uio_offset = offset;
        count = uio->uio_resid;
        error = VOP_WRITE(vp, uio, ioflag, cred);
        if (ioflag & IO_APPEND)
-               *poff = uio->uio_offset;
+               offset = uio->uio_offset;
+       else
+               offset += count - uio->uio_resid;
+       
+       if (&fp->f_offset == poff)
+               foffset_unlock(fp, &offset);
        else
-               *poff += count - uio->uio_resid;
+               *poff = offset;
        VOP_UNLOCK(vp, 0, p);
        return (error);
 }
Index: sys/file.h
===================================================================
RCS file: /cvs/src/sys/sys/file.h,v
retrieving revision 1.34
diff -u -p -r1.34 file.h
--- sys/file.h  18 Nov 2014 15:16:35 -0000      1.34
+++ sys/file.h  9 Apr 2015 10:41:18 -0000
@@ -75,6 +75,8 @@ struct file {
        long    f_msgcount;     /* references from message queue */
        struct  ucred *f_cred;  /* credentials associated with descriptor */
        struct  fileops *f_ops;
+       struct rwlock f_offset_lck;     /* f_offset lock */
+       short   f_offset_lckf;  /* f_offset lock flags */
        off_t   f_offset;
        void    *f_data;        /* private data */
        int     f_iflags;       /* internal flags */
@@ -85,6 +87,9 @@ struct file {
        u_int64_t f_wbytes;     /* total bytes written */
 };
 
+#define FOFFSET_LOCKED         0x01
+#define FOFFSET_LOCK_WAITING   0x02
+
 #define FIF_HASLOCK            0x01    /* descriptor holds advisory lock */
 #define FIF_LARVAL             0x02    /* not fully constructed, don't use */
 #define FIF_MARK               0x04    /* mark during gc() */
@@ -102,6 +107,10 @@ struct file {
 } while (0)
 
 int    fdrop(struct file *, struct proc *);
+
+off_t  foffset_get(struct file *);
+void   foffset_lock(struct file *, off_t *);
+void   foffset_unlock(struct file *, off_t *);
 
 LIST_HEAD(filelist, file);
 extern struct filelist filehead;       /* head of list of open files */ 


Reply via email to