Struct file again.
f_flag isn’t modified often, so it’s modifacation can be atomic.
f_msgcount and f_rxfer, f_wxfer, f_seek, f_rbytes, f_wbytes can be protected by
rwlock.
f_offset protection is actual for vnodes only.
FIF_MARK and FIF_DEFER flags are used only by unpc garbage collector. This
flags can
be moved out from f_iflags, for example to f_unpc_flags, and use their own
protection.
FIF_HASLOCK checked only in vn_closefile(), but this flag doesn’t indicate
actual vnode lock
state, because VOP_ADVLOCK()’s return value is not checked. May be it can be
replaced by
new vn_islocked() function, which will check actual vnode lock possibility and
lock state?
only FIF_LARVAL remains in f_iflags, this flag sets only once, so it’s
modification can be atomic.
f_count may be modified and checked under rwlock, but I think atomic ops are
better on smp,
afaik, with uniprocessor kernel simple increment/decrement over volatile
variable will be enough.
This modification doesn’t break pstat, all related FIF_* flags can be set in
kinfo_file struct.
f_offset protection can be done like in patch below. it is just
proof-of-concept. I think f_offset
protection stuff can be moved to external struct, which will be stored in hash
with fp address as key.
FIF_MARK and FIF_DEFER stuff can be moved to external struct too, I suppose.
Index: compat/common/compat_dir.c
===================================================================
RCS file: /cvs/src/sys/compat/common/compat_dir.c,v
retrieving revision 1.11
diff -u -p -r1.11 compat_dir.c
--- compat/common/compat_dir.c 16 Dec 2014 21:25:28 -0000 1.11
+++ compat/common/compat_dir.c 9 Apr 2015 10:40:55 -0000
@@ -51,7 +51,6 @@ readdir_with_callback(struct file *fp, o
struct iovec aiov;
int eofflag = 0;
int error, len, reclen;
- off_t newoff = *off;
struct vnode *vp;
struct vattr va;
@@ -84,10 +83,16 @@ again:
auio.uio_segflg = UIO_SYSSPACE;
auio.uio_procp = curproc;
auio.uio_resid = buflen;
- auio.uio_offset = newoff;
-
+ if (&fp->f_offset == off)
+ foffset_lock(fp, &auio.uio_offset);
+ else
+ auio.uio_offset = *off;
error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag);
- *off = auio.uio_offset;
+ if (&fp->f_offset == off)
+ foffset_unlock(fp, &auio.uio_offset);
+ else
+ *off = auio.uio_offset;
+
if (error)
goto out;
Index: kern/kern_descrip.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_descrip.c,v
retrieving revision 1.116
diff -u -p -r1.116 kern_descrip.c
--- kern/kern_descrip.c 19 Jan 2015 01:19:17 -0000 1.116
+++ kern/kern_descrip.c 9 Apr 2015 10:41:10 -0000
@@ -61,6 +61,7 @@
#include <sys/event.h>
#include <sys/pool.h>
#include <sys/ktrace.h>
+#include <sys/rwlock.h>
#include <sys/pipe.h>
@@ -451,9 +452,9 @@ restart:
if (fl.l_start == 0 && fl.l_len < 0) {
/* lockf(3) compliance hack */
fl.l_len = -fl.l_len;
- fl.l_start = fp->f_offset - fl.l_len;
+ fl.l_start = foffset_get(fp) - fl.l_len;
} else
- fl.l_start += fp->f_offset;
+ fl.l_start += foffset_get(fp);
}
switch (fl.l_type) {
@@ -514,9 +515,9 @@ restart:
if (fl.l_start == 0 && fl.l_len < 0) {
/* lockf(3) compliance hack */
fl.l_len = -fl.l_len;
- fl.l_start = fp->f_offset - fl.l_len;
+ fl.l_start = foffset_get(fp) - fl.l_len;
} else
- fl.l_start += fp->f_offset;
+ fl.l_start += foffset_get(fp);
}
if (fl.l_type != F_RDLCK &&
fl.l_type != F_WRLCK &&
@@ -869,6 +870,7 @@ restart:
*/
nfiles++;
fp = pool_get(&file_pool, PR_WAITOK|PR_ZERO);
+ rw_init(&fp->f_offset_lck, "f_offset_lck");
fp->f_iflags = FIF_LARVAL;
if ((fq = p->p_fd->fd_ofiles[0]) != NULL) {
LIST_INSERT_AFTER(fq, fp, f_list);
@@ -1125,6 +1127,47 @@ fdrop(struct file *fp, struct proc *p)
pool_put(&file_pool, fp);
return (error);
+}
+
+off_t
+foffset_get(struct file *fp)
+{
+ off_t offset;
+
+ rw_enter_read(&fp->f_offset_lck);
+ offset = fp->f_offset;
+ rw_exit_read(&fp->f_offset_lck);
+
+ return offset;
+}
+
+void
+foffset_lock(struct file *fp, off_t *foffset)
+{
+ KASSERT(foffset != NULL);
+ rw_enter_write(&fp->f_offset_lck);
+ while (fp->f_offset_lckf & FOFFSET_LOCKED) {
+ fp->f_offset_lckf |= FOFFSET_LOCK_WAITING;
+ rw_exit_write(&fp->f_offset_lck);
+ tsleep(&fp->f_offset_lckf, PUSER, "f_offset_lck", 0);
+ rw_enter_write(&fp->f_offset_lck);
+ }
+ fp->f_offset_lckf |= FOFFSET_LOCKED;
+ *foffset = fp->f_offset;
+ rw_exit_write(&fp->f_offset_lck);
+}
+
+void
+foffset_unlock(struct file *fp, off_t *foffset)
+{
+ rw_enter_write(&fp->f_offset_lck);
+ if (foffset != NULL)
+ fp->f_offset = *foffset;
+ KASSERT((fp->f_offset_lckf & FOFFSET_LOCKED) != 0);
+ if (fp->f_offset_lckf & FOFFSET_LOCK_WAITING)
+ wakeup(&fp->f_offset_lckf);
+ fp->f_offset_lckf=0;
+ rw_exit_write(&fp->f_offset_lck);
}
/*
Index: kern/kern_sysctl.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sysctl.c,v
retrieving revision 1.283
diff -u -p -r1.283 kern_sysctl.c
--- kern/kern_sysctl.c 11 Feb 2015 05:09:33 -0000 1.283
+++ kern/kern_sysctl.c 9 Apr 2015 10:41:10 -0000
@@ -1027,7 +1027,7 @@ fill_file(struct kinfo_file *kf, struct
kf->f_usecount = 0;
if (suser(p, 0) == 0 || p->p_ucred->cr_uid ==
fp->f_cred->cr_uid) {
- kf->f_offset = fp->f_offset;
+ kf->f_offset = foffset_get(fp);
kf->f_rxfer = fp->f_rxfer;
kf->f_rwfer = fp->f_wxfer;
kf->f_seek = fp->f_seek;
Index: kern/vfs_syscalls.c
===================================================================
RCS file: /cvs/src/sys/kern/vfs_syscalls.c,v
retrieving revision 1.216
diff -u -p -r1.216 vfs_syscalls.c
--- kern/vfs_syscalls.c 16 Dec 2014 18:30:04 -0000 1.216
+++ kern/vfs_syscalls.c 9 Apr 2015 10:41:10 -0000
@@ -1547,30 +1547,37 @@ sys_lseek(struct proc *p, void *v, regis
special = 0;
offarg = SCARG(uap, offset);
+ foffset_lock(fp, &newoff);
+
switch (SCARG(uap, whence)) {
case SEEK_CUR:
- newoff = fp->f_offset + offarg;
+ newoff += offarg;
break;
case SEEK_END:
error = VOP_GETATTR(vp, &vattr, cred, p);
- if (error)
+ if (error) {
+ foffset_unlock(fp, NULL);
goto bad;
+ }
newoff = offarg + (off_t)vattr.va_size;
break;
case SEEK_SET:
newoff = offarg;
break;
default:
+ foffset_unlock(fp, NULL);
error = EINVAL;
goto bad;
}
if (!special) {
if (newoff < 0) {
+ foffset_unlock(fp, NULL);
error = EINVAL;
goto bad;
}
}
- *(off_t *)retval = fp->f_offset = newoff;
+ foffset_unlock(fp, &newoff);
+ *(off_t *)retval = newoff;
fp->f_seek++;
error = 0;
bad:
@@ -2690,6 +2697,7 @@ sys_getdents(struct proc *p, void *v, re
struct uio auio;
struct iovec aiov;
size_t buflen;
+ off_t foffset;
int error, eofflag;
buflen = SCARG(uap, buflen);
@@ -2702,12 +2710,15 @@ sys_getdents(struct proc *p, void *v, re
error = EBADF;
goto bad;
}
- if (fp->f_offset < 0) {
+ foffset_lock(fp, &foffset);
+ if (foffset < 0) {
+ foffset_unlock(fp, NULL);
error = EINVAL;
goto bad;
}
vp = fp->f_data;
if (vp->v_type != VDIR) {
+ foffset_unlock(fp, NULL);
error = EINVAL;
goto bad;
}
@@ -2720,9 +2731,9 @@ sys_getdents(struct proc *p, void *v, re
auio.uio_procp = p;
auio.uio_resid = buflen;
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
- auio.uio_offset = fp->f_offset;
+ auio.uio_offset = foffset;
error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag);
- fp->f_offset = auio.uio_offset;
+ foffset_unlock(fp, &auio.uio_offset);
VOP_UNLOCK(vp, 0, p);
if (error)
goto bad;
Index: kern/vfs_vnops.c
===================================================================
RCS file: /cvs/src/sys/kern/vfs_vnops.c,v
retrieving revision 1.80
diff -u -p -r1.80 vfs_vnops.c
--- kern/vfs_vnops.c 16 Dec 2014 18:30:04 -0000 1.80
+++ kern/vfs_vnops.c 9 Apr 2015 10:41:10 -0000
@@ -332,17 +332,30 @@ vn_read(struct file *fp, off_t *poff, st
int error = 0;
size_t count = uio->uio_resid;
struct proc *p = uio->uio_procp;
+ off_t offset;
/* no wrap around of offsets except on character devices */
- if (vp->v_type != VCHR && count > LLONG_MAX - *poff)
+ if(&fp->f_offset == poff)
+ foffset_lock(fp, &offset);
+ else
+ offset=*poff;
+
+ if (vp->v_type != VCHR && count > LLONG_MAX - offset) {
+ if (&fp->f_offset == poff)
+ foffset_unlock(fp, NULL);
return (EINVAL);
+ }
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
- uio->uio_offset = *poff;
+ uio->uio_offset = offset;
if (vp->v_type != VDIR)
error = VOP_READ(vp, uio,
(fp->f_flag & FNONBLOCK) ? IO_NDELAY : 0, cred);
- *poff += count - uio->uio_resid;
+ offset += count - uio->uio_resid;
+ if(&fp->f_offset == poff)
+ foffset_unlock(fp, &offset);
+ else
+ *poff = offset;
VOP_UNLOCK(vp, 0, p);
return (error);
}
@@ -357,24 +370,35 @@ vn_write(struct file *fp, off_t *poff, s
struct proc *p = uio->uio_procp;
int error, ioflag = IO_UNIT;
size_t count;
+ off_t offset;
/* note: pwrite/pwritev are unaffected by O_APPEND */
- if (vp->v_type == VREG && (fp->f_flag & O_APPEND) &&
- poff == &fp->f_offset)
- ioflag |= IO_APPEND;
+ if (&fp->f_offset == poff) {
+ if (vp->v_type == VREG && (fp->f_flag & O_APPEND))
+ ioflag |= IO_APPEND;
+ foffset_lock(fp, &offset);
+ } else {
+ offset = *poff;
+ }
+
if (fp->f_flag & FNONBLOCK)
ioflag |= IO_NDELAY;
if ((fp->f_flag & FFSYNC) ||
(vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS)))
ioflag |= IO_SYNC;
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
- uio->uio_offset = *poff;
+ uio->uio_offset = offset;
count = uio->uio_resid;
error = VOP_WRITE(vp, uio, ioflag, cred);
if (ioflag & IO_APPEND)
- *poff = uio->uio_offset;
+ offset = uio->uio_offset;
+ else
+ offset += count - uio->uio_resid;
+
+ if (&fp->f_offset == poff)
+ foffset_unlock(fp, &offset);
else
- *poff += count - uio->uio_resid;
+ *poff = offset;
VOP_UNLOCK(vp, 0, p);
return (error);
}
Index: sys/file.h
===================================================================
RCS file: /cvs/src/sys/sys/file.h,v
retrieving revision 1.34
diff -u -p -r1.34 file.h
--- sys/file.h 18 Nov 2014 15:16:35 -0000 1.34
+++ sys/file.h 9 Apr 2015 10:41:18 -0000
@@ -75,6 +75,8 @@ struct file {
long f_msgcount; /* references from message queue */
struct ucred *f_cred; /* credentials associated with descriptor */
struct fileops *f_ops;
+ struct rwlock f_offset_lck; /* f_offset lock */
+ short f_offset_lckf; /* f_offset lock flags */
off_t f_offset;
void *f_data; /* private data */
int f_iflags; /* internal flags */
@@ -85,6 +87,9 @@ struct file {
u_int64_t f_wbytes; /* total bytes written */
};
+#define FOFFSET_LOCKED 0x01
+#define FOFFSET_LOCK_WAITING 0x02
+
#define FIF_HASLOCK 0x01 /* descriptor holds advisory lock */
#define FIF_LARVAL 0x02 /* not fully constructed, don't use */
#define FIF_MARK 0x04 /* mark during gc() */
@@ -102,6 +107,10 @@ struct file {
} while (0)
int fdrop(struct file *, struct proc *);
+
+off_t foffset_get(struct file *);
+void foffset_lock(struct file *, off_t *);
+void foffset_unlock(struct file *, off_t *);
LIST_HEAD(filelist, file);
extern struct filelist filehead; /* head of list of open files */