The diff below is the first of three steps to unlock most of the network
syscalls.  It deals with the simpler problem of protecting the statistics
fields of the descriptor.  For that I'm using a simple IPL_NONE mutex.

The diff below also documents how the various fields are protected.  In the
next steps I'll deal with the reference counts, mainly `f_count' and the
one in `filehead', and then with the associated `f_data' pointer.

Note that the diff below doesn't deal with `f_offset' because it is
trickier and not needed for recvmsg(2), sendmsg(2) & friends, which are my
targets at the moment.

Comments?  Ok?

Index: kern/kern_descrip.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_descrip.c,v
retrieving revision 1.156
diff -u -p -r1.156 kern_descrip.c
--- kern/kern_descrip.c 2 May 2018 02:24:56 -0000       1.156
+++ kern/kern_descrip.c 7 May 2018 16:47:58 -0000
@@ -957,6 +957,7 @@ restart:
         */
        numfiles++;
        fp = pool_get(&file_pool, PR_WAITOK|PR_ZERO);
+       mtx_init(&fp->f_mtx, IPL_NONE);
        fp->f_iflags = FIF_LARVAL;
        if ((fq = p->p_fd->fd_ofiles[0]) != NULL) {
                LIST_INSERT_AFTER(fq, fp, f_list);
Index: kern/kern_sysctl.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sysctl.c,v
retrieving revision 1.333
diff -u -p -r1.333 kern_sysctl.c
--- kern/kern_sysctl.c  25 Apr 2018 10:29:16 -0000      1.333
+++ kern/kern_sysctl.c  7 May 2018 16:15:18 -0000
@@ -1067,11 +1067,13 @@ fill_file(struct kinfo_file *kf, struct 
 
                if (suser(p) == 0 || p->p_ucred->cr_uid == fp->f_cred->cr_uid) {
                        kf->f_offset = fp->f_offset;
+                       mtx_enter(&fp->f_mtx);
                        kf->f_rxfer = fp->f_rxfer;
                        kf->f_rwfer = fp->f_wxfer;
                        kf->f_seek = fp->f_seek;
                        kf->f_rbytes = fp->f_rbytes;
                        kf->f_wbytes = fp->f_wbytes;
+                       mtx_leave(&fp->f_mtx);
                } else
                        kf->f_offset = -1;
        } else if (vp != NULL) {
Index: kern/sys_generic.c
===================================================================
RCS file: /cvs/src/sys/kern/sys_generic.c,v
retrieving revision 1.118
diff -u -p -r1.118 sys_generic.c
--- kern/sys_generic.c  27 Apr 2018 10:13:37 -0000      1.118
+++ kern/sys_generic.c  7 May 2018 16:13:02 -0000
@@ -203,8 +203,10 @@ dofilereadv(struct proc *p, int fd, stru
                        error = 0;
        cnt -= auio.uio_resid;
 
+       mtx_enter(&fp->f_mtx);
        fp->f_rxfer++;
        fp->f_rbytes += cnt;
+       mtx_leave(&fp->f_mtx);
 #ifdef KTRACE
        if (ktriov != NULL) {
                if (error == 0)
@@ -355,8 +357,10 @@ dofilewritev(struct proc *p, int fd, str
        }
        cnt -= auio.uio_resid;
 
+       mtx_enter(&fp->f_mtx);
        fp->f_wxfer++;
        fp->f_wbytes += cnt;
+       mtx_leave(&fp->f_mtx);
 #ifdef KTRACE
        if (ktriov != NULL) {
                if (error == 0)
Index: kern/uipc_syscalls.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_syscalls.c,v
retrieving revision 1.169
diff -u -p -r1.169 uipc_syscalls.c
--- kern/uipc_syscalls.c        27 Apr 2018 10:13:37 -0000      1.169
+++ kern/uipc_syscalls.c        7 May 2018 16:13:40 -0000
@@ -687,8 +687,10 @@ sendit(struct proc *p, int s, struct msg
        }
        if (error == 0) {
                *retsize = len - auio.uio_resid;
+               mtx_enter(&fp->f_mtx);
                fp->f_wxfer++;
                fp->f_wbytes += *retsize;
+               mtx_leave(&fp->f_mtx);
        }
 #ifdef KTRACE
        if (ktriov != NULL) {
@@ -902,8 +904,10 @@ recvit(struct proc *p, int s, struct msg
                mp->msg_controllen = len;
        }
        if (!error) {
+               mtx_enter(&fp->f_mtx);
                fp->f_rxfer++;
                fp->f_rbytes += *retsize;
+               mtx_leave(&fp->f_mtx);
        }
 out:
        FRELE(fp, p);
Index: kern/vfs_syscalls.c
===================================================================
RCS file: /cvs/src/sys/kern/vfs_syscalls.c,v
retrieving revision 1.282
diff -u -p -r1.282 vfs_syscalls.c
--- kern/vfs_syscalls.c 2 May 2018 02:24:56 -0000       1.282
+++ kern/vfs_syscalls.c 7 May 2018 16:13:02 -0000
@@ -1650,7 +1650,9 @@ sys_lseek(struct proc *p, void *v, regis
                }
        }
        *(off_t *)retval = fp->f_offset = newoff;
+       mtx_enter(&fp->f_mtx);
        fp->f_seek++;
+       mtx_leave(&fp->f_mtx);
        error = 0;
  bad:
        FRELE(fp, p);
Index: sys/file.h
===================================================================
RCS file: /cvs/src/sys/sys/file.h,v
retrieving revision 1.41
diff -u -p -r1.41 file.h
--- sys/file.h  25 Apr 2018 10:29:16 -0000      1.41
+++ sys/file.h  7 May 2018 16:52:11 -0000
@@ -37,6 +37,7 @@
 
 #else /* _KERNEL */
 #include <sys/queue.h>
+#include <sys/mutex.h>
 
 struct proc;
 struct uio;
@@ -61,26 +62,32 @@ struct      fileops {
 /*
  * Kernel descriptor table.
  * One entry for each open kernel vnode and socket.
+ *
+ *  Locks used to protect struct members in this file:
+ *     I       immutable after creation
+ *     f       per file `f_mtx'
+ *     k       kernel lock
  */
 struct file {
-       LIST_ENTRY(file) f_list;/* list of active files */
-       short   f_flag;         /* see fcntl.h */
+       LIST_ENTRY(file) f_list;/* [k] list of active files */
+       struct mutex f_mtx;
+       short   f_flag;         /* [I] see fcntl.h */
 #define        DTYPE_VNODE     1       /* file */
 #define        DTYPE_SOCKET    2       /* communications endpoint */
 #define        DTYPE_PIPE      3       /* pipe */
 #define        DTYPE_KQUEUE    4       /* event queue */
-       short   f_type;         /* descriptor type */
-       long    f_count;        /* reference count */
-       struct  ucred *f_cred;  /* credentials associated with descriptor */
-       struct  fileops *f_ops;
-       off_t   f_offset;
-       void    *f_data;        /* private data */
-       int     f_iflags;       /* internal flags */
-       u_int64_t f_rxfer;      /* total number of read transfers */
-       u_int64_t f_wxfer;      /* total number of write transfers */
-       u_int64_t f_seek;       /* total independent seek operations */
-       u_int64_t f_rbytes;     /* total bytes read */
-       u_int64_t f_wbytes;     /* total bytes written */
+       short   f_type;         /* [I] descriptor type */
+       long    f_count;        /* [k] reference count */
+       struct  ucred *f_cred;  /* [I] credentials associated with descriptor */
+       struct  fileops *f_ops; /* [I] file operation pointers */
+       off_t   f_offset;       /* [k] */
+       void    *f_data;        /* [k] private data */
+       int     f_iflags;       /* [k] internal flags */
+       uint64_t f_rxfer;       /* [f] total number of read transfers */
+       uint64_t f_wxfer;       /* [f] total number of write transfers */
+       uint64_t f_seek;        /* [f] total independent seek operations */
+       uint64_t f_rbytes;      /* [f] total bytes read */
+       uint64_t f_wbytes;      /* [f] total bytes written */
 };
 
 #define FIF_HASLOCK            0x01    /* descriptor holds advisory lock */

Reply via email to