On Thu, Nov 26, 2020 at 04:51:23PM +0100, Marcus MERIGHI wrote:
> Starting stack trace...
> panic(ffffffff81de557b) at panic+0x11d
> kerntrap(ffff8000229c1630) at kerntrap+0x114
> alltraps_kern_meltdown() at alltraps_kern_meltdown+0x7b
> fill_file(ffff800000ca8000,fffffd811c6a23d0,fffffd81012516f8,3,0,ffff800022735658)
>  at fil6
> sysctl_file(ffff8000229c1c58,4,b4769963000,ffff8000229c1c88,ffff8000227d8c98) 
> at sysctl_f2
> kern_sysctl(ffff8000229c1c54,5,b4769963000,ffff8000229c1c88,0,0) at 
> kern_sysctl+0x1d1
> sys_sysctl(ffff8000227d8c98,ffff8000229c1cf0,ffff8000229c1d50) at 
> sys_sysctl+0x184
> syscall(ffff8000229c1dc0) at syscall+0x389
> Xsyscall() at Xsyscall+0x128
> end of kernel

This trace looks like a bug in the fstat sysctl.  I guess that
NET_LOCK() sleeps and the allprocess ps_list is not protected.

Does calling fstat in a loop trigger the same bug?

When I tried to grab the netlock outside the process loop some years
ago I remember some other locking problems.  So my new idea is to
try to get the netlock, sleep, and if we did sleep, run the loop
from the beginning.

Does this diff fix the crash?

I am not sure if we want to go this way.  There is no guarantee
that fstat will terminate on a box with a lot of network traffic.
Anyway it would be good to know whether my fix matches the problem
you see.

bluhm

Index: kern/kern_rwlock.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/kern/kern_rwlock.c,v
retrieving revision 1.45
diff -u -p -r1.45 kern_rwlock.c
--- kern/kern_rwlock.c  2 Mar 2020 17:07:49 -0000       1.45
+++ kern/kern_rwlock.c  30 Nov 2020 16:42:39 -0000
@@ -39,7 +39,7 @@ void  rw_do_exit(struct rwlock *, unsigne
 #define RW_SPINS       1000
 
 #ifdef MULTIPROCESSOR
-#define rw_cas(p, o, n)        (atomic_cas_ulong(p, o, n) != o)
+#define rw_cas(p, o, n)        (atomic_cas_ulong((p), (o), (n)) != (o))
 #else
 static inline int
 rw_cas(volatile unsigned long *p, unsigned long o, unsigned long n)
@@ -126,6 +126,26 @@ rw_enter_write(struct rwlock *rwl)
                    LOP_EXCLUSIVE | LOP_NEWORDER, NULL);
                WITNESS_LOCK(&rwl->rwl_lock_obj, LOP_EXCLUSIVE);
        }
+}
+
+int
+rw_enter_write_try(struct rwlock *rwl)
+{
+       struct proc *p = curproc;
+       int error = 0;
+
+       if (__predict_false(rw_cas(&rwl->rwl_owner, 0,
+           RW_PROC(p) | RWLOCK_WRLOCK))) {
+               error = rw_enter(rwl, RW_WRITE | RW_SLEEPFAIL);
+               if (error)
+                       rw_cas(&rwl->rwl_owner, RW_PROC(p) | RWLOCK_WRLOCK, 0);
+       } else {
+               membar_enter_after_atomic();
+               WITNESS_CHECKORDER(&rwl->rwl_lock_obj,
+                   LOP_EXCLUSIVE | LOP_NEWORDER, NULL);
+               WITNESS_LOCK(&rwl->rwl_lock_obj, LOP_EXCLUSIVE);
+       }
+       return error;
 }
 
 void
Index: kern/kern_sysctl.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/kern/kern_sysctl.c,v
retrieving revision 1.383
diff -u -p -r1.383 kern_sysctl.c
--- kern/kern_sysctl.c  16 Nov 2020 06:42:12 -0000      1.383
+++ kern/kern_sysctl.c  30 Nov 2020 16:44:07 -0000
@@ -144,7 +144,7 @@ int sysctl_audio(int *, u_int, void *, s
 int sysctl_cpustats(int *, u_int, void *, size_t *, void *, size_t);
 int sysctl_utc_offset(void *, size_t *, void *, size_t);
 
-void fill_file(struct kinfo_file *, struct file *, struct filedesc *, int,
+int fill_file(struct kinfo_file *, struct file *, struct filedesc *, int,
     struct vnode *, struct process *, struct proc *, struct socket *, int);
 void fill_kproc(struct process *, struct kinfo_proc *, struct proc *, int);
 
@@ -1079,7 +1079,7 @@ sysctl_rdstruct(void *oldp, size_t *oldl
 }
 
 #ifndef SMALL_KERNEL
-void
+int
 fill_file(struct kinfo_file *kf, struct file *fp, struct filedesc *fdp,
          int fd, struct vnode *vp, struct process *pr, struct proc *p,
          struct socket *so, int show_pointers)
@@ -1160,7 +1160,7 @@ fill_file(struct kinfo_file *kf, struct 
                break;
 
        case DTYPE_SOCKET: {
-               int locked = 0;
+               int error, locked = 0;
 
                if (so == NULL) {
                        so = (struct socket *)fp->f_data;
@@ -1168,7 +1168,9 @@ fill_file(struct kinfo_file *kf, struct 
                        switch (so->so_proto->pr_domain->dom_family) {
                        case AF_INET:
                        case AF_INET6:
-                               NET_LOCK();
+                               NET_LOCK_TRY(error);
+                               if (error)
+                                       return error;
                                locked = 1;
                                break;
                        }
@@ -1304,6 +1306,7 @@ fill_file(struct kinfo_file *kf, struct 
                kf->fd_ofileflags = fdp->fd_ofileflags[fd];
                fdpunlock(fdp);
        }
+       return 0;
 }
 
 /*
@@ -1344,7 +1347,10 @@ sysctl_file(int *name, u_int namelen, ch
 
 #define FILLIT2(fp, fdp, i, vp, pr, so) do {                           \
        if (buflen >= elem_size && elem_count > 0) {                    \
-               fill_file(kf, fp, fdp, i, vp, pr, p, so, show_pointers);\
+               error = fill_file(kf, fp, fdp, i, vp, pr, p, so,        \
+                   show_pointers);                                     \
+               if (error)                                              \
+                       break;                                          \
                error = copyout(kf, dp, outsize);                       \
                if (error)                                              \
                        break;                                          \
@@ -1400,6 +1406,7 @@ sysctl_file(int *name, u_int namelen, ch
                        error = EINVAL;
                        break;
                }
+ retry_bypid:
                matched = 0;
                LIST_FOREACH(pr, &allprocess, ps_list) {
                        /*
@@ -1427,12 +1434,17 @@ sysctl_file(int *name, u_int namelen, ch
                                        continue;
                                FILLIT(fp, fdp, i, NULL, pr);
                                FRELE(fp, p);
+                               if (error == EAGAIN) {
+                                       error = 0;
+                                       goto retry_bypid;
+                               }
                        }
                }
                if (!matched)
                        error = ESRCH;
                break;
        case KERN_FILE_BYUID:
+ retry_byuid:
                LIST_FOREACH(pr, &allprocess, ps_list) {
                        /*
                         * skip system, exiting, embryonic and undead
@@ -1456,6 +1468,10 @@ sysctl_file(int *name, u_int namelen, ch
                                        continue;
                                FILLIT(fp, fdp, i, NULL, pr);
                                FRELE(fp, p);
+                               if (error == EAGAIN) {
+                                       error = 0;
+                                       goto retry_byuid;
+                               }
                        }
                }
                break;
Index: sys/rwlock.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/sys/rwlock.h,v
retrieving revision 1.26
diff -u -p -r1.26 rwlock.h
--- sys/rwlock.h        16 Jul 2019 01:40:49 -0000      1.26
+++ sys/rwlock.h        30 Nov 2020 16:43:39 -0000
@@ -151,6 +151,7 @@ void        rw_enter_read(struct rwlock *);
 void   rw_enter_write(struct rwlock *);
 void   rw_exit_read(struct rwlock *);
 void   rw_exit_write(struct rwlock *);
+int    rw_enter_write_try(struct rwlock *);
 
 #ifdef DIAGNOSTIC
 void   rw_assert_wrlock(struct rwlock *);
Index: sys/systm.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/sys/systm.h,v
retrieving revision 1.148
diff -u -p -r1.148 systm.h
--- sys/systm.h 26 Aug 2020 03:29:07 -0000      1.148
+++ sys/systm.h 30 Nov 2020 16:42:17 -0000
@@ -333,6 +333,7 @@ extern struct rwlock netlock;
  */
 #define        NET_LOCK()      do { rw_enter_write(&netlock); } while (0)
 #define        NET_UNLOCK()    do { rw_exit_write(&netlock); } while (0)
+#define        NET_LOCK_TRY(e) do { (e) = rw_enter_write_try(&netlock); } 
while (0)
 
 /*
  * Reader version of NET_LOCK() to be used in "softnet" thread only.

Reply via email to