On Thu, Nov 26, 2020 at 04:51:23PM +0100, Marcus MERIGHI wrote:
> Starting stack trace...
> panic(ffffffff81de557b) at panic+0x11d
> kerntrap(ffff8000229c1630) at kerntrap+0x114
> alltraps_kern_meltdown() at alltraps_kern_meltdown+0x7b
> fill_file(ffff800000ca8000,fffffd811c6a23d0,fffffd81012516f8,3,0,ffff800022735658)
> at fil6
> sysctl_file(ffff8000229c1c58,4,b4769963000,ffff8000229c1c88,ffff8000227d8c98)
> at sysctl_f2
> kern_sysctl(ffff8000229c1c54,5,b4769963000,ffff8000229c1c88,0,0) at
> kern_sysctl+0x1d1
> sys_sysctl(ffff8000227d8c98,ffff8000229c1cf0,ffff8000229c1d50) at
> sys_sysctl+0x184
> syscall(ffff8000229c1dc0) at syscall+0x389
> Xsyscall() at Xsyscall+0x128
> end of kernel
This trace looks like a bug in the fstat sysctl. I guess that
NET_LOCK() sleeps and the allprocess ps_list is not protected.
Does calling fstat in a loop trigger the same bug?
When I tried to grab the netlock outside the process loop some years
ago I remember some other locking problems. So my new idea is to
try to get the netlock, sleep, and if we did sleep, run the loop
from the beginning.
Does this diff fix the crash?
I am not sure if we want to go this way. There is no guarantee
that fstat will terminate on a box with a lot of network traffic.
Anyway it would be good to know whether my fix matches the problem
you see.
bluhm
Index: kern/kern_rwlock.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/kern/kern_rwlock.c,v
retrieving revision 1.45
diff -u -p -r1.45 kern_rwlock.c
--- kern/kern_rwlock.c 2 Mar 2020 17:07:49 -0000 1.45
+++ kern/kern_rwlock.c 30 Nov 2020 16:42:39 -0000
@@ -39,7 +39,7 @@ void rw_do_exit(struct rwlock *, unsigne
#define RW_SPINS 1000
#ifdef MULTIPROCESSOR
-#define rw_cas(p, o, n) (atomic_cas_ulong(p, o, n) != o)
+#define rw_cas(p, o, n) (atomic_cas_ulong((p), (o), (n)) != (o))
#else
static inline int
rw_cas(volatile unsigned long *p, unsigned long o, unsigned long n)
@@ -126,6 +126,26 @@ rw_enter_write(struct rwlock *rwl)
LOP_EXCLUSIVE | LOP_NEWORDER, NULL);
WITNESS_LOCK(&rwl->rwl_lock_obj, LOP_EXCLUSIVE);
}
+}
+
+int
+rw_enter_write_try(struct rwlock *rwl)
+{
+ struct proc *p = curproc;
+ int error = 0;
+
+ if (__predict_false(rw_cas(&rwl->rwl_owner, 0,
+ RW_PROC(p) | RWLOCK_WRLOCK))) {
+ error = rw_enter(rwl, RW_WRITE | RW_SLEEPFAIL);
+ if (error)
+ rw_cas(&rwl->rwl_owner, RW_PROC(p) | RWLOCK_WRLOCK, 0);
+ } else {
+ membar_enter_after_atomic();
+ WITNESS_CHECKORDER(&rwl->rwl_lock_obj,
+ LOP_EXCLUSIVE | LOP_NEWORDER, NULL);
+ WITNESS_LOCK(&rwl->rwl_lock_obj, LOP_EXCLUSIVE);
+ }
+ return error;
}
void
Index: kern/kern_sysctl.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/kern/kern_sysctl.c,v
retrieving revision 1.383
diff -u -p -r1.383 kern_sysctl.c
--- kern/kern_sysctl.c 16 Nov 2020 06:42:12 -0000 1.383
+++ kern/kern_sysctl.c 30 Nov 2020 16:44:07 -0000
@@ -144,7 +144,7 @@ int sysctl_audio(int *, u_int, void *, s
int sysctl_cpustats(int *, u_int, void *, size_t *, void *, size_t);
int sysctl_utc_offset(void *, size_t *, void *, size_t);
-void fill_file(struct kinfo_file *, struct file *, struct filedesc *, int,
+int fill_file(struct kinfo_file *, struct file *, struct filedesc *, int,
struct vnode *, struct process *, struct proc *, struct socket *, int);
void fill_kproc(struct process *, struct kinfo_proc *, struct proc *, int);
@@ -1079,7 +1079,7 @@ sysctl_rdstruct(void *oldp, size_t *oldl
}
#ifndef SMALL_KERNEL
-void
+int
fill_file(struct kinfo_file *kf, struct file *fp, struct filedesc *fdp,
int fd, struct vnode *vp, struct process *pr, struct proc *p,
struct socket *so, int show_pointers)
@@ -1160,7 +1160,7 @@ fill_file(struct kinfo_file *kf, struct
break;
case DTYPE_SOCKET: {
- int locked = 0;
+ int error, locked = 0;
if (so == NULL) {
so = (struct socket *)fp->f_data;
@@ -1168,7 +1168,9 @@ fill_file(struct kinfo_file *kf, struct
switch (so->so_proto->pr_domain->dom_family) {
case AF_INET:
case AF_INET6:
- NET_LOCK();
+ NET_LOCK_TRY(error);
+ if (error)
+ return error;
locked = 1;
break;
}
@@ -1304,6 +1306,7 @@ fill_file(struct kinfo_file *kf, struct
kf->fd_ofileflags = fdp->fd_ofileflags[fd];
fdpunlock(fdp);
}
+ return 0;
}
/*
@@ -1344,7 +1347,10 @@ sysctl_file(int *name, u_int namelen, ch
#define FILLIT2(fp, fdp, i, vp, pr, so) do { \
if (buflen >= elem_size && elem_count > 0) { \
- fill_file(kf, fp, fdp, i, vp, pr, p, so, show_pointers);\
+ error = fill_file(kf, fp, fdp, i, vp, pr, p, so, \
+ show_pointers); \
+ if (error) \
+ break; \
error = copyout(kf, dp, outsize); \
if (error) \
break; \
@@ -1400,6 +1406,7 @@ sysctl_file(int *name, u_int namelen, ch
error = EINVAL;
break;
}
+ retry_bypid:
matched = 0;
LIST_FOREACH(pr, &allprocess, ps_list) {
/*
@@ -1427,12 +1434,17 @@ sysctl_file(int *name, u_int namelen, ch
continue;
FILLIT(fp, fdp, i, NULL, pr);
FRELE(fp, p);
+ if (error == EAGAIN) {
+ error = 0;
+ goto retry_bypid;
+ }
}
}
if (!matched)
error = ESRCH;
break;
case KERN_FILE_BYUID:
+ retry_byuid:
LIST_FOREACH(pr, &allprocess, ps_list) {
/*
* skip system, exiting, embryonic and undead
@@ -1456,6 +1468,10 @@ sysctl_file(int *name, u_int namelen, ch
continue;
FILLIT(fp, fdp, i, NULL, pr);
FRELE(fp, p);
+ if (error == EAGAIN) {
+ error = 0;
+ goto retry_byuid;
+ }
}
}
break;
Index: sys/rwlock.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/sys/rwlock.h,v
retrieving revision 1.26
diff -u -p -r1.26 rwlock.h
--- sys/rwlock.h 16 Jul 2019 01:40:49 -0000 1.26
+++ sys/rwlock.h 30 Nov 2020 16:43:39 -0000
@@ -151,6 +151,7 @@ void rw_enter_read(struct rwlock *);
void rw_enter_write(struct rwlock *);
void rw_exit_read(struct rwlock *);
void rw_exit_write(struct rwlock *);
+int rw_enter_write_try(struct rwlock *);
#ifdef DIAGNOSTIC
void rw_assert_wrlock(struct rwlock *);
Index: sys/systm.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/sys/systm.h,v
retrieving revision 1.148
diff -u -p -r1.148 systm.h
--- sys/systm.h 26 Aug 2020 03:29:07 -0000 1.148
+++ sys/systm.h 30 Nov 2020 16:42:17 -0000
@@ -333,6 +333,7 @@ extern struct rwlock netlock;
*/
#define NET_LOCK() do { rw_enter_write(&netlock); } while (0)
#define NET_UNLOCK() do { rw_exit_write(&netlock); } while (0)
+#define NET_LOCK_TRY(e) do { (e) = rw_enter_write_try(&netlock); }
while (0)
/*
* Reader version of NET_LOCK() to be used in "softnet" thread only.