Hi,
A while ago I made kernel logging interrupt safe by adding a
splhigh(). When we are going MP this is not sufficient, so replace
it with a mutex. The idea is to hold the mutex every time msgbufp
is dereferenced. This allows to print to dmesg without kernel lock.
Note that we take the mutex for every character. That should be
not performance critical as when you log too much in kernel, you
have other problems anyway.
There is still a race with logsoftc.sc_state |= LOG_RDWAIT, but I
want to address that separately.
ok?
bluhm
Index: kern/subr_log.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/kern/subr_log.c,v
retrieving revision 1.48
diff -u -p -u -p -r1.48 subr_log.c
--- kern/subr_log.c 23 Jun 2016 15:41:42 -0000 1.48
+++ kern/subr_log.c 19 Oct 2016 19:07:27 -0000
@@ -79,6 +79,7 @@ int msgbufmapped; /* is the message bu
struct msgbuf *msgbufp; /* the mapped buffer, itself. */
struct msgbuf *consbufp; /* console message buffer. */
struct file *syslogf;
+struct mutex log_mtx = MUTEX_INITIALIZER(IPL_HIGH);
void filt_logrdetach(struct knote *kn);
int filt_logread(struct knote *kn, long hint);
@@ -140,13 +141,11 @@ initconsbuf(void)
void
msgbuf_putchar(struct msgbuf *mbp, const char c)
{
- int s;
-
if (mbp->msg_magic != MSG_MAGIC)
/* Nothing we can do */
return;
- s = splhigh();
+ mtx_enter(&log_mtx);
mbp->msg_bufc[mbp->msg_bufx++] = c;
mbp->msg_bufl = lmin(mbp->msg_bufl+1, mbp->msg_bufs);
if (mbp->msg_bufx < 0 || mbp->msg_bufx >= mbp->msg_bufs)
@@ -157,7 +156,7 @@ msgbuf_putchar(struct msgbuf *mbp, const
mbp->msg_bufr = 0;
mbp->msg_bufd++;
}
- splx(s);
+ mtx_leave(&log_mtx);
}
int
@@ -186,19 +185,21 @@ logread(dev_t dev, struct uio *uio, int
{
struct msgbuf *mbp = msgbufp;
size_t l;
- int s, error = 0;
+ int error = 0;
- s = splhigh();
+ mtx_enter(&log_mtx);
while (mbp->msg_bufr == mbp->msg_bufx) {
if (flag & IO_NDELAY) {
- error = EWOULDBLOCK;
- goto out;
+ mtx_leave(&log_mtx);
+ return (EWOULDBLOCK);
}
logsoftc.sc_state |= LOG_RDWAIT;
- error = tsleep(mbp, LOG_RDPRI | PCATCH,
+ error = msleep(mbp, &log_mtx, LOG_RDPRI | PCATCH,
"klog", 0);
- if (error)
- goto out;
+ if (error) {
+ mtx_leave(&log_mtx);
+ return (error);
+ }
}
logsoftc.sc_state &= ~LOG_RDWAIT;
@@ -209,13 +210,17 @@ logread(dev_t dev, struct uio *uio, int
"<%d>klog: dropped %ld byte%s, message buffer full\n",
LOG_KERN|LOG_WARNING, mbp->msg_bufd,
mbp->msg_bufd == 1 ? "" : "s");
+ mtx_leave(&log_mtx);
error = uiomove(buf, ulmin(l, sizeof(buf) - 1), uio);
if (error)
- goto out;
+ return (error);
+ mtx_enter(&log_mtx);
mbp->msg_bufd = 0;
}
while (uio->uio_resid > 0) {
+ char *buf;
+
if (mbp->msg_bufx >= mbp->msg_bufr)
l = mbp->msg_bufx - mbp->msg_bufr;
else
@@ -223,31 +228,34 @@ logread(dev_t dev, struct uio *uio, int
l = ulmin(l, uio->uio_resid);
if (l == 0)
break;
- error = uiomove(&mbp->msg_bufc[mbp->msg_bufr], l, uio);
+ buf = &mbp->msg_bufc[mbp->msg_bufr];
+ mtx_leave(&log_mtx);
+ error = uiomove(buf, l, uio);
if (error)
- break;
+ return (error);
+ mtx_enter(&log_mtx);
mbp->msg_bufr += l;
if (mbp->msg_bufr < 0 || mbp->msg_bufr >= mbp->msg_bufs)
mbp->msg_bufr = 0;
}
- out:
- splx(s);
+ mtx_leave(&log_mtx);
+
return (error);
}
int
logpoll(dev_t dev, int events, struct proc *p)
{
- int s, revents = 0;
+ int revents = 0;
- s = splhigh();
+ mtx_enter(&log_mtx);
if (events & (POLLIN | POLLRDNORM)) {
if (msgbufp->msg_bufr != msgbufp->msg_bufx)
revents |= events & (POLLIN | POLLRDNORM);
else
selrecord(p, &logsoftc.sc_selp);
}
- splx(s);
+ mtx_leave(&log_mtx);
return (revents);
}
@@ -289,12 +297,12 @@ int
filt_logread(struct knote *kn, long hint)
{
struct msgbuf *p = (struct msgbuf *)kn->kn_hook;
- int s, event = 0;
+ int event = 0;
- s = splhigh();
+ mtx_enter(&log_mtx);
kn->kn_data = (int)(p->msg_bufx - p->msg_bufr);
event = (p->msg_bufx != p->msg_bufr);
- splx(s);
+ mtx_leave(&log_mtx);
return (event);
}
@@ -318,17 +326,17 @@ logioctl(dev_t dev, u_long com, caddr_t
{
struct file *fp;
long l;
- int error, s;
+ int error;
switch (com) {
/* return number of characters immediately available */
case FIONREAD:
- s = splhigh();
+ mtx_enter(&log_mtx);
l = msgbufp->msg_bufx - msgbufp->msg_bufr;
- splx(s);
if (l < 0)
l += msgbufp->msg_bufs;
+ mtx_leave(&log_mtx);
*(int *)data = l;
break;