Hi,

Since we stash log messages in the kernel, the timestamps added by
syslogd are delayed.  The kernel could add the timestamp when it
receives the message by sendsyslog(2).  This is more precise and
can be expressed by more digits in the ISO timestamp.

I have to copyin(9) at the beginning of sendsyslog(2) to have both
kernel timestamp and log message in kernel space.  This makes the
code easier, but requires a malloc(9) for each log message.

Do we want to go into this direction?

Note that old syslogd and new kernel produces ugly messages.
Do we need an update path?

bluhm

Index: sys/kern/subr_log.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/kern/subr_log.c,v
retrieving revision 1.74
diff -u -p -r1.74 subr_log.c
--- sys/kern/subr_log.c 18 Mar 2021 08:43:38 -0000      1.74
+++ sys/kern/subr_log.c 18 Mar 2021 13:06:13 -0000
@@ -108,7 +108,7 @@ const struct filterops logread_filtops =
        .f_event        = filt_logread,
 };
 
-int dosendsyslog(struct proc *, const char *, size_t, int, enum uio_seg);
+int dosendsyslog(struct proc *, char *, size_t, int, struct timeval *);
 void logtick(void *);
 size_t msgbuf_getlen(struct msgbuf *);
 void msgbuf_putchar_locked(struct msgbuf *, const char);
@@ -468,14 +468,15 @@ logioctl(dev_t dev, u_long com, caddr_t 
  * If syslogd is not running, temporarily store a limited amount of messages
  * in kernel.  After log stash is full, drop messages and count them.  When
  * syslogd is available again, next log message will flush the stashed
- * messages and insert a message with drop count.  Calls to malloc(9) and
- * copyin(9) may sleep, protect data structures with rwlock.
+ * messages and insert a message with drop count.  Calls to malloc(9) may
+ * sleep, protect data structures with rwlock.
  */
 
 #define LOGSTASH_SIZE  100
 struct logstash_message {
        char    *lgs_buffer;
        size_t   lgs_size;
+       struct   timeval lgs_time;
 } logstash_messages[LOGSTASH_SIZE];
 
 struct logstash_message *logstash_in = &logstash_messages[0];
@@ -485,9 +486,9 @@ struct      rwlock logstash_rwlock = RWLOCK_I
 
 int    logstash_dropped, logstash_error, logstash_pid;
 
-int    logstash_insert(const char *, size_t, int, pid_t);
-void   logstash_remove(void);
-int    logstash_sendsyslog(struct proc *);
+void   logstash_insert(char *, size_t, int, pid_t, struct timeval *);
+void   logstash_remove(struct timeval *);
+int    logstash_sendsyslog(struct proc *, struct timeval *);
 
 static inline int
 logstash_full(void)
@@ -511,11 +512,10 @@ logstash_increment(struct logstash_messa
                (*msg)++;
 }
 
-int
-logstash_insert(const char *buf, size_t nbyte, int logerror, pid_t pid)
+void
+logstash_insert(char *buffer, size_t nbyte, int logerror, pid_t pid,
+    struct timeval *now)
 {
-       int error;
-
        rw_enter_write(&logstash_rwlock);
 
        if (logstash_full()) {
@@ -524,29 +524,22 @@ logstash_insert(const char *buf, size_t 
                        logstash_pid = pid;
                }
                logstash_dropped++;
+               free(buffer, M_LOG, nbyte);
 
                rw_exit(&logstash_rwlock);
-               return (0);
+               return;
        }
 
-       logstash_in->lgs_buffer = malloc(nbyte, M_LOG, M_WAITOK);
-       error = copyin(buf, logstash_in->lgs_buffer, nbyte);
-       if (error) {
-               free(logstash_in->lgs_buffer, M_LOG, nbyte);
-               logstash_in->lgs_buffer = NULL;
-
-               rw_exit(&logstash_rwlock);
-               return (error);
-       }
+       logstash_in->lgs_buffer = buffer;
        logstash_in->lgs_size = nbyte;
+       logstash_in->lgs_time = *now;
        logstash_increment(&logstash_in);
 
        rw_exit(&logstash_rwlock);
-       return (0);
 }
 
 void
-logstash_remove(void)
+logstash_remove(struct timeval *now)
 {
        rw_assert_wrlock(&logstash_rwlock);
 
@@ -575,12 +568,13 @@ logstash_remove(void)
                logstash_in->lgs_buffer = malloc(nbyte, M_LOG, M_WAITOK);
                memcpy(logstash_in->lgs_buffer, buf, nbyte);
                logstash_in->lgs_size = nbyte;
+               logstash_in->lgs_time = *now;
                logstash_increment(&logstash_in);
        }
 }
 
 int
-logstash_sendsyslog(struct proc *p)
+logstash_sendsyslog(struct proc *p, struct timeval *now)
 {
        int error;
 
@@ -588,12 +582,12 @@ logstash_sendsyslog(struct proc *p)
 
        while (logstash_out->lgs_buffer != NULL) {
                error = dosendsyslog(p, logstash_out->lgs_buffer,
-                   logstash_out->lgs_size, 0, UIO_SYSSPACE);
+                   logstash_out->lgs_size, 0, &logstash_out->lgs_time);
                if (error) {
                        rw_exit(&logstash_rwlock);
                        return (error);
                }
-               logstash_remove();
+               logstash_remove(now);
        }
 
        rw_exit(&logstash_rwlock);
@@ -615,33 +609,51 @@ sys_sendsyslog(struct proc *p, void *v, 
                syscallarg(size_t) nbyte;
                syscallarg(int) flags;
        } */ *uap = v;
+       struct timeval now;
+       char *buffer;
        size_t nbyte;
        int error;
 
+       microtime(&now);
+
        nbyte = SCARG(uap, nbyte);
        if (nbyte > LOG_MAXLINE)
                nbyte = LOG_MAXLINE;
 
-       logstash_sendsyslog(p);
-       error = dosendsyslog(p, SCARG(uap, buf), nbyte, SCARG(uap, flags),
-           UIO_USERSPACE);
-       if (error && error != EFAULT)
-               logstash_insert(SCARG(uap, buf), nbyte, error, p->p_p->ps_pid);
-       return (error);
+       buffer = malloc(nbyte, M_LOG, M_WAITOK);
+       error = copyin(SCARG(uap, buf), buffer, nbyte);
+       if (error)
+               return (error);
+#ifdef KTRACE
+       if (KTRPOINT(p, KTR_GENIO)) {
+               struct iovec ktriov;
+
+               ktriov.iov_base = (char *)SCARG(uap, buf);
+               ktriov.iov_len = nbyte;
+               ktrgenio(p, -1, UIO_WRITE, &ktriov, nbyte);
+       }
+#endif
+
+       logstash_sendsyslog(p, &now);
+       error = dosendsyslog(p, buffer, nbyte, SCARG(uap, flags), &now);
+       if (error) {
+               /* logstash will free buffer later */
+               logstash_insert(buffer, nbyte, error, p->p_p->ps_pid, &now);
+               return (error);
+       }
+       free(buffer, M_LOG, nbyte);
+       return (0);
 }
 
 int
-dosendsyslog(struct proc *p, const char *buf, size_t nbyte, int flags,
-    enum uio_seg sflg)
+dosendsyslog(struct proc *p, char *buffer, size_t nbyte, int flags,
+    struct timeval *time)
 {
-#ifdef KTRACE
-       struct iovec ktriov;
-#endif
        struct file *fp;
-       char pri[6], *kbuf;
-       struct iovec aiov;
+       char logtime[sizeof("-9223372036854775808.123456 ")];
+       struct iovec aiov[3];
        struct uio auio;
-       size_t i, len;
+       size_t len;
        int error;
 
        /* Global variable syslogf may change during sleep, use local copy. */
@@ -651,106 +663,87 @@ dosendsyslog(struct proc *p, const char 
                FREF(fp);
        rw_exit(&syslogf_rwlock);
 
-       if (fp == NULL) {
-               if (!ISSET(flags, LOG_CONS))
-                       return (ENOTCONN);
+       if (fp) {
+               len = snprintf(logtime, sizeof(logtime), "%lld.%06ld ",
+                   time->tv_sec, time->tv_usec);
+               len = MIN(len, sizeof(logtime));
+       } else if (!ISSET(flags, LOG_CONS)) {
+               return (ENOTCONN);
+       } else {
+               size_t i;
+
                /*
                 * Strip off syslog priority when logging to console.
                 * LOG_PRIMASK | LOG_FACMASK is 0x03ff, so at most 4
                 * decimal digits may appear in priority as <1023>.
                 */
-               len = MIN(nbyte, sizeof(pri));
-               if (sflg == UIO_USERSPACE) {
-                       if ((error = copyin(buf, pri, len)))
-                               return (error);
-               } else
-                       memcpy(pri, buf, len);
-               if (0 < len && pri[0] == '<') {
+               len = MIN(nbyte, 6);
+               if (0 < len && buffer[0] == '<') {
                        for (i = 1; i < len; i++) {
-                               if (pri[i] < '0' || pri[i] > '9')
+                               if (buffer[i] < '0' || buffer[i] > '9')
                                        break;
                        }
-                       if (i < len && pri[i] == '>') {
+                       if (i < len && buffer[i] == '>') {
                                i++;
                                /* There must be at least one digit <0>. */
                                if (i >= 3) {
-                                       buf += i;
+                                       buffer += i;
                                        nbyte -= i;
                                }
                        }
                }
+               len = 0;
        }
 
-       aiov.iov_base = (char *)buf;
-       aiov.iov_len = nbyte;
-       auio.uio_iov = &aiov;
-       auio.uio_iovcnt = 1;
-       auio.uio_segflg = sflg;
+       aiov[0].iov_base = logtime;
+       aiov[0].iov_len = len;
+       aiov[1].iov_base = buffer;
+       aiov[1].iov_len = nbyte;
+       aiov[2].iov_base = "\r\n";
+       aiov[2].iov_len = 2;
+       auio.uio_segflg = UIO_SYSSPACE;
        auio.uio_rw = UIO_WRITE;
        auio.uio_procp = p;
        auio.uio_offset = 0;
-       auio.uio_resid = aiov.iov_len;
-#ifdef KTRACE
-       if (sflg == UIO_USERSPACE && KTRPOINT(p, KTR_GENIO))
-               ktriov = aiov;
-       else
-               ktriov.iov_len = 0;
-#endif
 
-       len = auio.uio_resid;
        if (fp) {
                int flags = (fp->f_flag & FNONBLOCK) ? MSG_DONTWAIT : 0;
+
+               auio.uio_iov = &aiov[0];
+               auio.uio_iovcnt = 2;
+               auio.uio_resid = aiov[0].iov_len + aiov[1].iov_len;
                error = sosend(fp->f_data, NULL, &auio, NULL, NULL, flags);
-               if (error == 0)
-                       len -= auio.uio_resid;
        } else {
                KERNEL_LOCK();
                if (constty || cn_devvp) {
+                       auio.uio_iov = &aiov[1];
+                       auio.uio_iovcnt = 2;
+                       auio.uio_resid = aiov[1].iov_len + aiov[2].iov_len;
                        error = cnwrite(0, &auio, 0);
-                       if (error == 0)
-                               len -= auio.uio_resid;
-                       aiov.iov_base = "\r\n";
-                       aiov.iov_len = 2;
-                       auio.uio_iov = &aiov;
-                       auio.uio_iovcnt = 1;
-                       auio.uio_segflg = UIO_SYSSPACE;
-                       auio.uio_rw = UIO_WRITE;
-                       auio.uio_procp = p;
-                       auio.uio_offset = 0;
-                       auio.uio_resid = aiov.iov_len;
-                       cnwrite(0, &auio, 0);
                } else {
+                       char *p;
+
                        /* XXX console redirection breaks down... */
-                       if (sflg == UIO_USERSPACE) {
-                               kbuf = malloc(len, M_TEMP, M_WAITOK);
-                               error = copyin(aiov.iov_base, kbuf, len);
-                       } else {
-                               kbuf = aiov.iov_base;
-                               error = 0;
+                       auio.uio_iov = &aiov[1];
+                       auio.uio_iovcnt = 1;
+                       auio.uio_resid = aiov[1].iov_len;
+                       p = auio.uio_iov[0].iov_base;
+                       while (auio.uio_resid > 0) {
+                               if (*p == '\0')
+                                       break;
+                               cnputc(*p);
+                               p++;
+                               auio.uio_resid--;
                        }
-                       if (error == 0)
-                               for (i = 0; i < len; i++) {
-                                       if (kbuf[i] == '\0')
-                                               break;
-                                       cnputc(kbuf[i]);
-                                       auio.uio_resid--;
-                               }
-                       if (sflg == UIO_USERSPACE)
-                               free(kbuf, M_TEMP, len);
-                       if (error == 0)
-                               len -= auio.uio_resid;
                        cnputc('\n');
+                       error = 0;
                }
                KERNEL_UNLOCK();
        }
 
-#ifdef KTRACE
-       if (error == 0 && ktriov.iov_len != 0)
-               ktrgenio(p, -1, UIO_WRITE, &ktriov, len);
-#endif
        if (fp)
                FRELE(fp, p);
-       else if (error != EFAULT)
+       else
                error = ENOTCONN;
        return (error);
 }
Index: usr.sbin/syslogd/syslogd.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/usr.sbin/syslogd/syslogd.c,v
retrieving revision 1.265
diff -u -p -r1.265 syslogd.c
--- usr.sbin/syslogd/syslogd.c  9 Mar 2021 15:08:23 -0000       1.265
+++ usr.sbin/syslogd/syslogd.c  18 Mar 2021 16:04:22 -0000
@@ -134,6 +134,7 @@ const char ctty[] = _PATH_CONSOLE;
 #define SYNC_FILE      0x002   /* do fsync on file after printing */
 #define ADDDATE                0x004   /* add a date to the message */
 #define MARK           0x008   /* this message is a mark */
+#define KERNDATE       0x010   /* the kernel has added log time */
 
 /*
  * This structure represents the files that will have log
@@ -289,6 +290,7 @@ size_t      ctl_reply_offset = 0;   /* Number o
 
 char   *linebuf;
 int     linesize;
+struct  timeval now;
 
 int             fd_ctlconn, fd_udp, fd_udp6, send_udp, send_udp6;
 struct event   *ev_ctlaccept, *ev_ctlread, *ev_ctlwrite;
@@ -305,6 +307,7 @@ char hostname_unknown[] = "???";
 
 void    klog_readcb(int, short, void *);
 void    udp_readcb(int, short, void *);
+void    sendsys_readcb(int, short, void *);
 void    unix_readcb(int, short, void *);
 int     reserve_accept4(int, int, struct event *,
     void (*)(int, short, void *), struct sockaddr *, socklen_t *, int);
@@ -339,10 +342,11 @@ void      fprintlog(struct filed *, int, char
 void   dropped_warn(int *, const char *);
 void   init(void);
 void   logevent(int, const char *);
-void   logline(int, int, char *, char *);
+void   logline(int, int, struct timeval *, char *, char *);
 struct filed *find_dup(struct filed *);
+size_t parsekerntime(const char *, struct timeval *);
 size_t parsepriority(const char *, int *);
-void   printline(char *, char *);
+void   printline(int, char *, char *);
 void   printsys(char *);
 void   usage(void);
 void   wallmsg(struct filed *, struct iovec *);
@@ -796,7 +800,7 @@ main(int argc, char *argv[])
        event_set(ev_ctlwrite, fd_ctlconn, EV_WRITE|EV_PERSIST,
            ctlconn_writecb, ev_ctlwrite);
        event_set(ev_klog, fd_klog, EV_READ|EV_PERSIST, klog_readcb, ev_klog);
-       event_set(ev_sendsys, fd_sendsys, EV_READ|EV_PERSIST, unix_readcb,
+       event_set(ev_sendsys, fd_sendsys, EV_READ|EV_PERSIST, sendsys_readcb,
            ev_sendsys);
        event_set(ev_udp, fd_udp, EV_READ|EV_PERSIST, udp_readcb, ev_udp);
        event_set(ev_udp6, fd_udp6, EV_READ|EV_PERSIST, udp_readcb, ev_udp6);
@@ -1054,12 +1058,25 @@ udp_readcb(int fd, short event, void *ar
                linebuf[n] = '\0';
                cvthname((struct sockaddr *)&sa, resolve, sizeof(resolve));
                log_debug("cvthname res: %s", resolve);
-               printline(resolve, linebuf);
+               printline(0, resolve, linebuf);
        } else if (n == -1 && errno != EINTR && errno != EWOULDBLOCK)
                log_warn("recvfrom udp");
 }
 
 void
+sendsys_readcb(int fd, short event, void *arg)
+{
+       ssize_t                  n;
+
+       n = recvfrom(fd, linebuf, LOG_MAXLINE, 0, NULL, 0);
+       if (n > 0) {
+               linebuf[n] = '\0';
+               printline(KERNDATE, LocalHostName, linebuf);
+       } else if (n == -1 && errno != EINTR && errno != EWOULDBLOCK)
+               log_warn("recvfrom sendsyslog");
+}
+
+void
 unix_readcb(int fd, short event, void *arg)
 {
        struct sockaddr_un       sa;
@@ -1071,7 +1088,7 @@ unix_readcb(int fd, short event, void *a
            &salen);
        if (n > 0) {
                linebuf[n] = '\0';
-               printline(LocalHostName, linebuf);
+               printline(0, LocalHostName, linebuf);
        } else if (n == -1 && errno != EINTR && errno != EWOULDBLOCK)
                log_warn("recvfrom unix");
 }
@@ -1306,13 +1323,13 @@ tcp_readcb(struct bufferevent *bufev, vo
                        linebuf[MINIMUM(len, LOG_MAXLINE)] = '\0';
                        msg = linebuf;
                }
-               printline(p->p_hostname, msg);
+               printline(0, p->p_hostname, msg);
                evbuffer_drain(bufev->input, len);
        }
        /* Maximum frame has 5 digits, 1 space, MAXLINE chars, 1 new line. */
        if (EVBUFFER_LENGTH(bufev->input) >= 5 + 1 + LOG_MAXLINE + 1) {
                log_debug(", use %zu bytes", EVBUFFER_LENGTH(bufev->input));
-               printline(p->p_hostname, EVBUFFER_DATA(bufev->input));
+               printline(0, p->p_hostname, EVBUFFER_DATA(bufev->input));
                evbuffer_drain(bufev->input, -1);
        } else if (EVBUFFER_LENGTH(bufev->input) > 0)
                log_debug(", buffer %zu bytes", EVBUFFER_LENGTH(bufev->input));
@@ -1560,6 +1577,24 @@ usage(void)
 }
 
 /*
+ * Kernel prepends microtime(9) with space in front of log messages.
+ * It is called at beginnning of sendsyslog(2) and has 6 digits precision.
+ */
+size_t
+parsekerntime(const char *msg, struct timeval *logtime)
+{
+       struct timeval time;
+       int timelen, timenum;
+
+       timenum = sscanf(msg, "%lld.%06ld %n",
+           &time.tv_sec, &time.tv_usec, &timelen);
+       if (timenum != 2)
+               return (0);
+       *logtime = time;
+       return (timelen);
+}
+
+/*
  * Parse a priority code of the form "<123>" into pri, and return the
  * length of the priority code including the surrounding angle brackets.
  */
@@ -1591,14 +1626,21 @@ parsepriority(const char *msg, int *pri)
  * on the appropriate log files.
  */
 void
-printline(char *hname, char *msg)
+printline(int flags, char *hname, char *msg)
 {
+       struct timeval logtime;
        int pri;
        char *p, *q, line[LOG_MAXLINE + 4 + 1];  /* message, encoding, NUL */
 
        /* test for special codes */
-       pri = DEFUPRI;
        p = msg;
+       timerclear(&logtime);
+       if (flags & KERNDATE) {
+               p += parsekerntime(p, &logtime);
+               if (!timerisset(&logtime))
+                       flags &=~ KERNDATE;
+       }
+       pri = DEFUPRI;
        p += parsepriority(p, &pri);
        if (pri &~ (LOG_FACMASK|LOG_PRIMASK))
                pri = DEFUPRI;
@@ -1619,7 +1661,7 @@ printline(char *hname, char *msg)
        }
        line[LOG_MAXLINE] = *q = '\0';
 
-       logline(pri, 0, hname, line);
+       logline(pri, flags, &logtime, hname, line);
 }
 
 /*
@@ -1655,7 +1697,7 @@ printsys(char *msg)
                while (*p && (c = *p++) != '\n' && q < &line[sizeof(line) - 4])
                        q = vis(q, c, 0, 0);
 
-               logline(pri, flags, LocalHostName, line);
+               logline(pri, flags, NULL, LocalHostName, line);
        }
 }
 
@@ -1677,21 +1719,20 @@ vlogmsg(int pri, const char *proc, const
                init_dropped++;
                return;
        }
-       logline(pri, ADDDATE, LocalHostName, msg);
+       logline(pri, ADDDATE, NULL, LocalHostName, msg);
 }
 
-struct timeval now;
-
 /*
  * Log a message to the appropriate log files, users, etc. based on
  * the priority.
  */
 void
-logline(int pri, int flags, char *from, char *msg)
+logline(int pri, int flags, struct timeval *kerntime, char *from, char *msg)
 {
        struct filed *f;
+       struct timeval *logtime;
        int fac, msglen, prilev, i;
-       char timestamp[33];
+       char timestamp[sizeof("2003-08-24T05:14:15.000003-07:00")];
        char prog[NAME_MAX+1];
 
        log_debug("logline: pri 0%o, flags 0x%x, from %s, msg %s",
@@ -1768,21 +1809,25 @@ logline(int pri, int flags, char *from, 
        }
 
        (void)gettimeofday(&now, NULL);
+       logtime = (flags & KERNDATE) ? kerntime : &now;
        if (flags & ADDDATE) {
                if (ZuluTime) {
                        struct tm *tm;
                        size_t l;
 
-                       tm = gmtime(&now.tv_sec);
+                       tm = gmtime(&logtime->tv_sec);
                        l = strftime(timestamp, sizeof(timestamp), "%FT%T", tm);
                        /*
-                        * Use only millisecond precision as some time has
-                        * passed since syslog(3) was called.
+                        * Use millisecond precision for gettimeofday(2) as
+                        * some time has passed since syslog(3) was called.
+                        * Kernel time is more precise, use microseconds.
                         */
                        snprintf(timestamp + l, sizeof(timestamp) - l,
-                           ".%03ldZ", now.tv_usec / 1000);
+                           ".%0*ldZ", (flags & KERNDATE) ? 6 : 3,
+                           (flags & KERNDATE) ? logtime->tv_usec :
+                           logtime->tv_usec / 1000);
                } else
-                       strlcpy(timestamp, ctime(&now.tv_sec) + 4, 16);
+                       strlcpy(timestamp, ctime(&logtime->tv_sec) + 4, 16);
        }
 
        /* extract facility and priority level */
@@ -2961,9 +3006,9 @@ getmsgbufsize(void)
 
        mib[0] = CTL_KERN;
        mib[1] = KERN_MSGBUFSIZE;
-       size = sizeof msgbufsize;
+       size = sizeof(msgbufsize);
        if (sysctl(mib, 2, &msgbufsize, &size, NULL, 0) == -1) {
-               log_debug("couldn't get kern.msgbufsize");
+               log_warn("sysctl kern.msgbufsize");
                return (0);
        }
        return (msgbufsize);
@@ -3000,7 +3045,8 @@ markit(void)
        (void)gettimeofday(&now, NULL);
        MarkSeq += TIMERINTVL;
        if (MarkSeq >= MarkInterval) {
-               logline(LOG_INFO, ADDDATE|MARK, LocalHostName, "-- MARK --");
+               logline(LOG_INFO, ADDDATE|MARK, NULL, LocalHostName,
+                   "-- MARK --");
                MarkSeq = 0;
        }
 

Reply via email to