Hi, Since we stash log messages in the kernel, the timestamps added by syslogd are delayed. The kernel could add the timestamp when it receives the message by sendsyslog(2). This is more precise and can be expressed by more digits in the ISO timestamp.
I have to copyin(9) at the beginning of sendsyslog(2) to have both kernel timestamp and log message in kernel space. This makes the code easier, but requires a malloc(9) for each log message. Do we want to go into this direction? Note that old syslogd and new kernel produces ugly messages. Do we need an update path? bluhm Index: sys/kern/subr_log.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/kern/subr_log.c,v retrieving revision 1.74 diff -u -p -r1.74 subr_log.c --- sys/kern/subr_log.c 18 Mar 2021 08:43:38 -0000 1.74 +++ sys/kern/subr_log.c 18 Mar 2021 13:06:13 -0000 @@ -108,7 +108,7 @@ const struct filterops logread_filtops = .f_event = filt_logread, }; -int dosendsyslog(struct proc *, const char *, size_t, int, enum uio_seg); +int dosendsyslog(struct proc *, char *, size_t, int, struct timeval *); void logtick(void *); size_t msgbuf_getlen(struct msgbuf *); void msgbuf_putchar_locked(struct msgbuf *, const char); @@ -468,14 +468,15 @@ logioctl(dev_t dev, u_long com, caddr_t * If syslogd is not running, temporarily store a limited amount of messages * in kernel. After log stash is full, drop messages and count them. When * syslogd is available again, next log message will flush the stashed - * messages and insert a message with drop count. Calls to malloc(9) and - * copyin(9) may sleep, protect data structures with rwlock. + * messages and insert a message with drop count. Calls to malloc(9) may + * sleep, protect data structures with rwlock. */ #define LOGSTASH_SIZE 100 struct logstash_message { char *lgs_buffer; size_t lgs_size; + struct timeval lgs_time; } logstash_messages[LOGSTASH_SIZE]; struct logstash_message *logstash_in = &logstash_messages[0]; @@ -485,9 +486,9 @@ struct rwlock logstash_rwlock = RWLOCK_I int logstash_dropped, logstash_error, logstash_pid; -int logstash_insert(const char *, size_t, int, pid_t); -void logstash_remove(void); -int logstash_sendsyslog(struct proc *); +void logstash_insert(char *, size_t, int, pid_t, struct timeval *); +void logstash_remove(struct timeval *); +int logstash_sendsyslog(struct proc *, struct timeval *); static inline int logstash_full(void) @@ -511,11 +512,10 @@ logstash_increment(struct logstash_messa (*msg)++; } -int -logstash_insert(const char *buf, size_t nbyte, int logerror, pid_t pid) +void +logstash_insert(char *buffer, size_t nbyte, int logerror, pid_t pid, + struct timeval *now) { - int error; - rw_enter_write(&logstash_rwlock); if (logstash_full()) { @@ -524,29 +524,22 @@ logstash_insert(const char *buf, size_t logstash_pid = pid; } logstash_dropped++; + free(buffer, M_LOG, nbyte); rw_exit(&logstash_rwlock); - return (0); + return; } - logstash_in->lgs_buffer = malloc(nbyte, M_LOG, M_WAITOK); - error = copyin(buf, logstash_in->lgs_buffer, nbyte); - if (error) { - free(logstash_in->lgs_buffer, M_LOG, nbyte); - logstash_in->lgs_buffer = NULL; - - rw_exit(&logstash_rwlock); - return (error); - } + logstash_in->lgs_buffer = buffer; logstash_in->lgs_size = nbyte; + logstash_in->lgs_time = *now; logstash_increment(&logstash_in); rw_exit(&logstash_rwlock); - return (0); } void -logstash_remove(void) +logstash_remove(struct timeval *now) { rw_assert_wrlock(&logstash_rwlock); @@ -575,12 +568,13 @@ logstash_remove(void) logstash_in->lgs_buffer = malloc(nbyte, M_LOG, M_WAITOK); memcpy(logstash_in->lgs_buffer, buf, nbyte); logstash_in->lgs_size = nbyte; + logstash_in->lgs_time = *now; logstash_increment(&logstash_in); } } int -logstash_sendsyslog(struct proc *p) +logstash_sendsyslog(struct proc *p, struct timeval *now) { int error; @@ -588,12 +582,12 @@ logstash_sendsyslog(struct proc *p) while (logstash_out->lgs_buffer != NULL) { error = dosendsyslog(p, logstash_out->lgs_buffer, - logstash_out->lgs_size, 0, UIO_SYSSPACE); + logstash_out->lgs_size, 0, &logstash_out->lgs_time); if (error) { rw_exit(&logstash_rwlock); return (error); } - logstash_remove(); + logstash_remove(now); } rw_exit(&logstash_rwlock); @@ -615,33 +609,51 @@ sys_sendsyslog(struct proc *p, void *v, syscallarg(size_t) nbyte; syscallarg(int) flags; } */ *uap = v; + struct timeval now; + char *buffer; size_t nbyte; int error; + microtime(&now); + nbyte = SCARG(uap, nbyte); if (nbyte > LOG_MAXLINE) nbyte = LOG_MAXLINE; - logstash_sendsyslog(p); - error = dosendsyslog(p, SCARG(uap, buf), nbyte, SCARG(uap, flags), - UIO_USERSPACE); - if (error && error != EFAULT) - logstash_insert(SCARG(uap, buf), nbyte, error, p->p_p->ps_pid); - return (error); + buffer = malloc(nbyte, M_LOG, M_WAITOK); + error = copyin(SCARG(uap, buf), buffer, nbyte); + if (error) + return (error); +#ifdef KTRACE + if (KTRPOINT(p, KTR_GENIO)) { + struct iovec ktriov; + + ktriov.iov_base = (char *)SCARG(uap, buf); + ktriov.iov_len = nbyte; + ktrgenio(p, -1, UIO_WRITE, &ktriov, nbyte); + } +#endif + + logstash_sendsyslog(p, &now); + error = dosendsyslog(p, buffer, nbyte, SCARG(uap, flags), &now); + if (error) { + /* logstash will free buffer later */ + logstash_insert(buffer, nbyte, error, p->p_p->ps_pid, &now); + return (error); + } + free(buffer, M_LOG, nbyte); + return (0); } int -dosendsyslog(struct proc *p, const char *buf, size_t nbyte, int flags, - enum uio_seg sflg) +dosendsyslog(struct proc *p, char *buffer, size_t nbyte, int flags, + struct timeval *time) { -#ifdef KTRACE - struct iovec ktriov; -#endif struct file *fp; - char pri[6], *kbuf; - struct iovec aiov; + char logtime[sizeof("-9223372036854775808.123456 ")]; + struct iovec aiov[3]; struct uio auio; - size_t i, len; + size_t len; int error; /* Global variable syslogf may change during sleep, use local copy. */ @@ -651,106 +663,87 @@ dosendsyslog(struct proc *p, const char FREF(fp); rw_exit(&syslogf_rwlock); - if (fp == NULL) { - if (!ISSET(flags, LOG_CONS)) - return (ENOTCONN); + if (fp) { + len = snprintf(logtime, sizeof(logtime), "%lld.%06ld ", + time->tv_sec, time->tv_usec); + len = MIN(len, sizeof(logtime)); + } else if (!ISSET(flags, LOG_CONS)) { + return (ENOTCONN); + } else { + size_t i; + /* * Strip off syslog priority when logging to console. * LOG_PRIMASK | LOG_FACMASK is 0x03ff, so at most 4 * decimal digits may appear in priority as <1023>. */ - len = MIN(nbyte, sizeof(pri)); - if (sflg == UIO_USERSPACE) { - if ((error = copyin(buf, pri, len))) - return (error); - } else - memcpy(pri, buf, len); - if (0 < len && pri[0] == '<') { + len = MIN(nbyte, 6); + if (0 < len && buffer[0] == '<') { for (i = 1; i < len; i++) { - if (pri[i] < '0' || pri[i] > '9') + if (buffer[i] < '0' || buffer[i] > '9') break; } - if (i < len && pri[i] == '>') { + if (i < len && buffer[i] == '>') { i++; /* There must be at least one digit <0>. */ if (i >= 3) { - buf += i; + buffer += i; nbyte -= i; } } } + len = 0; } - aiov.iov_base = (char *)buf; - aiov.iov_len = nbyte; - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - auio.uio_segflg = sflg; + aiov[0].iov_base = logtime; + aiov[0].iov_len = len; + aiov[1].iov_base = buffer; + aiov[1].iov_len = nbyte; + aiov[2].iov_base = "\r\n"; + aiov[2].iov_len = 2; + auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_WRITE; auio.uio_procp = p; auio.uio_offset = 0; - auio.uio_resid = aiov.iov_len; -#ifdef KTRACE - if (sflg == UIO_USERSPACE && KTRPOINT(p, KTR_GENIO)) - ktriov = aiov; - else - ktriov.iov_len = 0; -#endif - len = auio.uio_resid; if (fp) { int flags = (fp->f_flag & FNONBLOCK) ? MSG_DONTWAIT : 0; + + auio.uio_iov = &aiov[0]; + auio.uio_iovcnt = 2; + auio.uio_resid = aiov[0].iov_len + aiov[1].iov_len; error = sosend(fp->f_data, NULL, &auio, NULL, NULL, flags); - if (error == 0) - len -= auio.uio_resid; } else { KERNEL_LOCK(); if (constty || cn_devvp) { + auio.uio_iov = &aiov[1]; + auio.uio_iovcnt = 2; + auio.uio_resid = aiov[1].iov_len + aiov[2].iov_len; error = cnwrite(0, &auio, 0); - if (error == 0) - len -= auio.uio_resid; - aiov.iov_base = "\r\n"; - aiov.iov_len = 2; - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - auio.uio_segflg = UIO_SYSSPACE; - auio.uio_rw = UIO_WRITE; - auio.uio_procp = p; - auio.uio_offset = 0; - auio.uio_resid = aiov.iov_len; - cnwrite(0, &auio, 0); } else { + char *p; + /* XXX console redirection breaks down... */ - if (sflg == UIO_USERSPACE) { - kbuf = malloc(len, M_TEMP, M_WAITOK); - error = copyin(aiov.iov_base, kbuf, len); - } else { - kbuf = aiov.iov_base; - error = 0; + auio.uio_iov = &aiov[1]; + auio.uio_iovcnt = 1; + auio.uio_resid = aiov[1].iov_len; + p = auio.uio_iov[0].iov_base; + while (auio.uio_resid > 0) { + if (*p == '\0') + break; + cnputc(*p); + p++; + auio.uio_resid--; } - if (error == 0) - for (i = 0; i < len; i++) { - if (kbuf[i] == '\0') - break; - cnputc(kbuf[i]); - auio.uio_resid--; - } - if (sflg == UIO_USERSPACE) - free(kbuf, M_TEMP, len); - if (error == 0) - len -= auio.uio_resid; cnputc('\n'); + error = 0; } KERNEL_UNLOCK(); } -#ifdef KTRACE - if (error == 0 && ktriov.iov_len != 0) - ktrgenio(p, -1, UIO_WRITE, &ktriov, len); -#endif if (fp) FRELE(fp, p); - else if (error != EFAULT) + else error = ENOTCONN; return (error); } Index: usr.sbin/syslogd/syslogd.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/usr.sbin/syslogd/syslogd.c,v retrieving revision 1.265 diff -u -p -r1.265 syslogd.c --- usr.sbin/syslogd/syslogd.c 9 Mar 2021 15:08:23 -0000 1.265 +++ usr.sbin/syslogd/syslogd.c 18 Mar 2021 16:04:22 -0000 @@ -134,6 +134,7 @@ const char ctty[] = _PATH_CONSOLE; #define SYNC_FILE 0x002 /* do fsync on file after printing */ #define ADDDATE 0x004 /* add a date to the message */ #define MARK 0x008 /* this message is a mark */ +#define KERNDATE 0x010 /* the kernel has added log time */ /* * This structure represents the files that will have log @@ -289,6 +290,7 @@ size_t ctl_reply_offset = 0; /* Number o char *linebuf; int linesize; +struct timeval now; int fd_ctlconn, fd_udp, fd_udp6, send_udp, send_udp6; struct event *ev_ctlaccept, *ev_ctlread, *ev_ctlwrite; @@ -305,6 +307,7 @@ char hostname_unknown[] = "???"; void klog_readcb(int, short, void *); void udp_readcb(int, short, void *); +void sendsys_readcb(int, short, void *); void unix_readcb(int, short, void *); int reserve_accept4(int, int, struct event *, void (*)(int, short, void *), struct sockaddr *, socklen_t *, int); @@ -339,10 +342,11 @@ void fprintlog(struct filed *, int, char void dropped_warn(int *, const char *); void init(void); void logevent(int, const char *); -void logline(int, int, char *, char *); +void logline(int, int, struct timeval *, char *, char *); struct filed *find_dup(struct filed *); +size_t parsekerntime(const char *, struct timeval *); size_t parsepriority(const char *, int *); -void printline(char *, char *); +void printline(int, char *, char *); void printsys(char *); void usage(void); void wallmsg(struct filed *, struct iovec *); @@ -796,7 +800,7 @@ main(int argc, char *argv[]) event_set(ev_ctlwrite, fd_ctlconn, EV_WRITE|EV_PERSIST, ctlconn_writecb, ev_ctlwrite); event_set(ev_klog, fd_klog, EV_READ|EV_PERSIST, klog_readcb, ev_klog); - event_set(ev_sendsys, fd_sendsys, EV_READ|EV_PERSIST, unix_readcb, + event_set(ev_sendsys, fd_sendsys, EV_READ|EV_PERSIST, sendsys_readcb, ev_sendsys); event_set(ev_udp, fd_udp, EV_READ|EV_PERSIST, udp_readcb, ev_udp); event_set(ev_udp6, fd_udp6, EV_READ|EV_PERSIST, udp_readcb, ev_udp6); @@ -1054,12 +1058,25 @@ udp_readcb(int fd, short event, void *ar linebuf[n] = '\0'; cvthname((struct sockaddr *)&sa, resolve, sizeof(resolve)); log_debug("cvthname res: %s", resolve); - printline(resolve, linebuf); + printline(0, resolve, linebuf); } else if (n == -1 && errno != EINTR && errno != EWOULDBLOCK) log_warn("recvfrom udp"); } void +sendsys_readcb(int fd, short event, void *arg) +{ + ssize_t n; + + n = recvfrom(fd, linebuf, LOG_MAXLINE, 0, NULL, 0); + if (n > 0) { + linebuf[n] = '\0'; + printline(KERNDATE, LocalHostName, linebuf); + } else if (n == -1 && errno != EINTR && errno != EWOULDBLOCK) + log_warn("recvfrom sendsyslog"); +} + +void unix_readcb(int fd, short event, void *arg) { struct sockaddr_un sa; @@ -1071,7 +1088,7 @@ unix_readcb(int fd, short event, void *a &salen); if (n > 0) { linebuf[n] = '\0'; - printline(LocalHostName, linebuf); + printline(0, LocalHostName, linebuf); } else if (n == -1 && errno != EINTR && errno != EWOULDBLOCK) log_warn("recvfrom unix"); } @@ -1306,13 +1323,13 @@ tcp_readcb(struct bufferevent *bufev, vo linebuf[MINIMUM(len, LOG_MAXLINE)] = '\0'; msg = linebuf; } - printline(p->p_hostname, msg); + printline(0, p->p_hostname, msg); evbuffer_drain(bufev->input, len); } /* Maximum frame has 5 digits, 1 space, MAXLINE chars, 1 new line. */ if (EVBUFFER_LENGTH(bufev->input) >= 5 + 1 + LOG_MAXLINE + 1) { log_debug(", use %zu bytes", EVBUFFER_LENGTH(bufev->input)); - printline(p->p_hostname, EVBUFFER_DATA(bufev->input)); + printline(0, p->p_hostname, EVBUFFER_DATA(bufev->input)); evbuffer_drain(bufev->input, -1); } else if (EVBUFFER_LENGTH(bufev->input) > 0) log_debug(", buffer %zu bytes", EVBUFFER_LENGTH(bufev->input)); @@ -1560,6 +1577,24 @@ usage(void) } /* + * Kernel prepends microtime(9) with space in front of log messages. + * It is called at beginnning of sendsyslog(2) and has 6 digits precision. + */ +size_t +parsekerntime(const char *msg, struct timeval *logtime) +{ + struct timeval time; + int timelen, timenum; + + timenum = sscanf(msg, "%lld.%06ld %n", + &time.tv_sec, &time.tv_usec, &timelen); + if (timenum != 2) + return (0); + *logtime = time; + return (timelen); +} + +/* * Parse a priority code of the form "<123>" into pri, and return the * length of the priority code including the surrounding angle brackets. */ @@ -1591,14 +1626,21 @@ parsepriority(const char *msg, int *pri) * on the appropriate log files. */ void -printline(char *hname, char *msg) +printline(int flags, char *hname, char *msg) { + struct timeval logtime; int pri; char *p, *q, line[LOG_MAXLINE + 4 + 1]; /* message, encoding, NUL */ /* test for special codes */ - pri = DEFUPRI; p = msg; + timerclear(&logtime); + if (flags & KERNDATE) { + p += parsekerntime(p, &logtime); + if (!timerisset(&logtime)) + flags &=~ KERNDATE; + } + pri = DEFUPRI; p += parsepriority(p, &pri); if (pri &~ (LOG_FACMASK|LOG_PRIMASK)) pri = DEFUPRI; @@ -1619,7 +1661,7 @@ printline(char *hname, char *msg) } line[LOG_MAXLINE] = *q = '\0'; - logline(pri, 0, hname, line); + logline(pri, flags, &logtime, hname, line); } /* @@ -1655,7 +1697,7 @@ printsys(char *msg) while (*p && (c = *p++) != '\n' && q < &line[sizeof(line) - 4]) q = vis(q, c, 0, 0); - logline(pri, flags, LocalHostName, line); + logline(pri, flags, NULL, LocalHostName, line); } } @@ -1677,21 +1719,20 @@ vlogmsg(int pri, const char *proc, const init_dropped++; return; } - logline(pri, ADDDATE, LocalHostName, msg); + logline(pri, ADDDATE, NULL, LocalHostName, msg); } -struct timeval now; - /* * Log a message to the appropriate log files, users, etc. based on * the priority. */ void -logline(int pri, int flags, char *from, char *msg) +logline(int pri, int flags, struct timeval *kerntime, char *from, char *msg) { struct filed *f; + struct timeval *logtime; int fac, msglen, prilev, i; - char timestamp[33]; + char timestamp[sizeof("2003-08-24T05:14:15.000003-07:00")]; char prog[NAME_MAX+1]; log_debug("logline: pri 0%o, flags 0x%x, from %s, msg %s", @@ -1768,21 +1809,25 @@ logline(int pri, int flags, char *from, } (void)gettimeofday(&now, NULL); + logtime = (flags & KERNDATE) ? kerntime : &now; if (flags & ADDDATE) { if (ZuluTime) { struct tm *tm; size_t l; - tm = gmtime(&now.tv_sec); + tm = gmtime(&logtime->tv_sec); l = strftime(timestamp, sizeof(timestamp), "%FT%T", tm); /* - * Use only millisecond precision as some time has - * passed since syslog(3) was called. + * Use millisecond precision for gettimeofday(2) as + * some time has passed since syslog(3) was called. + * Kernel time is more precise, use microseconds. */ snprintf(timestamp + l, sizeof(timestamp) - l, - ".%03ldZ", now.tv_usec / 1000); + ".%0*ldZ", (flags & KERNDATE) ? 6 : 3, + (flags & KERNDATE) ? logtime->tv_usec : + logtime->tv_usec / 1000); } else - strlcpy(timestamp, ctime(&now.tv_sec) + 4, 16); + strlcpy(timestamp, ctime(&logtime->tv_sec) + 4, 16); } /* extract facility and priority level */ @@ -2961,9 +3006,9 @@ getmsgbufsize(void) mib[0] = CTL_KERN; mib[1] = KERN_MSGBUFSIZE; - size = sizeof msgbufsize; + size = sizeof(msgbufsize); if (sysctl(mib, 2, &msgbufsize, &size, NULL, 0) == -1) { - log_debug("couldn't get kern.msgbufsize"); + log_warn("sysctl kern.msgbufsize"); return (0); } return (msgbufsize); @@ -3000,7 +3045,8 @@ markit(void) (void)gettimeofday(&now, NULL); MarkSeq += TIMERINTVL; if (MarkSeq >= MarkInterval) { - logline(LOG_INFO, ADDDATE|MARK, LocalHostName, "-- MARK --"); + logline(LOG_INFO, ADDDATE|MARK, NULL, LocalHostName, + "-- MARK --"); MarkSeq = 0; }