Hi,

When connecting to a TCP syslog server fails or an established TCP
connection terminates, make syslogd try to reconnect after an increasing
timeout (the wait doubles on each retry, capped at 600 seconds).

ok?

bluhm

Index: usr.sbin/syslogd/syslogd.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/usr.sbin/syslogd/syslogd.c,v
retrieving revision 1.140
diff -u -p -r1.140 syslogd.c
--- usr.sbin/syslogd/syslogd.c  8 Jan 2015 20:22:47 -0000       1.140
+++ usr.sbin/syslogd/syslogd.c  13 Jan 2015 22:32:36 -0000
@@ -135,6 +135,7 @@ struct filed {
                                /* @proto46://[hostname]:servname\0 */
                        struct sockaddr_storage f_addr;
                        struct bufferevent      *f_bufev;
+                       int     f_reconnectwait;
                } f_forw;               /* forwarding address */
                char    f_fname[MAXPATHLEN];
                struct {
@@ -265,7 +266,9 @@ void         udp_readcb(int, short, void *);
 void    unix_readcb(int, short, void *);
 int     tcp_socket(struct filed *);
 void    tcp_readcb(struct bufferevent *, void *);
+void    tcp_writecb(struct bufferevent *, void *);
 void    tcp_errorcb(struct bufferevent *, short, void *);
+void    tcp_connectcb(int, short, void *);
 void    die_signalcb(int, short, void *);
 void    mark_timercb(int, short, void *);
 void    init_signalcb(int, short, void *);
@@ -716,6 +719,18 @@ tcp_readcb(struct bufferevent *bufev, vo
 }
 
 void
+tcp_writecb(struct bufferevent *bufev, void *arg)
+{
+       struct filed    *f = arg;
+
+       /*
+        * Write succeeded, so the connection works; reset the reconnect wait.
+        */
+       dprintf("loghost \"%s\" successful write\n", f->f_un.f_forw.f_loghost);
+       f->f_un.f_forw.f_reconnectwait = 0;
+}
+
+void
 tcp_errorcb(struct bufferevent *bufev, short event, void *arg)
 {
        struct filed    *f = arg;
@@ -731,20 +746,67 @@ tcp_errorcb(struct bufferevent *bufev, s
                    f->f_un.f_forw.f_loghost, strerror(errno));
        dprintf("%s\n", ebuf);
 
+       /* The SIGHUP handler may also close the socket, so invalidate it. */
        close(f->f_file);
-       if ((f->f_file = tcp_socket(f)) == -1) {
-               /* XXX reconnect later */
-               bufferevent_free(bufev);
-               f->f_type = F_UNUSED;
-       } else {
-               /* XXX The messages in the output buffer may be out of sync. */
-               bufferevent_setfd(bufev, f->f_file);
-               bufferevent_enable(f->f_un.f_forw.f_bufev, EV_READ);
-       }
+       f->f_file = -1;
+
+       /*
+        * XXX The messages in the output buffer may be out of sync.
+        * Here we should clear the buffer or at least remove partial
+        * messages from the beginning.
+        */
+       tcp_connectcb(-1, 0, f);
+
+       /* Log the connection error to the fresh buffer after reconnecting. */
        logmsg(LOG_SYSLOG|LOG_WARNING, ebuf, LocalHostName, ADDDATE);
 }
 
 void
+tcp_connectcb(int fd, short event, void *arg)
+{
+       struct filed            *f = arg;
+       struct bufferevent      *bufev = f->f_un.f_forw.f_bufev;
+       struct timeval           to;
+       int                      s;
+
+       if ((event & EV_TIMEOUT) == 0 && f->f_un.f_forw.f_reconnectwait > 0)
+               goto retry;
+
+       /* Avoid busy reconnect loop: stay nonzero until a successful write. */
+       if (f->f_un.f_forw.f_reconnectwait == 0)
+               f->f_un.f_forw.f_reconnectwait = 1;
+
+       if ((s = tcp_socket(f)) == -1)
+               goto retry;
+
+       dprintf("tcp connect callback: success, event %#x\n", event);
+       bufferevent_setfd(bufev, s);
+       bufferevent_setcb(bufev, tcp_readcb, tcp_writecb, tcp_errorcb, f);
+       /*
+        * Although syslog is a write-only protocol, enable reading from
+        * the socket to detect connection close and errors.
+        */
+       bufferevent_enable(bufev, EV_READ|EV_WRITE);
+       f->f_file = s;
+
+       return;
+
+ retry:
+       f->f_un.f_forw.f_reconnectwait <<= 1;
+       if (f->f_un.f_forw.f_reconnectwait > 600)
+               f->f_un.f_forw.f_reconnectwait = 600;
+       to.tv_sec = f->f_un.f_forw.f_reconnectwait;
+       to.tv_usec = 0;
+
+       dprintf("tcp connect callback: retry, event %#x, wait %d\n",
+           event, f->f_un.f_forw.f_reconnectwait);
+       bufferevent_setfd(bufev, -1);
+       /* We can reuse the write event as the bufferevent is disabled. */
+       evtimer_set(&bufev->ev_write, tcp_connectcb, f);
+       evtimer_add(&bufev->ev_write, &to);
+}
+
+void
 usage(void)
 {
 
@@ -1712,22 +1774,16 @@ cfline(char *line, char *prog)
                        }
                        f->f_type = F_FORWUDP;
                } else if (strncmp(proto, "tcp", 3) == 0) {
-                       int s;
-
-                       if ((s = tcp_socket(f)) == -1)
-                               break;
-                       if ((f->f_un.f_forw.f_bufev = bufferevent_new(s,
-                           tcp_readcb, NULL, tcp_errorcb, f)) == NULL) {
+                       if ((f->f_un.f_forw.f_bufev = bufferevent_new(-1,
+                           tcp_readcb, tcp_writecb, tcp_errorcb, f)) == NULL) {
                                snprintf(ebuf, sizeof(ebuf),
                                    "bufferevent \"%s\"",
                                    f->f_un.f_forw.f_loghost);
                                logerror(ebuf);
-                               close(s);
                                break;
                        }
-                       bufferevent_enable(f->f_un.f_forw.f_bufev, EV_READ);
-                       f->f_file = s;
                        f->f_type = F_FORWTCP;
+                       tcp_connectcb(-1, 0, f);
                }
                break;
 

Reply via email to