Re: ntpd still fails to notice unreachable server

Otto Moerbeek Tue, 08 Sep 2020 23:01:32 -0700

On Sat, Sep 05, 2020 at 08:11:21AM +0200, Otto Moerbeek wrote:

> On Fri, Sep 04, 2020 at 11:17:40PM +0200, Christian Weisgerber wrote:
> 
> > Otto Moerbeek:
> > 
> > > This takes the observed issue into account,
> > -snip-
> > 
> > This works for my test case of a single peer and the two scenarios of
> > "can't send query" and "port unreachable".
> > 
> > I notice that both cases are handled very differently:
> > * Can't send query: ntpd keeps retrying with a fast poll interval
> >   of 60 seconds.
> > * Port unreachable: ntpd retries with a maximally slow poll interval
> >   of 3000+ seconds.
> > 
> > This may make sense, I don't know, but the rationale isn't obvious.
> 
> Also note that I'm onlyreducing the trust level on one of the erorrs
> conditions. An invalid packet is just ignored. This is to avoid to
> possibility that somebody can easily spoof pakcets to ntpd and make it go
> into unsynced more with all peers untrusted.
> 
> About the behaviour: is one of the confusing parts of ntpd.  Not all
> transitions of the state engine are clear and the timeouts used are
> hard to follow sometimes. 
> 
>       -Otto


This is a followup diff, that also includes a bug fix for something
semarie@ noticed: ntpd would go unsynced once in a while. After
debugging it turned out that the interval used to send a new poll and
the interval to check were out of sync: the scale factor would be
updated after the new interval was computed. If the scale factor
decreased, that could lead to going unsynced.

So this version of the diff includes a fix for that.

        -Otto

Index: client.c
===================================================================
RCS file: /cvs/src/usr.sbin/ntpd/client.c,v
retrieving revision 1.113
diff -u -p -r1.113 client.c
--- client.c    30 Jan 2020 15:55:41 -0000      1.113
+++ client.c    7 Sep 2020 12:23:29 -0000
@@ -264,6 +264,12 @@ handle_auto(uint8_t trusted, double offs
        priv_settime(offset, "");
 }
 
+
+/*
+ * -1: Not processed, not an NTP message (e.g. icmp induced  ECONNREFUSED)
+ *  0: Not prrocessed due to validation issues
+ *  1: NTP message validated and processed
+ */
 int
 client_dispatch(struct ntp_peer *p, u_int8_t settime, u_int8_t automatic)
 {
@@ -278,7 +284,7 @@ client_dispatch(struct ntp_peer *p, u_in
        } cmsgbuf;
        struct cmsghdr          *cmsg;
        ssize_t                  size;
-       double                   T1, T2, T3, T4;
+       double                   T1, T2, T3, T4, offset, delay;
        time_t                   interval;
 
        memset(&somsg, 0, sizeof(somsg));
@@ -297,7 +303,7 @@ client_dispatch(struct ntp_peer *p, u_in
                    errno == ENOPROTOOPT || errno == ENOENT) {
                        client_log_error(p, "recvmsg", errno);
                        set_next(p, error_interval());
-                       return (0);
+                       return (-1);
                } else
                        fatal("recvfrom");
        }
@@ -418,16 +424,6 @@ client_dispatch(struct ntp_peer *p, u_in
        } else
                p->reply[p->shift].status.send_refid = msg.xmttime.fractionl;
 
-       if (p->trustlevel < TRUSTLEVEL_PATHETIC)
-               interval = scale_interval(INTERVAL_QUERY_PATHETIC);
-       else if (p->trustlevel < TRUSTLEVEL_AGGRESSIVE)
-               interval = (conf->settime && conf->automatic) ?
-                   INTERVAL_QUERY_ULTRA_VIOLENCE :
-                   scale_interval(INTERVAL_QUERY_AGGRESSIVE);
-       else
-               interval = scale_interval(INTERVAL_QUERY_NORMAL);
-
-       set_next(p, interval);
        p->state = STATE_REPLY_RECEIVED;
 
        /* every received reply which we do not discard increases trust */
@@ -439,11 +435,8 @@ client_dispatch(struct ntp_peer *p, u_in
                p->trustlevel++;
        }
 
-       log_debug("reply from %s: offset %f delay %f, "
-           "next query %llds",
-           log_sockaddr((struct sockaddr *)&p->addr->ss),
-           p->reply[p->shift].offset, p->reply[p->shift].delay,
-           (long long)interval);
+       offset = p->reply[p->shift].offset;
+       delay = p->reply[p->shift].delay;
 
        client_update(p);
        if (settime) {
@@ -453,10 +446,27 @@ client_dispatch(struct ntp_peer *p, u_in
                        priv_settime(p->reply[p->shift].offset, "");
        }
 
+       if (p->trustlevel < TRUSTLEVEL_PATHETIC)
+               interval = scale_interval(INTERVAL_QUERY_PATHETIC);
+       else if (p->trustlevel < TRUSTLEVEL_AGGRESSIVE)
+               interval = (conf->settime && conf->automatic) ?
+                   INTERVAL_QUERY_ULTRA_VIOLENCE :
+                   scale_interval(INTERVAL_QUERY_AGGRESSIVE);
+       else
+               interval = scale_interval(INTERVAL_QUERY_NORMAL);
+
+       log_debug("reply from %s: offset %f delay %f, "
+           "next query %llds",
+           log_sockaddr((struct sockaddr *)&p->addr->ss),
+           offset, delay,
+           (long long)interval);
+
+       set_next(p, interval);
+
        if (++p->shift >= OFFSET_ARRAY_SIZE)
                p->shift = 0;
 
-       return (0);
+       return (1);
 }
 
 int
Index: ntp.c
===================================================================
RCS file: /cvs/src/usr.sbin/ntpd/ntp.c,v
retrieving revision 1.166
diff -u -p -r1.166 ntp.c
--- ntp.c       30 Aug 2020 16:21:29 -0000      1.166
+++ ntp.c       7 Sep 2020 12:23:29 -0000
@@ -402,12 +402,29 @@ ntp_main(struct ntpd_conf *nconf, struct
 
                for (; nfds > 0 && j < idx_clients; j++) {
                        if (pfd[j].revents & (POLLIN|POLLERR)) {
+                               struct ntp_peer *pp = idx2peer[j - idx_peers];
+
                                nfds--;
-                               last_action = now;
-                               if (client_dispatch(idx2peer[j - idx_peers],
-                                   conf->settime, conf->automatic) == -1) {
-                                       log_warn("pipe write error (settime)");
-                                       ntp_quit = 1;
+                               switch (client_dispatch(pp, conf->settime,
+                                   conf->automatic)) {
+                               case -1:
+                                       log_debug("no reply from %s "
+                                           "received", log_sockaddr(
+                                           (struct sockaddr *) &pp->addr->ss));
+                                       if (pp->trustlevel >=
+                                           TRUSTLEVEL_BADPEER &&
+                                           (pp->trustlevel /= 2) <
+                                           TRUSTLEVEL_BADPEER)
+                                               log_info("peer %s now invalid",
+                                                   log_sockaddr(
+                                                   (struct sockaddr *)
+                                                   &pp->addr->ss));
+                                       break;
+                               case 0: /* invalid replies are ignored */
+                                       break;
+                               case 1:
+                                       last_action = now;
+                                       break;
                                }
                        }
                }
@@ -433,7 +450,7 @@ ntp_main(struct ntpd_conf *nconf, struct
                interval = INTERVAL_QUERY_NORMAL * conf->scale;
                interval += SCALE_INTERVAL(interval) - 1;
                if (conf->status.synced && last_action + 3 * interval < now) {
-                       log_info("clock is now unsynced");
+                       log_info("clock is now considered unsynced %lld, %lld, 
%lld", last_action, interval, now);
                        conf->status.synced = 0;
                        conf->scale = 1;
                        priv_dns(IMSG_UNSYNCED, NULL, 0);

Re: ntpd still fails to notice unreachable server

Reply via email to