On Sat, Sep 05, 2020 at 08:11:21AM +0200, Otto Moerbeek wrote:
> On Fri, Sep 04, 2020 at 11:17:40PM +0200, Christian Weisgerber wrote:
>
> > Otto Moerbeek:
> >
> > > This takes the observed issue into account,
> > -snip-
> >
> > This works for my test case of a single peer and the two scenarios of
> > "can't send query" and "port unreachable".
> >
> > I notice that both cases are handled very differently:
> > * Can't send query: ntpd keeps retrying with a fast poll interval
> > of 60 seconds.
> > * Port unreachable: ntpd retries with a maximally slow poll interval
> > of 3000+ seconds.
> >
> > This may make sense, I don't know, but the rationale isn't obvious.
>
> Also note that I'm onlyreducing the trust level on one of the erorrs
> conditions. An invalid packet is just ignored. This is to avoid to
> possibility that somebody can easily spoof pakcets to ntpd and make it go
> into unsynced more with all peers untrusted.
>
> About the behaviour: is one of the confusing parts of ntpd. Not all
> transitions of the state engine are clear and the timeouts used are
> hard to follow sometimes.
>
> -Otto
This is a followup diff, that also includes a bug fix for something
semarie@ noticed: ntpd would go unsynced once in a while. After
debugging it turned out that the interval used to send a new poll and
the interval to check were out of sync: the scale factor would be
updated after the new interval was computed. If the scale factor
decreased, that could lead to going unsynced.
So this version of the diff includes a fix for that.
-Otto
Index: client.c
===================================================================
RCS file: /cvs/src/usr.sbin/ntpd/client.c,v
retrieving revision 1.113
diff -u -p -r1.113 client.c
--- client.c 30 Jan 2020 15:55:41 -0000 1.113
+++ client.c 7 Sep 2020 12:23:29 -0000
@@ -264,6 +264,12 @@ handle_auto(uint8_t trusted, double offs
priv_settime(offset, "");
}
+
+/*
+ * -1: Not processed, not an NTP message (e.g. icmp induced ECONNREFUSED)
+ * 0: Not prrocessed due to validation issues
+ * 1: NTP message validated and processed
+ */
int
client_dispatch(struct ntp_peer *p, u_int8_t settime, u_int8_t automatic)
{
@@ -278,7 +284,7 @@ client_dispatch(struct ntp_peer *p, u_in
} cmsgbuf;
struct cmsghdr *cmsg;
ssize_t size;
- double T1, T2, T3, T4;
+ double T1, T2, T3, T4, offset, delay;
time_t interval;
memset(&somsg, 0, sizeof(somsg));
@@ -297,7 +303,7 @@ client_dispatch(struct ntp_peer *p, u_in
errno == ENOPROTOOPT || errno == ENOENT) {
client_log_error(p, "recvmsg", errno);
set_next(p, error_interval());
- return (0);
+ return (-1);
} else
fatal("recvfrom");
}
@@ -418,16 +424,6 @@ client_dispatch(struct ntp_peer *p, u_in
} else
p->reply[p->shift].status.send_refid = msg.xmttime.fractionl;
- if (p->trustlevel < TRUSTLEVEL_PATHETIC)
- interval = scale_interval(INTERVAL_QUERY_PATHETIC);
- else if (p->trustlevel < TRUSTLEVEL_AGGRESSIVE)
- interval = (conf->settime && conf->automatic) ?
- INTERVAL_QUERY_ULTRA_VIOLENCE :
- scale_interval(INTERVAL_QUERY_AGGRESSIVE);
- else
- interval = scale_interval(INTERVAL_QUERY_NORMAL);
-
- set_next(p, interval);
p->state = STATE_REPLY_RECEIVED;
/* every received reply which we do not discard increases trust */
@@ -439,11 +435,8 @@ client_dispatch(struct ntp_peer *p, u_in
p->trustlevel++;
}
- log_debug("reply from %s: offset %f delay %f, "
- "next query %llds",
- log_sockaddr((struct sockaddr *)&p->addr->ss),
- p->reply[p->shift].offset, p->reply[p->shift].delay,
- (long long)interval);
+ offset = p->reply[p->shift].offset;
+ delay = p->reply[p->shift].delay;
client_update(p);
if (settime) {
@@ -453,10 +446,27 @@ client_dispatch(struct ntp_peer *p, u_in
priv_settime(p->reply[p->shift].offset, "");
}
+ if (p->trustlevel < TRUSTLEVEL_PATHETIC)
+ interval = scale_interval(INTERVAL_QUERY_PATHETIC);
+ else if (p->trustlevel < TRUSTLEVEL_AGGRESSIVE)
+ interval = (conf->settime && conf->automatic) ?
+ INTERVAL_QUERY_ULTRA_VIOLENCE :
+ scale_interval(INTERVAL_QUERY_AGGRESSIVE);
+ else
+ interval = scale_interval(INTERVAL_QUERY_NORMAL);
+
+ log_debug("reply from %s: offset %f delay %f, "
+ "next query %llds",
+ log_sockaddr((struct sockaddr *)&p->addr->ss),
+ offset, delay,
+ (long long)interval);
+
+ set_next(p, interval);
+
if (++p->shift >= OFFSET_ARRAY_SIZE)
p->shift = 0;
- return (0);
+ return (1);
}
int
Index: ntp.c
===================================================================
RCS file: /cvs/src/usr.sbin/ntpd/ntp.c,v
retrieving revision 1.166
diff -u -p -r1.166 ntp.c
--- ntp.c 30 Aug 2020 16:21:29 -0000 1.166
+++ ntp.c 7 Sep 2020 12:23:29 -0000
@@ -402,12 +402,29 @@ ntp_main(struct ntpd_conf *nconf, struct
for (; nfds > 0 && j < idx_clients; j++) {
if (pfd[j].revents & (POLLIN|POLLERR)) {
+ struct ntp_peer *pp = idx2peer[j - idx_peers];
+
nfds--;
- last_action = now;
- if (client_dispatch(idx2peer[j - idx_peers],
- conf->settime, conf->automatic) == -1) {
- log_warn("pipe write error (settime)");
- ntp_quit = 1;
+ switch (client_dispatch(pp, conf->settime,
+ conf->automatic)) {
+ case -1:
+ log_debug("no reply from %s "
+ "received", log_sockaddr(
+ (struct sockaddr *) &pp->addr->ss));
+ if (pp->trustlevel >=
+ TRUSTLEVEL_BADPEER &&
+ (pp->trustlevel /= 2) <
+ TRUSTLEVEL_BADPEER)
+ log_info("peer %s now invalid",
+ log_sockaddr(
+ (struct sockaddr *)
+ &pp->addr->ss));
+ break;
+ case 0: /* invalid replies are ignored */
+ break;
+ case 1:
+ last_action = now;
+ break;
}
}
}
@@ -433,7 +450,7 @@ ntp_main(struct ntpd_conf *nconf, struct
interval = INTERVAL_QUERY_NORMAL * conf->scale;
interval += SCALE_INTERVAL(interval) - 1;
if (conf->status.synced && last_action + 3 * interval < now) {
- log_info("clock is now unsynced");
+ log_info("clock is now considered unsynced %lld, %lld,
%lld", last_action, interval, now);
conf->status.synced = 0;
conf->scale = 1;
priv_dns(IMSG_UNSYNCED, NULL, 0);