When the RTR's Session ID changes (for example when the RTR server is
restarted), bgpd would incorreectly branch into the "received %s: bad
msg len:" path.
The length fields in the RTR PDU error messages are 32-bits, so we
should use ntohl() instead of ntohs(). While there, add an additional
length check against the length listed in the RTR payload.
The resulting logs are now more useful:
Jan 31 11:54:39 feather bgpd[27547]: rtr 127.0.0.1: state change idle -> idle,
reason: connection open
Jan 31 11:54:39 feather bgpd[27547]: rtr 127.0.0.1: received error: Corrupt
Data: Session ID doesn't match.
Jan 31 11:54:39 feather bgpd[27547]: rtr 127.0.0.1: state change idle ->
closed, reason: connection closed
Jan 31 11:54:46 feather bgpd[27547]: rtr 127.0.0.1: state change idle -> idle,
reason: connection open
Jan 31 11:54:46 feather bgpd[27547]: rtr 127.0.0.1: received error: Corrupt
Data: Session ID doesn't match.
Jan 31 11:54:46 feather bgpd[27547]: rtr 127.0.0.1: state change idle ->
closed, reason: connection closed
(... this goes on and on)
OK to commit the below?
There still is an open question: if we receive "Corrupt Data" (because
RTR Session ID changed), should bgpd really wait until
RTR_EVNT_TIMER_EXPIRE? Would it be desirable to somehow be able to
establish a proper connection sooner?
Kind regards,
Job
Index: usr.sbin/bgpctl/output.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpctl/output.c,v
retrieving revision 1.35
diff -u -p -r1.35 output.c
--- usr.sbin/bgpctl/output.c 24 Jan 2023 15:50:10 -0000 1.35
+++ usr.sbin/bgpctl/output.c 31 Jan 2023 12:00:54 -0000
@@ -1083,11 +1083,13 @@ show_rtr(struct ctl_show_rtr *rtr)
log_reason(rtr->last_sent_msg));
}
if (rtr->last_recv_error != NO_ERROR) {
- printf("Last received error: %s\n",
+ printf(" Last received error: %s",
log_rtr_error(rtr->last_recv_error));
if (rtr->last_recv_msg[0])
printf(" with reason \"%s\"",
log_reason(rtr->last_recv_msg));
+ else
+ printf("\n");
}
printf("\n");
Index: usr.sbin/bgpd/rtr_proto.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rtr_proto.c,v
retrieving revision 1.8
diff -u -p -r1.8 rtr_proto.c
--- usr.sbin/bgpd/rtr_proto.c 28 Dec 2022 21:30:16 -0000 1.8
+++ usr.sbin/bgpd/rtr_proto.c 31 Jan 2023 12:00:55 -0000
@@ -635,15 +635,24 @@ rtr_parse_error(struct rtr_session *rs,
uint16_t errcode;
memcpy(&rh, buf, sizeof(rh));
+ errcode = ntohs(rh.session_id);
+ rh.length = ntohl(rh.length);
+
+ if (len != rh.length) {
+ log_warnx("rtr %s: received %s: bad len: %u byte",
+ log_rtr(rs), log_rtr_type(ERROR_REPORT), rh.length);
+ rtr_fsm(rs, RTR_EVNT_CON_CLOSED);
+ return -1;
+ }
+
buf += sizeof(struct rtr_header);
len -= sizeof(struct rtr_header);
- errcode = ntohs(rh.session_id);
memcpy(&pdu_len, buf, sizeof(pdu_len));
- pdu_len = ntohs(pdu_len);
+ pdu_len = ntohl(pdu_len);
if (len < pdu_len + sizeof(pdu_len)) {
- log_warnx("rtr %s: received %s: bad pdu len: %u byte",
+ log_warnx("rtr %s: received %s: bad encapped pdu len: %u byte",
log_rtr(rs), log_rtr_type(ERROR_REPORT), pdu_len);
rtr_fsm(rs, RTR_EVNT_CON_CLOSED);
return -1;
@@ -654,7 +663,7 @@ rtr_parse_error(struct rtr_session *rs,
len -= pdu_len + sizeof(pdu_len);
memcpy(&msg_len, buf, sizeof(msg_len));
- msg_len = ntohs(msg_len);
+ msg_len = ntohl(msg_len);
if (len < msg_len + sizeof(msg_len)) {
log_warnx("rtr %s: received %s: bad msg len: %u byte",