Claudio, How best to test this change proposal? Should this maybe be tested on one of the YYCIX route servers?
I'll let it run on my home router; if that doesn't cause issues in a week or so, we can consider rs2.yycix.ca. Kind regards, Job On Fri, Mar 02, 2018 at 04:55:23PM +0100, Claudio Jeker wrote: > On Wed, Feb 07, 2018 at 05:52:09AM +0100, Claudio Jeker wrote: > > This diff changes the way bgpd does updates. Instead of having its own > > special update queue/tree it uses a regular RIB (Adj-RIB-Out) to store all > > updates to be sent. Stuff that has been sent is linked to the prefixes > > queue. On the peer there are also queues for updates and withdraws. > > The whole update code becomes a lot simpler but also results in the bulk > > of the diff. Other changes include the bgpctl show rib handling (we can > > just walk the Adj-RIB-Out now). Last but not least the EOR records are > > also now a magic rde_aspath (flag F_ATTR_EOR) which is added to the update > > queue. > > > > This diff is still very large and the changes are intrusive so reviews and > > testing is very welcome. > > No news on this? Anyone? 
> > -- > :wq Claudio > > Index: rde.c > =================================================================== > RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v > retrieving revision 1.377 > diff -u -p -r1.377 rde.c > --- rde.c 7 Feb 2018 00:02:02 -0000 1.377 > +++ rde.c 7 Feb 2018 00:02:18 -0000 > @@ -80,8 +80,6 @@ void rde_dump_rib_as(struct prefix *, > int); > void rde_dump_filter(struct prefix *, > struct ctl_show_rib_request *); > -void rde_dump_filterout(struct rde_peer *, struct prefix *, > - struct ctl_show_rib_request *); > void rde_dump_upcall(struct rib_entry *, void *); > void rde_dump_prefix_upcall(struct rib_entry *, void *); > void rde_dump_ctx_new(struct ctl_show_rib_request *, pid_t, > @@ -2262,71 +2260,33 @@ rde_dump_rib_as(struct prefix *p, struct > } > > void > -rde_dump_filterout(struct rde_peer *peer, struct prefix *p, > - struct ctl_show_rib_request *req) > +rde_dump_filter(struct prefix *p, struct ctl_show_rib_request *req) > { > - struct bgpd_addr addr; > - struct rde_aspath *asp, *fasp; > - enum filter_actions a; > + struct rde_aspath *asp; > > - if (up_test_update(peer, p) != 1) > + if (req->peerid && req->peerid != prefix_peer(p)->conf.id) > return; > + if (p->flags & F_PREFIX_USE_PEER) > + return; /* pending withdraw, skip */ > > - pt_getaddr(p->re->prefix, &addr); > asp = prefix_aspath(p); > - a = rde_filter(out_rules, &fasp, peer, asp, &addr, > - p->re->prefix->prefixlen, asp->peer); > - if (fasp) > - fasp->peer = asp->peer; > - else > - fasp = asp; > - > - if (a == ACTION_ALLOW) > - rde_dump_rib_as(p, fasp, req->pid, req->flags); > - > - if (fasp != asp) > - path_put(fasp); > -} > - > -void > -rde_dump_filter(struct prefix *p, struct ctl_show_rib_request *req) > -{ > - struct rde_peer *peer; > - struct rde_aspath *asp; > - > - if (req->flags & F_CTL_ADJ_IN || > - !(req->flags & (F_CTL_ADJ_IN|F_CTL_ADJ_OUT))) { > - asp = prefix_aspath(p); > - if (req->peerid && req->peerid != asp->peer->conf.id) > - return; > - if (req->type == 
IMSG_CTL_SHOW_RIB_AS && > - !aspath_match(asp->aspath->data, asp->aspath->len, > - &req->as, req->as.as)) > - return; > - if (req->type == IMSG_CTL_SHOW_RIB_COMMUNITY && > - !community_match(asp, req->community.as, > - req->community.type)) > - return; > - if (req->type == IMSG_CTL_SHOW_RIB_EXTCOMMUNITY && > - !community_ext_match(asp, &req->extcommunity, 0)) > - return; > - if (req->type == IMSG_CTL_SHOW_RIB_LARGECOMMUNITY && > - !community_large_match(asp, req->large_community.as, > - req->large_community.ld1, req->large_community.ld2)) > - return; > - if ((req->flags & F_CTL_ACTIVE) && p->re->active != p) > - return; > - rde_dump_rib_as(p, asp, req->pid, req->flags); > - } else if (req->flags & F_CTL_ADJ_OUT) { > - if (p->re->active != p) > - /* only consider active prefix */ > - return; > - if (req->peerid) { > - if ((peer = peer_get(req->peerid)) != NULL) > - rde_dump_filterout(peer, p, req); > - return; > - } > - } > + if (req->type == IMSG_CTL_SHOW_RIB_AS && > + !aspath_match(asp->aspath->data, asp->aspath->len, > + &req->as, req->as.as)) > + return; > + if (req->type == IMSG_CTL_SHOW_RIB_COMMUNITY && > + !community_match(asp, req->community.as, req->community.type)) > + return; > + if (req->type == IMSG_CTL_SHOW_RIB_EXTCOMMUNITY && > + !community_ext_match(asp, &req->extcommunity, 0)) > + return; > + if (req->type == IMSG_CTL_SHOW_RIB_LARGECOMMUNITY && > + !community_large_match(asp, req->large_community.as, > + req->large_community.ld1, req->large_community.ld2)) > + return; > + if ((req->flags & F_CTL_ACTIVE) && p->re->active != p) > + return; > + rde_dump_rib_as(p, asp, req->pid, req->flags); > } > > void > @@ -2375,7 +2335,11 @@ rde_dump_ctx_new(struct ctl_show_rib_req > sizeof(error)); > return; > } > - if ((rib = rib_find(req->rib)) == NULL) { > + if (req->flags & F_CTL_ADJ_IN) > + rib = &ribs[RIB_ADJ_IN].rib; > + else if (req->flags & F_CTL_ADJ_OUT) > + rib = &ribs[RIB_ADJ_OUT].rib; > + else if ((rib = rib_find(req->rib)) == NULL) { > 
log_warnx("rde_dump_ctx_new: no such rib %s", req->rib); > error = CTL_RES_NOSUCHPEER; > imsg_compose(ibuf_se_ctl, IMSG_CTL_RESULT, 0, pid, -1, &error, > @@ -2749,7 +2713,6 @@ void > rde_update6_queue_runner(u_int8_t aid) > { > struct rde_peer *peer; > - u_char *b; > int r, sent, max = RDE_RUNNER_ROUNDS / 2; > u_int16_t len; > > @@ -2762,13 +2725,12 @@ rde_update6_queue_runner(u_int8_t aid) > if (peer->state != PEER_UP) > continue; > len = sizeof(queue_buf) - MSGSIZE_HEADER; > - b = up_dump_mp_unreach(queue_buf, &len, peer, aid); > - > - if (b == NULL) > + r = up_dump_mp_unreach(queue_buf, len, peer, aid); > + if (r == -1) > continue; > /* finally send message to SE */ > if (imsg_compose(ibuf_se, IMSG_UPDATE, peer->conf.id, > - 0, -1, b, len) == -1) > + 0, -1, queue_buf, r) == -1) > fatal("%s %d imsg_compose error", __func__, > __LINE__); > sent++; > @@ -2786,7 +2748,7 @@ rde_update6_queue_runner(u_int8_t aid) > if (peer->state != PEER_UP) > continue; > len = sizeof(queue_buf) - MSGSIZE_HEADER; > - r = up_dump_mp_reach(queue_buf, &len, peer, aid); > + r = up_dump_mp_reach(queue_buf, len, peer, aid); > switch (r) { > case -2: > continue; > @@ -2794,13 +2756,11 @@ rde_update6_queue_runner(u_int8_t aid) > peer_send_eor(peer, aid); > continue; > default: > - b = queue_buf + r; > break; > } > - > /* finally send message to SE */ > if (imsg_compose(ibuf_se, IMSG_UPDATE, peer->conf.id, > - 0, -1, b, len) == -1) > + 0, -1, queue_buf, r) == -1) > fatal("%s %d imsg_compose error", __func__, > __LINE__); > sent++; > @@ -2929,8 +2889,8 @@ rde_reload_done(void) > peer->reconf_out = 0; > peer->reconf_rib = 0; > if (peer->rib != rib_find(peer->conf.rib)) { > - rib_dump(peer->rib, rde_softreconfig_unload_peer, peer, > - AID_UNSPEC); > + rib_dump(&ribs[RIB_ADJ_OUT].rib, > + rde_softreconfig_unload_peer, peer, AID_UNSPEC); > peer->rib = rib_find(peer->conf.rib); > if (peer->rib == NULL) > fatalx("King Bula's peer met an unknown RIB"); > @@ -3060,83 +3020,34 @@ 
rde_softreconfig_in(struct rib_entry *re > void > rde_softreconfig_out(struct rib_entry *re, void *ptr) > { > - struct prefix *p = re->active; > - struct pt_entry *pt; > + struct prefix *new = re->active; > struct rde_peer *peer = ptr; > - struct rde_aspath *oasp, *nasp; > - enum filter_actions oa, na; > - struct bgpd_addr addr; > - > - if (peer->conf.id == 0) > - fatalx("King Bula troubled by bad peer"); > > - if (p == NULL) > + if (new == NULL) > return; > > - pt = re->prefix; > - pt_getaddr(pt, &addr); > - > - if (up_test_update(peer, p) != 1) > - return; > - > - oa = rde_filter(out_rules_tmp, &oasp, peer, prefix_aspath(p), > - &addr, pt->prefixlen, prefix_peer(p)); > - na = rde_filter(out_rules, &nasp, peer, prefix_aspath(p), > - &addr, pt->prefixlen, prefix_peer(p)); > - oasp = oasp != NULL ? oasp : prefix_aspath(p); > - nasp = nasp != NULL ? nasp : prefix_aspath(p); > - > - /* go through all 4 possible combinations */ > - /* if (oa == ACTION_DENY && na == ACTION_DENY) */ > - /* nothing todo */ > - if (oa == ACTION_DENY && na == ACTION_ALLOW) { > - /* send update */ > - up_generate(peer, nasp, &addr, pt->prefixlen); > - } else if (oa == ACTION_ALLOW && na == ACTION_DENY) { > - /* send withdraw */ > - up_generate(peer, NULL, &addr, pt->prefixlen); > - } else if (oa == ACTION_ALLOW && na == ACTION_ALLOW) { > - /* send update if path attributes changed */ > - if (path_compare(nasp, oasp) != 0) > - up_generate(peer, nasp, &addr, pt->prefixlen); > - } > - > - if (oasp != prefix_aspath(p)) > - path_put(oasp); > - if (nasp != prefix_aspath(p)) > - path_put(nasp); > + /* > + * path_update is smart enough to only send out updates to > + * prefixes that actually changed. So just regenerate all > + * updates. 
> + */ > + up_generate_updates(out_rules, peer, new, new); > } > > void > rde_softreconfig_unload_peer(struct rib_entry *re, void *ptr) > { > struct rde_peer *peer = ptr; > - struct prefix *p = re->active; > - struct pt_entry *pt; > - struct rde_aspath *oasp; > - enum filter_actions oa; > + struct prefix *p; > struct bgpd_addr addr; > > - pt = re->prefix; > - pt_getaddr(pt, &addr); > - > - /* check if prefix was announced */ > - if (up_test_update(peer, p) != 1) > + p = prefix_bypeer(re, peer, 0); > + if (p == NULL) > return; > > - oa = rde_filter(out_rules_tmp, &oasp, peer, prefix_aspath(p), > - &addr, pt->prefixlen, prefix_peer(p)); > - oasp = oasp != NULL ? oasp : prefix_aspath(p); > - > - if (oa == ACTION_DENY) > - /* nothing todo */ > - goto done; > - > - /* send withdraw */ > - up_generate(peer, NULL, &addr, pt->prefixlen); > -done: > - if (oasp != prefix_aspath(p)) > - path_put(oasp); > + pt_getaddr(p->re->prefix, &addr); > + prefix_withdraw(&ribs[RIB_ADJ_OUT].rib, peer, &addr, > + p->re->prefix->prefixlen); > } > > /* > Index: rde.h > =================================================================== > RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v > retrieving revision 1.166 > diff -u -p -r1.166 rde.h > --- rde.h 7 Feb 2018 00:02:02 -0000 1.166 > +++ rde.h 7 Feb 2018 00:34:39 -0000 > @@ -47,14 +47,10 @@ LIST_HEAD(prefix_list, prefix); > TAILQ_HEAD(prefix_queue, prefix); > LIST_HEAD(aspath_head, rde_aspath); > TAILQ_HEAD(aspath_queue, rde_aspath); > -RB_HEAD(uptree_prefix, update_prefix); > -RB_HEAD(uptree_attr, update_attr); > > struct rib_desc; > struct rib; > RB_HEAD(rib_tree, rib_entry); > -TAILQ_HEAD(uplist_prefix, update_prefix); > -TAILQ_HEAD(uplist_attr, update_attr); > > struct rde_peer { > LIST_ENTRY(rde_peer) hash_l; /* hash list over all peers */ > @@ -64,10 +60,8 @@ struct rde_peer { > struct bgpd_addr remote_addr; > struct bgpd_addr local_v4_addr; > struct bgpd_addr local_v6_addr; > - struct uptree_prefix up_prefix; > - struct uptree_attr up_attrs; 
> - struct uplist_attr updates[AID_MAX]; > - struct uplist_prefix withdraws[AID_MAX]; > + struct aspath_queue updates[AID_MAX]; > + struct prefix_queue withdraws[AID_MAX]; > struct capabilities capa; > time_t staletime[AID_MAX]; > u_int64_t prefix_rcvd_update; > @@ -178,7 +172,8 @@ struct path_table { > #define F_NEXTHOP_MASK 0x0f000 > #define F_ATTR_PARSE_ERR 0x10000 /* parse error, not eligable */ > #define F_ATTR_LINKED 0x20000 /* if set path is on various > lists */ > -#define F_ATTR_UPDATE 0x20000 /* if set linked on update_l */ > +#define F_ATTR_UPDATE 0x40000 /* if set linked on update_l */ > +#define F_ATTR_EOR 0x80000 /* magic marker for EOR objects > */ > > > #define ORIGIN_IGP 0 > @@ -204,6 +199,7 @@ struct rde_aspath { > u_int16_t rtlabelid; /* route label id */ > u_int16_t pftableid; /* pf table id */ > u_int8_t origin; > + u_int8_t aid; > u_int8_t others_len; > }; > > @@ -314,12 +310,14 @@ struct prefix { > struct rib_entry *re; > union { > struct rde_aspath *_aspath; > + struct rde_peer *_peer; > } _p; > time_t lastchange; > int flags; > }; > > #define F_PREFIX_USE_UPDATES 0x01 /* linked onto the updates list */ > +#define F_PREFIX_USE_PEER 0x02 /* use _peer instead of _aspath */ > > extern struct rde_memstats rdemem; > > @@ -479,11 +477,14 @@ void path_destroy(struct rde_aspath *) > int path_empty(struct rde_aspath *); > struct rde_aspath *path_copy(struct rde_aspath *); > struct rde_aspath *path_get(void); > +struct rde_aspath *path_get_eor(struct rde_peer *, u_int8_t); > void path_put(struct rde_aspath *); > > #define PREFIX_SIZE(x) (((x) + 7) / 8 + 1) > int prefix_remove(struct rib *, struct rde_peer *, > struct bgpd_addr *, int, u_int32_t); > +void prefix_withdraw(struct rib *, struct rde_peer *, > + struct bgpd_addr *, int); > int prefix_write(u_char *, int, struct bgpd_addr *, u_int8_t); > int prefix_writebuf(struct ibuf *, struct bgpd_addr *, u_int8_t); > struct prefix *prefix_bypeer(struct rib_entry *, struct rde_peer *, > @@ -497,13 +498,18 
@@ void prefix_relink(struct prefix *, st > static inline struct rde_aspath * > prefix_aspath(struct prefix *p) > { > + if (p->flags & F_PREFIX_USE_PEER) > + fatalx("prefix_aspath: prefix has no aspath"); > return (p->_p._aspath); > } > > static inline struct rde_peer * > prefix_peer(struct prefix *p) > { > - return (p->_p._aspath->peer); > + if (p->flags & F_PREFIX_USE_PEER) > + return (p->_p._peer); > + else > + return (p->_p._aspath->peer); > } > > void nexthop_init(u_int32_t); > @@ -521,19 +527,15 @@ int nexthop_compare(struct nexthop *, > void up_init(struct rde_peer *); > void up_down(struct rde_peer *); > int up_test_update(struct rde_peer *, struct prefix *); > -int up_generate(struct rde_peer *, struct rde_aspath *, > - struct bgpd_addr *, u_int8_t); > void up_generate_updates(struct filter_head *, struct rde_peer *, > struct prefix *, struct prefix *); > void up_generate_default(struct filter_head *, struct rde_peer *, > u_int8_t); > int up_generate_marker(struct rde_peer *, u_int8_t); > -int up_dump_prefix(u_char *, int, struct uplist_prefix *, > +int up_dump_prefix(u_char *, int, struct prefix_queue *, > struct rde_peer *); > int up_dump_attrnlri(u_char *, int, struct rde_peer *); > -u_char *up_dump_mp_unreach(u_char *, u_int16_t *, struct > rde_peer *, > - u_int8_t); > -int up_dump_mp_reach(u_char *, u_int16_t *, struct rde_peer *, > - u_int8_t); > +int up_dump_mp_unreach(u_char *, int, struct rde_peer *, u_int8_t); > +int up_dump_mp_reach(u_char *, int, struct rde_peer *, u_int8_t); > > #endif /* __RDE_H__ */ > Index: rde_rib.c > =================================================================== > RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v > retrieving revision 1.158 > diff -u -p -r1.158 rde_rib.c > --- rde_rib.c 7 Feb 2018 00:02:02 -0000 1.158 > +++ rde_rib.c 7 Feb 2018 00:04:42 -0000 > @@ -405,6 +405,8 @@ path_update(struct rib *rib, struct rde_ > struct prefix *p; > int pflag = 0; > > + nasp->aid = prefix->aid; > + > if (nasp->pftableid) { > 
rde_send_pftable(nasp->pftableid, prefix, prefixlen, 0); > rde_send_pftable_commit(); > @@ -432,6 +434,20 @@ path_update(struct rib *rib, struct rde_ > path_link(asp, peer); > } > > + if (flag & F_ATTR_UPDATE) { > + struct aspath_queue *upl = &peer->updates[asp->aid]; > + > + if (asp->flags & F_ATTR_UPDATE) { > + TAILQ_REMOVE(upl, asp, update_l); > + peer->up_acnt--; > + } > + TAILQ_INSERT_TAIL(upl, asp, update_l); > + asp->flags |= F_ATTR_UPDATE; > + peer->up_acnt++; > + > + pflag = F_PREFIX_USE_UPDATES; > + } > + > /* If the prefix was found move it else add it to the aspath. */ > if (p != NULL) > prefix_move(asp, p, pflag); > @@ -451,6 +467,10 @@ path_compare(struct rde_aspath *a, struc > return (1); > else if (a == NULL) > return (-1); > + if (a->aid > b->aid) > + return (1); > + if (a->aid < b->aid) > + return (-1); > if ((a->flags & ~(F_ATTR_LINKED | F_ATTR_UPDATE)) > > (b->flags & ~(F_ATTR_LINKED | F_ATTR_UPDATE))) > return (1); > @@ -611,10 +631,12 @@ path_destroy(struct rde_aspath *asp) > > nexthop_unlink(asp); > LIST_REMOVE(asp, path_l); > + if (asp->flags & F_ATTR_UPDATE) > + TAILQ_REMOVE(&asp->peer->updates[asp->aid], asp, update_l); > TAILQ_REMOVE(&asp->peer->path_h, asp, peer_l); > asp->peer = NULL; > asp->nexthop = NULL; > - asp->flags &= ~F_ATTR_LINKED; > + asp->flags &= ~(F_ATTR_LINKED | F_ATTR_UPDATE); > > path_put(asp); > } > @@ -665,6 +687,7 @@ path_copy(struct rde_aspath *asp) > nasp->lpref = asp->lpref; > nasp->weight = asp->weight; > nasp->origin = asp->origin; > + nasp->aid = asp->aid; > nasp->rtlabelid = asp->rtlabelid; > rtlabel_ref(nasp->rtlabelid); > nasp->pftableid = asp->pftableid; > @@ -691,6 +714,7 @@ path_get(void) > TAILQ_INIT(&asp->updates); > asp->origin = ORIGIN_INCOMPLETE; > asp->lpref = DEFAULT_LPREF; > + /* aid = 0 */ > /* med = 0 */ > /* weight = 0 */ > /* rtlabel = 0 */ > @@ -698,6 +722,20 @@ path_get(void) > return (asp); > } > > +/* create a special rde_aspath representing a eor record */ > +struct rde_aspath * > 
+path_get_eor(struct rde_peer *peer, u_int8_t aid) > +{ > + struct rde_aspath *asp; > + > + asp = path_get(); > + asp->flags = F_ATTR_EOR; > + asp->aid = aid; > + path_link(asp, peer); > + > + return (asp); > +} > + > /* free an unlinked element */ > void > path_put(struct rde_aspath *asp) > @@ -861,6 +899,36 @@ prefix_remove(struct rib *rib, struct rd > return (1); > } > > +/* > + * Withdraw a prefix from the Adj-RIB-Out, this unlinks the aspath but leaves > + * the prefix in the RIB linked to the peer withdraw list. > + */ > +void > +prefix_withdraw(struct rib *rib, struct rde_peer *peer, > + struct bgpd_addr *prefix, int prefixlen) > +{ > + struct prefix *p; > + struct rib_entry *re; > + struct rde_aspath *asp; > + > + re = rib_get(rib, prefix, prefixlen); > + if (re == NULL) /* Got a dummy withdrawn request */ > + return; > + > + p = prefix_bypeer(re, peer, 0); > + if (p == NULL) /* Got a dummy withdrawn request. */ > + return; > + > + /* unlink aspath ...*/ > + asp = prefix_aspath(p); > + PREFIX_COUNT(asp, -1); > + prefix_relink(p, NULL, F_PREFIX_USE_PEER); > + > + if (path_empty(asp)) > + path_destroy(asp); > +} > + > + > /* dump a prefix into specified buffer */ > int > prefix_write(u_char *buf, int len, struct bgpd_addr *prefix, u_int8_t plen) > @@ -934,6 +1002,9 @@ prefix_bypeer(struct rib_entry *re, stru > LIST_FOREACH(p, &re->prefix_h, rib_l) { > if (prefix_peer(p) != peer) > continue; > + if (p->flags & F_PREFIX_USE_PEER) > + /* Adj-RIB-Out withdrawn route */ > + continue; > if (prefix_aspath(p)->flags & flags && > (flags & F_ANN_DYNAMIC) != > (prefix_aspath(p)->flags & F_ANN_DYNAMIC)) > @@ -991,15 +1062,16 @@ prefix_updateall(struct rde_aspath *asp, > void > prefix_destroy(struct prefix *p) > { > - struct rde_aspath *asp; > - > - asp = prefix_aspath(p); > - PREFIX_COUNT(asp, -1); > + struct rde_aspath *asp = NULL; > > + if ((p->flags & F_PREFIX_USE_PEER) == 0) { > + asp = prefix_aspath(p); > + PREFIX_COUNT(asp, -1); > + } > prefix_unlink(p); > 
prefix_free(p); > > - if (path_empty(asp)) > + if (asp && path_empty(asp)) > path_destroy(asp); > } > > @@ -1030,6 +1102,46 @@ prefix_network_clean(struct rde_peer *pe > } > > /* > + * Relink a prefix onto the right queue. > + */ > +void > +prefix_relink(struct prefix *p, struct rde_aspath *asp, int flag) > +{ > + struct prefix_queue *pq; > + struct rde_peer *peer = prefix_peer(p); > + > + /* unhook prefix */ > + if (p->flags & F_PREFIX_USE_PEER) > + pq = &peer->withdraws[p->re->prefix->aid]; > + else if (p->flags & F_PREFIX_USE_UPDATES) { > + if (asp && asp != prefix_aspath(p)) > + fatalx("prefix_relink: move between aspaths"); > + pq = &prefix_aspath(p)->updates; > + } else { > + if (asp && asp != prefix_aspath(p)) > + fatalx("prefix_relink: move between aspaths"); > + pq = &prefix_aspath(p)->prefixes; > + } > + > + TAILQ_REMOVE(pq, p, path_l); > + p->flags &= ~(F_PREFIX_USE_PEER | F_PREFIX_USE_UPDATES); > + > + if (flag & F_PREFIX_USE_PEER) { > + pq = &peer->withdraws[p->re->prefix->aid]; > + p->_p._peer = peer; > + } else if (flag & F_PREFIX_USE_UPDATES) { > + pq = &asp->updates; > + p->_p._aspath = asp; > + } else { > + pq = &asp->prefixes; > + p->_p._aspath = asp; > + } > + > + TAILQ_INSERT_HEAD(pq, p, path_l); > + p->flags |= flag; > +} > + > +/* > * Link a prefix into the different parent objects. 
> */ > static void > @@ -1064,7 +1176,9 @@ prefix_unlink(struct prefix *pref) > LIST_REMOVE(pref, rib_l); > prefix_evaluate(NULL, re); > > - if (pref->flags & F_PREFIX_USE_UPDATES) > + if (pref->flags & F_PREFIX_USE_PEER) > + pq = &prefix_peer(pref)->withdraws[re->prefix->aid]; > + else if (pref->flags & F_PREFIX_USE_UPDATES) > pq = &prefix_aspath(pref)->updates; > else > pq = &prefix_aspath(pref)->prefixes; > Index: rde_update.c > =================================================================== > RCS file: /cvs/src/usr.sbin/bgpd/rde_update.c,v > retrieving revision 1.88 > diff -u -p -r1.88 rde_update.c > --- rde_update.c 5 Feb 2018 03:55:54 -0000 1.88 > +++ rde_update.c 5 Feb 2018 23:44:56 -0000 > @@ -27,45 +27,6 @@ > #include "rde.h" > #include "log.h" > > -in_addr_t up_get_nexthop(struct rde_peer *, struct rde_aspath *); > -int up_generate_mp_reach(struct rde_peer *, struct update_attr *, > - struct rde_aspath *, u_int8_t); > -int up_generate_attr(struct rde_peer *, struct update_attr *, > - struct rde_aspath *, u_int8_t); > - > -/* update stuff. 
*/ > -struct update_prefix { > - TAILQ_ENTRY(update_prefix) prefix_l; > - RB_ENTRY(update_prefix) entry; > - struct uplist_prefix *prefix_h; > - struct bgpd_addr prefix; > - int prefixlen; > -}; > - > -struct update_attr { > - TAILQ_ENTRY(update_attr) attr_l; > - RB_ENTRY(update_attr) entry; > - struct uplist_prefix prefix_h; > - u_char *attr; > - u_char *mpattr; > - u_int32_t attr_hash; > - u_int16_t attr_len; > - u_int16_t mpattr_len; > -}; > - > -void up_clear(struct uplist_attr *, struct uplist_prefix *); > -int up_prefix_cmp(struct update_prefix *, struct update_prefix *); > -int up_attr_cmp(struct update_attr *, struct update_attr *); > -int up_add(struct rde_peer *, struct update_prefix *, struct update_attr *); > - > -RB_PROTOTYPE(uptree_prefix, update_prefix, entry, up_prefix_cmp) > -RB_GENERATE(uptree_prefix, update_prefix, entry, up_prefix_cmp) > - > -RB_PROTOTYPE(uptree_attr, update_attr, entry, up_attr_cmp) > -RB_GENERATE(uptree_attr, update_attr, entry, up_attr_cmp) > - > -SIPHASH_KEY uptree_key; > - > void > up_init(struct rde_peer *peer) > { > @@ -75,48 +36,21 @@ up_init(struct rde_peer *peer) > TAILQ_INIT(&peer->updates[i]); > TAILQ_INIT(&peer->withdraws[i]); > } > - RB_INIT(&peer->up_prefix); > - RB_INIT(&peer->up_attrs); > peer->up_pcnt = 0; > peer->up_acnt = 0; > peer->up_nlricnt = 0; > peer->up_wcnt = 0; > - arc4random_buf(&uptree_key, sizeof(uptree_key)); > -} > - > -void > -up_clear(struct uplist_attr *updates, struct uplist_prefix *withdraws) > -{ > - struct update_attr *ua; > - struct update_prefix *up; > - > - while ((ua = TAILQ_FIRST(updates)) != NULL) { > - TAILQ_REMOVE(updates, ua, attr_l); > - while ((up = TAILQ_FIRST(&ua->prefix_h)) != NULL) { > - TAILQ_REMOVE(&ua->prefix_h, up, prefix_l); > - free(up); > - } > - free(ua->attr); > - free(ua->mpattr); > - free(ua); > - } > - > - while ((up = TAILQ_FIRST(withdraws)) != NULL) { > - TAILQ_REMOVE(withdraws, up, prefix_l); > - free(up); > - } > } > > void > up_down(struct rde_peer *peer) > 
{ > + struct prefix *p; > u_int8_t i; > > for (i = 0; i < AID_MAX; i++) > - up_clear(&peer->updates[i], &peer->withdraws[i]); > - > - RB_INIT(&peer->up_prefix); > - RB_INIT(&peer->up_attrs); > + while ((p = TAILQ_FIRST(&peer->withdraws[i])) != NULL) > + prefix_destroy(p); > > peer->up_pcnt = 0; > peer->up_acnt = 0; > @@ -125,153 +59,6 @@ up_down(struct rde_peer *peer) > } > > int > -up_prefix_cmp(struct update_prefix *a, struct update_prefix *b) > -{ > - int i; > - > - if (a->prefix.aid < b->prefix.aid) > - return (-1); > - if (a->prefix.aid > b->prefix.aid) > - return (1); > - > - switch (a->prefix.aid) { > - case AID_INET: > - if (ntohl(a->prefix.v4.s_addr) < ntohl(b->prefix.v4.s_addr)) > - return (-1); > - if (ntohl(a->prefix.v4.s_addr) > ntohl(b->prefix.v4.s_addr)) > - return (1); > - break; > - case AID_INET6: > - i = memcmp(&a->prefix.v6, &b->prefix.v6, > - sizeof(struct in6_addr)); > - if (i > 0) > - return (1); > - if (i < 0) > - return (-1); > - break; > - case AID_VPN_IPv4: > - if (betoh64(a->prefix.vpn4.rd) < betoh64(b->prefix.vpn4.rd)) > - return (-1); > - if (betoh64(a->prefix.vpn4.rd) > betoh64(b->prefix.vpn4.rd)) > - return (1); > - if (ntohl(a->prefix.v4.s_addr) < ntohl(b->prefix.v4.s_addr)) > - return (-1); > - if (ntohl(a->prefix.v4.s_addr) > ntohl(b->prefix.v4.s_addr)) > - return (1); > - if (a->prefixlen < b->prefixlen) > - return (-1); > - if (a->prefixlen > b->prefixlen) > - return (1); > - if (a->prefix.vpn4.labellen < b->prefix.vpn4.labellen) > - return (-1); > - if (a->prefix.vpn4.labellen > b->prefix.vpn4.labellen) > - return (1); > - return (memcmp(a->prefix.vpn4.labelstack, > - b->prefix.vpn4.labelstack, a->prefix.vpn4.labellen)); > - default: > - fatalx("pt_prefix_cmp: unknown af"); > - } > - if (a->prefixlen < b->prefixlen) > - return (-1); > - if (a->prefixlen > b->prefixlen) > - return (1); > - return (0); > -} > - > -int > -up_attr_cmp(struct update_attr *a, struct update_attr *b) > -{ > - int r; > - > - if ((r = a->attr_hash - 
b->attr_hash) != 0) > - return (r); > - if ((r = a->attr_len - b->attr_len) != 0) > - return (r); > - if ((r = a->mpattr_len - b->mpattr_len) != 0) > - return (r); > - if ((r = memcmp(a->mpattr, b->mpattr, a->mpattr_len)) != 0) > - return (r); > - return (memcmp(a->attr, b->attr, a->attr_len)); > -} > - > -int > -up_add(struct rde_peer *peer, struct update_prefix *p, struct update_attr *a) > -{ > - struct update_attr *na = NULL; > - struct update_prefix *np; > - struct uplist_attr *upl = NULL; > - struct uplist_prefix *wdl = NULL; > - > - upl = &peer->updates[p->prefix.aid]; > - wdl = &peer->withdraws[p->prefix.aid]; > - > - /* 1. search for attr */ > - if (a != NULL && (na = RB_FIND(uptree_attr, &peer->up_attrs, a)) == > - NULL) { > - /* 1.1 if not found -> add */ > - TAILQ_INIT(&a->prefix_h); > - if (RB_INSERT(uptree_attr, &peer->up_attrs, a) != NULL) { > - log_warnx("uptree_attr insert failed"); > - /* cleanup */ > - free(a->attr); > - free(a->mpattr); > - free(a); > - free(p); > - return (-1); > - } > - TAILQ_INSERT_TAIL(upl, a, attr_l); > - peer->up_acnt++; > - } else { > - /* 1.2 if found -> use that, free a */ > - if (a != NULL) { > - free(a->attr); > - free(a->mpattr); > - free(a); > - a = na; > - /* move to end of update queue */ > - TAILQ_REMOVE(upl, a, attr_l); > - TAILQ_INSERT_TAIL(upl, a, attr_l); > - } > - } > - > - /* 2. search for prefix */ > - if ((np = RB_FIND(uptree_prefix, &peer->up_prefix, p)) == NULL) { > - /* 2.1 if not found -> add */ > - if (RB_INSERT(uptree_prefix, &peer->up_prefix, p) != NULL) { > - log_warnx("uptree_prefix insert failed"); > - /* > - * cleanup. But do not free a because it is already > - * linked or NULL. up_dump_attrnlri() will remove and > - * free the empty attribute later. 
> - */ > - free(p); > - return (-1); > - } > - peer->up_pcnt++; > - } else { > - /* 2.2 if found -> use that and free p */ > - TAILQ_REMOVE(np->prefix_h, np, prefix_l); > - free(p); > - p = np; > - if (p->prefix_h == wdl) > - peer->up_wcnt--; > - else > - peer->up_nlricnt--; > - } > - /* 3. link prefix to attr */ > - if (a == NULL) { > - TAILQ_INSERT_TAIL(wdl, p, prefix_l); > - p->prefix_h = wdl; > - peer->up_wcnt++; > - } else { > - TAILQ_INSERT_TAIL(&a->prefix_h, p, prefix_l); > - p->prefix_h = &a->prefix_h; > - peer->up_nlricnt++; > - } > - return (0); > -} > - > -int > up_test_update(struct rde_peer *peer, struct prefix *p) > { > struct bgpd_addr addr; > @@ -365,52 +152,11 @@ up_test_update(struct rde_peer *peer, st > return (1); > } > > -int > -up_generate(struct rde_peer *peer, struct rde_aspath *asp, > - struct bgpd_addr *addr, u_int8_t prefixlen) > -{ > - struct update_attr *ua = NULL; > - struct update_prefix *up; > - SIPHASH_CTX ctx; > - > - if (asp) { > - ua = calloc(1, sizeof(struct update_attr)); > - if (ua == NULL) > - fatal("up_generate"); > - > - if (up_generate_attr(peer, ua, asp, addr->aid) == -1) { > - log_warnx("generation of bgp path attributes failed"); > - free(ua); > - return (-1); > - } > - /* > - * use aspath_hash as attr_hash, this may be unoptimal > - * but currently I don't care. 
> - */ > - SipHash24_Init(&ctx, &uptree_key); > - SipHash24_Update(&ctx, ua->attr, ua->attr_len); > - if (ua->mpattr) > - SipHash24_Update(&ctx, ua->mpattr, ua->mpattr_len); > - ua->attr_hash = SipHash24_End(&ctx); > - } > - > - up = calloc(1, sizeof(struct update_prefix)); > - if (up == NULL) > - fatal("up_generate"); > - up->prefix = *addr; > - up->prefixlen = prefixlen; > - > - if (up_add(peer, up, ua) == -1) > - return (-1); > - > - return (0); > -} > - > void > up_generate_updates(struct filter_head *rules, struct rde_peer *peer, > struct prefix *new, struct prefix *old) > { > - struct rde_aspath *asp, *fasp; > + struct rde_aspath *fasp; > struct bgpd_addr addr; > > if (peer->state != PEER_UP) > @@ -421,14 +167,14 @@ withdraw: > if (up_test_update(peer, old) != 1) > return; > > - asp = prefix_aspath(old); > pt_getaddr(old->re->prefix, &addr); > - if (rde_filter(rules, NULL, peer, asp, &addr, > - old->re->prefix->prefixlen, asp->peer) == ACTION_DENY) > + if (rde_filter(rules, NULL, peer, prefix_aspath(old), &addr, > + old->re->prefix->prefixlen, prefix_peer(old)) == > + ACTION_DENY) > return; > > - /* withdraw prefix */ > - up_generate(peer, NULL, &addr, old->re->prefix->prefixlen); > + prefix_withdraw(&ribs[RIB_ADJ_OUT].rib, peer, &addr, > + old->re->prefix->prefixlen); > } else { > switch (up_test_update(peer, new)) { > case 1: > @@ -439,20 +185,21 @@ withdraw: > return; > } > > - asp = prefix_aspath(new); > pt_getaddr(new->re->prefix, &addr); > - if (rde_filter(rules, &fasp, peer, asp, &addr, > - new->re->prefix->prefixlen, asp->peer) == ACTION_DENY) { > + if (rde_filter(rules, &fasp, peer, prefix_aspath(new), &addr, > + new->re->prefix->prefixlen, prefix_peer(new)) == > + ACTION_DENY) { > path_put(fasp); > goto withdraw; > } > if (fasp == NULL) > - fasp = asp; > + fasp = prefix_aspath(new); > > - up_generate(peer, fasp, &addr, new->re->prefix->prefixlen); > + path_update(&ribs[RIB_ADJ_OUT].rib, peer, fasp, &addr, > + new->re->prefix->prefixlen, 
F_ATTR_UPDATE); > > /* free modified aspath */ > - if (fasp != asp) > + if (fasp != prefix_aspath(new)) > path_put(fasp); > } > } > @@ -471,6 +218,7 @@ up_generate_default(struct filter_head * > asp = path_get(); > asp->aspath = aspath_get(NULL, 0); > asp->origin = ORIGIN_IGP; > + asp->aid = aid; > /* the other default values are OK, nexthop is once again NULL */ > > /* > @@ -493,7 +241,8 @@ up_generate_default(struct filter_head * > if (fasp == NULL) > fasp = asp; > > - up_generate(peer, fasp, &addr, 0); > + path_update(&ribs[RIB_ADJ_OUT].rib, peer, fasp, &addr, 0, > + F_ATTR_UPDATE); > > /* no longer needed */ > if (fasp != asp) > @@ -501,47 +250,24 @@ up_generate_default(struct filter_head * > path_put(asp); > } > > -/* generate a EoR marker in the update list. This is a horrible hack. */ > int > up_generate_marker(struct rde_peer *peer, u_int8_t aid) > { > - struct update_attr *ua; > - struct update_attr *na = NULL; > - struct uplist_attr *upl = NULL; > - > - ua = calloc(1, sizeof(struct update_attr)); > - if (ua == NULL) > - fatal("up_generate_marker"); > + struct rde_aspath *asp; > + struct aspath_queue *upl; > + > + asp = path_get_eor(peer, aid); > > upl = &peer->updates[aid]; > + TAILQ_INSERT_TAIL(upl, asp, update_l); > + asp->flags |= F_ATTR_UPDATE; > + peer->up_acnt++; > > - /* 1. 
search for attr */ > - if ((na = RB_FIND(uptree_attr, &peer->up_attrs, ua)) == NULL) { > - /* 1.1 if not found -> add */ > - TAILQ_INIT(&ua->prefix_h); > - if (RB_INSERT(uptree_attr, &peer->up_attrs, ua) != NULL) { > - log_warnx("uptree_attr insert failed"); > - /* cleanup */ > - free(ua); > - return (-1); > - } > - TAILQ_INSERT_TAIL(upl, ua, attr_l); > - peer->up_acnt++; > - } else { > - /* 1.2 if found -> use that, free ua */ > - free(ua); > - ua = na; > - /* move to end of update queue */ > - TAILQ_REMOVE(upl, ua, attr_l); > - TAILQ_INSERT_TAIL(upl, ua, attr_l); > - } > return (0); > } > > -u_char up_attr_buf[4096]; > - > /* only for IPv4 */ > -in_addr_t > +static in_addr_t > up_get_nexthop(struct rde_peer *peer, struct rde_aspath *a) > { > in_addr_t mask; > @@ -596,37 +322,47 @@ up_get_nexthop(struct rde_peer *peer, st > return (peer->local_v4_addr.v4.s_addr); > } > > -int > -up_generate_mp_reach(struct rde_peer *peer, struct update_attr *upa, > +static int > +up_generate_mp_reach(u_char *buf, int len, struct rde_peer *peer, > struct rde_aspath *a, u_int8_t aid) > { > - u_int16_t tmp; > + u_char *attrbuf; > + int r, wpos, attrlen; > + u_int16_t tmp; > + > + if (len < 4) > + return (-1); > + /* attribute header, defaulting to extended length one */ > + buf[0] = ATTR_OPTIONAL | ATTR_EXTLEN; > + buf[1] = ATTR_MP_REACH_NLRI; > + wpos = 4; > + attrbuf = buf + wpos; > > switch (aid) { > case AID_INET6: > - upa->mpattr_len = 21; /* AFI + SAFI + NH LEN + NH + Reserved */ > - upa->mpattr = malloc(upa->mpattr_len); > - if (upa->mpattr == NULL) > - fatal("up_generate_mp_reach"); > - if (aid2afi(aid, &tmp, &upa->mpattr[2])) > - fatalx("up_generate_mp_reachi: bad AID"); > + attrlen = 21; /* AFI + SAFI + NH LEN + NH + Reserved */ > + if (len < wpos + attrlen) > + return (-1); > + wpos += attrlen; > + if (aid2afi(aid, &tmp, &attrbuf[2])) > + fatalx("up_generate_mp_reach: bad AID"); > tmp = htons(tmp); > - memcpy(upa->mpattr, &tmp, sizeof(tmp)); > - upa->mpattr[3] = 
sizeof(struct in6_addr); > - upa->mpattr[20] = 0; /* Reserved must be 0 */ > + memcpy(attrbuf, &tmp, sizeof(tmp)); > + attrbuf[3] = sizeof(struct in6_addr); > + attrbuf[20] = 0; /* Reserved must be 0 */ > > /* nexthop dance see also up_get_nexthop() */ > + attrbuf += 4; > if (a->flags & F_NEXTHOP_NOMODIFY) { > /* no modify flag set */ > if (a->nexthop == NULL) > - memcpy(&upa->mpattr[4], &peer->local_v6_addr.v6, > + memcpy(attrbuf, &peer->local_v6_addr.v6, > sizeof(struct in6_addr)); > else > - memcpy(&upa->mpattr[4], > - &a->nexthop->exit_nexthop.v6, > + memcpy(attrbuf, &a->nexthop->exit_nexthop.v6, > sizeof(struct in6_addr)); > } else if (a->flags & F_NEXTHOP_SELF) > - memcpy(&upa->mpattr[4], &peer->local_v6_addr.v6, > + memcpy(attrbuf, &peer->local_v6_addr.v6, > sizeof(struct in6_addr)); > else if (!peer->conf.ebgp) { > /* ibgp */ > @@ -634,11 +370,10 @@ up_generate_mp_reach(struct rde_peer *pe > (a->nexthop->exit_nexthop.aid == AID_INET6 && > !memcmp(&a->nexthop->exit_nexthop.v6, > &peer->remote_addr.v6, sizeof(struct in6_addr)))) > - memcpy(&upa->mpattr[4], &peer->local_v6_addr.v6, > + memcpy(attrbuf, &peer->local_v6_addr.v6, > sizeof(struct in6_addr)); > else > - memcpy(&upa->mpattr[4], > - &a->nexthop->exit_nexthop.v6, > + memcpy(attrbuf, &a->nexthop->exit_nexthop.v6, > sizeof(struct in6_addr)); > } else if (peer->conf.distance == 1) { > /* ebgp directly connected */ > @@ -651,43 +386,43 @@ up_generate_mp_reach(struct rde_peer *pe > * nexthop and peer are in the same > * subnet > */ > - memcpy(&upa->mpattr[4], > + memcpy(attrbuf, > &a->nexthop->exit_nexthop.v6, > sizeof(struct in6_addr)); > - return (0); > + break; > } > - memcpy(&upa->mpattr[4], &peer->local_v6_addr.v6, > + memcpy(attrbuf, &peer->local_v6_addr.v6, > sizeof(struct in6_addr)); > } else > /* ebgp multihop */ > - memcpy(&upa->mpattr[4], &peer->local_v6_addr.v6, > + memcpy(attrbuf, &peer->local_v6_addr.v6, > sizeof(struct in6_addr)); > - return (0); > + break; > case AID_VPN_IPv4: > - 
upa->mpattr_len = 17; /* AFI + SAFI + NH LEN + NH + Reserved */ > - upa->mpattr = calloc(upa->mpattr_len, 1); > - if (upa->mpattr == NULL) > - fatal("up_generate_mp_reach"); > - if (aid2afi(aid, &tmp, &upa->mpattr[2])) > + attrlen = 17; /* AFI + SAFI + NH LEN + NH + Reserved */ > + if (len < wpos + attrlen) > + return (-1); > + wpos += attrlen; > + if (aid2afi(aid, &tmp, &attrbuf[2])) > fatalx("up_generate_mp_reachi: bad AID"); > tmp = htons(tmp); > - memcpy(upa->mpattr, &tmp, sizeof(tmp)); > - upa->mpattr[3] = sizeof(u_int64_t) + sizeof(struct in_addr); > + memcpy(attrbuf, &tmp, sizeof(tmp)); > + attrbuf[3] = sizeof(u_int64_t) + sizeof(struct in_addr); > + bzero(attrbuf + 4, sizeof(u_int64_t)); > > /* nexthop dance see also up_get_nexthop() */ > + attrbuf += 12; > if (a->flags & F_NEXTHOP_NOMODIFY) { > /* no modify flag set */ > if (a->nexthop == NULL) > - memcpy(&upa->mpattr[12], > - &peer->local_v4_addr.v4, > + memcpy(attrbuf, &peer->local_v4_addr.v4, > sizeof(struct in_addr)); > else > /* nexthops are stored as IPv4 addrs */ > - memcpy(&upa->mpattr[12], > - &a->nexthop->exit_nexthop.v4, > + memcpy(attrbuf, &a->nexthop->exit_nexthop.v4, > sizeof(struct in_addr)); > } else if (a->flags & F_NEXTHOP_SELF) > - memcpy(&upa->mpattr[12], &peer->local_v4_addr.v4, > + memcpy(attrbuf, &peer->local_v4_addr.v4, > sizeof(struct in_addr)); > else if (!peer->conf.ebgp) { > /* ibgp */ > @@ -695,12 +430,10 @@ up_generate_mp_reach(struct rde_peer *pe > (a->nexthop->exit_nexthop.aid == AID_INET && > !memcmp(&a->nexthop->exit_nexthop.v4, > &peer->remote_addr.v4, sizeof(struct in_addr)))) > - memcpy(&upa->mpattr[12], > - &peer->local_v4_addr.v4, > + memcpy(attrbuf, &peer->local_v4_addr.v4, > sizeof(struct in_addr)); > else > - memcpy(&upa->mpattr[12], > - &a->nexthop->exit_nexthop.v4, > + memcpy(attrbuf, &a->nexthop->exit_nexthop.v4, > sizeof(struct in_addr)); > } else if (peer->conf.distance == 1) { > /* ebgp directly connected */ > @@ -713,40 +446,52 @@ up_generate_mp_reach(struct 
rde_peer *pe > * nexthop and peer are in the same > * subnet > */ > - memcpy(&upa->mpattr[12], > + memcpy(attrbuf, > &a->nexthop->exit_nexthop.v4, > sizeof(struct in_addr)); > - return (0); > + break; > } > - memcpy(&upa->mpattr[12], &peer->local_v4_addr.v4, > + memcpy(attrbuf, &peer->local_v4_addr.v4, > sizeof(struct in_addr)); > } else > /* ebgp multihop */ > - memcpy(&upa->mpattr[12], &peer->local_v4_addr.v4, > + memcpy(attrbuf, &peer->local_v4_addr.v4, > sizeof(struct in_addr)); > - return (0); > - default: > break; > + default: > + fatalx("up_generate_mp_reach: unknown AID"); > + } > + > + r = up_dump_prefix(buf + wpos, len - wpos, &a->updates, peer); > + if (r == 0) { > + /* no prefixes written ... */ > + return (-1); > } > - return (-1); > + attrlen += r; > + wpos += r; > + /* update attribute length field */ > + tmp = htons(attrlen); > + memcpy(buf + 2, &tmp, sizeof(tmp)); > + > + return (wpos); > } > > -int > -up_generate_attr(struct rde_peer *peer, struct update_attr *upa, > +static int > +up_generate_attr(u_char *buf, int len, struct rde_peer *peer, > struct rde_aspath *a, u_int8_t aid) > { > struct attr *oa, *newaggr = NULL; > u_char *pdata; > u_int32_t tmp32; > in_addr_t nexthop; > - int flags, r, ismp = 0, neednewpath = 0; > - u_int16_t len = sizeof(up_attr_buf), wlen = 0, plen; > + int flags, r, neednewpath = 0; > + u_int16_t wlen = 0, plen; > u_int8_t l; > u_int16_t nlen = 0; > u_char *ndata = NULL; > > /* origin */ > - if ((r = attr_write(up_attr_buf + wlen, len, ATTR_WELL_KNOWN, > + if ((r = attr_write(buf + wlen, len, ATTR_WELL_KNOWN, > ATTR_ORIGIN, &a->origin, 1)) == -1) > return (-1); > wlen += r; len -= r; > @@ -763,7 +508,7 @@ up_generate_attr(struct rde_peer *peer, > if (!rde_as4byte(peer)) > pdata = aspath_deflate(pdata, &plen, &neednewpath); > > - if ((r = attr_write(up_attr_buf + wlen, len, ATTR_WELL_KNOWN, > + if ((r = attr_write(buf + wlen, len, ATTR_WELL_KNOWN, > ATTR_ASPATH, pdata, plen)) == -1) > return (-1); > wlen += r; len -= r; > 
@@ -772,13 +517,12 @@ up_generate_attr(struct rde_peer *peer, > switch (aid) { > case AID_INET: > nexthop = up_get_nexthop(peer, a); > - if ((r = attr_write(up_attr_buf + wlen, len, ATTR_WELL_KNOWN, > + if ((r = attr_write(buf + wlen, len, ATTR_WELL_KNOWN, > ATTR_NEXTHOP, &nexthop, 4)) == -1) > return (-1); > wlen += r; len -= r; > break; > default: > - ismp = 1; > break; > } > > @@ -791,7 +535,7 @@ up_generate_attr(struct rde_peer *peer, > a->flags & F_ATTR_MED_ANNOUNCE || > peer->conf.flags & PEERFLAG_TRANS_AS)) { > tmp32 = htonl(a->med); > - if ((r = attr_write(up_attr_buf + wlen, len, ATTR_OPTIONAL, > + if ((r = attr_write(buf + wlen, len, ATTR_OPTIONAL, > ATTR_MED, &tmp32, 4)) == -1) > return (-1); > wlen += r; len -= r; > @@ -800,7 +544,7 @@ up_generate_attr(struct rde_peer *peer, > if (!peer->conf.ebgp) { > /* local preference, only valid for ibgp */ > tmp32 = htonl(a->lpref); > - if ((r = attr_write(up_attr_buf + wlen, len, ATTR_WELL_KNOWN, > + if ((r = attr_write(buf + wlen, len, ATTR_WELL_KNOWN, > ATTR_LOCALPREF, &tmp32, 4)) == -1) > return (-1); > wlen += r; len -= r; > @@ -820,7 +564,7 @@ up_generate_attr(struct rde_peer *peer, > break; > switch (oa->type) { > case ATTR_ATOMIC_AGGREGATE: > - if ((r = attr_write(up_attr_buf + wlen, len, > + if ((r = attr_write(buf + wlen, len, > ATTR_WELL_KNOWN, ATTR_ATOMIC_AGGREGATE, > NULL, 0)) == -1) > return (-1); > @@ -848,7 +592,7 @@ up_generate_attr(struct rde_peer *peer, > memcpy(t + sizeof(tas), > oa->data + sizeof(tmp32), > oa->len - sizeof(tmp32)); > - if ((r = attr_write(up_attr_buf + wlen, len, > + if ((r = attr_write(buf + wlen, len, > oa->flags, oa->type, &t, sizeof(t))) == -1) > return (-1); > break; > @@ -863,7 +607,7 @@ up_generate_attr(struct rde_peer *peer, > r = 0; > break; > } > - if ((r = attr_write(up_attr_buf + wlen, len, > + if ((r = attr_write(buf + wlen, len, > oa->flags, oa->type, oa->data, oa->len)) == -1) > return (-1); > break; > @@ -874,7 +618,7 @@ up_generate_attr(struct rde_peer *peer, 
> oa->len, &nlen); > > if (nlen > 0) { > - if ((r = attr_write(up_attr_buf + wlen, > + if ((r = attr_write(buf + wlen, > len, oa->flags, oa->type, ndata, > nlen)) == -1) { > free(ndata); > @@ -884,7 +628,7 @@ up_generate_attr(struct rde_peer *peer, > r = 0; > break; > } > - if ((r = attr_write(up_attr_buf + wlen, len, > + if ((r = attr_write(buf + wlen, len, > oa->flags, oa->type, oa->data, oa->len)) == -1) > return (-1); > break; > @@ -900,7 +644,7 @@ up_generate_attr(struct rde_peer *peer, > r = 0; > break; > } > - if ((r = attr_write(up_attr_buf + wlen, len, > + if ((r = attr_write(buf + wlen, len, > oa->flags | ATTR_PARTIAL, oa->type, > oa->data, oa->len)) == -1) > return (-1); > @@ -923,7 +667,7 @@ up_generate_attr(struct rde_peer *peer, > flags |= ATTR_PARTIAL; > if (plen == 0) > r = 0; > - else if ((r = attr_write(up_attr_buf + wlen, len, flags, > + else if ((r = attr_write(buf + wlen, len, flags, > ATTR_AS4_PATH, pdata, plen)) == -1) > return (-1); > wlen += r; len -= r; > @@ -933,54 +677,44 @@ up_generate_attr(struct rde_peer *peer, > flags = ATTR_OPTIONAL|ATTR_TRANSITIVE; > if (!(a->flags & F_PREFIX_ANNOUNCED)) > flags |= ATTR_PARTIAL; > - if ((r = attr_write(up_attr_buf + wlen, len, flags, > + if ((r = attr_write(buf + wlen, len, flags, > ATTR_AS4_AGGREGATOR, newaggr->data, newaggr->len)) == -1) > return (-1); > wlen += r; len -= r; > } > > - /* write mp attribute to different buffer */ > - if (ismp) > - if (up_generate_mp_reach(peer, upa, a, aid) == -1) > - return (-1); > - > - /* the bgp path attributes are now stored in the global buf */ > - upa->attr = malloc(wlen); > - if (upa->attr == NULL) > - fatal("up_generate_attr"); > - memcpy(upa->attr, up_attr_buf, wlen); > - upa->attr_len = wlen; > return (wlen); > } > > -#define MIN_PREFIX_LEN 5 /* 1 byte prefix length + 4 bytes addr > */ > +/* minimal buffer size > withdraw len + attr len + attr hdr + afi/safi */ > +#define MIN_UPDATE_LEN 16 > + > int > -up_dump_prefix(u_char *buf, int len, struct 
uplist_prefix *prefix_head, > +up_dump_prefix(u_char *buf, int len, struct prefix_queue *prefix_head, > struct rde_peer *peer) > { > - struct update_prefix *upp; > - int r, wpos = 0; > - u_int8_t i; > + struct prefix *p; > + struct bgpd_addr addr; > + int r, wpos = 0; > > - while ((upp = TAILQ_FIRST(prefix_head)) != NULL) { > + while ((p = TAILQ_FIRST(prefix_head)) != NULL) { > + pt_getaddr(p->re->prefix, &addr); > if ((r = prefix_write(buf + wpos, len - wpos, > - &upp->prefix, upp->prefixlen)) == -1) > + &addr, p->re->prefix->prefixlen)) == -1) > break; > wpos += r; > - if (RB_REMOVE(uptree_prefix, &peer->up_prefix, upp) == NULL) > - log_warnx("dequeuing update failed."); > - TAILQ_REMOVE(upp->prefix_h, upp, prefix_l); > + > peer->up_pcnt--; > - for (i = 0; i < AID_MAX; i++) { > - if (upp->prefix_h == &peer->withdraws[i]) { > - peer->up_wcnt--; > - peer->prefix_sent_withdraw++; > - } else { > - peer->up_nlricnt--; > - peer->prefix_sent_update++; > - } > + if (p->flags & F_PREFIX_USE_PEER) { > + prefix_destroy(p); > + peer->up_wcnt--; > + peer->prefix_sent_withdraw++; > + } else { > + /* move prefix from updates to prefixes */ > + prefix_relink(p, prefix_aspath(p), 0); > + peer->up_nlricnt--; > + peer->prefix_sent_update++; > } > - free(upp); > } > return (wpos); > } > @@ -988,7 +722,7 @@ up_dump_prefix(u_char *buf, int len, str > int > up_dump_attrnlri(u_char *buf, int len, struct rde_peer *peer) > { > - struct update_attr *upa; > + struct rde_aspath *asp; > int r, wpos; > u_int16_t attr_len; > > @@ -996,221 +730,155 @@ up_dump_attrnlri(u_char *buf, int len, s > * It is possible that a queued path attribute has no nlri prefix. > * Ignore and remove those path attributes. 
> */ > - while ((upa = TAILQ_FIRST(&peer->updates[AID_INET])) != NULL) > - if (TAILQ_EMPTY(&upa->prefix_h)) { > - attr_len = upa->attr_len; > - if (RB_REMOVE(uptree_attr, &peer->up_attrs, > - upa) == NULL) > - log_warnx("dequeuing update failed."); > - TAILQ_REMOVE(&peer->updates[AID_INET], upa, attr_l); > - free(upa->attr); > - free(upa->mpattr); > - free(upa); > + while ((asp = TAILQ_FIRST(&peer->updates[AID_INET])) != NULL) { > + if (TAILQ_EMPTY(&asp->updates)) { > + TAILQ_REMOVE(&peer->updates[AID_INET], asp, update_l); > + asp->flags &= ~F_ATTR_UPDATE; > peer->up_acnt--; > - /* XXX horrible hack, > - * if attr_len is 0, it is a EoR marker */ > - if (attr_len == 0) > + /* special return for EoR markers */ > + if (asp->flags & F_ATTR_EOR) { > + path_destroy(asp); > return (-1); > + } > } else > break; > + } > + > + if (len < 2) > + fatalx("up_dump_attrnlri: buffer way too small"); > > - if (upa == NULL || upa->attr_len + MIN_PREFIX_LEN > len) { > + if (asp == NULL || len < MIN_UPDATE_LEN) > + goto done; > + r = up_generate_attr(buf + 2, len - 2, peer, asp, AID_INET); > + if (r == -1) { > /* > * either no packet or not enough space. > * The length field needs to be set to zero else it would be > * an invalid bgp update. 
> */ > +done: > bzero(buf, 2); > return (2); > } > > /* first dump the 2-byte path attribute length */ > - attr_len = htons(upa->attr_len); > + attr_len = htons(r); > memcpy(buf, &attr_len, 2); > wpos = 2; > - > - /* then the path attributes themselves */ > - memcpy(buf + wpos, upa->attr, upa->attr_len); > - wpos += upa->attr_len; > + /* then skip over the already dumped path attributes themselves */ > + wpos += r; > > /* last but not least dump the nlri */ > - r = up_dump_prefix(buf + wpos, len - wpos, &upa->prefix_h, peer); > + r = up_dump_prefix(buf + wpos, len - wpos, &asp->updates, peer); > wpos += r; > > /* now check if all prefixes were written */ > - if (TAILQ_EMPTY(&upa->prefix_h)) { > - if (RB_REMOVE(uptree_attr, &peer->up_attrs, upa) == NULL) > - log_warnx("dequeuing update failed."); > - TAILQ_REMOVE(&peer->updates[AID_INET], upa, attr_l); > - free(upa->attr); > - free(upa->mpattr); > - free(upa); > + if (TAILQ_EMPTY(&asp->updates)) { > + TAILQ_REMOVE(&peer->updates[AID_INET], asp, update_l); > + asp->flags &= ~F_ATTR_UPDATE; > peer->up_acnt--; > } > > return (wpos); > } > > -u_char * > -up_dump_mp_unreach(u_char *buf, u_int16_t *len, struct rde_peer *peer, > - u_int8_t aid) > +int > +up_dump_mp_unreach(u_char *buf, int len, struct rde_peer *peer, u_int8_t aid) > { > - int wpos; > - u_int16_t datalen, tmp; > - u_int16_t attrlen = 2; /* attribute header (without len) */ > - u_int8_t flags = ATTR_OPTIONAL, safi; > - > - /* > - * reserve space for withdraw len, attr len, the attribute header > - * and the mp attribute header > - */ > - wpos = 2 + 2 + 4 + 3; > + u_char *attrbuf; > + int wpos, r; > + u_int16_t attr_len, tmp; > > - if (*len < wpos) > - return (NULL); > - > - datalen = up_dump_prefix(buf + wpos, *len - wpos, > - &peer->withdraws[aid], peer); > - if (datalen == 0) > - return (NULL); > + if (len < MIN_UPDATE_LEN || TAILQ_EMPTY(&peer->withdraws[aid])) > + return (-1); > > - datalen += 3; /* afi + safi */ > + /* reserve space for withdraw len, 
attr len */ > + wpos = 2 + 2; > + attrbuf = buf + wpos; > + > + /* attribute header, defaulting to extended length one */ > + attrbuf[0] = ATTR_OPTIONAL | ATTR_EXTLEN; > + attrbuf[1] = ATTR_MP_UNREACH_NLRI; > + wpos += 4; > > - /* prepend header, need to do it reverse */ > - /* safi & afi */ > - if (aid2afi(aid, &tmp, &safi)) > + /* afi & safi */ > + if (aid2afi(aid, &tmp, buf + wpos + 2)) > fatalx("up_dump_mp_unreach: bad AID"); > - buf[--wpos] = safi; > - wpos -= sizeof(u_int16_t); > tmp = htons(tmp); > memcpy(buf + wpos, &tmp, sizeof(u_int16_t)); > + wpos += 3; > > - /* attribute length */ > - if (datalen > 255) { > - attrlen += 2 + datalen; > - flags |= ATTR_EXTLEN; > - wpos -= sizeof(u_int16_t); > - tmp = htons(datalen); > - memcpy(buf + wpos, &tmp, sizeof(u_int16_t)); > - } else { > - attrlen += 1 + datalen; > - buf[--wpos] = (u_char)datalen; > - } > - > - /* mp attribute */ > - buf[--wpos] = (u_char)ATTR_MP_UNREACH_NLRI; > - buf[--wpos] = flags; > + r = up_dump_prefix(buf + wpos, len - wpos, &peer->withdraws[aid], peer); > + if (r == 0) > + return (-1); > + wpos += r; > + attr_len = r + 3; /* prefixes + afi & safi */ > > /* attribute length */ > - wpos -= sizeof(u_int16_t); > - tmp = htons(attrlen); > - memcpy(buf + wpos, &tmp, sizeof(u_int16_t)); > - > - /* no IPv4 withdraws */ > - wpos -= sizeof(u_int16_t); > - bzero(buf + wpos, sizeof(u_int16_t)); > - > - if (wpos < 0) > - fatalx("up_dump_mp_unreach: buffer underflow"); > + attr_len = htons(attr_len); > + memcpy(attrbuf + 2, &attr_len, sizeof(attr_len)); > > - /* total length includes the two 2-bytes length fields. 
*/ > - *len = attrlen + 2 * sizeof(u_int16_t); > + /* write length fields */ > + bzero(buf, sizeof(u_int16_t)); /* withdrawn routes len */ > + attr_len = htons(wpos - 4); > + memcpy(buf + 2, &attr_len, sizeof(attr_len)); > > - return (buf + wpos); > + return (wpos); > } > > int > -up_dump_mp_reach(u_char *buf, u_int16_t *len, struct rde_peer *peer, > - u_int8_t aid) > +up_dump_mp_reach(u_char *buf, int len, struct rde_peer *peer, u_int8_t aid) > { > - struct update_attr *upa; > - int wpos; > - u_int16_t attr_len, datalen, tmp; > - u_int8_t flags = ATTR_OPTIONAL; > + struct rde_aspath *asp; > + int r, wpos; > + u_int16_t attr_len; > > /* > * It is possible that a queued path attribute has no nlri prefix. > * Ignore and remove those path attributes. > */ > - while ((upa = TAILQ_FIRST(&peer->updates[aid])) != NULL) > - if (TAILQ_EMPTY(&upa->prefix_h)) { > - attr_len = upa->attr_len; > - if (RB_REMOVE(uptree_attr, &peer->up_attrs, > - upa) == NULL) > - log_warnx("dequeuing update failed."); > - TAILQ_REMOVE(&peer->updates[aid], upa, attr_l); > - free(upa->attr); > - free(upa->mpattr); > - free(upa); > + while ((asp = TAILQ_FIRST(&peer->updates[aid])) != NULL) { > + if (TAILQ_EMPTY(&asp->updates)) { > + TAILQ_REMOVE(&peer->updates[aid], asp, update_l); > + asp->flags &= ~F_ATTR_UPDATE; > peer->up_acnt--; > - /* XXX horrible hack, > - * if attr_len is 0, it is a EoR marker */ > - if (attr_len == 0) > + /* special return for EoR markers */ > + if (asp->flags & F_ATTR_EOR) { > + path_destroy(asp); > return (-1); > + } > } else > break; > + } > > - if (upa == NULL) > + if (asp == NULL || len < MIN_UPDATE_LEN) > return (-2); > > - /* > - * reserve space for attr len, the attributes, the > - * mp attribute and the attribute header > - */ > - wpos = 2 + 2 + upa->attr_len + 4 + upa->mpattr_len; > - if (*len < wpos) > - return (-2); > + wpos = 4; /* reserve space for length fields */ > > - datalen = up_dump_prefix(buf + wpos, *len - wpos, > - &upa->prefix_h, peer); > - if 
(datalen == 0) > + /* write regular path attributes */ > + r = up_generate_attr(buf + wpos, len - wpos, peer, asp, aid); > + if (r == -1) > return (-2); > + wpos += r; > > - if (upa->mpattr_len == 0 || upa->mpattr == NULL) > - fatalx("mulitprotocol update without MP attrs"); > - > - datalen += upa->mpattr_len; > - wpos -= upa->mpattr_len; > - memcpy(buf + wpos, upa->mpattr, upa->mpattr_len); > - > - if (datalen > 255) { > - wpos -= 2; > - tmp = htons(datalen); > - memcpy(buf + wpos, &tmp, sizeof(tmp)); > - datalen += 4; > - flags |= ATTR_EXTLEN; > - } else { > - buf[--wpos] = (u_char)datalen; > - datalen += 3; > - } > - buf[--wpos] = (u_char)ATTR_MP_REACH_NLRI; > - buf[--wpos] = flags; > - > - datalen += upa->attr_len; > - wpos -= upa->attr_len; > - memcpy(buf + wpos, upa->attr, upa->attr_len); > - > - if (wpos < 4) > - fatalx("Grrr, mp_reach buffer fucked up"); > - > - wpos -= 2; > - tmp = htons(datalen); > - memcpy(buf + wpos, &tmp, sizeof(tmp)); > + /* write mp attribute */ > + r = up_generate_mp_reach(buf + wpos, len - wpos, peer, asp, aid); > + if (r == -1) > + return (-2); > + wpos += r; > > - wpos -= 2; > - bzero(buf + wpos, 2); > + /* write length fields */ > + bzero(buf, sizeof(u_int16_t)); /* withdrawn routes len */ > + attr_len = htons(wpos - 4); > + memcpy(buf + 2, &attr_len, sizeof(attr_len)); > > /* now check if all prefixes were written */ > - if (TAILQ_EMPTY(&upa->prefix_h)) { > - if (RB_REMOVE(uptree_attr, &peer->up_attrs, upa) == NULL) > - log_warnx("dequeuing update failed."); > - TAILQ_REMOVE(&peer->updates[aid], upa, attr_l); > - free(upa->attr); > - free(upa->mpattr); > - free(upa); > + if (TAILQ_EMPTY(&asp->updates)) { > + TAILQ_REMOVE(&peer->updates[aid], asp, update_l); > + asp->flags &= ~F_ATTR_UPDATE; > peer->up_acnt--; > } > > - *len = datalen + 4; > return (wpos); > } >