Re: bgpd pftable change
I've been using this for a week or so now and it's been very boring, which is an improvement in my experience. I has my ok if that has any value. dlg > On 9 Nov 2020, at 8:16 pm, Claudio Jeker wrote: > > Hi bgpd and esp. bgpd-spamd users, > > Currently the pftable code does not keep track how often a prefix was > added to a pftable. Because of this using the same pftable for multiple > neighbor tables does not work well. If one neighbor withdraws a route the > pftable entry is removed from the table no matter if the prefix is still > available from the other neighbor. > > This diff changes this behaviour and introduces proper reference counting. > A pftable entry will now remain in the table until the last neighbor > withdraws the prefix. This makes much more sense and should not break > working setups. It will fix configs where more than one neighbor feeds > into the bgpd pftable. > > As a side-effect bgpd will no commit pftable updates on a more regular > basis and not wait until some operations (full table walk) finished. This > should result in better responsiveness of updates. > > Please test :) > -- > :wq Claudio > > Index: rde.c > === > RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v > retrieving revision 1.506 > diff -u -p -r1.506 rde.c > --- rde.c 5 Nov 2020 14:44:59 - 1.506 > +++ rde.c 9 Nov 2020 10:06:51 - > @@ -69,6 +69,7 @@ void rde_dump_ctx_terminate(pid_t); > void rde_dump_mrt_new(struct mrt *, pid_t, int); > > intrde_l3vpn_import(struct rde_community *, struct l3vpn *); > +static void rde_commit_pftable(void); > void rde_reload_done(void); > static voidrde_softreconfig_in_done(void *, u_int8_t); > static voidrde_softreconfig_out_done(void *, u_int8_t); > @@ -296,6 +297,8 @@ rde_main(int debug, int verbose) > for (aid = AID_INET6; aid < AID_MAX; aid++) > rde_update6_queue_runner(aid); > } > + /* commit pftable once per poll loop */ > + rde_commit_pftable(); > } > > /* do not clean up on shutdown on production, it takes ages. */ > @@ -497,8 +500,6 @@ badnetdel: > RDE_RUNNER_ROUNDS, peerself, network_flush_upcall, > NULL, NULL) == -1) > log_warn("rde_dispatch: IMSG_NETWORK_FLUSH"); > - /* Deletions were performed in network_flush_upcall */ > - rde_send_pftable_commit(); > break; > case IMSG_FILTER_SET: > if (imsg.hdr.len - IMSG_HEADER_SIZE != > @@ -1389,7 +1390,6 @@ rde_update_dispatch(struct rde_peer *pee > > done: > rde_filterstate_clean(&state); > - rde_send_pftable_commit(); > } > > int > @@ -2950,10 +2950,31 @@ rde_update6_queue_runner(u_int8_t aid) > /* > * pf table specific functions > */ > +struct rde_pftable_node { > + RB_ENTRY(rde_pftable_node) entry; > + struct pt_entry *prefix; > + int refcnt; > + u_int16_tid; > +}; > +RB_HEAD(rde_pftable_tree, rde_pftable_node); > + > +static inline int > +rde_pftable_cmp(struct rde_pftable_node *a, struct rde_pftable_node *b) > +{ > + if (a->prefix > b->prefix) > + return 1; > + if (a->prefix < b->prefix) > + return -1; > + return (a->id - b->id); > +} > + > +RB_GENERATE_STATIC(rde_pftable_tree, rde_pftable_node, entry, > rde_pftable_cmp); > + > +struct rde_pftable_tree pftable_tree = RB_INITIALIZER(&pftable_tree); > int need_commit; > -void > -rde_send_pftable(u_int16_t id, struct bgpd_addr *addr, > -u_int8_t len, int del) > + > +static void > +rde_pftable_send(u_int16_t id, struct pt_entry *pt, int del) > { > struct pftable_msg pfm; > > @@ -2966,8 +2987,8 @@ rde_send_pftable(u_int16_t id, struct bg > > bzero(&pfm, sizeof(pfm)); > strlcpy(pfm.pftable, pftable_id2name(id), sizeof(pfm.pftable)); > - memcpy(&pfm.addr, addr, sizeof(pfm.addr)); > - pfm.len = len; > + pt_getaddr(pt, &pfm.addr); > + pfm.len = pt->prefixlen; > > if (imsg_compose(ibuf_main, > del ? IMSG_PFTABLE_REMOVE : IMSG_PFTABLE_ADD, > @@ -2978,7 +2999,55 @@ rde_send_pftable(u_int16_t id, struct bg > } > > void > -rde_send_pftable_commit(void) > +rde_pftable_add(u_int16_t id, struct prefix *p) > +{ > + struct rde_pftable_node *pfn, node; > + > + memset(&node, 0, sizeof(node)); > + node.prefix = p->pt; > + node.id = id; > + > + pfn = RB_FIND(rde_pftable_tree, &pftable_tree, &node); > + if (pfn == NULL) { > + if ((pfn = calloc(1, sizeof(*pfn))) == NULL) > + fatal("%s", __func__); > + pfn->prefix = pt_ref(p->pt); > + pfn->id = id; > + > + if (RB_INSERT(rde_pftable_tree, &pfta
bgpd pftable change
Hi bgpd and esp. bgpd-spamd users, Currently the pftable code does not keep track how often a prefix was added to a pftable. Because of this using the same pftable for multiple neighbor tables does not work well. If one neighbor withdraws a route the pftable entry is removed from the table no matter if the prefix is still available from the other neighbor. This diff changes this behaviour and introduces proper reference counting. A pftable entry will now remain in the table until the last neighbor withdraws the prefix. This makes much more sense and should not break working setups. It will fix configs where more than one neighbor feeds into the bgpd pftable. As a side-effect bgpd will no commit pftable updates on a more regular basis and not wait until some operations (full table walk) finished. This should result in better responsiveness of updates. Please test :) -- :wq Claudio Index: rde.c === RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v retrieving revision 1.506 diff -u -p -r1.506 rde.c --- rde.c 5 Nov 2020 14:44:59 - 1.506 +++ rde.c 9 Nov 2020 10:06:51 - @@ -69,6 +69,7 @@ void rde_dump_ctx_terminate(pid_t); voidrde_dump_mrt_new(struct mrt *, pid_t, int); int rde_l3vpn_import(struct rde_community *, struct l3vpn *); +static void rde_commit_pftable(void); voidrde_reload_done(void); static void rde_softreconfig_in_done(void *, u_int8_t); static void rde_softreconfig_out_done(void *, u_int8_t); @@ -296,6 +297,8 @@ rde_main(int debug, int verbose) for (aid = AID_INET6; aid < AID_MAX; aid++) rde_update6_queue_runner(aid); } + /* commit pftable once per poll loop */ + rde_commit_pftable(); } /* do not clean up on shutdown on production, it takes ages. */ @@ -497,8 +500,6 @@ badnetdel: RDE_RUNNER_ROUNDS, peerself, network_flush_upcall, NULL, NULL) == -1) log_warn("rde_dispatch: IMSG_NETWORK_FLUSH"); - /* Deletions were performed in network_flush_upcall */ - rde_send_pftable_commit(); break; case IMSG_FILTER_SET: if (imsg.hdr.len - IMSG_HEADER_SIZE != @@ -1389,7 +1390,6 @@ rde_update_dispatch(struct rde_peer *pee done: rde_filterstate_clean(&state); - rde_send_pftable_commit(); } int @@ -2950,10 +2950,31 @@ rde_update6_queue_runner(u_int8_t aid) /* * pf table specific functions */ +struct rde_pftable_node { + RB_ENTRY(rde_pftable_node) entry; + struct pt_entry *prefix; + int refcnt; + u_int16_tid; +}; +RB_HEAD(rde_pftable_tree, rde_pftable_node); + +static inline int +rde_pftable_cmp(struct rde_pftable_node *a, struct rde_pftable_node *b) +{ + if (a->prefix > b->prefix) + return 1; + if (a->prefix < b->prefix) + return -1; + return (a->id - b->id); +} + +RB_GENERATE_STATIC(rde_pftable_tree, rde_pftable_node, entry, rde_pftable_cmp); + +struct rde_pftable_tree pftable_tree = RB_INITIALIZER(&pftable_tree); int need_commit; -void -rde_send_pftable(u_int16_t id, struct bgpd_addr *addr, -u_int8_t len, int del) + +static void +rde_pftable_send(u_int16_t id, struct pt_entry *pt, int del) { struct pftable_msg pfm; @@ -2966,8 +2987,8 @@ rde_send_pftable(u_int16_t id, struct bg bzero(&pfm, sizeof(pfm)); strlcpy(pfm.pftable, pftable_id2name(id), sizeof(pfm.pftable)); - memcpy(&pfm.addr, addr, sizeof(pfm.addr)); - pfm.len = len; + pt_getaddr(pt, &pfm.addr); + pfm.len = pt->prefixlen; if (imsg_compose(ibuf_main, del ? IMSG_PFTABLE_REMOVE : IMSG_PFTABLE_ADD, @@ -2978,7 +2999,55 @@ rde_send_pftable(u_int16_t id, struct bg } void -rde_send_pftable_commit(void) +rde_pftable_add(u_int16_t id, struct prefix *p) +{ + struct rde_pftable_node *pfn, node; + + memset(&node, 0, sizeof(node)); + node.prefix = p->pt; + node.id = id; + + pfn = RB_FIND(rde_pftable_tree, &pftable_tree, &node); + if (pfn == NULL) { + if ((pfn = calloc(1, sizeof(*pfn))) == NULL) + fatal("%s", __func__); + pfn->prefix = pt_ref(p->pt); + pfn->id = id; + + if (RB_INSERT(rde_pftable_tree, &pftable_tree, pfn) != NULL) + fatalx("%s: tree corrupt", __func__); + + rde_pftable_send(id, p->pt, 0); + } + pfn->refcnt++; +} + +void +rde_pftable_del(u_int16_t id, struct prefix *p) +{ + struct rde_pftable_node *pfn, node; + + memset(&node, 0, sizeof(node)); + node.prefix = p->pt; +