Re: bgpd pftable change

2020-11-14 Thread David Gwynne
I've been using this for a week or so now and it's been very boring, which is 
an improvement in my experience.

I has my ok if that has any value.

dlg

> On 9 Nov 2020, at 8:16 pm, Claudio Jeker  wrote:
> 
> Hi bgpd and esp. bgpd-spamd users,
> 
> Currently the pftable code does not keep track how often a prefix was
> added to a pftable. Because of this using the same pftable for multiple
> neighbor tables does not work well. If one neighbor withdraws a route the
> pftable entry is removed from the table no matter if the prefix is still
> available from the other neighbor.
> 
> This diff changes this behaviour and introduces proper reference counting.
> A pftable entry will now remain in the table until the last neighbor
> withdraws the prefix. This makes much more sense and should not break
> working setups. It will fix configs where more than one neighbor feeds
> into the bgpd pftable.
> 
> As a side-effect bgpd will no commit pftable updates on a more regular
> basis and not wait until some operations (full table walk) finished. This
> should result in better responsiveness of updates.
> 
> Please test :)
> -- 
> :wq Claudio
> 
> Index: rde.c
> ===
> RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
> retrieving revision 1.506
> diff -u -p -r1.506 rde.c
> --- rde.c 5 Nov 2020 14:44:59 -   1.506
> +++ rde.c 9 Nov 2020 10:06:51 -
> @@ -69,6 +69,7 @@ void rde_dump_ctx_terminate(pid_t);
> void   rde_dump_mrt_new(struct mrt *, pid_t, int);
> 
> intrde_l3vpn_import(struct rde_community *, struct l3vpn *);
> +static void   rde_commit_pftable(void);
> void   rde_reload_done(void);
> static voidrde_softreconfig_in_done(void *, u_int8_t);
> static voidrde_softreconfig_out_done(void *, u_int8_t);
> @@ -296,6 +297,8 @@ rde_main(int debug, int verbose)
>   for (aid = AID_INET6; aid < AID_MAX; aid++)
>   rde_update6_queue_runner(aid);
>   }
> + /* commit pftable once per poll loop */
> + rde_commit_pftable();
>   }
> 
>   /* do not clean up on shutdown on production, it takes ages. */
> @@ -497,8 +500,6 @@ badnetdel:
>   RDE_RUNNER_ROUNDS, peerself, network_flush_upcall,
>   NULL, NULL) == -1)
>   log_warn("rde_dispatch: IMSG_NETWORK_FLUSH");
> - /* Deletions were performed in network_flush_upcall */
> - rde_send_pftable_commit();
>   break;
>   case IMSG_FILTER_SET:
>   if (imsg.hdr.len - IMSG_HEADER_SIZE !=
> @@ -1389,7 +1390,6 @@ rde_update_dispatch(struct rde_peer *pee
> 
> done:
>   rde_filterstate_clean(&state);
> - rde_send_pftable_commit();
> }
> 
> int
> @@ -2950,10 +2950,31 @@ rde_update6_queue_runner(u_int8_t aid)
> /*
>  * pf table specific functions
>  */
> +struct rde_pftable_node {
> + RB_ENTRY(rde_pftable_node)   entry;
> + struct pt_entry *prefix;
> + int  refcnt;
> + u_int16_tid;
> +};
> +RB_HEAD(rde_pftable_tree, rde_pftable_node);
> +
> +static inline int
> +rde_pftable_cmp(struct rde_pftable_node *a, struct rde_pftable_node *b)
> +{
> + if (a->prefix > b->prefix)
> + return 1;
> + if (a->prefix < b->prefix)
> + return -1;
> + return (a->id - b->id);
> +}
> +
> +RB_GENERATE_STATIC(rde_pftable_tree, rde_pftable_node, entry, 
> rde_pftable_cmp);
> +
> +struct rde_pftable_tree pftable_tree = RB_INITIALIZER(&pftable_tree);
> int need_commit;
> -void
> -rde_send_pftable(u_int16_t id, struct bgpd_addr *addr,
> -u_int8_t len, int del)
> +
> +static void
> +rde_pftable_send(u_int16_t id, struct pt_entry *pt, int del)
> {
>   struct pftable_msg pfm;
> 
> @@ -2966,8 +2987,8 @@ rde_send_pftable(u_int16_t id, struct bg
> 
>   bzero(&pfm, sizeof(pfm));
>   strlcpy(pfm.pftable, pftable_id2name(id), sizeof(pfm.pftable));
> - memcpy(&pfm.addr, addr, sizeof(pfm.addr));
> - pfm.len = len;
> + pt_getaddr(pt, &pfm.addr);
> + pfm.len = pt->prefixlen;
> 
>   if (imsg_compose(ibuf_main,
>   del ? IMSG_PFTABLE_REMOVE : IMSG_PFTABLE_ADD,
> @@ -2978,7 +2999,55 @@ rde_send_pftable(u_int16_t id, struct bg
> }
> 
> void
> -rde_send_pftable_commit(void)
> +rde_pftable_add(u_int16_t id, struct prefix *p)
> +{
> + struct rde_pftable_node *pfn, node;
> +
> + memset(&node, 0, sizeof(node));
> + node.prefix = p->pt;
> + node.id = id;
> +
> + pfn = RB_FIND(rde_pftable_tree, &pftable_tree, &node);
> + if (pfn == NULL) {
> + if ((pfn = calloc(1, sizeof(*pfn))) == NULL)
> + fatal("%s", __func__);
> + pfn->prefix = pt_ref(p->pt);
> + pfn->id = id;
> +
> + if (RB_INSERT(rde_pftable_tree, &pfta

bgpd pftable change

2020-11-09 Thread Claudio Jeker
Hi bgpd and esp. bgpd-spamd users,

Currently the pftable code does not keep track how often a prefix was
added to a pftable. Because of this using the same pftable for multiple
neighbor tables does not work well. If one neighbor withdraws a route the
pftable entry is removed from the table no matter if the prefix is still
available from the other neighbor.

This diff changes this behaviour and introduces proper reference counting.
A pftable entry will now remain in the table until the last neighbor
withdraws the prefix. This makes much more sense and should not break
working setups. It will fix configs where more than one neighbor feeds
into the bgpd pftable.

As a side-effect bgpd will no commit pftable updates on a more regular
basis and not wait until some operations (full table walk) finished. This
should result in better responsiveness of updates.

Please test :)
-- 
:wq Claudio

Index: rde.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
retrieving revision 1.506
diff -u -p -r1.506 rde.c
--- rde.c   5 Nov 2020 14:44:59 -   1.506
+++ rde.c   9 Nov 2020 10:06:51 -
@@ -69,6 +69,7 @@ void   rde_dump_ctx_terminate(pid_t);
 voidrde_dump_mrt_new(struct mrt *, pid_t, int);
 
 int rde_l3vpn_import(struct rde_community *, struct l3vpn *);
+static void rde_commit_pftable(void);
 voidrde_reload_done(void);
 static void rde_softreconfig_in_done(void *, u_int8_t);
 static void rde_softreconfig_out_done(void *, u_int8_t);
@@ -296,6 +297,8 @@ rde_main(int debug, int verbose)
for (aid = AID_INET6; aid < AID_MAX; aid++)
rde_update6_queue_runner(aid);
}
+   /* commit pftable once per poll loop */
+   rde_commit_pftable();
}
 
/* do not clean up on shutdown on production, it takes ages. */
@@ -497,8 +500,6 @@ badnetdel:
RDE_RUNNER_ROUNDS, peerself, network_flush_upcall,
NULL, NULL) == -1)
log_warn("rde_dispatch: IMSG_NETWORK_FLUSH");
-   /* Deletions were performed in network_flush_upcall */
-   rde_send_pftable_commit();
break;
case IMSG_FILTER_SET:
if (imsg.hdr.len - IMSG_HEADER_SIZE !=
@@ -1389,7 +1390,6 @@ rde_update_dispatch(struct rde_peer *pee
 
 done:
rde_filterstate_clean(&state);
-   rde_send_pftable_commit();
 }
 
 int
@@ -2950,10 +2950,31 @@ rde_update6_queue_runner(u_int8_t aid)
 /*
  * pf table specific functions
  */
+struct rde_pftable_node {
+   RB_ENTRY(rde_pftable_node)   entry;
+   struct pt_entry *prefix;
+   int  refcnt;
+   u_int16_tid;
+};
+RB_HEAD(rde_pftable_tree, rde_pftable_node);
+
+static inline int
+rde_pftable_cmp(struct rde_pftable_node *a, struct rde_pftable_node *b)
+{
+   if (a->prefix > b->prefix)
+   return 1;
+   if (a->prefix < b->prefix)
+   return -1;
+   return (a->id - b->id);
+}
+
+RB_GENERATE_STATIC(rde_pftable_tree, rde_pftable_node, entry, rde_pftable_cmp);
+
+struct rde_pftable_tree pftable_tree = RB_INITIALIZER(&pftable_tree);
 int need_commit;
-void
-rde_send_pftable(u_int16_t id, struct bgpd_addr *addr,
-u_int8_t len, int del)
+
+static void
+rde_pftable_send(u_int16_t id, struct pt_entry *pt, int del)
 {
struct pftable_msg pfm;
 
@@ -2966,8 +2987,8 @@ rde_send_pftable(u_int16_t id, struct bg
 
bzero(&pfm, sizeof(pfm));
strlcpy(pfm.pftable, pftable_id2name(id), sizeof(pfm.pftable));
-   memcpy(&pfm.addr, addr, sizeof(pfm.addr));
-   pfm.len = len;
+   pt_getaddr(pt, &pfm.addr);
+   pfm.len = pt->prefixlen;
 
if (imsg_compose(ibuf_main,
del ? IMSG_PFTABLE_REMOVE : IMSG_PFTABLE_ADD,
@@ -2978,7 +2999,55 @@ rde_send_pftable(u_int16_t id, struct bg
 }
 
 void
-rde_send_pftable_commit(void)
+rde_pftable_add(u_int16_t id, struct prefix *p)
+{
+   struct rde_pftable_node *pfn, node;
+
+   memset(&node, 0, sizeof(node));
+   node.prefix = p->pt;
+   node.id = id;
+
+   pfn = RB_FIND(rde_pftable_tree, &pftable_tree, &node);
+   if (pfn == NULL) {
+   if ((pfn = calloc(1, sizeof(*pfn))) == NULL)
+   fatal("%s", __func__);
+   pfn->prefix = pt_ref(p->pt);
+   pfn->id = id;
+
+   if (RB_INSERT(rde_pftable_tree, &pftable_tree, pfn) != NULL)
+   fatalx("%s: tree corrupt", __func__);
+
+   rde_pftable_send(id, p->pt, 0);
+   }
+   pfn->refcnt++;
+}
+
+void
+rde_pftable_del(u_int16_t id, struct prefix *p)
+{
+   struct rde_pftable_node *pfn, node;
+
+   memset(&node, 0, sizeof(node));
+   node.prefix = p->pt;
+