Re: bgpd L3VPN rework

2019-02-11 Thread Denis Fondras
On Thu, Feb 07, 2019 at 11:41:39AM +0100, Claudio Jeker wrote:
> At a2k19 David (dlg@) and I sat down and looked at BGP L3 MPLS VPN
> support. Now David wants to use this in production and realized that
> in his case it would be great to have more than one mpe(4) interface per
> rdomain. This way it is possible to write good firewall rules using
> interface names or groups.
> 
> The definition of VPNs in bgpd was never super elegant. The 'depend on
> mpeX' config was a bit redundant and so after some discussions we decided
> to rework this part.
> 
> L3 MPLS VPN are now configured like this:
> 
> vpn "staff" on mpe1 {
> rd $ASN:1
> import-target rt $ASN:100
> export-target rt $ASN:101
> network 0/0
> }
> 
> vpn "users" on mpe2 {
> rd $ASN:2
> import-target rt $ASN:200
> export-target rt $ASN:201
> network 0/0
> }
> 
> Now in this example mpe1 and mpe2 can be in the same rdomain. The rdomain
> is now selected based on the rdomain of the mpe(4) interface. There are
> probably still some gotchas around this which can be tackled in a 2nd
> round.
> 
> This diff does a lot of shuffling of code and especially affect network
> statements (since those are now per VPN and no longer per rdomain).
> The rewrite of the network code fixed also some other behaviour bugs which
> do not only affect VPN setups. In short conficts between 'network A.B.C.D/N'
> and 'network static' are now properly handled (with 'network A.B.C.D/N'
> having preference).
> 
> Since this is a major change please test (or suffer later).

Reads good, works fine.

OK denis@

While testing, I noticed that import-target/export-target are not updated on
reload (problem not introduced by this diff though)

> -- 
> :wq Claudio
> 
> Index: usr.sbin/bgpctl/bgpctl.c
> ===
> RCS file: /cvs/src/usr.sbin/bgpctl/bgpctl.c,v
> retrieving revision 1.228
> diff -u -p -r1.228 bgpctl.c
> --- usr.sbin/bgpctl/bgpctl.c  20 Jan 2019 23:30:15 -  1.228
> +++ usr.sbin/bgpctl/bgpctl.c  7 Feb 2019 10:11:19 -
> @@ -346,7 +346,7 @@ main(int argc, char *argv[])
>   bzero(, sizeof(net));
>   net.prefix = res->addr;
>   net.prefixlen = res->prefixlen;
> - net.rtableid = tableid;
> + net.rd = res->rd;
>   /* attribute sets are not supported */
>   if (res->action == NETWORK_ADD) {
>   imsg_compose(ibuf, IMSG_NETWORK_ADD, 0, 0, -1,
> @@ -927,7 +927,7 @@ show_fib_head(void)
>   printf("flags: "
>   "* = valid, B = BGP, C = Connected, S = Static, D = Dynamic\n");
>   printf("   "
> - "N = BGP Nexthop reachable via this route R = redistributed\n");
> + "N = BGP Nexthop reachable via this route\n");
>   printf("   r = reject route, b = blackhole route\n\n");
>   printf("flags prio destination  gateway\n");
>  }
> @@ -969,11 +969,6 @@ show_fib_flags(u_int16_t flags)
>   else
>   printf(" ");
>  
> - if (flags & F_REDISTRIBUTED)
> - printf("R");
> - else
> - printf(" ");
> -
>   if (flags & F_REJECT && flags & F_BLACKHOLE)
>   printf("f");
>   else if (flags & F_REJECT)
> @@ -1983,7 +1978,7 @@ network_bulk(struct parse_result *res)
>   errx(1, "bad prefix: %s", b);
>   net.prefix = h;
>   net.prefixlen = len;
> - net.rtableid = tableid;
> + net.rd = res->rd;
>  
>   if (res->action == NETWORK_BULK_ADD) {
>   imsg_compose(ibuf, IMSG_NETWORK_ADD,
> @@ -2128,7 +2123,7 @@ network_mrt_dump(struct mrt_rib *mr, str
>   net.prefix = ctl.prefix;
>   net.prefixlen = ctl.prefixlen;
>   net.type = NETWORK_MRTCLONE;
> - /* XXX rtableid */
> + /* XXX rd can't be set and will be 0 */
>  
>   imsg_compose(ibuf, IMSG_NETWORK_ADD, 0, 0, -1,
>   , sizeof(net));
> Index: usr.sbin/bgpctl/parser.c
> ===
> RCS file: /cvs/src/usr.sbin/bgpctl/parser.c,v
> retrieving revision 1.89
> diff -u -p -r1.89 parser.c
> --- usr.sbin/bgpctl/parser.c  20 Jan 2019 23:30:15 -  1.89
> +++ usr.sbin/bgpctl/parser.c  7 Feb 2019 10:11:19 -
> @@ -58,6 +58,7 @@ enum token_type {
>   PREPNBR,
>   PREPSELF,
>   WEIGHT,
> + RD,
>   FAMILY,
>   GETOPT,
>   RTABLE,
> @@ -374,6 +375,11 @@ static const struct token t_network_show
>   { ENDTOKEN, "", NONE,   NULL}
>  };
>  
> +static const struct token t_rd[] = {
> + { RD,   "", NONE,   t_set},
> + { ENDTOKEN, "", NONE,   NULL}
> +};
> +
>  static const struct token t_set[] = {
>   { 

bgpd L3VPN rework

2019-02-07 Thread Claudio Jeker
At a2k19 David (dlg@) and I sat down and looked at BGP L3 MPLS VPN
support. Now David wants to use this in production and realized that
in his case it would be great to have more than one mpe(4) interface per
rdomain. This way it is possible to write good firewall rules using
interface names or groups.

The definition of VPNs in bgpd was never super elegant. The 'depend on
mpeX' config was a bit redundant and so after some discussions we decided
to rework this part.

L3 MPLS VPN are now configured like this:

vpn "staff" on mpe1 {
rd $ASN:1
import-target rt $ASN:100
export-target rt $ASN:101
network 0/0
}

vpn "users" on mpe2 {
rd $ASN:2
import-target rt $ASN:200
export-target rt $ASN:201
network 0/0
}

Now in this example mpe1 and mpe2 can be in the same rdomain. The rdomain
is now selected based on the rdomain of the mpe(4) interface. There are
probably still some gotchas around this which can be tackled in a 2nd
round.

This diff does a lot of shuffling of code and especially affect network
statements (since those are now per VPN and no longer per rdomain).
The rewrite of the network code fixed also some other behaviour bugs which
do not only affect VPN setups. In short conficts between 'network A.B.C.D/N'
and 'network static' are now properly handled (with 'network A.B.C.D/N'
having preference).

Since this is a major change please test (or suffer later).
-- 
:wq Claudio

Index: usr.sbin/bgpctl/bgpctl.c
===
RCS file: /cvs/src/usr.sbin/bgpctl/bgpctl.c,v
retrieving revision 1.228
diff -u -p -r1.228 bgpctl.c
--- usr.sbin/bgpctl/bgpctl.c20 Jan 2019 23:30:15 -  1.228
+++ usr.sbin/bgpctl/bgpctl.c7 Feb 2019 10:11:19 -
@@ -346,7 +346,7 @@ main(int argc, char *argv[])
bzero(, sizeof(net));
net.prefix = res->addr;
net.prefixlen = res->prefixlen;
-   net.rtableid = tableid;
+   net.rd = res->rd;
/* attribute sets are not supported */
if (res->action == NETWORK_ADD) {
imsg_compose(ibuf, IMSG_NETWORK_ADD, 0, 0, -1,
@@ -927,7 +927,7 @@ show_fib_head(void)
printf("flags: "
"* = valid, B = BGP, C = Connected, S = Static, D = Dynamic\n");
printf("   "
-   "N = BGP Nexthop reachable via this route R = redistributed\n");
+   "N = BGP Nexthop reachable via this route\n");
printf("   r = reject route, b = blackhole route\n\n");
printf("flags prio destination  gateway\n");
 }
@@ -969,11 +969,6 @@ show_fib_flags(u_int16_t flags)
else
printf(" ");
 
-   if (flags & F_REDISTRIBUTED)
-   printf("R");
-   else
-   printf(" ");
-
if (flags & F_REJECT && flags & F_BLACKHOLE)
printf("f");
else if (flags & F_REJECT)
@@ -1983,7 +1978,7 @@ network_bulk(struct parse_result *res)
errx(1, "bad prefix: %s", b);
net.prefix = h;
net.prefixlen = len;
-   net.rtableid = tableid;
+   net.rd = res->rd;
 
if (res->action == NETWORK_BULK_ADD) {
imsg_compose(ibuf, IMSG_NETWORK_ADD,
@@ -2128,7 +2123,7 @@ network_mrt_dump(struct mrt_rib *mr, str
net.prefix = ctl.prefix;
net.prefixlen = ctl.prefixlen;
net.type = NETWORK_MRTCLONE;
-   /* XXX rtableid */
+   /* XXX rd can't be set and will be 0 */
 
imsg_compose(ibuf, IMSG_NETWORK_ADD, 0, 0, -1,
, sizeof(net));
Index: usr.sbin/bgpctl/parser.c
===
RCS file: /cvs/src/usr.sbin/bgpctl/parser.c,v
retrieving revision 1.89
diff -u -p -r1.89 parser.c
--- usr.sbin/bgpctl/parser.c20 Jan 2019 23:30:15 -  1.89
+++ usr.sbin/bgpctl/parser.c7 Feb 2019 10:11:19 -
@@ -58,6 +58,7 @@ enum token_type {
PREPNBR,
PREPSELF,
WEIGHT,
+   RD,
FAMILY,
GETOPT,
RTABLE,
@@ -374,6 +375,11 @@ static const struct token t_network_show
{ ENDTOKEN, "", NONE,   NULL}
 };
 
+static const struct token t_rd[] = {
+   { RD,   "", NONE,   t_set},
+   { ENDTOKEN, "", NONE,   NULL}
+};
+
 static const struct token t_set[] = {
{ NOTOKEN,  "", NONE,   NULL},
{ KEYWORD,  "community",NONE,   t_community},
@@ -386,6 +392,7 @@ static const struct token t_set[] = {
{ KEYWORD,  "pftable",  NONE,   t_pftable},
{ KEYWORD,  "prepend-neighbor", NONE,   t_prepnbr},
{ KEYWORD,  "prepend-self", NONE,