[PATCH v2 1/1] iproute2: Add support for a few routing protocols
Add support for: BGP ISIS OSPF RIP EIGRP Routing protocols to iproute2. Signed-off-by: Donald Sharp --- v2: Update to latest version of code. etc/iproute2/rt_protos | 5 + lib/rt_names.c | 5 + 2 files changed, 10 insertions(+) diff --git a/etc/iproute2/rt_protos b/etc/iproute2/rt_protos index 2a9ee01b..b3a0ec8f 100644 --- a/etc/iproute2/rt_protos +++ b/etc/iproute2/rt_protos @@ -16,3 +16,8 @@ 15 ntk 16 dhcp 42 babel +186bgp +187isis +188ospf +189rip +192eigrp diff --git a/lib/rt_names.c b/lib/rt_names.c index a02db35e..66d5f2f0 100644 --- a/lib/rt_names.c +++ b/lib/rt_names.c @@ -134,6 +134,11 @@ static char *rtnl_rtprot_tab[256] = { [RTPROT_XORP] = "xorp", [RTPROT_NTK] = "ntk", [RTPROT_DHCP] = "dhcp", + [RTPROT_BGP] = "bgp", + [RTPROT_ISIS] = "isis", + [RTPROT_OSPF] = "ospf", + [RTPROT_RIP] = "rip", + [RTPROT_EIGRP]= "eigrp", }; -- 2.14.4
[PATCH v2 0/1] Addition of new routing protocols for iproute2
The Linux kernel recently accepted some new RTPROT values for some fairly standard routing protocols. This commit brings in support for iproute2 to handle these new values. v2 - Update to the latest version of master, which already has the rtnetlink.h code, and drop the work already done. Donald Sharp (1): iproute2: Add support for a few routing protocols etc/iproute2/rt_protos | 5 + lib/rt_names.c | 5 + 2 files changed, 10 insertions(+) -- 2.14.4
[PATCH 1/2] iproute2: Add support for a few routing protocols
Add support for: BGP ISIS OSPF RIP EIGRP Routing protocols to iproute2. Signed-off-by: Donald Sharp --- etc/iproute2/rt_protos| 5 + include/linux/rtnetlink.h | 5 + lib/rt_names.c| 5 + 3 files changed, 15 insertions(+) diff --git a/etc/iproute2/rt_protos b/etc/iproute2/rt_protos index 82cf9c46..3ffe8a6c 100644 --- a/etc/iproute2/rt_protos +++ b/etc/iproute2/rt_protos @@ -16,6 +16,11 @@ 15 ntk 16 dhcp 42 babel +186 bgp +187 isis +188 ospf +189 rip +192 eigrp # # Used by me for gated diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 742ba078..2e83a267 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -248,6 +248,11 @@ enum { #define RTPROT_DHCP16 /* DHCP client */ #define RTPROT_MROUTED 17 /* Multicast daemon */ #define RTPROT_BABEL 42 /* Babel daemon */ +#define RTPROT_BGP 186 /* BGP Routes */ +#define RTPROT_ISIS187 /* ISIS Routes */ +#define RTPROT_OSPF188 /* OSPF Routes */ +#define RTPROT_RIP 189 /* RIP Routes */ +#define RTPROT_EIGRP 192 /* EIGRP Routes */ /* rtm_scope diff --git a/lib/rt_names.c b/lib/rt_names.c index 253389a6..d3562d2d 100644 --- a/lib/rt_names.c +++ b/lib/rt_names.c @@ -137,6 +137,11 @@ static char * rtnl_rtprot_tab[256] = { [RTPROT_XORP] = "xorp", [RTPROT_NTK] = "ntk", [RTPROT_DHCP] = "dhcp", + [RTPROT_BGP] = "bgp", + [RTPROT_ISIS] = "isis", + [RTPROT_OSPF] = "ospf", + [RTPROT_RIP] = "rip", + [RTPROT_EIGRP] = "eigrp", }; -- 2.14.4
[PATCH 0/2] Addition of new routing protocols for iproute2
The Linux kernel recently accepted some new RTPROT values for some fairly standard routing protocols. This commit brings in support for iproute2 to handle these new values. Additionally, clean up some long-standing cruft in etc/iproute2/rt_protos. Donald Sharp (2): iproute2: Add support for a few routing protocols iproute2: Remove leftover gated RT_PROT defines etc/iproute2/rt_protos| 18 +- include/linux/rtnetlink.h | 5 + lib/rt_names.c| 5 + 3 files changed, 15 insertions(+), 13 deletions(-) -- 2.14.4
[PATCH 2/2] iproute2: Remove leftover gated RT_PROT defines
These values are neither used nor maintained, so remove them. Signed-off-by: Donald Sharp --- etc/iproute2/rt_protos | 13 - 1 file changed, 13 deletions(-) diff --git a/etc/iproute2/rt_protos b/etc/iproute2/rt_protos index 3ffe8a6c..a965ad16 100644 --- a/etc/iproute2/rt_protos +++ b/etc/iproute2/rt_protos @@ -21,16 +21,3 @@ 188 ospf 189 rip 192 eigrp - -# -# Used by me for gated -# -254gated/aggr -253gated/bgp -252gated/ospf -251gated/ospfase -250gated/rip -249gated/static -248gated/conn -247gated/inet -246gated/default -- 2.14.4
Re: [PATCH] rtnetlink: Add more well known protocol values
This patch is intended for net-next. thanks! donald On Wed, May 30, 2018 at 8:27 AM, Donald Sharp wrote: > FRRouting installs routes into the kernel associated with > the originating protocol. Add these values to the well > known values in rtnetlink.h. > > Signed-off-by: Donald Sharp > --- > v2: Fixed whitespace issues > include/uapi/linux/rtnetlink.h | 5 + > 1 file changed, 5 insertions(+) > > diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h > index cabb210c93af..7d8502313c99 100644 > --- a/include/uapi/linux/rtnetlink.h > +++ b/include/uapi/linux/rtnetlink.h > @@ -254,6 +254,11 @@ enum { > #define RTPROT_DHCP16 /* DHCP client */ > #define RTPROT_MROUTED 17 /* Multicast daemon */ > #define RTPROT_BABEL 42 /* Babel daemon */ > +#define RTPROT_BGP 186 /* BGP Routes */ > +#define RTPROT_ISIS187 /* ISIS Routes */ > +#define RTPROT_OSPF188 /* OSPF Routes */ > +#define RTPROT_RIP 189 /* RIP Routes */ > +#define RTPROT_EIGRP 192 /* EIGRP Routes */ > > /* rtm_scope > > -- > 2.14.3 >
[PATCH] rtnetlink: Add more well known protocol values
FRRouting installs routes into the kernel associated with the originating protocol. Add these values to the well known values in rtnetlink.h. Signed-off-by: Donald Sharp --- v2: Fixed whitespace issues include/uapi/linux/rtnetlink.h | 5 + 1 file changed, 5 insertions(+) diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index cabb210c93af..7d8502313c99 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -254,6 +254,11 @@ enum { #define RTPROT_DHCP16 /* DHCP client */ #define RTPROT_MROUTED 17 /* Multicast daemon */ #define RTPROT_BABEL 42 /* Babel daemon */ +#define RTPROT_BGP 186 /* BGP Routes */ +#define RTPROT_ISIS187 /* ISIS Routes */ +#define RTPROT_OSPF188 /* OSPF Routes */ +#define RTPROT_RIP 189 /* RIP Routes */ +#define RTPROT_EIGRP 192 /* EIGRP Routes */ /* rtm_scope -- 2.14.3
[PATCH] rtnetlink: Add more well known protocol values
FRRouting installs routes into the kernel associated with the originating protocol. Add these values to the well known values in rtnetlink.h. Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com> --- include/uapi/linux/rtnetlink.h | 5 + 1 file changed, 5 insertions(+) diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index cabb210c93af..81b33826f818 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -254,6 +254,11 @@ enum { #define RTPROT_DHCP16 /* DHCP client */ #define RTPROT_MROUTED 17 /* Multicast daemon */ #define RTPROT_BABEL 42 /* Babel daemon */ +#define RTPROT_BGP 186 /* BGP Routes */ +#define RTPROT_ISIS 187 /* ISIS Routes */ +#define RTPROT_OSPF 188 /* OSPF Routes */ +#define RTPROT_RIP 189 /* RIP Routes */ +#define RTPROT_EIGRP192 /* EIGRP Routes */ /* rtm_scope -- 2.14.3
[PATCH v4 iproute2-next 3/3] ip: Allow rules to accept a specified protocol
Allow the specification of a protocol when the user adds/modifies/deletes a rule. Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com> --- ip/iprule.c | 7 +++ 1 file changed, 7 insertions(+) diff --git a/ip/iprule.c b/ip/iprule.c index 8120520e..6fdc9b5e 100644 --- a/ip/iprule.c +++ b/ip/iprule.c @@ -697,6 +697,13 @@ static int iprule_modify(int cmd, int argc, char **argv) if (get_rt_realms_or_raw(, *argv)) invarg("invalid realms\n", *argv); addattr32(, sizeof(req), FRA_FLOW, realm); + } else if (matches(*argv, "protocol") == 0) { + __u32 proto; + + NEXT_ARG(); + if (rtnl_rtprot_a2n(, *argv)) + invarg("\"protocol\" value is invalid\n", *argv); + addattr8(, sizeof(req), FRA_PROTOCOL, proto); } else if (matches(*argv, "table") == 0 || strcmp(*argv, "lookup") == 0) { NEXT_ARG(); -- 2.14.3
[PATCH v4 iproute2-next 0/3] Allow 'ip rule' command to use protocol
Fix iprule.c to use the actual `struct fib_rule_hdr` and to allow the end user to see and use the protocol keyword for rule manipulation. v2: Rearrange and code changes as per David Ahern v3: Fix some missed RTN_XXX to appropriate FR_XX and doc changes v4: Cleanup some code, fix 'ip rule save' no parameters and doc changes Donald Sharp (3): ip: Use the `struct fib_rule_hdr` for rules ip: Display ip rule protocol used ip: Allow rules to accept a specified protocol include/uapi/linux/fib_rules.h | 1 + ip/iprule.c| 173 + man/man8/ip-rule.8 | 18 - 3 files changed, 126 insertions(+), 66 deletions(-) -- 2.14.3
[PATCH v4 iproute2-next 1/3] ip: Use the `struct fib_rule_hdr` for rules
The iprule.c code was using `struct rtmsg` as the data type to pass into the kernel for the netlink message. While 'struct rtmsg' and `struct fib_rule_hdr` are the same size and mostly the same, we should use the correct data structure. This commit translates the data structures to have iprule.c use the correct one. Additionally copy over the modified fib_rules.h file Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com> --- include/uapi/linux/fib_rules.h | 1 + ip/iprule.c| 128 + 2 files changed, 68 insertions(+), 61 deletions(-) diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h index 2b642bf9..9477c3af 100644 --- a/include/uapi/linux/fib_rules.h +++ b/include/uapi/linux/fib_rules.h @@ -58,6 +58,7 @@ enum { FRA_PAD, FRA_L3MDEV, /* iif or oif is l3mdev goto its table */ FRA_UID_RANGE, /* UID range */ + FRA_PROTOCOL, __FRA_MAX }; diff --git a/ip/iprule.c b/ip/iprule.c index a3abf2f6..94356bf8 100644 --- a/ip/iprule.c +++ b/ip/iprule.c @@ -73,25 +73,33 @@ static struct inet_prefix dst; } filter; +static inline int frh_get_table(struct fib_rule_hdr *frh, struct rtattr **tb) +{ + __u32 table = frh->table; + if (tb[RTA_TABLE]) + table = rta_getattr_u32(tb[RTA_TABLE]); + return table; +} + static bool filter_nlmsg(struct nlmsghdr *n, struct rtattr **tb, int host_len) { - struct rtmsg *r = NLMSG_DATA(n); + struct fib_rule_hdr *frh = NLMSG_DATA(n); __u32 table; - if (preferred_family != AF_UNSPEC && r->rtm_family != preferred_family) + if (preferred_family != AF_UNSPEC && frh->family != preferred_family) return false; if (filter.prefmask && filter.pref ^ (tb[FRA_PRIORITY] ? 
rta_getattr_u32(tb[FRA_PRIORITY]) : 0)) return false; - if (filter.not && !(r->rtm_flags & FIB_RULE_INVERT)) + if (filter.not && !(frh->flags & FIB_RULE_INVERT)) return false; if (filter.src.family) { inet_prefix *f_src = - if (f_src->family != r->rtm_family || - f_src->bitlen > r->rtm_src_len) + if (f_src->family != frh->family || + f_src->bitlen > frh->src_len) return false; if (inet_addr_match_rta(f_src, tb[FRA_SRC])) @@ -101,15 +109,15 @@ static bool filter_nlmsg(struct nlmsghdr *n, struct rtattr **tb, int host_len) if (filter.dst.family) { inet_prefix *f_dst = - if (f_dst->family != r->rtm_family || - f_dst->bitlen > r->rtm_dst_len) + if (f_dst->family != frh->family || + f_dst->bitlen > frh->dst_len) return false; if (inet_addr_match_rta(f_dst, tb[FRA_DST])) return false; } - if (filter.tosmask && filter.tos ^ r->rtm_tos) + if (filter.tosmask && filter.tos ^ frh->tos) return false; if (filter.fwmark) { @@ -159,7 +167,7 @@ static bool filter_nlmsg(struct nlmsghdr *n, struct rtattr **tb, int host_len) return false; } - table = rtm_get_table(r, tb); + table = frh_get_table(frh, tb); if (filter.tb > 0 && filter.tb ^ table) return false; @@ -169,7 +177,7 @@ static bool filter_nlmsg(struct nlmsghdr *n, struct rtattr **tb, int host_len) int print_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) { FILE *fp = (FILE *)arg; - struct rtmsg *r = NLMSG_DATA(n); + struct fib_rule_hdr *frh = NLMSG_DATA(n); int len = n->nlmsg_len; int host_len = -1; __u32 table; @@ -180,13 +188,13 @@ int print_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) if (n->nlmsg_type != RTM_NEWRULE && n->nlmsg_type != RTM_DELRULE) return 0; - len -= NLMSG_LENGTH(sizeof(*r)); + len -= NLMSG_LENGTH(sizeof(*frh)); if (len < 0) return -1; - parse_rtattr(tb, FRA_MAX, RTM_RTA(r), len); + parse_rtattr(tb, FRA_MAX, RTM_RTA(frh), len); - host_len = af_bit_len(r->rtm_family); + host_len = af_bit_len(frh->family); if (!filter_nlmsg(n, tb, host_len)) return 0; @@ -200,41 
+208,41 @@ int print_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) else fprintf(fp, "0:\t"); - if (r->rtm_flags & FIB_RULE_INVERT) + if (frh->flags & FIB_RULE_INVERT) fprintf(fp, "not "); if (tb[FRA_SRC]) {
[PATCH v4 iproute2-next 2/3] ip: Display ip rule protocol used
Modify 'ip rule' command to notice when the kernel passes to us the originating protocol. Add code to allow the `ip rule flush protocol XXX` command to be accepted and properly handled. Modify the documentation to reflect these code changes. Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com> --- ip/iprule.c| 38 ++ man/man8/ip-rule.8 | 18 +- 2 files changed, 51 insertions(+), 5 deletions(-) diff --git a/ip/iprule.c b/ip/iprule.c index 94356bf8..8120520e 100644 --- a/ip/iprule.c +++ b/ip/iprule.c @@ -47,6 +47,7 @@ static void usage(void) "[ iif STRING ] [ oif STRING ] [ pref NUMBER ] [ l3mdev ]\n" "[ uidrange NUMBER-NUMBER ]\n" "ACTION := [ table TABLE_ID ]\n" + " [ protocol PROTO ]\n" " [ nat ADDRESS ]\n" " [ realms [SRCREALM/]DSTREALM ]\n" " [ goto NUMBER ]\n" @@ -71,6 +72,8 @@ static struct struct fib_rule_uid_range range; inet_prefix src; inet_prefix dst; + int protocol; + int protocolmask; } filter; static inline int frh_get_table(struct fib_rule_hdr *frh, struct rtattr **tb) @@ -338,6 +341,16 @@ int print_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) rtnl_rtntype_n2a(frh->action, b1, sizeof(b1))); + if (tb[FRA_PROTOCOL]) { + __u8 protocol = rta_getattr_u8(tb[FRA_PROTOCOL]); + + if ((protocol && protocol != RTPROT_KERNEL) || + show_details > 0) { + fprintf(fp, " proto %s ", + rtnl_rtprot_n2a(protocol, b1, sizeof(b1))); + } + } + fprintf(fp, "\n"); fflush(fp); return 0; @@ -391,6 +404,13 @@ static int flush_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, parse_rtattr(tb, FRA_MAX, RTM_RTA(frh), len); + if (tb[FRA_PROTOCOL]) { + __u8 protocol = rta_getattr_u8(tb[FRA_PROTOCOL]); + + if ((filter.protocol ^ protocol) & filter.protocolmask) + return 0; + } + if (tb[FRA_PRIORITY]) { n->nlmsg_type = RTM_DELRULE; n->nlmsg_flags = NLM_F_REQUEST; @@ -415,9 +435,8 @@ static int iprule_list_flush_or_save(int argc, char **argv, int action) if (af == AF_UNSPEC) af = AF_INET; - if (action != IPRULE_LIST && argc > 0) { - fprintf(stderr, "\"ip 
rule %s\" does not take any arguments.\n", - action == IPRULE_SAVE ? "save" : "flush"); + if (action == IPRULE_SAVE && argc > 0) { + fprintf(stderr, "\"ip rule save\" does not take any arguments.\n"); return -1; } @@ -508,7 +527,18 @@ static int iprule_list_flush_or_save(int argc, char **argv, int action) NEXT_ARG(); if (get_prefix(, *argv, af)) invarg("from value is invalid\n", *argv); - } else { + } else if (matches(*argv, "protocol") == 0) { + __u32 prot; + NEXT_ARG(); + filter.protocolmask = -1; + if (rtnl_rtprot_a2n(, *argv)) { + if (strcmp(*argv, "all") != 0) + invarg("invalid \"protocol\"\n", *argv); + prot = 0; + filter.protocolmask = 0; + } + filter.protocol = prot; + } else{ if (matches(*argv, "dst") == 0 || matches(*argv, "to") == 0) { NEXT_ARG(); diff --git a/man/man8/ip-rule.8 b/man/man8/ip-rule.8 index a5c47981..7cf8fd9a 100644 --- a/man/man8/ip-rule.8 +++ b/man/man8/ip-rule.8 @@ -50,6 +50,8 @@ ip-rule \- routing policy database management .IR ACTION " := [ " .B table .IR TABLE_ID " ] [ " +.B protocol +.IR PROTO " ] [ " .B nat .IR ADDRESS " ] [ " .B realms @@ -240,6 +242,10 @@ The options preference and order are synonyms with priority. the routing table identifier to lookup if the rule selector matches. It is also possible to use lookup instead of table. +.TP +.BI protocol " PROTO" +the routing protocol who installed the rule in question. As an example when zebra installs a rule it would get RTPROT_ZEBRA as the insta
[PATCH v3 iproute2-next 1/3] ip: Use the `struct fib_rule_hdr` for rules
The iprule.c code was using `struct rtmsg` as the data type to pass into the kernel for the netlink message. While 'struct rtmsg' and `struct fib_rule_hdr` are the same size and mostly the same, we should use the correct data structure. This commit translates the data structures to have iprule.c use the correct one. Additionally copy over the modified fib_rules.h file Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com> --- include/uapi/linux/fib_rules.h | 1 + ip/iprule.c| 128 + 2 files changed, 68 insertions(+), 61 deletions(-) diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h index 2b642bf9..9477c3af 100644 --- a/include/uapi/linux/fib_rules.h +++ b/include/uapi/linux/fib_rules.h @@ -58,6 +58,7 @@ enum { FRA_PAD, FRA_L3MDEV, /* iif or oif is l3mdev goto its table */ FRA_UID_RANGE, /* UID range */ + FRA_PROTOCOL, __FRA_MAX }; diff --git a/ip/iprule.c b/ip/iprule.c index a3abf2f6..94356bf8 100644 --- a/ip/iprule.c +++ b/ip/iprule.c @@ -73,25 +73,33 @@ static struct inet_prefix dst; } filter; +static inline int frh_get_table(struct fib_rule_hdr *frh, struct rtattr **tb) +{ + __u32 table = frh->table; + if (tb[RTA_TABLE]) + table = rta_getattr_u32(tb[RTA_TABLE]); + return table; +} + static bool filter_nlmsg(struct nlmsghdr *n, struct rtattr **tb, int host_len) { - struct rtmsg *r = NLMSG_DATA(n); + struct fib_rule_hdr *frh = NLMSG_DATA(n); __u32 table; - if (preferred_family != AF_UNSPEC && r->rtm_family != preferred_family) + if (preferred_family != AF_UNSPEC && frh->family != preferred_family) return false; if (filter.prefmask && filter.pref ^ (tb[FRA_PRIORITY] ? 
rta_getattr_u32(tb[FRA_PRIORITY]) : 0)) return false; - if (filter.not && !(r->rtm_flags & FIB_RULE_INVERT)) + if (filter.not && !(frh->flags & FIB_RULE_INVERT)) return false; if (filter.src.family) { inet_prefix *f_src = - if (f_src->family != r->rtm_family || - f_src->bitlen > r->rtm_src_len) + if (f_src->family != frh->family || + f_src->bitlen > frh->src_len) return false; if (inet_addr_match_rta(f_src, tb[FRA_SRC])) @@ -101,15 +109,15 @@ static bool filter_nlmsg(struct nlmsghdr *n, struct rtattr **tb, int host_len) if (filter.dst.family) { inet_prefix *f_dst = - if (f_dst->family != r->rtm_family || - f_dst->bitlen > r->rtm_dst_len) + if (f_dst->family != frh->family || + f_dst->bitlen > frh->dst_len) return false; if (inet_addr_match_rta(f_dst, tb[FRA_DST])) return false; } - if (filter.tosmask && filter.tos ^ r->rtm_tos) + if (filter.tosmask && filter.tos ^ frh->tos) return false; if (filter.fwmark) { @@ -159,7 +167,7 @@ static bool filter_nlmsg(struct nlmsghdr *n, struct rtattr **tb, int host_len) return false; } - table = rtm_get_table(r, tb); + table = frh_get_table(frh, tb); if (filter.tb > 0 && filter.tb ^ table) return false; @@ -169,7 +177,7 @@ static bool filter_nlmsg(struct nlmsghdr *n, struct rtattr **tb, int host_len) int print_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) { FILE *fp = (FILE *)arg; - struct rtmsg *r = NLMSG_DATA(n); + struct fib_rule_hdr *frh = NLMSG_DATA(n); int len = n->nlmsg_len; int host_len = -1; __u32 table; @@ -180,13 +188,13 @@ int print_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) if (n->nlmsg_type != RTM_NEWRULE && n->nlmsg_type != RTM_DELRULE) return 0; - len -= NLMSG_LENGTH(sizeof(*r)); + len -= NLMSG_LENGTH(sizeof(*frh)); if (len < 0) return -1; - parse_rtattr(tb, FRA_MAX, RTM_RTA(r), len); + parse_rtattr(tb, FRA_MAX, RTM_RTA(frh), len); - host_len = af_bit_len(r->rtm_family); + host_len = af_bit_len(frh->family); if (!filter_nlmsg(n, tb, host_len)) return 0; @@ -200,41 
+208,41 @@ int print_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) else fprintf(fp, "0:\t"); - if (r->rtm_flags & FIB_RULE_INVERT) + if (frh->flags & FIB_RULE_INVERT) fprintf(fp, "not "); if (tb[FRA_SRC]) {
[PATCH v3 iproute2-next 2/3] ip: Display ip rule protocol used
Modify 'ip rule' command to notice when the kernel passes to us the originating protocol. Add code to allow the `ip rule flush protocol XXX` command to be accepted and properly handled. Modify the documentation to reflect these code changes. Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com> --- ip/iprule.c| 36 +--- man/man8/ip-rule.8 | 18 +- 2 files changed, 46 insertions(+), 8 deletions(-) diff --git a/ip/iprule.c b/ip/iprule.c index 94356bf8..17df9e9b 100644 --- a/ip/iprule.c +++ b/ip/iprule.c @@ -47,6 +47,7 @@ static void usage(void) "[ iif STRING ] [ oif STRING ] [ pref NUMBER ] [ l3mdev ]\n" "[ uidrange NUMBER-NUMBER ]\n" "ACTION := [ table TABLE_ID ]\n" + " [ protocol PROTO ]\n" " [ nat ADDRESS ]\n" " [ realms [SRCREALM/]DSTREALM ]\n" " [ goto NUMBER ]\n" @@ -71,6 +72,8 @@ static struct struct fib_rule_uid_range range; inet_prefix src; inet_prefix dst; + int protocol; + int protocolmask; } filter; static inline int frh_get_table(struct fib_rule_hdr *frh, struct rtattr **tb) @@ -338,6 +341,16 @@ int print_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) rtnl_rtntype_n2a(frh->action, b1, sizeof(b1))); + if (tb[FRA_PROTOCOL]) { + __u8 protocol = rta_getattr_u8(tb[FRA_PROTOCOL]); + + if ((protocol && protocol != RTPROT_KERNEL) || + show_details > 0) { + fprintf(fp, " proto %s ", + rtnl_rtprot_n2a(protocol, b1, sizeof(b1))); + } + } + fprintf(fp, "\n"); fflush(fp); return 0; @@ -391,6 +404,10 @@ static int flush_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, parse_rtattr(tb, FRA_MAX, RTM_RTA(frh), len); + if (tb[FRA_PROTOCOL] && + (filter.protocol^rta_getattr_u8(tb[FRA_PROTOCOL]))) + return 0; + if (tb[FRA_PRIORITY]) { n->nlmsg_type = RTM_DELRULE; n->nlmsg_flags = NLM_F_REQUEST; @@ -415,12 +432,6 @@ static int iprule_list_flush_or_save(int argc, char **argv, int action) if (af == AF_UNSPEC) af = AF_INET; - if (action != IPRULE_LIST && argc > 0) { - fprintf(stderr, "\"ip rule %s\" does not take any arguments.\n", - action == 
IPRULE_SAVE ? "save" : "flush"); - return -1; - } - switch (action) { case IPRULE_SAVE: if (save_rule_prep()) @@ -508,7 +519,18 @@ static int iprule_list_flush_or_save(int argc, char **argv, int action) NEXT_ARG(); if (get_prefix(, *argv, af)) invarg("from value is invalid\n", *argv); - } else { + } else if (matches(*argv, "protocol") == 0) { + __u32 prot; + NEXT_ARG(); + filter.protocolmask = -1; + if (rtnl_rtprot_a2n(, *argv)) { + if (strcmp(*argv, "all") != 0) + invarg("invalid \"protocol\"\n", *argv); + prot = 0; + filter.protocolmask = 0; + } + filter.protocol = prot; + } else{ if (matches(*argv, "dst") == 0 || matches(*argv, "to") == 0) { NEXT_ARG(); diff --git a/man/man8/ip-rule.8 b/man/man8/ip-rule.8 index a5c47981..f4070542 100644 --- a/man/man8/ip-rule.8 +++ b/man/man8/ip-rule.8 @@ -50,6 +50,8 @@ ip-rule \- routing policy database management .IR ACTION " := [ " .B table .IR TABLE_ID " ] [ " +.B protocol +.IR PROTO " ] [ " .B nat .IR ADDRESS " ] [ " .B realms @@ -240,6 +242,10 @@ The options preference and order are synonyms with priority. the routing table identifier to lookup if the rule selector matches. It is also possible to use lookup instead of table. +.TP +.BI protocol " PROTO" +the protocol who installed the rule in question. + .TP .BI suppress_prefixlength " NUMBER" reject routing decisions that have a prefix length of NUMBER or less. @@ -275,7 +281,11 @@ updates, it flushes the routing cache with .RE .TP .B ip rule flush - also dumps all the deleted rules
[PATCH v3 iproute2-next 3/3] ip: Allow rules to accept a specified protocol
Allow the specification of a protocol when the user adds/modifies/deletes a rule. Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com> --- ip/iprule.c | 6 ++ 1 file changed, 6 insertions(+) diff --git a/ip/iprule.c b/ip/iprule.c index 17df9e9b..796da3b3 100644 --- a/ip/iprule.c +++ b/ip/iprule.c @@ -689,6 +689,12 @@ static int iprule_modify(int cmd, int argc, char **argv) if (get_rt_realms_or_raw(, *argv)) invarg("invalid realms\n", *argv); addattr32(, sizeof(req), FRA_FLOW, realm); + } else if (matches(*argv, "protocol") == 0) { + __u32 proto; + NEXT_ARG(); + if (rtnl_rtprot_a2n(, *argv)) + invarg("\"protocol\" value is invalid\n", *argv); + addattr8(, sizeof(req), FRA_PROTOCOL, proto); } else if (matches(*argv, "table") == 0 || strcmp(*argv, "lookup") == 0) { NEXT_ARG(); -- 2.14.3
[PATCH v3 iproute2-next 0/3] Allow 'ip rule' command to use protocol
Fix iprule.c to use the actual `struct fib_rule_hdr` and to allow the end user to see and use the protocol keyword for rule manipulation. v2: Rearrange and code changes as per David Ahern v3: Fix some missed RTN_XXX to appropriate FR_XX and doc changes Donald Sharp (3): ip: Use the `struct fib_rule_hdr` for rules ip: Display ip rule protocol used ip: Allow rules to accept a specified protocol include/uapi/linux/fib_rules.h | 1 + ip/iprule.c| 170 - man/man8/ip-rule.8 | 18 - 3 files changed, 120 insertions(+), 69 deletions(-) -- 2.14.3
[PATCH iproute2 v2] ip: Properly display AF_BRIDGE address information for neighbor events
When a neighbor add/delete event occurs, the vxlan driver sends NDA_DST filled with a union: union vxlan_addr { struct sockaddr_in sin; struct sockaddr_in6 sin6; struct sockaddr sa; }; This eventually calls rt_addr_n2a_r, which had no handler for the AF_BRIDGE family, so "???" was being printed. Add code to properly display this data when requested. Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com> --- lib/utils.c | 19 +++ 1 file changed, 19 insertions(+) diff --git a/lib/utils.c b/lib/utils.c index 24aeddd8..fe5841f6 100644 --- a/lib/utils.c +++ b/lib/utils.c @@ -1004,6 +1004,25 @@ const char *rt_addr_n2a_r(int af, int len, } case AF_PACKET: return ll_addr_n2a(addr, len, ARPHRD_VOID, buf, buflen); + case AF_BRIDGE: + { + const union { + struct sockaddr sa; + struct sockaddr_in sin; + struct sockaddr_in6 sin6; + } *sa = addr; + unsigned short family = sa->sa.sa_family; + + switch(family) { + case AF_INET: + return inet_ntop(AF_INET, &sa->sin.sin_addr, buf, buflen); + case AF_INET6: + return inet_ntop(AF_INET6, &sa->sin6.sin6_addr, +buf, buflen); + } + + /* fallthrough */ + } default: return "???"; } -- 2.14.3
[PATCH] net: fib_rules: Add new attribute to set protocol
For ages iproute2 has used `struct rtmsg` as the ancillary header for FIB rules and in the process set the protocol value to RTPROT_BOOT. Until cac56209a66 ("net: Allow a rule to track originating protocol") the kernel rules code ignored the protocol value sent from userspace and always returned 0 in notifications. To avoid incompatibility with existing iproute2, send the protocol as a new attribute. Fixes: cac56209a66 ("net: Allow a rule to track originating protocol") Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com> --- drivers/net/vrf.c | 5 - include/net/fib_rules.h| 3 ++- include/uapi/linux/fib_rules.h | 5 +++-- net/core/fib_rules.c | 15 +++ 4 files changed, 20 insertions(+), 8 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 951a4b42cb29..9ce0182223a0 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -1145,6 +1145,7 @@ static inline size_t vrf_fib_rule_nl_size(void) sz = NLMSG_ALIGN(sizeof(struct fib_rule_hdr)); sz += nla_total_size(sizeof(u8)); /* FRA_L3MDEV */ sz += nla_total_size(sizeof(u32)); /* FRA_PRIORITY */ + sz += nla_total_size(sizeof(u8)); /* FRA_PROTOCOL */ return sz; } @@ -1174,7 +1175,9 @@ static int vrf_fib_rule(const struct net_device *dev, __u8 family, bool add_it) memset(frh, 0, sizeof(*frh)); frh->family = family; frh->action = FR_ACT_TO_TBL; - frh->proto = RTPROT_KERNEL; + + if (nla_put_u8(skb, FRA_PROTOCOL, RTPROT_KERNEL)) + goto nla_put_failure; if (nla_put_u8(skb, FRA_L3MDEV, 1)) goto nla_put_failure; diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index b166ef07e6d4..b3d216249240 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -109,7 +109,8 @@ struct fib_rule_notifier_info { [FRA_SUPPRESS_IFGROUP] = { .type = NLA_U32 }, \ [FRA_GOTO] = { .type = NLA_U32 }, \ [FRA_L3MDEV]= { .type = NLA_U8 }, \ - [FRA_UID_RANGE] = { .len = sizeof(struct fib_rule_uid_range) } + [FRA_UID_RANGE] = { .len = sizeof(struct fib_rule_uid_range) }, \ + [FRA_PROTOCOL] = { .type = NLA_U8 } 
static inline void fib_rule_get(struct fib_rule *rule) { diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h index 925539172d5b..77d90ae38114 100644 --- a/include/uapi/linux/fib_rules.h +++ b/include/uapi/linux/fib_rules.h @@ -23,8 +23,8 @@ struct fib_rule_hdr { __u8tos; __u8table; - __u8proto; - __u8res1; /* reserved */ + __u8res1; /* reserved */ + __u8res2; /* reserved */ __u8action; __u32 flags; @@ -58,6 +58,7 @@ enum { FRA_PAD, FRA_L3MDEV, /* iif or oif is l3mdev goto its table */ FRA_UID_RANGE, /* UID range */ + FRA_PROTOCOL, /* Originator of the rule */ __FRA_MAX }; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 88298f18cbae..a6aea805a0a2 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -466,11 +466,13 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, } refcount_set(>refcnt, 1); rule->fr_net = net; - rule->proto = frh->proto; rule->pref = tb[FRA_PRIORITY] ? nla_get_u32(tb[FRA_PRIORITY]) : fib_default_rule_pref(ops); + rule->proto = tb[FRA_PROTOCOL] ? 
+ nla_get_u8(tb[FRA_PROTOCOL]) : RTPROT_UNSPEC; + if (tb[FRA_IIFNAME]) { struct net_device *dev; @@ -666,7 +668,8 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, } list_for_each_entry(rule, >rules_list, list) { - if (frh->proto && (frh->proto != rule->proto)) + if (tb[FRA_PROTOCOL] && + (rule->proto != nla_get_u8(tb[FRA_PROTOCOL]))) continue; if (frh->action && (frh->action != rule->action)) @@ -786,7 +789,8 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops, + nla_total_size(4) /* FRA_FWMARK */ + nla_total_size(4) /* FRA_FWMASK */ + nla_total_size_64bit(8) /* FRA_TUN_ID */ -+ nla_total_size(sizeof(struct fib_kuid_range)); ++ nla_total_size(sizeof(struct fib_kuid_range)) ++ nla_total_size(1); /* FRA_PROTOCOL */ if (ops->nlmsg_payload) payload += ops->nlmsg_payload(rule); @@ -813,9 +817,12 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, if (nla_put_u32(skb, FRA_SUPPRESS_PREFIXLEN, rule->suppress_prefi
[PATCH iproute2] ip: Properly display AF_BRIDGE address information for neighbor events
The vxlan driver when a neighbor add/delete event occurs sends NDA_DST filled with a union: union vxlan_addr { struct sockaddr_in sin; struct sockaddr_in6 sin6; struct sockaddr sa; }; This eventually calls rt_addr_n2a_r which had no handler for the AF_BRIDGE family and "???" was being printed. Add code to properly display this data when requested. Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com> --- lib/utils.c | 18 ++ 1 file changed, 18 insertions(+) diff --git a/lib/utils.c b/lib/utils.c index 24aeddd8..e01e18a7 100644 --- a/lib/utils.c +++ b/lib/utils.c @@ -1004,6 +1004,24 @@ const char *rt_addr_n2a_r(int af, int len, } case AF_PACKET: return ll_addr_n2a(addr, len, ARPHRD_VOID, buf, buflen); + case AF_BRIDGE: + { + unsigned short family = ((struct sockaddr *)addr)->sa_family; + struct sockaddr_in6 *sin6; + struct sockaddr_in *sin; + + switch(family) { + case AF_INET: + sin = (struct sockaddr_in *)addr; + return inet_ntop(AF_INET, >sin_addr, buf, buflen); + case AF_INET6: + sin6 = (struct sockaddr_in6 *)addr; + return inet_ntop(AF_INET6, >sin6_addr, +buf, buflen); + } + + /* fallthrough */ + } default: return "???"; } -- 2.14.3
[PATCH v2 iproute2-next 2/3] ip: Display ip rule protocol used
Modify the 'ip rule' command to display the originating protocol when the kernel passes it to us. Add code to allow the `ip rule flush protocol XXX` command to be accepted and properly handled. Modify the documentation to reflect these code changes. Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com> --- ip/iprule.c| 29 ++--- man/man8/ip-rule.8 | 18 +- 2 files changed, 39 insertions(+), 8 deletions(-) diff --git a/ip/iprule.c b/ip/iprule.c index 00a6c26a..39008768 100644 --- a/ip/iprule.c +++ b/ip/iprule.c @@ -47,6 +47,7 @@ static void usage(void) "[ iif STRING ] [ oif STRING ] [ pref NUMBER ] [ l3mdev ]\n" "[ uidrange NUMBER-NUMBER ]\n" "ACTION := [ table TABLE_ID ]\n" + " [ protocol RPROTO ]\n" " [ nat ADDRESS ]\n" " [ realms [SRCREALM/]DSTREALM ]\n" " [ goto NUMBER ]\n" @@ -71,6 +72,8 @@ static struct struct fib_rule_uid_range range; inet_prefix src; inet_prefix dst; + int protocol; + int protocolmask; } filter; static inline int frh_get_table(struct fib_rule_hdr *frh, struct rtattr **tb) @@ -338,6 +341,10 @@ int print_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) rtnl_rtntype_n2a(frh->action, b1, sizeof(b1))); + if (frh->proto != RTPROT_UNSPEC) + fprintf(fp, " proto %s ", + rtnl_rtprot_n2a(frh->proto, b1, sizeof(b1))); + fprintf(fp, "\n"); fflush(fp); return 0; @@ -391,6 +398,9 @@ static int flush_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, parse_rtattr(tb, FRA_MAX, RTM_RTA(frh), len); + if ((filter.protocol^frh->proto)) + return 0; + if (tb[FRA_PRIORITY]) { n->nlmsg_type = RTM_DELRULE; n->nlmsg_flags = NLM_F_REQUEST; @@ -415,12 +425,6 @@ static int iprule_list_flush_or_save(int argc, char **argv, int action) if (af == AF_UNSPEC) af = AF_INET; - if (action != IPRULE_LIST && argc > 0) { - fprintf(stderr, "\"ip rule %s\" does not take any arguments.\n", - action == IPRULE_SAVE ? 
"save" : "flush"); - return -1; - } - switch (action) { case IPRULE_SAVE: if (save_rule_prep()) @@ -508,7 +512,18 @@ static int iprule_list_flush_or_save(int argc, char **argv, int action) NEXT_ARG(); if (get_prefix(, *argv, af)) invarg("from value is invalid\n", *argv); - } else { + } else if (matches(*argv, "protocol") == 0) { + __u32 prot; + NEXT_ARG(); + filter.protocolmask = -1; + if (rtnl_rtprot_a2n(, *argv)) { + if (strcmp(*argv, "all") != 0) + invarg("invalid \"protocol\"\n", *argv); + prot = 0; + filter.protocolmask = 0; + } + filter.protocol = prot; + } else{ if (matches(*argv, "dst") == 0 || matches(*argv, "to") == 0) { NEXT_ARG(); diff --git a/man/man8/ip-rule.8 b/man/man8/ip-rule.8 index a5c47981..98b2573d 100644 --- a/man/man8/ip-rule.8 +++ b/man/man8/ip-rule.8 @@ -50,6 +50,8 @@ ip-rule \- routing policy database management .IR ACTION " := [ " .B table .IR TABLE_ID " ] [ " +.B protocol +.IR RPROTO " ] [ " .B nat .IR ADDRESS " ] [ " .B realms @@ -240,6 +242,10 @@ The options preference and order are synonyms with priority. the routing table identifier to lookup if the rule selector matches. It is also possible to use lookup instead of table. +.TP +.BI protocol " RPROTO" +the protocol that installed the rule in question. + .TP .BI suppress_prefixlength " NUMBER" reject routing decisions that have a prefix length of NUMBER or less. @@ -275,7 +281,11 @@ updates, it flushes the routing cache with .RE .TP .B ip rule flush - also dumps all the deleted rules. -This command has no arguments. +.RS +.TP +.BI protocol " RPROTO" +Select the originating protocol. +.RE .TP .B ip rule show - list rules This command has no arguments. @@ -283,6 +293,12 @@ The options list or lst are synonyms with show. .TP .B ip rule save +.RS +.TP +.BI protocol " RPROTO" +Select the originating protocol. +.RE +.TP save rules table information to stdout .RS This command behaves like -- 2.14.3
[PATCH v2 iproute2-next 0/3] Allow 'ip rule' command to use protocol
Fix iprule.c to use the actual `struct fib_rule_hdr` and to allow the end user to see and use the protocol keyword for rule manipulations. v2: Rearrange and code changes as per David Ahern Donald Sharp (3): ip: Use the `struct fib_rule_hdr` for rules ip: Display ip rule protocol used ip: Allow rules to accept a specified protocol include/uapi/linux/fib_rules.h | 2 +- ip/iprule.c| 164 - man/man8/ip-rule.8 | 18 - 3 files changed, 114 insertions(+), 70 deletions(-) -- 2.14.3
[PATCH v2 iproute2-next 1/3] ip: Use the `struct fib_rule_hdr` for rules
The iprule.c code was using `struct rtmsg` as the data type to pass into the kernel for the netlink message. While 'struct rtmsg' and `struct fib_rule_hdr` are the same size and mostly the same, we should use the correct data structure. This commit translates the data structures to have iprule.c use the correct one. Additionally copy over the modified fib_rules.h file Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com> --- include/uapi/linux/fib_rules.h | 2 +- ip/iprule.c| 129 ++--- 2 files changed, 69 insertions(+), 62 deletions(-) diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h index 2b642bf9..92553917 100644 --- a/include/uapi/linux/fib_rules.h +++ b/include/uapi/linux/fib_rules.h @@ -23,8 +23,8 @@ struct fib_rule_hdr { __u8tos; __u8table; + __u8proto; __u8res1; /* reserved */ - __u8res2; /* reserved */ __u8action; __u32 flags; diff --git a/ip/iprule.c b/ip/iprule.c index a3abf2f6..00a6c26a 100644 --- a/ip/iprule.c +++ b/ip/iprule.c @@ -73,25 +73,33 @@ static struct inet_prefix dst; } filter; +static inline int frh_get_table(struct fib_rule_hdr *frh, struct rtattr **tb) +{ + __u32 table = frh->table; + if (tb[RTA_TABLE]) + table = rta_getattr_u32(tb[RTA_TABLE]); + return table; +} + static bool filter_nlmsg(struct nlmsghdr *n, struct rtattr **tb, int host_len) { - struct rtmsg *r = NLMSG_DATA(n); + struct fib_rule_hdr *frh = NLMSG_DATA(n); __u32 table; - if (preferred_family != AF_UNSPEC && r->rtm_family != preferred_family) + if (preferred_family != AF_UNSPEC && frh->family != preferred_family) return false; if (filter.prefmask && filter.pref ^ (tb[FRA_PRIORITY] ? 
rta_getattr_u32(tb[FRA_PRIORITY]) : 0)) return false; - if (filter.not && !(r->rtm_flags & FIB_RULE_INVERT)) + if (filter.not && !(frh->flags & FIB_RULE_INVERT)) return false; if (filter.src.family) { inet_prefix *f_src = - if (f_src->family != r->rtm_family || - f_src->bitlen > r->rtm_src_len) + if (f_src->family != frh->family || + f_src->bitlen > frh->src_len) return false; if (inet_addr_match_rta(f_src, tb[FRA_SRC])) @@ -101,15 +109,15 @@ static bool filter_nlmsg(struct nlmsghdr *n, struct rtattr **tb, int host_len) if (filter.dst.family) { inet_prefix *f_dst = - if (f_dst->family != r->rtm_family || - f_dst->bitlen > r->rtm_dst_len) + if (f_dst->family != frh->family || + f_dst->bitlen > frh->dst_len) return false; if (inet_addr_match_rta(f_dst, tb[FRA_DST])) return false; } - if (filter.tosmask && filter.tos ^ r->rtm_tos) + if (filter.tosmask && filter.tos ^ frh->tos) return false; if (filter.fwmark) { @@ -159,7 +167,7 @@ static bool filter_nlmsg(struct nlmsghdr *n, struct rtattr **tb, int host_len) return false; } - table = rtm_get_table(r, tb); + table = frh_get_table(frh, tb); if (filter.tb > 0 && filter.tb ^ table) return false; @@ -169,7 +177,7 @@ static bool filter_nlmsg(struct nlmsghdr *n, struct rtattr **tb, int host_len) int print_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) { FILE *fp = (FILE *)arg; - struct rtmsg *r = NLMSG_DATA(n); + struct fib_rule_hdr *frh = NLMSG_DATA(n); int len = n->nlmsg_len; int host_len = -1; __u32 table; @@ -180,13 +188,13 @@ int print_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) if (n->nlmsg_type != RTM_NEWRULE && n->nlmsg_type != RTM_DELRULE) return 0; - len -= NLMSG_LENGTH(sizeof(*r)); + len -= NLMSG_LENGTH(sizeof(*frh)); if (len < 0) return -1; - parse_rtattr(tb, FRA_MAX, RTM_RTA(r), len); + parse_rtattr(tb, FRA_MAX, RTM_RTA(frh), len); - host_len = af_bit_len(r->rtm_family); + host_len = af_bit_len(frh->family); if (!filter_nlmsg(n, tb, host_len)) return 0; @@ -200,41 
+208,41 @@ int print_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) else fprintf(fp, "0:\t"); - if (r->rtm_flags & FIB_RULE_INVERT) + if (frh->flags & FIB
[PATCH v2 iproute2-next 3/3] ip: Allow rules to accept a specified protocol
Allow the specification of a protocol when the user adds/modifies/deletes a rule. Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com> --- ip/iprule.c | 6 ++ 1 file changed, 6 insertions(+) diff --git a/ip/iprule.c b/ip/iprule.c index 39008768..192fe215 100644 --- a/ip/iprule.c +++ b/ip/iprule.c @@ -683,6 +683,12 @@ static int iprule_modify(int cmd, int argc, char **argv) if (get_rt_realms_or_raw(, *argv)) invarg("invalid realms\n", *argv); addattr32(, sizeof(req), FRA_FLOW, realm); + } else if (matches(*argv, "protocol") == 0) { + __u32 proto; + NEXT_ARG(); + if (rtnl_rtprot_a2n(, *argv)) + invarg("\"protocol\" value is invalid\n", *argv); + req.frh.proto = proto; } else if (matches(*argv, "table") == 0 || strcmp(*argv, "lookup") == 0) { NEXT_ARG(); -- 2.14.3
[PATCH iproute2-next 2/4] ip: Display ip rule protocol used
Newer kernels now accept a protocol value from the installing program that identifies who installed the rule. This change displays that protocol when the installing program has specified one. Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com> --- ip/iprule.c | 4 1 file changed, 4 insertions(+) diff --git a/ip/iprule.c b/ip/iprule.c index c40d76f1..b3e7d92c 100644 --- a/ip/iprule.c +++ b/ip/iprule.c @@ -341,6 +341,10 @@ int print_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) rtnl_rtntype_n2a(frh->action, b1, sizeof(b1))); + if (frh->proto != RTPROT_UNSPEC) + fprintf(fp, " proto %s ", + rtnl_rtprot_n2a(frh->proto, b1, sizeof(b1))); + fprintf(fp, "\n"); fflush(fp); return 0; -- 2.14.3
[PATCH iproute2-next 3/4] ip: Allow rules to accept a specified protocol
Allow the specification of a protocol when the user adds/modifies/deletes a rule. Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com> --- ip/iprule.c | 6 ++ 1 file changed, 6 insertions(+) diff --git a/ip/iprule.c b/ip/iprule.c index b3e7d92c..fd242fee 100644 --- a/ip/iprule.c +++ b/ip/iprule.c @@ -675,6 +675,12 @@ static int iprule_modify(int cmd, int argc, char **argv) if (get_rt_realms_or_raw(, *argv)) invarg("invalid realms\n", *argv); addattr32(, sizeof(req), FRA_FLOW, realm); + } else if (matches(*argv, "protocol") == 0) { + __u32 proto; + NEXT_ARG(); + if (rtnl_rtprot_a2n(, *argv)) + invarg("\"protocol\" value is invalid\n", *argv); + req.frh.proto = proto; } else if (matches(*argv, "table") == 0 || strcmp(*argv, "lookup") == 0) { NEXT_ARG(); -- 2.14.3
[PATCH iproute2-next 0/4] Allow 'ip rule' command to use protocol
Fix iprule.c to use the actual `struct fib_rule_hdr` and to allow the end user to see and use the protocol keyword for rule manipulations. Donald Sharp (4): ip: Use the `struct fib_rule_hdr` for rules ip: Display ip rule protocol used ip: Allow rules to accept a specified protocol ip: Add ability to flush a rule based upon protocol include/uapi/linux/fib_rules.h | 2 +- ip/iprule.c| 167 - man/man8/ip-rule.8 | 18 - 3 files changed, 117 insertions(+), 70 deletions(-) -- 2.14.3
[PATCH iproute2-next 4/4] ip: Add ability to flush a rule based upon protocol
Add code to allow the `ip rule flush protocol XXX` command to be accepted and properly handled. Additionally update the documentation to reflect these changes. Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com> --- ip/iprule.c| 25 ++--- man/man8/ip-rule.8 | 18 +- 2 files changed, 35 insertions(+), 8 deletions(-) diff --git a/ip/iprule.c b/ip/iprule.c index fd242fee..b69413dd 100644 --- a/ip/iprule.c +++ b/ip/iprule.c @@ -47,6 +47,7 @@ static void usage(void) "[ iif STRING ] [ oif STRING ] [ pref NUMBER ] [ l3mdev ]\n" "[ uidrange NUMBER-NUMBER ]\n" "ACTION := [ table TABLE_ID ]\n" + " [ protocol RPROTO ]\n" " [ nat ADDRESS ]\n" " [ realms [SRCREALM/]DSTREALM ]\n" " [ goto NUMBER ]\n" @@ -71,6 +72,8 @@ static struct struct fib_rule_uid_range range; inet_prefix src; inet_prefix dst; + int protocol; + int protocolmask; } filter; static bool filter_nlmsg(struct nlmsghdr *n, struct rtattr **tb, int host_len) @@ -398,6 +401,9 @@ static int flush_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, parse_rtattr(tb, FRA_MAX, RTM_RTA(frh), len); + if ((filter.protocol^frh->proto)) + return 0; + if (tb[FRA_PRIORITY]) { n->nlmsg_type = RTM_DELRULE; n->nlmsg_flags = NLM_F_REQUEST; @@ -422,12 +428,6 @@ static int iprule_list_flush_or_save(int argc, char **argv, int action) if (af == AF_UNSPEC) af = AF_INET; - if (action != IPRULE_LIST && argc > 0) { - fprintf(stderr, "\"ip rule %s\" does not take any arguments.\n", - action == IPRULE_SAVE ? 
"save" : "flush"); - return -1; - } - switch (action) { case IPRULE_SAVE: if (save_rule_prep()) @@ -515,7 +515,18 @@ static int iprule_list_flush_or_save(int argc, char **argv, int action) NEXT_ARG(); if (get_prefix(, *argv, af)) invarg("from value is invalid\n", *argv); - } else { + } else if (matches(*argv, "protocol") == 0) { + __u32 prot; + NEXT_ARG(); + filter.protocolmask = -1; + if (rtnl_rtprot_a2n(, *argv)) { + if (strcmp(*argv, "all") != 0) + invarg("invalid \"protocol\"\n", *argv); + prot = 0; + filter.protocolmask = 0; + } + filter.protocol = prot; + } else{ if (matches(*argv, "dst") == 0 || matches(*argv, "to") == 0) { NEXT_ARG(); diff --git a/man/man8/ip-rule.8 b/man/man8/ip-rule.8 index a5c47981..98b2573d 100644 --- a/man/man8/ip-rule.8 +++ b/man/man8/ip-rule.8 @@ -50,6 +50,8 @@ ip-rule \- routing policy database management .IR ACTION " := [ " .B table .IR TABLE_ID " ] [ " +.B protocol +.IR RPROTO " ] [ " .B nat .IR ADDRESS " ] [ " .B realms @@ -240,6 +242,10 @@ The options preference and order are synonyms with priority. the routing table identifier to lookup if the rule selector matches. It is also possible to use lookup instead of table. +.TP +.BI protocol " RPROTO" +the protocol that installed the rule in question. + .TP .BI suppress_prefixlength " NUMBER" reject routing decisions that have a prefix length of NUMBER or less. @@ -275,7 +281,11 @@ updates, it flushes the routing cache with .RE .TP .B ip rule flush - also dumps all the deleted rules. -This command has no arguments. +.RS +.TP +.BI protocol " RPROTO" +Select the originating protocol. +.RE .TP .B ip rule show - list rules This command has no arguments. @@ -283,6 +293,12 @@ The options list or lst are synonyms with show. .TP .B ip rule save +.RS +.TP +.BI protocol " RPROTO" +Select the originating protocol. +.RE +.TP save rules table information to stdout .RS This command behaves like -- 2.14.3
[PATCH iproute2-next 1/4] ip: Use the `struct fib_rule_hdr` for rules
The iprule.c code was using `struct rtmsg` as the data type to pass into the kernel for the netlink message. While 'struct rtmsg' and `struct fib_rule_hdr` are the same size and mostly the same, we should use the correct data structure. This commit translates the data structures to have iprule.c use the correct one. Additionally copy over the modified fib_rules.h file Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com> --- include/uapi/linux/fib_rules.h | 2 +- ip/iprule.c| 132 ++--- 2 files changed, 72 insertions(+), 62 deletions(-) diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h index 2b642bf9..92553917 100644 --- a/include/uapi/linux/fib_rules.h +++ b/include/uapi/linux/fib_rules.h @@ -23,8 +23,8 @@ struct fib_rule_hdr { __u8tos; __u8table; + __u8proto; __u8res1; /* reserved */ - __u8res2; /* reserved */ __u8action; __u32 flags; diff --git a/ip/iprule.c b/ip/iprule.c index a3abf2f6..c40d76f1 100644 --- a/ip/iprule.c +++ b/ip/iprule.c @@ -75,23 +75,23 @@ static struct static bool filter_nlmsg(struct nlmsghdr *n, struct rtattr **tb, int host_len) { - struct rtmsg *r = NLMSG_DATA(n); + struct fib_rule_hdr *frh = NLMSG_DATA(n); __u32 table; - if (preferred_family != AF_UNSPEC && r->rtm_family != preferred_family) + if (preferred_family != AF_UNSPEC && frh->family != preferred_family) return false; if (filter.prefmask && filter.pref ^ (tb[FRA_PRIORITY] ? 
rta_getattr_u32(tb[FRA_PRIORITY]) : 0)) return false; - if (filter.not && !(r->rtm_flags & FIB_RULE_INVERT)) + if (filter.not && !(frh->flags & FIB_RULE_INVERT)) return false; if (filter.src.family) { inet_prefix *f_src = - if (f_src->family != r->rtm_family || - f_src->bitlen > r->rtm_src_len) + if (f_src->family != frh->family || + f_src->bitlen > frh->src_len) return false; if (inet_addr_match_rta(f_src, tb[FRA_SRC])) @@ -101,15 +101,15 @@ static bool filter_nlmsg(struct nlmsghdr *n, struct rtattr **tb, int host_len) if (filter.dst.family) { inet_prefix *f_dst = - if (f_dst->family != r->rtm_family || - f_dst->bitlen > r->rtm_dst_len) + if (f_dst->family != frh->family || + f_dst->bitlen > frh->dst_len) return false; if (inet_addr_match_rta(f_dst, tb[FRA_DST])) return false; } - if (filter.tosmask && filter.tos ^ r->rtm_tos) + if (filter.tosmask && filter.tos ^ frh->tos) return false; if (filter.fwmark) { @@ -159,7 +159,13 @@ static bool filter_nlmsg(struct nlmsghdr *n, struct rtattr **tb, int host_len) return false; } - table = rtm_get_table(r, tb); + + /* struct fib_rule_hdr and struct rtmsg +* were intentionally the same. Since +* the table is the rtm_table, just call +* it. 
+*/ + table = rtm_get_table((struct rtmsg *)frh, tb); if (filter.tb > 0 && filter.tb ^ table) return false; @@ -169,7 +175,7 @@ static bool filter_nlmsg(struct nlmsghdr *n, struct rtattr **tb, int host_len) int print_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) { FILE *fp = (FILE *)arg; - struct rtmsg *r = NLMSG_DATA(n); + struct fib_rule_hdr *frh = NLMSG_DATA(n); int len = n->nlmsg_len; int host_len = -1; __u32 table; @@ -180,13 +186,13 @@ int print_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) if (n->nlmsg_type != RTM_NEWRULE && n->nlmsg_type != RTM_DELRULE) return 0; - len -= NLMSG_LENGTH(sizeof(*r)); + len -= NLMSG_LENGTH(sizeof(*frh)); if (len < 0) return -1; - parse_rtattr(tb, FRA_MAX, RTM_RTA(r), len); + parse_rtattr(tb, FRA_MAX, RTM_RTA(frh), len); - host_len = af_bit_len(r->rtm_family); + host_len = af_bit_len(frh->family); if (!filter_nlmsg(n, tb, host_len)) return 0; @@ -200,41 +206,41 @@ int print_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) else fprintf(fp, "0:\t"); - if (r->rtm_flags & FIB_RULE_INVERT) + if (frh->flags & FIB_RULE_INVERT) fprintf(fp, "not ");
[PATCH net-next v2 0/1] Allow rules to track originating protocol
Add the ability for the kernel to track the originating protocol for when new rules are added to the kernel. --- v1->v2 -> Address comments by David Miller to collapse patches into 1. Donald Sharp (1): net: Allow a rule to track originating protocol drivers/net/vrf.c | 1 + include/net/fib_rules.h| 3 ++- include/uapi/linux/fib_rules.h | 2 +- net/core/fib_rules.c | 7 ++- 4 files changed, 10 insertions(+), 3 deletions(-) -- 2.14.3
[PATCH net-next v2 1/1] net: Allow a rule to track originating protocol
Allow a rule that is being added/deleted/modified or dumped to contain the originating protocol's id. The protocol is handled just like a route's originating protocol. This is especially useful because a growing number of different user space programs are adding rules. Allow the vrf device to specify that the kernel is the originator of the rule created for this device. Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com> --- drivers/net/vrf.c | 1 + include/net/fib_rules.h| 3 ++- include/uapi/linux/fib_rules.h | 2 +- net/core/fib_rules.c | 7 ++- 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 139c61c8244a..ec6d2d623b60 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -1175,6 +1175,7 @@ static int vrf_fib_rule(const struct net_device *dev, __u8 family, bool add_it) memset(frh, 0, sizeof(*frh)); frh->family = family; frh->action = FR_ACT_TO_TBL; + frh->proto = RTPROT_KERNEL; if (nla_put_u8(skb, FRA_L3MDEV, 1)) goto nla_put_failure; diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 648caf90ec07..b166ef07e6d4 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -26,7 +26,8 @@ struct fib_rule { u32 table; u8 action; u8 l3mdev; - /* 2 bytes hole, try to use */ + u8 proto; + /* 1 byte hole, try to use */ u32 target; __be64 tun_id; struct fib_rule __rcu *ctarget; diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h index 2b642bf9b5a0..925539172d5b 100644 --- a/include/uapi/linux/fib_rules.h +++ b/include/uapi/linux/fib_rules.h @@ -23,8 +23,8 @@ struct fib_rule_hdr { __u8tos; __u8table; + __u8proto; __u8res1; /* reserved */ - __u8res2; /* reserved */ __u8action; __u32 flags; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 98e1066c3d55..c1d4ab5b2d9f 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -51,6 +51,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops, r->pref = pref; r->table = table; 
r->flags = flags; + r->proto = RTPROT_KERNEL; r->fr_net = ops->fro_net; r->uid_range = fib_kuid_range_unset; @@ -465,6 +466,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, } refcount_set(>refcnt, 1); rule->fr_net = net; + rule->proto = frh->proto; rule->pref = tb[FRA_PRIORITY] ? nla_get_u32(tb[FRA_PRIORITY]) : fib_default_rule_pref(ops); @@ -664,6 +666,9 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, } list_for_each_entry(rule, >rules_list, list) { + if (frh->proto && (frh->proto != rule->proto)) + continue; + if (frh->action && (frh->action != rule->action)) continue; @@ -808,9 +813,9 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, if (nla_put_u32(skb, FRA_SUPPRESS_PREFIXLEN, rule->suppress_prefixlen)) goto nla_put_failure; frh->res1 = 0; - frh->res2 = 0; frh->action = rule->action; frh->flags = rule->flags; + frh->proto = rule->proto; if (rule->action == FR_ACT_GOTO && rcu_access_pointer(rule->ctarget) == NULL) -- 2.14.3
Re: [PATCH 0/3] Allow 'ip rule' command to use protocol
Got it. I'll send an update. donald On Sat, Feb 17, 2018 at 6:35 PM, David Ahern <d...@cumulusnetworks.com> wrote: > On 2/17/18 5:47 AM, Donald Sharp wrote: >> Fix iprule.c to use the actual `struct fib_rule_hdr` and to >> allow the end user to see and use the protocol keyword >> for rule manipulations. >> >> Donald Sharp (3): >> ip: Use the `struct fib_rule_hdr` for rules >> ip: Display ip rule protocol used >> ip: Allow rules to accept a specified protocol >> >> include/linux/fib_rules.h | 2 +- >> ip/iprule.c | 114 >> ++ >> 2 files changed, 65 insertions(+), 51 deletions(-) >> > > you are missing a patch to add protocol to iprule_list_flush_or_save so > 'ip ru flush proto NAME' flushes all rules with that protocol.
[PATCH 3/3] ip: Allow rules to accept a specified protocol
Allow the specification of a protocol when the user adds/modifies/deletes a rule. Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com> --- ip/iprule.c | 6 ++ 1 file changed, 6 insertions(+) diff --git a/ip/iprule.c b/ip/iprule.c index 5703d6e4..8fc6ac48 100644 --- a/ip/iprule.c +++ b/ip/iprule.c @@ -324,6 +324,12 @@ static int iprule_modify(int cmd, int argc, char **argv) if (get_rt_realms(, *argv)) invarg("invalid realms\n", *argv); addattr32(, sizeof(req), FRA_FLOW, realm); + } else if (matches(*argv, "protocol") == 0) { + __u32 proto; + NEXT_ARG(); + if (rtnl_rtprot_a2n(, *argv)) + invarg("\"protocol\" value is invalid\n", *argv); + req.frh.proto = proto; } else if (matches(*argv, "table") == 0 || strcmp(*argv, "lookup") == 0) { NEXT_ARG(); -- 2.14.3
[PATCH 1/3] ip: Use the `struct fib_rule_hdr` for rules
The iprule.c code was using `struct rtmsg` as the data type to pass into the kernel for the netlink message. While 'struct rtmsg' and `struct fib_rule_hdr` are the same size and mostly the same, we should use the correct data structure. This commit translates the data structures to have iprule.c use the correct one. Additionally copy over the modified fib_rules.h file Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com> --- include/linux/fib_rules.h | 2 +- ip/iprule.c | 105 -- 2 files changed, 56 insertions(+), 51 deletions(-) diff --git a/include/linux/fib_rules.h b/include/linux/fib_rules.h index bbf02a63..21f1fbf3 100644 --- a/include/linux/fib_rules.h +++ b/include/linux/fib_rules.h @@ -22,8 +22,8 @@ struct fib_rule_hdr { __u8tos; __u8table; + __u8proto; /* reserved */ __u8res1; /* reserved */ - __u8res2; /* reserved */ __u8action; __u32 flags; diff --git a/ip/iprule.c b/ip/iprule.c index 854a3d8e..82e22fee 100644 --- a/ip/iprule.c +++ b/ip/iprule.c @@ -50,7 +50,7 @@ static void usage(void) int print_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) { FILE *fp = (FILE*)arg; - struct rtmsg *r = NLMSG_DATA(n); + struct fib_rule_hdr *frh = NLMSG_DATA(n); int len = n->nlmsg_len; int host_len = -1; __u32 table; @@ -61,13 +61,13 @@ int print_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) if (n->nlmsg_type != RTM_NEWRULE && n->nlmsg_type != RTM_DELRULE) return 0; - len -= NLMSG_LENGTH(sizeof(*r)); + len -= NLMSG_LENGTH(sizeof(*frh)); if (len < 0) return -1; - parse_rtattr(tb, FRA_MAX, RTM_RTA(r), len); + parse_rtattr(tb, FRA_MAX, RTM_RTA(frh), len); - host_len = af_bit_len(r->rtm_family); + host_len = af_bit_len(frh->family); if (n->nlmsg_type == RTM_DELRULE) fprintf(fp, "Deleted "); @@ -77,51 +77,51 @@ int print_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) else fprintf(fp, "0:\t"); - if (r->rtm_flags & FIB_RULE_INVERT) + if (frh->flags & FIB_RULE_INVERT) fprintf(fp, "not "); if (tb[FRA_SRC]) { - if 
(r->rtm_src_len != host_len) { - fprintf(fp, "from %s/%u ", rt_addr_n2a(r->rtm_family, + if (frh->src_len != host_len) { + fprintf(fp, "from %s/%u ", rt_addr_n2a(frh->family, RTA_PAYLOAD(tb[FRA_SRC]), RTA_DATA(tb[FRA_SRC]), abuf, sizeof(abuf)), - r->rtm_src_len + frh->src_len ); } else { - fprintf(fp, "from %s ", format_host(r->rtm_family, + fprintf(fp, "from %s ", format_host(frh->family, RTA_PAYLOAD(tb[FRA_SRC]), RTA_DATA(tb[FRA_SRC]), abuf, sizeof(abuf)) ); } - } else if (r->rtm_src_len) { - fprintf(fp, "from 0/%d ", r->rtm_src_len); + } else if (frh->src_len) { + fprintf(fp, "from 0/%d ", frh->src_len); } else { fprintf(fp, "from all "); } if (tb[FRA_DST]) { - if (r->rtm_dst_len != host_len) { - fprintf(fp, "to %s/%u ", rt_addr_n2a(r->rtm_family, + if (frh->dst_len != host_len) { + fprintf(fp, "to %s/%u ", rt_addr_n2a(frh->family, RTA_PAYLOAD(tb[FRA_DST]), RTA_DATA(tb[FRA_DST]), abuf, sizeof(abuf)), - r->rtm_dst_len + frh->dst_len ); } else { - fprintf(fp, "to %s ", format_host(r->rtm_family, + fprintf(fp, "to %s ", format_host(frh->family, RTA_PAYLOAD(tb[FRA_DST]), RTA_DATA(tb[FRA_DST]),
[PATCH 0/3] Allow 'ip rule' command to use protocol
Fix iprule.c to use the actual `struct fib_rule_hdr` and to allow the end user to see and use the protocol keyword for rule manipulations. Donald Sharp (3): ip: Use the `struct fib_rule_hdr` for rules ip: Display ip rule protocol used ip: Allow rules to accept a specified protocol include/linux/fib_rules.h | 2 +- ip/iprule.c | 114 ++ 2 files changed, 65 insertions(+), 51 deletions(-) -- 2.14.3
[PATCH 2/3] ip: Display ip rule protocol used
Newer kernels now accept a protocol value from the installing program that identifies who installed the rule. This change displays that protocol when the installing program has specified one. Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com> --- ip/iprule.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ip/iprule.c b/ip/iprule.c index 82e22fee..5703d6e4 100644 --- a/ip/iprule.c +++ b/ip/iprule.c @@ -213,6 +213,9 @@ int print_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) else if (frh->action != RTN_UNICAST) fprintf(fp, "%s", rtnl_rtntype_n2a(frh->action, b1, sizeof(b1))); + if (frh->proto != RTPROT_UNSPEC) + fprintf(fp, " proto %s ", + rtnl_rtprot_n2a(frh->proto, b1, sizeof(b1))); fprintf(fp, "\n"); fflush(fp); return 0; -- 2.14.3
[PATCH 1/2] net: Allow a rule to track originating protocol
Allow a rule that is being added/deleted/modified or dumped to contain the originating protocol's id. The protocol is handled just like a route's originating protocol. This is especially useful because a growing number of different user space programs are adding rules. Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com> --- include/net/fib_rules.h| 3 ++- include/uapi/linux/fib_rules.h | 2 +- net/core/fib_rules.c | 7 ++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 648caf90ec07..b166ef07e6d4 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -26,7 +26,8 @@ struct fib_rule { u32 table; u8 action; u8 l3mdev; - /* 2 bytes hole, try to use */ + u8 proto; + /* 1 byte hole, try to use */ u32 target; __be64 tun_id; struct fib_rule __rcu *ctarget; diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h index 2b642bf9b5a0..925539172d5b 100644 --- a/include/uapi/linux/fib_rules.h +++ b/include/uapi/linux/fib_rules.h @@ -23,8 +23,8 @@ struct fib_rule_hdr { __u8tos; __u8table; + __u8proto; __u8res1; /* reserved */ - __u8res2; /* reserved */ __u8action; __u32 flags; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 98e1066c3d55..c1d4ab5b2d9f 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -51,6 +51,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops, r->pref = pref; r->table = table; r->flags = flags; + r->proto = RTPROT_KERNEL; r->fr_net = ops->fro_net; r->uid_range = fib_kuid_range_unset; @@ -465,6 +466,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, } refcount_set(>refcnt, 1); rule->fr_net = net; + rule->proto = frh->proto; rule->pref = tb[FRA_PRIORITY] ? 
nla_get_u32(tb[FRA_PRIORITY]) : fib_default_rule_pref(ops); @@ -664,6 +666,9 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, } list_for_each_entry(rule, >rules_list, list) { + if (frh->proto && (frh->proto != rule->proto)) + continue; + if (frh->action && (frh->action != rule->action)) continue; @@ -808,9 +813,9 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, if (nla_put_u32(skb, FRA_SUPPRESS_PREFIXLEN, rule->suppress_prefixlen)) goto nla_put_failure; frh->res1 = 0; - frh->res2 = 0; frh->action = rule->action; frh->flags = rule->flags; + frh->proto = rule->proto; if (rule->action == FR_ACT_GOTO && rcu_access_pointer(rule->ctarget) == NULL) -- 2.14.3
[PATCH 2/2] drivers: Modify vrf device to specify its rule as RTPROT_KERNEL
Allow the vrf device to specify that the kernel is the originator of the rule created for this device. Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com> --- drivers/net/vrf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 139c61c8244a..ec6d2d623b60 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -1175,6 +1175,7 @@ static int vrf_fib_rule(const struct net_device *dev, __u8 family, bool add_it) memset(frh, 0, sizeof(*frh)); frh->family = family; frh->action = FR_ACT_TO_TBL; + frh->proto = RTPROT_KERNEL; if (nla_put_u8(skb, FRA_L3MDEV, 1)) goto nla_put_failure; -- 2.14.3
[PATCH 0/2] Allow rules to track originating protocol
Add the ability for the kernel to track the originating protocol when new rules are added. Donald Sharp (2): net: Allow a rule to track originating protocol drivers: Modify vrf device to specify its rule as RTPROT_KERNEL drivers/net/vrf.c | 1 + include/net/fib_rules.h| 3 ++- include/uapi/linux/fib_rules.h | 2 +- net/core/fib_rules.c | 7 ++- 4 files changed, 10 insertions(+), 3 deletions(-) -- 2.14.3
[PATCH] doc: Update VRF documentation metric
Two things: 1) Update examples to show usage of metric 2) Discuss reasoning for using such a high metric. Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com> --- Documentation/networking/vrf.txt | 13 + 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/Documentation/networking/vrf.txt b/Documentation/networking/vrf.txt index 3918dae..8ff7b4c 100644 --- a/Documentation/networking/vrf.txt +++ b/Documentation/networking/vrf.txt @@ -71,7 +71,12 @@ Setup ip ru add iif vrf-blue table 10 3. Set the default route for the table (and hence default route for the VRF). - ip route add table 10 unreachable default + ip route add table 10 unreachable default metric 4278198272 + + This high metric value ensures that the default unreachable route can + be overridden by a routing protocol suite. FRRouting interprets + kernel metrics as a combined admin distance (upper byte) and priority + (lower 3 bytes). Thus the above metric translates to [255/8192]. 4. Enslave L3 interfaces to a VRF device. ip link set dev eth1 master vrf-blue @@ -256,7 +261,7 @@ older form without it. For example: $ ip route show vrf red - prohibit default + unreachable default metric 4278198272 broadcast 10.2.1.0 dev eth1 proto kernel scope link src 10.2.1.2 10.2.1.0/24 dev eth1 proto kernel scope link src 10.2.1.2 local 10.2.1.2 dev eth1 proto kernel scope host src 10.2.1.2 @@ -282,7 +287,7 @@ older form without it. ff00::/8 dev red metric 256 pref medium ff00::/8 dev eth1 metric 256 pref medium ff00::/8 dev eth2 metric 256 pref medium - + unreachable default dev lo metric 4278198272 error -101 pref medium 8. Route Lookup for a VRF @@ -331,7 +336,7 @@ function vrf_create ip link add ${VRF} type vrf table ${TBID} if [ "${VRF}" != "mgmt" ]; then -ip route add table ${TBID} unreachable default +ip route add table ${TBID} unreachable default metric 4278198272 fi ip link set dev ${VRF} up } -- 2.9.5
[PATCH] net: ipmr: Add ipmr_rtm_getroute
Add to RTNL_FAMILY_IPMR, RTM_GETROUTE the ability to retrieve one S,G mroute from a specified table. *,G will return mroute information for just that particular mroute if it exists. This is because it is entirely possible to have more S's than can fit in one skb to return to the requesting process. Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com> --- net/ipv4/ipmr.c | 63 - 1 file changed, 62 insertions(+), 1 deletion(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index a1d521b..bb909f1 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2406,6 +2406,67 @@ static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt) rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE_R, -ENOBUFS); } +static int ipmr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, +struct netlink_ext_ack *extack) +{ + struct net *net = sock_net(in_skb->sk); + struct nlattr *tb[RTA_MAX + 1]; + struct sk_buff *skb = NULL; + struct mfc_cache *cache; + struct mr_table *mrt; + struct rtmsg *rtm; + __be32 src, grp; + u32 tableid; + int err; + + err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_ipv4_policy, extack); + if (err < 0) + goto errout; + + rtm = nlmsg_data(nlh); + + src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0; + grp = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0; + tableid = tb[RTA_TABLE] ? nla_get_u32(tb[RTA_TABLE]) : 0; + + mrt = ipmr_get_table(net, tableid ? 
tableid : RT_TABLE_DEFAULT); + if (IS_ERR(mrt)) { + err = PTR_ERR(mrt); + goto errout_free; + } + + /* entries are added/deleted only under RTNL */ + rcu_read_lock(); + cache = ipmr_cache_find(mrt, src, grp); + rcu_read_unlock(); + if (!cache) { + err = -ENOENT; + goto errout_free; + } + + skb = nlmsg_new(mroute_msgsize(false, mrt->maxvif), GFP_KERNEL); + if (!skb) { + err = -ENOBUFS; + goto errout_free; + } + + err = ipmr_fill_mroute(mrt, skb, NETLINK_CB(in_skb).portid, + nlh->nlmsg_seq, cache, + RTM_NEWROUTE, 0); + if (err < 0) + goto errout_free; + + err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); + +errout: + return err; + +errout_free: + kfree_skb(skb); + goto errout; +} + static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); @@ -3053,7 +3114,7 @@ int __init ip_mr_init(void) } #endif rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, - NULL, ipmr_rtm_dumproute, NULL); + ipmr_rtm_getroute, ipmr_rtm_dumproute, NULL); rtnl_register(RTNL_FAMILY_IPMR, RTM_NEWROUTE, ipmr_rtm_route, NULL, NULL); rtnl_register(RTNL_FAMILY_IPMR, RTM_DELROUTE, -- 2.9.4
[PATCH iproute2] ip: mroute: Add table output to show command
When the user specifies `table all` or `table 0` to the `ip mroute show` command we dump the entirety of the known mroute tables. Without some sort of separator to tell us which table we are looking at, the output is useless. Add `Table: ` to the output of 'ip mroute show table 0'. Follow the convention established by 'ip route show table 0' for when to display it. Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com> --- ip/ipmroute.c | 5 + 1 file changed, 5 insertions(+) diff --git a/ip/ipmroute.c b/ip/ipmroute.c index 4d2d758..70f4c4f 100644 --- a/ip/ipmroute.c +++ b/ip/ipmroute.c @@ -189,6 +189,11 @@ int print_mroute(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) fprintf(fp, ", Age %4i.%.2i", (int)tv.tv_sec, (int)tv.tv_usec/1); } + + if (table && (table != RT_TABLE_MAIN || show_details > 0) && !filter.tb) + fprintf(fp, " Table: %s", + rtnl_rttable_n2a(table, b1, sizeof(b1))); + fprintf(fp, "\n"); fflush(fp); return 0; -- 2.1.4
Re: ipmr: MFC routes when VIF deleted
I would argue that this is just an unintended side effect of the original implementation. Shuffling interface vifs seems to me like a good way to churn a significant number of mroutes. If you want to use an interface, it is probably already configured with its own vif, and as such it's just better to figure out the new proper RPF and insert that mroute. If it isn't, then I need to bring up that new vif (and all its associated pim information, like establishing a new neighbor relationship) when I do need to shuffle interface vifs. During that time I'm dropping a significant number of packets, and all our end users are going to be screaming at us to fix that hole. donald On Sun, Jun 11, 2017 at 12:34 PM, Nikolay Aleksandrov wrote: > On 11/06/17 11:55, Yotam Gigi wrote: >> I have been looking into some weird behavior, and I am not sure whether it is >> a bug or a feature. >> >> When a VIF with index v gets deleted, the MFC routes does not get updated, >> which >> means that there can be routes pointing to that VIF. On datapath, when packet >> hits that route, the VIF validity will be checked and will not be sent to >> that >> device (but still, the route does not get updated). Now, if the user creates >> another VIF with the same index v but different underlay device, the same >> route >> will forward the traffic to that device. >> >> It is relevant to mention that when user adds a MFC route, only the active >> VIFs >> are used, so the flow of adding a route with dummy VIF indices and then >> connecting those VIF indices to real device is not supported. The only way to >> create a MFC route that has non existing VIFs is to create one with existing >> VIFs and then delete them. >> >> Do we really want to support that? To me, it looks like a buggy flow and I >> suggest that upon VIF deletion, the MFC routes will be updated to not point >> to >> any non existing VIF indices. 
>> > > Hi Yotam, > I'm not strongly against such change but my feeling is that we shouldn't > change it. > I think we shouldn't change it because we cannot guarantee that we won't > break some > user-space app that relies on this behaviour or uses it as an optimization. > User-space ipmr apps work in sync with the kernel and are usually the only > ones > doing such changes so their internal state will be always valid, and I'd guess > they already deal with this one way or another. > My second argument is a minor one and is about performance. There are some > apps > (e.g. pimd) which use interface add/del on interface state change (up/down) > and > this could make these ops slower on large setups. > > Again I see how this could be helpful and should've probably been like that > from the > start, so if other people feel confident we won't break anything then I > wouldn't > mind the change. > > Thanks, > Nik > > >
[PATCH net v3] net: ipmr: Fix some mroute forwarding issues in vrf's
This patch fixes two issues: 1) When forwarding on *,G mroutes that are in a vrf, the kernel was dropping information about the actual incoming interface when calling ip_mr_forward from ip_mr_input. This caused ip_mr_forward to send the multicast packet back out the incoming interface. Fix this by modifying ip_mr_forward to be handed the correctly resolved dev. 2) When a unresolved cache entry is created we store the incoming skb on the unresolved cache entry and upon mroute resolution from the user space daemon, we attempt to forward the packet. Again we were not resolving to the correct incoming device for a vrf scenario, before calling ip_mr_forward. Fix this by resolving to the correct interface and calling ip_mr_forward with the result. Fixes: e58e41596811 ("net: Enable support for VRF with ipv4 multicast") Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com> --- v2: Fixed title v3: Addressed Review comments by Andrew Lunn and David Ahern net/ipv4/ipmr.c | 32 +++- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 551de4d..09368a1 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -101,8 +101,8 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id); static void ipmr_free_table(struct mr_table *mrt); static void ip_mr_forward(struct net *net, struct mr_table *mrt, - struct sk_buff *skb, struct mfc_cache *cache, - int local); + struct net_device *dev, struct sk_buff *skb, + struct mfc_cache *cache, int local); static int ipmr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, vifi_t vifi, int assert); static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, @@ -988,7 +988,7 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, rtnl_unicast(skb, net, NETLINK_CB(skb).portid); } else { - ip_mr_forward(net, mrt, skb, c, 0); + ip_mr_forward(net, mrt, skb->dev, skb, c, 0); } } } @@ -1073,7 +1073,7 @@ static int ipmr_cache_report(struct mr_table *mrt, /* Queue 
a packet for resolution. It gets locked cache entry! */ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, -struct sk_buff *skb) +struct sk_buff *skb, struct net_device *dev) { const struct iphdr *iph = ip_hdr(skb); struct mfc_cache *c; @@ -1130,6 +1130,10 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, kfree_skb(skb); err = -ENOBUFS; } else { + if (dev) { + skb->dev = dev; + skb->skb_iif = dev->ifindex; + } skb_queue_tail(>mfc_un.unres.unresolved, skb); err = 0; } @@ -1828,10 +1832,10 @@ static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev) /* "local" means that we should preserve one skb (for local delivery) */ static void ip_mr_forward(struct net *net, struct mr_table *mrt, - struct sk_buff *skb, struct mfc_cache *cache, - int local) + struct net_device *dev, struct sk_buff *skb, + struct mfc_cache *cache, int local) { - int true_vifi = ipmr_find_vif(mrt, skb->dev); + int true_vifi = ipmr_find_vif(mrt, dev); int psend = -1; int vif, ct; @@ -1853,13 +1857,7 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt, } /* Wrong interface: drop packet and (maybe) send PIM assert. */ - if (mrt->vif_table[vif].dev != skb->dev) { - struct net_device *mdev; - - mdev = l3mdev_master_dev_rcu(mrt->vif_table[vif].dev); - if (mdev == skb->dev) - goto forward; - + if (mrt->vif_table[vif].dev != dev) { if (rt_is_output_route(skb_rtable(skb))) { /* It is our own packet, looped back. * Very complicated situation... @@ -2053,7 +2051,7 @@ int ip_mr_input(struct sk_buff *skb) read_lock(_lock); vif = ipmr_find_vif(mrt, dev); if (vif >= 0) { - int err2 = ipmr_cache_unresolved(mrt, vif, skb); + int err2 = ipmr_cache_unresolved(mrt, vif, skb, dev); read_unlock(_lock); return err2; @@ -2064,7 +2062,7 @@ int ip_mr_input(struct sk_buff *skb) } read_lock(_lock); - ip_mr_forward(net, mrt, skb, cache, local); + ip_mr_forward(net, mrt, dev, skb, cache, local); read
Re: [PATCH net v2] net: ipmr: Fix some mroute forwarding issues in vrf's
I'll change it over to this way. No problem. donald On Fri, Jun 9, 2017 at 10:54 AM, David Ahern <dsah...@gmail.com> wrote: > On 6/9/17 8:22 AM, Donald Sharp wrote: >> @@ -988,7 +988,16 @@ static void ipmr_cache_resolve(struct net *net, struct >> mr_table *mrt, >> >> rtnl_unicast(skb, net, NETLINK_CB(skb).portid); >> } else { >> - ip_mr_forward(net, mrt, skb, c, 0); >> +struct net_device *dev = skb->dev; >> + >> +if (netif_is_l3_master(dev)) { >> +dev = __dev_get_by_index(net, >> IPCB(skb)->iif); >> +if (!dev) { >> +kfree_skb(skb); >> +continue; >> +} >> +} >> +ip_mr_forward(net, mrt, dev, skb, c, 0); >> } >> } >> } > > What about changing ipmr_cache_unresolved to take the dev it looked up > already and then have ipmr_cache_unresolved reset skb->dev to it (and > reset skb->skb_iff to dev->ifindex) when queuing to the unresolved list? > Since this path does not have a local delivery, resetting the skb->dev > will be fine and it avoids this second lookup using IPCB(skb)->iif: > > @@ -1073,7 +1073,7 @@ static int ipmr_cache_report(struct mr_table *mrt, > > /* Queue a packet for resolution. It gets locked cache entry! 
*/ > static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, > -struct sk_buff *skb) > +struct sk_buff *skb, struct net_device > *dev) > { > const struct iphdr *iph = ip_hdr(skb); > struct mfc_cache *c; > @@ -1130,6 +1130,10 @@ static int ipmr_cache_unresolved(struct mr_table > *mrt, vifi_t vifi, > kfree_skb(skb); > err = -ENOBUFS; > } else { > + if (dev) { > + skb->dev = dev; > + skb->skb_iif = dev->ifindex; > + } > skb_queue_tail(>mfc_un.unres.unresolved, skb); > err = 0; > } > > > Combined with Thomas' earlier change this check in ip_mr_forward becomes: > > diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c > index 9374b99c7c17..1393a4d18a9a 100644 > --- a/net/ipv4/ipmr.c > +++ b/net/ipv4/ipmr.c > @@ -1853,13 +1853,7 @@ static void ip_mr_forward(struct net *net, struct > mr_table *mrt, > } > > /* Wrong interface: drop packet and (maybe) send PIM assert. */ > - if (mrt->vif_table[vif].dev != skb->dev) { > - struct net_device *mdev; > - > - mdev = l3mdev_master_dev_rcu(mrt->vif_table[vif].dev); > - if (mdev == skb->dev) > - goto forward; > - > + if (mrt->vif_table[vif].dev != dev) { > if (rt_is_output_route(skb_rtable(skb))) { > /* It is our own packet, looped back. > * Very complicated situation...
[PATCH net v2] net: ipmr: Fix some mroute forwarding issues in vrf's
This patch fixes two issues: 1) When forwarding on *,G mroutes that are in a vrf, the kernel was dropping information about the actual incoming interface when calling ip_mr_forward from ip_mr_input. This caused ip_mr_forward to send the multicast packet back out the incoming interface. Fix this by modifying ip_mr_forward to be handed the correctly resolved dev. 2) When a unresolved cache entry is created we store the incoming skb on the unresolved cache entry and upon mroute resolution from the user space daemon, we attempt to forward the packet. Again we were not resolving to the correct incoming device for a vrf scenario, before calling ip_mr_forward. Fix this by resolving to the correct interface and calling ip_mr_forward with the result. Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com> --- v2: Fixed title net/ipv4/ipmr.c | 27 ++- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 551de4d..559009e 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -101,8 +101,8 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id); static void ipmr_free_table(struct mr_table *mrt); static void ip_mr_forward(struct net *net, struct mr_table *mrt, - struct sk_buff *skb, struct mfc_cache *cache, - int local); + struct net_device *dev, struct sk_buff *skb, + struct mfc_cache *cache, int local); static int ipmr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, vifi_t vifi, int assert); static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, @@ -988,7 +988,16 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, rtnl_unicast(skb, net, NETLINK_CB(skb).portid); } else { - ip_mr_forward(net, mrt, skb, c, 0); +struct net_device *dev = skb->dev; + +if (netif_is_l3_master(dev)) { +dev = __dev_get_by_index(net, IPCB(skb)->iif); +if (!dev) { +kfree_skb(skb); +continue; +} +} +ip_mr_forward(net, mrt, dev, skb, c, 0); } } } @@ -1828,10 +1837,10 @@ static int ipmr_find_vif(struct 
mr_table *mrt, struct net_device *dev) /* "local" means that we should preserve one skb (for local delivery) */ static void ip_mr_forward(struct net *net, struct mr_table *mrt, - struct sk_buff *skb, struct mfc_cache *cache, - int local) + struct net_device *dev, struct sk_buff *skb, + struct mfc_cache *cache, int local) { - int true_vifi = ipmr_find_vif(mrt, skb->dev); + int true_vifi = ipmr_find_vif(mrt, dev); int psend = -1; int vif, ct; @@ -1853,11 +1862,11 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt, } /* Wrong interface: drop packet and (maybe) send PIM assert. */ - if (mrt->vif_table[vif].dev != skb->dev) { + if (mrt->vif_table[vif].dev != dev) { struct net_device *mdev; mdev = l3mdev_master_dev_rcu(mrt->vif_table[vif].dev); - if (mdev == skb->dev) + if (mdev == dev) goto forward; if (rt_is_output_route(skb_rtable(skb))) { @@ -2064,7 +2073,7 @@ int ip_mr_input(struct sk_buff *skb) } read_lock(_lock); - ip_mr_forward(net, mrt, skb, cache, local); + ip_mr_forward(net, mrt, dev, skb, cache, local); read_unlock(_lock); if (local) -- 2.9.4
[PATCH] Fix some mroute forwarding issues in vrf's
This patch fixes two issues: 1) When forwarding on *,G mroutes that are in a vrf, the kernel was dropping information about the actual incoming interface when calling ip_mr_forward from ip_mr_input. This caused ip_mr_forward to send the multicast packet back out the incoming interface. Fix this by modifying ip_mr_forward to be handed the correctly resolved dev. 2) When a unresolved cache entry is created we store the incoming skb on the unresolved cache entry and upon mroute resolution from the user space daemon, we attempt to forward the packet. Again we were not resolving to the correct incoming device for a vrf scenario, before calling ip_mr_forward. Fix this by resolving to the correct interface and calling ip_mr_forward with the result. Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com> --- net/ipv4/ipmr.c | 27 ++- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 551de4d..559009e 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -101,8 +101,8 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id); static void ipmr_free_table(struct mr_table *mrt); static void ip_mr_forward(struct net *net, struct mr_table *mrt, - struct sk_buff *skb, struct mfc_cache *cache, - int local); + struct net_device *dev, struct sk_buff *skb, + struct mfc_cache *cache, int local); static int ipmr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, vifi_t vifi, int assert); static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, @@ -988,7 +988,16 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, rtnl_unicast(skb, net, NETLINK_CB(skb).portid); } else { - ip_mr_forward(net, mrt, skb, c, 0); +struct net_device *dev = skb->dev; + +if (netif_is_l3_master(dev)) { +dev = __dev_get_by_index(net, IPCB(skb)->iif); +if (!dev) { +kfree_skb(skb); +continue; +} +} +ip_mr_forward(net, mrt, dev, skb, c, 0); } } } @@ -1828,10 +1837,10 @@ static int ipmr_find_vif(struct mr_table *mrt, 
struct net_device *dev) /* "local" means that we should preserve one skb (for local delivery) */ static void ip_mr_forward(struct net *net, struct mr_table *mrt, - struct sk_buff *skb, struct mfc_cache *cache, - int local) + struct net_device *dev, struct sk_buff *skb, + struct mfc_cache *cache, int local) { - int true_vifi = ipmr_find_vif(mrt, skb->dev); + int true_vifi = ipmr_find_vif(mrt, dev); int psend = -1; int vif, ct; @@ -1853,11 +1862,11 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt, } /* Wrong interface: drop packet and (maybe) send PIM assert. */ - if (mrt->vif_table[vif].dev != skb->dev) { + if (mrt->vif_table[vif].dev != dev) { struct net_device *mdev; mdev = l3mdev_master_dev_rcu(mrt->vif_table[vif].dev); - if (mdev == skb->dev) + if (mdev == dev) goto forward; if (rt_is_output_route(skb_rtable(skb))) { @@ -2064,7 +2073,7 @@ int ip_mr_input(struct sk_buff *skb) } read_lock(_lock); - ip_mr_forward(net, mrt, skb, cache, local); + ip_mr_forward(net, mrt, dev, skb, cache, local); read_unlock(_lock); if (local) -- 2.9.4