[PATCH v2 1/1] iproute2: Add support for a few routing protocols

2018-06-08 Thread Donald Sharp
Add support for:

BGP
ISIS
OSPF
RIP
EIGRP

Routing protocols to iproute2.

Signed-off-by: Donald Sharp 
---
v2: Update to latest version of code.
 etc/iproute2/rt_protos | 5 +
 lib/rt_names.c | 5 +
 2 files changed, 10 insertions(+)

diff --git a/etc/iproute2/rt_protos b/etc/iproute2/rt_protos
index 2a9ee01b..b3a0ec8f 100644
--- a/etc/iproute2/rt_protos
+++ b/etc/iproute2/rt_protos
@@ -16,3 +16,8 @@
 15 ntk
 16  dhcp
 42 babel
+186 bgp
+187 isis
+188 ospf
+189 rip
+192 eigrp
diff --git a/lib/rt_names.c b/lib/rt_names.c
index a02db35e..66d5f2f0 100644
--- a/lib/rt_names.c
+++ b/lib/rt_names.c
@@ -134,6 +134,11 @@ static char *rtnl_rtprot_tab[256] = {
[RTPROT_XORP] = "xorp",
[RTPROT_NTK]  = "ntk",
[RTPROT_DHCP] = "dhcp",
+   [RTPROT_BGP]  = "bgp",
+   [RTPROT_ISIS] = "isis",
+   [RTPROT_OSPF] = "ospf",
+   [RTPROT_RIP]  = "rip",
+   [RTPROT_EIGRP]= "eigrp",
 };
 
 
-- 
2.14.4



[PATCH v2 0/1] Addition of new routing protocols for iproute2

2018-06-08 Thread Donald Sharp
The linux kernel recently accepted some new RTPROT values for some
fairly standard routing protocols.  This commit brings in support
for iproute2 to handle these new values.

v2 - Update to the latest version of master, which already has the rtnetlink.h
 code, and drop the work that was already done.

Donald Sharp (1):
  iproute2: Add support for a few routing protocols

 etc/iproute2/rt_protos | 5 +
 lib/rt_names.c | 5 +
 2 files changed, 10 insertions(+)

-- 
2.14.4



[PATCH 1/2] iproute2: Add support for a few routing protocols

2018-06-08 Thread Donald Sharp
Add support for:

BGP
ISIS
OSPF
RIP
EIGRP

Routing protocols to iproute2.

Signed-off-by: Donald Sharp 
---
 etc/iproute2/rt_protos| 5 +
 include/linux/rtnetlink.h | 5 +
 lib/rt_names.c| 5 +
 3 files changed, 15 insertions(+)

diff --git a/etc/iproute2/rt_protos b/etc/iproute2/rt_protos
index 82cf9c46..3ffe8a6c 100644
--- a/etc/iproute2/rt_protos
+++ b/etc/iproute2/rt_protos
@@ -16,6 +16,11 @@
 15 ntk
 16  dhcp
 42 babel
+186 bgp
+187 isis
+188 ospf
+189 rip
+192 eigrp
 
 #
 #  Used by me for gated
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 742ba078..2e83a267 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -248,6 +248,11 @@ enum {
 #define RTPROT_DHCP16  /* DHCP client */
 #define RTPROT_MROUTED 17  /* Multicast daemon */
 #define RTPROT_BABEL   42  /* Babel daemon */
+#define RTPROT_BGP 186 /* BGP Routes */
+#define RTPROT_ISIS187 /* ISIS Routes */
+#define RTPROT_OSPF188 /* OSPF Routes */
+#define RTPROT_RIP 189 /* RIP Routes */
+#define RTPROT_EIGRP   192 /* EIGRP Routes */
 
 /* rtm_scope
 
diff --git a/lib/rt_names.c b/lib/rt_names.c
index 253389a6..d3562d2d 100644
--- a/lib/rt_names.c
+++ b/lib/rt_names.c
@@ -137,6 +137,11 @@ static char * rtnl_rtprot_tab[256] = {
[RTPROT_XORP] = "xorp",
[RTPROT_NTK] = "ntk",
[RTPROT_DHCP] = "dhcp",
+   [RTPROT_BGP] = "bgp",
+   [RTPROT_ISIS] = "isis",
+   [RTPROT_OSPF] = "ospf",
+   [RTPROT_RIP] = "rip",
+   [RTPROT_EIGRP] = "eigrp",
 };
 
 
-- 
2.14.4



[PATCH 0/2] Addition of new routing protocols for iproute2

2018-06-08 Thread Donald Sharp
The linux kernel recently accepted some new RTPROT values for some
fairly standard routing protocols.  This commit brings in support
for iproute2 to handle these new values.

Additionally, clean up some long-standing cruft in etc/iproute2/rt_protos.

Donald Sharp (2):
  iproute2: Add support for a few routing protocols
  iproute2: Remove leftover gated RT_PROT defines

 etc/iproute2/rt_protos| 18 +-
 include/linux/rtnetlink.h |  5 +
 lib/rt_names.c|  5 +
 3 files changed, 15 insertions(+), 13 deletions(-)

-- 
2.14.4



[PATCH 2/2] iproute2: Remove leftover gated RT_PROT defines

2018-06-08 Thread Donald Sharp
These values are neither used nor maintained, so remove them.

Signed-off-by: Donald Sharp 
---
 etc/iproute2/rt_protos | 13 -
 1 file changed, 13 deletions(-)

diff --git a/etc/iproute2/rt_protos b/etc/iproute2/rt_protos
index 3ffe8a6c..a965ad16 100644
--- a/etc/iproute2/rt_protos
+++ b/etc/iproute2/rt_protos
@@ -21,16 +21,3 @@
 188 ospf
 189 rip
 192 eigrp
-
-#
-#  Used by me for gated
-#
-254gated/aggr
-253gated/bgp
-252gated/ospf
-251gated/ospfase
-250gated/rip
-249gated/static
-248gated/conn
-247gated/inet
-246gated/default
-- 
2.14.4



Re: [PATCH] rtnetlink: Add more well known protocol values

2018-05-30 Thread Donald Sharp
This patch is intended for net-next.

thanks!

donald

On Wed, May 30, 2018 at 8:27 AM, Donald Sharp
 wrote:
> FRRouting installs routes into the kernel associated with
> the originating protocol.  Add these values to the well
> known values in rtnetlink.h.
>
> Signed-off-by: Donald Sharp 
> ---
> v2: Fixed whitespace issues
>  include/uapi/linux/rtnetlink.h | 5 +
>  1 file changed, 5 insertions(+)
>
> diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
> index cabb210c93af..7d8502313c99 100644
> --- a/include/uapi/linux/rtnetlink.h
> +++ b/include/uapi/linux/rtnetlink.h
> @@ -254,6 +254,11 @@ enum {
>  #define RTPROT_DHCP16  /* DHCP client */
>  #define RTPROT_MROUTED 17  /* Multicast daemon */
>  #define RTPROT_BABEL   42  /* Babel daemon */
> +#define RTPROT_BGP 186 /* BGP Routes */
> +#define RTPROT_ISIS187 /* ISIS Routes */
> +#define RTPROT_OSPF188 /* OSPF Routes */
> +#define RTPROT_RIP 189 /* RIP Routes */
> +#define RTPROT_EIGRP   192 /* EIGRP Routes */
>
>  /* rtm_scope
>
> --
> 2.14.3
>


[PATCH] rtnetlink: Add more well known protocol values

2018-05-30 Thread Donald Sharp
FRRouting installs routes into the kernel associated with
the originating protocol.  Add these values to the well
known values in rtnetlink.h.

Signed-off-by: Donald Sharp 
---
v2: Fixed whitespace issues
 include/uapi/linux/rtnetlink.h | 5 +
 1 file changed, 5 insertions(+)

diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index cabb210c93af..7d8502313c99 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -254,6 +254,11 @@ enum {
 #define RTPROT_DHCP16  /* DHCP client */
 #define RTPROT_MROUTED 17  /* Multicast daemon */
 #define RTPROT_BABEL   42  /* Babel daemon */
+#define RTPROT_BGP 186 /* BGP Routes */
+#define RTPROT_ISIS187 /* ISIS Routes */
+#define RTPROT_OSPF188 /* OSPF Routes */
+#define RTPROT_RIP 189 /* RIP Routes */
+#define RTPROT_EIGRP   192 /* EIGRP Routes */
 
 /* rtm_scope
 
-- 
2.14.3



[PATCH] rtnetlink: Add more well known protocol values

2018-05-25 Thread Donald Sharp
FRRouting installs routes into the kernel associated with
the originating protocol.  Add these values to the well
known values in rtnetlink.h.

Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com>
---
 include/uapi/linux/rtnetlink.h | 5 +
 1 file changed, 5 insertions(+)

diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index cabb210c93af..81b33826f818 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -254,6 +254,11 @@ enum {
 #define RTPROT_DHCP16  /* DHCP client */
 #define RTPROT_MROUTED 17  /* Multicast daemon */
 #define RTPROT_BABEL   42  /* Babel daemon */
+#define RTPROT_BGP  186 /* BGP Routes */
+#define RTPROT_ISIS 187 /* ISIS Routes */
+#define RTPROT_OSPF 188 /* OSPF Routes */
+#define RTPROT_RIP  189 /* RIP Routes */
+#define RTPROT_EIGRP192 /* EIGRP Routes */
 
 /* rtm_scope
 
-- 
2.14.3



[PATCH v4 iproute2-next 3/3] ip: Allow rules to accept a specified protocol

2018-02-28 Thread Donald Sharp
Allow the specification of a protocol when the user
adds/modifies/deletes a rule.

Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com>
---
 ip/iprule.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/ip/iprule.c b/ip/iprule.c
index 8120520e..6fdc9b5e 100644
--- a/ip/iprule.c
+++ b/ip/iprule.c
@@ -697,6 +697,13 @@ static int iprule_modify(int cmd, int argc, char **argv)
if (get_rt_realms_or_raw(, *argv))
invarg("invalid realms\n", *argv);
addattr32(, sizeof(req), FRA_FLOW, realm);
+   } else if (matches(*argv, "protocol") == 0) {
+   __u32 proto;
+
+   NEXT_ARG();
+   if (rtnl_rtprot_a2n(, *argv))
+   invarg("\"protocol\" value is invalid\n", 
*argv);
+   addattr8(, sizeof(req), FRA_PROTOCOL, proto);
} else if (matches(*argv, "table") == 0 ||
   strcmp(*argv, "lookup") == 0) {
NEXT_ARG();
-- 
2.14.3



[PATCH v4 iproute2-next 0/3] Allow 'ip rule' command to use protocol

2018-02-28 Thread Donald Sharp
Fix iprule.c to use the actual `struct fib_rule_hdr` and to
allow the end user to see and use the protocol keyword
for rule manipulation.

v2: Rearrange and code changes as per David Ahern
v3: Fix some missed RTN_XXX to appropriate FR_XX and doc changes
v4: Cleanup some code, fix 'ip rule save' no parameters and doc changes

Donald Sharp (3):
  ip: Use the `struct fib_rule_hdr` for rules
  ip: Display ip rule protocol used
  ip: Allow rules to accept a specified protocol

 include/uapi/linux/fib_rules.h |   1 +
 ip/iprule.c| 173 +
 man/man8/ip-rule.8 |  18 -
 3 files changed, 126 insertions(+), 66 deletions(-)

-- 
2.14.3



[PATCH v4 iproute2-next 1/3] ip: Use the `struct fib_rule_hdr` for rules

2018-02-28 Thread Donald Sharp
The iprule.c code was using `struct rtmsg` as the data
type to pass into the kernel for the netlink message.
While 'struct rtmsg' and `struct fib_rule_hdr` are
the same size and mostly the same, we should use
the correct data structure.  This commit translates
the data structures to have iprule.c use the correct
one.

Additionally copy over the modified fib_rules.h file

Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com>
---
 include/uapi/linux/fib_rules.h |   1 +
 ip/iprule.c| 128 +
 2 files changed, 68 insertions(+), 61 deletions(-)

diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h
index 2b642bf9..9477c3af 100644
--- a/include/uapi/linux/fib_rules.h
+++ b/include/uapi/linux/fib_rules.h
@@ -58,6 +58,7 @@ enum {
FRA_PAD,
FRA_L3MDEV, /* iif or oif is l3mdev goto its table */
FRA_UID_RANGE,  /* UID range */
+   FRA_PROTOCOL,
__FRA_MAX
 };
 
diff --git a/ip/iprule.c b/ip/iprule.c
index a3abf2f6..94356bf8 100644
--- a/ip/iprule.c
+++ b/ip/iprule.c
@@ -73,25 +73,33 @@ static struct
inet_prefix dst;
 } filter;
 
+static inline int frh_get_table(struct fib_rule_hdr *frh, struct rtattr **tb)
+{
+   __u32 table = frh->table;
+   if (tb[RTA_TABLE])
+   table = rta_getattr_u32(tb[RTA_TABLE]);
+   return table;
+}
+
 static bool filter_nlmsg(struct nlmsghdr *n, struct rtattr **tb, int host_len)
 {
-   struct rtmsg *r = NLMSG_DATA(n);
+   struct fib_rule_hdr *frh = NLMSG_DATA(n);
__u32 table;
 
-   if (preferred_family != AF_UNSPEC && r->rtm_family != preferred_family)
+   if (preferred_family != AF_UNSPEC && frh->family != preferred_family)
return false;
 
if (filter.prefmask &&
filter.pref ^ (tb[FRA_PRIORITY] ? rta_getattr_u32(tb[FRA_PRIORITY]) 
: 0))
return false;
-   if (filter.not && !(r->rtm_flags & FIB_RULE_INVERT))
+   if (filter.not && !(frh->flags & FIB_RULE_INVERT))
return false;
 
if (filter.src.family) {
inet_prefix *f_src = 
 
-   if (f_src->family != r->rtm_family ||
-   f_src->bitlen > r->rtm_src_len)
+   if (f_src->family != frh->family ||
+   f_src->bitlen > frh->src_len)
return false;
 
if (inet_addr_match_rta(f_src, tb[FRA_SRC]))
@@ -101,15 +109,15 @@ static bool filter_nlmsg(struct nlmsghdr *n, struct 
rtattr **tb, int host_len)
if (filter.dst.family) {
inet_prefix *f_dst = 
 
-   if (f_dst->family != r->rtm_family ||
-   f_dst->bitlen > r->rtm_dst_len)
+   if (f_dst->family != frh->family ||
+   f_dst->bitlen > frh->dst_len)
return false;
 
if (inet_addr_match_rta(f_dst, tb[FRA_DST]))
return false;
}
 
-   if (filter.tosmask && filter.tos ^ r->rtm_tos)
+   if (filter.tosmask && filter.tos ^ frh->tos)
return false;
 
if (filter.fwmark) {
@@ -159,7 +167,7 @@ static bool filter_nlmsg(struct nlmsghdr *n, struct rtattr 
**tb, int host_len)
return false;
}
 
-   table = rtm_get_table(r, tb);
+   table = frh_get_table(frh, tb);
if (filter.tb > 0 && filter.tb ^ table)
return false;
 
@@ -169,7 +177,7 @@ static bool filter_nlmsg(struct nlmsghdr *n, struct rtattr 
**tb, int host_len)
 int print_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
 {
FILE *fp = (FILE *)arg;
-   struct rtmsg *r = NLMSG_DATA(n);
+   struct fib_rule_hdr *frh = NLMSG_DATA(n);
int len = n->nlmsg_len;
int host_len = -1;
__u32 table;
@@ -180,13 +188,13 @@ int print_rule(const struct sockaddr_nl *who, struct 
nlmsghdr *n, void *arg)
if (n->nlmsg_type != RTM_NEWRULE && n->nlmsg_type != RTM_DELRULE)
return 0;
 
-   len -= NLMSG_LENGTH(sizeof(*r));
+   len -= NLMSG_LENGTH(sizeof(*frh));
if (len < 0)
return -1;
 
-   parse_rtattr(tb, FRA_MAX, RTM_RTA(r), len);
+   parse_rtattr(tb, FRA_MAX, RTM_RTA(frh), len);
 
-   host_len = af_bit_len(r->rtm_family);
+   host_len = af_bit_len(frh->family);
 
if (!filter_nlmsg(n, tb, host_len))
return 0;
@@ -200,41 +208,41 @@ int print_rule(const struct sockaddr_nl *who, struct 
nlmsghdr *n, void *arg)
else
fprintf(fp, "0:\t");
 
-   if (r->rtm_flags & FIB_RULE_INVERT)
+   if (frh->flags & FIB_RULE_INVERT)
fprintf(fp, "not ");
 
if (tb[FRA_SRC]) {

[PATCH v4 iproute2-next 2/3] ip: Display ip rule protocol used

2018-02-28 Thread Donald Sharp
Modify 'ip rule' command to notice when the kernel passes
to us the originating protocol.

Add code to allow the `ip rule flush protocol XXX`
command to be accepted and properly handled.

Modify the documentation to reflect these code changes.

Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com>
---
 ip/iprule.c| 38 ++
 man/man8/ip-rule.8 | 18 +-
 2 files changed, 51 insertions(+), 5 deletions(-)

diff --git a/ip/iprule.c b/ip/iprule.c
index 94356bf8..8120520e 100644
--- a/ip/iprule.c
+++ b/ip/iprule.c
@@ -47,6 +47,7 @@ static void usage(void)
"[ iif STRING ] [ oif STRING ] [ pref NUMBER ] [ 
l3mdev ]\n"
"[ uidrange NUMBER-NUMBER ]\n"
"ACTION := [ table TABLE_ID ]\n"
+   "  [ protocol PROTO ]\n"
"  [ nat ADDRESS ]\n"
"  [ realms [SRCREALM/]DSTREALM ]\n"
"  [ goto NUMBER ]\n"
@@ -71,6 +72,8 @@ static struct
struct fib_rule_uid_range range;
inet_prefix src;
inet_prefix dst;
+   int protocol;
+   int protocolmask;
 } filter;
 
 static inline int frh_get_table(struct fib_rule_hdr *frh, struct rtattr **tb)
@@ -338,6 +341,16 @@ int print_rule(const struct sockaddr_nl *who, struct 
nlmsghdr *n, void *arg)
rtnl_rtntype_n2a(frh->action,
 b1, sizeof(b1)));
 
+   if (tb[FRA_PROTOCOL]) {
+   __u8 protocol = rta_getattr_u8(tb[FRA_PROTOCOL]);
+
+   if ((protocol && protocol != RTPROT_KERNEL) ||
+   show_details > 0) {
+   fprintf(fp, " proto %s ",
+   rtnl_rtprot_n2a(protocol, b1, sizeof(b1)));
+   }
+   }
+
fprintf(fp, "\n");
fflush(fp);
return 0;
@@ -391,6 +404,13 @@ static int flush_rule(const struct sockaddr_nl *who, 
struct nlmsghdr *n,
 
parse_rtattr(tb, FRA_MAX, RTM_RTA(frh), len);
 
+   if (tb[FRA_PROTOCOL]) {
+   __u8 protocol = rta_getattr_u8(tb[FRA_PROTOCOL]);
+
+   if ((filter.protocol ^ protocol) & filter.protocolmask)
+   return 0;
+   }
+
if (tb[FRA_PRIORITY]) {
n->nlmsg_type = RTM_DELRULE;
n->nlmsg_flags = NLM_F_REQUEST;
@@ -415,9 +435,8 @@ static int iprule_list_flush_or_save(int argc, char **argv, 
int action)
if (af == AF_UNSPEC)
af = AF_INET;
 
-   if (action != IPRULE_LIST && argc > 0) {
-   fprintf(stderr, "\"ip rule %s\" does not take any arguments.\n",
-   action == IPRULE_SAVE ? "save" : "flush");
+   if (action == IPRULE_SAVE && argc > 0) {
+   fprintf(stderr, "\"ip rule save\" does not take any 
arguments.\n");
return -1;
}
 
@@ -508,7 +527,18 @@ static int iprule_list_flush_or_save(int argc, char 
**argv, int action)
NEXT_ARG();
if (get_prefix(, *argv, af))
invarg("from value is invalid\n", *argv);
-   } else {
+   } else if (matches(*argv, "protocol") == 0) {
+   __u32 prot;
+   NEXT_ARG();
+   filter.protocolmask = -1;
+   if (rtnl_rtprot_a2n(, *argv)) {
+   if (strcmp(*argv, "all") != 0)
+   invarg("invalid \"protocol\"\n", *argv);
+   prot = 0;
+   filter.protocolmask = 0;
+   }
+   filter.protocol = prot;
+   } else{
if (matches(*argv, "dst") == 0 ||
matches(*argv, "to") == 0) {
NEXT_ARG();
diff --git a/man/man8/ip-rule.8 b/man/man8/ip-rule.8
index a5c47981..7cf8fd9a 100644
--- a/man/man8/ip-rule.8
+++ b/man/man8/ip-rule.8
@@ -50,6 +50,8 @@ ip-rule \- routing policy database management
 .IR ACTION " := [ "
 .B  table
 .IR TABLE_ID " ] [ "
+.B  protocol
+.IR PROTO " ] [ "
 .B  nat
 .IR ADDRESS " ] [ "
 .B realms
@@ -240,6 +242,10 @@ The options preference and order are synonyms with 
priority.
 the routing table identifier to lookup if the rule selector matches.
 It is also possible to use lookup instead of table.
 
+.TP
+.BI protocol " PROTO"
the routing protocol that installed the rule in question.  As an example, when 
zebra installs a rule it would get RTPROT_ZEBRA as the insta

[PATCH v3 iproute2-next 1/3] ip: Use the `struct fib_rule_hdr` for rules

2018-02-23 Thread Donald Sharp
The iprule.c code was using `struct rtmsg` as the data
type to pass into the kernel for the netlink message.
While 'struct rtmsg' and `struct fib_rule_hdr` are
the same size and mostly the same, we should use
the correct data structure.  This commit translates
the data structures to have iprule.c use the correct
one.

Additionally copy over the modified fib_rules.h file

Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com>
---
 include/uapi/linux/fib_rules.h |   1 +
 ip/iprule.c| 128 +
 2 files changed, 68 insertions(+), 61 deletions(-)

diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h
index 2b642bf9..9477c3af 100644
--- a/include/uapi/linux/fib_rules.h
+++ b/include/uapi/linux/fib_rules.h
@@ -58,6 +58,7 @@ enum {
FRA_PAD,
FRA_L3MDEV, /* iif or oif is l3mdev goto its table */
FRA_UID_RANGE,  /* UID range */
+   FRA_PROTOCOL,
__FRA_MAX
 };
 
diff --git a/ip/iprule.c b/ip/iprule.c
index a3abf2f6..94356bf8 100644
--- a/ip/iprule.c
+++ b/ip/iprule.c
@@ -73,25 +73,33 @@ static struct
inet_prefix dst;
 } filter;
 
+static inline int frh_get_table(struct fib_rule_hdr *frh, struct rtattr **tb)
+{
+   __u32 table = frh->table;
+   if (tb[RTA_TABLE])
+   table = rta_getattr_u32(tb[RTA_TABLE]);
+   return table;
+}
+
 static bool filter_nlmsg(struct nlmsghdr *n, struct rtattr **tb, int host_len)
 {
-   struct rtmsg *r = NLMSG_DATA(n);
+   struct fib_rule_hdr *frh = NLMSG_DATA(n);
__u32 table;
 
-   if (preferred_family != AF_UNSPEC && r->rtm_family != preferred_family)
+   if (preferred_family != AF_UNSPEC && frh->family != preferred_family)
return false;
 
if (filter.prefmask &&
filter.pref ^ (tb[FRA_PRIORITY] ? rta_getattr_u32(tb[FRA_PRIORITY]) 
: 0))
return false;
-   if (filter.not && !(r->rtm_flags & FIB_RULE_INVERT))
+   if (filter.not && !(frh->flags & FIB_RULE_INVERT))
return false;
 
if (filter.src.family) {
inet_prefix *f_src = 
 
-   if (f_src->family != r->rtm_family ||
-   f_src->bitlen > r->rtm_src_len)
+   if (f_src->family != frh->family ||
+   f_src->bitlen > frh->src_len)
return false;
 
if (inet_addr_match_rta(f_src, tb[FRA_SRC]))
@@ -101,15 +109,15 @@ static bool filter_nlmsg(struct nlmsghdr *n, struct 
rtattr **tb, int host_len)
if (filter.dst.family) {
inet_prefix *f_dst = 
 
-   if (f_dst->family != r->rtm_family ||
-   f_dst->bitlen > r->rtm_dst_len)
+   if (f_dst->family != frh->family ||
+   f_dst->bitlen > frh->dst_len)
return false;
 
if (inet_addr_match_rta(f_dst, tb[FRA_DST]))
return false;
}
 
-   if (filter.tosmask && filter.tos ^ r->rtm_tos)
+   if (filter.tosmask && filter.tos ^ frh->tos)
return false;
 
if (filter.fwmark) {
@@ -159,7 +167,7 @@ static bool filter_nlmsg(struct nlmsghdr *n, struct rtattr 
**tb, int host_len)
return false;
}
 
-   table = rtm_get_table(r, tb);
+   table = frh_get_table(frh, tb);
if (filter.tb > 0 && filter.tb ^ table)
return false;
 
@@ -169,7 +177,7 @@ static bool filter_nlmsg(struct nlmsghdr *n, struct rtattr 
**tb, int host_len)
 int print_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
 {
FILE *fp = (FILE *)arg;
-   struct rtmsg *r = NLMSG_DATA(n);
+   struct fib_rule_hdr *frh = NLMSG_DATA(n);
int len = n->nlmsg_len;
int host_len = -1;
__u32 table;
@@ -180,13 +188,13 @@ int print_rule(const struct sockaddr_nl *who, struct 
nlmsghdr *n, void *arg)
if (n->nlmsg_type != RTM_NEWRULE && n->nlmsg_type != RTM_DELRULE)
return 0;
 
-   len -= NLMSG_LENGTH(sizeof(*r));
+   len -= NLMSG_LENGTH(sizeof(*frh));
if (len < 0)
return -1;
 
-   parse_rtattr(tb, FRA_MAX, RTM_RTA(r), len);
+   parse_rtattr(tb, FRA_MAX, RTM_RTA(frh), len);
 
-   host_len = af_bit_len(r->rtm_family);
+   host_len = af_bit_len(frh->family);
 
if (!filter_nlmsg(n, tb, host_len))
return 0;
@@ -200,41 +208,41 @@ int print_rule(const struct sockaddr_nl *who, struct 
nlmsghdr *n, void *arg)
else
fprintf(fp, "0:\t");
 
-   if (r->rtm_flags & FIB_RULE_INVERT)
+   if (frh->flags & FIB_RULE_INVERT)
fprintf(fp, "not ");
 
if (tb[FRA_SRC]) {

[PATCH v3 iproute2-next 2/3] ip: Display ip rule protocol used

2018-02-23 Thread Donald Sharp
Modify 'ip rule' command to notice when the kernel passes
to us the originating protocol.

Add code to allow the `ip rule flush protocol XXX`
command to be accepted and properly handled.

Modify the documentation to reflect these code changes.

Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com>
---
 ip/iprule.c| 36 +---
 man/man8/ip-rule.8 | 18 +-
 2 files changed, 46 insertions(+), 8 deletions(-)

diff --git a/ip/iprule.c b/ip/iprule.c
index 94356bf8..17df9e9b 100644
--- a/ip/iprule.c
+++ b/ip/iprule.c
@@ -47,6 +47,7 @@ static void usage(void)
"[ iif STRING ] [ oif STRING ] [ pref NUMBER ] [ 
l3mdev ]\n"
"[ uidrange NUMBER-NUMBER ]\n"
"ACTION := [ table TABLE_ID ]\n"
+   "  [ protocol PROTO ]\n"
"  [ nat ADDRESS ]\n"
"  [ realms [SRCREALM/]DSTREALM ]\n"
"  [ goto NUMBER ]\n"
@@ -71,6 +72,8 @@ static struct
struct fib_rule_uid_range range;
inet_prefix src;
inet_prefix dst;
+   int protocol;
+   int protocolmask;
 } filter;
 
 static inline int frh_get_table(struct fib_rule_hdr *frh, struct rtattr **tb)
@@ -338,6 +341,16 @@ int print_rule(const struct sockaddr_nl *who, struct 
nlmsghdr *n, void *arg)
rtnl_rtntype_n2a(frh->action,
 b1, sizeof(b1)));
 
+   if (tb[FRA_PROTOCOL]) {
+   __u8 protocol = rta_getattr_u8(tb[FRA_PROTOCOL]);
+
+   if ((protocol && protocol != RTPROT_KERNEL) ||
+   show_details > 0) {
+   fprintf(fp, " proto %s ",
+   rtnl_rtprot_n2a(protocol, b1, sizeof(b1)));
+   }
+   }
+
fprintf(fp, "\n");
fflush(fp);
return 0;
@@ -391,6 +404,10 @@ static int flush_rule(const struct sockaddr_nl *who, 
struct nlmsghdr *n,
 
parse_rtattr(tb, FRA_MAX, RTM_RTA(frh), len);
 
+   if (tb[FRA_PROTOCOL] &&
+   
(filter.protocol^rta_getattr_u8(tb[FRA_PROTOCOL])))
+   return 0;
+
if (tb[FRA_PRIORITY]) {
n->nlmsg_type = RTM_DELRULE;
n->nlmsg_flags = NLM_F_REQUEST;
@@ -415,12 +432,6 @@ static int iprule_list_flush_or_save(int argc, char 
**argv, int action)
if (af == AF_UNSPEC)
af = AF_INET;
 
-   if (action != IPRULE_LIST && argc > 0) {
-   fprintf(stderr, "\"ip rule %s\" does not take any arguments.\n",
-   action == IPRULE_SAVE ? "save" : "flush");
-   return -1;
-   }
-
switch (action) {
case IPRULE_SAVE:
if (save_rule_prep())
@@ -508,7 +519,18 @@ static int iprule_list_flush_or_save(int argc, char 
**argv, int action)
NEXT_ARG();
if (get_prefix(, *argv, af))
invarg("from value is invalid\n", *argv);
-   } else {
+   } else if (matches(*argv, "protocol") == 0) {
+   __u32 prot;
+   NEXT_ARG();
+   filter.protocolmask = -1;
+   if (rtnl_rtprot_a2n(, *argv)) {
+   if (strcmp(*argv, "all") != 0)
+   invarg("invalid \"protocol\"\n", *argv);
+   prot = 0;
+   filter.protocolmask = 0;
+   }
+   filter.protocol = prot;
+   } else{
if (matches(*argv, "dst") == 0 ||
matches(*argv, "to") == 0) {
NEXT_ARG();
diff --git a/man/man8/ip-rule.8 b/man/man8/ip-rule.8
index a5c47981..f4070542 100644
--- a/man/man8/ip-rule.8
+++ b/man/man8/ip-rule.8
@@ -50,6 +50,8 @@ ip-rule \- routing policy database management
 .IR ACTION " := [ "
 .B  table
 .IR TABLE_ID " ] [ "
+.B  protocol
+.IR PROTO " ] [ "
 .B  nat
 .IR ADDRESS " ] [ "
 .B realms
@@ -240,6 +242,10 @@ The options preference and order are synonyms with 
priority.
 the routing table identifier to lookup if the rule selector matches.
 It is also possible to use lookup instead of table.
 
+.TP
+.BI protocol " PROTO"
+the protocol that installed the rule in question.
+
 .TP
 .BI suppress_prefixlength " NUMBER"
 reject routing decisions that have a prefix length of NUMBER or less.
@@ -275,7 +281,11 @@ updates, it flushes the routing cache with
 .RE
 .TP
 .B ip rule flush - also dumps all the deleted rules

[PATCH v3 iproute2-next 3/3] ip: Allow rules to accept a specified protocol

2018-02-23 Thread Donald Sharp
Allow the specification of a protocol when the user
adds/modifies/deletes a rule.

Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com>
---
 ip/iprule.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/ip/iprule.c b/ip/iprule.c
index 17df9e9b..796da3b3 100644
--- a/ip/iprule.c
+++ b/ip/iprule.c
@@ -689,6 +689,12 @@ static int iprule_modify(int cmd, int argc, char **argv)
if (get_rt_realms_or_raw(, *argv))
invarg("invalid realms\n", *argv);
addattr32(, sizeof(req), FRA_FLOW, realm);
+   } else if (matches(*argv, "protocol") == 0) {
+   __u32 proto;
+   NEXT_ARG();
+   if (rtnl_rtprot_a2n(, *argv))
+   invarg("\"protocol\" value is invalid\n", 
*argv);
+   addattr8(, sizeof(req), FRA_PROTOCOL, proto);
} else if (matches(*argv, "table") == 0 ||
   strcmp(*argv, "lookup") == 0) {
NEXT_ARG();
-- 
2.14.3



[PATCH v3 iproute2-next 0/3] Allow 'ip rule' command to use protocol

2018-02-23 Thread Donald Sharp
Fix iprule.c to use the actual `struct fib_rule_hdr` and to
allow the end user to see and use the protocol keyword
for rule manipulation.

v2: Rearrange and code changes as per David Ahern
v3: Fix some missed RTN_XXX to appropriate FR_XX and doc changes

Donald Sharp (3):
  ip: Use the `struct fib_rule_hdr` for rules
  ip: Display ip rule protocol used
  ip: Allow rules to accept a specified protocol

 include/uapi/linux/fib_rules.h |   1 +
 ip/iprule.c| 170 -
 man/man8/ip-rule.8 |  18 -
 3 files changed, 120 insertions(+), 69 deletions(-)

-- 
2.14.3



[PATCH iproute2 v2] ip: Properly display AF_BRIDGE address information for neighbor events

2018-02-23 Thread Donald Sharp
When a neighbor add/delete event occurs, the vxlan driver sends
NDA_DST filled with a union:

union vxlan_addr {
struct sockaddr_in sin;
struct sockaddr_in6 sin6;
struct sockaddr sa;
};

This eventually calls rt_addr_n2a_r, which had no handler for the
AF_BRIDGE family, so "???" was being printed.

Add code to properly display this data when requested.

Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com>
---
 lib/utils.c | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/lib/utils.c b/lib/utils.c
index 24aeddd8..fe5841f6 100644
--- a/lib/utils.c
+++ b/lib/utils.c
@@ -1004,6 +1004,25 @@ const char *rt_addr_n2a_r(int af, int len,
}
case AF_PACKET:
return ll_addr_n2a(addr, len, ARPHRD_VOID, buf, buflen);
+   case AF_BRIDGE:
+   {
+   const union {
+   struct sockaddr sa;
+   struct sockaddr_in sin;
+   struct sockaddr_in6 sin6;
+   } *sa = addr;
+   unsigned short family = sa->sa.sa_family;
+
+   switch(family) {
+   case AF_INET:
+   return inet_ntop(AF_INET, >sin.sin_addr, buf, 
buflen);
+   case AF_INET6:
+   return inet_ntop(AF_INET6, >sin6.sin6_addr,
+buf, buflen);
+   }
+
+   /* fallthrough */
+   }
default:
return "???";
}
-- 
2.14.3



[PATCH] net: fib_rules: Add new attribute to set protocol

2018-02-23 Thread Donald Sharp
For ages iproute2 has used `struct rtmsg` as the ancillary header for
FIB rules and in the process set the protocol value to RTPROT_BOOT.
Until cac56209a66 ("net: Allow a rule to track originating protocol")
the kernel rules code ignored the protocol value sent from userspace
and always returned 0 in notifications. To avoid incompatibility with
existing iproute2, send the protocol as a new attribute.

Fixes: cac56209a66 ("net: Allow a rule to track originating protocol")
Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com>
---
 drivers/net/vrf.c  |  5 -
 include/net/fib_rules.h|  3 ++-
 include/uapi/linux/fib_rules.h |  5 +++--
 net/core/fib_rules.c   | 15 +++
 4 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 951a4b42cb29..9ce0182223a0 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -1145,6 +1145,7 @@ static inline size_t vrf_fib_rule_nl_size(void)
sz  = NLMSG_ALIGN(sizeof(struct fib_rule_hdr));
sz += nla_total_size(sizeof(u8));   /* FRA_L3MDEV */
sz += nla_total_size(sizeof(u32));  /* FRA_PRIORITY */
+   sz += nla_total_size(sizeof(u8));   /* FRA_PROTOCOL */
 
return sz;
 }
@@ -1174,7 +1175,9 @@ static int vrf_fib_rule(const struct net_device *dev, 
__u8 family, bool add_it)
memset(frh, 0, sizeof(*frh));
frh->family = family;
frh->action = FR_ACT_TO_TBL;
-   frh->proto = RTPROT_KERNEL;
+
+   if (nla_put_u8(skb, FRA_PROTOCOL, RTPROT_KERNEL))
+   goto nla_put_failure;
 
if (nla_put_u8(skb, FRA_L3MDEV, 1))
goto nla_put_failure;
diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index b166ef07e6d4..b3d216249240 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -109,7 +109,8 @@ struct fib_rule_notifier_info {
[FRA_SUPPRESS_IFGROUP] = { .type = NLA_U32 }, \
[FRA_GOTO]  = { .type = NLA_U32 }, \
[FRA_L3MDEV]= { .type = NLA_U8 }, \
-   [FRA_UID_RANGE] = { .len = sizeof(struct fib_rule_uid_range) }
+   [FRA_UID_RANGE] = { .len = sizeof(struct fib_rule_uid_range) }, \
+   [FRA_PROTOCOL]  = { .type = NLA_U8 }
 
 static inline void fib_rule_get(struct fib_rule *rule)
 {
diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h
index 925539172d5b..77d90ae38114 100644
--- a/include/uapi/linux/fib_rules.h
+++ b/include/uapi/linux/fib_rules.h
@@ -23,8 +23,8 @@ struct fib_rule_hdr {
__u8tos;
 
__u8table;
-   __u8proto;
-   __u8res1;   /* reserved */
+   __u8res1;   /* reserved */
+   __u8res2;   /* reserved */
__u8action;
 
__u32   flags;
@@ -58,6 +58,7 @@ enum {
FRA_PAD,
FRA_L3MDEV, /* iif or oif is l3mdev goto its table */
FRA_UID_RANGE,  /* UID range */
+   FRA_PROTOCOL,   /* Originator of the rule */
__FRA_MAX
 };
 
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 88298f18cbae..a6aea805a0a2 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -466,11 +466,13 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr 
*nlh,
}
refcount_set(>refcnt, 1);
rule->fr_net = net;
-   rule->proto = frh->proto;
 
rule->pref = tb[FRA_PRIORITY] ? nla_get_u32(tb[FRA_PRIORITY])
  : fib_default_rule_pref(ops);
 
+   rule->proto = tb[FRA_PROTOCOL] ?
+   nla_get_u8(tb[FRA_PROTOCOL]) : RTPROT_UNSPEC;
+
if (tb[FRA_IIFNAME]) {
struct net_device *dev;
 
@@ -666,7 +668,8 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr 
*nlh,
}
 
list_for_each_entry(rule, >rules_list, list) {
-   if (frh->proto && (frh->proto != rule->proto))
+   if (tb[FRA_PROTOCOL] &&
+   (rule->proto != nla_get_u8(tb[FRA_PROTOCOL])))
continue;
 
if (frh->action && (frh->action != rule->action))
@@ -786,7 +789,8 @@ static inline size_t fib_rule_nlmsg_size(struct 
fib_rules_ops *ops,
 + nla_total_size(4) /* FRA_FWMARK */
 + nla_total_size(4) /* FRA_FWMASK */
 + nla_total_size_64bit(8) /* FRA_TUN_ID */
-+ nla_total_size(sizeof(struct fib_kuid_range));
++ nla_total_size(sizeof(struct fib_kuid_range))
++ nla_total_size(1); /* FRA_PROTOCOL */
 
if (ops->nlmsg_payload)
payload += ops->nlmsg_payload(rule);
@@ -813,9 +817,12 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct 
fib_rule *rule,
if (nla_put_u32(skb, FRA_SUPPRESS_PREFIXLEN, rule->suppress_prefi

[PATCH iproute2] ip: Properly display AF_BRIDGE address information for neighbor events

2018-02-21 Thread Donald Sharp
When a neighbor add/delete event occurs, the vxlan driver sends
NDA_DST filled with a union:

union vxlan_addr {
struct sockaddr_in sin;
struct sockaddr_in6 sin6;
struct sockaddr sa;
};

This eventually calls rt_addr_n2a_r which had no handler for the
AF_BRIDGE family and "???" was being printed.

Add code to properly display this data when requested.

Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com>
---
 lib/utils.c | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/lib/utils.c b/lib/utils.c
index 24aeddd8..e01e18a7 100644
--- a/lib/utils.c
+++ b/lib/utils.c
@@ -1004,6 +1004,24 @@ const char *rt_addr_n2a_r(int af, int len,
}
case AF_PACKET:
return ll_addr_n2a(addr, len, ARPHRD_VOID, buf, buflen);
+   case AF_BRIDGE:
+   {
+   unsigned short family = ((struct sockaddr *)addr)->sa_family;
+   struct sockaddr_in6 *sin6;
+   struct sockaddr_in *sin;
+
+   switch(family) {
+   case AF_INET:
+   sin = (struct sockaddr_in *)addr;
+   return inet_ntop(AF_INET, &sin->sin_addr, buf, buflen);
+   case AF_INET6:
+   sin6 = (struct sockaddr_in6 *)addr;
+   return inet_ntop(AF_INET6, &sin6->sin6_addr,
+buf, buflen);
+   }
+
+   /* fallthrough */
+   }
default:
return "???";
}
-- 
2.14.3



[PATCH v2 iproute2-next 2/3] ip: Display ip rule protocol used

2018-02-21 Thread Donald Sharp
Modify 'ip rule' command to notice when the kernel passes
to us the originating protocol.

Add code to allow the `ip rule flush protocol XXX`
command to be accepted and properly handled.

Modify the documentation to reflect these code changes.

Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com>
---
 ip/iprule.c| 29 ++---
 man/man8/ip-rule.8 | 18 +-
 2 files changed, 39 insertions(+), 8 deletions(-)

diff --git a/ip/iprule.c b/ip/iprule.c
index 00a6c26a..39008768 100644
--- a/ip/iprule.c
+++ b/ip/iprule.c
@@ -47,6 +47,7 @@ static void usage(void)
"[ iif STRING ] [ oif STRING ] [ pref NUMBER ] [ 
l3mdev ]\n"
"[ uidrange NUMBER-NUMBER ]\n"
"ACTION := [ table TABLE_ID ]\n"
+   "  [ protocol RPROTO ]\n"
"  [ nat ADDRESS ]\n"
"  [ realms [SRCREALM/]DSTREALM ]\n"
"  [ goto NUMBER ]\n"
@@ -71,6 +72,8 @@ static struct
struct fib_rule_uid_range range;
inet_prefix src;
inet_prefix dst;
+   int protocol;
+   int protocolmask;
 } filter;
 
 static inline int frh_get_table(struct fib_rule_hdr *frh, struct rtattr **tb)
@@ -338,6 +341,10 @@ int print_rule(const struct sockaddr_nl *who, struct 
nlmsghdr *n, void *arg)
rtnl_rtntype_n2a(frh->action,
 b1, sizeof(b1)));
 
+   if (frh->proto != RTPROT_UNSPEC)
+   fprintf(fp, " proto %s ",
+   rtnl_rtprot_n2a(frh->proto, b1, sizeof(b1)));
+
fprintf(fp, "\n");
fflush(fp);
return 0;
@@ -391,6 +398,9 @@ static int flush_rule(const struct sockaddr_nl *who, struct 
nlmsghdr *n,
 
parse_rtattr(tb, FRA_MAX, RTM_RTA(frh), len);
 
+   if ((filter.protocol^frh->proto))
+   return 0;
+
if (tb[FRA_PRIORITY]) {
n->nlmsg_type = RTM_DELRULE;
n->nlmsg_flags = NLM_F_REQUEST;
@@ -415,12 +425,6 @@ static int iprule_list_flush_or_save(int argc, char 
**argv, int action)
if (af == AF_UNSPEC)
af = AF_INET;
 
-   if (action != IPRULE_LIST && argc > 0) {
-   fprintf(stderr, "\"ip rule %s\" does not take any arguments.\n",
-   action == IPRULE_SAVE ? "save" : "flush");
-   return -1;
-   }
-
switch (action) {
case IPRULE_SAVE:
if (save_rule_prep())
@@ -508,7 +512,18 @@ static int iprule_list_flush_or_save(int argc, char 
**argv, int action)
NEXT_ARG();
if (get_prefix(&filter.src, *argv, af))
invarg("from value is invalid\n", *argv);
-   } else {
+   } else if (matches(*argv, "protocol") == 0) {
+   __u32 prot;
+   NEXT_ARG();
+   filter.protocolmask = -1;
+   if (rtnl_rtprot_a2n(&prot, *argv)) {
+   if (strcmp(*argv, "all") != 0)
+   invarg("invalid \"protocol\"\n", *argv);
+   prot = 0;
+   filter.protocolmask = 0;
+   }
+   filter.protocol = prot;
+   } else{
if (matches(*argv, "dst") == 0 ||
matches(*argv, "to") == 0) {
NEXT_ARG();
diff --git a/man/man8/ip-rule.8 b/man/man8/ip-rule.8
index a5c47981..98b2573d 100644
--- a/man/man8/ip-rule.8
+++ b/man/man8/ip-rule.8
@@ -50,6 +50,8 @@ ip-rule \- routing policy database management
 .IR ACTION " := [ "
 .B  table
 .IR TABLE_ID " ] [ "
+.B  protocol
+.IR RPROTO " ] [ "
 .B  nat
 .IR ADDRESS " ] [ "
 .B realms
@@ -240,6 +242,10 @@ The options preference and order are synonyms with 
priority.
 the routing table identifier to lookup if the rule selector matches.
 It is also possible to use lookup instead of table.
 
+.TP
+.BI protocol " RPROTO"
+the protocol who installed the rule in question.
+
 .TP
 .BI suppress_prefixlength " NUMBER"
 reject routing decisions that have a prefix length of NUMBER or less.
@@ -275,7 +281,11 @@ updates, it flushes the routing cache with
 .RE
 .TP
 .B ip rule flush - also dumps all the deleted rules.
-This command has no arguments.
+.RS
+.TP
+.BI protocol " RPROTO"
+Select the originating protocol.
+.RE
 .TP
 .B ip rule show - list rules
 This command has no arguments.
@@ -283,6 +293,12 @@ The options list or lst are synonyms with show.
 
 .TP
 .B ip rule save
+.RS
+.TP
+.BI protocol " RPROTO"
+Select the originating protocol.
+.RE
+.TP
 save rules table information to stdout
 .RS
 This command behaves like
-- 
2.14.3



[PATCH v2 iproute2-next 0/3] Allow 'ip rule' command to use protocol

2018-02-21 Thread Donald Sharp
Fix iprule.c to use the actual `struct fib_rule_hdr` and to
allow the end user to see and use the protocol keyword
for rule manipulations.

v2: Rearrange and code changes as per David Ahern

Donald Sharp (3):
  ip: Use the `struct fib_rule_hdr` for rules
  ip: Display ip rule protocol used
  ip: Allow rules to accept a specified protocol

 include/uapi/linux/fib_rules.h |   2 +-
 ip/iprule.c| 164 -
 man/man8/ip-rule.8 |  18 -
 3 files changed, 114 insertions(+), 70 deletions(-)

-- 
2.14.3



[PATCH v2 iproute2-next 1/3] ip: Use the `struct fib_rule_hdr` for rules

2018-02-21 Thread Donald Sharp
The iprule.c code was using `struct rtmsg` as the data
type to pass into the kernel for the netlink message.
While 'struct rtmsg' and `struct fib_rule_hdr` are
the same size and mostly the same, we should use
the correct data structure.  This commit translates
the data structures to have iprule.c use the correct
one.

Additionally copy over the modified fib_rules.h file

Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com>
---
 include/uapi/linux/fib_rules.h |   2 +-
 ip/iprule.c| 129 ++---
 2 files changed, 69 insertions(+), 62 deletions(-)

diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h
index 2b642bf9..92553917 100644
--- a/include/uapi/linux/fib_rules.h
+++ b/include/uapi/linux/fib_rules.h
@@ -23,8 +23,8 @@ struct fib_rule_hdr {
__u8tos;
 
__u8table;
+   __u8proto;
__u8res1;   /* reserved */
-   __u8res2;   /* reserved */
__u8action;
 
__u32   flags;
diff --git a/ip/iprule.c b/ip/iprule.c
index a3abf2f6..00a6c26a 100644
--- a/ip/iprule.c
+++ b/ip/iprule.c
@@ -73,25 +73,33 @@ static struct
inet_prefix dst;
 } filter;
 
+static inline int frh_get_table(struct fib_rule_hdr *frh, struct rtattr **tb)
+{
+   __u32 table = frh->table;
+   if (tb[RTA_TABLE])
+   table = rta_getattr_u32(tb[RTA_TABLE]);
+   return table;
+}
+
 static bool filter_nlmsg(struct nlmsghdr *n, struct rtattr **tb, int host_len)
 {
-   struct rtmsg *r = NLMSG_DATA(n);
+   struct fib_rule_hdr *frh = NLMSG_DATA(n);
__u32 table;
 
-   if (preferred_family != AF_UNSPEC && r->rtm_family != preferred_family)
+   if (preferred_family != AF_UNSPEC && frh->family != preferred_family)
return false;
 
if (filter.prefmask &&
filter.pref ^ (tb[FRA_PRIORITY] ? rta_getattr_u32(tb[FRA_PRIORITY]) 
: 0))
return false;
-   if (filter.not && !(r->rtm_flags & FIB_RULE_INVERT))
+   if (filter.not && !(frh->flags & FIB_RULE_INVERT))
return false;
 
if (filter.src.family) {
inet_prefix *f_src = &filter.src;
 
-   if (f_src->family != r->rtm_family ||
-   f_src->bitlen > r->rtm_src_len)
+   if (f_src->family != frh->family ||
+   f_src->bitlen > frh->src_len)
return false;
 
if (inet_addr_match_rta(f_src, tb[FRA_SRC]))
@@ -101,15 +109,15 @@ static bool filter_nlmsg(struct nlmsghdr *n, struct 
rtattr **tb, int host_len)
if (filter.dst.family) {
inet_prefix *f_dst = &filter.dst;
 
-   if (f_dst->family != r->rtm_family ||
-   f_dst->bitlen > r->rtm_dst_len)
+   if (f_dst->family != frh->family ||
+   f_dst->bitlen > frh->dst_len)
return false;
 
if (inet_addr_match_rta(f_dst, tb[FRA_DST]))
return false;
}
 
-   if (filter.tosmask && filter.tos ^ r->rtm_tos)
+   if (filter.tosmask && filter.tos ^ frh->tos)
return false;
 
if (filter.fwmark) {
@@ -159,7 +167,7 @@ static bool filter_nlmsg(struct nlmsghdr *n, struct rtattr 
**tb, int host_len)
return false;
}
 
-   table = rtm_get_table(r, tb);
+   table = frh_get_table(frh, tb);
if (filter.tb > 0 && filter.tb ^ table)
return false;
 
@@ -169,7 +177,7 @@ static bool filter_nlmsg(struct nlmsghdr *n, struct rtattr 
**tb, int host_len)
 int print_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
 {
FILE *fp = (FILE *)arg;
-   struct rtmsg *r = NLMSG_DATA(n);
+   struct fib_rule_hdr *frh = NLMSG_DATA(n);
int len = n->nlmsg_len;
int host_len = -1;
__u32 table;
@@ -180,13 +188,13 @@ int print_rule(const struct sockaddr_nl *who, struct 
nlmsghdr *n, void *arg)
if (n->nlmsg_type != RTM_NEWRULE && n->nlmsg_type != RTM_DELRULE)
return 0;
 
-   len -= NLMSG_LENGTH(sizeof(*r));
+   len -= NLMSG_LENGTH(sizeof(*frh));
if (len < 0)
return -1;
 
-   parse_rtattr(tb, FRA_MAX, RTM_RTA(r), len);
+   parse_rtattr(tb, FRA_MAX, RTM_RTA(frh), len);
 
-   host_len = af_bit_len(r->rtm_family);
+   host_len = af_bit_len(frh->family);
 
if (!filter_nlmsg(n, tb, host_len))
return 0;
@@ -200,41 +208,41 @@ int print_rule(const struct sockaddr_nl *who, struct 
nlmsghdr *n, void *arg)
else
fprintf(fp, "0:\t");
 
-   if (r->rtm_flags & FIB_RULE_INVERT)
+   if (frh->flags & FIB

[PATCH v2 iproute2-next 3/3] ip: Allow rules to accept a specified protocol

2018-02-21 Thread Donald Sharp
Allow the specification of a protocol when the user
adds/modifies/deletes a rule.

Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com>
---
 ip/iprule.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/ip/iprule.c b/ip/iprule.c
index 39008768..192fe215 100644
--- a/ip/iprule.c
+++ b/ip/iprule.c
@@ -683,6 +683,12 @@ static int iprule_modify(int cmd, int argc, char **argv)
if (get_rt_realms_or_raw(&realm, *argv))
invarg("invalid realms\n", *argv);
addattr32(&req.n, sizeof(req), FRA_FLOW, realm);
+   } else if (matches(*argv, "protocol") == 0) {
+   __u32 proto;
+   NEXT_ARG();
+   if (rtnl_rtprot_a2n(&proto, *argv))
+   invarg("\"protocol\" value is invalid\n", 
*argv);
+   req.frh.proto = proto;
} else if (matches(*argv, "table") == 0 ||
   strcmp(*argv, "lookup") == 0) {
NEXT_ARG();
-- 
2.14.3



[PATCH iproute2-next 2/4] ip: Display ip rule protocol used

2018-02-20 Thread Donald Sharp
Newer kernels are now accepting a protocol from the installing
program for who installed the rule.  This change allows us
to see this change if it is being specified by the installing
program.

Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com>
---
 ip/iprule.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/ip/iprule.c b/ip/iprule.c
index c40d76f1..b3e7d92c 100644
--- a/ip/iprule.c
+++ b/ip/iprule.c
@@ -341,6 +341,10 @@ int print_rule(const struct sockaddr_nl *who, struct 
nlmsghdr *n, void *arg)
rtnl_rtntype_n2a(frh->action,
 b1, sizeof(b1)));
 
+   if (frh->proto != RTPROT_UNSPEC)
+   fprintf(fp, " proto %s ",
+   rtnl_rtprot_n2a(frh->proto, b1, sizeof(b1)));
+
fprintf(fp, "\n");
fflush(fp);
return 0;
-- 
2.14.3



[PATCH iproute2-next 3/4] ip: Allow rules to accept a specified protocol

2018-02-20 Thread Donald Sharp
Allow the specification of a protocol when the user
adds/modifies/deletes a rule.

Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com>
---
 ip/iprule.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/ip/iprule.c b/ip/iprule.c
index b3e7d92c..fd242fee 100644
--- a/ip/iprule.c
+++ b/ip/iprule.c
@@ -675,6 +675,12 @@ static int iprule_modify(int cmd, int argc, char **argv)
if (get_rt_realms_or_raw(&realm, *argv))
invarg("invalid realms\n", *argv);
addattr32(&req.n, sizeof(req), FRA_FLOW, realm);
+   } else if (matches(*argv, "protocol") == 0) {
+   __u32 proto;
+   NEXT_ARG();
+   if (rtnl_rtprot_a2n(&proto, *argv))
+   invarg("\"protocol\" value is invalid\n", 
*argv);
+   req.frh.proto = proto;
} else if (matches(*argv, "table") == 0 ||
   strcmp(*argv, "lookup") == 0) {
NEXT_ARG();
-- 
2.14.3



[PATCH iproute2-next 0/4] Allow 'ip rule' command to use protocol

2018-02-20 Thread Donald Sharp
Fix iprule.c to use the actual `struct fib_rule_hdr` and to
allow the end user to see and use the protocol keyword
for rule manipulations.

Donald Sharp (4):
  ip: Use the `struct fib_rule_hdr` for rules
  ip: Display ip rule protocol used
  ip: Allow rules to accept a specified protocol
  ip: Add ability to flush a rule based upon protocol

 include/uapi/linux/fib_rules.h |   2 +-
 ip/iprule.c| 167 -
 man/man8/ip-rule.8 |  18 -
 3 files changed, 117 insertions(+), 70 deletions(-)

-- 
2.14.3



[PATCH iproute2-next 4/4] ip: Add ability to flush a rule based upon protocol

2018-02-20 Thread Donald Sharp
Add code to allow the `ip rule flush protocol XXX`
command to be accepted and properly handled.

Additionally modify the documentation to be correct
with these changes.

Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com>
---
 ip/iprule.c| 25 ++---
 man/man8/ip-rule.8 | 18 +-
 2 files changed, 35 insertions(+), 8 deletions(-)

diff --git a/ip/iprule.c b/ip/iprule.c
index fd242fee..b69413dd 100644
--- a/ip/iprule.c
+++ b/ip/iprule.c
@@ -47,6 +47,7 @@ static void usage(void)
"[ iif STRING ] [ oif STRING ] [ pref NUMBER ] [ 
l3mdev ]\n"
"[ uidrange NUMBER-NUMBER ]\n"
"ACTION := [ table TABLE_ID ]\n"
+   "  [ protocol RPROTO ]\n"
"  [ nat ADDRESS ]\n"
"  [ realms [SRCREALM/]DSTREALM ]\n"
"  [ goto NUMBER ]\n"
@@ -71,6 +72,8 @@ static struct
struct fib_rule_uid_range range;
inet_prefix src;
inet_prefix dst;
+   int protocol;
+   int protocolmask;
 } filter;
 
 static bool filter_nlmsg(struct nlmsghdr *n, struct rtattr **tb, int host_len)
@@ -398,6 +401,9 @@ static int flush_rule(const struct sockaddr_nl *who, struct 
nlmsghdr *n,
 
parse_rtattr(tb, FRA_MAX, RTM_RTA(frh), len);
 
+   if ((filter.protocol^frh->proto))
+   return 0;
+
if (tb[FRA_PRIORITY]) {
n->nlmsg_type = RTM_DELRULE;
n->nlmsg_flags = NLM_F_REQUEST;
@@ -422,12 +428,6 @@ static int iprule_list_flush_or_save(int argc, char 
**argv, int action)
if (af == AF_UNSPEC)
af = AF_INET;
 
-   if (action != IPRULE_LIST && argc > 0) {
-   fprintf(stderr, "\"ip rule %s\" does not take any arguments.\n",
-   action == IPRULE_SAVE ? "save" : "flush");
-   return -1;
-   }
-
switch (action) {
case IPRULE_SAVE:
if (save_rule_prep())
@@ -515,7 +515,18 @@ static int iprule_list_flush_or_save(int argc, char 
**argv, int action)
NEXT_ARG();
if (get_prefix(&filter.src, *argv, af))
invarg("from value is invalid\n", *argv);
-   } else {
+   } else if (matches(*argv, "protocol") == 0) {
+   __u32 prot;
+   NEXT_ARG();
+   filter.protocolmask = -1;
+   if (rtnl_rtprot_a2n(&prot, *argv)) {
+   if (strcmp(*argv, "all") != 0)
+   invarg("invalid \"protocol\"\n", *argv);
+   prot = 0;
+   filter.protocolmask = 0;
+   }
+   filter.protocol = prot;
+   } else{
if (matches(*argv, "dst") == 0 ||
matches(*argv, "to") == 0) {
NEXT_ARG();
diff --git a/man/man8/ip-rule.8 b/man/man8/ip-rule.8
index a5c47981..98b2573d 100644
--- a/man/man8/ip-rule.8
+++ b/man/man8/ip-rule.8
@@ -50,6 +50,8 @@ ip-rule \- routing policy database management
 .IR ACTION " := [ "
 .B  table
 .IR TABLE_ID " ] [ "
+.B  protocol
+.IR RPROTO " ] [ "
 .B  nat
 .IR ADDRESS " ] [ "
 .B realms
@@ -240,6 +242,10 @@ The options preference and order are synonyms with 
priority.
 the routing table identifier to lookup if the rule selector matches.
 It is also possible to use lookup instead of table.
 
+.TP
+.BI protocol " RPROTO"
+the protocol who installed the rule in question.
+
 .TP
 .BI suppress_prefixlength " NUMBER"
 reject routing decisions that have a prefix length of NUMBER or less.
@@ -275,7 +281,11 @@ updates, it flushes the routing cache with
 .RE
 .TP
 .B ip rule flush - also dumps all the deleted rules.
-This command has no arguments.
+.RS
+.TP
+.BI protocol " RPROTO"
+Select the originating protocol.
+.RE
 .TP
 .B ip rule show - list rules
 This command has no arguments.
@@ -283,6 +293,12 @@ The options list or lst are synonyms with show.
 
 .TP
 .B ip rule save
+.RS
+.TP
+.BI protocol " RPROTO"
+Select the originating protocol.
+.RE
+.TP
 save rules table information to stdout
 .RS
 This command behaves like
-- 
2.14.3



[PATCH iproute2-next 1/4] ip: Use the `struct fib_rule_hdr` for rules

2018-02-20 Thread Donald Sharp
The iprule.c code was using `struct rtmsg` as the data
type to pass into the kernel for the netlink message.
While 'struct rtmsg' and `struct fib_rule_hdr` are
the same size and mostly the same, we should use
the correct data structure.  This commit translates
the data structures to have iprule.c use the correct
one.

Additionally copy over the modified fib_rules.h file

Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com>
---
 include/uapi/linux/fib_rules.h |   2 +-
 ip/iprule.c| 132 ++---
 2 files changed, 72 insertions(+), 62 deletions(-)

diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h
index 2b642bf9..92553917 100644
--- a/include/uapi/linux/fib_rules.h
+++ b/include/uapi/linux/fib_rules.h
@@ -23,8 +23,8 @@ struct fib_rule_hdr {
__u8tos;
 
__u8table;
+   __u8proto;
__u8res1;   /* reserved */
-   __u8res2;   /* reserved */
__u8action;
 
__u32   flags;
diff --git a/ip/iprule.c b/ip/iprule.c
index a3abf2f6..c40d76f1 100644
--- a/ip/iprule.c
+++ b/ip/iprule.c
@@ -75,23 +75,23 @@ static struct
 
 static bool filter_nlmsg(struct nlmsghdr *n, struct rtattr **tb, int host_len)
 {
-   struct rtmsg *r = NLMSG_DATA(n);
+   struct fib_rule_hdr *frh = NLMSG_DATA(n);
__u32 table;
 
-   if (preferred_family != AF_UNSPEC && r->rtm_family != preferred_family)
+   if (preferred_family != AF_UNSPEC && frh->family != preferred_family)
return false;
 
if (filter.prefmask &&
filter.pref ^ (tb[FRA_PRIORITY] ? rta_getattr_u32(tb[FRA_PRIORITY]) 
: 0))
return false;
-   if (filter.not && !(r->rtm_flags & FIB_RULE_INVERT))
+   if (filter.not && !(frh->flags & FIB_RULE_INVERT))
return false;
 
if (filter.src.family) {
inet_prefix *f_src = &filter.src;
 
-   if (f_src->family != r->rtm_family ||
-   f_src->bitlen > r->rtm_src_len)
+   if (f_src->family != frh->family ||
+   f_src->bitlen > frh->src_len)
return false;
 
if (inet_addr_match_rta(f_src, tb[FRA_SRC]))
@@ -101,15 +101,15 @@ static bool filter_nlmsg(struct nlmsghdr *n, struct 
rtattr **tb, int host_len)
if (filter.dst.family) {
inet_prefix *f_dst = &filter.dst;
 
-   if (f_dst->family != r->rtm_family ||
-   f_dst->bitlen > r->rtm_dst_len)
+   if (f_dst->family != frh->family ||
+   f_dst->bitlen > frh->dst_len)
return false;
 
if (inet_addr_match_rta(f_dst, tb[FRA_DST]))
return false;
}
 
-   if (filter.tosmask && filter.tos ^ r->rtm_tos)
+   if (filter.tosmask && filter.tos ^ frh->tos)
return false;
 
if (filter.fwmark) {
@@ -159,7 +159,13 @@ static bool filter_nlmsg(struct nlmsghdr *n, struct rtattr 
**tb, int host_len)
return false;
}
 
-   table = rtm_get_table(r, tb);
+
+   /* struct fib_rule_hdr and struct rtmsg
+* were intentionally the same.  Since
+* the table is the rtm_table, just call
+* it.
+*/
+   table = rtm_get_table((struct rtmsg *)frh, tb);
if (filter.tb > 0 && filter.tb ^ table)
return false;
 
@@ -169,7 +175,7 @@ static bool filter_nlmsg(struct nlmsghdr *n, struct rtattr 
**tb, int host_len)
 int print_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
 {
FILE *fp = (FILE *)arg;
-   struct rtmsg *r = NLMSG_DATA(n);
+   struct fib_rule_hdr *frh = NLMSG_DATA(n);
int len = n->nlmsg_len;
int host_len = -1;
__u32 table;
@@ -180,13 +186,13 @@ int print_rule(const struct sockaddr_nl *who, struct 
nlmsghdr *n, void *arg)
if (n->nlmsg_type != RTM_NEWRULE && n->nlmsg_type != RTM_DELRULE)
return 0;
 
-   len -= NLMSG_LENGTH(sizeof(*r));
+   len -= NLMSG_LENGTH(sizeof(*frh));
if (len < 0)
return -1;
 
-   parse_rtattr(tb, FRA_MAX, RTM_RTA(r), len);
+   parse_rtattr(tb, FRA_MAX, RTM_RTA(frh), len);
 
-   host_len = af_bit_len(r->rtm_family);
+   host_len = af_bit_len(frh->family);
 
if (!filter_nlmsg(n, tb, host_len))
return 0;
@@ -200,41 +206,41 @@ int print_rule(const struct sockaddr_nl *who, struct 
nlmsghdr *n, void *arg)
else
fprintf(fp, "0:\t");
 
-   if (r->rtm_flags & FIB_RULE_INVERT)
+   if (frh->flags & FIB_RULE_INVERT)
fprintf(fp, "not ");
 
  

[PATCH net-next v2 0/1] Allow rules to track originating protocol

2018-02-20 Thread Donald Sharp
Add the ability for the kernel to track the originating protocol
for when new rules are added to the kernel.

---
v1->v2
-> Address comments by David Miller to collapse patches into 1.

Donald Sharp (1):
  net: Allow a rule to track originating protocol

 drivers/net/vrf.c  | 1 +
 include/net/fib_rules.h| 3 ++-
 include/uapi/linux/fib_rules.h | 2 +-
 net/core/fib_rules.c   | 7 ++-
 4 files changed, 10 insertions(+), 3 deletions(-)

-- 
2.14.3



[PATCH net-next v2 1/1] net: Allow a rule to track originating protocol

2018-02-20 Thread Donald Sharp
Allow a rule that is being added/deleted/modified or
dumped to contain the originating protocol's id.

The protocol is handled just like a route's originating
protocol is.  This is especially useful because there
is starting to be a plethora of different user space
programs adding rules.

Allow the vrf device to specify that the kernel is the originator
of the rule created for this device.

Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com>
---
 drivers/net/vrf.c  | 1 +
 include/net/fib_rules.h| 3 ++-
 include/uapi/linux/fib_rules.h | 2 +-
 net/core/fib_rules.c   | 7 ++-
 4 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 139c61c8244a..ec6d2d623b60 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -1175,6 +1175,7 @@ static int vrf_fib_rule(const struct net_device *dev, 
__u8 family, bool add_it)
memset(frh, 0, sizeof(*frh));
frh->family = family;
frh->action = FR_ACT_TO_TBL;
+   frh->proto = RTPROT_KERNEL;
 
if (nla_put_u8(skb, FRA_L3MDEV, 1))
goto nla_put_failure;
diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index 648caf90ec07..b166ef07e6d4 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -26,7 +26,8 @@ struct fib_rule {
u32 table;
u8  action;
u8  l3mdev;
-   /* 2 bytes hole, try to use */
+   u8  proto;
+   /* 1 byte hole, try to use */
u32 target;
__be64  tun_id;
struct fib_rule __rcu   *ctarget;
diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h
index 2b642bf9b5a0..925539172d5b 100644
--- a/include/uapi/linux/fib_rules.h
+++ b/include/uapi/linux/fib_rules.h
@@ -23,8 +23,8 @@ struct fib_rule_hdr {
__u8tos;
 
__u8table;
+   __u8proto;
__u8res1;   /* reserved */
-   __u8res2;   /* reserved */
__u8action;
 
__u32   flags;
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 98e1066c3d55..c1d4ab5b2d9f 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -51,6 +51,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops,
r->pref = pref;
r->table = table;
r->flags = flags;
+   r->proto = RTPROT_KERNEL;
r->fr_net = ops->fro_net;
r->uid_range = fib_kuid_range_unset;
 
@@ -465,6 +466,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr 
*nlh,
}
refcount_set(&rule->refcnt, 1);
rule->fr_net = net;
+   rule->proto = frh->proto;
 
rule->pref = tb[FRA_PRIORITY] ? nla_get_u32(tb[FRA_PRIORITY])
  : fib_default_rule_pref(ops);
@@ -664,6 +666,9 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr 
*nlh,
}
 
list_for_each_entry(rule, &ops->rules_list, list) {
+   if (frh->proto && (frh->proto != rule->proto))
+   continue;
+
if (frh->action && (frh->action != rule->action))
continue;
 
@@ -808,9 +813,9 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct 
fib_rule *rule,
if (nla_put_u32(skb, FRA_SUPPRESS_PREFIXLEN, rule->suppress_prefixlen))
goto nla_put_failure;
frh->res1 = 0;
-   frh->res2 = 0;
frh->action = rule->action;
frh->flags = rule->flags;
+   frh->proto = rule->proto;
 
if (rule->action == FR_ACT_GOTO &&
rcu_access_pointer(rule->ctarget) == NULL)
-- 
2.14.3



Re: [PATCH 0/3] Allow 'ip rule' command to use protocol

2018-02-17 Thread Donald Sharp
Got it.  I'll send an update.

donald

On Sat, Feb 17, 2018 at 6:35 PM, David Ahern <d...@cumulusnetworks.com> wrote:
> On 2/17/18 5:47 AM, Donald Sharp wrote:
>> Fix iprule.c to use the actual `struct fib_rule_hdr` and to
>> allow the end user to see and use the protocol keyword
>> for rule manipulations.
>>
>> Donald Sharp (3):
>>   ip: Use the `struct fib_rule_hdr` for rules
>>   ip: Display ip rule protocol used
>>   ip: Allow rules to accept a specified protocol
>>
>>  include/linux/fib_rules.h |   2 +-
>>  ip/iprule.c   | 114 
>> ++
>>  2 files changed, 65 insertions(+), 51 deletions(-)
>>
>
> you are missing a patch to add protocol to iprule_list_flush_or_save so
> 'ip ru flush proto NAME' flushes all rules with that protocol.


[PATCH 3/3] ip: Allow rules to accept a specified protocol

2018-02-17 Thread Donald Sharp
Allow the specification of a protocol when the user
adds/modifies/deletes a rule.

Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com>
---
 ip/iprule.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/ip/iprule.c b/ip/iprule.c
index 5703d6e4..8fc6ac48 100644
--- a/ip/iprule.c
+++ b/ip/iprule.c
@@ -324,6 +324,12 @@ static int iprule_modify(int cmd, int argc, char **argv)
if (get_rt_realms(&realm, *argv))
invarg("invalid realms\n", *argv);
addattr32(&req.n, sizeof(req), FRA_FLOW, realm);
+   } else if (matches(*argv, "protocol") == 0) {
+   __u32 proto;
+   NEXT_ARG();
+   if (rtnl_rtprot_a2n(&proto, *argv))
+   invarg("\"protocol\" value is invalid\n", 
*argv);
+   req.frh.proto = proto;
} else if (matches(*argv, "table") == 0 ||
   strcmp(*argv, "lookup") == 0) {
NEXT_ARG();
-- 
2.14.3



[PATCH 1/3] ip: Use the `struct fib_rule_hdr` for rules

2018-02-17 Thread Donald Sharp
The iprule.c code was using `struct rtmsg` as the data
type to pass into the kernel for the netlink message.
While 'struct rtmsg' and `struct fib_rule_hdr` are
the same size and mostly the same, we should use
the correct data structure.  This commit translates
the data structures to have iprule.c use the correct
one.

Additionally copy over the modified fib_rules.h file

Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com>
---
 include/linux/fib_rules.h |   2 +-
 ip/iprule.c   | 105 --
 2 files changed, 56 insertions(+), 51 deletions(-)

diff --git a/include/linux/fib_rules.h b/include/linux/fib_rules.h
index bbf02a63..21f1fbf3 100644
--- a/include/linux/fib_rules.h
+++ b/include/linux/fib_rules.h
@@ -22,8 +22,8 @@ struct fib_rule_hdr {
__u8tos;
 
__u8table;
+   __u8proto;  /* reserved */
__u8res1;   /* reserved */
-   __u8res2;   /* reserved */
__u8action;
 
__u32   flags;
diff --git a/ip/iprule.c b/ip/iprule.c
index 854a3d8e..82e22fee 100644
--- a/ip/iprule.c
+++ b/ip/iprule.c
@@ -50,7 +50,7 @@ static void usage(void)
 int print_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
 {
FILE *fp = (FILE*)arg;
-   struct rtmsg *r = NLMSG_DATA(n);
+   struct fib_rule_hdr *frh = NLMSG_DATA(n);
int len = n->nlmsg_len;
int host_len = -1;
__u32 table;
@@ -61,13 +61,13 @@ int print_rule(const struct sockaddr_nl *who, struct 
nlmsghdr *n, void *arg)
if (n->nlmsg_type != RTM_NEWRULE && n->nlmsg_type != RTM_DELRULE)
return 0;
 
-   len -= NLMSG_LENGTH(sizeof(*r));
+   len -= NLMSG_LENGTH(sizeof(*frh));
if (len < 0)
return -1;
 
-   parse_rtattr(tb, FRA_MAX, RTM_RTA(r), len);
+   parse_rtattr(tb, FRA_MAX, RTM_RTA(frh), len);
 
-   host_len = af_bit_len(r->rtm_family);
+   host_len = af_bit_len(frh->family);
 
if (n->nlmsg_type == RTM_DELRULE)
fprintf(fp, "Deleted ");
@@ -77,51 +77,51 @@ int print_rule(const struct sockaddr_nl *who, struct 
nlmsghdr *n, void *arg)
else
fprintf(fp, "0:\t");
 
-   if (r->rtm_flags & FIB_RULE_INVERT)
+   if (frh->flags & FIB_RULE_INVERT)
fprintf(fp, "not ");
 
if (tb[FRA_SRC]) {
-   if (r->rtm_src_len != host_len) {
-   fprintf(fp, "from %s/%u ", rt_addr_n2a(r->rtm_family,
+   if (frh->src_len != host_len) {
+   fprintf(fp, "from %s/%u ", rt_addr_n2a(frh->family,
   RTA_PAYLOAD(tb[FRA_SRC]),
   RTA_DATA(tb[FRA_SRC]),
   abuf, sizeof(abuf)),
-   r->rtm_src_len
+   frh->src_len
);
} else {
-   fprintf(fp, "from %s ", format_host(r->rtm_family,
+   fprintf(fp, "from %s ", format_host(frh->family,
   RTA_PAYLOAD(tb[FRA_SRC]),
   RTA_DATA(tb[FRA_SRC]),
   abuf, sizeof(abuf))
);
}
-   } else if (r->rtm_src_len) {
-   fprintf(fp, "from 0/%d ", r->rtm_src_len);
+   } else if (frh->src_len) {
+   fprintf(fp, "from 0/%d ", frh->src_len);
} else {
fprintf(fp, "from all ");
}
 
if (tb[FRA_DST]) {
-   if (r->rtm_dst_len != host_len) {
-   fprintf(fp, "to %s/%u ", rt_addr_n2a(r->rtm_family,
+   if (frh->dst_len != host_len) {
+   fprintf(fp, "to %s/%u ", rt_addr_n2a(frh->family,
   RTA_PAYLOAD(tb[FRA_DST]),
   RTA_DATA(tb[FRA_DST]),
   abuf, sizeof(abuf)),
-   r->rtm_dst_len
+   frh->dst_len
);
} else {
-   fprintf(fp, "to %s ", format_host(r->rtm_family,
+   fprintf(fp, "to %s ", format_host(frh->family,
   RTA_PAYLOAD(tb[FRA_DST]),
   RTA_DATA(tb[FRA_DST]),
  

[PATCH 0/3] Allow 'ip rule' command to use protocol

2018-02-17 Thread Donald Sharp
Fix iprule.c to use the actual `struct fib_rule_hdr` and to
allow the end user to see and use the protocol keyword
for rule manipulations.

Donald Sharp (3):
  ip: Use the `struct fib_rule_hdr` for rules
  ip: Display ip rule protocol used
  ip: Allow rules to accept a specified protocol

 include/linux/fib_rules.h |   2 +-
 ip/iprule.c   | 114 ++
 2 files changed, 65 insertions(+), 51 deletions(-)

-- 
2.14.3



[PATCH 2/3] ip: Display ip rule protocol used

2018-02-17 Thread Donald Sharp
Newer kernels are now accepting a protocol from the installing
program for who installed the rule.  This change allows us
to see this change if it is being specified by the installing
program.

Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com>
---
 ip/iprule.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/ip/iprule.c b/ip/iprule.c
index 82e22fee..5703d6e4 100644
--- a/ip/iprule.c
+++ b/ip/iprule.c
@@ -213,6 +213,9 @@ int print_rule(const struct sockaddr_nl *who, struct 
nlmsghdr *n, void *arg)
else if (frh->action != RTN_UNICAST)
fprintf(fp, "%s", rtnl_rtntype_n2a(frh->action, b1, 
sizeof(b1)));
 
+   if (frh->proto != RTPROT_UNSPEC)
+   fprintf(fp, " proto %s ",
+   rtnl_rtprot_n2a(frh->proto, b1, sizeof(b1)));
fprintf(fp, "\n");
fflush(fp);
return 0;
-- 
2.14.3



[PATCH 1/2] net: Allow a rule to track originating protocol

2018-02-17 Thread Donald Sharp
Allow a rule that is being added/deleted/modified or
dumped to contain the originating protocol's id.

The protocol is handled just like a route's originating
protocol is.  This is especially useful because there
is starting to be a plethora of different user space
programs adding rules.

Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com>
---
 include/net/fib_rules.h| 3 ++-
 include/uapi/linux/fib_rules.h | 2 +-
 net/core/fib_rules.c   | 7 ++-
 3 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index 648caf90ec07..b166ef07e6d4 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -26,7 +26,8 @@ struct fib_rule {
u32 table;
u8  action;
u8  l3mdev;
-   /* 2 bytes hole, try to use */
+   u8  proto;
+   /* 1 byte hole, try to use */
u32 target;
__be64  tun_id;
struct fib_rule __rcu   *ctarget;
diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h
index 2b642bf9b5a0..925539172d5b 100644
--- a/include/uapi/linux/fib_rules.h
+++ b/include/uapi/linux/fib_rules.h
@@ -23,8 +23,8 @@ struct fib_rule_hdr {
__u8tos;
 
__u8table;
+   __u8proto;
__u8res1;   /* reserved */
-   __u8res2;   /* reserved */
__u8action;
 
__u32   flags;
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 98e1066c3d55..c1d4ab5b2d9f 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -51,6 +51,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops,
r->pref = pref;
r->table = table;
r->flags = flags;
+   r->proto = RTPROT_KERNEL;
r->fr_net = ops->fro_net;
r->uid_range = fib_kuid_range_unset;
 
@@ -465,6 +466,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr 
*nlh,
}
refcount_set(&rule->refcnt, 1);
rule->fr_net = net;
+   rule->proto = frh->proto;
 
rule->pref = tb[FRA_PRIORITY] ? nla_get_u32(tb[FRA_PRIORITY])
  : fib_default_rule_pref(ops);
@@ -664,6 +666,9 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr 
*nlh,
}
 
list_for_each_entry(rule, &ops->rules_list, list) {
+   if (frh->proto && (frh->proto != rule->proto))
+   continue;
+
if (frh->action && (frh->action != rule->action))
continue;
 
@@ -808,9 +813,9 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct 
fib_rule *rule,
if (nla_put_u32(skb, FRA_SUPPRESS_PREFIXLEN, rule->suppress_prefixlen))
goto nla_put_failure;
frh->res1 = 0;
-   frh->res2 = 0;
frh->action = rule->action;
frh->flags = rule->flags;
+   frh->proto = rule->proto;
 
if (rule->action == FR_ACT_GOTO &&
rcu_access_pointer(rule->ctarget) == NULL)
-- 
2.14.3



[PATCH 2/2] drivers: Modify vrf device to specify it's rule as RTPROT_KERNEL

2018-02-17 Thread Donald Sharp
Allow the vrf device to specify that the kernel is the originator
of the rule created for this device.

Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com>
---
 drivers/net/vrf.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 139c61c8244a..ec6d2d623b60 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -1175,6 +1175,7 @@ static int vrf_fib_rule(const struct net_device *dev, 
__u8 family, bool add_it)
memset(frh, 0, sizeof(*frh));
frh->family = family;
frh->action = FR_ACT_TO_TBL;
+   frh->proto = RTPROT_KERNEL;
 
if (nla_put_u8(skb, FRA_L3MDEV, 1))
goto nla_put_failure;
-- 
2.14.3



[PATCH 0/2] Allow rules to track originating protocol

2018-02-17 Thread Donald Sharp
Add the ability for the kernel to track the originating protocol
for when new rules are added to the kernel.

Donald Sharp (2):
  net: Allow a rule to track originating protocol
  drivers: Modify vrf device to specify it's rule as RTPROT_KERNEL

 drivers/net/vrf.c  | 1 +
 include/net/fib_rules.h| 3 ++-
 include/uapi/linux/fib_rules.h | 2 +-
 net/core/fib_rules.c   | 7 ++-
 4 files changed, 10 insertions(+), 3 deletions(-)

-- 
2.14.3



[PATCH] doc: Update VRF documentation metric

2017-10-18 Thread Donald Sharp
Two things:

1) Update examples to show usage of metric
2) Discuss reasoning for using such a high metric.

Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com>
---
 Documentation/networking/vrf.txt | 13 +
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/Documentation/networking/vrf.txt b/Documentation/networking/vrf.txt
index 3918dae..8ff7b4c 100644
--- a/Documentation/networking/vrf.txt
+++ b/Documentation/networking/vrf.txt
@@ -71,7 +71,12 @@ Setup
ip ru add iif vrf-blue table 10
 
 3. Set the default route for the table (and hence default route for the VRF).
-   ip route add table 10 unreachable default
+   ip route add table 10 unreachable default metric 4278198272
+
+   This high metric value ensures that the default unreachable route can
+   be overridden by a routing protocol suite.  FRRouting interprets
+   kernel metrics as a combined admin distance (upper byte) and priority
+   (lower 3 bytes).  Thus the above metric translates to [255/8192].
 
 4. Enslave L3 interfaces to a VRF device.
ip link set dev eth1 master vrf-blue
@@ -256,7 +261,7 @@ older form without it.
 
For example:
$ ip route show vrf red
-   prohibit default
+   unreachable default  metric 4278198272
broadcast 10.2.1.0 dev eth1  proto kernel  scope link  src 10.2.1.2
10.2.1.0/24 dev eth1  proto kernel  scope link  src 10.2.1.2
local 10.2.1.2 dev eth1  proto kernel  scope host  src 10.2.1.2
@@ -282,7 +287,7 @@ older form without it.
ff00::/8 dev red  metric 256  pref medium
ff00::/8 dev eth1  metric 256  pref medium
ff00::/8 dev eth2  metric 256  pref medium
-
+   unreachable default dev lo  metric 4278198272  error -101 pref medium
 
 8. Route Lookup for a VRF
 
@@ -331,7 +336,7 @@ function vrf_create
 ip link add ${VRF} type vrf table ${TBID}
 
 if [ "${VRF}" != "mgmt" ]; then
-ip route add table ${TBID} unreachable default
+ip route add table ${TBID} unreachable default metric 4278198272
 fi
 ip link set dev ${VRF} up
 }
-- 
2.9.5



[PATCH] net: ipmr: Add ipmr_rtm_getroute

2017-06-28 Thread Donald Sharp
Add to RTNL_FAMILY_IPMR, RTM_GETROUTE the ability
to retrieve one S,G mroute from a specified table.

*,G will return mroute information for just that
particular mroute if it exists.  This is because
it is entirely possible to have more S's than
can fit in one skb to return to the requesting
process.

Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com>
---
 net/ipv4/ipmr.c | 63 -
 1 file changed, 62 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index a1d521b..bb909f1 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -2406,6 +2406,67 @@ static void igmpmsg_netlink_event(struct mr_table *mrt, 
struct sk_buff *pkt)
rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE_R, -ENOBUFS);
 }
 
+static int ipmr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
+struct netlink_ext_ack *extack)
+{
+   struct net *net = sock_net(in_skb->sk);
+   struct nlattr *tb[RTA_MAX + 1];
+   struct sk_buff *skb = NULL;
+   struct mfc_cache *cache;
+   struct mr_table *mrt;
+   struct rtmsg *rtm;
+   __be32 src, grp;
+   u32 tableid;
+   int err;
+
+   err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX,
+ rtm_ipv4_policy, extack);
+   if (err < 0)
+   goto errout;
+
+   rtm = nlmsg_data(nlh);
+
+   src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0;
+   grp = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0;
+   tableid = tb[RTA_TABLE] ? nla_get_u32(tb[RTA_TABLE]) : 0;
+
+   mrt = ipmr_get_table(net, tableid ? tableid : RT_TABLE_DEFAULT);
+   if (IS_ERR(mrt)) {
+   err = PTR_ERR(mrt);
+   goto errout_free;
+   }
+
+   /* entries are added/deleted only under RTNL */
+   rcu_read_lock();
+   cache = ipmr_cache_find(mrt, src, grp);
+   rcu_read_unlock();
+   if (!cache) {
+   err = -ENOENT;
+   goto errout_free;
+   }
+
+   skb = nlmsg_new(mroute_msgsize(false, mrt->maxvif), GFP_KERNEL);
+   if (!skb) {
+   err = -ENOBUFS;
+   goto errout_free;
+   }
+
+   err = ipmr_fill_mroute(mrt, skb, NETLINK_CB(in_skb).portid,
+  nlh->nlmsg_seq, cache,
+  RTM_NEWROUTE, 0);
+   if (err < 0)
+   goto errout_free;
+
+   err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
+
+errout:
+   return err;
+
+errout_free:
+   kfree_skb(skb);
+   goto errout;
+}
+
 static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
 {
struct net *net = sock_net(skb->sk);
@@ -3053,7 +3114,7 @@ int __init ip_mr_init(void)
}
 #endif
rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE,
- NULL, ipmr_rtm_dumproute, NULL);
+ ipmr_rtm_getroute, ipmr_rtm_dumproute, NULL);
rtnl_register(RTNL_FAMILY_IPMR, RTM_NEWROUTE,
  ipmr_rtm_route, NULL, NULL);
rtnl_register(RTNL_FAMILY_IPMR, RTM_DELROUTE,
-- 
2.9.4



[PATCH iproute2] ip: mroute: Add table output to show command

2017-06-14 Thread Donald Sharp
When the user specifies `table all` or `table 0` to
the `ip mroute show` command we dump the entirety of
the known mroute tables.  Without some sort of
divider to tell us what table we are looking at,
the command is useless.

Add `Table: ` to the output of 'ip mroute show table 0'

Follow the convention established by 'ip route show table 0'
for when to display it.

Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com>
---
 ip/ipmroute.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/ip/ipmroute.c b/ip/ipmroute.c
index 4d2d758..70f4c4f 100644
--- a/ip/ipmroute.c
+++ b/ip/ipmroute.c
@@ -189,6 +189,11 @@ int print_mroute(const struct sockaddr_nl *who, struct 
nlmsghdr *n, void *arg)
fprintf(fp, ", Age %4i.%.2i", (int)tv.tv_sec,
(int)tv.tv_usec/1);
}
+
+   if (table && (table != RT_TABLE_MAIN || show_details > 0) && !filter.tb)
+   fprintf(fp, " Table: %s",
+   rtnl_rttable_n2a(table, b1, sizeof(b1)));
+
fprintf(fp, "\n");
fflush(fp);
return 0;
-- 
2.1.4



Re: ipmr: MFC routes when VIF deleted

2017-06-11 Thread Donald Sharp
I would argue that this is just an unintended side effect of the
original implementation.  Shuffling interface vifs seems like a good
way to churn a significant number of mroutes to me.  If you want to
use an interface, it is probably already going to be configured with
its own vif, and as such it's just better to figure out the new
proper RPF and insert that mroute.  If it isn't, then I need to bring
up that new vif (and all its associated PIM information, like
establishing a new neighbor relationship) when I do need to shuffle
interface vifs.  While this is happening, I'm dropping a significant
number of packets, and all our end users are going to be
screaming at us to fix that hole.


donald



On Sun, Jun 11, 2017 at 12:34 PM, Nikolay Aleksandrov
 wrote:
> On 11/06/17 11:55, Yotam Gigi wrote:
>> I have been looking into some weird behavior, and I am not sure whether it is
>> a bug or a feature.
>>
>> When a VIF with index v gets deleted, the MFC routes does not get updated, 
>> which
>> means that there can be routes pointing to that VIF. On datapath, when packet
>> hits that route, the VIF validity will be checked and will not be sent to 
>> that
>> device (but still, the route does not get updated).  Now, if the user creates
>> another VIF with the same index v but different underlay device, the same 
>> route
>> will forward the traffic to that device.
>>
>> It is relevant to mention that when user adds a MFC route, only the active 
>> VIFs
>> are used, so the flow of adding a route with dummy VIF indices and then
>> connecting those VIF indices to real device is not supported. The only way to
>> create a MFC route that has non existing VIFs is to create one with existing
>> VIFs and then delete them.
>>
>> Do we really want to support that?  To me, it looks like a buggy flow and I
>> suggest that upon VIF deletion, the MFC routes will be updated to not point 
>> to
>> any non existing VIF indices.
>>
>
> Hi Yotam,
> I'm not strongly against such change but my feeling is that we shouldn't 
> change it.
> I think we shouldn't change it because we cannot guarantee that we won't 
> break some
> user-space app that relies on this behaviour or uses it as an optimization.
> User-space ipmr apps work in sync with the kernel and are usually the only 
> ones
> doing such changes so their internal state will be always valid, and I'd guess
> they already deal with this one way or another.
> My second argument is a minor one and is about performance. There are some 
> apps
> (e.g. pimd) which use interface add/del on interface state change (up/down) 
> and
> this could make these ops slower on large setups.
>
> Again I see how this could be helpful and should've probably been like that 
> from the
> start, so if other people feel confident we won't break anything then I 
> wouldn't
> mind the change.
>
> Thanks,
>  Nik
>
>
>


[PATCH net v3] net: ipmr: Fix some mroute forwarding issues in vrf's

2017-06-10 Thread Donald Sharp
This patch fixes two issues:

1) When forwarding on *,G mroutes that are in a vrf, the
kernel was dropping information about the actual incoming
interface when calling ip_mr_forward from ip_mr_input.
This caused ip_mr_forward to send the multicast packet
back out the incoming interface.  Fix this by
modifying ip_mr_forward to be handed the correctly
resolved dev.

2) When an unresolved cache entry is created we store
the incoming skb on the unresolved cache entry and
upon mroute resolution from the user space daemon,
we attempt to forward the packet.  Again we were
not resolving to the correct incoming device for
a vrf scenario, before calling ip_mr_forward.
Fix this by resolving to the correct interface
and calling ip_mr_forward with the result.

Fixes: e58e41596811 ("net: Enable support for VRF with ipv4 multicast")
Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com>
---
v2: Fixed title
v3: Addressed Review comments by Andrew Lunn and David Ahern

 net/ipv4/ipmr.c | 32 +++-
 1 file changed, 15 insertions(+), 17 deletions(-)

diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 551de4d..09368a1 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -101,8 +101,8 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 
id);
 static void ipmr_free_table(struct mr_table *mrt);
 
 static void ip_mr_forward(struct net *net, struct mr_table *mrt,
- struct sk_buff *skb, struct mfc_cache *cache,
- int local);
+ struct net_device *dev, struct sk_buff *skb,
+ struct mfc_cache *cache, int local);
 static int ipmr_cache_report(struct mr_table *mrt,
 struct sk_buff *pkt, vifi_t vifi, int assert);
 static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
@@ -988,7 +988,7 @@ static void ipmr_cache_resolve(struct net *net, struct 
mr_table *mrt,
 
rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
} else {
-   ip_mr_forward(net, mrt, skb, c, 0);
+   ip_mr_forward(net, mrt, skb->dev, skb, c, 0);
}
}
 }
@@ -1073,7 +1073,7 @@ static int ipmr_cache_report(struct mr_table *mrt,
 
 /* Queue a packet for resolution. It gets locked cache entry! */
 static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
-struct sk_buff *skb)
+struct sk_buff *skb, struct net_device *dev)
 {
const struct iphdr *iph = ip_hdr(skb);
struct mfc_cache *c;
@@ -1130,6 +1130,10 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, 
vifi_t vifi,
kfree_skb(skb);
err = -ENOBUFS;
} else {
+   if (dev) {
+   skb->dev = dev;
+   skb->skb_iif = dev->ifindex;
+   }
skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
err = 0;
}
@@ -1828,10 +1832,10 @@ static int ipmr_find_vif(struct mr_table *mrt, struct 
net_device *dev)
 
 /* "local" means that we should preserve one skb (for local delivery) */
 static void ip_mr_forward(struct net *net, struct mr_table *mrt,
- struct sk_buff *skb, struct mfc_cache *cache,
- int local)
+ struct net_device *dev, struct sk_buff *skb,
+ struct mfc_cache *cache, int local)
 {
-   int true_vifi = ipmr_find_vif(mrt, skb->dev);
+   int true_vifi = ipmr_find_vif(mrt, dev);
int psend = -1;
int vif, ct;
 
@@ -1853,13 +1857,7 @@ static void ip_mr_forward(struct net *net, struct 
mr_table *mrt,
}
 
/* Wrong interface: drop packet and (maybe) send PIM assert. */
-   if (mrt->vif_table[vif].dev != skb->dev) {
-   struct net_device *mdev;
-
-   mdev = l3mdev_master_dev_rcu(mrt->vif_table[vif].dev);
-   if (mdev == skb->dev)
-   goto forward;
-
+   if (mrt->vif_table[vif].dev != dev) {
if (rt_is_output_route(skb_rtable(skb))) {
/* It is our own packet, looped back.
 * Very complicated situation...
@@ -2053,7 +2051,7 @@ int ip_mr_input(struct sk_buff *skb)
read_lock(&mrt_lock);
vif = ipmr_find_vif(mrt, dev);
if (vif >= 0) {
-   int err2 = ipmr_cache_unresolved(mrt, vif, skb);
+   int err2 = ipmr_cache_unresolved(mrt, vif, skb, dev);
read_unlock(&mrt_lock);
 
return err2;
@@ -2064,7 +2062,7 @@ int ip_mr_input(struct sk_buff *skb)
}
 
read_lock(&mrt_lock);
-   ip_mr_forward(net, mrt, skb, cache, local);
+   ip_mr_forward(net, mrt, dev, skb, cache, local);
read

Re: [PATCH net v2] net: ipmr: Fix some mroute forwarding issues in vrf's

2017-06-09 Thread Donald Sharp
I'll change it over to this way.  No problem.

donald

On Fri, Jun 9, 2017 at 10:54 AM, David Ahern <dsah...@gmail.com> wrote:
> On 6/9/17 8:22 AM, Donald Sharp wrote:
>> @@ -988,7 +988,16 @@ static void ipmr_cache_resolve(struct net *net, struct 
>> mr_table *mrt,
>>
>>   rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
>>   } else {
>> - ip_mr_forward(net, mrt, skb, c, 0);
>> +struct net_device *dev = skb->dev;
>> +
>> +if (netif_is_l3_master(dev)) {
>> +dev = __dev_get_by_index(net, 
>> IPCB(skb)->iif);
>> +if (!dev) {
>> +kfree_skb(skb);
>> +continue;
>> +}
>> +}
>> +ip_mr_forward(net, mrt, dev, skb, c, 0);
>>   }
>>   }
>>  }
>
> What about changing ipmr_cache_unresolved to take the dev it looked up
> already and then have ipmr_cache_unresolved reset skb->dev to it (and
> reset skb->skb_iff to dev->ifindex) when queuing to the unresolved list?
> Since this path does not have a local delivery, resetting the skb->dev
> will be fine and it avoids this second lookup using IPCB(skb)->iif:
>
> @@ -1073,7 +1073,7 @@ static int ipmr_cache_report(struct mr_table *mrt,
>
>  /* Queue a packet for resolution. It gets locked cache entry! */
>  static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
> -struct sk_buff *skb)
> +struct sk_buff *skb, struct net_device
> *dev)
>  {
> const struct iphdr *iph = ip_hdr(skb);
> struct mfc_cache *c;
> @@ -1130,6 +1130,10 @@ static int ipmr_cache_unresolved(struct mr_table
> *mrt, vifi_t vifi,
> kfree_skb(skb);
> err = -ENOBUFS;
> } else {
> +   if (dev) {
> +   skb->dev = dev;
> +   skb->skb_iif = dev->ifindex;
> +   }
> skb_queue_tail(>mfc_un.unres.unresolved, skb);
> err = 0;
> }
>
>
> Combined with Thomas' earlier change this check in ip_mr_forward becomes:
>
> diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
> index 9374b99c7c17..1393a4d18a9a 100644
> --- a/net/ipv4/ipmr.c
> +++ b/net/ipv4/ipmr.c
> @@ -1853,13 +1853,7 @@ static void ip_mr_forward(struct net *net, struct
> mr_table *mrt,
> }
>
> /* Wrong interface: drop packet and (maybe) send PIM assert. */
> -   if (mrt->vif_table[vif].dev != skb->dev) {
> -   struct net_device *mdev;
> -
> -   mdev = l3mdev_master_dev_rcu(mrt->vif_table[vif].dev);
> -   if (mdev == skb->dev)
> -   goto forward;
> -
> +   if (mrt->vif_table[vif].dev != dev) {
> if (rt_is_output_route(skb_rtable(skb))) {
> /* It is our own packet, looped back.
>  * Very complicated situation...


[PATCH net v2] net: ipmr: Fix some mroute forwarding issues in vrf's

2017-06-09 Thread Donald Sharp
This patch fixes two issues:

1) When forwarding on *,G mroutes that are in a vrf, the
kernel was dropping information about the actual incoming
interface when calling ip_mr_forward from ip_mr_input.
This caused ip_mr_forward to send the multicast packet
back out the incoming interface.  Fix this by
modifying ip_mr_forward to be handed the correctly
resolved dev.

2) When an unresolved cache entry is created we store
the incoming skb on the unresolved cache entry and
upon mroute resolution from the user space daemon,
we attempt to forward the packet.  Again we were
not resolving to the correct incoming device for
a vrf scenario, before calling ip_mr_forward.
Fix this by resolving to the correct interface
and calling ip_mr_forward with the result.

Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com>
---
v2: Fixed title
 
 net/ipv4/ipmr.c | 27 ++-
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 551de4d..559009e 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -101,8 +101,8 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 
id);
 static void ipmr_free_table(struct mr_table *mrt);
 
 static void ip_mr_forward(struct net *net, struct mr_table *mrt,
- struct sk_buff *skb, struct mfc_cache *cache,
- int local);
+ struct net_device *dev, struct sk_buff *skb,
+  struct mfc_cache *cache, int local);
 static int ipmr_cache_report(struct mr_table *mrt,
 struct sk_buff *pkt, vifi_t vifi, int assert);
 static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
@@ -988,7 +988,16 @@ static void ipmr_cache_resolve(struct net *net, struct 
mr_table *mrt,
 
rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
} else {
-   ip_mr_forward(net, mrt, skb, c, 0);
+struct net_device *dev = skb->dev;
+
+if (netif_is_l3_master(dev)) {
+dev = __dev_get_by_index(net, IPCB(skb)->iif);
+if (!dev) {
+kfree_skb(skb);
+continue;
+}
+}
+ip_mr_forward(net, mrt, dev, skb, c, 0);
}
}
 }
@@ -1828,10 +1837,10 @@ static int ipmr_find_vif(struct mr_table *mrt, struct 
net_device *dev)
 
 /* "local" means that we should preserve one skb (for local delivery) */
 static void ip_mr_forward(struct net *net, struct mr_table *mrt,
- struct sk_buff *skb, struct mfc_cache *cache,
- int local)
+ struct net_device *dev, struct sk_buff *skb,
+  struct mfc_cache *cache, int local)
 {
-   int true_vifi = ipmr_find_vif(mrt, skb->dev);
+   int true_vifi = ipmr_find_vif(mrt, dev);
int psend = -1;
int vif, ct;
 
@@ -1853,11 +1862,11 @@ static void ip_mr_forward(struct net *net, struct 
mr_table *mrt,
}
 
/* Wrong interface: drop packet and (maybe) send PIM assert. */
-   if (mrt->vif_table[vif].dev != skb->dev) {
+   if (mrt->vif_table[vif].dev != dev) {
struct net_device *mdev;
 
mdev = l3mdev_master_dev_rcu(mrt->vif_table[vif].dev);
-   if (mdev == skb->dev)
+   if (mdev == dev)
goto forward;
 
if (rt_is_output_route(skb_rtable(skb))) {
@@ -2064,7 +2073,7 @@ int ip_mr_input(struct sk_buff *skb)
}
 
read_lock(&mrt_lock);
-   ip_mr_forward(net, mrt, skb, cache, local);
+   ip_mr_forward(net, mrt, dev, skb, cache, local);
read_unlock(&mrt_lock);
 
if (local)
-- 
2.9.4



[PATCH] Fix some mroute forwarding issues in vrf's

2017-06-09 Thread Donald Sharp
This patch fixes two issues:

1) When forwarding on *,G mroutes that are in a vrf, the
kernel was dropping information about the actual incoming
interface when calling ip_mr_forward from ip_mr_input.
This caused ip_mr_forward to send the multicast packet
back out the incoming interface.  Fix this by
modifying ip_mr_forward to be handed the correctly
resolved dev.

2) When an unresolved cache entry is created we store
the incoming skb on the unresolved cache entry and
upon mroute resolution from the user space daemon,
we attempt to forward the packet.  Again we were
not resolving to the correct incoming device for
a vrf scenario, before calling ip_mr_forward.
Fix this by resolving to the correct interface
and calling ip_mr_forward with the result.

Signed-off-by: Donald Sharp <sha...@cumulusnetworks.com>
---
 net/ipv4/ipmr.c | 27 ++-
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 551de4d..559009e 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -101,8 +101,8 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 
id);
 static void ipmr_free_table(struct mr_table *mrt);
 
 static void ip_mr_forward(struct net *net, struct mr_table *mrt,
- struct sk_buff *skb, struct mfc_cache *cache,
- int local);
+ struct net_device *dev, struct sk_buff *skb,
+  struct mfc_cache *cache, int local);
 static int ipmr_cache_report(struct mr_table *mrt,
 struct sk_buff *pkt, vifi_t vifi, int assert);
 static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
@@ -988,7 +988,16 @@ static void ipmr_cache_resolve(struct net *net, struct 
mr_table *mrt,
 
rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
} else {
-   ip_mr_forward(net, mrt, skb, c, 0);
+struct net_device *dev = skb->dev;
+
+if (netif_is_l3_master(dev)) {
+dev = __dev_get_by_index(net, IPCB(skb)->iif);
+if (!dev) {
+kfree_skb(skb);
+continue;
+}
+}
+ip_mr_forward(net, mrt, dev, skb, c, 0);
}
}
 }
@@ -1828,10 +1837,10 @@ static int ipmr_find_vif(struct mr_table *mrt, struct 
net_device *dev)
 
 /* "local" means that we should preserve one skb (for local delivery) */
 static void ip_mr_forward(struct net *net, struct mr_table *mrt,
- struct sk_buff *skb, struct mfc_cache *cache,
- int local)
+ struct net_device *dev, struct sk_buff *skb,
+  struct mfc_cache *cache, int local)
 {
-   int true_vifi = ipmr_find_vif(mrt, skb->dev);
+   int true_vifi = ipmr_find_vif(mrt, dev);
int psend = -1;
int vif, ct;
 
@@ -1853,11 +1862,11 @@ static void ip_mr_forward(struct net *net, struct 
mr_table *mrt,
}
 
/* Wrong interface: drop packet and (maybe) send PIM assert. */
-   if (mrt->vif_table[vif].dev != skb->dev) {
+   if (mrt->vif_table[vif].dev != dev) {
struct net_device *mdev;
 
mdev = l3mdev_master_dev_rcu(mrt->vif_table[vif].dev);
-   if (mdev == skb->dev)
+   if (mdev == dev)
goto forward;
 
if (rt_is_output_route(skb_rtable(skb))) {
@@ -2064,7 +2073,7 @@ int ip_mr_input(struct sk_buff *skb)
}
 
read_lock(&mrt_lock);
-   ip_mr_forward(net, mrt, skb, cache, local);
+   ip_mr_forward(net, mrt, dev, skb, cache, local);
read_unlock(&mrt_lock);
 
if (local)
-- 
2.9.4