Re: [patch net-next 14/17] mlxsw: spectrum_router: Add support for IPv6 routes addition / deletion

2017-07-19 Thread Ido Schimmel
On Wed, Jul 19, 2017 at 10:36:52AM -0600, David Ahern wrote:
> >> 2. How are routes with devices unrelated to ports owned by this driver
> >> handled?
> > 
> > They are handled just like any other route, but they don't have a valid
> > RIF (for directly connected routes) or an adjacency group (for
> > gatewayed routes), so the check in mlxsw_sp_fib_entry_should_offload()
> > will return false and they will be programmed to the device with trap
> > action, but using a trap ID (RTR_INGRESS0) with a lower traffic class
> > than IP2ME, so packets that actually need to be locally received by the
> > CPU have a better QoS.
> 
> so mlxsw keeps a copy of the complete FIB for IPv4 and IPv6, even routes
> unrelated to its ports?

If we don't reflect all the routes in the system to the ASIC, then we'll
have a broken routing table and a different behavior from what you would
get with plain NICs.


Re: [patch net-next 14/17] mlxsw: spectrum_router: Add support for IPv6 routes addition / deletion

2017-07-19 Thread David Ahern
On 7/19/17 10:30 AM, Ido Schimmel wrote:
>> rif == 0 means the dst device is not related to a port owned by this
>> driver?
> 
> Yes.
> 
>>
>>
>> A lot to process so I am sure I missed the answer to these:
>>
>> 1. How do you handle host routes for local addresses? IPv6 inserts the
>> host and anycast routes with the device set to 'lo' (or VRF device)
>> instead of the device with the address. I have a patch to change this,
>> but needs more testing
> 
> In mlxsw_sp_fib6_entry_type_set() we check for RTF_LOCAL and set the
> FIB entry type to MLXSW_SP_FIB_ENTRY_TYPE_TRAP. Packets hitting these
> routes will be trapped with IP2ME trap ID towards the CPU.

got it. thanks.

> 
>> 2. How are routes with devices unrelated to ports owned by this driver
>> handled?
> 
> They are handled just like any other route, but they don't have a valid
> RIF (for directly connected routes) or an adjacency group (for
> gatewayed routes), so the check in mlxsw_sp_fib_entry_should_offload()
> will return false and they will be programmed to the device with trap
> action, but using a trap ID (RTR_INGRESS0) with a lower traffic class
> than IP2ME, so packets that actually need to be locally received by the
> CPU have a better QoS.

so mlxsw keeps a copy of the complete FIB for IPv4 and IPv6, even routes
unrelated to its ports?


Re: [patch net-next 14/17] mlxsw: spectrum_router: Add support for IPv6 routes addition / deletion

2017-07-19 Thread Ido Schimmel
On Wed, Jul 19, 2017 at 10:14:54AM -0600, David Ahern wrote:
> On 7/19/17 1:02 AM, Jiri Pirko wrote:
> > @@ -2094,6 +2106,40 @@ mlxsw_sp_fib_entry_should_offload(const struct 
> > mlxsw_sp_fib_entry *fib_entry)
> > }
> >  }
> >  
> > +static void
> > +mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
> > +{
> > +   struct mlxsw_sp_fib6_entry *fib6_entry;
> > +   struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
> > +
> > +   fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
> > + common);
> > +   list_for_each_entry(mlxsw_sp_rt6, _entry->rt6_list, list) {
> > +   struct rt6_info *rt = mlxsw_sp_rt6->rt;
> > +
> > +   write_lock_bh(>rt6i_table->tb6_lock);
> > +   rt->rt6i_flags |= RTF_OFFLOAD;
> > +   write_unlock_bh(>rt6i_table->tb6_lock);
> 
> Seems wrong. A device driver should not be taking FIB table locks.

Will remove this in v2.

[...]

> > +static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
> > + struct mlxsw_sp_nexthop_group *nh_grp,
> > + struct mlxsw_sp_nexthop *nh,
> > + const struct rt6_info *rt)
> > +{
> > +   struct net_device *dev = rt->dst.dev;
> > +   struct mlxsw_sp_rif *rif;
> > +   int err;
> > +
> > +   nh->nh_grp = nh_grp;
> > +   memcpy(>gw_addr, >rt6i_gateway, sizeof(nh->gw_addr));
> > +
> > +   if (!dev)
> > +   return 0;
> > +
> > +   rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
> > +   if (!rif)
> > +   return 0;
> 
> rif == 0 means the dst device is not related to a port owned by this
> driver?

Yes.

> 
> 
> A lot to process so I am sure I missed the answer to these:
> 
> 1. How do you handle host routes for local addresses? IPv6 inserts the
> host and anycast routes with the device set to 'lo' (or VRF device)
> instead of the device with the address. I have a patch to change this,
> but needs more testing

In mlxsw_sp_fib6_entry_type_set() we check for RTF_LOCAL and set the
FIB entry type to MLXSW_SP_FIB_ENTRY_TYPE_TRAP. Packets hitting these
routes will be trapped with IP2ME trap ID towards the CPU.

> 2. How are routes with devices unrelated to ports owned by this driver
> handled?

They are handled just like any other route, but they don't have a valid
RIF (for directly connected routes) or an adjacency group (for
gatewayed routes), so the check in mlxsw_sp_fib_entry_should_offload()
will return false and they will be programmed to the device with trap
action, but using a trap ID (RTR_INGRESS0) with a lower traffic class
than IP2ME, so packets that actually need to be locally received by the
CPU have a better QoS.


Re: [patch net-next 14/17] mlxsw: spectrum_router: Add support for IPv6 routes addition / deletion

2017-07-19 Thread David Ahern
On 7/19/17 1:02 AM, Jiri Pirko wrote:
> @@ -2094,6 +2106,40 @@ mlxsw_sp_fib_entry_should_offload(const struct 
> mlxsw_sp_fib_entry *fib_entry)
>   }
>  }
>  
> +static void
> +mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
> +{
> + struct mlxsw_sp_fib6_entry *fib6_entry;
> + struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
> +
> + fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
> +   common);
> + list_for_each_entry(mlxsw_sp_rt6, _entry->rt6_list, list) {
> + struct rt6_info *rt = mlxsw_sp_rt6->rt;
> +
> + write_lock_bh(>rt6i_table->tb6_lock);
> + rt->rt6i_flags |= RTF_OFFLOAD;
> + write_unlock_bh(>rt6i_table->tb6_lock);

Seems wrong. A device driver should not be taking FIB table locks.


> + }
> +}
> +
> +static void
> +mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
> +{
> + struct mlxsw_sp_fib6_entry *fib6_entry;
> + struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
> +
> + fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
> +   common);
> + list_for_each_entry(mlxsw_sp_rt6, _entry->rt6_list, list) {
> + struct rt6_info *rt = mlxsw_sp_rt6->rt;
> +
> + write_lock_bh(>rt6i_table->tb6_lock);
> + rt->rt6i_flags &= ~RTF_OFFLOAD;
> + write_unlock_bh(>rt6i_table->tb6_lock);

same here.




> +static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
> +   struct mlxsw_sp_nexthop_group *nh_grp,
> +   struct mlxsw_sp_nexthop *nh,
> +   const struct rt6_info *rt)
> +{
> + struct net_device *dev = rt->dst.dev;
> + struct mlxsw_sp_rif *rif;
> + int err;
> +
> + nh->nh_grp = nh_grp;
> + memcpy(>gw_addr, >rt6i_gateway, sizeof(nh->gw_addr));
> +
> + if (!dev)
> + return 0;
> +
> + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
> + if (!rif)
> + return 0;

rif == 0 means the dst device is not related to a port owned by this
driver?


A lot to process so I am sure I missed the answer to these:

1. How do you handle host routes for local addresses? IPv6 inserts the
host and anycast routes with the device set to 'lo' (or VRF device)
instead of the device with the address. I have a patch to change this,
but needs more testing

2. How are routes with devices unrelated to ports owned by this driver
handled?


[patch net-next 14/17] mlxsw: spectrum_router: Add support for IPv6 routes addition / deletion

2017-07-19 Thread Jiri Pirko
From: Ido Schimmel 

Allow directly connected and remote unicast IPv6 routes to be programmed
to the device's tables.

As with IPv4, identical routes - sharing the same destination prefix -
are ordered in a FIB node according to their table ID and then the
metric. While the kernel doesn't share the same trie for the local and
main table, this does happen in the device, so ordering according to
table ID is needed.

Since individual nexthops can be added and deleted in IPv6, each FIB
entry stores a linked list of the rt6_info structs it represents. Upon
the addition or deletion of a nexthop, a new nexthop group is allocated
according to the new configuration and the old one is destroyed.
Identical groups aren't currently consolidated, but will be in a
follow-up patchset.

Signed-off-by: Ido Schimmel 
Signed-off-by: Jiri Pirko 
---
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  | 685 -
 1 file changed, 682 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index cf06b7d..33e5b16 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -44,6 +44,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -407,6 +408,17 @@ struct mlxsw_sp_fib4_entry {
u8 type;
 };
 
+struct mlxsw_sp_fib6_entry {
+   struct mlxsw_sp_fib_entry common;
+   struct list_head rt6_list;
+   unsigned int nrt6;
+};
+
+struct mlxsw_sp_rt6 {
+   struct list_head list;
+   struct rt6_info *rt;
+};
+
 enum mlxsw_sp_l3proto {
MLXSW_SP_L3_PROTO_IPV4,
MLXSW_SP_L3_PROTO_IPV6,
@@ -2094,6 +2106,40 @@ mlxsw_sp_fib_entry_should_offload(const struct 
mlxsw_sp_fib_entry *fib_entry)
}
 }
 
+static void
+mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
+{
+   struct mlxsw_sp_fib6_entry *fib6_entry;
+   struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
+
+   fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
+ common);
+   list_for_each_entry(mlxsw_sp_rt6, _entry->rt6_list, list) {
+   struct rt6_info *rt = mlxsw_sp_rt6->rt;
+
+   write_lock_bh(>rt6i_table->tb6_lock);
+   rt->rt6i_flags |= RTF_OFFLOAD;
+   write_unlock_bh(>rt6i_table->tb6_lock);
+   }
+}
+
+static void
+mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
+{
+   struct mlxsw_sp_fib6_entry *fib6_entry;
+   struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
+
+   fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
+ common);
+   list_for_each_entry(mlxsw_sp_rt6, _entry->rt6_list, list) {
+   struct rt6_info *rt = mlxsw_sp_rt6->rt;
+
+   write_lock_bh(>rt6i_table->tb6_lock);
+   rt->rt6i_flags &= ~RTF_OFFLOAD;
+   write_unlock_bh(>rt6i_table->tb6_lock);
+   }
+}
+
 static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry 
*fib_entry)
 {
fib_entry->offloaded = true;
@@ -2103,7 +2149,8 @@ static void mlxsw_sp_fib_entry_offload_set(struct 
mlxsw_sp_fib_entry *fib_entry)
fib_info_offload_inc(fib_entry->nh_group->key.fi);
break;
case MLXSW_SP_L3_PROTO_IPV6:
-   WARN_ON_ONCE(1);
+   mlxsw_sp_fib6_entry_offload_set(fib_entry);
+   break;
}
 }
 
@@ -2115,7 +2162,8 @@ mlxsw_sp_fib_entry_offload_unset(struct 
mlxsw_sp_fib_entry *fib_entry)
fib_info_offload_dec(fib_entry->nh_group->key.fi);
break;
case MLXSW_SP_L3_PROTO_IPV6:
-   WARN_ON_ONCE(1);
+   mlxsw_sp_fib6_entry_offload_unset(fib_entry);
+   break;
}
 
fib_entry->offloaded = false;
@@ -2829,6 +2877,602 @@ static void mlxsw_sp_router_fib4_del(struct mlxsw_sp 
*mlxsw_sp,
mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
 }
 
+static bool mlxsw_sp_fib6_rt_should_ignore(const struct rt6_info *rt)
+{
+   /* Packets with link-local destination IP arriving to the router
+* are trapped to the CPU, so no need to program specific routes
+* for them.
+*/
+   if (ipv6_addr_type(>rt6i_dst.addr) & IPV6_ADDR_LINKLOCAL)
+   return true;
+
+   /* Multicast routes aren't supported, so ignore them. Neighbour
+* Discovery packets are specifically trapped.
+*/
+   if (ipv6_addr_type(>rt6i_dst.addr) & IPV6_ADDR_MULTICAST)
+   return true;
+
+   /* Cloned routes are irrelevant in the forwarding path. */
+   if (rt->rt6i_flags & RTF_CACHE)
+   return true;
+
+   return false;
+}
+
+static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct rt6_info *rt)
+{
+   struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
+
+