Re: [ovs-dev] [PATCH v8 1/3] dpif-netlink: Detect Out-Of-Resource condition on a netdev
On 26 Oct 2018, at 11:53, Simon Horman wrote: > On Fri, 26 Oct 2018 at 08:58, Eelco Chaudron wrote: > I have a general comment, don't know where to put it, so I put it here. Some hardware might have multiple tables. If one type of table is full the ENOSPC might be returned, but it does not mean all types of flows can no longer be offloaded. This might be a situation to think about. >>> >>> Ok, thanks for bringing it up. Currently from OvS daemon's perspective >>> a >>> request to add/delete a flow is issued on a netdev and the failure >>> indicates >>> that the particular netdev is out of resources. If we need to handle >>> the >>> condition where HW has different tables, we need to further extend >>> this >>> design and the tc interfaces to propagate this fine-grained >>> information. >> >> Would be good if other hardware vendors can comment here? >> > > There was a discussion in another forum involving at least Mellanox, > Broadcom and Netronome. > From a Netronome point of view this scheme is satisfactory and my > recollection is that > was the agreement of those involved in the discussion. Thanks for the clarification… ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Re: [ovs-dev] [PATCH v8 1/3] dpif-netlink: Detect Out-Of-Resource condition on a netdev
On Fri, 26 Oct 2018 at 08:58, Eelco Chaudron wrote: > > > On 25 Oct 2018, at 16:00, Sriharsha Basavapatna wrote: > > > Hi Eelco, > > > > Thanks for your comments, please see my response below. > > On Fri, Oct 19, 2018 at 7:52 PM Eelco Chaudron > > wrote: > >> > >> On 18 Oct 2018, at 18:13, Sriharsha Basavapatna via dev wrote: > >> > >>> This is the first patch in the patch-set to support dynamic > >>> rebalancing > >>> of offloaded flows. > >>> > >>> The patch detects OOR condition on a netdev port when ENOSPC error > >>> is > >>> returned by TC-Flower while adding a flow rule. A new structure is > >>> added > >>> to the netdev called "netdev_hw_info", to store OOR related > >>> information > >>> required to perform dynamic offload-rebalancing. > >>> > >>> Signed-off-by: Sriharsha Basavapatna > >>> > >>> Co-authored-by: Venkat Duvvuru > >>> Signed-off-by: Venkat Duvvuru > >>> Reviewed-by: Sathya Perla > >>> Reviewed-by: Simon Horman > >>> Reviewed-by: Ben Pfaff > >>> --- > >>> lib/dpif-netlink.c| 18 +- > >>> lib/flow.c| 25 + > >>> lib/flow.h| 1 + > >>> lib/netdev-provider.h | 11 +++ > >>> lib/netdev.c | 34 ++ > >>> lib/netdev.h | 3 +++ > >>> 6 files changed, 91 insertions(+), 1 deletion(-) > >>> > >>> diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c > >>> index e6d5a6ec5..b9ce9cbe2 100644 > >>> --- a/lib/dpif-netlink.c > >>> +++ b/lib/dpif-netlink.c > >>> @@ -2178,7 +2178,23 @@ parse_flow_put(struct dpif_netlink *dpif, > >>> struct dpif_flow_put *put) > >>> > >>> VLOG_DBG("added flow"); > >>> } else if (err != EEXIST) { > >>> -VLOG_ERR_RL(, "failed to offload flow: %s", > >>> ovs_strerror(err)); > >>> +struct netdev *oor_netdev = NULL; > >>> +if (err == ENOSPC && > >>> netdev_is_offload_rebalance_policy_enabled()) { > >>> +/* > >>> + * We need to set OOR on the input netdev (i.e, 'dev') > >>> for the > >>> + * flow. 
But if the flow has a tunnel attribute (i.e, > >>> decap action, > >>> + * with a virtual device like a VxLAN interface as its > >>> in-port), > >>> + * then lookup and set OOR on the underlying tunnel > >>> (real) netdev. > >>> + */ > >>> +oor_netdev = > >>> flow_get_tunnel_netdev(); > >>> +if (!oor_netdev) { > >>> +/* Not a 'tunnel' flow */ > >>> +oor_netdev = dev; > >>> +} > >>> +netdev_set_hw_info(oor_netdev, HW_INFO_TYPE_OOR, true); > >> > >> Why not just oor_netdev->hw_info.oor = true, see also below. > > > > The original code was directly accessing netdev members. It was > > changed > > based on a review comment to avoid direct access and add an interface. > > > >> > >> I have a general comment, don't know where to put it, so I put it > >> here. > >> Some hardware might have multiple tables. If one type of table is > >> full > >> the ENOSPC might be returned, but it does not mean all type of flows > >> can > >> no longer be offloaded. This might be a situation to think about. > > > > Ok, thanks for bringing it up. Currently from OvS daemon's perspective > > a > > request to add/delete a flow is issued on a netdev and the failure > > indicates > > that the particular netdev is out of resources. If we need to handle > > the > > condition where HW has different tables, we need to further extend > > this > > design and the tc interfaces to propagate this fine grained > > information. > > Would be good if other hardware vendors can comment here? > There was a discussion in another forum involving at least Mellanox, Broadcom and Netronome. >From a Netronome point of view this scheme is satisfactory and my recollection is that was the agreement of those involved in the discussion. > > >> > >>> +} > >>> +VLOG_ERR_RL(, "failed to offload flow: %s: %s", > >>> ovs_strerror(err), > >>> +(oor_netdev ? 
oor_netdev->name : dev->name)); > >>> } > >>> > >>> out: > >>> diff --git a/lib/flow.c b/lib/flow.c > >>> index 77ed3d9df..a39807908 100644 > >>> --- a/lib/flow.c > >>> +++ b/lib/flow.c > >>> @@ -19,6 +19,7 @@ > >>> #include > >>> #include > >>> #include > >>> +#include > >>> #include > >>> #include > >>> #include > >>> @@ -41,6 +42,8 @@ > >>> #include "unaligned.h" > >>> #include "util.h" > >>> #include "openvswitch/nsh.h" > >>> +#include "ovs-router.h" > >>> +#include "lib/netdev-provider.h" > >>> > >>> COVERAGE_DEFINE(flow_extract); > >>> COVERAGE_DEFINE(miniflow_malloc); > >>> @@ -3403,3 +3406,25 @@ flow_limit_vlans(int vlan_limit) > >>> flow_vlan_limit = MIN(vlan_limit, FLOW_MAX_VLAN_HEADERS); > >>> } > >>> } > >>> + > >>> +struct netdev * > >>> +flow_get_tunnel_netdev(struct flow_tnl *tunnel) > >>> +{ > >>> +char iface[IFNAMSIZ]; > >>> +struct in6_addr ip6; > >>> +struct
Re: [ovs-dev] [PATCH v8 1/3] dpif-netlink: Detect Out-Of-Resource condition on a netdev
On 25 Oct 2018, at 16:00, Sriharsha Basavapatna wrote: Hi Eelco, Thanks for your comments, please see my response below. On Fri, Oct 19, 2018 at 7:52 PM Eelco Chaudron wrote: On 18 Oct 2018, at 18:13, Sriharsha Basavapatna via dev wrote: This is the first patch in the patch-set to support dynamic rebalancing of offloaded flows. The patch detects OOR condition on a netdev port when ENOSPC error is returned by TC-Flower while adding a flow rule. A new structure is added to the netdev called "netdev_hw_info", to store OOR related information required to perform dynamic offload-rebalancing. Signed-off-by: Sriharsha Basavapatna Co-authored-by: Venkat Duvvuru Signed-off-by: Venkat Duvvuru Reviewed-by: Sathya Perla Reviewed-by: Simon Horman Reviewed-by: Ben Pfaff --- lib/dpif-netlink.c| 18 +- lib/flow.c| 25 + lib/flow.h| 1 + lib/netdev-provider.h | 11 +++ lib/netdev.c | 34 ++ lib/netdev.h | 3 +++ 6 files changed, 91 insertions(+), 1 deletion(-) diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c index e6d5a6ec5..b9ce9cbe2 100644 --- a/lib/dpif-netlink.c +++ b/lib/dpif-netlink.c @@ -2178,7 +2178,23 @@ parse_flow_put(struct dpif_netlink *dpif, struct dpif_flow_put *put) VLOG_DBG("added flow"); } else if (err != EEXIST) { -VLOG_ERR_RL(, "failed to offload flow: %s", ovs_strerror(err)); +struct netdev *oor_netdev = NULL; +if (err == ENOSPC && netdev_is_offload_rebalance_policy_enabled()) { +/* + * We need to set OOR on the input netdev (i.e, 'dev') for the + * flow. But if the flow has a tunnel attribute (i.e, decap action, + * with a virtual device like a VxLAN interface as its in-port), + * then lookup and set OOR on the underlying tunnel (real) netdev. + */ +oor_netdev = flow_get_tunnel_netdev(); +if (!oor_netdev) { +/* Not a 'tunnel' flow */ +oor_netdev = dev; +} +netdev_set_hw_info(oor_netdev, HW_INFO_TYPE_OOR, true); Why not just oor_netdev->hw_info.oor = true, see also below. The original code was directly accessing netdev members. 
It was changed based on a review comment to avoid direct access and add an interface. I have a general comment, don't know where to put it, so I put it here. Some hardware might have multiple tables. If one type of table is full the ENOSPC might be returned, but it does not mean all type of flows can no longer be offloaded. This might be a situation to think about. Ok, thanks for bringing it up. Currently from OvS daemon's perspective a request to add/delete a flow is issued on a netdev and the failure indicates that the particular netdev is out of resources. If we need to handle the condition where HW has different tables, we need to further extend this design and the tc interfaces to propagate this fine grained information. Would be good if other hardware vendors can comment here? +} +VLOG_ERR_RL(, "failed to offload flow: %s: %s", ovs_strerror(err), +(oor_netdev ? oor_netdev->name : dev->name)); } out: diff --git a/lib/flow.c b/lib/flow.c index 77ed3d9df..a39807908 100644 --- a/lib/flow.c +++ b/lib/flow.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -41,6 +42,8 @@ #include "unaligned.h" #include "util.h" #include "openvswitch/nsh.h" +#include "ovs-router.h" +#include "lib/netdev-provider.h" COVERAGE_DEFINE(flow_extract); COVERAGE_DEFINE(miniflow_malloc); @@ -3403,3 +3406,25 @@ flow_limit_vlans(int vlan_limit) flow_vlan_limit = MIN(vlan_limit, FLOW_MAX_VLAN_HEADERS); } } + +struct netdev * +flow_get_tunnel_netdev(struct flow_tnl *tunnel) +{ +char iface[IFNAMSIZ]; +struct in6_addr ip6; +struct in6_addr gw; + +if (tunnel->ip_src) { +in6_addr_set_mapped_ipv4(, tunnel->ip_src); +} else if (ipv6_addr_is_set(>ipv6_src)) { +ip6 = tunnel->ipv6_src; +} else { +return NULL; +} + +if (!ovs_router_lookup(0, , iface, NULL, )) { +return NULL; +} + +return netdev_from_name(iface); +} diff --git a/lib/flow.h b/lib/flow.h index d03f1ba9c..aca60c41a 100644 --- a/lib/flow.h +++ b/lib/flow.h @@ -73,6 +73,7 @@ void flow_extract(struct dp_packet 
*, struct flow *); void flow_zero_wildcards(struct flow *, const struct flow_wildcards *); void flow_unwildcard_tp_ports(const struct flow *, struct flow_wildcards *); void flow_get_metadata(const struct flow *, struct match *flow_metadata); +struct netdev *flow_get_tunnel_netdev(struct flow_tnl *tunnel); const char *ct_state_to_string(uint32_t state); uint32_t ct_state_from_string(const char *); diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h index 5a7947351..e320dad61 100644 ---
Re: [ovs-dev] [PATCH v8 1/3] dpif-netlink: Detect Out-Of-Resource condition on a netdev
Hi Eelco, Thanks for your comments, please see my response below. On Fri, Oct 19, 2018 at 7:52 PM Eelco Chaudron wrote: > > On 18 Oct 2018, at 18:13, Sriharsha Basavapatna via dev wrote: > > > This is the first patch in the patch-set to support dynamic > > rebalancing > > of offloaded flows. > > > > The patch detects OOR condition on a netdev port when ENOSPC error is > > returned by TC-Flower while adding a flow rule. A new structure is > > added > > to the netdev called "netdev_hw_info", to store OOR related > > information > > required to perform dynamic offload-rebalancing. > > > > Signed-off-by: Sriharsha Basavapatna > > > > Co-authored-by: Venkat Duvvuru > > Signed-off-by: Venkat Duvvuru > > Reviewed-by: Sathya Perla > > Reviewed-by: Simon Horman > > Reviewed-by: Ben Pfaff > > --- > > lib/dpif-netlink.c| 18 +- > > lib/flow.c| 25 + > > lib/flow.h| 1 + > > lib/netdev-provider.h | 11 +++ > > lib/netdev.c | 34 ++ > > lib/netdev.h | 3 +++ > > 6 files changed, 91 insertions(+), 1 deletion(-) > > > > diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c > > index e6d5a6ec5..b9ce9cbe2 100644 > > --- a/lib/dpif-netlink.c > > +++ b/lib/dpif-netlink.c > > @@ -2178,7 +2178,23 @@ parse_flow_put(struct dpif_netlink *dpif, > > struct dpif_flow_put *put) > > > > VLOG_DBG("added flow"); > > } else if (err != EEXIST) { > > -VLOG_ERR_RL(, "failed to offload flow: %s", > > ovs_strerror(err)); > > +struct netdev *oor_netdev = NULL; > > +if (err == ENOSPC && > > netdev_is_offload_rebalance_policy_enabled()) { > > +/* > > + * We need to set OOR on the input netdev (i.e, 'dev') > > for the > > + * flow. But if the flow has a tunnel attribute (i.e, > > decap action, > > + * with a virtual device like a VxLAN interface as its > > in-port), > > + * then lookup and set OOR on the underlying tunnel > > (real) netdev. 
> > + */ > > +oor_netdev = flow_get_tunnel_netdev(); > > +if (!oor_netdev) { > > +/* Not a 'tunnel' flow */ > > +oor_netdev = dev; > > +} > > +netdev_set_hw_info(oor_netdev, HW_INFO_TYPE_OOR, true); > > Why not just oor_netdev->hw_info.oor = true, see also below. The original code was directly accessing netdev members. It was changed based on a review comment to avoid direct access and add an interface. > > I have a general comment, don't know where to put it, so I put it here. > Some hardware might have multiple tables. If one type of table is full > the ENOSPC might be returned, but it does not mean all type of flows can > no longer be offloaded. This might be a situation to think about. Ok, thanks for bringing it up. Currently from OvS daemon's perspective a request to add/delete a flow is issued on a netdev and the failure indicates that the particular netdev is out of resources. If we need to handle the condition where HW has different tables, we need to further extend this design and the tc interfaces to propagate this fine grained information. > > > +} > > +VLOG_ERR_RL(, "failed to offload flow: %s: %s", > > ovs_strerror(err), > > +(oor_netdev ? 
oor_netdev->name : dev->name)); > > } > > > > out: > > diff --git a/lib/flow.c b/lib/flow.c > > index 77ed3d9df..a39807908 100644 > > --- a/lib/flow.c > > +++ b/lib/flow.c > > @@ -19,6 +19,7 @@ > > #include > > #include > > #include > > +#include > > #include > > #include > > #include > > @@ -41,6 +42,8 @@ > > #include "unaligned.h" > > #include "util.h" > > #include "openvswitch/nsh.h" > > +#include "ovs-router.h" > > +#include "lib/netdev-provider.h" > > > > COVERAGE_DEFINE(flow_extract); > > COVERAGE_DEFINE(miniflow_malloc); > > @@ -3403,3 +3406,25 @@ flow_limit_vlans(int vlan_limit) > > flow_vlan_limit = MIN(vlan_limit, FLOW_MAX_VLAN_HEADERS); > > } > > } > > + > > +struct netdev * > > +flow_get_tunnel_netdev(struct flow_tnl *tunnel) > > +{ > > +char iface[IFNAMSIZ]; > > +struct in6_addr ip6; > > +struct in6_addr gw; > > + > > +if (tunnel->ip_src) { > > +in6_addr_set_mapped_ipv4(, tunnel->ip_src); > > +} else if (ipv6_addr_is_set(>ipv6_src)) { > > +ip6 = tunnel->ipv6_src; > > +} else { > > +return NULL; > > +} > > + > > +if (!ovs_router_lookup(0, , iface, NULL, )) { > > +return NULL; > > +} > > + > > +return netdev_from_name(iface); > > +} > > diff --git a/lib/flow.h b/lib/flow.h > > index d03f1ba9c..aca60c41a 100644 > > --- a/lib/flow.h > > +++ b/lib/flow.h > > @@ -73,6 +73,7 @@ void flow_extract(struct dp_packet *, struct flow > > *); > > void flow_zero_wildcards(struct flow *, const struct flow_wildcards > > *); > > void flow_unwildcard_tp_ports(const struct flow *, struct > >
Re: [ovs-dev] [PATCH v8 1/3] dpif-netlink: Detect Out-Of-Resource condition on a netdev
On 18 Oct 2018, at 18:13, Sriharsha Basavapatna via dev wrote: This is the first patch in the patch-set to support dynamic rebalancing of offloaded flows. The patch detects OOR condition on a netdev port when ENOSPC error is returned by TC-Flower while adding a flow rule. A new structure is added to the netdev called "netdev_hw_info", to store OOR related information required to perform dynamic offload-rebalancing. Signed-off-by: Sriharsha Basavapatna Co-authored-by: Venkat Duvvuru Signed-off-by: Venkat Duvvuru Reviewed-by: Sathya Perla Reviewed-by: Simon Horman Reviewed-by: Ben Pfaff --- lib/dpif-netlink.c| 18 +- lib/flow.c| 25 + lib/flow.h| 1 + lib/netdev-provider.h | 11 +++ lib/netdev.c | 34 ++ lib/netdev.h | 3 +++ 6 files changed, 91 insertions(+), 1 deletion(-) diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c index e6d5a6ec5..b9ce9cbe2 100644 --- a/lib/dpif-netlink.c +++ b/lib/dpif-netlink.c @@ -2178,7 +2178,23 @@ parse_flow_put(struct dpif_netlink *dpif, struct dpif_flow_put *put) VLOG_DBG("added flow"); } else if (err != EEXIST) { -VLOG_ERR_RL(, "failed to offload flow: %s", ovs_strerror(err)); +struct netdev *oor_netdev = NULL; +if (err == ENOSPC && netdev_is_offload_rebalance_policy_enabled()) { +/* + * We need to set OOR on the input netdev (i.e, 'dev') for the + * flow. But if the flow has a tunnel attribute (i.e, decap action, + * with a virtual device like a VxLAN interface as its in-port), + * then lookup and set OOR on the underlying tunnel (real) netdev. + */ +oor_netdev = flow_get_tunnel_netdev(); +if (!oor_netdev) { +/* Not a 'tunnel' flow */ +oor_netdev = dev; +} +netdev_set_hw_info(oor_netdev, HW_INFO_TYPE_OOR, true); Why not just oor_netdev->hw_info.oor = true, see also below. I have a general comment, don't know where to put it, so I put it here. Some hardware might have multiple tables. If one type of table is full the ENOSPC might be returned, but it does not mean all type of flows can no longer be offloaded. 
This might be a situation to think about. +} +VLOG_ERR_RL(, "failed to offload flow: %s: %s", ovs_strerror(err), +(oor_netdev ? oor_netdev->name : dev->name)); } out: diff --git a/lib/flow.c b/lib/flow.c index 77ed3d9df..a39807908 100644 --- a/lib/flow.c +++ b/lib/flow.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -41,6 +42,8 @@ #include "unaligned.h" #include "util.h" #include "openvswitch/nsh.h" +#include "ovs-router.h" +#include "lib/netdev-provider.h" COVERAGE_DEFINE(flow_extract); COVERAGE_DEFINE(miniflow_malloc); @@ -3403,3 +3406,25 @@ flow_limit_vlans(int vlan_limit) flow_vlan_limit = MIN(vlan_limit, FLOW_MAX_VLAN_HEADERS); } } + +struct netdev * +flow_get_tunnel_netdev(struct flow_tnl *tunnel) +{ +char iface[IFNAMSIZ]; +struct in6_addr ip6; +struct in6_addr gw; + +if (tunnel->ip_src) { +in6_addr_set_mapped_ipv4(, tunnel->ip_src); +} else if (ipv6_addr_is_set(>ipv6_src)) { +ip6 = tunnel->ipv6_src; +} else { +return NULL; +} + +if (!ovs_router_lookup(0, , iface, NULL, )) { +return NULL; +} + +return netdev_from_name(iface); +} diff --git a/lib/flow.h b/lib/flow.h index d03f1ba9c..aca60c41a 100644 --- a/lib/flow.h +++ b/lib/flow.h @@ -73,6 +73,7 @@ void flow_extract(struct dp_packet *, struct flow *); void flow_zero_wildcards(struct flow *, const struct flow_wildcards *); void flow_unwildcard_tp_ports(const struct flow *, struct flow_wildcards *); void flow_get_metadata(const struct flow *, struct match *flow_metadata); +struct netdev *flow_get_tunnel_netdev(struct flow_tnl *tunnel); const char *ct_state_to_string(uint32_t state); uint32_t ct_state_from_string(const char *); diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h index 5a7947351..e320dad61 100644 --- a/lib/netdev-provider.h +++ b/lib/netdev-provider.h @@ -35,6 +35,15 @@ extern "C" { struct netdev_tnl_build_header_params; #define NETDEV_NUMA_UNSPEC OVS_NUMA_UNSPEC +/* Offload-capable (HW) netdev information */ +struct netdev_hw_info { +bool oor; /* 
Out of Offload Resources ? */ +}; + +enum hw_info_type { +HW_INFO_TYPE_OOR = 1 /* OOR state */ +}; + /* A network device (e.g. an Ethernet device). * * Network device implementations may read these members but should not modify @@ -80,6 +89,8 @@ struct netdev { int n_rxq; struct shash_node *node;/* Pointer to element in global map. */ struct ovs_list saved_flags_list; /* Contains "struct netdev_saved_flags". */ + +struct