Re: [ovs-dev] [PATCH v8 5/6] dpif-netdev: do hw flow offload in a thread

2018-04-11 Thread Shahaf Shuler
Tuesday, April 10, 2018 10:58 PM, Stokes, Ian:
> Subject: RE: [PATCH v8 5/6] dpif-netdev: do hw flow offload in a thread
> 
> > Currently, the major trigger for hw flow offload is at upcall
> > handling, which is actually in the datapath. Moreover, the hw offload
> > installation and modification are not that lightweight, meaning that
> > if many flows are added or modified frequently, they could stall the
> > datapath and result in packet loss.
> >
> > To mitigate that, all such flow operations are recorded and appended
> > to a list. A thread is then introduced to process this list (to do
> > the real flow offloading put/del operations). This keeps the datapath
> > as lightweight as possible.
> 
> Just a general query, not related to this patch specifically, but have you
> given any thought to statistics for the HWOL use case? Should they be tracked
> in some way by OVS, or, if tracked by the NIC, can they be accessed by OVS?

For hardware offload, on supported devices, we can use the NIC to keep the 
statistics of the flow rules. 
The APIs are already in DPDK: one can associate a flow rule with a count 
action so that the NIC maintains a full count of the packets matching this 
flow. 
In case the HW does not support it, the counting can be done by OVS. 

For Mellanox, flow counters in HW are supported. I am not sure about the rest 
of the vendors. 

Having said that, I think the gain from HW counting for flow-mark offload is 
low, because the packet is processed by OVS regardless. 
For more advanced HWOL, like drop or redirect actions, it is mandatory. 
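
To illustrate, the rte_flow usage is roughly as below. This is only a sketch: 
the match pattern is a placeholder, error handling is trimmed, and older DPDK 
releases pass the action type enum to rte_flow_query() instead of the action 
pointer.

    #include <rte_flow.h>

    static int
    count_query_example(uint16_t port_id)
    {
        struct rte_flow_error error;
        struct rte_flow_attr attr = { .ingress = 1 };

        /* Placeholder match: every ethernet packet. */
        struct rte_flow_item pattern[] = {
            { .type = RTE_FLOW_ITEM_TYPE_ETH },
            { .type = RTE_FLOW_ITEM_TYPE_END },
        };

        /* COUNT makes the NIC keep hit/byte counters for this rule;
         * DROP is just the fate action for this example. */
        struct rte_flow_action_count count_conf = { 0 };
        struct rte_flow_action actions[] = {
            { .type = RTE_FLOW_ACTION_TYPE_COUNT, .conf = &count_conf },
            { .type = RTE_FLOW_ACTION_TYPE_DROP },
            { .type = RTE_FLOW_ACTION_TYPE_END },
        };

        struct rte_flow *flow = rte_flow_create(port_id, &attr, pattern,
                                                actions, &error);
        if (!flow) {
            return -1;
        }

        /* Later: pull the per-rule statistics from the NIC. */
        struct rte_flow_query_count query = { .reset = 0 };
        if (!rte_flow_query(port_id, flow, &actions[0], &query, &error)) {
            /* query.hits / query.bytes are valid when the PMD sets the
             * hits_set / bytes_set bits. */
        }

        return 0;
    }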

> 
> More comments inline below.
> >
> > Signed-off-by: Yuanhan Liu 
> > Signed-off-by: Shahaf Shuler 
> > ---
> >  lib/dpif-netdev.c | 348 -
> >  1 file changed, 258 insertions(+), 90 deletions(-)
> >
> > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index
> > 7489a2f..8300286
> > 100644
> > --- a/lib/dpif-netdev.c
> > +++ b/lib/dpif-netdev.c
> > @@ -345,6 +345,12 @@ enum rxq_cycles_counter_type {
> >  RXQ_N_CYCLES
> >  };
> >
> > +enum {
> > +DP_NETDEV_FLOW_OFFLOAD_OP_ADD,
> > +DP_NETDEV_FLOW_OFFLOAD_OP_MOD,
> > +DP_NETDEV_FLOW_OFFLOAD_OP_DEL,
> > +};
> > +
> >  #define XPS_TIMEOUT 50LL/* In microseconds. */
> >
> >  /* Contained by struct dp_netdev_port's 'rxqs' member.  */ @@ -721,6
> > +727,8 @@ static inline bool emc_entry_alive(struct emc_entry *ce);
> > static void emc_clear_entry(struct emc_entry *ce);
> >
> >  static void dp_netdev_request_reconfigure(struct dp_netdev *dp);
> > +static void queue_netdev_flow_del(struct dp_netdev_pmd_thread
> *pmd,
> > +  struct dp_netdev_flow *flow);
> >
> >  static void
> >  emc_cache_init(struct emc_cache *flow_cache) @@ -1854,13 +1862,11
> @@
> > struct flow_mark {
> >  struct cmap megaflow_to_mark;
> >  struct cmap mark_to_flow;
> >  struct id_pool *pool;
> > -struct ovs_mutex mutex;
> >  };
> >
> >  static struct flow_mark flow_mark = {
> >  .megaflow_to_mark = CMAP_INITIALIZER,
> >  .mark_to_flow = CMAP_INITIALIZER,
> > -.mutex = OVS_MUTEX_INITIALIZER,
> >  };
> >
> >  static uint32_t
> > @@ -2010,7 +2016,7 @@ flow_mark_flush(struct dp_netdev_pmd_thread
> > *pmd)
> >
> >  CMAP_FOR_EACH (flow, mark_node, &flow_mark.mark_to_flow) {
> >  if (flow->pmd_id == pmd->core_id) {
> > -mark_to_flow_disassociate(pmd, flow);
> > +queue_netdev_flow_del(pmd, flow);
> >  }
> >  }
> >  }
> > @@ -2032,6 +2038,251 @@ mark_to_flow_find(const struct
> > dp_netdev_pmd_thread *pmd,
> >  return NULL;
> >  }
> >
> > +struct dp_flow_offload_item {
> > +struct dp_netdev_pmd_thread *pmd;
> > +struct dp_netdev_flow *flow;
> > +int op;
> > +struct match match;
> > +struct nlattr *actions;
> > +size_t actions_len;
> > +
> > +struct ovs_list node;
> > +};
> > +
> > +struct dp_flow_offload {
> > +struct ovs_mutex mutex;
> > +struct ovs_list list;
> > +pthread_cond_t cond;
> > +};
> > +
> > +static struct dp_flow_offload dp_flow_offload = {
> > +.mutex = OVS_MUTEX_INITIALIZER,
> > +.list  = OVS_LIST_INITIALIZER(&dp_flow_offload.list),
> > +};
> > +
> > +static struct ovsthread_once offload_thread_once
> > += OVSTHREAD_ONCE_INITIALIZER;
> 
> The structs above are declared mid-file, after the pre-existing
> mark_to_flow_find function.
> 
> It would look cleaner to declare them toward the beginning of the file with
> the enums etc., so as to keep functions and structs separate.
> > +
> > +static struct dp_flow_offload_item *
> > +dp_netdev_alloc_flow_offload(struct dp_netdev_pmd_thread *pmd,
> > + struct dp_netdev_flow *flow,
> > + int op)
> > +{
> > +struct dp_flow_offload_item *offload;
> > +
> > +offload = xzalloc(sizeof(*offload));
> > +offload->pmd = pmd;
> > +offload->flow = flow;
> > +

Re: [ovs-dev] [PATCH v8 5/6] dpif-netdev: do hw flow offload in a thread

2018-04-10 Thread Stokes, Ian
> Currently, the major trigger for hw flow offload is at upcall handling,
> which is actually in the datapath. Moreover, the hw offload installation
> and modification are not that lightweight, meaning that if many flows are
> added or modified frequently, they could stall the datapath and result in
> packet loss.
> 
> To mitigate that, all such flow operations are recorded and appended to a
> list. A thread is then introduced to process this list (to do the real
> flow offloading put/del operations). This keeps the datapath as
> lightweight as possible.

Just a general query, not related to this patch specifically, but have you 
given any thought to statistics for the HWOL use case? Should they be tracked 
in some way by OVS, or, if tracked by the NIC, can they be accessed by OVS?

More comments inline below.
> 
> Signed-off-by: Yuanhan Liu 
> Signed-off-by: Shahaf Shuler 
> ---
>  lib/dpif-netdev.c | 348 -
>  1 file changed, 258 insertions(+), 90 deletions(-)
> 
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 7489a2f..8300286
> 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -345,6 +345,12 @@ enum rxq_cycles_counter_type {
>  RXQ_N_CYCLES
>  };
> 
> +enum {
> +DP_NETDEV_FLOW_OFFLOAD_OP_ADD,
> +DP_NETDEV_FLOW_OFFLOAD_OP_MOD,
> +DP_NETDEV_FLOW_OFFLOAD_OP_DEL,
> +};
> +
>  #define XPS_TIMEOUT 50LL/* In microseconds. */
> 
>  /* Contained by struct dp_netdev_port's 'rxqs' member.  */ @@ -721,6
> +727,8 @@ static inline bool emc_entry_alive(struct emc_entry *ce);
> static void emc_clear_entry(struct emc_entry *ce);
> 
>  static void dp_netdev_request_reconfigure(struct dp_netdev *dp);
> +static void queue_netdev_flow_del(struct dp_netdev_pmd_thread *pmd,
> +  struct dp_netdev_flow *flow);
> 
>  static void
>  emc_cache_init(struct emc_cache *flow_cache) @@ -1854,13 +1862,11 @@
> struct flow_mark {
>  struct cmap megaflow_to_mark;
>  struct cmap mark_to_flow;
>  struct id_pool *pool;
> -struct ovs_mutex mutex;
>  };
> 
>  static struct flow_mark flow_mark = {
>  .megaflow_to_mark = CMAP_INITIALIZER,
>  .mark_to_flow = CMAP_INITIALIZER,
> -.mutex = OVS_MUTEX_INITIALIZER,
>  };
> 
>  static uint32_t
> @@ -2010,7 +2016,7 @@ flow_mark_flush(struct dp_netdev_pmd_thread *pmd)
> 
>  CMAP_FOR_EACH (flow, mark_node, &flow_mark.mark_to_flow) {
>  if (flow->pmd_id == pmd->core_id) {
> -mark_to_flow_disassociate(pmd, flow);
> +queue_netdev_flow_del(pmd, flow);
>  }
>  }
>  }
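
Just to check my understanding of the producer side: I take it 
queue_netdev_flow_del() lazily spawns the offload thread and then only 
allocates and appends a work item, roughly like this (my own sketch based on 
the helpers further down; the thread body name is an assumption)?

    static void
    queue_netdev_flow_del(struct dp_netdev_pmd_thread *pmd,
                          struct dp_netdev_flow *flow)
    {
        struct dp_flow_offload_item *offload;

        /* Spawn the offload thread on first use. */
        if (ovsthread_once_start(&offload_thread_once)) {
            xpthread_cond_init(&dp_flow_offload.cond, NULL);
            ovs_thread_create("dp_netdev_flow_offload",
                              dp_netdev_flow_offload_main, NULL);
            ovsthread_once_done(&offload_thread_once);
        }

        /* Record the request and wake the offload thread; the PMD itself
         * does no hardware work here. */
        offload = dp_netdev_alloc_flow_offload(pmd, flow,
                                               DP_NETDEV_FLOW_OFFLOAD_OP_DEL);
        dp_netdev_append_flow_offload(offload);
    }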
> @@ -2032,6 +2038,251 @@ mark_to_flow_find(const struct
> dp_netdev_pmd_thread *pmd,
>  return NULL;
>  }
> 
> +struct dp_flow_offload_item {
> +struct dp_netdev_pmd_thread *pmd;
> +struct dp_netdev_flow *flow;
> +int op;
> +struct match match;
> +struct nlattr *actions;
> +size_t actions_len;
> +
> +struct ovs_list node;
> +};
> +
> +struct dp_flow_offload {
> +struct ovs_mutex mutex;
> +struct ovs_list list;
> +pthread_cond_t cond;
> +};
> +
> +static struct dp_flow_offload dp_flow_offload = {
> +.mutex = OVS_MUTEX_INITIALIZER,
> +.list  = OVS_LIST_INITIALIZER(&dp_flow_offload.list),
> +};
> +
> +static struct ovsthread_once offload_thread_once
> += OVSTHREAD_ONCE_INITIALIZER;

The structs above are declared mid-file, after the pre-existing 
mark_to_flow_find function.

It would look cleaner to declare them toward the beginning of the file with 
the enums etc., so as to keep functions and structs separate.
> +
> +static struct dp_flow_offload_item *
> +dp_netdev_alloc_flow_offload(struct dp_netdev_pmd_thread *pmd,
> + struct dp_netdev_flow *flow,
> + int op)
> +{
> +struct dp_flow_offload_item *offload;
> +
> +offload = xzalloc(sizeof(*offload));
> +offload->pmd = pmd;
> +offload->flow = flow;
> +offload->op = op;
> +
> +dp_netdev_flow_ref(flow);
> +dp_netdev_pmd_try_ref(pmd);
> +
> +return offload;
> +}
> +
> +static void
> +dp_netdev_free_flow_offload(struct dp_flow_offload_item *offload) {
> +dp_netdev_pmd_unref(offload->pmd);
> +dp_netdev_flow_unref(offload->flow);
> +
> +free(offload->actions);
> +free(offload);
> +}
> +
> +static void
> +dp_netdev_append_flow_offload(struct dp_flow_offload_item *offload) {
> +ovs_mutex_lock(&dp_flow_offload.mutex);
> +ovs_list_push_back(&dp_flow_offload.list, &offload->node);
> +xpthread_cond_signal(&dp_flow_offload.cond);
> +ovs_mutex_unlock(&dp_flow_offload.mutex);
> +}
> +
> +static int
> +dp_netdev_flow_offload_del(struct dp_flow_offload_item *offload) {
> +return mark_to_flow_disassociate(offload->pmd, offload->flow); }
> +
> +/*
> + * There are two flow offload operations here: addition and modification.
> + *
> + * For flow addition, this function does:
> + * - allocate a new flow mark id
> + * - 

[ovs-dev] [PATCH v8 5/6] dpif-netdev: do hw flow offload in a thread

2018-03-27 Thread Shahaf Shuler
From: Yuanhan Liu 

Currently, the major trigger for hw flow offload is at upcall handling,
which is actually in the datapath. Moreover, the hw offload installation
and modification are not that lightweight, meaning that if many flows are
added or modified frequently, they could stall the datapath and result in
packet loss.

To mitigate that, all such flow operations are recorded and appended to a
list. A thread is then introduced to process this list (to do the real
flow offloading put/del operations). This keeps the datapath as lightweight
as possible.

Signed-off-by: Yuanhan Liu 
Signed-off-by: Shahaf Shuler 
---
 lib/dpif-netdev.c | 348 -
 1 file changed, 258 insertions(+), 90 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 7489a2f..8300286 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -345,6 +345,12 @@ enum rxq_cycles_counter_type {
 RXQ_N_CYCLES
 };
 
+enum {
+DP_NETDEV_FLOW_OFFLOAD_OP_ADD,
+DP_NETDEV_FLOW_OFFLOAD_OP_MOD,
+DP_NETDEV_FLOW_OFFLOAD_OP_DEL,
+};
+
 #define XPS_TIMEOUT 50LL/* In microseconds. */
 
 /* Contained by struct dp_netdev_port's 'rxqs' member.  */
@@ -721,6 +727,8 @@ static inline bool emc_entry_alive(struct emc_entry *ce);
 static void emc_clear_entry(struct emc_entry *ce);
 
 static void dp_netdev_request_reconfigure(struct dp_netdev *dp);
+static void queue_netdev_flow_del(struct dp_netdev_pmd_thread *pmd,
+  struct dp_netdev_flow *flow);
 
 static void
 emc_cache_init(struct emc_cache *flow_cache)
@@ -1854,13 +1862,11 @@ struct flow_mark {
 struct cmap megaflow_to_mark;
 struct cmap mark_to_flow;
 struct id_pool *pool;
-struct ovs_mutex mutex;
 };
 
 static struct flow_mark flow_mark = {
 .megaflow_to_mark = CMAP_INITIALIZER,
 .mark_to_flow = CMAP_INITIALIZER,
-.mutex = OVS_MUTEX_INITIALIZER,
 };
 
 static uint32_t
@@ -2010,7 +2016,7 @@ flow_mark_flush(struct dp_netdev_pmd_thread *pmd)
 
 CMAP_FOR_EACH (flow, mark_node, &flow_mark.mark_to_flow) {
 if (flow->pmd_id == pmd->core_id) {
-mark_to_flow_disassociate(pmd, flow);
+queue_netdev_flow_del(pmd, flow);
 }
 }
 }
@@ -2032,6 +2038,251 @@ mark_to_flow_find(const struct dp_netdev_pmd_thread *pmd,
 return NULL;
 }
 
+struct dp_flow_offload_item {
+struct dp_netdev_pmd_thread *pmd;
+struct dp_netdev_flow *flow;
+int op;
+struct match match;
+struct nlattr *actions;
+size_t actions_len;
+
+struct ovs_list node;
+};
+
+struct dp_flow_offload {
+struct ovs_mutex mutex;
+struct ovs_list list;
+pthread_cond_t cond;
+};
+
+static struct dp_flow_offload dp_flow_offload = {
+.mutex = OVS_MUTEX_INITIALIZER,
+.list  = OVS_LIST_INITIALIZER(&dp_flow_offload.list),
+};
+
+static struct ovsthread_once offload_thread_once
+= OVSTHREAD_ONCE_INITIALIZER;
+
+static struct dp_flow_offload_item *
+dp_netdev_alloc_flow_offload(struct dp_netdev_pmd_thread *pmd,
+ struct dp_netdev_flow *flow,
+ int op)
+{
+struct dp_flow_offload_item *offload;
+
+offload = xzalloc(sizeof(*offload));
+offload->pmd = pmd;
+offload->flow = flow;
+offload->op = op;
+
+dp_netdev_flow_ref(flow);
+dp_netdev_pmd_try_ref(pmd);
+
+return offload;
+}
+
+static void
+dp_netdev_free_flow_offload(struct dp_flow_offload_item *offload)
+{
+dp_netdev_pmd_unref(offload->pmd);
+dp_netdev_flow_unref(offload->flow);
+
+free(offload->actions);
+free(offload);
+}
+
+static void
+dp_netdev_append_flow_offload(struct dp_flow_offload_item *offload)
+{
+ovs_mutex_lock(&dp_flow_offload.mutex);
+ovs_list_push_back(&dp_flow_offload.list, &offload->node);
+xpthread_cond_signal(&dp_flow_offload.cond);
+ovs_mutex_unlock(&dp_flow_offload.mutex);
+}
+
+static int
+dp_netdev_flow_offload_del(struct dp_flow_offload_item *offload)
+{
+return mark_to_flow_disassociate(offload->pmd, offload->flow);
+}
+
+/*
+ * There are two flow offload operations here: addition and modification.
+ *
+ * For flow addition, this function does:
+ * - allocate a new flow mark id
+ * - perform hardware flow offload
+ * - associate the flow mark with flow and mega flow
+ *
+ * For flow modification, both the flow mark and the associations are still
+ * valid, thus only item 2 is needed.
+ */
+static int
+dp_netdev_flow_offload_put(struct dp_flow_offload_item *offload)
+{
+struct dp_netdev_port *port;
+struct dp_netdev_pmd_thread *pmd = offload->pmd;
+struct dp_netdev_flow *flow = offload->flow;
+odp_port_t in_port = flow->flow.in_port.odp_port;
+bool modification = offload->op == DP_NETDEV_FLOW_OFFLOAD_OP_MOD;
+struct offload_info info;
+uint32_t mark;
+int ret;
+
+if (flow->dead) {
+return -1;
+}
+
+if (modification) {
+