> -----Original Message----- > From: Stokes, Ian > Sent: Monday, March 12, 2018 3:54 PM > To: Yuanhan Liu <[email protected]>; [email protected] > Cc: Finn Christensen <[email protected]>; Darrell Ball <[email protected]>; > Chandran, Sugesh <[email protected]>; Simon Horman > <[email protected]> > Subject: RE: [PATCH v7 5/6] dpif-netdev: do hw flow offload in a thread >
Adding Shahaf Shuler > > -----Original Message----- > > From: Yuanhan Liu [mailto:[email protected]] > > Sent: Monday, January 29, 2018 7:00 AM > > To: [email protected] > > Cc: Stokes, Ian <[email protected]>; Finn Christensen > > <[email protected]>; Darrell Ball <[email protected]>; Chandran, Sugesh > > <[email protected]>; Simon Horman > > <[email protected]>; Yuanhan Liu <[email protected]> > > Subject: [PATCH v7 5/6] dpif-netdev: do hw flow offload in a thread > > > > Currently, the major trigger for hw flow offload is at upcall > > handling, which is actually in the datapath. Moreover, the hw offload > > installation and modification is not that lightweight. Meaning, if > > there are so many flows being added or modified frequently, it could > > stall the datapath, which could result to packet loss. > > > > To diminish that, all those flow operations will be recorded and > > appended to a list. A thread is then introduced to process this list > > (to do the real flow offloading put/del operations). This could leave > > the datapath as lightweight as possible. > > > > Signed-off-by: Yuanhan Liu <[email protected]> > > --- > > > > v5: - removed an not-used mutex lock > > --- > > lib/dpif-netdev.c | 348 > > ++++++++++++++++++++++++++++++++++++++++--------- > > ----- > > 1 file changed, 258 insertions(+), 90 deletions(-) > > > > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index > > cb16e2e..b1f6973 > > 100644 > > --- a/lib/dpif-netdev.c > > +++ b/lib/dpif-netdev.c > > @@ -1854,13 +1854,11 @@ struct flow_mark { > > struct cmap megaflow_to_mark; > > struct cmap mark_to_flow; > > struct id_pool *pool; > > - struct ovs_mutex mutex; > > }; > > > > static struct flow_mark flow_mark = { > > .megaflow_to_mark = CMAP_INITIALIZER, > > .mark_to_flow = CMAP_INITIALIZER, > > - .mutex = OVS_MUTEX_INITIALIZER, > > }; > > > > static uint32_t > > @@ -2001,6 +1999,8 @@ mark_to_flow_disassociate(struct > > dp_netdev_pmd_thread *pmd, > > return ret; > > } > > > > +static void queue_netdev_flow_del(struct dp_netdev_pmd_thread *pmd, > > + struct dp_netdev_flow *flow); > > This seems out of place here, suggest moving it towards the beginning of > the file along with the other function prototypes for dpif-netdev. > > > static void > > flow_mark_flush(struct dp_netdev_pmd_thread *pmd) { @@ -2008,7 > > +2008,7 @@ flow_mark_flush(struct dp_netdev_pmd_thread *pmd) > > > > CMAP_FOR_EACH (flow, mark_node, &flow_mark.mark_to_flow) { > > if (flow->pmd_id == pmd->core_id) { > > - mark_to_flow_disassociate(pmd, flow); > > + queue_netdev_flow_del(pmd, flow); > > } > > } > > } > > @@ -2030,6 +2030,257 @@ mark_to_flow_find(const struct > > dp_netdev_pmd_thread *pmd, > > return NULL; > > } > > > > +enum { > > + DP_NETDEV_FLOW_OFFLOAD_OP_ADD, > > + DP_NETDEV_FLOW_OFFLOAD_OP_MOD, > > + DP_NETDEV_FLOW_OFFLOAD_OP_DEL, > > +}; > > Again, I'd prefer enums like this to be kept at the beginning of the file > rather than interspersed through the functions. > It probably warrants defining a specific HWOL either at the top of this > file or preferably spun out to a separate HWOL module all together so as > not to overload the dpif-netdev code. > > > + > > +struct dp_flow_offload_item { > > + struct dp_netdev_pmd_thread *pmd; > > + struct dp_netdev_flow *flow; > > + int op; > > + struct match match; > > + struct nlattr *actions; > > + size_t actions_len; > > + > > + struct ovs_list node; > > +}; > > + > > +struct dp_flow_offload { > > + struct ovs_mutex mutex; > > + struct ovs_list list; > > + pthread_cond_t cond; > > +}; > > + > > +static struct dp_flow_offload dp_flow_offload = { > > + .mutex = OVS_MUTEX_INITIALIZER, > > + .list = OVS_LIST_INITIALIZER(&dp_flow_offload.list), > > +}; > > + > > +static struct ovsthread_once offload_thread_once > > + = OVSTHREAD_ONCE_INITIALIZER; > > + > > +static struct dp_flow_offload_item * > > +dp_netdev_alloc_flow_offload(struct dp_netdev_pmd_thread *pmd, > > + struct dp_netdev_flow *flow, > > + int op) > > +{ > > + struct dp_flow_offload_item *offload; > > + > > + offload = xzalloc(sizeof(*offload)); > > + offload->pmd = pmd; > > + offload->flow = flow; > > + offload->op = op; > > + > > + dp_netdev_flow_ref(flow); > > + dp_netdev_pmd_try_ref(pmd); > > + > > + return offload; > > +} > > + > > +static void > > +dp_netdev_free_flow_offload(struct dp_flow_offload_item *offload) { > > + dp_netdev_pmd_unref(offload->pmd); > > + dp_netdev_flow_unref(offload->flow); > > + > > + free(offload->actions); > > + free(offload); > > +} > > + > > +static void > > +dp_netdev_append_flow_offload(struct dp_flow_offload_item *offload) { > > + ovs_mutex_lock(&dp_flow_offload.mutex); > > + ovs_list_push_back(&dp_flow_offload.list, &offload->node); > > + xpthread_cond_signal(&dp_flow_offload.cond); > > + ovs_mutex_unlock(&dp_flow_offload.mutex); > > +} > > + > > +static int > > +dp_netdev_flow_offload_del(struct dp_flow_offload_item *offload) { > > + return mark_to_flow_disassociate(offload->pmd, offload->flow); } > > + > > +/* > > + * There are two flow offload operations here: addition and > modification. > > + * > > + * For flow addition, this function does: > > + * - allocate a new flow mark id > > + * - perform hardware flow offload > > + * - associate the flow mark with flow and mega flow > > + * > > + * For flow modification, both flow mark and the associations are > > +still > > + * valid, thus only item 2 needed. > > + */ > > +static int > > +dp_netdev_flow_offload_put(struct dp_flow_offload_item *offload) { > > + struct dp_netdev_port *port; > > + struct dp_netdev_pmd_thread *pmd = offload->pmd; > > + struct dp_netdev_flow *flow = offload->flow; > > + odp_port_t in_port = flow->flow.in_port.odp_port; > > + bool modification = offload->op == DP_NETDEV_FLOW_OFFLOAD_OP_MOD; > > + struct offload_info info; > > + uint32_t mark; > > + int ret; > > + > > + if (flow->dead) { > > + return -1; > > + } > > + > > + if (modification) { > > + mark = flow->mark; > > + ovs_assert(mark != INVALID_FLOW_MARK); > > + } else { > > + /* > > + * If a mega flow has already been offloaded (from other PMD > > + * instances), do not offload it again. > > + */ > > + mark = megaflow_to_mark_find(&flow->mega_ufid); > > + if (mark != INVALID_FLOW_MARK) { > > + VLOG_DBG("flow has already been offloaded with mark > > + %u\n", > > mark); > > + if (flow->mark != INVALID_FLOW_MARK) { > > + ovs_assert(flow->mark == mark); > > + } else { > > + mark_to_flow_associate(mark, flow); > > + } > > + return 0; > > + } > > + > > + mark = flow_mark_alloc(); > > + if (mark == INVALID_FLOW_MARK) { > > + VLOG_ERR("failed to allocate flow mark!\n"); > > + } > > + } > > + info.flow_mark = mark; > > + > > + ovs_mutex_lock(&pmd->dp->port_mutex); > > + port = dp_netdev_lookup_port(pmd->dp, in_port); > > + if (!port) { > > + ovs_mutex_unlock(&pmd->dp->port_mutex); > > + return -1; > > + } > > + ret = netdev_flow_put(port->netdev, &offload->match, > > + CONST_CAST(struct nlattr *, offload- > >actions), > > + offload->actions_len, &flow->mega_ufid, > &info, > > + NULL); > > + ovs_mutex_unlock(&pmd->dp->port_mutex); > > + > > + if (ret) { > > + if (!modification) { > > + flow_mark_free(mark); > > + } else { > > + mark_to_flow_disassociate(pmd, flow); > > + } > > + return -1; > > + } > > + > > + if (!modification) { > > + megaflow_to_mark_associate(&flow->mega_ufid, mark); > > + mark_to_flow_associate(mark, flow); > > + } > > + > > + return 0; > > +} > > + > > +static void * > > +dp_netdev_flow_offload_main(void *data OVS_UNUSED) { > > + struct dp_flow_offload_item *offload; > > + struct ovs_list *list; > > + const char *op; > > + int ret; > > + > > + for (;;) { > > + ovs_mutex_lock(&dp_flow_offload.mutex); > > + if (ovs_list_is_empty(&dp_flow_offload.list)) { > > + ovsrcu_quiesce_start(); > > + ovs_mutex_cond_wait(&dp_flow_offload.cond, > > + &dp_flow_offload.mutex); > > + } > > + list = ovs_list_pop_front(&dp_flow_offload.list); > > + offload = CONTAINER_OF(list, struct dp_flow_offload_item, > node); > > + ovs_mutex_unlock(&dp_flow_offload.mutex); > > + > > + switch (offload->op) { > > + case DP_NETDEV_FLOW_OFFLOAD_OP_ADD: > > + op = "add"; > > + ret = dp_netdev_flow_offload_put(offload); > > + break; > > + case DP_NETDEV_FLOW_OFFLOAD_OP_MOD: > > + op = "modify"; > > + ret = dp_netdev_flow_offload_put(offload); > > + break; > > + case DP_NETDEV_FLOW_OFFLOAD_OP_DEL: > > + op = "delete"; > > + ret = dp_netdev_flow_offload_del(offload); > > + break; > > + default: > > + OVS_NOT_REACHED(); > > + } > > + > > + VLOG_DBG("%s to %s netdev flow\n", > > + ret == 0 ? "succeed" : "failed", op); > > + dp_netdev_free_flow_offload(offload); > > + } > > + > > + return NULL; > > +} > > + > > +static void > > +queue_netdev_flow_del(struct dp_netdev_pmd_thread *pmd, > > + struct dp_netdev_flow *flow) { > > + struct dp_flow_offload_item *offload; > > + > > + if (ovsthread_once_start(&offload_thread_once)) { > > + xpthread_cond_init(&dp_flow_offload.cond, NULL); > > + ovs_thread_create("dp_netdev_flow_offload", > > + dp_netdev_flow_offload_main, NULL); > > + ovsthread_once_done(&offload_thread_once); > > + } > > + > > + offload = dp_netdev_alloc_flow_offload(pmd, flow, > > + > > DP_NETDEV_FLOW_OFFLOAD_OP_DEL); > > + dp_netdev_append_flow_offload(offload); > > +} > > + > > +static void > > +queue_netdev_flow_put(struct dp_netdev_pmd_thread *pmd, > > + struct dp_netdev_flow *flow, struct match *match, > > + const struct nlattr *actions, size_t > > +actions_len) { > > + struct dp_flow_offload_item *offload; > > + int op; > > + > > + if (!netdev_is_flow_api_enabled()) { > > + return; > > + } > > + > > + if (ovsthread_once_start(&offload_thread_once)) { > > + xpthread_cond_init(&dp_flow_offload.cond, NULL); > > + ovs_thread_create("dp_netdev_flow_offload", > > + dp_netdev_flow_offload_main, NULL); > > + ovsthread_once_done(&offload_thread_once); > > + } > > + > > + if (flow->mark != INVALID_FLOW_MARK) { > > + op = DP_NETDEV_FLOW_OFFLOAD_OP_MOD; > > + } else { > > + op = DP_NETDEV_FLOW_OFFLOAD_OP_ADD; > > + } > > + offload = dp_netdev_alloc_flow_offload(pmd, flow, op); > > + offload->match = *match; > > + offload->actions = xmalloc(actions_len); > > + memcpy(offload->actions, actions, actions_len); > > + offload->actions_len = actions_len; > > + > > + dp_netdev_append_flow_offload(offload); > > +} > > + > > static void > > dp_netdev_pmd_remove_flow(struct dp_netdev_pmd_thread *pmd, > > struct dp_netdev_flow *flow) @@ -2044,7 > > +2295,7 @@ dp_netdev_pmd_remove_flow(struct dp_netdev_pmd_thread *pmd, > > dpcls_remove(cls, &flow->cr); > > cmap_remove(&pmd->flow_table, node, dp_netdev_flow_hash(&flow- > > >ufid)); > > if (flow->mark != INVALID_FLOW_MARK) { > > - mark_to_flow_disassociate(pmd, flow); > > + queue_netdev_flow_del(pmd, flow); > > } > > flow->dead = true; > > > > @@ -2625,88 +2876,6 @@ out: > > return error; > > } > > > > -/* > > - * There are two flow offload operations here: addition and > modification. > > - * > > - * For flow addition, this function does: > > - * - allocate a new flow mark id > > - * - perform hardware flow offload > > - * - associate the flow mark with flow and mega flow > > - * > > - * For flow modification, both flow mark and the associations are > > still > > - * valid, thus only item 2 needed. > > - */ > > -static void > > -try_netdev_flow_put(struct dp_netdev_pmd_thread *pmd, odp_port_t > in_port, > > - struct dp_netdev_flow *flow, struct match *match, > > - const struct nlattr *actions, size_t actions_len) > > -{ > > - struct offload_info info; > > - struct dp_netdev_port *port; > > - bool modification = flow->mark != INVALID_FLOW_MARK; > > - const char *op = modification ? "modify" : "add"; > > - uint32_t mark; > > - int ret; > > - > > - ovs_mutex_lock(&flow_mark.mutex); > > - > > - if (modification) { > > - mark = flow->mark; > > - } else { > > - if (!netdev_is_flow_api_enabled()) { > > - goto out; > > - } > > - > > - /* > > - * If a mega flow has already been offloaded (from other PMD > > - * instances), do not offload it again. > > - */ > > - mark = megaflow_to_mark_find(&flow->mega_ufid); > > - if (mark != INVALID_FLOW_MARK) { > > - VLOG_DBG("flow has already been offloaded with mark %u\n", > > mark); > > - mark_to_flow_associate(mark, flow); > > - goto out; > > - } > > - > > - mark = flow_mark_alloc(); > > - if (mark == INVALID_FLOW_MARK) { > > - VLOG_ERR("failed to allocate flow mark!\n"); > > - goto out; > > - } > > - } > > - info.flow_mark = mark; > > - > > - ovs_mutex_lock(&pmd->dp->port_mutex); > > - port = dp_netdev_lookup_port(pmd->dp, in_port); > > - if (!port) { > > - ovs_mutex_unlock(&pmd->dp->port_mutex); > > - goto out; > > - } > > - ret = netdev_flow_put(port->netdev, match, > > - CONST_CAST(struct nlattr *, actions), > > - actions_len, &flow->mega_ufid, &info, NULL); > > - ovs_mutex_unlock(&pmd->dp->port_mutex); > > - > > - if (ret) { > > - VLOG_ERR("failed to %s netdev flow with mark %u\n", op, mark); > > - if (!modification) { > > - flow_mark_free(mark); > > - } else { > > - mark_to_flow_disassociate(pmd, flow); > > - } > > - goto out; > > - } > > - > > - if (!modification) { > > - megaflow_to_mark_associate(&flow->mega_ufid, mark); > > - mark_to_flow_associate(mark, flow); > > - } > > - VLOG_DBG("succeed to %s netdev flow with mark %u\n", op, mark); > > - > > -out: > > - ovs_mutex_unlock(&flow_mark.mutex); > > -} > > - > > static void > > dp_netdev_get_mega_ufid(const struct match *match, ovs_u128 > > *mega_ufid) { @@ -2772,7 +2941,7 @@ dp_netdev_flow_add(struct > > dp_netdev_pmd_thread *pmd, > > cmap_insert(&pmd->flow_table, CONST_CAST(struct cmap_node *, > > &flow- > > >node), > > dp_netdev_flow_hash(&flow->ufid)); > > > > - try_netdev_flow_put(pmd, in_port, flow, match, actions, > actions_len); > > + queue_netdev_flow_put(pmd, flow, match, actions, actions_len); > > > > if (OVS_UNLIKELY(!VLOG_DROP_DBG((&upcall_rl)))) { > > struct ds ds = DS_EMPTY_INITIALIZER; @@ -2854,7 +3023,6 @@ > > flow_put_on_pmd(struct dp_netdev_pmd_thread *pmd, > > if (put->flags & DPIF_FP_MODIFY) { > > struct dp_netdev_actions *new_actions; > > struct dp_netdev_actions *old_actions; > > - odp_port_t in_port = netdev_flow->flow.in_port.odp_port; > > > > new_actions = dp_netdev_actions_create(put->actions, > > put->actions_len); > > @@ > > -2862,8 +3030,8 @@ flow_put_on_pmd(struct dp_netdev_pmd_thread *pmd, > > old_actions = dp_netdev_flow_get_actions(netdev_flow); > > ovsrcu_set(&netdev_flow->actions, new_actions); > > > > - try_netdev_flow_put(pmd, in_port, netdev_flow, match, > > - put->actions, put->actions_len); > > + queue_netdev_flow_put(pmd, netdev_flow, match, > > + put->actions, put->actions_len); > > > > if (stats) { > > get_dpif_flow_stats(netdev_flow, stats); > > -- > > 2.7.4 _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
