> -----Original Message-----
> From: Yuanhan Liu [mailto:[email protected]]
> Sent: Monday, January 29, 2018 7:00 AM
> To: [email protected]
> Cc: Stokes, Ian <[email protected]>; Finn Christensen
> <[email protected]>; Darrell Ball <[email protected]>; Chandran, Sugesh
> <[email protected]>; Simon Horman <[email protected]>;
> Yuanhan Liu <[email protected]>
> Subject: [PATCH v7 5/6] dpif-netdev: do hw flow offload in a thread
> 
> Currently, the major trigger for hw flow offload is at upcall handling,
> which is actually in the datapath. Moreover, the hw offload installation
> and modification is not that lightweight. Meaning, if there are so many
> flows being added or modified frequently, it could stall the datapath,
> which could result to packet loss.
> 
> To diminish that, all those flow operations will be recorded and appended
> to a list. A thread is then introduced to process this list (to do the
> real flow offloading put/del operations). This could leave the datapath as
> lightweight as possible.
> 
> Signed-off-by: Yuanhan Liu <[email protected]>
> ---
> 
> v5: - removed an not-used mutex lock
> ---
>  lib/dpif-netdev.c | 348 ++++++++++++++++++++++++++++++++++++++++---------
> -----
>  1 file changed, 258 insertions(+), 90 deletions(-)
> 
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index cb16e2e..b1f6973
> 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -1854,13 +1854,11 @@ struct flow_mark {
>      struct cmap megaflow_to_mark;
>      struct cmap mark_to_flow;
>      struct id_pool *pool;
> -    struct ovs_mutex mutex;
>  };
> 
>  static struct flow_mark flow_mark = {
>      .megaflow_to_mark = CMAP_INITIALIZER,
>      .mark_to_flow = CMAP_INITIALIZER,
> -    .mutex = OVS_MUTEX_INITIALIZER,
>  };
> 
>  static uint32_t
> @@ -2001,6 +1999,8 @@ mark_to_flow_disassociate(struct
> dp_netdev_pmd_thread *pmd,
>      return ret;
>  }
> 
> +static void queue_netdev_flow_del(struct dp_netdev_pmd_thread *pmd,
> +                                  struct dp_netdev_flow *flow);

This seems out of place here, suggest moving it towards the beginning of the 
file along with the other function prototypes for dpif-netdev.

>  static void
>  flow_mark_flush(struct dp_netdev_pmd_thread *pmd)  { @@ -2008,7 +2008,7
> @@ flow_mark_flush(struct dp_netdev_pmd_thread *pmd)
> 
>      CMAP_FOR_EACH (flow, mark_node, &flow_mark.mark_to_flow) {
>          if (flow->pmd_id == pmd->core_id) {
> -            mark_to_flow_disassociate(pmd, flow);
> +            queue_netdev_flow_del(pmd, flow);
>          }
>      }
>  }
> @@ -2030,6 +2030,257 @@ mark_to_flow_find(const struct
> dp_netdev_pmd_thread *pmd,
>      return NULL;
>  }
> 
> +enum {
> +    DP_NETDEV_FLOW_OFFLOAD_OP_ADD,
> +    DP_NETDEV_FLOW_OFFLOAD_OP_MOD,
> +    DP_NETDEV_FLOW_OFFLOAD_OP_DEL,
> +};

Again, I'd prefer enums like this to be kept at the beginning of the file 
rather than interspersed through the functions. 
It probably warrants defining a specific HWOL either at the top of this file or 
preferably spun out to a separate HWOL module all together so as not to 
overload the dpif-netdev code.

> +
> +struct dp_flow_offload_item {
> +    struct dp_netdev_pmd_thread *pmd;
> +    struct dp_netdev_flow *flow;
> +    int op;
> +    struct match match;
> +    struct nlattr *actions;
> +    size_t actions_len;
> +
> +    struct ovs_list node;
> +};
> +
> +struct dp_flow_offload {
> +    struct ovs_mutex mutex;
> +    struct ovs_list list;
> +    pthread_cond_t cond;
> +};
> +
> +static struct dp_flow_offload dp_flow_offload = {
> +    .mutex = OVS_MUTEX_INITIALIZER,
> +    .list  = OVS_LIST_INITIALIZER(&dp_flow_offload.list),
> +};
> +
> +static struct ovsthread_once offload_thread_once
> +    = OVSTHREAD_ONCE_INITIALIZER;
> +
> +static struct dp_flow_offload_item *
> +dp_netdev_alloc_flow_offload(struct dp_netdev_pmd_thread *pmd,
> +                             struct dp_netdev_flow *flow,
> +                             int op)
> +{
> +    struct dp_flow_offload_item *offload;
> +
> +    offload = xzalloc(sizeof(*offload));
> +    offload->pmd = pmd;
> +    offload->flow = flow;
> +    offload->op = op;
> +
> +    dp_netdev_flow_ref(flow);
> +    dp_netdev_pmd_try_ref(pmd);
> +
> +    return offload;
> +}
> +
> +static void
> +dp_netdev_free_flow_offload(struct dp_flow_offload_item *offload) {
> +    dp_netdev_pmd_unref(offload->pmd);
> +    dp_netdev_flow_unref(offload->flow);
> +
> +    free(offload->actions);
> +    free(offload);
> +}
> +
> +static void
> +dp_netdev_append_flow_offload(struct dp_flow_offload_item *offload) {
> +    ovs_mutex_lock(&dp_flow_offload.mutex);
> +    ovs_list_push_back(&dp_flow_offload.list, &offload->node);
> +    xpthread_cond_signal(&dp_flow_offload.cond);
> +    ovs_mutex_unlock(&dp_flow_offload.mutex);
> +}
> +
> +static int
> +dp_netdev_flow_offload_del(struct dp_flow_offload_item *offload) {
> +    return mark_to_flow_disassociate(offload->pmd, offload->flow); }
> +
> +/*
> + * There are two flow offload operations here: addition and modification.
> + *
> + * For flow addition, this function does:
> + * - allocate a new flow mark id
> + * - perform hardware flow offload
> + * - associate the flow mark with flow and mega flow
> + *
> + * For flow modification, both flow mark and the associations are still
> + * valid, thus only item 2 needed.
> + */
> +static int
> +dp_netdev_flow_offload_put(struct dp_flow_offload_item *offload) {
> +    struct dp_netdev_port *port;
> +    struct dp_netdev_pmd_thread *pmd = offload->pmd;
> +    struct dp_netdev_flow *flow = offload->flow;
> +    odp_port_t in_port = flow->flow.in_port.odp_port;
> +    bool modification = offload->op == DP_NETDEV_FLOW_OFFLOAD_OP_MOD;
> +    struct offload_info info;
> +    uint32_t mark;
> +    int ret;
> +
> +    if (flow->dead) {
> +        return -1;
> +    }
> +
> +    if (modification) {
> +        mark = flow->mark;
> +        ovs_assert(mark != INVALID_FLOW_MARK);
> +    } else {
> +        /*
> +         * If a mega flow has already been offloaded (from other PMD
> +         * instances), do not offload it again.
> +         */
> +        mark = megaflow_to_mark_find(&flow->mega_ufid);
> +        if (mark != INVALID_FLOW_MARK) {
> +            VLOG_DBG("flow has already been offloaded with mark %u\n",
> mark);
> +            if (flow->mark != INVALID_FLOW_MARK) {
> +                ovs_assert(flow->mark == mark);
> +            } else {
> +                mark_to_flow_associate(mark, flow);
> +            }
> +            return 0;
> +        }
> +
> +        mark = flow_mark_alloc();
> +        if (mark == INVALID_FLOW_MARK) {
> +            VLOG_ERR("failed to allocate flow mark!\n");
> +        }
> +    }
> +    info.flow_mark = mark;
> +
> +    ovs_mutex_lock(&pmd->dp->port_mutex);
> +    port = dp_netdev_lookup_port(pmd->dp, in_port);
> +    if (!port) {
> +        ovs_mutex_unlock(&pmd->dp->port_mutex);
> +        return -1;
> +    }
> +    ret = netdev_flow_put(port->netdev, &offload->match,
> +                          CONST_CAST(struct nlattr *, offload->actions),
> +                          offload->actions_len, &flow->mega_ufid, &info,
> +                          NULL);
> +    ovs_mutex_unlock(&pmd->dp->port_mutex);
> +
> +    if (ret) {
> +        if (!modification) {
> +            flow_mark_free(mark);
> +        } else {
> +            mark_to_flow_disassociate(pmd, flow);
> +        }
> +        return -1;
> +    }
> +
> +    if (!modification) {
> +        megaflow_to_mark_associate(&flow->mega_ufid, mark);
> +        mark_to_flow_associate(mark, flow);
> +    }
> +
> +    return 0;
> +}
> +
> +static void *
> +dp_netdev_flow_offload_main(void *data OVS_UNUSED) {
> +    struct dp_flow_offload_item *offload;
> +    struct ovs_list *list;
> +    const char *op;
> +    int ret;
> +
> +    for (;;) {
> +        ovs_mutex_lock(&dp_flow_offload.mutex);
> +        if (ovs_list_is_empty(&dp_flow_offload.list)) {
> +            ovsrcu_quiesce_start();
> +            ovs_mutex_cond_wait(&dp_flow_offload.cond,
> +                                &dp_flow_offload.mutex);
> +        }
> +        list = ovs_list_pop_front(&dp_flow_offload.list);
> +        offload = CONTAINER_OF(list, struct dp_flow_offload_item, node);
> +        ovs_mutex_unlock(&dp_flow_offload.mutex);
> +
> +        switch (offload->op) {
> +        case DP_NETDEV_FLOW_OFFLOAD_OP_ADD:
> +            op = "add";
> +            ret = dp_netdev_flow_offload_put(offload);
> +            break;
> +        case DP_NETDEV_FLOW_OFFLOAD_OP_MOD:
> +            op = "modify";
> +            ret = dp_netdev_flow_offload_put(offload);
> +            break;
> +        case DP_NETDEV_FLOW_OFFLOAD_OP_DEL:
> +            op = "delete";
> +            ret = dp_netdev_flow_offload_del(offload);
> +            break;
> +        default:
> +            OVS_NOT_REACHED();
> +        }
> +
> +        VLOG_DBG("%s to %s netdev flow\n",
> +                 ret == 0 ? "succeed" : "failed", op);
> +        dp_netdev_free_flow_offload(offload);
> +    }
> +
> +    return NULL;
> +}
> +
> +static void
> +queue_netdev_flow_del(struct dp_netdev_pmd_thread *pmd,
> +                      struct dp_netdev_flow *flow) {
> +    struct dp_flow_offload_item *offload;
> +
> +    if (ovsthread_once_start(&offload_thread_once)) {
> +        xpthread_cond_init(&dp_flow_offload.cond, NULL);
> +        ovs_thread_create("dp_netdev_flow_offload",
> +                          dp_netdev_flow_offload_main, NULL);
> +        ovsthread_once_done(&offload_thread_once);
> +    }
> +
> +    offload = dp_netdev_alloc_flow_offload(pmd, flow,
> +
> DP_NETDEV_FLOW_OFFLOAD_OP_DEL);
> +    dp_netdev_append_flow_offload(offload);
> +}
> +
> +static void
> +queue_netdev_flow_put(struct dp_netdev_pmd_thread *pmd,
> +                      struct dp_netdev_flow *flow, struct match *match,
> +                      const struct nlattr *actions, size_t actions_len)
> +{
> +    struct dp_flow_offload_item *offload;
> +    int op;
> +
> +    if (!netdev_is_flow_api_enabled()) {
> +        return;
> +    }
> +
> +    if (ovsthread_once_start(&offload_thread_once)) {
> +        xpthread_cond_init(&dp_flow_offload.cond, NULL);
> +        ovs_thread_create("dp_netdev_flow_offload",
> +                          dp_netdev_flow_offload_main, NULL);
> +        ovsthread_once_done(&offload_thread_once);
> +    }
> +
> +    if (flow->mark != INVALID_FLOW_MARK) {
> +        op = DP_NETDEV_FLOW_OFFLOAD_OP_MOD;
> +    } else {
> +        op = DP_NETDEV_FLOW_OFFLOAD_OP_ADD;
> +    }
> +    offload = dp_netdev_alloc_flow_offload(pmd, flow, op);
> +    offload->match = *match;
> +    offload->actions = xmalloc(actions_len);
> +    memcpy(offload->actions, actions, actions_len);
> +    offload->actions_len = actions_len;
> +
> +    dp_netdev_append_flow_offload(offload);
> +}
> +
>  static void
>  dp_netdev_pmd_remove_flow(struct dp_netdev_pmd_thread *pmd,
>                            struct dp_netdev_flow *flow) @@ -2044,7 +2295,7
> @@ dp_netdev_pmd_remove_flow(struct dp_netdev_pmd_thread *pmd,
>      dpcls_remove(cls, &flow->cr);
>      cmap_remove(&pmd->flow_table, node, dp_netdev_flow_hash(&flow-
> >ufid));
>      if (flow->mark != INVALID_FLOW_MARK) {
> -        mark_to_flow_disassociate(pmd, flow);
> +        queue_netdev_flow_del(pmd, flow);
>      }
>      flow->dead = true;
> 
> @@ -2625,88 +2876,6 @@ out:
>      return error;
>  }
> 
> -/*
> - * There are two flow offload operations here: addition and modification.
> - *
> - * For flow addition, this function does:
> - * - allocate a new flow mark id
> - * - perform hardware flow offload
> - * - associate the flow mark with flow and mega flow
> - *
> - * For flow modification, both flow mark and the associations are still
> - * valid, thus only item 2 needed.
> - */
> -static void
> -try_netdev_flow_put(struct dp_netdev_pmd_thread *pmd, odp_port_t in_port,
> -                    struct dp_netdev_flow *flow, struct match *match,
> -                    const struct nlattr *actions, size_t actions_len)
> -{
> -    struct offload_info info;
> -    struct dp_netdev_port *port;
> -    bool modification = flow->mark != INVALID_FLOW_MARK;
> -    const char *op = modification ? "modify" : "add";
> -    uint32_t mark;
> -    int ret;
> -
> -    ovs_mutex_lock(&flow_mark.mutex);
> -
> -    if (modification) {
> -        mark = flow->mark;
> -    } else {
> -        if (!netdev_is_flow_api_enabled()) {
> -            goto out;
> -        }
> -
> -        /*
> -         * If a mega flow has already been offloaded (from other PMD
> -         * instances), do not offload it again.
> -         */
> -        mark = megaflow_to_mark_find(&flow->mega_ufid);
> -        if (mark != INVALID_FLOW_MARK) {
> -            VLOG_DBG("flow has already been offloaded with mark %u\n",
> mark);
> -            mark_to_flow_associate(mark, flow);
> -            goto out;
> -        }
> -
> -        mark = flow_mark_alloc();
> -        if (mark == INVALID_FLOW_MARK) {
> -            VLOG_ERR("failed to allocate flow mark!\n");
> -            goto out;
> -        }
> -    }
> -    info.flow_mark = mark;
> -
> -    ovs_mutex_lock(&pmd->dp->port_mutex);
> -    port = dp_netdev_lookup_port(pmd->dp, in_port);
> -    if (!port) {
> -        ovs_mutex_unlock(&pmd->dp->port_mutex);
> -        goto out;
> -    }
> -    ret = netdev_flow_put(port->netdev, match,
> -                          CONST_CAST(struct nlattr *, actions),
> -                          actions_len, &flow->mega_ufid, &info, NULL);
> -    ovs_mutex_unlock(&pmd->dp->port_mutex);
> -
> -    if (ret) {
> -        VLOG_ERR("failed to %s netdev flow with mark %u\n", op, mark);
> -        if (!modification) {
> -            flow_mark_free(mark);
> -        } else {
> -            mark_to_flow_disassociate(pmd, flow);
> -        }
> -        goto out;
> -    }
> -
> -    if (!modification) {
> -        megaflow_to_mark_associate(&flow->mega_ufid, mark);
> -        mark_to_flow_associate(mark, flow);
> -    }
> -    VLOG_DBG("succeed to %s netdev flow with mark %u\n", op, mark);
> -
> -out:
> -    ovs_mutex_unlock(&flow_mark.mutex);
> -}
> -
>  static void
>  dp_netdev_get_mega_ufid(const struct match *match, ovs_u128 *mega_ufid)
> { @@ -2772,7 +2941,7 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread
> *pmd,
>      cmap_insert(&pmd->flow_table, CONST_CAST(struct cmap_node *, &flow-
> >node),
>                  dp_netdev_flow_hash(&flow->ufid));
> 
> -    try_netdev_flow_put(pmd, in_port, flow, match, actions, actions_len);
> +    queue_netdev_flow_put(pmd, flow, match, actions, actions_len);
> 
>      if (OVS_UNLIKELY(!VLOG_DROP_DBG((&upcall_rl)))) {
>          struct ds ds = DS_EMPTY_INITIALIZER; @@ -2854,7 +3023,6 @@
> flow_put_on_pmd(struct dp_netdev_pmd_thread *pmd,
>          if (put->flags & DPIF_FP_MODIFY) {
>              struct dp_netdev_actions *new_actions;
>              struct dp_netdev_actions *old_actions;
> -            odp_port_t in_port = netdev_flow->flow.in_port.odp_port;
> 
>              new_actions = dp_netdev_actions_create(put->actions,
>                                                     put->actions_len); @@
> -2862,8 +3030,8 @@ flow_put_on_pmd(struct dp_netdev_pmd_thread *pmd,
>              old_actions = dp_netdev_flow_get_actions(netdev_flow);
>              ovsrcu_set(&netdev_flow->actions, new_actions);
> 
> -            try_netdev_flow_put(pmd, in_port, netdev_flow, match,
> -                                put->actions, put->actions_len);
> +            queue_netdev_flow_put(pmd, netdev_flow, match,
> +                                  put->actions, put->actions_len);
> 
>              if (stats) {
>                  get_dpif_flow_stats(netdev_flow, stats);
> --
> 2.7.4

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to