> -----Original Message-----
> From: Yuanhan Liu [mailto:y...@fridaylinux.org]
> Sent: Monday, January 29, 2018 7:00 AM
> To: d...@openvswitch.org
> Cc: Stokes, Ian <ian.sto...@intel.com>; Finn Christensen
> <f...@napatech.com>; Darrell Ball <db...@vmware.com>; Chandran, Sugesh
> <sugesh.chand...@intel.com>; Simon Horman <simon.hor...@netronome.com>;
> Yuanhan Liu <y...@fridaylinux.org>
> Subject: [PATCH v7 1/6] dpif-netdev: associate flow with a mark id
> 
> Most modern NICs have the ability to bind a flow with a mark, so that
> every packet matches such flow will have that mark present in its
> descriptor.
> 
> The basic idea of doing that is, when we receives packets later, we could
> directly get the flow from the mark. That could avoid some very costly CPU
> operations, including (but not limiting to) miniflow_extract, emc lookup,
> dpcls lookup, etc. Thus, performance could be greatly improved.
> 
> Thus, the major work of this patch is to associate a flow with a mark id
> (an uint32_t number). The association in netdev datapath is done by CMAP,
> while in hardware it's done by the rte_flow MARK action.
> 
> One tricky thing in OVS-DPDK is, the flow tables is per-PMD. For the case
> there is only one phys port but with 2 queues, there could be 2 PMDs. In
> other words, even for a single mega flow (i.e. udp,tp_src=1000), there
> could be 2 different dp_netdev flows, one for each PMD. That could results
> to the same mega flow being offloaded twice in the hardware, worse, we may
> get 2 different marks and only the last one will work.
> 
> To avoid that, a megaflow_to_mark CMAP is created. An entry will be added
> for the first PMD that wants to offload a flow. For later PMDs, it will
> see such megaflow is already offloaded, then the flow will not be
> offloaded to HW twice.
> 
> Meanwhile, the mark to flow mapping becomes to 1:N mapping. That is what
> the mark_to_flow CMAP is for. When the first PMD wants to offload a flow,
> it allocates a new mark and performs the flow offload by reusing the -
> >flow_put method. When it succeeds, a "mark to flow" entry will be added.
> For later PMDs, it will get the corresponding mark by above
> megaflow_to_mark CMAP. Then, another "mark to flow" entry will be added.
> 
> Co-authored-by: Finn Christensen <f...@napatech.com>
> Signed-off-by: Yuanhan Liu <y...@fridaylinux.org>
> Signed-off-by: Finn Christensen <f...@napatech.com>
> ---
> 
> v6: - fixed typos in commit log
>     - fixed a sparse warning
>     - used hash_int to compute the hash for mark_to_flow CMAP
>     - added more comments
>     - lock before port lookup
> 
> v5: - fixed check of flow_mark_has_no_ref (renamed from
>       is_last_flow_mark_reference).
>       This fixed an issue that it took too long to finish
>       flow add/removal if we do that repeatdly.
> 
>     - do mark_to_flow disassociation if flow modification failed
> ---
>  lib/dpif-netdev.c | 283
> ++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  lib/netdev.h      |   6 ++
>  2 files changed, 289 insertions(+)
> 
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index ba62128..a514de8
> 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -75,6 +75,7 @@
>  #include "tnl-ports.h"
>  #include "unixctl.h"
>  #include "util.h"
> +#include "uuid.h"
> 
>  VLOG_DEFINE_THIS_MODULE(dpif_netdev);
> 
> @@ -430,7 +431,9 @@ struct dp_netdev_flow {
>      /* Hash table index by unmasked flow. */
>      const struct cmap_node node; /* In owning dp_netdev_pmd_thread's */
>                                   /* 'flow_table'. */
> +    const struct cmap_node mark_node; /* In owning flow_mark's
> + mark_to_flow */
>      const ovs_u128 ufid;         /* Unique flow identifier. */
> +    const ovs_u128 mega_ufid;    /* Unique mega flow identifier. */
>      const unsigned pmd_id;       /* The 'core_id' of pmd thread owning
> this */
>                                   /* flow. */
> 
> @@ -441,6 +444,7 @@ struct dp_netdev_flow {
>      struct ovs_refcount ref_cnt;
> 
>      bool dead;
> +    uint32_t mark;               /* Unique flow mark assigned to a flow
> */
> 
>      /* Statistics. */
>      struct dp_netdev_flow_stats stats;
> @@ -1837,6 +1841,178 @@ dp_netdev_pmd_find_dpcls(struct
> dp_netdev_pmd_thread *pmd,
>      return cls;
>  }
> 
> +#define MAX_FLOW_MARK       (UINT32_MAX - 1)
> +#define INVALID_FLOW_MARK   (UINT32_MAX)
> +
> +struct megaflow_to_mark_data {
> +    const struct cmap_node node;
> +    ovs_u128 mega_ufid;
> +    uint32_t mark;
> +};
> +
> +struct flow_mark {
> +    struct cmap megaflow_to_mark;
> +    struct cmap mark_to_flow;
> +    struct id_pool *pool;
> +    struct ovs_mutex mutex;
> +};
> +
> +static struct flow_mark flow_mark = {
> +    .megaflow_to_mark = CMAP_INITIALIZER,
> +    .mark_to_flow = CMAP_INITIALIZER,
> +    .mutex = OVS_MUTEX_INITIALIZER,
> +};
> +
> +static uint32_t
> +flow_mark_alloc(void)
> +{
> +    uint32_t mark;
> +
> +    if (!flow_mark.pool) {
> +        /* Haven't initiated yet, do it here */
> +        flow_mark.pool = id_pool_create(0, MAX_FLOW_MARK);
> +    }
> +
> +    if (id_pool_alloc_id(flow_mark.pool, &mark)) {
> +        return mark;
> +    }
> +
> +    return INVALID_FLOW_MARK;
> +}
> +
> +static void
> +flow_mark_free(uint32_t mark)
> +{
> +    id_pool_free_id(flow_mark.pool, mark); }
> +
> +/* associate megaflow with a mark, which is a 1:1 mapping */ static
> +void megaflow_to_mark_associate(const ovs_u128 *mega_ufid, uint32_t
> +mark) {
> +    size_t hash = dp_netdev_flow_hash(mega_ufid);
> +    struct megaflow_to_mark_data *data = xzalloc(sizeof(*data));
> +
> +    data->mega_ufid = *mega_ufid;
> +    data->mark = mark;
> +
> +    cmap_insert(&flow_mark.megaflow_to_mark,
> +                CONST_CAST(struct cmap_node *, &data->node), hash); }
> +
> +/* disassociate meagaflow with a mark */ static void
> +megaflow_to_mark_disassociate(const ovs_u128 *mega_ufid) {
> +    size_t hash = dp_netdev_flow_hash(mega_ufid);
> +    struct megaflow_to_mark_data *data;
> +
> +    CMAP_FOR_EACH_WITH_HASH (data, node, hash,
> &flow_mark.megaflow_to_mark) {
> +        if (ovs_u128_equals(*mega_ufid, data->mega_ufid)) {
> +            cmap_remove(&flow_mark.megaflow_to_mark,
> +                        CONST_CAST(struct cmap_node *, &data->node),
> hash);
> +            free(data);
> +            return;
> +        }
> +    }
> +
> +    VLOG_WARN("masked ufid "UUID_FMT" is not associated with a mark?\n",
> +              UUID_ARGS((struct uuid *)mega_ufid)); }

Minor nit above, Capitalize the first letter the warning message 'Masked' to 
keep consistent with existing code standard.
There are multiple instances of this for info, debug & warning messages 
throughout the patch series but I will just flag this once here for brevity.

> +
> +static inline uint32_t
> +megaflow_to_mark_find(const ovs_u128 *mega_ufid) {
> +    size_t hash = dp_netdev_flow_hash(mega_ufid);
> +    struct megaflow_to_mark_data *data;
> +
> +    CMAP_FOR_EACH_WITH_HASH (data, node, hash,
> &flow_mark.megaflow_to_mark) {
> +        if (ovs_u128_equals(*mega_ufid, data->mega_ufid)) {
> +            return data->mark;
> +        }
> +    }

In keeping with other helper methods for the megaflow to mark, it would be 
useful here to add a debug message to report that a mark for this megaflow was 
not found.

> +
> +    return INVALID_FLOW_MARK;
> +}
> +
> +/* associate mark with a flow, which is 1:N mapping */ static void
> +mark_to_flow_associate(const uint32_t mark, struct dp_netdev_flow
> +*flow) {
> +    dp_netdev_flow_ref(flow);
> +
> +    cmap_insert(&flow_mark.mark_to_flow,
> +                CONST_CAST(struct cmap_node *, &flow->mark_node),
> +                hash_int(mark, 0));
> +    flow->mark = mark;
> +
> +    VLOG_DBG("associated dp_netdev flow %p with mark %u\n", flow,
> +mark); }
> +
> +static bool
> +flow_mark_has_no_ref(uint32_t mark)
> +{
> +    struct dp_netdev_flow *flow;
> +
> +    CMAP_FOR_EACH_WITH_HASH (flow, mark_node, hash_int(mark, 0),
> +                             &flow_mark.mark_to_flow) {
> +        if (flow->mark == mark) {
> +            return false;
> +        }
> +    }
> +
> +    return true;
> +}
> +
> +static int
> +mark_to_flow_disassociate(struct dp_netdev_pmd_thread *pmd,
> +                          struct dp_netdev_flow *flow) {
> +    int ret = 0;
> +    uint32_t mark = flow->mark;
> +    struct cmap_node *mark_node = CONST_CAST(struct cmap_node *,
> +                                             &flow->mark_node);
> +
> +    cmap_remove(&flow_mark.mark_to_flow, mark_node, hash_int(mark, 0));
> +    flow->mark = INVALID_FLOW_MARK;
> +
> +    /*
> +     * no flow is referencing the mark any more? If so, let's
> +     * remove the flow from hardare and free the mark.
> +     */

Minor typo, 'hardware' instead of 'hardare' above.

> +    if (flow_mark_has_no_ref(mark)) {
> +        struct dp_netdev_port *port;
> +        odp_port_t in_port = flow->flow.in_port.odp_port;
> +
> +        ovs_mutex_lock(&pmd->dp->port_mutex);
> +        port = dp_netdev_lookup_port(pmd->dp, in_port);
> +        if (port) {
> +            ret = netdev_flow_del(port->netdev, &flow->mega_ufid, NULL);
> +        }
> +        ovs_mutex_unlock(&pmd->dp->port_mutex);
> +
> +        flow_mark_free(mark);
> +        VLOG_DBG("freed flow mark %u\n", mark);
> +
> +        megaflow_to_mark_disassociate(&flow->mega_ufid);
> +    }
> +    dp_netdev_flow_unref(flow);
> +
> +    return ret;
> +}
> +
> +static void
> +flow_mark_flush(struct dp_netdev_pmd_thread *pmd) {
> +    struct dp_netdev_flow *flow;
> +
> +    CMAP_FOR_EACH (flow, mark_node, &flow_mark.mark_to_flow) {
> +        if (flow->pmd_id == pmd->core_id) {
> +            mark_to_flow_disassociate(pmd, flow);
> +        }
> +    }
> +}
> +
>  static void
>  dp_netdev_pmd_remove_flow(struct dp_netdev_pmd_thread *pmd,
>                            struct dp_netdev_flow *flow) @@ -1850,6 +2026,9
> @@ dp_netdev_pmd_remove_flow(struct dp_netdev_pmd_thread *pmd,
>      ovs_assert(cls != NULL);
>      dpcls_remove(cls, &flow->cr);
>      cmap_remove(&pmd->flow_table, node, dp_netdev_flow_hash(&flow-
> >ufid));
> +    if (flow->mark != INVALID_FLOW_MARK) {
> +        mark_to_flow_disassociate(pmd, flow);
> +    }
>      flow->dead = true;
> 
>      dp_netdev_flow_unref(flow);
> @@ -2429,6 +2608,101 @@ out:
>      return error;
>  }
> 
> +/*
> + * There are two flow offload operations here: addition and modification.
> + *
> + * For flow addition, this function does:
> + * - allocate a new flow mark id
> + * - perform hardware flow offload
> + * - associate the flow mark with flow and mega flow
> + *
> + * For flow modification, both flow mark and the associations are still
> + * valid, thus only item 2 needed.
> + */
> +static void
> +try_netdev_flow_put(struct dp_netdev_pmd_thread *pmd, odp_port_t in_port,
> +                    struct dp_netdev_flow *flow, struct match *match,
> +                    const struct nlattr *actions, size_t actions_len) {
> +    struct offload_info info;
> +    struct dp_netdev_port *port;
> +    bool modification = flow->mark != INVALID_FLOW_MARK;
> +    const char *op = modification ? "modify" : "add";
> +    uint32_t mark;
> +    int ret;
> +
> +    ovs_mutex_lock(&flow_mark.mutex);
> +
> +    if (modification) {
> +        mark = flow->mark;
> +    } else {
> +        if (!netdev_is_flow_api_enabled()) {
> +            goto out;
> +        }
> +
> +        /*
> +         * If a mega flow has already been offloaded (from other PMD
> +         * instances), do not offload it again.
> +         */
> +        mark = megaflow_to_mark_find(&flow->mega_ufid);
> +        if (mark != INVALID_FLOW_MARK) {
> +            VLOG_DBG("flow has already been offloaded with mark %u\n",
> mark);
> +            mark_to_flow_associate(mark, flow);
> +            goto out;
> +        }
> +
> +        mark = flow_mark_alloc();
> +        if (mark == INVALID_FLOW_MARK) {
> +            VLOG_ERR("failed to allocate flow mark!\n");
> +            goto out;
> +        }
> +    }
> +    info.flow_mark = mark;
> +
> +    ovs_mutex_lock(&pmd->dp->port_mutex);
> +    port = dp_netdev_lookup_port(pmd->dp, in_port);
> +    if (!port) {
> +        ovs_mutex_unlock(&pmd->dp->port_mutex);
> +        goto out;
> +    }
> +    ret = netdev_flow_put(port->netdev, match,
> +                          CONST_CAST(struct nlattr *, actions),
> +                          actions_len, &flow->mega_ufid, &info, NULL);
> +    ovs_mutex_unlock(&pmd->dp->port_mutex);
> +
> +    if (ret) {
> +        VLOG_ERR("failed to %s netdev flow with mark %u\n", op, mark);
> +        if (!modification) {
> +            flow_mark_free(mark);
> +        } else {
> +            mark_to_flow_disassociate(pmd, flow);

               In the case here would it not be a call to flush? If the flow 
could not be modified wouldn't you have to remove all references for it not 
just on this PMD?

> +        }
> +        goto out;
> +    }
> +
> +    if (!modification) {
> +        megaflow_to_mark_associate(&flow->mega_ufid, mark);
> +        mark_to_flow_associate(mark, flow);
> +    }
> +    VLOG_DBG("succeed to %s netdev flow with mark %u\n", op, mark);
> +
> +out:
> +    ovs_mutex_unlock(&flow_mark.mutex);
> +}
> +
> +static void
> +dp_netdev_get_mega_ufid(const struct match *match, ovs_u128 *mega_ufid)
> +{
> +    struct flow masked_flow;
> +    size_t i;
> +
> +    for (i = 0; i < sizeof(struct flow); i++) {
> +        ((uint8_t *)&masked_flow)[i] = ((uint8_t *)&match->flow)[i] &
> +                                       ((uint8_t *)&match->wc)[i];
> +    }
> +    dpif_flow_hash(NULL, &masked_flow, sizeof(struct flow), mega_ufid);
> +}
> +
>  static struct dp_netdev_flow *
>  dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd,
>                     struct match *match, const ovs_u128 *ufid, @@ -2464,12
> +2738,14 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd,
>      memset(&flow->stats, 0, sizeof flow->stats);
>      flow->dead = false;
>      flow->batch = NULL;
> +    flow->mark = INVALID_FLOW_MARK;
>      *CONST_CAST(unsigned *, &flow->pmd_id) = pmd->core_id;
>      *CONST_CAST(struct flow *, &flow->flow) = match->flow;
>      *CONST_CAST(ovs_u128 *, &flow->ufid) = *ufid;
>      ovs_refcount_init(&flow->ref_cnt);
>      ovsrcu_set(&flow->actions, dp_netdev_actions_create(actions,
> actions_len));
> 
> +    dp_netdev_get_mega_ufid(match, CONST_CAST(ovs_u128 *,
> + &flow->mega_ufid));
>      netdev_flow_key_init_masked(&flow->cr.flow, &match->flow, &mask);
> 
>      /* Select dpcls for in_port. Relies on in_port to be exact match. */
> @@ -2479,6 +2755,8 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd,
>      cmap_insert(&pmd->flow_table, CONST_CAST(struct cmap_node *, &flow-
> >node),
>                  dp_netdev_flow_hash(&flow->ufid));
> 
> +    try_netdev_flow_put(pmd, in_port, flow, match, actions,
> + actions_len);
> +
>      if (OVS_UNLIKELY(!VLOG_DROP_DBG((&upcall_rl)))) {
>          struct ds ds = DS_EMPTY_INITIALIZER;
>          struct ofpbuf key_buf, mask_buf; @@ -2559,6 +2837,7 @@
> flow_put_on_pmd(struct dp_netdev_pmd_thread *pmd,
>          if (put->flags & DPIF_FP_MODIFY) {
>              struct dp_netdev_actions *new_actions;
>              struct dp_netdev_actions *old_actions;
> +            odp_port_t in_port = netdev_flow->flow.in_port.odp_port;
> 
>              new_actions = dp_netdev_actions_create(put->actions,
>                                                     put->actions_len); @@
> -2566,6 +2845,9 @@ flow_put_on_pmd(struct dp_netdev_pmd_thread *pmd,
>              old_actions = dp_netdev_flow_get_actions(netdev_flow);
>              ovsrcu_set(&netdev_flow->actions, new_actions);
> 
> +            try_netdev_flow_put(pmd, in_port, netdev_flow, match,
> +                                put->actions, put->actions_len);
> +
>              if (stats) {
>                  get_dpif_flow_stats(netdev_flow, stats);
>              }
> @@ -3635,6 +3917,7 @@ reload_affected_pmds(struct dp_netdev *dp)
> 
>      CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
>          if (pmd->need_reload) {
> +            flow_mark_flush(pmd);
>              dp_netdev_reload_pmd__(pmd);
>              pmd->need_reload = false;
>          }
> diff --git a/lib/netdev.h b/lib/netdev.h index ff1b604..9ee3092 100644
> --- a/lib/netdev.h
> +++ b/lib/netdev.h
> @@ -188,6 +188,12 @@ void netdev_send_wait(struct netdev *, int qid);
> struct offload_info {
>      const struct dpif_class *dpif_class;
>      ovs_be16 tp_dst_port; /* Destination port for tunnel in SET action */
> +
> +    /*
> +     * The flow mark id assigened to the flow. If any pkts hit the flow,
> +     * it will be in the pkt meta data.
> +     */
> +    uint32_t flow_mark;
>  };
>  struct dpif_class;
>  struct netdev_flow_dump;
> --
> 2.7.4

_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to