On Sun, Jul 5, 2020 at 6:29 PM Eli Britstein <[email protected]> wrote:
>
>
> On 6/29/2020 12:50 PM, Sriharsha Basavapatna wrote:
> > In this patch, we support offloading of VXLAN_ENCAP action for a vhost-user
> > port (aka "partial-action-offload"). At the time of offloading the flow, we
> > determine if the flow can be offloaded to an egress device, if the input
> > port is not offload capable such as a vhost-user port. We then offload the
> > flow with a VXLAN_ENCAP RTE action, to the egress device. We do not add
> > the OUTPUT RTE action, which indicates to the PMD that it is a partial
> > action offload request. Note that since the action is being offloaded in
> > egress direction, classification is expected to be done by OVS SW datapath
> > and hence there's no need to offload a MARK action.
> >
> > If offload succeeds, we save the information in 'dp_netdev_flow' so that
> > we skip execution of the corresponding action (previous patch) during SW
> > datapath processing.
> >
> > Signed-off-by: Sriharsha Basavapatna <[email protected]>
> > ---
> >   lib/dpif-netdev.c         | 212 ++++++++++++++++++++++++++++++++++++--
> >   lib/netdev-offload-dpdk.c |  78 ++++++++++----
> >   lib/netdev-offload.h      |   2 +
> >   3 files changed, 262 insertions(+), 30 deletions(-)
> >
> > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> > index e489e2d90..d289d265d 100644
> > --- a/lib/dpif-netdev.c
> > +++ b/lib/dpif-netdev.c
> > @@ -2488,10 +2488,174 @@ dp_netdev_append_flow_offload(struct 
> > dp_flow_offload_item *offload)
> >       ovs_mutex_unlock(&dp_flow_offload.mutex);
> >   }
> >
> > +static int
> > +partial_offload_egress_flow_del(struct dp_flow_offload_item *offload)
> > +{
> > +    struct dp_netdev_pmd_thread *pmd = offload->pmd;
> > +    struct dp_netdev_flow *flow = offload->flow;
> > +    const char *dpif_type_str = dpif_normalize_type(pmd->dp->class->type);
> > +    struct netdev *port;
> > +    int ret;
> > +
> > +    port = netdev_ports_get(flow->egress_offload_port, dpif_type_str);
> > +    if (!port) {
> > +        return -1;
> > +    }
> > +
> > +    /* Taking a global 'port_mutex' to fulfill thread safety
> > +     * restrictions for the netdev-offload-dpdk module. */
> > +    ovs_mutex_lock(&pmd->dp->port_mutex);
> > +    ret = netdev_flow_del(port, &flow->mega_ufid, NULL);
> > +    ovs_mutex_unlock(&pmd->dp->port_mutex);
> > +    netdev_close(port);
> > +
> > +    if (ret) {
> > +        return ret;
> > +    }
> > +
> > +    flow->egress_offload_port = NULL;
> > +    flow->partial_actions_offloaded = false;
> > +
> > +    VLOG_DBG_RL("%s: flow: %p mega_ufid: "UUID_FMT" pmd_id: %d\n", 
> > __func__,
> > +                flow, UUID_ARGS((struct uuid *)&flow->mega_ufid),
> > +                offload->flow->pmd_id);
> > +    return ret;
> > +}
> It's almost the same code as mark_to_flow_disassociate. Maybe just
> enhance it?

This function handles offload in the egress direction, whereas the mark
(and hence mark_to_flow_disassociate()) applies to ingress. It is better
to keep them separate.

> > +
> >   static int
> >   dp_netdev_flow_offload_del(struct dp_flow_offload_item *offload)
> >   {
> > -    return mark_to_flow_disassociate(offload->pmd, offload->flow);
> > +    if (unlikely(offload->flow->partial_actions_offloaded &&
> > +        offload->flow->egress_offload_port != ODPP_NONE)) {
> > +        return partial_offload_egress_flow_del(offload);
> > +    } else {
> > +        return mark_to_flow_disassociate(offload->pmd, offload->flow);
> > +    }
> > +}
> > +
> > +/* Structure to hold a nl_parsed OVS action */
> > +struct action_attr {
> > +    int type;                /* OVS action type */
> > +    struct nlattr *action;   /* action attribute */
> > +};
> > +
> > +/*
> > + * Maximum number of actions to be parsed while selecting a flow for partial
> > + * action offload. This number is currently based on the minimum number of
> > + * attributes seen with the tunnel encap action (clone, tunnel_push, 
> > output).
> > + * This number includes output action to a single egress device (uplink) 
> > and
> > + * supports neither multiple clone() actions nor multiple output actions.
> > + * This number could change if and when we support other actions or
> > + * combinations of actions for partial offload.
> > + */
> > +#define MAX_ACTION_ATTRS    3 /* Max # action attributes supported */
> > +
> > +/*
> > + * This function parses the list of OVS "actions" of length "actions_len",
> > + * and returns them in an array of action "attrs", of size "max_attrs".
> > + * The parsed number of actions is returned in "num_attrs". If the number
> > + * of actions exceeds "max_attrs", parsing is stopped and E2BIG is 
> > returned.
> > + * Otherwise, returns success (0).
> > + */
> > +static int
> > +parse_nlattr_actions(struct nlattr *actions, size_t actions_len,
> > +                     struct action_attr *attrs, int max_attrs, int 
> > *num_attrs)
> > +{
> > +    const struct nlattr *a;
> > +    unsigned int left;
> > +    int num_actions = 0;
> > +    int n_attrs = 0;
> > +    int rc = 0;
> > +    int type;
> > +
> > +    *num_attrs = 0;
> > +
> > +    NL_ATTR_FOR_EACH (a, left, actions, actions_len) {
> > +        type = nl_attr_type(a);
> > +
> > +        if (num_actions >= max_attrs) {
> > +            *num_attrs = num_actions;
> > +            return E2BIG;
> > +        }
> > +
> > +        attrs[num_actions].type = type;
> > +        attrs[num_actions].action = a;
> > +        num_actions++;
> > +        if (type == OVS_ACTION_ATTR_CLONE) {
> > +            rc = parse_nlattr_actions(nl_attr_get(a), nl_attr_get_size(a),
> > +                                      &attrs[num_actions],
> > +                                      (max_attrs - num_actions), &n_attrs);
> > +            num_actions += n_attrs;
> > +            if (rc == E2BIG) {
> > +                *num_attrs = num_actions;
> > +                return rc;
> > +            }
> > +        }
> > +    }
> > +
> > +    *num_attrs = num_actions;
> > +    return 0;
> > +}
> > +
> > +/* This function determines if the given flow should be partially offloaded
> > + * on the egress device, when the in-port is not offload-capable like a
> > + * vhost-user port. The function currently supports offloading of only
> > + * tunnel encap action.
> > + */
> > +static bool
> > +should_partial_offload_egress(struct netdev *in_netdev,
> > +                              struct dp_flow_offload_item *offload,
> > +                              struct netdev **egress_netdev)
> > +{
> > +    const char *dpif_type_str =
> > +        dpif_normalize_type(offload->pmd->dp->class->type);
> > +    struct action_attr attrs[MAX_ACTION_ATTRS];
> > +    odp_port_t out_port = ODPP_NONE;
> > +    struct netdev *out_netdev;
> > +    int num_attrs = 0;
> > +    int type;
> > +    int rc;
> > +
> > +    /* Support egress partial-offload only when in-port is vhost-user. */
> > +    if (!is_dpdk_vhost_netdev(in_netdev)) {
> > +        return false;
> > +    }
> > +
> > +    rc = parse_nlattr_actions(offload->actions, offload->actions_len, 
> > attrs,
> > +                              MAX_ACTION_ATTRS, &num_attrs);
> > +    if (rc == E2BIG) {
> > +        /* Action list too big; decline partial offload */
> > +        return false;
> > +    }
> > +
> > +    /* Number of attrs expected with tunnel encap action */
> > +    if (num_attrs < MAX_ACTION_ATTRS) {
> > +        return false;
> > +    }
> > +
> > +    /* Only support clone sub-actions for now, tnl-push specifically. */
> > +    if (attrs[0].type != OVS_ACTION_ATTR_CLONE ||
> > +        attrs[1].type != OVS_ACTION_ATTR_TUNNEL_PUSH ||
> > +        attrs[2].type != OVS_ACTION_ATTR_OUTPUT) {
> > +        return false;
> > +    }
> I think it's better to validate it in lib/netdev-offload-dpdk.c and not
> here. This will also nullify parse_nlattr_actions helper here.

We need to parse the actions and get the output port here to determine
whether it supports offloads (we have already established that the
in-port is a vhost-user port). If it does not, there is no need to
select this flow for partial-action offload. This is the place where we
make that decision, and hence it is better to also reject unsupported
actions here. Basically, this function contains the entire
partial-action-offload selection logic for the flow, and it is better
not to split it across files. Apart from this, we need to get the egress
netdev in this function, since we invoke netdev_flow_put() on it (and
not on the ingress netdev).

> > +
> > +    /* Egress partial-offload needs an output action at the end. */
> > +    out_port = nl_attr_get_odp_port(attrs[2].action);
> > +    if (out_port == ODPP_NONE) {
> > +        return false;
> > +    }
> > +
> > +    /* Support egress partial-offload only when out-port is offload 
> > capable. */
> > +    out_netdev = netdev_ports_get(out_port, dpif_type_str);
> > +    if (!out_netdev || !netdev_dpdk_flow_api_supported(out_netdev)) {
> > +        return false;
> > +    }
> > +
> > +    /* Flow can be egress partial-offloaded. */
> > +    *egress_netdev = out_netdev;
> > +    offload->flow->egress_offload_port = out_port;
> > +    return true;
> >   }
> >
> >   static int
> > @@ -2552,7 +2716,9 @@ dp_netdev_flow_offload_put(struct 
> > dp_flow_offload_item *offload)
> >       bool modification = offload->op == DP_NETDEV_FLOW_OFFLOAD_OP_MOD;
> >       struct offload_info info;
> >       struct netdev *port;
> > -    uint32_t mark;
> > +    struct netdev *egress_port = NULL;
> > +    bool alloc_mark = true;
> > +    uint32_t mark = INVALID_FLOW_MARK;
> >       int ret;
> >
> >       if (flow->dead) {
> > @@ -2564,11 +2730,25 @@ dp_netdev_flow_offload_put(struct 
> > dp_flow_offload_item *offload)
> >           return -1;
> >       }
> >
> > -    if (dp_netdev_alloc_flow_mark(flow, modification, &mark)) {
> > -            /* flow already offloaded */
> > +    info.attr_egress = 0;
> > +    info.partial_actions = 0;
> > +
> > +    if (unlikely(should_partial_offload_egress(port, offload, 
> > &egress_port))) {
> > +        if (egress_port) {
> >               netdev_close(port);
> > -            return 0;
> > +            port = egress_port;
> > +            info.attr_egress = 1;
> > +            alloc_mark = false;
> > +        }
> > +        info.partial_actions = 1;
> > +    }
> > +
> > +    if (alloc_mark && dp_netdev_alloc_flow_mark(flow, modification, 
> > &mark)) {
> > +            /* flow already offloaded */
> > +        netdev_close(port);
> > +        return 0;
> >       }
> > +
> >       info.flow_mark = mark;
> >
> >       /* Taking a global 'port_mutex' to fulfill thread safety restrictions 
> > for
> > @@ -2585,17 +2765,24 @@ dp_netdev_flow_offload_put(struct 
> > dp_flow_offload_item *offload)
> >           goto err_free;
> >       }
> >
> > -    if (!modification) {
> > +    if (unlikely(info.partial_actions && egress_port)) {
> > +        VLOG_DBG_RL("%s: flow: %p mega_ufid: "UUID_FMT" pmd_id: %d\n",
> > +                    __func__, flow, UUID_ARGS((struct uuid 
> > *)&flow->mega_ufid),
> > +                    flow->pmd_id);
> > +        flow->partial_actions_offloaded = true;
> > +    } else if (!modification) {
> >           megaflow_to_mark_associate(&flow->mega_ufid, mark);
> >           mark_to_flow_associate(mark, flow);
> >       }
> >       return 0;
> >
> >   err_free:
> > -    if (!modification) {
> > -        flow_mark_free(mark);
> > -    } else {
> > -        mark_to_flow_disassociate(pmd, flow);
> > +    if (mark != INVALID_FLOW_MARK) {
> > +        if (!modification) {
> > +            flow_mark_free(mark);
> > +        } else {
> > +            mark_to_flow_disassociate(pmd, flow);
> > +        }
> >       }
> >       return -1;
> >   }
> > @@ -2711,7 +2898,8 @@ dp_netdev_pmd_remove_flow(struct dp_netdev_pmd_thread 
> > *pmd,
> >       ovs_assert(cls != NULL);
> >       dpcls_remove(cls, &flow->cr);
> >       cmap_remove(&pmd->flow_table, node, dp_netdev_flow_hash(&flow->ufid));
> > -    if (flow->mark != INVALID_FLOW_MARK) {
> > +    if (flow->mark != INVALID_FLOW_MARK || (flow->partial_actions_offloaded
> > +        && flow->egress_offload_port != ODPP_NONE)) {
> >           queue_netdev_flow_del(pmd, flow);
> >       }
> >       flow->dead = true;
> > @@ -3469,6 +3657,8 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd,
> >       flow->dead = false;
> >       flow->batch = NULL;
> >       flow->mark = INVALID_FLOW_MARK;
> > +    flow->partial_actions_offloaded = false;
> > +    flow->egress_offload_port = ODPP_NONE;
> >       *CONST_CAST(unsigned *, &flow->pmd_id) = pmd->core_id;
> >       *CONST_CAST(struct flow *, &flow->flow) = match->flow;
> >       *CONST_CAST(ovs_u128 *, &flow->ufid) = *ufid;
> > diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
> > index 2ed3cb125..ad384e101 100644
> > --- a/lib/netdev-offload-dpdk.c
> > +++ b/lib/netdev-offload-dpdk.c
> > @@ -57,6 +57,7 @@ static struct cmap ufid_to_rte_flow = CMAP_INITIALIZER;
> >   struct ufid_to_rte_flow_data {
> >       struct cmap_node node;
> >       ovs_u128 ufid;
> > +    uint32_t refcnt;
> >       struct rte_flow *rte_flow;
> >       bool actions_offloaded;
> >       struct dpif_flow_stats stats;
> > @@ -97,6 +98,7 @@ ufid_to_rte_flow_associate(const ovs_u128 *ufid,
> >           ovs_assert(data_prev->rte_flow == NULL);
> >       }
> >
> > +    data->refcnt = 1;
> >       data->ufid = *ufid;
> >       data->rte_flow = rte_flow;
> >       data->actions_offloaded = actions_offloaded;
> > @@ -1494,7 +1496,8 @@ static int
> >   parse_clone_actions(struct netdev *netdev,
> >                       struct flow_actions *actions,
> >                       const struct nlattr *clone_actions,
> > -                    const size_t clone_actions_len)
> > +                    const size_t clone_actions_len,
> > +                    struct offload_info *info)
> >   {
> >       const struct nlattr *ca;
> >       unsigned int cleft;
> > @@ -1519,8 +1522,11 @@ parse_clone_actions(struct netdev *netdev,
> >               add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RAW_ENCAP,
> >                               raw_encap);
> >           } else if (clone_type == OVS_ACTION_ATTR_OUTPUT) {
> > -            if (add_output_action(netdev, actions, ca)) {
> > -                return -1;
> > +            /* add output action only if full-offload */
> > +            if (!info->partial_actions) {
> > +                if (add_output_action(netdev, actions, ca)) {
> > +                    return -1;
> > +                }
> >               }
> >           } else {
> >               VLOG_DBG_RL(&rl,
> > @@ -1537,12 +1543,15 @@ static int
> >   parse_flow_actions(struct netdev *netdev,
> >                      struct flow_actions *actions,
> >                      struct nlattr *nl_actions,
> > -                   size_t nl_actions_len)
> > +                   size_t nl_actions_len,
> > +                   struct offload_info *info)
> >   {
> >       struct nlattr *nla;
> >       size_t left;
> >
> > -    add_count_action(actions);
> > +    if (!info->partial_actions) {
> > +        add_count_action(actions);
> > +    }
> >       NL_ATTR_FOR_EACH_UNSAFE (nla, left, nl_actions, nl_actions_len) {
> >           if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT) {
> >               if (add_output_action(netdev, actions, nla)) {
> > @@ -1573,7 +1582,7 @@ parse_flow_actions(struct netdev *netdev,
> >               size_t clone_actions_len = nl_attr_get_size(nla);
> >
> >               if (parse_clone_actions(netdev, actions, clone_actions,
> > -                                    clone_actions_len)) {
> > +                                    clone_actions_len, info)) {
> >                   return -1;
> >               }
> >           } else {
> > @@ -1595,15 +1604,22 @@ static struct rte_flow *
> >   netdev_offload_dpdk_actions(struct netdev *netdev,
> >                               struct flow_patterns *patterns,
> >                               struct nlattr *nl_actions,
> > -                            size_t actions_len)
> > +                            size_t actions_len,
> > +                            struct offload_info *info)
> >   {
> > -    const struct rte_flow_attr flow_attr = { .ingress = 1, .transfer = 1 };
> > +    struct rte_flow_attr flow_attr = { .ingress = 1, .transfer = 1 };
> >       struct flow_actions actions = { .actions = NULL, .cnt = 0 };
> >       struct rte_flow *flow = NULL;
> >       struct rte_flow_error error;
> >       int ret;
> >
> > -    ret = parse_flow_actions(netdev, &actions, nl_actions, actions_len);
> > +    if (info->attr_egress) {
> > +        flow_attr.ingress = 0;
> > +        flow_attr.egress = 1;
> > +        flow_attr.transfer = 0;
> > +    }
> > +
> > +    ret = parse_flow_actions(netdev, &actions, nl_actions, 
> > actions_len,info);
> >       if (ret) {
> >           goto out;
> >       }
> > @@ -1635,8 +1651,15 @@ netdev_offload_dpdk_add_flow(struct netdev *netdev,
> >       }
> >
> >       flow = netdev_offload_dpdk_actions(netdev, &patterns, nl_actions,
> > -                                       actions_len);
> > -    if (!flow) {
> > +                                       actions_len, info);
> > +    if (flow) {
> > +        if (info->partial_actions && info->attr_egress) {
> > +            /* actions_offloaded should be set to false with partial 
> > actions,
> > +             * since it is still considered as partial-offload and not
> > +             * full-offload. */
> > +            actions_offloaded = false;
> > +        }
> > +    } else if (!(info->partial_actions && info->attr_egress)) {
> >           /* If we failed to offload the rule actions fallback to MARK+RSS
> >            * actions.
> >            */
> > @@ -1686,18 +1709,29 @@ netdev_offload_dpdk_flow_put(struct netdev *netdev, 
> > struct match *match,
> >                                struct dpif_flow_stats *stats)
> >   {
> >       struct ufid_to_rte_flow_data *rte_flow_data;
> > -    int ret;
> > +    int ret = 0;
> >
> > -    /*
> > -     * If an old rte_flow exists, it means it's a flow modification.
> > -     * Here destroy the old rte flow first before adding a new one.
> > -     */
> >       rte_flow_data = ufid_to_rte_flow_data_find(ufid);
> >       if (rte_flow_data && rte_flow_data->rte_flow) {
> > -        ret = netdev_offload_dpdk_destroy_flow(netdev, ufid,
> > -                                               rte_flow_data->rte_flow);
> > -        if (ret < 0) {
> > +        if (unlikely(info->partial_actions && info->attr_egress)) {
> > +            /* In the case of partial action offload, the same mega-flow
> > +             * could be offloaded by multiple PMD threads. Avoid creating
> > +             * multiple rte_flows and just update the refcnt.
> > +             */
> > +            VLOG_DBG_RL("%s: mega_ufid: "UUID_FMT" refcnt: %d\n", __func__,
> > +                        UUID_ARGS((struct uuid *)ufid), 
> > rte_flow_data->refcnt);
> > +            rte_flow_data->refcnt++;
> >               return ret;
> > +        } else {
> > +            /*
> > +             * If an old rte_flow exists, it means it's a flow 
> > modification.
> > +             * Here destroy the old rte flow first before adding a new one.
> > +             */
> > +            ret = netdev_offload_dpdk_destroy_flow(netdev, ufid,
> > +                                                   
> > rte_flow_data->rte_flow);
> > +            if (ret < 0) {
> > +                return ret;
> > +            }
> >           }
> >       }
> >
> > @@ -1719,6 +1753,12 @@ netdev_offload_dpdk_flow_del(struct netdev *netdev, 
> > const ovs_u128 *ufid,
> >           return -1;
> >       }
> >
> > +    VLOG_DBG_RL("%s: mega_ufid: "UUID_FMT" refcnt: %d\n", __func__,
> > +                UUID_ARGS((struct uuid *)ufid), rte_flow_data->refcnt);
> > +    if (rte_flow_data->refcnt-- > 1) {
> > +        return 0;
> > +    }
> > +
> >       if (stats) {
> >           memset(stats, 0, sizeof *stats);
> >       }
> > diff --git a/lib/netdev-offload.h b/lib/netdev-offload.h
> > index 4c0ed2ae8..55fcc711c 100644
> > --- a/lib/netdev-offload.h
> > +++ b/lib/netdev-offload.h
> > @@ -67,6 +67,8 @@ struct offload_info {
> >
> >       bool recirc_id_shared_with_tc;  /* Indicates whever tc chains will be 
> > in
> >                                        * sync with datapath recirc ids. */
> > +    uint8_t attr_egress;      /* Egress direction offload */
> > +    uint8_t partial_actions;  /* Partial action offload; no forward action 
> > */
> >
> >       /*
> >        * The flow mark id assigened to the flow. If any pkts hit the flow,
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to