On Tue, May 26, 2020 at 7:12 PM Eli Britstein <[email protected]> wrote:
>
>
> On 5/18/2020 10:27 PM, Sriharsha Basavapatna wrote:
> > In this patch, we support offloading of VXLAN_ENCAP action for a vhost-user
> > port (aka "partial-action-offload"). At the time of offloading the flow, we
> > determine if the flow can be offloaded to an egress device, if the input
> > port is not offload capable such as a vhost-user port. We then offload the
> > flow with a VXLAN_ENCAP RTE action, to the egress device. We do not add
> > the OUTPUT RTE action, which indicates to the PMD that is is a partial
> > action offload request. Note that since the action is being offloaded in
> > egress direction, classification is expected to be done by OVS SW datapath
> > and hence there's no need to offload a MARK action.
> >
> > If offload succeeds, we save the information in 'dp_netdev_flow' so that we
> > skip execution of the corresponding action (subsequent patch) during SW
> > datapath processing.
> >
> > Signed-off-by: Sriharsha Basavapatna <[email protected]>
> > ---
> > lib/dpif-netdev.c | 216 ++++++++++++++++++++++++++++++++++++--
> > lib/netdev-offload-dpdk.c | 50 ++++++---
> > lib/netdev-offload.h | 2 +
> > 3 files changed, 246 insertions(+), 22 deletions(-)
> >
> > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> > index 781b233f4..4315f237c 100644
> > --- a/lib/dpif-netdev.c
> > +++ b/lib/dpif-netdev.c
> > @@ -538,6 +538,16 @@ struct dp_netdev_flow {
> > bool dead;
> > uint32_t mark; /* Unique flow mark assigned to a flow */
> >
> > + /* The next two members are used to support partial offloading of
> > + * actions. The boolean flag tells if this flow has its actions
> > partially
> > + * offloaded. The egress port# tells if the action should be offloaded
> > + * on the egress (output) port instead of the in-port for the flow.
> > Note
> > + * that we support flows with a single egress port action.
> > + * (see MAX_ACTION_ATTRS for related comments).
> > + */
> > + bool partial_actions_offloaded;
> > + odp_port_t egress_offload_port;
> > +
> > /* Statistics. */
> > struct dp_netdev_flow_stats stats;
> >
> > @@ -2351,10 +2361,176 @@ dp_netdev_append_flow_offload(struct
> > dp_flow_offload_item *offload)
> > ovs_mutex_unlock(&dp_flow_offload.mutex);
> > }
> >
> > +static int
> > +partial_offload_egress_flow_del(struct dp_netdev_pmd_thread *pmd,
> > + struct dp_netdev_flow *flow)
> > +{
> > + int ret;
> > + struct netdev *port;
> > + odp_port_t out_port = flow->egress_offload_port;
> > + const char *dpif_type_str = dpif_normalize_type(pmd->dp->class->type);
> > +
> > + port = netdev_ports_get(out_port, dpif_type_str);
> > + if (!port) {
> > + return -1;
> > + }
> > +
> > + /* Taking a global 'port_mutex' to fulfill thread safety
> > + * restrictions for the netdev-offload-dpdk module. */
> > + ovs_mutex_lock(&pmd->dp->port_mutex);
> > + ret = netdev_flow_del(port, &flow->mega_ufid, NULL);
> > + ovs_mutex_unlock(&pmd->dp->port_mutex);
> > + netdev_close(port);
> > +
> > + return ret;
> > +}
> > +
> > static int
> > dp_netdev_flow_offload_del(struct dp_flow_offload_item *offload)
> > {
> > - return mark_to_flow_disassociate(offload->pmd, offload->flow);
> > + if (offload->flow->partial_actions_offloaded &&
> > + offload->flow->egress_offload_port != ODPP_NONE) {
> > + return partial_offload_egress_flow_del(offload->pmd,
> > offload->flow);
> > + } else {
> > + return mark_to_flow_disassociate(offload->pmd, offload->flow);
> > + }
> > +}
> > +
> > +/* Structure to hold a nl_parsed OVS action */
> > +struct action_attr {
> > + int type; /* OVS action type */
> > + struct nlattr *action; /* action attribute */
> > +};
> > +
> > +/*
> > + * Maxium number of actions to be parsed while selecting a flow for partial
> > + * action offload. This number is currently based on the minimum number of
> > + * attributes seen with the tunnel encap action (clone, tunnel_push,
> > output).
> > + * This number includes output action to a single egress device (uplink)
> > and
> > + * supports neither multiple clone() actions nor multiple output actions.
> > + * This number could change if and when we support other actions or
> > + * combinations of actions for partial offload.
> > + */
> > +#define MAX_ACTION_ATTRS 3 /* Max # action attributes supported */
> > +
> > +/*
> > + * This function parses the list of OVS "actions" of length "actions_len",
> > + * and returns them in an array of action "attrs", of size "max_attrs".
> > + * The parsed number of actions is returned in "num_attrs". If the number
> > + * of actions exceeds "max_attrs", parsing is stopped and E2BIG is
> > returned.
> > + * Otherwise, returns success (0).
> > + */
> > +static int
> > +parse_nlattr_actions(struct nlattr *actions, size_t actions_len,
> > + struct action_attr *attrs, int max_attrs, int
> > *num_attrs)
> > +{
> > + const struct nlattr *a;
> > + unsigned int left;
> > + int num_actions = 0;
> > + int n_attrs = 0;
> > + int rc = 0;
> > + int type;
> > +
> > + *num_attrs = 0;
> > +
> > + NL_ATTR_FOR_EACH (a, left, actions, actions_len) {
> > + type = nl_attr_type(a);
> > +
> > + if (num_actions >= max_attrs) {
> > + *num_attrs = num_actions;
> > + return E2BIG;
> > + }
> > +
> > + attrs[num_actions].type = type;
> > + attrs[num_actions].action = a;
> > + num_actions++;
> > + if (type == OVS_ACTION_ATTR_CLONE) {
> > + rc = parse_nlattr_actions(nl_attr_get(a), nl_attr_get_size(a),
> > + &attrs[num_actions],
> > + (max_attrs - num_actions), &n_attrs);
> > + num_actions += n_attrs;
> > + if (rc == E2BIG) {
> > + *num_attrs = num_actions;
> > + return rc;
> > + }
> > + }
> > + }
> > +
> > + *num_attrs = num_actions;
> > + return 0;
> > +}
> > +
> > +/* This function determines if the given flow should be partially offloaded
> > + * on the egress device, when the in-port is not offload-capable like a
> > + * vhost-user port. The function currently supports offloading of only
> > + * tunnel encap action.
> > + */
> > +static bool
> > +should_partial_offload_egress(struct netdev *in_netdev,
> > + struct dp_flow_offload_item *offload,
> > + struct netdev **egress_netdev)
> > +{
> > + const char *dpif_type_str =
> > + dpif_normalize_type(offload->pmd->dp->class->type);
> > + struct action_attr attrs[MAX_ACTION_ATTRS];
> > + odp_port_t out_port = ODPP_NONE;
> > + struct netdev *out_netdev;
> > + int num_attrs = 0;
> > + int type;
> > + int rc;
> > +
> > + /* Support egress partial-offload only when in-port is vhost-user. */
> > + if (!is_dpdk_vhost_netdev(in_netdev)) {
> > + return false;
> > + }
> > +
> > + rc = parse_nlattr_actions(offload->actions, offload->actions_len,
> > attrs,
> > + MAX_ACTION_ATTRS, &num_attrs);
> > + if (rc == E2BIG) {
> > + /* Action list too big; decline partial offload */
> > + return false;
> > + }
> > +
> > + /* Number of attrs expected with tunnel encap action */
> > + if (num_attrs < MAX_ACTION_ATTRS) {
> > + return false;
> > + }
> > +
> > + /* Only support clone sub-actions for now, tnl-push specifically. */
> > + if (attrs[0].type != OVS_ACTION_ATTR_CLONE ||
> > + attrs[1].type != OVS_ACTION_ATTR_TUNNEL_PUSH ||
> > + attrs[2].type != OVS_ACTION_ATTR_OUTPUT) {
> > + return false;
> > + }
> > +
> > + /* Egress partial-offload needs an output action at the end. */
> > + out_port = nl_attr_get_odp_port(attrs[2].action);
> > + if (out_port == ODPP_NONE) {
> > + return false;
> > + }
> > +
> > + /* Support egress partial-offload only when out-port is offload
> > capable. */
> > + out_netdev = netdev_ports_get(out_port, dpif_type_str);
> > + if (!out_netdev || !netdev_dpdk_flow_api_supported(out_netdev)) {
> > + return false;
> > + }
> > +
> > + /* Flow can be egress partial-offloaded. */
> > + *egress_netdev = out_netdev;
> > + offload->flow->egress_offload_port = out_port;
> > + return true;
> > +}
> > +
> > +/* This function determines if the given flow actions can be partially
> > + * offloaded. Partial action offload is attempted when the in-port for
> > + * the flow is a vhost-user port (egress direction).
> > + */
> > +static bool
> > +dp_netdev_partial_offload_supported(struct netdev *in_netdev,
> > + struct dp_flow_offload_item *offload,
> > + struct netdev **egress_netdev)
> > +{
> > + return should_partial_offload_egress(in_netdev, offload,
> > egress_netdev);
> why not move the logic from should_partial_offload_egress to here and
> eliminate the other function.
Done.
> > }
> >
> > static int
> > @@ -2415,7 +2591,9 @@ dp_netdev_flow_offload_put(struct
> > dp_flow_offload_item *offload)
> > bool modification = offload->op == DP_NETDEV_FLOW_OFFLOAD_OP_MOD;
> > struct offload_info info;
> > struct netdev *port;
> > - uint32_t mark;
> > + struct netdev *egress_port = NULL;
> > + bool alloc_mark = true;
> > + uint32_t mark = INVALID_FLOW_MARK;
> > int ret;
> >
> > if (flow->dead) {
> > @@ -2427,7 +2605,20 @@ dp_netdev_flow_offload_put(struct
> > dp_flow_offload_item *offload)
> > return -1;
> > }
> >
> > - if (dp_netdev_alloc_flow_mark(flow, modification, &mark)) {
> > + info.attr_egress = 0;
> > + info.partial_actions = 0;
> > +
> > + if (dp_netdev_partial_offload_supported(port, offload, &egress_port)) {
> missing netdev_close(egress_port)
It is not missing, but it is not shown in the diffs since it is part
of existing code. As "egress_port" is assigned to "port", it gets
handled as netdev_close(port) further down.
.....
ovs_mutex_unlock(&pmd->dp->port_mutex);
netdev_close(port);
> > + if (egress_port) {
> > + netdev_close(port);
> > + port = egress_port;
> > + info.attr_egress = 1;
> > + alloc_mark = false;
> > + }
> > + info.partial_actions = 1;
> > + }
> > +
> > + if (alloc_mark && dp_netdev_alloc_flow_mark(flow, modification,
> > &mark)) {
> > /* flow already offloaded */
> > netdev_close(port);
> > return 0;
> > @@ -2448,17 +2639,21 @@ dp_netdev_flow_offload_put(struct
> > dp_flow_offload_item *offload)
> > goto err_free;
> > }
> >
> > - if (!modification) {
> > + if (info.partial_actions) {
> > + flow->partial_actions_offloaded = true;
> > + } else if (!modification) {
> > megaflow_to_mark_associate(&flow->mega_ufid, mark);
> > mark_to_flow_associate(mark, flow);
> > }
> > return 0;
> >
> > err_free:
> > - if (!modification) {
> > - netdev_offload_flow_mark_free(mark);
> > - } else {
> > - mark_to_flow_disassociate(pmd, flow);
> > + if (mark != INVALID_FLOW_MARK) {
> > + if (!modification) {
> > + netdev_offload_flow_mark_free(mark);
> > + } else {
> > + mark_to_flow_disassociate(pmd, flow);
> > + }
> > }
> > return -1;
> > }
> > @@ -2573,7 +2768,8 @@ dp_netdev_pmd_remove_flow(struct dp_netdev_pmd_thread
> > *pmd,
> > ovs_assert(cls != NULL);
> > dpcls_remove(cls, &flow->cr);
> > cmap_remove(&pmd->flow_table, node, dp_netdev_flow_hash(&flow->ufid));
> > - if (flow->mark != INVALID_FLOW_MARK) {
> > + if (flow->mark != INVALID_FLOW_MARK || (flow->partial_actions_offloaded
> > + && flow->egress_offload_port != ODPP_NONE)) {
> > queue_netdev_flow_del(pmd, flow);
> > }
> > flow->dead = true;
> > @@ -3325,6 +3521,8 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd,
> > flow->dead = false;
> > flow->batch = NULL;
> > flow->mark = INVALID_FLOW_MARK;
> > + flow->partial_actions_offloaded = false;
> > + flow->egress_offload_port = ODPP_NONE;
> > *CONST_CAST(unsigned *, &flow->pmd_id) = pmd->core_id;
> > *CONST_CAST(struct flow *, &flow->flow) = match->flow;
> > *CONST_CAST(ovs_u128 *, &flow->ufid) = *ufid;
> > diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
> > index 7be504688..44c5445c9 100644
> > --- a/lib/netdev-offload-dpdk.c
> > +++ b/lib/netdev-offload-dpdk.c
> > @@ -1698,7 +1698,8 @@ static int
> > parse_clone_actions(struct netdev *netdev,
> > struct flow_actions *actions,
> > const struct nlattr *clone_actions,
> > - const size_t clone_actions_len)
> > + const size_t clone_actions_len,
> > + struct offload_info *info)
> > {
> > const struct nlattr *ca;
> > unsigned int cleft;
> > @@ -1723,8 +1724,11 @@ parse_clone_actions(struct netdev *netdev,
> > add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RAW_ENCAP,
> > raw_encap);
> > } else if (clone_type == OVS_ACTION_ATTR_OUTPUT) {
> > - if (add_output_action(netdev, actions, ca)) {
> > - return -1;
> > + /* add output action only if full-offload */
> > + if (!info->partial_actions) {
> > + if (add_output_action(netdev, actions, ca)) {
> > + return -1;
> > + }
> > }
> > } else {
> > VLOG_DBG_RL(&rl,
> > @@ -1788,7 +1792,8 @@ parse_flow_actions(struct netdev *netdev,
> > struct flow_actions *actions,
> > struct nlattr *nl_actions,
> > size_t nl_actions_len,
> > - struct act_resources *act_resources)
> > + struct act_resources *act_resources,
> > + struct offload_info *info)
> > {
> > struct nlattr *nla;
> > size_t left;
> > @@ -1796,11 +1801,16 @@ parse_flow_actions(struct netdev *netdev,
> > if (!strcmp(netdev_get_type(netdev), "vxlan")) {
> > add_vxlan_decap_action(actions);
> > }
> > - add_count_action(actions);
> > + if (!info->partial_actions) {
> > + add_count_action(actions);
> > + }
> > NL_ATTR_FOR_EACH_UNSAFE (nla, left, nl_actions, nl_actions_len) {
> > if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT) {
> > - if (add_output_action(netdev, actions, nla)) {
> > - return -1;
> > + /* add output action only if full-offload */
> > + if (!info->partial_actions) {
> > + if (add_output_action(netdev, actions, nla)) {
> > + return -1;
> > + }
> there is no need for this hunk. the partial offload handles only
> clone/output, here it's only output.
Removed it.
> > }
> > } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_DROP) {
> > free_flow_actions(actions);
> > @@ -1821,7 +1831,7 @@ parse_flow_actions(struct netdev *netdev,
> > size_t clone_actions_len = nl_attr_get_size(nla);
> >
> > if (parse_clone_actions(netdev, actions, clone_actions,
> > - clone_actions_len)) {
> > + clone_actions_len, info)) {
> > return -1;
> > }
> > } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_TUNNEL_POP) {
> > @@ -1899,16 +1909,23 @@ netdev_offload_dpdk_actions(struct netdev *netdev,
> > size_t actions_len,
> > const ovs_u128 *ufid,
> > struct act_resources *act_resources,
> > - struct flows_handle *flows)
> > + struct flows_handle *flows,
> > + struct offload_info *info)
> > {
> > - const struct rte_flow_attr flow_attr = { .ingress = 1, .transfer = 1 };
> > + struct rte_flow_attr flow_attr = { .ingress = 1, .transfer = 1 };
> > struct flow_actions actions = { .actions = NULL, .cnt = 0 };
> > struct flow_item flow_item = { .devargs = NULL };
> > struct rte_flow_error error;
> > int ret;
> >
> > + if (info->attr_egress) {
> > + flow_attr.ingress = 0;
> > + flow_attr.egress = 1;
> > + flow_attr.transfer = 0;
> > + }
> > +
> > ret = parse_flow_actions(netdev, &actions, nl_actions, actions_len,
> > - act_resources);
> > + act_resources, info);
> > if (ret) {
> > goto out;
> > }
> > @@ -1957,8 +1974,15 @@ netdev_offload_dpdk_add_flow(struct netdev *netdev,
> >
> > ret = netdev_offload_dpdk_actions(netdev, &patterns, nl_actions,
> > actions_len, ufid, &act_resources,
> > - &flows);
> > - if (ret) {
> > + &flows, info);
> > + if (!ret) {
> > + if (info->partial_actions && info->attr_egress) {
> > + /* actions_offloaded should be set to false with partial
> > actions,
> > + * since it is still considered as partial-offload and not
> > + * full-offload. */
> > + actions_offloaded = false;
> > + }
> > + } else if (!(info->partial_actions && info->attr_egress)) {
> > /* If we failed to offload the rule actions fallback to MARK+RSS
> > * actions.
> > */
> > diff --git a/lib/netdev-offload.h b/lib/netdev-offload.h
> > index d6dd98367..3c079cc9d 100644
> > --- a/lib/netdev-offload.h
> > +++ b/lib/netdev-offload.h
> > @@ -67,6 +67,8 @@ struct offload_info {
> >
> > bool recirc_id_shared_with_tc; /* Indicates whever tc chains will be
> > in
> > * sync with datapath recirc ids. */
> > + uint8_t attr_egress; /* Egress direction offload */
> > + uint8_t partial_actions; /* Partial action offload; no forward action
> > */
> >
> > /*
> > * The flow mark id assigened to the flow. If any pkts hit the flow,
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev