On 4/24/2020 11:23 AM, Sriharsha Basavapatna wrote:
In this patch, we support offloading of VXLAN_ENCAP action for a vhost-user
port (aka "partial-action-offload"). At the time of offloading the flow, we
determine if the flow can be offloaded to an egress device, if the input
port is not offload capable such as a vhost-user port. We then offload the
flow with a VXLAN_ENCAP RTE action, to the egress device. We do not add
the OUTPUT RTE action, which indicates to the PMD that it is a partial
action offload request. Note that since the action is being offloaded in
egress direction, classification is expected to be done by OVS SW datapath
and hence there's no need to offload a MARK action.

If offload succeeds, we save the information in 'dp_netdev_flow' so that we
skip execution of the corresponding action (subsequent patch) during SW
datapath processing.

Signed-off-by: Sriharsha Basavapatna <[email protected]>
---
  lib/dpif-netdev.c         | 172 ++++++++++++++++++++++++++++++++++++--
  lib/netdev-dpdk.c         |   5 ++
  lib/netdev-dpdk.h         |   1 +
  lib/netdev-offload-dpdk.c |  45 +++++++---
  lib/netdev-offload.h      |   2 +
  5 files changed, 206 insertions(+), 19 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 781b233f4..a47230067 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -538,6 +538,14 @@ struct dp_netdev_flow {
      bool dead;
      uint32_t mark;               /* Unique flow mark assigned to a flow */
+ /* The next two members are used to support partial offloading of
+     * actions. The boolean flag tells if this flow has its actions partially
+     * offloaded. The egress port# tells if the action should be offloaded
+     * on the egress (output) port instead of the in-port for the flow.
+     */
+    bool partial_actions_offloaded;
+    odp_port_t  egress_offload_port;

A flow might have multiple egress ports, and also multiple egress ports in clone.

For example:

ufid:fef472c4-e796-4d0d-a655-f45f9d1767d7, skb_priority(0/0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(vm1),ct_state(0/0),ct_zone(0/0),ct_mark(0/0),ct_label(0/0),packet_type(ns=0,id=0),eth(src=e4:11:22:33:44:50,dst=ff:ff:ff:ff:ff:ff),eth_type(0x0806),arp(sip=4.4.4.10,tip=4.4.4.15,op=1/0xff,sha=e4:11:22:33:44:50/00:00:00:00:00:00,tha=00:00:00:00:00:00/00:00:00:00:00:00), packets:30, bytes:1560, used:0.432s, dp:ovs, actions:clone(tnl_push(tnl_port(vxlan_sys_4789),header(size=50,type=4,eth(dst=24:8a:07:ad:79:12,src=24:8a:07:ad:77:4a,dl_type=0x0800),ipv4(src=111.168.1.1,dst=111.168.1.2,proto=17,tos=0,ttl=64,frag=0x4000),udp(src=0,dst=4789,csum=0x0),vxlan(flags=0x8000000,vni=0x64)),out_port(br-phy)),pf),br-int,vm2,clone(tnl_push(tnl_port(gre_sys),header(size=42,type=3,eth(dst=24:8a:07:ad:79:13,src=24:8a:07:ad:77:4b,dl_type=0x0800),ipv4(src=222.168.1.1,dst=222.168.1.2,proto=47,tos=0,ttl=64,frag=0x4000),gre((flags=0x2000,proto=0x6558),key=0xc8)),out_port(br-phy2)),pf2)

Here, I configured 2 PFs (one in each br-phy bridge), and in br-int I configured VXLAN and GRE (but it could also be 2 VXLANs). How is such a use case handled?

+
      /* Statistics. */
      struct dp_netdev_flow_stats stats;
@@ -2351,12 +2359,141 @@ dp_netdev_append_flow_offload(struct dp_flow_offload_item *offload)
      ovs_mutex_unlock(&dp_flow_offload.mutex);
  }
+/* Delete a partially offloaded flow from its egress (output) netdev,
+ * identified by 'flow->egress_offload_port'.  Returns the result of
+ * netdev_flow_del(), or -1 if the egress port can no longer be resolved
+ * to a netdev. */
+static int dp_netdev_egress_flow_del(struct dp_netdev_pmd_thread *pmd,
+                                     struct dp_netdev_flow *flow)
+{
+    int ret;
+    struct netdev *port;
+    odp_port_t out_port = flow->egress_offload_port;
+    const char *dpif_type_str = dpif_normalize_type(pmd->dp->class->type);
+
+    port = netdev_ports_get(out_port, dpif_type_str);
+    if (!port) {
+        return -1;
+    }
+
+    /* Taking a global 'port_mutex' to fulfill thread safety
+     * restrictions for the netdev-offload-dpdk module. */
+    ovs_mutex_lock(&pmd->dp->port_mutex);
+    ret = netdev_flow_del(port, &flow->mega_ufid, NULL);
+    ovs_mutex_unlock(&pmd->dp->port_mutex);
+    netdev_close(port);
+
+    return ret;
+}
+
  static int
  dp_netdev_flow_offload_del(struct dp_flow_offload_item *offload)
  {
+    /* Egress partial-offload flows carry no MARK (no mark was allocated
+     * for them at offload-put time), so they are removed directly from
+     * the egress netdev rather than via mark disassociation. */
+    if (offload->flow->partial_actions_offloaded &&
+        offload->flow->egress_offload_port != ODPP_NONE) {
+        return dp_netdev_egress_flow_del(offload->pmd, offload->flow);
+    }
      return mark_to_flow_disassociate(offload->pmd, offload->flow);
  }
+/* This function determines if the given flow should be partially offloaded
+ * on the egress device, when the in-port is not offload-capable like a
+ * vhost-user port. The function currently supports offloading of only
+ * tunnel encap action.
+ *
+ * On success, '*egress_netdev' is set to the (referenced) egress netdev
+ * and the egress port is recorded in the flow; the caller owns the
+ * netdev reference.
+ */
+static bool
+should_partial_offload_egress(struct netdev *in_netdev,
+                              struct dp_flow_offload_item *offload,
+                              struct netdev **egress_netdev)
+{
+    const char *dpif_type_str =
+        dpif_normalize_type(offload->pmd->dp->class->type);
+    odp_port_t out_port = ODPP_NONE;
+    struct nlattr *clone_actions;
+    struct netdev *out_netdev;
+    bool encap_action = false;
+    size_t clone_actions_len;
+    const struct nlattr *a;
+    unsigned int left;
+    int type;
+
+    /* Support egress partial-offload only when in-port is vhost-user. */
+    if (!is_dpdk_vhost_netdev(in_netdev)) {
+        return false;
+    }
+
+    /* Only support clone sub-actions for now, tnl-push specifically. */
+    /* NOTE(review): only the first top-level action is examined here;
+     * nothing verifies that the clone is the *only* top-level action.
+     * A flow with trailing actions after the clone (e.g. additional
+     * outputs or a second clone, as in a multi-egress flow) would still
+     * pass this check — confirm the caller guarantees a single action,
+     * or reject such flows explicitly. */
+    type = nl_attr_type(offload->actions);
+    if (type != OVS_ACTION_ATTR_CLONE) {
+        return false;
+    }
+    clone_actions = nl_attr_get(offload->actions);
+    clone_actions_len = nl_attr_get_size(offload->actions);
+
+    NL_ATTR_FOR_EACH (a, left, clone_actions, clone_actions_len) {
+        type = nl_attr_type(a);
+        bool last_action = (left <= NLA_ALIGN(a->nla_len));
+
+        if (type == OVS_ACTION_ATTR_TUNNEL_PUSH) {
+            encap_action = true;
+        } else if (type == OVS_ACTION_ATTR_OUTPUT) {
+            /* NOTE(review): an OUTPUT that is not the last sub-action is
+             * silently skipped rather than rejected — presumably it should
+             * cause the flow to be refused; verify. */
+            if (last_action) {
+                    out_port = nl_attr_get_odp_port(a);
+                    break;
+            }
+        } else {
+            /* Reject if there are other actions */
+            return false;
+        }
+    }
+
+    /* Support egress partial-offload only when there is an output action. */
+    if (out_port == ODPP_NONE) {
+        return false;
+    }
+
+    /* Support only encap action for now. */
+    if (!encap_action) {
+        return false;
+    }
+
+    /* Support egress partial-offload only when out-port is offload capable. */
+    /* NOTE(review): netdev_ports_get() returns a reference; when
+     * 'out_netdev' is non-NULL but the flow API is unsupported, that
+     * reference is leaked — a netdev_close() is missing on this path. */
+    out_netdev = netdev_ports_get(out_port, dpif_type_str);
+    if (!out_netdev || !netdev_dpdk_flow_api_supported(out_netdev)) {
+        return false;
+    }
+
+    /* Flow can be egress partial-offloaded. */
+    *egress_netdev = out_netdev;
+    offload->flow->egress_offload_port = out_port;
+    return true;
+}
+
+/* Place-holder function for partial action offload in ingress direction,
+ * with a vhost-user port as the output port for the flow.
+ */
No need for "place-holder". If there is a need in the future, add it then.
+static bool
+should_partial_offload_ingress(struct netdev *in_netdev OVS_UNUSED,
+                               struct dp_flow_offload_item *offload OVS_UNUSED)
+{
+    return false; /* Ingress partial action offload is not implemented. */
+}
+
+/* This function determines if the given flow actions can be partially
+ * offloaded. Partial action offload is attempted when either the in-port
+ * or the out-port for the flow is a vhost-user port.
+ *
+ * '*egress_netdev' is set (to a referenced netdev) only on the egress
+ * path; the ingress path leaves it untouched, so callers must initialize
+ * it to NULL to distinguish the two cases.
+ */
+static bool
+dp_netdev_partial_offload_supported(struct netdev *in_netdev,
+                                   struct dp_flow_offload_item *offload,
+                                   struct netdev **egress_netdev)
+{
+    if (should_partial_offload_ingress(in_netdev, offload)) {
+        return true;
+    } else if (should_partial_offload_egress(in_netdev, offload,
+               egress_netdev)) {
+        return true;
+    } else {
+        return false;
+    }
+}
+
  static int
  dp_netdev_alloc_flow_mark(struct dp_netdev_flow *flow, bool modification,
                            uint32_t *markp)
@@ -2415,7 +2552,9 @@ dp_netdev_flow_offload_put(struct dp_flow_offload_item 
*offload)
      bool modification = offload->op == DP_NETDEV_FLOW_OFFLOAD_OP_MOD;
      struct offload_info info;
      struct netdev *port;
-    uint32_t mark;
+    struct netdev *egress_port = NULL;
+    bool alloc_mark = true;
+    uint32_t mark = INVALID_FLOW_MARK;
      int ret;
if (flow->dead) {
@@ -2427,7 +2566,20 @@ dp_netdev_flow_offload_put(struct dp_flow_offload_item 
*offload)
          return -1;
      }
- if (dp_netdev_alloc_flow_mark(flow, modification, &mark)) {
+    info.attr_egress = 0;
+    info.partial_actions = 0;
+
+    if (dp_netdev_partial_offload_supported(port, offload, &egress_port)) {
+        if (egress_port) {
+            netdev_close(port);
+            port = egress_port;
+            info.attr_egress = 1;
+            alloc_mark = false;
+        }
+        info.partial_actions = 1;
+    }
+
+    if (alloc_mark && dp_netdev_alloc_flow_mark(flow, modification, &mark)) {
              /* flow already offloaded */
              netdev_close(port);
              return 0;
@@ -2448,17 +2600,21 @@ dp_netdev_flow_offload_put(struct dp_flow_offload_item 
*offload)
          goto err_free;
      }
- if (!modification) {
+    if (info.partial_actions) {
+        flow->partial_actions_offloaded = true;
+    } else if (!modification) {
          megaflow_to_mark_associate(&flow->mega_ufid, mark);
          mark_to_flow_associate(mark, flow);
      }
      return 0;
err_free:
-    if (!modification) {
-        netdev_offload_flow_mark_free(mark);
-    } else {
-        mark_to_flow_disassociate(pmd, flow);
+    if (mark != INVALID_FLOW_MARK) {
+        if (!modification) {
+            netdev_offload_flow_mark_free(mark);
+        } else {
+            mark_to_flow_disassociate(pmd, flow);
+        }
      }
      return -1;
  }
@@ -3325,6 +3481,8 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd,
      flow->dead = false;
      flow->batch = NULL;
      flow->mark = INVALID_FLOW_MARK;
+    flow->partial_actions_offloaded = false;
+    flow->egress_offload_port = ODPP_NONE;
      *CONST_CAST(unsigned *, &flow->pmd_id) = pmd->core_id;
      *CONST_CAST(struct flow *, &flow->flow) = match->flow;
      *CONST_CAST(ovs_u128 *, &flow->ufid) = *ufid;
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 944e4a152..ad880f3ff 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -559,6 +559,11 @@ is_dpdk_class(const struct netdev_class *class)
             || class->destruct == netdev_dpdk_vhost_destruct;
  }
+/* Returns true if 'netdev' is a DPDK vhost-user port, identified by its
+ * class's destruct hook (the same test used by is_dpdk_class() above). */
+bool is_dpdk_vhost_netdev(struct netdev *netdev)
+{
+    return netdev->netdev_class->destruct == netdev_dpdk_vhost_destruct;
+}
+
Split this query function to a separate commit.
  /* DPDK NIC drivers allocate RX buffers at a particular granularity, typically
   * aligned at 1k or less. If a declared mbuf size is not a multiple of this
   * value, insufficient buffers are allocated to accomodate the packet in its
diff --git a/lib/netdev-dpdk.h b/lib/netdev-dpdk.h
index 94e08e516..67e70d09f 100644
--- a/lib/netdev-dpdk.h
+++ b/lib/netdev-dpdk.h
@@ -37,6 +37,7 @@ void netdev_dpdk_register(void);
  void free_dpdk_buf(struct dp_packet *);
bool netdev_dpdk_flow_api_supported(struct netdev *);
+bool is_dpdk_vhost_netdev(struct netdev *);
int
  netdev_dpdk_rte_flow_destroy(struct netdev *netdev,
diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index 7be504688..d87d8b1fa 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -1698,7 +1698,8 @@ static int
  parse_clone_actions(struct netdev *netdev,
                      struct flow_actions *actions,
                      const struct nlattr *clone_actions,
-                    const size_t clone_actions_len)
+                    const size_t clone_actions_len,
+                    struct offload_info *info)
  {
      const struct nlattr *ca;
      unsigned int cleft;
@@ -1723,8 +1724,11 @@ parse_clone_actions(struct netdev *netdev,
              add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RAW_ENCAP,
                              raw_encap);
          } else if (clone_type == OVS_ACTION_ATTR_OUTPUT) {
-            if (add_output_action(netdev, actions, ca)) {
-                return -1;
+            /* add output action only if full-offload */
+            if (!info->partial_actions) {
+                if (add_output_action(netdev, actions, ca)) {
+                    return -1;
+                }
              }
          } else {
              VLOG_DBG_RL(&rl,
@@ -1788,7 +1792,8 @@ parse_flow_actions(struct netdev *netdev,
                     struct flow_actions *actions,
                     struct nlattr *nl_actions,
                     size_t nl_actions_len,
-                   struct act_resources *act_resources)
+                   struct act_resources *act_resources,
+                   struct offload_info *info)
  {
      struct nlattr *nla;
      size_t left;
@@ -1799,8 +1804,11 @@ parse_flow_actions(struct netdev *netdev,
      add_count_action(actions);
      NL_ATTR_FOR_EACH_UNSAFE (nla, left, nl_actions, nl_actions_len) {
          if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT) {
-            if (add_output_action(netdev, actions, nla)) {
-                return -1;
+            /* add output action only if full-offload */
+            if (!info->partial_actions) {
+                if (add_output_action(netdev, actions, nla)) {
+                    return -1;
+                }
              }
          } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_DROP) {
              free_flow_actions(actions);
@@ -1821,7 +1829,7 @@ parse_flow_actions(struct netdev *netdev,
              size_t clone_actions_len = nl_attr_get_size(nla);
if (parse_clone_actions(netdev, actions, clone_actions,
-                                    clone_actions_len)) {
+                                    clone_actions_len, info)) {
                  return -1;
              }
          } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_TUNNEL_POP) {
@@ -1899,16 +1907,22 @@ netdev_offload_dpdk_actions(struct netdev *netdev,
                              size_t actions_len,
                              const ovs_u128 *ufid,
                              struct act_resources *act_resources,
-                            struct flows_handle *flows)
+                            struct flows_handle *flows,
+                            struct offload_info *info)
  {
-    const struct rte_flow_attr flow_attr = { .ingress = 1, .transfer = 1 };
+    struct rte_flow_attr flow_attr = { .ingress = 1, .transfer = 1 };
      struct flow_actions actions = { .actions = NULL, .cnt = 0 };
      struct flow_item flow_item = { .devargs = NULL };
      struct rte_flow_error error;
      int ret;
+ if (info->attr_egress) {
+        flow_attr.ingress = 0;
+        flow_attr.egress = 1;
Should also .transfer = 0;
+    }
+
      ret = parse_flow_actions(netdev, &actions, nl_actions, actions_len,
-                             act_resources);
+                             act_resources, info);
      if (ret) {
          goto out;
      }
@@ -1957,8 +1971,15 @@ netdev_offload_dpdk_add_flow(struct netdev *netdev,
ret = netdev_offload_dpdk_actions(netdev, &patterns, nl_actions,
                                        actions_len, ufid, &act_resources,
-                                      &flows);
-    if (ret) {
+                                      &flows, info);
+    if (!ret) {
+        if (info->partial_actions && info->attr_egress) {
+            /* actions_offloaded should be set to false with partial actions,
+             * since it is still considered as partial-offload and not
+             * full-offload. */
+            actions_offloaded = false;
+        }
+    } else {
          /* If we failed to offload the rule actions fallback to MARK+RSS
           * actions.
           */
diff --git a/lib/netdev-offload.h b/lib/netdev-offload.h
index d6dd98367..3c079cc9d 100644
--- a/lib/netdev-offload.h
+++ b/lib/netdev-offload.h
@@ -67,6 +67,8 @@ struct offload_info {
bool recirc_id_shared_with_tc; /* Indicates whever tc chains will be in
                                       * sync with datapath recirc ids. */
+    uint8_t attr_egress;      /* Egress direction offload */
+    uint8_t partial_actions;  /* Partial action offload; no forward action */
/*
       * The flow mark id assigened to the flow. If any pkts hit the flow,
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to