>-----Original Message-----
>From: Ivan Malov <[email protected]>
>Sent: Wednesday, July 20, 2022 3:18 PM
>To: [email protected]
>Cc: Eli Britstein <[email protected]>; Stephen Hemminger
><[email protected]>; Ilya Maximets <[email protected]>; Ori
>Kam <[email protected]>; Maxime Coquelin
><[email protected]>; David Marchand
><[email protected]>; Andrew Rybchenko
><[email protected]>
>Subject: [PATCH v3 3/3] netdev-offload-dpdk: use flow transfer proxy
>mechanism
>
>External email: Use caution opening links or attachments
>
>
>Manage "transfer" flows via the corresponding mechanism.
>Doing so requires that the traffic source be specified explicitly, via the
>corresponding pattern item.
>
>Signed-off-by: Ivan Malov <[email protected]>
>Acked-by: Andrew Rybchenko <[email protected]>
>---
> lib/netdev-dpdk.c         | 99 ++++++++++++++++++++++++++++++++-------
> lib/netdev-dpdk.h         |  4 +-
> lib/netdev-offload-dpdk.c | 61 ++++++++++++++++++++----
> 3 files changed, 135 insertions(+), 29 deletions(-)
>
>diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index
>45e5d26d2..01fb40255 100644
>--- a/lib/netdev-dpdk.c
>+++ b/lib/netdev-dpdk.c
>@@ -420,6 +420,7 @@ enum dpdk_hw_ol_features {
>
> struct netdev_dpdk {
>     PADDED_MEMBERS_CACHELINE_MARKER(CACHE_LINE_SIZE, cacheline0,
>+        dpdk_port_t flow_transfer_proxy_port_id;
>         dpdk_port_t port_id;
>
>         /* If true, device was attached by rte_eth_dev_attach(). */ @@ -1130,8
>+1131,9 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
>     uint32_t rx_chksm_offload_capa = RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
>                                      RTE_ETH_RX_OFFLOAD_TCP_CKSUM |
>                                      RTE_ETH_RX_OFFLOAD_IPV4_CKSUM;
>-
> #ifdef ALLOW_EXPERIMENTAL_API
This #ifdef appears throughout the patches, even in places where it is harmless to do 
without it. It makes the code less readable and might cause future cherry-picking 
issues. Please minimize its use to only the places where it is strictly required.
>+    int ret;
>+
>     /*
>      * Full tunnel offload requires that tunnel ID metadata be
>      * delivered with "miss" packets from the hardware to the @@ -1141,6
>+1143,27 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
>      * Request delivery of such metadata.
>      */
>     dpdk_eth_dev_init_rx_metadata(dev);
>+
>+    /*
>+     * Managing "transfer" flows requires that the user communicate them
>+     * via a port which has the privilege to control the embedded switch.
>+     * For some vendors, all ports in a given switching domain have
>+     * this privilege. For other vendors, it's only one port.
>+     *
>+     * Get the proxy port ID and remember it for later use.
>+     */
>+    ret = rte_flow_pick_transfer_proxy(dev->port_id,
>+                                       &dev->flow_transfer_proxy_port_id, 
>NULL);
>+    if (ret != 0) {
>+        /*
>+         * The PMD does not indicate the proxy port.
>+         * Assume the proxy is unneeded.
>+         */
>+        dev->flow_transfer_proxy_port_id = dev->port_id;
>+    }
>+#else /* ! ALLOW_EXPERIMENTAL_API */
>+    /* No API to get transfer proxy; assume the proxy is unneeded. */
>+    dev->flow_transfer_proxy_port_id = dev->port_id;
> #endif /* ALLOW_EXPERIMENTAL_API */
>
>     rte_eth_dev_info_get(dev->port_id, &info); @@ -3762,8 +3785,12 @@
>netdev_dpdk_detach(struct unixctl_conn *conn, int argc OVS_UNUSED,
>                    const char *argv[], void *aux OVS_UNUSED)  {
>     struct ds used_interfaces = DS_EMPTY_INITIALIZER;
>+#ifdef ALLOW_EXPERIMENTAL_API
>+    struct netdev_dpdk *dev_self = NULL; #endif /*
>+ALLOW_EXPERIMENTAL_API */
>     struct rte_eth_dev_info dev_info;
>     dpdk_port_t sibling_port_id;
>+    struct netdev_dpdk *dev;
>     dpdk_port_t port_id;
>     bool used = false;
>     char *response;
>@@ -3781,8 +3808,6 @@ netdev_dpdk_detach(struct unixctl_conn *conn, int
>argc OVS_UNUSED,
>                   argv[1]);
>
>     RTE_ETH_FOREACH_DEV_SIBLING (sibling_port_id, port_id) {
>-        struct netdev_dpdk *dev;
>-
>         LIST_FOR_EACH (dev, list_node, &dpdk_list) {
>             if (dev->port_id != sibling_port_id) {
>                 continue;
>@@ -3802,6 +3827,27 @@ netdev_dpdk_detach(struct unixctl_conn *conn,
>int argc OVS_UNUSED,
>     }
>     ds_destroy(&used_interfaces);
>
>+#ifdef ALLOW_EXPERIMENTAL_API
>+    /*
>+     * The device being detached may happen to be a flow proxy port
>+     * for another device (still attached). If so, do not allow to
>+     * detach. Devices dependent on this one must be detached first.
I don't think it is acceptable to deny detaching the port, or to enforce such an 
ordering. For example, ports are detached upon shutdown, in an unknown order.
Suppose A is the proxy port for ports B and C. When port A is about to be detached, 
flush the offloads of ports B and C (in addition to flushing the offloads of 
port A). Then it is safe to detach it.
>+     */
>+    LIST_FOR_EACH (dev, list_node, &dpdk_list) {
>+        if (dev->port_id == port_id) {
>+            dev_self = dev;
>+        } else if (dev->flow_transfer_proxy_port_id == port_id) {
>+            response = xasprintf("Device '%s' can not be detached (flow 
>proxy)",
>+                                 argv[1]);
>+            goto error;
>+        }
>+    }
>+
>+    /* Indicate that the device being detached no longer needs a flow proxy.
>*/
>+    if (dev_self != NULL)
>+        dev_self->flow_transfer_proxy_port_id = port_id; #endif /*
>+ALLOW_EXPERIMENTAL_API */
>+
>     rte_eth_dev_info_get(port_id, &dev_info);
>     rte_eth_dev_close(port_id);
>     if (rte_dev_remove(dev_info.device) < 0) { @@ -5167,7 +5213,8 @@
>unlock:
> }
>
> int
>-netdev_dpdk_get_port_id(struct netdev *netdev)
>+netdev_dpdk_get_port_id(struct netdev *netdev,
>+                        bool flow_transfer_proxy)
It might be better to have a separate API for getting the proxy port ID, and then 
use it only where needed.
> {
>     struct netdev_dpdk *dev;
>     int ret = -1;
>@@ -5178,7 +5225,7 @@ netdev_dpdk_get_port_id(struct netdev *netdev)
>
>     dev = netdev_dpdk_cast(netdev);
>     ovs_mutex_lock(&dev->mutex);
>-    ret = dev->port_id;
>+    ret = flow_transfer_proxy ? dev->flow_transfer_proxy_port_id :
>+ dev->port_id;
>     ovs_mutex_unlock(&dev->mutex);
> out:
>     return ret;
>@@ -5214,13 +5261,15 @@ out:
>
> int
> netdev_dpdk_rte_flow_destroy(struct netdev *netdev,
>-                             struct rte_flow *rte_flow,
>+                             bool transfer, struct rte_flow *rte_flow,
>                              struct rte_flow_error *error)  {
>     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>     int ret;
>
>-    ret = rte_flow_destroy(dev->port_id, rte_flow, error);
>+    ret = rte_flow_destroy(transfer ?
>+                           dev->flow_transfer_proxy_port_id : dev->port_id,
>+                           rte_flow, error);
>     return ret;
> }
>
>@@ -5234,7 +5283,19 @@ netdev_dpdk_rte_flow_create(struct netdev
>*netdev,
>     struct rte_flow *flow;
>     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>
>-    flow = rte_flow_create(dev->port_id, attr, items, actions, error);
>+#ifdef ALLOW_EXPERIMENTAL_API
>+    if (!attr->transfer) {
>+        /*
>+         * The 1st item in any pattern is a traffic source one.
>+         * It is unnecessary in the case of non-transfer rules.
>+         */
>+        ++(items);
>+    }
>+#endif /* ALLOW_EXPERIMENTAL_API */
>+
>+    flow = rte_flow_create(attr->transfer ?
>+                           dev->flow_transfer_proxy_port_id : dev->port_id,
>+                           attr, items, actions, error);
>     return flow;
> }
>
>@@ -5262,7 +5323,8 @@ netdev_dpdk_rte_flow_query_count(struct netdev
>*netdev,
>     }
>
>     dev = netdev_dpdk_cast(netdev);
>-    ret = rte_flow_query(dev->port_id, rte_flow, actions, query, error);
>+    ret = rte_flow_query(dev->flow_transfer_proxy_port_id, rte_flow,
>+                         actions, query, error);
>     return ret;
> }
>
>@@ -5284,8 +5346,8 @@ netdev_dpdk_rte_flow_tunnel_decap_set(struct
>netdev *netdev,
>
>     dev = netdev_dpdk_cast(netdev);
>     ovs_mutex_lock(&dev->mutex);
>-    ret = rte_flow_tunnel_decap_set(dev->port_id, tunnel, actions,
>-                                    num_of_actions, error);
>+    ret = rte_flow_tunnel_decap_set(dev->flow_transfer_proxy_port_id,
>tunnel,
>+                                    actions, num_of_actions, error);
>     ovs_mutex_unlock(&dev->mutex);
>     return ret;
> }
>@@ -5306,8 +5368,8 @@ netdev_dpdk_rte_flow_tunnel_match(struct
>netdev *netdev,
>
>     dev = netdev_dpdk_cast(netdev);
>     ovs_mutex_lock(&dev->mutex);
>-    ret = rte_flow_tunnel_match(dev->port_id, tunnel, items, num_of_items,
>-                                error);
>+    ret = rte_flow_tunnel_match(dev->flow_transfer_proxy_port_id, tunnel,
>+                                items, num_of_items, error);
>     ovs_mutex_unlock(&dev->mutex);
>     return ret;
> }
>@@ -5328,7 +5390,8 @@ netdev_dpdk_rte_flow_get_restore_info(struct
>netdev *netdev,
>
>     dev = netdev_dpdk_cast(netdev);
>     ovs_mutex_lock(&dev->mutex);
>-    ret = rte_flow_get_restore_info(dev->port_id, m, info, error);
>+    ret = rte_flow_get_restore_info(dev->flow_transfer_proxy_port_id,
>+                                    m, info, error);
>     ovs_mutex_unlock(&dev->mutex);
>     return ret;
> }
>@@ -5349,8 +5412,8 @@
>netdev_dpdk_rte_flow_tunnel_action_decap_release(
>
>     dev = netdev_dpdk_cast(netdev);
>     ovs_mutex_lock(&dev->mutex);
>-    ret = rte_flow_tunnel_action_decap_release(dev->port_id, actions,
>-                                               num_of_actions, error);
>+    ret = rte_flow_tunnel_action_decap_release(dev-
>>flow_transfer_proxy_port_id,
>+                                               actions, num_of_actions,
>+ error);
>     ovs_mutex_unlock(&dev->mutex);
>     return ret;
> }
>@@ -5370,8 +5433,8 @@ netdev_dpdk_rte_flow_tunnel_item_release(struct
>netdev *netdev,
>
>     dev = netdev_dpdk_cast(netdev);
>     ovs_mutex_lock(&dev->mutex);
>-    ret = rte_flow_tunnel_item_release(dev->port_id, items, num_of_items,
>-                                       error);
>+    ret = rte_flow_tunnel_item_release(dev->flow_transfer_proxy_port_id,
>+                                       items, num_of_items, error);
>     ovs_mutex_unlock(&dev->mutex);
>     return ret;
> }
>diff --git a/lib/netdev-dpdk.h b/lib/netdev-dpdk.h index 699be3fb4..6f18a73b2
>100644
>--- a/lib/netdev-dpdk.h
>+++ b/lib/netdev-dpdk.h
>@@ -35,7 +35,7 @@ bool netdev_dpdk_flow_api_supported(struct netdev *);
>
> int
> netdev_dpdk_rte_flow_destroy(struct netdev *netdev,
>-                             struct rte_flow *rte_flow,
>+                             bool transfer, struct rte_flow *rte_flow,
>                              struct rte_flow_error *error);  struct rte_flow *
>netdev_dpdk_rte_flow_create(struct netdev *netdev, @@ -49,7 +49,7 @@
>netdev_dpdk_rte_flow_query_count(struct netdev *netdev,
>                                  struct rte_flow_query_count *query,
>                                  struct rte_flow_error *error);  int -
>netdev_dpdk_get_port_id(struct netdev *netdev);
>+netdev_dpdk_get_port_id(struct netdev *netdev, bool
>+flow_transfer_proxy);
>
> #ifdef ALLOW_EXPERIMENTAL_API
>
>diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c index
>9cd5a0159..9320a1d0b 100644
>--- a/lib/netdev-offload-dpdk.c
>+++ b/lib/netdev-offload-dpdk.c
>@@ -353,8 +353,23 @@ dump_flow_pattern(struct ds *s,
>
>     if (item->type == RTE_FLOW_ITEM_TYPE_END) {
>         ds_put_cstr(s, "end ");
>+#ifdef ALLOW_EXPERIMENTAL_API
>+    } else if (item->type == RTE_FLOW_ITEM_TYPE_REPRESENTED_PORT) {
>+        const struct rte_flow_item_ethdev *ethdev_spec = item->spec;
>+        const struct rte_flow_item_ethdev *ethdev_mask = item->mask;
>+
>+        ds_put_cstr(s, "represented_port ");
>+
>+        DUMP_PATTERN_ITEM(ethdev_mask->port_id, false, "ethdev_port_id",
>+                          "%"PRIu16, ethdev_spec->port_id,
>+                          ethdev_mask->port_id, 0);
>+    } else if (flow_patterns->tnl_pmd_items_cnt &&
>+               pattern_index < 1 /* REPRESENTED_PORT */ +
>+                               flow_patterns->tnl_pmd_items_cnt) {
>+#else /* ! ALLOW_EXPERIMENTAL_API */
>     } else if (flow_patterns->tnl_pmd_items_cnt &&
>                pattern_index < flow_patterns->tnl_pmd_items_cnt) {
>+#endif /* ALLOW_EXPERIMENTAL_API */
>         return;
>     } else if (item->type == RTE_FLOW_ITEM_TYPE_ETH) {
>         const struct rte_flow_item_eth *eth_spec = item->spec; @@ -927,7
>+942,8 @@ netdev_offload_dpdk_flow_create(struct netdev *netdev,
>             extra_str = ds_cstr(&s_extra);
>             VLOG_DBG_RL(&rl, "%s: rte_flow 0x%"PRIxPTR" %s  flow create %d
>%s",
>                         netdev_get_name(netdev), (intptr_t) flow, extra_str,
>-                        netdev_dpdk_get_port_id(netdev), ds_cstr(&s));
>+                        netdev_dpdk_get_port_id(netdev, attr->transfer),
>+                        ds_cstr(&s));
>         }
>     } else {
>         enum vlog_level level = VLL_WARN; @@ -942,7 +958,8 @@
>netdev_offload_dpdk_flow_create(struct netdev *netdev,
>             extra_str = ds_cstr(&s_extra);
>             VLOG_RL(&rl, level, "%s: Failed flow: %s  flow create %d %s",
>                     netdev_get_name(netdev), extra_str,
>-                    netdev_dpdk_get_port_id(netdev), ds_cstr(&s));
>+                    netdev_dpdk_get_port_id(netdev, attr->transfer),
>+                    ds_cstr(&s));
>         }
>     }
>     ds_destroy(&s);
>@@ -1035,6 +1052,12 @@ free_flow_patterns(struct flow_patterns
>*patterns)
>     struct rte_flow_error error;
>     int i;
>
>+#ifdef ALLOW_EXPERIMENTAL_API
>+    /* REPRESENTED_PORT */
>+    free(CONST_CAST(void *, patterns->items[0].spec));
>+    free(CONST_CAST(void *, patterns->items[0].mask)); #endif /*
Hard-coding index [0] for this match is problematic. You could either malloc 
a regular item for it (and free it here), or check the item type inside the 0..n-1 
loop.
>+ALLOW_EXPERIMENTAL_API */
>+
>     if (patterns->tnl_pmd_items) {
>         struct rte_flow_item *tnl_pmd_items = patterns->tnl_pmd_items;
>         uint32_t tnl_pmd_items_cnt = patterns->tnl_pmd_items_cnt; @@ -
>1049,7 +1072,12 @@ free_flow_patterns(struct flow_patterns *patterns)
>         }
>     }
>
>+#ifdef ALLOW_EXPERIMENTAL_API
>+    for (i = 1 /* REPRESENTED_PORT */ + patterns->tnl_pmd_items_cnt;
>+         i < patterns->cnt; i++) {
>+#else /* ! ALLOW_EXPERIMENTAL_API */
>     for (i = patterns->tnl_pmd_items_cnt; i < patterns->cnt; i++) {
>+#endif /* ALLOW_EXPERIMENTAL_API */
>         if (patterns->items[i].spec) {
>             free(CONST_CAST(void *, patterns->items[i].spec));
>         }
>@@ -1115,13 +1143,13 @@ vport_to_rte_tunnel(struct netdev *vport,
>         tunnel->tp_dst = tnl_cfg->dst_port;
>         if (!VLOG_DROP_DBG(&rl)) {
>             ds_put_format(s_tnl, "flow tunnel create %d type vxlan; ",
>-                          netdev_dpdk_get_port_id(netdev));
>+                          netdev_dpdk_get_port_id(netdev, true));
>         }
>     } else if (!strcmp(netdev_get_type(vport), "gre")) {
>         tunnel->type = RTE_FLOW_ITEM_TYPE_GRE;
>         if (!VLOG_DROP_DBG(&rl)) {
>             ds_put_format(s_tnl, "flow tunnel create %d type gre; ",
>-                          netdev_dpdk_get_port_id(netdev));
>+                          netdev_dpdk_get_port_id(netdev, true));
>         }
>     } else {
>         VLOG_DBG_RL(&rl, "vport type '%s' is not supported", @@ -1383,10
>+1411,23 @@ parse_flow_match(struct netdev *netdev,
>                  struct flow_patterns *patterns,
>                  struct match *match)
> {
>+#ifdef ALLOW_EXPERIMENTAL_API
>+    struct netdev *physdev = netdev_ports_get(orig_in_port, netdev-
>>dpif_type);
>+    struct rte_flow_item_ethdev *ethdev_spec = xzalloc(sizeof *ethdev_spec);
>+    struct rte_flow_item_ethdev *ethdev_mask = xzalloc(sizeof
>+*ethdev_mask); #endif /* ALLOW_EXPERIMENTAL_API */
>     struct rte_flow_item_eth *eth_spec = NULL, *eth_mask = NULL;
>     struct flow *consumed_masks;
>     uint8_t proto = 0;
>
>+#ifdef ALLOW_EXPERIMENTAL_API
>+    /* Add an explicit traffic source item to the beginning of the pattern. */
>+    ethdev_spec->port_id = netdev_dpdk_get_port_id(physdev, false);
>+    *ethdev_mask = rte_flow_item_ethdev_mask;
>+    add_flow_pattern(patterns,
>RTE_FLOW_ITEM_TYPE_REPRESENTED_PORT,
>+                     ethdev_spec, ethdev_mask, NULL); #endif /*
>+ALLOW_EXPERIMENTAL_API */
>+
>     consumed_masks = &match->wc.masks;
>
>     if (!flow_tnl_dst_is_set(&match->flow.tunnel)) { @@ -1784,7 +1825,7 @@
>add_represented_port_action(struct flow_actions *actions,
>     struct rte_flow_action_ethdev *ethdev;
>     int outdev_id;
>
>-    outdev_id = netdev_dpdk_get_port_id(outdev);
>+    outdev_id = netdev_dpdk_get_port_id(outdev, false);
>     if (outdev_id < 0) {
>         return -1;
>     }
>@@ -1801,7 +1842,7 @@ add_port_id_action(struct flow_actions *actions,
>     struct rte_flow_action_port_id *port_id;
>     int outdev_id;
>
>-    outdev_id = netdev_dpdk_get_port_id(outdev);
>+    outdev_id = netdev_dpdk_get_port_id(outdev, false);
>     if (outdev_id < 0) {
>         return -1;
>     }
>@@ -2333,6 +2374,7 @@ netdev_offload_dpdk_flow_destroy(struct
>ufid_to_rte_flow_data *rte_flow_data)
>     struct netdev *physdev;
>     struct netdev *netdev;
>     ovs_u128 *ufid;
>+    bool transfer;
>     int ret;
>
>     ovs_mutex_lock(&rte_flow_data->lock);
>@@ -2344,12 +2386,13 @@ netdev_offload_dpdk_flow_destroy(struct
>ufid_to_rte_flow_data *rte_flow_data)
>
>     rte_flow_data->dead = true;
>
>+    transfer = rte_flow_data->actions_offloaded;
>     rte_flow = rte_flow_data->rte_flow;
>     physdev = rte_flow_data->physdev;
>     netdev = rte_flow_data->netdev;
>     ufid = &rte_flow_data->ufid;
>
>-    ret = netdev_dpdk_rte_flow_destroy(physdev, rte_flow, &error);
>+    ret = netdev_dpdk_rte_flow_destroy(physdev, transfer, rte_flow,
>+ &error);
>
>     if (ret == 0) {
>         struct netdev_offload_dpdk_data *data; @@ -2364,12 +2407,12 @@
>netdev_offload_dpdk_flow_destroy(struct ufid_to_rte_flow_data
>*rte_flow_data)
>                     " flow destroy %d ufid " UUID_FMT,
>                     netdev_get_name(netdev), netdev_get_name(physdev),
>                     (intptr_t) rte_flow,
>-                    netdev_dpdk_get_port_id(physdev),
>+                    netdev_dpdk_get_port_id(physdev, transfer),
>                     UUID_ARGS((struct uuid *) ufid));
>     } else {
>         VLOG_ERR("Failed flow: %s/%s: flow destroy %d ufid " UUID_FMT,
>                  netdev_get_name(netdev), netdev_get_name(physdev),
>-                 netdev_dpdk_get_port_id(physdev),
>+                 netdev_dpdk_get_port_id(physdev, transfer),
>                  UUID_ARGS((struct uuid *) ufid));
>     }
>
>--
>2.30.2

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to