Hi Eli,

Thanks for reviewing this. Please see below.

On Tue, 21 Feb 2023, Eli Britstein wrote:



-----Original Message-----
From: Ivan Malov <ivan.ma...@arknetworks.am>
Sent: Tuesday, 21 February 2023 2:41
To: ovs-dev@openvswitch.org
Cc: Ilya Maximets <i.maxim...@ovn.org>; Eli Britstein <el...@nvidia.com>; Ori
Kam <or...@nvidia.com>; David Marchand <david.march...@redhat.com>
Subject: [PATCH v4 3/3] netdev-offload-dpdk: use flow transfer proxy
mechanism

External email: Use caution opening links or attachments


Manage "transfer" flows via the corresponding mechanism.
Doing so requires that the traffic source be specified explicitly, via the
corresponding pattern item.

Signed-off-by: Ivan Malov <ivan.ma...@arknetworks.am>
---
lib/netdev-dpdk.c         | 88 +++++++++++++++++++++++++++++++--------
lib/netdev-dpdk.h         |  4 +-
lib/netdev-offload-dpdk.c | 55 +++++++++++++++++++-----
3 files changed, 117 insertions(+), 30 deletions(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 2cebc3cca..3a9c9d9a0
100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -434,6 +434,7 @@ enum dpdk_hw_ol_features {

struct netdev_dpdk {
    PADDED_MEMBERS_CACHELINE_MARKER(CACHE_LINE_SIZE, cacheline0,
+        dpdk_port_t flow_transfer_proxy_port_id;
This extra field here makes it overflow one cache line.
        dpdk_port_t port_id;

        /* If true, device was attached by rte_eth_dev_attach(). */ @@ -1183,6
+1184,7 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
    uint32_t rx_chksm_offload_capa = RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
                                     RTE_ETH_RX_OFFLOAD_TCP_CKSUM |
                                     RTE_ETH_RX_OFFLOAD_IPV4_CKSUM;
+    int ret;

    /*
     * Full tunnel offload requires that tunnel ID metadata be @@ -1194,6
+1196,24 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
     */
    dpdk_eth_dev_init_rx_metadata(dev);

+    /*
+     * Managing "transfer" flows requires that the user communicate them
+     * via a port which has the privilege to control the embedded switch.
+     * For some vendors, all ports in a given switching domain have
+     * this privilege. For other vendors, it's only one port.
+     *
+     * Get the proxy port ID and remember it for later use.
+     */
+    ret = rte_flow_pick_transfer_proxy(dev->port_id,
+                                       &dev->flow_transfer_proxy_port_id,
+                                       NULL);
+    if (ret != 0) {
+        /*
+         * The PMD does not indicate the proxy port.
+         * Assume the proxy is unneeded.
+         */
+        dev->flow_transfer_proxy_port_id = dev->port_id;
+    }
+
    rte_eth_dev_info_get(dev->port_id, &info);

    if (strstr(info.driver_name, "vf") != NULL) { @@ -3981,8 +4001,10 @@
netdev_dpdk_detach(struct unixctl_conn *conn, int argc OVS_UNUSED,
                   const char *argv[], void *aux OVS_UNUSED)  {
    struct ds used_interfaces = DS_EMPTY_INITIALIZER;
+    struct netdev_dpdk *dev_self = NULL;
    struct rte_eth_dev_info dev_info;
    dpdk_port_t sibling_port_id;
+    struct netdev_dpdk *dev;
    dpdk_port_t port_id;
    bool used = false;
    char *response;
@@ -4000,8 +4022,6 @@ netdev_dpdk_detach(struct unixctl_conn *conn, int
argc OVS_UNUSED,
                  argv[1]);

    RTE_ETH_FOREACH_DEV_SIBLING (sibling_port_id, port_id) {
-        struct netdev_dpdk *dev;
-
        LIST_FOR_EACH (dev, list_node, &dpdk_list) {
            if (dev->port_id != sibling_port_id) {
                continue;
@@ -4021,6 +4041,25 @@ netdev_dpdk_detach(struct unixctl_conn *conn,
int argc OVS_UNUSED,
    }
    ds_destroy(&used_interfaces);

+    /*
+     * The device being detached may happen to be a flow proxy port
+     * for another device (still attached). If so, do not allow to
+     * detach. Devices dependent on this one must be detached first.
+     */
+    LIST_FOR_EACH (dev, list_node, &dpdk_list) {
+        if (dev->port_id == port_id) {
+            dev_self = dev;
+        } else if (dev->flow_transfer_proxy_port_id == port_id) {
+            response = xasprintf("Device '%s' can not be detached (flow proxy)",
+                                 argv[1]);
This is not acceptable.
When removing a port, we clean the offloads using 
netdev_offload_dpdk_flow_flush().
It should be enhanced to check if the proxy port is detached, remove the 
offloads of all the ports that used it.
There is a related patch proposed in [1].
[1] 
http://patchwork.ozlabs.org/project/openvswitch/patch/20220905144603.3585105-1-el...@nvidia.com/

I hear you. But would you prefer that I first wait for patch [1] to be
applied and then send the proxy enhancement rebased on top of it?
Or do you believe there is something I can do in parallel here?

As I can see, patch [1] has not been applied yet.
Is there any problem with it?

If I need to wait, then I believe I can withdraw this [3/3]
patch from the series for the time being and send v5.
What's your opinion?


+            goto error;
+        }
+    }
+
+    /* Indicate that the device being detached no longer needs a flow proxy.
*/
+    if (dev_self != NULL)
+        dev_self->flow_transfer_proxy_port_id = port_id;
+
    rte_eth_dev_info_get(port_id, &dev_info);
    rte_eth_dev_close(port_id);
    if (rte_dev_remove(dev_info.device) < 0) { @@ -5470,7 +5509,8 @@
unlock:
}

int
-netdev_dpdk_get_port_id(struct netdev *netdev)
+netdev_dpdk_get_port_id(struct netdev *netdev,
+                        bool flow_transfer_proxy)
{
    struct netdev_dpdk *dev;
    int ret = -1;
@@ -5481,7 +5521,7 @@ netdev_dpdk_get_port_id(struct netdev *netdev)

    dev = netdev_dpdk_cast(netdev);
    ovs_mutex_lock(&dev->mutex);
-    ret = dev->port_id;
+    ret = flow_transfer_proxy ? dev->flow_transfer_proxy_port_id :
+ dev->port_id;
    ovs_mutex_unlock(&dev->mutex);
out:
    return ret;
@@ -5517,13 +5557,15 @@ out:

int
netdev_dpdk_rte_flow_destroy(struct netdev *netdev,
-                             struct rte_flow *rte_flow,
+                             bool transfer, struct rte_flow *rte_flow,
                             struct rte_flow_error *error)  {
    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
    int ret;

-    ret = rte_flow_destroy(dev->port_id, rte_flow, error);
+    ret = rte_flow_destroy(transfer ?
+                           dev->flow_transfer_proxy_port_id : dev->port_id,
+                           rte_flow, error);
    return ret;
}

@@ -5537,7 +5579,17 @@ netdev_dpdk_rte_flow_create(struct netdev
*netdev,
    struct rte_flow *flow;
    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);

-    flow = rte_flow_create(dev->port_id, attr, items, actions, error);
+    if (!attr->transfer) {
+        /*
+         * The 1st item in any pattern is a traffic source one.
+         * It is unnecessary in the case of non-transfer rules.
+         */
+        ++(items);
+    }
+
+    flow = rte_flow_create(attr->transfer ?
+                           dev->flow_transfer_proxy_port_id : dev->port_id,
+                           attr, items, actions, error);
    return flow;
}

@@ -5565,7 +5617,8 @@ netdev_dpdk_rte_flow_query_count(struct netdev
*netdev,
    }

    dev = netdev_dpdk_cast(netdev);
-    ret = rte_flow_query(dev->port_id, rte_flow, actions, query, error);
+    ret = rte_flow_query(dev->flow_transfer_proxy_port_id, rte_flow,
+                         actions, query, error);
    return ret;
}

@@ -5587,8 +5640,8 @@ netdev_dpdk_rte_flow_tunnel_decap_set(struct
netdev *netdev,

    dev = netdev_dpdk_cast(netdev);
    ovs_mutex_lock(&dev->mutex);
-    ret = rte_flow_tunnel_decap_set(dev->port_id, tunnel, actions,
-                                    num_of_actions, error);
+    ret = rte_flow_tunnel_decap_set(dev->flow_transfer_proxy_port_id,
tunnel,
+                                    actions, num_of_actions, error);
    ovs_mutex_unlock(&dev->mutex);
    return ret;
}
@@ -5609,8 +5662,8 @@ netdev_dpdk_rte_flow_tunnel_match(struct
netdev *netdev,

    dev = netdev_dpdk_cast(netdev);
    ovs_mutex_lock(&dev->mutex);
-    ret = rte_flow_tunnel_match(dev->port_id, tunnel, items, num_of_items,
-                                error);
+    ret = rte_flow_tunnel_match(dev->flow_transfer_proxy_port_id, tunnel,
+                                items, num_of_items, error);
    ovs_mutex_unlock(&dev->mutex);
    return ret;
}
@@ -5631,7 +5684,8 @@ netdev_dpdk_rte_flow_get_restore_info(struct
netdev *netdev,

    dev = netdev_dpdk_cast(netdev);
    ovs_mutex_lock(&dev->mutex);
-    ret = rte_flow_get_restore_info(dev->port_id, m, info, error);
+    ret = rte_flow_get_restore_info(dev->flow_transfer_proxy_port_id,
+                                    m, info, error);
    ovs_mutex_unlock(&dev->mutex);
    return ret;
}
@@ -5652,8 +5706,8 @@
netdev_dpdk_rte_flow_tunnel_action_decap_release(

    dev = netdev_dpdk_cast(netdev);
    ovs_mutex_lock(&dev->mutex);
-    ret = rte_flow_tunnel_action_decap_release(dev->port_id, actions,
-                                               num_of_actions, error);
+    ret = rte_flow_tunnel_action_decap_release(dev->flow_transfer_proxy_port_id,
+                                               actions, num_of_actions,
+                                               error);
    ovs_mutex_unlock(&dev->mutex);
    return ret;
}
@@ -5673,8 +5727,8 @@ netdev_dpdk_rte_flow_tunnel_item_release(struct
netdev *netdev,

    dev = netdev_dpdk_cast(netdev);
    ovs_mutex_lock(&dev->mutex);
-    ret = rte_flow_tunnel_item_release(dev->port_id, items, num_of_items,
-                                       error);
+    ret = rte_flow_tunnel_item_release(dev->flow_transfer_proxy_port_id,
+                                       items, num_of_items, error);
    ovs_mutex_unlock(&dev->mutex);
    return ret;
}
diff --git a/lib/netdev-dpdk.h b/lib/netdev-dpdk.h index
5cd95d00f..80b616dae 100644
--- a/lib/netdev-dpdk.h
+++ b/lib/netdev-dpdk.h
@@ -36,7 +36,7 @@ bool netdev_dpdk_flow_api_supported(struct netdev
*);

int
netdev_dpdk_rte_flow_destroy(struct netdev *netdev,
-                             struct rte_flow *rte_flow,
+                             bool transfer, struct rte_flow *rte_flow,
                             struct rte_flow_error *error);  struct rte_flow *
netdev_dpdk_rte_flow_create(struct netdev *netdev, @@ -50,7 +50,7 @@
netdev_dpdk_rte_flow_query_count(struct netdev *netdev,
                                 struct rte_flow_query_count *query,
                                 struct rte_flow_error *error);  int -
netdev_dpdk_get_port_id(struct netdev *netdev);
+netdev_dpdk_get_port_id(struct netdev *netdev, bool
+flow_transfer_proxy);

#ifdef ALLOW_EXPERIMENTAL_API

diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c index
7f2598a53..890c0ada6 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -355,8 +355,18 @@ dump_flow_pattern(struct ds *s,

    if (item->type == RTE_FLOW_ITEM_TYPE_END) {
        ds_put_cstr(s, "end ");
+    } else if (item->type == RTE_FLOW_ITEM_TYPE_REPRESENTED_PORT) {
+        const struct rte_flow_item_ethdev *ethdev_spec = item->spec;
+        const struct rte_flow_item_ethdev *ethdev_mask = item->mask;
+
+        ds_put_cstr(s, "represented_port ");
+
+        DUMP_PATTERN_ITEM(ethdev_mask->port_id, false,
"ethdev_port_id",
+                          "%"PRIu16, ethdev_spec->port_id,
+                          ethdev_mask->port_id, 0);
    } else if (flow_patterns->tnl_pmd_items_cnt &&
-               pattern_index < flow_patterns->tnl_pmd_items_cnt) {
+               pattern_index < 1 /* REPRESENTED_PORT */ +
+                               flow_patterns->tnl_pmd_items_cnt) {
        return;
    } else if (item->type == RTE_FLOW_ITEM_TYPE_ETH) {
        const struct rte_flow_item_eth *eth_spec = item->spec; @@ -882,7
+892,12 @@ dump_flow(struct ds *s, struct ds *s_extra,
        dump_flow_attr(s, s_extra, attr, flow_patterns, flow_actions);
    }
    ds_put_cstr(s, "pattern ");
-    for (i = 0; i < flow_patterns->cnt; i++) {
+    /*
+     * The 1st item in any pattern is a traffic source one.
+     * It is skipped in the case of non-transfer rules and
+     * must not be indicated when printing such rules, too.
+     */
+    for (i = 1 - attr->transfer; i < flow_patterns->cnt; i++) {
"1 - attr->transfer" is not very clear; you can use "!attr->transfer" instead.
        dump_flow_pattern(s, flow_patterns, i);
    }
    ds_put_cstr(s, "actions ");
@@ -920,7 +935,8 @@ netdev_offload_dpdk_flow_create(struct netdev
*netdev,
            extra_str = ds_cstr(&s_extra);
            VLOG_DBG_RL(&rl, "%s: rte_flow 0x%"PRIxPTR" %s  flow create %d
%s",
                        netdev_get_name(netdev), (intptr_t) flow, extra_str,
-                        netdev_dpdk_get_port_id(netdev), ds_cstr(&s));
+                        netdev_dpdk_get_port_id(netdev, attr->transfer),
+                        ds_cstr(&s));
        }
    } else {
        enum vlog_level level = VLL_WARN; @@ -935,7 +951,8 @@
netdev_offload_dpdk_flow_create(struct netdev *netdev,
            extra_str = ds_cstr(&s_extra);
            VLOG_RL(&rl, level, "%s: Failed flow: %s  flow create %d %s",
                    netdev_get_name(netdev), extra_str,
-                    netdev_dpdk_get_port_id(netdev), ds_cstr(&s));
+                    netdev_dpdk_get_port_id(netdev, attr->transfer),
+                    ds_cstr(&s));
        }
    }
    ds_destroy(&s);
@@ -1028,6 +1045,10 @@ free_flow_patterns(struct flow_patterns
*patterns)
    struct rte_flow_error error;
    int i;

+    /* REPRESENTED_PORT */
+    free(CONST_CAST(void *, patterns->items[0].spec));
+    free(CONST_CAST(void *, patterns->items[0].mask));
+
    if (patterns->tnl_pmd_items) {
        struct rte_flow_item *tnl_pmd_items = patterns->tnl_pmd_items;
        uint32_t tnl_pmd_items_cnt = patterns->tnl_pmd_items_cnt; @@ -
1042,7 +1063,8 @@ free_flow_patterns(struct flow_patterns *patterns)
        }
    }

-    for (i = patterns->tnl_pmd_items_cnt; i < patterns->cnt; i++) {
+    for (i = 1 /* REPRESENTED_PORT */ + patterns->tnl_pmd_items_cnt;
+         i < patterns->cnt; i++) {
Why do we need the special freeing of items[0] above, instead of keeping that freeing inside this loop?
        if (patterns->items[i].spec) {
            free(CONST_CAST(void *, patterns->items[i].spec));
        }
@@ -1114,13 +1136,13 @@ vport_to_rte_tunnel(struct netdev *vport,
        tunnel->tp_dst = tnl_cfg->dst_port;
        if (!VLOG_DROP_DBG(&rl)) {
            ds_put_format(s_tnl, "flow tunnel create %d type vxlan; ",
-                          netdev_dpdk_get_port_id(netdev));
+                          netdev_dpdk_get_port_id(netdev, true));
        }
    } else if (!strcmp(netdev_get_type(vport), "gre")) {
        tunnel->type = RTE_FLOW_ITEM_TYPE_GRE;
        if (!VLOG_DROP_DBG(&rl)) {
            ds_put_format(s_tnl, "flow tunnel create %d type gre; ",
-                          netdev_dpdk_get_port_id(netdev));
+                          netdev_dpdk_get_port_id(netdev, true));
        }
    } else {
        VLOG_DBG_RL(&rl, "vport type '%s' is not supported", @@ -1382,10
+1404,19 @@ parse_flow_match(struct netdev *netdev,
                 struct flow_patterns *patterns,
                 struct match *match)
{
+    struct netdev *physdev = netdev_ports_get(orig_in_port, netdev->dpif_type);
+    struct rte_flow_item_ethdev *ethdev_spec = xzalloc(sizeof *ethdev_spec);
+    struct rte_flow_item_ethdev *ethdev_mask = xzalloc(sizeof *ethdev_mask);
    struct rte_flow_item_eth *eth_spec = NULL, *eth_mask = NULL;
    struct flow *consumed_masks;
    uint8_t proto = 0;

+    /* Add an explicit traffic source item to the beginning of the pattern. */
+    ethdev_spec->port_id = netdev_dpdk_get_port_id(physdev, false);
+    *ethdev_mask = rte_flow_item_ethdev_mask;
+    add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_REPRESENTED_PORT,
+                     ethdev_spec, ethdev_mask, NULL);
+
    consumed_masks = &match->wc.masks;

    if (!flow_tnl_dst_is_set(&match->flow.tunnel)) { @@ -1782,7 +1813,7 @@
add_represented_port_action(struct flow_actions *actions,
    struct rte_flow_action_ethdev *ethdev;
    int outdev_id;

-    outdev_id = netdev_dpdk_get_port_id(outdev);
+    outdev_id = netdev_dpdk_get_port_id(outdev, false);
    if (outdev_id < 0) {
        return -1;
    }
@@ -2322,6 +2353,7 @@ netdev_offload_dpdk_flow_destroy(struct
ufid_to_rte_flow_data *rte_flow_data)
    struct netdev *physdev;
    struct netdev *netdev;
    ovs_u128 *ufid;
+    bool transfer;
    int ret;

    ovs_mutex_lock(&rte_flow_data->lock);
@@ -2333,12 +2365,13 @@ netdev_offload_dpdk_flow_destroy(struct
ufid_to_rte_flow_data *rte_flow_data)

    rte_flow_data->dead = true;

+    transfer = rte_flow_data->actions_offloaded;
    rte_flow = rte_flow_data->rte_flow;
    physdev = rte_flow_data->physdev;
    netdev = rte_flow_data->netdev;
    ufid = &rte_flow_data->ufid;

-    ret = netdev_dpdk_rte_flow_destroy(physdev, rte_flow, &error);
+    ret = netdev_dpdk_rte_flow_destroy(physdev, transfer, rte_flow,
+ &error);

    if (ret == 0) {
        struct netdev_offload_dpdk_data *data; @@ -2353,12 +2386,12 @@
netdev_offload_dpdk_flow_destroy(struct ufid_to_rte_flow_data
*rte_flow_data)
                    " flow destroy %d ufid " UUID_FMT,
                    netdev_get_name(netdev), netdev_get_name(physdev),
                    (intptr_t) rte_flow,
-                    netdev_dpdk_get_port_id(physdev),
+                    netdev_dpdk_get_port_id(physdev, transfer),
                    UUID_ARGS((struct uuid *) ufid));
    } else {
        VLOG_ERR("Failed flow: %s/%s: flow destroy %d ufid " UUID_FMT,
                 netdev_get_name(netdev), netdev_get_name(physdev),
-                 netdev_dpdk_get_port_id(physdev),
+                 netdev_dpdk_get_port_id(physdev, transfer),
                 UUID_ARGS((struct uuid *) ufid));
    }

--
2.20.1


_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to