>-----Original Message-----
>From: Ivan Malov <[email protected]>
>Sent: Wednesday, July 20, 2022 3:18 PM
>To: [email protected]
>Cc: Eli Britstein <[email protected]>; Stephen Hemminger
><[email protected]>; Ilya Maximets <[email protected]>; Ori
>Kam <[email protected]>; Maxime Coquelin
><[email protected]>; David Marchand
><[email protected]>; Andrew Rybchenko
><[email protected]>
>Subject: [PATCH v3 3/3] netdev-offload-dpdk: use flow transfer proxy
>mechanism
>
>Manage "transfer" flows via the corresponding mechanism.
>Doing so requires that the traffic source be specified explicitly, via the
>corresponding pattern item.
>
>Signed-off-by: Ivan Malov <[email protected]>
>Acked-by: Andrew Rybchenko <[email protected]>
>---
> lib/netdev-dpdk.c | 99 ++++++++++++++++++++++++++++++++-------
> lib/netdev-dpdk.h | 4 +-
> lib/netdev-offload-dpdk.c | 61 ++++++++++++++++++++----
> 3 files changed, 135 insertions(+), 29 deletions(-)
>
>diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
>index 45e5d26d2..01fb40255 100644
>--- a/lib/netdev-dpdk.c
>+++ b/lib/netdev-dpdk.c
>@@ -420,6 +420,7 @@ enum dpdk_hw_ol_features {
>
> struct netdev_dpdk {
> PADDED_MEMBERS_CACHELINE_MARKER(CACHE_LINE_SIZE, cacheline0,
>+ dpdk_port_t flow_transfer_proxy_port_id;
> dpdk_port_t port_id;
>
>     /* If true, device was attached by rte_eth_dev_attach(). */
>@@ -1130,8 +1131,9 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
> uint32_t rx_chksm_offload_capa = RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
> RTE_ETH_RX_OFFLOAD_TCP_CKSUM |
> RTE_ETH_RX_OFFLOAD_IPV4_CKSUM;
>-
> #ifdef ALLOW_EXPERIMENTAL_API
This ifdef appears all over the patches, even in places where the code can
harmlessly do without it. It makes the code less readable and might cause
cherry-picking issues in the future. Try to limit it to the places where it
is strictly required.
>+ int ret;
>+
> /*
> * Full tunnel offload requires that tunnel ID metadata be
>      * delivered with "miss" packets from the hardware to the
>@@ -1141,6 +1143,27 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
> * Request delivery of such metadata.
> */
> dpdk_eth_dev_init_rx_metadata(dev);
>+
>+ /*
>+ * Managing "transfer" flows requires that the user communicate them
>+ * via a port which has the privilege to control the embedded switch.
>+ * For some vendors, all ports in a given switching domain have
>+ * this privilege. For other vendors, it's only one port.
>+ *
>+ * Get the proxy port ID and remember it for later use.
>+ */
>+    ret = rte_flow_pick_transfer_proxy(dev->port_id,
>+                                       &dev->flow_transfer_proxy_port_id,
>+                                       NULL);
>+ if (ret != 0) {
>+ /*
>+ * The PMD does not indicate the proxy port.
>+ * Assume the proxy is unneeded.
>+ */
>+ dev->flow_transfer_proxy_port_id = dev->port_id;
>+ }
>+#else /* ! ALLOW_EXPERIMENTAL_API */
>+ /* No API to get transfer proxy; assume the proxy is unneeded. */
>+ dev->flow_transfer_proxy_port_id = dev->port_id;
> #endif /* ALLOW_EXPERIMENTAL_API */
>
>     rte_eth_dev_info_get(dev->port_id, &info);
>@@ -3762,8 +3785,12 @@ netdev_dpdk_detach(struct unixctl_conn *conn, int argc OVS_UNUSED,
>                    const char *argv[], void *aux OVS_UNUSED)
> {
> struct ds used_interfaces = DS_EMPTY_INITIALIZER;
>+#ifdef ALLOW_EXPERIMENTAL_API
>+    struct netdev_dpdk *dev_self = NULL;
>+#endif /* ALLOW_EXPERIMENTAL_API */
> struct rte_eth_dev_info dev_info;
> dpdk_port_t sibling_port_id;
>+ struct netdev_dpdk *dev;
> dpdk_port_t port_id;
> bool used = false;
> char *response;
>@@ -3781,8 +3808,6 @@ netdev_dpdk_detach(struct unixctl_conn *conn, int argc OVS_UNUSED,
> argv[1]);
>
> RTE_ETH_FOREACH_DEV_SIBLING (sibling_port_id, port_id) {
>- struct netdev_dpdk *dev;
>-
> LIST_FOR_EACH (dev, list_node, &dpdk_list) {
> if (dev->port_id != sibling_port_id) {
> continue;
>@@ -3802,6 +3827,27 @@ netdev_dpdk_detach(struct unixctl_conn *conn, int argc OVS_UNUSED,
> }
> ds_destroy(&used_interfaces);
>
>+#ifdef ALLOW_EXPERIMENTAL_API
>+ /*
>+ * The device being detached may happen to be a flow proxy port
>+ * for another device (still attached). If so, do not allow to
>+ * detach. Devices dependent on this one must be detached first.
I don't think it is acceptable to deny detaching the port, or to enforce
such an ordering. For example, ports are detached upon shutdown, in an
unknown order.
Suppose A is the proxy port for ports B and C. When port A is about to be
detached, flush the offloads of ports B and C (in addition to flushing the
offloads of port A). Then it is safe to detach it.
>+ */
>+ LIST_FOR_EACH (dev, list_node, &dpdk_list) {
>+ if (dev->port_id == port_id) {
>+ dev_self = dev;
>+ } else if (dev->flow_transfer_proxy_port_id == port_id) {
>+            response = xasprintf("Device '%s' can not be detached (flow proxy)",
>+                                 argv[1]);
>+ goto error;
>+ }
>+ }
>+
>+    /* Indicate that the device being detached no longer needs a flow proxy. */
>+    if (dev_self != NULL)
>+        dev_self->flow_transfer_proxy_port_id = port_id;
>+#endif /* ALLOW_EXPERIMENTAL_API */
>+
> rte_eth_dev_info_get(port_id, &dev_info);
> rte_eth_dev_close(port_id);
>     if (rte_dev_remove(dev_info.device) < 0) {
>@@ -5167,7 +5213,8 @@ unlock:
> }
>
> int
>-netdev_dpdk_get_port_id(struct netdev *netdev)
>+netdev_dpdk_get_port_id(struct netdev *netdev,
>+ bool flow_transfer_proxy)
Maybe it is better to have a separate API to get the proxy port ID, and to
use it only where it is needed.
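E.g. a sketch of a dedicated getter, mirroring the existing
netdev_dpdk_get_port_id() (the name is illustrative):

    int
    netdev_dpdk_get_flow_transfer_proxy_port_id(struct netdev *netdev)
    {
        struct netdev_dpdk *dev;
        int ret = -1;

        if (!is_dpdk_class(netdev->netdev_class)) {
            goto out;
        }

        dev = netdev_dpdk_cast(netdev);
        ovs_mutex_lock(&dev->mutex);
        ret = dev->flow_transfer_proxy_port_id;
        ovs_mutex_unlock(&dev->mutex);
    out:
        return ret;
    }

That way the existing callers of netdev_dpdk_get_port_id() stay untouched.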
> {
> struct netdev_dpdk *dev;
> int ret = -1;
>@@ -5178,7 +5225,7 @@ netdev_dpdk_get_port_id(struct netdev *netdev)
>
> dev = netdev_dpdk_cast(netdev);
> ovs_mutex_lock(&dev->mutex);
>- ret = dev->port_id;
>+ ret = flow_transfer_proxy ? dev->flow_transfer_proxy_port_id :
>+ dev->port_id;
> ovs_mutex_unlock(&dev->mutex);
> out:
> return ret;
>@@ -5214,13 +5261,15 @@ out:
>
> int
> netdev_dpdk_rte_flow_destroy(struct netdev *netdev,
>- struct rte_flow *rte_flow,
>+ bool transfer, struct rte_flow *rte_flow,
>                              struct rte_flow_error *error)
> {
> struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> int ret;
>
>- ret = rte_flow_destroy(dev->port_id, rte_flow, error);
>+ ret = rte_flow_destroy(transfer ?
>+ dev->flow_transfer_proxy_port_id : dev->port_id,
>+ rte_flow, error);
> return ret;
> }
>
>@@ -5234,7 +5283,19 @@ netdev_dpdk_rte_flow_create(struct netdev *netdev,
> struct rte_flow *flow;
> struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>
>- flow = rte_flow_create(dev->port_id, attr, items, actions, error);
>+#ifdef ALLOW_EXPERIMENTAL_API
>+ if (!attr->transfer) {
>+ /*
>+ * The 1st item in any pattern is a traffic source one.
>+ * It is unnecessary in the case of non-transfer rules.
>+ */
>+ ++(items);
>+ }
>+#endif /* ALLOW_EXPERIMENTAL_API */
>+
>+ flow = rte_flow_create(attr->transfer ?
>+ dev->flow_transfer_proxy_port_id : dev->port_id,
>+ attr, items, actions, error);
> return flow;
> }
>
>@@ -5262,7 +5323,8 @@ netdev_dpdk_rte_flow_query_count(struct netdev *netdev,
> }
>
> dev = netdev_dpdk_cast(netdev);
>- ret = rte_flow_query(dev->port_id, rte_flow, actions, query, error);
>+ ret = rte_flow_query(dev->flow_transfer_proxy_port_id, rte_flow,
>+ actions, query, error);
> return ret;
> }
>
>@@ -5284,8 +5346,8 @@ netdev_dpdk_rte_flow_tunnel_decap_set(struct netdev *netdev,
>
> dev = netdev_dpdk_cast(netdev);
> ovs_mutex_lock(&dev->mutex);
>- ret = rte_flow_tunnel_decap_set(dev->port_id, tunnel, actions,
>- num_of_actions, error);
>+    ret = rte_flow_tunnel_decap_set(dev->flow_transfer_proxy_port_id, tunnel,
>+                                    actions, num_of_actions, error);
> ovs_mutex_unlock(&dev->mutex);
> return ret;
> }
>@@ -5306,8 +5368,8 @@ netdev_dpdk_rte_flow_tunnel_match(struct netdev *netdev,
>
> dev = netdev_dpdk_cast(netdev);
> ovs_mutex_lock(&dev->mutex);
>- ret = rte_flow_tunnel_match(dev->port_id, tunnel, items, num_of_items,
>- error);
>+ ret = rte_flow_tunnel_match(dev->flow_transfer_proxy_port_id, tunnel,
>+ items, num_of_items, error);
> ovs_mutex_unlock(&dev->mutex);
> return ret;
> }
>@@ -5328,7 +5390,8 @@ netdev_dpdk_rte_flow_get_restore_info(struct netdev *netdev,
>
> dev = netdev_dpdk_cast(netdev);
> ovs_mutex_lock(&dev->mutex);
>- ret = rte_flow_get_restore_info(dev->port_id, m, info, error);
>+ ret = rte_flow_get_restore_info(dev->flow_transfer_proxy_port_id,
>+ m, info, error);
> ovs_mutex_unlock(&dev->mutex);
> return ret;
> }
>@@ -5349,8 +5412,8 @@ netdev_dpdk_rte_flow_tunnel_action_decap_release(
>
> dev = netdev_dpdk_cast(netdev);
> ovs_mutex_lock(&dev->mutex);
>- ret = rte_flow_tunnel_action_decap_release(dev->port_id, actions,
>- num_of_actions, error);
>+    ret = rte_flow_tunnel_action_decap_release(dev->flow_transfer_proxy_port_id,
>+                                               actions, num_of_actions,
>+                                               error);
> ovs_mutex_unlock(&dev->mutex);
> return ret;
> }
>@@ -5370,8 +5433,8 @@ netdev_dpdk_rte_flow_tunnel_item_release(struct netdev *netdev,
>
> dev = netdev_dpdk_cast(netdev);
> ovs_mutex_lock(&dev->mutex);
>- ret = rte_flow_tunnel_item_release(dev->port_id, items, num_of_items,
>- error);
>+ ret = rte_flow_tunnel_item_release(dev->flow_transfer_proxy_port_id,
>+ items, num_of_items, error);
> ovs_mutex_unlock(&dev->mutex);
> return ret;
> }
>diff --git a/lib/netdev-dpdk.h b/lib/netdev-dpdk.h
>index 699be3fb4..6f18a73b2 100644
>--- a/lib/netdev-dpdk.h
>+++ b/lib/netdev-dpdk.h
>@@ -35,7 +35,7 @@ bool netdev_dpdk_flow_api_supported(struct netdev *);
>
> int
> netdev_dpdk_rte_flow_destroy(struct netdev *netdev,
>- struct rte_flow *rte_flow,
>+ bool transfer, struct rte_flow *rte_flow,
>                              struct rte_flow_error *error);
> struct rte_flow *
> netdev_dpdk_rte_flow_create(struct netdev *netdev,
>@@ -49,7 +49,7 @@ netdev_dpdk_rte_flow_query_count(struct netdev *netdev,
>                                  struct rte_flow_query_count *query,
>                                  struct rte_flow_error *error);
> int
>-netdev_dpdk_get_port_id(struct netdev *netdev);
>+netdev_dpdk_get_port_id(struct netdev *netdev, bool flow_transfer_proxy);
>
> #ifdef ALLOW_EXPERIMENTAL_API
>
>diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
>index 9cd5a0159..9320a1d0b 100644
>--- a/lib/netdev-offload-dpdk.c
>+++ b/lib/netdev-offload-dpdk.c
>@@ -353,8 +353,23 @@ dump_flow_pattern(struct ds *s,
>
> if (item->type == RTE_FLOW_ITEM_TYPE_END) {
> ds_put_cstr(s, "end ");
>+#ifdef ALLOW_EXPERIMENTAL_API
>+ } else if (item->type == RTE_FLOW_ITEM_TYPE_REPRESENTED_PORT) {
>+ const struct rte_flow_item_ethdev *ethdev_spec = item->spec;
>+ const struct rte_flow_item_ethdev *ethdev_mask = item->mask;
>+
>+ ds_put_cstr(s, "represented_port ");
>+
>+ DUMP_PATTERN_ITEM(ethdev_mask->port_id, false, "ethdev_port_id",
>+ "%"PRIu16, ethdev_spec->port_id,
>+ ethdev_mask->port_id, 0);
>+ } else if (flow_patterns->tnl_pmd_items_cnt &&
>+ pattern_index < 1 /* REPRESENTED_PORT */ +
>+ flow_patterns->tnl_pmd_items_cnt) {
>+#else /* ! ALLOW_EXPERIMENTAL_API */
> } else if (flow_patterns->tnl_pmd_items_cnt &&
> pattern_index < flow_patterns->tnl_pmd_items_cnt) {
>+#endif /* ALLOW_EXPERIMENTAL_API */
> return;
> } else if (item->type == RTE_FLOW_ITEM_TYPE_ETH) {
>         const struct rte_flow_item_eth *eth_spec = item->spec;
>@@ -927,7 +942,8 @@ netdev_offload_dpdk_flow_create(struct netdev *netdev,
> extra_str = ds_cstr(&s_extra);
>             VLOG_DBG_RL(&rl, "%s: rte_flow 0x%"PRIxPTR" %s flow create %d %s",
> netdev_get_name(netdev), (intptr_t) flow, extra_str,
>- netdev_dpdk_get_port_id(netdev), ds_cstr(&s));
>+ netdev_dpdk_get_port_id(netdev, attr->transfer),
>+ ds_cstr(&s));
> }
> } else {
>         enum vlog_level level = VLL_WARN;
>@@ -942,7 +958,8 @@ netdev_offload_dpdk_flow_create(struct netdev *netdev,
> extra_str = ds_cstr(&s_extra);
> VLOG_RL(&rl, level, "%s: Failed flow: %s flow create %d %s",
> netdev_get_name(netdev), extra_str,
>- netdev_dpdk_get_port_id(netdev), ds_cstr(&s));
>+ netdev_dpdk_get_port_id(netdev, attr->transfer),
>+ ds_cstr(&s));
> }
> }
> ds_destroy(&s);
>@@ -1035,6 +1052,12 @@ free_flow_patterns(struct flow_patterns *patterns)
> struct rte_flow_error error;
> int i;
>
>+#ifdef ALLOW_EXPERIMENTAL_API
>+    /* REPRESENTED_PORT */
>+    free(CONST_CAST(void *, patterns->items[0].spec));
>+    free(CONST_CAST(void *, patterns->items[0].mask));
>+#endif /* ALLOW_EXPERIMENTAL_API */
Hard-coding index [0] for this item is problematic. You could either malloc
a regular item for it (and free it here like the others), or check the item
type inside the 0..n-1 loop.
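A sketch of the second option: one loop over all the items, deciding by
type, so no index is hard-coded (the tunnel PMD items would still be
skipped and released as they are today):

    for (i = 0; i < patterns->cnt; i++) {
        if (patterns->items[i].type == RTE_FLOW_ITEM_TYPE_REPRESENTED_PORT) {
            /* Freed like any regular item; no [0] assumption. */
            free(CONST_CAST(void *, patterns->items[i].spec));
            free(CONST_CAST(void *, patterns->items[i].mask));
            continue;
        }
        /* ... existing handling of tunnel PMD items and regular items ... */
    }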
>+
> if (patterns->tnl_pmd_items) {
> struct rte_flow_item *tnl_pmd_items = patterns->tnl_pmd_items;
>         uint32_t tnl_pmd_items_cnt = patterns->tnl_pmd_items_cnt;
>@@ -1049,7 +1072,12 @@ free_flow_patterns(struct flow_patterns *patterns)
> }
> }
>
>+#ifdef ALLOW_EXPERIMENTAL_API
>+ for (i = 1 /* REPRESENTED_PORT */ + patterns->tnl_pmd_items_cnt;
>+ i < patterns->cnt; i++) {
>+#else /* ! ALLOW_EXPERIMENTAL_API */
> for (i = patterns->tnl_pmd_items_cnt; i < patterns->cnt; i++) {
>+#endif /* ALLOW_EXPERIMENTAL_API */
> if (patterns->items[i].spec) {
> free(CONST_CAST(void *, patterns->items[i].spec));
> }
>@@ -1115,13 +1143,13 @@ vport_to_rte_tunnel(struct netdev *vport,
> tunnel->tp_dst = tnl_cfg->dst_port;
> if (!VLOG_DROP_DBG(&rl)) {
> ds_put_format(s_tnl, "flow tunnel create %d type vxlan; ",
>- netdev_dpdk_get_port_id(netdev));
>+ netdev_dpdk_get_port_id(netdev, true));
> }
> } else if (!strcmp(netdev_get_type(vport), "gre")) {
> tunnel->type = RTE_FLOW_ITEM_TYPE_GRE;
> if (!VLOG_DROP_DBG(&rl)) {
> ds_put_format(s_tnl, "flow tunnel create %d type gre; ",
>- netdev_dpdk_get_port_id(netdev));
>+ netdev_dpdk_get_port_id(netdev, true));
> }
> } else {
>         VLOG_DBG_RL(&rl, "vport type '%s' is not supported",
>@@ -1383,10 +1411,23 @@ parse_flow_match(struct netdev *netdev,
> struct flow_patterns *patterns,
> struct match *match)
> {
>+#ifdef ALLOW_EXPERIMENTAL_API
>+    struct netdev *physdev = netdev_ports_get(orig_in_port, netdev->dpif_type);
>+    struct rte_flow_item_ethdev *ethdev_spec = xzalloc(sizeof *ethdev_spec);
>+    struct rte_flow_item_ethdev *ethdev_mask = xzalloc(sizeof *ethdev_mask);
>+#endif /* ALLOW_EXPERIMENTAL_API */
> struct rte_flow_item_eth *eth_spec = NULL, *eth_mask = NULL;
> struct flow *consumed_masks;
> uint8_t proto = 0;
>
>+#ifdef ALLOW_EXPERIMENTAL_API
>+ /* Add an explicit traffic source item to the beginning of the pattern. */
>+ ethdev_spec->port_id = netdev_dpdk_get_port_id(physdev, false);
>+ *ethdev_mask = rte_flow_item_ethdev_mask;
>+    add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_REPRESENTED_PORT,
>+                     ethdev_spec, ethdev_mask, NULL);
>+#endif /* ALLOW_EXPERIMENTAL_API */
>+
> consumed_masks = &match->wc.masks;
>
>     if (!flow_tnl_dst_is_set(&match->flow.tunnel)) {
>@@ -1784,7 +1825,7 @@ add_represented_port_action(struct flow_actions *actions,
> struct rte_flow_action_ethdev *ethdev;
> int outdev_id;
>
>- outdev_id = netdev_dpdk_get_port_id(outdev);
>+ outdev_id = netdev_dpdk_get_port_id(outdev, false);
> if (outdev_id < 0) {
> return -1;
> }
>@@ -1801,7 +1842,7 @@ add_port_id_action(struct flow_actions *actions,
> struct rte_flow_action_port_id *port_id;
> int outdev_id;
>
>- outdev_id = netdev_dpdk_get_port_id(outdev);
>+ outdev_id = netdev_dpdk_get_port_id(outdev, false);
> if (outdev_id < 0) {
> return -1;
> }
>@@ -2333,6 +2374,7 @@ netdev_offload_dpdk_flow_destroy(struct ufid_to_rte_flow_data *rte_flow_data)
> struct netdev *physdev;
> struct netdev *netdev;
> ovs_u128 *ufid;
>+ bool transfer;
> int ret;
>
> ovs_mutex_lock(&rte_flow_data->lock);
>@@ -2344,12 +2386,13 @@ netdev_offload_dpdk_flow_destroy(struct ufid_to_rte_flow_data *rte_flow_data)
>
> rte_flow_data->dead = true;
>
>+ transfer = rte_flow_data->actions_offloaded;
> rte_flow = rte_flow_data->rte_flow;
> physdev = rte_flow_data->physdev;
> netdev = rte_flow_data->netdev;
> ufid = &rte_flow_data->ufid;
>
>- ret = netdev_dpdk_rte_flow_destroy(physdev, rte_flow, &error);
>+ ret = netdev_dpdk_rte_flow_destroy(physdev, transfer, rte_flow,
>+ &error);
>
> if (ret == 0) {
>         struct netdev_offload_dpdk_data *data;
>@@ -2364,12 +2407,12 @@ netdev_offload_dpdk_flow_destroy(struct ufid_to_rte_flow_data *rte_flow_data)
> " flow destroy %d ufid " UUID_FMT,
> netdev_get_name(netdev), netdev_get_name(physdev),
> (intptr_t) rte_flow,
>- netdev_dpdk_get_port_id(physdev),
>+ netdev_dpdk_get_port_id(physdev, transfer),
> UUID_ARGS((struct uuid *) ufid));
> } else {
> VLOG_ERR("Failed flow: %s/%s: flow destroy %d ufid " UUID_FMT,
> netdev_get_name(netdev), netdev_get_name(physdev),
>- netdev_dpdk_get_port_id(physdev),
>+ netdev_dpdk_get_port_id(physdev, transfer),
> UUID_ARGS((struct uuid *) ufid));
> }
>
>--
>2.30.2
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev