Re: [ovs-dev] [PATCH v4 3/3] netdev-offload-dpdk: use flow transfer proxy mechanism

2023-02-20 Thread 0-day Robot
Bleep bloop.  Greetings Ivan Malov, I am a robot and I have tried out your 
patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


checkpatch:
WARNING: Line is 80 characters long (recommended limit is 79)
#50 FILE: lib/netdev-dpdk.c:1208:
   >flow_transfer_proxy_port_id, NULL);

WARNING: Line is 80 characters long (recommended limit is 79)
#95 FILE: lib/netdev-dpdk.c:4053:
response = xasprintf("Device '%s' can not be detached (flow proxy)",

ERROR: Inappropriate bracing around statement
#102 FILE: lib/netdev-dpdk.c:4060:
if (dev_self != NULL)

WARNING: Line is 80 characters long (recommended limit is 79)
#123 FILE: lib/netdev-dpdk.c:5524:
ret = flow_transfer_proxy ? dev->flow_transfer_proxy_port_id : dev->port_id;

WARNING: Line is 80 characters long (recommended limit is 79)
#212 FILE: lib/netdev-dpdk.c:5709:
ret = rte_flow_tunnel_action_decap_release(dev->flow_transfer_proxy_port_id,

Lines checked: 415, Warnings: 4, Errors: 1


Please check this out.  If you feel there has been an error, please email 
acon...@redhat.com

Thanks,
0-day Robot
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v4 2/3] netdev-offload-dpdk: replace action PORT_ID with REPRESENTED_PORT

2023-02-20 Thread 0-day Robot
Bleep bloop.  Greetings Ivan Malov, I am a robot and I have tried out your 
patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


checkpatch:
ERROR: Inappropriate bracing around statement
#28 FILE: lib/netdev-offload-dpdk.c:743:
if (ethdev)

Lines checked: 80, Warnings: 0, Errors: 1


Please check this out.  If you feel there has been an error, please email 
acon...@redhat.com

Thanks,
0-day Robot
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v4 3/3] netdev-offload-dpdk: use flow transfer proxy mechanism

2023-02-20 Thread 0-day Robot
Bleep bloop.  Greetings Ivan Malov, I am a robot and I have tried out your 
patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


checkpatch:
WARNING: Line is 80 characters long (recommended limit is 79)
#50 FILE: lib/netdev-dpdk.c:1208:
   >flow_transfer_proxy_port_id, NULL);

WARNING: Line is 80 characters long (recommended limit is 79)
#95 FILE: lib/netdev-dpdk.c:4053:
response = xasprintf("Device '%s' can not be detached (flow proxy)",

ERROR: Inappropriate bracing around statement
#102 FILE: lib/netdev-dpdk.c:4060:
if (dev_self != NULL)

WARNING: Line is 80 characters long (recommended limit is 79)
#123 FILE: lib/netdev-dpdk.c:5524:
ret = flow_transfer_proxy ? dev->flow_transfer_proxy_port_id : dev->port_id;

WARNING: Line is 80 characters long (recommended limit is 79)
#212 FILE: lib/netdev-dpdk.c:5709:
ret = rte_flow_tunnel_action_decap_release(dev->flow_transfer_proxy_port_id,

Lines checked: 415, Warnings: 4, Errors: 1


Please check this out.  If you feel there has been an error, please email 
acon...@redhat.com

Thanks,
0-day Robot
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v4 2/3] netdev-offload-dpdk: replace action PORT_ID with REPRESENTED_PORT

2023-02-20 Thread 0-day Robot
Bleep bloop.  Greetings Ivan Malov, I am a robot and I have tried out your 
patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


checkpatch:
ERROR: Inappropriate bracing around statement
#28 FILE: lib/netdev-offload-dpdk.c:743:
if (ethdev)

Lines checked: 80, Warnings: 0, Errors: 1


Please check this out.  If you feel there has been an error, please email 
acon...@redhat.com

Thanks,
0-day Robot
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v4 3/3] netdev-offload-dpdk: use flow transfer proxy mechanism

2023-02-20 Thread Ivan Malov via dev
Manage "transfer" flows via the corresponding mechanism.
Doing so requires that the traffic source be specified
explicitly, via the corresponding pattern item.

Signed-off-by: Ivan Malov 
---
 lib/netdev-dpdk.c | 88 +++
 lib/netdev-dpdk.h |  4 +-
 lib/netdev-offload-dpdk.c | 55 +++-
 3 files changed, 117 insertions(+), 30 deletions(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 2cebc3cca..3a9c9d9a0 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -434,6 +434,7 @@ enum dpdk_hw_ol_features {
 
 struct netdev_dpdk {
 PADDED_MEMBERS_CACHELINE_MARKER(CACHE_LINE_SIZE, cacheline0,
+dpdk_port_t flow_transfer_proxy_port_id;
 dpdk_port_t port_id;
 
 /* If true, device was attached by rte_eth_dev_attach(). */
@@ -1183,6 +1184,7 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
 uint32_t rx_chksm_offload_capa = RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
  RTE_ETH_RX_OFFLOAD_TCP_CKSUM |
  RTE_ETH_RX_OFFLOAD_IPV4_CKSUM;
+int ret;
 
 /*
  * Full tunnel offload requires that tunnel ID metadata be
@@ -1194,6 +1196,24 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
  */
 dpdk_eth_dev_init_rx_metadata(dev);
 
+/*
+ * Managing "transfer" flows requires that the user communicate them
+ * via a port which has the privilege to control the embedded switch.
+ * For some vendors, all ports in a given switching domain have
+ * this privilege. For other vendors, it's only one port.
+ *
+ * Get the proxy port ID and remember it for later use.
+ */
+ret = rte_flow_pick_transfer_proxy(dev->port_id,
+   >flow_transfer_proxy_port_id, 
NULL);
+if (ret != 0) {
+/*
+ * The PMD does not indicate the proxy port.
+ * Assume the proxy is unneeded.
+ */
+dev->flow_transfer_proxy_port_id = dev->port_id;
+}
+
 rte_eth_dev_info_get(dev->port_id, );
 
 if (strstr(info.driver_name, "vf") != NULL) {
@@ -3981,8 +4001,10 @@ netdev_dpdk_detach(struct unixctl_conn *conn, int argc 
OVS_UNUSED,
const char *argv[], void *aux OVS_UNUSED)
 {
 struct ds used_interfaces = DS_EMPTY_INITIALIZER;
+struct netdev_dpdk *dev_self = NULL;
 struct rte_eth_dev_info dev_info;
 dpdk_port_t sibling_port_id;
+struct netdev_dpdk *dev;
 dpdk_port_t port_id;
 bool used = false;
 char *response;
@@ -4000,8 +4022,6 @@ netdev_dpdk_detach(struct unixctl_conn *conn, int argc 
OVS_UNUSED,
   argv[1]);
 
 RTE_ETH_FOREACH_DEV_SIBLING (sibling_port_id, port_id) {
-struct netdev_dpdk *dev;
-
 LIST_FOR_EACH (dev, list_node, _list) {
 if (dev->port_id != sibling_port_id) {
 continue;
@@ -4021,6 +4041,25 @@ netdev_dpdk_detach(struct unixctl_conn *conn, int argc 
OVS_UNUSED,
 }
 ds_destroy(_interfaces);
 
+/*
+ * The device being detached may happen to be a flow proxy port
+ * for another device (still attached). If so, do not allow to
+ * detach. Devices dependent on this one must be detached first.
+ */
+LIST_FOR_EACH (dev, list_node, _list) {
+if (dev->port_id == port_id) {
+dev_self = dev;
+} else if (dev->flow_transfer_proxy_port_id == port_id) {
+response = xasprintf("Device '%s' can not be detached (flow 
proxy)",
+ argv[1]);
+goto error;
+}
+}
+
+/* Indicate that the device being detached no longer needs a flow proxy. */
+if (dev_self != NULL)
+dev_self->flow_transfer_proxy_port_id = port_id;
+
 rte_eth_dev_info_get(port_id, _info);
 rte_eth_dev_close(port_id);
 if (rte_dev_remove(dev_info.device) < 0) {
@@ -5470,7 +5509,8 @@ unlock:
 }
 
 int
-netdev_dpdk_get_port_id(struct netdev *netdev)
+netdev_dpdk_get_port_id(struct netdev *netdev,
+bool flow_transfer_proxy)
 {
 struct netdev_dpdk *dev;
 int ret = -1;
@@ -5481,7 +5521,7 @@ netdev_dpdk_get_port_id(struct netdev *netdev)
 
 dev = netdev_dpdk_cast(netdev);
 ovs_mutex_lock(>mutex);
-ret = dev->port_id;
+ret = flow_transfer_proxy ? dev->flow_transfer_proxy_port_id : 
dev->port_id;
 ovs_mutex_unlock(>mutex);
 out:
 return ret;
@@ -5517,13 +5557,15 @@ out:
 
 int
 netdev_dpdk_rte_flow_destroy(struct netdev *netdev,
- struct rte_flow *rte_flow,
+ bool transfer, struct rte_flow *rte_flow,
  struct rte_flow_error *error)
 {
 struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
 int ret;
 
-ret = rte_flow_destroy(dev->port_id, rte_flow, error);
+ret = rte_flow_destroy(transfer ?
+   dev->flow_transfer_proxy_port_id : dev->port_id,
+   rte_flow, 

[ovs-dev] [PATCH v4 0/3] DPDK: align flow offloads with 22.11

2023-02-20 Thread Ivan Malov via dev
Address three problems using three corresponding features in
DPDK, which have been around for a year and are now stable:

1) The need to make sure that metadata generated by flow
   rule execution be delivered from NIC to application;

2) Replacing PORT_ID action with REPRESENTED_PORT;

3) Potential need to manage "transfer" flows
   via a privileged ("transfer proxy") port.
---
 v2: amendments to care about proxy detach and port ID logging
 v3: a minor adjustment in the cover letter subject line
 v4: edits to apply review notes and align with 22.11

Ivan Malov (3):
  netdev-dpdk: negotiate delivery of per-packet Rx metadata
  netdev-offload-dpdk: replace action PORT_ID with REPRESENTED_PORT
  netdev-offload-dpdk: use flow transfer proxy mechanism

 lib/netdev-dpdk.c | 128 +-
 lib/netdev-dpdk.h |   4 +-
 lib/netdev-offload-dpdk.c |  86 +
 3 files changed, 174 insertions(+), 44 deletions(-)

-- 
2.20.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v4 1/3] netdev-dpdk: negotiate delivery of per-packet Rx metadata

2023-02-20 Thread Ivan Malov via dev
This may be required by some PMDs in offload scenarios.

Signed-off-by: Ivan Malov 
---
 lib/netdev-dpdk.c | 40 
 1 file changed, 40 insertions(+)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index fb0dd43f7..2cebc3cca 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -1140,6 +1140,36 @@ dpdk_eth_flow_ctrl_setup(struct netdev_dpdk *dev) 
OVS_REQUIRES(dev->mutex)
 }
 }
 
+static void
+dpdk_eth_dev_init_rx_metadata(struct netdev_dpdk *dev)
+{
+uint64_t rx_metadata = 0;
+int ret;
+
+/* For the fallback offload (non-"transfer" rules) */
+rx_metadata |= RTE_ETH_RX_METADATA_USER_MARK;
+/* For the full offload ("transfer" rules) */
+rx_metadata |= RTE_ETH_RX_METADATA_TUNNEL_ID;
+
+ret = rte_eth_rx_metadata_negotiate(dev->port_id, _metadata);
+if (ret == 0) {
+if (!(rx_metadata & RTE_ETH_RX_METADATA_USER_MARK)) {
+VLOG_DBG("The NIC will not provide per-packet USER_MARK on port "
+ DPDK_PORT_ID_FMT, dev->port_id);
+}
+if (!(rx_metadata & RTE_ETH_RX_METADATA_TUNNEL_ID)) {
+VLOG_DBG("The NIC will not provide per-packet TUNNEL_ID on port "
+ DPDK_PORT_ID_FMT, dev->port_id);
+}
+} else if (ret == -ENOTSUP) {
+VLOG_DBG("Rx metadata negotiate procedure is not supported on port "
+ DPDK_PORT_ID_FMT, dev->port_id);
+} else {
+VLOG_WARN("Cannot negotiate Rx metadata on port "
+  DPDK_PORT_ID_FMT, dev->port_id);
+}
+}
+
 static int
 dpdk_eth_dev_init(struct netdev_dpdk *dev)
 OVS_REQUIRES(dev->mutex)
@@ -1154,6 +1184,16 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
  RTE_ETH_RX_OFFLOAD_TCP_CKSUM |
  RTE_ETH_RX_OFFLOAD_IPV4_CKSUM;
 
+/*
+ * Full tunnel offload requires that tunnel ID metadata be
+ * delivered with "miss" packets from the hardware to the
+ * PMD. The same goes for megaflow mark metadata which is
+ * used in MARK + RSS offload scenario.
+ *
+ * Request delivery of such metadata.
+ */
+dpdk_eth_dev_init_rx_metadata(dev);
+
 rte_eth_dev_info_get(dev->port_id, );
 
 if (strstr(info.driver_name, "vf") != NULL) {
-- 
2.20.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v4 2/3] netdev-offload-dpdk: replace action PORT_ID with REPRESENTED_PORT

2023-02-20 Thread Ivan Malov via dev
Action PORT_ID has been deprecated. Use REPRESENTED_PORT instead.

Signed-off-by: Ivan Malov 
---
 lib/netdev-offload-dpdk.c | 31 +--
 1 file changed, 17 insertions(+), 14 deletions(-)

diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index b3421c099..7f2598a53 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -735,14 +735,14 @@ dump_flow_action(struct ds *s, struct ds *s_extra,
 ds_put_cstr(s, "rss / ");
 } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT) {
 ds_put_cstr(s, "count / ");
-} else if (actions->type == RTE_FLOW_ACTION_TYPE_PORT_ID) {
-const struct rte_flow_action_port_id *port_id = actions->conf;
+} else if (actions->type == RTE_FLOW_ACTION_TYPE_REPRESENTED_PORT) {
+const struct rte_flow_action_ethdev *ethdev = actions->conf;
+
+ds_put_cstr(s, "represented_port ");
+
+if (ethdev)
+ds_put_format(s, "ethdev_port_id %d ", ethdev->port_id);
 
-ds_put_cstr(s, "port_id ");
-if (port_id) {
-ds_put_format(s, "original %d id %d ",
-  port_id->original, port_id->id);
-}
 ds_put_cstr(s, "/ ");
 } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
 ds_put_cstr(s, "drop / ");
@@ -1776,19 +1776,22 @@ add_count_action(struct flow_actions *actions)
 }
 
 static int
-add_port_id_action(struct flow_actions *actions,
-   struct netdev *outdev)
+add_represented_port_action(struct flow_actions *actions,
+struct netdev *outdev)
 {
-struct rte_flow_action_port_id *port_id;
+struct rte_flow_action_ethdev *ethdev;
 int outdev_id;
 
 outdev_id = netdev_dpdk_get_port_id(outdev);
 if (outdev_id < 0) {
 return -1;
 }
-port_id = xzalloc(sizeof *port_id);
-port_id->id = outdev_id;
-add_flow_action(actions, RTE_FLOW_ACTION_TYPE_PORT_ID, port_id);
+
+ethdev = xzalloc(sizeof *ethdev);
+ethdev->port_id = outdev_id;
+
+add_flow_action(actions, RTE_FLOW_ACTION_TYPE_REPRESENTED_PORT, ethdev);
+
 return 0;
 }
 
@@ -1808,7 +1811,7 @@ add_output_action(struct netdev *netdev,
 return -1;
 }
 if (!netdev_flow_api_equals(netdev, outdev) ||
-add_port_id_action(actions, outdev)) {
+add_represented_port_action(actions, outdev)) {
 VLOG_DBG_RL(, "%s: Output to port \'%s\' cannot be offloaded.",
 netdev_get_name(netdev), netdev_get_name(outdev));
 ret = -1;
-- 
2.20.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v4 2/3] netdev-offload-dpdk: replace action PORT_ID with REPRESENTED_PORT

2023-02-20 Thread Ivan Malov via dev
Action PORT_ID has been deprecated. Use REPRESENTED_PORT instead.

Signed-off-by: Ivan Malov 
---
 lib/netdev-offload-dpdk.c | 31 +--
 1 file changed, 17 insertions(+), 14 deletions(-)

diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index b3421c099..7f2598a53 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -735,14 +735,14 @@ dump_flow_action(struct ds *s, struct ds *s_extra,
 ds_put_cstr(s, "rss / ");
 } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT) {
 ds_put_cstr(s, "count / ");
-} else if (actions->type == RTE_FLOW_ACTION_TYPE_PORT_ID) {
-const struct rte_flow_action_port_id *port_id = actions->conf;
+} else if (actions->type == RTE_FLOW_ACTION_TYPE_REPRESENTED_PORT) {
+const struct rte_flow_action_ethdev *ethdev = actions->conf;
+
+ds_put_cstr(s, "represented_port ");
+
+if (ethdev)
+ds_put_format(s, "ethdev_port_id %d ", ethdev->port_id);
 
-ds_put_cstr(s, "port_id ");
-if (port_id) {
-ds_put_format(s, "original %d id %d ",
-  port_id->original, port_id->id);
-}
 ds_put_cstr(s, "/ ");
 } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
 ds_put_cstr(s, "drop / ");
@@ -1776,19 +1776,22 @@ add_count_action(struct flow_actions *actions)
 }
 
 static int
-add_port_id_action(struct flow_actions *actions,
-   struct netdev *outdev)
+add_represented_port_action(struct flow_actions *actions,
+struct netdev *outdev)
 {
-struct rte_flow_action_port_id *port_id;
+struct rte_flow_action_ethdev *ethdev;
 int outdev_id;
 
 outdev_id = netdev_dpdk_get_port_id(outdev);
 if (outdev_id < 0) {
 return -1;
 }
-port_id = xzalloc(sizeof *port_id);
-port_id->id = outdev_id;
-add_flow_action(actions, RTE_FLOW_ACTION_TYPE_PORT_ID, port_id);
+
+ethdev = xzalloc(sizeof *ethdev);
+ethdev->port_id = outdev_id;
+
+add_flow_action(actions, RTE_FLOW_ACTION_TYPE_REPRESENTED_PORT, ethdev);
+
 return 0;
 }
 
@@ -1808,7 +1811,7 @@ add_output_action(struct netdev *netdev,
 return -1;
 }
 if (!netdev_flow_api_equals(netdev, outdev) ||
-add_port_id_action(actions, outdev)) {
+add_represented_port_action(actions, outdev)) {
 VLOG_DBG_RL(, "%s: Output to port \'%s\' cannot be offloaded.",
 netdev_get_name(netdev), netdev_get_name(outdev));
 ret = -1;
-- 
2.20.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v4 1/3] netdev-dpdk: negotiate delivery of per-packet Rx metadata

2023-02-20 Thread Ivan Malov via dev
This may be required by some PMDs in offload scenarios.

Signed-off-by: Ivan Malov 
---
 lib/netdev-dpdk.c | 40 
 1 file changed, 40 insertions(+)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index fb0dd43f7..2cebc3cca 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -1140,6 +1140,36 @@ dpdk_eth_flow_ctrl_setup(struct netdev_dpdk *dev) 
OVS_REQUIRES(dev->mutex)
 }
 }
 
+static void
+dpdk_eth_dev_init_rx_metadata(struct netdev_dpdk *dev)
+{
+uint64_t rx_metadata = 0;
+int ret;
+
+/* For the fallback offload (non-"transfer" rules) */
+rx_metadata |= RTE_ETH_RX_METADATA_USER_MARK;
+/* For the full offload ("transfer" rules) */
+rx_metadata |= RTE_ETH_RX_METADATA_TUNNEL_ID;
+
+ret = rte_eth_rx_metadata_negotiate(dev->port_id, _metadata);
+if (ret == 0) {
+if (!(rx_metadata & RTE_ETH_RX_METADATA_USER_MARK)) {
+VLOG_DBG("The NIC will not provide per-packet USER_MARK on port "
+ DPDK_PORT_ID_FMT, dev->port_id);
+}
+if (!(rx_metadata & RTE_ETH_RX_METADATA_TUNNEL_ID)) {
+VLOG_DBG("The NIC will not provide per-packet TUNNEL_ID on port "
+ DPDK_PORT_ID_FMT, dev->port_id);
+}
+} else if (ret == -ENOTSUP) {
+VLOG_DBG("Rx metadata negotiate procedure is not supported on port "
+ DPDK_PORT_ID_FMT, dev->port_id);
+} else {
+VLOG_WARN("Cannot negotiate Rx metadata on port "
+  DPDK_PORT_ID_FMT, dev->port_id);
+}
+}
+
 static int
 dpdk_eth_dev_init(struct netdev_dpdk *dev)
 OVS_REQUIRES(dev->mutex)
@@ -1154,6 +1184,16 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
  RTE_ETH_RX_OFFLOAD_TCP_CKSUM |
  RTE_ETH_RX_OFFLOAD_IPV4_CKSUM;
 
+/*
+ * Full tunnel offload requires that tunnel ID metadata be
+ * delivered with "miss" packets from the hardware to the
+ * PMD. The same goes for megaflow mark metadata which is
+ * used in MARK + RSS offload scenario.
+ *
+ * Request delivery of such metadata.
+ */
+dpdk_eth_dev_init_rx_metadata(dev);
+
 rte_eth_dev_info_get(dev->port_id, );
 
 if (strstr(info.driver_name, "vf") != NULL) {
-- 
2.20.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v4 3/3] netdev-offload-dpdk: use flow transfer proxy mechanism

2023-02-20 Thread Ivan Malov via dev
Manage "transfer" flows via the corresponding mechanism.
Doing so requires that the traffic source be specified
explicitly, via the corresponding pattern item.

Signed-off-by: Ivan Malov 
---
 lib/netdev-dpdk.c | 88 +++
 lib/netdev-dpdk.h |  4 +-
 lib/netdev-offload-dpdk.c | 55 +++-
 3 files changed, 117 insertions(+), 30 deletions(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 2cebc3cca..3a9c9d9a0 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -434,6 +434,7 @@ enum dpdk_hw_ol_features {
 
 struct netdev_dpdk {
 PADDED_MEMBERS_CACHELINE_MARKER(CACHE_LINE_SIZE, cacheline0,
+dpdk_port_t flow_transfer_proxy_port_id;
 dpdk_port_t port_id;
 
 /* If true, device was attached by rte_eth_dev_attach(). */
@@ -1183,6 +1184,7 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
 uint32_t rx_chksm_offload_capa = RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
  RTE_ETH_RX_OFFLOAD_TCP_CKSUM |
  RTE_ETH_RX_OFFLOAD_IPV4_CKSUM;
+int ret;
 
 /*
  * Full tunnel offload requires that tunnel ID metadata be
@@ -1194,6 +1196,24 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
  */
 dpdk_eth_dev_init_rx_metadata(dev);
 
+/*
+ * Managing "transfer" flows requires that the user communicate them
+ * via a port which has the privilege to control the embedded switch.
+ * For some vendors, all ports in a given switching domain have
+ * this privilege. For other vendors, it's only one port.
+ *
+ * Get the proxy port ID and remember it for later use.
+ */
+ret = rte_flow_pick_transfer_proxy(dev->port_id,
+   >flow_transfer_proxy_port_id, 
NULL);
+if (ret != 0) {
+/*
+ * The PMD does not indicate the proxy port.
+ * Assume the proxy is unneeded.
+ */
+dev->flow_transfer_proxy_port_id = dev->port_id;
+}
+
 rte_eth_dev_info_get(dev->port_id, );
 
 if (strstr(info.driver_name, "vf") != NULL) {
@@ -3981,8 +4001,10 @@ netdev_dpdk_detach(struct unixctl_conn *conn, int argc 
OVS_UNUSED,
const char *argv[], void *aux OVS_UNUSED)
 {
 struct ds used_interfaces = DS_EMPTY_INITIALIZER;
+struct netdev_dpdk *dev_self = NULL;
 struct rte_eth_dev_info dev_info;
 dpdk_port_t sibling_port_id;
+struct netdev_dpdk *dev;
 dpdk_port_t port_id;
 bool used = false;
 char *response;
@@ -4000,8 +4022,6 @@ netdev_dpdk_detach(struct unixctl_conn *conn, int argc 
OVS_UNUSED,
   argv[1]);
 
 RTE_ETH_FOREACH_DEV_SIBLING (sibling_port_id, port_id) {
-struct netdev_dpdk *dev;
-
 LIST_FOR_EACH (dev, list_node, _list) {
 if (dev->port_id != sibling_port_id) {
 continue;
@@ -4021,6 +4041,25 @@ netdev_dpdk_detach(struct unixctl_conn *conn, int argc 
OVS_UNUSED,
 }
 ds_destroy(_interfaces);
 
+/*
+ * The device being detached may happen to be a flow proxy port
+ * for another device (still attached). If so, do not allow to
+ * detach. Devices dependent on this one must be detached first.
+ */
+LIST_FOR_EACH (dev, list_node, _list) {
+if (dev->port_id == port_id) {
+dev_self = dev;
+} else if (dev->flow_transfer_proxy_port_id == port_id) {
+response = xasprintf("Device '%s' can not be detached (flow 
proxy)",
+ argv[1]);
+goto error;
+}
+}
+
+/* Indicate that the device being detached no longer needs a flow proxy. */
+if (dev_self != NULL)
+dev_self->flow_transfer_proxy_port_id = port_id;
+
 rte_eth_dev_info_get(port_id, _info);
 rte_eth_dev_close(port_id);
 if (rte_dev_remove(dev_info.device) < 0) {
@@ -5470,7 +5509,8 @@ unlock:
 }
 
 int
-netdev_dpdk_get_port_id(struct netdev *netdev)
+netdev_dpdk_get_port_id(struct netdev *netdev,
+bool flow_transfer_proxy)
 {
 struct netdev_dpdk *dev;
 int ret = -1;
@@ -5481,7 +5521,7 @@ netdev_dpdk_get_port_id(struct netdev *netdev)
 
 dev = netdev_dpdk_cast(netdev);
 ovs_mutex_lock(>mutex);
-ret = dev->port_id;
+ret = flow_transfer_proxy ? dev->flow_transfer_proxy_port_id : 
dev->port_id;
 ovs_mutex_unlock(>mutex);
 out:
 return ret;
@@ -5517,13 +5557,15 @@ out:
 
 int
 netdev_dpdk_rte_flow_destroy(struct netdev *netdev,
- struct rte_flow *rte_flow,
+ bool transfer, struct rte_flow *rte_flow,
  struct rte_flow_error *error)
 {
 struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
 int ret;
 
-ret = rte_flow_destroy(dev->port_id, rte_flow, error);
+ret = rte_flow_destroy(transfer ?
+   dev->flow_transfer_proxy_port_id : dev->port_id,
+   rte_flow, 

[ovs-dev] [PATCH v4 0/3] DPDK: align flow offloads with 22.11

2023-02-20 Thread Ivan Malov via dev
Address three problems using three corresponding features in
DPDK, which have been around for a year and are now stable:

1) The need to make sure that metadata generated by flow
   rule execution be delivered from NIC to application;

2) Replacing PORT_ID action with REPRESENTED_PORT;

3) Potential need to manage "transfer" flows
   via a privileged ("transfer proxy") port.
---
 v2: amendments to care about proxy detach and port ID logging
 v3: a minor adjustment in the cover letter subject line
 v4: edits to apply review notes and align with 22.11

Ivan Malov (3):
  netdev-dpdk: negotiate delivery of per-packet Rx metadata
  netdev-offload-dpdk: replace action PORT_ID with REPRESENTED_PORT
  netdev-offload-dpdk: use flow transfer proxy mechanism

 lib/netdev-dpdk.c | 128 +-
 lib/netdev-dpdk.h |   4 +-
 lib/netdev-offload-dpdk.c |  86 +
 3 files changed, 174 insertions(+), 44 deletions(-)

-- 
2.20.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn] MAINTAINERS: Move myself to emeritus status

2023-02-20 Thread 0-day Robot
Bleep bloop.  Greetings Russell Bryant, I am a robot and I have tried out your 
patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


git-am:
error: Failed to merge in the changes.
hint: Use 'git am --show-current-patch=diff' to see the failed patch
Patch failed at 0001 MAINTAINERS: Move myself to emeritus status
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".


Please check this out.  If you feel there has been an error, please email 
acon...@redhat.com

Thanks,
0-day Robot
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn] MAINTAINERS: Move myself to emeritus status

2023-02-20 Thread 0-day Robot
Bleep bloop.  Greetings Russell Bryant, I am a robot and I have tried out your 
patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


git-am:
error: Failed to merge in the changes.
hint: Use 'git am --show-current-patch=diff' to see the failed patch
Patch failed at 0001 MAINTAINERS: Move myself to emeritus status
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".


Please check this out.  If you feel there has been an error, please email 
acon...@redhat.com

Thanks,
0-day Robot
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH ovn] MAINTAINERS: Move myself to emeritus status

2023-02-20 Thread Russell Bryant
From: Russell Bryant 

I have not been involved in OVN development long enough that I should
transition to emeritus status.

Signed-off-by: Russell Bryant 
---
 MAINTAINERS.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/MAINTAINERS.rst b/MAINTAINERS.rst
index 0d19bd622..334f28f47 100644
--- a/MAINTAINERS.rst
+++ b/MAINTAINERS.rst
@@ -55,8 +55,6 @@ This is the current list of active OVN committers:
  - mmich...@redhat.com
* - Numan Siddique
  - nusd...@redhat.com
-   * - Russell Bryant
- - russ...@ovn.org
 
 The project also maintains a list of Emeritus Committers (or Maintainers).
 More information about Emeritus Committers can be found
@@ -67,3 +65,5 @@ More information about Emeritus Committers can be found
 
* - Name
  - Email
+   * - Russell Bryant
+ - russ...@ovn.org
-- 
2.39.2

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH ovn] MAINTAINERS: Move myself to emeritus status

2023-02-20 Thread Russell Bryant
From: Russell Bryant 

I have not been involved in OVN development long enough that I should
transition to emeritus status.

Signed-off-by: Russell Bryant 
---
 MAINTAINERS.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/MAINTAINERS.rst b/MAINTAINERS.rst
index 0d19bd622..334f28f47 100644
--- a/MAINTAINERS.rst
+++ b/MAINTAINERS.rst
@@ -55,8 +55,6 @@ This is the current list of active OVN committers:
  - mmich...@redhat.com
* - Numan Siddique
  - nusd...@redhat.com
-   * - Russell Bryant
- - russ...@ovn.org
 
 The project also maintains a list of Emeritus Committers (or Maintainers).
 More information about Emeritus Committers can be found
@@ -67,3 +65,5 @@ More information about Emeritus Committers can be found
 
* - Name
  - Email
+   * - Russell Bryant
+ - russ...@ovn.org
-- 
2.39.2

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH] MAINTAINERS: Move myself to emeritus status

2023-02-20 Thread Russell Bryant
I have not been active in OVS development in long enough that I should
move to emeritus status.

Signed-off-by: Russell Bryant 
---
 MAINTAINERS.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/MAINTAINERS.rst b/MAINTAINERS.rst
index 1dc406170..5df9aab78 100644
--- a/MAINTAINERS.rst
+++ b/MAINTAINERS.rst
@@ -65,8 +65,6 @@ This is the current list of active Open vSwitch committers:
  - jpet...@ovn.org
* - Pravin B Shelar
  - pshe...@ovn.org
-   * - Russell Bryant
- - russ...@ovn.org
* - Simon Horman
  - ho...@ovn.org
* - Thomas Graf
@@ -91,6 +89,8 @@ More information about Emeritus Committers can be found here:
  - e...@eecs.berkeley.edu
* - Joe Stringer
  - j...@ovn.org
+   * - Russell Bryant
+ - russ...@ovn.org
 
 .. Cut here for the Documentation/internals/maintainers.rst
 
-- 
2.39.2

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH] MAINTAINERS: Move myself to emeritus status

2023-02-20 Thread Russell Bryant
From: Russell Bryant 

I have not been active in OVS development in long enough that I should
move to emeritus status.

Signed-off-by: Russell Bryant 
---
 MAINTAINERS.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/MAINTAINERS.rst b/MAINTAINERS.rst
index 1dc406170..5df9aab78 100644
--- a/MAINTAINERS.rst
+++ b/MAINTAINERS.rst
@@ -65,8 +65,6 @@ This is the current list of active Open vSwitch committers:
  - jpet...@ovn.org
* - Pravin B Shelar
  - pshe...@ovn.org
-   * - Russell Bryant
- - russ...@ovn.org
* - Simon Horman
  - ho...@ovn.org
* - Thomas Graf
@@ -91,6 +89,8 @@ More information about Emeritus Committers can be found here:
  - e...@eecs.berkeley.edu
* - Joe Stringer
  - j...@ovn.org
+   * - Russell Bryant
+ - russ...@ovn.org
 
 .. Cut here for the Documentation/internals/maintainers.rst
 
-- 
2.39.2

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2] id-pool: Refactor to use a bitmap.

2023-02-20 Thread Ilya Maximets
On 1/31/23 17:21, Mark Michelson wrote:
> Using a bitmap makes the id-pool use less memory and be more
> cache-friendly. It theoretically should be faster since hashes do not
> have to be computed.
> 
> This takes the approach of expanding the bitmap when necessary rather
> than allocating the entire space at once. This makes the approach less
> memory-intensive for id pools with a large theoretical maximum number of
> values.
> 
> Signed-off-by: Mark Michelson 
> ---
>  lib/id-pool.c | 110 +++---
>  1 file changed, 41 insertions(+), 69 deletions(-)
> 
> diff --git a/lib/id-pool.c b/lib/id-pool.c
> index 69910ad08..b24c681b6 100644
> --- a/lib/id-pool.c
> +++ b/lib/id-pool.c
> @@ -17,25 +17,20 @@
>  
>  #include 
>  #include "id-pool.h"
> -#include "openvswitch/hmap.h"
> -#include "hash.h"
> +#include "bitmap.h"
>  
> -struct id_node {
> -struct hmap_node node;
> -uint32_t id;
> -};
> +#define ID_POOL_BASE_ALLOC 64
>  
>  struct id_pool {
> -struct hmap map;
> +unsigned long *bitmap;
> +uint32_t bitmap_size;  /* Current highest offset the bitmap can hold */
>  uint32_t base; /* IDs in the range of [base, base + n_ids). */
>  uint32_t n_ids;/* Total number of ids in the pool. */
> -uint32_t next_free_id; /* Possible next free id. */
>  };

Hi, Mark.  Thanks for the patch!

This is definitely an interesting idea.  I have a couple of thoughts around it.

First is, I think, at some point the size of a bitmap will surpass the size
of an actual hash map, depending on how may IDs we have allocated and then
freed.  But that will require us to continuously allocate a huge amount of IDs
first and then free them, so that doesn't seem to be an issue after all as
we'll likely hit memory limitations first.

Second is about performance.  We have a benchmark for different implementations
of the ID pool in tests/test-id-fpool.c.  If we ignore the concurrency parts,
we can use it to compare performance of the simple id-pool before and after.
So, I did.  The results for 500K IDs are below.

Before:

$ ./tests/ovstest test-id-fpool benchmark 50 1
Benchmarking n=50 on 1 thread.
 type\thread:   1Avg
 id-pool new: 109109 ms
 id-pool del: 112112 ms
 id-pool mix: 196196 ms
 id-pool rnd:  1+ -1 ms

After:

$ ./tests/ovstest test-id-fpool benchmark 50 1
Benchmarking n=50 on 1 thread.
 type\thread:   1Avg
 id-pool new: 879879 ms
 id-pool del:  10 10 ms
 id-pool mix:1722   1722 ms
 id-pool rnd:1354   1354 ms

There is a clear win in the performance of random add/del case.  Current
implementation is not even able to finish the test, while bitmap shows
a fair performance.

However, there is a noticeable degradation in sequential allocation ('new')
and a mixed workload ('mix').  This is caused by execution of a bitmap
scan over 500K elements on each call, and that turns out to be slow at a
certain scale.

My guess, is that we can significantly improve performance by keeping the
'next_free_id' logic intact, so we don't need to perform a full scan every
single time.  What do you think?

Best regards, Ilya Maximets.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v3] utilities: add support to set umask in ovs-ctl

2023-02-20 Thread Vladislav Odintsov
Thanks, Ilya!

> On 20 Feb 2023, at 22:21, Ilya Maximets  wrote:
> 
> On 2/15/23 22:20, Vladislav Odintsov wrote:
>> Hi Ilya,
>> 
>> Thats my bad - they were the initial names of options, which I renamed later 
>> and missed this place before sending a patch.
>> I’m absolutely fine with proposed change. Please fold it while applying the 
>> patch.
> 
> Thanks, Vladislav and Eelco!
> I updated the NEWS and applied the change.
> 
> Best regards, Ilya Maximets.
> 
>> 
>> Thanks.
>> 
>> regards,
>> Vladislav Odintsov
>> 
>>> On 15 Feb 2023, at 22:25, Ilya Maximets  wrote:
>>> 
>>> On 2/10/23 17:02, Vladislav Odintsov wrote:
 This patch adds new ovs-ctl options to pass umask configuration to allow
 OVS daemons set requested socket permissions on group.  Previous
 behaviour (if using with systemd service unit) created sockets with 0750
 permissions mask (group has no write permission).
 
 Write permission for group is reasonable in usecase, where ovs-vswitchd
 or ovsdb-server runs as a non-privileged user:group (say,
 openvswitch:openvswitch) and it is needed to access unix socket from
 process running as another non-privileged user.  In this case
 administrator has to add that user to openvswitch group and can connect
 to OVS sockets from a process running under that user.
 
 Two new ovs-ctl options --ovsdb-server-umask and --ovs-vswitchd-umask
 were added to manage umask values for appropriate daemons.  This is
 useful for systemd users: both ovs-vswitchd and ovsdb-server systemd
 units read options from single /etc/sysconfig/openvswitch configuration
 file.  So, with separate options it is possible to set umask only for
 specific daemon.
 
 OPTIONS="--ovsdb-server-umask=0002"
 
 in /etc/openvswitch/sysconfig file will set umask to 0002 value before
 starting only ovsdb-server, while
 
 OPTIONS="--ovs-vswitchd-umask=0002"
 
 will set umask to ovs-vswitchd daemon.
 
 Previous behaviour (not setting umask) is left as default.
 
 Reported-at: 
 https://mail.openvswitch.org/pipermail/ovs-dev/2023-January/401501.html
 Signed-off-by: Vladislav Odintsov 
 
 ---
 v2 -> v3:
 - addressed Eelco's review comments.
 
 v1 -> v2:
 - added item in NEWS file as Ilya's suggestion;
 - addressed Eelco's review comments;
 - moved umask call from ovs-ctl to ovs-lib;
 - added restoration of umask to effective value before the umask change;
 - previous version --ovs-umask option was split into two:
   --ovs-vswitchd-umask and --ovsdb-server-umask in order to make
   possible umask configuration for specific daemon when running with
   systemd.
 ---
 NEWS |  7 +++
 utilities/ovs-ctl.in | 16 
 utilities/ovs-lib.in | 17 ++---
 3 files changed, 33 insertions(+), 7 deletions(-)
 
 diff --git a/NEWS b/NEWS
 index fe6055a27..f7df598bd 100644
 --- a/NEWS
 +++ b/NEWS
 @@ -4,6 +4,13 @@ Post-v3.1.0
 * OVS now collects per-interface upcall statistics that can be obtained
   via 'ovs-appctl dpctl/show -s' or the interface's statistics column
   in OVSDB.  Available with upstream kernel 6.2+.
 +   - ovs-ctl:
 + * Added support to set umask value when starting OVS daemons.  New 
 options
 +   --ovsdb-server-umask=MODE and --ovs-vswitchd-umask=MODE were added 
 for
 +   that.  For instance, when write access on befalf of OVS group is 
 needed
 +   for ovsdb-server, pass --ovsdb-umask=0002.  Use --vswitchd-umask 
 to set
 +   umask ovs-vswitchd daemon umask.  This will allow ovsdb-server or
 +   ovs-vswitchd to create sockets with access mode of 0770.
>>> 
>>> The options in the example are incorrect.
>>> Also, the text seems slightly too extensive.
>>> 
>>> What do you think about this:
>>> 
>>>  - ovs-ctl:
>>>* Added new options --[ovsdb-server|ovs-vswitchd]-umask=MODE to set umask
>>>  value when starting OVS daemons.  E.g., use --ovsdb-server-umask=0002
>>>  in order to create OVSDB sockets with access mode of 0770.
>>> 
>>> ?
>>> 
>>> I could fold this in while applying the change.
>>> 
>>> Best regards, Ilya Maximets.
> 
> ___
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Regards,
Vladislav Odintsov

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v3] utilities: add support to set umask in ovs-ctl

2023-02-20 Thread Ilya Maximets
On 2/15/23 22:20, Vladislav Odintsov wrote:
> Hi Ilya,
> 
> Thats my bad - they were the initial names of options, which I renamed later 
> and missed this place before sending a patch.
> I’m absolutely fine with proposed change. Please fold it while applying the 
> patch.

Thanks, Vladislav and Eelco!
I updated the NEWS and applied the change.

Best regards, Ilya Maximets.

> 
> Thanks.
> 
> regards,
> Vladislav Odintsov
> 
>> On 15 Feb 2023, at 22:25, Ilya Maximets  wrote:
>>
>> On 2/10/23 17:02, Vladislav Odintsov wrote:
>>> This patch adds new ovs-ctl options to pass umask configuration to allow
>>> OVS daemons set requested socket permissions on group.  Previous
>>> behaviour (if using with systemd service unit) created sockets with 0750
>>> permissions mask (group has no write permission).
>>>
>>> Write permission for group is reasonable in usecase, where ovs-vswitchd
>>> or ovsdb-server runs as a non-privileged user:group (say,
>>> openvswitch:openvswitch) and it is needed to access unix socket from
>>> process running as another non-privileged user.  In this case
>>> administrator has to add that user to openvswitch group and can connect
>>> to OVS sockets from a process running under that user.
>>>
>>> Two new ovs-ctl options --ovsdb-server-umask and --ovs-vswitchd-umask
>>> were added to manage umask values for appropriate daemons.  This is
>>> useful for systemd users: both ovs-vswitchd and ovsdb-server systemd
>>> units read options from single /etc/sysconfig/openvswitch configuration
>>> file.  So, with separate options it is possible to set umask only for
>>> specific daemon.
>>>
>>> OPTIONS="--ovsdb-server-umask=0002"
>>>
>>> in /etc/openvswitch/sysconfig file will set umask to 0002 value before
>>> starting only ovsdb-server, while
>>>
>>> OPTIONS="--ovs-vswitchd-umask=0002"
>>>
>>> will set umask to ovs-vswitchd daemon.
>>>
>>> Previous behaviour (not setting umask) is left as default.
>>>
>>> Reported-at: 
>>> https://mail.openvswitch.org/pipermail/ovs-dev/2023-January/401501.html
>>> Signed-off-by: Vladislav Odintsov 
>>>
>>> ---
>>> v2 -> v3:
>>>  - addressed Eelco's review comments.
>>>
>>> v1 -> v2:
>>>  - added item in NEWS file as Ilya's suggestion;
>>>  - addressed Eelco's review comments;
>>>  - moved umask call from ovs-ctl to ovs-lib;
>>>  - added restoration of umask to effective value before the umask change;
>>>  - previous version --ovs-umask option was split into two:
>>>--ovs-vswitchd-umask and --ovsdb-server-umask in order to make
>>>possible umask configuration for specific daemon when running with
>>>systemd.
>>> ---
>>> NEWS |  7 +++
>>> utilities/ovs-ctl.in | 16 
>>> utilities/ovs-lib.in | 17 ++---
>>> 3 files changed, 33 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/NEWS b/NEWS
>>> index fe6055a27..f7df598bd 100644
>>> --- a/NEWS
>>> +++ b/NEWS
>>> @@ -4,6 +4,13 @@ Post-v3.1.0
>>>  * OVS now collects per-interface upcall statistics that can be obtained
>>>via 'ovs-appctl dpctl/show -s' or the interface's statistics column
>>>in OVSDB.  Available with upstream kernel 6.2+.
>>> +   - ovs-ctl:
>>> + * Added support to set umask value when starting OVS daemons.  New 
>>> options
>>> +   --ovsdb-server-umask=MODE and --ovs-vswitchd-umask=MODE were added 
>>> for
>>> +   that.  For instance, when write access on befalf of OVS group is 
>>> needed
>>> +   for ovsdb-server, pass --ovsdb-umask=0002.  Use --vswitchd-umask to 
>>> set
>>> +   umask ovs-vswitchd daemon umask.  This will allow ovsdb-server or
>>> +   ovs-vswitchd to create sockets with access mode of 0770.
>>
>> The options in the example are incorrect.
>> Also, the text seems slightly too extensive.
>>
>> What do you think about this:
>>
>>   - ovs-ctl:
>> * Added new options --[ovsdb-server|ovs-vswitchd]-umask=MODE to set umask
>>   value when starting OVS daemons.  E.g., use --ovsdb-server-umask=0002
>>   in order to create OVSDB sockets with access mode of 0770.
>>
>> ?
>>
>> I could fold this in while applying the change.
>>
>> Best regards, Ilya Maximets.

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] conntrack:fix conntrack_clean may access the same exp_list each time be called

2023-02-20 Thread Paolo Valerio
Paolo Valerio  writes:

> Hello Liang,
>
> Liang Mancang  writes:
>
>> when a exp_list contains more than the clean_end's number of nodes,
>> and these nodes will not expire immediately. Then, every times we
>> call conntrack_clean, it use the same next_sweep to get exp_list.
>>
>
> Yes, in general, if the previous count exceeds the clean_end, it should
> not make the sweeper restart from a list just swept, but it should not
> happen that a single list contains more than n_conn_limit / 64.
>
> Did you observe a single exp_list containing more than n_conn_limit / 64
> entries?
>
>> Actually, we should add i every times after we call ct_sweep.
>>
>> Signed-off-by: Liang Mancang 
>> ---
>>  lib/conntrack.c | 5 +++--
>>  1 file changed, 3 insertions(+), 2 deletions(-)
>>
>> diff --git a/lib/conntrack.c b/lib/conntrack.c
>> index 524670e45..5029b2cda 100644
>> --- a/lib/conntrack.c
>> +++ b/lib/conntrack.c
>> @@ -1512,12 +1512,13 @@ conntrack_clean(struct conntrack *ct, long long now)
>>  clean_end = n_conn_limit / 64;
>>  
>>  for (i = ct->next_sweep; i < N_EXP_LISTS; i++) {
>> -count += ct_sweep(ct, >exp_lists[i], now);
>> -
>>  if (count > clean_end) {
>>  next_wakeup = 0;
>> +
>
> This new line is not needed, and a Fixes tag could be added:
>
> Fixes: 3d9c1b855a5f ("conntrack: Replace timeout based expiration lists with 
> rculists.")
>
> The patch LGTM, 
>

Sorry, the last line slipped out. Please consider my question and the
other comments. I will explicitly tag the patch once we're done.

>>  break;
>>  }
>> +
>> +count += ct_sweep(ct, >exp_lists[i], now);
>>  }
>>  
>>  ct->next_sweep = (i < N_EXP_LISTS) ? i : 0;
>> -- 
>> 2.30.0.windows.2
>>
>> ___
>> dev mailing list
>> d...@openvswitch.org
>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] conntrack:fix conntrack_clean may access the same exp_list each time be called

2023-02-20 Thread Paolo Valerio
Hello Liang,

Liang Mancang  writes:

> when a exp_list contains more than the clean_end's number of nodes,
> and these nodes will not expire immediately. Then, every times we
> call conntrack_clean, it use the same next_sweep to get exp_list.
>

Yes, in general, if the previous count exceeds the clean_end, it should
not make the sweeper restart from a list just swept, but it should not
happen that a single list contains more than n_conn_limit / 64.

Did you observe a single exp_list containing more than n_conn_limit / 64
entries?

> Actually, we should add i every times after we call ct_sweep.
>
> Signed-off-by: Liang Mancang 
> ---
>  lib/conntrack.c | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/lib/conntrack.c b/lib/conntrack.c
> index 524670e45..5029b2cda 100644
> --- a/lib/conntrack.c
> +++ b/lib/conntrack.c
> @@ -1512,12 +1512,13 @@ conntrack_clean(struct conntrack *ct, long long now)
>  clean_end = n_conn_limit / 64;
>  
>  for (i = ct->next_sweep; i < N_EXP_LISTS; i++) {
> -count += ct_sweep(ct, >exp_lists[i], now);
> -
>  if (count > clean_end) {
>  next_wakeup = 0;
> +

This new line is not needed, and a Fixes tag could be added:

Fixes: 3d9c1b855a5f ("conntrack: Replace timeout based expiration lists with 
rculists.")

The patch LGTM, 

>  break;
>  }
> +
> +count += ct_sweep(ct, >exp_lists[i], now);
>  }
>  
>  ct->next_sweep = (i < N_EXP_LISTS) ? i : 0;
> -- 
> 2.30.0.windows.2
>
> ___
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v3 2/2] ipfix: make template and stats interval configurable

2023-02-20 Thread Ilya Maximets
On 2/10/23 17:03, Adrián Moreno wrote:
> From: Adrian Moreno 
> 
> Add options to the IPFIX table configure the interval to send statistics
> and template information.
> 
> Signed-off-by: Adrian Moreno 
> 
> ---
> - v3:
>- Removed unit tests which generate errors in Intel CI. Will submit in
>  independent series.
> - v2:
>   - Fixed a potential race condition in unit test.
> - v1:
>   - Added unit test.
> ---
>  NEWS |  2 ++
>  ofproto/ofproto-dpif-ipfix.c | 38 ++--
>  ofproto/ofproto.h|  9 +
>  vswitchd/bridge.c| 17 
>  vswitchd/vswitch.ovsschema   | 12 +++-
>  vswitchd/vswitch.xml | 20 +++
>  6 files changed, 87 insertions(+), 11 deletions(-)
> 
> diff --git a/vswitchd/vswitch.ovsschema b/vswitchd/vswitch.ovsschema
> index 1a49cdffe..8ab609dfb 100644
> --- a/vswitchd/vswitch.ovsschema
> +++ b/vswitchd/vswitch.ovsschema
> @@ -1,6 +1,6 @@
>  {"name": "Open_vSwitch",
>   "version": "8.3.1",

You're adding new columns.  Need to bump the version to 8.4.0.

The numbering scheme is described here:
  https://docs.openvswitch.org/en/latest/ref/ovsdb.7/#schemas

Best regards, Ilya Maximets.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v3 3/3] route-table: Retrieving the preferred source address from Netlink.

2023-02-20 Thread Simon Horman
On Tue, Feb 14, 2023 at 12:39:06PM +0900, Nobuhiro MIKI wrote:
> We can use the "ip route add ... src ..." command to set the preferred
> source address for each entry in the kernel FIB. OVS has a mechanism to
> cache the FIB, but the preferred source address is ignored and
> calculated with its own logic. This patch resolves the difference
> between kernel FIB and OVS route table cache by retrieving the
> RTA_PREFSRC attribute of Netlink messages.
> 
> Signed-off-by: Nobuhiro MIKI 

Reviewed-by: Simon Horman 

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v3 2/3] ovs-router: Introduce src option in ovs/route/add command.

2023-02-20 Thread Simon Horman
On Tue, Feb 14, 2023 at 12:39:05PM +0900, Nobuhiro MIKI wrote:
> When adding a route with ovs/route/add command, the source address
> in "ovs_router_entry" structure is always the FIRST address that the
> interface has. See "ovs_router_get_netdev_source_address"
> function for more information.
> 
> If an interface has multiple ipv4 and/or ipv6 addresses, there are use
> cases where the user wants to control the source address. This patch
> therefore addresses this issue by adding a src parameter.
> 
> Note that same constraints also exist when caching routes from
> Kernel FIB with Netlink, but are not dealt with in this patch.
> 
> Signed-off-by: Nobuhiro MIKI 

aside: this patch was base64 encoded, which is slightly inconvenient
   on my side (which is not important in itself). Curiously
   the other patches in this series are not.
> ---
>  NEWS|   3 +
>  lib/ovs-router.c| 134 
>  ofproto/ofproto-tnl-unixctl.man |   9 ++-
>  tests/ovs-router.at |  78 +++
>  4 files changed, 188 insertions(+), 36 deletions(-)
> 
> diff --git a/NEWS b/NEWS
> index fe6055a2700b..9d98e1573e3b 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -4,6 +4,9 @@ Post-v3.1.0
>   * OVS now collects per-interface upcall statistics that can be obtained
> via 'ovs-appctl dpctl/show -s' or the interface's statistics column
> in OVSDB.  Available with upstream kernel 6.2+.
> +   - ovs-appctl:
> + * Add support for selecting the source address with the
> +   “ovs-appctl ovs/route/add" command.
>  
>  
>  v3.1.0 - xx xxx 
> diff --git a/lib/ovs-router.c b/lib/ovs-router.c
> index 5d0fbd503e9e..8f2587444034 100644
> --- a/lib/ovs-router.c
> +++ b/lib/ovs-router.c
> @@ -164,6 +164,47 @@ static void rt_init_match(struct match *match, uint32_t 
> mark,
>  match->flow.pkt_mark = mark;
>  }
>  
> +static int

Maybe bool is a more appropriate return type for this function?
Likewise for ovs_router_get_netdev_source_address().
But perhaps that is a cleanup for another time.

Also, there is a lot of comonality between verify_prefsrc()
and ovs_router_get_netdev_source_address(). I am curious to know
if you considered combining them, or otherwise sharing code between them?

> +verify_prefsrc(const struct in6_addr *ip6_dst,
> +   const char output_bridge[],
> +   struct in6_addr *prefsrc)
> +{
> +struct in6_addr *mask, *addr6;
> +int err, n_in6, i;
> +struct netdev *dev;
> +
> +err = netdev_open(output_bridge, NULL, );
> +if (err) {
> +return err;
> +}
> +
> +err = netdev_get_addr_list(dev, , , _in6);
> +if (err) {
> +goto out;
> +}
> +
> +err = ENOENT;

nit: If you move this to below the for loop...

> +for (i = 0; i < n_in6; i++) {
> +struct in6_addr a1, a2;
> +a1 = ipv6_addr_bitand(ip6_dst, [i]);
> +a2 = ipv6_addr_bitand(prefsrc, [i]);
> +
> +/* Check that the intarface has "prefsrc" and
> + * it is same broadcast domain with "ip6_dst". */
> +if (IN6_ARE_ADDR_EQUAL(prefsrc, [i]) &&
> +IN6_ARE_ADDR_EQUAL(, )) {
> +err = 0;

... then I don't think you need to set err here, as it will already
be set to zero from the call to netdev_get_addr_list.

> +goto out;
> +}
> +}
> +
> +out:
> +free(addr6);
> +free(mask);
> +netdev_close(dev);
> +return err;
> +}
> +
>  int
>  ovs_router_get_netdev_source_address(const struct in6_addr *ip6_dst,
>   const char output_bridge[],
> @@ -217,7 +258,8 @@ static int
>  ovs_router_insert__(uint32_t mark, uint8_t priority, bool local,
>  const struct in6_addr *ip6_dst,
>  uint8_t plen, const char output_bridge[],
> -const struct in6_addr *gw)
> +const struct in6_addr *gw,
> +const struct in6_addr *ip6_src)
>  {
>  const struct cls_rule *cr;
>  struct ovs_router_entry *p;
> @@ -236,11 +278,21 @@ ovs_router_insert__(uint32_t mark, uint8_t priority, 
> bool local,
>  p->plen = plen;
>  p->local = local;
>  p->priority = priority;
> -err = ovs_router_get_netdev_source_address(ip6_dst, output_bridge,
> -   >src_addr);
> -if (err && ipv6_addr_is_set(gw)) {
> -err = ovs_router_get_netdev_source_address(gw, output_bridge,
> +
> +if (ipv6_addr_is_set(ip6_src)) {
> +p->src_addr = *ip6_src;
> +
> +err = verify_prefsrc(ip6_dst, output_bridge, >src_addr);
> +if (err && ipv6_addr_is_set(gw)) {
> +err = verify_prefsrc(gw, output_bridge, >src_addr);
> +}
> +} else {
> +err = ovs_router_get_netdev_source_address(ip6_dst, output_bridge,
> >src_addr);
> +if (err && ipv6_addr_is_set(gw)) 

Re: [ovs-dev] [PATCH v3 1/3] netdev-dummy: Support multiple IP addresses

2023-02-20 Thread Simon Horman
On Tue, Feb 14, 2023 at 12:39:04PM +0900, Nobuhiro MIKI wrote:
> This is useful in test cases where multiple IPv4/IPv6 addresses
> are assigned together.
> 
> Signed-off-by: Nobuhiro MIKI 
> ---
>  lib/netdev-dummy.c | 66 +-
>  1 file changed, 42 insertions(+), 24 deletions(-)
> 
> diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c
> index 5d59c9c0312b..abeb99f3e3d6 100644
> --- a/lib/netdev-dummy.c
> +++ b/lib/netdev-dummy.c

...

> @@ -1178,7 +1184,10 @@ netdev_dummy_send(struct netdev *netdev, int qid,
>  dummy_packet_conn_send(>conn, buffer, size);
>  
>  /* Reply to ARP requests for 'dev''s assigned IP address. */
> -if (dev->address.s_addr) {
> +struct netdev_addr_dummy *addr_dummy;
> +LIST_FOR_EACH (addr_dummy, node, >addrs) {
> +ovs_be32 address = 
> in6_addr_get_mapped_ipv4(_dummy->address);
> +
>  struct dp_packet dp;
>  struct flow flow;
>  
> @@ -1186,7 +1195,7 @@ netdev_dummy_send(struct netdev *netdev, int qid,
>  flow_extract(, );
>  if (flow.dl_type == htons(ETH_TYPE_ARP)
>  && flow.nw_proto == ARP_OP_REQUEST
> -&& flow.nw_dst == dev->address.s_addr) {
> +&& flow.nw_dst == address) {
>  struct dp_packet *reply = dp_packet_new(0);
>  compose_arp(reply, ARP_OP_REPLY, dev->hwaddr, flow.dl_src,
>  false, flow.nw_dst, flow.nw_src);

The next few lines of this function are.

netdev_dummy_queue_packet(dev, reply, NULL, 0);
}
}

I wonder if it would be appropriate to add a break; after
the call to netdev_dummy_queue_packet(). I don't think we expect
multiple hits. And it would save spinning over the loop unnecessarily,
though perhaps we don't expect the list of addresses to be very long
else we wouldn't use a list at all.

...

Regardless, this patch looks good to me.

Reviewed-by: Simon Horman 
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v3 2/2] ipfix: make template and stats interval configurable

2023-02-20 Thread Simon Horman
On Fri, Feb 10, 2023 at 05:03:14PM +0100, Adrián Moreno wrote:
> From: Adrian Moreno 
> 
> Add options to the IPFIX table configure the interval to send statistics
> and template information.
> 
> Signed-off-by: Adrian Moreno 
> 
> ---
> - v3:
>- Removed unit tests which generate errors in Intel CI. Will submit in
>  independent series.
> - v2:
>   - Fixed a potential race condition in unit test.
> - v1:
>   - Added unit test.

Reviewed-by: Simon Horman 

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v3 1/2] ofproto-ipfix: use per-domain template timeouts

2023-02-20 Thread Simon Horman
On Fri, Feb 10, 2023 at 05:03:13PM +0100, Adrián Moreno wrote:
> From: Adrian Moreno 
> 
> IPFIX templates have to be sent for each Observation Domain ID.
> Currently, a timer is kept at each dpif_ipfix_exporter to send them.
> This works fine for per-bridge sampling where there is only one
> Observation Domain ID per exporter. However, this is does not work for
> per-flow sampling where more than one Observation Domain IDs can be
> specified by the controller. In this case, ovs-vswitchd will only send
> template information for one (arbitrary) DomainID.
> 
> Fix per-flow sampling by using an hmap to keep a timer for each
> Observation Domain ID.
> 
> Signed-off-by: Adrian Moreno 

Reviewed-by: Simon Horman 
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [ovs-build] |fail| pw1743629 [ovs-dev, branch-3.1, 1/2] Set release date for 3.1.0.

2023-02-20 Thread Ilya Maximets
On 2/20/23 12:01, Phelan, Michael wrote:
> Hi Ilya,
> Sorry I was OOO Thursday and Friday of last week so I'm only seeing your mail 
> now.
> 
> I've updated the CI so it should build with DPDK 22.11 for branch-3.1 from 
> now on.

No problem.  Thanks for the update!

Best regards, Ilya Maximets.

> 
> Thanks,
> Michael.
> 
>> -Original Message-
>> From: Ilya Maximets 
>> Sent: Thursday 16 February 2023 13:17
>> To: ovs_jenkins ; Phelan, Michael
>> 
>> Cc: i.maxim...@ovn.org; ovs-dev ; Stokes, Ian
>> 
>> Subject: Re: [ovs-build] |fail| pw1743629 [ovs-dev,branch-3.1,1/2] Set
>> release date for 3.1.0.
>>
>> Hi, Michael.
>>
>> Could you check what happened here?  Looks like CI tried to build
>> with a wrong version of DPDK.  Should be 22.11 for branch-3.1.
>>
>> Thanks!
>>
>> Best regards, Ilya Maximets.
>>
>> P.S. It's not a release blocking issue, so not very urgent.
>>
> 

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH ovn] controller: Fix hairpin SNAT flow explosion for the same SNAT IPs case.

2023-02-20 Thread Ilya Maximets
It's common to have 'hairpin_snat_ip' to be configured exactly the
same for each and every Load Balancer in a setup.  For example,
ovn-kubernetes does that, setting '169.254.169.5 fd69::5' value
unconditionally for every LB:
  
https://github.com/ovn-org/ovn-kubernetes/blob/cd78ae1af4657d38bdc41003a8737aa958d62b9d/go-controller/pkg/ovn/controller/services/loadbalancer.go#L314

Current implementation of ovn-controller will create an OpenFlow rule
for every datapath for each VIP in case of 'hairpin_snat_ip', because
we need to handle the case where LBs with the same VIP are applied to
different datapaths and have different SNAT IPs.

In large scale setups with tens of thousands of LBs and hundreds of
nodes, this generates millions of OpenFlow rules, making ovn-controller
choke and fall into unrecoverable disconnection loop with poll
intervals up to 200 seconds in ovn-heater's density-heavy scenario with
just 120 nodes.  It also generates rules with thousands of conjunctions
that do not fit into a single FLOW_MOD, breaking the OpenFlow
management communication.

Fix that by checking that all the 'hairpin_snat_ip' options on all the
Load Balancers are exactly the same and using just one OpenFlow rule
per VIP if that's the case.  This is possible because even if LBs with
the same VIP are applied to different datapaths, we're still going to
SNAT with the exactly same hairpin SNAT IP.

Ideally, we can only check that 'hairpin_snat_ip' is the same for all
LBs with the same VIP, but that is not necessary for now, as the only
common use case for this option today is ovn-kubernetes.  Can be added
in the future by enhancing the logic in ovn-northd, if necessary.

Note:

There seems to be a separate issue in ovn-controller.  If only Load
Balancer data is updated, I-P processing is going through the 'lb_data'
node.  And handler for that node deletes old flows and adds new ones
for each updated LB.  This triggers OpenFlow flow DEL + ADD.  On DEL
of hairpin reply learning flows in table 68, OVS cleans up all the
learned flows in table 69 as well, because of 'delete_learned' flag.

However, when changes are happening in higher level nodes and
en_lflow_output_run() triggers lflow_run(), lflow_run() doesn't delete
current hairpin flows before re-processing them.  That leads to the
flow MOD instead of DEL + ADD.  Since the learning flow is not deleted,
but modified, that doesn't trigger removal of learned rules in OVS.
This is what happening if 'lb_same_hairpin_ips' flips the value.
The test adjusted to change the 'hairpin_snat_ip' twice, to trigger
an 'lb_data' processing.  Otherwise, old learned flow for the VIP
stays in table 69.

This should not be a huge issue, as it's still a VIP address and it
should not make any harm to have that learned flow.  Also, the value
of 'lb_same_hairpin_ips' doesn't ever flip in current ovn-kubernetes.

Reported-at: https://bugzilla.redhat.com/2171423
Signed-off-by: Ilya Maximets 
---
 controller/lflow.c  | 51 -
 controller/lflow.h  |  1 +
 controller/ovn-controller.c | 17 +
 lib/lb.c|  2 ++
 lib/lb.h|  1 +
 northd/northd.c | 27 +---
 ovn-sb.xml  | 11 
 tests/ovn.at|  7 +
 8 files changed, 102 insertions(+), 15 deletions(-)

diff --git a/controller/lflow.c b/controller/lflow.c
index a0a26460c..fe895b915 100644
--- a/controller/lflow.c
+++ b/controller/lflow.c
@@ -100,6 +100,7 @@ consider_logical_flow(const struct sbrec_logical_flow 
*lflow,
 static void
 consider_lb_hairpin_flows(const struct ovn_controller_lb *lb,
   bool use_ct_mark,
+  bool same_hairpin_snat_ips,
   struct ovn_desired_flow_table *flow_table,
   struct simap *ids);
 
@@ -1829,16 +1830,19 @@ add_lb_ct_snat_hairpin_dp_flows(const struct 
ovn_controller_lb *lb,
 
 
 /* Add a ct_snat flow for each VIP of the LB. If this LB does not use
- * "hairpin_snat_ip", we can SNAT using the VIP.
+ * "hairpin_snat_ip", we can SNAT using the VIP.  If "hairpin_snat_ip"
+ * values are set and are exactly the same on every LB, we can SNAT using
+ * "hairpin_snat_ip".
  *
- * If this LB uses "hairpin_snat_ip", we add a flow to one dimension of a
- * conjunctive flow 'id'. The other dimension consists of the datapaths
+ * Otherwise, if this LB uses "hairpin_snat_ip", we add a flow to one dimension
+ * of a conjunctive flow 'id'. The other dimension consists of the datapaths
  * that this LB belongs to. These flows (and the actual SNAT flow) get added
  * by add_lb_ct_snat_hairpin_dp_flows(). */
 static void
 add_lb_ct_snat_hairpin_vip_flow(const struct ovn_controller_lb *lb,
 uint32_t id,
 struct ovn_lb_vip *lb_vip,
+bool same_hairpin_snat_ips,
 

Re: [ovs-dev] [ovs-build] |fail| pw1743629 [ovs-dev, branch-3.1, 1/2] Set release date for 3.1.0.

2023-02-20 Thread Phelan, Michael
Hi Ilya,
Sorry I was OOO Thursday and Friday of last week so I'm only seeing your mail 
now.

I've updated the CI so it should build with DPDK 22.11 for branch-3.1 from now 
on.

Thanks,
Michael.

> -Original Message-
> From: Ilya Maximets 
> Sent: Thursday 16 February 2023 13:17
> To: ovs_jenkins ; Phelan, Michael
> 
> Cc: i.maxim...@ovn.org; ovs-dev ; Stokes, Ian
> 
> Subject: Re: [ovs-build] |fail| pw1743629 [ovs-dev,branch-3.1,1/2] Set
> release date for 3.1.0.
> 
> Hi, Michael.
> 
> Could you check what happened here?  Looks like CI tried to build
> with a wrong version of DPDK.  Should be 22.11 for branch-3.1.
> 
> Thanks!
> 
> Best regards, Ilya Maximets.
> 
> P.S. It's not a release blocking issue, so not very urgent.
>

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH net-next v1 1/1] net: openvswitch: ovs_packet_cmd_execute put sw_flow mainbody in stack

2023-02-20 Thread Simon Horman
On Mon, Feb 20, 2023 at 03:11:17PM +0800, Eddy Tao wrote:
> Hi, Simon:
> 
> 
>     About your concern for the stack size, it leads to more room for
> improvement.
> 
> I will file a new version which will have smaller stack occupation and
> better performance
> 
> 
> The new revision is invoked by existing examples of using struct in stack,
> in the same file net/openvswitch/datapath.c
> 
> struct sw_flow_actions *get_flow_actions(..)
> {
>     struct sw_flow_key masked_key;==> sizeof sw_flow_key is 464 bytes
> 
> static noinline_for_stack int
> ovs_nla_init_match_and_action(..)
> {
>     struct sw_flow_mask mask;==> sizeof sw_flow_mask is 496 bytes
> 
> 
> The first example reminded me, revisiting the code in
> ovs_packet_cmd_execute, basically sw_flow serves as a container for
> sw_flow_actions and sw_flow_key only.
> 
> We do not need the bulk of tunnel info memory in sw_flow, which saves us
> 200+ bytes further -- less is more.
> 
> 
> The new revision will be presented shortly after some sanity and benchmark

Thanks, for addressing my review.
It is the stack size issue that is my greatest concern.

Please consider including performance results
in the patch description of v2.

Kind regards,
Simon
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v4] ofproto-dpif-xlate: Remove repeated function for judge garp

2023-02-20 Thread 0-day Robot
Bleep bloop.  Greetings Han Ding, I am a robot and I have tried out your patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


checkpatch:
WARNING: Line is 80 characters long (recommended limit is 79)
#32 FILE: lib/flow.h:1136:
if (!eth_addr_is_broadcast(FLOW_WC_GET_AND_MASK_WC(flow, wc, dl_dst))) {

Lines checked: 109, Warnings: 1, Errors: 0


Please check this out.  If you feel there has been an error, please email 
acon...@redhat.com

Thanks,
0-day Robot
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v4] ofproto-dpif-xlate: Remove repeated function for judge garp

2023-02-20 Thread Han Ding


Function is_gratuitous_arp() and function is_garp() are all used to judge
whether the flow is gratuitous arp. It is not necessary to use two functions
to do the same thing and just keep one.

Gratuitous ARP message is a generally link-level broadcast messages and
carry the same IP in sender and target fields. This patch add the check
in function is_garp() whether the arp is a broadcast message and whether
the arp is an arp request or an arp reply.

Signed-off-by: Han Ding 
---

Notes:
v2:
- Add the check whether the ARP packet is a broadcast message in is_garp().

v3:
- Add the check whether the packet is an arp request or an arp reply in 
is_garp().

v4:
- Add description of differences between patch versions
- Use WC_MASK_FIELD() macro instead of memset.

 lib/flow.h   | 16 +---
 ofproto/ofproto-dpif-xlate.c | 32 ++--
 2 files changed, 15 insertions(+), 33 deletions(-)

diff --git a/lib/flow.h b/lib/flow.h
index c647ad83c..037deb6cd 100644
--- a/lib/flow.h
+++ b/lib/flow.h
@@ -1133,10 +1133,20 @@ static inline bool is_garp(const struct flow *flow,
struct flow_wildcards *wc)
 {
 if (is_arp(flow)) {
-return (FLOW_WC_GET_AND_MASK_WC(flow, wc, nw_src) ==
-FLOW_WC_GET_AND_MASK_WC(flow, wc, nw_dst));
-}
+if (!eth_addr_is_broadcast(FLOW_WC_GET_AND_MASK_WC(flow, wc, dl_dst))) 
{
+return false;
+}

+if (wc) {
+WC_MASK_FIELD(wc, nw_proto);
+}
+
+if (flow->nw_proto == ARP_OP_REQUEST ||
+flow->nw_proto == ARP_OP_REPLY) {
+return (FLOW_WC_GET_AND_MASK_WC(flow, wc, nw_src) ==
+FLOW_WC_GET_AND_MASK_WC(flow, wc, nw_dst));
+}
+}
 return false;
 }

diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index a9cf3cbee..b3c13f6bf 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -2543,34 +2543,6 @@ output_normal(struct xlate_ctx *ctx, const struct 
xbundle *out_xbundle,
 memcpy(>xin->flow.vlans, _vlans, sizeof(old_vlans));
 }

-/* A VM broadcasts a gratuitous ARP to indicate that it has resumed after
- * migration.  Older Citrix-patched Linux DomU used gratuitous ARP replies to
- * indicate this; newer upstream kernels use gratuitous ARP requests. */
-static bool
-is_gratuitous_arp(const struct flow *flow, struct flow_wildcards *wc)
-{
-if (flow->dl_type != htons(ETH_TYPE_ARP)) {
-return false;
-}
-
-memset(>masks.dl_dst, 0xff, sizeof wc->masks.dl_dst);
-if (!eth_addr_is_broadcast(flow->dl_dst)) {
-return false;
-}
-
-memset(>masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
-if (flow->nw_proto == ARP_OP_REPLY) {
-return true;
-} else if (flow->nw_proto == ARP_OP_REQUEST) {
-memset(>masks.nw_src, 0xff, sizeof wc->masks.nw_src);
-memset(>masks.nw_dst, 0xff, sizeof wc->masks.nw_dst);
-
-return flow->nw_src == flow->nw_dst;
-} else {
-return false;
-}
-}
-
 /* Determines whether packets in 'flow' within 'xbridge' should be forwarded or
  * dropped.  Returns true if they may be forwarded, false if they should be
  * dropped.
@@ -2619,7 +2591,7 @@ is_admissible(struct xlate_ctx *ctx, struct xport 
*in_port,
 mac = mac_learning_lookup(xbridge->ml, flow->dl_src, vlan);
 if (mac
 && mac_entry_get_port(xbridge->ml, mac) != in_xbundle->ofbundle
-&& (!is_gratuitous_arp(flow, ctx->wc)
+&& (!is_garp(flow, ctx->wc)
 || mac_entry_is_grat_arp_locked(mac))) {
 ovs_rwlock_unlock(>ml->rwlock);
 xlate_report(ctx, OFT_DETAIL,
@@ -3062,7 +3034,7 @@ xlate_normal(struct xlate_ctx *ctx)
 }

 /* Learn source MAC. */
-bool is_grat_arp = is_gratuitous_arp(flow, wc);
+bool is_grat_arp = is_garp(flow, wc);
 if (ctx->xin->allow_side_effects
 && flow->packet_type == htonl(PT_ETH)
 && in_port && in_port->pt_mode != NETDEV_PT_LEGACY_L3
--
2.27.0




___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev