[ovs-dev] [PATCH V3 2/2] dpif-netdev: Fix flushing of a vport

2023-06-11 Thread Eli Britstein via dev
When using a userspace vport ("vxlan0"), dpif-netdev adds an additional
netdev ("vxlan_sys_4789"). The dpif netdev ("vxlan0") is added to the
netdev-offload ports map, thus flows are associated on this netdev.

However, flushing is done on the dpif-netdev level ("vxlan_sys_4789"),
and relevant offload flows are not destroyed.

To fix it, add the datapath netdev to the netdev-offload ports map. In
case there is no different internal netdev, use the dpif netdev, as before.

Fixes: adbd4301a249 ("netdev-offload-dpdk: Use per-netdev offload metadata.")
Signed-off-by: Eli Britstein 
---
 lib/dpif-netdev.c   | 15 ++-
 lib/dpif-netlink.c  |  5 -
 lib/dpif-provider.h |  5 +++--
 lib/dpif.c  |  8 +---
 4 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 70b953ae6..52d2998d7 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -547,7 +547,8 @@ static int get_port_by_name(struct dp_netdev *dp, const 
char *devname,
 static void dp_netdev_free(struct dp_netdev *)
 OVS_REQUIRES(dp_netdev_mutex);
 static int do_add_port(struct dp_netdev *dp, const char *devname,
-   const char *type, odp_port_t port_no)
+   const char *type, odp_port_t port_no,
+   struct netdev **datapath_netdev)
 OVS_REQ_WRLOCK(dp->port_rwlock);
 static void do_del_port(struct dp_netdev *dp, struct dp_netdev_port *)
 OVS_REQ_WRLOCK(dp->port_rwlock);
@@ -1884,7 +1885,7 @@ create_dp_netdev(const char *name, const struct 
dpif_class *class,
 
 error = do_add_port(dp, name, dpif_netdev_port_open_type(dp->class,
  "internal"),
-ODPP_LOCAL);
+ODPP_LOCAL, NULL);
 ovs_rwlock_unlock(&dp->port_rwlock);
 if (error) {
 dp_netdev_free(dp);
@@ -2151,7 +2152,7 @@ out:
 
 static int
 do_add_port(struct dp_netdev *dp, const char *devname, const char *type,
-odp_port_t port_no)
+odp_port_t port_no, struct netdev **datapath_netdev)
 OVS_REQ_WRLOCK(dp->port_rwlock)
 {
 struct netdev_saved_flags *sf;
@@ -2167,6 +2168,9 @@ do_add_port(struct dp_netdev *dp, const char *devname, 
const char *type,
 if (error) {
 return error;
 }
+if (datapath_netdev) {
+*datapath_netdev = port->netdev;
+}
 
 hmap_insert(&dp->ports, &port->node, hash_port_no(port_no));
 seq_change(dp->port_seq);
@@ -2196,7 +2200,7 @@ do_add_port(struct dp_netdev *dp, const char *devname, 
const char *type,
 
 static int
 dpif_netdev_port_add(struct dpif *dpif, struct netdev *netdev,
- odp_port_t *port_nop)
+ odp_port_t *port_nop, struct netdev **datapath_netdev)
 {
 struct dp_netdev *dp = get_dp_netdev(dpif);
 char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
@@ -2215,7 +2219,8 @@ dpif_netdev_port_add(struct dpif *dpif, struct netdev 
*netdev,
 }
 if (!error) {
 *port_nop = port_no;
-error = do_add_port(dp, dpif_port, netdev_get_type(netdev), port_no);
+error = do_add_port(dp, dpif_port, netdev_get_type(netdev), port_no,
+datapath_netdev);
 }
 ovs_rwlock_unlock(&dp->port_rwlock);
 
diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index 60bd39643..a02f0f2d9 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -1144,7 +1144,7 @@ dpif_netlink_rtnl_port_create_and_add(struct dpif_netlink 
*dpif,
 
 static int
 dpif_netlink_port_add(struct dpif *dpif_, struct netdev *netdev,
-  odp_port_t *port_nop)
+  odp_port_t *port_nop, struct netdev **datapath_netdev)
 {
 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
 int error = EOPNOTSUPP;
@@ -1157,6 +1157,9 @@ dpif_netlink_port_add(struct dpif *dpif_, struct netdev 
*netdev,
 error = dpif_netlink_port_add_compat(dpif, netdev, port_nop);
 }
 fat_rwlock_unlock(&dpif->upcall_lock);
+if (datapath_netdev) {
+*datapath_netdev = netdev;
+}
 
 return error;
 }
diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h
index a33c6ec30..47c573d95 100644
--- a/lib/dpif-provider.h
+++ b/lib/dpif-provider.h
@@ -203,10 +203,11 @@ struct dpif_class {
  * ODPP_NONE, attempts to use that as the port's port number.
  *
  * If port is successfully added, sets '*port_no' to the new port's
- * port number.  Returns EBUSY if caller attempted to choose a port
+ * port number, and datapath_netdev to a potentially created netdev in the
+ * dpif-class level.  Returns EBUSY if caller attempted to choose a port
  * number, and it was in use. */
 int (*port_add)(struct dpif *dpif, struct netdev *netdev,
-odp_port_t *port_no);
+odp_port_t *port_no, struct netdev **datapath_netdev);
 
 /* Removes port 

[ovs-dev] [PATCH V3 1/2] netdev-offload-dpdk: Fix flushing of a physdev

2023-06-11 Thread Eli Britstein via dev
Vport's offloads are done on the tracked orig-in-port, but the flow itself
is associated in the vport's map.

Removing the physdev will flush all the flows that are on its map, but
not the ones on other netdevs' maps. Since flows take a reference count on
both their vport and their physdev, the physdev still holds references.
Removing it and then re-adding it fails with an "already in use" error.

Fix it by flushing the physdev's offload flows in all related netdevs,
e.g. the netdev itself, or for physical devices, all vports.

Fixes: adbd4301a249 ("netdev-offload-dpdk: Use per-netdev offload metadata.")
Reported-by: 15895987278 
Signed-off-by: Eli Britstein 
---
 lib/netdev-offload-dpdk.c | 35 ++-
 1 file changed, 30 insertions(+), 5 deletions(-)

diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index 14bc87771..992627fa2 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -2537,15 +2537,15 @@ out:
 return ret;
 }
 
-static int
-netdev_offload_dpdk_flow_flush(struct netdev *netdev)
+static void
+flush_netdev_flows_in_related(struct netdev *netdev, struct netdev *related)
 {
-struct cmap *map = offload_data_map(netdev);
-struct ufid_to_rte_flow_data *data;
 unsigned int tid = netdev_offload_thread_id();
+struct cmap *map = offload_data_map(related);
+struct ufid_to_rte_flow_data *data;
 
 if (!map) {
-return -1;
+return;
 }
 
 CMAP_FOR_EACH (data, node, map) {
@@ -2556,6 +2556,31 @@ netdev_offload_dpdk_flow_flush(struct netdev *netdev)
 netdev_offload_dpdk_flow_destroy(data);
 }
 }
+}
+
+static bool
+flush_in_vport_cb(struct netdev *vport,
+  odp_port_t odp_port OVS_UNUSED,
+  void *aux)
+{
+struct netdev *netdev = aux;
+
+/* Only vports are related to physical devices. */
+if (netdev_vport_is_vport_class(vport->netdev_class)) {
+flush_netdev_flows_in_related(netdev, vport);
+}
+
+return false;
+}
+
+static int
+netdev_offload_dpdk_flow_flush(struct netdev *netdev)
+{
+flush_netdev_flows_in_related(netdev, netdev);
+
+if (!netdev_vport_is_vport_class(netdev->netdev_class)) {
+netdev_ports_traverse(netdev->dpif_type, flush_in_vport_cb, netdev);
+}
 
 return 0;
 }
-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V2 2/2] dpif-netdev: Fix flushing of a vport

2023-06-10 Thread Eli Britstein via dev
When using a userspace vport ("vxlan0"), dpif-netdev adds an additional
netdev ("vxlan_sys_4789"). The dpif netdev ("vxlan0") is added to the
netdev-offload ports map, thus flows are associated on this netdev.

However, flushing is done on the dpif-netdev level ("vxlan_sys_4789"),
and relevant offload flows are not destroyed.

To fix it, add the datapath netdev to the netdev-offload ports map. In
case there is no different internal netdev, use the dpif netdev, as before.

Fixes: adbd4301a249 ("netdev-offload-dpdk: Use per-netdev offload metadata.")
Signed-off-by: Eli Britstein 
---
 lib/dpif-netdev.c   | 15 ++-
 lib/dpif-netlink.c  |  5 -
 lib/dpif-provider.h |  5 +++--
 lib/dpif.c  |  8 +---
 4 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 70b953ae6..f29e3e5ec 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -547,7 +547,8 @@ static int get_port_by_name(struct dp_netdev *dp, const 
char *devname,
 static void dp_netdev_free(struct dp_netdev *)
 OVS_REQUIRES(dp_netdev_mutex);
 static int do_add_port(struct dp_netdev *dp, const char *devname,
-   const char *type, odp_port_t port_no)
+   const char *type, odp_port_t port_no,
+   struct netdev **datapath_netdev);
 OVS_REQ_WRLOCK(dp->port_rwlock);
 static void do_del_port(struct dp_netdev *dp, struct dp_netdev_port *)
 OVS_REQ_WRLOCK(dp->port_rwlock);
@@ -1884,7 +1885,7 @@ create_dp_netdev(const char *name, const struct 
dpif_class *class,
 
 error = do_add_port(dp, name, dpif_netdev_port_open_type(dp->class,
  "internal"),
-ODPP_LOCAL);
+ODPP_LOCAL, NULL);
 ovs_rwlock_unlock(&dp->port_rwlock);
 if (error) {
 dp_netdev_free(dp);
@@ -2151,7 +2152,7 @@ out:
 
 static int
 do_add_port(struct dp_netdev *dp, const char *devname, const char *type,
-odp_port_t port_no)
+odp_port_t port_no, struct netdev **datapath_netdev)
 OVS_REQ_WRLOCK(dp->port_rwlock)
 {
 struct netdev_saved_flags *sf;
@@ -2167,6 +2168,9 @@ do_add_port(struct dp_netdev *dp, const char *devname, 
const char *type,
 if (error) {
 return error;
 }
+if (datapath_netdev) {
+*datapath_netdev = port->netdev;
+}
 
 hmap_insert(&dp->ports, &port->node, hash_port_no(port_no));
 seq_change(dp->port_seq);
@@ -2196,7 +2200,7 @@ do_add_port(struct dp_netdev *dp, const char *devname, 
const char *type,
 
 static int
 dpif_netdev_port_add(struct dpif *dpif, struct netdev *netdev,
- odp_port_t *port_nop)
+ odp_port_t *port_nop, struct netdev **datapath_netdev)
 {
 struct dp_netdev *dp = get_dp_netdev(dpif);
 char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
@@ -2215,7 +2219,8 @@ dpif_netdev_port_add(struct dpif *dpif, struct netdev 
*netdev,
 }
 if (!error) {
 *port_nop = port_no;
-error = do_add_port(dp, dpif_port, netdev_get_type(netdev), port_no);
+error = do_add_port(dp, dpif_port, netdev_get_type(netdev), port_no,
+datapath_netdev);
 }
 ovs_rwlock_unlock(&dp->port_rwlock);
 
diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index 60bd39643..a02f0f2d9 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -1144,7 +1144,7 @@ dpif_netlink_rtnl_port_create_and_add(struct dpif_netlink 
*dpif,
 
 static int
 dpif_netlink_port_add(struct dpif *dpif_, struct netdev *netdev,
-  odp_port_t *port_nop)
+  odp_port_t *port_nop, struct netdev **datapath_netdev)
 {
 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
 int error = EOPNOTSUPP;
@@ -1157,6 +1157,9 @@ dpif_netlink_port_add(struct dpif *dpif_, struct netdev 
*netdev,
 error = dpif_netlink_port_add_compat(dpif, netdev, port_nop);
 }
 fat_rwlock_unlock(&dpif->upcall_lock);
+if (datapath_netdev) {
+*datapath_netdev = netdev;
+}
 
 return error;
 }
diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h
index a33c6ec30..47c573d95 100644
--- a/lib/dpif-provider.h
+++ b/lib/dpif-provider.h
@@ -203,10 +203,11 @@ struct dpif_class {
  * ODPP_NONE, attempts to use that as the port's port number.
  *
  * If port is successfully added, sets '*port_no' to the new port's
- * port number.  Returns EBUSY if caller attempted to choose a port
+ * port number, and datapath_netdev to a potentially created netdev in the
+ * dpif-class level.  Returns EBUSY if caller attempted to choose a port
  * number, and it was in use. */
 int (*port_add)(struct dpif *dpif, struct netdev *netdev,
-odp_port_t *port_no);
+odp_port_t *port_no, struct netdev **datapath_netdev);
 
 /* Removes port 

[ovs-dev] [PATCH V2 1/2] netdev-offload-dpdk: Fix flushing of a physdev

2023-06-10 Thread Eli Britstein via dev
Vport's offloads are done on the tracked orig-in-port, but the flow itself
is associated in the vport's map.

Removing the physdev will flush all the flows that are on its map, but
not the ones on other netdevs' maps. Since flows take a reference count on
both their vport and their physdev, the physdev still holds references.
Removing it and then re-adding it fails with an "already in use" error.

Fix it by flushing the physdev's offload flows in all related netdevs,
e.g. the netdev itself, or for physical devices, all vports.

Fixes: adbd4301a249 ("netdev-offload-dpdk: Use per-netdev offload metadata.")
Reported-by: 15895987278 
Signed-off-by: Eli Britstein 
---
 lib/netdev-offload-dpdk.c | 35 ++-
 1 file changed, 30 insertions(+), 5 deletions(-)

diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index 14bc87771..992627fa2 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -2537,15 +2537,15 @@ out:
 return ret;
 }
 
-static int
-netdev_offload_dpdk_flow_flush(struct netdev *netdev)
+static void
+flush_netdev_flows_in_related(struct netdev *netdev, struct netdev *related)
 {
-struct cmap *map = offload_data_map(netdev);
-struct ufid_to_rte_flow_data *data;
 unsigned int tid = netdev_offload_thread_id();
+struct cmap *map = offload_data_map(related);
+struct ufid_to_rte_flow_data *data;
 
 if (!map) {
-return -1;
+return;
 }
 
 CMAP_FOR_EACH (data, node, map) {
@@ -2556,6 +2556,31 @@ netdev_offload_dpdk_flow_flush(struct netdev *netdev)
 netdev_offload_dpdk_flow_destroy(data);
 }
 }
+}
+
+static bool
+flush_in_vport_cb(struct netdev *vport,
+  odp_port_t odp_port OVS_UNUSED,
+  void *aux)
+{
+struct netdev *netdev = aux;
+
+/* Only vports are related to physical devices. */
+if (netdev_vport_is_vport_class(vport->netdev_class)) {
+flush_netdev_flows_in_related(netdev, vport);
+}
+
+return false;
+}
+
+static int
+netdev_offload_dpdk_flow_flush(struct netdev *netdev)
+{
+flush_netdev_flows_in_related(netdev, netdev);
+
+if (!netdev_vport_is_vport_class(netdev->netdev_class)) {
+netdev_ports_traverse(netdev->dpif_type, flush_in_vport_cb, netdev);
+}
 
 return 0;
 }
-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 1/2] netdev-offload-dpdk: Fix flushing of a physdev

2023-06-05 Thread Eli Britstein via dev


>-Original Message-
>From: David Marchand 
>Sent: Friday, 26 May 2023 17:48
>To: Eli Britstein 
>Cc: d...@openvswitch.org; Ilya Maximets ;
>wuxi_...@163.com
>Subject: Re: [ovs-dev] [PATCH 1/2] netdev-offload-dpdk: Fix flushing of a
>physdev
>
>External email: Use caution opening links or attachments
>
>
>On Fri, May 26, 2023 at 4:35 PM David Marchand
> wrote:
>>
>> Hello Eli,
>>
>> On Mon, Sep 5, 2022 at 4:46 PM Eli Britstein via dev
>>  wrote:
>> >
>> > Vport's offloads are done on the tracked orig-in-port, but the flow
>> > itself is associated in the vport's map.
>> >
>> > Removing the physdev will flush all the ports that are on its map,
>> > but
>>
>> all the flows*
>>
>> > not the ones on other netdevs' maps. Since flows take reference
>> > count on both their vport and their physdev, the physdev fails to be
>removed.
>>
>> I tested with a simple ping over a vxlan tunnel.
>> In my testing, I do manage to remove the physdev port.
>> The revalidator later flushes the expired flow (related to the vport),
>> and the offload thread ends up crashing.
>>
>> netdev_dpdk_get_port_id (netdev=netdev@entry=0x17d333600) at
>> ../lib/netdev-dpdk.c:5438
>> 5438if (!is_dpdk_class(netdev->netdev_class)) {
>> (gdb) bt
>> #0  netdev_dpdk_get_port_id (netdev=netdev@entry=0x17d333600) at
>> ../lib/netdev-dpdk.c:5438
>> #1  0x00a34930 in netdev_offload_dpdk_flow_destroy
>> (rte_flow_data=0x7fa51c0104a0) at ../lib/netdev-offload-dpdk.c:2349
>> #2  0x00926f7c in mark_to_flow_disassociate (dp=0x5c93c80,
>> flow=0x7fa4f400d8a0) at ../lib/dpif-netdev.c:2621
>> #3  0x009276f7 in dp_netdev_flow_offload_del
>> (item=0x7fa4fc003660) at ../lib/dpif-netdev.c:2743
>> #4  dp_offload_flow (item=0x7fa4fc003660) at ../lib/dpif-netdev.c:2855
>> #5  dp_netdev_flow_offload_main (arg=0x59aced0) at
>> ../lib/dpif-netdev.c:2918
>> #6  0x009c0635 in ovsthread_wrapper (aux_=<optimized out>) at
>> ../lib/ovs-thread.c:423
>>
>> (gdb) p rte_flow_data->physdev
>> $5 = (struct netdev *) 0x17d333600
>> (gdb) p rte_flow_data->netdev
>> $6 = (struct netdev *) 0x60bb520
>> (gdb) p *rte_flow_data->physdev
>> $7 = {name = 0x0, netdev_class = 0x0, auto_classified = false,
>> ol_flags = 0, mtu_user_config = false, ref_cnt = 0, change_seq = 0,
>> reconfigure_seq = 0x0, last_reconfigure_seq = 0, n_txq = 0, n_rxq = 0,
>> node = 0x0, saved_flags_list = {
>> prev = 0x0, next = 0x0}, flow_api = {p = 0x0}, dpif_type = 0x0,
>> hw_info = {oor = false, miss_api_supported = false, offload_count = 0,
>> pending_count = 0, offload_data = {p = 0x0}}}
>> (gdb) p *rte_flow_data->netdev
>> $8 = {name = 0x60b8eb0 "vxlan0", netdev_class = 0xcc5518
>> , auto_classified = false, ol_flags = 0,
>> mtu_user_config = false, ref_cnt = 8, change_seq = 4, reconfigure_seq
>> = 0x60b9120, last_reconfigure_seq = 1802,
>>   n_txq = 0, n_rxq = 0, node = 0x60b8da0, saved_flags_list = {prev =
>> 0x60bb570, next = 0x60bb570}, flow_api = {p = 0xb7ede0
>> }, dpif_type = 0xb7e42b "netdev", hw_info = {oor
>> = false, miss_api_supported = true,
>> offload_count = 0, pending_count = 0, offload_data = {p =
>> 0x60bc090}}}
>>
>> There is probably something wrong with the physdev refcnt... and it
>> seems I am hitting an issue close but different to yours.
>
>Ah ah.. nvm, the refcnt issue is on the debug log.
>I'll send a fix for this.
>
>But then I am not able to reproduce your issue.
>I am simply seeing the vport flow expiring.
>So I am probably not testing the right way, could you share a reproducer?

Sorry. Indeed, the commit msg is misleading.
The physical port can be detached while there is offloaded tunnel traffic.
However, re-adding it (before the flows have aged out) fails with an
"already in use" error:
$ ovs-vsctl del-port p0
$ ovs-vsctl add-port br-phy0 p0 -- set interface p0 type=dpdk 
options:dpdk-devargs=":08:00.0,dv_xmeta_en=3"
ovs-vsctl: Error detected while setting up 'p0': 'p0' is trying to use device 
':08:00.0,dv_xmeta_en=3' which is already in use by 'p0'.  See ovs-vswitchd 
log for details.
ovs-vsctl: The default log directory is "/var/log/openvswitch".

I can rebase and rephrase the commit msg.
>
>Thanks!
>
>
>--
>David Marchand

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v4 3/3] netdev-offload-dpdk: use flow transfer proxy mechanism

2023-06-05 Thread Eli Britstein via dev



>-Original Message-
>From: Ivan Malov 
>Sent: Sunday, 4 June 2023 15:58
>To: Eli Britstein 
>Cc: ovs-dev@openvswitch.org; Ilya Maximets ; Ori
>Kam ; David Marchand 
>Subject: RE: [PATCH v4 3/3] netdev-offload-dpdk: use flow transfer proxy
>mechanism
>
>External email: Use caution opening links or attachments
>
>
>Hi Eli,
>
>Thanks for reviewing this. Please see below.
>
>On Tue, 21 Feb 2023, Eli Britstein wrote:
>
>>
>>
>>> -Original Message-
>>> From: Ivan Malov 
>>> Sent: Tuesday, 21 February 2023 2:41
>>> To: ovs-dev@openvswitch.org
>>> Cc: Ilya Maximets ; Eli Britstein
>>> ; Ori Kam ; David Marchand
>>> 
>>> Subject: [PATCH v4 3/3] netdev-offload-dpdk: use flow transfer proxy
>>> mechanism
>>>
>>> External email: Use caution opening links or attachments
>>>
>>>
>>> Manage "transfer" flows via the corresponding mechanism.
>>> Doing so requires that the traffic source be specified explicitly,
>>> via the corresponding pattern item.
>>>
>>> Signed-off-by: Ivan Malov 
>>> ---
>>> lib/netdev-dpdk.c | 88 +++
>>> lib/netdev-dpdk.h |  4 +-
>>> lib/netdev-offload-dpdk.c | 55 +++-
>>> 3 files changed, 117 insertions(+), 30 deletions(-)
>>>
>>> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index
>>> 2cebc3cca..3a9c9d9a0
>>> 100644
>>> --- a/lib/netdev-dpdk.c
>>> +++ b/lib/netdev-dpdk.c
>>> @@ -434,6 +434,7 @@ enum dpdk_hw_ol_features {
>>>
>>> struct netdev_dpdk {
>>> PADDED_MEMBERS_CACHELINE_MARKER(CACHE_LINE_SIZE,
>cacheline0,
>>> +dpdk_port_t flow_transfer_proxy_port_id;
>> This extra field here makes it overflow one cache line.
>>> dpdk_port_t port_id;
>>>
>>> /* If true, device was attached by rte_eth_dev_attach(). */
>>> @@ -1183,6
>>> +1184,7 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
>>> uint32_t rx_chksm_offload_capa =
>RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
>>>  RTE_ETH_RX_OFFLOAD_TCP_CKSUM |
>>>  RTE_ETH_RX_OFFLOAD_IPV4_CKSUM;
>>> +int ret;
>>>
>>> /*
>>>  * Full tunnel offload requires that tunnel ID metadata be @@
>>> -1194,6
>>> +1196,24 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
>>>  */
>>> dpdk_eth_dev_init_rx_metadata(dev);
>>>
>>> +/*
>>> + * Managing "transfer" flows requires that the user communicate them
>>> + * via a port which has the privilege to control the embedded switch.
>>> + * For some vendors, all ports in a given switching domain have
>>> + * this privilege. For other vendors, it's only one port.
>>> + *
>>> + * Get the proxy port ID and remember it for later use.
>>> + */
>>> +ret = rte_flow_pick_transfer_proxy(dev->port_id,
>>> +   >flow_transfer_proxy_port_id, 
>>> NULL);
>>> +if (ret != 0) {
>>> +/*
>>> + * The PMD does not indicate the proxy port.
>>> + * Assume the proxy is unneeded.
>>> + */
>>> +dev->flow_transfer_proxy_port_id = dev->port_id;
>>> +}
>>> +
>>> rte_eth_dev_info_get(dev->port_id, );
>>>
>>> if (strstr(info.driver_name, "vf") != NULL) { @@ -3981,8 +4001,10
>>> @@ netdev_dpdk_detach(struct unixctl_conn *conn, int argc
>OVS_UNUSED,
>>>const char *argv[], void *aux OVS_UNUSED)  {
>>> struct ds used_interfaces = DS_EMPTY_INITIALIZER;
>>> +struct netdev_dpdk *dev_self = NULL;
>>> struct rte_eth_dev_info dev_info;
>>> dpdk_port_t sibling_port_id;
>>> +struct netdev_dpdk *dev;
>>> dpdk_port_t port_id;
>>> bool used = false;
>>> char *response;
>>> @@ -4000,8 +4022,6 @@ netdev_dpdk_detach(struct unixctl_conn *conn,
>>> int argc OVS_UNUSED,
>>>   argv[1]);
>>>
>>> RTE_ETH_FOREACH_DEV_SIBLING (sibling_port_id, port_id) {
>>> -struct netdev_dpdk *dev;
>>> -
>>> LIST_FOR_EACH (dev, list_node, _list) {
>>> if (dev->port_id != sibling_port_id) {
>>> continue;
>>&

Re: [ovs-dev] [PATCH dpdk-latest v3 0/6] Add support for DPDK meter HW offload

2023-04-01 Thread Eli Britstein via dev
Hi

The kernel provides infrastructure that does not exist for DPDK. TC is 
different and cannot be imitated directly in all cases.
Using a meter object created on one port (proxy) on another one (for flow) is 
illegal.
Also, with this approach, even if fixed, offload will not work if the port 
is added after the meter is created.
Regarding modify, it should be done on the fly. When a meter is updated, the related 
HW meters should be updated via their proxy ports (the ones that created them).
If all flows using it are un-offloaded, they will not be re-offloaded.

Thanks,
Eli

>-Original Message-
>From: Nole Zhang 
>Sent: Friday, 31 March 2023 6:20
>To: Eli Britstein ; Simon Horman
>; d...@openvswitch.org
>Cc: Ilya Maximets ; Chaoyong He
>; Kevin Liu ; oss-drivers
>
>Subject: RE: [PATCH dpdk-latest v3 0/6] Add support for DPDK meter HW
>offload
>
>External email: Use caution opening links or attachments
>
>
>Hi,
>Thanks for your reply.
>Our original idea is imitating OVS-TC, meter action and flow is
>independence. First through the proxy port create the meter, then flow use
>meter with meter id. So I only add the proxy port, doesn't revise the flow with
>proxy port. So you think this idea cant be accepted?
>As your said, when the flow first time use the meter/proxy port, we
>offload the meter to the HW, when the last flow with the meter/proxy port
>will be delete, we delete the meter from the HW.
> About your idea, I have some issue about modify  the flow, 
> because the
>flow cant get the meter status, so we can't through flow to modify the meter
>in the HW.  We have two way to solve it, one is modify the meter, we delete
>all the flow with meter from the HW, the key is through meter id lookup the
>all flow with the meter id from the HW.  Another is update the meter through
>the proxy port in the HW. Do you think which is better?
>B.R.
>Peng
>
>> -Original Message-
>> From: Eli Britstein 
>> Sent: Thursday, March 30, 2023 8:34 PM
>> To: Simon Horman ; d...@openvswitch.org
>> Cc: Ilya Maximets ; Chaoyong He
>> ; Nole Zhang ;
>> Kevin Liu ; oss-drivers
>> 
>> Subject: RE: [PATCH dpdk-latest v3 0/6] Add support for DPDK meter HW
>> offload
>>
>> Hi
>>
>> The usage of the proxy port is wrong.
>> All transfer offload should be migrated to use it. There was [1-4],
>> but it was not completed.
>>
>> Meters should be created only on the proxy ports. As all offloads are
>> moved to use proxy ports, this is how they are shared.
>> Since we don't know the proxy port when the meter is added, it can be lazily
>> created upon the first flow that uses this meter/proxy-port
>> (reference counted), and destroyed when the last flow using it is destroyed.
>> As there might be multiple "proxy" ports, there might be multiple HW
>> meter objects associated with the same SW one. This should be managed.
>>
>> [1]
>>
>https://nam11.safelinks.protection.outlook.com/?url=http%3A%2F%2Fpatch
>>
>work.ozlabs.org%2Fproject%2Fopenvswitch%2Flist%2F%3Fseries%3D302525
>%26
>>
>state=05%7C01%7Celibr%40nvidia.com%7C0f1851b0a5e94982778108db
>3196
>>
>c6d8%7C43083d15727340c1b7db39efd9ccc17a%7C0%7C0%7C638158295884302
>138%7
>>
>CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJB
>TiI6Ik1
>>
>haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C=%2B1fCFn4UeTJpbVSr
>m3Sh5KqA7%
>> 2FG64u81oeBP9xLY0q8%3D=0
>> =*
>> [2]
>>
>https://nam11.safelinks.protection.outlook.com/?url=http%3A%2F%2Fpatch
>>
>work.ozlabs.org%2Fproject%2Fopenvswitch%2Flist%2F%3Fseries%3D310413
>%26
>>
>state=05%7C01%7Celibr%40nvidia.com%7C0f1851b0a5e94982778108db
>3196
>>
>c6d8%7C43083d15727340c1b7db39efd9ccc17a%7C0%7C0%7C638158295884302
>138%7
>>
>CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJB
>TiI6Ik1
>>
>haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C=nywcgXWj2VfgzYW5D
>DgnJWm002r2
>> Jv03VDShjkToMiA%3D=0
>> =*
>> [3]
>>
>https://nam11.safelinks.protection.outlook.com/?url=http%3A%2F%2Fpatch
>>
>work.ozlabs.org%2Fproject%2Fopenvswitch%2Flist%2F%3Fseries%3D310415
>%26
>>
>state=05%7C01%7Celibr%40nvidia.com%7C0f1851b0a5e94982778108db
>3196
>>
>c6d8%7C43083d15727340c1b7db39efd9ccc17a%7C0%7C0%7C638158295884302
>138%7
>>
>CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJB
>TiI6Ik1
>>
>haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C=9Ee5Qz9yCwx%2BHcF6
>f%2BT3F8Oo
>> NwZyYYcinT10wh7BG%2F0%3D=0
>> =*
>> [4]
>>
>https://nam11.safelinks.protection.outlook.com/?url=http%3A%2F%2Fpatch
>>
>work.ozlabs.org%2Fproject%2

Re: [ovs-dev] [PATCH dpdk-latest v3 0/6] Add support for DPDK meter HW offload

2023-03-30 Thread Eli Britstein via dev
Hi

The usage of the proxy port is wrong.
All transfer offload should be migrated to use it. There was [1-4], but it was 
not completed.

Meters should be created only on the proxy ports. As all offloads are moved to 
use proxy ports, this is how they are shared.
Since we don't know the proxy port when the meter is added, it can be lazily 
created upon the first flow that uses this meter/proxy-port (reference counted), 
and destroyed when the last flow using it is destroyed.
As there might be multiple "proxy" ports, there might be multiple HW meter 
objects associated with the same SW one. This should be managed.

[1] http://patchwork.ozlabs.org/project/openvswitch/list/?series=302525&state=*
[2] http://patchwork.ozlabs.org/project/openvswitch/list/?series=310413&state=*
[3] http://patchwork.ozlabs.org/project/openvswitch/list/?series=310415&state=*
[4] http://patchwork.ozlabs.org/project/openvswitch/list/?series=342885&state=*

Thanks,
Eli

>-Original Message-
>From: Simon Horman 
>Sent: Thursday, 30 March 2023 14:21
>To: d...@openvswitch.org
>Cc: Ilya Maximets ; Eli Britstein ;
>Chaoyong He ; Peng Zhang
>; Jin Liu ; oss-
>driv...@corigine.com
>Subject: [PATCH dpdk-latest v3 0/6] Add support for DPDK meter HW offload
>
>External email: Use caution opening links or attachments
>
>
>Hi,
>
>this series adds support for DPDK meter HW offload.
>
>Changes between v2 and v3.
>* Use common API for DPDK and non-DPDK meter offloads
>* Make use of netdev_ports_traverse to offload the meter
>* Add dpdk-latest to subject prefix
>
>Changes between v1 and v2:
>* Add the prox mechanism: add the meter by proxy id
>* Change the offload interface from netdev-dpdk to the netdev-offload
>* Changed base to dpdk-latest branch
>
>Peng Zhang (6):
>  netdev-offload-dpdk: use flow transfer proxy
>  netdev-offload: Let meter offload API can be used with DPDK
>  dpif-netdev: Offloading meter with DPDK
>  netdev-offload-dpdk: Implement meter offload API for DPDK
>  netdev-dpdk: add meter algorithms
>  netdev-dpdk-offload: Add support for meter action
>
> Documentation/howto/dpdk.rst  |   5 +-
> lib/dpif-netdev.c |  22 ++-
> lib/netdev-dpdk.c | 306 +-
> lib/netdev-dpdk.h |  43 +
> lib/netdev-offload-dpdk.c |  97 +++
> lib/netdev-offload-provider.h |  21 ++-
> lib/netdev-offload-tc.c   |   9 +-
> lib/netdev-offload.c  | 135 ++-
> lib/netdev-offload.h  |   9 +
> 9 files changed, 633 insertions(+), 14 deletions(-)
>
>--
>2.30.2

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v4 3/3] netdev-offload-dpdk: use flow transfer proxy mechanism

2023-02-21 Thread Eli Britstein via dev



>-Original Message-
>From: Ivan Malov 
>Sent: Tuesday, 21 February 2023 2:41
>To: ovs-dev@openvswitch.org
>Cc: Ilya Maximets ; Eli Britstein ; Ori
>Kam ; David Marchand 
>Subject: [PATCH v4 3/3] netdev-offload-dpdk: use flow transfer proxy
>mechanism
>
>External email: Use caution opening links or attachments
>
>
>Manage "transfer" flows via the corresponding mechanism.
>Doing so requires that the traffic source be specified explicitly, via the
>corresponding pattern item.
>
>Signed-off-by: Ivan Malov 
>---
> lib/netdev-dpdk.c | 88 +++
> lib/netdev-dpdk.h |  4 +-
> lib/netdev-offload-dpdk.c | 55 +++-
> 3 files changed, 117 insertions(+), 30 deletions(-)
>
>diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 2cebc3cca..3a9c9d9a0
>100644
>--- a/lib/netdev-dpdk.c
>+++ b/lib/netdev-dpdk.c
>@@ -434,6 +434,7 @@ enum dpdk_hw_ol_features {
>
> struct netdev_dpdk {
> PADDED_MEMBERS_CACHELINE_MARKER(CACHE_LINE_SIZE, cacheline0,
>+dpdk_port_t flow_transfer_proxy_port_id;
This extra field here makes it overflow one cache line.
> dpdk_port_t port_id;
>
> /* If true, device was attached by rte_eth_dev_attach(). */ @@ -1183,6
>+1184,7 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
> uint32_t rx_chksm_offload_capa = RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
>  RTE_ETH_RX_OFFLOAD_TCP_CKSUM |
>  RTE_ETH_RX_OFFLOAD_IPV4_CKSUM;
>+int ret;
>
> /*
>  * Full tunnel offload requires that tunnel ID metadata be @@ -1194,6
>+1196,24 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
>  */
> dpdk_eth_dev_init_rx_metadata(dev);
>
>+/*
>+ * Managing "transfer" flows requires that the user communicate them
>+ * via a port which has the privilege to control the embedded switch.
>+ * For some vendors, all ports in a given switching domain have
>+ * this privilege. For other vendors, it's only one port.
>+ *
>+ * Get the proxy port ID and remember it for later use.
>+ */
>+ret = rte_flow_pick_transfer_proxy(dev->port_id,
>+   &dev->flow_transfer_proxy_port_id, 
>NULL);
>+if (ret != 0) {
>+/*
>+ * The PMD does not indicate the proxy port.
>+ * Assume the proxy is unneeded.
>+ */
>+dev->flow_transfer_proxy_port_id = dev->port_id;
>+}
>+
> rte_eth_dev_info_get(dev->port_id, &info);
>
> if (strstr(info.driver_name, "vf") != NULL) { @@ -3981,8 +4001,10 @@
>netdev_dpdk_detach(struct unixctl_conn *conn, int argc OVS_UNUSED,
>const char *argv[], void *aux OVS_UNUSED)  {
> struct ds used_interfaces = DS_EMPTY_INITIALIZER;
>+struct netdev_dpdk *dev_self = NULL;
> struct rte_eth_dev_info dev_info;
> dpdk_port_t sibling_port_id;
>+struct netdev_dpdk *dev;
> dpdk_port_t port_id;
> bool used = false;
> char *response;
>@@ -4000,8 +4022,6 @@ netdev_dpdk_detach(struct unixctl_conn *conn, int
>argc OVS_UNUSED,
>   argv[1]);
>
> RTE_ETH_FOREACH_DEV_SIBLING (sibling_port_id, port_id) {
>-struct netdev_dpdk *dev;
>-
> LIST_FOR_EACH (dev, list_node, _list) {
> if (dev->port_id != sibling_port_id) {
> continue;
>@@ -4021,6 +4041,25 @@ netdev_dpdk_detach(struct unixctl_conn *conn,
>int argc OVS_UNUSED,
> }
> ds_destroy(&used_interfaces);
>
>+/*
>+ * The device being detached may happen to be a flow proxy port
>+ * for another device (still attached). If so, do not allow to
>+ * detach. Devices dependent on this one must be detached first.
>+ */
>+LIST_FOR_EACH (dev, list_node, _list) {
>+if (dev->port_id == port_id) {
>+dev_self = dev;
>+} else if (dev->flow_transfer_proxy_port_id == port_id) {
>+response = xasprintf("Device '%s' can not be detached (flow 
>proxy)",
>+ argv[1]);
This is not acceptable.
When removing a port, we clean the offloads using 
netdev_offload_dpdk_flow_flush().
It should be enhanced so that, if the proxy port is detached, the offloads of 
all the ports that used it are removed as well.
There is a related patch proposed in [1].
[1] 
http://patchwork.ozlabs.org/project/openvswitch/patch/20220905144603.3585105-1-el...@nvidia.com/

>+goto error;
>+}
>+}
>+
>+/* Indicate that the device being detached no longer needs a flow proxy.
>*/
>+if (dev_self != NULL)
>+dev_self->flow_transfer_proxy

Re: [ovs-dev] [PATCH] netdev-offload-dpdk: Fix transfer flows.

2023-01-11 Thread Eli Britstein via dev



>-Original Message-
>From: David Marchand 
>Sent: Wednesday, 11 January 2023 10:53
>To: Ivan Malov ; Eli Britstein ;
>d...@openvswitch.org
>Cc: Ilya Maximets ; Ori Kam ;
>Andrew Rybchenko ; Ian Stokes
>
>Subject: [PATCH] netdev-offload-dpdk: Fix transfer flows.
>
>External email: Use caution opening links or attachments
>
>
>Following DPDK commit bd2a4d4b2e3a ("ethdev: forbid direction attribute in
>transfer flow rules"), the ingress attribute presence is rejected for transfer
>flow.
Either "... for *a* transfer flow" or "... for transfer flow*s*"
Other than that,
Acked-by: Eli Britstein 
>
>Fixes: a77c7796f23a ("dpdk: Update to use v22.11.1.")
>Signed-off-by: David Marchand 
>---
> lib/netdev-offload-dpdk.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
>diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c index
>38f00fd309..b3421c0996 100644
>--- a/lib/netdev-offload-dpdk.c
>+++ b/lib/netdev-offload-dpdk.c
>@@ -2242,7 +2242,7 @@ netdev_offload_dpdk_actions(struct netdev
>*netdev,
> struct nlattr *nl_actions,
> size_t actions_len)  {
>-const struct rte_flow_attr flow_attr = { .ingress = 1, .transfer = 1 };
>+const struct rte_flow_attr flow_attr = { .transfer = 1, };
> struct flow_actions actions = {
> .actions = NULL,
> .cnt = 0,
>--
>2.39.0

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v3 0/3] Rework the usage of DPDK transfer flow offloads

2023-01-11 Thread Eli Britstein via dev



>-Original Message-
>From: David Marchand 
>Sent: Wednesday, 11 January 2023 10:32
>To: Eli Britstein ; Ivan Malov 
>Cc: Ilya Maximets ; d...@openvswitch.org; Stephen
>Hemminger ; Ori Kam ;
>Maxime Coquelin ; Ian Stokes
>; Andrew Rybchenko
>
>Subject: Re: [PATCH v3 0/3] Rework the usage of DPDK transfer flow offloads
>
>External email: Use caution opening links or attachments
>
>
>On Wed, Jan 11, 2023 at 9:16 AM Eli Britstein  wrote:
>>
>>
>>
>> >-Original Message-
>> >From: David Marchand 
>> >Sent: Wednesday, 11 January 2023 9:59
>> >To: Ivan Malov 
>> >Cc: Ilya Maximets ; Eli Britstein
>> >; d...@openvswitch.org; Stephen Hemminger
>> >; Ori Kam ; Maxime
>> >Coquelin ; Ian Stokes
>> >; Andrew Rybchenko
>> >
>> >Subject: Re: [PATCH v3 0/3] Rework the usage of DPDK transfer flow
>> >offloads
>> >
>> >External email: Use caution opening links or attachments
>> >
>> >
>> >Hi Ivan,
>> >
>> >On Fri, Jul 22, 2022 at 12:02 PM Ivan Malov 
>wrote:
>> >>
>> >> Hi Ilya, Eli,
>> >>
>> >> Many thanks for your feedback. Please see below.
>> >>
>> >> On Wed, 20 Jul 2022, Ilya Maximets wrote:
>> >>
>> >> > On 7/20/22 14:18, Ivan Malov wrote:
>> >> >> DPDK has got support for offloads involving assignment of
>> >> >> per-packet Rx metadata (flag, mark, tunnel ID and the likes).
>> >> >> However, delivery of such metadata from the NIC to the PMD might
>> >> >> need to be negotiated in advance. API [1] addresses that
>> >> >> problem. Make
>> >OvS invoke this API.
>> >> >>
>> >> >> Another problem is how flow rules with attribute "transfer"
>> >> >> refer to embedded switch ports by means of their representors.
>> >> >> Action PORT_ID has proved ambiguous: it refers to a DPDK ethdev,
>> >> >> but in fact steers packets to the entity represented by the
>> >> >> ethdev, in example,
>> >to a VF.
>> >> >> The problem is addressed by [2]. Use the solution in OvS accordingly.
>> >> >>
>> >> >> In addition, [2] and [3] address filtering traffic by input
>> >> >> ports of the embedded switch. In the suggested approach,
>> >> >> "transfer" rules are managed via the only ethdev with sufficient
>> >> >> privileges, whilst match criteria include an explicit item to indicate 
>> >> >> the
>desired input port.
>> >> >> Revisit OvS support for "transfer" rules to follow the said approach.
>> >> >>
>> >> >> The following tests have been considered so far:
>> >> >> - build check with the current dpdk-next-net;
>> >> >> - running "make check" for every patch;
>> >> >> - tunnel offload demo with net/sfc PMD.
>> >> >>
>> >> >> [1]
>> >> >>
>>
>>https://nam11.safelinks.protection.outlook.com/?url=http%3A%2F%2Fma%
>2
>>
>>F=05%7C01%7Celibr%40nvidia.com%7C655010753f9c45488f9e08daf3ae
>4c5
>>
>>2%7C43083d15727340c1b7db39efd9ccc17a%7C0%7C0%7C63809022718993219
>7%7CU
>>
>>nknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTi
>I6Ik1h
>>
>>aWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C=CZvqD1p6gHMqgVhyF
>uizcskk6Pwd
>> >GD3N4SLgr27DeZE%3D=0
>> >> >> ils.dpdk.org%2Farchives%2Fdev%2F2021-
>> >October%2F224291.html=05%
>> >> >>
>>
>>7C01%7Celibr%40nvidia.com%7C9f047907e3d54640a11f08daf3a9b52a%7C430
>8
>> >> >>
>>
>>3d15727340c1b7db39efd9ccc17a%7C0%7C0%7C638090207473941055%7CUnk
>n
>> >own
>> >> >>
>>
>>%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1ha
>W
>> >wi
>> >> >>
>>
>>LCJXVCI6Mn0%3D%7C3000%7C%7C%7C=OwaqM%2BGfklo73Lb%2BJI
>p
>> >5vcH0DH
>> >> >> F%2FL6PZCbrvEgupwWc%3D=0 [2]
>> >> >>
>>
>>https://nam11.safelinks.protection.outlook.com/?url=http%3A%2F%2Fma%
>2
>>
>>F=05%7C01%7Celibr%40nvidia.com%7C655010753f9c45488f9e08daf3ae
>4c5
>>
>>2%7C43083d15727340c1b7db39efd9ccc17a%7C0%7C0%7C63809022718993219
>7%7CU
>>
>>nknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAw

Re: [ovs-dev] [PATCH v3 0/3] Rework the usage of DPDK transfer flow offloads

2023-01-11 Thread Eli Britstein via dev



>-Original Message-
>From: David Marchand 
>Sent: Wednesday, 11 January 2023 9:59
>To: Ivan Malov 
>Cc: Ilya Maximets ; Eli Britstein ;
>d...@openvswitch.org; Stephen Hemminger
>; Ori Kam ; Maxime
>Coquelin ; Ian Stokes
>; Andrew Rybchenko
>
>Subject: Re: [PATCH v3 0/3] Rework the usage of DPDK transfer flow offloads
>
>External email: Use caution opening links or attachments
>
>
>Hi Ivan,
>
>On Fri, Jul 22, 2022 at 12:02 PM Ivan Malov  wrote:
>>
>> Hi Ilya, Eli,
>>
>> Many thanks for your feedback. Please see below.
>>
>> On Wed, 20 Jul 2022, Ilya Maximets wrote:
>>
>> > On 7/20/22 14:18, Ivan Malov wrote:
>> >> DPDK has got support for offloads involving assignment of
>> >> per-packet Rx metadata (flag, mark, tunnel ID and the likes).
>> >> However, delivery of such metadata from the NIC to the PMD might
>> >> need to be negotiated in advance. API [1] addresses that problem. Make
>OvS invoke this API.
>> >>
>> >> Another problem is how flow rules with attribute "transfer" refer
>> >> to embedded switch ports by means of their representors. Action
>> >> PORT_ID has proved ambiguous: it refers to a DPDK ethdev, but in
>> >> fact steers packets to the entity represented by the ethdev, in example,
>to a VF.
>> >> The problem is addressed by [2]. Use the solution in OvS accordingly.
>> >>
>> >> In addition, [2] and [3] address filtering traffic by input ports
>> >> of the embedded switch. In the suggested approach, "transfer" rules
>> >> are managed via the only ethdev with sufficient privileges, whilst
>> >> match criteria include an explicit item to indicate the desired input 
>> >> port.
>> >> Revisit OvS support for "transfer" rules to follow the said approach.
>> >>
>> >> The following tests have been considered so far:
>> >> - build check with the current dpdk-next-net;
>> >> - running "make check" for every patch;
>> >> - tunnel offload demo with net/sfc PMD.
>> >>
>> >> [1]
>> >>
>https://nam11.safelinks.protection.outlook.com/?url=http%3A%2F%2Fma
>> >> ils.dpdk.org%2Farchives%2Fdev%2F2021-
>October%2F224291.html=05%
>> >>
>7C01%7Celibr%40nvidia.com%7C9f047907e3d54640a11f08daf3a9b52a%7C4308
>> >>
>3d15727340c1b7db39efd9ccc17a%7C0%7C0%7C638090207473941055%7CUnkn
>own
>> >>
>%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haW
>wi
>> >>
>LCJXVCI6Mn0%3D%7C3000%7C%7C%7C=OwaqM%2BGfklo73Lb%2BJIp
>5vcH0DH
>> >> F%2FL6PZCbrvEgupwWc%3D=0 [2]
>> >>
>https://nam11.safelinks.protection.outlook.com/?url=http%3A%2F%2Fma
>> >> ils.dpdk.org%2Farchives%2Fdev%2F2021-
>October%2F224620.html=05%
>> >>
>7C01%7Celibr%40nvidia.com%7C9f047907e3d54640a11f08daf3a9b52a%7C4308
>> >>
>3d15727340c1b7db39efd9ccc17a%7C0%7C0%7C638090207473941055%7CUnkn
>own
>> >>
>%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haW
>wi
>> >>
>LCJXVCI6Mn0%3D%7C3000%7C%7C%7C=WF%2FsuI3pSCjMcHetlKbJze5
>xFon8
>> >> cnj%2F4A4YCd9QwWg%3D=0 [3]
>> >>
>https://nam11.safelinks.protection.outlook.com/?url=http%3A%2F%2Fma
>> >> ils.dpdk.org%2Farchives%2Fdev%2F2021-
>October%2F225081.html=05%
>> >>
>7C01%7Celibr%40nvidia.com%7C9f047907e3d54640a11f08daf3a9b52a%7C4308
>> >>
>3d15727340c1b7db39efd9ccc17a%7C0%7C0%7C638090207473941055%7CUnkn
>own
>> >>
>%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haW
>wi
>> >>
>LCJXVCI6Mn0%3D%7C3000%7C%7C%7C=AP07kpKyl3DPp01Rui7G7AIq
>%2B4SH
>> >> YrnlOAU30lNP%2BXg%3D=0
>> >> ---
>> >>  v1 -> v2: amendments to care about proxy detach and port ID
>> >> logging
>> >>  v2 -> v3: a minor adjustment in the cover letter's subject line
>> >>
>> >> Ivan Malov (3):
>> >>   netdev-dpdk: negotiate delivery of per-packet Rx metadata
>> >>   netdev-offload-dpdk: replace action PORT_ID with REPRESENTED_PORT
>> >>   netdev-offload-dpdk: use flow transfer proxy mechanism
>> >>
>> >>  lib/netdev-dpdk.c | 141 +-
>
>> >>  lib/netdev-dpdk.h |   4 +-
>> >>  lib/netdev-offload-dpdk.c |  91 +---
>> >>  3 files changed, 209 insertions(+), 27 deletions(-)
>> >>
>> >
>> 

Re: [ovs-dev] [PATCH 0/6] Add support for DPDK meter HW offload

2023-01-09 Thread Eli Britstein via dev


>-Original Message-
>From: Nole Zhang 
>Sent: Monday, 9 January 2023 11:23
>To: Eli Britstein ; d...@openvswitch.org
>Cc: Eelco Chaudron ; Ilya Maximets
>; Chaoyong He ; oss-
>drivers 
>Subject: RE: [PATCH 0/6] Add support for DPDK meter HW offload
>
>External email: Use caution opening links or attachments
>
>
>> -Original Message-
>> From: Eli Britstein 
>> Sent: 2023年1月8日 15:27
>> To: Nole Zhang ; d...@openvswitch.org
>> Cc: Eelco Chaudron ; Ilya Maximets
>> ; Chaoyong He ; oss-
>> drivers 
>> Subject: RE: [PATCH 0/6] Add support for DPDK meter HW offload
>>
>>
>>
>> >-Original Message-
>> >From: Nole Zhang 
>> >Sent: Friday, 6 January 2023 11:28
>> >To: Eli Britstein ; d...@openvswitch.org
>> >Cc: Eelco Chaudron ; Ilya Maximets
>> >; Chaoyong He ; oss-
>> >drivers ; Nole Zhang
>> >
>> >Subject: RE: [PATCH 0/6] Add support for DPDK meter HW offload
>> >
>> >External email: Use caution opening links or attachments
>> >
>> >
>> >> -Original Message-
>> >> From: Eli Britstein 
>> >> Sent: 2022年12月26日 18:04
>> >> To: Simon Horman ;
>> d...@openvswitch.org
>> >> Cc: Eelco Chaudron ; Ilya Maximets
>> >> ; Chaoyong He ;
>oss-
>> >> drivers ; Nole Zhang
>> >> 
>> >> Subject: RE: [PATCH 0/6] Add support for DPDK meter HW offload
>> >>
>> >> Dpif-netdev should not implement internal HW offload details. If
>> >> need to "apply on all ports", it needs to be done in offload layer.
>> >> However, in arch level, there is a problem with the proposed series.
>> >> It will create a meter object per port, while in SW it is one
>> >> object, that can be shared between multiple flows, on different ports.
>> >
>> >In dpif-netdev, it doesn't relate with implement internal HW offload
>> >details, I just try to add the meter to the PMD if the PMD support
>> >the meter
>> offload.
>> [Eli Britstein] your loops over ports, not over PMDs. See in [1], for
>> example in
>> dpif_netdev_offload_meter_set():
>> +HMAP_FOR_EACH (port, node, >ports) {
>> +dev = port->netdev;
>> Am I wrong?
>
>Thanks for your notice, yes, as ovs code, it will add the meter in different 
>port.
>
>As our design,  for different port, if the different port has the same PMD with
>the same meter id, it will just  add the meter successfully once in the dpdk 
>and
>it can achieve sharing the same NIC different vf.
>
>If I add the judge for the PMD, for different PMD, just add the meter once, do
>you think it is ok?
No. Even if you improve the code to create "once". Suppose the PF is port 0, 
and 2 VF representors are ports 1,2.
If the meter is created on port 1, using it with ports 0,2 is illegal from DPDK 
generic point of view. It might be supported depending on specific PMD support.
What will happen if port 1 is detached from OVS for example?
I think it has to be on the "proxy" port, see below.
>
>>
>> Other than that, there is already a convenient API to traverse ports
>> for offload - netdev_ports_traverse().
>
>Ok, thanks, I will investigate it.
>
>> >
>> >No, it will create a meter object per PMD not per port, so the meter
>> >can share the same NIC different vf,  different NIC can't share  the
>> >meter, it is same with ovs-tc meter offload
>> [Eli Britstein] no, it will create an object per port, as this is your code.
>> To create a shared object for all the VFs in the same NIC, need to use
>> the "proxy" port. Such work has started in [2].
>>
>> [1]
>>
>https://nam11.safelinks.protection.outlook.com/?url=http%3A%2F%2Fpatch
>>
>work.ozlabs.org%2Fproject%2Fopenvswitch%2Fpatch%2F20221216155054.9&
>dat
>>
>a=05%7C01%7Celibr%40nvidia.com%7C6ecc84b885334be08f7d08daf2230e2a%
>7C43
>>
>083d15727340c1b7db39efd9ccc17a%7C0%7C0%7C638088529626823112%7CUn
>known%
>>
>7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwi
>LCJX
>>
>VCI6Mn0%3D%7C3000%7C%7C%7C=rcM52oDqpL%2F%2BewlX8ZRZSTL
>mJslYBK7SI
>> 4c9BX3ocY8%3D=0
>> 86464-3-simon.hor...@corigine.com/
>> [2]
>>
>https://nam11.safelinks.protection.outlook.com/?url=http%3A%2F%2Fpatch
>>
>work.ozlabs.org%2Fproject%2Fopenvswitch%2Fpatch%2F20220720121823.2&
>dat
>>
>a=05%7C01%7Celibr%40nvidia.com%7C6ecc84b885334be08f7d08daf2230e2a%
>7C43
>>
>083d15727340c1b7db39efd9ccc17a%7C0%7C0

Re: [ovs-dev] [PATCH 0/6] Add support for DPDK meter HW offload

2023-01-07 Thread Eli Britstein via dev


>-Original Message-
>From: Nole Zhang 
>Sent: Friday, 6 January 2023 11:28
>To: Eli Britstein ; d...@openvswitch.org
>Cc: Eelco Chaudron ; Ilya Maximets
>; Chaoyong He ; oss-
>drivers ; Nole Zhang 
>Subject: RE: [PATCH 0/6] Add support for DPDK meter HW offload
>
>External email: Use caution opening links or attachments
>
>
>> -Original Message-
>> From: Eli Britstein 
>> Sent: 2022年12月26日 18:04
>> To: Simon Horman ; d...@openvswitch.org
>> Cc: Eelco Chaudron ; Ilya Maximets
>> ; Chaoyong He ; oss-
>> drivers ; Nole Zhang
>> 
>> Subject: RE: [PATCH 0/6] Add support for DPDK meter HW offload
>>
>> Dpif-netdev should not implement internal HW offload details. If need
>> to "apply on all ports", it needs to be done in offload layer.
>> However, in arch level, there is a problem with the proposed series.
>> It will create a meter object per port, while in SW it is one object,
>> that can be shared between multiple flows, on different ports.
>
>In dpif-netdev, it doesn't relate with implement internal HW offload details, I
>just try to add the meter to the PMD if the PMD support the meter offload.
[Eli Britstein] your code loops over ports, not over PMDs. See [1], for example 
in dpif_netdev_offload_meter_set():
+HMAP_FOR_EACH (port, node, >ports) {
+dev = port->netdev;
Am I wrong?

Other than that, there is already a convenient API to traverse ports for 
offload - netdev_ports_traverse().
>
>No, it will create a meter object per PMD not per port, so the meter can share
>the same NIC different vf,  different NIC can't share  the meter, it is same 
>with
>ovs-tc meter offload
[Eli Britstein] no, it will create an object per port, as this is your code.
To create a shared object for all the VFs in the same NIC, need to use the 
"proxy" port. Such work has started in [2].

[1] 
http://patchwork.ozlabs.org/project/openvswitch/patch/20221216155054.986464-3-simon.hor...@corigine.com/
[2] 
http://patchwork.ozlabs.org/project/openvswitch/patch/20220720121823.2497727-4-ivan.ma...@oktetlabs.ru/

>>
>> >-Original Message-
>> >From: Simon Horman 
>> >Sent: Friday, 16 December 2022 17:51
>> >To: d...@openvswitch.org
>> >Cc: Eelco Chaudron ; Ilya Maximets
>> >; Eli Britstein ; Chaoyong He
>> >; oss-driv...@corigine.com; Peng Zhang
>> >; Simon Horman
>> 
>> >Subject: [PATCH 0/6] Add support for DPDK meter HW offload
>> >
>> >External email: Use caution opening links or attachments
>> >
>> >
>> >Hi,
>> >
>> >this series adds support for DPDK meter HW offload
>> >
>> >* Patch 1/6: Add netdev provider API for HW offload of DPDK meters
>> >* Patch 2/6: Add DPIF API to offload OpenFlow meters to DPDK
>> >* Patch 3/6: Implement netdev provider API for HW offload of DPDK
>> >meters
>> >* Patch 4/6: Add more DPDK meter algorithms
>> >* Patch 5/6: Add support for meter action to DPDK HW offload
>> >* Patch 6/6: Add CI builds with ALLOW_EXPERIMENTAL_API
>> >
>> >Peng Zhang (6):
>> >  netdev-offload: Add DPDK meter offload API
>> >  dpif-netdev: Offloading meter with DPDK
>> >  netdev-offload-dpdk: Implement meter offload API for DPDK
>> >  netdev-dpdk: add meter algorithms
>> >  netdev-dpdk-offload: Add support for meter action
>> >  ci: add the opts about ALLOW_EXPERIMENTAL_API
>> >
>> > .ci/linux-build.sh   |   4 +
>> > .github/workflows/build-and-test.yml |  31 
>> > Documentation/howto/dpdk.rst |   5 +-
>> > lib/dpif-netdev.c| 102 +++
>> > lib/netdev-dpdk.c| 243 +++
>> > lib/netdev-dpdk.h|  41 +
>> > lib/netdev-offload-dpdk.c| 101 +++
>> > lib/netdev-offload-provider.h|  30 
>> > lib/netdev-offload.c |  59 +++
>> > lib/netdev-offload.h |   9 +
>> > 10 files changed, 623 insertions(+), 2 deletions(-)
>> >
>> >--
>> >2.30.2

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 0/6] Add support for DPDK meter HW offload

2022-12-26 Thread Eli Britstein via dev
Dpif-netdev should not implement internal HW offload details. If need to "apply 
on all ports", it needs to be done in offload layer.
However, in arch level, there is a problem with the proposed series. It will 
create a meter object per port, while in SW it is one object, that can be 
shared between multiple flows, on different ports.

>-Original Message-
>From: Simon Horman 
>Sent: Friday, 16 December 2022 17:51
>To: d...@openvswitch.org
>Cc: Eelco Chaudron ; Ilya Maximets
>; Eli Britstein ; Chaoyong He
>; oss-driv...@corigine.com; Peng Zhang
>; Simon Horman 
>Subject: [PATCH 0/6] Add support for DPDK meter HW offload
>
>External email: Use caution opening links or attachments
>
>
>Hi,
>
>this series adds support for DPDK meter HW offload
>
>* Patch 1/6: Add netdev provider API for HW offload of DPDK meters
>* Patch 2/6: Add DPIF API to offload OpenFlow meters to DPDK
>* Patch 3/6: Implement netdev provider API for HW offload of DPDK meters
>* Patch 4/6: Add more DPDK meter algorithms
>* Patch 5/6: Add support for meter action to DPDK HW offload
>* Patch 6/6: Add CI builds with ALLOW_EXPERIMENTAL_API
>
>Peng Zhang (6):
>  netdev-offload: Add DPDK meter offload API
>  dpif-netdev: Offloading meter with DPDK
>  netdev-offload-dpdk: Implement meter offload API for DPDK
>  netdev-dpdk: add meter algorithms
>  netdev-dpdk-offload: Add support for meter action
>  ci: add the opts about ALLOW_EXPERIMENTAL_API
>
> .ci/linux-build.sh   |   4 +
> .github/workflows/build-and-test.yml |  31 
> Documentation/howto/dpdk.rst |   5 +-
> lib/dpif-netdev.c| 102 +++
> lib/netdev-dpdk.c| 243 +++
> lib/netdev-dpdk.h|  41 +
> lib/netdev-offload-dpdk.c| 101 +++
> lib/netdev-offload-provider.h|  30 
> lib/netdev-offload.c |  59 +++
> lib/netdev-offload.h |   9 +
> 10 files changed, 623 insertions(+), 2 deletions(-)
>
>--
>2.30.2

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V2 7/8] ofproto: Fix 'reply.type' may be used uninitialized

2022-11-16 Thread Eli Britstein via dev
../ofproto/ofproto.c: In function 'handle_openflow':
../lib/ofp-bundle.c:195:15: error: 'reply.type' may be used uninitialized in 
this function [-Werror=maybe-uninitialized]
  195 | m->type = htons(msg->type);
  |   ^
../ofproto/ofproto.c:8460:36: note: 'reply.type' was declared here
 8460 | struct ofputil_bundle_ctrl_msg reply;
  |^

Fixes: 777af88d50b8 ("Add basic implementation for OpenFlow 1.4 bundles")
Signed-off-by: Eli Britstein 
---
 ofproto/ofproto.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index 3a527683c..5ba1b55fb 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -8471,6 +8471,7 @@ handle_bundle_control(struct ofconn *ofconn, const struct 
ofp_header *oh)
 return error;
 }
 reply.flags = 0;
+reply.type = 0;
 reply.bundle_id = bctrl.bundle_id;
 
 switch (bctrl.type) {
-- 
2.26.2.1730.g385c171

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V2 6/8] netlink: Fix writing bytes into a region of size 0 overflows the destination

2022-11-16 Thread Eli Britstein via dev
With --enable-Werror and --with-dpdk=no, with gcc (Ubuntu 11.2.0-19ubuntu1),
the following warnings (errors) are emitted. Those are reported
in [1] to be a GCC bug. Work around it.

In function 'memset',
inlined from 'nl_msg_put_uninit' at ../lib/netlink.c:212:9,
inlined from 'nl_msg_put_unspec_uninit' at ../lib/netlink.c:248:26,
inlined from 'nl_msg_put_unspec' at ../lib/netlink.c:276:11,
inlined from 'nl_msg_put_u8' at ../lib/netlink.c:294:5:
/usr/include/x86_64-linux-gnu/bits/string_fortified.h:59:10: error: 
'__builtin_memset' writing 3 bytes into a region of size 0 overflows the 
destination [-Werror=stringop-overflow=]
   59 |   return __builtin___memset_chk (__dest, __ch, __len,
  |  ^
In function 'memset',
inlined from 'nl_msg_put_uninit' at ../lib/netlink.c:212:9,
inlined from 'nl_msg_put_unspec_uninit' at ../lib/netlink.c:248:26,
inlined from 'nl_msg_put_unspec' at ../lib/netlink.c:276:11,
inlined from 'nl_msg_put_u16' at ../lib/netlink.c:302:5:
/usr/include/x86_64-linux-gnu/bits/string_fortified.h:59:10: error: 
'__builtin_memset' writing 2 bytes into a region of size 0 overflows the 
destination [-Werror=stringop-overflow=]
   59 |   return __builtin___memset_chk (__dest, __ch, __len,
  |  ^

[1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92718

Signed-off-by: Eli Britstein 
---
 lib/netlink.c | 16 ++--
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/lib/netlink.c b/lib/netlink.c
index 6215282d6..c30620fdb 100644
--- a/lib/netlink.c
+++ b/lib/netlink.c
@@ -200,6 +200,14 @@ nl_msg_put(struct ofpbuf *msg, const void *data, size_t 
size)
 memcpy(nl_msg_put_uninit(msg, size), data, size);
 }
 
+static void
+nl_msg_zero_pad(char *pad_ptr, size_t pad_size)
+{
+while (pad_size--) {
+*pad_ptr++ = 0;
+}
+}
+
 /* Appends 'size' bytes of data, plus Netlink padding if needed, to the tail
  * end of 'msg', reallocating and copying its data if necessary.  Returns a
  * pointer to the first byte of the new data, which is left uninitialized. */
@@ -208,9 +216,7 @@ nl_msg_put_uninit(struct ofpbuf *msg, size_t size)
 {
 size_t pad = PAD_SIZE(size, NLMSG_ALIGNTO);
 char *p = ofpbuf_put_uninit(msg, size + pad);
-if (pad) {
-memset(p + size, 0, pad);
-}
+nl_msg_zero_pad(p + size, pad);
 return p;
 }
 
@@ -231,9 +237,7 @@ nl_msg_push_uninit(struct ofpbuf *msg, size_t size)
 {
 size_t pad = PAD_SIZE(size, NLMSG_ALIGNTO);
 char *p = ofpbuf_push_uninit(msg, size + pad);
-if (pad) {
-memset(p + size, 0, pad);
-}
+nl_msg_zero_pad(p + size, pad);
 return p;
 }
 
-- 
2.26.2.1730.g385c171

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V2 4/8] dpctl: Fix zone/limit may be used uninitialized

2022-11-16 Thread Eli Britstein via dev
With --enable-Werror and --with-dpdk=no:

../lib/dpctl.c: In function 'dpctl_ct_set_limits':
../lib/ct-dpif.c:698:22: error: 'zone' may be used uninitialized in this 
function [-Werror=maybe-uninitialized]
  698 | zone_limit->zone = zone;
  |  ^
../lib/dpctl.c:2139:18: note: 'zone' was declared here
 2139 | uint16_t zone;
  |  ^

../lib/ct-dpif.c:699:23: error: 'limit' may be used uninitialized in this 
function [-Werror=maybe-uninitialized]
  699 | zone_limit->limit = limit;
  |   ^
../lib/dpctl.c:2140:18: note: 'limit' was declared here
 2140 | uint32_t limit;
  |  ^

Fixes: 4eeec031d4c4 ("dpctl: Implement dpctl commands for conntrack per zone 
limit")
Signed-off-by: Eli Britstein 
---
 lib/dpctl.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/lib/dpctl.c b/lib/dpctl.c
index 29041fa3e..714bf3c4b 100644
--- a/lib/dpctl.c
+++ b/lib/dpctl.c
@@ -2136,8 +2136,9 @@ dpctl_ct_set_limits(int argc, const char *argv[],
 
 /* Parse ct zone limit tuples */
 while (i < argc) {
-uint16_t zone;
-uint32_t limit;
+uint32_t limit = 0;
+uint16_t zone = 0;
+
 if (!ct_dpif_parse_zone_limit_tuple(argv[i++], , , )) {
 error = EINVAL;
 goto error;
-- 
2.26.2.1730.g385c171

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V2 5/8] ovsdb: Fix 'table' may be used uninitialized

2022-11-16 Thread Eli Britstein via dev
With --enable-Werror and --with-dpdk=no:

../ovsdb/ovsdb.c: In function 'ovsdb_schema_from_json':
../ovsdb/ovsdb.c:246:9: error: 'table' may be used uninitialized in this 
function [-Werror=maybe-uninitialized]
  246 | shash_add(&schema->tables, table->name, table);
  | ^
../ovsdb/ovsdb.c:230:36: note: 'table' was declared here
  230 | struct ovsdb_table_schema *table;
  |^

Fixes: f85f8ebbfac9 ("Initial implementation of OVSDB.")
Signed-off-by: Eli Britstein 
---
 ovsdb/ovsdb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ovsdb/ovsdb.c b/ovsdb/ovsdb.c
index 1c011fab0..8e0bbc967 100644
--- a/ovsdb/ovsdb.c
+++ b/ovsdb/ovsdb.c
@@ -227,7 +227,7 @@ ovsdb_schema_from_json(const struct json *json, struct 
ovsdb_schema **schemap)
 schema = ovsdb_schema_create(json_string(name), version,
  cksum ? json_string(cksum) : "");
 SHASH_FOR_EACH (node, json_object(tables)) {
-struct ovsdb_table_schema *table;
+struct ovsdb_table_schema *table = NULL;
 
 if (node->name[0] == '_') {
 error = ovsdb_syntax_error(json, NULL, "names beginning with "
-- 
2.26.2.1730.g385c171

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V2 8/8] debian, rhel: Enable Werror option in spec files

2022-11-16 Thread Eli Britstein via dev
After resolving DPDK cast align warnings as stated in [1], and
resolving some more warnings in OVS side, enforce -Werror for debian and
rhel builds too.

[1] 0b6d2faace76 ("ci: Remove -Wno-cast-align from CI.")

Signed-off-by: Eli Britstein 
---
 debian/rules | 4 ++--
 rhel/openvswitch.spec.in | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/debian/rules b/debian/rules
index 971bc1775..ffc218e9d 100755
--- a/debian/rules
+++ b/debian/rules
@@ -24,7 +24,7 @@ override_dh_auto_configure:
cd _debian && ( \
test -e Makefile || \
../configure --prefix=/usr --localstatedir=/var --enable-ssl \
---sysconfdir=/etc \
+--sysconfdir=/etc --enable-Werror \
 $(DATAPATH_CONFIGURE_OPTS) \
 $(EXTRA_CONFIGURE_OPTS) \
 )
@@ -34,7 +34,7 @@ ifeq (,$(filter nodpdk, $(DEB_BUILD_OPTIONS)))
cd _dpdk && ( \
test -e Makefile || \
 ../configure --prefix=/usr --localstatedir=/var --enable-ssl \
- --with-dpdk=shared --sysconfdir=/etc \
+ --with-dpdk=shared --sysconfdir=/etc --enable-Werror \
 $(DATAPATH_CONFIGURE_OPTS) \
 $(EXTRA_CONFIGURE_OPTS) \
 )
diff --git a/rhel/openvswitch.spec.in b/rhel/openvswitch.spec.in
index 9903dd10a..35ae42356 100644
--- a/rhel/openvswitch.spec.in
+++ b/rhel/openvswitch.spec.in
@@ -70,7 +70,7 @@ Tailored Open vSwitch SELinux policy
 
 %build
 ./configure --prefix=/usr --sysconfdir=/etc --localstatedir=%{_localstatedir} \
---libdir=%{_libdir} --enable-ssl --enable-shared
+--libdir=%{_libdir} --enable-ssl --enable-shared --enable-Werror
 make %{_smp_mflags}
 make selinux-policy
 
-- 
2.26.2.1730.g385c171

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V2 3/8] ovs-ofctl: Fix 'usable_protocols' may be used uninitialized

2022-11-16 Thread Eli Britstein via dev
With --enable-Werror and --with-dpdk=no:

../utilities/ovs-ofctl.c: In function 'ofctl_group_mod_file':
../utilities/ovs-ofctl.c:3107:16: error: 'usable_protocols' may be used 
uninitialized in this function [-Werror=maybe-uninitialized]
 3107 | protocol = open_vconn_for_flow_mod(remote, &vconn, 
usable_protocols);
  |^
../utilities/ovs-ofctl.c:3124:27: note: 'usable_protocols' was declared here
 3124 | enum ofputil_protocol usable_protocols;
  |   ^

Fixes: 69185eb25acb ("ovs-ofctl: Only allow usable protocols for group 
commands")
Signed-off-by: Eli Britstein 
---
 utilities/ovs-ofctl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utilities/ovs-ofctl.c b/utilities/ovs-ofctl.c
index fe9114580..6805140d6 100644
--- a/utilities/ovs-ofctl.c
+++ b/utilities/ovs-ofctl.c
@@ -3121,7 +3121,7 @@ static void
 ofctl_group_mod_file(int argc OVS_UNUSED, char *argv[], int command)
 {
 struct ofputil_group_mod *gms = NULL;
-enum ofputil_protocol usable_protocols;
+enum ofputil_protocol usable_protocols = 0;
 size_t n_gms = 0;
 char *error;
 
-- 
2.26.2.1730.g385c171

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V2 0/8] Fix warnings and enable Werror

2022-11-16 Thread Eli Britstein via dev
Fixing various warnings and enable Werror for debian/rhel builds.

Eli Britstein (8):
  dp-packet: Fix dp-packet may be used uninitialized
  ofp-port: Fix 'strnlen' specified bound may exceed source size
  ovs-ofctl: Fix 'usable_protocols' may be used uninitialized
  dpctl: Fix zone/limit may be used uninitialized
  ovsdb: Fix 'table' may be used uninitialized
  netlink: Fix writing bytes into a region of size 0 overflows the
destination
  ofproto: Fix 'reply.type' may be used uninitialized
  debian, rhel: Enable Werror option in spec files

 debian/rules |  4 ++--
 lib/dp-packet.c  |  1 -
 lib/dp-packet.h  |  7 ---
 lib/dpctl.c  |  5 +++--
 lib/netlink.c| 16 ++--
 lib/ofp-port.c   |  3 ++-
 ofproto/ofproto.c|  1 +
 ovsdb/ovsdb.c|  2 +-
 rhel/openvswitch.spec.in |  2 +-
 utilities/ovs-ofctl.c|  2 +-
 10 files changed, 25 insertions(+), 18 deletions(-)

-- 
2.26.2.1730.g385c171

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V2 2/8] ofp-port: Fix 'strnlen' specified bound may exceed source size

2022-11-16 Thread Eli Britstein via dev
With --enable-Werror and --with-dpdk=no:

In function 'ovs_strlcpy',
inlined from 'ovs_strlcpy' at ../lib/util.c:377:1,
inlined from 'ofputil_port_to_string' at ../lib/ofp-port.c:273:9,
inlined from 'ofputil_port_from_string.part.0' at ../lib/ofp-port.c:170:13:
../lib/util.c:380:22: error: 'strnlen' specified bound 15 may exceed source 
size 11 [-Werror=stringop-overread]
  380 | size_t len = strnlen(src, size - 1);
  |  ^

Fixes: 28b114322856 ("ofp-util: New function ofputil_port_to_string().")
Signed-off-by: Eli Britstein 
---
 lib/ofp-port.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/ofp-port.c b/lib/ofp-port.c
index 16d587488..6e884cd3f 100644
--- a/lib/ofp-port.c
+++ b/lib/ofp-port.c
@@ -270,7 +270,8 @@ ofputil_port_to_string(ofp_port_t port,
 {
 const char *reserved_name = ofputil_port_get_reserved_name(port);
 if (reserved_name) {
-ovs_strlcpy(namebuf, reserved_name, bufsize);
+ovs_strlcpy(namebuf, reserved_name, MIN(strlen(reserved_name),
+bufsize));
 return;
 }
 
-- 
2.26.2.1730.g385c171

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V2 1/8] dp-packet: Fix dp-packet may be used uninitialized

2022-11-16 Thread Eli Britstein via dev
With --enable-Werror and --with-dpdk=no:

In function 'dp_packet_reset_offload',
inlined from 'dp_packet_init__' at ../lib/dp-packet.c:35:5,
inlined from 'dp_packet_use__' at ../lib/dp-packet.c:50:5,
inlined from 'dp_packet_use' at ../lib/dp-packet.c:60:5,
inlined from 'dp_packet_init' at ../lib/dp-packet.c:126:5,
inlined from 'dp_packet_new' at ../lib/dp-packet.c:154:5:
../lib/dp-packet.h:944:32: error: 'MEM[(uint32_t *)p_14 + 16B]' may be used 
uninitialized [-Werror=maybe-uninitialized]
  944 | *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_SUPPORTED_MASK;
  |^

Fixes: a47e2db209e4 ("dp-packet: Refactor offloading API.")
Signed-off-by: Eli Britstein 
---
 lib/dp-packet.c | 1 -
 lib/dp-packet.h | 7 ---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/dp-packet.c b/lib/dp-packet.c
index 4538d2a61..f654752a1 100644
--- a/lib/dp-packet.c
+++ b/lib/dp-packet.c
@@ -32,7 +32,6 @@ dp_packet_init__(struct dp_packet *b, size_t allocated, enum 
dp_packet_source so
 dp_packet_reset_offsets(b);
 pkt_metadata_init(&b->md, 0);
 dp_packet_reset_cutlen(b);
-dp_packet_reset_offload(b);
 /* Initialize implementation-specific fields of dp_packet. */
 dp_packet_init_specific(b);
 /* By default assume the packet type to be Ethernet. */
diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index 55eeaab2c..9864dfcbf 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -572,7 +572,8 @@ dp_packet_init_specific(struct dp_packet *p)
 {
 /* This initialization is needed for packets that do not come from DPDK
  * interfaces, when vswitchd is built with --with-dpdk. */
-p->mbuf.ol_flags = p->mbuf.tx_offload = p->mbuf.packet_type = 0;
+*dp_packet_ol_flags_ptr(p) = 0;
+p->mbuf.tx_offload = p->mbuf.packet_type = 0;
 p->mbuf.nb_segs = 1;
 p->mbuf.next = NULL;
 }
@@ -638,9 +639,9 @@ dp_packet_set_allocated(struct dp_packet *b, uint16_t s)
 #else /* DPDK_NETDEV */
 
 static inline void
-dp_packet_init_specific(struct dp_packet *p OVS_UNUSED)
+dp_packet_init_specific(struct dp_packet *p)
 {
-/* There are no implementation-specific fields for initialization. */
+*dp_packet_ol_flags_ptr(p) = 0;
 }
 
 static inline void *
-- 
2.26.2.1730.g385c171

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] debian, rhel: Enable Werror option in spec files

2022-11-14 Thread Eli Britstein via dev



>-Original Message-
>From: Simon Horman 
>Sent: Monday, 14 November 2022 12:44
>To: Eli Britstein 
>Cc: d...@openvswitch.org; Ilya Maximets ; Simon
>Horman ; Salem Sol 
>Subject: Re: [PATCH] debian, rhel: Enable Werror option in spec files
>
>External email: Use caution opening links or attachments
>
>
>On Sun, Nov 13, 2022 at 04:46:23PM +0200, Eli Britstein wrote:
>> Following resolving DPDK cast align warnings as stated in [1], enforce
>> -Werror for RPM builds too.
>>
>> [1] 0b6d2faace76 ("ci: Remove -Wno-cast-align from CI.")
>>
>> Signed-off-by: Eli Britstein 
>
>Reviewed-by: Simon Horman 
>
>I'll let this sit a little longer for review to accumulate before applying.
Actually, I see CI fails with it, as there are some warnings that are now 
discovered. We need to fix them first.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH] debian, rhel: Enable Werror option in spec files

2022-11-13 Thread Eli Britstein via dev
Following resolving DPDK cast align warnings as stated in [1], enforce
-Werror for RPM builds too.

[1] 0b6d2faace76 ("ci: Remove -Wno-cast-align from CI.")

Signed-off-by: Eli Britstein 
---
 debian/rules | 4 ++--
 rhel/openvswitch.spec.in | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/debian/rules b/debian/rules
index 971bc1775..ffc218e9d 100755
--- a/debian/rules
+++ b/debian/rules
@@ -24,7 +24,7 @@ override_dh_auto_configure:
cd _debian && ( \
test -e Makefile || \
../configure --prefix=/usr --localstatedir=/var --enable-ssl \
---sysconfdir=/etc \
+--sysconfdir=/etc --enable-Werror \
 $(DATAPATH_CONFIGURE_OPTS) \
 $(EXTRA_CONFIGURE_OPTS) \
 )
@@ -34,7 +34,7 @@ ifeq (,$(filter nodpdk, $(DEB_BUILD_OPTIONS)))
cd _dpdk && ( \
test -e Makefile || \
 ../configure --prefix=/usr --localstatedir=/var --enable-ssl \
- --with-dpdk=shared --sysconfdir=/etc \
+ --with-dpdk=shared --sysconfdir=/etc --enable-Werror \
 $(DATAPATH_CONFIGURE_OPTS) \
 $(EXTRA_CONFIGURE_OPTS) \
 )
diff --git a/rhel/openvswitch.spec.in b/rhel/openvswitch.spec.in
index 9903dd10a..35ae42356 100644
--- a/rhel/openvswitch.spec.in
+++ b/rhel/openvswitch.spec.in
@@ -70,7 +70,7 @@ Tailored Open vSwitch SELinux policy
 
 %build
 ./configure --prefix=/usr --sysconfdir=/etc --localstatedir=%{_localstatedir} \
---libdir=%{_libdir} --enable-ssl --enable-shared
+--libdir=%{_libdir} --enable-ssl --enable-shared --enable-Werror
 make %{_smp_mflags}
 make selinux-policy
 
-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v4] netdev-offload-dpdk: Enhance the support of tunnel pop action

2022-11-10 Thread Eli Britstein via dev
Acked-by: Eli Britstein 

>-Original Message-
>From: Simon Horman 
>Sent: Wednesday, 9 November 2022 21:55
>To: d...@openvswitch.org
>Cc: Eelco Chaudron ; Ilya Maximets
>; Eli Britstein ; Chaoyong He
>; oss-driv...@corigine.com; Louis Peens
>; Simon Horman 
>Subject: [PATCH v4] netdev-offload-dpdk: Enhance the support of tunnel pop
>action
>
>External email: Use caution opening links or attachments
>
>
>From: Chaoyong He 
>
>Populate the 'is_ipv6' field of 'struct rte_flow_tunnel', which can be used in
>the implementation of tunnel pop action for DPDK PMD.
>
>Fixes: be56e063d028 ("netdev-offload-dpdk: Support tunnel pop action.")
>Signed-off-by: Chaoyong He 
>Reviewed-by: Louis Peens 
>Signed-off-by: Simon Horman 
>---
> lib/netdev-offload-dpdk.c | 14 ++
> 1 file changed, 10 insertions(+), 4 deletions(-)
>
>v4
>* Reworked changelog
>* Dropped other patches from patchset
>
>v3
>* Add fixes tag
>
>v2
>* No change
>
>diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c index
>80a64a6cc06a..38f00fd309e6 100644
>--- a/lib/netdev-offload-dpdk.c
>+++ b/lib/netdev-offload-dpdk.c
>@@ -1099,12 +1099,18 @@ vport_to_rte_tunnel(struct netdev *vport,
> const struct netdev_tunnel_config *tnl_cfg;
>
> memset(tunnel, 0, sizeof *tunnel);
>+
>+tnl_cfg = netdev_get_tunnel_config(vport);
>+if (!tnl_cfg) {
>+return -1;
>+}
>+
>+if (!IN6_IS_ADDR_V4MAPPED(&tnl_cfg->ipv6_dst)) {
>+tunnel->is_ipv6 = true;
>+}
>+
> if (!strcmp(netdev_get_type(vport), "vxlan")) {
> tunnel->type = RTE_FLOW_ITEM_TYPE_VXLAN;
>-tnl_cfg = netdev_get_tunnel_config(vport);
>-if (!tnl_cfg) {
>-return -1;
>-}
> tunnel->tp_dst = tnl_cfg->dst_port;
> if (!VLOG_DROP_DBG()) {
> ds_put_format(s_tnl, "flow tunnel create %d type vxlan; ",
>--
>2.30.2

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v3 0/3] add functions about rte_flow to ovs-dpdk

2022-10-27 Thread Eli Britstein via dev
This series has 2 commits for DSCP and one unrelated bug fix. I am not sure 
why they are in the same series.
For DSCP, from the RTE API point of view there is no way to rewrite only part 
of the bits of the DSCP byte. I think those are NACK. Please explain if I'm wrong.
For the bug fix commit, the code is OK, but the commit message tells a long 
story, with opinions etc., instead of just stating the bug fix.

>-Original Message-
>From: Simon Horman 
>Sent: Wednesday, 26 October 2022 12:05
>To: d...@openvswitch.org
>Cc: Eelco Chaudron ; Ilya Maximets
>; Eli Britstein ; Chaoyong He
>; oss-driv...@corigine.com
>Subject: Re: [PATCH v3 0/3] add functions about rte_flow to ovs-dpdk
>
>External email: Use caution opening links or attachments
>
>
>On Fri, Oct 07, 2022 at 12:39:20PM +0200, Simon Horman wrote:
>> This patch series add some logics about rte_flow to ovs-dpdk, includes:
>>
>> * Support offload of set IPv4/IPv6 DSCP action
>> * Enhanced the support of tunnel pop action
>
>Hi all,
>
>gentle ping for review.
>
>>
>> Changes since v2
>> * Revise commit messages
>> * Add the fix tag to patch 3/3
>> * Revise dscp masking to check for correct partial mask
>> * Drop geneve vport patch, deferred as future work
>>
>> Changes since v1
>> * Address checkpatch warnings
>> * Drop the geneve decap patch, planning to add it along with the option support in
>the future
>>
>> Chaoyong He (3):
>>   netdev-offload-dpdk: Support offload of set IPv4 DSCP action
>>   netdev-offload-dpdk: Support offload of set IPv6 DSCP action
>>   netdev-offload-dpdk: Enhance the support of tunnel pop action
>>
>>  lib/netdev-offload-dpdk.c | 47
>+++
>>  1 file changed, 43 insertions(+), 4 deletions(-)
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 2/2] dpif-netdev: Fix flushing of a vport

2022-09-05 Thread Eli Britstein via dev
When using a userspace vport ("vxlan0"), dpif-netdev adds an additional
netdev ("vxlan_sys_4789"). The dpif netdev ("vxlan0") is added to the
netdev-offload ports map, thus flows are associated on this netdev.

However, flushing is done on the dpif-netdev level ("vxlan_sys_4789"),
and relevant offload flows are not destroyed.

To fix it, add the datapath netdev to the netdev-offload ports map. In
case there is no different internal netdev, use the dpif netdev, as before.

Fixes: adbd4301a249 ("netdev-offload-dpdk: Use per-netdev offload metadata.")
Signed-off-by: Eli Britstein 
---
 lib/dpif-netdev.c   | 15 ++-
 lib/dpif-netlink.c  |  5 -
 lib/dpif-provider.h |  5 +++--
 lib/dpif.c  |  8 +---
 4 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index a45b46014..b251de881 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -536,7 +536,8 @@ static int get_port_by_name(struct dp_netdev *dp, const 
char *devname,
 static void dp_netdev_free(struct dp_netdev *)
 OVS_REQUIRES(dp_netdev_mutex);
 static int do_add_port(struct dp_netdev *dp, const char *devname,
-   const char *type, odp_port_t port_no)
+   const char *type, odp_port_t port_no,
+   struct netdev **datapath_netdev)
 OVS_REQ_WRLOCK(dp->port_rwlock);
 static void do_del_port(struct dp_netdev *dp, struct dp_netdev_port *)
 OVS_REQ_WRLOCK(dp->port_rwlock);
@@ -1845,7 +1846,7 @@ create_dp_netdev(const char *name, const struct 
dpif_class *class,
 
 error = do_add_port(dp, name, dpif_netdev_port_open_type(dp->class,
  "internal"),
-ODPP_LOCAL);
+ODPP_LOCAL, NULL);
 ovs_rwlock_unlock(&dp->port_rwlock);
 if (error) {
 dp_netdev_free(dp);
@@ -2112,7 +2113,7 @@ out:
 
 static int
 do_add_port(struct dp_netdev *dp, const char *devname, const char *type,
-odp_port_t port_no)
+odp_port_t port_no, struct netdev **datapath_netdev)
 OVS_REQ_WRLOCK(dp->port_rwlock)
 {
 struct netdev_saved_flags *sf;
@@ -2128,6 +2129,9 @@ do_add_port(struct dp_netdev *dp, const char *devname, 
const char *type,
 if (error) {
 return error;
 }
+if (datapath_netdev) {
+*datapath_netdev = port->netdev;
+}
 
 hmap_insert(&dp->ports, &port->node, hash_port_no(port_no));
 seq_change(dp->port_seq);
@@ -2157,7 +2161,7 @@ do_add_port(struct dp_netdev *dp, const char *devname, 
const char *type,
 
 static int
 dpif_netdev_port_add(struct dpif *dpif, struct netdev *netdev,
- odp_port_t *port_nop)
+ odp_port_t *port_nop, struct netdev **datapath_netdev)
 {
 struct dp_netdev *dp = get_dp_netdev(dpif);
 char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
@@ -2176,7 +2180,8 @@ dpif_netdev_port_add(struct dpif *dpif, struct netdev 
*netdev,
 }
 if (!error) {
 *port_nop = port_no;
-error = do_add_port(dp, dpif_port, netdev_get_type(netdev), port_no);
+error = do_add_port(dp, dpif_port, netdev_get_type(netdev), port_no,
+datapath_netdev);
 }
 ovs_rwlock_unlock(&dp->port_rwlock);
 
diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index a620a6ec5..55d5a4593 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -1139,7 +1139,7 @@ dpif_netlink_rtnl_port_create_and_add(struct dpif_netlink 
*dpif,
 
 static int
 dpif_netlink_port_add(struct dpif *dpif_, struct netdev *netdev,
-  odp_port_t *port_nop)
+  odp_port_t *port_nop, struct netdev **datapath_netdev)
 {
 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
 int error = EOPNOTSUPP;
@@ -1152,6 +1152,9 @@ dpif_netlink_port_add(struct dpif *dpif_, struct netdev 
*netdev,
 error = dpif_netlink_port_add_compat(dpif, netdev, port_nop);
 }
 fat_rwlock_unlock(&dpif->upcall_lock);
+if (datapath_netdev) {
+*datapath_netdev = netdev;
+}
 
 return error;
 }
diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h
index 12477a24f..b79b10e6c 100644
--- a/lib/dpif-provider.h
+++ b/lib/dpif-provider.h
@@ -195,10 +195,11 @@ struct dpif_class {
  * ODPP_NONE, attempts to use that as the port's port number.
  *
  * If port is successfully added, sets '*port_no' to the new port's
- * port number.  Returns EBUSY if caller attempted to choose a port
+ * port number, and datapath_netdev to a potentially created netdev in the
+ * dpif-class level.  Returns EBUSY if caller attempted to choose a port
  * number, and it was in use. */
 int (*port_add)(struct dpif *dpif, struct netdev *netdev,
-odp_port_t *port_no);
+odp_port_t *port_no, struct netdev **datapath_netdev);
 
 /* Removes port 

[ovs-dev] [PATCH 1/2] netdev-offload-dpdk: Fix flushing of a physdev

2022-09-05 Thread Eli Britstein via dev
Vport's offloads are done on the tracked orig-in-port, but the flow itself
is associated in the vport's map.

Removing the physdev will flush all the ports that are on its map, but
not the ones on other netdevs' maps. Since flows take reference count on
both their vport and their physdev, the physdev fails to be removed.

Fix it by flushing the physdev's offload flows in all related netdevs,
e.g. the netdev itself, or for physical devices, all vports.

Fixes: adbd4301a249 ("netdev-offload-dpdk: Use per-netdev offload metadata.")
Reported-by: 15895987278 
Signed-off-by: Eli Britstein 
---
 lib/netdev-offload-dpdk.c | 35 ++-
 1 file changed, 30 insertions(+), 5 deletions(-)

diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index cceefbc50..981897da1 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -2530,15 +2530,15 @@ out:
 return ret;
 }
 
-static int
-netdev_offload_dpdk_flow_flush(struct netdev *netdev)
+static void
+flush_netdev_flows_in_related(struct netdev *netdev, struct netdev *related)
 {
-struct cmap *map = offload_data_map(netdev);
-struct ufid_to_rte_flow_data *data;
 unsigned int tid = netdev_offload_thread_id();
+struct cmap *map = offload_data_map(related);
+struct ufid_to_rte_flow_data *data;
 
 if (!map) {
-return -1;
+return;
 }
 
 CMAP_FOR_EACH (data, node, map) {
@@ -2549,6 +2549,31 @@ netdev_offload_dpdk_flow_flush(struct netdev *netdev)
 netdev_offload_dpdk_flow_destroy(data);
 }
 }
+}
+
+static bool
+flush_in_vport_cb(struct netdev *vport,
+  odp_port_t odp_port OVS_UNUSED,
+  void *aux)
+{
+struct netdev *netdev = aux;
+
+/* Only vports are related to physical devices. */
+if (netdev_vport_is_vport_class(vport->netdev_class)) {
+flush_netdev_flows_in_related(netdev, vport);
+}
+
+return false;
+}
+
+static int
+netdev_offload_dpdk_flow_flush(struct netdev *netdev)
+{
+flush_netdev_flows_in_related(netdev, netdev);
+
+if (!netdev_vport_is_vport_class(netdev->netdev_class)) {
+netdev_ports_traverse(netdev->dpif_type, flush_in_vport_cb, netdev);
+}
 
 return 0;
 }
-- 
2.28.0.2311.g225365fb51

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V2 1/1] netdev-offload: Set 'miss_api_supported' to be under netdev

2022-08-31 Thread Eli Britstein via dev
Commit [1] introduced a flag in dpif-netdev level, to optimize
performance and avoid hw_miss_packet_recover() for devices with no such
support.
However, there is a race condition between traffic processing and
assigning a 'flow_api' object to the netdev. In such case, EOPNOTSUPP is
returned by netdev_hw_miss_packet_recover() in netdev-offload.c layer
because 'flow_api' is not yet initialized. As a result, the flag is
falsely disabled, and subsequent packets won't be recovered, though they
should.

In order to fix it, move the flag to be in netdev-offload layer, to
avoid that race.

[1]: 6e50c1651869 ("dpif-netdev: Avoid hw_miss_packet_recover() for devices 
with no support.")

Fixes: 6e50c1651869 ("dpif-netdev: Avoid hw_miss_packet_recover() for devices 
with no support.")
Signed-off-by: Eli Britstein 
---
 lib/dpif-netdev.c| 18 +++---
 lib/netdev-offload.c | 28 +++-
 lib/netdev-offload.h |  2 ++
 lib/netdev.c |  1 +
 4 files changed, 33 insertions(+), 16 deletions(-)

Revision history:
- v2: bool -> atomic_bool

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index a45b460145..2c08a71c8d 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -431,7 +431,6 @@ struct dp_netdev_rxq {
 unsigned intrvl_idx;   /* Write index for 'cycles_intrvl'. */
 struct dp_netdev_pmd_thread *pmd;  /* pmd thread that polls this queue. */
 bool is_vhost; /* Is rxq of a vhost port. */
-bool hw_miss_api_supported;/* hw_miss_packet_recover() supported.*/
 
 /* Counters of cycles spent successfully polling and processing pkts. */
 atomic_ullong cycles[RXQ_N_CYCLES];
@@ -5416,7 +5415,6 @@ port_reconfigure(struct dp_netdev_port *port)
 
 port->rxqs[i].port = port;
 port->rxqs[i].is_vhost = !strncmp(port->type, "dpdkvhost", 9);
-port->rxqs[i].hw_miss_api_supported = true;
 
 err = netdev_rxq_open(netdev, &port->rxqs[i].rx, i);
 if (err) {
@@ -8034,17 +8032,15 @@ dp_netdev_hw_flow(const struct dp_netdev_pmd_thread 
*pmd,
 #ifdef ALLOW_EXPERIMENTAL_API /* Packet restoration API required. */
 /* Restore the packet if HW processing was terminated before completion. */
 struct dp_netdev_rxq *rxq = pmd->ctx.last_rxq;
+bool miss_api_supported;
 
-if (rxq->hw_miss_api_supported) {
+atomic_read_relaxed(&rxq->port->netdev->hw_info.miss_api_supported,
+&miss_api_supported);
+if (miss_api_supported) {
 int err = netdev_hw_miss_packet_recover(rxq->port->netdev, packet);
-if (err) {
-if (err != EOPNOTSUPP) {
-COVERAGE_INC(datapath_drop_hw_miss_recover);
-return -1;
-} else {
-/* API unsupported by the port; avoid subsequent calls. */
-rxq->hw_miss_api_supported = false;
-}
+if (err && err != EOPNOTSUPP) {
+COVERAGE_INC(datapath_drop_hw_miss_recover);
+return -1;
 }
 }
 #endif
diff --git a/lib/netdev-offload.c b/lib/netdev-offload.c
index 9fde5f7a95..4592262bd3 100644
--- a/lib/netdev-offload.c
+++ b/lib/netdev-offload.c
@@ -183,6 +183,7 @@ netdev_assign_flow_api(struct netdev *netdev)
 CMAP_FOR_EACH (rfa, cmap_node, &netdev_flow_apis) {
 if (!rfa->flow_api->init_flow_api(netdev)) {
 ovs_refcount_ref(&rfa->refcnt);
+atomic_store_relaxed(&netdev->hw_info.miss_api_supported, true);
 ovsrcu_set(&netdev->flow_api, rfa->flow_api);
 VLOG_INFO("%s: Assigned flow API '%s'.",
   netdev_get_name(netdev), rfa->flow_api->type);
@@ -191,6 +192,7 @@ netdev_assign_flow_api(struct netdev *netdev)
 VLOG_DBG("%s: flow API '%s' is not suitable.",
  netdev_get_name(netdev), rfa->flow_api->type);
 }
+atomic_store_relaxed(&netdev->hw_info.miss_api_supported, false);
 VLOG_INFO("%s: No suitable flow API found.", netdev_get_name(netdev));
 
 return -1;
@@ -322,12 +324,28 @@ int
 netdev_hw_miss_packet_recover(struct netdev *netdev,
   struct dp_packet *packet)
 {
-const struct netdev_flow_api *flow_api =
-ovsrcu_get(const struct netdev_flow_api *, &netdev->flow_api);
+const struct netdev_flow_api *flow_api;
+bool miss_api_supported;
+int rv;
+
+atomic_read_relaxed(&netdev->hw_info.miss_api_supported,
+&miss_api_supported);
+if (!miss_api_supported) {
+return EOPNOTSUPP;
+}
+
+flow_api = ovsrcu_get(const struct netdev_flow_api *, &netdev->flow_api);
+if (!flow_api || !flow_api->hw_miss_packet_recover) {
+return EOPNOTSUPP;
+}
+
+rv = flow_api->hw_miss_packet_recover(netdev, packet);
+if (rv == EOPNOTSUPP) {
+/* API unsupported by the port; avoid subsequent calls. */
+atomic_store_re

Re: [ovs-dev] [PATCH 1/5] netdev-offload-dpdk: Support offload of set IPv4 DSCP action

2022-08-19 Thread Eli Britstein via dev



On 8/16/2022 3:50 PM, Simon Horman wrote:

From: Chaoyong He 

Add the support of offload of set IPv4 DSCP action.

Signed-off-by: Chaoyong He 
Signed-off-by: Simon Horman 
---
  lib/netdev-offload-dpdk.c | 21 ++---
  1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index cceefbc50751..732ce6021722 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -770,6 +770,14 @@ dump_flow_action(struct ds *s, struct ds *s_extra,
IP_ARGS(set_ipv4->ipv4_addr));
  }
  ds_put_cstr(s, "/ ");
+} else if (actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_DSCP) {
+const struct rte_flow_action_set_dscp *set_dscp = actions->conf;
+
+ds_put_cstr(s, "set_dscp ");
+if (set_dscp) {
+ds_put_format(s, "dscp_value %d ", set_dscp->dscp);
+}
+ds_put_cstr(s, "/ ");
  } else if (actions->type == RTE_FLOW_ACTION_TYPE_SET_TTL) {
  const struct rte_flow_action_set_ttl *set_ttl = actions->conf;
  
@@ -1813,7 +1821,8 @@ add_output_action(struct netdev *netdev,

  static int
  add_set_flow_action__(struct flow_actions *actions,
const void *value, void *mask,
-  const size_t size, const int attr)
+  const size_t size, const int attr,
+  bool dscp_flag)


Instead of a special argument for dscp, we can have a generic bitmask 
argument, that if set will override the size argument.


This way it will be easier to add more such fields in the future.


  {
  void *spec;
  
@@ -1824,7 +1833,7 @@ add_set_flow_action__(struct flow_actions *actions,

  if (is_all_zeros(mask, size)) {
  return 0;
  }
-if (!is_all_ones(mask, size)) {
+if (!dscp_flag && !is_all_ones(mask, size)) {

if ((bitmask && (*(uint8_t) mask) == bitmask) || !is_all_ones(mask, size)) {

  VLOG_DBG_RL(&rl, "Partial mask is not supported");
  return -1;
  }
@@ -1849,6 +1858,8 @@ BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ipv4) 
==
MEMBER_SIZEOF(struct ovs_key_ipv4, ipv4_src));
  BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ipv4) ==
MEMBER_SIZEOF(struct ovs_key_ipv4, ipv4_dst));
+BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_dscp) ==
+  MEMBER_SIZEOF(struct ovs_key_ipv4, ipv4_tos));
  BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ttl) ==
MEMBER_SIZEOF(struct ovs_key_ipv4, ipv4_ttl));
  BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ipv6) ==
@@ -1874,11 +1885,14 @@ parse_set_actions(struct flow_actions *actions,
  {
  const struct nlattr *sa;
  unsigned int sleft;
+bool dscp_flag = false;
  
  #define add_set_flow_action(field, type)  \

+if (type == RTE_FLOW_ACTION_TYPE_SET_IPV4_DSCP)   \
+dscp_flag = true; \


bitmask = RTE_IPV4_HDR_DSCP_MASK

else bitmask = 0


  if (add_set_flow_action__(actions, >field,   
\
mask ? CONST_CAST(void *, >field) : NULL, 
\
-  sizeof key->field, type)) { \
+  sizeof key->field, type, dscp_flag)) {  \
  return -1;
\
  }
  
@@ -1900,6 +1914,7 @@ parse_set_actions(struct flow_actions *actions,
  
  add_set_flow_action(ipv4_src, RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC);

  add_set_flow_action(ipv4_dst, RTE_FLOW_ACTION_TYPE_SET_IPV4_DST);
+add_set_flow_action(ipv4_tos, RTE_FLOW_ACTION_TYPE_SET_IPV4_DSCP);
  add_set_flow_action(ipv4_ttl, RTE_FLOW_ACTION_TYPE_SET_TTL);
  
  if (mask && !is_all_zeros(mask, sizeof *mask)) {

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 3/5] netdev-offload-dpdk: Add support of IPv6 tunnel

2022-08-19 Thread Eli Britstein via dev



On 8/16/2022 3:50 PM, Simon Horman wrote:

From: Chaoyong He 

Add support of IPv6 tunnel.
IPv6 tunnel is already supported. The missing setting of is_ipv6 field 
affects only if using a dpdk PMD that would return restore info flags 
with RTE_FLOW_RESTORE_INFO_TUNNEL on, but 
RTE_FLOW_RESTORE_INFO_ENCAPSULATED off (AFAIK there isn't such PMD).


Please have a more descriptive/correct commit message.


Also:

Fixes: be56e063d028 ("netdev-offload-dpdk: Support tunnel pop action.")


Signed-off-by: Chaoyong He 
Signed-off-by: Simon Horman 
---
  lib/netdev-offload-dpdk.c | 14 ++
  1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index 28017091f680..d1c5f978da88 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -1107,12 +1107,18 @@ vport_to_rte_tunnel(struct netdev *vport,
  const struct netdev_tunnel_config *tnl_cfg;
  
  memset(tunnel, 0, sizeof *tunnel);

+
+tnl_cfg = netdev_get_tunnel_config(vport);
+if (!tnl_cfg) {
+return -1;
+}
+
+if (!IN6_IS_ADDR_V4MAPPED(_cfg->ipv6_dst)) {
+tunnel->is_ipv6 = true;
+}
+
  if (!strcmp(netdev_get_type(vport), "vxlan")) {
  tunnel->type = RTE_FLOW_ITEM_TYPE_VXLAN;
-tnl_cfg = netdev_get_tunnel_config(vport);
-if (!tnl_cfg) {
-return -1;
-}
  tunnel->tp_dst = tnl_cfg->dst_port;
  if (!VLOG_DROP_DBG()) {
  ds_put_format(s_tnl, "flow tunnel create %d type vxlan; ",

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v3 3/3] netdev-offload-dpdk: use flow transfer proxy mechanism

2022-07-20 Thread Eli Britstein via dev



>-Original Message-
>From: Ivan Malov 
>Sent: Wednesday, July 20, 2022 3:18 PM
>To: d...@openvswitch.org
>Cc: Eli Britstein ; Stephen Hemminger
>; Ilya Maximets ; Ori
>Kam ; Maxime Coquelin
>; David Marchand
>; Andrew Rybchenko
>
>Subject: [PATCH v3 3/3] netdev-offload-dpdk: use flow transfer proxy
>mechanism
>
>External email: Use caution opening links or attachments
>
>
>Manage "transfer" flows via the corresponding mechanism.
>Doing so requires that the traffic source be specified explicitly, via the
>corresponding pattern item.
>
>Signed-off-by: Ivan Malov 
>Acked-by: Andrew Rybchenko 
>---
> lib/netdev-dpdk.c | 99 ---
> lib/netdev-dpdk.h |  4 +-
> lib/netdev-offload-dpdk.c | 61 
> 3 files changed, 135 insertions(+), 29 deletions(-)
>
>diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index
>45e5d26d2..01fb40255 100644
>--- a/lib/netdev-dpdk.c
>+++ b/lib/netdev-dpdk.c
>@@ -420,6 +420,7 @@ enum dpdk_hw_ol_features {
>
> struct netdev_dpdk {
> PADDED_MEMBERS_CACHELINE_MARKER(CACHE_LINE_SIZE, cacheline0,
>+dpdk_port_t flow_transfer_proxy_port_id;
> dpdk_port_t port_id;
>
> /* If true, device was attached by rte_eth_dev_attach(). */ @@ -1130,8
>+1131,9 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
> uint32_t rx_chksm_offload_capa = RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
>  RTE_ETH_RX_OFFLOAD_TCP_CKSUM |
>  RTE_ETH_RX_OFFLOAD_IPV4_CKSUM;
>-
> #ifdef ALLOW_EXPERIMENTAL_API
This ifdef appears all over the patches, even in cases where it would be
harmless to do without it. It makes the code less readable and might cause
future cherry-picking issues. Try to limit it to the places where it is a must.
>+int ret;
>+
> /*
>  * Full tunnel offload requires that tunnel ID metadata be
>  * delivered with "miss" packets from the hardware to the @@ -1141,6
>+1143,27 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
>  * Request delivery of such metadata.
>  */
> dpdk_eth_dev_init_rx_metadata(dev);
>+
>+/*
>+ * Managing "transfer" flows requires that the user communicate them
>+ * via a port which has the privilege to control the embedded switch.
>+ * For some vendors, all ports in a given switching domain have
>+ * this privilege. For other vendors, it's only one port.
>+ *
>+ * Get the proxy port ID and remember it for later use.
>+ */
>+ret = rte_flow_pick_transfer_proxy(dev->port_id,
>+   >flow_transfer_proxy_port_id, 
>NULL);
>+if (ret != 0) {
>+/*
>+ * The PMD does not indicate the proxy port.
>+ * Assume the proxy is unneeded.
>+ */
>+dev->flow_transfer_proxy_port_id = dev->port_id;
>+}
>+#else /* ! ALLOW_EXPERIMENTAL_API */
>+/* No API to get transfer proxy; assume the proxy is unneeded. */
>+dev->flow_transfer_proxy_port_id = dev->port_id;
> #endif /* ALLOW_EXPERIMENTAL_API */
>
> rte_eth_dev_info_get(dev->port_id, ); @@ -3762,8 +3785,12 @@
>netdev_dpdk_detach(struct unixctl_conn *conn, int argc OVS_UNUSED,
>const char *argv[], void *aux OVS_UNUSED)  {
> struct ds used_interfaces = DS_EMPTY_INITIALIZER;
>+#ifdef ALLOW_EXPERIMENTAL_API
>+struct netdev_dpdk *dev_self = NULL; #endif /*
>+ALLOW_EXPERIMENTAL_API */
> struct rte_eth_dev_info dev_info;
> dpdk_port_t sibling_port_id;
>+struct netdev_dpdk *dev;
> dpdk_port_t port_id;
> bool used = false;
> char *response;
>@@ -3781,8 +3808,6 @@ netdev_dpdk_detach(struct unixctl_conn *conn, int
>argc OVS_UNUSED,
>   argv[1]);
>
> RTE_ETH_FOREACH_DEV_SIBLING (sibling_port_id, port_id) {
>-struct netdev_dpdk *dev;
>-
> LIST_FOR_EACH (dev, list_node, _list) {
> if (dev->port_id != sibling_port_id) {
> continue;
>@@ -3802,6 +3827,27 @@ netdev_dpdk_detach(struct unixctl_conn *conn,
>int argc OVS_UNUSED,
> }
> ds_destroy(_interfaces);
>
>+#ifdef ALLOW_EXPERIMENTAL_API
>+/*
>+ * The device being detached may happen to be a flow proxy port
>+ * for another device (still attached). If so, do not allow to
>+ * detach. Devices dependent on this one must be detached first.
I don't think it is acceptable to prevent the port from being detached, or to
enforce such an ordering. For example, ports are detached upon shutdown, in an
unknown order.
Suppose A is the proxy port for ports B,C. When port A is going to be detached, 
flus

[ovs-dev] [PATCH] conntrack: Fix conntrack multiple new state

2022-07-17 Thread Eli Britstein via dev
A connection is established if we see packets from both directions.
The commit cited in the Fixes tag fixed the case of sending twice in one
direction, but the issue remains if more than two packets are sent in that
direction before any reply is seen.
Fix it.

Fixes: a867c010ee91 ("conntrack: Fix conntrack new state")
Signed-off-by: Eli Britstein 
---
 lib/conntrack-other.c   | 7 ---
 tests/system-traffic.at | 9 +
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/lib/conntrack-other.c b/lib/conntrack-other.c
index d3b4601858..7f3e63c384 100644
--- a/lib/conntrack-other.c
+++ b/lib/conntrack-other.c
@@ -48,18 +48,19 @@ other_conn_update(struct conntrack *ct, struct conn *conn_,
   struct dp_packet *pkt OVS_UNUSED, bool reply, long long now)
 {
 struct conn_other *conn = conn_other_cast(conn_);
-enum ct_update_res ret = CT_UPDATE_VALID;
 
 if (reply && conn->state != OTHERS_BIDIR) {
 conn->state = OTHERS_BIDIR;
 } else if (conn->state == OTHERS_FIRST) {
 conn->state = OTHERS_MULTIPLE;
-ret = CT_UPDATE_VALID_NEW;
 }
 
 conn_update_expiration(ct, >up, other_timeouts[conn->state], now);
 
-return ret;
+if (conn->state == OTHERS_BIDIR) {
+return CT_UPDATE_VALID;
+}
+return CT_UPDATE_VALID_NEW;
 }
 
 static bool
diff --git a/tests/system-traffic.at b/tests/system-traffic.at
index 89107ab624..182a78847e 100644
--- a/tests/system-traffic.at
+++ b/tests/system-traffic.at
@@ -3078,6 +3078,15 @@ NXST_FLOW reply:
  table=1, priority=100,ct_state=+est+trk,in_port=1 actions=output:2
 ])
 
+dnl Send a 3rd UDP packet on port 1
+AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 
packet=5054000a505400090800451c0011a4cd0a0101010a010102000100020008
 actions=resubmit(,0)"])
+
+dnl There still should not be any packet that matches the established ct_state.
+AT_CHECK([ovs-ofctl dump-flows br0 "table=1 in_port=1,ct_state=+trk+est" | 
ofctl_strip], [0], [dnl
+NXST_FLOW reply:
+ table=1, priority=100,ct_state=+est+trk,in_port=1 actions=output:2
+])
+
 OVS_TRAFFIC_VSWITCHD_STOP
 AT_CLEANUP
 
-- 
2.26.2.1730.g385c171

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH] netdev-offload: Set 'miss_api_supported' to be under netdev

2022-06-30 Thread Eli Britstein via dev
Commit [1] introduced a flag in dpif-netdev level, to optimize
performance and avoid hw_miss_packet_recover() for devices with no such
support.
However, there is a race condition between traffic processing and
assigning a 'flow_api' object to the netdev. In such case, EOPNOTSUPP is
returned by netdev_hw_miss_packet_recover() in netdev-offload.c layer
because 'flow_api' is not yet initialized. As a result, the flag is
falsely disabled, and subsequent packets won't be recovered, though they
should.

In order to fix it, move the flag to be in netdev-offload layer, to
avoid that race.

[1]: 6e50c1651869 ("dpif-netdev: Avoid hw_miss_packet_recover() for devices 
with no support.")

Fixes: 6e50c1651869 ("dpif-netdev: Avoid hw_miss_packet_recover() for devices 
with no support.")
Signed-off-by: Eli Britstein 
---
 lib/dpif-netdev.c| 15 ---
 lib/netdev-offload.c | 25 -
 lib/netdev-offload.h |  1 +
 3 files changed, 25 insertions(+), 16 deletions(-)

Github actions:
- v1: https://github.com/elibritstein/OVS/actions/runs/2587743230

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index f46b9fe183..a286050b57 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -431,7 +431,6 @@ struct dp_netdev_rxq {
 unsigned intrvl_idx;   /* Write index for 'cycles_intrvl'. */
 struct dp_netdev_pmd_thread *pmd;  /* pmd thread that polls this queue. */
 bool is_vhost; /* Is rxq of a vhost port. */
-bool hw_miss_api_supported;/* hw_miss_packet_recover() supported.*/
 
 /* Counters of cycles spent successfully polling and processing pkts. */
 atomic_ullong cycles[RXQ_N_CYCLES];
@@ -5421,7 +5420,6 @@ port_reconfigure(struct dp_netdev_port *port)
 
 port->rxqs[i].port = port;
 port->rxqs[i].is_vhost = !strncmp(port->type, "dpdkvhost", 9);
-port->rxqs[i].hw_miss_api_supported = true;
 
 err = netdev_rxq_open(netdev, >rxqs[i].rx, i);
 if (err) {
@@ -8039,16 +8037,11 @@ dp_netdev_hw_flow(const struct dp_netdev_pmd_thread 
*pmd,
 /* Restore the packet if HW processing was terminated before completion. */
 struct dp_netdev_rxq *rxq = pmd->ctx.last_rxq;
 
-if (rxq->hw_miss_api_supported) {
+if (rxq->port->netdev->hw_info.miss_api_supported) {
 int err = netdev_hw_miss_packet_recover(rxq->port->netdev, packet);
-if (err) {
-if (err != EOPNOTSUPP) {
-COVERAGE_INC(datapath_drop_hw_miss_recover);
-return -1;
-} else {
-/* API unsupported by the port; avoid subsequent calls. */
-rxq->hw_miss_api_supported = false;
-}
+if (err && err != EOPNOTSUPP) {
+COVERAGE_INC(datapath_drop_hw_miss_recover);
+return -1;
 }
 }
 #endif
diff --git a/lib/netdev-offload.c b/lib/netdev-offload.c
index fb108c0d50..50c5076a63 100644
--- a/lib/netdev-offload.c
+++ b/lib/netdev-offload.c
@@ -182,6 +182,7 @@ netdev_assign_flow_api(struct netdev *netdev)
 CMAP_FOR_EACH (rfa, cmap_node, _flow_apis) {
 if (!rfa->flow_api->init_flow_api(netdev)) {
 ovs_refcount_ref(>refcnt);
+netdev->hw_info.miss_api_supported = true;
 ovsrcu_set(>flow_api, rfa->flow_api);
 VLOG_INFO("%s: Assigned flow API '%s'.",
   netdev_get_name(netdev), rfa->flow_api->type);
@@ -190,6 +191,7 @@ netdev_assign_flow_api(struct netdev *netdev)
 VLOG_DBG("%s: flow API '%s' is not suitable.",
  netdev_get_name(netdev), rfa->flow_api->type);
 }
+netdev->hw_info.miss_api_supported = false;
 VLOG_INFO("%s: No suitable flow API found.", netdev_get_name(netdev));
 
 return -1;
@@ -263,12 +265,25 @@ int
 netdev_hw_miss_packet_recover(struct netdev *netdev,
   struct dp_packet *packet)
 {
-const struct netdev_flow_api *flow_api =
-ovsrcu_get(const struct netdev_flow_api *, >flow_api);
+const struct netdev_flow_api *flow_api;
+int rv;
+
+if (!netdev->hw_info.miss_api_supported) {
+return EOPNOTSUPP;
+}
+
+flow_api = ovsrcu_get(const struct netdev_flow_api *, >flow_api);
+if (!flow_api || !flow_api->hw_miss_packet_recover) {
+return EOPNOTSUPP;
+}
+
+rv = flow_api->hw_miss_packet_recover(netdev, packet);
+if (rv == EOPNOTSUPP) {
+/* API unsupported by the port; avoid subsequent calls. */
+netdev->hw_info.miss_api_supported = false;
+}
 
-return (flow_api && flow_api->hw_miss_packet_recover)
-? flow_api->hw_miss_packet_recover(netdev, packet)
-: EOPNOTSUPP;
+return rv;
 }
 
 int
diff --git a/lib/netdev-offload.h b/lib/netdev-offload.h
index 8237a85ddb..c56f9d49ad 1006

Re: [ovs-dev] [PATCH] netdev-offload-dpdk: setting RSS hash types in RSS action

2022-06-24 Thread Eli Britstein via dev
+Ori


From: Finn, Emma 
Sent: Friday, June 24, 2022 5:41 PM
To: Ilya Maximets ; ovs-dev@openvswitch.org 
; Stokes, Ian 
Cc: Eli Britstein ; Slava Ovsiienko ; 
Flavio Leitner ; Matan Azrad 
Subject: RE: [ovs-dev] [PATCH] netdev-offload-dpdk: setting RSS hash types in 
RSS action

External email: Use caution opening links or attachments


> -Original Message-
> From: dev  On Behalf Of Ilya
> Maximets
> Sent: Monday 20 June 2022 19:00
> To: ovs-dev@openvswitch.org; Stokes, Ian 
> Cc: Eli Britstein ; viachesl...@nvidia.com; Flavio Leitner
> ; i.maxim...@ovn.org; ma...@nvidia.com
> Subject: Re: [ovs-dev] [PATCH] netdev-offload-dpdk: setting RSS hash types
> in RSS action
>
> On 3/22/22 03:08, Harold Huang wrote:
> > Hello,
> > Is there any opinion from the OVS or DPDK MLX5 driver maintainers?
> > This is a serious issue we've found when we use MLX5 PMD driver to
> > offload OVS-DPDK.
>
> It looks like DPDK is very inconsistent and drivers do not really put any 
> effort
> in setting up a "best effort" hashing mechanism.
> mlx5 driver seems to use just RTE_ETH_RSS_IP.
>
> I suppose, there is no harm in using the same set of hashing fields as we do
> for RSS configuration in netdev-dpdk.
>
> Ian, Eli, what do you think?
>
Hi,

I took this patch and tested on both Intel E810 and i40e. Flows are being 
offloaded correctly and this won't break MARK and RSS action for Intel NICs.

Thanks,
Emma

> >
> > On Fri, Mar 18, 2022 at 1:31 PM Tonghao Zhang
>  wrote:
> >>
> >> On Thu, Mar 17, 2022 at 12:01 PM Harold Huang
>  wrote:
> >>>
> >>> Hi,
> >>>
> >>> On Wed, Mar 16, 2022 at 4:32 PM Tonghao Zhang
>  wrote:
> >>>>
> >>>> On Wed, Mar 16, 2022 at 4:09 PM Harold Huang
>  wrote:
> >>>>>
> >>>>> When we send parallel flows such as VXLAN to a PF[1] port in
> >>>>> OVS-DPDK with multiple PMDs. OVS will create a RTE flow with Mark
> >>>>> and RSS actions to send flows to the software data path. But the
> >>>>> RSS action does not work well and all the flows are forwarded to a
> >>>>> single PMD. This is because RSS hash types should be set in RSS action.
> >>>>>
> >>>>> [1]: In our testbed, a Mellanox ConnectX-6 is used as a PF port.
> >>>> One question, did you test this patch on another NIC. e.g. ixgbe,
> >>>> i40e we hope ovs can run on more NIC.
> >>>
> >>> Thanks for your suggestions.  I have tested it in Intel E810,
> >>> Broadcom NetXtreme-E. All of them could work well without this patch,
> ie.
> >>> parallel flows are RSS to different queues. And both of them could
> >>> work well after adding this patch.  But 82599 NIC with IXGBE driver
> >>> does not support Mark action and the offloaded Mark and RSS action
> >>> are failed. AFAIK it does not matter because Mark and RSS action is
> >>> used to accelerate the software datapath with Mark ID lookup. I do
> >>> not have an I40E testbed at present.  But I guess it could also work
> >>> well because Mark id action is also supported. Last but most
> >>> important, we should make sure adding RTE_ETH_RSS_IP |
> >>> RTE_ETH_RSS_UDP | RTE_ETH_RSS_TCP  RSS types do not have a
> negative
> >>> effect. It seems that most physical network drivers which support
> >>> mark could also support these RSS hash types. It could be seen at
> >>> [1] these RSS hash types have been set in the DPDK port
> >>> initialization.  But IMO, it is better to find a solution from the MLX5 
> >>> dpdk
> driver.
> >>>
> >>> [1]:
> >>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgithub.com%2Fopenvswitch%2Fovs%2Fblob%2Fmaster%2Flib%2Fnetdev-data=05%7C01%7Celibr%40nvidia.com%7C32f22d1688cb4e0434e808da55ef9bb5%7C43083d15727340c1b7db39efd9ccc17a%7C0%7C0%7C637916784861900812%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7Csdata=wuia5DfwM4Esw3y1yd8cZE5%2BJyPzzXTm2zKKhat%2FxJk%3Dreserved=0
> dpdk.c#L16
> >>> 7
> >> Ok, let's wait for maintainer comments.
> >>>
> >>>>> Signed-off-by: Harold Huang 
> >>>>> ---
> >>>>>  lib/netdev-offload-dpdk.c | 3 ++-
> >>>>>  1 file changed, 2 insertions(+), 1 deletion(-)
> >>>>>
> >>>>> diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk

Re: [ovs-dev] [PATCH 3/3] netdev-offload-dpdk: use flow transfer proxy mechanism

2022-06-08 Thread Eli Britstein via dev
Hi Ivan,

>-Original Message-
>From: Ivan Malov 
>Sent: Wednesday, June 8, 2022 10:02 PM
>To: Eli Britstein 
>Cc: d...@openvswitch.org; Andrew Rybchenko
>; Ilya Maximets ;
>Ori Kam ; NBU-Contact-Thomas Monjalon (EXTERNAL)
>; Stephen Hemminger
>; David Marchand
>; Gaetan Rivet ; Maxime
>Coquelin 
>Subject: RE: [PATCH 3/3] netdev-offload-dpdk: use flow transfer proxy
>mechanism
>
>External email: Use caution opening links or attachments
>
>
>Hi Eli,
>
>On Wed, 8 Jun 2022, Eli Britstein wrote:
>
>> Hi Ivan,
>>
>>> -Original Message-
>>> From: Ivan Malov 
>>> Sent: Wednesday, June 8, 2022 5:46 PM
>>> To: Eli Britstein 
>>> Cc: d...@openvswitch.org; Andrew Rybchenko
>>> ; Ilya Maximets ;
>>> Ori Kam ; NBU-Contact-Thomas Monjalon (EXTERNAL)
>>> ; Stephen Hemminger
>>> ; David Marchand
>>> ; Gaetan Rivet ; Maxime
>>> Coquelin 
>>> Subject: RE: [PATCH 3/3] netdev-offload-dpdk: use flow transfer proxy
>>> mechanism
>>>
>>> External email: Use caution opening links or attachments
>>>
>>>
>>> Hi Eli,
>>>
>>> On Wed, 8 Jun 2022, Eli Britstein wrote:
>>>
>>>> Hi Ivan,
>>>>
>>>>> -Original Message-
>>>>> From: Ivan Malov 
>>>>> Sent: Tuesday, June 7, 2022 11:56 PM
>>>>> To: Eli Britstein 
>>>>> Cc: d...@openvswitch.org; Andrew Rybchenko
>>>>> ; Ilya Maximets
>>>>> ; Ori Kam ;
>>>>> NBU-Contact-Thomas Monjalon (EXTERNAL) ;
>>>>> Stephen Hemminger ; David Marchand
>>>>> ; Gaetan Rivet ;
>Maxime
>>>>> Coquelin 
>>>>> Subject: RE: [PATCH 3/3] netdev-offload-dpdk: use flow transfer
>>>>> proxy mechanism
>>>>>
>>>>> External email: Use caution opening links or attachments
>>>>>
>>>>>
>>>>> Hi Eli,
>>>>>
>>>>> On Wed, 1 Jun 2022, Eli Britstein wrote:
>>>>>
>>>>>> - Missing proper handling of the testpmd syntax logging. It
>>>>>> changes the used
>>>>> port according to "transfer", but the log still uses
>>> netdev_dpdk_get_port_id().
>>>>>
>>>>> Thanks for noticing. I will see to it in the next version.
>>>>>
>>>>>> - The usage of the "proxy" port for rte-flow implies that this
>>>>>> proxy port is
>>>>> attached to OVS, otherwise it is not "started" and creation of
>>>>> flows will
>>> fail.
>>>>>
>>>>> That's the way it is. If there is no proxy for a given port, then
>>>>> the original port value will be used for managing flows. For
>>>>> vendors that don't need the proxy, this will work. For others, it won't.
>That's OK.
>>>
>>>> I don't really understand why this can't be done inside dpdk domain
>>>> (if there
>>> is a proxy, and it is up, use it, otherwise don't).
>>>> That's *currently* the way it is. I understand that if dpdk works
>>>> like this OVS
>>> should align, but maybe you or someone else here knows why dpdk works
>>> like this? (not too late to change, this is experimental...).
>>>
>>>
>>> Regardless of DPDK, on some NICs, it is possible to insert rules via
>>> unprivileged PFs or VFs, but there are also NICs which cannot do it.
>>>
>>> In DPDK, this contradiction has to be resolved somehow.
>>> For example, for NICs that can only manage flows via privileged ports,
>>> two possible solutions exist:
>>>
>>> 1. Route flow management requests from unprivileged ethdevs
>>>to the privileged one implicitly, inside the PMD. This
>>>is transparent to users, but, at the same time, it is
>>>tricky because the application does not realise that
>>>flows it manages via an ethdev "B" are in fact
>>>communicated to the NIC via an ethdev "A".
>>>
>>>Unaware of the implicit scheme, the application may
>>>detach the privileged ethdev "A" in-between. And, when
>>>time comes to remove flows, doing so via ethdev "B"
>>>will fail. This scheme breaks in-app housekeeping.
>>>
>>> 2. Expose the "proxy" port existence to the application.
>>>If it knows the truth about

Re: [ovs-dev] [PATCH 3/3] netdev-offload-dpdk: use flow transfer proxy mechanism

2022-06-08 Thread Eli Britstein via dev
Hi Ivan,

>-Original Message-
>From: Ivan Malov 
>Sent: Wednesday, June 8, 2022 5:46 PM
>To: Eli Britstein 
>Cc: d...@openvswitch.org; Andrew Rybchenko
>; Ilya Maximets ;
>Ori Kam ; NBU-Contact-Thomas Monjalon (EXTERNAL)
>; Stephen Hemminger
>; David Marchand
>; Gaetan Rivet ; Maxime
>Coquelin 
>Subject: RE: [PATCH 3/3] netdev-offload-dpdk: use flow transfer proxy
>mechanism
>
>External email: Use caution opening links or attachments
>
>
>Hi Eli,
>
>On Wed, 8 Jun 2022, Eli Britstein wrote:
>
>> Hi Ivan,
>>
>>> -Original Message-
>>> From: Ivan Malov 
>>> Sent: Tuesday, June 7, 2022 11:56 PM
>>> To: Eli Britstein 
>>> Cc: d...@openvswitch.org; Andrew Rybchenko
>>> ; Ilya Maximets ;
>>> Ori Kam ; NBU-Contact-Thomas Monjalon (EXTERNAL)
>>> ; Stephen Hemminger
>>> ; David Marchand
>>> ; Gaetan Rivet ; Maxime
>>> Coquelin 
>>> Subject: RE: [PATCH 3/3] netdev-offload-dpdk: use flow transfer proxy
>>> mechanism
>>>
>>> External email: Use caution opening links or attachments
>>>
>>>
>>> Hi Eli,
>>>
>>> On Wed, 1 Jun 2022, Eli Britstein wrote:
>>>
>>>> - Missing proper handling of the testpmd syntax logging. It changes
>>>> the used
>>> port according to "transfer", but the log still uses
>netdev_dpdk_get_port_id().
>>>
>>> Thanks for noticing. I will see to it in the next version.
>>>
>>>> - The usage of the "proxy" port for rte-flow implies that this proxy
>>>> port is
>>> attached to OVS, otherwise it is not "started" and creation of flows will
>fail.
>>>
>>> That's the way it is. If there is no proxy for a given port, then the
>>> original port value will be used for managing flows. For vendors that
>>> don't need the proxy, this will work. For others, it won't. That's OK.
>
>> I don't really understand why this can't be done inside dpdk domain (if there
>is a proxy, and it is up, use it, otherwise don't).
>> That's *currently* the way it is. I understand that if dpdk works like this 
>> OVS
>should align, but maybe you or someone else here knows why dpdk works like
>this? (not too late to change, this is experimental...).
>
>
>Regardless of DPDK, on some NICs, it is possible to insert rules via
>unprivileged PFs or VFs, but there are also NICs which cannot do it.
>
>In DPDK, this contradiction has to be resolved somehow.
>For example, for NICs that can only manage flows via privileged ports, two
>possible solutions exist:
>
>1. Route flow management requests from unprivileged ethdevs
>to the privileged one implicitly, inside the PMD. This
>is transparent to users, but, at the same time, it is
>tricky because the application does not realise that
>flows it manages via an ethdev "B" are in fact
>communicated to the NIC via an ethdev "A".
>
>Unaware of the implicit scheme, the application may
>detach the privileged ethdev "A" in-between. And, when
>time comes to remove flows, doing so via ethdev "B"
>will fail. This scheme breaks in-app housekeeping.
>
>2. Expose the "proxy" port existence to the application.
>If it knows the truth about the real ethdev that
>handles the transfer flows, it won't attempt to
>detach it in-between. The housekeeping is fine.
>
>Outing the existence of the "proxy" port to users seems like the most
>reasonable approach. This is why it was implemented in DPDK like this.
>Currently, it's indeed an experimental feature. DPDK PMDs which need it, are
>supposed to switch to it during the transition phase.
Thanks very much for the explanation, though IMHO relevant PMDs could still 
hide it and not do this "outing" of their internals.
>
>However, I should stress that for NICs that support managing transfer
>flows on any PFs and VFs, this proxy scheme is a don't care. The
>corresponding drivers may not implement the proxy query method at all:
>
>https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgithu
>b.com%2FDPDK%2Fdpdk%2Fblob%2Fmain%2Flib%2Fethdev%2Frte_flow.c%2
>3L1345data=05%7C01%7Celibr%40nvidia.com%7Cf5a80eb00f0342498
>63308da495dab8b%7C43083d15727340c1b7db39efd9ccc17a%7C0%7C0%7C6
>37902963929533013%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMD
>AiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C
>sdata=ojwUOsPlz09NXtDXfeO8lAT%2BHcgGYWNRdIhxB6f0cy0%3D
>mp;reserved=0
>
>The generic part of the API will just return the original port I

Re: [ovs-dev] [PATCH 3/3] netdev-offload-dpdk: use flow transfer proxy mechanism

2022-06-08 Thread Eli Britstein via dev
Hi Ivan,

>-Original Message-
>From: Ivan Malov 
>Sent: Tuesday, June 7, 2022 11:56 PM
>To: Eli Britstein 
>Cc: d...@openvswitch.org; Andrew Rybchenko
>; Ilya Maximets ;
>Ori Kam ; NBU-Contact-Thomas Monjalon (EXTERNAL)
>; Stephen Hemminger
>; David Marchand
>; Gaetan Rivet ; Maxime
>Coquelin 
>Subject: RE: [PATCH 3/3] netdev-offload-dpdk: use flow transfer proxy
>mechanism
>
>External email: Use caution opening links or attachments
>
>
>Hi Eli,
>
>On Wed, 1 Jun 2022, Eli Britstein wrote:
>
>> - Missing proper handling of the testpmd syntax logging. It changes the used
>port according to "transfer", but the log still uses netdev_dpdk_get_port_id().
>
>Thanks for noticing. I will see to it in the next version.
>
>> - The usage of the "proxy" port for rte-flow implies that this proxy port is
>attached to OVS, otherwise it is not "started" and creation of flows will fail.
>
>That's the way it is. If there is no proxy for a given port, then the original 
>port
>value will be used for managing flows. For vendors that don't need the proxy,
>this will work. For others, it won't. That's OK.
I don't really understand why this can't be done inside dpdk domain (if there 
is a proxy, and it is up, use it, otherwise don't).
That's *currently* the way it is. I understand that if dpdk works like this OVS 
should align, but maybe you or someone else here knows why dpdk works like 
this? (not too late to change, this is experimental...).
>
>>
>>> -Original Message-----
>>> From: Ivan Malov 
>>> Sent: Monday, May 30, 2022 5:16 PM
>>> To: d...@openvswitch.org
>>> Cc: Andrew Rybchenko ; Ilya Maximets
>>> ; Ori Kam ; Eli Britstein
>>> ; NBU-Contact-Thomas Monjalon (EXTERNAL)
>>> ; Stephen Hemminger
>>> ; David Marchand
>>> ; Gaetan Rivet ; Maxime
>>> Coquelin 
>>> Subject: [PATCH 3/3] netdev-offload-dpdk: use flow transfer proxy
>>> mechanism
>>>
>>> External email: Use caution opening links or attachments
>>>
>>>
>>> Manage "transfer" flows via the corresponding mechanism.
>>> Doing so requires that the traffic source be specified explicitly,
>>> via the corresponding pattern item.
>>>
>>> Signed-off-by: Ivan Malov 
>>> Acked-by: Andrew Rybchenko 
>>> ---
>>> lib/netdev-dpdk.c | 73 ---
>>> lib/netdev-dpdk.h |  2 +-
>>> lib/netdev-offload-dpdk.c | 43 ++-
>>> 3 files changed, 103 insertions(+), 15 deletions(-)
>>>
>>> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index
>>> 45e5d26d2..d0bf4613a 100644
>>> --- a/lib/netdev-dpdk.c
>>> +++ b/lib/netdev-dpdk.c
>>> @@ -420,6 +420,7 @@ enum dpdk_hw_ol_features {
>>>
>>> struct netdev_dpdk {
>>> PADDED_MEMBERS_CACHELINE_MARKER(CACHE_LINE_SIZE,
>cacheline0,
>>> +dpdk_port_t flow_transfer_proxy_port_id;
>>> dpdk_port_t port_id;
>>>
>>> /* If true, device was attached by rte_eth_dev_attach(). */
>>> @@ -1115,6
>>> +1116,23 @@ dpdk_eth_dev_init_rx_metadata(struct netdev_dpdk *dev)
>>>   DPDK_PORT_ID_FMT, dev->port_id);
>>> }
>>> }
>>> +
>>> +static void
>>> +dpdk_eth_dev_init_flow_transfer_proxy(struct netdev_dpdk *dev) {
>>> +int ret;
>>> +
>>> +ret = rte_flow_pick_transfer_proxy(dev->port_id,
>>> +   >flow_transfer_proxy_port_id, 
>>> NULL);
>>> +if (ret == 0)
>>> +return;
>>> +
>>> +/*
>>> + * The PMD does not indicate the proxy port.
>>> + * It is OK to assume the proxy is unneeded.
>>> + */
>>> +dev->flow_transfer_proxy_port_id = dev->port_id; }
>>> #endif /* ALLOW_EXPERIMENTAL_API */
>>>
>>> static int
>>> @@ -1141,6 +1159,19 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
>>>  * Request delivery of such metadata.
>>>  */
>>> dpdk_eth_dev_init_rx_metadata(dev);
>>> +
>>> +/*
>>> + * Managing "transfer" flows requires that the user communicate them
>>> + * via a port which has the privilege to control the embedded switch.
>>> + * For some vendors, all ports in a given switching domain have
>>> + * this privilege. For other vendors, it's only one port.
>>> + *
>&g

Re: [ovs-dev] [PATCH 1/3] netdev-dpdk: negotiate delivery of per-packet Rx metadata

2022-06-01 Thread Eli Britstein via dev
"TUNNEL_ID" is a bad name, but that's how dpdk called it.
There was a discussion about having this knowledge in OVS so we can avoid 
calling rte_flow_get_restore_info(). How else is it used?

>-Original Message-
>From: Ivan Malov 
>Sent: Monday, May 30, 2022 5:16 PM
>To: d...@openvswitch.org
>Cc: Andrew Rybchenko ; Ilya Maximets
>; Ori Kam ; Eli Britstein
>; NBU-Contact-Thomas Monjalon (EXTERNAL)
>; Stephen Hemminger
>; David Marchand
>; Gaetan Rivet ; Maxime
>Coquelin 
>Subject: [PATCH 1/3] netdev-dpdk: negotiate delivery of per-packet Rx
>metadata
>
>External email: Use caution opening links or attachments
>
>
>This may be required by some PMDs in offload scenarios.
>
>Signed-off-by: Ivan Malov 
>Acked-by: Andrew Rybchenko 
>---
> lib/netdev-dpdk.c | 44 
> 1 file changed, 44 insertions(+)
>
>diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index f9535bfb4..45e5d26d2
>100644
>--- a/lib/netdev-dpdk.c
>+++ b/lib/netdev-dpdk.c
>@@ -1085,6 +1085,38 @@ dpdk_eth_flow_ctrl_setup(struct netdev_dpdk
>*dev) OVS_REQUIRES(dev->mutex)
> }
> }
>
>+#ifdef ALLOW_EXPERIMENTAL_API
>+static void
>+dpdk_eth_dev_init_rx_metadata(struct netdev_dpdk *dev) {
>+uint64_t rx_metadata = 0;
>+int ret;
>+
>+/* For the fallback offload (non-"transfer" rules) */
>+rx_metadata |= RTE_ETH_RX_METADATA_USER_MARK;
>+/* For the full offload ("transfer" rules) */
>+rx_metadata |= RTE_ETH_RX_METADATA_TUNNEL_ID;
>+
>+ret = rte_eth_rx_metadata_negotiate(dev->port_id, _metadata);
>+if (ret == 0) {
>+if (!(rx_metadata & RTE_ETH_RX_METADATA_USER_MARK)) {
>+VLOG_DBG("The NIC will not provide per-packet USER_MARK on port
>"
>+ DPDK_PORT_ID_FMT, dev->port_id);
>+}
>+if (!(rx_metadata & RTE_ETH_RX_METADATA_TUNNEL_ID)) {
>+VLOG_DBG("The NIC will not provide per-packet TUNNEL_ID on port "
>+ DPDK_PORT_ID_FMT, dev->port_id);
>+}
>+} else if (ret == -ENOTSUP) {
>+VLOG_DBG("Rx metadata negotiate procedure is not supported on port
>"
>+ DPDK_PORT_ID_FMT, dev->port_id);
>+} else {
>+VLOG_WARN("Cannot negotiate Rx metadata on port "
>+  DPDK_PORT_ID_FMT, dev->port_id);
>+}
>+}
>+#endif /* ALLOW_EXPERIMENTAL_API */
>+
> static int
> dpdk_eth_dev_init(struct netdev_dpdk *dev)
> OVS_REQUIRES(dev->mutex)
>@@ -1099,6 +1131,18 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
>  RTE_ETH_RX_OFFLOAD_TCP_CKSUM |
>  RTE_ETH_RX_OFFLOAD_IPV4_CKSUM;
>
>+#ifdef ALLOW_EXPERIMENTAL_API
>+/*
>+ * Full tunnel offload requires that tunnel ID metadata be
>+ * delivered with "miss" packets from the hardware to the
>+ * PMD. The same goes for megaflow mark metadata which is
>+ * used in MARK + RSS offload scenario.
>+ *
>+ * Request delivery of such metadata.
>+ */
>+dpdk_eth_dev_init_rx_metadata(dev);
>+#endif /* ALLOW_EXPERIMENTAL_API */
>+
> rte_eth_dev_info_get(dev->port_id, );
>
> if (strstr(info.driver_name, "vf") != NULL) {
>--
>2.30.2

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 3/3] netdev-offload-dpdk: use flow transfer proxy mechanism

2022-06-01 Thread Eli Britstein via dev
- Missing proper handling of the testpmd syntax logging. It changes the used 
port according to "transfer", but the log still uses netdev_dpdk_get_port_id().
- The usage of the "proxy" port for rte-flow implies that this proxy port is 
attached to OVS, otherwise it is not "started" and creation of flows will fail.

>-Original Message-
>From: Ivan Malov 
>Sent: Monday, May 30, 2022 5:16 PM
>To: d...@openvswitch.org
>Cc: Andrew Rybchenko ; Ilya Maximets
>; Ori Kam ; Eli Britstein
>; NBU-Contact-Thomas Monjalon (EXTERNAL)
>; Stephen Hemminger
>; David Marchand
>; Gaetan Rivet ; Maxime
>Coquelin 
>Subject: [PATCH 3/3] netdev-offload-dpdk: use flow transfer proxy
>mechanism
>
>External email: Use caution opening links or attachments
>
>
>Manage "transfer" flows via the corresponding mechanism.
>Doing so requires that the traffic source be specified explicitly, via the
>corresponding pattern item.
>
>Signed-off-by: Ivan Malov 
>Acked-by: Andrew Rybchenko 
>---
> lib/netdev-dpdk.c | 73 ---
> lib/netdev-dpdk.h |  2 +-
> lib/netdev-offload-dpdk.c | 43 ++-
> 3 files changed, 103 insertions(+), 15 deletions(-)
>
>diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index
>45e5d26d2..d0bf4613a 100644
>--- a/lib/netdev-dpdk.c
>+++ b/lib/netdev-dpdk.c
>@@ -420,6 +420,7 @@ enum dpdk_hw_ol_features {
>
> struct netdev_dpdk {
> PADDED_MEMBERS_CACHELINE_MARKER(CACHE_LINE_SIZE, cacheline0,
>+dpdk_port_t flow_transfer_proxy_port_id;
> dpdk_port_t port_id;
>
> /* If true, device was attached by rte_eth_dev_attach(). */ @@ -1115,6
>+1116,23 @@ dpdk_eth_dev_init_rx_metadata(struct netdev_dpdk *dev)
>   DPDK_PORT_ID_FMT, dev->port_id);
> }
> }
>+
>+static void
>+dpdk_eth_dev_init_flow_transfer_proxy(struct netdev_dpdk *dev) {
>+int ret;
>+
>+ret = rte_flow_pick_transfer_proxy(dev->port_id,
>+   >flow_transfer_proxy_port_id, 
>NULL);
>+if (ret == 0)
>+return;
>+
>+/*
>+ * The PMD does not indicate the proxy port.
>+ * It is OK to assume the proxy is unneeded.
>+ */
>+dev->flow_transfer_proxy_port_id = dev->port_id; }
> #endif /* ALLOW_EXPERIMENTAL_API */
>
> static int
>@@ -1141,6 +1159,19 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
>  * Request delivery of such metadata.
>  */
> dpdk_eth_dev_init_rx_metadata(dev);
>+
>+/*
>+ * Managing "transfer" flows requires that the user communicate them
>+ * via a port which has the privilege to control the embedded switch.
>+ * For some vendors, all ports in a given switching domain have
>+ * this privilege. For other vendors, it's only one port.
>+ *
>+ * Get the proxy port ID and remember it for later use.
>+ */
>+dpdk_eth_dev_init_flow_transfer_proxy(dev);
>+#else /* ! ALLOW_EXPERIMENTAL_API */
>+/* It is OK to assume the proxy is unneeded. */
>+dev->flow_transfer_proxy_port_id = dev->port_id;
> #endif /* ALLOW_EXPERIMENTAL_API */
>
> rte_eth_dev_info_get(dev->port_id, );
>@@ -5214,13 +5245,15 @@ out:
>
> int
> netdev_dpdk_rte_flow_destroy(struct netdev *netdev,
>- struct rte_flow *rte_flow,
>+ bool transfer, struct rte_flow *rte_flow,
>  struct rte_flow_error *error)
> {
> struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> int ret;
>
>-ret = rte_flow_destroy(dev->port_id, rte_flow, error);
>+ret = rte_flow_destroy(transfer ?
>+   dev->flow_transfer_proxy_port_id : dev->port_id,
>+   rte_flow, error);
> return ret;
> }
>
>@@ -5234,7 +5267,19 @@ netdev_dpdk_rte_flow_create(struct netdev
>*netdev,
> struct rte_flow *flow;
> struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>
>-flow = rte_flow_create(dev->port_id, attr, items, actions, error);
>+#ifdef ALLOW_EXPERIMENTAL_API
>+if (!attr->transfer) {
>+/*
>+ * The 1st item in any pattern is a traffic source one.
>+ * It is unnecessary in the case of non-transfer rules.
>+ */
>+++(items);
>+}
>+#endif /* ALLOW_EXPERIMENTAL_API */
>+
>+flow = rte_flow_create(attr->transfer ?
>+   dev->flow_transfer_proxy_port_id : dev->port_id,
>+   attr, items, actions, error);
> return flow;
> }
>
>@@ -5262,7 +5307,8 @@ netdev_dpdk_rte_flow_query_count(struct netde

Re: [ovs-dev] [PATCH V2 3/3] netdev-offload-dpdk: Add geneve header pattern match

2022-05-18 Thread Eli Britstein via dev
  1.  The focus was VXLAN. Need to enhance DPDK to support it. If this is not 
important, let’s abandon this patch-set until DPDK is enhanced.
  2.  There is no need. DPDK has only specific encaps for VXLAN/NVGRE. Other 
encaps are done only by “RAW”. In OVS VXLAN is used if applicable, and fallback 
to RAW. Geneve is under this category.

From: Hemal Shah 
Sent: Wednesday, May 18, 2022 6:02 PM
To: Eli Britstein 
Cc: Ilya Maximets ; d...@openvswitch.org
Subject: Re: [ovs-dev] [PATCH V2 3/3] netdev-offload-dpdk: Add geneve header 
pattern match

Eli,

I'm trying to understand options handling during Geneve encap/decap offload.

  1.  This patchset will allow decap offload for Geneve w/o options only. I do 
not think that covers important use cases for Geneve. Geneve was meant to be 
used with options. Why is the limitation of not having options support in 
struct rte_flow_restore_info gating the offload design?
  2.  I have not seen a companion patch supporting encap offload for Geneve.
Does a similar restriction of not offloading Geneve w/ options apply to the
encap offload?
Hemal

On Sat, May 7, 2022 at 10:07 PM Eli Britstein via dev 
mailto:ovs-dev@openvswitch.org>> wrote:


>-Original Message-
>From: Ilya Maximets mailto:i.maxim...@ovn.org>>
>Sent: Wednesday, May 4, 2022 2:44 PM
>To: Eli Britstein mailto:el...@nvidia.com>>; 
>d...@openvswitch.org<mailto:d...@openvswitch.org>
>Cc: i.maxim...@ovn.org<mailto:i.maxim...@ovn.org>; Gaetan Rivet 
>mailto:gaet...@nvidia.com>>;
>msant...@redhat.com<mailto:msant...@redhat.com>; Nir Anteby 
>mailto:nant...@nvidia.com>>
>Subject: Re: [PATCH V2 3/3] netdev-offload-dpdk: Add geneve header pattern
>match
>
>External email: Use caution opening links or attachments
>
>
>On 2/7/22 18:24, Eli Britstein wrote:
>> Add support for matching on geneve header.
>>
>> Signed-off-by: Eli Britstein mailto:el...@nvidia.com>>
>> Reviewed-by: Nir Anteby mailto:nant...@nvidia.com>>
>> Acked-by: Michael Santana mailto:msant...@redhat.com>>
>> ---
>>  NEWS  |  2 ++
>>  lib/netdev-offload-dpdk.c | 58
>> +++
>>  2 files changed, 60 insertions(+)
>>
>> diff --git a/NEWS b/NEWS
>> index e1c48f3a1..41a80d127 100644
>> --- a/NEWS
>> +++ b/NEWS
>> @@ -29,6 +29,8 @@ v2.17.0 - xx xxx 
>>   * Add support for DPDK 21.11.
>>   * Forbid use of DPDK multiprocess feature.
>>   * Add support for running threads on cores >= RTE_MAX_LCORE.
>> + * Add hardware offload support for GENEVE flows (experimental).
>> +   Available only if DPDK experimantal APIs enabled during the build.
>> - Python:
>>   * For SSL support, the use of the pyOpenSSL library has been replaced
>> with the native 'ssl' module.
>> diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
>> index edd4e009d..0303bd2df 100644
>> --- a/lib/netdev-offload-dpdk.c
>> +++ b/lib/netdev-offload-dpdk.c
>> @@ -638,6 +638,24 @@ dump_flow_pattern(struct ds *s,
>>ntohl(*key_spec), ntohl(*key_mask), 0);
>>  }
>>  ds_put_cstr(s, "/ ");
>> +} else if (item->type == RTE_FLOW_ITEM_TYPE_GENEVE) {
>> +const struct rte_flow_item_geneve *geneve_spec = item->spec;
>> +const struct rte_flow_item_geneve *geneve_mask = item->mask;
>> +ovs_be32 spec_vni, mask_vni;
>> +
>> +ds_put_cstr(s, "geneve ");
>> +if (geneve_spec) {
>> +if (!geneve_mask) {
>> +geneve_mask = _flow_item_geneve_mask;
>> +}
>> +spec_vni = get_unaligned_be32(ALIGNED_CAST(ovs_be32 *,
>> +   geneve_spec->vni));
>> +mask_vni = get_unaligned_be32(ALIGNED_CAST(ovs_be32 *,
>> +   geneve_mask->vni));
>> +DUMP_PATTERN_ITEM(geneve_mask->vni, false, "vni", "%"PRIu32,
>> +  ntohl(spec_vni) >> 8, ntohl(mask_vni) >> 8, 
>> 0);
>> +}
>> +ds_put_cstr(s, "/ ");
>>  } else {
>>  ds_put_format(s, "unknown rte flow pattern (%d)\n", item->type);
>>  }
>> @@ -1351,6 +1369,44 @@ parse_gre_match(struct flow_patterns
>*patterns,
>>  return 0;
>>  }
>>
>> +static int
>> +parse_geneve_match(struct flow_patterns *patterns,
>> +   struct match *match) {
>> +struc

Re: [ovs-dev] [PATCH V2 3/3] netdev-offload-dpdk: Add geneve header pattern match

2022-05-07 Thread Eli Britstein via dev



>-Original Message-
>From: Ilya Maximets 
>Sent: Wednesday, May 4, 2022 2:44 PM
>To: Eli Britstein ; d...@openvswitch.org
>Cc: i.maxim...@ovn.org; Gaetan Rivet ;
>msant...@redhat.com; Nir Anteby 
>Subject: Re: [PATCH V2 3/3] netdev-offload-dpdk: Add geneve header pattern
>match
>
>External email: Use caution opening links or attachments
>
>
>On 2/7/22 18:24, Eli Britstein wrote:
>> Add support for matching on geneve header.
>>
>> Signed-off-by: Eli Britstein 
>> Reviewed-by: Nir Anteby 
>> Acked-by: Michael Santana 
>> ---
>>  NEWS  |  2 ++
>>  lib/netdev-offload-dpdk.c | 58
>> +++
>>  2 files changed, 60 insertions(+)
>>
>> diff --git a/NEWS b/NEWS
>> index e1c48f3a1..41a80d127 100644
>> --- a/NEWS
>> +++ b/NEWS
>> @@ -29,6 +29,8 @@ v2.17.0 - xx xxx 
>>   * Add support for DPDK 21.11.
>>   * Forbid use of DPDK multiprocess feature.
>>   * Add support for running threads on cores >= RTE_MAX_LCORE.
>> + * Add hardware offload support for GENEVE flows (experimental).
>> +   Available only if DPDK experimantal APIs enabled during the build.
>> - Python:
>>   * For SSL support, the use of the pyOpenSSL library has been replaced
>> with the native 'ssl' module.
>> diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
>> index edd4e009d..0303bd2df 100644
>> --- a/lib/netdev-offload-dpdk.c
>> +++ b/lib/netdev-offload-dpdk.c
>> @@ -638,6 +638,24 @@ dump_flow_pattern(struct ds *s,
>>ntohl(*key_spec), ntohl(*key_mask), 0);
>>  }
>>  ds_put_cstr(s, "/ ");
>> +} else if (item->type == RTE_FLOW_ITEM_TYPE_GENEVE) {
>> +const struct rte_flow_item_geneve *geneve_spec = item->spec;
>> +const struct rte_flow_item_geneve *geneve_mask = item->mask;
>> +ovs_be32 spec_vni, mask_vni;
>> +
>> +ds_put_cstr(s, "geneve ");
>> +if (geneve_spec) {
>> +if (!geneve_mask) {
>> +geneve_mask = _flow_item_geneve_mask;
>> +}
>> +spec_vni = get_unaligned_be32(ALIGNED_CAST(ovs_be32 *,
>> +   geneve_spec->vni));
>> +mask_vni = get_unaligned_be32(ALIGNED_CAST(ovs_be32 *,
>> +   geneve_mask->vni));
>> +DUMP_PATTERN_ITEM(geneve_mask->vni, false, "vni", "%"PRIu32,
>> +  ntohl(spec_vni) >> 8, ntohl(mask_vni) >> 8, 
>> 0);
>> +}
>> +ds_put_cstr(s, "/ ");
>>  } else {
>>  ds_put_format(s, "unknown rte flow pattern (%d)\n", item->type);
>>  }
>> @@ -1351,6 +1369,44 @@ parse_gre_match(struct flow_patterns
>*patterns,
>>  return 0;
>>  }
>>
>> +static int
>> +parse_geneve_match(struct flow_patterns *patterns,
>> +   struct match *match) {
>> +struct rte_flow_item_geneve *geneve_spec, *geneve_mask;
>> +struct flow *consumed_masks;
>> +int ret;
>> +
>> +ret = parse_tnl_ip_match(patterns, match, IPPROTO_UDP);
>> +if (ret) {
>> +return -1;
>> +}
>> +parse_tnl_udp_match(patterns, match);
>> +
>> +consumed_masks = >wc.masks;
>> +/* GENEVE */
>> +geneve_spec = xzalloc(sizeof *geneve_spec);
>> +geneve_mask = xzalloc(sizeof *geneve_mask);
>> +
>> +put_unaligned_be32(ALIGNED_CAST(ovs_be32 *, geneve_spec->vni),
>> +   htonl(ntohll(match->flow.tunnel.tun_id) << 8));
>> +put_unaligned_be32(ALIGNED_CAST(ovs_be32 *, geneve_mask->vni),
>> +   htonl(ntohll(match->wc.masks.tunnel.tun_id) <<
>> + 8));
>> +
>> +consumed_masks->tunnel.tun_id = 0;
>> +consumed_masks->tunnel.flags = 0;
>> +/* tunnel.metadata.present.len value indicates the number of
>> + * options, it's mask does not indicate any match on the packet,
>> + * thus masked.
>
>I'm not sure I get that.  Options are part of the geneve header, so if the 
>match
>is requested, we have to match on them.  And there is a special item for them
>- RTE_FLOW_ITEM_TYPE_GENEVE_OPT, which, I think, should be used in this
>patch.
It is correct that DPDK supports this flow item. However, DPDK doesn't support
options in 'struct rte_flow_restore_info'. Therefore, matching on options
currently cannot be supported. Options are *optional*. This patch-set can
support geneve w/o options only. If there are options to be matched, they are
not cleared from the masks, and the parsing fails.
>
>It also not clear why flags are cleared without handling them.
There is no flags field in the geneve header, except maybe OAM. I can add it
after addressing the previous comment.
>
>Best regards, Ilya Maximets.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH] docs: Note ALLOW_EXPERIMENTAL_API for tunnel offloads

2022-04-17 Thread Eli Britstein via dev
Tunnel offload APIs have the '__rte_experimental' attribute, and are therefore
available only if ALLOW_EXPERIMENTAL_API is defined. Document it.

Signed-off-by: Eli Britstein 
---
 Documentation/howto/dpdk.rst | 4 
 1 file changed, 4 insertions(+)

diff --git a/Documentation/howto/dpdk.rst b/Documentation/howto/dpdk.rst
index 81f236d3b..04609b20b 100644
--- a/Documentation/howto/dpdk.rst
+++ b/Documentation/howto/dpdk.rst
@@ -402,6 +402,10 @@ Supported actions for hardware offload are:
 - Clone/output (tnl_push and output) for encapsulating over a tunnel.
 - Tunnel pop, for packets received on physical ports.
 
+.. note::
+  Tunnel offloads are experimental APIs in DPDK. In order to enable it,
+  compile with -DALLOW_EXPERIMENTAL_API.
+
 Multiprocess
 
 
-- 
2.26.2.1730.g385c171

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH V4 1/2] netdev-offload-dpdk: Use has_vlan match attribute

2022-03-16 Thread Eli Britstein via dev



> -Original Message-
> From: Ilya Maximets 
> Sent: Wednesday, March 16, 2022 2:43 PM
> To: Eli Britstein ; d...@openvswitch.org; Emma Finn
> 
> Cc: i.maxim...@ovn.org; Ian Stokes 
> Subject: Re: [ovs-dev] [PATCH V4 1/2] netdev-offload-dpdk: Use has_vlan
> match attribute
> 
> External email: Use caution opening links or attachments
> 
> 
> On 2/7/22 17:56, Eli Britstein via dev wrote:
> > DPDK 20.11 introduced an ability to specify existence/non-existence of
> > VLAN tag by [1].
> > Use this attribute.
> >
> > [1]: 09315fc83861 ("ethdev: add VLAN attributes to ethernet and VLAN
> > items")
> 
> Hi, Eli.  I'm afraid we still can't use the 'has_vlan' item until there are 
> drivers
> that silently ignore it.  And, unfortunately, there are many of them.  I 
> created
> a DPDK bug for that issue:
AFAIU, the problem is not with drivers that silently ignore the flag, but with
drivers that fail validation when this flag is used.
If a driver silently ignores it, the behavior is the same as if the flag were
not used at all.
> 
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fbugs
> .dpdk.org%2Fshow_bug.cgi%3Fid%3D958data=04%7C01%7Celibr%40
> nvidia.com%7C27fe1d1dcfdc4bf4a2f708da074a8809%7C43083d15727340c1
> b7db39efd9ccc17a%7C0%7C0%7C637830314377416526%7CUnknown%7CT
> WFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLC
> JXVCI6Mn0%3D%7C3000sdata=y1C4%2BFZ5kv6tYsV%2B5muxyuP86X
> S24LQPx5Nx4nGzPuc%3Dreserved=0
> And sent a patch to mark drivers with partial support, as Thomas
> suggested:
> 
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fpatc
> hes.dpdk.org%2Fproject%2Fdpdk%2Fpatch%2F20220316120157.390311-1-
> i.maximets%40ovn.org%2Fdata=04%7C01%7Celibr%40nvidia.com%7
> C27fe1d1dcfdc4bf4a2f708da074a8809%7C43083d15727340c1b7db39efd9cc
> c17a%7C0%7C0%7C637830314377416526%7CUnknown%7CTWFpbGZsb3d8
> eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3
> D%7C3000sdata=ZuUeoPofJpGQxWaIbYnpO7dehehFkVymJ%2Btx5Dr3
> fC8%3Dreserved=0
> 
> Is there a way to fix the issue without using the 'has_vlan' field?
I don't think so. The problem is a missing match, so packets hit a wrong 
offloaded flow instead of missing and causing a correct flow to be created.
> 
> Emma, you said that you will ask about support in i40e driver.
> Is there any progress on that front?
> 
> Best regards, Ilya Maximets.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V2 3/3] netdev-offload-dpdk: Add geneve header pattern match

2022-02-07 Thread Eli Britstein via dev
Add support for matching on geneve header.

Signed-off-by: Eli Britstein 
Reviewed-by: Nir Anteby 
Acked-by: Michael Santana 
---
 NEWS  |  2 ++
 lib/netdev-offload-dpdk.c | 58 +++
 2 files changed, 60 insertions(+)

diff --git a/NEWS b/NEWS
index e1c48f3a1..41a80d127 100644
--- a/NEWS
+++ b/NEWS
@@ -29,6 +29,8 @@ v2.17.0 - xx xxx 
  * Add support for DPDK 21.11.
  * Forbid use of DPDK multiprocess feature.
  * Add support for running threads on cores >= RTE_MAX_LCORE.
+ * Add hardware offload support for GENEVE flows (experimental).
+   Available only if DPDK experimantal APIs enabled during the build.
- Python:
  * For SSL support, the use of the pyOpenSSL library has been replaced
with the native 'ssl' module.
diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index edd4e009d..0303bd2df 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -638,6 +638,24 @@ dump_flow_pattern(struct ds *s,
   ntohl(*key_spec), ntohl(*key_mask), 0);
 }
 ds_put_cstr(s, "/ ");
+} else if (item->type == RTE_FLOW_ITEM_TYPE_GENEVE) {
+const struct rte_flow_item_geneve *geneve_spec = item->spec;
+const struct rte_flow_item_geneve *geneve_mask = item->mask;
+ovs_be32 spec_vni, mask_vni;
+
+ds_put_cstr(s, "geneve ");
+if (geneve_spec) {
+if (!geneve_mask) {
+geneve_mask = _flow_item_geneve_mask;
+}
+spec_vni = get_unaligned_be32(ALIGNED_CAST(ovs_be32 *,
+   geneve_spec->vni));
+mask_vni = get_unaligned_be32(ALIGNED_CAST(ovs_be32 *,
+   geneve_mask->vni));
+DUMP_PATTERN_ITEM(geneve_mask->vni, false, "vni", "%"PRIu32,
+  ntohl(spec_vni) >> 8, ntohl(mask_vni) >> 8, 0);
+}
+ds_put_cstr(s, "/ ");
 } else {
 ds_put_format(s, "unknown rte flow pattern (%d)\n", item->type);
 }
@@ -1351,6 +1369,44 @@ parse_gre_match(struct flow_patterns *patterns,
 return 0;
 }
 
+static int
+parse_geneve_match(struct flow_patterns *patterns,
+   struct match *match)
+{
+struct rte_flow_item_geneve *geneve_spec, *geneve_mask;
+struct flow *consumed_masks;
+int ret;
+
+ret = parse_tnl_ip_match(patterns, match, IPPROTO_UDP);
+if (ret) {
+return -1;
+}
+parse_tnl_udp_match(patterns, match);
+
+consumed_masks = >wc.masks;
+/* GENEVE */
+geneve_spec = xzalloc(sizeof *geneve_spec);
+geneve_mask = xzalloc(sizeof *geneve_mask);
+
+put_unaligned_be32(ALIGNED_CAST(ovs_be32 *, geneve_spec->vni),
+   htonl(ntohll(match->flow.tunnel.tun_id) << 8));
+put_unaligned_be32(ALIGNED_CAST(ovs_be32 *, geneve_mask->vni),
+   htonl(ntohll(match->wc.masks.tunnel.tun_id) << 8));
+
+consumed_masks->tunnel.tun_id = 0;
+consumed_masks->tunnel.flags = 0;
+/* tunnel.metadata.present.len value indicates the number of
+ * options, it's mask does not indicate any match on the packet,
+ * thus masked.
+ */
+memset(_masks->tunnel.metadata.present, 0,
+   sizeof consumed_masks->tunnel.metadata.present);
+
+add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_GENEVE, geneve_spec,
+ geneve_mask, NULL);
+return 0;
+}
+
 static int OVS_UNUSED
 parse_flow_tnl_match(struct netdev *tnldev,
  struct flow_patterns *patterns,
@@ -1366,6 +1422,8 @@ parse_flow_tnl_match(struct netdev *tnldev,
 
 if (!strcmp(netdev_get_type(tnldev), "vxlan")) {
 ret = parse_vxlan_match(patterns, match);
+} else if (!strcmp(netdev_get_type(tnldev), "geneve")) {
+ret = parse_geneve_match(patterns, match);
 }
 else if (!strcmp(netdev_get_type(tnldev), "gre")) {
 ret = parse_gre_match(patterns, match);
-- 
2.28.0.2311.g225365fb51

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V2 0/3] Support geneve offloads

2022-02-07 Thread Eli Britstein via dev
Add support for dpdk geneve tunnel offloads.

v2-v1:
- Rebase.

GitHub actions:
v1: https://github.com/elibritstein/OVS/actions/runs/1633157455
v2: https://github.com/elibritstein/OVS/actions/runs/1807027490

Eli Britstein (3):
  netdev-dpdk: Add flow_api support for netdev geneve vports
  netdev-offload-dpdk: Support tnl_pop for geneve tunnel
  netdev-offload-dpdk: Add geneve header pattern match

 NEWS  |  2 ++
 lib/netdev-dpdk.c |  3 +-
 lib/netdev-offload-dpdk.c | 71 +++
 3 files changed, 75 insertions(+), 1 deletion(-)

-- 
2.28.0.2311.g225365fb51

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V2 2/3] netdev-offload-dpdk: Support tnl_pop for geneve tunnel

2022-02-07 Thread Eli Britstein via dev
Signed-off-by: Eli Britstein 
Reviewed-by: Nir Anteby 
Acked-by: Michael Santana 
---
 lib/netdev-offload-dpdk.c | 13 +
 1 file changed, 13 insertions(+)

diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index 94dc6a9b7..edd4e009d 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -1109,6 +1109,17 @@ vport_to_rte_tunnel(struct netdev *vport,
 ds_put_format(s_tnl, "flow tunnel create %d type gre; ",
   netdev_dpdk_get_port_id(netdev));
 }
+} else if (!strcmp(netdev_get_type(vport), "geneve")) {
+tunnel->type = RTE_FLOW_ITEM_TYPE_GENEVE;
+tnl_cfg = netdev_get_tunnel_config(vport);
+if (!tnl_cfg) {
+return -1;
+}
+tunnel->tp_dst = tnl_cfg->dst_port;
+if (!VLOG_DROP_DBG()) {
+ds_put_format(s_tnl, "flow tunnel create %d type geneve; ",
+  netdev_dpdk_get_port_id(netdev));
+}
 } else {
 VLOG_DBG_RL(, "vport type '%s' is not supported",
 netdev_get_type(vport));
@@ -2582,6 +2593,8 @@ get_vport_netdev(const char *dpif_type,
 aux.type = "vxlan";
 } else if (tunnel->type == RTE_FLOW_ITEM_TYPE_GRE) {
 aux.type = "gre";
+} else if (tunnel->type == RTE_FLOW_ITEM_TYPE_GENEVE) {
+aux.type = "geneve";
 }
 netdev_ports_traverse(dpif_type, get_vport_netdev_cb, );
 
-- 
2.28.0.2311.g225365fb51

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V2 1/3] netdev-dpdk: Add flow_api support for netdev geneve vports

2022-02-07 Thread Eli Britstein via dev
Add the acceptance of geneve devices to netdev_dpdk_flow_api_supported()
API, to allow offloading of DPDK geneve devices.

Signed-off-by: Eli Britstein 
Reviewed-by: Nir Anteby 
Acked-by: Michael Santana 
---
 lib/netdev-dpdk.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index b6b29c75e..0b600f285 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -5196,7 +5196,8 @@ netdev_dpdk_flow_api_supported(struct netdev *netdev)
 bool ret = false;
 
 if ((!strcmp(netdev_get_type(netdev), "vxlan") ||
- !strcmp(netdev_get_type(netdev), "gre")) &&
+ !strcmp(netdev_get_type(netdev), "gre") ||
+ !strcmp(netdev_get_type(netdev), "geneve")) &&
 !strcmp(netdev_get_dpif_type(netdev), "netdev")) {
 ret = true;
 goto out;
-- 
2.28.0.2311.g225365fb51

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V2 1/1] dpif-netdev: Keep orig_in_port as a field of the flow

2022-02-07 Thread Eli Britstein via dev
A flow may be modified after its initial offload failed. In this case,
according to [1], the modification is handled as a flow add.
For a vport flow "add", the orig_in_port should be provided.
Keep that field in the flow struct, so it can be provided in the flow
modification use case.

[1] 0d25621e4d9f ("dpif-netdev: Fix flow modification after failure.")

Fixes: b5e6f6f6bfbe ("dpif-netdev: Provide orig_in_port in metadata for 
tunneled packets.")
Signed-off-by: Eli Britstein 
---
 lib/dpif-netdev-private-flow.h | 1 +
 lib/dpif-netdev.c  | 9 +
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/lib/dpif-netdev-private-flow.h b/lib/dpif-netdev-private-flow.h
index 66016eb09..7425dd44e 100644
--- a/lib/dpif-netdev-private-flow.h
+++ b/lib/dpif-netdev-private-flow.h
@@ -104,6 +104,7 @@ struct dp_netdev_flow {
 bool dead;
 uint32_t mark;   /* Unique flow mark for netdev offloading. */
 uint64_t simple_match_mark;  /* Unique flow mark for the simple match. */
+odp_port_t orig_in_port;
 
 /* Statistics. */
 struct dp_netdev_flow_stats stats;
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index e28e0b554..b4ff515a1 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -2993,7 +2993,7 @@ static void
 queue_netdev_flow_put(struct dp_netdev_pmd_thread *pmd,
   struct dp_netdev_flow *flow, struct match *match,
   const struct nlattr *actions, size_t actions_len,
-  odp_port_t orig_in_port, int op)
+  int op)
 {
 struct dp_offload_thread_item *item;
 struct dp_offload_flow_item *flow_offload;
@@ -3008,7 +3008,7 @@ queue_netdev_flow_put(struct dp_netdev_pmd_thread *pmd,
 flow_offload->actions = xmalloc(actions_len);
 memcpy(flow_offload->actions, actions, actions_len);
 flow_offload->actions_len = actions_len;
-flow_offload->orig_in_port = orig_in_port;
+flow_offload->orig_in_port = flow->orig_in_port;
 
 item->timestamp = pmd->ctx.now;
 dp_netdev_offload_flow_enqueue(item);
@@ -4084,6 +4084,7 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd,
 flow->dead = false;
 flow->batch = NULL;
 flow->mark = INVALID_FLOW_MARK;
+flow->orig_in_port = orig_in_port;
 *CONST_CAST(unsigned *, >pmd_id) = pmd->core_id;
 *CONST_CAST(struct flow *, >flow) = match->flow;
 *CONST_CAST(ovs_u128 *, >ufid) = *ufid;
@@ -4118,7 +4119,7 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd,
 }
 
 queue_netdev_flow_put(pmd, flow, match, actions, actions_len,
-  orig_in_port, DP_NETDEV_FLOW_OFFLOAD_OP_ADD);
+  DP_NETDEV_FLOW_OFFLOAD_OP_ADD);
 log_netdev_flow_change(flow, match, NULL, actions, actions_len);
 
 return flow;
@@ -4160,7 +4161,7 @@ flow_put_on_pmd(struct dp_netdev_pmd_thread *pmd,
 ovsrcu_set(_flow->actions, new_actions);
 
 queue_netdev_flow_put(pmd, netdev_flow, match,
-  put->actions, put->actions_len, ODPP_NONE,
+  put->actions, put->actions_len,
   DP_NETDEV_FLOW_OFFLOAD_OP_MOD);
 log_netdev_flow_change(netdev_flow, match, old_actions,
put->actions, put->actions_len);
-- 
2.28.0.2311.g225365fb51

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V4 2/2] netdev-offload-dpdk: Fix ethernet type for VLANs

2022-02-07 Thread Eli Britstein via dev
For VLANs, the match of ethernet type should be specified in inner_type
field of the vlan match, and not type field in ethernet match.
Fix it.

Fixes: e8a2b5bf92bb ("netdev-dpdk: implement flow offload with rte flow")
Signed-off-by: Eli Britstein 
Reviewed-by: Salem Sol 
---
 lib/netdev-offload-dpdk.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index e0d56abc1..12d299603 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -1438,12 +1438,13 @@ parse_flow_match(struct netdev *netdev,
 spec->tci = match->flow.vlans[0].tci & ~htons(VLAN_CFI);
 mask->tci = match->wc.masks.vlans[0].tci & ~htons(VLAN_CFI);
 
-/* Match any protocols. */
-mask->inner_type = 0;
-
 if (eth_spec && eth_mask) {
 eth_spec->has_vlan = 1;
 eth_mask->has_vlan = 1;
+spec->inner_type = eth_spec->type;
+mask->inner_type = eth_mask->type;
+eth_spec->type = match->flow.vlans[0].tpid;
+eth_mask->type = match->wc.masks.vlans[0].tpid;
 }
 
 add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_VLAN, spec, mask, NULL);
-- 
2.28.0.2311.g225365fb51

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V4 1/2] netdev-offload-dpdk: Use has_vlan match attribute

2022-02-07 Thread Eli Britstein via dev
DPDK 20.11 introduced an ability to specify existence/non-existence of
VLAN tag by [1].
Use this attribute.

[1]: 09315fc83861 ("ethdev: add VLAN attributes to ethernet and VLAN items")

Signed-off-by: Eli Britstein 
Reviewed-by: Salem Sol 
---
 lib/netdev-offload-dpdk.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index 94dc6a9b7..e0d56abc1 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -363,6 +363,8 @@ dump_flow_pattern(struct ds *s,
 
 ds_put_cstr(s, "eth ");
 if (eth_spec) {
+uint32_t has_vlan_mask;
+
 if (!eth_mask) {
 eth_mask = _flow_item_eth_mask;
 }
@@ -377,6 +379,9 @@ dump_flow_pattern(struct ds *s,
 DUMP_PATTERN_ITEM(eth_mask->type, false, "type", "0x%04"PRIx16,
   ntohs(eth_spec->type),
   ntohs(eth_mask->type), 0);
+has_vlan_mask = eth_mask->has_vlan ? UINT32_MAX : 0;
+DUMP_PATTERN_ITEM(has_vlan_mask, false, "has_vlan", "%d",
+  eth_spec->has_vlan, eth_mask->has_vlan, 0);
 }
 ds_put_cstr(s, "/ ");
 } else if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) {
@@ -1369,6 +1374,7 @@ parse_flow_match(struct netdev *netdev,
  struct flow_patterns *patterns,
  struct match *match)
 {
+struct rte_flow_item_eth *eth_spec = NULL, *eth_mask = NULL;
 struct flow *consumed_masks;
 uint8_t proto = 0;
 
@@ -1414,6 +1420,11 @@ parse_flow_match(struct netdev *netdev,
 memset(_masks->dl_src, 0, sizeof consumed_masks->dl_src);
 consumed_masks->dl_type = 0;
 
+spec->has_vlan = 0;
+mask->has_vlan = 1;
+eth_spec = spec;
+eth_mask = mask;
+
 add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_ETH, spec, mask, NULL);
 }
 
@@ -1430,6 +1441,11 @@ parse_flow_match(struct netdev *netdev,
 /* Match any protocols. */
 mask->inner_type = 0;
 
+if (eth_spec && eth_mask) {
+eth_spec->has_vlan = 1;
+eth_mask->has_vlan = 1;
+}
+
 add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_VLAN, spec, mask, NULL);
 }
 /* For untagged matching match->wc.masks.vlans[0].tci is 0x and
-- 
2.28.0.2311.g225365fb51

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH dpdk-latest] ci: Remove -Wno-cast-align from CI

2022-01-04 Thread Eli Britstein via dev


On 1/4/2022 4:56 PM, Eelco Chaudron wrote:

External email: Use caution opening links or attachments


On 5 Dec 2021, at 8:34, Eli Britstein via dev wrote:


Following [1]-[3] in DPDK, there are no more such warnings from DPDK.
Remove ignoring them if they occur.

GitHub actions:
v1: https://github.com/elibritstein/OVS/actions/runs/1540651133

[1] a3f8d0587188 ("net: avoid cast-align warning in VLAN insert function")
[2] da0333c8790b ("mbuf: avoid cast-align warning in data offset macro")
[3] 6de430b7079e ("eal/x86: avoid cast-align warning in memcpy functions")


Changes look fine to me; maybe you can include Fixes tags to mention the 
commits that added these?


It was inevitable from day 1, since DPDK always had those issues (until 
now), so it was not a "bug" to fix now.


For utilities/ovs-dev.py, the "-W" is there from the first commit that 
added dpdk support:


25dfecf88742 ("ovs-dev.py: Add support for dpdk builds.")

For .ci/linux-build.sh, it's added in this commit:

ecc3c395b5a6 ("travis: Fix DPDK build and treat bad-function-cast 
warning as non-error")


See in its commit message:

    Due to incorrect casts in the DPDK headers, we have to disable
    bad-function-cast and cast-align warnings as being treated as errors
    for now.



Acked-by: Eelco Chaudron 



Signed-off-by: Eli Britstein 
---
  .ci/linux-build.sh   | 4 
  utilities/ovs-dev.py | 1 -
  2 files changed, 5 deletions(-)

diff --git a/.ci/linux-build.sh b/.ci/linux-build.sh
index e20cc6ad0..65578880b 100755
--- a/.ci/linux-build.sh
+++ b/.ci/linux-build.sh
@@ -226,10 +226,6 @@ if [ "$DPDK" ]; then
  DPDK_VER="20.11.1"
  fi
  install_dpdk $DPDK_VER
-if [ "$CC" = "clang" ]; then
-# Disregard cast alignment errors until DPDK is fixed
-CFLAGS_FOR_OVS="${CFLAGS_FOR_OVS} -Wno-cast-align"
-fi
  if [ -n "$DPDK_EXPERIMENTAL" ]; then
  CFLAGS_FOR_OVS="${CFLAGS_FOR_OVS} -DALLOW_EXPERIMENTAL_API"
  fi
diff --git a/utilities/ovs-dev.py b/utilities/ovs-dev.py
index c45788acd..534c5e7f1 100755
--- a/utilities/ovs-dev.py
+++ b/utilities/ovs-dev.py
@@ -90,7 +90,6 @@ def conf():

  if options.with_dpdk:
  configure.append("--with-dpdk=" + options.with_dpdk)
-cflags += " -Wno-cast-align -Wno-bad-function-cast"  # DPDK warnings.

Guess this script only works with older kernels due to always including the 
--with-linux= option.


  if options.optimize is None:
  options.optimize = 0
--
2.28.0.2311.g225365fb51

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 3/3] netdev-offload-dpdk: Add geneve header pattern match

2021-12-29 Thread Eli Britstein via dev
Add support for matching on geneve header.

Signed-off-by: Eli Britstein 
Reviewed-by: Nir Anteby 
---
 NEWS  |  2 ++
 lib/netdev-offload-dpdk.c | 58 +++
 2 files changed, 60 insertions(+)

diff --git a/NEWS b/NEWS
index bc4a1cfac..18f15b61f 100644
--- a/NEWS
+++ b/NEWS
@@ -14,6 +14,8 @@ Post-v2.16.0
  * Add hardware offload support for GRE flows (experimental).
Available only if DPDK experimantal APIs enabled during the build.
  * Add support for DPDK 21.11.
+ * Add hardware offload support for GENEVE flows (experimental).
+   Available only if DPDK experimantal APIs enabled during the build.
- Python:
  * For SSL support, the use of the pyOpenSSL library has been replaced
with the native 'ssl' module.
diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index 50650bd07..36b780be6 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -491,6 +491,24 @@ dump_flow_pattern(struct ds *s,
   ntohl(*key_spec), ntohl(*key_mask), 0);
 }
 ds_put_cstr(s, "/ ");
+} else if (item->type == RTE_FLOW_ITEM_TYPE_GENEVE) {
+const struct rte_flow_item_geneve *geneve_spec = item->spec;
+const struct rte_flow_item_geneve *geneve_mask = item->mask;
+ovs_be32 spec_vni, mask_vni;
+
+ds_put_cstr(s, "geneve ");
+if (geneve_spec) {
+if (!geneve_mask) {
+geneve_mask = &rte_flow_item_geneve_mask;
+}
+spec_vni = get_unaligned_be32(ALIGNED_CAST(ovs_be32 *,
+   geneve_spec->vni));
+mask_vni = get_unaligned_be32(ALIGNED_CAST(ovs_be32 *,
+   geneve_mask->vni));
+DUMP_PATTERN_ITEM(geneve_mask->vni, false, "vni", "%"PRIu32,
+  ntohl(spec_vni) >> 8, ntohl(mask_vni) >> 8, 0);
+}
+ds_put_cstr(s, "/ ");
 } else {
 ds_put_format(s, "unknown rte flow pattern (%d)\n", item->type);
 }
@@ -1197,6 +1215,44 @@ parse_gre_match(struct flow_patterns *patterns,
 return 0;
 }
 
+static int
+parse_geneve_match(struct flow_patterns *patterns,
+   struct match *match)
+{
+struct rte_flow_item_geneve *geneve_spec, *geneve_mask;
+struct flow *consumed_masks;
+int ret;
+
+ret = parse_tnl_ip_match(patterns, match, IPPROTO_UDP);
+if (ret) {
+return -1;
+}
+parse_tnl_udp_match(patterns, match);
+
+consumed_masks = &match->wc.masks;
+/* GENEVE */
+geneve_spec = xzalloc(sizeof *geneve_spec);
+geneve_mask = xzalloc(sizeof *geneve_mask);
+
+put_unaligned_be32(ALIGNED_CAST(ovs_be32 *, geneve_spec->vni),
+   htonl(ntohll(match->flow.tunnel.tun_id) << 8));
+put_unaligned_be32(ALIGNED_CAST(ovs_be32 *, geneve_mask->vni),
+   htonl(ntohll(match->wc.masks.tunnel.tun_id) << 8));
+
+consumed_masks->tunnel.tun_id = 0;
+consumed_masks->tunnel.flags = 0;
+/* tunnel.metadata.present.len value indicates the number of
+ * options, its mask does not indicate any match on the packet,
+ * thus masked.
+ */
+memset(&consumed_masks->tunnel.metadata.present, 0,
+   sizeof consumed_masks->tunnel.metadata.present);
+
+add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_GENEVE, geneve_spec,
+ geneve_mask, NULL);
+return 0;
+}
+
 static int OVS_UNUSED
 parse_flow_tnl_match(struct netdev *tnldev,
  struct flow_patterns *patterns,
@@ -1212,6 +1268,8 @@ parse_flow_tnl_match(struct netdev *tnldev,
 
 if (!strcmp(netdev_get_type(tnldev), "vxlan")) {
 ret = parse_vxlan_match(patterns, match);
+} else if (!strcmp(netdev_get_type(tnldev), "geneve")) {
+ret = parse_geneve_match(patterns, match);
 }
 else if (!strcmp(netdev_get_type(tnldev), "gre")) {
 ret = parse_gre_match(patterns, match);
-- 
2.28.0.2311.g225365fb51

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 2/3] netdev-offload-dpdk: Support tnl_pop for geneve tunnel

2021-12-29 Thread Eli Britstein via dev
Signed-off-by: Eli Britstein 
Reviewed-by: Nir Anteby 
---
 lib/netdev-offload-dpdk.c | 13 +
 1 file changed, 13 insertions(+)

diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index 402353125..50650bd07 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -955,6 +955,17 @@ vport_to_rte_tunnel(struct netdev *vport,
 ds_put_format(s_tnl, "flow tunnel create %d type gre; ",
   netdev_dpdk_get_port_id(netdev));
 }
+} else if (!strcmp(netdev_get_type(vport), "geneve")) {
+tunnel->type = RTE_FLOW_ITEM_TYPE_GENEVE;
+tnl_cfg = netdev_get_tunnel_config(vport);
+if (!tnl_cfg) {
+return -1;
+}
+tunnel->tp_dst = tnl_cfg->dst_port;
+if (!VLOG_DROP_DBG()) {
+ds_put_format(s_tnl, "flow tunnel create %d type geneve; ",
+  netdev_dpdk_get_port_id(netdev));
+}
 } else {
 VLOG_DBG_RL(&rl, "vport type '%s' is not supported",
 netdev_get_type(vport));
@@ -2378,6 +2389,8 @@ get_vport_netdev(const char *dpif_type,
 aux.type = "vxlan";
 } else if (tunnel->type == RTE_FLOW_ITEM_TYPE_GRE) {
 aux.type = "gre";
+} else if (tunnel->type == RTE_FLOW_ITEM_TYPE_GENEVE) {
+aux.type = "geneve";
 }
 netdev_ports_traverse(dpif_type, get_vport_netdev_cb, &aux);
 
-- 
2.28.0.2311.g225365fb51

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 0/3] Support geneve offloads

2021-12-29 Thread Eli Britstein via dev
Add support for dpdk geneve tunnel offloads.

GitHub actions:
v1: https://github.com/elibritstein/OVS/actions/runs/1633157455

Eli Britstein (3):
  netdev-dpdk: Add flow_api support for netdev geneve vports
  netdev-offload-dpdk: Support tnl_pop for geneve tunnel
  netdev-offload-dpdk: Add geneve header pattern match

 NEWS  |  2 ++
 lib/netdev-dpdk.c |  3 +-
 lib/netdev-offload-dpdk.c | 71 +++
 3 files changed, 75 insertions(+), 1 deletion(-)

-- 
2.28.0.2311.g225365fb51

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 1/3] netdev-dpdk: Add flow_api support for netdev geneve vports

2021-12-29 Thread Eli Britstein via dev
Add the acceptance of geneve devices to netdev_dpdk_flow_api_supported()
API, to allow offloading of DPDK geneve devices.

Signed-off-by: Eli Britstein 
Reviewed-by: Nir Anteby 
---
 lib/netdev-dpdk.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 6782d3e8f..311e6e065 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -5221,7 +5221,8 @@ netdev_dpdk_flow_api_supported(struct netdev *netdev)
 bool ret = false;
 
 if ((!strcmp(netdev_get_type(netdev), "vxlan") ||
- !strcmp(netdev_get_type(netdev), "gre")) &&
+ !strcmp(netdev_get_type(netdev), "gre") ||
+ !strcmp(netdev_get_type(netdev), "geneve")) &&
 !strcmp(netdev_get_dpif_type(netdev), "netdev")) {
 ret = true;
 goto out;
-- 
2.28.0.2311.g225365fb51

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] netdev-dpdk: Dummy implementation of the rte_flow API for testing purposes.

2021-12-20 Thread Eli Britstein via dev



On 12/17/2021 12:06 AM, Ilya Maximets wrote:

External email: Use caution opening links or attachments


New 'options:dummy-rte-flow' configuration knob for netdev-dpdk
ports, i.e. type=dpdk/dpdkvhostuserclient, to turn on dummy rte_flow
API.  This version of API will do nothing but reply with success to
every request, except for tunnel restoration info, which needs to
report that there is no relevant information.

Use 'name' format for port names in order to get more
distinguishable port ids for virtual ports that doesn't have them.
DPDK port ids will be equal to 1000 + .


To use virtual ports we can use such DPDK ports (vdev attribute, 
"af_packet" for example), so there won't be a need for those fake DPDK 
port numbers.


If we still keep this option, we can simply use 
DPDK_ETH_PORT_ID_INVALID. What further information does this dummy 
port-id provide?




Ex.:

   ovs-vsctl add-port ovsbr vhost0 \
   -- set Interface vhost0 type=dpdkvhostuserclient \
   options:vhost-server-path=/tmp/vhost0 \
   options:dummy-rte-flow=true

In this example, vhost0 will report port_id = 1000 to the offloading
module, therefore that number will be used in rte_flow flows.
Ports with valid DPDK port ids will keep using them.

It's OK to have a dummy implementation that accepts offloading
requests, but does nothing useful, because all the flows are
additionally installed to normal flow tables in userspace datapath
and all flow dumps are managed directly by the netdev-offload-dpdk
module without looking at what is actually installed in HW.

Since some port IDs are fake, it's probably better to not mix
real and dummy offloading.

This feature is useful for testing since it allows to debug a lot of
aspects of flow offloading and netdev-offload-dpdk implementation
in a fully virtual environment.  E.g. should allow writing some system
tests for netdev-offload-dpdk.
Is this commit supposed to be followed by another commit to show the 
benefit of it? testing perhaps?


Option works for physical ports too, so can be used for testing with
physical NICs that doesn't support offload, doesn't support all the
features or if the real offloading is undesirable for some reason.

Since the real DPDK ports are used, traffic can be injected at high
rate to multiple PMD threads, so some sort of stress/performance
testing for the higher layers of offloading infrastructure can be done.

Option is for testing only, so not adding any documentation.
Configuration should be done in the same command in which the port is
added, otherwise flow API will not be initialized.  Disabling in
runtime will not have any effect.  Enabling in runtime after the port
is added may result in inability to remove flows previously installed
by the real rte_flow API.

Signed-off-by: Ilya Maximets 
---
  lib/netdev-dpdk.c | 78 +++
  1 file changed, 65 insertions(+), 13 deletions(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 6782d3e8f..9342b51ca 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -60,6 +60,7 @@
  #include "ovs-rcu.h"
  #include "ovs-thread.h"
  #include "packets.h"
+#include "random.h"
  #include "smap.h"
  #include "sset.h"
  #include "timeval.h"
@@ -519,6 +520,10 @@ struct netdev_dpdk {

  /* VF configuration. */
  struct eth_addr requested_hwaddr;
+
+/* Dummy flow API. */
+bool dummy_rte_flow;
+dpdk_port_t dummy_port_id;
  );

  PADDED_MEMBERS(CACHE_LINE_SIZE,
@@ -1242,6 +1247,9 @@ common_construct(struct netdev *netdev, dpdk_port_t 
port_no,
  dev->started = false;
  dev->reset_needed = false;

+dev->dummy_rte_flow = false;
+dev->dummy_port_id = port_no;
+
  ovsrcu_init(&dev->qos_conf, NULL);

  ovsrcu_init(&dev->ingress_policer, NULL);
@@ -1704,6 +1712,11 @@ netdev_dpdk_get_config(const struct netdev *netdev, 
struct smap *args)
  smap_add_format(args, "configured_tx_queues", "%d", netdev->n_txq);
  smap_add_format(args, "mtu", "%d", dev->mtu);

+if (dev->dummy_rte_flow) {
+/* It's a debug-only option, not reporting when it is disabled. */
+smap_add(args, "dummy-rte-flow", "true");
+}
+
  if (dev->type == DPDK_DEV_ETH) {
  smap_add_format(args, "requested_rxq_descriptors", "%d",
  dev->requested_rxq_size);
@@ -2008,6 +2021,10 @@ netdev_dpdk_set_config(struct netdev *netdev, const 
struct smap *args,
  netdev_request_reconfigure(netdev);
  }

+if (!dev->dummy_rte_flow) {
+dev->dummy_rte_flow = smap_get_bool(args, "dummy-rte-flow", false);
+}
+
  rx_fc_en = smap_get_bool(args, "rx-flow-ctrl", false);
  tx_fc_en = smap_get_bool(args, "tx-flow-ctrl", false);
  autoneg = smap_get_bool(args, "flow-ctrl-autoneg", false);
@@ -2082,6 +2099,11 @@ netdev_dpdk_vhost_client_set_config(struct netdev 
*netdev,
  VLOG_INFO("Max Tx retries for vhost device '%s' set 

[ovs-dev] [PATCH dpdk-latest] ci: Remove -Wno-cast-align from CI

2021-12-04 Thread Eli Britstein via dev
Following [1]-[3] in DPDK, there are no more such warnings from DPDK.
Remove ignoring them if they occur.

GitHub actions:
v1: https://github.com/elibritstein/OVS/actions/runs/1540651133

[1] a3f8d0587188 ("net: avoid cast-align warning in VLAN insert function")
[2] da0333c8790b ("mbuf: avoid cast-align warning in data offset macro")
[3] 6de430b7079e ("eal/x86: avoid cast-align warning in memcpy functions")

Signed-off-by: Eli Britstein 
---
 .ci/linux-build.sh   | 4 
 utilities/ovs-dev.py | 1 -
 2 files changed, 5 deletions(-)

diff --git a/.ci/linux-build.sh b/.ci/linux-build.sh
index e20cc6ad0..65578880b 100755
--- a/.ci/linux-build.sh
+++ b/.ci/linux-build.sh
@@ -226,10 +226,6 @@ if [ "$DPDK" ]; then
 DPDK_VER="20.11.1"
 fi
 install_dpdk $DPDK_VER
-if [ "$CC" = "clang" ]; then
-# Disregard cast alignment errors until DPDK is fixed
-CFLAGS_FOR_OVS="${CFLAGS_FOR_OVS} -Wno-cast-align"
-fi
 if [ -n "$DPDK_EXPERIMENTAL" ]; then
 CFLAGS_FOR_OVS="${CFLAGS_FOR_OVS} -DALLOW_EXPERIMENTAL_API"
 fi
diff --git a/utilities/ovs-dev.py b/utilities/ovs-dev.py
index c45788acd..534c5e7f1 100755
--- a/utilities/ovs-dev.py
+++ b/utilities/ovs-dev.py
@@ -90,7 +90,6 @@ def conf():
 
 if options.with_dpdk:
 configure.append("--with-dpdk=" + options.with_dpdk)
-cflags += " -Wno-cast-align -Wno-bad-function-cast"  # DPDK warnings.
 
 if options.optimize is None:
 options.optimize = 0
-- 
2.28.0.2311.g225365fb51

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] dpif-netdev: Use PMD context to get the port for HW miss recovery.

2021-12-04 Thread Eli Britstein via dev

Acked-by: Eli Britstein 

On 12/3/2021 11:12 PM, Ilya Maximets wrote:

External email: Use caution opening links or attachments


Last RX queue, from which the packet got received, is already stored
in the PMD context.  So, we can get the netdev from it without the
expensive hash map lookup.

In my V2V testing this patch improves performance in case HW offload
and experimental APIs are enabled by about 3%.  That narrows down the
performance difference with the case with experimental API disabled
to about 0.5%, which is way within a margin of error for that setup.

Signed-off-by: Ilya Maximets 
---
  lib/dpif-netdev-avx512.c  |  2 +-
  lib/dpif-netdev-private.h |  1 -
  lib/dpif-netdev.c | 20 +++-
  3 files changed, 8 insertions(+), 15 deletions(-)

diff --git a/lib/dpif-netdev-avx512.c b/lib/dpif-netdev-avx512.c
index 544d36903..3980960ba 100644
--- a/lib/dpif-netdev-avx512.c
+++ b/lib/dpif-netdev-avx512.c
@@ -186,7 +186,7 @@ dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread 
*pmd,

  /* Check for a partial hardware offload match. */
  if (hwol_enabled) {
-if (OVS_UNLIKELY(dp_netdev_hw_flow(pmd, in_port, packet, &f))) {
+if (OVS_UNLIKELY(dp_netdev_hw_flow(pmd, packet, &f))) {
  /* Packet restoration failed and it was dropped, do not
   * continue processing. */
  continue;
diff --git a/lib/dpif-netdev-private.h b/lib/dpif-netdev-private.h
index 4593649bd..029b23a22 100644
--- a/lib/dpif-netdev-private.h
+++ b/lib/dpif-netdev-private.h
@@ -46,7 +46,6 @@ dp_netdev_batch_execute(struct dp_netdev_pmd_thread *pmd,

  int
  dp_netdev_hw_flow(const struct dp_netdev_pmd_thread *pmd,
-  odp_port_t port_no,
struct dp_packet *packet,
struct dp_netdev_flow **flow);

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 69d7ec26e..a790df5fd 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -7322,28 +7322,22 @@ smc_lookup_single(struct dp_netdev_pmd_thread *pmd,
  return NULL;
  }

-static struct tx_port * pmd_send_port_cache_lookup(
-const struct dp_netdev_pmd_thread *pmd, odp_port_t port_no);
-
  inline int
  dp_netdev_hw_flow(const struct dp_netdev_pmd_thread *pmd,
-  odp_port_t port_no OVS_UNUSED,
struct dp_packet *packet,
struct dp_netdev_flow **flow)
  {
-struct tx_port *p OVS_UNUSED;
  uint32_t mark;

  #ifdef ALLOW_EXPERIMENTAL_API /* Packet restoration API required. */
  /* Restore the packet if HW processing was terminated before completion. 
*/
-p = pmd_send_port_cache_lookup(pmd, port_no);
-if (OVS_LIKELY(p)) {
-int err = netdev_hw_miss_packet_recover(p->port->netdev, packet);
+struct dp_netdev_rxq *rxq = pmd->ctx.last_rxq;
+int err;

-if (err && err != EOPNOTSUPP) {
-COVERAGE_INC(datapath_drop_hw_miss_recover);
-return -1;
-}
+err = netdev_hw_miss_packet_recover(rxq->port->netdev, packet);
+if (err && err != EOPNOTSUPP) {
+COVERAGE_INC(datapath_drop_hw_miss_recover);
+return -1;
  }
  #endif

@@ -7420,7 +7414,7 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd,
  }

  if (netdev_flow_api && recirc_depth == 0) {
-if (OVS_UNLIKELY(dp_netdev_hw_flow(pmd, port_no, packet, &flow))) {
+if (OVS_UNLIKELY(dp_netdev_hw_flow(pmd, packet, &flow))) {
  /* Packet restoration failed and it was dropped, do not
   * continue processing.
   */
--
2.31.1


___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH dpdk-latest] acinclude: Enable -Werror by default

2021-11-30 Thread Eli Britstein via dev



On 11/30/2021 2:06 PM, Ilya Maximets wrote:

External email: Use caution opening links or attachments


On 11/30/21 10:43, Eli Britstein wrote:

On 11/30/2021 12:31 AM, Ilya Maximets wrote:

External email: Use caution opening links or attachments


On 11/7/21 11:56, Eli Britstein via dev wrote:

Following dpdk commits [1]-[3], it is now possible to compile with
--enable-Werror. Change the default to on, with an option to disable
using --disable-Werror.

Notes:
1. To compile against 21.11-rc1, need to apply [4] and [5] patches.
2. There are still sparse errors, due to dpdk issue. [6] fixes it.

[1] a3f8d0587188 ("net: avoid cast-align warning in VLAN insert function")
[2] da0333c8790b ("mbuf: avoid cast-align warning in data offset macro")
[3] 6de430b7079e ("eal/x86: avoid cast-align warning in memcpy functions")
[4] https://patchwork.ozlabs.org/project/openvswitch/list/?series=268844
[5] https://patchwork.ozlabs.org/project/openvswitch/list/?series=261231
[6] 
https://patches.dpdk.org/project/dpdk/patch/20211028101428.15007-1-david.march...@redhat.com/

Signed-off-by: Eli Britstein 
Reviewed-by: Gaetan Rivet 
---
   .ci/linux-build.sh | 1 -
   .cirrus.yml| 2 +-
   acinclude.m4   | 4 ++--
   3 files changed, 3 insertions(+), 4 deletions(-)

Hi, Eli.  I'm not sure if I understand the reason behind this patch.

In linux x86 at least, we had warnings by dpdk. We discussed it in [7], and 
fixed some in OVS by [8].

The fixes in dpdk were merged (see above [1]-[3]), so I thought to have this 
option by default to prevent future warnings.

Hmm.  Maybe it's better to just remove -Wno-cast-align from CI scripts
instead?  Current version of a patch will not catch cast-align warnings
in our CI, since they are explicitly disabled.


Hi Ilya,

I cherry-picked my commit on top of the current dpdk-latest/dpdk 
branches, that already includes [4]-[6].


Also, I removed the -Wno-cast-align, 
https://github.com/elibritstein/OVS/commit/5663a73555ce3c79bf16d2519660bc4fc3e5d8f4, 
and pushed for CI.



CI for windows passed, though I do see warnings:

https://ci.appveyor.com/project/elibritstein/ovs/build/job/ejn39u0cr9f42l6c

https://ci.appveyor.com/project/elibritstein/ovs/build/job/emus2vv79ihcd4ng

GitHub actions passed:

https://github.com/elibritstein/OVS/actions/runs/1521686491


Should I squash the removal of -Wno-cast-align and post v2?

What do you suggest next?

Thanks,

Eli




We did encounter some issue on PPC, still pending to a proper resolution, [9].


[7] 
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fmail.openvswitch.org%2Fpipermail%2Fovs-dev%2F2021-July%2F384773.htmldata=04%7C01%7Celibr%40nvidia.com%7Cfbaf5bb4739649fbbebd08d9b3f9ea57%7C43083d15727340c1b7db39efd9ccc17a%7C0%7C0%7C637738708996798389%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000sdata=FA1FbBWhbCa6xaDDIYtWBVSJtLrKXd05SR4Mn4GltSM%3Dreserved=0

[8] 
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fmail.openvswitch.org%2Fpipermail%2Fovs-dev%2F2021-July%2F385482.htmldata=04%7C01%7Celibr%40nvidia.com%7Cfbaf5bb4739649fbbebd08d9b3f9ea57%7C43083d15727340c1b7db39efd9ccc17a%7C0%7C0%7C637738708996798389%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000sdata=YFtFa%2Fzspqp3cABpFb55Cykhg%2Fhpp9plfpSgLxE%2BZUQ%3Dreserved=0

[9] 
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fmail.openvswitch.org%2Fpipermail%2Fovs-dev%2F2021-November%2F389381.htmldata=04%7C01%7Celibr%40nvidia.com%7Cfbaf5bb4739649fbbebd08d9b3f9ea57%7C43083d15727340c1b7db39efd9ccc17a%7C0%7C0%7C637738708996798389%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000sdata=h%2Fl2gxScO%2BYHpEHUjXZjGxQBtLpmv9P4IXjrF4aCV3k%3Dreserved=0


But, in any case, I believe that it will break the Windows build, as
it currently produces a fair amount of warnings.

I admit I haven't tested windows. I tried to look into it, but could not find a 
free CI tool for that.

I tried to follow 
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fdocs.openvswitch.org%2Fen%2Flatest%2Fintro%2Finstall%2Fwindows%2Fdata=04%7C01%7Celibr%40nvidia.com%7Cfbaf5bb4739649fbbebd08d9b3f9ea57%7C43083d15727340c1b7db39efd9ccc17a%7C0%7C0%7C637738708996798389%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000sdata=4bjVibsMdat27ukfyWX9zUyEPW2%2FhKFHX9EDQXveYjs%3Dreserved=0,
 but could not understand what option to choose in 
https://nam11.safelinks.protection.outlook.com/?url=http%3A%2F%2Fwww.mingw.org%2Fwiki%2FGetting_Starteddata=04%7C01%7Celibr%40nvidia.com%7Cfbaf5bb4739649fbbebd08d9b3f9ea57%7C43083d15727340c1b7db39efd9ccc17a%7C0%7C0%7C637738708996798389%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000sdata=HlOnsopwilA0WyL3auvhGF5DZlrCadfCyOwLBl%2BJmsY%3Dreserved=0

Could you please advise?

Re: [ovs-dev] [PATCH dpdk-latest] acinclude: Enable -Werror by default

2021-11-30 Thread Eli Britstein via dev



On 11/30/2021 12:31 AM, Ilya Maximets wrote:

External email: Use caution opening links or attachments


On 11/7/21 11:56, Eli Britstein via dev wrote:

Following dpdk commits [1]-[3], it is now possible to compile with
--enable-Werror. Change the default to on, with an option to disable
using --disable-Werror.

Notes:
1. To compile against 21.11-rc1, need to apply [4] and [5] patches.
2. There are still sparse errors, due to dpdk issue. [6] fixes it.

[1] a3f8d0587188 ("net: avoid cast-align warning in VLAN insert function")
[2] da0333c8790b ("mbuf: avoid cast-align warning in data offset macro")
[3] 6de430b7079e ("eal/x86: avoid cast-align warning in memcpy functions")
[4] https://patchwork.ozlabs.org/project/openvswitch/list/?series=268844
[5] https://patchwork.ozlabs.org/project/openvswitch/list/?series=261231
[6] 
https://patches.dpdk.org/project/dpdk/patch/20211028101428.15007-1-david.march...@redhat.com/

Signed-off-by: Eli Britstein 
Reviewed-by: Gaetan Rivet 
---
  .ci/linux-build.sh | 1 -
  .cirrus.yml| 2 +-
  acinclude.m4   | 4 ++--
  3 files changed, 3 insertions(+), 4 deletions(-)

Hi, Eli.  I'm not sure if I understand the reason behind this patch.


In linux x86 at least, we had warnings by dpdk. We discussed it in [7], 
and fixed some in OVS by [8].


The fixes in dpdk were merged (see above [1]-[3]), so I thought to have 
this option by default to prevent future warnings.


We did encounter some issue on PPC, still pending to a proper 
resolution, [9].



[7] https://mail.openvswitch.org/pipermail/ovs-dev/2021-July/384773.html

[8] https://mail.openvswitch.org/pipermail/ovs-dev/2021-July/385482.html

[9] https://mail.openvswitch.org/pipermail/ovs-dev/2021-November/389381.html


But, in any case, I believe that it will break the Windows build, as
it currently produces a fair amount of warnings.


I admit I haven't tested windows. I tried to look into it, but could not 
find a free CI tool for that.


I tried to follow 
https://docs.openvswitch.org/en/latest/intro/install/windows/, but could 
not understand what option to choose in 
http://www.mingw.org/wiki/Getting_Started


Could you please advise?



Best regards, Ilya Maximets.

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] dpif-netdev: avoid hw_miss_packet_recover() for devices with no support

2021-11-22 Thread Eli Britstein via dev


On 11/22/2021 3:19 PM, Sriharsha Basavapatna wrote:

Hi Eli,

On Sun, Nov 21, 2021 at 12:03 PM Eli Britstein via dev
 wrote:

Hi Harsha,

It's a clever idea, though it has some problems in the implementation. PSB.

Thanks, please see my response below.


On 11/20/2021 11:20 AM, Sriharsha Basavapatna wrote:

The hw_miss_packet_recover() API results in performance degradation, for
ports that are either not offload capable or do not support this specific
offload API.

For example, in the test configuration shown below, the vhost-user port
does not support offloads and the VF port doesn't support hw_miss offload
API. But because tunnel offload needs to be configured in other bridges
(br-vxlan and br-phy), OVS has been built with -DALLOW_EXPERIMENTAL_API.

  br-vhost          br-vxlan            br-phy
vhost-user<-->VF    VF-Rep<-->VxLAN   uplink-port

For every packet between the VF and the vhost-user ports, hw_miss API is
called even though it is not supported by the ports involved. This leads
to significant performance drop (~3x in some cases; both cycles and pps).

To fix this, return EOPNOTSUPP when this API fails for a device that

"To fix" -> "To improve"

doesn't support it and avoid this API on that port for subsequent packets.

Signed-off-by: Sriharsha Basavapatna 
---
   lib/dpif-netdev-private.h |  2 +-
   lib/dpif-netdev.c | 29 +
   lib/netdev-offload-dpdk.c |  9 +++--
   3 files changed, 29 insertions(+), 11 deletions(-)

diff --git a/lib/dpif-netdev-private.h b/lib/dpif-netdev-private.h
index 4593649bd..e2a6a9d3a 100644
--- a/lib/dpif-netdev-private.h
+++ b/lib/dpif-netdev-private.h
@@ -46,7 +46,7 @@ dp_netdev_batch_execute(struct dp_netdev_pmd_thread *pmd,

   int
   dp_netdev_hw_flow(const struct dp_netdev_pmd_thread *pmd,
-  odp_port_t port_no,
+  void *port,

void * -> struct tx_port *. use a forward declaration.


 struct dp_packet *packet,
 struct dp_netdev_flow **flow);

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 69d7ec26e..207b1961c 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -434,6 +434,7 @@ struct tx_port {
   long long last_used;
   struct hmap_node node;
   long long flush_time;
+bool hw_miss_api_supported;
   struct dp_packet_batch output_pkts;
   struct dp_netdev_rxq *output_pkts_rxqs[NETDEV_MAX_BURST];
   };
@@ -6972,6 +6973,7 @@ dp_netdev_add_port_tx_to_pmd(struct dp_netdev_pmd_thread 
*pmd,
   tx->port = port;
   tx->qid = -1;
   tx->flush_time = 0LL;
+tx->hw_miss_api_supported = true;
   dp_packet_batch_init(&tx->output_pkts);

   hmap_insert(&pmd->tx_ports, &tx->node, hash_port_no(tx->port->port_no));
@@ -7327,22 +7329,28 @@ static struct tx_port * pmd_send_port_cache_lookup(

   inline int
   dp_netdev_hw_flow(const struct dp_netdev_pmd_thread *pmd,
-  odp_port_t port_no OVS_UNUSED,
+  void *port,

don't omit OVS_UNUSED. it is for compiling without ALLOW_EXPERIMENTAL_API

Ok.

 struct dp_packet *packet,
 struct dp_netdev_flow **flow)
   {
-struct tx_port *p OVS_UNUSED;
+struct tx_port *p = port;

no need for this local variable, you get it from the function arguments

The declaration of dp_netdev_hw_flow() in dpif-netdev-private.h can't
see 'struct tx_port' since it is defined in dpif_netdev.c. So it needs
to be a void * argument.


In the H file, use forward declaration, like this:

struct tx_port;

void foo(struct tx_port *port);

Then, in the C file this stack variable can be removed.


   uint32_t mark;

   #ifdef ALLOW_EXPERIMENTAL_API /* Packet restoration API required. */
   /* Restore the packet if HW processing was terminated before completion. 
*/
-p = pmd_send_port_cache_lookup(pmd, port_no);
-if (OVS_LIKELY(p)) {
+if (OVS_LIKELY(p) && p->hw_miss_api_supported) {
   int err = netdev_hw_miss_packet_recover(p->port->netdev, packet);

-if (err && err != EOPNOTSUPP) {
-COVERAGE_INC(datapath_drop_hw_miss_recover);
-return -1;
+if (err) {
+if (err != EOPNOTSUPP) {
+COVERAGE_INC(datapath_drop_hw_miss_recover);
+return -1;
+} else {
+/* API unsupported by the port; avoid subsequent calls. */
+VLOG_DBG("hw_miss_api unsupported: port: %d",
+ p->port->port_no);
+p->hw_miss_api_supported = false;
+}
   }
   }
   #endif
@@ -7394,6 +7402,11 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd,
   uint16_t tcp_flags;
   size_t map_cnt = 0;
   bool batch_enable = true;
+struct tx_port *port = NULL;
+
+#ifdef ALLOW_EXPERIMENTAL_API /* Packet restoration API required.

Re: [ovs-dev] [PATCH] dpif-netdev: avoid hw_miss_packet_recover() for devices with no support

2021-11-20 Thread Eli Britstein via dev

Hi Harsha,

It's a clever idea, though it has some problems in the implementation. PSB.


On 11/20/2021 11:20 AM, Sriharsha Basavapatna wrote:

The hw_miss_packet_recover() API results in performance degradation, for
ports that are either not offload capable or do not support this specific
offload API.

For example, in the test configuration shown below, the vhost-user port
does not support offloads and the VF port doesn't support hw_miss offload
API. But because tunnel offload needs to be configured in other bridges
(br-vxlan and br-phy), OVS has been built with -DALLOW_EXPERIMENTAL_API.

  br-vhost          br-vxlan            br-phy
vhost-user<-->VF    VF-Rep<-->VxLAN   uplink-port

For every packet between the VF and the vhost-user ports, hw_miss API is
called even though it is not supported by the ports involved. This leads
to significant performance drop (~3x in some cases; both cycles and pps).

To fix this, return EOPNOTSUPP when this API fails for a device that

"To fix" -> "To improve"

doesn't support it and avoid this API on that port for subsequent packets.

Signed-off-by: Sriharsha Basavapatna 
---
  lib/dpif-netdev-private.h |  2 +-
  lib/dpif-netdev.c | 29 +
  lib/netdev-offload-dpdk.c |  9 +++--
  3 files changed, 29 insertions(+), 11 deletions(-)

diff --git a/lib/dpif-netdev-private.h b/lib/dpif-netdev-private.h
index 4593649bd..e2a6a9d3a 100644
--- a/lib/dpif-netdev-private.h
+++ b/lib/dpif-netdev-private.h
@@ -46,7 +46,7 @@ dp_netdev_batch_execute(struct dp_netdev_pmd_thread *pmd,
  
  int

  dp_netdev_hw_flow(const struct dp_netdev_pmd_thread *pmd,
-  odp_port_t port_no,
+  void *port,


void * -> struct tx_port *. use a forward declaration.


struct dp_packet *packet,
struct dp_netdev_flow **flow);
  
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c

index 69d7ec26e..207b1961c 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -434,6 +434,7 @@ struct tx_port {
  long long last_used;
  struct hmap_node node;
  long long flush_time;
+bool hw_miss_api_supported;
  struct dp_packet_batch output_pkts;
  struct dp_netdev_rxq *output_pkts_rxqs[NETDEV_MAX_BURST];
  };
@@ -6972,6 +6973,7 @@ dp_netdev_add_port_tx_to_pmd(struct dp_netdev_pmd_thread 
*pmd,
  tx->port = port;
  tx->qid = -1;
  tx->flush_time = 0LL;
+tx->hw_miss_api_supported = true;
  dp_packet_batch_init(&tx->output_pkts);
  
  hmap_insert(&pmd->tx_ports, &tx->node, hash_port_no(tx->port->port_no));

@@ -7327,22 +7329,28 @@ static struct tx_port * pmd_send_port_cache_lookup(
  
  inline int

  dp_netdev_hw_flow(const struct dp_netdev_pmd_thread *pmd,
-  odp_port_t port_no OVS_UNUSED,
+  void *port,

don't omit OVS_UNUSED. it is for compiling without ALLOW_EXPERIMENTAL_API

struct dp_packet *packet,
struct dp_netdev_flow **flow)
  {
-struct tx_port *p OVS_UNUSED;
+struct tx_port *p = port;

no need for this local variable, you get it from the function arguments

  uint32_t mark;
  
  #ifdef ALLOW_EXPERIMENTAL_API /* Packet restoration API required. */

  /* Restore the packet if HW processing was terminated before completion. 
*/
-p = pmd_send_port_cache_lookup(pmd, port_no);
-if (OVS_LIKELY(p)) {
+if (OVS_LIKELY(p) && p->hw_miss_api_supported) {
  int err = netdev_hw_miss_packet_recover(p->port->netdev, packet);
  
-if (err && err != EOPNOTSUPP) {

-COVERAGE_INC(datapath_drop_hw_miss_recover);
-return -1;
+if (err) {
+if (err != EOPNOTSUPP) {
+COVERAGE_INC(datapath_drop_hw_miss_recover);
+return -1;
+} else {
+/* API unsupported by the port; avoid subsequent calls. */
+VLOG_DBG("hw_miss_api unsupported: port: %d",
+ p->port->port_no);
+p->hw_miss_api_supported = false;
+}
  }
  }
  #endif
@@ -7394,6 +7402,11 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd,
  uint16_t tcp_flags;
  size_t map_cnt = 0;
  bool batch_enable = true;
+struct tx_port *port = NULL;
+
+#ifdef ALLOW_EXPERIMENTAL_API /* Packet restoration API required. */
+port = pmd_send_port_cache_lookup(pmd, port_no);
+#endif
  
  pmd_perf_update_counter(>perf_stats,

  md_is_valid ? PMD_STAT_RECIRC : PMD_STAT_RECV,
@@ -7420,7 +7433,7 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd,
  }
  
  if (netdev_flow_api && recirc_depth == 0) {

-if (OVS_UNLIKELY(dp_netdev_hw_flow(pmd, port_no, packet, ))) {
+if (OVS_UNLIKELY(dp_netdev_hw_flow(pmd, port, packet, ))) {
  /* Packet restoration failed and it was dropped, do not
   * continue processing.
   */
diff --git 

[ovs-dev] [PATCH dpdk-latest] acinclude: Enable -Werror by default

2021-11-07 Thread Eli Britstein via dev
Following dpdk commits [1]-[3], it is now possible to compile with
--enable-Werror. Change the default to on, with an option to disable
using --disable-Werror.

Notes:
1. To compile against 21.11-rc1, need to apply [4] and [5] patches.
2. There are still sparse errors, due to dpdk issue. [6] fixes it.

[1] a3f8d0587188 ("net: avoid cast-align warning in VLAN insert function")
[2] da0333c8790b ("mbuf: avoid cast-align warning in data offset macro")
[3] 6de430b7079e ("eal/x86: avoid cast-align warning in memcpy functions")
[4] https://patchwork.ozlabs.org/project/openvswitch/list/?series=268844
[5] https://patchwork.ozlabs.org/project/openvswitch/list/?series=261231
[6] 
https://patches.dpdk.org/project/dpdk/patch/20211028101428.15007-1-david.march...@redhat.com/

Signed-off-by: Eli Britstein 
Reviewed-by: Gaetan Rivet 
---
 .ci/linux-build.sh | 1 -
 .cirrus.yml| 2 +-
 acinclude.m4   | 4 ++--
 3 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/.ci/linux-build.sh b/.ci/linux-build.sh
index e20cc6ad0..03178562b 100755
--- a/.ci/linux-build.sh
+++ b/.ci/linux-build.sh
@@ -5,7 +5,6 @@ set -x
 
 CFLAGS_FOR_OVS="-g -O2"
 SPARSE_FLAGS=""
-EXTRA_OPTS="--enable-Werror"
 
 [ -z "$DPDK_EXPERIMENTAL" ] || DPDK=1
 [ -z "$DPDK_SHARED" ] || DPDK=1
diff --git a/.cirrus.yml b/.cirrus.yml
index 358f2ba25..6d9454978 100644
--- a/.cirrus.yml
+++ b/.cirrus.yml
@@ -23,7 +23,7 @@ freebsd_build_task:
   configure_script:
 - ./boot.sh
 - ./configure CC=${COMPILER} CFLAGS="-g -O2 -Wall"
-  MAKE=gmake --enable-Werror
+  MAKE=gmake
   || { cat config.log; exit 1; }
 
   build_script:
diff --git a/acinclude.m4 b/acinclude.m4
index dba365ea1..900ce8b77 100644
--- a/acinclude.m4
+++ b/acinclude.m4
@@ -89,8 +89,8 @@ dnl OVS_ENABLE_WERROR
 AC_DEFUN([OVS_ENABLE_WERROR],
   [AC_ARG_ENABLE(
  [Werror],
- [AC_HELP_STRING([--enable-Werror], [Add -Werror to CFLAGS])],
- [], [enable_Werror=no])
+ [AC_HELP_STRING([--disable-Werror], [Remove -Werror from CFLAGS])],
+ [], [enable_Werror=yes])
AC_CONFIG_COMMANDS_PRE(
  [if test "X$enable_Werror" = Xyes; then
 OVS_CFLAGS="$OVS_CFLAGS -Werror"
-- 
2.28.0.2311.g225365fb51

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH V2 4/4] netdev-offload-dpdk: Don't ignore frags as they are handled

2021-08-27 Thread Eli Britstein via dev



On 8/27/2021 12:30 PM, Maxime Coquelin wrote:

External email: Use caution opening links or attachments


On 8/16/21 3:53 PM, Eli Britstein via dev wrote:

Signed-off-by: Eli Britstein 
---
  NEWS  | 2 ++
  lib/netdev-offload-dpdk.c | 5 -
  2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/NEWS b/NEWS
index 26920e215..80466c014 100644
--- a/NEWS
+++ b/NEWS
@@ -8,6 +8,8 @@ Post-v2.16.0
 by default.  'other_config:dpdk-socket-limit' can be set equal to
 the 'other_config:dpdk-socket-mem' to preserve the legacy memory
 limiting behavior.
+ * Add hardware offload support for matching IPv4/IPv6 frag types
+   (experimental).


  v2.16.0 - xx xxx 
diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index d79ad1bea..f6b1514b4 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -1234,11 +1234,6 @@ parse_flow_match(struct netdev *netdev,
  proto = spec->hdr.next_proto_id &
  mask->hdr.next_proto_id;
  }
-/* If fragmented, then don't HW accelerate - for now. */
-if (match->wc.masks.nw_frag & match->flow.nw_frag) {
-return -1;
-}
-consumed_masks->nw_frag = 0;

As this is experimental, shouldn't it be disabled by default or at least
a possibility to disable it should be provided?


It is not experimental (no need for ALLOW_EXPERIMENTAL_API). This code 
section is between IPv4 and IPv6 handling.


After patch #2, this is dead code for IPv4 anyway. If we keep it, 
patch #3 is dead code.





  /* IP v6 */
  if (match->flow.dl_type == htons(ETH_TYPE_IPV6)) {


___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH V3 1/2] netdev-offload-dpdk: Use has_vlan match attribute

2021-08-24 Thread Eli Britstein via dev



On 8/24/2021 6:47 PM, Ilya Maximets wrote:

External email: Use caution opening links or attachments


On 8/24/21 5:25 PM, Eli Britstein wrote:

On 8/24/2021 6:08 PM, Finn, Emma wrote:

External email: Use caution opening links or attachments


-Original Message-
From: Eli Britstein 
Sent: Monday 16 August 2021 14:55
To: d...@openvswitch.org; Ilya Maximets 
Cc: Finn, Emma ; Stokes, Ian ; Sriharsha Basavapatna 
; Gaetan Rivet ; Majd Dibbiny 
; Eli Britstein ; Salem Sol 
Subject: [PATCH V3 1/2] netdev-offload-dpdk: Use has_vlan match attribute

DPDK 20.11 introduced the ability to specify existence/non-existence of a VLAN tag 
by [1].
Use this attribute.

[1]: 09315fc83861 ("ethdev: add VLAN attributes to ethernet and VLAN items")

Signed-off-by: Eli Britstein 
Reviewed-by: Salem Sol 

Hi Eli,

I tested this but currently we don't have support in the i40e pmd for the 
has_vlan match attribute and with these patches it is breaking offload for VLAN 
packets on Intel devices.

Hi Emma,

Thanks for testing.

Is adding such support in your plans?

How do you suggest we proceed? It is needed in order to fix an OVS bug.

Thanks,

Eli

The "Table 1.2 rte_flow items availability in networking drivers"
here: https://doc.dpdk.org/guides/nics/overview.html
says that both ixgbe and i40e have full support for 'vlan' and
'eth' items.  Is it a bug?  Should it be 'partial' instead?

In general, this sounds like a big limitation of rte_flow API.
I mean the fact that there is no way to get what is implemented by
a particular driver and what is not implemented in runtime.
Someone should, probably, work on adding this kind of API to DPDK.
Otherwise, we will be stuck with the inability to use certain actions/matches
unless all the drivers support them (which is also hard to check
taking documentation issues into account).  If I missed it and the
API actually exists, we should definitely start using it.

CC: dpdk-dev and rte_flow maintainers.

Thoughts?


There is such an API - rte_flow_validate().

However, in OVS, as each flow is independent and can have different 
matches and actions, we just call rte_flow_create(). The PMD (at least 
mlx5) first internally validates it (as if rte_flow_validate() is 
called), and bails out with a failure in case validation fails.


Can you suggest an effective way to utilize it in OVS?

In theory, if the API exists in rte_flow, OVS should not care if all 
PMDs support it or not.


In practice, the "has_vlan" field was introduced only in 20.11, and 
apparently Intel has not adapted i40e PMD, so it breaks their offloads. 
I suspected this so I've added Emma and Ian to review it.


I don't know i40e HW capabilities, but at least from PMD point of view, 
it can be silently ignored until a proper support is added.




Best regards, Ilya Maximets.

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH V3 1/2] netdev-offload-dpdk: Use has_vlan match attribute

2021-08-24 Thread Eli Britstein via dev



On 8/24/2021 6:08 PM, Finn, Emma wrote:

External email: Use caution opening links or attachments


-Original Message-
From: Eli Britstein 
Sent: Monday 16 August 2021 14:55
To: d...@openvswitch.org; Ilya Maximets 
Cc: Finn, Emma ; Stokes, Ian ; Sriharsha Basavapatna 
; Gaetan Rivet ; Majd Dibbiny 
; Eli Britstein ; Salem Sol 
Subject: [PATCH V3 1/2] netdev-offload-dpdk: Use has_vlan match attribute

DPDK 20.11 introduced the ability to specify existence/non-existence of a VLAN tag 
by [1].
Use this attribute.

[1]: 09315fc83861 ("ethdev: add VLAN attributes to ethernet and VLAN items")

Signed-off-by: Eli Britstein 
Reviewed-by: Salem Sol 

Hi Eli,

I tested this but currently we don't have support in the i40e pmd for the 
has_vlan match attribute and with these patches it is breaking offload for VLAN 
packets on Intel devices.


Hi Emma,

Thanks for testing.

Is adding such support in your plans?

How do you suggest we proceed? It is needed in order to fix an OVS bug.

Thanks,

Eli



Thanks,
Emma
---
  lib/netdev-offload-dpdk.c | 16 
  1 file changed, 16 insertions(+)

diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c index 
f6706ee0c..28c4ba276 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -210,6 +210,8 @@ dump_flow_pattern(struct ds *s,

  ds_put_cstr(s, "eth ");
  if (eth_spec) {
+uint32_t has_vlan_mask;
+
  if (!eth_mask) {
  eth_mask = _flow_item_eth_mask;
  }
@@ -222,6 +224,9 @@ dump_flow_pattern(struct ds *s,
  DUMP_PATTERN_ITEM(eth_mask->type, "type", "0x%04"PRIx16,
ntohs(eth_spec->type),
ntohs(eth_mask->type));
+has_vlan_mask = eth_mask->has_vlan ? UINT32_MAX : 0;
+DUMP_PATTERN_ITEM(has_vlan_mask, "has_vlan", "%d",
+  eth_spec->has_vlan, eth_mask->has_vlan);
  }
  ds_put_cstr(s, "/ ");
  } else if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) { @@ -1047,6 +1052,7 @@ 
parse_flow_match(struct netdev *netdev,
   struct flow_patterns *patterns,
   struct match *match)
  {
+struct rte_flow_item_eth *eth_spec = NULL, *eth_mask = NULL;
  struct flow *consumed_masks;
  uint8_t proto = 0;

@@ -1092,6 +1098,11 @@ parse_flow_match(struct netdev *netdev,
  memset(_masks->dl_src, 0, sizeof consumed_masks->dl_src);
  consumed_masks->dl_type = 0;

+spec->has_vlan = 0;
+mask->has_vlan = 1;
+eth_spec = spec;
+eth_mask = mask;
+
  add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_ETH, spec, mask);
  }

@@ -1108,6 +1119,11 @@ parse_flow_match(struct netdev *netdev,
  /* Match any protocols. */
  mask->inner_type = 0;

+if (eth_spec && eth_mask) {
+eth_spec->has_vlan = 1;
+eth_mask->has_vlan = 1;
+}
+
  add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_VLAN, spec, mask);
  }
  /* For untagged matching match->wc.masks.vlans[0].tci is 0x and
--
2.28.0.2311.g225365fb51


___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH] dpif-netdev: Keep orig_in_port as a field of the flow

2021-08-16 Thread Eli Britstein via dev
A flow may be modified after its initial offload failed. In this case,
according to [1], the modification is handled as a flow add.
For a vport flow "add", the orig_in_port should be provided.
Keep that field in the flow struct, so it can be provided in the flow
modification use case.

[1] 0d25621e4d9f ("dpif-netdev: Fix flow modification after failure.")

Fixes: b5e6f6f6bfbe ("dpif-netdev: Provide orig_in_port in metadata for 
tunneled packets.")
Signed-off-by: Eli Britstein 
---
 lib/dpif-netdev-private-flow.h | 1 +
 lib/dpif-netdev.c  | 9 +
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/lib/dpif-netdev-private-flow.h b/lib/dpif-netdev-private-flow.h
index 303066067..32ad020d9 100644
--- a/lib/dpif-netdev-private-flow.h
+++ b/lib/dpif-netdev-private-flow.h
@@ -101,6 +101,7 @@ struct dp_netdev_flow {
 
 bool dead;
 uint32_t mark;   /* Unique flow mark assigned to a flow */
+odp_port_t orig_in_port;
 
 /* Statistics. */
 struct dp_netdev_flow_stats stats;
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 03f460c7d..8376a16f8 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -2779,7 +2779,7 @@ static void
 queue_netdev_flow_put(struct dp_netdev_pmd_thread *pmd,
   struct dp_netdev_flow *flow, struct match *match,
   const struct nlattr *actions, size_t actions_len,
-  odp_port_t orig_in_port, int op)
+  int op)
 {
 struct dp_flow_offload_item *offload;
 
@@ -2799,7 +2799,7 @@ queue_netdev_flow_put(struct dp_netdev_pmd_thread *pmd,
 offload->actions = xmalloc(actions_len);
 memcpy(offload->actions, actions, actions_len);
 offload->actions_len = actions_len;
-offload->orig_in_port = orig_in_port;
+offload->orig_in_port = flow->orig_in_port;
 
 dp_netdev_append_flow_offload(offload);
 }
@@ -3614,6 +3614,7 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd,
 flow->dead = false;
 flow->batch = NULL;
 flow->mark = INVALID_FLOW_MARK;
+flow->orig_in_port = orig_in_port;
 *CONST_CAST(unsigned *, >pmd_id) = pmd->core_id;
 *CONST_CAST(struct flow *, >flow) = match->flow;
 *CONST_CAST(ovs_u128 *, >ufid) = *ufid;
@@ -3643,7 +3644,7 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd,
 dp_netdev_flow_hash(>ufid));
 
 queue_netdev_flow_put(pmd, flow, match, actions, actions_len,
-  orig_in_port, DP_NETDEV_FLOW_OFFLOAD_OP_ADD);
+  DP_NETDEV_FLOW_OFFLOAD_OP_ADD);
 log_netdev_flow_change(flow, match, NULL, actions, actions_len);
 
 return flow;
@@ -3685,7 +3686,7 @@ flow_put_on_pmd(struct dp_netdev_pmd_thread *pmd,
 ovsrcu_set(_flow->actions, new_actions);
 
 queue_netdev_flow_put(pmd, netdev_flow, match,
-  put->actions, put->actions_len, ODPP_NONE,
+  put->actions, put->actions_len,
   DP_NETDEV_FLOW_OFFLOAD_OP_MOD);
 log_netdev_flow_change(netdev_flow, match, old_actions,
put->actions, put->actions_len);
-- 
2.28.0.2311.g225365fb51

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V3 2/2] netdev-offload-dpdk: Fix ethernet type for VLANs

2021-08-16 Thread Eli Britstein via dev
For VLANs, the match of ethernet type should be specified in inner_type
field of the vlan match, and not type field in ethernet match.
Fix it.

Fixes: e8a2b5bf92bb ("netdev-dpdk: implement flow offload with rte flow")
Signed-off-by: Eli Britstein 
Reviewed-by: Salem Sol 
---
 lib/netdev-offload-dpdk.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index 28c4ba276..9298032aa 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -1116,12 +1116,13 @@ parse_flow_match(struct netdev *netdev,
 spec->tci = match->flow.vlans[0].tci & ~htons(VLAN_CFI);
 mask->tci = match->wc.masks.vlans[0].tci & ~htons(VLAN_CFI);
 
-/* Match any protocols. */
-mask->inner_type = 0;
-
 if (eth_spec && eth_mask) {
 eth_spec->has_vlan = 1;
 eth_mask->has_vlan = 1;
+spec->inner_type = eth_spec->type;
+mask->inner_type = eth_mask->type;
+eth_spec->type = match->flow.vlans[0].tpid;
+eth_mask->type = match->wc.masks.vlans[0].tpid;
 }
 
 add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_VLAN, spec, mask);
-- 
2.28.0.2311.g225365fb51

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V3 1/2] netdev-offload-dpdk: Use has_vlan match attribute

2021-08-16 Thread Eli Britstein via dev
DPDK 20.11 introduced the ability to specify existence/non-existence of
a VLAN tag by [1].
Use this attribute.

[1]: 09315fc83861 ("ethdev: add VLAN attributes to ethernet and VLAN items")

Signed-off-by: Eli Britstein 
Reviewed-by: Salem Sol 
---
 lib/netdev-offload-dpdk.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index f6706ee0c..28c4ba276 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -210,6 +210,8 @@ dump_flow_pattern(struct ds *s,
 
 ds_put_cstr(s, "eth ");
 if (eth_spec) {
+uint32_t has_vlan_mask;
+
 if (!eth_mask) {
 eth_mask = _flow_item_eth_mask;
 }
@@ -222,6 +224,9 @@ dump_flow_pattern(struct ds *s,
 DUMP_PATTERN_ITEM(eth_mask->type, "type", "0x%04"PRIx16,
   ntohs(eth_spec->type),
   ntohs(eth_mask->type));
+has_vlan_mask = eth_mask->has_vlan ? UINT32_MAX : 0;
+DUMP_PATTERN_ITEM(has_vlan_mask, "has_vlan", "%d",
+  eth_spec->has_vlan, eth_mask->has_vlan);
 }
 ds_put_cstr(s, "/ ");
 } else if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) {
@@ -1047,6 +1052,7 @@ parse_flow_match(struct netdev *netdev,
  struct flow_patterns *patterns,
  struct match *match)
 {
+struct rte_flow_item_eth *eth_spec = NULL, *eth_mask = NULL;
 struct flow *consumed_masks;
 uint8_t proto = 0;
 
@@ -1092,6 +1098,11 @@ parse_flow_match(struct netdev *netdev,
 memset(_masks->dl_src, 0, sizeof consumed_masks->dl_src);
 consumed_masks->dl_type = 0;
 
+spec->has_vlan = 0;
+mask->has_vlan = 1;
+eth_spec = spec;
+eth_mask = mask;
+
 add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_ETH, spec, mask);
 }
 
@@ -1108,6 +1119,11 @@ parse_flow_match(struct netdev *netdev,
 /* Match any protocols. */
 mask->inner_type = 0;
 
+if (eth_spec && eth_mask) {
+eth_spec->has_vlan = 1;
+eth_mask->has_vlan = 1;
+}
+
 add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_VLAN, spec, mask);
 }
 /* For untagged matching match->wc.masks.vlans[0].tci is 0x and
-- 
2.28.0.2311.g225365fb51

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V2 3/4] netdev-offload-dpdk: Support IPv6 fragmentation types

2021-08-16 Thread Eli Britstein via dev
Support IPv6 fragmentation matching.

Signed-off-by: Eli Britstein 
---
 lib/netdev-offload-dpdk.c | 82 ++-
 1 file changed, 81 insertions(+), 1 deletion(-)

diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index ee8e56fe0..d79ad1bea 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -370,6 +370,8 @@ dump_flow_pattern(struct ds *s,
 
 ds_put_cstr(s, "ipv6 ");
 if (ipv6_spec) {
+uint8_t has_frag_ext_mask;
+
 if (!ipv6_mask) {
 ipv6_mask = _flow_item_ipv6_mask;
 }
@@ -393,6 +395,37 @@ dump_flow_pattern(struct ds *s,
 DUMP_PATTERN_ITEM(ipv6_mask->hdr.hop_limits, false, "hop", 
"%"PRIu8,
   ipv6_spec->hdr.hop_limits,
   ipv6_mask->hdr.hop_limits, 0);
+has_frag_ext_mask = ipv6_mask->has_frag_ext ? UINT8_MAX : 0;
+DUMP_PATTERN_ITEM(has_frag_ext_mask, false, "has_frag_ext",
+  "%"PRIu8, ipv6_spec->has_frag_ext,
+  ipv6_mask->has_frag_ext, 0);
+}
+ds_put_cstr(s, "/ ");
+} else if (item->type == RTE_FLOW_ITEM_TYPE_IPV6_FRAG_EXT) {
+const struct rte_flow_item_ipv6_frag_ext *ipv6_frag_spec = item->spec;
+const struct rte_flow_item_ipv6_frag_ext *ipv6_frag_mask = item->mask;
+const struct rte_flow_item_ipv6_frag_ext *ipv6_frag_last = item->last;
+const struct rte_flow_item_ipv6_frag_ext ipv6_frag_def = {
+.hdr.next_header = 0, .hdr.frag_data = 0};
+
+ds_put_cstr(s, "ipv6_frag_ext ");
+if (ipv6_frag_spec) {
+if (!ipv6_frag_mask) {
+ipv6_frag_mask = _frag_def;
+}
+if (!ipv6_frag_last) {
+ipv6_frag_last = _frag_def;
+}
+DUMP_PATTERN_ITEM(ipv6_frag_mask->hdr.next_header, item->last,
+  "next_hdr", "%"PRIu8,
+  ipv6_frag_spec->hdr.next_header,
+  ipv6_frag_mask->hdr.next_header,
+  ipv6_frag_last->hdr.next_header);
+DUMP_PATTERN_ITEM(ipv6_frag_mask->hdr.frag_data, item->last,
+  "frag_data", "0x%"PRIx16,
+  ntohs(ipv6_frag_spec->hdr.frag_data),
+  ntohs(ipv6_frag_mask->hdr.frag_data),
+  ntohs(ipv6_frag_last->hdr.frag_data));
 }
 ds_put_cstr(s, "/ ");
 } else if (item->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
@@ -1222,6 +1255,10 @@ parse_flow_match(struct netdev *netdev,
sizeof spec->hdr.src_addr);
 memcpy(spec->hdr.dst_addr, >flow.ipv6_dst,
sizeof spec->hdr.dst_addr);
+if ((match->wc.masks.nw_frag & FLOW_NW_FRAG_ANY) &&
+(match->flow.nw_frag & FLOW_NW_FRAG_ANY)) {
+spec->has_frag_ext = 1;
+}
 
 mask->hdr.proto = match->wc.masks.nw_proto;
 mask->hdr.hop_limits = match->wc.masks.nw_ttl;
@@ -1232,7 +1269,6 @@ parse_flow_match(struct netdev *netdev,
 memcpy(mask->hdr.dst_addr, >wc.masks.ipv6_dst,
sizeof mask->hdr.dst_addr);
 
-consumed_masks->nw_proto = 0;
 consumed_masks->nw_ttl = 0;
 consumed_masks->nw_tos = 0;
 memset(_masks->ipv6_src, 0, sizeof consumed_masks->ipv6_src);
@@ -1242,6 +1278,50 @@ parse_flow_match(struct netdev *netdev,
 
 /* Save proto for L4 protocol setup. */
 proto = spec->hdr.proto & mask->hdr.proto;
+
+if (spec->has_frag_ext) {
+struct rte_flow_item_ipv6_frag_ext *frag_spec, *frag_mask,
+*frag_last = NULL;
+
+frag_spec = xzalloc(sizeof *frag_spec);
+frag_mask = xzalloc(sizeof *frag_mask);
+
+if (match->wc.masks.nw_frag & FLOW_NW_FRAG_LATER) {
+if (!(match->flow.nw_frag & FLOW_NW_FRAG_LATER)) {
+/* frag=first. */
+frag_spec->hdr.frag_data = htons(RTE_IPV6_EHDR_MF_MASK);
+frag_mask->hdr.frag_data = htons(RTE_IPV6_EHDR_MF_MASK |
+ RTE_IPV6_EHDR_FO_MASK);
+/* Move the proto match to the extension item. */
+frag_spec->hdr.next_header = match->flow.nw_proto;
+frag_mask->hdr.next_header = match->wc.masks.nw_proto;
+spec->hdr.proto = 0;
+mask->hdr.proto = 0;
+} else {
+/* frag=la

[ovs-dev] [PATCH V2 4/4] netdev-offload-dpdk: Don't ignore frags as they are handled

2021-08-16 Thread Eli Britstein via dev
Signed-off-by: Eli Britstein 
---
 NEWS  | 2 ++
 lib/netdev-offload-dpdk.c | 5 -
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/NEWS b/NEWS
index 26920e215..80466c014 100644
--- a/NEWS
+++ b/NEWS
@@ -8,6 +8,8 @@ Post-v2.16.0
by default.  'other_config:dpdk-socket-limit' can be set equal to
the 'other_config:dpdk-socket-mem' to preserve the legacy memory
limiting behavior.
+ * Add hardware offload support for matching IPv4/IPv6 frag types
+   (experimental).
 
 
 v2.16.0 - xx xxx 
diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index d79ad1bea..f6b1514b4 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -1234,11 +1234,6 @@ parse_flow_match(struct netdev *netdev,
 proto = spec->hdr.next_proto_id &
 mask->hdr.next_proto_id;
 }
-/* If fragmented, then don't HW accelerate - for now. */
-if (match->wc.masks.nw_frag & match->flow.nw_frag) {
-return -1;
-}
-consumed_masks->nw_frag = 0;
 
 /* IP v6 */
 if (match->flow.dl_type == htons(ETH_TYPE_IPV6)) {
-- 
2.28.0.2311.g225365fb51

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V2 1/4] netdev-offload-dpdk: Add last attribute to patterns

2021-08-16 Thread Eli Britstein via dev
Matching on frag types requires range. Add 'last' attribute to patterns.

Signed-off-by: Eli Britstein 
---
 lib/netdev-offload-dpdk.c | 151 --
 1 file changed, 81 insertions(+), 70 deletions(-)

diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index f6706ee0c..769dc48d2 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -184,8 +184,12 @@ dump_flow_attr(struct ds *s, struct ds *s_extra,
 
 /* Adds one pattern item 'field' with the 'mask' to dynamic string 's' using
  * 'testpmd command'-like format. */
-#define DUMP_PATTERN_ITEM(mask, field, fmt, spec_pri, mask_pri) \
-if (is_all_ones(, sizeof mask)) { \
+#define DUMP_PATTERN_ITEM(mask, has_last, field, fmt, spec_pri, mask_pri, \
+  last_pri) \
+if (has_last) { \
+ds_put_format(s, field " spec " fmt " " field " mask " fmt " " field \
+  " last " fmt " ", spec_pri, mask_pri, last_pri); \
+} else if (is_all_ones(, sizeof mask)) { \
 ds_put_format(s, field " is " fmt " ", spec_pri); \
 } else if (!is_all_zeros(, sizeof mask)) { \
 ds_put_format(s, field " spec " fmt " " field " mask " fmt " ", \
@@ -207,21 +211,24 @@ dump_flow_pattern(struct ds *s,
 } else if (item->type == RTE_FLOW_ITEM_TYPE_ETH) {
 const struct rte_flow_item_eth *eth_spec = item->spec;
 const struct rte_flow_item_eth *eth_mask = item->mask;
+uint8_t ea[ETH_ADDR_LEN];
 
 ds_put_cstr(s, "eth ");
 if (eth_spec) {
 if (!eth_mask) {
 eth_mask = _flow_item_eth_mask;
 }
-DUMP_PATTERN_ITEM(eth_mask->src, "src", ETH_ADDR_FMT,
+DUMP_PATTERN_ITEM(eth_mask->src, false, "src", ETH_ADDR_FMT,
   ETH_ADDR_BYTES_ARGS(eth_spec->src.addr_bytes),
-  ETH_ADDR_BYTES_ARGS(eth_mask->src.addr_bytes));
-DUMP_PATTERN_ITEM(eth_mask->dst, "dst", ETH_ADDR_FMT,
+  ETH_ADDR_BYTES_ARGS(eth_mask->src.addr_bytes),
+  ETH_ADDR_BYTES_ARGS(ea));
+DUMP_PATTERN_ITEM(eth_mask->dst, false, "dst", ETH_ADDR_FMT,
   ETH_ADDR_BYTES_ARGS(eth_spec->dst.addr_bytes),
-  ETH_ADDR_BYTES_ARGS(eth_mask->dst.addr_bytes));
-DUMP_PATTERN_ITEM(eth_mask->type, "type", "0x%04"PRIx16,
+  ETH_ADDR_BYTES_ARGS(eth_mask->dst.addr_bytes),
+  ETH_ADDR_BYTES_ARGS(ea));
+DUMP_PATTERN_ITEM(eth_mask->type, false, "type", "0x%04"PRIx16,
   ntohs(eth_spec->type),
-  ntohs(eth_mask->type));
+  ntohs(eth_mask->type), 0);
 }
 ds_put_cstr(s, "/ ");
 } else if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) {
@@ -233,11 +240,11 @@ dump_flow_pattern(struct ds *s,
 if (!vlan_mask) {
 vlan_mask = _flow_item_vlan_mask;
 }
-DUMP_PATTERN_ITEM(vlan_mask->inner_type, "inner_type", "0x%"PRIx16,
-  ntohs(vlan_spec->inner_type),
-  ntohs(vlan_mask->inner_type));
-DUMP_PATTERN_ITEM(vlan_mask->tci, "tci", "0x%"PRIx16,
-  ntohs(vlan_spec->tci), ntohs(vlan_mask->tci));
+DUMP_PATTERN_ITEM(vlan_mask->inner_type, false, "inner_type",
+  "0x%"PRIx16, ntohs(vlan_spec->inner_type),
+  ntohs(vlan_mask->inner_type), 0);
+DUMP_PATTERN_ITEM(vlan_mask->tci, false, "tci", "0x%"PRIx16,
+  ntohs(vlan_spec->tci), ntohs(vlan_mask->tci), 0);
 }
 ds_put_cstr(s, "/ ");
 } else if (item->type == RTE_FLOW_ITEM_TYPE_IPV4) {
@@ -249,21 +256,21 @@ dump_flow_pattern(struct ds *s,
 if (!ipv4_mask) {
 ipv4_mask = _flow_item_ipv4_mask;
 }
-DUMP_PATTERN_ITEM(ipv4_mask->hdr.src_addr, "src", IP_FMT,
+DUMP_PATTERN_ITEM(ipv4_mask->hdr.src_addr, false, "src", IP_FMT,
   IP_ARGS(ipv4_spec->hdr.src_addr),
-  IP_ARGS(ipv4_mask->hdr.src_addr));
-DUMP_PATTERN_ITEM(ipv4_mask->hdr.dst_addr, "dst", IP_FMT,
+  IP_ARGS(ipv4_mask->hdr.src_addr), IP_ARGS(0));
+DUMP_PATTERN_ITEM(ipv4_mask->hdr.ds

[ovs-dev] [PATCH V2 2/4] netdev-offload-dpdk: Support IPv4 fragmentation types

2021-08-16 Thread Eli Britstein via dev
Support IPv4 fragmentation matching.

Signed-off-by: Eli Britstein 
---
 lib/netdev-offload-dpdk.c | 47 +--
 1 file changed, 45 insertions(+), 2 deletions(-)

diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index 769dc48d2..ee8e56fe0 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -250,12 +250,18 @@ dump_flow_pattern(struct ds *s,
 } else if (item->type == RTE_FLOW_ITEM_TYPE_IPV4) {
 const struct rte_flow_item_ipv4 *ipv4_spec = item->spec;
 const struct rte_flow_item_ipv4 *ipv4_mask = item->mask;
+const struct rte_flow_item_ipv4 *ipv4_last = item->last;
 
 ds_put_cstr(s, "ipv4 ");
 if (ipv4_spec) {
+ovs_be16 fragment_offset_mask;
+
 if (!ipv4_mask) {
 ipv4_mask = _flow_item_ipv4_mask;
 }
+if (!ipv4_last) {
+ipv4_last = _flow_item_ipv4_mask;
+}
 DUMP_PATTERN_ITEM(ipv4_mask->hdr.src_addr, false, "src", IP_FMT,
   IP_ARGS(ipv4_spec->hdr.src_addr),
   IP_ARGS(ipv4_mask->hdr.src_addr), IP_ARGS(0));
@@ -271,6 +277,16 @@ dump_flow_pattern(struct ds *s,
 DUMP_PATTERN_ITEM(ipv4_mask->hdr.time_to_live, false, "ttl",
   "0x%"PRIx8, ipv4_spec->hdr.time_to_live,
   ipv4_mask->hdr.time_to_live, 0);
+fragment_offset_mask = ipv4_mask->hdr.fragment_offset ==
+   htons(RTE_IPV4_HDR_OFFSET_MASK |
+ RTE_IPV4_HDR_MF_FLAG)
+   ? OVS_BE16_MAX
+   : ipv4_mask->hdr.fragment_offset;
+DUMP_PATTERN_ITEM(fragment_offset_mask, item->last,
+  "fragment_offset", "0x%"PRIx16,
+  ntohs(ipv4_spec->hdr.fragment_offset),
+  ntohs(ipv4_mask->hdr.fragment_offset),
+  ntohs(ipv4_last->hdr.fragment_offset));
 }
 ds_put_cstr(s, "/ ");
 } else if (item->type == RTE_FLOW_ITEM_TYPE_UDP) {
@@ -1129,7 +1145,7 @@ parse_flow_match(struct netdev *netdev,
 
 /* IP v4 */
 if (match->flow.dl_type == htons(ETH_TYPE_IP)) {
-struct rte_flow_item_ipv4 *spec, *mask;
+struct rte_flow_item_ipv4 *spec, *mask, *last = NULL;
 
 spec = xzalloc(sizeof *spec);
 mask = xzalloc(sizeof *mask);
@@ -1152,7 +1168,34 @@ parse_flow_match(struct netdev *netdev,
 consumed_masks->nw_src = 0;
 consumed_masks->nw_dst = 0;
 
-add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_IPV4, spec, mask, NULL);
+if (match->wc.masks.nw_frag & FLOW_NW_FRAG_ANY) {
+if (!(match->flow.nw_frag & FLOW_NW_FRAG_ANY)) {
+/* frag=no. */
+spec->hdr.fragment_offset = 0;
+mask->hdr.fragment_offset = htons(RTE_IPV4_HDR_OFFSET_MASK |
+  RTE_IPV4_HDR_MF_FLAG);
+} else if (match->wc.masks.nw_frag & FLOW_NW_FRAG_LATER) {
+if (!(match->flow.nw_frag & FLOW_NW_FRAG_LATER)) {
+/* frag=first. */
+spec->hdr.fragment_offset = htons(RTE_IPV4_HDR_MF_FLAG);
+mask->hdr.fragment_offset = htons(RTE_IPV4_HDR_OFFSET_MASK 
|
+  RTE_IPV4_HDR_MF_FLAG);
+} else {
+/* frag=later. */
+last = xzalloc(sizeof *last);
+spec->hdr.fragment_offset = htons(1 << 
RTE_IPV4_HDR_FO_SHIFT);
+mask->hdr.fragment_offset = 
htons(RTE_IPV4_HDR_OFFSET_MASK);
+last->hdr.fragment_offset = 
htons(RTE_IPV4_HDR_OFFSET_MASK);
+}
+} else {
+VLOG_WARN_RL(, "Unknown IPv4 frag (0x%x/0x%x)",
+ match->flow.nw_frag, match->wc.masks.nw_frag);
+return -1;
+}
+consumed_masks->nw_frag = 0;
+}
+
+add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_IPV4, spec, mask, last);
 
 /* Save proto for L4 protocol setup. */
 proto = spec->hdr.next_proto_id &
-- 
2.28.0.2311.g225365fb51

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V2 0/4] netdev datapath offload frag matching

2021-08-16 Thread Eli Britstein via dev
Netdev datapath ignored fragmentation matching so far. [1] introduced
an API for that in dpdk. Use it to apply frag matching.

[1] http://mails.dpdk.org/archives/dev/2020-October/186177.html

Travis:
v1: https://travis-ci.org/github/elibritstein/OVS/builds/752634689

GitHub Actions:
v1: https://github.com/elibritstein/OVS/actions/runs/459082657
v2: https://github.com/elibritstein/OVS/actions/runs/1135430235

v2-v1:
- Rebase.

Eli Britstein (4):
  netdev-offload-dpdk: Add last attribute to patterns
  netdev-offload-dpdk: Support IPv4 fragmentation types
  netdev-offload-dpdk: Support IPv6 fragmentation types
  netdev-offload-dpdk: Don't ignore frags as they are handled

 NEWS  |   2 +
 lib/netdev-offload-dpdk.c | 283 +++---
 2 files changed, 208 insertions(+), 77 deletions(-)

-- 
2.28.0.2311.g225365fb51

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH V3 0/4] dpif-netdev offload transitions

2021-08-03 Thread Eli Britstein via dev



On 8/2/2021 10:58 PM, Ilya Maximets wrote:

External email: Use caution opening links or attachments


On 7/26/21 10:14 AM, Eli Britstein wrote:

This patch-set improves/fixes offloads transitions behavior.

Patch #1 avoids flushing PMD offloads unnecessarily.
Patch #2 fixes a flow modifications bug.
Patch #3 fixes a race condition with flow modifications.
Patch #4 improves debuggability of flow modifications.

v2-v1:
- Rebase.
v3-v2:
- Added patch #2, which fixes another flow modification scenario.
- Changed log to a separated function.

GitHub Actions:
v1: https://github.com/elibritstein/OVS/actions/runs/769805954
 - This run has encountered some internal GitHub problems.
 - A previous good run, with the same code, only changed commit
   messages since:
 https://github.com/elibritstein/OVS/actions/runs/70787
v2: https://github.com/elibritstein/OVS/actions/runs/1023045302
v3: https://github.com/elibritstein/OVS/actions/runs/1066584199

Eli Britstein (4):
   dpif-netdev: Do not flush PMD offloads on reload
   dpif-netdev: Fix flow modification after failure
   dpif-netdev: Fix offloads of modified flows
   dpif-netdev: Log flow modification in debug level

  lib/dpif-netdev.c | 144 --
  1 file changed, 74 insertions(+), 70 deletions(-)


Thanks!
I renamed the function in the patch #4 from 'dump_*' to 'log_*'
to avoid confusion with flow dump functions and applied the
patch set.

I added Ack from David to patch #1 since it's the same as in v2.
Patches #2 and #3 backported down to 2.13.

Thanks. Indeed I forgot to add it.


Technically, I think, we can consider patch #1 as a bug fix too
and backport it down to 2.15.  Let me know if that's needed.


This patch depends on the flush series that was merged in 2.15, but 
didn't have Fixes tags, though it fixed a memory leak.


I think here it is not as severe as a memory leak, only a failure in 
offloads, but similar in severity to patches #2,#3.


Though it's not a clear cut, if #2,#3 are backported, #1 can be too.



Best regards, Ilya Maximets.

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V3 3/4] dpif-netdev: Fix offloads of modified flows

2021-07-26 Thread Eli Britstein via dev
Association of a mark to a flow is done as part of its offload handling,
in the offloading thread. However, the PMD thread specifies whether an
offload request is an "add" or "modify" by the association of a mark to
the flow.
This is exposed to a race condition. A flow might be created with
actions that cannot be fully offloaded, for example flooding (before MAC
learning), and later modified to have actions that can be fully
offloaded. If the two requests are queued before the offload thread
handling, they are both marked as "add". When the offload thread handles
them, the first request is partially offloaded, and the second one is
ignored as the flow is already considered as offloaded.

Fix it by specifying add/modify of an offload request by the actual flow
state change, without relying on the mark.

Fixes: 3c7330ebf036 ("netdev-offload-dpdk: Support offload of output action.")
Signed-off-by: Eli Britstein 
Reviewed-by: Gaetan Rivet 
---
 lib/dpif-netdev.c | 13 -
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 5fe06b996..6a7bed840 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -2714,10 +2714,9 @@ static void
 queue_netdev_flow_put(struct dp_netdev_pmd_thread *pmd,
   struct dp_netdev_flow *flow, struct match *match,
   const struct nlattr *actions, size_t actions_len,
-  odp_port_t orig_in_port)
+  odp_port_t orig_in_port, int op)
 {
 struct dp_flow_offload_item *offload;
-int op;
 
 if (!netdev_is_flow_api_enabled()) {
 return;
@@ -2730,11 +2729,6 @@ queue_netdev_flow_put(struct dp_netdev_pmd_thread *pmd,
 ovsthread_once_done(_thread_once);
 }
 
-if (flow->mark != INVALID_FLOW_MARK) {
-op = DP_NETDEV_FLOW_OFFLOAD_OP_MOD;
-} else {
-op = DP_NETDEV_FLOW_OFFLOAD_OP_ADD;
-}
 offload = dp_netdev_alloc_flow_offload(pmd, flow, op);
 offload->match = *match;
 offload->actions = xmalloc(actions_len);
@@ -3586,7 +3580,7 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd,
 dp_netdev_flow_hash(>ufid));
 
 queue_netdev_flow_put(pmd, flow, match, actions, actions_len,
-  orig_in_port);
+  orig_in_port, DP_NETDEV_FLOW_OFFLOAD_OP_ADD);
 
 if (OVS_UNLIKELY(!VLOG_DROP_DBG((_rl {
 struct ds ds = DS_EMPTY_INITIALIZER;
@@ -3673,7 +3667,8 @@ flow_put_on_pmd(struct dp_netdev_pmd_thread *pmd,
 ovsrcu_set(_flow->actions, new_actions);
 
 queue_netdev_flow_put(pmd, netdev_flow, match,
-  put->actions, put->actions_len, ODPP_NONE);
+  put->actions, put->actions_len, ODPP_NONE,
+  DP_NETDEV_FLOW_OFFLOAD_OP_MOD);
 
 if (stats) {
 get_dpif_flow_status(pmd->dp, netdev_flow, stats, NULL);
-- 
2.28.0.2311.g225365fb51

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V3 4/4] dpif-netdev: Log flow modification in debug level

2021-07-26 Thread Eli Britstein via dev
Log flow modifications to help debugging.

Signed-off-by: Eli Britstein 
Reviewed-by: Gaetan Rivet 
---
 lib/dpif-netdev.c | 114 +++---
 1 file changed, 68 insertions(+), 46 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 6a7bed840..4002bfb4e 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -2710,6 +2710,71 @@ queue_netdev_flow_del(struct dp_netdev_pmd_thread *pmd,
 dp_netdev_append_flow_offload(offload);
 }
 
+static void
+dump_netdev_flow_change(struct dp_netdev_flow *flow,
+struct match *match,
+const struct dp_netdev_actions *old_actions,
+const struct nlattr *actions,
+size_t actions_len)
+{
+struct ds ds = DS_EMPTY_INITIALIZER;
+struct ofpbuf key_buf, mask_buf;
+struct odp_flow_key_parms odp_parms = {
+.flow = >flow,
+.mask = >wc.masks,
+.support = dp_netdev_support,
+};
+
+if (OVS_LIKELY(VLOG_DROP_DBG((_rl {
+return;
+}
+
+ofpbuf_init(_buf, 0);
+ofpbuf_init(_buf, 0);
+
+odp_flow_key_from_flow(_parms, _buf);
+odp_parms.key_buf = _buf;
+odp_flow_key_from_mask(_parms, _buf);
+
+if (old_actions) {
+ds_put_cstr(, "flow_mod: ");
+} else {
+ds_put_cstr(, "flow_add: ");
+}
+odp_format_ufid(>ufid, );
+ds_put_cstr(, " mega_");
+odp_format_ufid(>mega_ufid, );
+ds_put_cstr(, " ");
+odp_flow_format(key_buf.data, key_buf.size,
+mask_buf.data, mask_buf.size,
+NULL, , false);
+if (old_actions) {
+ds_put_cstr(, ", old_actions:");
+format_odp_actions(, old_actions->actions, old_actions->size,
+   NULL);
+}
+ds_put_cstr(, ", actions:");
+format_odp_actions(, actions, actions_len, NULL);
+
+VLOG_DBG("%s", ds_cstr());
+
+ofpbuf_uninit(_buf);
+ofpbuf_uninit(_buf);
+
+/* Add a printout of the actual match installed. */
+struct match m;
+ds_clear();
+ds_put_cstr(, "flow match: ");
+miniflow_expand(>cr.flow.mf, );
+miniflow_expand(>cr.mask->mf, );
+memset(_md, 0, sizeof m.tun_md);
+match_format(, NULL, , OFP_DEFAULT_PRIORITY);
+
+VLOG_DBG("%s", ds_cstr());
+
+ds_destroy();
+}
+
 static void
 queue_netdev_flow_put(struct dp_netdev_pmd_thread *pmd,
   struct dp_netdev_flow *flow, struct match *match,
@@ -3581,52 +3646,7 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd,
 
 queue_netdev_flow_put(pmd, flow, match, actions, actions_len,
   orig_in_port, DP_NETDEV_FLOW_OFFLOAD_OP_ADD);
-
-if (OVS_UNLIKELY(!VLOG_DROP_DBG((_rl {
-struct ds ds = DS_EMPTY_INITIALIZER;
-struct ofpbuf key_buf, mask_buf;
-struct odp_flow_key_parms odp_parms = {
-.flow = >flow,
-.mask = >wc.masks,
-.support = dp_netdev_support,
-};
-
-ofpbuf_init(_buf, 0);
-ofpbuf_init(_buf, 0);
-
-odp_flow_key_from_flow(_parms, _buf);
-odp_parms.key_buf = _buf;
-odp_flow_key_from_mask(_parms, _buf);
-
-ds_put_cstr(, "flow_add: ");
-odp_format_ufid(ufid, );
-ds_put_cstr(, " mega_");
-odp_format_ufid(>mega_ufid, );
-ds_put_cstr(, " ");
-odp_flow_format(key_buf.data, key_buf.size,
-mask_buf.data, mask_buf.size,
-NULL, , false);
-ds_put_cstr(, ", actions:");
-format_odp_actions(, actions, actions_len, NULL);
-
-VLOG_DBG("%s", ds_cstr());
-
-ofpbuf_uninit(_buf);
-ofpbuf_uninit(_buf);
-
-/* Add a printout of the actual match installed. */
-struct match m;
-ds_clear();
-ds_put_cstr(, "flow match: ");
-miniflow_expand(>cr.flow.mf, );
-miniflow_expand(>cr.mask->mf, );
-memset(_md, 0, sizeof m.tun_md);
-match_format(, NULL, , OFP_DEFAULT_PRIORITY);
-
-VLOG_DBG("%s", ds_cstr());
-
-ds_destroy();
-}
+dump_netdev_flow_change(flow, match, NULL, actions, actions_len);
 
 return flow;
 }
@@ -3669,6 +3689,8 @@ flow_put_on_pmd(struct dp_netdev_pmd_thread *pmd,
 queue_netdev_flow_put(pmd, netdev_flow, match,
   put->actions, put->actions_len, ODPP_NONE,
   DP_NETDEV_FLOW_OFFLOAD_OP_MOD);
+dump_netdev_flow_change(netdev_flow, match, old_actions,
+put->actions, put->actions_len);
 
 if (stats) {
 get_dpif_flow_status(pmd->dp, netdev_flow, stats, NULL);
-- 
2.28.0.2311.g225365fb51

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V3 2/4] dpif-netdev: Fix flow modification after failure

2021-07-26 Thread Eli Britstein via dev
dp_netdev_flow_offload_main thread is asynchronous, by the cited commit.
There might be a case where several modification requests of the same
flow are submitted before being handled. Then, if the first handling fails, the
rule for the flow is deleted, and the mark is freed. Then, the following
one should not be handled as a modification, but rather as an "add".

Fixes: 02bb2824e51d ("dpif-netdev: do hw flow offload in a thread")
Signed-off-by: Eli Britstein 
---
 lib/dpif-netdev.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index b1fffbac7..5fe06b996 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -2573,7 +2573,8 @@ dp_netdev_flow_offload_put(struct dp_flow_offload_item 
*offload)
 struct dp_netdev_flow *flow = offload->flow;
 odp_port_t in_port = flow->flow.in_port.odp_port;
 const char *dpif_type_str = dpif_normalize_type(pmd->dp->class->type);
-bool modification = offload->op == DP_NETDEV_FLOW_OFFLOAD_OP_MOD;
+bool modification = offload->op == DP_NETDEV_FLOW_OFFLOAD_OP_MOD
+&& flow->mark != INVALID_FLOW_MARK;
 struct offload_info info;
 struct netdev *port;
 uint32_t mark;
@@ -2585,7 +2586,6 @@ dp_netdev_flow_offload_put(struct dp_flow_offload_item 
*offload)
 
 if (modification) {
 mark = flow->mark;
-ovs_assert(mark != INVALID_FLOW_MARK);
 } else {
 /*
  * If a mega flow has already been offloaded (from other PMD
-- 
2.28.0.2311.g225365fb51

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V3 1/4] dpif-netdev: Do not flush PMD offloads on reload

2021-07-26 Thread Eli Britstein via dev
Before flushing offloads of a removed port was supported by [1], it was
necessary to flush the 'marks'. In doing so, all offloads of the PMD are
removed, including the ones that are not related to the removed port and
that are not modified following this removal. As a result such flows are
evicted from being offloaded, and won't resume offloading.

As PMD offload flush is not necessary, avoid it.

[1] 62d1c28e9ce0 ("dpif-netdev: Flush offload rules upon port deletion.")

Signed-off-by: Eli Britstein 
Reviewed-by: Gaetan Rivet 
---
 lib/dpif-netdev.c | 13 -
 1 file changed, 13 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 59e326f11..b1fffbac7 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -2495,18 +2495,6 @@ mark_to_flow_disassociate(struct dp_netdev_pmd_thread 
*pmd,
 return ret;
 }
 
-static void
-flow_mark_flush(struct dp_netdev_pmd_thread *pmd)
-{
-struct dp_netdev_flow *flow;
-
-CMAP_FOR_EACH (flow, mark_node, _mark.mark_to_flow) {
-if (flow->pmd_id == pmd->core_id) {
-queue_netdev_flow_del(pmd, flow);
-}
-}
-}
-
 static struct dp_netdev_flow *
 mark_to_flow_find(const struct dp_netdev_pmd_thread *pmd,
   const uint32_t mark)
@@ -5524,7 +5512,6 @@ reload_affected_pmds(struct dp_netdev *dp)
 
 CMAP_FOR_EACH (pmd, node, >poll_threads) {
 if (pmd->need_reload) {
-flow_mark_flush(pmd);
 dp_netdev_reload_pmd__(pmd);
 }
 }
-- 
2.28.0.2311.g225365fb51

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V3 0/4] dpif-netdev offload transitions

2021-07-26 Thread Eli Britstein via dev
This patch-set improves/fixes offloads transitions behavior.

Patch #1 avoids flushing PMD offloads unnecessarily.
Patch #2 fixes a flow modifications bug.
Patch #3 fixes a race condition with flow modifications.
Patch #4 improves debuggability of flow modifications.

v2-v1:
- Rebase.
v3-v2:
- Added patch #2, which fixes another flow modification scenario.
- Changed log to a separated function.

GitHub Actions:
v1: https://github.com/elibritstein/OVS/actions/runs/769805954
- This run has encountered some internal GitHub problems.
- A previous good run, with the same code, only changed commit
  messages since:
https://github.com/elibritstein/OVS/actions/runs/70787
v2: https://github.com/elibritstein/OVS/actions/runs/1023045302
v3: https://github.com/elibritstein/OVS/actions/runs/1066584199

Eli Britstein (4):
  dpif-netdev: Do not flush PMD offloads on reload
  dpif-netdev: Fix flow modification after failure
  dpif-netdev: Fix offloads of modified flows
  dpif-netdev: Log flow modification in debug level

 lib/dpif-netdev.c | 144 --
 1 file changed, 74 insertions(+), 70 deletions(-)

-- 
2.28.0.2311.g225365fb51

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V3 2/2] netdev-offload-dpdk: Fix vxlan vni cast-align warnings

2021-07-25 Thread Eli Britstein via dev
Reported-by: Harry Van Haaren 
Fixes: 4e432d6f8128 ("netdev-offload-dpdk: Support tnl/push using vxlan encap 
attribute.")
Fixes: e098c2f966cb ("netdev-dpdk-offload: Add vxlan pattern matching 
function.")
Signed-off-by: Eli Britstein 
---
 lib/netdev-offload-dpdk.c | 19 +--
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index 4112fc3a5..f6706ee0c 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -375,15 +375,19 @@ dump_flow_pattern(struct ds *s,
 } else if (item->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
 const struct rte_flow_item_vxlan *vxlan_spec = item->spec;
 const struct rte_flow_item_vxlan *vxlan_mask = item->mask;
+ovs_be32 spec_vni, mask_vni;
 
 ds_put_cstr(s, "vxlan ");
 if (vxlan_spec) {
 if (!vxlan_mask) {
 vxlan_mask = _flow_item_vxlan_mask;
 }
+spec_vni = get_unaligned_be32(ALIGNED_CAST(ovs_be32 *,
+   vxlan_spec->vni));
+mask_vni = get_unaligned_be32(ALIGNED_CAST(ovs_be32 *,
+   vxlan_mask->vni));
 DUMP_PATTERN_ITEM(vxlan_mask->vni, "vni", "%"PRIu32,
-  ntohl(*(ovs_be32 *) vxlan_spec->vni) >> 8,
-  ntohl(*(ovs_be32 *) vxlan_mask->vni) >> 8);
+  ntohl(spec_vni) >> 8, ntohl(mask_vni) >> 8);
 }
 ds_put_cstr(s, "/ ");
 } else {
@@ -417,8 +421,11 @@ dump_vxlan_encap(struct ds *s, const struct rte_flow_item 
*items)
 ds_put_format(s, "set vxlan ip-version %s ",
   ipv4 ? "ipv4" : ipv6 ? "ipv6" : "ERR");
 if (vxlan) {
-ds_put_format(s, "vni %"PRIu32" ",
-  ntohl(*(ovs_be32 *) vxlan->vni) >> 8);
+ovs_be32 vni;
+
+vni = get_unaligned_be32(ALIGNED_CAST(ovs_be32 *,
+  vxlan->vni));
+ds_put_format(s, "vni %"PRIu32" ", ntohl(vni) >> 8);
 }
 if (udp) {
 ds_put_format(s, "udp-src %"PRIu16" udp-dst %"PRIu16" ",
@@ -1002,9 +1009,9 @@ parse_vxlan_match(struct flow_patterns *patterns,
 vx_spec = xzalloc(sizeof *vx_spec);
 vx_mask = xzalloc(sizeof *vx_mask);
 
-put_unaligned_be32((ovs_be32 *) vx_spec->vni,
+put_unaligned_be32(ALIGNED_CAST(ovs_be32 *, vx_spec->vni),
htonl(ntohll(match->flow.tunnel.tun_id) << 8));
-put_unaligned_be32((ovs_be32 *) vx_mask->vni,
+put_unaligned_be32(ALIGNED_CAST(ovs_be32 *, vx_mask->vni),
htonl(ntohll(match->wc.masks.tunnel.tun_id) << 8));
 
 consumed_masks->tunnel.tun_id = 0;
-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V3 1/2] netdev-offload-dpdk: Fix IPv6 rewrite cast-align warning

2021-07-25 Thread Eli Britstein via dev
Fixes: b6207b1d2711 ("netdev-offload-dpdk: Support offload of set IPv6 
actions.")
Signed-off-by: Eli Britstein 
---
 lib/netdev-offload-dpdk.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index e7913292e..4112fc3a5 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -568,8 +568,11 @@ dump_flow_action(struct ds *s, struct ds *s_extra,
 
 ds_put_format(s, "set_ipv6_%s ", dirstr);
 if (set_ipv6) {
+struct in6_addr addr;
+
 ds_put_cstr(s, "ipv6_addr ");
-ipv6_format_addr((struct in6_addr *) _ipv6->ipv6_addr, s);
+memcpy(, set_ipv6->ipv6_addr, sizeof addr);
+ipv6_format_addr(, s);
 ds_put_cstr(s, " ");
 }
 ds_put_cstr(s, "/ ");
-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH V2 1/3] dpif-netdev: Do not flush PMD offloads on reload

2021-07-25 Thread Eli Britstein via dev



On 7/23/2021 9:00 PM, Ilya Maximets wrote:

External email: Use caution opening links or attachments


On 7/12/21 5:07 PM, Eli Britstein wrote:

Before flushing offloads of a removed port was supported by [1], it was
necessary to flush the 'marks'. In doing so, all offloads of the PMD are
removed, including the ones that are not related to the removed port and
that are not modified following this removal. As a result such flows are
evicted from being offloaded, and won't resume offloading.

As PMD offload flush is not necessary, avoid it.

[1] 62d1c28e9ce0 ("dpif-netdev: Flush offload rules upon port deletion.")

Signed-off-by: Eli Britstein 
Reviewed-by: Gaetan Rivet 
---

Is my understanding here correct:
On a port deletion netdev_flow_flush() will remove flows from HW and
offloading layer.  Later, ofproto will request to remove flows from
the datapath.  flow marks will be freed, but actual netdev_flow_del()
will fail, because netdev-offload already removed these flows.  But
we do not really care about this failure.  Right?


That is correct.

It was also the same before above [1], but then in the race condition, 
the offload memory was leaked, and the offloads themselves were either 
not destroyed (e.g. leaked) or removed by the PMD, which was PMD dependent.


[1] resolved the leaking, but didn't change the mentioned failure behavior.

This commit doesn't prevent this failure either, but removes the code 
that destroys offloads of flows that should not be destroyed.




Best regards, Ilya Maximets.

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 2/2] netdev-offload-dpdk: Fix vxlan vni cast-align warnings

2021-07-22 Thread Eli Britstein via dev


On 7/22/2021 4:10 PM, Ilya Maximets wrote:

External email: Use caution opening links or attachments


On 7/22/21 3:00 PM, Eli Britstein wrote:

On 7/22/2021 3:28 PM, Ilya Maximets wrote:

External email: Use caution opening links or attachments


On 7/11/21 7:15 AM, Eli Britstein wrote:

Compiling with -Werror and -Wcast-align has errors like:

lib/netdev-offload-dpdk.c: In function 'dump_flow_pattern':
lib/netdev-offload-dpdk.c:385:38: error: cast increases required alignment
  of target type [-Werror=cast-align]
385 |    ntohl(*(ovs_be32 *) vxlan_spec->vni) >> 8,
|   ^

Fix them.

Reported-by: Harry Van Haaren 
Fixes: 4e432d6f8128 ("netdev-offload-dpdk: Support tnl/push using vxlan encap 
attribute.")
Fixes: e098c2f966cb ("netdev-dpdk-offload: Add vxlan pattern matching 
function.")
Signed-off-by: Eli Britstein 
---
   lib/netdev-offload-dpdk.c | 18 +-
   1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index a24f92782..e4b19ae40 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -375,6 +375,8 @@ dump_flow_pattern(struct ds *s,
   } else if (item->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
   const struct rte_flow_item_vxlan *vxlan_spec = item->spec;
   const struct rte_flow_item_vxlan *vxlan_mask = item->mask;
+BUILD_ASSERT_DECL(offsetof(struct rte_flow_item_vxlan, vni) %
+  sizeof(ovs_be32) == 0);

   ds_put_cstr(s, "vxlan ");
   if (vxlan_spec) {
@@ -382,8 +384,10 @@ dump_flow_pattern(struct ds *s,
   vxlan_mask = _flow_item_vxlan_mask;
   }
   DUMP_PATTERN_ITEM(vxlan_mask->vni, "vni", "%"PRIu32,
-  ntohl(*(ovs_be32 *) vxlan_spec->vni) >> 8,
-  ntohl(*(ovs_be32 *) vxlan_mask->vni) >> 8);
+  ntohl(*ALIGNED_CAST(ovs_be32 *,
+  vxlan_spec->vni)) >> 8,
+  ntohl(*ALIGNED_CAST(ovs_be32 *,
+  vxlan_mask->vni)) >> 8);
   }
   ds_put_cstr(s, "/ ");
   } else {
@@ -417,8 +421,10 @@ dump_vxlan_encap(struct ds *s, const struct rte_flow_item 
*items)
   ds_put_format(s, "set vxlan ip-version %s ",
 ipv4 ? "ipv4" : ipv6 ? "ipv6" : "ERR");
   if (vxlan) {
+BUILD_ASSERT_DECL(offsetof(struct rte_flow_item_vxlan, vni) %
+  sizeof(ovs_be32) == 0);
   ds_put_format(s, "vni %"PRIu32" ",
-  ntohl(*(ovs_be32 *) vxlan->vni) >> 8);
+  ntohl(*ALIGNED_CAST(ovs_be32 *, vxlan->vni)) >> 8);
   }
   if (udp) {
   ds_put_format(s, "udp-src %"PRIu16" udp-dst %"PRIu16" ",
@@ -1003,9 +1009,11 @@ parse_vxlan_match(struct flow_patterns *patterns,
   vx_spec = xzalloc(sizeof *vx_spec);
   vx_mask = xzalloc(sizeof *vx_mask);

-put_unaligned_be32((ovs_be32 *) vx_spec->vni,
+BUILD_ASSERT_DECL(offsetof(struct rte_flow_item_vxlan, vni) %
+  sizeof(ovs_be32) == 0);
+put_unaligned_be32(ALIGNED_CAST(ovs_be32 *, vx_spec->vni),
  htonl(ntohll(match->flow.tunnel.tun_id) << 8));
-put_unaligned_be32((ovs_be32 *) vx_mask->vni,
+put_unaligned_be32(ALIGNED_CAST(ovs_be32 *, vx_mask->vni),
  htonl(ntohll(match->wc.masks.tunnel.tun_id) << 8));

   consumed_masks->tunnel.tun_id = 0;


Same concerns here about the build time assertion as in the patch #1.
It also seems redundant to use put_unaligned_* functions and have a
build assertion at the same time.

Suggesting to just use put/get_unaligned_* in all cases and remove
build time assertions.

The code before this patch just uses (for example) put_unaligned_be32, whose 
first argument is (ovs_be32 *).

vni in struct rte_flow_item_vxlan  in dpdk is uint8_t vni[3]; /**< VXLAN 
identifier. */

I use ALIGNED_CAST to mute the warning, and the assert to make sure the 
alignment is correct.

I don't understand your suggestion here, unless you suggest to use memcpy as 
suggested in patch#1.

put_unaligned_be32 is an alias for put_unaligned_u32 that
is implemented like this:

116 static inline void put_unaligned_u32(uint32_t *p_, uint32_t x_)
117 {
118 uint8_t *p = (uint8_t *) p_;
119 uint32_t x = ntohl(x_);
120
121 p[0] = x >> 24;
122 p[1] = x >> 16;
123 p[2] = x >> 8;
124 p[3] = x;
125 }

or by the equivalent function provided by compiler.

The memory copy is performed byte-by-byte, hence it is independent from the
original alignment of the memory.

Re: [ovs-dev] [PATCH 2/2] netdev-offload-dpdk: Fix vxlan vni cast-align warnings

2021-07-22 Thread Eli Britstein via dev


On 7/22/2021 3:28 PM, Ilya Maximets wrote:

External email: Use caution opening links or attachments


On 7/11/21 7:15 AM, Eli Britstein wrote:

Compiling with -Werror and -Wcast-align has errors like:

lib/netdev-offload-dpdk.c: In function 'dump_flow_pattern':
lib/netdev-offload-dpdk.c:385:38: error: cast increases required alignment
 of target type [-Werror=cast-align]
   385 |ntohl(*(ovs_be32 *) vxlan_spec->vni) >> 8,
   |   ^

Fix them.

Reported-by: Harry Van Haaren 
Fixes: 4e432d6f8128 ("netdev-offload-dpdk: Support tnl/push using vxlan encap 
attribute.")
Fixes: e098c2f966cb ("netdev-dpdk-offload: Add vxlan pattern matching 
function.")
Signed-off-by: Eli Britstein 
---
  lib/netdev-offload-dpdk.c | 18 +-
  1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index a24f92782..e4b19ae40 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -375,6 +375,8 @@ dump_flow_pattern(struct ds *s,
  } else if (item->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
  const struct rte_flow_item_vxlan *vxlan_spec = item->spec;
  const struct rte_flow_item_vxlan *vxlan_mask = item->mask;
+BUILD_ASSERT_DECL(offsetof(struct rte_flow_item_vxlan, vni) %
+  sizeof(ovs_be32) == 0);

  ds_put_cstr(s, "vxlan ");
  if (vxlan_spec) {
@@ -382,8 +384,10 @@ dump_flow_pattern(struct ds *s,
  vxlan_mask = _flow_item_vxlan_mask;
  }
  DUMP_PATTERN_ITEM(vxlan_mask->vni, "vni", "%"PRIu32,
-  ntohl(*(ovs_be32 *) vxlan_spec->vni) >> 8,
-  ntohl(*(ovs_be32 *) vxlan_mask->vni) >> 8);
+  ntohl(*ALIGNED_CAST(ovs_be32 *,
+  vxlan_spec->vni)) >> 8,
+  ntohl(*ALIGNED_CAST(ovs_be32 *,
+  vxlan_mask->vni)) >> 8);
  }
  ds_put_cstr(s, "/ ");
  } else {
@@ -417,8 +421,10 @@ dump_vxlan_encap(struct ds *s, const struct rte_flow_item 
*items)
  ds_put_format(s, "set vxlan ip-version %s ",
ipv4 ? "ipv4" : ipv6 ? "ipv6" : "ERR");
  if (vxlan) {
+BUILD_ASSERT_DECL(offsetof(struct rte_flow_item_vxlan, vni) %
+  sizeof(ovs_be32) == 0);
  ds_put_format(s, "vni %"PRIu32" ",
-  ntohl(*(ovs_be32 *) vxlan->vni) >> 8);
+  ntohl(*ALIGNED_CAST(ovs_be32 *, vxlan->vni)) >> 8);
  }
  if (udp) {
  ds_put_format(s, "udp-src %"PRIu16" udp-dst %"PRIu16" ",
@@ -1003,9 +1009,11 @@ parse_vxlan_match(struct flow_patterns *patterns,
  vx_spec = xzalloc(sizeof *vx_spec);
  vx_mask = xzalloc(sizeof *vx_mask);

-put_unaligned_be32((ovs_be32 *) vx_spec->vni,
+BUILD_ASSERT_DECL(offsetof(struct rte_flow_item_vxlan, vni) %
+  sizeof(ovs_be32) == 0);
+put_unaligned_be32(ALIGNED_CAST(ovs_be32 *, vx_spec->vni),
 htonl(ntohll(match->flow.tunnel.tun_id) << 8));
-put_unaligned_be32((ovs_be32 *) vx_mask->vni,
+put_unaligned_be32(ALIGNED_CAST(ovs_be32 *, vx_mask->vni),
 htonl(ntohll(match->wc.masks.tunnel.tun_id) << 8));

  consumed_masks->tunnel.tun_id = 0;


Same concerns here about the build time assertion as in the patch #1.
It also seems redundant to use put_unaligned_* functions and have a
build assertion at the same time.

Suggesting to just use put/get_unaligned_* in all cases and remove
build time assertions.


The code before this patch just uses (for example) put_unaligned_be32, 
whose first argument is (ovs_be32 *).


vni in struct rte_flow_item_vxlan  in dpdk is uint8_t vni[3]; /**< VXLAN 
identifier. */


I use ALIGNED_CAST to mute the warning, and the assert to make sure the 
alignment is correct.


I don't understand your suggestion here, unless you suggest to use 
memcpy as suggested in patch#1.




Best regards, Ilya Maximets.

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 2/2] dpif-netdev: Introduce netdev array cache

2021-07-15 Thread Eli Britstein via dev


On 7/15/2021 4:35 PM, Ferriter, Cian wrote:

External email: Use caution opening links or attachments



-Original Message-
From: Eli Britstein 
Sent: Wednesday 14 July 2021 16:21
To: Ferriter, Cian ; Ilya Maximets 
; Gaëtan Rivet
; d...@openvswitch.org; Van Haaren, Harry 

Cc: Majd Dibbiny ; Stokes, Ian ; Flavio 
Leitner

Subject: Re: [ovs-dev] [PATCH 2/2] dpif-netdev: Introduce netdev array cache


On 7/14/2021 5:58 PM, Ferriter, Cian wrote:

External email: Use caution opening links or attachments



-Original Message-
From: Ilya Maximets 
Sent: Friday 9 July 2021 21:53
To: Ferriter, Cian ; Gaëtan Rivet ; 
Eli Britstein
; d...@openvswitch.org; Van Haaren, Harry 

Cc: Majd Dibbiny ; Ilya Maximets ; Stokes, 
Ian
; Flavio Leitner 
Subject: Re: [ovs-dev] [PATCH 2/2] dpif-netdev: Introduce netdev array cache

On 7/8/21 6:43 PM, Ferriter, Cian wrote:

Hi Gaetan, Eli and all,

Thanks for the patch and the info on how it affects performance in your case. I 
just wanted to

post

the performance we are seeing.

I've posted the numbers inline. Please note, I'll be away on leave till Tuesday.
Thanks,
Cian


-Original Message-
From: Gaëtan Rivet 
Sent: Wednesday 7 July 2021 17:36
To: Eli Britstein ;  
; Van Haaren,

Harry

; Ferriter, Cian 
Cc: Majd Dibbiny ; Ilya Maximets 
Subject: Re: [ovs-dev] [PATCH 2/2] dpif-netdev: Introduce netdev array cache

On Wed, Jul 7, 2021, at 17:05, Eli Britstein wrote:

Port numbers are usually small. Maintain an array of netdev handles indexed
by port numbers. It accelerates looking up for them for
netdev_hw_miss_packet_recover().

Reported-by: Cian Ferriter 
Signed-off-by: Eli Britstein 
Reviewed-by: Gaetan Rivet 
---




___
dev mailing list
d...@openvswitch.org


https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fmail.openvswitch.org%2Fmailman%2Flis
tinfo%2Fovs-
devdata=04%7C01%7Celibr%40nvidia.com%7C7ca0caf9434e429e4ffd08d946d7cf2f%7C43083d15727340c1b7db39e
fd9ccc17a%7C0%7C0%7C637618715041410254%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJ
BTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000sdata=rv%2FdenANxrcTGxBBbRvhhlNioyswL7ieFr8AGcGtCs8%3Drese
rved=0

Hello,

I tested the performance impact of this patch with a partial offload setup.
As reported by pmd-stats-show, in average cycles per packet:

Before vxlan-decap: 525 c/p
After vxlan-decap: 542 c/p
After this fix: 530 c/p

Without those fixes, vxlan-decap has a 3.2% negative impact on cycles,
with the fixes, the impact is reduced to 0.95%.

As I had to force partial offloads for our hardware, it would be better
with an outside confirmation on a proper setup.

Kind regards,
--
Gaetan Rivet

I'm showing the performance relative to what we measured on OVS master directly 
before the VXLAN

HWOL changes went in. All of the below results are using the scalar DPIF and 
partial HWOL.

Link to "Fixup patches":

https://nam11.safelinks.protection.outlook.com/?url=http%3A%2F%2Fpatchwork.ozlabs.org%2Fproject%2Fopen
vswitch%2Flist%2F%3Fseries%3D252356data=04%7C01%7Celibr%40nvidia.com%7C7ca0caf9434e429e4ffd08d946
d7cf2f%7C43083d15727340c1b7db39efd9ccc17a%7C0%7C0%7C637618715041410254%7CUnknown%7CTWFpbGZsb3d8eyJWIjo
iMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000sdata=Y62OrCRyS00vJHHPQvAHyhG5C
4eO%2FSfWMCSPtszn3Is%3Dreserved=0

Master before VXLAN HWOL changes (f0e4a73)
1.000x

Latest master after VXLAN HWOL changes (b780911)
0.918x (-8.2%)

After fixup patches on OVS ML are applied (with ALLOW_EXPERIMENTAL_API=off)
0.973x (-2.7%)

After fixup patches on OVS ML are applied and after ALLOW_EXPERIMENTAL_API is 
removed.
0.938x (-6.2%)

I ran the last set of results by applying the below diff. I did this because 
I'm assuming the plan

is to remove the ALLOW_EXPERIMENTAL_API '#ifdef's at some point?

Yes, that is the plan.


Thanks for confirming this.


And thanks for testing, Gaetan and Cian!

Could you also provide more details on your test environment,
so someone else can reproduce?


Good idea, I'll add the details inline below. These details apply to the 
performance measured

previously by me, and the performance in this mail.

What is important to know:
- Test configuration: P2P, V2V, PVP, etc.

P2P
1 PHY port
1 RXQ


- Test type: max. throughput, zero packet loss.

Max throughput.


- OVS config: EMC, SMC, HWOL, AVX512 - on/off/type

In all tests, all packets hit a single datapath flow with "offloaded:partial". 
So all packets are

partially offloaded, skipping miniflow_extract() and EMC/SMC/DPCLS lookups.

AVX512 is off.


- Installed OF rules.

$ $OVS_DIR/utilities/ovs-ofctl dump-flows br0
   cookie=0x0, duration=253.691s, table=0, n_packets=2993867136, 
n_bytes=179632028160, in_port=phy0

actions=IN_PORT

- Traffic pattern: Packet size, number of flows, packet type.

64B, 1 flow, ETH/IP packets.


These tests also didn't include the fix from Balazs, IIUC, because
they were performed a bit before tha

Re: [ovs-dev] [PATCH 2/2] dpif-netdev: Introduce netdev array cache

2021-07-14 Thread Eli Britstein


On 7/14/2021 5:58 PM, Ferriter, Cian wrote:

External email: Use caution opening links or attachments



-Original Message-
From: Ilya Maximets 
Sent: Friday 9 July 2021 21:53
To: Ferriter, Cian ; Gaëtan Rivet ; 
Eli Britstein
; d...@openvswitch.org; Van Haaren, Harry 

Cc: Majd Dibbiny ; Ilya Maximets ; Stokes, 
Ian
; Flavio Leitner 
Subject: Re: [ovs-dev] [PATCH 2/2] dpif-netdev: Introduce netdev array cache

On 7/8/21 6:43 PM, Ferriter, Cian wrote:

Hi Gaetan, Eli and all,

Thanks for the patch and the info on how it affects performance in your case. I 
just wanted to post

the performance we are seeing.

I've posted the numbers inline. Please note, I'll be away on leave till Tuesday.
Thanks,
Cian


-Original Message-
From: Gaëtan Rivet 
Sent: Wednesday 7 July 2021 17:36
To: Eli Britstein ;  
; Van Haaren,

Harry

; Ferriter, Cian 
Cc: Majd Dibbiny ; Ilya Maximets 
Subject: Re: [ovs-dev] [PATCH 2/2] dpif-netdev: Introduce netdev array cache

On Wed, Jul 7, 2021, at 17:05, Eli Britstein wrote:

Port numbers are usually small. Maintain an array of netdev handles indexed
by port numbers. It accelerates looking up for them for
netdev_hw_miss_packet_recover().

Reported-by: Cian Ferriter 
Signed-off-by: Eli Britstein 
Reviewed-by: Gaetan Rivet 
---




___
dev mailing list
d...@openvswitch.org
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fmail.openvswitch.org%2Fmailman%2Flistinfo%2Fovs-devdata=04%7C01%7Celibr%40nvidia.com%7C7ca0caf9434e429e4ffd08d946d7cf2f%7C43083d15727340c1b7db39efd9ccc17a%7C0%7C0%7C637618715041410254%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000sdata=rv%2FdenANxrcTGxBBbRvhhlNioyswL7ieFr8AGcGtCs8%3Dreserved=0


Hello,

I tested the performance impact of this patch with a partial offload setup.
As reported by pmd-stats-show, in average cycles per packet:

Before vxlan-decap: 525 c/p
After vxlan-decap: 542 c/p
After this fix: 530 c/p

Without those fixes, vxlan-decap has a 3.2% negative impact on cycles,
with the fixes, the impact is reduced to 0.95%.

As I had to force partial offloads for our hardware, it would be better
with an outside confirmation on a proper setup.

Kind regards,
--
Gaetan Rivet

I'm showing the performance relative to what we measured on OVS master directly 
before the VXLAN

HWOL changes went in. All of the below results are using the scalar DPIF and 
partial HWOL.

Link to "Fixup patches": 
https://nam11.safelinks.protection.outlook.com/?url=http%3A%2F%2Fpatchwork.ozlabs.org%2Fproject%2Fopenvswitch%2Flist%2F%3Fseries%3D252356data=04%7C01%7Celibr%40nvidia.com%7C7ca0caf9434e429e4ffd08d946d7cf2f%7C43083d15727340c1b7db39efd9ccc17a%7C0%7C0%7C637618715041410254%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000sdata=Y62OrCRyS00vJHHPQvAHyhG5C4eO%2FSfWMCSPtszn3Is%3Dreserved=0

Master before VXLAN HWOL changes (f0e4a73)
1.000x

Latest master after VXLAN HWOL changes (b780911)
0.918x (-8.2%)

After fixup patches on OVS ML are applied (with ALLOW_EXPERIMENTAL_API=off)
0.973x (-2.7%)

After fixup patches on OVS ML are applied and after ALLOW_EXPERIMENTAL_API is 
removed.
0.938x (-6.2%)

I ran the last set of results by applying the below diff. I did this because 
I'm assuming the plan

is to remove the ALLOW_EXPERIMENTAL_API '#ifdef's at some point?

Yes, that is the plan.


Thanks for confirming this.


And thanks for testing, Gaetan and Cian!

Could you also provide more details on your test environment,
so someone else can reproduce?


Good idea, I'll add the details inline below. These details apply to the 
performance measured previously by me, and the performance in this mail.


What is important to know:
- Test configuration: P2P, V2V, PVP, etc.


P2P
1 PHY port
1 RXQ


- Test type: max. throughput, zero packet loss.

Max throughput.


- OVS config: EMC, SMC, HWOL, AVX512 - on/off/type

In all tests, all packets hit a single datapath flow with "offloaded:partial". 
So all packets are partially offloaded, skipping miniflow_extract() and EMC/SMC/DPCLS 
lookups.

AVX512 is off.


- Installed OF rules.

$ $OVS_DIR/utilities/ovs-ofctl dump-flows br0
  cookie=0x0, duration=253.691s, table=0, n_packets=2993867136, 
n_bytes=179632028160, in_port=phy0 actions=IN_PORT


- Traffic pattern: Packet size, number of flows, packet type.

64B, 1 flow, ETH/IP packets.


These tests also didn't include the fix from Balazs, IIUC, because
they were performed a bit before that patch got accepted.


Correct, the above tests didn't include the optimization from Balazs.


And Flavio reported what seems to be noticeable performance
drop due to just accepted AVX512 DPIF implementation for the
non-HWOL non-AVX512 setup:
   
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fmail.openvswitch.org%2Fpipermail%2Fovs-dev%2F2021-July%2F385448.htmldata=04%7C01%7Celibr%40nvidia.com%7C7ca0caf9434e

[ovs-dev] [PATCH V2 2/2] dpif-netdev: Introduce netdev array cache

2021-07-14 Thread Eli Britstein
Port numbers are usually small. Maintain an array of netdev handles indexed
by port numbers. It accelerates looking up for them for
netdev_hw_miss_packet_recover().

Reported-by: Cian Ferriter 
Signed-off-by: Eli Britstein 
Reviewed-by: Gaetan Rivet 
---
 lib/dpif-netdev-private-thread.h |  4 +++
 lib/dpif-netdev.c| 43 +---
 2 files changed, 43 insertions(+), 4 deletions(-)

diff --git a/lib/dpif-netdev-private-thread.h b/lib/dpif-netdev-private-thread.h
index ba79c4a0a..52755fbae 100644
--- a/lib/dpif-netdev-private-thread.h
+++ b/lib/dpif-netdev-private-thread.h
@@ -50,6 +50,9 @@ struct dp_netdev_pmd_thread_ctx {
 bool smc_enable_db;
 };
 
+/* Size of netdev's cache. */
+#define DP_PMD_NETDEV_CACHE_SIZE 1024
+
 /* PMD: Poll modes drivers.  PMD accesses devices via polling to eliminate
  * the performance overhead of interrupt processing.  Therefore netdev can
  * not implement rx-wait for these devices.  dpif-netdev needs to poll
@@ -192,6 +195,7 @@ struct dp_netdev_pmd_thread {
  * other instance will only be accessed by its own pmd thread. */
 struct hmap tnl_port_cache;
 struct hmap send_port_cache;
+struct netdev *send_netdev_cache[DP_PMD_NETDEV_CACHE_SIZE];
 
 /* Keep track of detailed PMD performance statistics. */
 struct pmd_perf_stats perf_stats;
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 1823bf565..50ea85d48 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -5540,6 +5540,12 @@ pmd_free_cached_ports(struct dp_netdev_pmd_thread *pmd)
 free(tx_port_cached);
 }
 HMAP_FOR_EACH_POP (tx_port_cached, node, >send_port_cache) {
+uint32_t port_no_ind;
+
+port_no_ind = odp_to_u32(tx_port_cached->port->port_no);
+if (port_no_ind < ARRAY_SIZE(pmd->send_netdev_cache)) {
+pmd->send_netdev_cache[port_no_ind] = NULL;
+}
 free(tx_port_cached);
 }
 }
@@ -5566,9 +5572,16 @@ pmd_load_cached_ports(struct dp_netdev_pmd_thread *pmd)
 }
 
 if (netdev_n_txq(tx_port->port->netdev)) {
+uint32_t port_no_ind;
+
 tx_port_cached = xmemdup(tx_port, sizeof *tx_port_cached);
 hmap_insert(>send_port_cache, _port_cached->node,
 hash_port_no(tx_port_cached->port->port_no));
+port_no_ind = odp_to_u32(tx_port_cached->port->port_no);
+if (port_no_ind < ARRAY_SIZE(pmd->send_netdev_cache)) {
+pmd->send_netdev_cache[port_no_ind] =
+tx_port_cached->port->netdev;
+}
 }
 }
 }
@@ -6217,6 +6230,7 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, 
struct dp_netdev *dp,
 hmap_init(>tx_ports);
 hmap_init(>tnl_port_cache);
 hmap_init(>send_port_cache);
+memset(pmd->send_netdev_cache, 0, sizeof pmd->send_netdev_cache);
 cmap_init(>tx_bonds);
 
 /* Initialize DPIF function pointer to the default configured version. */
@@ -6241,6 +6255,7 @@ dp_netdev_destroy_pmd(struct dp_netdev_pmd_thread *pmd)
 struct dpcls *cls;
 
 dp_netdev_pmd_flow_flush(pmd);
+memset(pmd->send_netdev_cache, 0, sizeof pmd->send_netdev_cache);
 hmap_destroy(>send_port_cache);
 hmap_destroy(>tnl_port_cache);
 hmap_destroy(>tx_ports);
@@ -6754,20 +6769,40 @@ smc_lookup_single(struct dp_netdev_pmd_thread *pmd,
 static struct tx_port * pmd_send_port_cache_lookup(
 const struct dp_netdev_pmd_thread *pmd, odp_port_t port_no);
 
+OVS_UNUSED
+static inline struct netdev *
+pmd_netdev_cache_lookup(const struct dp_netdev_pmd_thread *pmd,
+odp_port_t port_no)
+{
+uint32_t port_no_ind;
+struct tx_port *p;
+
+port_no_ind = odp_to_u32(port_no);
+if (port_no_ind < ARRAY_SIZE(pmd->send_netdev_cache)) {
+return pmd->send_netdev_cache[port_no_ind];
+}
+
+p = pmd_send_port_cache_lookup(pmd, port_no);
+if (p) {
+return p->port->netdev;
+}
+return NULL;
+}
+
 inline int
 dp_netdev_hw_flow(const struct dp_netdev_pmd_thread *pmd,
   odp_port_t port_no OVS_UNUSED,
   struct dp_packet *packet,
   struct dp_netdev_flow **flow)
 {
-struct tx_port *p OVS_UNUSED;
+struct netdev *netdev OVS_UNUSED;
 uint32_t mark;
 
 #ifdef ALLOW_EXPERIMENTAL_API /* Packet restoration API required. */
 /* Restore the packet if HW processing was terminated before completion. */
-p = pmd_send_port_cache_lookup(pmd, port_no);
-if (OVS_LIKELY(p)) {
-int err = netdev_hw_miss_packet_recover(p->port->netdev, packet);
+netdev = pmd_netdev_cache_lookup(pmd, port_no);
+if (OVS_LIKELY(netdev)) {
+int err = netdev_hw_miss_packet_recover(netdev, packet);
 
 if (err && err != EOPNOTSUPP) {
 COVERAGE_INC(datapath_drop_hw_miss_recover);
-

[ovs-dev] [PATCH V2 1/2] dpif-netdev: Do not execute packet recovery without experimental support

2021-07-14 Thread Eli Britstein
The rte_flow_get_restore_info() API is marked as experimental. Using it
has a performance impact that can be avoided for non-experimental compilation.

Do not call it without experimental support.

Reported-by: Cian Ferriter 
Signed-off-by: Eli Britstein 
Reviewed-by: Gaetan Rivet 
---
 lib/dpif-netdev.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 610949f36..1823bf565 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -114,7 +114,9 @@ COVERAGE_DEFINE(datapath_drop_invalid_port);
 COVERAGE_DEFINE(datapath_drop_invalid_bond);
 COVERAGE_DEFINE(datapath_drop_invalid_tnl_port);
 COVERAGE_DEFINE(datapath_drop_rx_invalid_packet);
+#ifdef ALLOW_EXPERIMENTAL_API /* Packet restoration API required. */
 COVERAGE_DEFINE(datapath_drop_hw_miss_recover);
+#endif
 
 /* Protects against changes to 'dp_netdevs'. */
 static struct ovs_mutex dp_netdev_mutex = OVS_MUTEX_INITIALIZER;
@@ -6754,13 +6756,14 @@ static struct tx_port * pmd_send_port_cache_lookup(
 
 inline int
 dp_netdev_hw_flow(const struct dp_netdev_pmd_thread *pmd,
-  odp_port_t port_no,
+  odp_port_t port_no OVS_UNUSED,
   struct dp_packet *packet,
   struct dp_netdev_flow **flow)
 {
-struct tx_port *p;
+struct tx_port *p OVS_UNUSED;
 uint32_t mark;
 
+#ifdef ALLOW_EXPERIMENTAL_API /* Packet restoration API required. */
 /* Restore the packet if HW processing was terminated before completion. */
 p = pmd_send_port_cache_lookup(pmd, port_no);
 if (OVS_LIKELY(p)) {
@@ -6771,6 +6774,7 @@ dp_netdev_hw_flow(const struct dp_netdev_pmd_thread *pmd,
 return -1;
 }
 }
+#endif
 
 /* If no mark, no flow to find. */
 if (!dp_packet_has_flow_mark(packet, )) {
-- 
2.28.0.2311.g225365fb51

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V2 2/2] netdev-offload-dpdk: Fix ethernet type for VLANs

2021-07-13 Thread Eli Britstein
For VLANs, the match of ethernet type should be specified in inner_type
field of the vlan match, and not type field in ethernet match.
Fix it.

Fixes: e8a2b5bf92bb ("netdev-dpdk: implement flow offload with rte flow")
Signed-off-by: Eli Britstein 
Reviewed-by: Salem Sol 
---
 lib/netdev-offload-dpdk.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index 3e0d0643b..65f9b3685 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -1106,12 +1106,13 @@ parse_flow_match(struct netdev *netdev,
 spec->tci = match->flow.vlans[0].tci & ~htons(VLAN_CFI);
 mask->tci = match->wc.masks.vlans[0].tci & ~htons(VLAN_CFI);
 
-/* Match any protocols. */
-mask->inner_type = 0;
-
 if (eth_spec && eth_mask) {
 eth_spec->has_vlan = 1;
 eth_mask->has_vlan = 1;
+spec->inner_type = eth_spec->type;
+mask->inner_type = eth_mask->type;
+eth_spec->type = match->flow.vlans[0].tpid;
+eth_mask->type = match->wc.masks.vlans[0].tpid;
 }
 
 add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_VLAN, spec, mask);
-- 
2.28.0.2311.g225365fb51

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V2 1/2] netdev-offload-dpdk: Use has_vlan match attribute

2021-07-13 Thread Eli Britstein
DPDK 20.11 introduced an ability to specify existence/non-existence of
VLAN tag by [1].
Use this attribute.

[1]: 09315fc83861 ("ethdev: add VLAN attributes to ethernet and VLAN items")

Signed-off-by: Eli Britstein 
Reviewed-by: Salem Sol 
---
 lib/netdev-offload-dpdk.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index e7913292e..3e0d0643b 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -210,6 +210,8 @@ dump_flow_pattern(struct ds *s,
 
 ds_put_cstr(s, "eth ");
 if (eth_spec) {
+uint32_t has_vlan_mask;
+
 if (!eth_mask) {
 eth_mask = _flow_item_eth_mask;
 }
@@ -222,6 +224,9 @@ dump_flow_pattern(struct ds *s,
 DUMP_PATTERN_ITEM(eth_mask->type, "type", "0x%04"PRIx16,
   ntohs(eth_spec->type),
   ntohs(eth_mask->type));
+has_vlan_mask = eth_mask->has_vlan ? UINT32_MAX : 0;
+DUMP_PATTERN_ITEM(has_vlan_mask, "has_vlan", "%d",
+  eth_spec->has_vlan, eth_mask->has_vlan);
 }
 ds_put_cstr(s, "/ ");
 } else if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) {
@@ -1037,6 +1042,7 @@ parse_flow_match(struct netdev *netdev,
  struct flow_patterns *patterns,
  struct match *match)
 {
+struct rte_flow_item_eth *eth_spec = NULL, *eth_mask = NULL;
 struct flow *consumed_masks;
 uint8_t proto = 0;
 
@@ -1082,6 +1088,11 @@ parse_flow_match(struct netdev *netdev,
 memset(_masks->dl_src, 0, sizeof consumed_masks->dl_src);
 consumed_masks->dl_type = 0;
 
+spec->has_vlan = 0;
+mask->has_vlan = 1;
+eth_spec = spec;
+eth_mask = mask;
+
 add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_ETH, spec, mask);
 }
 
@@ -1098,6 +1109,11 @@ parse_flow_match(struct netdev *netdev,
 /* Match any protocols. */
 mask->inner_type = 0;
 
+if (eth_spec && eth_mask) {
+eth_spec->has_vlan = 1;
+eth_mask->has_vlan = 1;
+}
+
 add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_VLAN, spec, mask);
 }
 /* For untagged matching match->wc.masks.vlans[0].tci is 0x and
-- 
2.28.0.2311.g225365fb51

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V2 2/2] netdev-offload-dpdk: Fix ethernet type for VLANs

2021-07-12 Thread Eli Britstein
For VLANs, the match of ethernet type should be specified in inner_type
field of the vlan match, and not type field in ethernet match.
Fix it.

Fixes: e8a2b5bf92bb ("netdev-dpdk: implement flow offload with rte flow")
Signed-off-by: Eli Britstein 
Reviewed-by: Salem Sol 
---
 lib/netdev-offload-dpdk.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index 3e0d0643b..65f9b3685 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -1106,12 +1106,13 @@ parse_flow_match(struct netdev *netdev,
 spec->tci = match->flow.vlans[0].tci & ~htons(VLAN_CFI);
 mask->tci = match->wc.masks.vlans[0].tci & ~htons(VLAN_CFI);
 
-/* Match any protocols. */
-mask->inner_type = 0;
-
 if (eth_spec && eth_mask) {
 eth_spec->has_vlan = 1;
 eth_mask->has_vlan = 1;
+spec->inner_type = eth_spec->type;
+mask->inner_type = eth_mask->type;
+eth_spec->type = match->flow.vlans[0].tpid;
+eth_mask->type = match->wc.masks.vlans[0].tpid;
 }
 
 add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_VLAN, spec, mask);
-- 
2.28.0.2311.g225365fb51

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V2 1/2] netdev-offload-dpdk: Use has_vlan match attribute

2021-07-12 Thread Eli Britstein
DPDK 20.11 introduced an ability to specify existence/non-existence of
VLAN tag by [1].
Use this attribute.

[1]: 09315fc83861 ("ethdev: add VLAN attributes to ethernet and VLAN items")

Signed-off-by: Eli Britstein 
Reviewed-by: Salem Sol 
---
 lib/netdev-offload-dpdk.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index e7913292e..3e0d0643b 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -210,6 +210,8 @@ dump_flow_pattern(struct ds *s,
 
 ds_put_cstr(s, "eth ");
 if (eth_spec) {
+uint32_t has_vlan_mask;
+
 if (!eth_mask) {
 eth_mask = _flow_item_eth_mask;
 }
@@ -222,6 +224,9 @@ dump_flow_pattern(struct ds *s,
 DUMP_PATTERN_ITEM(eth_mask->type, "type", "0x%04"PRIx16,
   ntohs(eth_spec->type),
   ntohs(eth_mask->type));
+has_vlan_mask = eth_mask->has_vlan ? UINT32_MAX : 0;
+DUMP_PATTERN_ITEM(has_vlan_mask, "has_vlan", "%d",
+  eth_spec->has_vlan, eth_mask->has_vlan);
 }
 ds_put_cstr(s, "/ ");
 } else if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) {
@@ -1037,6 +1042,7 @@ parse_flow_match(struct netdev *netdev,
  struct flow_patterns *patterns,
  struct match *match)
 {
+struct rte_flow_item_eth *eth_spec = NULL, *eth_mask = NULL;
 struct flow *consumed_masks;
 uint8_t proto = 0;
 
@@ -1082,6 +1088,11 @@ parse_flow_match(struct netdev *netdev,
 memset(_masks->dl_src, 0, sizeof consumed_masks->dl_src);
 consumed_masks->dl_type = 0;
 
+spec->has_vlan = 0;
+mask->has_vlan = 1;
+eth_spec = spec;
+eth_mask = mask;
+
 add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_ETH, spec, mask);
 }
 
@@ -1098,6 +1109,11 @@ parse_flow_match(struct netdev *netdev,
 /* Match any protocols. */
 mask->inner_type = 0;
 
+if (eth_spec && eth_mask) {
+eth_spec->has_vlan = 1;
+eth_mask->has_vlan = 1;
+}
+
 add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_VLAN, spec, mask);
 }
 /* For untagged matching match->wc.masks.vlans[0].tci is 0x and
-- 
2.28.0.2311.g225365fb51

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V2 2/3] dpif-netdev: Fix offloads of modified flows

2021-07-12 Thread Eli Britstein
Association of a mark to a flow is done as part of its offload handling,
in the offloading thread. However, the PMD thread specifies whether an
offload request is an "add" or "modify" by the association of a mark to
the flow.
This is exposed to a race condition. A flow might be created with
actions that cannot be fully offloaded, for example flooding (before MAC
learning), and later modified to have actions that can be fully
offloaded. If the two requests are queued before the offload thread
handling, they are both marked as "add". When the offload thread handles
them, the first request is partially offloaded, and the second one is
ignored as the flow is already considered as offloaded.

Fix it by specifying add/modify of an offload request by the actual flow
state change, without relying on the mark.

Fixes: 3c7330ebf036 ("netdev-offload-dpdk: Support offload of output action.")
Signed-off-by: Eli Britstein 
Reviewed-by: Gaetan Rivet 
---
 lib/dpif-netdev.c | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 21b0e025d..9b2b8d6d9 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -2451,7 +2451,8 @@ static void
 queue_netdev_flow_put(struct dp_netdev_pmd_thread *pmd,
   struct dp_netdev_flow *flow, struct match *match,
   const struct nlattr *actions, size_t actions_len,
-  odp_port_t orig_in_port)
+  odp_port_t orig_in_port,
+  const struct dp_netdev_actions *old_actions)
 {
 struct dp_flow_offload_item *offload;
 int op;
@@ -2467,11 +2468,9 @@ queue_netdev_flow_put(struct dp_netdev_pmd_thread *pmd,
 ovsthread_once_done(_thread_once);
 }
 
-if (flow->mark != INVALID_FLOW_MARK) {
-op = DP_NETDEV_FLOW_OFFLOAD_OP_MOD;
-} else {
-op = DP_NETDEV_FLOW_OFFLOAD_OP_ADD;
-}
+op = old_actions
+? DP_NETDEV_FLOW_OFFLOAD_OP_MOD
+: DP_NETDEV_FLOW_OFFLOAD_OP_ADD;
 offload = dp_netdev_alloc_flow_offload(pmd, flow, op);
 offload->match = *match;
 offload->actions = xmalloc(actions_len);
@@ -3323,7 +3322,7 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd,
 dp_netdev_flow_hash(>ufid));
 
 queue_netdev_flow_put(pmd, flow, match, actions, actions_len,
-  orig_in_port);
+  orig_in_port, NULL);
 
 if (OVS_UNLIKELY(!VLOG_DROP_DBG((_rl {
 struct ds ds = DS_EMPTY_INITIALIZER;
@@ -3410,7 +3409,8 @@ flow_put_on_pmd(struct dp_netdev_pmd_thread *pmd,
 ovsrcu_set(_flow->actions, new_actions);
 
 queue_netdev_flow_put(pmd, netdev_flow, match,
-  put->actions, put->actions_len, ODPP_NONE);
+  put->actions, put->actions_len, ODPP_NONE,
+  old_actions);
 
 if (stats) {
 get_dpif_flow_status(pmd->dp, netdev_flow, stats, NULL);
-- 
2.28.0.2311.g225365fb51

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V2 0/3] dpif-netdev offload transitions

2021-07-12 Thread Eli Britstein
This patch-set improves the behavior of offload transitions.

Patch #1 avoids flushing PMD offloads unnecessarily.
Patch #2 fixes a race condition with flow modifications.
Patch #3 improves debuggability of flow modifications.

v2-v1: Rebase.

Travis:
v1: https://travis-ci.org/github/elibritstein/OVS/builds/767839987

GitHub Actions:
v1: https://github.com/elibritstein/OVS/actions/runs/769805954
- This run has encountered some internal GitHub problems.
- A previous good run, with the same code, only changed commit
  messages since:
https://github.com/elibritstein/OVS/actions/runs/70787
v2: https://github.com/elibritstein/OVS/actions/runs/1023045302

Eli Britstein (3):
  dpif-netdev: Do not flush PMD offloads on reload
  dpif-netdev: Fix offloads of modified flows
  dpif-netdev: Log flow modification in debug level

 lib/dpif-netdev.c | 130 ++
 1 file changed, 63 insertions(+), 67 deletions(-)

-- 
2.28.0.2311.g225365fb51

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V2 3/3] dpif-netdev: Log flow modification in debug level

2021-07-12 Thread Eli Britstein
Log flow modifications to help debugging.

Signed-off-by: Eli Britstein 
Reviewed-by: Gaetan Rivet 
---
 lib/dpif-netdev.c | 101 +-
 1 file changed, 55 insertions(+), 46 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 9b2b8d6d9..caed3e7f2 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -2457,6 +2457,61 @@ queue_netdev_flow_put(struct dp_netdev_pmd_thread *pmd,
 struct dp_flow_offload_item *offload;
 int op;
 
+if (OVS_UNLIKELY(!VLOG_DROP_DBG((_rl {
+struct ds ds = DS_EMPTY_INITIALIZER;
+struct ofpbuf key_buf, mask_buf;
+struct odp_flow_key_parms odp_parms = {
+.flow = >flow,
+.mask = >wc.masks,
+.support = dp_netdev_support,
+};
+
+ofpbuf_init(_buf, 0);
+ofpbuf_init(_buf, 0);
+
+odp_flow_key_from_flow(_parms, _buf);
+odp_parms.key_buf = _buf;
+odp_flow_key_from_mask(_parms, _buf);
+
+if (old_actions) {
+ds_put_cstr(, "flow_mod: ");
+} else {
+ds_put_cstr(, "flow_add: ");
+}
+odp_format_ufid(>ufid, );
+ds_put_cstr(, " mega_");
+odp_format_ufid(>mega_ufid, );
+ds_put_cstr(, " ");
+odp_flow_format(key_buf.data, key_buf.size,
+mask_buf.data, mask_buf.size,
+NULL, , false);
+if (old_actions) {
+ds_put_cstr(, ", old_actions:");
+format_odp_actions(, old_actions->actions, old_actions->size,
+   NULL);
+}
+ds_put_cstr(, ", actions:");
+format_odp_actions(, actions, actions_len, NULL);
+
+VLOG_DBG("%s", ds_cstr());
+
+ofpbuf_uninit(_buf);
+ofpbuf_uninit(_buf);
+
+/* Add a printout of the actual match installed. */
+struct match m;
+ds_clear();
+ds_put_cstr(, "flow match: ");
+miniflow_expand(>cr.flow.mf, );
+miniflow_expand(>cr.mask->mf, );
+memset(_md, 0, sizeof m.tun_md);
+match_format(, NULL, , OFP_DEFAULT_PRIORITY);
+
+VLOG_DBG("%s", ds_cstr());
+
+ds_destroy();
+}
+
 if (!netdev_is_flow_api_enabled()) {
 return;
 }
@@ -3324,52 +3379,6 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd,
 queue_netdev_flow_put(pmd, flow, match, actions, actions_len,
   orig_in_port, NULL);
 
-if (OVS_UNLIKELY(!VLOG_DROP_DBG((_rl {
-struct ds ds = DS_EMPTY_INITIALIZER;
-struct ofpbuf key_buf, mask_buf;
-struct odp_flow_key_parms odp_parms = {
-.flow = >flow,
-.mask = >wc.masks,
-.support = dp_netdev_support,
-};
-
-ofpbuf_init(_buf, 0);
-ofpbuf_init(_buf, 0);
-
-odp_flow_key_from_flow(_parms, _buf);
-odp_parms.key_buf = _buf;
-odp_flow_key_from_mask(_parms, _buf);
-
-ds_put_cstr(, "flow_add: ");
-odp_format_ufid(ufid, );
-ds_put_cstr(, " mega_");
-odp_format_ufid(>mega_ufid, );
-ds_put_cstr(, " ");
-odp_flow_format(key_buf.data, key_buf.size,
-mask_buf.data, mask_buf.size,
-NULL, , false);
-ds_put_cstr(, ", actions:");
-format_odp_actions(, actions, actions_len, NULL);
-
-VLOG_DBG("%s", ds_cstr());
-
-ofpbuf_uninit(_buf);
-ofpbuf_uninit(_buf);
-
-/* Add a printout of the actual match installed. */
-struct match m;
-ds_clear();
-ds_put_cstr(, "flow match: ");
-miniflow_expand(>cr.flow.mf, );
-miniflow_expand(>cr.mask->mf, );
-memset(_md, 0, sizeof m.tun_md);
-match_format(, NULL, , OFP_DEFAULT_PRIORITY);
-
-VLOG_DBG("%s", ds_cstr());
-
-ds_destroy();
-}
-
 return flow;
 }
 
-- 
2.28.0.2311.g225365fb51

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V2 1/3] dpif-netdev: Do not flush PMD offloads on reload

2021-07-12 Thread Eli Britstein
Before flushing offloads of a removed port was supported by [1], it was
necessary to flush the 'marks'. In doing so, all offloads of the PMD are
removed, including the ones that are not related to the removed port and
that are not modified following this removal. As a result, such flows are
evicted from being offloaded, and won't resume offloading.

As PMD offload flush is not necessary, avoid it.

[1] 62d1c28e9ce0 ("dpif-netdev: Flush offload rules upon port deletion.")

Signed-off-by: Eli Britstein 
Reviewed-by: Gaetan Rivet 
---
 lib/dpif-netdev.c | 13 -
 1 file changed, 13 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 610949f36..21b0e025d 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -2232,18 +2232,6 @@ mark_to_flow_disassociate(struct dp_netdev_pmd_thread 
*pmd,
 return ret;
 }
 
-static void
-flow_mark_flush(struct dp_netdev_pmd_thread *pmd)
-{
-struct dp_netdev_flow *flow;
-
-CMAP_FOR_EACH (flow, mark_node, _mark.mark_to_flow) {
-if (flow->pmd_id == pmd->core_id) {
-queue_netdev_flow_del(pmd, flow);
-}
-}
-}
-
 static struct dp_netdev_flow *
 mark_to_flow_find(const struct dp_netdev_pmd_thread *pmd,
   const uint32_t mark)
@@ -4811,7 +4799,6 @@ reload_affected_pmds(struct dp_netdev *dp)
 
 CMAP_FOR_EACH (pmd, node, >poll_threads) {
 if (pmd->need_reload) {
-flow_mark_flush(pmd);
 dp_netdev_reload_pmd__(pmd);
 }
 }
-- 
2.28.0.2311.g225365fb51

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


  1   2   3   4   5   6   7   8   >