On 6/2/23 20:59, Mike Pattrick wrote:
> The netdev receiving packets is supposed to provide the flags
> indicating if the L4 checksum was verified and it is OK or BAD,
> otherwise the stack will check when appropriate by software.
> 
> If the packet comes with good checksum, then postpone the
> checksum calculation to the egress device if needed.
> 
> When encapsulate a packet with that flag, set the checksum
> of the inner L4 header since that is not yet supported.
> 
> Calculate the L4 checksum when the packet is going to be sent
> over a device that doesn't support the feature.
> 
> Linux tap devices allows enabling L3 and L4 offload, so this
> patch enables the feature. However, Linux socket interface
> remains disabled because the API doesn't allow enabling
> those two features without enabling TSO too.
> 
> Signed-off-by: Flavio Leitner <[email protected]>
> Co-authored-by: Flavio Leitner <[email protected]>
> Signed-off-by: Mike Pattrick <[email protected]>
> ---
>  Since v9:
>   - Extended miniflow_extract changes into avx512 code
>   - Formatting changes
>   - Note that we cannot currently enable checksum offloading in
>     CONFIGURE_VETH_OFFLOADS for check-system-userspace as
>     netdev-linux.c currently only parses the vnet header if TSO
>     is enabled.
>  Since v10:
>   - No change
>  Since v11:
>   - Added AVX512 IPv6 checksum offload support.
>   - Improved error messages and logging.
>  Since v12:
>   - Added missing mutex annotations
>  Since v13:
>   - Added TUNGETFEATURES check in netdev-linux
> ---
>  lib/conntrack.c                  |  15 +-
>  lib/dp-packet.c                  |  25 +++
>  lib/dp-packet.h                  |  78 +++++++++-
>  lib/dpif-netdev-extract-avx512.c |  62 +++++++-
>  lib/flow.c                       |  23 +++
>  lib/netdev-dpdk.c                | 176 ++++++++++++++-------
>  lib/netdev-linux.c               | 253 ++++++++++++++++++++++---------
>  lib/netdev-native-tnl.c          |  32 +---
>  lib/netdev.c                     |  46 ++----
>  lib/odp-execute-avx512.c         |  88 +++++++----
>  lib/packets.c                    | 175 ++++++++++++++++-----
>  lib/packets.h                    |   3 +
>  12 files changed, 707 insertions(+), 269 deletions(-)
> 

<snip>

> @@ -4268,6 +4351,14 @@ destroy_device(int vid)
>                     dev->up.n_rxq * sizeof *dev->vhost_rxq_enabled);
>              netdev_dpdk_txq_map_clear(dev);
>  
> +            /* Clear offload capabilities before next new_device. */
> +            dev->hw_ol_features &= ~NETDEV_TX_IPV4_CKSUM_OFFLOAD;
> +            dev->hw_ol_features &= ~NETDEV_TX_TCP_CKSUM_OFFLOAD;
> +            dev->hw_ol_features &= ~NETDEV_TX_UDP_CKSUM_OFFLOAD;
> +            dev->hw_ol_features &= ~NETDEV_TX_SCTP_CKSUM_OFFLOAD;
> +            dev->hw_ol_features &= ~NETDEV_TX_TSO_OFFLOAD;

Can we just set it to zero?  To avoid potentially missing some flags.

> +            netdev_dpdk_update_netdev_flags(dev);
> +
>              netdev_change_seq_changed(&dev->up);
>              ovs_mutex_unlock(&dev->mutex);
>              exists = true;

<snip>

> diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
> index 36620199e..0ad36ded9 100644
> --- a/lib/netdev-linux.c
> +++ b/lib/netdev-linux.c
> @@ -530,6 +530,11 @@ static struct vlog_rate_limit rl = 
> VLOG_RATE_LIMIT_INIT(5, 20);
>   * changes in the device miimon status, so we can use atomic_count. */
>  static atomic_count miimon_cnt = ATOMIC_COUNT_INIT(0);
>  
> +/* Very old kernels from the 2.6 era don't support vnet headers with the tun
> + * device. We can detect this while constructing a netdev, but need this for
> + * packet rx/tx. */
> +static bool enable_vnet_hdr = true;
> +
>  static int netdev_linux_parse_vnet_hdr(struct dp_packet *b);
>  static void netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu);
>  static int netdev_linux_do_ethtool(const char *name, struct ethtool_cmd *,
> @@ -938,14 +943,6 @@ netdev_linux_common_construct(struct netdev *netdev_)
>      netnsid_unset(&netdev->netnsid);
>      ovs_mutex_init(&netdev->mutex);
>  
> -    if (userspace_tso_enabled()) {
> -        netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO;
> -        netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM;
> -        netdev_->ol_flags |= NETDEV_TX_OFFLOAD_UDP_CKSUM;
> -        netdev_->ol_flags |= NETDEV_TX_OFFLOAD_SCTP_CKSUM;
> -        netdev_->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM;
> -    }
> -
>      return 0;
>  }
>  
> @@ -959,6 +956,16 @@ netdev_linux_construct(struct netdev *netdev_)
>          return error;
>      }
>  
> +    /* The socket interface doesn't offer the option to enable only
> +     * csum offloading without TSO. */
> +    if (userspace_tso_enabled()) {
> +        netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO;
> +        netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM;
> +        netdev_->ol_flags |= NETDEV_TX_OFFLOAD_UDP_CKSUM;
> +        netdev_->ol_flags |= NETDEV_TX_OFFLOAD_SCTP_CKSUM;
> +        netdev_->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM;
> +    }
> +
>      error = get_flags(&netdev->up, &netdev->ifi_flags);
>      if (error == ENODEV) {
>          if (netdev->up.netdev_class != &netdev_internal_class) {
> @@ -987,6 +994,8 @@ netdev_linux_construct_tap(struct netdev *netdev_)
>      struct netdev_linux *netdev = netdev_linux_cast(netdev_);
>      static const char tap_dev[] = "/dev/net/tun";
>      const char *name = netdev_->name;
> +    unsigned long oflags;
> +    unsigned int up;
>      struct ifreq ifr;
>  
>      int error = netdev_linux_common_construct(netdev_);
> @@ -1004,9 +1013,17 @@ netdev_linux_construct_tap(struct netdev *netdev_)
>  
>      /* Create tap device. */
>      get_flags(&netdev->up, &netdev->ifi_flags);
> +    if (ioctl(netdev->tap_fd, TUNGETFEATURES, &up) == -1) {
> +        VLOG_WARN("%s: querying tap features failed: %s", name,
> +                  ovs_strerror(errno));
> +        error = errno;
> +        goto error_close;
> +    }
>      ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
> -    if (userspace_tso_enabled()) {
> +    if (up & IFF_VNET_HDR) {
>          ifr.ifr_flags |= IFF_VNET_HDR;
> +    } else {
> +        enable_vnet_hdr = false;
>      }

The 'enable_vnet_hdr' should never change after the first initialization.
If that will happen for some reason, rx/tx path may be broken.  While
it is unlikely to actually happen, it's better to not assign on every call
a potentially different value.  This should be either a per-port value
stored in the netdev_linux structure, or it should be protected with
the ovsthread_once like, for example, getqdisc_is_safe().

Also the variable name is too broad.  It should reflect that it's applicable
to tap interfaces only, e.g. 'tap_supports_vnet_hdr'.

Suggesting a change at the end of this email.

<snip>

> @@ -262,9 +240,9 @@ netdev_tnl_push_udp_header(const struct netdev *netdev 
> OVS_UNUSED,
>      udp->udp_src = netdev_tnl_get_src_port(packet);
>      udp->udp_len = htons(ip_tot_size);
>  
> -    if (udp->udp_csum) {
> -        netdev_tnl_calc_udp_csum(udp, packet, ip_tot_size);
> -    }
> +    /* Postpone checksum to the egress netdev. */
> +    dp_packet_hwol_set_csum_udp(packet);
> +    dp_packet_ol_reset_l4_csum_good(packet);

With this change OVS will now always calculate UDP checksum regardless of the
'csum' tunnel configuration.  This doesn't seem right.  We need to keep checking
the udp->udp_csum here and set appropriate flags if it is zero and should remain
zero.  dp_packet_ol_set_l4_csum_good() ?

Best regards, Ilya Maximets.


Suggested change for the enable_vnet_hdr part (not tested):

---
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index 3ad1fe544..3dba2ef1f 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -533,7 +533,7 @@ static atomic_count miimon_cnt = ATOMIC_COUNT_INIT(0);
 /* Very old kernels from the 2.6 era don't support vnet headers with the tun
  * device. We can detect this while constructing a netdev, but need this for
  * packet rx/tx. */
-static bool enable_vnet_hdr = true;
+static bool tap_supports_vnet_hdr = true;
 
 static int netdev_linux_parse_vnet_hdr(struct dp_packet *b);
 static void netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu);
@@ -991,6 +991,7 @@ netdev_linux_construct(struct netdev *netdev_)
 static int
 netdev_linux_construct_tap(struct netdev *netdev_)
 {
+    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
     struct netdev_linux *netdev = netdev_linux_cast(netdev_);
     static const char tap_dev[] = "/dev/net/tun";
     const char *name = netdev_->name;
@@ -1013,17 +1014,22 @@ netdev_linux_construct_tap(struct netdev *netdev_)
 
     /* Create tap device. */
     get_flags(&netdev->up, &netdev->ifi_flags);
-    if (ioctl(netdev->tap_fd, TUNGETFEATURES, &up) == -1) {
-        VLOG_WARN("%s: querying tap features failed: %s", name,
-                  ovs_strerror(errno));
-        error = errno;
-        goto error_close;
+
+    if (ovsthread_once_start(&once)) {
+        if (ioctl(netdev->tap_fd, TUNGETFEATURES, &up) == -1) {
+            VLOG_WARN("%s: querying tap features failed: %s", name,
+                      ovs_strerror(errno));
+            tap_supports_vnet_hdr = false;
+        } else if (!(up & IFF_VNET_HDR)) {
+            VLOG_WARN("TAP interfaces do not support virtio-net headers");
+            tap_supports_vnet_hdr = false;
+        }
+        ovsthread_once_done(&once);
     }
+
     ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
-    if (up & IFF_VNET_HDR) {
+    if (tap_supports_vnet_hdr) {
         ifr.ifr_flags |= IFF_VNET_HDR;
-    } else {
-        enable_vnet_hdr = false;
     }
 
     ovs_strzcpy(ifr.ifr_name, name, sizeof ifr.ifr_name);
@@ -1052,19 +1058,18 @@ netdev_linux_construct_tap(struct netdev *netdev_)
         oflags |= (TUN_F_TSO4 | TUN_F_TSO6);
     }
 
-    if (enable_vnet_hdr) {
-        if (ioctl(netdev->tap_fd, TUNSETOFFLOAD, oflags) == 0) {
-            netdev_->ol_flags |= (NETDEV_TX_OFFLOAD_IPV4_CKSUM
-                                  | NETDEV_TX_OFFLOAD_TCP_CKSUM
-                                  | NETDEV_TX_OFFLOAD_UDP_CKSUM);
+    if (tap_supports_vnet_hdr
+        && ioctl(netdev->tap_fd, TUNSETOFFLOAD, oflags) == 0) {
+        netdev_->ol_flags |= (NETDEV_TX_OFFLOAD_IPV4_CKSUM
+                              | NETDEV_TX_OFFLOAD_TCP_CKSUM
+                              | NETDEV_TX_OFFLOAD_UDP_CKSUM);
 
-            if (userspace_tso_enabled()) {
-                netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO;
-             }
-        } else {
-           VLOG_WARN("%s: Disabling checksum and segment offloading due to "
-                     "missing kernel support", name);
+        if (userspace_tso_enabled()) {
+            netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO;
         }
+    } else {
+       VLOG_INFO("%s: Disabling checksum and segment offloading due to "
+                 "missing kernel support", name);
     }
 
     netdev->present = true;
@@ -1442,7 +1447,7 @@ netdev_linux_batch_rxq_recv_tap(struct netdev_rxq_linux 
*rx, int mtu,
         /* Use only the buffer from the allocated packet. */
         iovlen = IOV_STD_SIZE;
     }
-    if (OVS_LIKELY(enable_vnet_hdr)) {
+    if (OVS_LIKELY(tap_supports_vnet_hdr)) {
         virtio_net_hdr_size = sizeof(struct virtio_net_hdr);
     } else {
         virtio_net_hdr_size = 0;
@@ -1661,7 +1666,7 @@ netdev_linux_tap_batch_send(struct netdev *netdev_, int 
mtu,
         ssize_t retval;
         int error;
 
-        if (OVS_LIKELY(enable_vnet_hdr)) {
+        if (OVS_LIKELY(tap_supports_vnet_hdr)) {
             netdev_linux_prepend_vnet_hdr(packet, mtu);
         }
 
---
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to