On 6/2/23 20:59, Mike Pattrick wrote:
> The netdev receiving packets is supposed to provide the flags
> indicating if the L4 checksum was verified and it is OK or BAD,
> otherwise the stack will check when appropriate by software.
>
> If the packet comes with good checksum, then postpone the
> checksum calculation to the egress device if needed.
>
> When encapsulate a packet with that flag, set the checksum
> of the inner L4 header since that is not yet supported.
>
> Calculate the L4 checksum when the packet is going to be sent
> over a device that doesn't support the feature.
>
> Linux tap devices allows enabling L3 and L4 offload, so this
> patch enables the feature. However, Linux socket interface
> remains disabled because the API doesn't allow enabling
> those two features without enabling TSO too.
>
> Signed-off-by: Flavio Leitner <[email protected]>
> Co-authored-by: Flavio Leitner <[email protected]>
> Signed-off-by: Mike Pattrick <[email protected]>
> ---
> Since v9:
> - Extended miniflow_extract changes into avx512 code
> - Formatting changes
> - Note that we cannot currently enable checksum offloading in
> CONFIGURE_VETH_OFFLOADS for check-system-userspace as
> netdev-linux.c currently only parses the vnet header if TSO
> is enabled.
> Since v10:
> - No change
> Since v11:
> - Added AVX512 IPv6 checksum offload support.
> - Improved error messages and logging.
> Since v12:
> - Added missing mutex annotations
> Since v13:
> - Added TUNGETFEATURES check in netdev-linux
> ---
> lib/conntrack.c | 15 +-
> lib/dp-packet.c | 25 +++
> lib/dp-packet.h | 78 +++++++++-
> lib/dpif-netdev-extract-avx512.c | 62 +++++++-
> lib/flow.c | 23 +++
> lib/netdev-dpdk.c | 176 ++++++++++++++-------
> lib/netdev-linux.c | 253 ++++++++++++++++++++++---------
> lib/netdev-native-tnl.c | 32 +---
> lib/netdev.c | 46 ++----
> lib/odp-execute-avx512.c | 88 +++++++----
> lib/packets.c | 175 ++++++++++++++++-----
> lib/packets.h | 3 +
> 12 files changed, 707 insertions(+), 269 deletions(-)
>
<snip>
> @@ -4268,6 +4351,14 @@ destroy_device(int vid)
> dev->up.n_rxq * sizeof *dev->vhost_rxq_enabled);
> netdev_dpdk_txq_map_clear(dev);
>
> + /* Clear offload capabilities before next new_device. */
> + dev->hw_ol_features &= ~NETDEV_TX_IPV4_CKSUM_OFFLOAD;
> + dev->hw_ol_features &= ~NETDEV_TX_TCP_CKSUM_OFFLOAD;
> + dev->hw_ol_features &= ~NETDEV_TX_UDP_CKSUM_OFFLOAD;
> + dev->hw_ol_features &= ~NETDEV_TX_SCTP_CKSUM_OFFLOAD;
> + dev->hw_ol_features &= ~NETDEV_TX_TSO_OFFLOAD;
Can we just set it to zero? To avoid potentially missing some flags.
> + netdev_dpdk_update_netdev_flags(dev);
> +
> netdev_change_seq_changed(&dev->up);
> ovs_mutex_unlock(&dev->mutex);
> exists = true;
<snip>
> diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
> index 36620199e..0ad36ded9 100644
> --- a/lib/netdev-linux.c
> +++ b/lib/netdev-linux.c
> @@ -530,6 +530,11 @@ static struct vlog_rate_limit rl =
> VLOG_RATE_LIMIT_INIT(5, 20);
> * changes in the device miimon status, so we can use atomic_count. */
> static atomic_count miimon_cnt = ATOMIC_COUNT_INIT(0);
>
> +/* Very old kernels from the 2.6 era don't support vnet headers with the tun
> + * device. We can detect this while constructing a netdev, but need this for
> + * packet rx/tx. */
> +static bool enable_vnet_hdr = true;
> +
> static int netdev_linux_parse_vnet_hdr(struct dp_packet *b);
> static void netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu);
> static int netdev_linux_do_ethtool(const char *name, struct ethtool_cmd *,
> @@ -938,14 +943,6 @@ netdev_linux_common_construct(struct netdev *netdev_)
> netnsid_unset(&netdev->netnsid);
> ovs_mutex_init(&netdev->mutex);
>
> - if (userspace_tso_enabled()) {
> - netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO;
> - netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM;
> - netdev_->ol_flags |= NETDEV_TX_OFFLOAD_UDP_CKSUM;
> - netdev_->ol_flags |= NETDEV_TX_OFFLOAD_SCTP_CKSUM;
> - netdev_->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM;
> - }
> -
> return 0;
> }
>
> @@ -959,6 +956,16 @@ netdev_linux_construct(struct netdev *netdev_)
> return error;
> }
>
> + /* The socket interface doesn't offer the option to enable only
> + * csum offloading without TSO. */
> + if (userspace_tso_enabled()) {
> + netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO;
> + netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM;
> + netdev_->ol_flags |= NETDEV_TX_OFFLOAD_UDP_CKSUM;
> + netdev_->ol_flags |= NETDEV_TX_OFFLOAD_SCTP_CKSUM;
> + netdev_->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM;
> + }
> +
> error = get_flags(&netdev->up, &netdev->ifi_flags);
> if (error == ENODEV) {
> if (netdev->up.netdev_class != &netdev_internal_class) {
> @@ -987,6 +994,8 @@ netdev_linux_construct_tap(struct netdev *netdev_)
> struct netdev_linux *netdev = netdev_linux_cast(netdev_);
> static const char tap_dev[] = "/dev/net/tun";
> const char *name = netdev_->name;
> + unsigned long oflags;
> + unsigned int up;
> struct ifreq ifr;
>
> int error = netdev_linux_common_construct(netdev_);
> @@ -1004,9 +1013,17 @@ netdev_linux_construct_tap(struct netdev *netdev_)
>
> /* Create tap device. */
> get_flags(&netdev->up, &netdev->ifi_flags);
> + if (ioctl(netdev->tap_fd, TUNGETFEATURES, &up) == -1) {
> + VLOG_WARN("%s: querying tap features failed: %s", name,
> + ovs_strerror(errno));
> + error = errno;
> + goto error_close;
> + }
> ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
> - if (userspace_tso_enabled()) {
> + if (up & IFF_VNET_HDR) {
> ifr.ifr_flags |= IFF_VNET_HDR;
> + } else {
> + enable_vnet_hdr = false;
> }
The 'enable_vnet_hdr' should never change after the first initialization.
If that will happen for some reason, rx/tx path may be broken. While
it is unlikely to actually happen, it's better to not assign on every call
a potentially different value. This should be either a per-port value
stored in the netdev_linux structure, or it should be protected with
the ovsthread_once like, for example, getqdisc_is_safe().
Also the variable name is too broad. It should reflect that it's applicable
to tap interfaces only, e.g. 'tap_supports_vnet_hdr'.
Suggesting a change at the end of this email.
<snip>
> @@ -262,9 +240,9 @@ netdev_tnl_push_udp_header(const struct netdev *netdev
> OVS_UNUSED,
> udp->udp_src = netdev_tnl_get_src_port(packet);
> udp->udp_len = htons(ip_tot_size);
>
> - if (udp->udp_csum) {
> - netdev_tnl_calc_udp_csum(udp, packet, ip_tot_size);
> - }
> + /* Postpone checksum to the egress netdev. */
> + dp_packet_hwol_set_csum_udp(packet);
> + dp_packet_ol_reset_l4_csum_good(packet);
With this change OVS will now always calculate UDP checksum regardless of the
'csum' tunnel configuration. This doesn't seem right. We need to keep checking
the udp->udp_csum here and set appropriate flags if it is zero and should remain
zero. dp_packet_ol_set_l4_csum_good() ?
Best regards, Ilya Maximets.
Suggested change for the enable_vnet_hdr part (not tested):
---
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index 3ad1fe544..3dba2ef1f 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -533,7 +533,7 @@ static atomic_count miimon_cnt = ATOMIC_COUNT_INIT(0);
/* Very old kernels from the 2.6 era don't support vnet headers with the tun
* device. We can detect this while constructing a netdev, but need this for
* packet rx/tx. */
-static bool enable_vnet_hdr = true;
+static bool tap_supports_vnet_hdr = true;
static int netdev_linux_parse_vnet_hdr(struct dp_packet *b);
static void netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu);
@@ -991,6 +991,7 @@ netdev_linux_construct(struct netdev *netdev_)
static int
netdev_linux_construct_tap(struct netdev *netdev_)
{
+ static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
struct netdev_linux *netdev = netdev_linux_cast(netdev_);
static const char tap_dev[] = "/dev/net/tun";
const char *name = netdev_->name;
@@ -1013,17 +1014,22 @@ netdev_linux_construct_tap(struct netdev *netdev_)
/* Create tap device. */
get_flags(&netdev->up, &netdev->ifi_flags);
- if (ioctl(netdev->tap_fd, TUNGETFEATURES, &up) == -1) {
- VLOG_WARN("%s: querying tap features failed: %s", name,
- ovs_strerror(errno));
- error = errno;
- goto error_close;
+
+ if (ovsthread_once_start(&once)) {
+ if (ioctl(netdev->tap_fd, TUNGETFEATURES, &up) == -1) {
+ VLOG_WARN("%s: querying tap features failed: %s", name,
+ ovs_strerror(errno));
+ tap_supports_vnet_hdr = false;
+ } else if (!(up & IFF_VNET_HDR)) {
+ VLOG_WARN("TAP interfaces do not support virtio-net headers");
+ tap_supports_vnet_hdr = false;
+ }
+ ovsthread_once_done(&once);
}
+
ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
- if (up & IFF_VNET_HDR) {
+ if (tap_supports_vnet_hdr) {
ifr.ifr_flags |= IFF_VNET_HDR;
- } else {
- enable_vnet_hdr = false;
}
ovs_strzcpy(ifr.ifr_name, name, sizeof ifr.ifr_name);
@@ -1052,19 +1058,18 @@ netdev_linux_construct_tap(struct netdev *netdev_)
oflags |= (TUN_F_TSO4 | TUN_F_TSO6);
}
- if (enable_vnet_hdr) {
- if (ioctl(netdev->tap_fd, TUNSETOFFLOAD, oflags) == 0) {
- netdev_->ol_flags |= (NETDEV_TX_OFFLOAD_IPV4_CKSUM
- | NETDEV_TX_OFFLOAD_TCP_CKSUM
- | NETDEV_TX_OFFLOAD_UDP_CKSUM);
+ if (tap_supports_vnet_hdr
+ && ioctl(netdev->tap_fd, TUNSETOFFLOAD, oflags) == 0) {
+ netdev_->ol_flags |= (NETDEV_TX_OFFLOAD_IPV4_CKSUM
+ | NETDEV_TX_OFFLOAD_TCP_CKSUM
+ | NETDEV_TX_OFFLOAD_UDP_CKSUM);
- if (userspace_tso_enabled()) {
- netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO;
- }
- } else {
- VLOG_WARN("%s: Disabling checksum and segment offloading due to "
- "missing kernel support", name);
+ if (userspace_tso_enabled()) {
+ netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO;
}
+ } else {
+ VLOG_INFO("%s: Disabling checksum and segment offloading due to "
+ "missing kernel support", name);
}
netdev->present = true;
@@ -1442,7 +1447,7 @@ netdev_linux_batch_rxq_recv_tap(struct netdev_rxq_linux
*rx, int mtu,
/* Use only the buffer from the allocated packet. */
iovlen = IOV_STD_SIZE;
}
- if (OVS_LIKELY(enable_vnet_hdr)) {
+ if (OVS_LIKELY(tap_supports_vnet_hdr)) {
virtio_net_hdr_size = sizeof(struct virtio_net_hdr);
} else {
virtio_net_hdr_size = 0;
@@ -1661,7 +1666,7 @@ netdev_linux_tap_batch_send(struct netdev *netdev_, int
mtu,
ssize_t retval;
int error;
- if (OVS_LIKELY(enable_vnet_hdr)) {
+ if (OVS_LIKELY(tap_supports_vnet_hdr)) {
netdev_linux_prepend_vnet_hdr(packet, mtu);
}
---
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev