I did not finish reviewing. On Fri, Jul 1, 2022 at 5:58 AM Mike Pattrick <[email protected]> wrote: > > From: Flavio Leitner <[email protected]> > > The netdev receiving packets is supposed to provide the flags > indicating if the L4 csum was verified and it is OK or BAD, > otherwise the stack will check when appropriate by software. > > If the packet comes with good checksum, then postpone the > checksum calculation to the egress device if needed. > > When encapsulate a packet with that flag, set the checksum > of the inner L4 header since that is not yet supported. > > Calculate the L4 csum when the packet is going to be sent over > a device that doesn't support the feature. > > Linux tap devices allows enabling L3 and L4 offload, so this > patch enables the feature. However, Linux socket interface > remains disabled because the API doesn't allow enabling > those those features without enabling TSO too.
those two* > > Signed-off-by: Flavio Leitner <[email protected]> > Co-authored-by: Mike Pattrick <[email protected]> > Signed-off-by: Mike Pattrick <[email protected]> > --- > lib/conntrack.c | 16 +-- > lib/dp-packet.c | 23 +++- > lib/dp-packet.h | 97 ++++++++++++--- > lib/flow.c | 23 ++++ > lib/netdev-dpdk.c | 136 ++++++++++++--------- > lib/netdev-linux.c | 253 ++++++++++++++++++++++++++-------------- > lib/netdev-native-tnl.c | 32 +---- > lib/netdev.c | 39 ++----- > lib/packets.c | 175 +++++++++++++++++++++------ > lib/packets.h | 3 + > 10 files changed, 542 insertions(+), 255 deletions(-) > > diff --git a/lib/conntrack.c b/lib/conntrack.c > index 11768da00..d7072e1e9 100644 > --- a/lib/conntrack.c > +++ b/lib/conntrack.c > @@ -2105,13 +2105,13 @@ conn_key_extract(struct conntrack *ct, struct > dp_packet *pkt, ovs_be16 dl_type, > } > > if (ok) { > - bool hwol_bad_l4_csum = dp_packet_ol_l4_checksum_bad(pkt); > - if (!hwol_bad_l4_csum) { > - bool hwol_good_l4_csum = dp_packet_ol_l4_checksum_good(pkt) > - || dp_packet_ol_tx_l4_checksum(pkt); > + if (!dp_packet_ol_l4_checksum_bad(pkt)) { > + > /* Validate the checksum only when hwol is not supported. */ > if (extract_l4(&ctx->key, l4, dp_packet_l4_size(pkt), > - &ctx->icmp_related, l3, !hwol_good_l4_csum, > + &ctx->icmp_related, l3, > + !dp_packet_ol_l4_checksum_good(pkt) && > + !dp_packet_ol_tx_l4_checksum(pkt), Do we need to check for dp_packet_ol_tx_l4_checksum? > NULL)) { > ctx->hash = conn_key_hash(&ctx->key, ct->hash_basis); > return true; > @@ -3423,8 +3423,10 @@ handle_ftp_ctl(struct conntrack *ct, const struct > conn_lookup_ctx *ctx, > adj_seqnum(&th->tcp_seq, ec->seq_skew); > } > > - th->tcp_csum = 0; > - if (!dp_packet_ol_tx_l4_checksum(pkt)) { > + if (dp_packet_ol_tx_tcp_csum(pkt)) { > + dp_packet_ol_reset_l4_csum_good(pkt); > + } else { > + th->tcp_csum = 0; > if (ctx->key.dl_type == htons(ETH_TYPE_IPV6)) { > th->tcp_csum = packet_csum_upperlayer6(nh6, th, > ctx->key.nw_proto, > dp_packet_l4_size(pkt)); [snip] > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > index 1eb2954ab..bfeb75add 100644 > --- a/lib/netdev-dpdk.c > +++ b/lib/netdev-dpdk.c > @@ -145,17 +145,6 @@ typedef uint16_t dpdk_port_t; > > #define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ) > > -/* List of required flags advertised by the hardware that will be used > - * if TSO is enabled. Ideally this should include > - * RTE_ETH_TX_OFFLOAD_SCTP_CKSUM. However, very few drivers support that > - * at the moment and SCTP is not a widely used protocol like TCP and UDP, > - * so it's optional. */ > -#define DPDK_TX_TSO_OFFLOAD_FLAGS (RTE_ETH_TX_OFFLOAD_TCP_TSO \ > - | RTE_ETH_TX_OFFLOAD_TCP_CKSUM \ > - | RTE_ETH_TX_OFFLOAD_UDP_CKSUM \ > - | RTE_ETH_TX_OFFLOAD_IPV4_CKSUM) > - > - > static const struct rte_eth_conf port_conf = { > .rxmode = { > .split_hdr_size = 0, > @@ -398,8 +387,10 @@ enum dpdk_hw_ol_features { > NETDEV_RX_HW_CRC_STRIP = 1 << 1, > NETDEV_RX_HW_SCATTER = 1 << 2, > NETDEV_TX_IPV4_CKSUM_OFFLOAD = 1 << 3, > - NETDEV_TX_TSO_OFFLOAD = 1 << 4, > - NETDEV_TX_SCTP_CHECKSUM_OFFLOAD = 1 << 5, > + NETDEV_TX_TCP_CKSUM_OFFLOAD = 1 << 4, > + NETDEV_TX_UDP_CKSUM_OFFLOAD = 1 << 5, > + NETDEV_TX_SCTP_CKSUM_OFFLOAD = 1 << 6, > + NETDEV_TX_TSO_OFFLOAD = 1 << 7, > }; > > /* > @@ -953,6 +944,35 @@ dpdk_watchdog(void *dummy OVS_UNUSED) > return NULL; > } > > +static void > +netdev_dpdk_update_netdev_flag(struct netdev_dpdk *dev, > + enum dpdk_hw_ol_features hw_ol_features, > + enum netdev_ol_flags flag) > +{ > + struct netdev *netdev = &dev->up; > + > + if (dev->hw_ol_features & hw_ol_features) { > + netdev->ol_flags |= flag; > + } else { > + netdev->ol_flags &= ~flag; > + } > +} > + > +static void > +netdev_dpdk_update_netdev_flags(struct netdev_dpdk *dev) > +{ > + netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_IPV4_CKSUM_OFFLOAD, > + NETDEV_OFFLOAD_TX_IPV4_CSUM); > + netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_TCP_CKSUM_OFFLOAD, > + NETDEV_OFFLOAD_TX_TCP_CSUM); > + netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_UDP_CKSUM_OFFLOAD, > + NETDEV_OFFLOAD_TX_UDP_CSUM); > + netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_SCTP_CKSUM_OFFLOAD, > + NETDEV_OFFLOAD_TX_SCTP_CSUM); > + netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_TSO_OFFLOAD, > + NETDEV_OFFLOAD_TX_TCP_TSO); > +} > + > static int > dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq) > { > @@ -989,11 +1009,20 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int > n_rxq, int n_txq) > conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_IPV4_CKSUM; > } > > + if (dev->hw_ol_features & NETDEV_TX_TCP_CKSUM_OFFLOAD) { > + conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_TCP_CKSUM; > + } > + > + if (dev->hw_ol_features & NETDEV_TX_UDP_CKSUM_OFFLOAD) { > + conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_UDP_CKSUM; > + } > + > + if (dev->hw_ol_features & NETDEV_TX_SCTP_CKSUM_OFFLOAD) { > + conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_SCTP_CKSUM; > + } > + > if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) { > - conf.txmode.offloads |= DPDK_TX_TSO_OFFLOAD_FLAGS; > - if (dev->hw_ol_features & NETDEV_TX_SCTP_CHECKSUM_OFFLOAD) { > - conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_SCTP_CKSUM; > - } > + conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_TCP_TSO; > } > > /* Limit configured rss hash functions to only those supported > @@ -1099,7 +1128,6 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) > struct rte_ether_addr eth_addr; > int diag; > int n_rxq, n_txq; > - uint32_t tx_tso_offload_capa = DPDK_TX_TSO_OFFLOAD_FLAGS; > uint32_t rx_chksm_offload_capa = RTE_ETH_RX_OFFLOAD_UDP_CKSUM | > RTE_ETH_RX_OFFLOAD_TCP_CKSUM | > RTE_ETH_RX_OFFLOAD_IPV4_CKSUM; > @@ -1135,18 +1163,28 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) > dev->hw_ol_features &= ~NETDEV_TX_IPV4_CKSUM_OFFLOAD; > } > > + if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_CKSUM) { > + dev->hw_ol_features |= NETDEV_TX_TCP_CKSUM_OFFLOAD; > + } else { > + dev->hw_ol_features &= ~NETDEV_TX_TCP_CKSUM_OFFLOAD; > + } > + > + if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_UDP_CKSUM) { > + dev->hw_ol_features |= NETDEV_TX_UDP_CKSUM_OFFLOAD; > + } else { > + dev->hw_ol_features &= ~NETDEV_TX_UDP_CKSUM_OFFLOAD; > + } > + > + if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_SCTP_CKSUM) { > + dev->hw_ol_features |= NETDEV_TX_SCTP_CKSUM_OFFLOAD; > + } else { > + dev->hw_ol_features &= ~NETDEV_TX_SCTP_CKSUM_OFFLOAD; > + } > + > dev->hw_ol_features &= ~NETDEV_TX_TSO_OFFLOAD; > if (userspace_tso_enabled()) { > - if ((info.tx_offload_capa & tx_tso_offload_capa) > - == tx_tso_offload_capa) { > + if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_TSO) { > dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD; > - if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_SCTP_CKSUM) { > - dev->hw_ol_features |= NETDEV_TX_SCTP_CHECKSUM_OFFLOAD; > - } else { > - VLOG_WARN("%s: Tx SCTP checksum offload is not supported, " > - "SCTP packets sent to this device will be dropped", > - netdev_get_name(&dev->up)); > - } > } else { > VLOG_WARN("%s: Tx TSO offload is not supported.", > netdev_get_name(&dev->up)); > @@ -1708,6 +1746,9 @@ netdev_dpdk_get_config(const struct netdev *netdev, > struct smap *args) > smap_add(args, FIELD, dev->hw_ol_features & FLAG ? "true" : "false"); > HWOL_SMAP_ADD("rx_csum_offload", NETDEV_RX_CHECKSUM_OFFLOAD); > HWOL_SMAP_ADD("tx_ip_csum_offload", NETDEV_TX_IPV4_CKSUM_OFFLOAD); > + HWOL_SMAP_ADD("tx_tcp_csum_offload", NETDEV_TX_TCP_CKSUM_OFFLOAD); > + HWOL_SMAP_ADD("tx_udp_csum_offload", NETDEV_TX_UDP_CKSUM_OFFLOAD); > + HWOL_SMAP_ADD("tx_sctp_csum_offload", NETDEV_TX_SCTP_CKSUM_OFFLOAD); > HWOL_SMAP_ADD("tx_tso_offload", NETDEV_TX_TSO_OFFLOAD); > #undef HWOL_SMAP_ADD > smap_add(args, "lsc_interrupt_mode", > @@ -2154,6 +2195,7 @@ netdev_dpdk_prep_ol_packet(struct netdev_dpdk *dev, > struct rte_mbuf *mbuf) > > mbuf->l2_len = (char *) dp_packet_l3(pkt) - (char *) dp_packet_eth(pkt); > mbuf->l3_len = (char *) dp_packet_l4(pkt) - (char *) dp_packet_l3(pkt); > + mbuf->l4_len = 0; > mbuf->outer_l2_len = 0; > mbuf->outer_l3_len = 0; > > @@ -4935,21 +4977,7 @@ netdev_dpdk_reconfigure(struct netdev *netdev) > } > > err = dpdk_eth_dev_init(dev); > - if (dev->hw_ol_features & NETDEV_TX_IPV4_CKSUM_OFFLOAD) { > - netdev->ol_flags |= NETDEV_OFFLOAD_TX_IPV4_CSUM; > - } else { > - netdev->ol_flags &= ~NETDEV_OFFLOAD_TX_IPV4_CSUM; > - } > - > - if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) { > - netdev->ol_flags |= NETDEV_OFFLOAD_TX_TCP_TSO; > - netdev->ol_flags |= NETDEV_OFFLOAD_TX_TCP_CSUM; > - netdev->ol_flags |= NETDEV_OFFLOAD_TX_UDP_CSUM; > - netdev->ol_flags |= NETDEV_OFFLOAD_TX_IPV4_CSUM; > - if (dev->hw_ol_features & NETDEV_TX_SCTP_CHECKSUM_OFFLOAD) { > - netdev->ol_flags |= NETDEV_OFFLOAD_TX_SCTP_CSUM; > - } > - } > + netdev_dpdk_update_netdev_flags(dev); > > /* If both requested and actual hwaddr were previously > * unset (initialized to 0), then first device init above > @@ -5024,6 +5052,7 @@ netdev_dpdk_vhost_reconfigure(struct netdev *netdev) > int err; > > ovs_mutex_lock(&dev->mutex); > + netdev_dpdk_update_netdev_flags(dev); > err = dpdk_vhost_reconfigure_helper(dev); > ovs_mutex_unlock(&dev->mutex); > > @@ -5088,19 +5117,22 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev > *netdev) > goto unlock; > } > > + vhost_unsup_flags = 1ULL << VIRTIO_NET_F_HOST_ECN > + | 1ULL << VIRTIO_NET_F_HOST_UFO; > + > + dev->hw_ol_features |= NETDEV_TX_IPV4_CKSUM_OFFLOAD; > + dev->hw_ol_features |= NETDEV_TX_TCP_CKSUM_OFFLOAD; > + dev->hw_ol_features |= NETDEV_TX_UDP_CKSUM_OFFLOAD; > + dev->hw_ol_features |= NETDEV_TX_SCTP_CKSUM_OFFLOAD; It is a bit late here, so my brain might not be working well. I think we have problems with Tx vs Rx features. TX checksum offloads, from a vhost port point of view, should be conditionned to what the guest announces it supports. Afaiu, the guest accepting to receive partial tcp cheksums and other is mapped to the VIRTIO_NET_F_GUEST_CSUM (1) feature. > + > if (userspace_tso_enabled()) { > - netdev->ol_flags |= NETDEV_OFFLOAD_TX_TCP_TSO; > - netdev->ol_flags |= NETDEV_OFFLOAD_TX_TCP_CSUM; > - netdev->ol_flags |= NETDEV_OFFLOAD_TX_UDP_CSUM; > - netdev->ol_flags |= NETDEV_OFFLOAD_TX_SCTP_CSUM; > - netdev->ol_flags |= NETDEV_OFFLOAD_TX_IPV4_CSUM; > - vhost_unsup_flags = 1ULL << VIRTIO_NET_F_HOST_ECN > - | 1ULL << VIRTIO_NET_F_HOST_UFO; > + dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD; > + VLOG_DBG("%s: TSO enabled on vhost port", > + netdev_get_name(&dev->up)); > } else { > - /* This disables checksum offloading and all the features > - * that depends on it (TSO, UFO, ECN) according to virtio > - * specification. */ > vhost_unsup_flags = 1ULL << VIRTIO_NET_F_CSUM; On the contrary, with this patch, OVS should always advertise VIRTIO_NET_F_CSUM, since it can handle partial checksums and may offload it to some hw nic on tx. This line above should be removed. But it is dead code, as vhost_unsup_flags content is reset on the next line, below. (intention might have been to vhost_unsup_flags |= ?). > + vhost_unsup_flags = 1ULL << VIRTIO_NET_F_HOST_TSO4 > + | 1ULL << VIRTIO_NET_F_HOST_TSO6; This seems problematic too. HOST_TSO4 indicates to the guest that the host side may receive TSO frames. >From a vhost port point of view, that it can receive TSO frames, i.e. that OVS supports LRO, but we don't express such feature in the netdev layer. > > err = rte_vhost_driver_disable_features(dev->vhost_id, I also see a runtime issue when upgrading with the last patch, I'll send a comment on it. -- David Marchand _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
