TCP Segmentation Offload (TSO) is a feature which enables the TCP/IP network stack to delegate segmentation of a TCP segment to the hardware NIC, thus saving compute resources. This may improve performance significantly for TCP workloads in virtualized environments.
While a previous commit already added the necessary logic to netdev-dpdk to deal with packets marked for TSO, this set of changes enables TSO by default when using multi-segment mbufs. Thus, to enable TSO on the physical DPDK interfaces, only the following command needs to be issued before starting OvS: ovs-vsctl set Open_vSwitch . other_config:dpdk-multi-seg-mbufs=true Co-authored-by: Mark Kavanagh <mark.b.kavan...@intel.com> Signed-off-by: Mark Kavanagh <mark.b.kavan...@intel.com> Signed-off-by: Tiago Lam <tiago....@intel.com> --- Documentation/topics/dpdk/phy.rst | 64 +++++++++++++++++++++++++++++++++++++++ lib/netdev-dpdk.c | 52 ++++++++++++++++++++++++++----- 2 files changed, 108 insertions(+), 8 deletions(-) diff --git a/Documentation/topics/dpdk/phy.rst b/Documentation/topics/dpdk/phy.rst index 1470623..980a629 100644 --- a/Documentation/topics/dpdk/phy.rst +++ b/Documentation/topics/dpdk/phy.rst @@ -248,3 +248,67 @@ Command to set interrupt mode for a specific interface:: Command to set polling mode for a specific interface:: $ ovs-vsctl set interface <iface_name> options:dpdk-lsc-interrupt=false + +TCP Segmentation Offload (TSO) +------------------------------ + +Overview +~~~~~~~~ + +TCP Segmentation Offload (TSO) enables a network stack to delegate +segmentation of an oversized TCP segment to the underlying physical NIC. +Offload of frame segmentation achieves computational savings in the core, +freeing up CPU cycles for more useful work. + +DPDK v16.07 added support for `TSO` in the vHost user backend; as such, a +guest's virtual network interfaces may avail of `TSO`. In such a setup, the +aforementioned computational savings are made in the core acting as the VM's +virtual CPU, typically resulting in improved TCP throughput. + +To enable TSO in a guest, the underlying NIC must first support `TSO` - +consult your controller's datasheet for compatibility. Secondly, the NIC +must have an associated DPDK Poll Mode Driver (PMD) which supports `TSO`. 
+ +Enabling TSO +~~~~~~~~~~~~ + +TSO may be enabled in one of two ways, as follows: + + 1. QEMU Command Line Parameter: + + ``` + sudo $QEMU_DIR/x86_64-softmmu/qemu-system-x86_64 \ + ... + -device virtio-net-pci,mac=00:00:00:00:00:01,netdev=mynet1,\ + mrg_rxbuf=on,csum=on,gso=on,guest_csum=on,guest_tso4=on,\ + guest_tso6=on,guest_ecn=on \ + ... + ``` + + 2. ethtool + +`TSO` is enabled in OvS by the DPDK vHost User backend; when a new guest +connection is established, `TSO` is advertised to the guest as an available +feature. Assuming that the guest's OS also supports `TSO`, ethtool can be used +to enable same: + + ``` + ethtool -K eth0 sg on # scatter-gather is a prerequisite for TSO + ethtool -K eth0 tso on + ethtool -k eth0 # verify that TSO is reported as 'on' + ``` + + <b>Note:</b> In both methods, `mergeable buffers` are required: + ``` + sudo $QEMU_DIR/x86_64-softmmu/qemu-system-x86_64 \ + ... + mrg_rxbuf=on,\ + ... + ``` + +Limitations +~~~~~~~~~~~ + +The current OvS `TSO` implementation supports flat and VLAN networks only +(i.e. no support for `TSO` over tunneled connection [VxLAN, GRE, IPinIP, +etc.]). diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 5da5996..20d4fd5 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -379,6 +379,7 @@ struct ingress_policer { enum dpdk_hw_ol_features { NETDEV_RX_CHECKSUM_OFFLOAD = 1 << 0, NETDEV_RX_HW_CRC_STRIP = 1 << 1, + NETDEV_TX_TSO_OFFLOAD = 1 << 2, }; /* @@ -1003,6 +1004,8 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq) struct rte_eth_dev_info info; uint16_t conf_mtu; + rte_eth_dev_info_get(dev->port_id, &info); + /* As of DPDK 17.11.1 a few PMDs require to explicitly enable * scatter to support jumbo RX. Checking the offload capabilities * is not an option as PMDs are not required yet to report @@ -1010,7 +1013,6 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq) * (testing or code review). 
Listing all such PMDs feels harder * than highlighting the one known not to need scatter */ if (dev->mtu > ETHER_MTU) { - rte_eth_dev_info_get(dev->port_id, &info); if (strncmp(info.driver_name, "net_nfp", 7)) { conf.rxmode.enable_scatter = 1; } @@ -1018,14 +1020,28 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq) /* Multi-segment-mbuf-specific setup. */ if (dpdk_multi_segment_mbufs) { - /* DPDK PMDs typically attempt to use simple or vectorized - * transmit functions, neither of which are compatible with - * multi-segment mbufs. Ensure that these are disabled when - * multi-segment mbufs are enabled. - */ - rte_eth_dev_info_get(dev->port_id, &info); + if (info.tx_offload_capa & DEV_TX_OFFLOAD_MULTI_SEGS) { + /* DPDK PMDs typically attempt to use simple or vectorized + * transmit functions, neither of which are compatible with + * multi-segment mbufs. Ensure that these are disabled when + * multi-segment mbufs are enabled. + */ + conf.txmode.offloads |= DEV_TX_OFFLOAD_MULTI_SEGS; + } + + if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) { + conf.txmode.offloads |= DEV_TX_OFFLOAD_TCP_TSO; + conf.txmode.offloads |= DEV_TX_OFFLOAD_TCP_CKSUM; + conf.txmode.offloads |= DEV_TX_OFFLOAD_IPV4_CKSUM; + } + txconf = info.default_txconf; - txconf.txq_flags &= ~ETH_TXQ_FLAGS_NOMULTSEGS; + txconf.txq_flags = ETH_TXQ_FLAGS_IGNORE; + txconf.offloads = conf.txmode.offloads; + } else if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) { + dev->hw_ol_features &= ~NETDEV_TX_TSO_OFFLOAD; + VLOG_WARN("Failed to set Tx TSO offload in %s. 
Requires option " + "`dpdk-multi-seg-mbufs` to be enabled.", dev->up.name); } conf.intr_conf.lsc = dev->lsc_interrupt_mode; @@ -1135,6 +1151,9 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) uint32_t rx_chksm_offload_capa = DEV_RX_OFFLOAD_UDP_CKSUM | DEV_RX_OFFLOAD_TCP_CKSUM | DEV_RX_OFFLOAD_IPV4_CKSUM; + uint32_t tx_tso_offload_capa = DEV_TX_OFFLOAD_TCP_TSO | + DEV_TX_OFFLOAD_TCP_CKSUM | + DEV_TX_OFFLOAD_IPV4_CKSUM; rte_eth_dev_info_get(dev->port_id, &info); @@ -1154,6 +1173,18 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) dev->hw_ol_features |= NETDEV_RX_CHECKSUM_OFFLOAD; } + if (dpdk_multi_segment_mbufs) { + if (info.tx_offload_capa & tx_tso_offload_capa) { + dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD; + } else { + dev->hw_ol_features &= ~NETDEV_TX_TSO_OFFLOAD; + VLOG_WARN("Tx TSO offload is not supported on port " + DPDK_PORT_ID_FMT, dev->port_id); + } + } else { + dev->hw_ol_features &= ~NETDEV_TX_TSO_OFFLOAD; + } + n_rxq = MIN(info.max_rx_queues, dev->up.n_rxq); n_txq = MIN(info.max_tx_queues, dev->up.n_txq); @@ -1673,6 +1704,11 @@ netdev_dpdk_get_config(const struct netdev *netdev, struct smap *args) } else { smap_add(args, "rx_csum_offload", "false"); } + if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) { + smap_add(args, "tx_tso_offload", "true"); + } else { + smap_add(args, "tx_tso_offload", "false"); + } smap_add(args, "lsc_interrupt_mode", dev->lsc_interrupt_mode ? "true" : "false"); } -- 2.7.4 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev