>On 4/12/24 08:29, Jun Wang wrote:
>> If it's a DPDK net_bonding, it may cause
>> offload-related configurations to take effect,
>> leading to offload failure.
>>
>> /usr/share/openvswitch/scripts/ovs-ctl restart --no-ovs-vswitchd \
>> --system-id=test
>> ovs-vsctl --no-wait set open . external-ids:ovn-bridge-datapath-type\
>> =netdev
>>ovs-vsctl --no-wait set Open_vSwitch . other_config:dpdk-init=true
>> ovs-vsctl --no-wait set Open_vSwitch . other_config:dpdk-socket-mem=\
>> "4096,4096"
>> ovs-vsctl --no-wait set Open_vSwitch . other_config:pmd-cpu-mask=\
>> 0xff0000
>> ovs-vsctl --no-wait set Open_vSwitch . other_config:dpdk-extra=\
>> "-a 0000:ca:00.0 -a 0000:ca:00.1 --vdev net_bonding2494589023,\
>> mode=4,member=0000:ca:00.0,member=0000:ca:00.1,xmit_policy=l34,\
>> lacp_rate=fast"
>> ovs-vswitchd unix:/var/run/openvswitch/db.sock -vconsole:emer \
>> -vsyslog:err -vfile:info --mlockall --no-chdir \
>> --log-file=/var/log/openvswitch/ovs-vswitchd.log \
>> --pidfile=/var/run/openvswitch/ovs-vswitchd.pid --detach --monitor
>> ovs-vsctl add-br br-tun -- set bridge br-tun datapath_type=netdev
>> ovs-vsctl --may-exist add-port br-tun dpdk_tun_port -- set Interface \
>> dpdk_tun_port type=dpdk options:dpdk-devargs="net_bonding2494589023"
>>
>> {bus_info="bus_name=vdev", driver_name=net_bonding,
>> if_descr="DPDK 23.11.0 net_bonding", if_type="6",link_speed="20Gbps",
>> max_hash_mac_addrs="0", max_mac_addrs="16", max_rx_pktlen="1618",
>> max_rx_queues="1023", max_tx_queues="1023", max_vfs="0",
>> max_vmdq_pools="0", min_rx_bufsize="0", n_rxq="4", n_txq="9",
>> numa_id="0", port_no="2", rx-steering=rss, rx_csum_offload="false",
>> tx_geneve_tso_offload="false", tx_ip_csum_offload="false",
>> tx_out_ip_csum_offload="false", tx_out_udp_csum_offload="false",
>> tx_sctp_csum_offload="false", tx_tcp_csum_offload="false",
>> tx_tcp_seg_offload="false", tx_udp_csum_offload="false",
>> tx_vxlan_tso_offload="false"}
>>
>> Fixes: 5d11c47d3ebe ("userspace: Enable IP checksum offloading by default.")
>>
>> Signed-off-by: Jun Wang <[email protected]>
>> ---
>> lib/netdev-dpdk.c | 75
>> +++++++++++++++++++++++++++++++++----------------------
>> 1 file changed, 45 insertions(+), 30 deletions(-)
>>
>> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
>> index 2111f77..191c83d 100644
>> --- a/lib/netdev-dpdk.c
>> +++ b/lib/netdev-dpdk.c
>> @@ -1294,15 +1294,10 @@ dpdk_eth_dev_init_rx_metadata(struct netdev_dpdk
>> *dev)
>> dev->rx_metadata_delivery_configured = true;
>> }
>>
>> -static int
>> -dpdk_eth_dev_init(struct netdev_dpdk *dev)
>> - OVS_REQUIRES(dev->mutex)
>> +static void
>> +dpdk_eth_offload_config(struct netdev_dpdk *dev,
>> + struct rte_eth_dev_info *info)
>> {
>> - struct rte_pktmbuf_pool_private *mbp_priv;
>> - struct rte_eth_dev_info info;
>> - struct rte_ether_addr eth_addr;
>> - int diag;
>> - int n_rxq, n_txq;
>> uint32_t rx_chksm_offload_capa = RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
>> RTE_ETH_RX_OFFLOAD_TCP_CKSUM |
>> RTE_ETH_RX_OFFLOAD_IPV4_CKSUM;
>> @@ -1319,16 +1314,14 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
>> dpdk_eth_dev_init_rx_metadata(dev);
>> }
>>
>> - rte_eth_dev_info_get(dev->port_id, &info);
>> -
>> - if (strstr(info.driver_name, "vf") != NULL) {
>> + if (strstr(info->driver_name, "vf") != NULL) {
>> VLOG_INFO("Virtual function detected, HW_CRC_STRIP will be
>> enabled");
>> dev->hw_ol_features |= NETDEV_RX_HW_CRC_STRIP;
>> } else {
>> dev->hw_ol_features &= ~NETDEV_RX_HW_CRC_STRIP;
>> }
>>
>> - if ((info.rx_offload_capa & rx_chksm_offload_capa) !=
>> + if ((info->rx_offload_capa & rx_chksm_offload_capa) !=
>> rx_chksm_offload_capa) {
>> VLOG_WARN("Rx checksum offload is not supported on port "
>> DPDK_PORT_ID_FMT, dev->port_id);
>> @@ -1337,66 +1330,66 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
>> dev->hw_ol_features |= NETDEV_RX_CHECKSUM_OFFLOAD;
>> }
>>
>> - if (info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_SCATTER) {
>> + if (info->rx_offload_capa & RTE_ETH_RX_OFFLOAD_SCATTER) {
>> dev->hw_ol_features |= NETDEV_RX_HW_SCATTER;
>> } else {
>> /* Do not warn on lack of scatter support */
>> dev->hw_ol_features &= ~NETDEV_RX_HW_SCATTER;
>> }
>>
>> - if (!strcmp(info.driver_name, "net_tap")) {
>> + if (!strcmp(info->driver_name, "net_tap")) {
>> /* FIXME: L4 checksum offloading is broken in DPDK net/tap driver.
>> * This workaround can be removed once the fix makes it to a DPDK
>> * LTS release used by OVS. */
>> VLOG_INFO("%s: disabled Tx L4 checksum offloads for a net/tap
>> port.",
>> netdev_get_name(&dev->up));
>> - info.tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_UDP_CKSUM;
>> - info.tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_TCP_CKSUM;
>> + info->tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_UDP_CKSUM;
>> + info->tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_TCP_CKSUM;
>> }
>>
>> - if (!strcmp(info.driver_name, "net_ice")
>> - || !strcmp(info.driver_name, "net_i40e")) {
>> + if (!strcmp(info->driver_name, "net_ice")
>> + || !strcmp(info->driver_name, "net_i40e")) {
>> /* FIXME: Driver advertises the capability but doesn't seem
>> * to actually support it correctly. Can remove this once
>> * the driver is fixed on DPDK side. */
>> VLOG_INFO("%s: disabled Tx outer udp checksum offloads for a "
>> "net/ice or net/i40e port.", netdev_get_name(&dev->up));
>> - info.tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM;
>> - info.tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO;
>> - info.tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_GENEVE_TNL_TSO;
>> + info->tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM;
>> + info->tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO;
>> + info->tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_GENEVE_TNL_TSO;
>> }
>>
>> - if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM) {
>> + if (info->tx_offload_capa & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM) {
>> dev->hw_ol_features |= NETDEV_TX_IPV4_CKSUM_OFFLOAD;
>> } else {
>> dev->hw_ol_features &= ~NETDEV_TX_IPV4_CKSUM_OFFLOAD;
>> }
>>
>> - if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_CKSUM) {
>> + if (info->tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_CKSUM) {
>> dev->hw_ol_features |= NETDEV_TX_TCP_CKSUM_OFFLOAD;
>> } else {
>> dev->hw_ol_features &= ~NETDEV_TX_TCP_CKSUM_OFFLOAD;
>> }
>>
>> - if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_UDP_CKSUM) {
>> + if (info->tx_offload_capa & RTE_ETH_TX_OFFLOAD_UDP_CKSUM) {
>> dev->hw_ol_features |= NETDEV_TX_UDP_CKSUM_OFFLOAD;
>> } else {
>> dev->hw_ol_features &= ~NETDEV_TX_UDP_CKSUM_OFFLOAD;
>> }
>>
>> - if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_SCTP_CKSUM) {
>> + if (info->tx_offload_capa & RTE_ETH_TX_OFFLOAD_SCTP_CKSUM) {
>> dev->hw_ol_features |= NETDEV_TX_SCTP_CKSUM_OFFLOAD;
>> } else {
>> dev->hw_ol_features &= ~NETDEV_TX_SCTP_CKSUM_OFFLOAD;
>> }
>>
>> - if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM) {
>> + if (info->tx_offload_capa & RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM) {
>> dev->hw_ol_features |= NETDEV_TX_OUTER_IP_CKSUM_OFFLOAD;
>> } else {
>> dev->hw_ol_features &= ~NETDEV_TX_OUTER_IP_CKSUM_OFFLOAD;
>> }
>>
>> - if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM) {
>> + if (info->tx_offload_capa & RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM) {
>> dev->hw_ol_features |= NETDEV_TX_OUTER_UDP_CKSUM_OFFLOAD;
>> } else {
>> dev->hw_ol_features &= ~NETDEV_TX_OUTER_UDP_CKSUM_OFFLOAD;
>> @@ -1404,21 +1397,21 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
>>
>> dev->hw_ol_features &= ~NETDEV_TX_TSO_OFFLOAD;
>> if (userspace_tso_enabled()) {
>> - if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_TSO) {
>> + if (info->tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_TSO) {
>> dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD;
>> } else {
>> VLOG_WARN("%s: Tx TSO offload is not supported.",
>> netdev_get_name(&dev->up));
>> }
>>
>> - if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO) {
>> + if (info->tx_offload_capa & RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO) {
>> dev->hw_ol_features |= NETDEV_TX_VXLAN_TNL_TSO_OFFLOAD;
>> } else {
>> VLOG_WARN("%s: Tx Vxlan tunnel TSO offload is not supported.",
>> netdev_get_name(&dev->up));
>> }
>>
>> - if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_GENEVE_TNL_TSO) {
>> + if (info->tx_offload_capa & RTE_ETH_TX_OFFLOAD_GENEVE_TNL_TSO) {
>> dev->hw_ol_features |= NETDEV_TX_GENEVE_TNL_TSO_OFFLOAD;
>> } else {
>> VLOG_WARN("%s: Tx Geneve tunnel TSO offload is not supported.",
>> @@ -1426,6 +1419,21 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
>> }
>> }
>>
>> +}
>> +
>> +static int
>> +dpdk_eth_dev_init(struct netdev_dpdk *dev)
>> + OVS_REQUIRES(dev->mutex)
>> +{
>> + struct rte_pktmbuf_pool_private *mbp_priv;
>> + struct rte_eth_dev_info info;
>> + struct rte_ether_addr eth_addr;
>> + int diag;
>> + int n_rxq, n_txq;
>> +
>> + rte_eth_dev_info_get(dev->port_id, &info);
>> + dpdk_eth_offload_config(dev, &info);
>> +
>> n_rxq = MIN(info.max_rx_queues, dev->up.n_rxq);
>> n_txq = MIN(info.max_tx_queues, dev->up.n_txq);
>>
>> @@ -1439,6 +1447,13 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
>> return -diag;
>> }
>>
>> + rte_eth_dev_info_get(dev->port_id, &info);
>> + if (!strcmp(info.driver_name, "net_bonding")) {
>> + dpdk_eth_offload_config(dev, &info);
>> + VLOG_INFO("%s: configure offloads for a dpdk net_bonding port.",
>> + netdev_get_name(&dev->up));
>> + }
>> +
>
>I'm a bit confused by this. Why do we need to check offloads again
>after the initialization? Is there a chance that bonding driver
>will enable features we didn't ask for?
>
>In general, since we don't know what kind of device is in the bonding,
>we should just disable all offloads for it as long as there is a chance
>to have a broken driver downstream of a bond.
>
>Best regards, Ilya Maximets.
I believe the DPDK net/bonding port has not been configured here yet, so all
the obtained offloads are false. That's why I reset them again after
dpdk_eth_dev_port_config. It seems like the DPDK bond has completed
initialization at this point, allowing us to properly obtain the offload
flags.I think this should be where the DPDK net/bonding port sets the offload
flags, which may be the logic of DPDK net/bonding. Additionally, regarding your
second point, shouldn't we consider DPDK's net/bonding? I have verified with
CX5 network cards, and after reacquiring the offloads, the functionality works
properly.
Jun Wang
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev