>On 4/12/24 08:29, Jun Wang wrote:
>> If it's a DPDK net_bonding, it may cause
>> offload-related configurations to take effect,
>> leading to offload failure.
>>
>> /usr/share/openvswitch/scripts/ovs-ctl restart --no-ovs-vswitchd \
>> --system-id=test
>> ovs-vsctl --no-wait set open . external-ids:ovn-bridge-datapath-type\
>> =netdev
>>ovs-vsctl --no-wait set Open_vSwitch . other_config:dpdk-init=true
>> ovs-vsctl --no-wait set Open_vSwitch . other_config:dpdk-socket-mem=\
>> "4096,4096"
>> ovs-vsctl --no-wait set Open_vSwitch . other_config:pmd-cpu-mask=\
>> 0xff0000
>> ovs-vsctl --no-wait set Open_vSwitch . other_config:dpdk-extra=\
>> "-a 0000:ca:00.0  -a 0000:ca:00.1 --vdev net_bonding2494589023,\
>> mode=4,member=0000:ca:00.0,member=0000:ca:00.1,xmit_policy=l34,\
>> lacp_rate=fast"
>> ovs-vswitchd unix:/var/run/openvswitch/db.sock -vconsole:emer \
>> -vsyslog:err -vfile:info --mlockall --no-chdir \
>> --log-file=/var/log/openvswitch/ovs-vswitchd.log \
>> --pidfile=/var/run/openvswitch/ovs-vswitchd.pid --detach --monitor
>> ovs-vsctl  add-br br-tun -- set bridge br-tun datapath_type=netdev
>> ovs-vsctl --may-exist add-port br-tun dpdk_tun_port -- set Interface \
>> dpdk_tun_port type=dpdk options:dpdk-devargs="net_bonding2494589023"
>>
>> {bus_info="bus_name=vdev", driver_name=net_bonding,
>>  if_descr="DPDK 23.11.0 net_bonding", if_type="6",link_speed="20Gbps",
>>  max_hash_mac_addrs="0", max_mac_addrs="16", max_rx_pktlen="1618",
>>  max_rx_queues="1023", max_tx_queues="1023", max_vfs="0",
>>  max_vmdq_pools="0", min_rx_bufsize="0", n_rxq="4", n_txq="9",
>>  numa_id="0", port_no="2", rx-steering=rss, rx_csum_offload="false",
>>  tx_geneve_tso_offload="false", tx_ip_csum_offload="false",
>>  tx_out_ip_csum_offload="false", tx_out_udp_csum_offload="false",
>>  tx_sctp_csum_offload="false", tx_tcp_csum_offload="false",
>>  tx_tcp_seg_offload="false", tx_udp_csum_offload="false",
>>  tx_vxlan_tso_offload="false"}
>>
>> Fixes: 5d11c47d3ebe ("userspace: Enable IP checksum offloading by default.")
>>
>> Signed-off-by: Jun Wang <[email protected]>
>> ---
>>  lib/netdev-dpdk.c | 75 
>> +++++++++++++++++++++++++++++++++----------------------
>>  1 file changed, 45 insertions(+), 30 deletions(-)
>>
>> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
>> index 2111f77..191c83d 100644
>> --- a/lib/netdev-dpdk.c
>> +++ b/lib/netdev-dpdk.c
>> @@ -1294,15 +1294,10 @@ dpdk_eth_dev_init_rx_metadata(struct netdev_dpdk 
>> *dev)
>>      dev->rx_metadata_delivery_configured = true;
>>  }
>> 
>> -static int
>> -dpdk_eth_dev_init(struct netdev_dpdk *dev)
>> -    OVS_REQUIRES(dev->mutex)
>> +static void
>> +dpdk_eth_offload_config(struct netdev_dpdk *dev,
>> +                               struct rte_eth_dev_info *info)
>>  {
>> -    struct rte_pktmbuf_pool_private *mbp_priv;
>> -    struct rte_eth_dev_info info;
>> -    struct rte_ether_addr eth_addr;
>> -    int diag;
>> -    int n_rxq, n_txq;
>>      uint32_t rx_chksm_offload_capa = RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
>>                                       RTE_ETH_RX_OFFLOAD_TCP_CKSUM |
>>                                       RTE_ETH_RX_OFFLOAD_IPV4_CKSUM;
>> @@ -1319,16 +1314,14 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
>>          dpdk_eth_dev_init_rx_metadata(dev);
>>      }
>> 
>> -    rte_eth_dev_info_get(dev->port_id, &info);
>> -
>> -    if (strstr(info.driver_name, "vf") != NULL) {
>> +    if (strstr(info->driver_name, "vf") != NULL) {
>>          VLOG_INFO("Virtual function detected, HW_CRC_STRIP will be 
>> enabled");
>>          dev->hw_ol_features |= NETDEV_RX_HW_CRC_STRIP;
>>      } else {
>>          dev->hw_ol_features &= ~NETDEV_RX_HW_CRC_STRIP;
>>      }
>> 
>> -    if ((info.rx_offload_capa & rx_chksm_offload_capa) !=
>> +    if ((info->rx_offload_capa & rx_chksm_offload_capa) !=
>>              rx_chksm_offload_capa) {
>>          VLOG_WARN("Rx checksum offload is not supported on port "
>>                    DPDK_PORT_ID_FMT, dev->port_id);
>> @@ -1337,66 +1330,66 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
>>          dev->hw_ol_features |= NETDEV_RX_CHECKSUM_OFFLOAD;
>>      }
>> 
>> -    if (info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_SCATTER) {
>> +    if (info->rx_offload_capa & RTE_ETH_RX_OFFLOAD_SCATTER) {
>>          dev->hw_ol_features |= NETDEV_RX_HW_SCATTER;
>>      } else {
>>          /* Do not warn on lack of scatter support */
>>          dev->hw_ol_features &= ~NETDEV_RX_HW_SCATTER;
>>      }
>> 
>> -    if (!strcmp(info.driver_name, "net_tap")) {
>> +    if (!strcmp(info->driver_name, "net_tap")) {
>>          /* FIXME: L4 checksum offloading is broken in DPDK net/tap driver.
>>           * This workaround can be removed once the fix makes it to a DPDK
>>           * LTS release used by OVS. */
>>          VLOG_INFO("%s: disabled Tx L4 checksum offloads for a net/tap 
>> port.",
>>                    netdev_get_name(&dev->up));
>> -        info.tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_UDP_CKSUM;
>> -        info.tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_TCP_CKSUM;
>> +        info->tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_UDP_CKSUM;
>> +        info->tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_TCP_CKSUM;
>>      }
>> 
>> -    if (!strcmp(info.driver_name, "net_ice")
>> -        || !strcmp(info.driver_name, "net_i40e")) {
>> +    if (!strcmp(info->driver_name, "net_ice")
>> +        || !strcmp(info->driver_name, "net_i40e")) {
>>          /* FIXME: Driver advertises the capability but doesn't seem
>>           * to actually support it correctly.  Can remove this once
>>           * the driver is fixed on DPDK side. */
>>          VLOG_INFO("%s: disabled Tx outer udp checksum offloads for a "
>>                    "net/ice or net/i40e port.", netdev_get_name(&dev->up));
>> -        info.tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM;
>> -        info.tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO;
>> -        info.tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_GENEVE_TNL_TSO;
>> +        info->tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM;
>> +        info->tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO;
>> +        info->tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_GENEVE_TNL_TSO;
>>      }
>> 
>> -    if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM) {
>> +    if (info->tx_offload_capa & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM) {
>>          dev->hw_ol_features |= NETDEV_TX_IPV4_CKSUM_OFFLOAD;
>>      } else {
>>          dev->hw_ol_features &= ~NETDEV_TX_IPV4_CKSUM_OFFLOAD;
>>      }
>> 
>> -    if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_CKSUM) {
>> +    if (info->tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_CKSUM) {
>>          dev->hw_ol_features |= NETDEV_TX_TCP_CKSUM_OFFLOAD;
>>      } else {
>>          dev->hw_ol_features &= ~NETDEV_TX_TCP_CKSUM_OFFLOAD;
>>      }
>> 
>> -    if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_UDP_CKSUM) {
>> +    if (info->tx_offload_capa & RTE_ETH_TX_OFFLOAD_UDP_CKSUM) {
>>          dev->hw_ol_features |= NETDEV_TX_UDP_CKSUM_OFFLOAD;
>>      } else {
>>          dev->hw_ol_features &= ~NETDEV_TX_UDP_CKSUM_OFFLOAD;
>>      }
>> 
>> -    if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_SCTP_CKSUM) {
>> +    if (info->tx_offload_capa & RTE_ETH_TX_OFFLOAD_SCTP_CKSUM) {
>>          dev->hw_ol_features |= NETDEV_TX_SCTP_CKSUM_OFFLOAD;
>>      } else {
>>          dev->hw_ol_features &= ~NETDEV_TX_SCTP_CKSUM_OFFLOAD;
>>      }
>> 
>> -    if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM) {
>> +    if (info->tx_offload_capa & RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM) {
>>          dev->hw_ol_features |= NETDEV_TX_OUTER_IP_CKSUM_OFFLOAD;
>>      } else {
>>          dev->hw_ol_features &= ~NETDEV_TX_OUTER_IP_CKSUM_OFFLOAD;
>>      }
>> 
>> -    if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM) {
>> +    if (info->tx_offload_capa & RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM) {
>>          dev->hw_ol_features |= NETDEV_TX_OUTER_UDP_CKSUM_OFFLOAD;
>>      } else {
>>          dev->hw_ol_features &= ~NETDEV_TX_OUTER_UDP_CKSUM_OFFLOAD;
>> @@ -1404,21 +1397,21 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
>> 
>>      dev->hw_ol_features &= ~NETDEV_TX_TSO_OFFLOAD;
>>      if (userspace_tso_enabled()) {
>> -        if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_TSO) {
>> +        if (info->tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_TSO) {
>>              dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD;
>>          } else {
>>              VLOG_WARN("%s: Tx TSO offload is not supported.",
>>                        netdev_get_name(&dev->up));
>>          }
>> 
>> -        if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO) {
>> +        if (info->tx_offload_capa & RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO) {
>>              dev->hw_ol_features |= NETDEV_TX_VXLAN_TNL_TSO_OFFLOAD;
>>          } else {
>>              VLOG_WARN("%s: Tx Vxlan tunnel TSO offload is not supported.",
>>                        netdev_get_name(&dev->up));
>>          }
>> 
>> -        if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_GENEVE_TNL_TSO) {
>> +        if (info->tx_offload_capa & RTE_ETH_TX_OFFLOAD_GENEVE_TNL_TSO) {
>>              dev->hw_ol_features |= NETDEV_TX_GENEVE_TNL_TSO_OFFLOAD;
>>          } else {
>>              VLOG_WARN("%s: Tx Geneve tunnel TSO offload is not supported.",
>> @@ -1426,6 +1419,21 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
>>          }
>>      }
>> 
>> +}
>> +
>> +static int
>> +dpdk_eth_dev_init(struct netdev_dpdk *dev)
>> +    OVS_REQUIRES(dev->mutex)
>> +{
>> +    struct rte_pktmbuf_pool_private *mbp_priv;
>> +    struct rte_eth_dev_info info;
>> +    struct rte_ether_addr eth_addr;
>> +    int diag;
>> +    int n_rxq, n_txq;
>> +
>> +    rte_eth_dev_info_get(dev->port_id, &info);
>> +    dpdk_eth_offload_config(dev, &info);
>> +
>>      n_rxq = MIN(info.max_rx_queues, dev->up.n_rxq);
>>      n_txq = MIN(info.max_tx_queues, dev->up.n_txq);
>> 
>> @@ -1439,6 +1447,13 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
>>          return -diag;
>>      }
>> 
>> +    rte_eth_dev_info_get(dev->port_id, &info);
>> +    if (!strcmp(info.driver_name, "net_bonding")) {
>> +        dpdk_eth_offload_config(dev, &info);
>> +        VLOG_INFO("%s: configure offloads for a dpdk net_bonding port.",
>> +                   netdev_get_name(&dev->up));
>> +    }
>> +
> 
>I'm a bit confused  by this.  Why do we need to check offloads again
>after the initialization?  Is there a chance that bonding driver
>will enable features we didn't ask for?
> 
>In general, since we don't know what kind of device is in the bonding,
>we should just disable all offloads for it as long as there is a chance
>to have a broken driver downstream of a bond.
> 
>Best regards, Ilya Maximets.

I believe the DPDK net/bonding port has not been configured here yet, so all 
the obtained offloads are false. That's why I reset them again after 
dpdk_eth_dev_port_config. It seems like the DPDK bond has completed 
initialization at this point, allowing us to properly obtain the offload 
flags.I think this should be where the DPDK net/bonding port sets the offload 
flags, which may be the logic of DPDK net/bonding. Additionally, regarding your 
second point, shouldn't we consider DPDK's net/bonding? I have verified with 
CX5 network cards, and after reacquiring the offloads, the functionality works 
properly.



Jun Wang
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to