On 3/27/23 12:50, Mike Pattrick wrote:
> From: Flavio Leitner <[email protected]>
> 
> The netdev receiving packets is supposed to provide flags
> indicating whether the L4 checksum was verified and is OK or BAD;
> otherwise the stack will check it in software when appropriate.
> 
> If the packet comes with a good checksum, then postpone the
> checksum calculation to the egress device if needed.
> 
> When encapsulating a packet with that flag, set the checksum
> of the inner L4 header since that is not yet supported.
> 
> Calculate the L4 checksum when the packet is going to be sent
> over a device that doesn't support the feature.
> 
> Linux tap devices allow enabling L3 and L4 offload, so this
> patch enables the feature. However, the Linux socket interface
> remains disabled because the API doesn't allow enabling
> those two features without enabling TSO too.
> 
> Signed-off-by: Flavio Leitner <[email protected]>
> Co-authored-by: Mike Pattrick <[email protected]>
> Signed-off-by: Mike Pattrick <[email protected]>
> ---
>  Since v9:
>   - Extended miniflow_extract changes into avx512 code
>   - Formatting changes
>   - Note that we cannot currently enable checksum offloading in
>     CONFIGURE_VETH_OFFLOADS for check-system-userspace as
>     netdev-linux.c currently only parses the vnet header if TSO
>     is enabled.
>  Since v10:
>   - No change
> 
> Signed-off-by: Mike Pattrick <[email protected]>
> ---
>  lib/conntrack.c                  |  15 +-
>  lib/dp-packet.c                  |  25 ++++
>  lib/dp-packet.h                  |  78 +++++++++-
>  lib/dpif-netdev-extract-avx512.c |  62 +++++++-
>  lib/flow.c                       |  23 +++
>  lib/netdev-dpdk.c                | 174 +++++++++++++++-------
>  lib/netdev-linux.c               | 242 +++++++++++++++++++++----------
>  lib/netdev-native-tnl.c          |  32 +---
>  lib/netdev.c                     |  46 ++----
>  lib/odp-execute-avx512.c         |  24 +--
>  lib/packets.c                    | 175 +++++++++++++++++-----
>  lib/packets.h                    |   3 +
>  12 files changed, 645 insertions(+), 254 deletions(-)
> 
> diff --git a/lib/conntrack.c b/lib/conntrack.c
> index 54166c320..75f3d7fee 100644
> --- a/lib/conntrack.c
> +++ b/lib/conntrack.c
> @@ -2044,13 +2044,12 @@ conn_key_extract(struct conntrack *ct, struct dp_packet *pkt, ovs_be16 dl_type,
>      }
>  
>      if (ok) {
> -        bool hwol_bad_l4_csum = dp_packet_l4_checksum_bad(pkt);
> -        if (!hwol_bad_l4_csum) {
> -            bool hwol_good_l4_csum = dp_packet_l4_checksum_good(pkt)
> -                                     || dp_packet_hwol_tx_l4_checksum(pkt);
> +        if (!dp_packet_l4_checksum_bad(pkt)) {
>              /* Validate the checksum only when hwol is not supported. */
>              if (extract_l4(&ctx->key, l4, dp_packet_l4_size(pkt),
> -                           &ctx->icmp_related, l3, !hwol_good_l4_csum,
> +                           &ctx->icmp_related, l3,
> +                           !dp_packet_l4_checksum_good(pkt) &&
> +                           !dp_packet_hwol_tx_l4_checksum(pkt),
>                             NULL)) {
>                  ctx->hash = conn_key_hash(&ctx->key, ct->hash_basis);
>                  return true;
> @@ -3379,8 +3378,10 @@ handle_ftp_ctl(struct conntrack *ct, const struct conn_lookup_ctx *ctx,
>              adj_seqnum(&th->tcp_seq, ec->seq_skew);
>      }
>  
> -    th->tcp_csum = 0;
> -    if (!dp_packet_hwol_tx_l4_checksum(pkt)) {
> +    if (dp_packet_hwol_tx_l4_checksum(pkt)) {
> +        dp_packet_ol_reset_l4_csum_good(pkt);
> +    } else {
> +        th->tcp_csum = 0;
>          if (ctx->key.dl_type == htons(ETH_TYPE_IPV6)) {
>              th->tcp_csum = packet_csum_upperlayer6(nh6, th, ctx->key.nw_proto,
>                                 dp_packet_l4_size(pkt));
> diff --git a/lib/dp-packet.c b/lib/dp-packet.c
> index 61c36de44..dfedd0e9b 100644
> --- a/lib/dp-packet.c
> +++ b/lib/dp-packet.c
> @@ -38,6 +38,9 @@ dp_packet_init__(struct dp_packet *b, size_t allocated, enum dp_packet_source so
>      dp_packet_init_specific(b);
>      /* By default assume the packet type to be Ethernet. */
>      b->packet_type = htonl(PT_ETH);
> +    /* Reset csum start and offset. */
> +    b->csum_start = 0;
> +    b->csum_offset = 0;
>  }
>  
>  static void
> @@ -544,4 +547,26 @@ dp_packet_ol_send_prepare(struct dp_packet *p, uint64_t flags)
>          dp_packet_ol_set_ip_csum_good(p);
>          dp_packet_hwol_reset_tx_ip_csum(p);
>      }
> +
> +    if (dp_packet_l4_checksum_good(p) || !dp_packet_hwol_tx_l4_checksum(p)) {
> +        dp_packet_hwol_reset_tx_l4_csum(p);
> +        return;
> +    }
> +
> +    if (dp_packet_hwol_l4_is_tcp(p)
> +        && !(flags & NETDEV_TX_OFFLOAD_TCP_CKSUM)) {
> +        packet_tcp_complete_csum(p);
> +        dp_packet_ol_set_l4_csum_good(p);
> +        dp_packet_hwol_reset_tx_l4_csum(p);
> +    } else if (dp_packet_hwol_l4_is_udp(p)
> +               && !(flags & NETDEV_TX_OFFLOAD_UDP_CKSUM)) {
> +        packet_udp_complete_csum(p);
> +        dp_packet_ol_set_l4_csum_good(p);
> +        dp_packet_hwol_reset_tx_l4_csum(p);
> +    } else if (!(flags & NETDEV_TX_OFFLOAD_SCTP_CKSUM)
> +               && dp_packet_hwol_l4_is_sctp(p)) {
> +        packet_sctp_complete_csum(p);
> +        dp_packet_ol_set_l4_csum_good(p);
> +        dp_packet_hwol_reset_tx_l4_csum(p);
> +    }
>  }
> diff --git a/lib/dp-packet.h b/lib/dp-packet.h
> index af0a2b7f0..c37c85857 100644
> --- a/lib/dp-packet.h
> +++ b/lib/dp-packet.h
> @@ -140,6 +140,8 @@ struct dp_packet {
>                                        or UINT16_MAX. */
>      uint32_t cutlen;               /* length in bytes to cut from the end. */
>      ovs_be32 packet_type;          /* Packet type as defined in OpenFlow */
> +    uint16_t csum_start;           /* Position to start checksumming from. */
> +    uint16_t csum_offset;          /* Offset to place checksum. */
>      union {
>          struct pkt_metadata md;
>          uint64_t data[DP_PACKET_CONTEXT_SIZE / 8];
> @@ -997,6 +999,13 @@ dp_packet_hwol_is_ipv4(const struct dp_packet *b)
>      return !!(*dp_packet_ol_flags_ptr(b) & DP_PACKET_OL_TX_IPV4);
>  }
>  
> +/* Returns 'true' if packet 'p' is marked as IPv6. */
> +static inline bool
> +dp_packet_hwol_tx_ipv6(const struct dp_packet *p)
> +{
> +    return !!(*dp_packet_ol_flags_ptr(p) & DP_PACKET_OL_TX_IPV6);
> +}
> +
>  /* Returns 'true' if packet 'b' is marked for TCP checksum offloading. */
>  static inline bool
>  dp_packet_hwol_l4_is_tcp(const struct dp_packet *b)
> @@ -1021,18 +1030,26 @@ dp_packet_hwol_l4_is_sctp(struct dp_packet *b)
>              DP_PACKET_OL_TX_SCTP_CKSUM;
>  }
>  
> -/* Mark packet 'b' for IPv4 checksum offloading. */
>  static inline void
> -dp_packet_hwol_set_tx_ipv4(struct dp_packet *b)
> +dp_packet_hwol_reset_tx_l4_csum(struct dp_packet *p)
> +{
> +    *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_TX_L4_MASK;
> +}
> +
> +/* Mark packet 'p' as IPv4. */
> +static inline void
> +dp_packet_hwol_set_tx_ipv4(struct dp_packet *p)
>  {
> -    *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_IPV4;
> +    *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_TX_IPV6;
> +    *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_TX_IPV4;
>  }
>  
> -/* Mark packet 'b' for IPv6 checksum offloading. */
> +/* Mark packet 'a' as IPv6. */
>  static inline void
> -dp_packet_hwol_set_tx_ipv6(struct dp_packet *b)
> +dp_packet_hwol_set_tx_ipv6(struct dp_packet *a)
>  {
> -    *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_IPV6;
> +    *dp_packet_ol_flags_ptr(a) &= ~DP_PACKET_OL_TX_IPV4;
> +    *dp_packet_ol_flags_ptr(a) |= DP_PACKET_OL_TX_IPV6;
>  }
>  
>  /* Returns 'true' if packet 'p' is marked for IPv4 checksum offloading. */
> @@ -1147,6 +1164,55 @@ dp_packet_l4_checksum_bad(const struct dp_packet *p)
>              DP_PACKET_OL_RX_L4_CKSUM_BAD;
>  }
>  
> +/* Returns 'true' if the packet has good integrity though the
> + * checksum in the packet 'p' is not complete. */
> +static inline bool
> +dp_packet_ol_l4_csum_partial(const struct dp_packet *p)
> +{
> +    return (*dp_packet_ol_flags_ptr(p) & DP_PACKET_OL_RX_L4_CKSUM_MASK) ==
> +            DP_PACKET_OL_RX_L4_CKSUM_MASK;
> +}
> +
> +/* Marks packet 'p' with good integrity though the checksum in the
> + * packet is not complete. */
> +static inline void
> +dp_packet_ol_set_l4_csum_partial(struct dp_packet *p)
> +{
> +    *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_RX_L4_CKSUM_MASK;
> +}
> +
> +/* Marks packet 'p' with good L4 checksum. */
> +static inline void
> +dp_packet_ol_set_l4_csum_good(struct dp_packet *p)
> +{
> +    *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_RX_L4_CKSUM_BAD;
> +    *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_RX_L4_CKSUM_GOOD;
> +}
> +
> +/* Marks packet 'p' with good L4 checksum as modified. */
> +static inline void
> +dp_packet_ol_reset_l4_csum_good(struct dp_packet *p)
> +{
> +    if (!dp_packet_ol_l4_csum_partial(p)) {
> +        *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_RX_L4_CKSUM_GOOD;
> +    }
> +}
> +
> +/* Marks packet 'p' with good integrity if the 'start' and 'offset'
> + * matches with the 'csum_start' and 'csum_offset' in packet 'p'.
> + * The 'start' is the offset from the begin of the packet headers.
> + * The 'offset' is the offset from start to place the checksum.
> + * The csum_start and csum_offset fields are set from the virtio_net_hdr
> + * struct that may be provided by a netdev on packet ingress. */
> +static inline void
> +dp_packet_ol_vnet_csum_check(struct dp_packet *p, uint16_t start,
> +                             uint16_t offset)

Can we call this function dp_packet_ol_l4_csum_check_partial or
something like that?

> +{
> +    if (p->csum_start == start && p->csum_offset == offset) {
> +        dp_packet_ol_set_l4_csum_partial(p);
> +    }
> +}
> +
>  static inline uint32_t ALWAYS_INLINE
>  dp_packet_calc_hash_ipv4(const uint8_t *pkt, const uint16_t l3_ofs,
>                           uint32_t hash)
> diff --git a/lib/dpif-netdev-extract-avx512.c b/lib/dpif-netdev-extract-avx512.c
> index 66884eaf0..a9299d229 100644
> --- a/lib/dpif-netdev-extract-avx512.c
> +++ b/lib/dpif-netdev-extract-avx512.c
> @@ -698,7 +698,6 @@ mfex_ipv6_set_l2_pad_size(struct dp_packet *pkt,
>          return -1;
>      }
>      dp_packet_set_l2_pad_size(pkt, len_from_ipv6 - (p_len + IPV6_HEADER_LEN));
> -    dp_packet_hwol_set_tx_ipv6(pkt);
>      return 0;
>  }
>  
> @@ -729,10 +728,6 @@ mfex_ipv4_set_l2_pad_size(struct dp_packet *pkt, struct ip_header *nh,
>          return -1;
>      }
>      dp_packet_set_l2_pad_size(pkt, len_from_ipv4 - ip_tot_len);
> -    dp_packet_hwol_set_tx_ipv4(pkt);
> -    if (dp_packet_ip_checksum_good(pkt)) {
> -        dp_packet_hwol_set_tx_ip_csum(pkt);
> -    }
>      return 0;
>  }
>  
> @@ -763,6 +758,45 @@ mfex_check_tcp_data_offset(const struct tcp_header *tcp)
>      return ret;
>  }
>  
> +static void
> +mfex_ipv4_set_hwol(struct dp_packet *pkt)
> +{
> +    dp_packet_hwol_set_tx_ipv4(pkt);
> +    if (dp_packet_ip_checksum_good(pkt)) {
> +        dp_packet_hwol_set_tx_ip_csum(pkt);
> +    }
> +}
> +
> +static void
> +mfex_ipv6_set_hwol(struct dp_packet *pkt)
> +{
> +    dp_packet_hwol_set_tx_ipv6(pkt);
> +}
> +
> +static void
> +mfex_tcp_set_hwol(struct dp_packet *pkt)
> +{
> +    dp_packet_ol_vnet_csum_check(pkt, pkt->l4_ofs,
> +                                 offsetof(struct tcp_header,
> +                                          tcp_csum));
> +    if (dp_packet_l4_checksum_good(pkt)
> +        || dp_packet_ol_l4_csum_partial(pkt)) {
> +        dp_packet_hwol_set_csum_tcp(pkt);
> +    }
> +}
> +
> +static void
> +mfex_udp_set_hwol(struct dp_packet *pkt)
> +{
> +    dp_packet_ol_vnet_csum_check(pkt, pkt->l4_ofs,
> +                                 offsetof(struct udp_header,
> +                                          udp_csum));
> +    if (dp_packet_l4_checksum_good(pkt)
> +        || dp_packet_ol_l4_csum_partial(pkt)) {
> +        dp_packet_hwol_set_csum_udp(pkt);
> +    }
> +}
> +
>  /* Generic loop to process any mfex profile. This code is specialized into
>   * multiple actual MFEX implementation functions. Its marked ALWAYS_INLINE
>   * to ensure the compiler specializes each instance. The code is marked "hot"
> @@ -864,6 +898,8 @@ mfex_avx512_process(struct dp_packet_batch *packets,
>                  const struct tcp_header *tcp = (void *)&pkt[38];
>                  mfex_handle_tcp_flags(tcp, &blocks[7]);
>                  dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
> +                mfex_ipv4_set_hwol(packet);
> +                mfex_tcp_set_hwol(packet);
>              } break;
>  
>          case PROFILE_ETH_VLAN_IPV4_UDP: {
> @@ -876,6 +912,8 @@ mfex_avx512_process(struct dp_packet_batch *packets,
>                      continue;
>                  }
>                  dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
> +                mfex_ipv4_set_hwol(packet);
> +                mfex_udp_set_hwol(packet);
>              } break;
>  
>          case PROFILE_ETH_IPV4_TCP: {
> @@ -891,6 +929,8 @@ mfex_avx512_process(struct dp_packet_batch *packets,
>                      continue;
>                  }
>                  dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
> +                mfex_ipv4_set_hwol(packet);
> +                mfex_tcp_set_hwol(packet);
>              } break;
>  
>          case PROFILE_ETH_IPV4_UDP: {
> @@ -902,6 +942,8 @@ mfex_avx512_process(struct dp_packet_batch *packets,
>                      continue;
>                  }
>                  dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
> +                mfex_ipv4_set_hwol(packet);
> +                mfex_udp_set_hwol(packet);
>              } break;
>  
>          case PROFILE_ETH_IPV6_UDP: {
> @@ -920,6 +962,8 @@ mfex_avx512_process(struct dp_packet_batch *packets,
>                  /* Process UDP header. */
>                  mfex_handle_ipv6_l4((void *)&pkt[54], &blocks[9]);
>                  dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
> +                mfex_ipv6_set_hwol(packet);
> +                mfex_udp_set_hwol(packet);
>              } break;
>  
>          case PROFILE_ETH_IPV6_TCP: {
> @@ -943,6 +987,8 @@ mfex_avx512_process(struct dp_packet_batch *packets,
>                  }
>                  mfex_handle_tcp_flags(tcp, &blocks[9]);
>                  dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
> +                mfex_ipv6_set_hwol(packet);
> +                mfex_tcp_set_hwol(packet);
>              } break;
>  
>          case PROFILE_ETH_VLAN_IPV6_TCP: {
> @@ -969,6 +1015,8 @@ mfex_avx512_process(struct dp_packet_batch *packets,
>                  }
>                  mfex_handle_tcp_flags(tcp, &blocks[10]);
>                  dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
> +                mfex_ipv6_set_hwol(packet);
> +                mfex_tcp_set_hwol(packet);
>              } break;
>  
>          case PROFILE_ETH_VLAN_IPV6_UDP: {
> @@ -990,6 +1038,8 @@ mfex_avx512_process(struct dp_packet_batch *packets,
>                  /* Process UDP header. */
>                  mfex_handle_ipv6_l4((void *)&pkt[58], &blocks[10]);
>                  dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
> +                mfex_ipv6_set_hwol(packet);
> +                mfex_udp_set_hwol(packet);
>              } break;
>  
>          case PROFILE_ETH_IPV4_NVGRE: {
> @@ -1000,6 +1050,8 @@ mfex_avx512_process(struct dp_packet_batch *packets,
>                      continue;
>                  }
>                  dp_packet_update_rss_hash_ipv4(packet);
> +                mfex_ipv4_set_hwol(packet);
> +                mfex_udp_set_hwol(packet);
>              } break;
>  
>          default:
> diff --git a/lib/flow.c b/lib/flow.c
> index 6c8bf7fc0..5aaf3b420 100644
> --- a/lib/flow.c
> +++ b/lib/flow.c
> @@ -1027,6 +1027,13 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
>                      } else if (dl_type == htons(ETH_TYPE_IPV6)) {
>                          dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
>                      }
> +                    dp_packet_ol_vnet_csum_check(packet, packet->l4_ofs,
> +                                                 offsetof(struct tcp_header,
> +                                                          tcp_csum));
> +                    if (dp_packet_l4_checksum_good(packet)
> +                        || dp_packet_ol_l4_csum_partial(packet)) {
> +                        dp_packet_hwol_set_csum_tcp(packet);
> +                    }
>                  }
>              }
>          } else if (OVS_LIKELY(nw_proto == IPPROTO_UDP)) {
> @@ -1042,6 +1049,13 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
>                  } else if (dl_type == htons(ETH_TYPE_IPV6)) {
>                      dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
>                  }
> +                dp_packet_ol_vnet_csum_check(packet, packet->l4_ofs,
> +                                             offsetof(struct udp_header,
> +                                                      udp_csum));
> +                if (dp_packet_l4_checksum_good(packet)
> +                    || dp_packet_ol_l4_csum_partial(packet)) {
> +                    dp_packet_hwol_set_csum_udp(packet);
> +                }
>              }
>          } else if (OVS_LIKELY(nw_proto == IPPROTO_SCTP)) {
>              if (OVS_LIKELY(size >= SCTP_HEADER_LEN)) {
> @@ -1051,6 +1065,13 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
>                  miniflow_push_be16(mf, tp_dst, sctp->sctp_dst);
>                  miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
>                  miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
> +                dp_packet_ol_vnet_csum_check(packet, packet->l4_ofs,
> +                                             offsetof(struct sctp_header,
> +                                                      sctp_csum));
> +                if (dp_packet_l4_checksum_good(packet)
> +                    || dp_packet_ol_l4_csum_partial(packet)) {
> +                    dp_packet_hwol_set_csum_sctp(packet);
> +                }
>              }
>          } else if (OVS_LIKELY(nw_proto == IPPROTO_ICMP)) {
>              if (OVS_LIKELY(size >= ICMP_HEADER_LEN)) {
> @@ -3170,6 +3191,7 @@ flow_compose_l4_csum(struct dp_packet *p, const struct flow *flow,
>              tcp->tcp_csum = 0;
>              tcp->tcp_csum = csum_finish(csum_continue(pseudo_hdr_csum,
>                                                        tcp, l4_len));
> +            dp_packet_ol_set_l4_csum_good(p);
>          } else if (flow->nw_proto == IPPROTO_UDP) {
>              struct udp_header *udp = dp_packet_l4(p);
>  
> @@ -3179,6 +3201,7 @@ flow_compose_l4_csum(struct dp_packet *p, const struct flow *flow,
>              if (!udp->udp_csum) {
>                  udp->udp_csum = htons(0xffff);
>              }
> +            dp_packet_ol_set_l4_csum_good(p);
>          } else if (flow->nw_proto == IPPROTO_ICMP) {
>              struct icmp_header *icmp = dp_packet_l4(p);
>  
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index 8c2c07898..31ac534f6 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -412,8 +412,10 @@ enum dpdk_hw_ol_features {
>      NETDEV_RX_HW_CRC_STRIP = 1 << 1,
>      NETDEV_RX_HW_SCATTER = 1 << 2,
>      NETDEV_TX_IPV4_CKSUM_OFFLOAD = 1 << 3,
> -    NETDEV_TX_TSO_OFFLOAD = 1 << 4,
> -    NETDEV_TX_SCTP_CHECKSUM_OFFLOAD = 1 << 5,
> +    NETDEV_TX_TCP_CKSUM_OFFLOAD = 1 << 4,
> +    NETDEV_TX_UDP_CKSUM_OFFLOAD = 1 << 5,
> +    NETDEV_TX_SCTP_CKSUM_OFFLOAD = 1 << 6,
> +    NETDEV_TX_TSO_OFFLOAD = 1 << 7,
>  };
>  
>  /*
> @@ -1008,6 +1010,35 @@ dpdk_watchdog(void *dummy OVS_UNUSED)
>      return NULL;
>  }
>  
> +static void
> +netdev_dpdk_update_netdev_flag(struct netdev_dpdk *dev,
> +                               enum dpdk_hw_ol_features hw_ol_features,
> +                               enum netdev_ol_flags flag)

An annotation that this requires dev->mutex would be nice.
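For example (just a sketch, using the existing OVS_REQUIRES annotation):

    static void
    netdev_dpdk_update_netdev_flag(struct netdev_dpdk *dev,
                                   enum dpdk_hw_ol_features hw_ol_features,
                                   enum netdev_ol_flags flag)
        OVS_REQUIRES(dev->mutex)
    {
        ...
    }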

> +{
> +    struct netdev *netdev = &dev->up;
> +
> +    if (dev->hw_ol_features & hw_ol_features) {
> +        netdev->ol_flags |= flag;
> +    } else {
> +        netdev->ol_flags &= ~flag;
> +    }
> +}
> +
> +static void
> +netdev_dpdk_update_netdev_flags(struct netdev_dpdk *dev)
> +{
> +    netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_IPV4_CKSUM_OFFLOAD,
> +                                   NETDEV_TX_OFFLOAD_IPV4_CKSUM);
> +    netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_TCP_CKSUM_OFFLOAD,
> +                                   NETDEV_TX_OFFLOAD_TCP_CKSUM);
> +    netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_UDP_CKSUM_OFFLOAD,
> +                                   NETDEV_TX_OFFLOAD_UDP_CKSUM);
> +    netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_SCTP_CKSUM_OFFLOAD,
> +                                   NETDEV_TX_OFFLOAD_SCTP_CKSUM);
> +    netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_TSO_OFFLOAD,
> +                                   NETDEV_TX_OFFLOAD_TCP_TSO);
> +}
> +
>  static int
>  dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq)
>  {
> @@ -1044,11 +1075,20 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq)
>          conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_IPV4_CKSUM;
>      }
>  
> +    if (dev->hw_ol_features & NETDEV_TX_TCP_CKSUM_OFFLOAD) {
> +        conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_TCP_CKSUM;
> +    }
> +
> +    if (dev->hw_ol_features & NETDEV_TX_UDP_CKSUM_OFFLOAD) {
> +        conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_UDP_CKSUM;
> +    }
> +
> +    if (dev->hw_ol_features & NETDEV_TX_SCTP_CKSUM_OFFLOAD) {
> +        conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_SCTP_CKSUM;
> +    }
> +
>      if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) {
> -        conf.txmode.offloads |= DPDK_TX_TSO_OFFLOAD_FLAGS;
> -        if (dev->hw_ol_features & NETDEV_TX_SCTP_CHECKSUM_OFFLOAD) {
> -            conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_SCTP_CKSUM;
> -        }
> +        conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_TCP_TSO;
>      }
>  
>      /* Limit configured rss hash functions to only those supported
> @@ -1154,7 +1194,6 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
>      struct rte_ether_addr eth_addr;
>      int diag;
>      int n_rxq, n_txq;
> -    uint32_t tx_tso_offload_capa = DPDK_TX_TSO_OFFLOAD_FLAGS;
>      uint32_t rx_chksm_offload_capa = RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
>                                       RTE_ETH_RX_OFFLOAD_TCP_CKSUM |
>                                       RTE_ETH_RX_OFFLOAD_IPV4_CKSUM;
> @@ -1190,18 +1229,28 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
>          dev->hw_ol_features &= ~NETDEV_TX_IPV4_CKSUM_OFFLOAD;
>      }
>  
> +    if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_CKSUM) {
> +        dev->hw_ol_features |= NETDEV_TX_TCP_CKSUM_OFFLOAD;
> +    } else {
> +        dev->hw_ol_features &= ~NETDEV_TX_TCP_CKSUM_OFFLOAD;
> +    }
> +
> +    if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_UDP_CKSUM) {
> +        dev->hw_ol_features |= NETDEV_TX_UDP_CKSUM_OFFLOAD;
> +    } else {
> +        dev->hw_ol_features &= ~NETDEV_TX_UDP_CKSUM_OFFLOAD;
> +    }
> +
> +    if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_SCTP_CKSUM) {
> +        dev->hw_ol_features |= NETDEV_TX_SCTP_CKSUM_OFFLOAD;
> +    } else {
> +        dev->hw_ol_features &= ~NETDEV_TX_SCTP_CKSUM_OFFLOAD;
> +    }
> +
>      dev->hw_ol_features &= ~NETDEV_TX_TSO_OFFLOAD;
>      if (userspace_tso_enabled()) {
> -        if ((info.tx_offload_capa & tx_tso_offload_capa)
> -            == tx_tso_offload_capa) {
> +        if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_TSO) {
>              dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD;
> -            if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_SCTP_CKSUM) {
> -                dev->hw_ol_features |= NETDEV_TX_SCTP_CHECKSUM_OFFLOAD;
> -            } else {
> -                VLOG_WARN("%s: Tx SCTP checksum offload is not supported, "
> -                          "SCTP packets sent to this device will be dropped",
> -                          netdev_get_name(&dev->up));
> -            }
>          } else {
>              VLOG_WARN("%s: Tx TSO offload is not supported.",
>                        netdev_get_name(&dev->up));
> @@ -2213,6 +2262,7 @@ netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf)
>  
>      mbuf->l2_len = (char *) dp_packet_l3(pkt) - (char *) dp_packet_eth(pkt);
>      mbuf->l3_len = (char *) dp_packet_l4(pkt) - (char *) dp_packet_l3(pkt);
> +    mbuf->l4_len = 0;
>      mbuf->outer_l2_len = 0;
>      mbuf->outer_l3_len = 0;
>  
> @@ -4149,6 +4199,7 @@ new_device(int vid)
>          ovs_mutex_lock(&dev->mutex);
>          if (nullable_string_is_equal(ifname, dev->vhost_id)) {
>              uint32_t qp_num = rte_vhost_get_vring_num(vid) / VIRTIO_QNUM;
> +            uint64_t features;
>  
>              /* Get NUMA information */
>              newnode = rte_vhost_get_numa_node(vid);
> @@ -4173,6 +4224,36 @@ new_device(int vid)
>                  dev->vhost_reconfigured = true;
>              }
>  
> +            if (rte_vhost_get_negotiated_features(vid, &features)) {
> +                VLOG_INFO("Error checking guest features for "
> +                          "vHost Device '%s'", dev->vhost_id);
> +            } else {
> +                if (features & (1ULL << VIRTIO_NET_F_GUEST_CSUM)) {
> +                    dev->hw_ol_features |= NETDEV_TX_TCP_CKSUM_OFFLOAD;
> +                    dev->hw_ol_features |= NETDEV_TX_UDP_CKSUM_OFFLOAD;
> +                    dev->hw_ol_features |= NETDEV_TX_SCTP_CKSUM_OFFLOAD;
> +                }
> +
> +                if (userspace_tso_enabled()) {
> +                    if (features & (1ULL << VIRTIO_NET_F_GUEST_TSO4)
> +                        && features & (1ULL << VIRTIO_NET_F_GUEST_TSO6)) {
> +
> +                        dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD;
> +                        VLOG_DBG("%s: TSO enabled on vhost port",
> +                                 netdev_get_name(&dev->up));
> +                    } else {
> +                        VLOG_WARN("%s: Tx TSO offload is not supported.",
> +                                  netdev_get_name(&dev->up));
> +                    }
> +                }
> +            }
> +
> +            /* There is no support in virtio net to offload IPv4 csum,
> +             * but the vhost library handles IPv4 csum offloading fine. */
> +            dev->hw_ol_features |= NETDEV_TX_IPV4_CKSUM_OFFLOAD;
> +
> +            netdev_dpdk_update_netdev_flags(dev);
> +
>              ovsrcu_index_set(&dev->vid, vid);
>              exists = true;
>  
> @@ -4236,6 +4317,14 @@ destroy_device(int vid)
>                     dev->up.n_rxq * sizeof *dev->vhost_rxq_enabled);
>              netdev_dpdk_txq_map_clear(dev);
>  
> +            /* Clear offload capabilities before next new_device. */
> +            dev->hw_ol_features &= ~NETDEV_TX_IPV4_CKSUM_OFFLOAD;
> +            dev->hw_ol_features &= ~NETDEV_TX_TCP_CKSUM_OFFLOAD;
> +            dev->hw_ol_features &= ~NETDEV_TX_UDP_CKSUM_OFFLOAD;
> +            dev->hw_ol_features &= ~NETDEV_TX_SCTP_CKSUM_OFFLOAD;
> +            dev->hw_ol_features &= ~NETDEV_TX_TSO_OFFLOAD;
> +            netdev_dpdk_update_netdev_flags(dev);
> +
>              netdev_change_seq_changed(&dev->up);
>              ovs_mutex_unlock(&dev->mutex);
>              exists = true;
> @@ -5246,22 +5335,7 @@ netdev_dpdk_reconfigure(struct netdev *netdev)
>      }
>  
>      err = dpdk_eth_dev_init(dev);
> -
> -    if (dev->hw_ol_features & NETDEV_TX_IPV4_CKSUM_OFFLOAD) {
> -        netdev->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM;
> -    } else {
> -        netdev->ol_flags &= ~NETDEV_TX_OFFLOAD_IPV4_CKSUM;
> -    }
> -
> -    if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) {
> -        netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO;
> -        netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM;
> -        netdev->ol_flags |= NETDEV_TX_OFFLOAD_UDP_CKSUM;
> -        netdev->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM;
> -        if (dev->hw_ol_features & NETDEV_TX_SCTP_CHECKSUM_OFFLOAD) {
> -            netdev->ol_flags |= NETDEV_TX_OFFLOAD_SCTP_CKSUM;
> -        }
> -    }
> +    netdev_dpdk_update_netdev_flags(dev);
>  
>      /* If both requested and actual hwaddr were previously
>       * unset (initialized to 0), then first device init above
> @@ -5308,11 +5382,6 @@ dpdk_vhost_reconfigure_helper(struct netdev_dpdk *dev)
>      memset(dev->sw_stats, 0, sizeof *dev->sw_stats);
>      rte_spinlock_unlock(&dev->stats_lock);
>  
> -    if (userspace_tso_enabled()) {
> -        dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD;
> -        VLOG_DBG("%s: TSO enabled on vhost port", netdev_get_name(&dev->up));
> -    }
> -
>      netdev_dpdk_remap_txqs(dev);
>  
>      if (netdev_dpdk_get_vid(dev) >= 0) {
> @@ -5333,6 +5402,8 @@ dpdk_vhost_reconfigure_helper(struct netdev_dpdk *dev)
>          }
>      }
>  
> +    netdev_dpdk_update_netdev_flags(dev);
> +
>      return 0;
>  }
>  
> @@ -5354,8 +5425,6 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev)
>  {
>      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>      int err;
> -    uint64_t vhost_flags = 0;
> -    uint64_t vhost_unsup_flags;
>  
>      ovs_mutex_lock(&dev->mutex);
>  
> @@ -5365,6 +5434,9 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev)
>       *  2. A path has been specified.
>       */
>      if (!(dev->vhost_driver_flags & RTE_VHOST_USER_CLIENT) && dev->vhost_id) {
> +        uint64_t virtio_unsup_features = 0;
> +        uint64_t vhost_flags = 0;
> +
>          /* Register client-mode device. */
>          vhost_flags |= RTE_VHOST_USER_CLIENT;
>  
> @@ -5411,22 +5483,22 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev)
>          }
>  
>          if (userspace_tso_enabled()) {
> -            netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO;
> -            netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM;
> -            netdev->ol_flags |= NETDEV_TX_OFFLOAD_UDP_CKSUM;
> -            netdev->ol_flags |= NETDEV_TX_OFFLOAD_SCTP_CKSUM;
> -            netdev->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM;
> -            vhost_unsup_flags = 1ULL << VIRTIO_NET_F_HOST_ECN
> -                                | 1ULL << VIRTIO_NET_F_HOST_UFO;
> +            virtio_unsup_features = 1ULL << VIRTIO_NET_F_HOST_ECN
> +                                    | 1ULL << VIRTIO_NET_F_HOST_UFO;
> +            VLOG_DBG("%s: TSO enabled on vhost port",
> +                     netdev_get_name(&dev->up));
>          } else {
> -            /* This disables checksum offloading and all the features
> -             * that depends on it (TSO, UFO, ECN) according to virtio
> -             * specification. */
> -            vhost_unsup_flags = 1ULL << VIRTIO_NET_F_CSUM;
> +            /* Advertise checksum offloading to the guest, but explicitly
> +             * disable TSO and friends.
> +             * NOTE: we can't disable HOST_ECN which may have been wrongly
> +             * negotiated by a running guest. */
> +            virtio_unsup_features = 1ULL << VIRTIO_NET_F_HOST_TSO4
> +                                    | 1ULL << VIRTIO_NET_F_HOST_TSO6
> +                                    | 1ULL << VIRTIO_NET_F_HOST_UFO;
>          }
>  
>          err = rte_vhost_driver_disable_features(dev->vhost_id,
> -                                                vhost_unsup_flags);
> +                                                virtio_unsup_features);
>          if (err) {
>              VLOG_ERR("rte_vhost_driver_disable_features failed for "
>                       "vhost user client port: %s\n", dev->up.name);
> diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
> index 36620199e..8b4a327ae 100644
> --- a/lib/netdev-linux.c
> +++ b/lib/netdev-linux.c
> @@ -938,14 +938,6 @@ netdev_linux_common_construct(struct netdev *netdev_)
>      netnsid_unset(&netdev->netnsid);
>      ovs_mutex_init(&netdev->mutex);
>  
> -    if (userspace_tso_enabled()) {
> -        netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO;
> -        netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM;
> -        netdev_->ol_flags |= NETDEV_TX_OFFLOAD_UDP_CKSUM;
> -        netdev_->ol_flags |= NETDEV_TX_OFFLOAD_SCTP_CKSUM;
> -        netdev_->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM;
> -    }
> -
>      return 0;
>  }
>  
> @@ -959,6 +951,16 @@ netdev_linux_construct(struct netdev *netdev_)
>          return error;
>      }
>  
> +    /* The socket interface doesn't offer the option to enable only
> +     * csum offloading without TSO. */
> +    if (userspace_tso_enabled()) {
> +        netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO;
> +        netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM;
> +        netdev_->ol_flags |= NETDEV_TX_OFFLOAD_UDP_CKSUM;
> +        netdev_->ol_flags |= NETDEV_TX_OFFLOAD_SCTP_CKSUM;
> +        netdev_->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM;
> +    }
> +
>      error = get_flags(&netdev->up, &netdev->ifi_flags);
>      if (error == ENODEV) {
>          if (netdev->up.netdev_class != &netdev_internal_class) {
> @@ -987,6 +989,7 @@ netdev_linux_construct_tap(struct netdev *netdev_)
>      struct netdev_linux *netdev = netdev_linux_cast(netdev_);
>      static const char tap_dev[] = "/dev/net/tun";
>      const char *name = netdev_->name;
> +    unsigned long oflags;
>      struct ifreq ifr;
>  
>      int error = netdev_linux_common_construct(netdev_);
> @@ -1004,10 +1007,7 @@ netdev_linux_construct_tap(struct netdev *netdev_)
>  
>      /* Create tap device. */
>      get_flags(&netdev->up, &netdev->ifi_flags);
> -    ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
> -    if (userspace_tso_enabled()) {
> -        ifr.ifr_flags |= IFF_VNET_HDR;
> -    }
> +    ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
>  
>      ovs_strzcpy(ifr.ifr_name, name, sizeof ifr.ifr_name);
>      if (ioctl(netdev->tap_fd, TUNSETIFF, &ifr) == -1) {
> @@ -1030,21 +1030,22 @@ netdev_linux_construct_tap(struct netdev *netdev_)
>          goto error_close;
>      }
>  
> +    oflags = TUN_F_CSUM;
>      if (userspace_tso_enabled()) {
> -        /* Old kernels don't support TUNSETOFFLOAD. If TUNSETOFFLOAD is
> -         * available, it will return EINVAL when a flag is unknown.
> -         * Therefore, try enabling offload with no flags to check
> -         * if TUNSETOFFLOAD support is available or not. */
> -        if (ioctl(netdev->tap_fd, TUNSETOFFLOAD, 0) == 0 || errno != EINVAL) {
> -            unsigned long oflags = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6;
> -
> -            if (ioctl(netdev->tap_fd, TUNSETOFFLOAD, oflags) == -1) {
> -                VLOG_WARN("%s: enabling tap offloading failed: %s", name,
> -                          ovs_strerror(errno));
> -                error = errno;
> -                goto error_close;
> -            }
> -        }
> +        oflags |= (TUN_F_TSO4 | TUN_F_TSO6);
> +    }
> +
> +    if (ioctl(netdev->tap_fd, TUNSETOFFLOAD, oflags) == 0) {
> +        netdev_->ol_flags |= (NETDEV_TX_OFFLOAD_IPV4_CKSUM
> +                              | NETDEV_TX_OFFLOAD_TCP_CKSUM
> +                              | NETDEV_TX_OFFLOAD_UDP_CKSUM);
> +
> +        if (userspace_tso_enabled()) {
> +            netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO;
> +         }
> +    } else {
> +       VLOG_WARN("%s: Disabling hardware offloading: %s", name,
> +                 ovs_strerror(errno));

This wording is not great.  'Hardware offloading' is usually associated
with flow offloading and might be misinterpreted.
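Maybe something like this instead (just a wording suggestion):

    VLOG_WARN("%s: Disabling checksum offloading: %s", name,
              ovs_strerror(errno));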

>      }
>  
>      netdev->present = true;
> @@ -1344,18 +1345,22 @@ netdev_linux_batch_rxq_recv_sock(struct netdev_rxq_linux *rx, int mtu,
>              pkt = buffers[i];
>           }
>  
> -        if (virtio_net_hdr_size && netdev_linux_parse_vnet_hdr(pkt)) {
> -            struct netdev *netdev_ = netdev_rxq_get_netdev(&rx->up);
> -            struct netdev_linux *netdev = netdev_linux_cast(netdev_);
> +        if (virtio_net_hdr_size) {
> +            int ret = netdev_linux_parse_vnet_hdr(pkt);
> +            if (OVS_UNLIKELY(ret)) {
> +                struct netdev *netdev_ = netdev_rxq_get_netdev(&rx->up);
> +                struct netdev_linux *netdev = netdev_linux_cast(netdev_);
>  
> -            /* Unexpected error situation: the virtio header is not present
> -             * or corrupted. Drop the packet but continue in case next ones
> -             * are correct. */
> -            dp_packet_delete(pkt);
> -            netdev->rx_dropped += 1;
> -            VLOG_WARN_RL(&rl, "%s: Dropped packet: Invalid virtio net header",
> -                         netdev_get_name(netdev_));
> -            continue;
> +                /* Unexpected error situation: the virtio header is not
> +                 * present or corrupted or contains unsupported features.
> +                 * Drop the packet but continue in case next ones are
> +                 * correct. */
> +                dp_packet_delete(pkt);
> +                netdev->rx_dropped += 1;
> +                VLOG_WARN_RL(&rl, "%s: Dropped packet: %s",
> +                             netdev_get_name(netdev_), ovs_strerror(ret));

We should probably still mention that it was an error in the virtio net
header.  I doubt the error code provides sufficient information.
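For example, something along these lines (sketch):

    VLOG_WARN_RL(&rl, "%s: Dropped packet: invalid virtio net header (%s)",
                 netdev_get_name(netdev_), ovs_strerror(ret));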

<snip>

> diff --git a/lib/odp-execute-avx512.c b/lib/odp-execute-avx512.c
> index 93b6b6ccc..ebb13d2d1 100644
> --- a/lib/odp-execute-avx512.c
> +++ b/lib/odp-execute-avx512.c
> @@ -485,9 +485,11 @@ action_avx512_ipv4_set_addrs(struct dp_packet_batch *batch,
>              size_t l4_size = dp_packet_l4_size(packet);
>  
>              if (nh->ip_proto == IPPROTO_UDP && l4_size >= UDP_HEADER_LEN) {
> -                /* New UDP checksum. */
>                  struct udp_header *uh = dp_packet_l4(packet);
> -                if (uh->udp_csum) {
> +                if (dp_packet_hwol_l4_is_udp(packet)) {
> +                    dp_packet_ol_reset_l4_csum_good(packet);
> +                } else if (uh->udp_csum) {
> +                    /* New UDP checksum. */
>                      uint16_t old_udp_checksum = ~uh->udp_csum;
>                      uint32_t udp_checksum = old_udp_checksum + delta_checksum;
>                      udp_checksum = csum_finish(udp_checksum);
> @@ -500,13 +502,17 @@ action_avx512_ipv4_set_addrs(struct dp_packet_batch *batch,
>                  }
>              } else if (nh->ip_proto == IPPROTO_TCP &&
>                         l4_size >= TCP_HEADER_LEN) {
> -                /* New TCP checksum. */
> -                struct tcp_header *th = dp_packet_l4(packet);
> -                uint16_t old_tcp_checksum = ~th->tcp_csum;
> -                uint32_t tcp_checksum = old_tcp_checksum + delta_checksum;
> -                tcp_checksum = csum_finish(tcp_checksum);
> -
> -                th->tcp_csum = tcp_checksum;
> +                if (dp_packet_hwol_l4_is_tcp(packet)) {
> +                    dp_packet_ol_reset_l4_csum_good(packet);
> +                } else {
> +                    /* New TCP checksum. */
> +                    struct tcp_header *th = dp_packet_l4(packet);
> +                    uint16_t old_tcp_checksum = ~th->tcp_csum;
> +                    uint32_t tcp_checksum = old_tcp_checksum + delta_checksum;
> +                    tcp_checksum = csum_finish(tcp_checksum);
> +
> +                    th->tcp_csum = tcp_checksum;
> +                }

This covers IPv4, but we also have IPv6.  See action_avx512_set_ipv6().
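Roughly the same kind of guard would likely be needed there, e.g. (sketch,
assuming the IPv6 path recomputes the TCP/UDP checksum in a similar way):

    if (dp_packet_hwol_l4_is_tcp(packet)) {
        dp_packet_ol_reset_l4_csum_good(packet);
    } else {
        /* Recompute the TCP checksum with delta_checksum as before. */
        ...
    }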

Best regards, Ilya Maximets.
