On Thu, Mar 13, 2025 at 8:44 AM David Marchand <david.march...@redhat.com> wrote: > > As the virtio-net offload API is used for netdev-linux ports, but > provides no information about the potentially encapsulated protocol > concerned by a checksum request, specific informations from this netdev- > specific implementation is propagated into OVS code, and must be > carefully evaluated when some tunnel gets decapsulated. > > This induces a cost in "normal" processing path, while the netdev-linux > path is not performance critical. > > This patch removes such specific information, yet try harder to parse > the packet on the Rx side and set offload flags accordingly for non > encapsulated traffic and for encapsulated traffic, fixes the inner > checksum. > > Signed-off-by: David Marchand <david.march...@redhat.com> > --- > Changes since v3: > - dropped setting Tx flags, > > Changes since v1: > - fixed patch reordering issue: offloads fields was being added instead > of later in this series, > > --- > lib/dp-packet.c | 3 - > lib/dp-packet.h | 24 ------- > lib/dpif-netdev-extract-avx512.c | 2 - > lib/flow.c | 6 -- > lib/netdev-linux.c | 113 ++++++++++++++++++++++++++++--- > lib/netdev-native-tnl.c | 6 +- > 6 files changed, 104 insertions(+), 50 deletions(-) > > diff --git a/lib/dp-packet.c b/lib/dp-packet.c > index dad0d7be3a..6a9bfd63ba 100644 > --- a/lib/dp-packet.c > +++ b/lib/dp-packet.c > @@ -39,9 +39,6 @@ dp_packet_init__(struct dp_packet *b, size_t allocated, > enum dp_packet_source so > dp_packet_init_specific(b); > /* By default assume the packet type to be Ethernet. */ > b->packet_type = htonl(PT_ETH); > - /* Reset csum start and offset. */ > - b->csum_start = 0; > - b->csum_offset = 0; > } > > static void > diff --git a/lib/dp-packet.h b/lib/dp-packet.h > index 549802167d..084e89bd99 100644 > --- a/lib/dp-packet.h > +++ b/lib/dp-packet.h > @@ -178,8 +178,6 @@ struct dp_packet { > or UINT16_MAX. */ > uint32_t cutlen; /* length in bytes to cut from the end. */ > ovs_be32 packet_type; /* Packet type as defined in OpenFlow */ > - uint16_t csum_start; /* Position to start checksumming from. */ > - uint16_t csum_offset; /* Offset to place checksum. */ > union { > struct pkt_metadata md; > uint64_t data[DP_PACKET_CONTEXT_SIZE / 8]; > @@ -1539,34 +1537,12 @@ dp_packet_ol_set_l4_csum_bad(struct dp_packet *p) > *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_RX_L4_CKSUM_BAD; > } > > -/* Marks packet 'p' with good integrity if checksum offload locations > - * were provided. In the case of encapsulated packets, these values may > - * be deeper into the packet than OVS might expect. But the packet > - * should still be considered to have good integrity. > - * The 'csum_start' is the offset from the begin of the packet headers. > - * The 'csum_offset' is the offset from start to place the checksum. > - * The csum_start and csum_offset fields are set from the virtio_net_hdr > - * struct that may be provided by a netdev on packet ingress. */ > -static inline void > -dp_packet_ol_l4_csum_check_partial(struct dp_packet *p) > -{ > - if (p->csum_start && p->csum_offset) { > - dp_packet_ol_set_l4_csum_partial(p); > - } > -} > - > static inline void > dp_packet_reset_packet(struct dp_packet *b, int off) > { > dp_packet_set_size(b, dp_packet_size(b) - off); > dp_packet_set_data(b, ((unsigned char *) dp_packet_data(b) + off)); > dp_packet_reset_offsets(b); > - > - if (b->csum_start >= off && b->csum_offset) { > - /* Adjust values for decapsulation. */ > - b->csum_start -= off; > - dp_packet_ol_set_l4_csum_partial(b); > - } > } > > static inline uint32_t ALWAYS_INLINE > diff --git a/lib/dpif-netdev-extract-avx512.c > b/lib/dpif-netdev-extract-avx512.c > index 57ca4c71b7..3ae850c2d5 100644 > --- a/lib/dpif-netdev-extract-avx512.c > +++ b/lib/dpif-netdev-extract-avx512.c > @@ -776,7 +776,6 @@ mfex_ipv6_set_hwol(struct dp_packet *pkt) > static void > mfex_tcp_set_hwol(struct dp_packet *pkt) > { > - dp_packet_ol_l4_csum_check_partial(pkt); > if (dp_packet_l4_checksum_good(pkt) > || dp_packet_ol_l4_csum_partial(pkt)) { > dp_packet_hwol_set_csum_tcp(pkt); > @@ -786,7 +785,6 @@ mfex_tcp_set_hwol(struct dp_packet *pkt) > static void > mfex_udp_set_hwol(struct dp_packet *pkt) > { > - dp_packet_ol_l4_csum_check_partial(pkt); > if (dp_packet_l4_checksum_good(pkt) > || dp_packet_ol_l4_csum_partial(pkt)) { > dp_packet_hwol_set_csum_udp(pkt); > diff --git a/lib/flow.c b/lib/flow.c > index c2e1e4ad6f..988ae6bfdc 100644 > --- a/lib/flow.c > +++ b/lib/flow.c > @@ -1080,7 +1080,6 @@ miniflow_extract(struct dp_packet *packet, struct > miniflow *dst) > } else if (dl_type == htons(ETH_TYPE_IPV6)) { > dp_packet_update_rss_hash_ipv6_tcp_udp(packet); > } > - dp_packet_ol_l4_csum_check_partial(packet); > if (dp_packet_l4_checksum_good(packet) > || dp_packet_ol_l4_csum_partial(packet)) { > dp_packet_hwol_set_csum_tcp(packet); > @@ -1100,7 +1099,6 @@ miniflow_extract(struct dp_packet *packet, struct > miniflow *dst) > } else if (dl_type == htons(ETH_TYPE_IPV6)) { > dp_packet_update_rss_hash_ipv6_tcp_udp(packet); > } > - dp_packet_ol_l4_csum_check_partial(packet); > if (dp_packet_l4_checksum_good(packet) > || dp_packet_ol_l4_csum_partial(packet)) { > if (tunneling) { > @@ -1118,7 +1116,6 @@ miniflow_extract(struct dp_packet *packet, struct > miniflow *dst) > miniflow_push_be16(mf, tp_dst, sctp->sctp_dst); > miniflow_push_be16(mf, ct_tp_src, ct_tp_src); > miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst); > - dp_packet_ol_l4_csum_check_partial(packet); > if (dp_packet_l4_checksum_good(packet) > || dp_packet_ol_l4_csum_partial(packet)) { > dp_packet_hwol_set_csum_sctp(packet); > @@ -1309,20 +1306,17 @@ parse_tcp_flags(struct dp_packet *packet, > if (nw_proto == IPPROTO_TCP && size >= TCP_HEADER_LEN) { > const struct tcp_header *tcp = data; > > - dp_packet_ol_l4_csum_check_partial(packet); > if (dp_packet_l4_checksum_good(packet) > || dp_packet_ol_l4_csum_partial(packet)) { > dp_packet_hwol_set_csum_tcp(packet); > } > return TCP_FLAGS(tcp->tcp_ctl); > } else if (nw_proto == IPPROTO_UDP && size >= UDP_HEADER_LEN) { > - dp_packet_ol_l4_csum_check_partial(packet); > if (dp_packet_l4_checksum_good(packet) > || dp_packet_ol_l4_csum_partial(packet)) { > dp_packet_hwol_set_csum_udp(packet); > } > } else if (nw_proto == IPPROTO_SCTP && size >= SCTP_HEADER_LEN) { > - dp_packet_ol_l4_csum_check_partial(packet); > if (dp_packet_l4_checksum_good(packet) > || dp_packet_ol_l4_csum_partial(packet)) { > dp_packet_hwol_set_csum_sctp(packet); > diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c > index af7438f1ee..c1c0addab7 100644 > --- a/lib/netdev-linux.c > +++ b/lib/netdev-linux.c > @@ -89,6 +89,7 @@ COVERAGE_DEFINE(netdev_get_hwaddr); > COVERAGE_DEFINE(netdev_set_hwaddr); > COVERAGE_DEFINE(netdev_get_ethtool); > COVERAGE_DEFINE(netdev_set_ethtool); > +COVERAGE_DEFINE(netdev_linux_unknown_l4_csum); > > > #ifndef IFLA_IF_NETNSID > @@ -7038,6 +7039,70 @@ af_packet_sock(void) > return sock; > } > > +static int > +netdev_linux_parse_packet(struct dp_packet *b, uint16_t *l2_len, > + uint16_t *l3_len, uint16_t *l4proto) > +{ > + struct eth_header *eth_hdr; > + ovs_be16 eth_type; > + > + eth_hdr = dp_packet_at(b, 0, ETH_HEADER_LEN); > + if (!eth_hdr) { > + return -EINVAL; > + } > + > + *l2_len = ETH_HEADER_LEN; > + eth_type = eth_hdr->eth_type; > + if (eth_type_vlan(eth_type)) { > + struct vlan_header *vlan = dp_packet_at(b, *l2_len, VLAN_HEADER_LEN); > + > + if (!vlan) { > + return -EINVAL; > + } > + > + eth_type = vlan->vlan_next_type; > + *l2_len += VLAN_HEADER_LEN; > + } > + > + if (eth_type == htons(ETH_TYPE_IP)) { > + struct ip_header *ip_hdr = dp_packet_at(b, *l2_len, IP_HEADER_LEN); > + > + if (!ip_hdr) { > + return -EINVAL; > + } > + > + *l3_len = IP_IHL(ip_hdr->ip_ihl_ver) * 4; > + *l4proto = ip_hdr->ip_proto; > + dp_packet_hwol_set_tx_ipv4(b); > + } else if (eth_type == htons(ETH_TYPE_IPV6)) { > + struct ovs_16aligned_ip6_hdr *nh6; > + const void *data; > + uint8_t nw_proto; > + uint8_t nw_frag; > + size_t size; > + > + nh6 = dp_packet_at(b, *l2_len, IPV6_HEADER_LEN); > + if (!nh6) { > + return -EINVAL; > + } > + > + nw_proto = nh6->ip6_ctlun.ip6_un1.ip6_un1_nxt; > + data = (const char *) nh6 + sizeof *nh6; > + size = (const char *) dp_packet_tail(b) - (const char *) data; > + if (!parse_ipv6_ext_hdrs(&data, &size, &nw_proto, &nw_frag, > + NULL, NULL)) { > + return -EINVAL; > + } > + *l3_len = (const char *) data - (const char *) nh6; > + *l4proto = nw_proto; > + dp_packet_hwol_set_tx_ipv6(b); > + } else { > + *l3_len = *l4proto = 0; > + } > + > + return 0; > +} > + > /* Initializes packet 'b' with features enabled in the prepended > * struct virtio_net_hdr. Returns 0 if successful, otherwise a > * positive errno value. */ > @@ -7055,16 +7120,44 @@ netdev_linux_parse_vnet_hdr(struct dp_packet *b) > } > > if (vnet->flags == VIRTIO_NET_HDR_F_NEEDS_CSUM) {
Instead of implementing our own checksumming here, why not just use ethtool to disable all checksum offloading? > - /* The packet has offloaded checksum. However, there is no > - * additional information like the protocol used, so it would > - * require to parse the packet here. The checksum starting point > - * and offset are going to be verified when the packet headers > - * are parsed during miniflow extraction. */ > - b->csum_start = (OVS_FORCE uint16_t) vnet->csum_start; > - b->csum_offset = (OVS_FORCE uint16_t) vnet->csum_offset; > - } else { > - b->csum_start = 0; > - b->csum_offset = 0; > + uint16_t csum_offset = (OVS_FORCE uint16_t) vnet->csum_offset; > + uint16_t csum_start = (OVS_FORCE uint16_t) vnet->csum_start; > + uint16_t l4proto; > + uint16_t l2_len; > + uint16_t l3_len; > + > + if (netdev_linux_parse_packet(b, &l2_len, &l3_len, &l4proto)) { > + return EINVAL; > + } > + > + if (csum_start && csum_offset && csum_start == l2_len + l3_len > + && ((csum_offset == offsetof(struct tcp_header, tcp_csum) > + && l4proto == IPPROTO_TCP) > + || (csum_offset == offsetof(struct udp_header, udp_csum) > + && l4proto == IPPROTO_UDP) > + || (csum_offset == offsetof(struct sctp_header, sctp_csum) > + && l4proto == IPPROTO_SCTP))) { > + dp_packet_ol_set_l4_csum_partial(b); > + } else { > + ovs_be16 *csum_l4; > + void *l4; > + > + COVERAGE_INC(netdev_linux_unknown_l4_csum); > + > + csum_l4 = dp_packet_at(b, csum_start + csum_offset, > + sizeof *csum_l4); > + if (!csum_l4) { > + return EINVAL; > + } > + > + l4 = dp_packet_at(b, csum_start, dp_packet_size(b) - csum_start); > + *csum_l4 = csum(l4, dp_packet_size(b) - csum_start); > + > + if (l4proto == IPPROTO_TCP || l4proto == IPPROTO_UDP > + || l4proto == IPPROTO_SCTP) { > + dp_packet_ol_set_l4_csum_good(b); > + } > + } > } > > int ret = 0; > diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c > index cbb875bd26..d49597f380 100644 > --- a/lib/netdev-native-tnl.c > +++ b/lib/netdev-native-tnl.c > @@ -338,11 +338,7 @@ netdev_tnl_push_udp_header(const struct netdev *netdev > OVS_UNUSED, > } else { > dp_packet_hwol_set_csum_udp(packet); > } > - } > - > - if (packet->csum_start && packet->csum_offset) { > - dp_packet_ol_set_l4_csum_partial(packet); > - } else if (!udp->udp_csum) { > + } else { > dp_packet_ol_set_l4_csum_good(packet); > } > > -- > 2.48.1 > _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev