As the virtio-net offload API is used for netdev-linux ports, but
provides no information about the potentially encapsulated protocol
concerned by a checksum request, specific informations from this netdev-
specific implementation is propagated into OVS code, and must be
carefully evaluated when some tunnel gets decapsulated.

This induces a cost in "normal" processing path, while the netdev-linux
path is not performance critical.

This patch removes such specific information, yet try harder to parse
the packet on the Rx side and set offload flags accordingly for non
encapsulated traffic.
If the parsing fails, resolve the checksum right away and
mark the packet as good.

Signed-off-by: David Marchand <[email protected]>
---
Changes since v1:
- fixed patch reordering issue: offloads fields was being added instead
  of later in this series,

---
 lib/dp-packet.c                  |  3 -
 lib/dp-packet.h                  | 24 --------
 lib/dpif-netdev-extract-avx512.c |  2 -
 lib/flow.c                       |  3 -
 lib/netdev-linux.c               | 97 ++++++++++++++++++++++++--------
 lib/netdev-native-tnl.c          |  6 +-
 6 files changed, 75 insertions(+), 60 deletions(-)

diff --git a/lib/dp-packet.c b/lib/dp-packet.c
index dad0d7be3a..6a9bfd63ba 100644
--- a/lib/dp-packet.c
+++ b/lib/dp-packet.c
@@ -39,9 +39,6 @@ dp_packet_init__(struct dp_packet *b, size_t allocated, enum 
dp_packet_source so
     dp_packet_init_specific(b);
     /* By default assume the packet type to be Ethernet. */
     b->packet_type = htonl(PT_ETH);
-    /* Reset csum start and offset. */
-    b->csum_start = 0;
-    b->csum_offset = 0;
 }
 
 static void
diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index 549802167d..084e89bd99 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -178,8 +178,6 @@ struct dp_packet {
                                       or UINT16_MAX. */
     uint32_t cutlen;               /* length in bytes to cut from the end. */
     ovs_be32 packet_type;          /* Packet type as defined in OpenFlow */
-    uint16_t csum_start;           /* Position to start checksumming from. */
-    uint16_t csum_offset;          /* Offset to place checksum. */
     union {
         struct pkt_metadata md;
         uint64_t data[DP_PACKET_CONTEXT_SIZE / 8];
@@ -1539,34 +1537,12 @@ dp_packet_ol_set_l4_csum_bad(struct dp_packet *p)
     *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_RX_L4_CKSUM_BAD;
 }
 
-/* Marks packet 'p' with good integrity if checksum offload locations
- * were provided. In the case of encapsulated packets, these values may
- * be deeper into the packet than OVS might expect. But the packet
- * should still be considered to have good integrity.
- * The 'csum_start' is the offset from the begin of the packet headers.
- * The 'csum_offset' is the offset from start to place the checksum.
- * The csum_start and csum_offset fields are set from the virtio_net_hdr
- * struct that may be provided by a netdev on packet ingress. */
-static inline void
-dp_packet_ol_l4_csum_check_partial(struct dp_packet *p)
-{
-    if (p->csum_start && p->csum_offset) {
-        dp_packet_ol_set_l4_csum_partial(p);
-    }
-}
-
 static inline void
 dp_packet_reset_packet(struct dp_packet *b, int off)
 {
     dp_packet_set_size(b, dp_packet_size(b) - off);
     dp_packet_set_data(b, ((unsigned char *) dp_packet_data(b) + off));
     dp_packet_reset_offsets(b);
-
-    if (b->csum_start >= off && b->csum_offset) {
-        /* Adjust values for decapsulation. */
-        b->csum_start -= off;
-        dp_packet_ol_set_l4_csum_partial(b);
-    }
 }
 
 static inline uint32_t ALWAYS_INLINE
diff --git a/lib/dpif-netdev-extract-avx512.c b/lib/dpif-netdev-extract-avx512.c
index 57ca4c71b7..3ae850c2d5 100644
--- a/lib/dpif-netdev-extract-avx512.c
+++ b/lib/dpif-netdev-extract-avx512.c
@@ -776,7 +776,6 @@ mfex_ipv6_set_hwol(struct dp_packet *pkt)
 static void
 mfex_tcp_set_hwol(struct dp_packet *pkt)
 {
-    dp_packet_ol_l4_csum_check_partial(pkt);
     if (dp_packet_l4_checksum_good(pkt)
         || dp_packet_ol_l4_csum_partial(pkt)) {
         dp_packet_hwol_set_csum_tcp(pkt);
@@ -786,7 +785,6 @@ mfex_tcp_set_hwol(struct dp_packet *pkt)
 static void
 mfex_udp_set_hwol(struct dp_packet *pkt)
 {
-    dp_packet_ol_l4_csum_check_partial(pkt);
     if (dp_packet_l4_checksum_good(pkt)
         || dp_packet_ol_l4_csum_partial(pkt)) {
         dp_packet_hwol_set_csum_udp(pkt);
diff --git a/lib/flow.c b/lib/flow.c
index 61e732aff8..80b76d8f87 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -1080,7 +1080,6 @@ miniflow_extract(struct dp_packet *packet, struct 
miniflow *dst)
                     } else if (dl_type == htons(ETH_TYPE_IPV6)) {
                         dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
                     }
-                    dp_packet_ol_l4_csum_check_partial(packet);
                     if (dp_packet_l4_checksum_good(packet)
                         || dp_packet_ol_l4_csum_partial(packet)) {
                         dp_packet_hwol_set_csum_tcp(packet);
@@ -1100,7 +1099,6 @@ miniflow_extract(struct dp_packet *packet, struct 
miniflow *dst)
                 } else if (dl_type == htons(ETH_TYPE_IPV6)) {
                     dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
                 }
-                dp_packet_ol_l4_csum_check_partial(packet);
                 if (dp_packet_l4_checksum_good(packet)
                     || dp_packet_ol_l4_csum_partial(packet)) {
                     if (tunneling) {
@@ -1118,7 +1116,6 @@ miniflow_extract(struct dp_packet *packet, struct 
miniflow *dst)
                 miniflow_push_be16(mf, tp_dst, sctp->sctp_dst);
                 miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
                 miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
-                dp_packet_ol_l4_csum_check_partial(packet);
                 if (dp_packet_l4_checksum_good(packet)
                     || dp_packet_ol_l4_csum_partial(packet)) {
                     dp_packet_hwol_set_csum_sctp(packet);
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index a89f7b8a21..49ed25cbee 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -89,6 +89,7 @@ COVERAGE_DEFINE(netdev_get_hwaddr);
 COVERAGE_DEFINE(netdev_set_hwaddr);
 COVERAGE_DEFINE(netdev_get_ethtool);
 COVERAGE_DEFINE(netdev_set_ethtool);
+COVERAGE_DEFINE(netdev_linux_unknown_l4_csum);
 
 
 #ifndef IFLA_IF_NETNSID
@@ -7039,49 +7040,64 @@ af_packet_sock(void)
 }
 
 static int
-netdev_linux_parse_l2(struct dp_packet *b, uint16_t *l4proto)
+netdev_linux_parse_packet(struct dp_packet *b, uint16_t *l2_len,
+                          uint16_t *l3_len, uint16_t *l4proto)
 {
     struct eth_header *eth_hdr;
     ovs_be16 eth_type;
-    int l2_len;
 
     eth_hdr = dp_packet_at(b, 0, ETH_HEADER_LEN);
     if (!eth_hdr) {
         return -EINVAL;
     }
 
-    l2_len = ETH_HEADER_LEN;
+    *l2_len = ETH_HEADER_LEN;
     eth_type = eth_hdr->eth_type;
     if (eth_type_vlan(eth_type)) {
-        struct vlan_header *vlan = dp_packet_at(b, l2_len, VLAN_HEADER_LEN);
+        struct vlan_header *vlan = dp_packet_at(b, *l2_len, VLAN_HEADER_LEN);
 
         if (!vlan) {
             return -EINVAL;
         }
 
         eth_type = vlan->vlan_next_type;
-        l2_len += VLAN_HEADER_LEN;
+        *l2_len += VLAN_HEADER_LEN;
     }
 
     if (eth_type == htons(ETH_TYPE_IP)) {
-        struct ip_header *ip_hdr = dp_packet_at(b, l2_len, IP_HEADER_LEN);
+        struct ip_header *ip_hdr = dp_packet_at(b, *l2_len, IP_HEADER_LEN);
 
         if (!ip_hdr) {
             return -EINVAL;
         }
 
+        *l3_len = IP_IHL(ip_hdr->ip_ihl_ver) * 4;
         *l4proto = ip_hdr->ip_proto;
         dp_packet_hwol_set_tx_ipv4(b);
     } else if (eth_type == htons(ETH_TYPE_IPV6)) {
         struct ovs_16aligned_ip6_hdr *nh6;
+        const void *data;
+        uint8_t nw_proto;
+        uint8_t nw_frag;
+        size_t size;
 
-        nh6 = dp_packet_at(b, l2_len, IPV6_HEADER_LEN);
+        nh6 = dp_packet_at(b, *l2_len, IPV6_HEADER_LEN);
         if (!nh6) {
             return -EINVAL;
         }
 
-        *l4proto = nh6->ip6_ctlun.ip6_un1.ip6_un1_nxt;
+        nw_proto = nh6->ip6_ctlun.ip6_un1.ip6_un1_nxt;
+        data = (const char *) nh6 + sizeof *nh6;
+        size = (const char *) dp_packet_tail(b) - (const char *) data;
+        if (!parse_ipv6_ext_hdrs(&data, &size, &nw_proto, &nw_frag,
+                                 NULL, NULL)) {
+            return -EINVAL;
+        }
+        *l3_len = (const char *) data - (const char *) nh6;
+        *l4proto = nw_proto;
         dp_packet_hwol_set_tx_ipv6(b);
+    } else {
+        *l3_len = *l4proto = 0;
     }
 
     return 0;
@@ -7104,25 +7120,60 @@ netdev_linux_parse_vnet_hdr(struct dp_packet *b)
     }
 
     if (vnet->flags == VIRTIO_NET_HDR_F_NEEDS_CSUM) {
-        uint16_t l4proto = 0;
-
-        if (netdev_linux_parse_l2(b, &l4proto)) {
+        uint16_t csum_offset = (OVS_FORCE uint16_t) vnet->csum_offset;
+        uint16_t csum_start = (OVS_FORCE uint16_t) vnet->csum_start;
+        bool l4_csum_partial = false;
+        uint16_t l4proto;
+        uint16_t l2_len;
+        uint16_t l3_len;
+
+        if (netdev_linux_parse_packet(b, &l2_len, &l3_len, &l4proto)) {
             return EINVAL;
         }
 
-        if (l4proto == IPPROTO_UDP) {
-            dp_packet_hwol_set_csum_udp(b);
+        if (csum_start && csum_offset && csum_start == l2_len + l3_len) {
+            if (csum_offset == offsetof(struct tcp_header, tcp_csum)
+                && l4proto == IPPROTO_TCP) {
+                dp_packet_hwol_set_csum_tcp(b);
+                l4_csum_partial = true;
+            } else if (csum_offset == offsetof(struct udp_header, udp_csum)
+                       && l4proto == IPPROTO_UDP) {
+                dp_packet_hwol_set_csum_udp(b);
+                l4_csum_partial = true;
+            } else if (csum_offset == offsetof(struct sctp_header, sctp_csum)
+                       && l4proto == IPPROTO_SCTP) {
+                dp_packet_hwol_set_csum_sctp(b);
+                l4_csum_partial = true;
+            }
+        }
+        if (l4_csum_partial) {
+            dp_packet_ol_set_l4_csum_partial(b);
+        } else {
+            ovs_be16 *csum_l4;
+            void *l4;
+
+            COVERAGE_INC(netdev_linux_unknown_l4_csum);
+
+            csum_l4 = dp_packet_at(b, csum_start + csum_offset,
+                                   sizeof *csum_l4);
+            if (!csum_l4) {
+                return EINVAL;
+            }
+
+            l4 = dp_packet_at(b, csum_start, dp_packet_size(b) - csum_start);
+            *csum_l4 = csum(l4, dp_packet_size(b) - csum_start);
+
+            if (l4proto == IPPROTO_TCP) {
+                dp_packet_hwol_set_csum_tcp(b);
+                dp_packet_ol_set_l4_csum_good(b);
+            } else if (l4proto == IPPROTO_UDP) {
+                dp_packet_hwol_set_csum_udp(b);
+                dp_packet_ol_set_l4_csum_good(b);
+            } else if (l4proto == IPPROTO_SCTP) {
+                dp_packet_hwol_set_csum_sctp(b);
+                dp_packet_ol_set_l4_csum_good(b);
+            }
         }
-        /* The packet has offloaded checksum. However, there is no
-         * additional information like the protocol used, so it would
-         * require to parse the packet here. The checksum starting point
-         * and offset are going to be verified when the packet headers
-         * are parsed during miniflow extraction. */
-        b->csum_start = (OVS_FORCE uint16_t) vnet->csum_start;
-        b->csum_offset = (OVS_FORCE uint16_t) vnet->csum_offset;
-    } else {
-        b->csum_start = 0;
-        b->csum_offset = 0;
     }
 
     int ret = 0;
diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c
index d496813bee..e7a2037722 100644
--- a/lib/netdev-native-tnl.c
+++ b/lib/netdev-native-tnl.c
@@ -338,11 +338,7 @@ netdev_tnl_push_udp_header(const struct netdev *netdev 
OVS_UNUSED,
         } else {
             dp_packet_hwol_set_csum_udp(packet);
         }
-    }
-
-    if (packet->csum_start && packet->csum_offset) {
-        dp_packet_ol_set_l4_csum_partial(packet);
-    } else if (!udp->udp_csum) {
+    } else {
         dp_packet_ol_set_l4_csum_good(packet);
     }
 
-- 
2.48.1

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to