With virtio, doing TSO requires modifying the network
packet data:
- the DPDK API requires setting the l4 checksum to an
  Intel-NIC-like pseudo header checksum that does
  not include the ip length
- the virtio peer expects that the l4 checksum is
  a standard pseudo header checksum.

This is a problem with shared packets, because they
should not be modified.

This patch fixes this issue by copying the headers into
a linear buffer in that case. This buffer is located in
the virtio_tx_region, at the same place where the
virtio header is stored.

The size of this buffer is set to 256, which should
be enough in all cases:
  sizeof(ethernet) + sizeof(vlan) * 2 + sizeof(ip6) +
    sizeof(ip6-ext) + sizeof(tcp) + sizeof(tcp-opts)
  = 14 + 8 + 40 + sizeof(ip6-ext) + 40 + sizeof(tcp-opts)
  = 102 + sizeof(ip6-ext) + sizeof(tcp-opts)

Fixes: 696573046e9e ("net/virtio: support TSO")

Signed-off-by: Olivier Matz <olivier.matz at 6wind.com>
---
 drivers/net/virtio/virtio_rxtx.c | 119 +++++++++++++++++++++++++++------------
 drivers/net/virtio/virtqueue.h   |   2 +
 2 files changed, 85 insertions(+), 36 deletions(-)

diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 22d97a4..577c775 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -211,43 +211,73 @@ virtqueue_enqueue_recv_refill(struct virtqueue *vq, 
struct rte_mbuf *cookie)

 /* When doing TSO, the IP length is not included in the pseudo header
  * checksum of the packet given to the PMD, but for virtio it is
- * expected.
+ * expected. Fix the mbuf or a copy if the mbuf is shared.
  */
-static void
-virtio_tso_fix_cksum(struct rte_mbuf *m)
+static unsigned int
+virtio_tso_fix_cksum(struct rte_mbuf *m, char *hdr, size_t hdr_sz)
 {
-       /* common case: header is not fragmented */
-       if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len +
-                       m->l4_len)) {
-               struct ipv4_hdr *iph;
-               struct ipv6_hdr *ip6h;
-               struct tcp_hdr *th;
-               uint16_t prev_cksum, new_cksum, ip_len, ip_paylen;
-               uint32_t tmp;
-
-               iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
-               th = RTE_PTR_ADD(iph, m->l3_len);
-               if ((iph->version_ihl >> 4) == 4) {
-                       iph->hdr_checksum = 0;
-                       iph->hdr_checksum = rte_ipv4_cksum(iph);
-                       ip_len = iph->total_length;
-                       ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
-                               m->l3_len);
-               } else {
-                       ip6h = (struct ipv6_hdr *)iph;
-                       ip_paylen = ip6h->payload_len;
+       struct ipv4_hdr *iph, iph_copy;
+       struct ipv6_hdr *ip6h = NULL, ip6h_copy;
+       struct tcp_hdr *th, th_copy;
+       size_t hdrlen = m->l2_len + m->l3_len + m->l4_len;
+       uint16_t prev_cksum, new_cksum, ip_len, ip_paylen;
+       uint32_t tmp;
+       int shared = 0;
+
+       /* mbuf is read-only, we need to copy the headers in a linear buffer */
+       if (unlikely(rte_pktmbuf_data_is_shared(m, 0, hdrlen))) {
+               shared = 1;
+
+               /* network headers are too big, there's nothing we can do */
+               if (hdrlen > hdr_sz)
+                       return 0;
+
+               rte_pktmbuf_read_copy(m, 0, hdrlen, hdr);
+               iph = (struct ipv4_hdr *)(hdr + m->l2_len);
+               ip6h = (struct ipv6_hdr *)(hdr + m->l2_len);
+               th = (struct tcp_hdr *)(hdr + m->l2_len + m->l3_len);
+       } else {
+               iph = rte_pktmbuf_read(m, m->l2_len, sizeof(*iph), &iph_copy);
+               th = rte_pktmbuf_read(m, m->l2_len + m->l3_len, sizeof(*th),
+                       &th_copy);
+       }
+
+       if ((iph->version_ihl >> 4) == 4) {
+               iph->hdr_checksum = 0;
+               iph->hdr_checksum = rte_ipv4_cksum(iph);
+               ip_len = iph->total_length;
+               ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
+                       m->l3_len);
+       } else {
+               if (!shared) {
+                       ip6h = rte_pktmbuf_read(m, m->l2_len, sizeof(*ip6h),
+                               &ip6h_copy);
                }
+               ip_paylen = ip6h->payload_len;
+       }

-               /* calculate the new phdr checksum not including ip_paylen */
-               prev_cksum = th->cksum;
-               tmp = prev_cksum;
-               tmp += ip_paylen;
-               tmp = (tmp & 0xffff) + (tmp >> 16);
-               new_cksum = tmp;
+       /* calculate the new phdr checksum not including ip_paylen */
+       prev_cksum = th->cksum;
+       tmp = prev_cksum;
+       tmp += ip_paylen;
+       tmp = (tmp & 0xffff) + (tmp >> 16);
+       new_cksum = tmp;

-               /* replace it in the packet */
-               th->cksum = new_cksum;
-       }
+       /* replace it in the header */
+       th->cksum = new_cksum;
+
+       /* the update was done in the linear buffer, return */
+       if (shared)
+               return hdrlen;
+
+       /* copy from local buffer into mbuf if required */
+       if ((iph->version_ihl >> 4) == 4)
+               rte_pktmbuf_write(m, m->l2_len, sizeof(*iph), iph);
+       else
+               rte_pktmbuf_write(m, m->l2_len, sizeof(*ip6h), ip6h);
+       rte_pktmbuf_write(m, m->l2_len + m->l3_len, sizeof(*th), th);
+
+       return 0;
 }

 static inline int
@@ -268,7 +298,9 @@ virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct 
rte_mbuf *cookie,
        struct vring_desc *start_dp;
        uint16_t seg_num = cookie->nb_segs;
        uint16_t head_idx, idx;
+       uint16_t hdr_idx = 0;
        uint16_t head_size = vq->hw->vtnet_hdr_size;
+       unsigned int offset = 0;
        struct virtio_net_hdr *hdr;
        int offload;

@@ -303,6 +335,8 @@ virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct 
rte_mbuf *cookie,

                /* loop below will fill in rest of the indirect elements */
                start_dp = txr[idx].tx_indir;
+               hdr_idx = 0;
+               start_dp[hdr_idx].len = vq->hw->vtnet_hdr_size;
                idx = 1;
        } else {
                /* setup first tx ring slot to point to header
@@ -313,7 +347,7 @@ virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct 
rte_mbuf *cookie,
                start_dp[idx].len   = vq->hw->vtnet_hdr_size;
                start_dp[idx].flags = VRING_DESC_F_NEXT;
                hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
-
+               hdr_idx = idx;
                idx = start_dp[idx].next;
        }

@@ -345,7 +379,14 @@ virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct 
rte_mbuf *cookie,

                /* TCP Segmentation Offload */
                if (cookie->ol_flags & PKT_TX_TCP_SEG) {
-                       virtio_tso_fix_cksum(cookie);
+                       offset = virtio_tso_fix_cksum(cookie,
+                               RTE_PTR_ADD(hdr, start_dp[hdr_idx].len),
+                               VIRTIO_MAX_HDR_SZ);
+                       if (offset > 0) {
+                               RTE_ASSERT(can_push != 0);
+                               start_dp[hdr_idx].len += offset;
+                       }
+
                        hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ?
                                VIRTIO_NET_HDR_GSO_TCPV6 :
                                VIRTIO_NET_HDR_GSO_TCPV4;
@@ -362,10 +403,16 @@ virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct 
rte_mbuf *cookie,
        }

        do {
-               start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
-               start_dp[idx].len   = cookie->data_len;
+               if (offset > cookie->data_len) {
+                       offset -= cookie->data_len;
+                       continue;
+               }
+               start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq) +
+                       offset;
+               start_dp[idx].len   = cookie->data_len - offset;
                start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0;
                idx = start_dp[idx].next;
+               offset = 0;
        } while ((cookie = cookie->next) != NULL);

        if (use_indirect)
diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
index f0bb089..edfe0dd 100644
--- a/drivers/net/virtio/virtqueue.h
+++ b/drivers/net/virtio/virtqueue.h
@@ -254,8 +254,10 @@ struct virtio_net_hdr_mrg_rxbuf {

 /* Region reserved to allow for transmit header and indirect ring */
 #define VIRTIO_MAX_TX_INDIRECT 8
+#define VIRTIO_MAX_HDR_SZ 256
 struct virtio_tx_region {
        struct virtio_net_hdr_mrg_rxbuf tx_hdr;
+       char net_headers[VIRTIO_MAX_HDR_SZ]; /* for offload if mbuf is RO */
        struct vring_desc tx_indir[VIRTIO_MAX_TX_INDIRECT]
                           __attribute__((__aligned__(16)));
 };
-- 
2.8.1

Reply via email to