[dpdk-dev] [PATCH v2] vhost: Only access header if offloading is supported in dequeue path

2016-10-14 Thread Maxime Coquelin


On 10/11/2016 11:01 AM, Yuanhan Liu wrote:
> On Tue, Oct 11, 2016 at 09:45:27AM +0200, Maxime Coquelin wrote:
>> @@ -684,12 +699,12 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct 
>> vring_desc *descs,
>>struct rte_mempool *mbuf_pool)
>>  {
>>  struct vring_desc *desc;
>> -uint64_t desc_addr;
>> +uint64_t desc_addr = 0;
>>  uint32_t desc_avail, desc_offset;
>>  uint32_t mbuf_avail, mbuf_offset;
>>  uint32_t cpy_len;
>>  struct rte_mbuf *cur = m, *prev = m;
>> -struct virtio_net_hdr *hdr;
>> +struct virtio_net_hdr *hdr = NULL;
>>  /* A counter to avoid desc dead loop chain */
>>  uint32_t nr_desc = 1;
>>
>> @@ -698,12 +713,14 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct 
>> vring_desc *descs,
>>  (desc->flags & VRING_DESC_F_INDIRECT))
>>  return -1;
>>
>> -desc_addr = gpa_to_vva(dev, desc->addr);
>> -if (unlikely(!desc_addr))
>> -return -1;
>> +if (virtio_net_with_host_offload(dev)) {
>> +desc_addr = gpa_to_vva(dev, desc->addr);
>> +if (unlikely(!desc_addr))
>> +return -1;
>>
>> -hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
>> -rte_prefetch0(hdr);
>> +hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
>> +rte_prefetch0(hdr);
>> +}
>>
>>  /*
>>   * A virtio driver normally uses at least 2 desc buffers
>> @@ -720,18 +737,24 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct 
>> vring_desc *descs,
>>  if (unlikely(!desc_addr))
>>  return -1;
>>
>> -rte_prefetch0((void *)(uintptr_t)desc_addr);
>> -
>>  desc_offset = 0;
>>  desc_avail  = desc->len;
>>  nr_desc+= 1;
>> -
>> -PRINT_PACKET(dev, (uintptr_t)desc_addr, desc->len, 0);
>>  } else {
>> +if (!desc_addr) {
>> +desc_addr = gpa_to_vva(dev, desc->addr);
>> +if (unlikely(!desc_addr))
>> +return -1;
>> +}
>> +
>
> I think this piece of code make things a bit complex. I think what you
> want to achieve is, besides saving hdr prefetch, to save one call to
> gpa_to_vva() for the non-ANY_LAYOUT case. Does that matter too much?
>
> How about just saving the hdr prefetch?
>
>   if (virtio_net_with_host_offload(dev)) {
>   hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
>   rte_prefetch0(hdr);
>   }
Oops, you reply slipped through the cracks...

You're right, it doesn't matter too much, the thing to avoid id
definitely the hdr prefetch and access.

I'm sending a v3 now.

Thanks,
Maxime


[dpdk-dev] [PATCH v2] vhost: Only access header if offloading is supported in dequeue path

2016-10-11 Thread Yuanhan Liu
On Tue, Oct 11, 2016 at 09:45:27AM +0200, Maxime Coquelin wrote:
> @@ -684,12 +699,12 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct 
> vring_desc *descs,
> struct rte_mempool *mbuf_pool)
>  {
>   struct vring_desc *desc;
> - uint64_t desc_addr;
> + uint64_t desc_addr = 0;
>   uint32_t desc_avail, desc_offset;
>   uint32_t mbuf_avail, mbuf_offset;
>   uint32_t cpy_len;
>   struct rte_mbuf *cur = m, *prev = m;
> - struct virtio_net_hdr *hdr;
> + struct virtio_net_hdr *hdr = NULL;
>   /* A counter to avoid desc dead loop chain */
>   uint32_t nr_desc = 1;
>  
> @@ -698,12 +713,14 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct 
> vring_desc *descs,
>   (desc->flags & VRING_DESC_F_INDIRECT))
>   return -1;
>  
> - desc_addr = gpa_to_vva(dev, desc->addr);
> - if (unlikely(!desc_addr))
> - return -1;
> + if (virtio_net_with_host_offload(dev)) {
> + desc_addr = gpa_to_vva(dev, desc->addr);
> + if (unlikely(!desc_addr))
> + return -1;
>  
> - hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
> - rte_prefetch0(hdr);
> + hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
> + rte_prefetch0(hdr);
> + }
>  
>   /*
>* A virtio driver normally uses at least 2 desc buffers
> @@ -720,18 +737,24 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct 
> vring_desc *descs,
>   if (unlikely(!desc_addr))
>   return -1;
>  
> - rte_prefetch0((void *)(uintptr_t)desc_addr);
> -
>   desc_offset = 0;
>   desc_avail  = desc->len;
>   nr_desc+= 1;
> -
> - PRINT_PACKET(dev, (uintptr_t)desc_addr, desc->len, 0);
>   } else {
> + if (!desc_addr) {
> + desc_addr = gpa_to_vva(dev, desc->addr);
> + if (unlikely(!desc_addr))
> + return -1;
> + }
> +

I think this piece of code make things a bit complex. I think what you
want to achieve is, besides saving hdr prefetch, to save one call to
gpa_to_vva() for the non-ANY_LAYOUT case. Does that matter too much?

How about just saving the hdr prefetch?

if (virtio_net_with_host_offload(dev)) {
hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
rte_prefetch0(hdr);
}

--yliu

>   desc_avail  = desc->len - dev->vhost_hlen;
>   desc_offset = dev->vhost_hlen;
>   }
>  
> + rte_prefetch0((void *)(uintptr_t)(desc_addr + desc_offset));
> +
> + PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), desc_avail, 0);
> +
>   mbuf_offset = 0;
>   mbuf_avail  = m->buf_len - RTE_PKTMBUF_HEADROOM;
>   while (1) {
> @@ -795,7 +818,7 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct 
> vring_desc *descs,
>   prev->data_len = mbuf_offset;
>   m->pkt_len+= mbuf_offset;
>  
> - if (hdr->flags != 0 || hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE)
> + if (virtio_net_with_host_offload(dev))
>   vhost_dequeue_offload(hdr, m);
>  
>   return 0;
> -- 
> 2.7.4


[dpdk-dev] [PATCH v2] vhost: Only access header if offloading is supported in dequeue path

2016-10-11 Thread Maxime Coquelin
If offloading features are not negotiated, parsing the virtio header
is not needed.

Micro-benchmark with testpmd shows that the gain is +4% with indirect
descriptors, +1% when using direct descriptors.

Signed-off-by: Maxime Coquelin 
---
Changes since v1:
=
 - Rebased
 - Fix early out check in vhost_dequeue_offload

 lib/librte_vhost/virtio_net.c | 47 ---
 1 file changed, 35 insertions(+), 12 deletions(-)

diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index a59c39b..5e2fd75 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -548,6 +548,18 @@ rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
return virtio_dev_rx(dev, queue_id, pkts, count);
 }

+static inline bool
+virtio_net_with_host_offload(struct virtio_net *dev)
+{
+   if (dev->features &
+   (VIRTIO_NET_F_CSUM | VIRTIO_NET_F_HOST_ECN |
+VIRTIO_NET_F_HOST_TSO4 | VIRTIO_NET_F_HOST_TSO6 |
+VIRTIO_NET_F_HOST_UFO))
+   return true;
+
+   return false;
+}
+
 static void
 parse_ethernet(struct rte_mbuf *m, uint16_t *l4_proto, void **l4_hdr)
 {
@@ -600,6 +612,9 @@ vhost_dequeue_offload(struct virtio_net_hdr *hdr, struct 
rte_mbuf *m)
void *l4_hdr = NULL;
struct tcp_hdr *tcp_hdr = NULL;

+   if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
+   return;
+
parse_ethernet(m, _proto, _hdr);
if (hdr->flags == VIRTIO_NET_HDR_F_NEEDS_CSUM) {
if (hdr->csum_start == (m->l2_len + m->l3_len)) {
@@ -684,12 +699,12 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct 
vring_desc *descs,
  struct rte_mempool *mbuf_pool)
 {
struct vring_desc *desc;
-   uint64_t desc_addr;
+   uint64_t desc_addr = 0;
uint32_t desc_avail, desc_offset;
uint32_t mbuf_avail, mbuf_offset;
uint32_t cpy_len;
struct rte_mbuf *cur = m, *prev = m;
-   struct virtio_net_hdr *hdr;
+   struct virtio_net_hdr *hdr = NULL;
/* A counter to avoid desc dead loop chain */
uint32_t nr_desc = 1;

@@ -698,12 +713,14 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct 
vring_desc *descs,
(desc->flags & VRING_DESC_F_INDIRECT))
return -1;

-   desc_addr = gpa_to_vva(dev, desc->addr);
-   if (unlikely(!desc_addr))
-   return -1;
+   if (virtio_net_with_host_offload(dev)) {
+   desc_addr = gpa_to_vva(dev, desc->addr);
+   if (unlikely(!desc_addr))
+   return -1;

-   hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
-   rte_prefetch0(hdr);
+   hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
+   rte_prefetch0(hdr);
+   }

/*
 * A virtio driver normally uses at least 2 desc buffers
@@ -720,18 +737,24 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct 
vring_desc *descs,
if (unlikely(!desc_addr))
return -1;

-   rte_prefetch0((void *)(uintptr_t)desc_addr);
-
desc_offset = 0;
desc_avail  = desc->len;
nr_desc+= 1;
-
-   PRINT_PACKET(dev, (uintptr_t)desc_addr, desc->len, 0);
} else {
+   if (!desc_addr) {
+   desc_addr = gpa_to_vva(dev, desc->addr);
+   if (unlikely(!desc_addr))
+   return -1;
+   }
+
desc_avail  = desc->len - dev->vhost_hlen;
desc_offset = dev->vhost_hlen;
}

+   rte_prefetch0((void *)(uintptr_t)(desc_addr + desc_offset));
+
+   PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), desc_avail, 0);
+
mbuf_offset = 0;
mbuf_avail  = m->buf_len - RTE_PKTMBUF_HEADROOM;
while (1) {
@@ -795,7 +818,7 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc 
*descs,
prev->data_len = mbuf_offset;
m->pkt_len+= mbuf_offset;

-   if (hdr->flags != 0 || hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE)
+   if (virtio_net_with_host_offload(dev))
vhost_dequeue_offload(hdr, m);

return 0;
-- 
2.7.4