On Mon, Jul 6, 2020 at 9:50 AM Joyce Kong <joyce.k...@arm.com> wrote: > > Restrict pointer aliasing to allow the compiler to vectorize loop > more aggressively. > > With this patch, a 9.6% improvement is observed in throughput for > the packed virtio-net PVP case, and a 2.8% improvement in throughput > for the packed virtio-user PVP case. All performance data are measured > under 0.001% acceptable packet loss with 1 core on both vhost and > virtio side. > > Signed-off-by: Joyce Kong <joyce.k...@arm.com> > Reviewed-by: Phil Yang <phil.y...@arm.com> > --- > drivers/net/virtio/virtio_rxtx_simple_neon.c | 5 +++-- > lib/librte_vhost/virtio_net.c | 14 +++++++------- > 2 files changed, 10 insertions(+), 9 deletions(-) > > diff --git a/drivers/net/virtio/virtio_rxtx_simple_neon.c > b/drivers/net/virtio/virtio_rxtx_simple_neon.c > index 5febfb0f5..31824a931 100644 > --- a/drivers/net/virtio/virtio_rxtx_simple_neon.c > +++ b/drivers/net/virtio/virtio_rxtx_simple_neon.c > @@ -36,8 +36,9 @@ > * - nb_pkts < RTE_VIRTIO_DESC_PER_LOOP, just return no packet > */ > uint16_t > -virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf > - **__rte_restrict rx_pkts, uint16_t nb_pkts) > +virtio_recv_pkts_vec(void *rx_queue, > + struct rte_mbuf **__rte_restrict rx_pkts, > + uint16_t nb_pkts) > { > struct virtnet_rx *rxvq = rx_queue; > struct virtqueue *vq = rxvq->vq;
For the neon bits, I trust you. > diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c > index 751c1f373..e60358251 100644 > --- a/lib/librte_vhost/virtio_net.c > +++ b/lib/librte_vhost/virtio_net.c > @@ -1133,8 +1133,8 @@ virtio_dev_rx_single_packed(struct virtio_net *dev, > > static __rte_noinline uint32_t > virtio_dev_rx_packed(struct virtio_net *dev, > - struct vhost_virtqueue *vq, > - struct rte_mbuf **pkts, > + struct vhost_virtqueue *__rte_restrict vq, > + struct rte_mbuf **__rte_restrict pkts, > uint32_t count) > { > uint32_t pkt_idx = 0; But for the generic part, I'd like to get others' opinions. Added Zhihong and Adrian. > @@ -1219,7 +1219,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, > > uint16_t > rte_vhost_enqueue_burst(int vid, uint16_t queue_id, > - struct rte_mbuf **pkts, uint16_t count) > + struct rte_mbuf **__rte_restrict pkts, uint16_t count) > { > struct virtio_net *dev = get_device(vid); > > @@ -2124,9 +2124,9 @@ free_zmbuf(struct vhost_virtqueue *vq) > > static __rte_noinline uint16_t > virtio_dev_tx_packed_zmbuf(struct virtio_net *dev, > - struct vhost_virtqueue *vq, > + struct vhost_virtqueue *__rte_restrict vq, > struct rte_mempool *mbuf_pool, > - struct rte_mbuf **pkts, > + struct rte_mbuf **__rte_restrict pkts, > uint32_t count) > { > uint32_t pkt_idx = 0; > @@ -2160,9 +2160,9 @@ virtio_dev_tx_packed_zmbuf(struct virtio_net *dev, > > static __rte_noinline uint16_t > virtio_dev_tx_packed(struct virtio_net *dev, > - struct vhost_virtqueue *vq, > + struct vhost_virtqueue *__rte_restrict vq, > struct rte_mempool *mbuf_pool, > - struct rte_mbuf **pkts, > + struct rte_mbuf **__rte_restrict pkts, > uint32_t count) > { > uint32_t pkt_idx = 0; > -- > 2.27.0 >