[dpdk-dev] [PATCH v3 2/4] ixgbe: implement vector PMD for arm architecture

2016-05-26 Thread Jianbo Liu
On 25 May 2016 at 20:29, Jerin Jacob  wrote:
> On Fri, May 06, 2016 at 11:55:46AM +0530, Jianbo Liu wrote:
>> use ARM NEON intrinsic to implement ixgbe vPMD
>>
>> Signed-off-by: Jianbo Liu 
>> ---
>>  drivers/net/ixgbe/Makefile  |   4 +
>>  drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c | 561 
>> 
>>  2 files changed, 565 insertions(+)
>>  create mode 100644 drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c

>> + /* Read desc statuses backwards to avoid race condition */
>> + /* A.1 load 4 pkts desc */
>> + descs[3] =  vld1q_u64((uint64_t *)(rxdp + 3));
>> + rte_rmb();
>
> Any specific reason to add rte_rmb() here, If there is no performance
> drop then it makes sense to add before descs[3] uses it.i.e
> at rte_compiler_barrier() place in x86 code.
>
To avoid desc statuses inconsistent since they are read backwards.

>> +
>> + /* B.2 copy 2 mbuf point into rx_pkts  */
>> + vst1q_u64((uint64_t *)_pkts[pos], mbp1);
>> +
>> + /* B.1 load 1 mbuf point */
>> + mbp2 = vld1q_u64((uint64_t *)_ring[pos + 2]);
>> +
>> + descs[2] =  vld1q_u64((uint64_t *)(rxdp + 2));
>> + /* B.1 load 2 mbuf point */
>> + descs[1] =  vld1q_u64((uint64_t *)(rxdp + 1));
>> + descs[0] =  vld1q_u64((uint64_t *)(rxdp));
>> +
>> + /* B.2 copy 2 mbuf point into rx_pkts  */
>> + vst1q_u64((uint64_t *)_pkts[pos + 2], mbp2);
>> +
>> + if (split_packet) {
>> + rte_prefetch_non_temporal(_pkts[pos]->cacheline1);
>> + rte_prefetch_non_temporal(_pkts[pos+1]->cacheline1);
>> + rte_prefetch_non_temporal(_pkts[pos+2]->cacheline1);
>> + rte_prefetch_non_temporal(_pkts[pos+3]->cacheline1);
>
> replace with rte_mbuf_prefetch_part2 or equivalent
>
rte_mbuf_prefetch_part2 is new functions after this patchset, so it's
better to submit a new patch as Bruce said.


[dpdk-dev] [PATCH v3 2/4] ixgbe: implement vector PMD for arm architecture

2016-05-25 Thread Jerin Jacob
On Fri, May 06, 2016 at 11:55:46AM +0530, Jianbo Liu wrote:
> use ARM NEON intrinsic to implement ixgbe vPMD
> 
> Signed-off-by: Jianbo Liu 
> ---
>  drivers/net/ixgbe/Makefile  |   4 +
>  drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c | 561 
> 
>  2 files changed, 565 insertions(+)
>  create mode 100644 drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c
> 
> diff --git a/drivers/net/ixgbe/Makefile b/drivers/net/ixgbe/Makefile
> index 50bf51c..b1c7a60 100644
> --- a/drivers/net/ixgbe/Makefile
> +++ b/drivers/net/ixgbe/Makefile
> @@ -108,7 +108,11 @@ SRCS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += ixgbe_rxtx.c
>  SRCS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += ixgbe_ethdev.c
>  SRCS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += ixgbe_fdir.c
>  SRCS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += ixgbe_pf.c
> +ifeq ($(CONFIG_RTE_ARCH_ARM64),y)
> +SRCS-$(CONFIG_RTE_IXGBE_INC_VECTOR) += ixgbe_rxtx_vec_neon.c
> +else
>  SRCS-$(CONFIG_RTE_IXGBE_INC_VECTOR) += ixgbe_rxtx_vec.c
> +endif
>  
>  ifeq ($(CONFIG_RTE_NIC_BYPASS),y)
>  SRCS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += ixgbe_bypass.c
> diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c 
> b/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c
> new file mode 100644
> index 000..11a6115
> --- /dev/null
> +++ b/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c
> @@ -0,0 +1,561 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
> + *   All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + * * Redistributions of source code must retain the above copyright
> + *   notice, this list of conditions and the following disclaimer.
> + * * Redistributions in binary form must reproduce the above copyright
> + *   notice, this list of conditions and the following disclaimer in
> + *   the documentation and/or other materials provided with the
> + *   distribution.
> + * * Neither the name of Intel Corporation nor the names of its
> + *   contributors may be used to endorse or promote products derived
> + *   from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#include 
> +#include 
> +#include 
> +
> +#include "ixgbe_ethdev.h"
> +#include "ixgbe_rxtx.h"
> +#include "ixgbe_rxtx_vec_common.h"
> +
> +#include 
> +
> +#pragma GCC diagnostic ignored "-Wcast-qual"
> +
> +static inline void
> +ixgbe_rxq_rearm(struct ixgbe_rx_queue *rxq)
> +{
> + int i;
> + uint16_t rx_id;
> + volatile union ixgbe_adv_rx_desc *rxdp;
> + struct ixgbe_rx_entry *rxep = >sw_ring[rxq->rxrearm_start];
> + struct rte_mbuf *mb0, *mb1;
> + uint64x2_t dma_addr0, dma_addr1;
> + uint64x2_t zero = vdupq_n_u64(0);
> + uint64_t paddr;
> + uint8x8_t p;
> +
> + rxdp = rxq->rx_ring + rxq->rxrearm_start;
> +
> + /* Pull 'n' more MBUFs into the software ring */
> + if (unlikely(rte_mempool_get_bulk(rxq->mb_pool,
> +   (void *)rxep,
> +   RTE_IXGBE_RXQ_REARM_THRESH) < 0)) {
> + if (rxq->rxrearm_nb + RTE_IXGBE_RXQ_REARM_THRESH >=
> + rxq->nb_rx_desc) {
> + for (i = 0; i < RTE_IXGBE_DESCS_PER_LOOP; i++) {
> + rxep[i].mbuf = >fake_mbuf;
> + vst1q_u64((uint64_t *)[i].read,
> +   zero);
> + }
> + }
> + rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
> + RTE_IXGBE_RXQ_REARM_THRESH;
> + return;
> + }
> +
> + p = vld1_u8((uint8_t *)>mbuf_initializer);
> +
> + /* Initialize the mbufs in vector, process 2 mbufs in one loop */
> + for (i = 0; i < RTE_IXGBE_RXQ_REARM_THRESH; i += 2, rxep += 2) {
> + mb0 = rxep[0].mbuf;
> + mb1 = rxep[1].mbuf;
> +
> + /*
> +  * Flush mbuf with pkt template.
> +  * Data to be rearmed is 6 bytes long.
> +  * Though, 

[dpdk-dev] [PATCH v3 2/4] ixgbe: implement vector PMD for arm architecture

2016-05-25 Thread Bruce Richardson
On Wed, May 25, 2016 at 05:59:38PM +0530, Jerin Jacob wrote:
> On Fri, May 06, 2016 at 11:55:46AM +0530, Jianbo Liu wrote:
> > use ARM NEON intrinsic to implement ixgbe vPMD
> > 
> > Signed-off-by: Jianbo Liu 
> > ---
> >  drivers/net/ixgbe/Makefile  |   4 +
> >  drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c | 561 
> > 
> >  2 files changed, 565 insertions(+)
> >  create mode 100644 drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c
> > 

> > +   for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts;
> > +   pos += RTE_IXGBE_DESCS_PER_LOOP,
> > +   rxdp += RTE_IXGBE_DESCS_PER_LOOP) {
> > +   uint64x2_t descs[RTE_IXGBE_DESCS_PER_LOOP];
> > +   uint8x16_t pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;
> > +   uint8x16x2_t sterr_tmp1, sterr_tmp2;
> > +   uint64x2_t mbp1, mbp2;
> > +   uint8x16_t staterr;
> > +   uint16x8_t tmp;
> > +   uint32_t stat;
> > +
> > +   /* B.1 load 1 mbuf point */
> > +   mbp1 = vld1q_u64((uint64_t *)_ring[pos]);
> > +
> > +   /* Read desc statuses backwards to avoid race condition */
> > +   /* A.1 load 4 pkts desc */
> > +   descs[3] =  vld1q_u64((uint64_t *)(rxdp + 3));
> > +   rte_rmb();
> 
> Any specific reason to add rte_rmb() here, If there is no performance
> drop then it makes sense to add before descs[3] uses it.i.e
> at rte_compiler_barrier() place in x86 code.
> 
> > +
> > +   /* B.2 copy 2 mbuf point into rx_pkts  */
> > +   vst1q_u64((uint64_t *)_pkts[pos], mbp1);
> > +
> > +   /* B.1 load 1 mbuf point */
> > +   mbp2 = vld1q_u64((uint64_t *)_ring[pos + 2]);
> > +
> > +   descs[2] =  vld1q_u64((uint64_t *)(rxdp + 2));
> > +   /* B.1 load 2 mbuf point */
> > +   descs[1] =  vld1q_u64((uint64_t *)(rxdp + 1));
> > +   descs[0] =  vld1q_u64((uint64_t *)(rxdp));
> > +
> > +   /* B.2 copy 2 mbuf point into rx_pkts  */
> > +   vst1q_u64((uint64_t *)_pkts[pos + 2], mbp2);
> > +
> > +   if (split_packet) {
> > +   rte_prefetch_non_temporal(_pkts[pos]->cacheline1);
> > +   rte_prefetch_non_temporal(_pkts[pos+1]->cacheline1);
> > +   rte_prefetch_non_temporal(_pkts[pos+2]->cacheline1);
> > +   rte_prefetch_non_temporal(_pkts[pos+3]->cacheline1);
> 
> replace with rte_mbuf_prefetch_part2 or equivalent
> 
Hi Jerin, Jianbo,

since this patch has already been applied and these are not critical issues with
it, can a new patch please be submitted to propose these additional changes on
top of what's on next-net now.

Thanks,
/Bruce


[dpdk-dev] [PATCH v3 2/4] ixgbe: implement vector PMD for arm architecture

2016-05-11 Thread Jianbo Liu
On 10 May 2016 at 22:49, Bruce Richardson  wrote:
> On Fri, May 06, 2016 at 11:55:46AM +0530, Jianbo Liu wrote:
>> use ARM NEON intrinsic to implement ixgbe vPMD
>>
>> Signed-off-by: Jianbo Liu 
>> ---
>>  drivers/net/ixgbe/Makefile  |   4 +
>>  drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c | 561 
>> 
>>  2 files changed, 565 insertions(+)
>>  create mode 100644 drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c
>>
>> diff --git a/drivers/net/ixgbe/Makefile b/drivers/net/ixgbe/Makefile
>> index 50bf51c..b1c7a60 100644
>> --- a/drivers/net/ixgbe/Makefile
>> +++ b/drivers/net/ixgbe/Makefile
>> @@ -108,7 +108,11 @@ SRCS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += ixgbe_rxtx.c
>>  SRCS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += ixgbe_ethdev.c
>>  SRCS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += ixgbe_fdir.c
>>  SRCS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += ixgbe_pf.c
>> +ifeq ($(CONFIG_RTE_ARCH_ARM64),y)
>> +SRCS-$(CONFIG_RTE_IXGBE_INC_VECTOR) += ixgbe_rxtx_vec_neon.c
>> +else
>>  SRCS-$(CONFIG_RTE_IXGBE_INC_VECTOR) += ixgbe_rxtx_vec.c
>> +endif
>>
> Since you are adding ixgbe_rxtx_vec_neon.c here, it might be worthwhile adding
> in an extra patch to rename ixgbe_rxtx_vec.c to ixgbe_rxtx_vec_sse.c for
> consistency.
>
OK, I'll do that.


[dpdk-dev] [PATCH v3 2/4] ixgbe: implement vector PMD for arm architecture

2016-05-10 Thread Bruce Richardson
On Fri, May 06, 2016 at 11:55:46AM +0530, Jianbo Liu wrote:
> use ARM NEON intrinsic to implement ixgbe vPMD
> 
> Signed-off-by: Jianbo Liu 
> ---
>  drivers/net/ixgbe/Makefile  |   4 +
>  drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c | 561 
> 
>  2 files changed, 565 insertions(+)
>  create mode 100644 drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c
> 
> diff --git a/drivers/net/ixgbe/Makefile b/drivers/net/ixgbe/Makefile
> index 50bf51c..b1c7a60 100644
> --- a/drivers/net/ixgbe/Makefile
> +++ b/drivers/net/ixgbe/Makefile
> @@ -108,7 +108,11 @@ SRCS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += ixgbe_rxtx.c
>  SRCS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += ixgbe_ethdev.c
>  SRCS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += ixgbe_fdir.c
>  SRCS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += ixgbe_pf.c
> +ifeq ($(CONFIG_RTE_ARCH_ARM64),y)
> +SRCS-$(CONFIG_RTE_IXGBE_INC_VECTOR) += ixgbe_rxtx_vec_neon.c
> +else
>  SRCS-$(CONFIG_RTE_IXGBE_INC_VECTOR) += ixgbe_rxtx_vec.c
> +endif
>  
Since you are adding ixgbe_rxtx_vec_neon.c here, it might be worthwhile adding
in an extra patch to rename ixgbe_rxtx_vec.c to ixgbe_rxtx_vec_sse.c for 
consistency.

Regards,
/Bruce


[dpdk-dev] [PATCH v3 2/4] ixgbe: implement vector PMD for arm architecture

2016-05-06 Thread Jianbo Liu
use ARM NEON intrinsic to implement ixgbe vPMD

Signed-off-by: Jianbo Liu 
---
 drivers/net/ixgbe/Makefile  |   4 +
 drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c | 561 
 2 files changed, 565 insertions(+)
 create mode 100644 drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c

diff --git a/drivers/net/ixgbe/Makefile b/drivers/net/ixgbe/Makefile
index 50bf51c..b1c7a60 100644
--- a/drivers/net/ixgbe/Makefile
+++ b/drivers/net/ixgbe/Makefile
@@ -108,7 +108,11 @@ SRCS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += ixgbe_rxtx.c
 SRCS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += ixgbe_ethdev.c
 SRCS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += ixgbe_fdir.c
 SRCS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += ixgbe_pf.c
+ifeq ($(CONFIG_RTE_ARCH_ARM64),y)
+SRCS-$(CONFIG_RTE_IXGBE_INC_VECTOR) += ixgbe_rxtx_vec_neon.c
+else
 SRCS-$(CONFIG_RTE_IXGBE_INC_VECTOR) += ixgbe_rxtx_vec.c
+endif

 ifeq ($(CONFIG_RTE_NIC_BYPASS),y)
 SRCS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += ixgbe_bypass.c
diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c 
b/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c
new file mode 100644
index 000..11a6115
--- /dev/null
+++ b/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c
@@ -0,0 +1,561 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include 
+#include 
+#include 
+
+#include "ixgbe_ethdev.h"
+#include "ixgbe_rxtx.h"
+#include "ixgbe_rxtx_vec_common.h"
+
+#include 
+
+#pragma GCC diagnostic ignored "-Wcast-qual"
+
+static inline void
+ixgbe_rxq_rearm(struct ixgbe_rx_queue *rxq)
+{
+   int i;
+   uint16_t rx_id;
+   volatile union ixgbe_adv_rx_desc *rxdp;
+   struct ixgbe_rx_entry *rxep = >sw_ring[rxq->rxrearm_start];
+   struct rte_mbuf *mb0, *mb1;
+   uint64x2_t dma_addr0, dma_addr1;
+   uint64x2_t zero = vdupq_n_u64(0);
+   uint64_t paddr;
+   uint8x8_t p;
+
+   rxdp = rxq->rx_ring + rxq->rxrearm_start;
+
+   /* Pull 'n' more MBUFs into the software ring */
+   if (unlikely(rte_mempool_get_bulk(rxq->mb_pool,
+ (void *)rxep,
+ RTE_IXGBE_RXQ_REARM_THRESH) < 0)) {
+   if (rxq->rxrearm_nb + RTE_IXGBE_RXQ_REARM_THRESH >=
+   rxq->nb_rx_desc) {
+   for (i = 0; i < RTE_IXGBE_DESCS_PER_LOOP; i++) {
+   rxep[i].mbuf = >fake_mbuf;
+   vst1q_u64((uint64_t *)[i].read,
+ zero);
+   }
+   }
+   rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
+   RTE_IXGBE_RXQ_REARM_THRESH;
+   return;
+   }
+
+   p = vld1_u8((uint8_t *)>mbuf_initializer);
+
+   /* Initialize the mbufs in vector, process 2 mbufs in one loop */
+   for (i = 0; i < RTE_IXGBE_RXQ_REARM_THRESH; i += 2, rxep += 2) {
+   mb0 = rxep[0].mbuf;
+   mb1 = rxep[1].mbuf;
+
+   /*
+* Flush mbuf with pkt template.
+* Data to be rearmed is 6 bytes long.
+* Though, RX will overwrite ol_flags that are coming next
+* anyway. So overwrite whole 8 bytes with one load:
+* 6 bytes of rearm_data plus first 2 bytes of ol_flags.
+*/
+