[dpdk-dev] [PATCH] eal: define generic vector types

2016-11-29 Thread Chao Zhu
-Original Message-
From: Nelio Laranjeiro [mailto:nelio.laranje...@6wind.com] 
Sent: 2016年11月16日 23:21
To: dev at dpdk.org
Cc: Thomas Monjalon ; Jianbo Liu
; Jerin Jacob ;
Zhigang Lu ; Liming Sun ; Chao Zhu
; Bruce Richardson ;
Konstantin Ananyev ; Adrien Mazarguil 
Subject: [PATCH] eal: define generic vector types

Add common vector type definitions to all CPU architectures.

Signed-off-by: Nelio Laranjeiro 
---
 lib/librte_eal/common/Makefile |   1 +
 lib/librte_eal/common/include/arch/arm/rte_vect.h  |   1 +
 .../common/include/arch/ppc_64/rte_vect.h  |   1 +
 lib/librte_eal/common/include/arch/tile/rte_vect.h |  38 +
 lib/librte_eal/common/include/arch/x86/rte_vect.h  |   7 +-
 lib/librte_eal/common/include/generic/rte_vect.h   | 185
+
 6 files changed, 230 insertions(+), 3 deletions(-)  create mode 100644
lib/librte_eal/common/include/arch/tile/rte_vect.h
 create mode 100644 lib/librte_eal/common/include/generic/rte_vect.h

diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
index dfd64aa..8af06b1 100644
--- a/lib/librte_eal/common/Makefile
+++ b/lib/librte_eal/common/Makefile
@@ -48,6 +48,7 @@ endif

 GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h rte_prefetch.h
GENERIC_INC += rte_spinlock.h rte_memcpy.h rte_cpuflags.h rte_rwlock.h
+GENERIC_INC += rte_vect.h
 # defined in mk/arch/$(RTE_ARCH)/rte.vars.mk  ARCH_DIR ?= $(RTE_ARCH)
ARCH_INC := $(notdir $(wildcard
$(RTE_SDK)/lib/librte_eal/common/include/arch/$(ARCH_DIR)/*.h))
diff --git a/lib/librte_eal/common/include/arch/arm/rte_vect.h
b/lib/librte_eal/common/include/arch/arm/rte_vect.h
index b86c2cf..4107c99 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_vect.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_vect.h
@@ -34,6 +34,7 @@
 #define _RTE_VECT_ARM_H_

 #include 
+#include "generic/rte_vect.h"
 #include "arm_neon.h"

 #ifdef __cplusplus
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_vect.h
b/lib/librte_eal/common/include/arch/ppc_64/rte_vect.h
index 05209e5..99586e5 100644
--- a/lib/librte_eal/common/include/arch/ppc_64/rte_vect.h
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_vect.h
@@ -34,6 +34,7 @@
 #define _RTE_VECT_PPC_64_H_

 #include 
+#include "generic/rte_vect.h"

 #ifdef __cplusplus
 extern "C" {
diff --git a/lib/librte_eal/common/include/arch/tile/rte_vect.h
b/lib/librte_eal/common/include/arch/tile/rte_vect.h
new file mode 100644
index 0000000..f1e1709
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/tile/rte_vect.h
@@ -0,0 +1,38 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright 2016 6WIND S.A.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_VECT_TILE_H_
+#define _RTE_VECT_TILE_H_
+
+#include "generic/rte_vect.h"
+
+#endif /* _RTE_VECT_TILE_H_ */
diff --git a/lib/librte_eal/common/include/arch/x86/rte_vect.h
b/lib/librte_eal/common/include/arch/x86/rte_vect.h
index 77f2e25..1b4b85d 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_vect.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_vect.h
@@ -31,8 +31,8 @@
  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */

-#ifndef _RTE_VECT_H_
-#define _RTE_VECT_H_
+#ifndef _RTE_VECT_X86_H_
+#define _RTE_VECT_X86_H_

 /**
  * @file
@@ -41,6 +41,7 @@
  */

 #include 
+#include "generic/rte_vect.h"

 #if (defined(__ICC) || (__G

[dpdk-dev] FW: [PATCH v6 0/9] enable lpm, acl and other missing libraries in ppc64le

2016-08-26 Thread Chao Zhu
Thomas,

Any comments on this patch set? Are we waiting for more acks?

Thank you!

-Original Message-
From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Chao Zhu
Sent: 2016年8月17日 16:49
To: 'Gowrishankar Muthukrishnan' ;
dev at dpdk.org
Cc: 'Bruce Richardson' ; 'Konstantin Ananyev'
; 'Thomas Monjalon'
; 'Cristian Dumitrescu'
; 'Pradeep' 
Subject: Re: [dpdk-dev] [PATCH v6 0/9] enable lpm, acl and other missing
libraries in ppc64le

-Original Message-
From: Gowrishankar Muthukrishnan [mailto:gowrishanka...@linux.vnet.ibm.com]
Sent: 2016年8月16日 18:28
To: dev at dpdk.org
Cc: Chao Zhu ; Bruce Richardson
; Konstantin Ananyev
; Thomas Monjalon ;
Cristian Dumitrescu ; Pradeep

Subject: [PATCH v6 0/9] enable lpm, acl and other missing libraries in
ppc64le

This patchset enables LPM, ACL and a few other missing libs in ppc64le and
also includes a few patches for the related examples (ip_pipeline and l3fwd).

Test report:
1. LPM and ACL unit tests passed.
   Steps:
   compile test app and run (with any needed params)
   lpm_autotest
   acl_autotest
   table_autotest
 test_table_lpm_combined fails, the same as on Intel in current master.

2. Example ip_pipeline application verified for port forwarding.
   compile examples/ip_pipeline (adjust app_init_core_map
 param for ht to 1 in case of ppc64le in SMT=off mode).
   modify config/l3fwd.cfg as per enabled PMD ports.
   run ip_pipeline with config file option and check packets fwd.

v6 changes:
- added cache alignment fix for rte hash table structs.

v5 changes:
- no change in lpm lib enablement
- no change in acl lib enablement
- config file changes individually for sched,table,port,pipeline
  lib enablement
- ip_pipeline patch description and changes flagged only for ppc64le.
   app_init_core_map changes removed (due to bug found and under 
   investigation only on ppc64le/smt=off case).

v4 changes:
- fix transition4 in acl_run_altivec.h for gcc strict-aliasing error.
  Thanks to Chao Zhu for bringing up.

v3 changes:
- rebase over master to fix conflict in examples/l3fwd/l3fwd_em.c

v2 changes:
- enabling libs in config included as part of lib changes itself.

Gowrishankar Muthukrishnan (9):
  lpm: add altivec intrinsics for dpdk lpm on ppc_64
  acl: add altivec intrinsics for dpdk acl on ppc_64
  l3fwd: add altivec support for em_hash_key
  table: enable table library for ppc64le
  sched: enable sched library for ppc64le
  port: enable port library for ppc64le
  pipeline: enable pipeline library for ppc64le
  ip_pipeline: fix lcore mapping for varying SMT threads as in ppc64
  table: align rte table hash structs for cache line size

 app/test-acl/main.c|   4 +
 app/test/test_xmmt_ops.h   |  16 +
 config/defconfig_ppc_64-power8-linuxapp-gcc|   7 -
 examples/ip_pipeline/cpu_core_map.c|   8 +
 examples/l3fwd/l3fwd_em.c  |  10 +-
 lib/librte_acl/Makefile|   2 +
 lib/librte_acl/acl.h   |   4 +
 lib/librte_acl/acl_run.h   |   2 +
 lib/librte_acl/acl_run_altivec.c   |  47 +++
 lib/librte_acl/acl_run_altivec.h   | 329
+
 lib/librte_acl/rte_acl.c   |  13 +
 lib/librte_acl/rte_acl.h   |   1 +
 .../common/include/arch/ppc_64/rte_vect.h  |  60 
 lib/librte_lpm/Makefile|   2 +
 lib/librte_lpm/rte_lpm.h   |   2 +
 lib/librte_lpm/rte_lpm_altivec.h   | 154 ++
 lib/librte_table/rte_table_hash_key16.c|   4 +-
 lib/librte_table/rte_table_hash_key32.c|   4 +-
 lib/librte_table/rte_table_hash_key8.c |   2 +-
 19 files changed, 658 insertions(+), 13 deletions(-)  create mode 100644
lib/librte_acl/acl_run_altivec.c  create mode 100644
lib/librte_acl/acl_run_altivec.h  create mode 100644
lib/librte_eal/common/include/arch/ppc_64/rte_vect.h
 create mode 100644 lib/librte_lpm/rte_lpm_altivec.h

--
1.9.1

I think this patch set is good enough.

Acked-by: Chao Zhu 




[dpdk-dev] [PATCH v6 0/9] enable lpm, acl and other missing libraries in ppc64le

2016-08-17 Thread Chao Zhu
-Original Message-
From: Gowrishankar Muthukrishnan [mailto:gowrishanka...@linux.vnet.ibm.com] 
Sent: 2016年8月16日 18:28
To: dev at dpdk.org
Cc: Chao Zhu ; Bruce Richardson
; Konstantin Ananyev
; Thomas Monjalon ;
Cristian Dumitrescu ; Pradeep

Subject: [PATCH v6 0/9] enable lpm, acl and other missing libraries in
ppc64le

This patchset enables LPM, ACL and a few other missing libs in ppc64le and
also includes a few patches for the related examples (ip_pipeline and l3fwd).

Test report:
1. LPM and ACL unit tests passed.
   Steps:
   compile test app and run (with any needed params)
   lpm_autotest
   acl_autotest
   table_autotest
 test_table_lpm_combined fails, the same as on Intel in current master.

2. Example ip_pipeline application verified for port forwarding.
   compile examples/ip_pipeline (adjust app_init_core_map
 param for ht to 1 in case of ppc64le in SMT=off mode).
   modify config/l3fwd.cfg as per enabled PMD ports.
   run ip_pipeline with config file option and check packets fwd.

v6 changes:
- added cache alignment fix for rte hash table structs.

v5 changes:
- no change in lpm lib enablement
- no change in acl lib enablement
- config file changes individually for sched,table,port,pipeline
  lib enablement
- ip_pipeline patch description and changes flagged only for ppc64le.
   app_init_core_map changes removed (due to bug found and under 
   investigation only on ppc64le/smt=off case).

v4 changes:
- fix transition4 in acl_run_altivec.h for gcc strict-aliasing error.
  Thanks to Chao Zhu for bringing up.

v3 changes:
- rebase over master to fix conflict in examples/l3fwd/l3fwd_em.c

v2 changes:
- enabling libs in config included as part of lib changes itself.

Gowrishankar Muthukrishnan (9):
  lpm: add altivec intrinsics for dpdk lpm on ppc_64
  acl: add altivec intrinsics for dpdk acl on ppc_64
  l3fwd: add altivec support for em_hash_key
  table: enable table library for ppc64le
  sched: enable sched library for ppc64le
  port: enable port library for ppc64le
  pipeline: enable pipeline library for ppc64le
  ip_pipeline: fix lcore mapping for varying SMT threads as in ppc64
  table: align rte table hash structs for cache line size

 app/test-acl/main.c|   4 +
 app/test/test_xmmt_ops.h   |  16 +
 config/defconfig_ppc_64-power8-linuxapp-gcc|   7 -
 examples/ip_pipeline/cpu_core_map.c|   8 +
 examples/l3fwd/l3fwd_em.c  |  10 +-
 lib/librte_acl/Makefile|   2 +
 lib/librte_acl/acl.h   |   4 +
 lib/librte_acl/acl_run.h   |   2 +
 lib/librte_acl/acl_run_altivec.c   |  47 +++
 lib/librte_acl/acl_run_altivec.h   | 329
+
 lib/librte_acl/rte_acl.c   |  13 +
 lib/librte_acl/rte_acl.h   |   1 +
 .../common/include/arch/ppc_64/rte_vect.h  |  60 
 lib/librte_lpm/Makefile|   2 +
 lib/librte_lpm/rte_lpm.h   |   2 +
 lib/librte_lpm/rte_lpm_altivec.h   | 154 ++
 lib/librte_table/rte_table_hash_key16.c|   4 +-
 lib/librte_table/rte_table_hash_key32.c|   4 +-
 lib/librte_table/rte_table_hash_key8.c |   2 +-
 19 files changed, 658 insertions(+), 13 deletions(-)  create mode 100644
lib/librte_acl/acl_run_altivec.c  create mode 100644
lib/librte_acl/acl_run_altivec.h  create mode 100644
lib/librte_eal/common/include/arch/ppc_64/rte_vect.h
 create mode 100644 lib/librte_lpm/rte_lpm_altivec.h

--
1.9.1

I think this patch set is good enough.

Acked-by: Chao Zhu 



[dpdk-dev] [PATCH v5 4/8] table: enable table library for ppc64le

2016-08-16 Thread Chao Zhu
Gowrishankar,

The table unit test failed.
Looking at the code, it fails on the following checks in the files
lib/librte_table/rte_table_hash_key8.c,
lib/librte_table/rte_table_hash_key16.c and
lib/librte_table/rte_table_hash_key32.c:

if ((check_params_create_lru(p) != 0) ||
((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0)
||
((sizeof(struct rte_bucket_4_8) % RTE_CACHE_LINE_SIZE) !=
0)) {
return NULL;
}

if ((check_params_create_lru(p) != 0) ||
((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0)
||
((sizeof(struct rte_bucket_4_16) % RTE_CACHE_LINE_SIZE) !=
0)) {
return NULL;
}

if ((check_params_create_lru(p) != 0) ||
((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0)
||
((sizeof(struct rte_bucket_4_32) % RTE_CACHE_LINE_SIZE) !=
0)) {
return NULL;
}

The size of rte_bucket_4_8/16/32 is not a multiple of the cache line size. This
is expected, because POWER has a different cache line size. Changing struct
rte_bucket_4_8/16/32 may solve this problem. I didn't investigate further.
Can you help to double-check this patch?


-Original Message-
From: Gowrishankar Muthukrishnan [mailto:gowrishanka...@linux.vnet.ibm.com] 
Sent: 2016年8月12日 20:03
To: dev at dpdk.org
Cc: Chao Zhu ; Bruce Richardson
; Konstantin Ananyev
; Thomas Monjalon ;
Cristian Dumitrescu ; Pradeep
; gowrishankar 
Subject: [PATCH v5 4/8] table: enable table library for ppc64le

From: gowrishankar <gowrishanka...@linux.vnet.ibm.com>

This patch enables librte_table in ppc64le.

Signed-off-by: Gowrishankar Muthukrishnan

---
 config/defconfig_ppc_64-power8-linuxapp-gcc | 1 -
 1 file changed, 1 deletion(-)

diff --git a/config/defconfig_ppc_64-power8-linuxapp-gcc
b/config/defconfig_ppc_64-power8-linuxapp-gcc
index dede34f..41f67d5 100644
--- a/config/defconfig_ppc_64-power8-linuxapp-gcc
+++ b/config/defconfig_ppc_64-power8-linuxapp-gcc
@@ -59,5 +59,4 @@ CONFIG_RTE_LIBRTE_FM10K_PMD=n  # This following libraries
are not available on Power. So they're turned off.
 CONFIG_RTE_LIBRTE_SCHED=n
 CONFIG_RTE_LIBRTE_PORT=n
-CONFIG_RTE_LIBRTE_TABLE=n
 CONFIG_RTE_LIBRTE_PIPELINE=n
--
1.9.1




[dpdk-dev] [PATCH v4 3/6] ip_pipeline: fix lcore mapping for varying SMT threads as in ppc64

2016-08-12 Thread Chao Zhu
Another comment: commenting out the lcore_socket_id check will affect other
architectures. If possible, I would like to make this change Power-specific.

-Original Message-
From: gowrishankar muthukrishnan [mailto:gowrishanka...@linux.vnet.ibm.com] 
Sent: 2016年8月12日 17:00
To: Chao Zhu 
Cc: dev at dpdk.org; 'Bruce Richardson' ; 
'Konstantin Ananyev' ; 'Thomas Monjalon' 
; 'Cristian Dumitrescu' ; 'Pradeep' 
Subject: Re: [PATCH v4 3/6] ip_pipeline: fix lcore mapping for varying SMT 
threads as in ppc64

On Friday 12 August 2016 02:14 PM, Chao Zhu wrote:
> Gowrishankar,
>
> I suggest setting the following values:
>
> n_max_cores_per_socket = 8
> n_max_ht_per_core = 8
>
> This will cover most of the Power8 servers.
> Any comments?
Sure Chao. I will include this change in v5. If there are no other comments, I 
can spin out v5, with changes in this patch.

Regards,
Gowrishankar
>
> -Original Message-
> From: gowrishankar muthukrishnan 
> [mailto:gowrishankar.m at linux.vnet.ibm.com]
> Sent: 2016年8月11日 20:02
> To: Chao Zhu 
> Cc: dev at dpdk.org; 'Bruce Richardson' ; 
> 'Konstantin Ananyev' ; 'Thomas Monjalon' 
> ; 'Cristian Dumitrescu' 
> ; 'Pradeep' 
> Subject: Re: [PATCH v4 3/6] ip_pipeline: fix lcore mapping for varying 
> SMT threads as in ppc64
>
> On Thursday 11 August 2016 03:59 PM, Chao Zhu wrote:
>> Gowrishankar,
>>
>> Thanks for the detail.
>> If my understanding is correct, Power8 has different chips. Some of the 
>> OpenPOWER chips have 8 cores per socket. And the max threads per core is 8. 
>> Should we support this in cpu_core_map_init()?
>>
>> Here's a dump from the OpenPOWER system.
>> ==
>> # lscpu
>> Architecture:  ppc64le
>> Byte Order:Little Endian
>> CPU(s):64
>> On-line CPU(s) list:   0,8,16,24,32,40,48,56
>> Off-line CPU(s) list:  1-7,9-15,17-23,25-31,33-39,41-47,49-55,57-63
>> Thread(s) per core:1
>> Core(s) per socket:8
>> Socket(s): 1
>> NUMA node(s):  1
>> Model: unknown
>> L1d cache: 64K
>> L1i cache: 32K
>> L2 cache:  512K
>> L3 cache:  8192K
>> NUMA node0 CPU(s): 0,8,16,24,32,40,48,56
>> =
>>
>>
>>> +#if defined(RTE_ARCH_PPC_64)
>>> +   app->core_map = cpu_core_map_init(2, 5, 1, 0); #else
>>>
>>> This value seems quite strange. Can you give more detail?
> Based on config of tested server (as below output),
>
> CPU(s):80
> On-line CPU(s) list:   0,8,16,24,32,40,48,56,64,72
> Off-line CPU(s) list:
> 1-7,9-15,17-23,25-31,33-39,41-47,49-55,57-63,65-71,73-79
> Thread(s) per core:1  <<<
> Core(s) per socket:5   <<<
> Socket(s): 2   <<<
> NUMA node(s):  2
>
> cpu_core_map_init parameters (2,5,1,0) were prepared. Instead, we can cap max 
> sockets/core/ht counts to possible maximum supported today.
>
> Regards,
> Gowrishankar
>>> app->core_map = cpu_core_map_init(4, 32, 4, 0);
>>> +#endif
>> -Original Message-
>> From: gowrishankar muthukrishnan
>> [mailto:gowrishankar.m at linux.vnet.ibm.com]
>> Sent: 2016年8月9日 19:14
>> To: Chao Zhu ; dev at dpdk.org
>> Cc: 'Bruce Richardson' ; 'Konstantin 
>> Ananyev' ; 'Thomas Monjalon'
>> ; 'Cristian Dumitrescu'
>> ; 'Pradeep' 
>> Subject: Re: [PATCH v4 3/6] ip_pipeline: fix lcore mapping for 
>> varying SMT threads as in ppc64
>>
>> Hi Chao,
>> Sure. Please find below one.
>>
>> This patch fixes ip_pipeline panic in app_init_core_map while preparing cpu 
>> core map in powerpc with SMT off. cpu_core_map_compute_linux currently 
>> prepares core mapping based on file existence in sysfs ie.
>>
>> /sys/devices/system/cpu/cpu/topology/physical_package_id
>>  /sys/devices/system/cpu/cpu/topology/core_id
>>
>> These files do not exist for lcores which are offline for any reason (as in 
>> powerpc, while SMT is off). In this situation, this function should further 
>> continue preparing map for other online lcores instead of returning with -1 
>> for a first unavailable lcore.
>>
>> Also, in SMT=off scenario for powerpc, lcore ids can not be always 
>> indexed from
>> 0 upto 'number of cores present' (/sys/devices/system/cpu/present).
>> For eg, for an online lcore 32, core_id returned in sysfs is 112 
>> where online lcores are
>> 10 (as in one configuration), hence sysfs lcore id can not be checked with 

[dpdk-dev] [PATCH v4 3/6] ip_pipeline: fix lcore mapping for varying SMT threads as in ppc64

2016-08-12 Thread Chao Zhu
Gowrishankar,

I suggest setting the following values:

n_max_cores_per_socket = 8
n_max_ht_per_core = 8

This will cover most of the Power8 servers. 
Any comments?

-Original Message-
From: gowrishankar muthukrishnan [mailto:gowrishanka...@linux.vnet.ibm.com] 
Sent: 2016年8月11日 20:02
To: Chao Zhu 
Cc: dev at dpdk.org; 'Bruce Richardson' ; 
'Konstantin Ananyev' ; 'Thomas Monjalon' 
; 'Cristian Dumitrescu' ; 'Pradeep' 
Subject: Re: [PATCH v4 3/6] ip_pipeline: fix lcore mapping for varying SMT 
threads as in ppc64

On Thursday 11 August 2016 03:59 PM, Chao Zhu wrote:
> Gowrishankar,
>
> Thanks for the detail.
> If my understanding is correct, Power8 has different chips. Some of the 
> OpenPOWER chips have 8 cores per socket. And the max threads per core is 8. 
> Should we support this in cpu_core_map_init()?
>
> Here's a dump from the OpenPOWER system.
> ==
> # lscpu
> Architecture:  ppc64le
> Byte Order:Little Endian
> CPU(s):64
> On-line CPU(s) list:   0,8,16,24,32,40,48,56
> Off-line CPU(s) list:  1-7,9-15,17-23,25-31,33-39,41-47,49-55,57-63
> Thread(s) per core:1
> Core(s) per socket:8
> Socket(s): 1
> NUMA node(s):  1
> Model: unknown
> L1d cache: 64K
> L1i cache: 32K
> L2 cache:  512K
> L3 cache:  8192K
> NUMA node0 CPU(s): 0,8,16,24,32,40,48,56
> =
>
>
>> +#if defined(RTE_ARCH_PPC_64)
>> +app->core_map = cpu_core_map_init(2, 5, 1, 0); #else
>>
>> This value seems quite strange. Can you give more detail?

Based on config of tested server (as below output),

CPU(s):80
On-line CPU(s) list:   0,8,16,24,32,40,48,56,64,72
Off-line CPU(s) list: 
1-7,9-15,17-23,25-31,33-39,41-47,49-55,57-63,65-71,73-79
Thread(s) per core:1  <<<
Core(s) per socket:5   <<<
Socket(s): 2   <<<
NUMA node(s):  2

the cpu_core_map_init parameters (2,5,1,0) were chosen. Instead, we can cap the max
sockets/cores/ht counts at the maximum possible values supported today.

Regards,
Gowrishankar
>>
>>  app->core_map = cpu_core_map_init(4, 32, 4, 0);
>> +#endif
>
> -Original Message-
> From: gowrishankar muthukrishnan 
> [mailto:gowrishankar.m at linux.vnet.ibm.com]
> Sent: 2016年8月9日 19:14
> To: Chao Zhu ; dev at dpdk.org
> Cc: 'Bruce Richardson' ; 'Konstantin 
> Ananyev' ; 'Thomas Monjalon' 
> ; 'Cristian Dumitrescu' 
> ; 'Pradeep' 
> Subject: Re: [PATCH v4 3/6] ip_pipeline: fix lcore mapping for varying 
> SMT threads as in ppc64
>
> Hi Chao,
> Sure. Please find below one.
>
> This patch fixes ip_pipeline panic in app_init_core_map while preparing cpu 
> core map in powerpc with SMT off. cpu_core_map_compute_linux currently 
> prepares core mapping based on file existence in sysfs ie.
>
> /sys/devices/system/cpu/cpu/topology/physical_package_id
> /sys/devices/system/cpu/cpu/topology/core_id
>
> These files do not exist for lcores which are offline for any reason (as in 
> powerpc, while SMT is off). In this situation, this function should further 
> continue preparing map for other online lcores instead of returning with -1 
> for a first unavailable lcore.
>
> Also, in SMT=off scenario for powerpc, lcore ids can not be always 
> indexed from
> 0 upto 'number of cores present' (/sys/devices/system/cpu/present). 
> For eg, for an online lcore 32, core_id returned in sysfs is 112 where 
> online lcores are
> 10 (as in one configuration), hence sysfs lcore id can not be checked with 
> indexing lcore number before positioning lcore map array.
>
> Thanks,
> Gowrishankar
>
> On Tuesday 09 August 2016 02:37 PM, Chao Zhu wrote:
>> Gowrishankar,
>>
>> Can you give more description about this patch?
>> Thank you!
>>
>> -Original Message-
>> From: Gowrishankar Muthukrishnan
>> [mailto:gowrishankar.m at linux.vnet.ibm.com]
>> Sent: 2016年8月6日 20:33
>> To: dev at dpdk.org
>> Cc: Chao Zhu ; Bruce Richardson 
>> ; Konstantin Ananyev 
>> ; Thomas Monjalon 
>> ; Cristian Dumitrescu 
>> ; Pradeep ; 
>> gowrishankar 
>> Subject: [PATCH v4 3/6] ip_pipeline: fix lcore mapping for varying 
>> SMT threads as in ppc64
>>
>> From: gowrishankar 
>>
>> offline lcore would still refer to original core id and this has to 
>> be considered while creating cpu core mask.
>>
>> Signed-off-by: Gowrishankar 
>> ---
>>config/defconfig_ppc_64-power8-linuxapp-gcc |  3 ---
>>examples/ip_pipeline/cpu_core_map.c | 12 +---
>>examples/ip_

[dpdk-dev] [PATCH v4 3/6] ip_pipeline: fix lcore mapping for varying SMT threads as in ppc64

2016-08-11 Thread Chao Zhu
Gowrishankar,

Thanks for the detail.
If my understanding is correct, Power8 has different chips. Some of the 
OpenPOWER chips have 8 cores per socket. And the max threads per core is 8. 
Should we support this in cpu_core_map_init()?

Here's a dump from the OpenPOWER system.
==
# lscpu
Architecture:  ppc64le
Byte Order:Little Endian
CPU(s):64
On-line CPU(s) list:   0,8,16,24,32,40,48,56
Off-line CPU(s) list:  1-7,9-15,17-23,25-31,33-39,41-47,49-55,57-63
Thread(s) per core:1
Core(s) per socket:8
Socket(s): 1
NUMA node(s):  1
Model: unknown
L1d cache: 64K
L1i cache: 32K
L2 cache:  512K
L3 cache:  8192K
NUMA node0 CPU(s): 0,8,16,24,32,40,48,56
=


> +#if defined(RTE_ARCH_PPC_64)
> + app->core_map = cpu_core_map_init(2, 5, 1, 0); #else
>
> This value seems quite strange. Can you give more detail?
>
>   app->core_map = cpu_core_map_init(4, 32, 4, 0);
> +#endif


-Original Message-
From: gowrishankar muthukrishnan [mailto:gowrishanka...@linux.vnet.ibm.com] 
Sent: 2016年8月9日 19:14
To: Chao Zhu ; dev at dpdk.org
Cc: 'Bruce Richardson' ; 'Konstantin Ananyev' 
; 'Thomas Monjalon' ; 'Cristian Dumitrescu' ; 'Pradeep' 

Subject: Re: [PATCH v4 3/6] ip_pipeline: fix lcore mapping for varying SMT 
threads as in ppc64

Hi Chao,
Sure. Please find below one.

This patch fixes ip_pipeline panic in app_init_core_map while preparing cpu 
core map in powerpc with SMT off. cpu_core_map_compute_linux currently prepares 
core mapping based on file existence in sysfs ie.

/sys/devices/system/cpu/cpu/topology/physical_package_id
   /sys/devices/system/cpu/cpu/topology/core_id

These files do not exist for lcores which are offline for any reason (as in 
powerpc, while SMT is off). In this situation, this function should further 
continue preparing map for other online lcores instead of returning with -1 for 
a first unavailable lcore.

Also, in the SMT=off scenario for powerpc, lcore ids cannot always be indexed from
0 up to 'number of cores present' (/sys/devices/system/cpu/present). For example, for
an online lcore 32, the core_id returned in sysfs is 112 while the online lcores are
10 (as in one configuration); hence the sysfs lcore id cannot be checked against
the lcore index number before positioning it in the lcore map array.

Thanks,
Gowrishankar

On Tuesday 09 August 2016 02:37 PM, Chao Zhu wrote:
> Gowrishankar,
>
> Can you give more description about this patch?
> Thank you!
>
> -Original Message-
> From: Gowrishankar Muthukrishnan 
> [mailto:gowrishankar.m at linux.vnet.ibm.com]
> Sent: 2016年8月6日 20:33
> To: dev at dpdk.org
> Cc: Chao Zhu ; Bruce Richardson 
> ; Konstantin Ananyev 
> ; Thomas Monjalon 
> ; Cristian Dumitrescu 
> ; Pradeep ; 
> gowrishankar 
> Subject: [PATCH v4 3/6] ip_pipeline: fix lcore mapping for varying SMT 
> threads as in ppc64
>
> From: gowrishankar 
>
> offline lcore would still refer to original core id and this has to be 
> considered while creating cpu core mask.
>
> Signed-off-by: Gowrishankar 
> ---
>   config/defconfig_ppc_64-power8-linuxapp-gcc |  3 ---
>   examples/ip_pipeline/cpu_core_map.c | 12 +---
>   examples/ip_pipeline/init.c |  4 
>   3 files changed, 5 insertions(+), 14 deletions(-)
>
> diff --git a/config/defconfig_ppc_64-power8-linuxapp-gcc
> b/config/defconfig_ppc_64-power8-linuxapp-gcc
> index dede34f..a084672 100644
> --- a/config/defconfig_ppc_64-power8-linuxapp-gcc
> +++ b/config/defconfig_ppc_64-power8-linuxapp-gcc
> @@ -58,6 +58,3 @@ CONFIG_RTE_LIBRTE_FM10K_PMD=n
>
>   # This following libraries are not available on Power. So they're 
> turned off.
>   CONFIG_RTE_LIBRTE_SCHED=n
> -CONFIG_RTE_LIBRTE_PORT=n
> -CONFIG_RTE_LIBRTE_TABLE=n
> -CONFIG_RTE_LIBRTE_PIPELINE=n
> diff --git a/examples/ip_pipeline/cpu_core_map.c
> b/examples/ip_pipeline/cpu_core_map.c
> index cb088b1..482e68e 100644
> --- a/examples/ip_pipeline/cpu_core_map.c
> +++ b/examples/ip_pipeline/cpu_core_map.c
> @@ -351,9 +351,6 @@ cpu_core_map_compute_linux(struct cpu_core_map *map)
>   int lcore_socket_id =
>   cpu_core_map_get_socket_id_linux(lcore_id);
>
> - if (lcore_socket_id < 0)
> - return -1;
> -
>   if (((uint32_t) lcore_socket_id) == socket_id)
>   n_detected++;
>   }
> @@ -368,18 +365,11 @@ cpu_core_map_compute_linux(struct cpu_core_map *map)
>   cpu_core_map_get_socket_id_linux(
>   lco

[dpdk-dev] [PATCH v4 3/6] ip_pipeline: fix lcore mapping for varying SMT threads as in ppc64

2016-08-09 Thread Chao Zhu
Gowrishankar,

Can you describe this patch in more detail?
Thank you!

-Original Message-
From: Gowrishankar Muthukrishnan [mailto:gowrishanka...@linux.vnet.ibm.com] 
Sent: 2016年8月6日 20:33
To: dev at dpdk.org
Cc: Chao Zhu ; Bruce Richardson
; Konstantin Ananyev
; Thomas Monjalon ;
Cristian Dumitrescu ; Pradeep
; gowrishankar 
Subject: [PATCH v4 3/6] ip_pipeline: fix lcore mapping for varying SMT
threads as in ppc64

From: gowrishankar <gowrishanka...@linux.vnet.ibm.com>

An offline lcore would still refer to its original core id, and this has to be
considered while creating the cpu core mask.

Signed-off-by: Gowrishankar 
---
 config/defconfig_ppc_64-power8-linuxapp-gcc |  3 ---
 examples/ip_pipeline/cpu_core_map.c | 12 +---
 examples/ip_pipeline/init.c |  4 
 3 files changed, 5 insertions(+), 14 deletions(-)

diff --git a/config/defconfig_ppc_64-power8-linuxapp-gcc
b/config/defconfig_ppc_64-power8-linuxapp-gcc
index dede34f..a084672 100644
--- a/config/defconfig_ppc_64-power8-linuxapp-gcc
+++ b/config/defconfig_ppc_64-power8-linuxapp-gcc
@@ -58,6 +58,3 @@ CONFIG_RTE_LIBRTE_FM10K_PMD=n

 # This following libraries are not available on Power. So they're turned
off.
 CONFIG_RTE_LIBRTE_SCHED=n
-CONFIG_RTE_LIBRTE_PORT=n
-CONFIG_RTE_LIBRTE_TABLE=n
-CONFIG_RTE_LIBRTE_PIPELINE=n
diff --git a/examples/ip_pipeline/cpu_core_map.c
b/examples/ip_pipeline/cpu_core_map.c
index cb088b1..482e68e 100644
--- a/examples/ip_pipeline/cpu_core_map.c
+++ b/examples/ip_pipeline/cpu_core_map.c
@@ -351,9 +351,6 @@ cpu_core_map_compute_linux(struct cpu_core_map *map)
int lcore_socket_id =
cpu_core_map_get_socket_id_linux(lcore_id);

-   if (lcore_socket_id < 0)
-   return -1;
-
if (((uint32_t) lcore_socket_id) == socket_id)
n_detected++;
}
@@ -368,18 +365,11 @@ cpu_core_map_compute_linux(struct cpu_core_map *map)
cpu_core_map_get_socket_id_linux(
lcore_id);

-   if (lcore_socket_id < 0)
-   return -1;
-
Why remove the lcore_socket_id check?

int lcore_core_id =
cpu_core_map_get_core_id_linux(
lcore_id);

-   if (lcore_core_id < 0)
-   return -1;
-
-   if (((uint32_t) lcore_socket_id ==
socket_id) &&
-   ((uint32_t) lcore_core_id ==
core_id)) {
+   if ((uint32_t) lcore_socket_id == socket_id)
{
uint32_t pos = cpu_core_map_pos(map,
socket_id,
core_id_contig,
diff --git a/examples/ip_pipeline/init.c b/examples/ip_pipeline/init.c index
cd167f6..60c931f 100644
--- a/examples/ip_pipeline/init.c
+++ b/examples/ip_pipeline/init.c
@@ -61,7 +61,11 @@ static void
 app_init_core_map(struct app_params *app)  {
APP_LOG(app, HIGH, "Initializing CPU core map ...");
+#if defined(RTE_ARCH_PPC_64)
+   app->core_map = cpu_core_map_init(2, 5, 1, 0); #else

This value seems quite strange. Can you give more detail?

app->core_map = cpu_core_map_init(4, 32, 4, 0);
+#endif

if (app->core_map == NULL)
rte_panic("Cannot create CPU core map\n");
--
1.9.1




[dpdk-dev] [PATCH v2 0/6] enable lpm, acl and other missing libraries in ppc64le

2016-07-18 Thread Chao Zhu
Gowrishankar,

I still get this error with the v3 patches. My OS is Ubuntu 16.04 LTS
ppc64le. GCC version: Ubuntu/IBM 5.3.1-14ubuntu2
Can you help to check again?

In file included from
/root/test/sub/dpdk/lib/librte_acl/acl_run_altivec.c:34:0:
/root/test/sub/dpdk/lib/librte_acl/acl_run_altivec.h: In function
'transition4':
/root/test/sub/dpdk/lib/librte_acl/acl_run_altivec.h:198:2: error:
dereferencing type-punned pointer will break strict-aliasing rules
[-Werror=strict-aliasing]
  *indices1 = (xmm_t){((uint32_t *))[0], ((uint32_t *))[1],
  ^
/root/test/sub/dpdk/lib/librte_acl/acl_run_altivec.h:202:2: error:
dereferencing type-punned pointer will break strict-aliasing rules
[-Werror=strict-aliasing]
  *indices2 = (xmm_t){((uint32_t *))[0], ((uint32_t *))[1],
  ^



-Original Message-
From: gowrishankar [mailto:gowrishanka...@linux.vnet.ibm.com] 
Sent: 2016年7月16日 15:59
To: Chao Zhu ; dev at dpdk.org
Cc: 'Bruce Richardson' ; 'Konstantin Ananyev'
; 'Thomas Monjalon'
; 'Cristian Dumitrescu'
; 'Pradeep' 
Subject: Re: [PATCH v2 0/6] enable lpm, acl and other missing libraries in
ppc64le

Hi Chao,
I did not face this error. I even verified today with the tip of master
6596554... .
However, I had a patch conflict for examples/l3fwd/l3fwd_em.c, which is fixed
now, and the v3 patch set was sent recently. Could you please check?

Thanks,
Gowrishankar
On Friday 15 July 2016 08:45 AM, Chao Zhu wrote:
> Gowrishankar,
>
> When I tried the patches, I got some compilation error:
>
> In file included from
> /root/test/sub/dpdk/lib/librte_acl/acl_run_altivec.c:34:0:
> /root/test/sub/dpdk/lib/librte_acl/acl_run_altivec.h: In function
> 'transition4':
> /root/test/sub/dpdk/lib/librte_acl/acl_run_altivec.h:198:2: error:
> dereferencing type-punned pointer will break strict-aliasing rules 
> [-Werror=strict-aliasing]
>*indices1 = (xmm_t){((uint32_t *))[0], ((uint32_t *))[1],
>^
> /root/test/sub/dpdk/lib/librte_acl/acl_run_altivec.h:202:2: error:
> dereferencing type-punned pointer will break strict-aliasing rules 
> [-Werror=strict-aliasing]
>*indices2 = (xmm_t){((uint32_t *))[0], ((uint32_t *))[1],
>
> Can you help to take a look?
>
>
> -Original Message-
> From: Gowrishankar [mailto:gowrishankar.m at linux.vnet.ibm.com]
> Sent: 2016年7月10日 15:51
> To: dev at dpdk.org
> Cc: Chao Zhu ; Bruce Richardson 
> ; Konstantin Ananyev 
> ; Thomas Monjalon 
> ; Cristian Dumitrescu 
> ; Pradeep ; 
> gowrishankar 
> Subject: [PATCH v2 0/6] enable lpm, acl and other missing libraries in 
> ppc64le
>
> From: gowrishankar 
>
> This patchset enables LPM, ACL and other few missing libs in ppc64le 
> and also address few patches in related examples (ip_pipeline and l3fwd).
>
> Test report:
> LPM and ACL unit tests verified as in patch set v1.
> Same results as before observed.
>
> v2 changes:
> - enabling libs in config included as part of lib changes itself.
>
> gowrishankar (6):
>lpm: add altivec intrinsics for dpdk lpm on ppc_64
>acl: add altivec intrinsics for dpdk acl on ppc_64
>ip_pipeline: fix lcore mapping for varying SMT threads as in ppc64
>table: cache align rte_bucket_4_8
>sched: enable sched library for ppc64le
>l3fwd: add altivec support for em_hash_key
>
>   app/test-acl/main.c|   4 +
>   app/test/test_xmmt_ops.h   |  16 +
>   config/defconfig_ppc_64-power8-linuxapp-gcc|   7 -
>   examples/ip_pipeline/cpu_core_map.c|  12 +-
>   examples/ip_pipeline/init.c|   4 +
>   examples/l3fwd/l3fwd_em.c  |   8 +
>   lib/librte_acl/Makefile|   2 +
>   lib/librte_acl/acl.h   |   4 +
>   lib/librte_acl/acl_run.h   |   2 +
>   lib/librte_acl/acl_run_altivec.c   |  47 +++
>   lib/librte_acl/acl_run_altivec.h   | 328
> +
>   lib/librte_acl/rte_acl.c   |  13 +
>   lib/librte_acl/rte_acl.h   |   1 +
>   .../common/include/arch/ppc_64/rte_vect.h  |  60 
>   lib/librte_lpm/Makefile|   2 +
>   lib/librte_lpm/rte_lpm.h   |   2 +
>   lib/librte_lpm/rte_lpm_altivec.h   | 154 ++
>   lib/librte_table/rte_table_hash_key8.c |   2 +-
>   18 files changed, 649 insertions(+), 19 deletions(-)  create mode 
> 100644 lib/librte_acl/acl_run_altivec.c  create mode 100644 
> lib/librte_acl/acl_run_altivec.h  create mode 100644 
> lib/librte_eal/common/include/arch/ppc_64/rte_vect.h
>   create mode 100644 lib/librte_lpm/rte_lpm_altivec.h
>
> --
> 1.9.1
>
>
>





[dpdk-dev] [PATCH v2 0/6] enable lpm, acl and other missing libraries in ppc64le

2016-07-15 Thread Chao Zhu
Gowrishankar,

When I tried the patches, I got some compilation error:

In file included from
/root/test/sub/dpdk/lib/librte_acl/acl_run_altivec.c:34:0:
/root/test/sub/dpdk/lib/librte_acl/acl_run_altivec.h: In function
'transition4':
/root/test/sub/dpdk/lib/librte_acl/acl_run_altivec.h:198:2: error:
dereferencing type-punned pointer will break strict-aliasing rules
[-Werror=strict-aliasing]
  *indices1 = (xmm_t){((uint32_t *))[0], ((uint32_t *))[1],
  ^
/root/test/sub/dpdk/lib/librte_acl/acl_run_altivec.h:202:2: error:
dereferencing type-punned pointer will break strict-aliasing rules
[-Werror=strict-aliasing]
  *indices2 = (xmm_t){((uint32_t *))[0], ((uint32_t *))[1],

Can you help to take a look?


-Original Message-
From: Gowrishankar [mailto:gowrishanka...@linux.vnet.ibm.com] 
Sent: 2016年7月10日 15:51
To: dev at dpdk.org
Cc: Chao Zhu ; Bruce Richardson
; Konstantin Ananyev
; Thomas Monjalon ;
Cristian Dumitrescu ; Pradeep
; gowrishankar 
Subject: [PATCH v2 0/6] enable lpm, acl and other missing libraries in
ppc64le

From: gowrishankar <gowrishanka...@linux.vnet.ibm.com>

This patchset enables LPM, ACL and other few missing libs in ppc64le and
also address few patches in related examples (ip_pipeline and l3fwd).

Test report:
LPM and ACL unit tests verified as in patch set v1.
Same results as before observed.

v2 changes:
- enabling libs in config included as part of lib changes itself.

gowrishankar (6):
  lpm: add altivec intrinsics for dpdk lpm on ppc_64
  acl: add altivec intrinsics for dpdk acl on ppc_64
  ip_pipeline: fix lcore mapping for varying SMT threads as in ppc64
  table: cache align rte_bucket_4_8
  sched: enable sched library for ppc64le
  l3fwd: add altivec support for em_hash_key

 app/test-acl/main.c|   4 +
 app/test/test_xmmt_ops.h   |  16 +
 config/defconfig_ppc_64-power8-linuxapp-gcc|   7 -
 examples/ip_pipeline/cpu_core_map.c|  12 +-
 examples/ip_pipeline/init.c|   4 +
 examples/l3fwd/l3fwd_em.c  |   8 +
 lib/librte_acl/Makefile|   2 +
 lib/librte_acl/acl.h   |   4 +
 lib/librte_acl/acl_run.h   |   2 +
 lib/librte_acl/acl_run_altivec.c   |  47 +++
 lib/librte_acl/acl_run_altivec.h   | 328
+
 lib/librte_acl/rte_acl.c   |  13 +
 lib/librte_acl/rte_acl.h   |   1 +
 .../common/include/arch/ppc_64/rte_vect.h  |  60 
 lib/librte_lpm/Makefile|   2 +
 lib/librte_lpm/rte_lpm.h   |   2 +
 lib/librte_lpm/rte_lpm_altivec.h   | 154 ++
 lib/librte_table/rte_table_hash_key8.c |   2 +-
 18 files changed, 649 insertions(+), 19 deletions(-)  create mode 100644
lib/librte_acl/acl_run_altivec.c  create mode 100644
lib/librte_acl/acl_run_altivec.h  create mode 100644
lib/librte_eal/common/include/arch/ppc_64/rte_vect.h
 create mode 100644 lib/librte_lpm/rte_lpm_altivec.h

--
1.9.1




[dpdk-dev] [PATCH] EAL:fix memory barrier implementation on IBM POWER

2016-07-15 Thread Chao Zhu
On weakly ordered memory architectures like POWER, rte_smp_wmb/rte_smp_rmb
need to use CPU instructions, not just a compiler barrier. This patch fixes
this. Also, to improve performance on PPC64, use the lightweight sync (lwsync)
instruction instead of the sync instruction.

Signed-off-by: Chao Zhu 
---
 .../common/include/arch/ppc_64/rte_atomic.h|   12 ++--
 1 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h 
b/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h
index feae486..924e894 100644
--- a/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h
@@ -62,7 +62,11 @@ extern "C" {
  * Guarantees that the STORE operations generated before the barrier
  * occur before the STORE operations generated after.
  */
+#ifdef RTE_ARCH_64
+#definerte_wmb() {asm volatile("lwsync" : : : "memory"); }
+#else
 #definerte_wmb() {asm volatile("sync" : : : "memory"); }
+#endif

 /**
  * Read memory barrier.
@@ -70,13 +74,17 @@ extern "C" {
  * Guarantees that the LOAD operations generated before the barrier
  * occur before the LOAD operations generated after.
  */
+#ifdef RTE_ARCH_64
+#definerte_rmb() {asm volatile("lwsync" : : : "memory"); }
+#else
 #definerte_rmb() {asm volatile("sync" : : : "memory"); }
+#endif

 #define rte_smp_mb() rte_mb()

-#define rte_smp_wmb() rte_compiler_barrier()
+#define rte_smp_wmb() rte_wmb()

-#define rte_smp_rmb() rte_compiler_barrier()
+#define rte_smp_rmb() rte_rmb()

 /*- 16 bit atomic operations 
-*/
 /* To be compatible with Power7, use GCC built-in functions for 16 bit
-- 
1.7.1



[dpdk-dev] [PATCH v2 0/6] enable lpm, acl and other missing libraries in ppc64le

2016-07-11 Thread Chao Zhu
Gowrishankar,

Nice patches! Do you have any functional test results? I need some time to
verify the patches.

-Original Message-
From: Gowrishankar [mailto:gowrishanka...@linux.vnet.ibm.com] 
Sent: 2016年7月10日 15:51
To: dev at dpdk.org
Cc: Chao Zhu ; Bruce Richardson
; Konstantin Ananyev
; Thomas Monjalon ;
Cristian Dumitrescu ; Pradeep
; gowrishankar 
Subject: [PATCH v2 0/6] enable lpm, acl and other missing libraries in
ppc64le

From: gowrishankar <gowrishanka...@linux.vnet.ibm.com>

This patchset enables LPM, ACL and other few missing libs in ppc64le and
also address few patches in related examples (ip_pipeline and l3fwd).

Test report:
LPM and ACL unit tests verified as in patch set v1.
Same results as before observed.

v2 changes:
- enabling libs in config included as part of lib changes itself.

gowrishankar (6):
  lpm: add altivec intrinsics for dpdk lpm on ppc_64
  acl: add altivec intrinsics for dpdk acl on ppc_64
  ip_pipeline: fix lcore mapping for varying SMT threads as in ppc64
  table: cache align rte_bucket_4_8
  sched: enable sched library for ppc64le
  l3fwd: add altivec support for em_hash_key

 app/test-acl/main.c|   4 +
 app/test/test_xmmt_ops.h   |  16 +
 config/defconfig_ppc_64-power8-linuxapp-gcc|   7 -
 examples/ip_pipeline/cpu_core_map.c|  12 +-
 examples/ip_pipeline/init.c|   4 +
 examples/l3fwd/l3fwd_em.c  |   8 +
 lib/librte_acl/Makefile|   2 +
 lib/librte_acl/acl.h   |   4 +
 lib/librte_acl/acl_run.h   |   2 +
 lib/librte_acl/acl_run_altivec.c   |  47 +++
 lib/librte_acl/acl_run_altivec.h   | 328
+
 lib/librte_acl/rte_acl.c   |  13 +
 lib/librte_acl/rte_acl.h   |   1 +
 .../common/include/arch/ppc_64/rte_vect.h  |  60 
 lib/librte_lpm/Makefile|   2 +
 lib/librte_lpm/rte_lpm.h   |   2 +
 lib/librte_lpm/rte_lpm_altivec.h   | 154 ++
 lib/librte_table/rte_table_hash_key8.c |   2 +-
 18 files changed, 649 insertions(+), 19 deletions(-)  create mode 100644
lib/librte_acl/acl_run_altivec.c  create mode 100644
lib/librte_acl/acl_run_altivec.h  create mode 100644
lib/librte_eal/common/include/arch/ppc_64/rte_vect.h
 create mode 100644 lib/librte_lpm/rte_lpm_altivec.h

--
1.9.1




[dpdk-dev] librte_meter compilation fails on IBM Power8

2016-06-24 Thread Chao Zhu
I can repeat this problem by "export EXTRA_CFLAGS="-O0 -g"" on Power8. But
I'm not sure why this happens. The "-O3 -g" option works properly. I'll
investigate more.

-Original Message-
From: Dumitrescu, Cristian [mailto:cristian.dumitre...@intel.com] 
Sent: 2016年6月24日 1:26
To: Nélio Laranjeiro ; Chao Zhu 
Cc: dev at dpdk.org
Subject: RE: librte_meter compilation fails on IBM Power8



> -Original Message-
> From: Nélio Laranjeiro [mailto:nelio.laranjeiro at 6wind.com]
> Sent: Wednesday, June 22, 2016 1:31 PM
> To: Dumitrescu, Cristian ; Chao Zhu 
> 
> Cc: dev at dpdk.org
> Subject: librte_meter compilation fails on IBM Power8
> 
> Hi Cristian, Chao,
> 
> I have encountered a compilation failure on IBM Power8 when compiling 
> master branch with EXTRA_CFLAGS='-O0 -g':
> 
>   /root/nl/dpdk.org/build/lib/librte_meter.a(rte_meter.o): In function
> `rte_meter_get_tb_params':
>   /root/nl/dpdk.org/lib/librte_meter/rte_meter.c:57: undefined 
> reference to `ceil'
> 
> Seems related to commit 43f4364d.
> 
> I don't have the time to search more deeply, I hope it can help.
> 
> Regards,
> 
> --
> Nélio Laranjeiro
> 6WIND

I am not sure what the problem might be for IBM Power8.

ceil() is a function defined in math library, we include math.h header file
in rte_meter.c and we also link the library properly in the Makefile by
using LDLIBS += -lm, therefore I do not see any issue in the library code.

Thanks,
Cristian




[dpdk-dev] librte_meter compilation fails on IBM Power8

2016-06-23 Thread Chao Zhu
Nelio,

I'll check. Thanks!

-Original Message-
From: Nélio Laranjeiro [mailto:nelio.laranje...@6wind.com] 
Sent: 2016年6月22日 20:31
To: Cristian Dumitrescu ; Chao Zhu

Cc: dev at dpdk.org
Subject: librte_meter compilation fails on IBM Power8

Hi Cristian, Chao,

I have encountered a compilation failure on IBM Power8 when compiling master
branch with EXTRA_CFLAGS='-O0 -g':

  /root/nl/dpdk.org/build/lib/librte_meter.a(rte_meter.o): In function
`rte_meter_get_tb_params':
  /root/nl/dpdk.org/lib/librte_meter/rte_meter.c:57: undefined reference to
`ceil'

Seems related to commit 43f4364d.

I don't have the time to search more deeply, I hope it can help.

Regards,

--
Nélio Laranjeiro
6WIND




[dpdk-dev] [PATCH] mk: define objcopy target and arch on IBM POWER

2016-06-15 Thread Chao Zhu
This patch defines the target and arch values of the objcopy program for
the IBM POWER PPC64 little endian architecture. This is a fix of the
original patch proposed by Jan Viktorin.

Signed-off-by: Chao Zhu 
---
 mk/arch/ppc_64/rte.vars.mk |5 +
 1 files changed, 5 insertions(+), 0 deletions(-)

diff --git a/mk/arch/ppc_64/rte.vars.mk b/mk/arch/ppc_64/rte.vars.mk
index 363fcd1..ef3ba1d 100644
--- a/mk/arch/ppc_64/rte.vars.mk
+++ b/mk/arch/ppc_64/rte.vars.mk
@@ -37,3 +37,8 @@ CPU_LDFLAGS ?=
 CPU_ASFLAGS ?= -felf64

 export ARCH CROSS CPU_CFLAGS CPU_LDFLAGS CPU_ASFLAGS
+
+RTE_OBJCOPY_TARGET = elf64-powerpcle
+RTE_OBJCOPY_ARCH = powerpc:common64
+
+export RTE_OBJCOPY_TARGET RTE_OBJCOPY_ARCH
-- 
1.7.1



[dpdk-dev] [PATCH v2 6/7] virtio: fix pci accesses for ppc64 in legacy mode

2016-05-24 Thread Chao Zhu
Olivier,

The patch set looks good. I think little endian is good enough. We
previously decided internally to support only little endian. You can keep the
endian conversion and that kind of thing in the code.

-Original Message-
From: Olivier Matz [mailto:olivier.m...@6wind.com] 
Sent: 2016年5月20日 20:11
To: Chao Zhu ; dev at dpdk.org
Cc: david.marchand at 6wind.com; yuanhan.liu at linux.intel.com; huawei.xie at 
intel.
com
Subject: Re: [PATCH v2 6/7] virtio: fix pci accesses for ppc64 in legacy
mode

Hi Chao,

On 05/19/2016 11:13 AM, Chao Zhu wrote:
> Olivier,
> 
> Thanks for the patches! 
> Just one comment:
> POWER8 machine only supports little endian OS on bare metal. In VM 
> guest, it can support both little endian and big endian OS. Did you 
> try to run it on both host (little endian) and guest (big endian and
little endian)?

No I didn't test big endian in the guest.

I don't have any big endian VM image. Is it required for the test? I mean,
is it possible to run a big endian DPDK userland application on a little
endian kernel? (and if yes, is it known to work?)

Maybe I could replace in the patch:
  #ifdef RTE_ARCH_PPC_64
By something like :
  #if defined(RTE_ARCH_PPC_64) && (RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN)

If you have the ability to test it easily, it would be appreciated :)

Thanks for the review!
Olivier




[dpdk-dev] [PATCH] eal/ppc: fix secondary process to map hugepages in correct order

2016-05-20 Thread Chao Zhu
Sergio,

Step 4 will not fail, because each huge page will eventually get a virtual
address, though at a different address than the one requested. If you take a
look at the function rte_eal_hugepage_init(), in the last loop, it uses both
the physical address and the virtual address to determine a new memory segment.
This step makes sure that the initialization is correct. What I want to say is
that this bug also affects the secondary process in the function
rte_eal_hugepage_attach(). It can make the secondary process fail to
initialize. I'm trying to figure out how to make it work.

-Original Message-
From: Sergio Gonzalez Monroy [mailto:sergio.gonzalez.mon...@intel.com] 
Sent: 2016年5月20日 16:01
To: Chao Zhu ; 'Bruce Richardson' 

Cc: 'Gowrishankar' ; dev at dpdk.org; 
'David Marchand' 
Subject: Re: [dpdk-dev] [PATCH] eal/ppc: fix secondary process to map hugepages 
in correct order

On 20/05/2016 04:03, Chao Zhu wrote:
> Bruce,
>
> Recently, we find some bugs with mmap in PowerLinux. The mmap doesn't 
> respect the address hints. In function get_virtual_area() in 
> eal_memory.c, mmap get the free virtual address range as the address 
> hint. However, when mapping the real memory in 
> rte_eal_hugepage_init(), mmap doesn't return the same address as the 
> requested address. When taking a look at the /proc//maps, the 
> requested address range is free for use. With this bug, pre-allocate some 
> free space doesn't work.

Hi Chao,

If I understand you correctly, the issue you are describing would cause DPDK to 
fail initialization even with the reverse reordering that you are doing for PPC.

Basically (just showing relevant initialization steps):
1. map_all_hugepages(..., orig=1)
 - map all hugepages
2. find physical address for each hugepage 3. sort by physical address 4. 
map_all_hugepages(..., orig=0)
 - Now we try to get big chunk of virtual address for a block of contig 
hugepages
so we know we have that virtual address chunk available.
 - Then we try to remap each page of that block of contig pages into that
virtual address chunk.

So the issue you are describing would make step 4 fail regardless of the 
different ordering that PPC does.
I'm probably missing something, would you care to elaborate?

Sergio


> We're trying to create some test case and report it as a bug to kernel 
> community.
>
> Here's some logs:
> ===
> EAL: Ask a virtual area of 0x1000 bytes
> EAL: Virtual area found at 0x3fffa700 (size = 0x1000)
> EAL: map_all_hugepages, /mnt/huge/rtemap_52,paddr 0x3ca600  
> requested
> addr: 0x3fffa700  mmaped addr: 0x3efff000
> EAL: map_all_hugepages, /mnt/huge/rtemap_53,paddr 0x3ca500  
> requested
> addr: 0x3fffa800  mmaped addr: 0x3effef00
> EAL: map_all_hugepages, /mnt/huge/rtemap_54,paddr 0x3ca400  
> requested
> addr: 0x3fffa900  mmaped addr: 0x3effee00
> EAL: map_all_hugepages, /mnt/huge/rtemap_55,paddr 0x3ca300  
> requested
> addr: 0x3fffaa00  mmaped addr: 0x3effed00
> EAL: map_all_hugepages, /mnt/huge/rtemap_56,paddr 0x3ca200  
> requested
> addr: 0x3fffab00  mmaped addr: 0x3effec00
> EAL: map_all_hugepages, /mnt/huge/rtemap_57,paddr 0x3ca100  
> requested
> addr: 0x3fffac00  mmaped addr: 0x3effeb00
> EAL: map_all_hugepages, /mnt/huge/rtemap_58,paddr 0x3ca000  
> requested
> addr: 0x3fffad00  mmaped addr: 0x3effea00
> EAL: map_all_hugepages, /mnt/huge/rtemap_59,paddr 0x3c9f00  
> requested
> addr: 0x3fffae00  mmaped addr: 0x3effe900
> EAL: map_all_hugepages, /mnt/huge/rtemap_60,paddr 0x3c9e00  
> requested
> addr: 0x3fffaf00  mmaped addr: 0x3effe800
> EAL: map_all_hugepages, /mnt/huge/rtemap_61,paddr 0x3c9d00  
> requested
> addr: 0x3fffb000  mmaped addr: 0x3effe700
> EAL: map_all_hugepages, /mnt/huge/rtemap_62, paddr 0x3c9c00 
> requested
> addr:  0x3fffb100 mmaped addr:  0x3effe600
> EAL: map_all_hugepages, /mnt/huge/rtemap_63, paddr 0x3c9b00 
> requested
> addr:  0x3fffb200 mmaped addr:  0x3effe500
> EAL: map_all_hugepages, /mnt/huge/rtemap_51, paddr 0x3c9a00 
> requested
> addr:  0x3fffb300 mmaped addr:  0x3effe400
> EAL: map_all_hugepages, /mnt/huge/rtemap_50, paddr 0x3c9900 
> requested
> addr:  0x3fffb400 mmaped addr:  0x3effe300
> EAL: map_all_hugepages, /mnt/huge/rtemap_49, paddr 0x3c9800 
> requested
> addr:  0x3fffb500 mmaped addr:  0x3effe200
> EAL: map_all_hugepages, /mnt/huge/rtemap_48, paddr 0x3c9700 
> requested
> addr:  0x3fffb600 mmaped addr:  0x3effe100
>
> # cat /proc/143765/maps
> 0100-0200 rw-s  00:27 61162550
> /mnt/huge/rtemap_14
> 0200-0300 rw-s  00:27 61162536
> /mnt/huge/rtema

[dpdk-dev] [PATCH] eal/ppc: fix secondary process to map hugepages in correct order

2016-05-20 Thread Chao Zhu
/mnt/huge/rtemap_50
3effe400-3effe500 rw-s  00:27 61162587
/mnt/huge/rtemap_51
3effe500-3effe600 rw-s  00:27 61162599
/mnt/huge/rtemap_63
3effe600-3effe700 rw-s  00:27 61162598
/mnt/huge/rtemap_62
3effe700-3effe800 rw-s  00:27 61162597
/mnt/huge/rtemap_61
3effe800-3effe900 rw-s  00:27 61162596
/mnt/huge/rtemap_60
3effe900-3effea00 rw-s  00:27 61162595
/mnt/huge/rtemap_59
3effea00-3effeb00 rw-s  00:27 61162594
/mnt/huge/rtemap_58
3effeb00-3effec00 rw-s  00:27 61162593
/mnt/huge/rtemap_57
3effec00-3effed00 rw-s  00:27 61162592
/mnt/huge/rtemap_56
3effed00-3effee00 rw-s  00:27 61162591
/mnt/huge/rtemap_55
3effee00-3effef00 rw-s  00:27 61162590
/mnt/huge/rtemap_54
3effef00-3efff000 rw-s  00:27 61162589
/mnt/huge/rtemap_53
3efff000-3efff100 rw-s  00:27 61162588
/mnt/huge/rtemap_52
3efff100-3efff200 rw-s  00:27 61162565
/mnt/huge/rtemap_29
3efff200-3efff300 rw-s  00:27 61162564
/mnt/huge/rtemap_28
3efff300-3efff400 rw-s  00:27 61162563
/mnt/huge/rtemap_27
3efff400-3efff500 rw-s  00:27 61162562
/mnt/huge/rtemap_26
3efff500-3efff600 rw-s  00:27 61162561
/mnt/huge/rtemap_25
3efff600-3efff700 rw-s  00:27 61162560
/mnt/huge/rtemap_24
3efff700-3efff800 rw-s  00:27 61162559
/mnt/huge/rtemap_23
3efff800-3efff900 rw-s  00:27 61162558
/mnt/huge/rtemap_22
3efff900-3efffa00 rw-s  00:27 61162557
/mnt/huge/rtemap_21
3efffa00-3efffb00 rw-s  00:27 61162556
/mnt/huge/rtemap_20
3efffb00-3efffc00 rw-s  00:27 61162555
/mnt/huge/rtemap_19
3efffc00-3efffd00 rw-s  00:27 61162554
/mnt/huge/rtemap_18
3efffd00-3efffe00 rw-s  00:27 61162553
/mnt/huge/rtemap_17
3efffe00-3e00 rw-s  00:27 61162552
/mnt/huge/rtemap_16
3e00-3f00 rw-s  00:27 61162551
/mnt/huge/rtemap_15
3fffb7bc-3fffb7c1 rw-p  00:00 0 
3fffb7c1-3fffb7c5 rw-s  00:12 3926240
/run/.rte_config
3fffb7c5-3fffb7c7 rw-p  00:00 0 
3fffb7c7-3fffb7e2 r-xp  08:32 7090531
/opt/at7.1/lib64/power8/libc-2.19.so
3fffb7e2-3fffb7e3 rw-p 001a 08:32 7090531
/opt/at7.1/lib64/power8/libc-2.19.so
3fffb7e3-3fffb7e5 rw-p  00:00 0 
3fffb7e5-3fffb7e7 r-xp  08:32 7090563
/opt/at7.1/lib64/power8/libpthread-2.19.so
3fffb7e7-3fffb7e8 rw-p 0001 08:32 7090563
/opt/at7.1/lib64/power8/libpthread-2.19.so
3fffb7e8-3fffb7e9 r-xp  08:32 7090210
/opt/at7.1/lib64/libdl-2.19.so
3fffb7e9-3fffb7ea rw-p  08:32 7090210
/opt/at7.1/lib64/libdl-2.19.so
3fffb7ea-3fffb7ec r-xp  08:32 7090533
/opt/at7.1/lib64/power8/libz.so.1.2.6
3fffb7ec-3fffb7ed rw-p 0001 08:32 7090533
/opt/at7.1/lib64/power8/libz.so.1.2.6
3fffb7ed-3fffb7f9 r-xp  08:32 7090568
/opt/at7.1/lib64/power8/libm-2.19.so
3fffb7f9-3fffb7fa rw-p 000b 08:32 7090568
/opt/at7.1/lib64/power8/libm-2.19.so
3fffb7fa-3fffb7fc r-xp  00:00 0
[vdso]
3fffb7fc-3fffb7ff r-xp  08:32 7090048
/opt/at7.1/lib64/ld-2.19.so
3fffb7ff-3fffb800 rw-p 0002 08:32 7090048
/opt/at7.1/lib64/ld-2.19.so
3ffd-4000 rw-p  00:00 0
[stack]


-Original Message-
From: Bruce Richardson [mailto:bruce.richard...@intel.com] 
Sent: 2016年3月23日 1:11
To: Sergio Gonzalez Monroy 
Cc: Gowrishankar ; dev at dpdk.org;
chaozhu at linux.vnet.ibm.com; David Marchand 
Subject: Re: [dpdk-dev] [PATCH] eal/ppc: fix secondary process to map
hugepages in correct order

On Tue, Mar 22, 2016 at 04:35:32PM +, Sergio Gonzalez Monroy wrote:
> First of all, forgive my ignorance regarding ppc64 and if the 
> questions are naive but after having a look to the already existing 
> code for ppc64 and this patch now, why are we doing this reverse 
> mapping at all?
> 
> I guess the question revolves around the comment in eal_memory.c:
> 1316 /* On PPC64 architecture, the mmap always start from
> higher
> 1317  * virtual address to lower address. Here, both the
> physical
> 1318  * address and virtual address are in descending
order
> */
> 
> From looking at the code, for ppc64 we do qsort in reverse order and 
> thereafter everything looks to be is done to account for that reverse 
> sorting.
> 
> CC: Chao Zhu and David Marchand as original author and reviewer of the
code.
> 
> Sergio
>

Just to add my 2c here. At one point, with I believe some i686 installs -
don't remember the specific OS/kernel, we found that the mmap calls were
returning the highest free address first and then working downwards

[dpdk-dev] [PATCH v2 6/7] virtio: fix pci accesses for ppc64 in legacy mode

2016-05-19 Thread Chao Zhu
Olivier,

Thanks for the patches! 
Just one comment:
POWER8 machine only supports little endian OS on bare metal. In VM guest, it
can support both little endian and big endian OS. Did you try to run it on
both host (little endian) and guest (big endian and little endian)?

-Original Message-
From: Olivier Matz [mailto:olivier.m...@6wind.com] 
Sent: 2016?5?17? 18:00
To: dev at dpdk.org
Cc: david.marchand at 6wind.com; chaozhu at linux.vnet.ibm.com; yuanhan.liu at 
linux.
intel.com; huawei.xie at intel.com
Subject: [PATCH v2 6/7] virtio: fix pci accesses for ppc64 in legacy mode

From: David Marchand 

Although ppc supports both endianesses, qemu supposes that the cpu is big
endian and enforces this for the virtio-net stuff.

Fix PCI accesses in legacy mode. Only ppc64le is supported at the moment.

Signed-off-by: David Marchand 
Signed-off-by: Olivier Matz 
---
 drivers/net/virtio/virtio_pci.c | 68
+
 1 file changed, 68 insertions(+)

diff --git a/drivers/net/virtio/virtio_pci.c
b/drivers/net/virtio/virtio_pci.c index 9cdca06..ebf4cf7 100644
--- a/drivers/net/virtio/virtio_pci.c
+++ b/drivers/net/virtio/virtio_pci.c
@@ -55,20 +55,88 @@
  */
 #define VIRTIO_PCI_CONFIG(hw) (((hw)->use_msix) ? 24 : 20)

+/*
+ * Since we are in legacy mode:
+ * http://ozlabs.org/~rusty/virtio-spec/virtio-0.9.5.pdf
+ *
+ * "Note that this is possible because while the virtio header is PCI (i.e.
+ * little) endian, the device-specific region is encoded in the native 
+endian of
+ * the guest (where such distinction is applicable)."
+ *
+ * For powerpc which supports both, qemu supposes that cpu is big 
+endian and
+ * enforces this for the virtio-net stuff.
+ */
+
 static void
 legacy_read_dev_config(struct virtio_hw *hw, size_t offset,
   void *dst, int length)
 {
+#ifdef RTE_ARCH_PPC_64
+   int size;
+
+   while (length > 0) {
+   if (length >= 4) {
+   size = 4;
+   rte_eal_pci_ioport_read(>io, dst, size,
+   VIRTIO_PCI_CONFIG(hw) + offset);
+   *(uint32_t *)dst = rte_be_to_cpu_32(*(uint32_t
*)dst);
+   } else if (length >= 2) {
+   size = 2;
+   rte_eal_pci_ioport_read(>io, dst, size,
+   VIRTIO_PCI_CONFIG(hw) + offset);
+   *(uint16_t *)dst = rte_be_to_cpu_16(*(uint16_t
*)dst);
+   } else {
+   size = 1;
+   rte_eal_pci_ioport_read(>io, dst, size,
+   VIRTIO_PCI_CONFIG(hw) + offset);
+   }
+
+   dst = (char *)dst + size;
+   offset += size;
+   length -= size;
+   }
+#else
rte_eal_pci_ioport_read(>io, dst, length,
VIRTIO_PCI_CONFIG(hw) + offset);
+#endif
 }

 static void
 legacy_write_dev_config(struct virtio_hw *hw, size_t offset,
const void *src, int length)
 {
+#ifdef RTE_ARCH_PPC_64
+   union {
+   uint32_t u32;
+   uint16_t u16;
+   } tmp;
+   int size;
+
+   while (length > 0) {
+   if (length >= 4) {
+   size = 4;
+   tmp.u32 = rte_cpu_to_be_32(*(const uint32_t *)src);
+   rte_eal_pci_ioport_write(>io, , size,
+   VIRTIO_PCI_CONFIG(hw) + offset);
+   } else if (length >= 2) {
+   size = 2;
+   tmp.u16 = rte_cpu_to_be_16(*(const uint16_t *)src);
+   rte_eal_pci_ioport_write(>io, , size,
+   VIRTIO_PCI_CONFIG(hw) + offset);
+   } else {
+   size = 1;
+   rte_eal_pci_ioport_write(>io, src, size,
+   VIRTIO_PCI_CONFIG(hw) + offset);
+   }
+
+   src = (const char *)src + size;
+   offset += size;
+   length -= size;
+   }
+#else
rte_eal_pci_ioport_write(>io, src, length,
 VIRTIO_PCI_CONFIG(hw) + offset);
+#endif
 }

 static uint64_t
--
2.8.0.rc3




[dpdk-dev] [PATCH 1/2] Fix CPU and memory parameters on IBM POWER8

2016-03-31 Thread Chao Zhu
Thomas,

It seems I didn't get the messages from David. Anyway, I sent out an updated
patch.
Thanks for the reminder!

-Original Message-
From: Thomas Monjalon [mailto:thomas.monja...@6wind.com] 
Sent: 2016年3月30日 18:53
To: Chao Zhu 
Cc: dev at dpdk.org; David Marchand ; Richardson,
Bruce ; Panu Matilainen 
Subject: Re: [dpdk-dev] [PATCH 1/2] Fix CPU and memory parameters on IBM
POWER8

2016-03-25 09:48, David Marchand:
> On Fri, Mar 25, 2016 at 9:11 AM, Chao Zhu 
wrote:
> > This patch fixes the max logic number and memory channel number 
> > settings on IBM POWER8 platform.
> > 1. The max number of logic cores of a POWER8 processor is 96. Normally,
> >there are two sockets on a server. So the max number of logic cores
> >are 192. So this patch sets CONFIG_RTE_MAX_LCORE to 256.
> 
> This is a power8 configuration item, this should go to power8 config 
> file, not common_base.
> 
> > 2. Currently, the max number of memory channels are hardcoded to 4.
However,
> >on a POWER8 machine, the max number of memory channels are 8. To fix
this,
> >CONFIG_RTE_MAX_NCHANNELS is added to do the configuration.
> 
> I don't see any reason why we would need a max value for force_nchannel.
> We should just get rid of this check, this is an obscure parameter for 
> most people, so people playing with it know what they are doing 
> (hopefully ?).
> 
> On the other hand, if power8 has some specifics about it, maybe we 
> should introduce some default value in a arch eal header for other 
> dpdk components to use (like in mempool).
> Thoughts ?

Chao? We are running out of time for 16.04.




[dpdk-dev] [PATCH v2 2/2] Fix prefetch instruction on IBM POWER8

2016-03-31 Thread Chao Zhu
The current prefetch instruction (dcbt) implementation for IBM POWER8 uses the
wrong Touch Hint (TH) parameter. The current setting of TH=1 indicates to load
data from the current cache line and an unlimited number of sequentially
following cache lines. TH=0 means to load data from the current cache line.
The rte_prefetch0 function is defined to load one cache line, which means TH=0
is the suitable value here.

Signed-off-by: Chao Zhu 
---
 .../common/include/arch/ppc_64/rte_prefetch.h  |6 +++---
 1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h 
b/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h
index bcc7185..9a1995e 100644
--- a/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h
@@ -41,17 +41,17 @@ extern "C" {

 static inline void rte_prefetch0(const volatile void *p)
 {
-   asm volatile ("dcbt 0,%[p],1" : : [p] "r" (p));
+   asm volatile ("dcbt 0,%[p],0" : : [p] "r" (p));
 }

 static inline void rte_prefetch1(const volatile void *p)
 {
-   asm volatile ("dcbt 0,%[p],1" : : [p] "r" (p));
+   asm volatile ("dcbt 0,%[p],0" : : [p] "r" (p));
 }

 static inline void rte_prefetch2(const volatile void *p)
 {
-   asm volatile ("dcbt 0,%[p],1" : : [p] "r" (p));
+   asm volatile ("dcbt 0,%[p],0" : : [p] "r" (p));
 }

 static inline void rte_prefetch_non_temporal(const volatile void *p)
-- 
1.7.1



[dpdk-dev] [PATCH v2 1/2] Fix CPU and memory parameters on IBM POWER8

2016-03-31 Thread Chao Zhu
This patch fixes the max logical core number and memory channel number settings
on the IBM POWER8 platform.
1. The max number of logical cores of a POWER8 processor is 96. Normally,
   there are two sockets on a server, so the max number of logical cores
   is 192. This patch therefore sets CONFIG_RTE_MAX_LCORE to 256.
2. The socket number on the POWER8 little endian platform can be larger than 16.
   This patch sets CONFIG_RTE_MAX_NUMA_NODES to 32 for POWER8.
3. Currently, the max number of memory channels is hardcoded to 4. However,
   on a POWER8 machine, the max number of memory channels is 8. This patch
   removes the constraint.

Signed-off-by: Chao Zhu 
---
 config/defconfig_ppc_64-power8-linuxapp-gcc |2 ++
 lib/librte_eal/common/eal_common_options.c  |3 +--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/config/defconfig_ppc_64-power8-linuxapp-gcc 
b/config/defconfig_ppc_64-power8-linuxapp-gcc
index a80a19e..9eb0cc4 100644
--- a/config/defconfig_ppc_64-power8-linuxapp-gcc
+++ b/config/defconfig_ppc_64-power8-linuxapp-gcc
@@ -36,6 +36,8 @@ CONFIG_RTE_ARCH="ppc_64"
 CONFIG_RTE_ARCH_PPC_64=y
 CONFIG_RTE_ARCH_64=y

+CONFIG_RTE_MAX_LCORE=256
+CONFIG_RTE_MAX_NUMA_NODES=32
 CONFIG_RTE_CACHE_LINE_SIZE=128

 CONFIG_RTE_TOOLCHAIN="gcc"
diff --git a/lib/librte_eal/common/eal_common_options.c 
b/lib/librte_eal/common/eal_common_options.c
index 29942ea..2b418d5 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -797,8 +797,7 @@ eal_parse_common_option(int opt, const char *optarg,
/* force number of channels */
case 'n':
conf->force_nchannel = atoi(optarg);
-   if (conf->force_nchannel == 0 ||
-   conf->force_nchannel > 4) {
+   if (conf->force_nchannel == 0) {
RTE_LOG(ERR, EAL, "invalid channel number\n");
return -1;
}
-- 
1.7.1



[dpdk-dev] [PATCH v2 0/2] Fix parameters and prefetch function on IBM POWER8

2016-03-31 Thread Chao Zhu
This patch set fixes CPU/memory parameters and corrects wrong prefetch settings
for IBM POWER8.
Changes in v2:
1. Move the parameter configuration to POWER specific configuration file
2. Remove the memory channel number constraint instead of adding an additional
configuration flag.

Chao Zhu (2):
  Fix CPU and memory parameters on IBM POWER8
  Fix prefetch instruction on IBM POWER8

 config/defconfig_ppc_64-power8-linuxapp-gcc|2 ++
 lib/librte_eal/common/eal_common_options.c |3 +--
 .../common/include/arch/ppc_64/rte_prefetch.h  |6 +++---
 3 files changed, 6 insertions(+), 5 deletions(-)



[dpdk-dev] [PATCH 2/2] Fix prefetch instruction on IBM POWER8

2016-03-25 Thread Chao Zhu
The current prefetch instruction (dcbt) implementation for IBM POWER8 uses the
wrong Touch Hint (TH) parameter. The current setting of TH=1 indicates to load
data from the current cache line and an unlimited number of sequentially
following cache lines. TH=0 means to load data from the current cache line.
The rte_prefetch0 function is defined to load one cache line, which means TH=0
is suited here.

Signed-off-by: Chao Zhu 
---
 .../common/include/arch/ppc_64/rte_prefetch.h  |6 +++---
 1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h 
b/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h
index bcc7185..9a1995e 100644
--- a/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h
@@ -41,17 +41,17 @@ extern "C" {

 static inline void rte_prefetch0(const volatile void *p)
 {
-   asm volatile ("dcbt 0,%[p],1" : : [p] "r" (p));
+   asm volatile ("dcbt 0,%[p],0" : : [p] "r" (p));
 }

 static inline void rte_prefetch1(const volatile void *p)
 {
-   asm volatile ("dcbt 0,%[p],1" : : [p] "r" (p));
+   asm volatile ("dcbt 0,%[p],0" : : [p] "r" (p));
 }

 static inline void rte_prefetch2(const volatile void *p)
 {
-   asm volatile ("dcbt 0,%[p],1" : : [p] "r" (p));
+   asm volatile ("dcbt 0,%[p],0" : : [p] "r" (p));
 }

 static inline void rte_prefetch_non_temporal(const volatile void *p)
-- 
1.7.1



[dpdk-dev] [PATCH 1/2] Fix CPU and memory parameters on IBM POWER8

2016-03-25 Thread Chao Zhu
This patch fixes the max logical core number and memory channel number settings
on the IBM POWER8 platform.
1. The max number of logical cores of a POWER8 processor is 96. Normally,
   there are two sockets on a server, so the max number of logical cores
   is 192. This patch therefore sets CONFIG_RTE_MAX_LCORE to 256.
2. Currently, the max number of memory channels is hardcoded to 4. However,
   on a POWER8 machine, the max number of memory channels is 8. To fix this,
   CONFIG_RTE_MAX_NCHANNELS is added to do the configuration.

Signed-off-by: Chao Zhu 
---
 config/common_base |3 ++-
 lib/librte_eal/common/eal_common_options.c |2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/config/common_base b/config/common_base
index dbd405b..1beea32 100644
--- a/config/common_base
+++ b/config/common_base
@@ -83,10 +83,11 @@ CONFIG_RTE_CACHE_LINE_SIZE=64
 # Compile Environment Abstraction Layer
 #
 CONFIG_RTE_LIBRTE_EAL=y
-CONFIG_RTE_MAX_LCORE=128
+CONFIG_RTE_MAX_LCORE=256
 CONFIG_RTE_MAX_NUMA_NODES=8
 CONFIG_RTE_MAX_MEMSEG=256
 CONFIG_RTE_MAX_MEMZONE=2560
+CONFIG_RTE_MAX_NCHANNELS=8
 CONFIG_RTE_MAX_TAILQ=32
 CONFIG_RTE_LOG_LEVEL=8
 CONFIG_RTE_LOG_HISTORY=256
diff --git a/lib/librte_eal/common/eal_common_options.c 
b/lib/librte_eal/common/eal_common_options.c
index 29942ea..6c268c1 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -798,7 +798,7 @@ eal_parse_common_option(int opt, const char *optarg,
case 'n':
conf->force_nchannel = atoi(optarg);
if (conf->force_nchannel == 0 ||
-   conf->force_nchannel > 4) {
+   conf->force_nchannel > RTE_MAX_NCHANNELS) {
RTE_LOG(ERR, EAL, "invalid channel number\n");
return -1;
}
-- 
1.7.1



[dpdk-dev] [PATCH 0/2] Fix parameters and prefetch function on IBM POWER8

2016-03-25 Thread Chao Zhu
This patch set fixes CPU/memory parameters and corrects wrong prefetch settings
for IBM POWER8.

Chao Zhu (2):
  Fix CPU and memory parameters on IBM POWER8
  Fix prefetch instruction on IBM POWER8

 config/common_base |3 ++-
 lib/librte_eal/common/eal_common_options.c |2 +-
 .../common/include/arch/ppc_64/rte_prefetch.h  |6 +++---
 3 files changed, 6 insertions(+), 5 deletions(-)



[dpdk-dev] [PATCH] librte_hash: Fix compile errors on IBM POWER

2015-12-09 Thread Chao Zhu
This patch fixes the compile errors caused by the missing "size_t" definition in
rte_hash.h. The compile error exists on IBM POWER and ARM (see the message from
jerin.jacob at caviumnetworks.com).

The errors are like:
In file included from /tmp/dpdk/app/test/test_hash_scaling.c:35:0:
/tmp/dpdk/build/include/rte_hash.h:70:70: error: unknown type name 'size_t'
 typedef int (*rte_hash_cmp_eq_t)(const void *key1, const void *key2, size_t 
key_len);
  ^
/tmp/dpdk/build/include/rte_hash.h:120:48: error: unknown type name
'rte_hash_cmp_eq_t'
 void rte_hash_set_cmp_func(struct rte_hash *h, rte_hash_cmp_eq_t func);

Signed-off-by: Chao Zhu 
---
 lib/librte_hash/rte_hash.h |1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/lib/librte_hash/rte_hash.h b/lib/librte_hash/rte_hash.h
index 6494ade..85fc416 100644
--- a/lib/librte_hash/rte_hash.h
+++ b/lib/librte_hash/rte_hash.h
@@ -41,6 +41,7 @@
  */

 #include 
+#include 

 #ifdef __cplusplus
 extern "C" {
-- 
1.7.1



[dpdk-dev] [PATCH] librte_hash: Fix compile errors on IBM POWER

2015-12-09 Thread Chao Zhu
Jerin,

Both stdio.h and stddef.h work on POWER.  To make it work on ARM, I'll
use stddef.h and submit another patch.
Thanks!

On 2015/12/8 17:10, Jerin Jacob wrote:
> On Tue, Dec 08, 2015 at 04:28:52PM +0800, Chao Zhu wrote:
>> This patch fixes the compile errors caused by lacking of "size_t" definition 
>> in rte_hash.h.
>>
>> Signed-off-by: Chao Zhu 
>> ---
>>   lib/librte_hash/rte_hash.h |1 +
>>   1 files changed, 1 insertions(+), 0 deletions(-)
>>
>> diff --git a/lib/librte_hash/rte_hash.h b/lib/librte_hash/rte_hash.h
>> index 6494ade..5046e9b 100644
>> --- a/lib/librte_hash/rte_hash.h
>> +++ b/lib/librte_hash/rte_hash.h
>> @@ -41,6 +41,7 @@
>>*/
>>   
>>   #include 
>> +#include 
> Thanks for the patch.
> The Same issue comes with arm64 GCC 5.2 compiler too.
> Shouldn't be stddef.h instead of stdio.h?
>
>>   
>>   #ifdef __cplusplus
>>   extern "C" {
>> -- 
>> 1.7.1
>>



[dpdk-dev] [PATCH] librte_hash: Fix compile errors on IBM POWER

2015-12-08 Thread Chao Zhu
This patch fixes the compile errors caused by the missing "size_t" definition in
rte_hash.h.

Signed-off-by: Chao Zhu 
---
 lib/librte_hash/rte_hash.h |1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/lib/librte_hash/rte_hash.h b/lib/librte_hash/rte_hash.h
index 6494ade..5046e9b 100644
--- a/lib/librte_hash/rte_hash.h
+++ b/lib/librte_hash/rte_hash.h
@@ -41,6 +41,7 @@
  */

 #include 
+#include 

 #ifdef __cplusplus
 extern "C" {
-- 
1.7.1



[dpdk-dev] [PATCH] PPC64: turn off fm10k driver compilation on IBM POWER

2015-11-04 Thread Chao Zhu
The fm10k vector driver is specific to the x86 platform and can't compile
on IBM POWER because the tmmintrin.h header file is missing. This patch turns
off fm10k driver compilation on IBM POWER to prevent the compile issue.

Signed-off-by: Chao Zhu 
---
 config/defconfig_ppc_64-power8-linuxapp-gcc |1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/config/defconfig_ppc_64-power8-linuxapp-gcc 
b/config/defconfig_ppc_64-power8-linuxapp-gcc
index f1af518..03760c4 100644
--- a/config/defconfig_ppc_64-power8-linuxapp-gcc
+++ b/config/defconfig_ppc_64-power8-linuxapp-gcc
@@ -50,6 +50,7 @@ CONFIG_RTE_LIBRTE_VIRTIO_PMD=n
 CONFIG_RTE_LIBRTE_VMXNET3_PMD=n
 CONFIG_RTE_LIBRTE_PMD_BOND=n
 CONFIG_RTE_LIBRTE_ENIC_PMD=n
+CONFIG_RTE_LIBRTE_FM10K_PMD=n

 # This following libraries are not available on Power. So they're turned off.
 CONFIG_RTE_LIBRTE_LPM=n
-- 
1.7.1



[dpdk-dev] Fwd: [PATCH] PPC: Fix NUMA node numbering on IBM POWER8 LE machine

2015-09-17 Thread Chao Zhu

David,

Let me take a look.

On 2015/9/16 16:09, David Marchand wrote:
> Hello Chao,
>
> On Wed, Sep 16, 2015 at 4:02 AM, Chao Zhu  <mailto:chaozhu at linux.vnet.ibm.com>> wrote:
>
> Actually, without this change, DPDK can't work properly on PPC64
> little endian platform. It'll report "EAL: Not enough memory
> available! Requested: xxxMB, available: xxxMB" such kind of error.
> But for users, they don't know that changing the value of
> CONFIG_RTE_MAX_NUMA_NODES can fix this.  That why I invoke this patch.
>
>
> Sorry, I forgot to reply in this thread.
>
> Well, to me, this is a workaround.
> Yes it will work, but what happens if tomorrow we have some hardware 
> that tells us that it has some numa node which 6 index ?
>
> I think we need a rework in eal to properly handle this, like I said in 
> this mail :
> http://dpdk.org/ml/archives/dev/2015-September/023630.html
>
> Do you think you can look into this ?
>
>
> Thanks.
>
> -- 
> David Marchand



[dpdk-dev] Fwd: [PATCH] PPC: Fix NUMA node numbering on IBM POWER8 LE machine

2015-09-16 Thread Chao Zhu
Actually, without this change, DPDK can't work properly on the PPC64 little
endian platform. It will report an error such as "EAL: Not enough memory
available! Requested: xxxMB, available: xxxMB". But users don't know that
changing the value of CONFIG_RTE_MAX_NUMA_NODES can fix this.  That's why I
submitted this patch.

On 2015/9/15 17:01, Bruce Richardson wrote:
> On Tue, Sep 15, 2015 at 03:46:49PM +0800, Chao Zhu wrote:
>> Any response of this patch?
> Looks ok to me - pretty trivial change.
>
> /Bruce
>>  Forwarded Message 
>> Subject: [dpdk-dev] [PATCH] PPC: Fix NUMA node numbering on IBM POWER8 LE
>> machine
>> Date:Fri, 14 Aug 2015 20:19:48 +0800
>> From:Chao Zhu 
>> To:  dev at dpdk.org
>>
>>
>>
>> When Linux is running on bare metal, it gets the raw hardware
>> information. On POWER8 little endian bare metal machine, the node number
>> is not continuous. It will jump from 0 to other values, for example, it
>> can be 0, 1, 16, 17. This patch modified the CONFIG_RTE_MAX_NUMA_NODES
>> value to make dpdk work on POWER8 bare metal little endian machine.
>>
>> Signed-off-by: Chao Zhu 
>> ---
>>   config/common_linuxapp |2 +-
>>   1 files changed, 1 insertions(+), 1 deletions(-)
>>
>> diff --git a/config/common_linuxapp b/config/common_linuxapp
>> index 0de43d5..82a027e 100644
>> --- a/config/common_linuxapp
>> +++ b/config/common_linuxapp
>> @@ -98,7 +98,7 @@ CONFIG_RTE_NEXT_ABI=y
>>   #
>>   CONFIG_RTE_LIBRTE_EAL=y
>>   CONFIG_RTE_MAX_LCORE=128
>> -CONFIG_RTE_MAX_NUMA_NODES=8
>> +CONFIG_RTE_MAX_NUMA_NODES=32
>>   CONFIG_RTE_MAX_MEMSEG=256
>>   CONFIG_RTE_MAX_MEMZONE=2560
>>   CONFIG_RTE_MAX_TAILQ=32
>> -- 
>> 1.7.1
>>
>>
>>



[dpdk-dev] Fwd: [PATCH] PPC: Fix NUMA node numbering on IBM POWER8 LE machine

2015-09-15 Thread Chao Zhu

Any response to this patch?

 Forwarded Message 
Subject:[dpdk-dev] [PATCH] PPC: Fix NUMA node numbering on IBM POWER8 
LE machine
Date:   Fri, 14 Aug 2015 20:19:48 +0800
From:   Chao Zhu <chao...@linux.vnet.ibm.com>
To: dev at dpdk.org



When Linux is running on bare metal, it gets the raw hardware
information. On POWER8 little endian bare metal machine, the node number
is not continuous. It will jump from 0 to other values, for example, it
can be 0, 1, 16, 17. This patch modified the CONFIG_RTE_MAX_NUMA_NODES
value to make dpdk work on POWER8 bare metal little endian machine.

Signed-off-by: Chao Zhu 
---
  config/common_linuxapp |2 +-
  1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/config/common_linuxapp b/config/common_linuxapp
index 0de43d5..82a027e 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -98,7 +98,7 @@ CONFIG_RTE_NEXT_ABI=y
  #
  CONFIG_RTE_LIBRTE_EAL=y
  CONFIG_RTE_MAX_LCORE=128
-CONFIG_RTE_MAX_NUMA_NODES=8
+CONFIG_RTE_MAX_NUMA_NODES=32
  CONFIG_RTE_MAX_MEMSEG=256
  CONFIG_RTE_MAX_MEMZONE=2560
  CONFIG_RTE_MAX_TAILQ=32
-- 
1.7.1





[dpdk-dev] [PATCH] PPC: Fix NUMA node numbering on IBM POWER8 LE machine

2015-08-14 Thread Chao Zhu
When Linux is running on bare metal, it gets the raw hardware
information. On a POWER8 little endian bare metal machine, the node numbers
are not contiguous. They can jump from 0 to other values; for example, they
can be 0, 1, 16, 17. This patch modifies the CONFIG_RTE_MAX_NUMA_NODES
value to make DPDK work on POWER8 bare metal little endian machines.

Signed-off-by: Chao Zhu 
---
 config/common_linuxapp |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/config/common_linuxapp b/config/common_linuxapp
index 0de43d5..82a027e 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -98,7 +98,7 @@ CONFIG_RTE_NEXT_ABI=y
 #
 CONFIG_RTE_LIBRTE_EAL=y
 CONFIG_RTE_MAX_LCORE=128
-CONFIG_RTE_MAX_NUMA_NODES=8
+CONFIG_RTE_MAX_NUMA_NODES=32
 CONFIG_RTE_MAX_MEMSEG=256
 CONFIG_RTE_MAX_MEMZONE=2560
 CONFIG_RTE_MAX_TAILQ=32
-- 
1.7.1



[dpdk-dev] [PATCH] eal/ppc: fix build

2015-08-05 Thread Chao Zhu
Acked-by: Chao Zhu 

On 2015/8/5 17:13, Thomas Monjalon wrote:
> Byte ordering macros were used without including the needed header.
>
> Fixes: ce10b21bf624 ("eal/ppc: fix cpu cycle count for little endian")
>
> Signed-off-by: Thomas Monjalon 
> ---
>   lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h | 2 ++
>   1 file changed, 2 insertions(+)
>
> diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h 
> b/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h
> index e663c48..64beddf 100644
> --- a/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h
> +++ b/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h
> @@ -39,6 +39,8 @@ extern "C" {
>
>   #include "generic/rte_cycles.h"
>
> +#include 
> +
>   /**
>* Read the time base register.
>*



[dpdk-dev] [PATCH] fm10k: fix the compilation on big endian platforms

2015-08-03 Thread Chao Zhu


On 2015/8/3 17:06, Thomas Monjalon wrote:
> 2015-08-03 14:31, Chao Zhu:
>> The rte_cpu_to_le_32 function can't be used to define const variables
>> because it has different implementation on big endian platforms. If
>> doing so, it will cause 'initializer element is not constant' compiling
>> error. This patch fixes this problem.
>> --- a/drivers/net/fm10k/base/fm10k_tlv.c
>> +++ b/drivers/net/fm10k/base/fm10k_tlv.c
> You cannot change a base driver file except the osdep header where
> FM10K_CPU_TO_LE32 is defined.
>
> I don't understand why it doesn't give you a constant, given this definition:
>
> #define rte_bswap32(x) ((uint32_t)(__builtin_constant_p(x) ?\
> rte_constant_bswap32(x) :\
> rte_arch_bswap32(x)))
>
> Have you tried CONFIG_RTE_FORCE_INTRINSICS=y ?
> It should trigger this definition:
>
> #define rte_bswap32(x) __builtin_bswap32(x)
Yes.  CONFIG_RTE_FORCE_INTRINSICS=y works on Power big endian.  But if I
turn this off, the error happens.  It seems rte_constant_bswap32 doesn't
work on Power8? I'll try to check it.
>
>> -STATIC const __le32 test_le[2] = { FM10K_CPU_TO_LE32(0x12345678),
>> -   FM10K_CPU_TO_LE32(0x9abcdef0)};
>> +#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
>> +STATIC const __le32 test_le[2] = {0x78563412,0xf0debc9a};
>> +#else
>> +STATIC const __le32 test_le[2] = {0x12345678,0x9abcdef0};
>> +#endif



[dpdk-dev] [PATCH] fm10k: fix the compilation on big endian platforms

2015-08-03 Thread Chao Zhu
Got it. Thanks!

On 2015/8/3 16:53, Thomas Monjalon wrote:
> 2015-08-03 14:31, Chao Zhu:
>> The using of rte_cpu_to_le_32() in pre-compile macros will cause error
>> 'initializer element is not constant' on big endian platforms. This patch
>> fixes the compilation error of fm10k driver.
>>
>>
>> Chao Zhu (1):
>>fm10k: fix the compilation on big endian platforms
> Chao, no need of cover-letter for only 1 patch.
>



[dpdk-dev] [PATCH] PPC64: add cpu cycle support to IBM POWER8 PPC64LE

2015-08-03 Thread Chao Zhu
On the IBM POWER8 PPC64 little endian architecture, the definition of the tsc
union is different. This patch fixes this to enable the right output
from rte_rdtsc().

Signed-off-by: Chao Zhu 
---
 .../common/include/arch/ppc_64/rte_cycles.h|5 +
 1 files changed, 5 insertions(+), 0 deletions(-)

diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h 
b/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h
index fd26e8e..e663c48 100644
--- a/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h
@@ -51,8 +51,13 @@ rte_rdtsc(void)
union {
uint64_t tsc_64;
struct {
+#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
uint32_t hi_32;
uint32_t lo_32;
+#else
+   uint32_t lo_32;
+   uint32_t hi_32;
+#endif
};
} tsc;
uint32_t tmp;
-- 
1.7.1



[dpdk-dev] [PATCH] PPC64: add cpu cycle support to IBM POWER8 PPC64LE

2015-08-03 Thread Chao Zhu
This patch adds the definition of the tsc union for the POWER8 PPC64 little endian
architecture.

Chao Zhu (1):
  PPC64: add cpu cycle support to IBM POWER8 PPC64LE

 .../common/include/arch/ppc_64/rte_cycles.h|5 +
 1 files changed, 5 insertions(+), 0 deletions(-)



[dpdk-dev] [PATCH] fm10k: fix the compilation on big endian platforms

2015-08-03 Thread Chao Zhu
The rte_cpu_to_le_32 function can't be used to define const variables
because it has a different implementation on big endian platforms. Doing
so causes an 'initializer element is not constant' compile error.
This patch fixes this problem.

Signed-off-by: Chao Zhu 
---
 drivers/net/fm10k/base/fm10k_tlv.c |7 +--
 1 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/net/fm10k/base/fm10k_tlv.c 
b/drivers/net/fm10k/base/fm10k_tlv.c
index 1d9d7d8..5b2937d 100644
--- a/drivers/net/fm10k/base/fm10k_tlv.c
+++ b/drivers/net/fm10k/base/fm10k_tlv.c
@@ -664,8 +664,11 @@ STATIC const s64 test_s64 = -0x123456789abcdef0ll;
 STATIC const s32 test_s32 = -0x1235678;
 STATIC const s16 test_s16 = -0x1234;
 STATIC const s8  test_s8  = -0x12;
-STATIC const __le32 test_le[2] = { FM10K_CPU_TO_LE32(0x12345678),
-  FM10K_CPU_TO_LE32(0x9abcdef0)};
+#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
+STATIC const __le32 test_le[2] = {0x78563412,0xf0debc9a};
+#else
+STATIC const __le32 test_le[2] = {0x12345678,0x9abcdef0};
+#endif

 /* The message below is meant to be used as a test message to demonstrate
  * how to use the TLV interface and to test the types.  Normally this code
-- 
1.7.1



[dpdk-dev] [PATCH] fm10k: fix the compilation on big endian platforms

2015-08-03 Thread Chao Zhu
The use of rte_cpu_to_le_32() in pre-compile macros causes the error
'initializer element is not constant' on big endian platforms. This patch
fixes the compilation error of the fm10k driver.


Chao Zhu (1):
  fm10k: fix the compilation on big endian platforms

 drivers/net/fm10k/base/fm10k_tlv.c |7 +--
 1 files changed, 5 insertions(+), 2 deletions(-)



[dpdk-dev] [PATCH] ethdev: fix ABI breakage in lro code

2015-08-03 Thread Chao Zhu
Confirmed. It can compile on Power8 Big Endian.
Thank you!

On 2015/8/3 10:39, Chao Zhu wrote:
>
> Really sorry for the delay.
> Originally, I thought the email was to asking the ABI checking tools 
> on Power which I'm not so familiar with.  So this took me some time to 
> find solution. For Power little endian, the build is OK. I'll give 
> feedback when I tried Big  endian compilation.
>
> On 2015/7/31 18:34, Neil Horman wrote:
>> On Fri, Jul 31, 2015 at 09:03:45AM +, Mcnamara, John wrote:
>>>> -Original Message-
>>>> From: Neil Horman [mailto:nhorman at tuxdriver.com]
>>>> Sent: Monday, July 13, 2015 3:00 PM
>>>> To: Mcnamara, John
>>>> Cc: dev at dpdk.org; vladz at cloudius-systems.com
>>>> Subject: Re: [dpdk-dev] [PATCH] ethdev: fix ABI breakage in lro code
>>>>
>>>> On Mon, Jul 13, 2015 at 10:47:03AM +, Mcnamara, John wrote:
>>>>>> -Original Message-
>>>>>> From: Neil Horman [mailto:nhorman at tuxdriver.com]
>>>>>> Sent: Monday, July 13, 2015 11:42 AM
>>>>>> To: Mcnamara, John
>>>>>> Cc: dev at dpdk.org; vladz at cloudius-systems.com
>>>>>> Subject: Re: [dpdk-dev] [PATCH] ethdev: fix ABI breakage in lro code
>>>>>>
>>>>>> On Mon, Jul 13, 2015 at 11:26:25AM +0100, John McNamara wrote:
>>>>>>> Fix for ABI breakage introduced in LRO addition. Moves lro
>>>>>>> bitfield to the end of the struct/member.
>>>>>>>
>>>>>>> Fixes: 8eecb3295aed (ixgbe: add LRO support)
>>>>>>>
>>>>>>> Signed-off-by: John McNamara 
>>>>>>> ---
>>>>>>>   lib/librte_ether/rte_ethdev.h | 4 ++--
>>>>>>>   1 file changed, 2 insertions(+), 2 deletions(-)
>>>>>>>
>>>>>>> diff --git a/lib/librte_ether/rte_ethdev.h
>>>>>>> b/lib/librte_ether/rte_ethdev.h index 79bde89..1c3ace1 100644
>>>>>>> --- a/lib/librte_ether/rte_ethdev.h
>>>>>>> +++ b/lib/librte_ether/rte_ethdev.h
>>>>>>> @@ -1578,9 +1578,9 @@ struct rte_eth_dev_data {
>>>>>>>   uint8_t port_id;   /**< Device [external] port
>>>> identifier.
>>>>>> */
>>>>>>>   uint8_t promiscuous   : 1, /**< RX promiscuous mode ON(1) /
>>>> OFF(0).
>>>>>> */
>>>>>>>   scattered_rx : 1, /**< RX of scattered packets is ON(1)
>>>> /
>>>>>> OFF(0) */
>>>>>>> -lro  : 1, /**< RX LRO is ON(1) / OFF(0) */
>>>>>>>   all_multicast : 1, /**< RX all multicast mode ON(1) /
>>>> OFF(0).
>>>>>> */
>>>>>>> -dev_started : 1; /**< Device state: STARTED(1) /
>>>> STOPPED(0).
>>>>>> */
>>>>>>> +dev_started : 1, /**< Device state: STARTED(1) /
>>>> STOPPED(0).
>>>>>> */
>>>>>>> +lro : 1; /**< RX LRO is ON(1) / OFF(0) */
>>>>>>>   };
>>>>>>>
>>>>>>>   /**
>>>>>>> -- 
>>>>>>> 1.8.1.4
>>>>>>>
>>>>>>>
>>>>>> I presume the ABI checker stopped complaining about this with the
>>>>>> patch, yes?
>>>>> Hi Neil,
>>>>>
>>>>> Yes, I replied about that in the previous thread.
>>>>>
>>>> Thank you, I'll ack as soon as Chao confirms its not a problem on 
>>>> ppc Neil
>>> Hi Chao,
>>>
>>> Any reply on this.
>>>
>>> Neil, if there is no reply to this from the PPC maintainer do you 
>>> have any objection to this going in as is.
>>>
>>> It at least fixes the LRO ABI breakage on the platforms we can test on.
>>>
>>> John
>>>
>> Well, I suppose at this point the only thing its hurting is ppc, so 
>> no, no
>> objections.  But its pretty disheartening for an arch maintainer to 
>> dissappear
>> so soon after adding arch support.
>>
>> Neil
>>
>



[dpdk-dev] [PATCH] ethdev: fix ABI breakage in lro code

2015-08-03 Thread Chao Zhu

Really sorry for the delay.
Originally, I thought the email was asking about the ABI checking tools on
Power, which I'm not so familiar with, so it took me some time to find a
solution. For Power little endian, the build is OK. I'll give feedback
once I have tried the big endian compilation.

On 2015/7/31 18:34, Neil Horman wrote:
> On Fri, Jul 31, 2015 at 09:03:45AM +, Mcnamara, John wrote:
>>> -Original Message-
>>> From: Neil Horman [mailto:nhorman at tuxdriver.com]
>>> Sent: Monday, July 13, 2015 3:00 PM
>>> To: Mcnamara, John
>>> Cc: dev at dpdk.org; vladz at cloudius-systems.com
>>> Subject: Re: [dpdk-dev] [PATCH] ethdev: fix ABI breakage in lro code
>>>
>>> On Mon, Jul 13, 2015 at 10:47:03AM +, Mcnamara, John wrote:
> -Original Message-
> From: Neil Horman [mailto:nhorman at tuxdriver.com]
> Sent: Monday, July 13, 2015 11:42 AM
> To: Mcnamara, John
> Cc: dev at dpdk.org; vladz at cloudius-systems.com
> Subject: Re: [dpdk-dev] [PATCH] ethdev: fix ABI breakage in lro code
>
> On Mon, Jul 13, 2015 at 11:26:25AM +0100, John McNamara wrote:
>> Fix for ABI breakage introduced in LRO addition. Moves lro
>> bitfield to the end of the struct/member.
>>
>> Fixes: 8eecb3295aed (ixgbe: add LRO support)
>>
>> Signed-off-by: John McNamara 
>> ---
>>   lib/librte_ether/rte_ethdev.h | 4 ++--
>>   1 file changed, 2 insertions(+), 2 deletions(-)
>>
>> diff --git a/lib/librte_ether/rte_ethdev.h
>> b/lib/librte_ether/rte_ethdev.h index 79bde89..1c3ace1 100644
>> --- a/lib/librte_ether/rte_ethdev.h
>> +++ b/lib/librte_ether/rte_ethdev.h
>> @@ -1578,9 +1578,9 @@ struct rte_eth_dev_data {
>>  uint8_t port_id;   /**< Device [external] port
>>> identifier.
> */
>>  uint8_t promiscuous   : 1, /**< RX promiscuous mode ON(1) /
>>> OFF(0).
> */
>>  scattered_rx : 1,  /**< RX of scattered packets is ON(1)
>>> /
> OFF(0) */
>> -lro  : 1,  /**< RX LRO is ON(1) / OFF(0) */
>>  all_multicast : 1, /**< RX all multicast mode ON(1) /
>>> OFF(0).
> */
>> -dev_started : 1;   /**< Device state: STARTED(1) /
>>> STOPPED(0).
> */
>> +dev_started : 1,   /**< Device state: STARTED(1) /
>>> STOPPED(0).
> */
>> +lro : 1;   /**< RX LRO is ON(1) / OFF(0) */
>>   };
>>
>>   /**
>> --
>> 1.8.1.4
>>
>>
> I presume the ABI checker stopped complaining about this with the
> patch, yes?
 Hi Neil,

 Yes, I replied about that in the previous thread.

>>> Thank you, I'll ack as soon as Chao confirms its not a problem on ppc Neil
>> Hi Chao,
>>
>> Any reply on this.
>>
>> Neil, if there is no reply to this from the PPC maintainer do you have any 
>> objection to this going in as is.
>>
>> It at least fixes the LRO ABI breakage on the platforms we can test on.
>>
>> John
>>
> Well, I suppose at this point the only thing its hurting is ppc, so no, no
> objections.  But its pretty disheartening for an arch maintainer to dissappear
> so soon after adding arch support.
>
> Neil
>



[dpdk-dev] [PATCH v3] i40e: Fix the endian issue for the i40e read registers functions

2015-07-17 Thread Chao Zhu
Acked-by: Chao Zhu 

On 2015/7/17 15:25, Zhe Tao wrote:
> Signed-off-by: Zhe Tao 
> ---
> PATCH v3: Edit the subject make it more clear
>
> PATCH v2: Edit the comments make it more clear
>
> PATCH v1: Add the endian conversion for registers operations.
>
>   drivers/net/i40e/base/i40e_osdep.h | 4 ++--
>   1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/net/i40e/base/i40e_osdep.h 
> b/drivers/net/i40e/base/i40e_osdep.h
> index 3ce8057..70d2721 100644
> --- a/drivers/net/i40e/base/i40e_osdep.h
> +++ b/drivers/net/i40e/base/i40e_osdep.h
> @@ -122,10 +122,10 @@ do {
> \
>   ((volatile uint32_t *)((char *)(a)->hw_addr + (reg)))
>   static inline uint32_t i40e_read_addr(volatile void *addr)
>   {
> - return I40E_PCI_REG(addr);
> + return rte_le_to_cpu_32(I40E_PCI_REG(addr));
>   }
>   #define I40E_PCI_REG_WRITE(reg, value) \
> - do {I40E_PCI_REG((reg)) = (value);} while(0)
> + do { I40E_PCI_REG((reg)) = rte_cpu_to_le_32(value); } while (0)
>
>   #define I40E_WRITE_FLUSH(a) I40E_READ_REG(a, I40E_GLGEN_STAT)
>   #define I40EVF_WRITE_FLUSH(a) I40E_READ_REG(a, I40E_VFGEN_RSTAT)



[dpdk-dev] [PATCH] MAINTAINERS: claim EAL of IBM POWER

2015-06-29 Thread Chao Zhu
Signed-off-by: Chao Zhu 
---
 MAINTAINERS |3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 54f0973..8fe52c5 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -105,7 +105,8 @@ F: app/test/test_mp_secondary.c
 F: examples/multi_process/
 F: doc/guides/sample_app_ug/multi_process.rst

-IBM Power
+IBM POWER
+M: Chao Zhu 
 F: lib/librte_eal/common/include/arch/ppc_64/

 Intel x86
-- 
1.7.1



[dpdk-dev] [PATCH] doc: Add IBM Power description to linux guides

2014-12-13 Thread Chao Zhu
This patch adds IBM ppc_64 descriptions, including architecture
support and compile requirements on Linux.

Signed-off-by: Chao Zhu 
---
 doc/guides/linux_gsg/build_dpdk.rst  |4 +-
 doc/guides/linux_gsg/quick_start.rst |   40 +
 doc/guides/linux_gsg/sys_reqs.rst|   29 
 3 files changed, 42 insertions(+), 31 deletions(-)

diff --git a/doc/guides/linux_gsg/build_dpdk.rst 
b/doc/guides/linux_gsg/build_dpdk.rst
index ee6cb69..5bdd6d8 100644
--- a/doc/guides/linux_gsg/build_dpdk.rst
+++ b/doc/guides/linux_gsg/build_dpdk.rst
@@ -66,9 +66,9 @@ The format of a Intel?? DPDK target is:

 where:

-*   ARCH can be:  i686, x86_64
+*   ARCH can be:  i686, x86_64, ppc_64

-*   MACHINE can be:  native, ivshmem
+*   MACHINE can be:  native, ivshmem, power8

 *   EXECENV can be:  linuxapp,  bsdapp

diff --git a/doc/guides/linux_gsg/quick_start.rst 
b/doc/guides/linux_gsg/quick_start.rst
index 089dddb..2956c6c 100644
--- a/doc/guides/linux_gsg/quick_start.rst
+++ b/doc/guides/linux_gsg/quick_start.rst
@@ -132,29 +132,31 @@ Some options in the script prompt the user for further 
data before proceeding.

 [7] x86_64-native-linuxapp-icc

+[8] ppc_64-power8-linuxapp-gcc
+
 

 Step 2: Setup linuxapp environment

 

-[8] Insert IGB UIO module
+[9] Insert IGB UIO module

-[9] Insert VFIO module
+[10] Insert VFIO module

-[10] Insert KNI module
+[11] Insert KNI module

-[11] Setup hugepage mappings for non-NUMA systems
+[12] Setup hugepage mappings for non-NUMA systems

-[12] Setup hugepage mappings for NUMA systems
+[13] Setup hugepage mappings for NUMA systems

-[13] Display current Ethernet device settings
+[14] Display current Ethernet device settings

-[14] Bind Ethernet device to IGB UIO module
+[15] Bind Ethernet device to IGB UIO module

-[15] Bind Ethernet device to VFIO module
+[16] Bind Ethernet device to VFIO module

-[16] Setup VFIO permissions
+[17] Setup VFIO permissions

 

@@ -162,9 +164,9 @@ Some options in the script prompt the user for further data 
before proceeding.

 

-[17] Run test application ($RTE_TARGET/app/test)
+[18] Run test application ($RTE_TARGET/app/test)

-[18] Run testpmd application in interactive mode ($RTE_TARGET/app/testpmd)
+[19] Run testpmd application in interactive mode ($RTE_TARGET/app/testpmd)

 

@@ -172,7 +174,7 @@ Some options in the script prompt the user for further data 
before proceeding.

 

-[19] List hugepage info from /proc/meminfo
+[20] List hugepage info from /proc/meminfo

 

@@ -180,19 +182,19 @@ Some options in the script prompt the user for further 
data before proceeding.

 

-[20] Uninstall all targets
+[21] Uninstall all targets

-[21] Unbind NICs from IGB UIO driver
+[22] Unbind NICs from IGB UIO driver

-[22] Remove IGB UIO module
+[23] Remove IGB UIO module

-[23] Remove VFIO module
+[24] Remove VFIO module

-[24] Remove KNI module
+[25] Remove KNI module

-[25] Remove hugepage mappings
+[26] Remove hugepage mappings

-[26] Exit Script
+[27] Exit Script

 Option:

diff --git a/doc/guides/linux_gsg/sys_reqs.rst 
b/doc/guides/linux_gsg/sys_reqs.rst
index c14411e..f712bec 100644
--- a/doc/guides/linux_gsg/sys_reqs.rst
+++ b/doc/guides/linux_gsg/sys_reqs.rst
@@ -38,8 +38,8 @@ This chapter describes the packages required to compile the 
Intel?? DPDK.
 If the Intel?? DPDK is being used on an Intel?? Communications Chipset 
89xx Series platform,
 please consult the *Intel?? Communications Chipset 89xx Series Software 
for Linux* Getting Started Guide*.

-BIOS Setting Prerequisite
--
+BIOS Setting Prerequisite on x86
+

 For the majority of platforms, no special BIOS settings are needed to use 
basic Intel® DPDK functionality.
 However, for additional HPET timer and power management functionality,
@@ -61,18 +61,22 @@ Compilation of the Intel® DPDK

 *   coreutils:  cmp, sed, grep, arch

-*   gcc: versions 4.5.x or later is recommended.
-On some distributions, some specific compiler flags and linker flags are 
enabled by default and
-affect performance (- fstack-protector, for example).
-Please refer to the documentation of your distribution and to gcc 
-dumpspecs.
+*   gcc

[dpdk-dev] [PATCH] doc: Add IBM Power architecture descriptions to Linux guides

2014-12-13 Thread Chao Zhu
This patch added prerequisites, compiling options and some IBM Power related
descriptions to Linux guides.

Chao Zhu (1):
  doc: Add IBM Power description to linux guides

 doc/guides/linux_gsg/build_dpdk.rst  |4 +-
 doc/guides/linux_gsg/quick_start.rst |   40 +
 doc/guides/linux_gsg/sys_reqs.rst|   29 
 3 files changed, 42 insertions(+), 31 deletions(-)



[dpdk-dev] [PATCH] Fix KNI compiling issue on IBM Power

2014-12-05 Thread Chao Zhu

On 2014/12/5 4:05, Neil Horman wrote:
> On Thu, Dec 04, 2014 at 04:59:59PM +0100, Thomas Monjalon wrote:
>> 2014-12-04 10:32, Neil Horman:
>>> On Thu, Dec 04, 2014 at 02:47:03PM +0100, Thomas Monjalon wrote:
>>>> 2014-12-04 08:29, Neil Horman:
>>>>> On Thu, Dec 04, 2014 at 12:59:31PM +0100, Thomas Monjalon wrote:
>>>>>>> Because of different cache line size, the alignment of struct
>>>>>>> rte_kni_mbuf in rte_kni_common.h doesn't work on IBM Power. This patch
>>>>>>> changed from 64 to RTE_CACHE_LINE_SIZE macro to do the alignment.
>>>>>>>
>>>>>>> Signed-off-by: Chao Zhu 
>>>>>> Acked-by: Thomas Monjalon 
>>>>>>
>>>>>> Applied
>>>>>>
>>>>> Woah!  Slow down here, I'm not sure if this makes sense to fix this way.  
>>>>> The
>>>>> exact same ifndef/define/endif construct is used for this macro in 
>>>>> rte_memory.h.
>>>>> Currently they're defined to the same value, but if that ever changes, this 
>>>>> macro
>>>>> will return different values based on the order in which header files are
>>>>> included.  That doesn't seem appropriate at all.
>>>> I agree (was my comment) but the patch was applied as a hot fix.
>>>> A better fix has to be found for DPDK 2.0.
>>>> Do you agree this fix is enough for DPDK 1.8 release?
>>>>
>>> I really don't like the idea of hacks like this being used.
>> It's not really a hack to replace a hardcoded value by a constant.
>> I think you should agree it's better (but not perfect).
>>
> I'm not referring to replacing a hardcoded value with a constant macro.  The
> hack I'm referring to is that of defining that macro in multiple places using
> the ifndef/define/endif construct.  Generally its fine to use that mechanism 
> to
> define a macro if you want to allow for builds to override it on the command
> line or some such, but you've got the same construct in multiple header files
> with this patch, which in turn leads to the possibility of the definition
> location changing dependent on which header file is included first in a
> compilation unit.  Thats the hack.
I agree.  It's better to have one definition for all uses. Actually, 
the RTE_CACHE_LINE_SIZE macro
is defined in many places, such as rte_acl_osdep_alone.h and 
rte_memory.h. Of course, we can have
it defined in some common place. If needed, I can do it. However, I would 
prefer to have the build system
detect it and generate a global configuration header file.
>>> Truthfully, I would rather the KNI just not be built on power for now,
>>> it is after all a new feature for which not everything works yet (e.g. the
>>> acl library and the ixgbe rxtx vec code).
>>> With this in place, KNI will build now, but it means that anything that
>>> changes cache line sizes until it gets fixed properly runs the risk of
>>> introducing weird behavioral issues at compile time.
>> It was also the case before: 64 was hardcoded for KNI.
>>
> See above, not concerned with the hardcoded vs macro idea, just how the macro 
> is
> implemented.
>
>>> I'm also concerned about the fact that, since we have no bug tracker for 
>>> DPDK,
>>> indicating that there will be an improved fix in 2.0 isn't really a 
>>> guarantee,
>>> in that it requires that someone remember to do it.
>> Please be confident that I keep it noted and I'll do what I can to have it
>> properly fixed.
>> By the way, submitting a fix now would store the need in patchwork.
>>
> Yes, of course it would fix the problem, all problems could be fixed now if we
> could just have the time to do everything immediately, but alas that is not 
> the
> case, and its also the reason why I don't really trust your memory (or mine, 
> or
> any of our collective memories), as the master todo list for things like this.
> I'm too busy to do a proper fix now, I'm assuming you are as well, but Chao
> apparently feels this is important enough to address (based on the fact that
> he's proposed a fix for the problem).  As such, Chao is the one who should be
> addressing this issue.  Until then, KNI can just not build on powerpc.
>
>>>>>> I wonder if we could try to guess the cache line size instead of
>>>>>> configuring it in many places.
>>>>>> Maybe we could use something like sysconf(_SC_LEVEL1_DCACHE_LINESIZE)?
>>>>>>
>>>>> This is a good idea, but I think its a bit broken for a few reasons:
>>>

[dpdk-dev] [PATCH v3] Fix two compile issues with i686 platform

2014-12-05 Thread Chao Zhu
Michael,

I'm looking at it. I'll give you feedback soon.

On 2014/12/5 14:56, Qiu, Michael wrote:
> Hi Chao
>
> Would you please take a look at this patch?
>
> It solves an issue introduced by the Power Arch support patch.
>
> Your comments are very precious :)
>
> Thanks,
> Michael
> On 12/5/2014 2:03 PM, Michael Qiu wrote:
>> lib/librte_eal/linuxapp/eal/eal_memory.c:324:4: error: comparison
>> is always false due to limited range of data type [-Werror=type-limits]
>>  || (hugepage_sz == RTE_PGSIZE_16G)) {
>>  ^
>> cc1: all warnings being treated as errors
>>
>> lib/librte_eal/linuxapp/eal/eal.c(461): error #2259: non-pointer
>> conversion from "long long" to "void *" may lose significant bits
>> RTE_PTR_ALIGN_CEIL((uintptr_t)addr, RTE_PGSIZE_16M);
>>
>> This was introduced by commit b77b5639:
>>  mem: add huge page sizes for IBM Power
>>
>> The root cause is that size_t and uintptr_t are 32-bit in i686
>> platform, but RTE_PGSIZE_16M and RTE_PGSIZE_16G are always 64-bit.
>>
>> Define RTE_PGSIZE_16G only in 64 bit platform to avoid
>> this issue.
>>
>> Signed-off-by: Michael Qiu 
>> ---
>>   v3 ---> v2
>>  Change RTE_PGSIZE_16G from ULL to UL
>>  to keep all entries consistent
>>
>>   V2 ---> v1
>>  Change two type entries to one, and
>>  leave RTE_PGSIZE_16G only valid for
>>  64-bit platform
>>
>>   app/test/test_memzone.c| 18 --
>>   lib/librte_eal/common/eal_common_memzone.c |  2 ++
>>   lib/librte_eal/common/include/rte_memory.h | 14 --
>>   lib/librte_eal/linuxapp/eal/eal_memory.c   | 12 +---
>>   4 files changed, 27 insertions(+), 19 deletions(-)
>>
>> diff --git a/app/test/test_memzone.c b/app/test/test_memzone.c
>> index 5da6903..7bab8b5 100644
>> --- a/app/test/test_memzone.c
>> +++ b/app/test/test_memzone.c
>> @@ -145,8 +145,10 @@ test_memzone_reserve_flags(void)
>>  hugepage_1GB_avail = 1;
>>  if (ms[i].hugepage_sz == RTE_PGSIZE_16M)
>>  hugepage_16MB_avail = 1;
>> +#ifdef RTE_ARCH_64
>>  if (ms[i].hugepage_sz == RTE_PGSIZE_16G)
>>  hugepage_16GB_avail = 1;
>> +#endif
>>  }
>>  /* Display the availability of 2MB ,1GB, 16MB, 16GB pages */
>>  if (hugepage_2MB_avail)
>> @@ -234,8 +236,8 @@ test_memzone_reserve_flags(void)
>>  return -1;
>>  }
>>   
>> -/* Check if 1GB huge pages are unavailable, that function fails 
>> unless
>> - * HINT flag is indicated
>> +/* Check if 2MB huge pages are unavailable, that function
>> + * fails unless HINT flag is indicated
>>   */
>>  if (!hugepage_2MB_avail) {
>>  mz = rte_memzone_reserve("flag_zone_2M_HINT", size, 
>> SOCKET_ID_ANY,
>> @@ -295,8 +297,9 @@ test_memzone_reserve_flags(void)
>>  return -1;
>>  }
>>   
>> -/* Check if 1GB huge pages are unavailable, that function fails
>> - * unless HINT flag is indicated
>> +#ifdef RTE_ARCH_64
>> +/* Check if 16GB huge pages are unavailable, that function
>> + * fails unless HINT flag is indicated
>>   */
>>  if (!hugepage_16GB_avail) {
>>  mz = rte_memzone_reserve("flag_zone_16G_HINT", size,
>> @@ -318,7 +321,9 @@ test_memzone_reserve_flags(void)
>>  return -1;
>>  }
>>  }
>> +#endif
>>  }
>> +#ifdef RTE_ARCH_64
>>  /*As with 16MB tests above for 16GB huge page requests*/
>>  if (hugepage_16GB_avail) {
>>  mz = rte_memzone_reserve("flag_zone_16G", size, SOCKET_ID_ANY,
>> @@ -343,8 +348,8 @@ test_memzone_reserve_flags(void)
>>  return -1;
>>  }
>>   
>> -/* Check if 1GB huge pages are unavailable, that function fails
>> - * unless HINT flag is indicated
>> +/* Check if 16MB huge pages are unavailable, that function
>> + * fails unless HINT flag is indicated
>>   */
>>  if (!hugepage_16MB_avail) {
>>  mz = rte_memzone_reserve("flag_zone_16M_HINT", size,
>> @@ -376,6 +381,7 @@ test_memzone_reserve_flags(void)
>>  }
>>  }
>>  }
>> +#endif
>>  return 0;
>>   }
>>   
>> diff --git a/lib/librte_eal/common/eal_common_memzone.c 
>> b/lib/librte_eal/common/eal_common_memzone.c
>> index b5a5d72..ee233ad 100644
>> --- a/lib/librte_eal/common/eal_common_memzone.c
>> +++ b/lib/librte_eal/common/eal_common_memzone.c
>> @@ -221,12 +221,14 @@ memzone_reserve_aligned_thread_unsafe(const char 
>> *name, size_t len,
>>  if ((flags & RTE_MEMZONE_1GB) &&
>>  free_memseg[i].hugepage_sz == RTE_PGSIZE_2M)
>>  continue;
>> +#ifdef RTE_ARCH_64
>>  if ((flags & RTE_MEMZONE_16MB) &&
>> 

[dpdk-dev] [PATCH] Fix KNI compiling issue on IBM Power

2014-12-04 Thread Chao Zhu
Because of different cache line size, the alignment of struct
rte_kni_mbuf in rte_kni_common.h doesn't work on IBM Power. This patch
changed from 64 to RTE_CACHE_LINE_SIZE macro to do the alignment.

Signed-off-by: Chao Zhu 
---
 .../linuxapp/eal/include/exec-env/rte_kni_common.h |7 +--
 1 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h 
b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
index e548161..6fc6442 100644
--- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
+++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
@@ -67,6 +67,9 @@
  * KNI name is part of memzone name.
  */
 #define RTE_KNI_NAMESIZE 32
+#ifndef RTE_CACHE_LINE_SIZE
+#define RTE_CACHE_LINE_SIZE 64  /**< Cache line size. */
+#endif

 /*
  * Request id.
@@ -108,7 +111,7 @@ struct rte_kni_fifo {
  * Padding is necessary to assure the offsets of these fields
  */
 struct rte_kni_mbuf {
-   void *buf_addr __attribute__((__aligned__(64)));
+   void *buf_addr __attribute__((__aligned__(RTE_CACHE_LINE_SIZE)));
char pad0[10];
uint16_t data_off;  /**< Start address of data in segment buffer. */
char pad1[4];
@@ -118,7 +121,7 @@ struct rte_kni_mbuf {
uint32_t pkt_len;   /**< Total pkt len: sum of all segment 
data_len. */

/* fields on second cache line */
-   char pad3[8] __attribute__((__aligned__(64)));
+   char pad3[8] __attribute__((__aligned__(RTE_CACHE_LINE_SIZE)));
void *pool;
void *next;
 };
-- 
1.7.1



[dpdk-dev] [PATCH] Fix KNI compiling on IBM Power

2014-12-04 Thread Chao Zhu
This patch solves the KNI compiling problem on IBM Power by using the
RTE_CACHE_LINE_SIZE macro.

Chao Zhu (1):
  Fix KNI compiling issue on IBM Power

 .../linuxapp/eal/include/exec-env/rte_kni_common.h |7 +--
 1 files changed, 5 insertions(+), 2 deletions(-)



[dpdk-dev] [PATCH 0/2] fix endianness in EAL

2014-12-04 Thread Chao Zhu

On 2014/12/4 4:47, Thomas Monjalon wrote:
> It can be hard to implement a reliable detection of endianness.
> Previous trials in testpmd failed.
> The IBM Power patchset introduced a config option.
>
> This patchset try to improve the situation by having a detection
> in EAL headers.
>
> Please test it (especially on IBM Power) with different toolchains
> or distributions.
>
> Thomas Monjalon (2):
>eal: detect endianness
>app/testpmd: fix endianness detection
>
I tested it on IBM Power with GCC 4.8. It works fine.
If there is no other better way to do the detection, I think this patch 
is good enough.

Acked-by: Chao Zhu




[dpdk-dev] [PATCH v5 14/14] Fix the compiling of test-pmd on IBM Power Architecture

2014-11-25 Thread Chao Zhu
This patch fixes compiling problems on IBM Power architecture and turns
on the test-pmd compiling option in the configuration file. Actually, this
is a big-endian compiling fix.

Signed-off-by: Chao Zhu 
---
 app/test-pmd/config.c |   39 +--
 1 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
index 9bc08f4..4fee6c0 100644
--- a/app/test-pmd/config.c
+++ b/app/test-pmd/config.c
@@ -612,8 +612,13 @@ ring_dma_zone_lookup(const char *ring_name, uint8_t 
port_id, uint16_t q_id)
 union igb_ring_dword {
uint64_t dword;
struct {
+#ifdef RTE_ARCH_BIG_ENDIAN
+   uint32_t lo;
+   uint32_t hi;
+#else
uint32_t hi;
uint32_t lo;
+#endif
} words;
 };

@@ -656,23 +661,29 @@ ring_rx_descriptor_display(const struct rte_memzone 
*ring_mz,
/* 32 bytes RX descriptor, i40e only */
struct igb_ring_desc_32_bytes *ring =
(struct igb_ring_desc_32_bytes *)ring_mz->addr;
+   ring[desc_id].lo_dword.dword =
+   rte_le_to_cpu_64(ring[desc_id].lo_dword.dword);
+   ring_rxd_display_dword(ring[desc_id].lo_dword);
+   ring[desc_id].hi_dword.dword =
+   rte_le_to_cpu_64(ring[desc_id].hi_dword.dword);
+   ring_rxd_display_dword(ring[desc_id].hi_dword);
+   ring[desc_id].resv1.dword =
+   rte_le_to_cpu_64(ring[desc_id].resv1.dword);
+   ring_rxd_display_dword(ring[desc_id].resv1);
+   ring[desc_id].resv2.dword =
+   rte_le_to_cpu_64(ring[desc_id].resv2.dword);
+   ring_rxd_display_dword(ring[desc_id].resv2);

-   ring_rxd_display_dword(rte_le_to_cpu_64(
-   ring[desc_id].lo_dword));
-   ring_rxd_display_dword(rte_le_to_cpu_64(
-   ring[desc_id].hi_dword));
-   ring_rxd_display_dword(rte_le_to_cpu_64(
-   ring[desc_id].resv1));
-   ring_rxd_display_dword(rte_le_to_cpu_64(
-   ring[desc_id].resv2));
return;
}
 #endif
/* 16 bytes RX descriptor */
-   ring_rxd_display_dword(rte_le_to_cpu_64(
-   ring[desc_id].lo_dword));
-   ring_rxd_display_dword(rte_le_to_cpu_64(
-   ring[desc_id].hi_dword));
+   ring[desc_id].lo_dword.dword =
+   rte_le_to_cpu_64(ring[desc_id].lo_dword.dword);
+   ring_rxd_display_dword(ring[desc_id].lo_dword);
+   ring[desc_id].hi_dword.dword =
+   rte_le_to_cpu_64(ring[desc_id].hi_dword.dword);
+   ring_rxd_display_dword(ring[desc_id].hi_dword);
 }

 static void
@@ -682,8 +693,8 @@ ring_tx_descriptor_display(const struct rte_memzone 
*ring_mz, uint16_t desc_id)
struct igb_ring_desc_16_bytes txd;

ring = (struct igb_ring_desc_16_bytes *)ring_mz->addr;
-   txd.lo_dword = rte_le_to_cpu_64(ring[desc_id].lo_dword);
-   txd.hi_dword = rte_le_to_cpu_64(ring[desc_id].hi_dword);
+   txd.lo_dword.dword = rte_le_to_cpu_64(ring[desc_id].lo_dword.dword);
+   txd.hi_dword.dword = rte_le_to_cpu_64(ring[desc_id].hi_dword.dword);
printf("0x%08X - 0x%08X / 0x%08X - 0x%08X\n",
(unsigned)txd.lo_dword.words.lo,
(unsigned)txd.lo_dword.words.hi,
-- 
1.7.1



[dpdk-dev] [PATCH v5 13/14] test_memzone:fix finding the second smallest segment

2014-11-25 Thread Chao Zhu
The current implementation in test_memzone.c has a bug in finding the
second smallest memory segment. What it finds is the previous smallest
memory segment, which is not necessarily the second smallest one. This bug
may cause test failures in some cases. This patch fixes this bug.

Signed-off-by: Chao Zhu 
---
 app/test/test_memzone.c |9 -
 1 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/app/test/test_memzone.c b/app/test/test_memzone.c
index 387dbbc..1658006 100644
--- a/app/test/test_memzone.c
+++ b/app/test/test_memzone.c
@@ -797,10 +797,9 @@ test_memzone_reserve_memory_in_smallest_segment(void)

/* set new smallest */
min_ms = ms;
-   }
-   else if (prev_min_ms == NULL) {
+   } else if ((prev_min_ms == NULL)
+   || (prev_min_ms->len > ms->len))
prev_min_ms = ms;
-   }
}

if (min_ms == NULL || prev_min_ms == NULL) {
@@ -877,8 +876,8 @@ test_memzone_reserve_memory_with_smallest_offset(void)

/* set new smallest */
min_ms = ms;
-   }
-   else if (prev_min_ms == NULL) {
+   } else if ((prev_min_ms == NULL)
+   || (prev_min_ms->len > ms->len)) {
prev_min_ms = ms;
}
}
-- 
1.7.1



[dpdk-dev] [PATCH v5 12/14] Add eal memory support for IBM Power Architecture

2014-11-25 Thread Chao Zhu
The mmap of hugepage files on IBM Power starts from high address to low
address. This is different from x86. This patch modified the memory
segment detection code to get the correct memory segment layout on Power
architecture. This patch also added a common RTE_ARCH_64 definition for
64-bit systems.

Signed-off-by: Chao Zhu 
---
 config/defconfig_ppc_64-power8-linuxapp-gcc   |1 +
 config/defconfig_x86_64-native-linuxapp-clang |1 +
 config/defconfig_x86_64-native-linuxapp-gcc   |1 +
 config/defconfig_x86_64-native-linuxapp-icc   |1 +
 lib/librte_eal/linuxapp/eal/eal_memory.c  |   91 ++---
 5 files changed, 71 insertions(+), 24 deletions(-)

diff --git a/config/defconfig_ppc_64-power8-linuxapp-gcc 
b/config/defconfig_ppc_64-power8-linuxapp-gcc
index 4060eca..89d623e 100644
--- a/config/defconfig_ppc_64-power8-linuxapp-gcc
+++ b/config/defconfig_ppc_64-power8-linuxapp-gcc
@@ -35,6 +35,7 @@ CONFIG_RTE_MACHINE="power8"
 CONFIG_RTE_ARCH="ppc_64"
 CONFIG_RTE_ARCH_PPC_64=y
 CONFIG_RTE_ARCH_BIG_ENDIAN=y
+CONFIG_RTE_ARCH_64=y

 CONFIG_RTE_TOOLCHAIN="gcc"
 CONFIG_RTE_TOOLCHAIN_GCC=y
diff --git a/config/defconfig_x86_64-native-linuxapp-clang 
b/config/defconfig_x86_64-native-linuxapp-clang
index bbda080..5f3074e 100644
--- a/config/defconfig_x86_64-native-linuxapp-clang
+++ b/config/defconfig_x86_64-native-linuxapp-clang
@@ -36,6 +36,7 @@ CONFIG_RTE_MACHINE="native"

 CONFIG_RTE_ARCH="x86_64"
 CONFIG_RTE_ARCH_X86_64=y
+CONFIG_RTE_ARCH_64=y

 CONFIG_RTE_TOOLCHAIN="clang"
 CONFIG_RTE_TOOLCHAIN_CLANG=y
diff --git a/config/defconfig_x86_64-native-linuxapp-gcc 
b/config/defconfig_x86_64-native-linuxapp-gcc
index 3de818a..60baf5b 100644
--- a/config/defconfig_x86_64-native-linuxapp-gcc
+++ b/config/defconfig_x86_64-native-linuxapp-gcc
@@ -36,6 +36,7 @@ CONFIG_RTE_MACHINE="native"

 CONFIG_RTE_ARCH="x86_64"
 CONFIG_RTE_ARCH_X86_64=y
+CONFIG_RTE_ARCH_64=y

 CONFIG_RTE_TOOLCHAIN="gcc"
 CONFIG_RTE_TOOLCHAIN_GCC=y
diff --git a/config/defconfig_x86_64-native-linuxapp-icc 
b/config/defconfig_x86_64-native-linuxapp-icc
index 795333b..71d1e28 100644
--- a/config/defconfig_x86_64-native-linuxapp-icc
+++ b/config/defconfig_x86_64-native-linuxapp-icc
@@ -36,6 +36,7 @@ CONFIG_RTE_MACHINE="native"

 CONFIG_RTE_ARCH="x86_64"
 CONFIG_RTE_ARCH_X86_64=y
+CONFIG_RTE_ARCH_64=y

 CONFIG_RTE_TOOLCHAIN="icc"
 CONFIG_RTE_TOOLCHAIN_ICC=y
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c 
b/lib/librte_eal/linuxapp/eal/eal_memory.c
index f2454f4..e6cb919 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -316,11 +316,12 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl,
 #endif
hugepg_tbl[i].filepath[sizeof(hugepg_tbl[i].filepath) - 
1] = '\0';
}
-#ifndef RTE_ARCH_X86_64
-   /* for 32-bit systems, don't remap 1G pages, just reuse original
-* map address as final map address.
+#ifndef RTE_ARCH_64
+   /* for 32-bit systems, don't remap 1G and 16G pages, just reuse
+* original map address as final map address.
 */
-   else if (hugepage_sz == RTE_PGSIZE_1G){
+   else if ((hugepage_sz == RTE_PGSIZE_1G)
+   || (hugepage_sz == RTE_PGSIZE_16G)) {
hugepg_tbl[i].final_va = hugepg_tbl[i].orig_va;
hugepg_tbl[i].orig_va = NULL;
continue;
@@ -335,9 +336,17 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl,
 * physical block: count the number of
 * contiguous physical pages. */
for (j = i+1; j < hpi->num_pages[0] ; j++) {
+#ifdef RTE_ARCH_PPC_64
+   /* The physical addresses are sorted in
+* descending order on PPC64 */
+   if (hugepg_tbl[j].physaddr !=
+   hugepg_tbl[j-1].physaddr - hugepage_sz)
+   break;
+#else
if (hugepg_tbl[j].physaddr !=
hugepg_tbl[j-1].physaddr + hugepage_sz)
break;
+#endif
}
num_pages = j - i;
vma_len = num_pages * hugepage_sz;
@@ -412,11 +421,12 @@ remap_all_hugepages(struct hugepage_file *hugepg_tbl, 
struct hugepage_info *hpi)

while (i < hpi->num_pages[0]) {

-#ifndef RTE_ARCH_X86_64
-   /* for 32-bit systems, don't remap 1G pages, just reuse original
-* map address as final map address.
+#ifndef RTE_ARCH_64
+   /* for 32-bit systems, don't remap 1G pages and 16G pages,
+* just reuse original map address as final m

[dpdk-dev] [PATCH v5 11/14] Add huge page size define for IBM Power architecture

2014-11-25 Thread Chao Zhu
IBM Power architecture has different huge page sizes (16MB, 16GB) than
x86. This patch defines RTE_PGSIZE_16M and RTE_PGSIZE_16G in the
rte_page_sizes enum variable and adds huge page size support of DPDK
for IBM Power architecture.

Signed-off-by: Chao Zhu 
---
 app/test/test_memzone.c |  123 ++-
 lib/librte_eal/common/eal_common_memzone.c  |   19 +++-
 lib/librte_eal/common/include/rte_memory.h  |9 ++-
 lib/librte_eal/common/include/rte_memzone.h |8 ++
 lib/librte_eal/linuxapp/eal/eal.c   |9 ++-
 5 files changed, 156 insertions(+), 12 deletions(-)

diff --git a/app/test/test_memzone.c b/app/test/test_memzone.c
index 381f643..387dbbc 100644
--- a/app/test/test_memzone.c
+++ b/app/test/test_memzone.c
@@ -133,6 +133,8 @@ test_memzone_reserve_flags(void)
const struct rte_memseg *ms;
int hugepage_2MB_avail = 0;
int hugepage_1GB_avail = 0;
+   int hugepage_16MB_avail = 0;
+   int hugepage_16GB_avail = 0;
const size_t size = 100;
int i = 0;
ms = rte_eal_get_physmem_layout();
@@ -141,12 +143,20 @@ test_memzone_reserve_flags(void)
hugepage_2MB_avail = 1;
if (ms[i].hugepage_sz == RTE_PGSIZE_1G)
hugepage_1GB_avail = 1;
+   if (ms[i].hugepage_sz == RTE_PGSIZE_16M)
+   hugepage_16MB_avail = 1;
+   if (ms[i].hugepage_sz == RTE_PGSIZE_16G)
+   hugepage_16GB_avail = 1;
}
-   /* Display the availability of 2MB and 1GB pages */
+   /* Display the availability of 2MB ,1GB, 16MB, 16GB pages */
if (hugepage_2MB_avail)
printf("2MB Huge pages available\n");
if (hugepage_1GB_avail)
printf("1GB Huge pages available\n");
+   if (hugepage_16MB_avail)
+   printf("16MB Huge pages available\n");
+   if (hugepage_16GB_avail)
+   printf("16GB Huge pages available\n");
/*
 * If 2MB pages available, check that a small memzone is correctly
 * reserved from 2MB huge pages when requested by the RTE_MEMZONE_2MB 
flag.
@@ -255,6 +265,117 @@ test_memzone_reserve_flags(void)
}
}
}
+   /*
+* This option is for IBM Power. If 16MB pages available, check
+* that a small memzone is correctly reserved from 16MB huge pages
+* when requested by the RTE_MEMZONE_16MB flag. Also check that
+* RTE_MEMZONE_SIZE_HINT_ONLY flag only defaults to an available
+* page size (i.e 16GB ) when 16MB pages are unavailable.
+*/
+   if (hugepage_16MB_avail) {
+   mz = rte_memzone_reserve("flag_zone_16M", size, SOCKET_ID_ANY,
+   RTE_MEMZONE_16MB);
+   if (mz == NULL) {
+   printf("MEMZONE FLAG 16MB\n");
+   return -1;
+   }
+   if (mz->hugepage_sz != RTE_PGSIZE_16M) {
+   printf("hugepage_sz not equal 16M\n");
+   return -1;
+   }
+
+   mz = rte_memzone_reserve("flag_zone_16M_HINT", size,
+   SOCKET_ID_ANY, RTE_MEMZONE_16MB|RTE_MEMZONE_SIZE_HINT_ONLY);
+   if (mz == NULL) {
+   printf("MEMZONE FLAG 2MB\n");
+   return -1;
+   }
+   if (mz->hugepage_sz != RTE_PGSIZE_16M) {
+   printf("hugepage_sz not equal 16M\n");
+   return -1;
+   }
+
+   /* Check if 1GB huge pages are unavailable, that function fails
+* unless HINT flag is indicated
+*/
+   if (!hugepage_16GB_avail) {
+   mz = rte_memzone_reserve("flag_zone_16G_HINT", size,
+   SOCKET_ID_ANY,
+   RTE_MEMZONE_16GB|RTE_MEMZONE_SIZE_HINT_ONLY);
+   if (mz == NULL) {
+   printf("MEMZONE FLAG 16GB & HINT\n");
+   return -1;
+   }
+   if (mz->hugepage_sz != RTE_PGSIZE_16M) {
+   printf("hugepage_sz not equal 16M\n");
+   return -1;
+   }
+
+   mz = rte_memzone_reserve("flag_zone_16G", size,
+   SOCKET_ID_ANY, RTE_MEMZONE_16GB);
+   if (mz != NULL) {
+   printf("MEMZONE FLAG 16GB\n");
+   return -1;
+   }
+   }
+   }
+   /*As with 16MB tests above for 16GB huge page requests*/
+   if (hugepage_16GB_avail)

[dpdk-dev] [PATCH v5 10/14] Add cache size define for IBM Power Architecture

2014-11-25 Thread Chao Zhu
IBM Power architecture has different cache line size (128 bytes) than
x86 (64 bytes). This patch defines CACHE_LINE_SIZE to 128 bytes to
override the default value 64 bytes to support IBM Power Architecture.

Signed-off-by: Chao Zhu 
---
 app/test/test_malloc.c |8 
 mk/arch/ppc_64/rte.vars.mk |2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/app/test/test_malloc.c b/app/test/test_malloc.c
index ee34ca3..63e6b32 100644
--- a/app/test/test_malloc.c
+++ b/app/test/test_malloc.c
@@ -300,9 +300,9 @@ test_big_alloc(void)
size_t size =rte_str_to_size(MALLOC_MEMZONE_SIZE)*2;
int align = 0;
 #ifndef RTE_LIBRTE_MALLOC_DEBUG
-   int overhead = 64 + 64;
+   int overhead = CACHE_LINE_SIZE + CACHE_LINE_SIZE;
 #else
-   int overhead = 64 + 64 + 64;
+   int overhead = CACHE_LINE_SIZE + CACHE_LINE_SIZE + CACHE_LINE_SIZE;
 #endif

rte_malloc_get_socket_stats(socket, _stats);
@@ -356,9 +356,9 @@ test_multi_alloc_statistics(void)
 #ifndef RTE_LIBRTE_MALLOC_DEBUG
int trailer_size = 0;
 #else
-   int trailer_size = 64;
+   int trailer_size = CACHE_LINE_SIZE;
 #endif
-   int overhead = 64 + trailer_size;
+   int overhead = CACHE_LINE_SIZE + trailer_size;

rte_malloc_get_socket_stats(socket, _stats);

diff --git a/mk/arch/ppc_64/rte.vars.mk b/mk/arch/ppc_64/rte.vars.mk
index 363fcd1..dfdeaea 100644
--- a/mk/arch/ppc_64/rte.vars.mk
+++ b/mk/arch/ppc_64/rte.vars.mk
@@ -32,7 +32,7 @@
 ARCH  ?= powerpc
 CROSS ?=

-CPU_CFLAGS  ?= -m64
+CPU_CFLAGS  ?= -m64 -DCACHE_LINE_SIZE=128
 CPU_LDFLAGS ?=
 CPU_ASFLAGS ?= -felf64

-- 
1.7.1



[dpdk-dev] [PATCH v5 08/14] Add CPU flag checking for IBM Power architecture

2014-11-25 Thread Chao Zhu
IBM Power processors don't have CPU flag hardware registers. This patch
uses the aux vector software registers to get CPU flags and adds CPU flag
checking support for IBM Power architecture.

Signed-off-by: Chao Zhu 
---
 app/test/test_cpuflags.c   |   35 
 .../common/include/arch/ppc_64/rte_cpuflags.h  |  187 
 mk/rte.cpuflags.mk |   17 ++
 3 files changed, 239 insertions(+), 0 deletions(-)
 create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_cpuflags.h

diff --git a/app/test/test_cpuflags.c b/app/test/test_cpuflags.c
index 82c0197..5aeba5d 100644
--- a/app/test/test_cpuflags.c
+++ b/app/test/test_cpuflags.c
@@ -80,6 +80,40 @@ test_cpuflags(void)
int result;
printf("\nChecking for flags from different registers...\n");

+#ifdef RTE_ARCH_PPC_64
+   printf("Check for PPC64:\t\t");
+   CHECK_FOR_FLAG(RTE_CPUFLAG_PPC64);
+
+   printf("Check for PPC32:\t\t");
+   CHECK_FOR_FLAG(RTE_CPUFLAG_PPC32);
+
+   printf("Check for VSX:\t\t");
+   CHECK_FOR_FLAG(RTE_CPUFLAG_VSX);
+
+   printf("Check for DFP:\t\t");
+   CHECK_FOR_FLAG(RTE_CPUFLAG_DFP);
+
+   printf("Check for FPU:\t\t");
+   CHECK_FOR_FLAG(RTE_CPUFLAG_FPU);
+
+   printf("Check for SMT:\t\t");
+   CHECK_FOR_FLAG(RTE_CPUFLAG_SMT);
+
+   printf("Check for MMU:\t\t");
+   CHECK_FOR_FLAG(RTE_CPUFLAG_MMU);
+
+   printf("Check for ALTIVEC:\t\t");
+   CHECK_FOR_FLAG(RTE_CPUFLAG_ALTIVEC);
+
+   printf("Check for ARCH_2_06:\t\t");
+   CHECK_FOR_FLAG(RTE_CPUFLAG_ARCH_2_06);
+
+   printf("Check for ARCH_2_07:\t\t");
+   CHECK_FOR_FLAG(RTE_CPUFLAG_ARCH_2_07);
+
+   printf("Check for ICACHE_SNOOP:\t\t");
+   CHECK_FOR_FLAG(RTE_CPUFLAG_ICACHE_SNOOP);
+#else
printf("Check for SSE:\t\t");
CHECK_FOR_FLAG(RTE_CPUFLAG_SSE);

@@ -117,6 +151,7 @@ test_cpuflags(void)
CHECK_FOR_FLAG(RTE_CPUFLAG_INVTSC);


+#endif

/*
 * Check if invalid data is handled properly
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_cpuflags.h 
b/lib/librte_eal/common/include/arch/ppc_64/rte_cpuflags.h
new file mode 100644
index 000..df45047
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_cpuflags.h
@@ -0,0 +1,187 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) IBM Corporation 2014.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of IBM Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_CPUFLAGS_PPC_64_H_
+#define _RTE_CPUFLAGS_PPC_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include 
+#include 
+#include 
+#include 
+
+#include "generic/rte_cpuflags.h"
+
+/* Symbolic values for the entries in the auxiliary table */
+#define AT_HWCAP  16
+#define AT_HWCAP2 26
+
+/* software based registers */
+enum cpu_register_t {
+   REG_HWCAP = 0,
+   REG_HWCAP2,
+};
+
+/**
+ * Enumeration of all CPU features supported
+ */
+enum rte_cpu_flag_t {
+   RTE_CPUFLAG_PPC_LE = 0,
+   RTE_CPUFLAG_TRUE_LE,
+   RTE_CPUFLAG_PSERIES_PERFMON_COMPAT,
+   RTE_CPUFLAG_VSX,
+   RTE_CPUFLAG_ARCH_2_06,
+   RTE_CPUFLAG_POWER6_EXT,
+   RTE_CPUFLAG_DFP,
+   RTE_CPUFLAG_PA6T,
+   RTE_CPUFLAG_ARCH_2_05,
+   RTE_CPUFLAG_ICACHE_SNOOP,
+   RTE_CPUFLAG_SMT,
+   

[dpdk-dev] [PATCH v5 04/14] Add CPU cycle operations for IBM Power architecture

2014-11-25 Thread Chao Zhu
IBM Power architecture doesn't have TSC register to get CPU cycles. This
patch implements the time base register read instead of TSC register of
x86 on IBM Power architecture.

Signed-off-by: Chao Zhu 
---
 .../common/include/arch/ppc_64/rte_cycles.h|   88 
 1 files changed, 88 insertions(+), 0 deletions(-)
 create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h

diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h 
b/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h
new file mode 100644
index 000..1eb5065
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h
@@ -0,0 +1,88 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) IBM Corporation 2014.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of IBM Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_CYCLES_PPC_64_H_
+#define _RTE_CYCLES_PPC_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_cycles.h"
+
+/**
+ * Read the time base register.
+ *
+ * @return
+ *   The time base for this lcore.
+ */
+static inline uint64_t
+rte_rdtsc(void)
+{
+   union {
+   uint64_t tsc_64;
+   struct {
+   uint32_t hi_32;
+   uint32_t lo_32;
+   };
+   } tsc;
+   uint32_t tmp;
+
+   asm volatile(
+   "0:\n"
+   "mftbu   %[hi32]\n"
+   "mftb%[lo32]\n"
+   "mftbu   %[tmp]\n"
+   "cmpw%[tmp],%[hi32]\n"
+   "bne 0b\n"
+   : [hi32] "=r"(tsc.hi_32), [lo32] "=r"(tsc.lo_32),
+   [tmp] "=r"(tmp)
+   );
+   return tsc.tsc_64;
+}
+
+static inline uint64_t
+rte_rdtsc_precise(void)
+{
+   rte_mb();
+   return rte_rdtsc();
+}
+
+static inline uint64_t
+rte_get_tsc_cycles(void) { return rte_rdtsc(); }
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_CYCLES_PPC_64_H_ */
+
-- 
1.7.1



[dpdk-dev] [PATCH v5 01/14] Add compiling definations for IBM Power architecture

2014-11-25 Thread Chao Zhu
To make DPDK run on the IBM Power architecture, configuration files for
the Power architecture are added. The build-related .mk files are also
added.

Signed-off-by: Chao Zhu 
---
 config/defconfig_ppc_64-power8-linuxapp-gcc |   70 +++
 mk/arch/ppc_64/rte.vars.mk  |   39 +++
 mk/machine/power8/rte.vars.mk   |   57 ++
 3 files changed, 166 insertions(+), 0 deletions(-)
 create mode 100644 config/defconfig_ppc_64-power8-linuxapp-gcc
 create mode 100644 mk/arch/ppc_64/rte.vars.mk
 create mode 100644 mk/machine/power8/rte.vars.mk

diff --git a/config/defconfig_ppc_64-power8-linuxapp-gcc 
b/config/defconfig_ppc_64-power8-linuxapp-gcc
new file mode 100644
index 000..b023336
--- /dev/null
+++ b/config/defconfig_ppc_64-power8-linuxapp-gcc
@@ -0,0 +1,70 @@
+#   BSD LICENSE
+#
+#   Copyright (C) IBM Corporation 2014.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in
+#   the documentation and/or other materials provided with the
+#   distribution.
+# * Neither the name of IBM Corporation nor the names of its
+#   contributors may be used to endorse or promote products derived
+#   from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "common_linuxapp"
+
+CONFIG_RTE_MACHINE="power8"
+
+CONFIG_RTE_ARCH="ppc_64"
+CONFIG_RTE_ARCH_PPC_64=y
+
+CONFIG_RTE_TOOLCHAIN="gcc"
+CONFIG_RTE_TOOLCHAIN_GCC=y
+
+# The following overrides the default common value.
+# Undefine the default value, then define the new one to avoid compiling error.
+
+CONFIG_RTE_LIBNAME=n
+CONFIG_RTE_LIBNAME="powerpc_dpdk"
+
+# Power8 has 96 cores, so increase CONFIG_RTE_MAX_LCORE from 64 to 128
+# Undefine the default value, then define the new one to avoid compiling error.
+CONFIG_RTE_MAX_LCORE=n
+CONFIG_RTE_MAX_LCORE=128
+
+# Note: Power doesn't have this support
+CONFIG_RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT=n
+
+# Note: Initially, all of the PMD drivers compilation are turned off on Power
+# Will turn on them only after the successful testing on Power
+CONFIG_RTE_LIBRTE_IXGBE_PMD=n
+CONFIG_RTE_LIBRTE_I40E_PMD=n
+CONFIG_RTE_LIBRTE_VIRTIO_PMD=n
+CONFIG_RTE_LIBRTE_VMXNET3_PMD=n
+CONFIG_RTE_LIBRTE_PMD_BOND=n
+
+# This following libraries are not available on Power. So they're turned off.
+CONFIG_RTE_LIBRTE_LPM=n
+CONFIG_RTE_LIBRTE_ACL=n
+CONFIG_RTE_LIBRTE_SCHED=n
+CONFIG_RTE_LIBRTE_PORT=n
+CONFIG_RTE_LIBRTE_TABLE=n
+
+CONFIG_RTE_LIBRTE_PIPELINE=n
diff --git a/mk/arch/ppc_64/rte.vars.mk b/mk/arch/ppc_64/rte.vars.mk
new file mode 100644
index 000..363fcd1
--- /dev/null
+++ b/mk/arch/ppc_64/rte.vars.mk
@@ -0,0 +1,39 @@
+#   BSD LICENSE
+#
+#   Copyright (C) IBM Corporation 2014.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in
+#   the documentation and/or other materials provided with the
+#   distribution.
+# * Neither the name of IBM Corporation nor the names of its
+#   contributors may be used to endorse or promote products derived
+#   from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE 

[dpdk-dev] [PATCH v3 01/14] Add compiling definations for IBM Power architecture

2014-11-25 Thread Chao Zhu
Neil,
The current Power-related patches are not yet fully functional: some of the
libraries have not been migrated. So common_linuxapp_powerpc is used to
turn off the parts that do not compile yet. This file is a copy of
common_linuxapp, and it is intended to be removed once all of the
libraries are migrated to Power. Actually, it's the current common file
for Linux and other OSes, such as BSD. However, I didn't try the
compilation on BSD. But this probably needs to be done.

On 2014/11/24 6:02, Neil Horman wrote:
> On Sun, Nov 23, 2014 at 08:22:09PM -0500, Chao Zhu wrote:
>> To make DPDK run on IBM Power architecture, configuration files for
>> Power architecuture are added. Also, the compiling related .mk files are
>> added.
>>
>> Signed-off-by: Chao Zhu 
>> ---
>>   config/common_linuxapp_powerpc  |  394 
>> +++
>>   config/defconfig_ppc_64-power8-linuxapp-gcc |   40 +++
>>   mk/arch/ppc_64/rte.vars.mk  |   39 +++
>>   mk/machine/power8/rte.vars.mk   |   57 
>>   4 files changed, 530 insertions(+), 0 deletions(-)
>>   create mode 100644 config/common_linuxapp_powerpc
>>   create mode 100644 config/defconfig_ppc_64-power8-linuxapp-gcc
>>   create mode 100644 mk/arch/ppc_64/rte.vars.mk
>>   create mode 100644 mk/machine/power8/rte.vars.mk
>>
>> diff --git a/config/common_linuxapp_powerpc b/config/common_linuxapp_powerpc
>> new file mode 100644
>> index 000..d230a0b
>> --- /dev/null
>> +++ b/config/common_linuxapp_powerpc
> This filename is common_linuxapp_powerpc, but given that it explicitly 
> specifies
> all the build options, there isn't really anything common about it.  I think
> what you want to do is rename this defconfig_powerpc-native-linuxapp-gcc, and
> have it include common_linuxapp, then change any power-specific option you see
> fit.
>
> Also, does BSD build on power?  I presume so. You likely want to create a
> corresponding bsd power config
>




[dpdk-dev] [PATCH v3 08/14] Add CPU flag checking for IBM Power architecture

2014-11-25 Thread Chao Zhu
Neil,

I didn't compile the ACL library on Power because SSE is not supported on
Power. This is why ACL compilation was turned off on Power.
rte_cpu_flag_t is an architecture-specific enum; each CPU architecture
has its own rte_cpu_flag_t. The Power one has no influence on x86, so I
think there should be no build problem on x86. However, your suggestion
is very good. It can ease the migration effort from x86 to other
architectures. We probably need to do it later.

On 2014/11/24 22:14, Neil Horman wrote:
> On Sun, Nov 23, 2014 at 08:22:16PM -0500, Chao Zhu wrote:
>> IBM Power processor doesn't have CPU flag hardware registers. This patch
>> uses aux vector software register to get CPU flags and add CPU flag
>> checking support for IBM Power architecture.
>>
>> Signed-off-by: Chao Zhu 
>> ---
>>   app/test/test_cpuflags.c   |   35 
>>   .../common/include/arch/ppc_64/rte_cpuflags.h  |  184 
>> 
>>   mk/rte.cpuflags.mk |   17 ++
>>   3 files changed, 236 insertions(+), 0 deletions(-)
>>   create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_cpuflags.h
>>
>> diff --git a/app/test/test_cpuflags.c b/app/test/test_cpuflags.c
>> index 82c0197..5aeba5d 100644
>> --- a/app/test/test_cpuflags.c
>> +++ b/app/test/test_cpuflags.c
>> @@ -80,6 +80,40 @@ test_cpuflags(void)
>>  int result;
>>  printf("\nChecking for flags from different registers...\n");
>>   
>> +#ifdef RTE_ARCH_PPC_64
>> +printf("Check for PPC64:\t\t");
>> +CHECK_FOR_FLAG(RTE_CPUFLAG_PPC64);
>> +
>> +printf("Check for PPC32:\t\t");
>> +CHECK_FOR_FLAG(RTE_CPUFLAG_PPC32);
>> +
>> +printf("Check for VSX:\t\t");
>> +CHECK_FOR_FLAG(RTE_CPUFLAG_VSX);
>> +
>> +printf("Check for DFP:\t\t");
>> +CHECK_FOR_FLAG(RTE_CPUFLAG_DFP);
>> +
>> +printf("Check for FPU:\t\t");
>> +CHECK_FOR_FLAG(RTE_CPUFLAG_FPU);
>> +
>> +printf("Check for SMT:\t\t");
>> +CHECK_FOR_FLAG(RTE_CPUFLAG_SMT);
>> +
>> +printf("Check for MMU:\t\t");
>> +CHECK_FOR_FLAG(RTE_CPUFLAG_MMU);
>> +
>> +printf("Check for ALTIVEC:\t\t");
>> +CHECK_FOR_FLAG(RTE_CPUFLAG_ALTIVEC);
>> +
>> +printf("Check for ARCH_2_06:\t\t");
>> +CHECK_FOR_FLAG(RTE_CPUFLAG_ARCH_2_06);
>> +
>> +printf("Check for ARCH_2_07:\t\t");
>> +CHECK_FOR_FLAG(RTE_CPUFLAG_ARCH_2_07);
>> +
>> +printf("Check for ICACHE_SNOOP:\t\t");
>> +CHECK_FOR_FLAG(RTE_CPUFLAG_ICACHE_SNOOP);
>> +#else
>>  printf("Check for SSE:\t\t");
>>  CHECK_FOR_FLAG(RTE_CPUFLAG_SSE);
>>   
>> @@ -117,6 +151,7 @@ test_cpuflags(void)
>>  CHECK_FOR_FLAG(RTE_CPUFLAG_INVTSC);
>>   
>>   
>> +#endif
>>   
>>  /*
>>   * Check if invalid data is handled properly
>> diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_cpuflags.h 
>> b/lib/librte_eal/common/include/arch/ppc_64/rte_cpuflags.h
>> new file mode 100644
>> index 000..6b38f1c
>> --- /dev/null
>> +++ b/lib/librte_eal/common/include/arch/ppc_64/rte_cpuflags.h
>> @@ -0,0 +1,184 @@
>> +/*
>> + *   BSD LICENSE
>> + *
>> + *   Copyright (C) IBM Corporation 2014.
>> + *
>> + *   Redistribution and use in source and binary forms, with or without
>> + *   modification, are permitted provided that the following conditions
>> + *   are met:
>> + *
>> + * * Redistributions of source code must retain the above copyright
>> + *   notice, this list of conditions and the following disclaimer.
>> + * * Redistributions in binary form must reproduce the above copyright
>> + *   notice, this list of conditions and the following disclaimer in
>> + *   the documentation and/or other materials provided with the
>> + *   distribution.
>> + * * Neither the name of IBM Corporation nor the names of its
>> + *   contributors may be used to endorse or promote products derived
>> + *   from this software without specific prior written permission.
>> + *
>> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>> + *   OWNER OR C

[dpdk-dev] [PATCH v3 00/14] Patches for DPDK to support Power architecture

2014-11-25 Thread Chao Zhu
David,

I submitted an updated patchset.
I fixed all of the checkpatch errors except one, which I think is
invalid.
Thanks a lot!

On 2014/11/24 23:05, David Marchand wrote:
> Hello Chao,
>
> On Mon, Nov 24, 2014 at 2:22 AM, Chao Zhu  <mailto:chaozhu at linux.vnet.ibm.com>> wrote:
>
> The set of patches add IBM Power architecture to the DPDK. It adds
> the required support to the
> EAL library. This set of patches doesn't support full DPDK
> function on Power processors. So a
> separate common configuration file is used for Power to turn off
> some un-migrated functions. To
> compile on PPC64 architecture, GCC version >= 4.8 must be used.
> This v3 patch updates eal_memory.c
> to fix the memory zone allocation and also solves the compiling
> problems of test-pmd.
>
>
> Please run a little checkpath on this patchset.
> There are some issues.
>
> Thanks.
>
> -- 
> David Marchand



[dpdk-dev] [PATCH v4 14/14] Fix the compiling of test-pmd on IBM Power Architecture

2014-11-25 Thread Chao Zhu
This patch fixes compilation problems on the IBM Power architecture and
turns on the test-pmd build option in the configuration file. Actually,
this is a big-endian compilation fix.

Signed-off-by: Chao Zhu 
---
 app/test-pmd/config.c  |   39 +--
 config/common_linuxapp_powerpc |6 +++---
 2 files changed, 28 insertions(+), 17 deletions(-)

diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
index 9bc08f4..4fee6c0 100644
--- a/app/test-pmd/config.c
+++ b/app/test-pmd/config.c
@@ -612,8 +612,13 @@ ring_dma_zone_lookup(const char *ring_name, uint8_t 
port_id, uint16_t q_id)
 union igb_ring_dword {
uint64_t dword;
struct {
+#ifdef RTE_ARCH_BIG_ENDIAN
+   uint32_t lo;
+   uint32_t hi;
+#else
uint32_t hi;
uint32_t lo;
+#endif
} words;
 };

@@ -656,23 +661,29 @@ ring_rx_descriptor_display(const struct rte_memzone 
*ring_mz,
/* 32 bytes RX descriptor, i40e only */
struct igb_ring_desc_32_bytes *ring =
(struct igb_ring_desc_32_bytes *)ring_mz->addr;
+   ring[desc_id].lo_dword.dword =
+   rte_le_to_cpu_64(ring[desc_id].lo_dword.dword);
+   ring_rxd_display_dword(ring[desc_id].lo_dword);
+   ring[desc_id].hi_dword.dword =
+   rte_le_to_cpu_64(ring[desc_id].hi_dword.dword);
+   ring_rxd_display_dword(ring[desc_id].hi_dword);
+   ring[desc_id].resv1.dword =
+   rte_le_to_cpu_64(ring[desc_id].resv1.dword);
+   ring_rxd_display_dword(ring[desc_id].resv1);
+   ring[desc_id].resv2.dword =
+   rte_le_to_cpu_64(ring[desc_id].resv2.dword);
+   ring_rxd_display_dword(ring[desc_id].resv2);

-   ring_rxd_display_dword(rte_le_to_cpu_64(
-   ring[desc_id].lo_dword));
-   ring_rxd_display_dword(rte_le_to_cpu_64(
-   ring[desc_id].hi_dword));
-   ring_rxd_display_dword(rte_le_to_cpu_64(
-   ring[desc_id].resv1));
-   ring_rxd_display_dword(rte_le_to_cpu_64(
-   ring[desc_id].resv2));
return;
}
 #endif
/* 16 bytes RX descriptor */
-   ring_rxd_display_dword(rte_le_to_cpu_64(
-   ring[desc_id].lo_dword));
-   ring_rxd_display_dword(rte_le_to_cpu_64(
-   ring[desc_id].hi_dword));
+   ring[desc_id].lo_dword.dword =
+   rte_le_to_cpu_64(ring[desc_id].lo_dword.dword);
+   ring_rxd_display_dword(ring[desc_id].lo_dword);
+   ring[desc_id].hi_dword.dword =
+   rte_le_to_cpu_64(ring[desc_id].hi_dword.dword);
+   ring_rxd_display_dword(ring[desc_id].hi_dword);
 }

 static void
@@ -682,8 +693,8 @@ ring_tx_descriptor_display(const struct rte_memzone 
*ring_mz, uint16_t desc_id)
struct igb_ring_desc_16_bytes txd;

ring = (struct igb_ring_desc_16_bytes *)ring_mz->addr;
-   txd.lo_dword = rte_le_to_cpu_64(ring[desc_id].lo_dword);
-   txd.hi_dword = rte_le_to_cpu_64(ring[desc_id].hi_dword);
+   txd.lo_dword.dword = rte_le_to_cpu_64(ring[desc_id].lo_dword.dword);
+   txd.hi_dword.dword = rte_le_to_cpu_64(ring[desc_id].hi_dword.dword);
printf("0x%08X - 0x%08X / 0x%08X - 0x%08X\n",
(unsigned)txd.lo_dword.words.lo,
(unsigned)txd.lo_dword.words.hi,
diff --git a/config/common_linuxapp_powerpc b/config/common_linuxapp_powerpc
index d230a0b..68f1b6b 100644
--- a/config/common_linuxapp_powerpc
+++ b/config/common_linuxapp_powerpc
@@ -146,8 +146,8 @@ CONFIG_RTE_NIC_BYPASS=n
 #
 # Compile burst-oriented IGB & EM PMD drivers
 #
-CONFIG_RTE_LIBRTE_EM_PMD=n
-CONFIG_RTE_LIBRTE_IGB_PMD=n
+CONFIG_RTE_LIBRTE_EM_PMD=y
+CONFIG_RTE_LIBRTE_IGB_PMD=y
 CONFIG_RTE_LIBRTE_E1000_DEBUG_INIT=n
 CONFIG_RTE_LIBRTE_E1000_DEBUG_RX=n
 CONFIG_RTE_LIBRTE_E1000_DEBUG_TX=n
@@ -389,6 +389,6 @@ CONFIG_RTE_APP_TEST=y
 #
 # Compile the PMD test application
 #
-CONFIG_RTE_TEST_PMD=n
+CONFIG_RTE_TEST_PMD=y
 CONFIG_RTE_TEST_PMD_RECORD_CORE_CYCLES=n
 CONFIG_RTE_TEST_PMD_RECORD_BURST_STATS=n
-- 
1.7.1



[dpdk-dev] [PATCH v4 13/14] test_memzone:fix finding the second smallest segment

2014-11-25 Thread Chao Zhu
The current implementation in test_memzone.c has a bug in finding the
second smallest memory segment. The segment it finds is the last smallest
memory segment, but not the second smallest memory segment. This bug may
cause test failures in some cases. This patch fixes the bug.

Signed-off-by: Chao Zhu 
---
 app/test/test_memzone.c |9 -
 1 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/app/test/test_memzone.c b/app/test/test_memzone.c
index 387dbbc..1658006 100644
--- a/app/test/test_memzone.c
+++ b/app/test/test_memzone.c
@@ -797,10 +797,9 @@ test_memzone_reserve_memory_in_smallest_segment(void)

/* set new smallest */
min_ms = ms;
-   }
-   else if (prev_min_ms == NULL) {
+   } else if ((prev_min_ms == NULL)
+   || (prev_min_ms->len > ms->len))
prev_min_ms = ms;
-   }
}

if (min_ms == NULL || prev_min_ms == NULL) {
@@ -877,8 +876,8 @@ test_memzone_reserve_memory_with_smallest_offset(void)

/* set new smallest */
min_ms = ms;
-   }
-   else if (prev_min_ms == NULL) {
+   } else if ((prev_min_ms == NULL)
+   || (prev_min_ms->len > ms->len)) {
prev_min_ms = ms;
}
}
-- 
1.7.1



[dpdk-dev] [PATCH v4 12/14] Add eal memory support for IBM Power Architecture

2014-11-25 Thread Chao Zhu
On IBM Power, hugepage files are mmapped from high addresses to low
addresses. This is different from x86. This patch modifies the memory
segment detection code to get the correct memory segment layout on the
Power architecture. This patch also adds a common RTE_ARCH_64 definition
for 64-bit systems.

Signed-off-by: Chao Zhu 
---
 config/defconfig_ppc_64-power8-linuxapp-gcc   |1 +
 config/defconfig_x86_64-native-linuxapp-clang |1 +
 config/defconfig_x86_64-native-linuxapp-gcc   |1 +
 config/defconfig_x86_64-native-linuxapp-icc   |1 +
 lib/librte_eal/linuxapp/eal/eal_memory.c  |   91 ++---
 5 files changed, 71 insertions(+), 24 deletions(-)

diff --git a/config/defconfig_ppc_64-power8-linuxapp-gcc 
b/config/defconfig_ppc_64-power8-linuxapp-gcc
index b10f60c..23a5591 100644
--- a/config/defconfig_ppc_64-power8-linuxapp-gcc
+++ b/config/defconfig_ppc_64-power8-linuxapp-gcc
@@ -35,6 +35,7 @@ CONFIG_RTE_MACHINE="power8"
 CONFIG_RTE_ARCH="ppc_64"
 CONFIG_RTE_ARCH_PPC_64=y
 CONFIG_RTE_ARCH_BIG_ENDIAN=y
+CONFIG_RTE_ARCH_64=y

 CONFIG_RTE_TOOLCHAIN="gcc"
 CONFIG_RTE_TOOLCHAIN_GCC=y
diff --git a/config/defconfig_x86_64-native-linuxapp-clang 
b/config/defconfig_x86_64-native-linuxapp-clang
index bbda080..5f3074e 100644
--- a/config/defconfig_x86_64-native-linuxapp-clang
+++ b/config/defconfig_x86_64-native-linuxapp-clang
@@ -36,6 +36,7 @@ CONFIG_RTE_MACHINE="native"

 CONFIG_RTE_ARCH="x86_64"
 CONFIG_RTE_ARCH_X86_64=y
+CONFIG_RTE_ARCH_64=y

 CONFIG_RTE_TOOLCHAIN="clang"
 CONFIG_RTE_TOOLCHAIN_CLANG=y
diff --git a/config/defconfig_x86_64-native-linuxapp-gcc 
b/config/defconfig_x86_64-native-linuxapp-gcc
index 3de818a..60baf5b 100644
--- a/config/defconfig_x86_64-native-linuxapp-gcc
+++ b/config/defconfig_x86_64-native-linuxapp-gcc
@@ -36,6 +36,7 @@ CONFIG_RTE_MACHINE="native"

 CONFIG_RTE_ARCH="x86_64"
 CONFIG_RTE_ARCH_X86_64=y
+CONFIG_RTE_ARCH_64=y

 CONFIG_RTE_TOOLCHAIN="gcc"
 CONFIG_RTE_TOOLCHAIN_GCC=y
diff --git a/config/defconfig_x86_64-native-linuxapp-icc 
b/config/defconfig_x86_64-native-linuxapp-icc
index 795333b..71d1e28 100644
--- a/config/defconfig_x86_64-native-linuxapp-icc
+++ b/config/defconfig_x86_64-native-linuxapp-icc
@@ -36,6 +36,7 @@ CONFIG_RTE_MACHINE="native"

 CONFIG_RTE_ARCH="x86_64"
 CONFIG_RTE_ARCH_X86_64=y
+CONFIG_RTE_ARCH_64=y

 CONFIG_RTE_TOOLCHAIN="icc"
 CONFIG_RTE_TOOLCHAIN_ICC=y
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c 
b/lib/librte_eal/linuxapp/eal/eal_memory.c
index f2454f4..e6cb919 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -316,11 +316,12 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl,
 #endif
hugepg_tbl[i].filepath[sizeof(hugepg_tbl[i].filepath) - 
1] = '\0';
}
-#ifndef RTE_ARCH_X86_64
-   /* for 32-bit systems, don't remap 1G pages, just reuse original
-* map address as final map address.
+#ifndef RTE_ARCH_64
+   /* for 32-bit systems, don't remap 1G and 16G pages, just reuse
+* original map address as final map address.
 */
-   else if (hugepage_sz == RTE_PGSIZE_1G){
+   else if ((hugepage_sz == RTE_PGSIZE_1G)
+   || (hugepage_sz == RTE_PGSIZE_16G)) {
hugepg_tbl[i].final_va = hugepg_tbl[i].orig_va;
hugepg_tbl[i].orig_va = NULL;
continue;
@@ -335,9 +336,17 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl,
 * physical block: count the number of
 * contiguous physical pages. */
for (j = i+1; j < hpi->num_pages[0] ; j++) {
+#ifdef RTE_ARCH_PPC_64
+   /* The physical addresses are sorted in
+* descending order on PPC64 */
+   if (hugepg_tbl[j].physaddr !=
+   hugepg_tbl[j-1].physaddr - hugepage_sz)
+   break;
+#else
if (hugepg_tbl[j].physaddr !=
hugepg_tbl[j-1].physaddr + hugepage_sz)
break;
+#endif
}
num_pages = j - i;
vma_len = num_pages * hugepage_sz;
@@ -412,11 +421,12 @@ remap_all_hugepages(struct hugepage_file *hugepg_tbl, 
struct hugepage_info *hpi)

while (i < hpi->num_pages[0]) {

-#ifndef RTE_ARCH_X86_64
-   /* for 32-bit systems, don't remap 1G pages, just reuse original
-* map address as final map address.
+#ifndef RTE_ARCH_64
+   /* for 32-bit systems, don't remap 1G pages and 16G pages,
+* just reuse original map address as final m

[dpdk-dev] [PATCH v4 11/14] Add huge page size define for IBM Power architecture

2014-11-25 Thread Chao Zhu
The IBM Power architecture has different huge page sizes (16MB, 16GB) than
x86. This patch defines RTE_PGSIZE_16M and RTE_PGSIZE_16G in the
rte_page_sizes enum and adds huge page size support for the IBM Power
architecture to DPDK.

Signed-off-by: Chao Zhu 
---
 app/test/test_memzone.c |  123 ++-
 lib/librte_eal/common/eal_common_memzone.c  |   19 +++-
 lib/librte_eal/common/include/rte_memory.h  |9 ++-
 lib/librte_eal/common/include/rte_memzone.h |8 ++
 lib/librte_eal/linuxapp/eal/eal.c   |9 ++-
 5 files changed, 156 insertions(+), 12 deletions(-)

diff --git a/app/test/test_memzone.c b/app/test/test_memzone.c
index 381f643..387dbbc 100644
--- a/app/test/test_memzone.c
+++ b/app/test/test_memzone.c
@@ -133,6 +133,8 @@ test_memzone_reserve_flags(void)
const struct rte_memseg *ms;
int hugepage_2MB_avail = 0;
int hugepage_1GB_avail = 0;
+   int hugepage_16MB_avail = 0;
+   int hugepage_16GB_avail = 0;
const size_t size = 100;
int i = 0;
ms = rte_eal_get_physmem_layout();
@@ -141,12 +143,20 @@ test_memzone_reserve_flags(void)
hugepage_2MB_avail = 1;
if (ms[i].hugepage_sz == RTE_PGSIZE_1G)
hugepage_1GB_avail = 1;
+   if (ms[i].hugepage_sz == RTE_PGSIZE_16M)
+   hugepage_16MB_avail = 1;
+   if (ms[i].hugepage_sz == RTE_PGSIZE_16G)
+   hugepage_16GB_avail = 1;
}
-   /* Display the availability of 2MB and 1GB pages */
+   /* Display the availability of 2MB ,1GB, 16MB, 16GB pages */
if (hugepage_2MB_avail)
printf("2MB Huge pages available\n");
if (hugepage_1GB_avail)
printf("1GB Huge pages available\n");
+   if (hugepage_16MB_avail)
+   printf("16MB Huge pages available\n");
+   if (hugepage_16GB_avail)
+   printf("16GB Huge pages available\n");
/*
 * If 2MB pages available, check that a small memzone is correctly
 * reserved from 2MB huge pages when requested by the RTE_MEMZONE_2MB 
flag.
@@ -255,6 +265,117 @@ test_memzone_reserve_flags(void)
}
}
}
+   /*
+* This option is for IBM Power. If 16MB pages available, check
+* that a small memzone is correctly reserved from 16MB huge pages
+* when requested by the RTE_MEMZONE_16MB flag. Also check that
+* RTE_MEMZONE_SIZE_HINT_ONLY flag only defaults to an available
+* page size (i.e 16GB ) when 16MB pages are unavailable.
+*/
+   if (hugepage_16MB_avail) {
+   mz = rte_memzone_reserve("flag_zone_16M", size, SOCKET_ID_ANY,
+   RTE_MEMZONE_16MB);
+   if (mz == NULL) {
+   printf("MEMZONE FLAG 16MB\n");
+   return -1;
+   }
+   if (mz->hugepage_sz != RTE_PGSIZE_16M) {
+   printf("hugepage_sz not equal 16M\n");
+   return -1;
+   }
+
+   mz = rte_memzone_reserve("flag_zone_16M_HINT", size,
+   SOCKET_ID_ANY, RTE_MEMZONE_16MB|RTE_MEMZONE_SIZE_HINT_ONLY);
+   if (mz == NULL) {
+   printf("MEMZONE FLAG 2MB\n");
+   return -1;
+   }
+   if (mz->hugepage_sz != RTE_PGSIZE_16M) {
+   printf("hugepage_sz not equal 16M\n");
+   return -1;
+   }
+
+   /* Check if 1GB huge pages are unavailable, that function fails
+* unless HINT flag is indicated
+*/
+   if (!hugepage_16GB_avail) {
+   mz = rte_memzone_reserve("flag_zone_16G_HINT", size,
+   SOCKET_ID_ANY,
+   RTE_MEMZONE_16GB|RTE_MEMZONE_SIZE_HINT_ONLY);
+   if (mz == NULL) {
+   printf("MEMZONE FLAG 16GB & HINT\n");
+   return -1;
+   }
+   if (mz->hugepage_sz != RTE_PGSIZE_16M) {
+   printf("hugepage_sz not equal 16M\n");
+   return -1;
+   }
+
+   mz = rte_memzone_reserve("flag_zone_16G", size,
+   SOCKET_ID_ANY, RTE_MEMZONE_16GB);
+   if (mz != NULL) {
+   printf("MEMZONE FLAG 16GB\n");
+   return -1;
+   }
+   }
+   }
+   /*As with 16MB tests above for 16GB huge page requests*/
+   if (hugepage_16GB_avail)

[dpdk-dev] [PATCH v4 10/14] Add cache size define for IBM Power Architecture

2014-11-25 Thread Chao Zhu
The IBM Power architecture has a different cache line size (128 bytes) than
x86 (64 bytes). This patch defines CACHE_LINE_SIZE as 128 bytes, overriding
the default value of 64 bytes, to support the IBM Power architecture.

Signed-off-by: Chao Zhu 
---
 app/test/test_malloc.c |8 
 mk/arch/ppc_64/rte.vars.mk |2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/app/test/test_malloc.c b/app/test/test_malloc.c
index ee34ca3..63e6b32 100644
--- a/app/test/test_malloc.c
+++ b/app/test/test_malloc.c
@@ -300,9 +300,9 @@ test_big_alloc(void)
size_t size =rte_str_to_size(MALLOC_MEMZONE_SIZE)*2;
int align = 0;
 #ifndef RTE_LIBRTE_MALLOC_DEBUG
-   int overhead = 64 + 64;
+   int overhead = CACHE_LINE_SIZE + CACHE_LINE_SIZE;
 #else
-   int overhead = 64 + 64 + 64;
+   int overhead = CACHE_LINE_SIZE + CACHE_LINE_SIZE + CACHE_LINE_SIZE;
 #endif

rte_malloc_get_socket_stats(socket, _stats);
@@ -356,9 +356,9 @@ test_multi_alloc_statistics(void)
 #ifndef RTE_LIBRTE_MALLOC_DEBUG
int trailer_size = 0;
 #else
-   int trailer_size = 64;
+   int trailer_size = CACHE_LINE_SIZE;
 #endif
-   int overhead = 64 + trailer_size;
+   int overhead = CACHE_LINE_SIZE + trailer_size;

rte_malloc_get_socket_stats(socket, _stats);

diff --git a/mk/arch/ppc_64/rte.vars.mk b/mk/arch/ppc_64/rte.vars.mk
index 363fcd1..dfdeaea 100644
--- a/mk/arch/ppc_64/rte.vars.mk
+++ b/mk/arch/ppc_64/rte.vars.mk
@@ -32,7 +32,7 @@
 ARCH  ?= powerpc
 CROSS ?=

-CPU_CFLAGS  ?= -m64
+CPU_CFLAGS  ?= -m64 -DCACHE_LINE_SIZE=128
 CPU_LDFLAGS ?=
 CPU_ASFLAGS ?= -felf64

-- 
1.7.1



[dpdk-dev] [PATCH v4 09/14] Remove iopl operation for IBM Power architecture

2014-11-25 Thread Chao Zhu
The iopl() call is mostly for the i386 architecture. On Power and other
architectures, it doesn't exist. This patch modifies rte_eal_iopl_init()
to return -1 on Power and other architectures. Thus
rte_config.flags will not contain the EAL_FLG_HIGH_IOPL flag on those
architectures.

Signed-off-by: Chao Zhu 
---
 lib/librte_eal/linuxapp/eal/eal.c |8 
 1 files changed, 8 insertions(+), 0 deletions(-)

diff --git a/lib/librte_eal/linuxapp/eal/eal.c 
b/lib/librte_eal/linuxapp/eal/eal.c
index 7a1d087..8c0223f 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -50,7 +50,9 @@
 #include 
 #include 
 #include 
+#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_I686)
 #include 
+#endif

 #include 
 #include 
@@ -752,13 +754,19 @@ rte_eal_mcfg_complete(void)

 /*
  * Request iopl privilege for all RPL, returns 0 on success
+ * iopl() call is mostly for the i386 architecture. For other architectures,
+ * return -1 to indicate IO priviledge can't be changed in this way.
  */
 int
 rte_eal_iopl_init(void)
 {
+#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_I686)
if (iopl(3) != 0)
return -1;
return 0;
+#else
+   return -1;
+#endif
 }

 /* Launch threads, called at application init(). */
-- 
1.7.1



[dpdk-dev] [PATCH v4 06/14] Add spinlock operation for IBM Power architecture

2014-11-25 Thread Chao Zhu
This patch adds spinlock operations for IBM Power architecture.

Signed-off-by: Chao Zhu 
---
 .../common/include/arch/ppc_64/rte_spinlock.h  |   73 
 1 files changed, 73 insertions(+), 0 deletions(-)
 create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_spinlock.h

diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_spinlock.h 
b/lib/librte_eal/common/include/arch/ppc_64/rte_spinlock.h
new file mode 100644
index 000..cf8b81a
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_spinlock.h
@@ -0,0 +1,73 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) IBM Corporation 2014.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of IBM Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_SPINLOCK_PPC_64_H_
+#define _RTE_SPINLOCK_PPC_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include 
+#include "generic/rte_spinlock.h"
+
+/* Fixme: Use intrinsics to implement the spinlock on Power architecture */
+
+#ifndef RTE_FORCE_INTRINSICS
+
+static inline void
+rte_spinlock_lock(rte_spinlock_t *sl)
+{
+   while (__sync_lock_test_and_set(&sl->locked, 1))
+   while (sl->locked)
+   rte_pause();
+}
+
+static inline void
+rte_spinlock_unlock(rte_spinlock_t *sl)
+{
+   __sync_lock_release(&sl->locked);
+}
+
+static inline int
+rte_spinlock_trylock(rte_spinlock_t *sl)
+{
+   return (__sync_lock_test_and_set(&sl->locked, 1) == 0);
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_SPINLOCK_PPC_64_H_ */
-- 
1.7.1



[dpdk-dev] [PATCH v4 01/14] Add compiling definations for IBM Power architecture

2014-11-25 Thread Chao Zhu
To make DPDK run on the IBM Power architecture, configuration files for
the Power architecture are added. The build-related .mk files are added
as well.

Signed-off-by: Chao Zhu 
---
 config/common_linuxapp_powerpc  |  394 +++
 config/defconfig_ppc_64-power8-linuxapp-gcc |   40 +++
 mk/arch/ppc_64/rte.vars.mk  |   39 +++
 mk/machine/power8/rte.vars.mk   |   57 
 4 files changed, 530 insertions(+), 0 deletions(-)
 create mode 100644 config/common_linuxapp_powerpc
 create mode 100644 config/defconfig_ppc_64-power8-linuxapp-gcc
 create mode 100644 mk/arch/ppc_64/rte.vars.mk
 create mode 100644 mk/machine/power8/rte.vars.mk

diff --git a/config/common_linuxapp_powerpc b/config/common_linuxapp_powerpc
new file mode 100644
index 000..d230a0b
--- /dev/null
+++ b/config/common_linuxapp_powerpc
@@ -0,0 +1,394 @@
+#   BSD LICENSE
+#
+#   Copyright (C) IBM Corporation 2014.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in
+#   the documentation and/or other materials provided with the
+#   distribution.
+# * Neither the name of IBM Corporation nor the names of its
+#   contributors may be used to endorse or promote products derived
+#   from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+#
+# define executive environment
+#
+# CONFIG_RTE_EXEC_ENV can be linuxapp, baremetal, bsdapp
+#
+CONFIG_RTE_EXEC_ENV="linuxapp"
+CONFIG_RTE_EXEC_ENV_LINUXAPP=y
+
+#
+# Use intrinsics or assembly code for key routines
+#
+CONFIG_RTE_FORCE_INTRINSICS=n
+
+#
+# Compile to share library
+#
+CONFIG_RTE_BUILD_SHARED_LIB=n
+
+#
+# Combine to one single library
+#
+CONFIG_RTE_BUILD_COMBINE_LIBS=n
+CONFIG_RTE_LIBNAME="powerpc_dpdk"
+
+#
+# Compile libc directory
+#
+CONFIG_RTE_LIBC=n
+
+#
+# Compile newlib as libc from source
+#
+CONFIG_RTE_LIBC_NEWLIB_SRC=n
+
+#
+# Use binary newlib
+#
+CONFIG_RTE_LIBC_NEWLIB_BIN=n
+
+#
+# Use binary newlib
+#
+CONFIG_RTE_LIBC_NETINCS=n
+
+#
+# Compile libgloss (newlib-stubs)
+#
+CONFIG_RTE_LIBGLOSS=n
+
+#
+# Compile Environment Abstraction Layer
+# Note: Power8 has 96 cores, so increase CONFIG_RTE_MAX_LCORE from 64 to 128
+#
+CONFIG_RTE_LIBRTE_EAL=y
+CONFIG_RTE_MAX_LCORE=128
+CONFIG_RTE_MAX_NUMA_NODES=8
+CONFIG_RTE_MAX_MEMSEG=256
+CONFIG_RTE_MAX_MEMZONE=2560
+CONFIG_RTE_MAX_TAILQ=32
+CONFIG_RTE_LOG_LEVEL=8
+CONFIG_RTE_LOG_HISTORY=256
+CONFIG_RTE_LIBEAL_USE_HPET=n
+CONFIG_RTE_EAL_ALLOW_INV_SOCKET_ID=n
+CONFIG_RTE_EAL_ALWAYS_PANIC_ON_ERROR=n
+CONFIG_RTE_EAL_IGB_UIO=y
+CONFIG_RTE_EAL_VFIO=y
+
+#
+# Special configurations in PCI Config Space for high performance
+#
+CONFIG_RTE_PCI_CONFIG=n
+CONFIG_RTE_PCI_EXTENDED_TAG=""
+CONFIG_RTE_PCI_MAX_READ_REQUEST_SIZE=0
+
+#
+# Compile Environment Abstraction Layer for linux
+#
+CONFIG_RTE_LIBRTE_EAL_LINUXAPP=y
+
+#
+# Compile Environment Abstraction Layer for Bare metal
+#
+CONFIG_RTE_LIBRTE_EAL_BAREMETAL=n
+
+#
+# Compile Environment Abstraction Layer to support Vmware TSC map
+# Note: Power doesn't have this support
+#
+CONFIG_RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT=n
+
+#
+# Compile the argument parser library
+#
+CONFIG_RTE_LIBRTE_KVARGS=y
+
+#
+# Compile generic ethernet library
+#
+CONFIG_RTE_LIBRTE_ETHER=y
+CONFIG_RTE_LIBRTE_ETHDEV_DEBUG=n
+CONFIG_RTE_MAX_ETHPORTS=32
+CONFIG_RTE_LIBRTE_IEEE1588=n
+CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16
+
+#
+# Support NIC bypass logic
+#
+CONFIG_RTE_NIC_BYPASS=n
+
+#
+# Note: Initially, all of the PMD drivers compilation are turned off on Power
+# Will turn on them only after the successful testing on Power
+#
+
+#
+# Compile burst-oriented IGB & EM PMD drivers
+#
+CONFIG_RTE_LIBRTE_EM_PMD=n
+CONFIG_RTE_LIBRTE_IGB_PMD=n
+CONFIG_RTE_LIBRTE_E1000_DEBUG_INIT=n
+CONFIG_RTE_LIBRTE_E1000_DE

[dpdk-dev] [PATCH v4 00/14] Patches for DPDK to support Power architecture

2014-11-25 Thread Chao Zhu
This set of patches adds IBM Power architecture support to DPDK. It adds
the required support to the EAL library. The patch set does not yet
support full DPDK functionality on Power processors, so a separate common
configuration file is used for Power to turn off the functions that have
not been migrated. To compile on the PPC64 architecture, GCC version >= 4.8
must be used. This v4 patch set fixes the checkpatch issues found in v3.
The only unresolved checkpatch issue is:
ERROR: space prohibited before open square bracket '['
This error refers to the named input/output operands in the asm code, but
I think it is a false positive.

Chao Zhu (14):
  Add compiling definations for IBM Power architecture
  Add atomic operations for IBM Power architecture
  Add byte order operations for IBM Power architecture
  Add CPU cycle operations for IBM Power architecture
  Add prefetch operation for IBM Power architecture
  Add spinlock operation for IBM Power architecture
  Add vector memcpy for IBM Power architecture
  Add CPU flag checking for IBM Power architecture
  Remove iopl operation for IBM Power architecture
  Add cache size define for IBM Power Architecture
  Add huge page size define for IBM Power architecture
  Add eal memory support for IBM Power Architecture
  test_memzone:fix finding the second smallest segment
  Fix the compiling of test-pmd on IBM Power Architecture

 app/test-pmd/config.c  |   39 ++-
 app/test/test_cpuflags.c   |   35 ++
 app/test/test_malloc.c |8 +-
 app/test/test_memzone.c|  132 ++-
 config/common_linuxapp_powerpc |  394 ++
 config/defconfig_ppc_64-power8-linuxapp-gcc|   42 ++
 config/defconfig_x86_64-native-linuxapp-clang  |1 +
 config/defconfig_x86_64-native-linuxapp-gcc|1 +
 config/defconfig_x86_64-native-linuxapp-icc|1 +
 lib/librte_eal/common/eal_common_memzone.c |   19 +-
 .../common/include/arch/ppc_64/rte_atomic.h|  427 
 .../common/include/arch/ppc_64/rte_byteorder.h |  150 +++
 .../common/include/arch/ppc_64/rte_cpuflags.h  |  187 +
 .../common/include/arch/ppc_64/rte_cycles.h|   88 
 .../common/include/arch/ppc_64/rte_memcpy.h|  226 +++
 .../common/include/arch/ppc_64/rte_prefetch.h  |   61 +++
 .../common/include/arch/ppc_64/rte_spinlock.h  |   73 
 lib/librte_eal/common/include/rte_memory.h |9 +-
 lib/librte_eal/common/include/rte_memzone.h|8 +
 lib/librte_eal/linuxapp/eal/eal.c  |   17 +-
 lib/librte_eal/linuxapp/eal/eal_memory.c   |   91 +++--
 mk/arch/ppc_64/rte.vars.mk |   39 ++
 mk/machine/power8/rte.vars.mk  |   57 +++
 mk/rte.cpuflags.mk |   17 +
 24 files changed, 2063 insertions(+), 59 deletions(-)
 create mode 100644 config/common_linuxapp_powerpc
 create mode 100644 config/defconfig_ppc_64-power8-linuxapp-gcc
 create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h
 create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_byteorder.h
 create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_cpuflags.h
 create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h
 create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h
 create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h
 create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_spinlock.h
 create mode 100644 mk/arch/ppc_64/rte.vars.mk
 create mode 100644 mk/machine/power8/rte.vars.mk



[dpdk-dev] [PATCH v3 13/14] test_memzone:fix finding the second smallest segment

2014-11-23 Thread Chao Zhu
The current implementation in test_memzone.c has a bug in finding the
second smallest memory segment. What it finds is the previous smallest
memory segment, which is not necessarily the second smallest memory
segment. This bug may cause test failures in some cases. This patch
fixes it.

Signed-off-by: Chao Zhu 
---
 app/test/test_memzone.c |4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/app/test/test_memzone.c b/app/test/test_memzone.c
index 8668103..f3da2c1 100644
--- a/app/test/test_memzone.c
+++ b/app/test/test_memzone.c
@@ -794,7 +794,7 @@ test_memzone_reserve_memory_in_smallest_segment(void)
/* set new smallest */
min_ms = ms;
}
-   else if (prev_min_ms == NULL) {
+   else if ((prev_min_ms == NULL) || (prev_min_ms->len > ms->len)) {
prev_min_ms = ms;
}
}
@@ -874,7 +874,7 @@ test_memzone_reserve_memory_with_smallest_offset(void)
/* set new smallest */
min_ms = ms;
}
-   else if (prev_min_ms == NULL) {
+   else if ((prev_min_ms == NULL) || (prev_min_ms->len > ms->len)){
prev_min_ms = ms;
}
}
-- 
1.7.1



[dpdk-dev] [PATCH v3 09/14] Remove iopl operation for IBM Power architecture

2014-11-23 Thread Chao Zhu
The iopl() call is mostly for the i386 architecture. On Power and other
architectures, it doesn't exist. This patch modifies rte_eal_iopl_init()
to return -1 on Power and other architectures. Thus rte_config.flags
will not contain the EAL_FLG_HIGH_IOPL flag on those architectures.

Signed-off-by: Chao Zhu 
---
 lib/librte_eal/linuxapp/eal/eal.c |8 
 1 files changed, 8 insertions(+), 0 deletions(-)

diff --git a/lib/librte_eal/linuxapp/eal/eal.c 
b/lib/librte_eal/linuxapp/eal/eal.c
index 7a1d087..0bf81be 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -50,7 +50,9 @@
 #include 
 #include 
 #include 
+#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_I686) 
 #include <sys/io.h>
+#endif

 #include 
 #include 
@@ -752,13 +754,19 @@ rte_eal_mcfg_complete(void)

 /*
  * Request iopl privilege for all RPL, returns 0 on success
+ * iopl() call is mostly for the i386 architecture. For other architectures,
+ * return -1 to indicate IO privilege can't be changed in this way.
  */
 int
 rte_eal_iopl_init(void)
 {
+#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_I686) 
if (iopl(3) != 0)
return -1;
return 0;
+#else
+   return -1;
+#endif
 }

 /* Launch threads, called at application init(). */
-- 
1.7.1



[dpdk-dev] [PATCH v3 08/14] Add CPU flag checking for IBM Power architecture

2014-11-23 Thread Chao Zhu
IBM Power processors don't have CPU flag hardware registers. This patch
uses the auxiliary vector software registers to get the CPU flags and adds
CPU flag checking support for the IBM Power architecture.

Signed-off-by: Chao Zhu 
---
 app/test/test_cpuflags.c   |   35 
 .../common/include/arch/ppc_64/rte_cpuflags.h  |  184 
 mk/rte.cpuflags.mk |   17 ++
 3 files changed, 236 insertions(+), 0 deletions(-)
 create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_cpuflags.h

diff --git a/app/test/test_cpuflags.c b/app/test/test_cpuflags.c
index 82c0197..5aeba5d 100644
--- a/app/test/test_cpuflags.c
+++ b/app/test/test_cpuflags.c
@@ -80,6 +80,40 @@ test_cpuflags(void)
int result;
printf("\nChecking for flags from different registers...\n");

+#ifdef RTE_ARCH_PPC_64
+   printf("Check for PPC64:\t\t");
+   CHECK_FOR_FLAG(RTE_CPUFLAG_PPC64);
+
+   printf("Check for PPC32:\t\t");
+   CHECK_FOR_FLAG(RTE_CPUFLAG_PPC32);
+
+   printf("Check for VSX:\t\t");
+   CHECK_FOR_FLAG(RTE_CPUFLAG_VSX);
+
+   printf("Check for DFP:\t\t");
+   CHECK_FOR_FLAG(RTE_CPUFLAG_DFP);
+
+   printf("Check for FPU:\t\t");
+   CHECK_FOR_FLAG(RTE_CPUFLAG_FPU);
+
+   printf("Check for SMT:\t\t");
+   CHECK_FOR_FLAG(RTE_CPUFLAG_SMT);
+
+   printf("Check for MMU:\t\t");
+   CHECK_FOR_FLAG(RTE_CPUFLAG_MMU);
+
+   printf("Check for ALTIVEC:\t\t");
+   CHECK_FOR_FLAG(RTE_CPUFLAG_ALTIVEC);
+
+   printf("Check for ARCH_2_06:\t\t");
+   CHECK_FOR_FLAG(RTE_CPUFLAG_ARCH_2_06);
+
+   printf("Check for ARCH_2_07:\t\t");
+   CHECK_FOR_FLAG(RTE_CPUFLAG_ARCH_2_07);
+
+   printf("Check for ICACHE_SNOOP:\t\t");
+   CHECK_FOR_FLAG(RTE_CPUFLAG_ICACHE_SNOOP);
+#else
printf("Check for SSE:\t\t");
CHECK_FOR_FLAG(RTE_CPUFLAG_SSE);

@@ -117,6 +151,7 @@ test_cpuflags(void)
CHECK_FOR_FLAG(RTE_CPUFLAG_INVTSC);


+#endif

/*
 * Check if invalid data is handled properly
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_cpuflags.h 
b/lib/librte_eal/common/include/arch/ppc_64/rte_cpuflags.h
new file mode 100644
index 000..6b38f1c
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_cpuflags.h
@@ -0,0 +1,184 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) IBM Corporation 2014.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of IBM Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_CPUFLAGS_PPC_64_H_
+#define _RTE_CPUFLAGS_PPC_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include 
+#include 
+#include 
+#include 
+
+#include "generic/rte_cpuflags.h"
+
+/* Symbolic values for the entries in the auxiliary table */
+#define AT_HWCAP  16
+#define AT_HWCAP2 26
+
+/* software based registers */
+enum cpu_register_t {
+   REG_HWCAP = 0,
+   REG_HWCAP2,
+};
+
+/**
+ * Enumeration of all CPU features supported
+ */
+enum rte_cpu_flag_t {
+   RTE_CPUFLAG_PPC_LE = 0,
+   RTE_CPUFLAG_TRUE_LE,
+   RTE_CPUFLAG_PSERIES_PERFMON_COMPAT,
+   RTE_CPUFLAG_VSX,
+   RTE_CPUFLAG_ARCH_2_06,
+   RTE_CPUFLAG_POWER6_EXT,
+   RTE_CPUFLAG_DFP,
+   RTE_CPUFLAG_PA6T,
+   RTE_CPUFLAG_ARCH_2_05,
+   RTE_CPUFLAG_ICACHE_SNOOP,
+   RTE_CPUFLAG_SMT,
+   

[dpdk-dev] [PATCH v3 07/14] Add vector memcpy for IBM Power architecture

2014-11-23 Thread Chao Zhu
The SSE-based memory copy in DPDK only supports x86. This patch adds
Altivec-based memory copy functions for the IBM Power architecture. This
patch includes altivec.h, which requires GCC version >= 4.8.

Signed-off-by: Chao Zhu 
---
 .../common/include/arch/ppc_64/rte_memcpy.h|  224 
 1 files changed, 224 insertions(+), 0 deletions(-)
 create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h

diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h 
b/lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h
new file mode 100644
index 000..b9b8ddc
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h
@@ -0,0 +1,224 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) IBM Corporation 2014.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of IBM Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_MEMCPY_PPC_64_H_
+#define _RTE_MEMCPY_PPC_64_H_
+
+#include 
+#include 
+/* To include altivec.h, GCC version must be >= 4.8 */
+#include <altivec.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_memcpy.h"
+
+static inline void
+rte_mov16(uint8_t *dst, const uint8_t *src)
+{
+   vec_vsx_st(vec_vsx_ld(0, src), 0, dst);
+}
+
+static inline void
+rte_mov32(uint8_t *dst, const uint8_t *src)
+{
+   vec_vsx_st(vec_vsx_ld(0, src), 0, dst);
+   vec_vsx_st(vec_vsx_ld(16, src), 16, dst);
+}
+
+static inline void
+rte_mov48(uint8_t *dst, const uint8_t *src)
+{
+   vec_vsx_st(vec_vsx_ld(0, src), 0, dst);
+   vec_vsx_st(vec_vsx_ld(16, src), 16, dst);
+   vec_vsx_st(vec_vsx_ld(32, src), 32, dst);
+}
+
+static inline void
+rte_mov64(uint8_t *dst, const uint8_t *src)
+{
+   vec_vsx_st(vec_vsx_ld(0, src), 0, dst);
+   vec_vsx_st(vec_vsx_ld(16, src), 16, dst);
+   vec_vsx_st(vec_vsx_ld(32, src), 32, dst);
+   vec_vsx_st(vec_vsx_ld(48, src), 48, dst);
+}
+
+static inline void
+rte_mov128(uint8_t *dst, const uint8_t *src)
+{
+   vec_vsx_st(vec_vsx_ld(0, src), 0, dst);
+   vec_vsx_st(vec_vsx_ld(16, src), 16, dst);
+   vec_vsx_st(vec_vsx_ld(32, src), 32, dst);
+   vec_vsx_st(vec_vsx_ld(48, src), 48, dst);
+   vec_vsx_st(vec_vsx_ld(64, src), 64, dst);
+   vec_vsx_st(vec_vsx_ld(80, src), 80, dst);
+   vec_vsx_st(vec_vsx_ld(96, src), 96, dst);
+   vec_vsx_st(vec_vsx_ld(112, src), 112, dst);
+}
+
+static inline void
+rte_mov256(uint8_t *dst, const uint8_t *src)
+{
+   rte_mov128(dst, src);
+   rte_mov128(dst + 128, src + 128);
+}
+
+#define rte_memcpy(dst, src, n)  \
+   ((__builtin_constant_p(n)) ?  \
+   memcpy((dst), (src), (n)) :  \
+   rte_memcpy_func((dst), (src), (n)))
+
+static inline void *
+rte_memcpy_func(void *dst, const void *src, size_t n)
+{
+   void *ret = dst;
+
+   /* We can't copy < 16 bytes using XMM registers so do it manually. */
+   if (n < 16) {
+   if (n & 0x01) {
+   *(uint8_t *)dst = *(const uint8_t *)src;
+   dst = (uint8_t *)dst + 1;
+   src = (const uint8_t *)src + 1;
+   }
+   if (n & 0x02) {
+   *(uint16_t *)dst = *(const uint16_t *)src;
+   dst = (uint16_t *)dst + 1;
+   src = (const uint16_t *)src + 1;
+   }
+   if (n & 0x04) {
+   *(uint32_t *)dst = *(const uint32_t *)

[dpdk-dev] [PATCH v3 06/14] Add spinlock operation for IBM Power architecture

2014-11-23 Thread Chao Zhu
This patch adds spinlock operations for IBM Power architecture.

Signed-off-by: Chao Zhu 
---
 .../common/include/arch/ppc_64/rte_spinlock.h  |   73 
 1 files changed, 73 insertions(+), 0 deletions(-)
 create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_spinlock.h

diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_spinlock.h 
b/lib/librte_eal/common/include/arch/ppc_64/rte_spinlock.h
new file mode 100644
index 000..ba028fe
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_spinlock.h
@@ -0,0 +1,73 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) IBM Corporation 2014.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of IBM Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_SPINLOCK_PPC_64_H_
+#define _RTE_SPINLOCK_PPC_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include 
+#include "generic/rte_spinlock.h"
+
+/* Fixme: Use intrinsics to implement the spinlock on Power architecture */
+
+#ifndef RTE_FORCE_INTRINSICS
+
+static inline void
+rte_spinlock_lock(rte_spinlock_t *sl)
+{
+   while (__sync_lock_test_and_set(&sl->locked, 1))
+   while(sl->locked)
+   rte_pause();
+}
+
+static inline void
+rte_spinlock_unlock (rte_spinlock_t *sl)
+{
+   __sync_lock_release(&sl->locked);
+}
+
+static inline int
+rte_spinlock_trylock (rte_spinlock_t *sl)
+{
+   return (__sync_lock_test_and_set(&sl->locked,1) == 0);
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_SPINLOCK_PPC_64_H_ */
-- 
1.7.1



[dpdk-dev] [PATCH v3 05/14] Add prefetch operation for IBM Power architecture

2014-11-23 Thread Chao Zhu
This patch adds architecture-specific prefetch operations for the IBM Power
architecture.

Signed-off-by: Chao Zhu 
---
 .../common/include/arch/ppc_64/rte_prefetch.h  |   61 
 1 files changed, 61 insertions(+), 0 deletions(-)
 create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h

diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h 
b/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h
new file mode 100644
index 000..9df0d13
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h
@@ -0,0 +1,61 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) IBM Corporation 2014.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of IBM Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_PREFETCH_PPC_64_H_
+#define _RTE_PREFETCH_PPC_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_prefetch.h"
+
+static inline void rte_prefetch0(volatile void *p)
+{
+   asm volatile ("dcbt 0,%[p],1" : : [p] "r" (p));
+}
+
+static inline void rte_prefetch1(volatile void *p)
+{
+   asm volatile ("dcbt 0,%[p],1" : : [p] "r" (p));
+}
+
+static inline void rte_prefetch2(volatile void *p)
+{
+   asm volatile ("dcbt 0,%[p],1" : : [p] "r" (p));
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_PREFETCH_PPC_64_H_ */
-- 
1.7.1



[dpdk-dev] [PATCH v3 04/14] Add CPU cycle operations for IBM Power architecture

2014-11-23 Thread Chao Zhu
The IBM Power architecture doesn't have a TSC register to get CPU cycles.
This patch implements reading the time base register on the IBM Power
architecture, in place of the TSC register used on x86.

Signed-off-by: Chao Zhu 
---
 .../common/include/arch/ppc_64/rte_cycles.h|   86 
 1 files changed, 86 insertions(+), 0 deletions(-)
 create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h

diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h 
b/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h
new file mode 100644
index 000..ed66b48
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h
@@ -0,0 +1,86 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) IBM Corporation 2014.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of IBM Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_CYCLES_PPC_64_H_
+#define _RTE_CYCLES_PPC_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_cycles.h"
+
+/**
+ * Read the time base register.
+ *
+ * @return
+ *   The time base for this lcore.
+ */
+static inline uint64_t
+rte_rdtsc(void)
+{
+   union {
+   uint64_t tsc_64;
+   struct {
+   uint32_t hi_32;
+   uint32_t lo_32;
+   };
+   } tsc;
+   uint32_t tmp;
+   asm volatile(
+   "0:\n"
+   "mftbu   %[hi32]\n"
+   "mftb%[lo32]\n"
+   "mftbu   %[tmp]\n"
+   "cmpw%[tmp],%[hi32]\n"
+   "bne 0b\n"
+   : [hi32] "=r"(tsc.hi_32), [lo32] "=r"(tsc.lo_32), [tmp] "=r"(tmp)
+   );
+   return tsc.tsc_64;
+}
+
+static inline uint64_t
+rte_rdtsc_precise(void)
+{
+   rte_mb();
+   return rte_rdtsc();
+}
+
+static inline uint64_t
+rte_get_tsc_cycles(void) { return rte_rdtsc(); }
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_CYCLES_PPC_64_H_ */
+
-- 
1.7.1



[dpdk-dev] [PATCH v3 03/14] Add byte order operations for IBM Power architecture

2014-11-23 Thread Chao Zhu
This patch adds architecture-specific byte order operations for the IBM
Power architecture. The Power architecture supports both big endian and
little endian. This patch also adds an RTE_ARCH_BIG_ENDIAN macro.

Signed-off-by: Chao Zhu 
---
 config/defconfig_ppc_64-power8-linuxapp-gcc|1 +
 .../common/include/arch/ppc_64/rte_byteorder.h |  150 
 2 files changed, 151 insertions(+), 0 deletions(-)
 create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_byteorder.h

diff --git a/config/defconfig_ppc_64-power8-linuxapp-gcc 
b/config/defconfig_ppc_64-power8-linuxapp-gcc
index 97d72ff..b10f60c 100644
--- a/config/defconfig_ppc_64-power8-linuxapp-gcc
+++ b/config/defconfig_ppc_64-power8-linuxapp-gcc
@@ -34,6 +34,7 @@ CONFIG_RTE_MACHINE="power8"

 CONFIG_RTE_ARCH="ppc_64"
 CONFIG_RTE_ARCH_PPC_64=y
+CONFIG_RTE_ARCH_BIG_ENDIAN=y

 CONFIG_RTE_TOOLCHAIN="gcc"
 CONFIG_RTE_TOOLCHAIN_GCC=y
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_byteorder.h 
b/lib/librte_eal/common/include/arch/ppc_64/rte_byteorder.h
new file mode 100644
index 000..a593e8a
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_byteorder.h
@@ -0,0 +1,150 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) IBM Corporation 2014.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of IBM Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Inspired from FreeBSD src/sys/powerpc/include/endian.h
+ * Copyright (c) 1987, 1991, 1993
+ * The Regents of the University of California.  All rights reserved.
+*/
+
+#ifndef _RTE_BYTEORDER_PPC_64_H_
+#define _RTE_BYTEORDER_PPC_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_byteorder.h"
+
+/*
+ * An architecture-optimized byte swap for a 16-bit value.
+ *
+ * Do not use this function directly. The preferred function is rte_bswap16().
+ */
+static inline uint16_t rte_arch_bswap16(uint16_t _x)
+{
+   return ((_x >> 8) | ((_x << 8) & 0xff00));
+}
+
+/*
+ * An architecture-optimized byte swap for a 32-bit value.
+ *
+ * Do not use this function directly. The preferred function is rte_bswap32().
+ */
+static inline uint32_t rte_arch_bswap32(uint32_t _x)
+{
+   return ((_x >> 24) | ((_x >> 8) & 0xff00) | ((_x << 8) & 0xff) |
+   ((_x << 24) & 0xff00));
+}
+
+/*
+ * An architecture-optimized byte swap for a 64-bit value.
+ *
+  * Do not use this function directly. The preferred function is rte_bswap64().
+ */
+/* 64-bit mode */
+static inline uint64_t rte_arch_bswap64(uint64_t _x)
+{
+   return ((_x >> 56) | ((_x >> 40) & 0xff00) | ((_x >> 24) & 0xff) |
+   ((_x >> 8) & 0xff00) | ((_x << 8) & (0xffULL << 32)) |
+   ((_x << 24) & (0xffULL << 40)) |
+   ((_x << 40) & (0xffULL << 48)) | ((_x << 56)));
+}
+
+#ifndef RTE_FORCE_INTRINSICS
+#define rte_bswap16(x) ((uint16_t)(__builtin_constant_p(x) ?   \
+  rte_constant_bswap16(x) :\
+  rte_arch_bswap16(x)))
+
+#define rte_bswap32(x) ((uint32_t)(__builtin_constant_p(x) ?   \
+  rte_constant_bswap32(x) :\
+  rte_arch_bswap32(x)))
+
+#define rte_bswap64(x) ((uint64_t)(__builtin_constant_p(x) ?

[dpdk-dev] [PATCH v3 02/14] Add atomic operations for IBM Power architecture

2014-11-23 Thread Chao Zhu
This patch adds architecture specific atomic operation file for IBM
Power architecture CPU.

Signed-off-by: Chao Zhu 
---
 .../common/include/arch/ppc_64/rte_atomic.h|  415 
 1 files changed, 415 insertions(+), 0 deletions(-)
 create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h

diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h 
b/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h
new file mode 100644
index 000..9c69935
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h
@@ -0,0 +1,415 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) IBM Corporation 2014.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of IBM Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/*
+ * Inspired from FreeBSD src/sys/powerpc/include/atomic.h
+ * Copyright (c) 2008 Marcel Moolenaar
+ * Copyright (c) 2001 Benno Rice
+ * Copyright (c) 2001 David E. O'Brien
+ * Copyright (c) 1998 Doug Rabson
+ * All rights reserved.
+ */
+
+#ifndef _RTE_ATOMIC_PPC_64_H_
+#define _RTE_ATOMIC_PPC_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_atomic.h"
+
+/**
+ * General memory barrier.
+ *
+ * Guarantees that the LOAD and STORE operations generated before the
+ * barrier occur before the LOAD and STORE operations generated after.
+ */
+#definerte_mb()  asm volatile("sync" : : : "memory")
+
+/**
+ * Write memory barrier.
+ *
+ * Guarantees that the STORE operations generated before the barrier
+ * occur before the STORE operations generated after.
+ */
+#definerte_wmb() asm volatile("sync" : : : "memory")
+
+/**
+ * Read memory barrier.
+ *
+ * Guarantees that the LOAD operations generated before the barrier
+ * occur before the LOAD operations generated after.
+ */
+#definerte_rmb() asm volatile("sync" : : : "memory")
+
+/*- 16 bit atomic operations 
-*/
+/* To be compatible with Power7, use GCC built-in functions for 16 bit 
operations */
+
+#ifndef RTE_FORCE_INTRINSICS
+static inline int
+rte_atomic16_cmpset(volatile uint16_t *dst, uint16_t exp, uint16_t src)
+{
+   return __atomic_compare_exchange(dst, , , 0, __ATOMIC_ACQUIRE, 
__ATOMIC_ACQUIRE) ? 1 : 0;
+}
+
+static inline int rte_atomic16_test_and_set(rte_atomic16_t *v)
+{
+   return rte_atomic16_cmpset((volatile uint16_t *)>cnt, 0, 1);
+}
+
+static inline void
+rte_atomic16_inc(rte_atomic16_t *v)
+{
+   __atomic_add_fetch(>cnt, 1, __ATOMIC_ACQUIRE);
+}
+
+static inline void
+rte_atomic16_dec(rte_atomic16_t *v)
+{
+   __atomic_sub_fetch(>cnt, 1, __ATOMIC_ACQUIRE);
+}
+
+static inline int rte_atomic16_inc_and_test(rte_atomic16_t *v)
+{
+   return (__atomic_add_fetch(>cnt, 1, __ATOMIC_ACQUIRE) == 0);
+}
+
+static inline int rte_atomic16_dec_and_test(rte_atomic16_t *v)
+{
+   return (__atomic_sub_fetch(>cnt, 1, __ATOMIC_ACQUIRE) == 0);
+}
+
+/*- 32 bit atomic operations 
-*/
+
+static inline int
+rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src)
+{
+   unsigned int ret = 0;
+
+   asm volatile(
+   "\tlwsync\n"
+   "1:\tlwarx %[ret], 0, %[dst]\n"
+   "cmplw %[exp], %[ret]\n"
+   "bne 2f\n"
+   

[dpdk-dev] [PATCH v3 01/14] Add compiling definations for IBM Power architecture

2014-11-23 Thread Chao Zhu
To make DPDK run on IBM Power architecture, configuration files for
Power architecture are added. Also, the compiling related .mk files are
added.

Signed-off-by: Chao Zhu 
---
 config/common_linuxapp_powerpc  |  394 +++
 config/defconfig_ppc_64-power8-linuxapp-gcc |   40 +++
 mk/arch/ppc_64/rte.vars.mk  |   39 +++
 mk/machine/power8/rte.vars.mk   |   57 
 4 files changed, 530 insertions(+), 0 deletions(-)
 create mode 100644 config/common_linuxapp_powerpc
 create mode 100644 config/defconfig_ppc_64-power8-linuxapp-gcc
 create mode 100644 mk/arch/ppc_64/rte.vars.mk
 create mode 100644 mk/machine/power8/rte.vars.mk

diff --git a/config/common_linuxapp_powerpc b/config/common_linuxapp_powerpc
new file mode 100644
index 000..d230a0b
--- /dev/null
+++ b/config/common_linuxapp_powerpc
@@ -0,0 +1,394 @@
+#   BSD LICENSE
+#
+#   Copyright (C) IBM Corporation 2014.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in
+#   the documentation and/or other materials provided with the
+#   distribution.
+# * Neither the name of IBM Corporation nor the names of its
+#   contributors may be used to endorse or promote products derived
+#   from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+#
+# define executive environment
+#
+# CONFIG_RTE_EXEC_ENV can be linuxapp, baremetal, bsdapp
+#
+CONFIG_RTE_EXEC_ENV="linuxapp"
+CONFIG_RTE_EXEC_ENV_LINUXAPP=y
+
+#
+# Use intrinsics or assembly code for key routines
+#
+CONFIG_RTE_FORCE_INTRINSICS=n
+
+#
+# Compile to share library
+#
+CONFIG_RTE_BUILD_SHARED_LIB=n
+
+#
+# Combine to one single library
+#
+CONFIG_RTE_BUILD_COMBINE_LIBS=n
+CONFIG_RTE_LIBNAME="powerpc_dpdk"
+
+#
+# Compile libc directory
+#
+CONFIG_RTE_LIBC=n
+
+#
+# Compile newlib as libc from source
+#
+CONFIG_RTE_LIBC_NEWLIB_SRC=n
+
+#
+# Use binary newlib
+#
+CONFIG_RTE_LIBC_NEWLIB_BIN=n
+
+#
+# Use binary newlib
+#
+CONFIG_RTE_LIBC_NETINCS=n
+
+#
+# Compile libgloss (newlib-stubs)
+#
+CONFIG_RTE_LIBGLOSS=n
+
+#
+# Compile Environment Abstraction Layer
+# Note: Power8 has 96 cores, so increase CONFIG_RTE_MAX_LCORE from 64 to 128
+#
+CONFIG_RTE_LIBRTE_EAL=y
+CONFIG_RTE_MAX_LCORE=128
+CONFIG_RTE_MAX_NUMA_NODES=8
+CONFIG_RTE_MAX_MEMSEG=256
+CONFIG_RTE_MAX_MEMZONE=2560
+CONFIG_RTE_MAX_TAILQ=32
+CONFIG_RTE_LOG_LEVEL=8
+CONFIG_RTE_LOG_HISTORY=256
+CONFIG_RTE_LIBEAL_USE_HPET=n
+CONFIG_RTE_EAL_ALLOW_INV_SOCKET_ID=n
+CONFIG_RTE_EAL_ALWAYS_PANIC_ON_ERROR=n
+CONFIG_RTE_EAL_IGB_UIO=y
+CONFIG_RTE_EAL_VFIO=y
+
+#
+# Special configurations in PCI Config Space for high performance
+#
+CONFIG_RTE_PCI_CONFIG=n
+CONFIG_RTE_PCI_EXTENDED_TAG=""
+CONFIG_RTE_PCI_MAX_READ_REQUEST_SIZE=0
+
+#
+# Compile Environment Abstraction Layer for linux
+#
+CONFIG_RTE_LIBRTE_EAL_LINUXAPP=y
+
+#
+# Compile Environment Abstraction Layer for Bare metal
+#
+CONFIG_RTE_LIBRTE_EAL_BAREMETAL=n
+
+#
+# Compile Environment Abstraction Layer to support Vmware TSC map
+# Note: Power doesn't have this support
+#
+CONFIG_RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT=n
+
+#
+# Compile the argument parser library
+#
+CONFIG_RTE_LIBRTE_KVARGS=y
+
+#
+# Compile generic ethernet library
+#
+CONFIG_RTE_LIBRTE_ETHER=y
+CONFIG_RTE_LIBRTE_ETHDEV_DEBUG=n
+CONFIG_RTE_MAX_ETHPORTS=32
+CONFIG_RTE_LIBRTE_IEEE1588=n
+CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16
+
+#
+# Support NIC bypass logic
+#
+CONFIG_RTE_NIC_BYPASS=n
+
+#
+# Note: Initially, all of the PMD drivers compilation are turned off on Power
+# Will turn on them only after the successful testing on Power
+#
+
+#
+# Compile burst-oriented IGB & EM PMD drivers
+#
+CONFIG_RTE_LIBRTE_EM_PMD=n
+CONFIG_RTE_LIBRTE_IGB_PMD=n
+CONFIG_RTE_LIBRTE_E1000_DEBUG_INIT=n
+CONFIG_RTE_LIBRTE_E1000_DE

[dpdk-dev] [PATCH v3 00/14] Patches for DPDK to support Power architecture

2014-11-23 Thread Chao Zhu
This set of patches adds IBM Power architecture support to DPDK. It adds the
required support to the EAL library. This set of patches doesn't support the
full DPDK functionality on Power processors, so a separate common
configuration file is used for Power to turn off some un-migrated functions.
To compile on the PPC64 architecture, GCC version >= 4.8 must be used. This v3
patch updates eal_memory.c to fix the memory zone allocation and also solves
the compilation problems of test-pmd.

Chao Zhu (14):
  Add compiling definations for IBM Power architecture
  Add atomic operations for IBM Power architecture
  Add byte order operations for IBM Power architecture
  Add CPU cycle operations for IBM Power architecture
  Add prefetch operation for IBM Power architecture
  Add spinlock operation for IBM Power architecture
  Add vector memcpy for IBM Power architecture
  Add CPU flag checking for IBM Power architecture
  Remove iopl operation for IBM Power architecture
  Add cache size define for IBM Power Architecture
  Add huge page size define for IBM Power architecture
  Add eal memory support for IBM Power Architecture
  test_memzone:fix finding the second smallest segment
  Fix the compiling of test-pmd on IBM Power Architecture

 app/test-pmd/config.c  |   33 +-
 app/test/test_cpuflags.c   |   35 ++
 app/test/test_malloc.c |8 +-
 app/test/test_memzone.c|  123 ++-
 config/common_linuxapp_powerpc |  394 +++
 config/defconfig_ppc_64-power8-linuxapp-gcc|   42 ++
 config/defconfig_x86_64-native-linuxapp-clang  |1 +
 config/defconfig_x86_64-native-linuxapp-gcc|1 +
 config/defconfig_x86_64-native-linuxapp-icc|1 +
 lib/librte_eal/common/eal_common_memzone.c |   15 +-
 .../common/include/arch/ppc_64/rte_atomic.h|  415 
 .../common/include/arch/ppc_64/rte_byteorder.h |  150 +++
 .../common/include/arch/ppc_64/rte_cpuflags.h  |  184 +
 .../common/include/arch/ppc_64/rte_cycles.h|   86 
 .../common/include/arch/ppc_64/rte_memcpy.h|  224 +++
 .../common/include/arch/ppc_64/rte_prefetch.h  |   61 +++
 .../common/include/arch/ppc_64/rte_spinlock.h  |   73 
 lib/librte_eal/common/include/rte_memory.h |9 +-
 lib/librte_eal/common/include/rte_memzone.h|8 +
 lib/librte_eal/linuxapp/eal/eal.c  |   13 +-
 lib/librte_eal/linuxapp/eal/eal_memory.c   |   75 +++-
 mk/arch/ppc_64/rte.vars.mk |   39 ++
 mk/machine/power8/rte.vars.mk  |   57 +++
 mk/rte.cpuflags.mk |   17 +
 24 files changed, 2015 insertions(+), 49 deletions(-)
 create mode 100644 config/common_linuxapp_powerpc
 create mode 100644 config/defconfig_ppc_64-power8-linuxapp-gcc
 create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h
 create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_byteorder.h
 create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_cpuflags.h
 create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h
 create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h
 create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h
 create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_spinlock.h
 create mode 100644 mk/arch/ppc_64/rte.vars.mk
 create mode 100644 mk/machine/power8/rte.vars.mk



[dpdk-dev] [PATCH v2 12/12] Add eal memory support for IBM Power Architecture

2014-11-16 Thread Chao Zhu
IBM Power architecture has a different memory layout from x86. When
the physical memory address is in ascending order, the mmaped virtual
address is in descending order. This patch modifies the memory segment
detection code to make it work for Power. This patch also adds a
common RTE_ARCH_64 definition for 64-bit systems.

Signed-off-by: Chao Zhu 
---
 config/defconfig_ppc_64-power8-linuxapp-gcc   |1 +
 config/defconfig_x86_64-native-linuxapp-clang |1 +
 config/defconfig_x86_64-native-linuxapp-gcc   |1 +
 config/defconfig_x86_64-native-linuxapp-icc   |1 +
 lib/librte_eal/linuxapp/eal/eal_memory.c  |   27 +++-
 5 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/config/defconfig_ppc_64-power8-linuxapp-gcc 
b/config/defconfig_ppc_64-power8-linuxapp-gcc
index b10f60c..23a5591 100644
--- a/config/defconfig_ppc_64-power8-linuxapp-gcc
+++ b/config/defconfig_ppc_64-power8-linuxapp-gcc
@@ -35,6 +35,7 @@ CONFIG_RTE_MACHINE="power8"
 CONFIG_RTE_ARCH="ppc_64"
 CONFIG_RTE_ARCH_PPC_64=y
 CONFIG_RTE_ARCH_BIG_ENDIAN=y
+CONFIG_RTE_ARCH_64=y

 CONFIG_RTE_TOOLCHAIN="gcc"
 CONFIG_RTE_TOOLCHAIN_GCC=y
diff --git a/config/defconfig_x86_64-native-linuxapp-clang 
b/config/defconfig_x86_64-native-linuxapp-clang
index bbda080..5f3074e 100644
--- a/config/defconfig_x86_64-native-linuxapp-clang
+++ b/config/defconfig_x86_64-native-linuxapp-clang
@@ -36,6 +36,7 @@ CONFIG_RTE_MACHINE="native"

 CONFIG_RTE_ARCH="x86_64"
 CONFIG_RTE_ARCH_X86_64=y
+CONFIG_RTE_ARCH_64=y

 CONFIG_RTE_TOOLCHAIN="clang"
 CONFIG_RTE_TOOLCHAIN_CLANG=y
diff --git a/config/defconfig_x86_64-native-linuxapp-gcc 
b/config/defconfig_x86_64-native-linuxapp-gcc
index 3de818a..60baf5b 100644
--- a/config/defconfig_x86_64-native-linuxapp-gcc
+++ b/config/defconfig_x86_64-native-linuxapp-gcc
@@ -36,6 +36,7 @@ CONFIG_RTE_MACHINE="native"

 CONFIG_RTE_ARCH="x86_64"
 CONFIG_RTE_ARCH_X86_64=y
+CONFIG_RTE_ARCH_64=y

 CONFIG_RTE_TOOLCHAIN="gcc"
 CONFIG_RTE_TOOLCHAIN_GCC=y
diff --git a/config/defconfig_x86_64-native-linuxapp-icc 
b/config/defconfig_x86_64-native-linuxapp-icc
index 795333b..71d1e28 100644
--- a/config/defconfig_x86_64-native-linuxapp-icc
+++ b/config/defconfig_x86_64-native-linuxapp-icc
@@ -36,6 +36,7 @@ CONFIG_RTE_MACHINE="native"

 CONFIG_RTE_ARCH="x86_64"
 CONFIG_RTE_ARCH_X86_64=y
+CONFIG_RTE_ARCH_64=y

 CONFIG_RTE_TOOLCHAIN="icc"
 CONFIG_RTE_TOOLCHAIN_ICC=y
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c 
b/lib/librte_eal/linuxapp/eal/eal_memory.c
index f2454f4..b9c6d2e 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -316,11 +316,11 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl,
 #endif
hugepg_tbl[i].filepath[sizeof(hugepg_tbl[i].filepath) - 
1] = '\0';
}
-#ifndef RTE_ARCH_X86_64
-   /* for 32-bit systems, don't remap 1G pages, just reuse original
+#ifndef RTE_ARCH_64
+   /* for 32-bit systems, don't remap 1G and 16G pages, just reuse 
original
 * map address as final map address.
 */
-   else if (hugepage_sz == RTE_PGSIZE_1G){
+   else if ((hugepage_sz == RTE_PGSIZE_1G) || (hugepage_sz == 
RTE_PGSIZE_16G)){
hugepg_tbl[i].final_va = hugepg_tbl[i].orig_va;
hugepg_tbl[i].orig_va = NULL;
continue;
@@ -412,11 +412,11 @@ remap_all_hugepages(struct hugepage_file *hugepg_tbl, 
struct hugepage_info *hpi)

while (i < hpi->num_pages[0]) {

-#ifndef RTE_ARCH_X86_64
-   /* for 32-bit systems, don't remap 1G pages, just reuse original
+#ifndef RTE_ARCH_64
+   /* for 32-bit systems, don't remap 1G pages and 16G pages, just 
reuse original
 * map address as final map address.
 */
-   if (hugepage_sz == RTE_PGSIZE_1G){
+   if ((hugepage_sz == RTE_PGSIZE_1G) || (hugepage_sz == 
RTE_PGSIZE_16G)){
hugepg_tbl[i].final_va = hugepg_tbl[i].orig_va;
hugepg_tbl[i].orig_va = NULL;
i++;
@@ -1263,9 +1263,18 @@ rte_eal_hugepage_init(void)
else if ((hugepage[i].physaddr - hugepage[i-1].physaddr) !=
hugepage[i].size)
new_memseg = 1;
+#ifdef RTE_ARCH_PPC_64
+   /* IBM Power architecture has different memory layout than x86.
+   * If the physical address is lower address first, the mmaped 
virtual
+   * address will be higher address first */
+   else if (((unsigned long)hugepage[i-1].final_va -
+   (unsigned long)hugepage[i].final_va) != hugepage[i].size)
+   new_memseg = 1;
+#else
else if (((unsigned long)hugepage[i].final_va -
 

[dpdk-dev] [PATCH v2 11/12] Add huge page size define for IBM Power architecture

2014-11-16 Thread Chao Zhu
IBM Power architecture has different huge page sizes (16MB, 16GB) than
x86. This patch defines RTE_PGSIZE_16M and RTE_PGSIZE_16G in the
rte_page_sizes enum variable and adds huge page size support of DPDK
for IBM Power architecture.

Signed-off-by: Chao Zhu 
---
 app/test/test_memzone.c |  119 ++-
 lib/librte_eal/common/eal_common_memzone.c  |   15 +++-
 lib/librte_eal/common/include/rte_memory.h  |9 ++-
 lib/librte_eal/common/include/rte_memzone.h |8 ++
 lib/librte_eal/linuxapp/eal/eal.c   |5 +-
 5 files changed, 147 insertions(+), 9 deletions(-)

diff --git a/app/test/test_memzone.c b/app/test/test_memzone.c
index 381f643..8668103 100644
--- a/app/test/test_memzone.c
+++ b/app/test/test_memzone.c
@@ -133,6 +133,8 @@ test_memzone_reserve_flags(void)
const struct rte_memseg *ms;
int hugepage_2MB_avail = 0;
int hugepage_1GB_avail = 0;
+   int hugepage_16MB_avail = 0;
+   int hugepage_16GB_avail = 0;
const size_t size = 100;
int i = 0;
ms = rte_eal_get_physmem_layout();
@@ -141,12 +143,20 @@ test_memzone_reserve_flags(void)
hugepage_2MB_avail = 1;
if (ms[i].hugepage_sz == RTE_PGSIZE_1G)
hugepage_1GB_avail = 1;
+   if (ms[i].hugepage_sz == RTE_PGSIZE_16M)
+   hugepage_16MB_avail = 1;
+   if (ms[i].hugepage_sz == RTE_PGSIZE_16G)
+   hugepage_16GB_avail = 1;
}
-   /* Display the availability of 2MB and 1GB pages */
+   /* Display the availability of 2MB ,1GB, 16MB, 16GB pages */
if (hugepage_2MB_avail)
printf("2MB Huge pages available\n");
if (hugepage_1GB_avail)
printf("1GB Huge pages available\n");
+   if (hugepage_16MB_avail)
+   printf("16MB Huge pages available\n");
+   if (hugepage_16GB_avail)
+   printf("16GB Huge pages available\n");
/*
 * If 2MB pages available, check that a small memzone is correctly
 * reserved from 2MB huge pages when requested by the RTE_MEMZONE_2MB 
flag.
@@ -255,6 +265,113 @@ test_memzone_reserve_flags(void)
}
}
}
+   /*
+* This option is for IBM Power. If 16MB pages available, check that a 
small memzone is correctly
+* reserved from 16MB huge pages when requested by the RTE_MEMZONE_16MB 
flag.
+* Also check that RTE_MEMZONE_SIZE_HINT_ONLY flag only defaults to an
+* available page size (i.e 16GB ) when 16MB pages are unavailable.
+*/
+   if (hugepage_16MB_avail){
+   mz = rte_memzone_reserve("flag_zone_16M", size, SOCKET_ID_ANY,
+   RTE_MEMZONE_16MB);
+   if (mz == NULL) {
+   printf("MEMZONE FLAG 16MB\n");
+   return -1;
+   }
+   if (mz->hugepage_sz != RTE_PGSIZE_16M) {
+   printf("hugepage_sz not equal 16M\n");
+   return -1;
+   }
+
+   mz = rte_memzone_reserve("flag_zone_16M_HINT", size, 
SOCKET_ID_ANY,
+   RTE_MEMZONE_16MB|RTE_MEMZONE_SIZE_HINT_ONLY);
+   if (mz == NULL) {
+   printf("MEMZONE FLAG 2MB\n");
+   return -1;
+   }
+   if (mz->hugepage_sz != RTE_PGSIZE_16M) {
+   printf("hugepage_sz not equal 16M\n");
+   return -1;
+   }
+
+   /* Check if 1GB huge pages are unavailable, that function fails 
unless
+* HINT flag is indicated
+*/
+   if (!hugepage_16GB_avail) {
+   mz = rte_memzone_reserve("flag_zone_16G_HINT", size, 
SOCKET_ID_ANY,
+   
RTE_MEMZONE_16GB|RTE_MEMZONE_SIZE_HINT_ONLY);
+   if (mz == NULL) {
+   printf("MEMZONE FLAG 16GB & HINT\n");
+   return -1;
+   }
+   if (mz->hugepage_sz != RTE_PGSIZE_16M) {
+   printf("hugepage_sz not equal 16M\n");
+   return -1;
+   }
+
+   mz = rte_memzone_reserve("flag_zone_16G", size, 
SOCKET_ID_ANY,
+   RTE_MEMZONE_16GB);
+   if (mz != NULL) {
+   printf("MEMZONE FLAG 16GB\n");
+   return -1;
+   }
+   }
+   }
+   /*As with 16MB tests above for 16GB huge page requests*/
+   if (hugepage_16GB_avail

[dpdk-dev] [PATCH v2 10/12] Add cache size define for IBM Power Architecture

2014-11-16 Thread Chao Zhu
IBM Power architecture has a different cache line size (128 bytes) than
x86 (64 bytes). This patch defines CACHE_LINE_SIZE as 128 bytes, overriding
the default value of 64 bytes, to support IBM Power Architecture.

Signed-off-by: Chao Zhu 
---
 app/test/test_malloc.c |8 
 mk/arch/ppc_64/rte.vars.mk |2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/app/test/test_malloc.c b/app/test/test_malloc.c
index ee34ca3..63e6b32 100644
--- a/app/test/test_malloc.c
+++ b/app/test/test_malloc.c
@@ -300,9 +300,9 @@ test_big_alloc(void)
size_t size =rte_str_to_size(MALLOC_MEMZONE_SIZE)*2;
int align = 0;
 #ifndef RTE_LIBRTE_MALLOC_DEBUG
-   int overhead = 64 + 64;
+   int overhead = CACHE_LINE_SIZE + CACHE_LINE_SIZE;
 #else
-   int overhead = 64 + 64 + 64;
+   int overhead = CACHE_LINE_SIZE + CACHE_LINE_SIZE + CACHE_LINE_SIZE;
 #endif

rte_malloc_get_socket_stats(socket, _stats);
@@ -356,9 +356,9 @@ test_multi_alloc_statistics(void)
 #ifndef RTE_LIBRTE_MALLOC_DEBUG
int trailer_size = 0;
 #else
-   int trailer_size = 64;
+   int trailer_size = CACHE_LINE_SIZE;
 #endif
-   int overhead = 64 + trailer_size;
+   int overhead = CACHE_LINE_SIZE + trailer_size;

rte_malloc_get_socket_stats(socket, _stats);

diff --git a/mk/arch/ppc_64/rte.vars.mk b/mk/arch/ppc_64/rte.vars.mk
index 363fcd1..dfdeaea 100644
--- a/mk/arch/ppc_64/rte.vars.mk
+++ b/mk/arch/ppc_64/rte.vars.mk
@@ -32,7 +32,7 @@
 ARCH  ?= powerpc
 CROSS ?=

-CPU_CFLAGS  ?= -m64
+CPU_CFLAGS  ?= -m64 -DCACHE_LINE_SIZE=128
 CPU_LDFLAGS ?=
 CPU_ASFLAGS ?= -felf64

-- 
1.7.1



[dpdk-dev] [PATCH v2 09/12] Remove iopl operation for IBM Power architecture

2014-11-16 Thread Chao Zhu
The iopl() call is mostly for the i386 architecture. On Power and other
architectures, it doesn't exist. This patch modifies rte_eal_iopl_init()
to make it return -1 for Power and other architectures. Thus
rte_config.flags will not contain the EAL_FLG_HIGH_IOPL flag on those
architectures.

Signed-off-by: Chao Zhu 
---
 lib/librte_eal/linuxapp/eal/eal.c |8 
 1 files changed, 8 insertions(+), 0 deletions(-)

diff --git a/lib/librte_eal/linuxapp/eal/eal.c 
b/lib/librte_eal/linuxapp/eal/eal.c
index 7a1d087..0bf81be 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -50,7 +50,9 @@
 #include 
 #include 
 #include 
+#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_I686) 
 #include 
+#endif

 #include 
 #include 
@@ -752,13 +754,19 @@ rte_eal_mcfg_complete(void)

 /*
  * Request iopl privilege for all RPL, returns 0 on success
+ * iopl() call is mostly for the i386 architecture. For other architectures,
+ * return -1 to indicate IO priviledge can't be changed in this way. 
  */
 int
 rte_eal_iopl_init(void)
 {
+#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_I686) 
if (iopl(3) != 0)
return -1;
return 0;
+#else
+   return -1;
+#endif
 }

 /* Launch threads, called at application init(). */
-- 
1.7.1



[dpdk-dev] [PATCH v2 08/12] Add CPU flag checking for IBM Power architecture

2014-11-16 Thread Chao Zhu
IBM Power processors don't have CPU flag hardware registers. This patch
uses aux vector software registers to get CPU flags and adds CPU flag
checking support for IBM Power architecture.

Signed-off-by: Chao Zhu 
---
 app/test/test_cpuflags.c   |   35 
 .../common/include/arch/ppc_64/rte_cpuflags.h  |  184 
 mk/rte.cpuflags.mk |   17 ++
 3 files changed, 236 insertions(+), 0 deletions(-)
 create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_cpuflags.h

diff --git a/app/test/test_cpuflags.c b/app/test/test_cpuflags.c
index 82c0197..5aeba5d 100644
--- a/app/test/test_cpuflags.c
+++ b/app/test/test_cpuflags.c
@@ -80,6 +80,40 @@ test_cpuflags(void)
int result;
printf("\nChecking for flags from different registers...\n");

+#ifdef RTE_ARCH_PPC_64
+   printf("Check for PPC64:\t\t");
+   CHECK_FOR_FLAG(RTE_CPUFLAG_PPC64);
+
+   printf("Check for PPC32:\t\t");
+   CHECK_FOR_FLAG(RTE_CPUFLAG_PPC32);
+
+   printf("Check for VSX:\t\t");
+   CHECK_FOR_FLAG(RTE_CPUFLAG_VSX);
+
+   printf("Check for DFP:\t\t");
+   CHECK_FOR_FLAG(RTE_CPUFLAG_DFP);
+
+   printf("Check for FPU:\t\t");
+   CHECK_FOR_FLAG(RTE_CPUFLAG_FPU);
+
+   printf("Check for SMT:\t\t");
+   CHECK_FOR_FLAG(RTE_CPUFLAG_SMT);
+
+   printf("Check for MMU:\t\t");
+   CHECK_FOR_FLAG(RTE_CPUFLAG_MMU);
+
+   printf("Check for ALTIVEC:\t\t");
+   CHECK_FOR_FLAG(RTE_CPUFLAG_ALTIVEC);
+
+   printf("Check for ARCH_2_06:\t\t");
+   CHECK_FOR_FLAG(RTE_CPUFLAG_ARCH_2_06);
+
+   printf("Check for ARCH_2_07:\t\t");
+   CHECK_FOR_FLAG(RTE_CPUFLAG_ARCH_2_07);
+
+   printf("Check for ICACHE_SNOOP:\t\t");
+   CHECK_FOR_FLAG(RTE_CPUFLAG_ICACHE_SNOOP);
+#else
printf("Check for SSE:\t\t");
CHECK_FOR_FLAG(RTE_CPUFLAG_SSE);

@@ -117,6 +151,7 @@ test_cpuflags(void)
CHECK_FOR_FLAG(RTE_CPUFLAG_INVTSC);


+#endif

/*
 * Check if invalid data is handled properly
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_cpuflags.h 
b/lib/librte_eal/common/include/arch/ppc_64/rte_cpuflags.h
new file mode 100644
index 000..6b38f1c
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_cpuflags.h
@@ -0,0 +1,184 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) IBM Corporation 2014.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of IBM Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_CPUFLAGS_PPC_64_H_
+#define _RTE_CPUFLAGS_PPC_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include 
+#include 
+#include 
+#include 
+
+#include "generic/rte_cpuflags.h"
+
+/* Symbolic values for the entries in the auxiliary table */
+#define AT_HWCAP  16
+#define AT_HWCAP2 26
+
+/* software based registers */
+enum cpu_register_t {
+   REG_HWCAP = 0,
+   REG_HWCAP2,
+};
+
+/**
+ * Enumeration of all CPU features supported
+ */
+enum rte_cpu_flag_t {
+   RTE_CPUFLAG_PPC_LE = 0,
+   RTE_CPUFLAG_TRUE_LE,
+   RTE_CPUFLAG_PSERIES_PERFMON_COMPAT,
+   RTE_CPUFLAG_VSX,
+   RTE_CPUFLAG_ARCH_2_06,
+   RTE_CPUFLAG_POWER6_EXT,
+   RTE_CPUFLAG_DFP,
+   RTE_CPUFLAG_PA6T,
+   RTE_CPUFLAG_ARCH_2_05,
+   RTE_CPUFLAG_ICACHE_SNOOP,
+   RTE_CPUFLAG_SMT,
+   

[dpdk-dev] [PATCH v2 07/12] Add vector memcpy for IBM Power architecture

2014-11-16 Thread Chao Zhu
The SSE based memory copy in DPDK only supports x86. This patch adds
altivec based memory copy functions for IBM Power architecture.

Signed-off-by: Chao Zhu 
---
 .../common/include/arch/ppc_64/rte_memcpy.h|  223 
 1 files changed, 223 insertions(+), 0 deletions(-)
 create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h

diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h 
b/lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h
new file mode 100644
index 000..2bbcfc0
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h
@@ -0,0 +1,223 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) IBM Corporation 2014.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of IBM Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_MEMCPY_PPC_64_H_
+#define _RTE_MEMCPY_PPC_64_H_
+
+#include 
+#include 
+#include 
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_memcpy.h"
+
+/*
+ * Fixed-size copy helpers. Each one is a straight-line sequence of
+ * vec_vsx_ld()/vec_vsx_st() pairs at byte offsets 0, 16, 32, ...; the 16-byte
+ * offset stride implies each pair moves one 16-byte chunk.
+ * NOTE(review): alignment requirements of src/dst are not visible here.
+ */
+
+/* Copy 16 bytes from src to dst: one load/store pair at offset 0. */
+static inline void
+rte_mov16(uint8_t *dst, const uint8_t *src)
+{
+   vec_vsx_st(vec_vsx_ld(0, src), 0, dst);
+}
+
+/* Copy 32 bytes from src to dst: pairs at offsets 0 and 16. */
+static inline void
+rte_mov32(uint8_t *dst, const uint8_t *src)
+{
+   vec_vsx_st(vec_vsx_ld(0, src), 0, dst);
+   vec_vsx_st(vec_vsx_ld(16, src), 16, dst);
+}
+
+/* Copy 48 bytes from src to dst: pairs at offsets 0, 16 and 32. */
+static inline void
+rte_mov48(uint8_t *dst, const uint8_t *src)
+{
+   vec_vsx_st(vec_vsx_ld(0, src), 0, dst);
+   vec_vsx_st(vec_vsx_ld(16, src), 16, dst);
+   vec_vsx_st(vec_vsx_ld(32, src), 32, dst);
+}
+
+/* Copy 64 bytes from src to dst: pairs at offsets 0 through 48. */
+static inline void
+rte_mov64(uint8_t *dst, const uint8_t *src)
+{
+   vec_vsx_st(vec_vsx_ld(0, src), 0, dst);
+   vec_vsx_st(vec_vsx_ld(16, src), 16, dst);
+   vec_vsx_st(vec_vsx_ld(32, src), 32, dst);
+   vec_vsx_st(vec_vsx_ld(48, src), 48, dst);
+}
+
+/* Copy 128 bytes from src to dst: eight pairs at offsets 0 through 112. */
+static inline void
+rte_mov128(uint8_t *dst, const uint8_t *src)
+{
+   vec_vsx_st(vec_vsx_ld(0, src), 0, dst);
+   vec_vsx_st(vec_vsx_ld(16, src), 16, dst);
+   vec_vsx_st(vec_vsx_ld(32, src), 32, dst);
+   vec_vsx_st(vec_vsx_ld(48, src), 48, dst);
+   vec_vsx_st(vec_vsx_ld(64, src), 64, dst);
+   vec_vsx_st(vec_vsx_ld(80, src), 80, dst);
+   vec_vsx_st(vec_vsx_ld(96, src), 96, dst);
+   vec_vsx_st(vec_vsx_ld(112, src), 112, dst);
+}
+
+/* Copy 256 bytes from src to dst as two back-to-back rte_mov128() calls. */
+static inline void
+rte_mov256(uint8_t *dst, const uint8_t *src)
+{
+   rte_mov128(dst, src);
+   rte_mov128(dst + 128, src + 128);
+}
+
+/*
+ * Copy n bytes from src to dst.
+ * When n is a compile-time constant the call goes to plain memcpy();
+ * otherwise it goes to rte_memcpy_func(). Only one of the two branches is
+ * evaluated for any given call.
+ */
+#define rte_memcpy(dst, src, n)  \
+   ((__builtin_constant_p(n)) ?  \
+   memcpy((dst), (src), (n)) :  \
+   rte_memcpy_func((dst), (src), (n)))
+
+static inline void *
+rte_memcpy_func(void *dst, const void *src, size_t n)
+{
+   void *ret = dst;
+
+   /* We can't copy < 16 bytes using XMM registers so do it manually. */
+   if (n < 16) {
+   if (n & 0x01) {
+   *(uint8_t *)dst = *(const uint8_t *)src;
+   dst = (uint8_t *)dst + 1;
+   src = (const uint8_t *)src + 1;
+   }
+   if (n & 0x02) {
+   *(uint16_t *)dst = *(const uint16_t *)src;
+   dst = (uint16_t *)dst + 1;
+   src = (const uint16_t *)src + 1;
+   }
+   if (n & 0x04) {
+   *(uint32_t *)dst = *(const uint32_t *)src;
+   dst = (uint32_t *)dst + 1;
+   src = (const uint32_t *)src + 1

[dpdk-dev] [PATCH v2 06/12] Add spinlock operation for IBM Power architecture

2014-11-16 Thread Chao Zhu
This patch adds spinlock operations for IBM Power architecture.

Signed-off-by: Chao Zhu 
---
 .../common/include/arch/ppc_64/rte_spinlock.h  |   73 
 1 files changed, 73 insertions(+), 0 deletions(-)
 create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_spinlock.h

diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_spinlock.h 
b/lib/librte_eal/common/include/arch/ppc_64/rte_spinlock.h
new file mode 100644
index 000..ba028fe
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_spinlock.h
@@ -0,0 +1,73 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) IBM Corporation 2014.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of IBM Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_SPINLOCK_PPC_64_H_
+#define _RTE_SPINLOCK_PPC_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include 
+#include "generic/rte_spinlock.h"
+
+/* Fixme: Use intrinsics to implement the spinlock on Power architecture */
+
+#ifndef RTE_FORCE_INTRINSICS
+
+/*
+ * Take the spinlock, busy-waiting until it is acquired.
+ *
+ * sl: the lock to take; its 'locked' field is set to 1 on return.
+ */
+static inline void
+rte_spinlock_lock(rte_spinlock_t *sl)
+{
+   /* Atomically write 1; a non-zero previous value means someone holds it. */
+   while (__sync_lock_test_and_set(&sl->locked, 1))
+       /* Poll with plain reads before retrying the atomic exchange. */
+       while (sl->locked)
+           rte_pause();
+}
+
+/*
+ * Release the spinlock by clearing its 'locked' field with
+ * __sync_lock_release().
+ *
+ * sl: the lock to release.
+ */
+static inline void
+rte_spinlock_unlock(rte_spinlock_t *sl)
+{
+   __sync_lock_release(&sl->locked);
+}
+
+/*
+ * Try to take the spinlock without waiting.
+ *
+ * sl: the lock to try.
+ * Returns 1 if the lock was free and is now held by the caller, 0 if it was
+ * already held (the 'locked' field is left at 1 either way).
+ */
+static inline int
+rte_spinlock_trylock(rte_spinlock_t *sl)
+{
+   /* The exchange returns the previous value: 0 means we got the lock. */
+   return (__sync_lock_test_and_set(&sl->locked, 1) == 0);
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_SPINLOCK_PPC_64_H_ */
-- 
1.7.1



[dpdk-dev] [PATCH v2 05/12] Add prefetch operation for IBM Power architecture

2014-11-16 Thread Chao Zhu
This patch adds architecture-specific prefetch operations for the IBM Power
architecture.

Signed-off-by: Chao Zhu 
---
 .../common/include/arch/ppc_64/rte_prefetch.h  |   61 
 1 files changed, 61 insertions(+), 0 deletions(-)
 create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h

diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h 
b/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h
new file mode 100644
index 000..9df0d13
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h
@@ -0,0 +1,61 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) IBM Corporation 2014.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of IBM Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_PREFETCH_PPC_64_H_
+#define _RTE_PREFETCH_PPC_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_prefetch.h"
+
+/*
+ * Prefetch hints for the address p. Each function issues one "dcbt"
+ * instruction on p and has no other effect visible in C.
+ * NOTE(review): all three variants emit the identical instruction with the
+ * same final operand (1); confirm whether distinct hint values were intended
+ * for the different prefetch levels.
+ */
+static inline void rte_prefetch0(volatile void *p)
+{
+   asm volatile ("dcbt 0,%[p],1" : : [p] "r" (p));
+}
+
+/* Same instruction as rte_prefetch0(). */
+static inline void rte_prefetch1(volatile void *p)
+{
+   asm volatile ("dcbt 0,%[p],1" : : [p] "r" (p));
+}
+
+/* Same instruction as rte_prefetch0(). */
+static inline void rte_prefetch2(volatile void *p)
+{
+   asm volatile ("dcbt 0,%[p],1" : : [p] "r" (p));
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_PREFETCH_PPC_64_H_ */
-- 
1.7.1



[dpdk-dev] [PATCH v2 04/12] Add CPU cycle operations for IBM Power architecture

2014-11-16 Thread Chao Zhu
The IBM Power architecture has no TSC register from which to read CPU
cycles. On IBM Power, this patch reads the time base register in place of
the x86 TSC register.

Signed-off-by: Chao Zhu 
---
 .../common/include/arch/ppc_64/rte_cycles.h|   86 
 1 files changed, 86 insertions(+), 0 deletions(-)
 create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h

diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h 
b/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h
new file mode 100644
index 000..ed66b48
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h
@@ -0,0 +1,86 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) IBM Corporation 2014.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of IBM Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_CYCLES_PPC_64_H_
+#define _RTE_CYCLES_PPC_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_cycles.h"
+
+/**
+ * Read the time base register.
+ *
+ * @return
+ *   The time base for this lcore.
+ */
+static inline uint64_t
+rte_rdtsc(void)
+{
+   union {
+   uint64_t tsc_64;
+   struct {
+   uint32_t hi_32;
+   uint32_t lo_32;
+   };
+   } tsc;
+   uint32_t tmp;
+   asm volatile(
+   "0:\n"
+   "mftbu   %[hi32]\n"
+   "mftb%[lo32]\n"
+   "mftbu   %[tmp]\n"
+   "cmpw%[tmp],%[hi32]\n"
+   "bne 0b\n"
+   : [hi32] "=r"(tsc.hi_32), [lo32] "=r"(tsc.lo_32), [tmp] 
"=r"(tmp)
+   );
+   return tsc.tsc_64;
+}
+
+/* Issue rte_mb() and then sample the time base with rte_rdtsc(). */
+static inline uint64_t
+rte_rdtsc_precise(void)
+{
+   uint64_t now;
+
+   rte_mb();
+   now = rte_rdtsc();
+   return now;
+}
+
+/* Cycle-counter accessor: forwards directly to rte_rdtsc(). */
+static inline uint64_t
+rte_get_tsc_cycles(void)
+{
+   return rte_rdtsc();
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_CYCLES_PPC_64_H_ */
+
-- 
1.7.1



[dpdk-dev] [PATCH v2 03/12] Add byte order operations for IBM Power architecture

2014-11-16 Thread Chao Zhu
This patch adds architecture-specific byte order operations for the IBM Power
architecture. The Power architecture supports both big endian and little
endian. This patch also adds an RTE_ARCH_BIG_ENDIAN macro.

Signed-off-by: Chao Zhu 
---
 config/defconfig_ppc_64-power8-linuxapp-gcc|1 +
 .../common/include/arch/ppc_64/rte_byteorder.h |  150 
 2 files changed, 151 insertions(+), 0 deletions(-)
 create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_byteorder.h

diff --git a/config/defconfig_ppc_64-power8-linuxapp-gcc 
b/config/defconfig_ppc_64-power8-linuxapp-gcc
index 97d72ff..b10f60c 100644
--- a/config/defconfig_ppc_64-power8-linuxapp-gcc
+++ b/config/defconfig_ppc_64-power8-linuxapp-gcc
@@ -34,6 +34,7 @@ CONFIG_RTE_MACHINE="power8"

 CONFIG_RTE_ARCH="ppc_64"
 CONFIG_RTE_ARCH_PPC_64=y
+CONFIG_RTE_ARCH_BIG_ENDIAN=y

 CONFIG_RTE_TOOLCHAIN="gcc"
 CONFIG_RTE_TOOLCHAIN_GCC=y
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_byteorder.h 
b/lib/librte_eal/common/include/arch/ppc_64/rte_byteorder.h
new file mode 100644
index 000..a593e8a
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_byteorder.h
@@ -0,0 +1,150 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) IBM Corporation 2014.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of IBM Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Inspired from FreeBSD src/sys/powerpc/include/endian.h
+ * Copyright (c) 1987, 1991, 1993
+ * The Regents of the University of California.  All rights reserved.
+*/
+
+#ifndef _RTE_BYTEORDER_PPC_64_H_
+#define _RTE_BYTEORDER_PPC_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_byteorder.h"
+
+/*
+ * Swap the two bytes of a 16-bit value (architecture-level helper).
+ *
+ * Callers should normally go through rte_bswap16() instead of calling this
+ * directly.
+ */
+static inline uint16_t rte_arch_bswap16(uint16_t _x)
+{
+   uint16_t high_to_low = _x >> 8;
+   uint16_t low_to_high = (_x << 8) & 0xff00;
+
+   return high_to_low | low_to_high;
+}
+
+/*
+ * An architecture-optimized byte swap for a 32-bit value.
+ *
+ * Each of the four bytes is shifted to its mirrored position and isolated
+ * with the mask for that destination byte.
+ *
+ * Do not use this function directly. The preferred function is rte_bswap32().
+ */
+static inline uint32_t rte_arch_bswap32(uint32_t _x)
+{
+   return ((_x >> 24) | ((_x >> 8) & 0xff00) | ((_x << 8) & 0xff0000) |
+       ((_x << 24) & 0xff000000));
+}
+
+/*
+ * An architecture-optimized byte swap for a 64-bit value.
+ *
+ * Each of the eight bytes is shifted to its mirrored position and isolated
+ * with the mask for that destination byte.
+ *
+ * Do not use this function directly. The preferred function is rte_bswap64().
+ */
+/* 64-bit mode */
+static inline uint64_t rte_arch_bswap64(uint64_t _x)
+{
+   return ((_x >> 56) | ((_x >> 40) & 0xff00) | ((_x >> 24) & 0xff0000) |
+       ((_x >> 8) & 0xff000000) | ((_x << 8) & (0xffULL << 32)) |
+       ((_x << 24) & (0xffULL << 40)) |
+       ((_x << 40) & (0xffULL << 48)) | ((_x << 56)));
+}
+
+#ifndef RTE_FORCE_INTRINSICS
+/*
+ * 16-bit byte swap: uses rte_constant_bswap16() when x is a compile-time
+ * constant, otherwise rte_arch_bswap16(). The result is cast to uint16_t.
+ */
+#define rte_bswap16(x) ((uint16_t)(__builtin_constant_p(x) ?   \
+  rte_constant_bswap16(x) :\
+  rte_arch_bswap16(x)))
+
+/*
+ * 32-bit byte swap: uses rte_constant_bswap32() when x is a compile-time
+ * constant, otherwise rte_arch_bswap32(). The result is cast to uint32_t.
+ */
+#define rte_bswap32(x) ((uint32_t)(__builtin_constant_p(x) ?   \
+  rte_constant_bswap32(x) :\
+  rte_arch_bswap32(x)))
+
+#define rte_bswap64(x) ((uint64_t)(__builtin_constant_p(x) ?

[dpdk-dev] [PATCH v2 02/12] Add atomic operations for IBM Power architecture

2014-11-16 Thread Chao Zhu
This patch adds architecture specific atomic operation file for IBM
Power architecture CPU.

Signed-off-by: Chao Zhu 
---
 .../common/include/arch/ppc_64/rte_atomic.h|  415 
 1 files changed, 415 insertions(+), 0 deletions(-)
 create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h

diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h 
b/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h
new file mode 100644
index 000..9c69935
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h
@@ -0,0 +1,415 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) IBM Corporation 2014.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of IBM Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/*
+ * Inspired from FreeBSD src/sys/powerpc/include/atomic.h
+ * Copyright (c) 2008 Marcel Moolenaar
+ * Copyright (c) 2001 Benno Rice
+ * Copyright (c) 2001 David E. O'Brien
+ * Copyright (c) 1998 Doug Rabson
+ * All rights reserved.
+ */
+
+#ifndef _RTE_ATOMIC_PPC_64_H_
+#define _RTE_ATOMIC_PPC_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_atomic.h"
+
+/**
+ * General memory barrier.
+ *
+ * Guarantees that the LOAD and STORE operations generated before the
+ * barrier occur before the LOAD and STORE operations generated after.
+ * Implemented as a "sync" instruction with a compiler memory clobber.
+ */
+#define rte_mb()  asm volatile("sync" : : : "memory")
+
+/**
+ * Write memory barrier.
+ *
+ * Guarantees that the STORE operations generated before the barrier
+ * occur before the STORE operations generated after.
+ * Implemented as a "sync" instruction with a compiler memory clobber.
+ */
+#define rte_wmb() asm volatile("sync" : : : "memory")
+
+/**
+ * Read memory barrier.
+ *
+ * Guarantees that the LOAD operations generated before the barrier
+ * occur before the LOAD operations generated after.
+ * Implemented as a "sync" instruction with a compiler memory clobber.
+ */
+#define rte_rmb() asm volatile("sync" : : : "memory")
+
+/*------------------------- 16 bit atomic operations -------------------------*/
+/* To be compatible with Power7, use GCC built-in functions for 16 bit operations */
+
+#ifndef RTE_FORCE_INTRINSICS
+/*
+ * Atomic 16-bit compare-and-set.
+ *
+ * If *dst equals exp, store src into *dst.
+ * Returns 1 when the store happened, 0 when *dst did not match exp.
+ * exp and src are by-value copies, so the builtin writing the observed value
+ * back into exp on failure is not visible to the caller.
+ */
+static inline int
+rte_atomic16_cmpset(volatile uint16_t *dst, uint16_t exp, uint16_t src)
+{
+   return __atomic_compare_exchange(dst, &exp, &src, 0, __ATOMIC_ACQUIRE,
+       __ATOMIC_ACQUIRE) ? 1 : 0;
+}
+
+/*
+ * Atomically set the counter to 1 if it is currently 0.
+ * Returns 1 if the counter was 0 and has been set, 0 otherwise.
+ */
+static inline int rte_atomic16_test_and_set(rte_atomic16_t *v)
+{
+   return rte_atomic16_cmpset((volatile uint16_t *)&v->cnt, 0, 1);
+}
+
+/* Atomically add 1 to the counter of v. */
+static inline void
+rte_atomic16_inc(rte_atomic16_t *v)
+{
+   __atomic_add_fetch(&v->cnt, 1, __ATOMIC_ACQUIRE);
+}
+
+/* Atomically subtract 1 from the counter of v. */
+static inline void
+rte_atomic16_dec(rte_atomic16_t *v)
+{
+   __atomic_sub_fetch(&v->cnt, 1, __ATOMIC_ACQUIRE);
+}
+
+/*
+ * Atomically add 1 to the counter of v.
+ * Returns non-zero if the counter is 0 after the increment, 0 otherwise.
+ */
+static inline int rte_atomic16_inc_and_test(rte_atomic16_t *v)
+{
+   return (__atomic_add_fetch(&v->cnt, 1, __ATOMIC_ACQUIRE) == 0);
+}
+
+/*
+ * Atomically subtract 1 from the counter of v.
+ * Returns non-zero if the counter is 0 after the decrement, 0 otherwise.
+ */
+static inline int rte_atomic16_dec_and_test(rte_atomic16_t *v)
+{
+   return (__atomic_sub_fetch(&v->cnt, 1, __ATOMIC_ACQUIRE) == 0);
+}
+
+/*------------------------- 32 bit atomic operations -------------------------*/
+
+static inline int
+rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src)
+{
+   unsigned int ret = 0;
+
+   asm volatile(
+   "\tlwsync\n"
+   "1:\tlwarx %[ret], 0, %[dst]\n"
+   "cmplw %[exp], %[ret]\n"
+   "bne 2f\n"
+   

[dpdk-dev] [PATCH v2 01/12] Add compiling definations for IBM Power architecture

2014-11-16 Thread Chao Zhu
To make DPDK run on the IBM Power architecture, configuration files for the
Power architecture are added. The build-related .mk files are also
added.

Signed-off-by: Chao Zhu 
---
 config/common_linuxapp_powerpc  |  394 +++
 config/defconfig_ppc_64-power8-linuxapp-gcc |   40 +++
 mk/arch/ppc_64/rte.vars.mk  |   39 +++
 mk/machine/power8/rte.vars.mk   |   57 
 4 files changed, 530 insertions(+), 0 deletions(-)
 create mode 100644 config/common_linuxapp_powerpc
 create mode 100644 config/defconfig_ppc_64-power8-linuxapp-gcc
 create mode 100644 mk/arch/ppc_64/rte.vars.mk
 create mode 100644 mk/machine/power8/rte.vars.mk

diff --git a/config/common_linuxapp_powerpc b/config/common_linuxapp_powerpc
new file mode 100644
index 000..d230a0b
--- /dev/null
+++ b/config/common_linuxapp_powerpc
@@ -0,0 +1,394 @@
+#   BSD LICENSE
+#
+#   Copyright (C) IBM Corporation 2014.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in
+#   the documentation and/or other materials provided with the
+#   distribution.
+# * Neither the name of IBM Corporation nor the names of its
+#   contributors may be used to endorse or promote products derived
+#   from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+#
+# define executive environment
+#
+# CONFIG_RTE_EXEC_ENV can be linuxapp, baremetal, bsdapp
+#
+CONFIG_RTE_EXEC_ENV="linuxapp"
+CONFIG_RTE_EXEC_ENV_LINUXAPP=y
+
+#
+# Use intrinsics or assembly code for key routines
+#
+CONFIG_RTE_FORCE_INTRINSICS=n
+
+#
+# Compile to share library
+#
+CONFIG_RTE_BUILD_SHARED_LIB=n
+
+#
+# Combine to one single library
+#
+CONFIG_RTE_BUILD_COMBINE_LIBS=n
+CONFIG_RTE_LIBNAME="powerpc_dpdk"
+
+#
+# Compile libc directory
+#
+CONFIG_RTE_LIBC=n
+
+#
+# Compile newlib as libc from source
+#
+CONFIG_RTE_LIBC_NEWLIB_SRC=n
+
+#
+# Use binary newlib
+#
+CONFIG_RTE_LIBC_NEWLIB_BIN=n
+
+#
+# Use binary newlib
+#
+CONFIG_RTE_LIBC_NETINCS=n
+
+#
+# Compile libgloss (newlib-stubs)
+#
+CONFIG_RTE_LIBGLOSS=n
+
+#
+# Compile Environment Abstraction Layer
+# Note: Power8 has 96 cores, so increase CONFIG_RTE_MAX_LCORE from 64 to 128
+#
+CONFIG_RTE_LIBRTE_EAL=y
+CONFIG_RTE_MAX_LCORE=128
+CONFIG_RTE_MAX_NUMA_NODES=8
+CONFIG_RTE_MAX_MEMSEG=256
+CONFIG_RTE_MAX_MEMZONE=2560
+CONFIG_RTE_MAX_TAILQ=32
+CONFIG_RTE_LOG_LEVEL=8
+CONFIG_RTE_LOG_HISTORY=256
+CONFIG_RTE_LIBEAL_USE_HPET=n
+CONFIG_RTE_EAL_ALLOW_INV_SOCKET_ID=n
+CONFIG_RTE_EAL_ALWAYS_PANIC_ON_ERROR=n
+CONFIG_RTE_EAL_IGB_UIO=y
+CONFIG_RTE_EAL_VFIO=y
+
+#
+# Special configurations in PCI Config Space for high performance
+#
+CONFIG_RTE_PCI_CONFIG=n
+CONFIG_RTE_PCI_EXTENDED_TAG=""
+CONFIG_RTE_PCI_MAX_READ_REQUEST_SIZE=0
+
+#
+# Compile Environment Abstraction Layer for linux
+#
+CONFIG_RTE_LIBRTE_EAL_LINUXAPP=y
+
+#
+# Compile Environment Abstraction Layer for Bare metal
+#
+CONFIG_RTE_LIBRTE_EAL_BAREMETAL=n
+
+#
+# Compile Environment Abstraction Layer to support Vmware TSC map
+# Note: Power doesn't have this support
+#
+CONFIG_RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT=n
+
+#
+# Compile the argument parser library
+#
+CONFIG_RTE_LIBRTE_KVARGS=y
+
+#
+# Compile generic ethernet library
+#
+CONFIG_RTE_LIBRTE_ETHER=y
+CONFIG_RTE_LIBRTE_ETHDEV_DEBUG=n
+CONFIG_RTE_MAX_ETHPORTS=32
+CONFIG_RTE_LIBRTE_IEEE1588=n
+CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16
+
+#
+# Support NIC bypass logic
+#
+CONFIG_RTE_NIC_BYPASS=n
+
+#
+# Note: Initially, all of the PMD drivers compilation are turned off on Power
+# Will turn on them only after the successful testing on Power
+#
+
+#
+# Compile burst-oriented IGB & EM PMD drivers
+#
+CONFIG_RTE_LIBRTE_EM_PMD=n
+CONFIG_RTE_LIBRTE_IGB_PMD=n
+CONFIG_RTE_LIBRTE_E1000_DEBUG_INIT=n
+CONFIG_RTE_LIBRTE_E1000_DE

[dpdk-dev] [PATCH v2 00/12] Patches for DPDK to support Power architecture

2014-11-16 Thread Chao Zhu
This set of patches adds IBM Power architecture support to the DPDK. It adds
the required support to the EAL library. This set of patches doesn't support
full DPDK functionality on Power processors; many functions are turned off in
the configuration file. More patches will be added continuously.

Chao Zhu (12):
  Add compiling definations for IBM Power architecture
  Add atomic operations for IBM Power architecture
  Add byte order operations for IBM Power architecture
  Add CPU cycle operations for IBM Power architecture
  Add prefetch operation for IBM Power architecture
  Add spinlock operation for IBM Power architecture
  Add vector memcpy for IBM Power architecture
  Add CPU flag checking for IBM Power architecture
  Remove iopl operation for IBM Power architecture
  Add cache size define for IBM Power Architecture
  Add huge page size define for IBM Power architecture
  Add eal memory support for IBM Power Architecture

 app/test/test_cpuflags.c   |   35 ++
 app/test/test_malloc.c |8 +-
 app/test/test_memzone.c|  119 ++-
 config/common_linuxapp_powerpc |  394 +++
 config/defconfig_ppc_64-power8-linuxapp-gcc|   42 ++
 config/defconfig_x86_64-native-linuxapp-clang  |1 +
 config/defconfig_x86_64-native-linuxapp-gcc|1 +
 config/defconfig_x86_64-native-linuxapp-icc|1 +
 lib/librte_eal/common/eal_common_memzone.c |   15 +-
 .../common/include/arch/ppc_64/rte_atomic.h|  415 
 .../common/include/arch/ppc_64/rte_byteorder.h |  150 +++
 .../common/include/arch/ppc_64/rte_cpuflags.h  |  184 +
 .../common/include/arch/ppc_64/rte_cycles.h|   86 
 .../common/include/arch/ppc_64/rte_memcpy.h|  223 +++
 .../common/include/arch/ppc_64/rte_prefetch.h  |   61 +++
 .../common/include/arch/ppc_64/rte_spinlock.h  |   73 
 lib/librte_eal/common/include/rte_memory.h |9 +-
 lib/librte_eal/common/include/rte_memzone.h|8 +
 lib/librte_eal/linuxapp/eal/eal.c  |   13 +-
 lib/librte_eal/linuxapp/eal/eal_memory.c   |   27 +-
 mk/arch/ppc_64/rte.vars.mk |   39 ++
 mk/machine/power8/rte.vars.mk  |   57 +++
 mk/rte.cpuflags.mk |   17 +
 23 files changed, 1959 insertions(+), 19 deletions(-)
 create mode 100644 config/common_linuxapp_powerpc
 create mode 100644 config/defconfig_ppc_64-power8-linuxapp-gcc
 create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h
 create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_byteorder.h
 create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_cpuflags.h
 create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h
 create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h
 create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h
 create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_spinlock.h
 create mode 100644 mk/arch/ppc_64/rte.vars.mk
 create mode 100644 mk/machine/power8/rte.vars.mk



[dpdk-dev] [PATCH 00/12] Patches for DPDK to support Power architecture

2014-11-13 Thread Chao Zhu
Thomas,

Sorry for the delay. I'll push the updated patches to the mail list 
before next Monday.
Thanks a lot!

Best Regards!
--
Chao Zhu

On 2014/11/13 18:24, Thomas Monjalon wrote:
> Hi Chao,
>
> 2014-09-26 05:36, Chao Zhu:
>> The set of patches add IBM Power architecture to the DPDK. It adds the 
>> required support to the
>> EAL library. This set of patches doesn't support full function on Power 
>> processors. Many functions
>> are turned off in configuratidon. More patches will be added continuesly.
>>
>> Chao Zhu (12):
>>Add compiling definations for IBM Power architecture
>>Add atomic operations for IBM Power architecture
>>Add byte order operations for IBM Power architecture
>>Add CPU cycle operations for IBM Power architecture
>>Add prefetch operation for IBM Power architecture
>>Add spinlock operation for IBM Power architecture
>>Add vector memcpy for IBM Power architecture
>>Add CPU flag checking for IBM Power architecture
>>Remove iopl operation for IBM Power architecture
>>Add cache size define for IBM Power Architecture
>>Add huge page sizes for IBM Power architecture
>>Add memory support for IBM Power Architecture
> Could you share the status of the rework of these patches?
> Maybe that some parts could enter in 1.8 as a preview.
>




[dpdk-dev] [PATCH v3 00/10] split architecture specific operations

2014-11-05 Thread Chao Zhu

> The set of patches split x86 architecture specific operations from DPDK and 
> put
> them to x86 arch directory.
> This will make the adoption of DPDK much easier on other computer 
> architecture.
> For a new architecture, just add an architecture specific directory and
> necessary building configuration files, then DPDK eal library can support it.
>
>
> Reviewing patchset from Chao, I ended up modifying it along the way,
> so here is a new iteration of this patchset.
>
> Changes since Chao v2 patchset :
>
> - added a preliminary patch for moving rte_atomic.h (for better readability)
> - fixed documentation generation
> - implemented a generic header for each arch specific header (cpuflags, 
> memcpy,
>prefetch were missing)
> - removed C++ stuff from generic headers
> - centralised all doxygen stuff in generic headers (no need to have 
> duplicates)
> - refactored rte_cycles functions
> - moved vmware tsc stuff to arch rte_cycles.h headers
> - finished x86 factorisation
>
>
> Little summary of current state :
>
> - all applications continue to include the eal headers as before, these 
> headers
>are the arch-specific ones
> - the arch specific headers always include the generic ones. The generic 
> headers
>contain the doxygen documentation and code common to all architectures
> - a x86 architecture has been defined which handles both 32bits and 64bits
>peculiarities
>
>
> It builds fine for 32/64 bits (w and w/o "force intrinsics"), but I really 
> would
> like a lot of eyes on this (and I would say, especially, rte_cycles, 
> rte_memcpy
> and rte_cpuflags).
> I still have some concerns about the use of intrinsics for architecture != x86
> but I think Chao will be the best to look at this.
>
>
Acked-by: Chao Zhu 



[dpdk-dev] [PATCH v2 7/7] Split CPU flags operations to architecture specific

2014-10-16 Thread Chao Zhu
This patch splits the CPU-flag related operations out of DPDK and pushes them
into the architecture-specific arch directories, so that other processor
architectures can implement their own CPU flag functions to support DPDK.

Signed-off-by: Chao Zhu 
---
 lib/librte_eal/common/Makefile |4 +-
 lib/librte_eal/common/eal_common_cpuflags.c|  190 --
 .../common/include/arch/i686/rte_cpuflags.h|  364 
 .../common/include/arch/x86_64/rte_cpuflags.h  |  364 
 lib/librte_eal/common/include/rte_cpuflags.h   |  182 --
 5 files changed, 730 insertions(+), 374 deletions(-)
 create mode 100644 lib/librte_eal/common/include/arch/i686/rte_cpuflags.h
 create mode 100644 lib/librte_eal/common/include/arch/x86_64/rte_cpuflags.h
 delete mode 100644 lib/librte_eal/common/include/rte_cpuflags.h

diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
index e09d509..79f378e 100644
--- a/lib/librte_eal/common/Makefile
+++ b/lib/librte_eal/common/Makefile
@@ -36,7 +36,7 @@ INC += rte_debug.h rte_eal.h rte_errno.h rte_launch.h 
rte_lcore.h
 INC += rte_log.h rte_memory.h rte_memzone.h rte_pci.h
 INC += rte_pci_dev_ids.h rte_per_lcore.h rte_random.h
 INC += rte_rwlock.h rte_tailq.h rte_interrupts.h rte_alarm.h
-INC += rte_string_fns.h rte_cpuflags.h rte_version.h rte_tailq_elem.h
+INC += rte_string_fns.h rte_version.h rte_tailq_elem.h
 INC += rte_eal_memconfig.h rte_malloc_heap.h
 INC += rte_hexdump.h rte_devargs.h rte_dev.h
 INC += rte_common_vect.h
@@ -47,7 +47,7 @@ INC += rte_warnings.h
 endif

 GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h rte_spinlock.h
-ARCH_INC := $(GENERIC_INC) rte_prefetch.h rte_memcpy.h
+ARCH_INC := $(GENERIC_INC) rte_prefetch.h rte_memcpy.h rte_cpuflags.h

 SYMLINK-$(CONFIG_RTE_LIBRTE_EAL)-include := $(addprefix include/,$(INC))
 SYMLINK-$(CONFIG_RTE_LIBRTE_EAL)-include += \
diff --git a/lib/librte_eal/common/eal_common_cpuflags.c 
b/lib/librte_eal/common/eal_common_cpuflags.c
index 9e79179..6fd360c 100644
--- a/lib/librte_eal/common/eal_common_cpuflags.c
+++ b/lib/librte_eal/common/eal_common_cpuflags.c
@@ -30,10 +30,6 @@
  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
-#include 
-#include 
-#include 
-#include 
 #include 

 /*
@@ -50,192 +46,6 @@
 #endif

 /**
- * Enumeration of CPU registers
- */
-enum cpu_register_t {
-   REG_EAX = 0,
-   REG_EBX,
-   REG_ECX,
-   REG_EDX,
-};
-
-typedef uint32_t cpuid_registers_t[4];
-
-#define CPU_FLAG_NAME_MAX_LEN 64
-
-/**
- * Struct to hold a processor feature entry
- */
-struct feature_entry {
-   uint32_t leaf;  /**< cpuid leaf */
-   uint32_t subleaf;   /**< cpuid subleaf */
-   uint32_t reg;   /**< cpuid register */
-   uint32_t bit;   /**< cpuid register bit */
-   char name[CPU_FLAG_NAME_MAX_LEN];   /**< String for printing */
-};
-
-#define FEAT_DEF(name, leaf, subleaf, reg, bit) \
-   [RTE_CPUFLAG_##name] = {leaf, subleaf, reg, bit, #name },
-
-/**
- * An array that holds feature entries
- */
-static const struct feature_entry cpu_feature_table[] = {
-   FEAT_DEF(SSE3, 0x0001, 0, REG_ECX,  0)
-   FEAT_DEF(PCLMULQDQ, 0x0001, 0, REG_ECX,  1)
-   FEAT_DEF(DTES64, 0x0001, 0, REG_ECX,  2)
-   FEAT_DEF(MONITOR, 0x0001, 0, REG_ECX,  3)
-   FEAT_DEF(DS_CPL, 0x0001, 0, REG_ECX,  4)
-   FEAT_DEF(VMX, 0x0001, 0, REG_ECX,  5)
-   FEAT_DEF(SMX, 0x0001, 0, REG_ECX,  6)
-   FEAT_DEF(EIST, 0x0001, 0, REG_ECX,  7)
-   FEAT_DEF(TM2, 0x0001, 0, REG_ECX,  8)
-   FEAT_DEF(SSSE3, 0x0001, 0, REG_ECX,  9)
-   FEAT_DEF(CNXT_ID, 0x0001, 0, REG_ECX, 10)
-   FEAT_DEF(FMA, 0x0001, 0, REG_ECX, 12)
-   FEAT_DEF(CMPXCHG16B, 0x0001, 0, REG_ECX, 13)
-   FEAT_DEF(XTPR, 0x0001, 0, REG_ECX, 14)
-   FEAT_DEF(PDCM, 0x0001, 0, REG_ECX, 15)
-   FEAT_DEF(PCID, 0x0001, 0, REG_ECX, 17)
-   FEAT_DEF(DCA, 0x0001, 0, REG_ECX, 18)
-   FEAT_DEF(SSE4_1, 0x0001, 0, REG_ECX, 19)
-   FEAT_DEF(SSE4_2, 0x0001, 0, REG_ECX, 20)
-   FEAT_DEF(X2APIC, 0x0001, 0, REG_ECX, 21)
-   FEAT_DEF(MOVBE, 0x0001, 0, REG_ECX, 22)
-   FEAT_DEF(POPCNT, 0x0001, 0, REG_ECX, 23)
-   FEAT_DEF(TSC_DEADLINE, 0x0001, 0, REG_ECX, 24)
-   FEAT_DEF(AES, 0x0001, 0, REG_ECX, 25)
-   FEAT_DEF(XSAVE, 0x0001, 0, REG_ECX, 26)
-   FEAT_DEF(OSXSAVE, 0x0001, 0, REG_ECX, 27)
-   FEAT_DEF(AVX, 0x0001, 0, REG_ECX, 28)
-   FEAT_DEF(F16C, 0x0001, 0, REG_ECX, 29)
-   FEAT_DEF(RDRAND, 0x0001, 0, REG_ECX, 30)
-
-   FEAT_DEF(FPU, 0x0001, 0, REG_EDX,  0)
-   FEAT_DEF(VME, 0x0001, 0, REG_EDX,  1)
-   FEAT_DEF(DE, 0x0001,

[dpdk-dev] [PATCH v2 6/7] Split memcpy operation to architecture specific

2014-10-16 Thread Chao Zhu
This patch splits the SSE-based memory copy functions from DPDK and pushes
them to architecture-specific arch directories. Other processor
architectures can implement their own vector-based memory copy functions.
Signed-off-by: Chao Zhu 
---
 lib/librte_eal/common/Makefile |4 +-
 .../common/include/arch/i686/rte_memcpy.h  |  376 
 .../common/include/arch/x86_64/rte_memcpy.h|  376 
 lib/librte_eal/common/include/rte_memcpy.h |  376 
 4 files changed, 754 insertions(+), 378 deletions(-)
 create mode 100644 lib/librte_eal/common/include/arch/i686/rte_memcpy.h
 create mode 100644 lib/librte_eal/common/include/arch/x86_64/rte_memcpy.h
 delete mode 100644 lib/librte_eal/common/include/rte_memcpy.h

diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
index 9b9a73d..e09d509 100644
--- a/lib/librte_eal/common/Makefile
+++ b/lib/librte_eal/common/Makefile
@@ -33,7 +33,7 @@ include $(RTE_SDK)/mk/rte.vars.mk

 INC := rte_branch_prediction.h rte_common.h
 INC += rte_debug.h rte_eal.h rte_errno.h rte_launch.h rte_lcore.h
-INC += rte_log.h rte_memcpy.h rte_memory.h rte_memzone.h rte_pci.h
+INC += rte_log.h rte_memory.h rte_memzone.h rte_pci.h
 INC += rte_pci_dev_ids.h rte_per_lcore.h rte_random.h
 INC += rte_rwlock.h rte_tailq.h rte_interrupts.h rte_alarm.h
 INC += rte_string_fns.h rte_cpuflags.h rte_version.h rte_tailq_elem.h
@@ -47,7 +47,7 @@ INC += rte_warnings.h
 endif

 GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h rte_spinlock.h
-ARCH_INC := $(GENERIC_INC) rte_prefetch.h
+ARCH_INC := $(GENERIC_INC) rte_prefetch.h rte_memcpy.h

 SYMLINK-$(CONFIG_RTE_LIBRTE_EAL)-include := $(addprefix include/,$(INC))
 SYMLINK-$(CONFIG_RTE_LIBRTE_EAL)-include += \
diff --git a/lib/librte_eal/common/include/arch/i686/rte_memcpy.h 
b/lib/librte_eal/common/include/arch/i686/rte_memcpy.h
new file mode 100644
index 000..ba750b1
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/i686/rte_memcpy.h
@@ -0,0 +1,376 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_MEMCPY_I686_H_
+#define _RTE_MEMCPY_I686_H_
+
+/**
+ * @file
+ *
+ * Functions for SSE implementation of memcpy().
+ */
+
+#include 
+#include 
+#include 
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:593) /* Stop unused variable warning (reg_a etc). */
+#endif
+
+/**
+ * Copy 16 bytes from one location to another using optimised SSE
+ * instructions. The locations should not overlap.
+ *
+ * @param dst
+ *   Pointer to the destination of the data.
+ * @param src
+ *   Pointer to the source data.
+ */
+static inline void
+rte_mov16(uint8_t *dst, const uint8_t *src)
+{
+   __m128i reg_a;
+   asm volatile (
+   "movdqu (%[src]), %[reg_a]\n\t"
+   "movdqu %[reg_a], (%[dst])\n\t"
+   : [reg_a] "=x" (reg_a)
+   : [src] "r" (src),
+ [dst] "r"(dst)
+   : "memory"
+   );
+}
+
+/**
+ * Copy 32 bytes from one location to another using optimised SSE
+ * instructions. The locations should not overlap.
+ *
+ * @param dst
+ *   Pointer to the destination of the data.
+ * 

  1   2   >