[dpdk-dev] [PATCH v4] net/i40e: fix hash filter invalid issue in X722

2016-10-24 Thread Jeff Guo
When verifying hash filtering on X722, we found that the hash value
reported in the descriptor is incorrect. The root cause is that X722
uses a different hash key word selection than X710/XL710.
This patch fixes it by setting the X722-specific key selection.

Signed-off-by: Jeff Guo 
---
v4:
refine commit log
---
 drivers/net/i40e/i40e_ethdev.c | 60 +-
 1 file changed, 47 insertions(+), 13 deletions(-)

diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index db5f808..ca515dd 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -211,6 +211,14 @@
 #define I40E_REG_INSET_L3_SRC_IP40x00018000ULL
 /* Destination IPv4 address */
 #define I40E_REG_INSET_L3_DST_IP40x0018ULL
+/* Source IPv4 address for X722 */
+#define I40E_X722_REG_INSET_L3_SRC_IP4   0x0006ULL
+/* Destination IPv4 address for X722 */
+#define I40E_X722_REG_INSET_L3_DST_IP4   0x0600ULL
+/* IPv4 Protocol for X722 */
+#define I40E_X722_REG_INSET_L3_IP4_PROTO 0x0010ULL
+/* IPv4 Time to Live for X722 */
+#define I40E_X722_REG_INSET_L3_IP4_TTL   0x0010ULL
 /* IPv4 Type of Service (TOS) */
 #define I40E_REG_INSET_L3_IP4_TOS0x0040ULL
 /* IPv4 Protocol */
@@ -7568,25 +7576,23 @@ i40e_parse_input_set(uint64_t *inset,
  * and vice versa
  */
 static uint64_t
-i40e_translate_input_set_reg(uint64_t input)
+i40e_translate_input_set_reg(enum i40e_mac_type type, uint64_t input)
 {
uint64_t val = 0;
uint16_t i;

-   static const struct {
+   struct inset_map {
uint64_t inset;
uint64_t inset_reg;
-   } inset_map[] = {
+   };
+
+   static const struct inset_map inset_map_common[] = {
{I40E_INSET_DMAC, I40E_REG_INSET_L2_DMAC},
{I40E_INSET_SMAC, I40E_REG_INSET_L2_SMAC},
{I40E_INSET_VLAN_OUTER, I40E_REG_INSET_L2_OUTER_VLAN},
{I40E_INSET_VLAN_INNER, I40E_REG_INSET_L2_INNER_VLAN},
{I40E_INSET_LAST_ETHER_TYPE, I40E_REG_INSET_LAST_ETHER_TYPE},
-   {I40E_INSET_IPV4_SRC, I40E_REG_INSET_L3_SRC_IP4},
-   {I40E_INSET_IPV4_DST, I40E_REG_INSET_L3_DST_IP4},
{I40E_INSET_IPV4_TOS, I40E_REG_INSET_L3_IP4_TOS},
-   {I40E_INSET_IPV4_PROTO, I40E_REG_INSET_L3_IP4_PROTO},
-   {I40E_INSET_IPV4_TTL, I40E_REG_INSET_L3_IP4_TTL},
{I40E_INSET_IPV6_SRC, I40E_REG_INSET_L3_SRC_IP6},
{I40E_INSET_IPV6_DST, I40E_REG_INSET_L3_DST_IP6},
{I40E_INSET_IPV6_TC, I40E_REG_INSET_L3_IP6_TC},
@@ -7615,13 +7621,40 @@ i40e_translate_input_set_reg(uint64_t input)
{I40E_INSET_FLEX_PAYLOAD_W8, I40E_REG_INSET_FLEX_PAYLOAD_WORD8},
};

+/* some different registers map in x722*/
+   static const struct inset_map inset_map_diff_x722[] = {
+   {I40E_INSET_IPV4_SRC, I40E_X722_REG_INSET_L3_SRC_IP4},
+   {I40E_INSET_IPV4_DST, I40E_X722_REG_INSET_L3_DST_IP4},
+   {I40E_INSET_IPV4_PROTO, I40E_X722_REG_INSET_L3_IP4_PROTO},
+   {I40E_INSET_IPV4_TTL, I40E_X722_REG_INSET_L3_IP4_TTL},
+   };
+
+   static const struct inset_map inset_map_diff_not_x722[] = {
+   {I40E_INSET_IPV4_SRC, I40E_REG_INSET_L3_SRC_IP4},
+   {I40E_INSET_IPV4_DST, I40E_REG_INSET_L3_DST_IP4},
+   {I40E_INSET_IPV4_PROTO, I40E_REG_INSET_L3_IP4_PROTO},
+   {I40E_INSET_IPV4_TTL, I40E_REG_INSET_L3_IP4_TTL},
+   };
+
if (input == 0)
return val;

/* Translate input set to register aware inset */
-   for (i = 0; i < RTE_DIM(inset_map); i++) {
-   if (input & inset_map[i].inset)
-   val |= inset_map[i].inset_reg;
+   if (type == I40E_MAC_X722) {
+   for (i = 0; i < RTE_DIM(inset_map_diff_x722); i++) {
+   if (input & inset_map_diff_x722[i].inset)
+   val |= inset_map_diff_x722[i].inset_reg;
+   }
+   } else {
+   for (i = 0; i < RTE_DIM(inset_map_diff_not_x722); i++) {
+   if (input & inset_map_diff_not_x722[i].inset)
+   val |= inset_map_diff_not_x722[i].inset_reg;
+   }
+   }
+
+   for (i = 0; i < RTE_DIM(inset_map_common); i++) {
+   if (input & inset_map_common[i].inset)
+   val |= inset_map_common[i].inset_reg;
}

return val;
@@ -7712,7 +7745,8 @@ i40e_filter_input_set_init(struct i40e_pf *pf)
   I40E_INSET_MASK_NUM_REG);
if (num < 0)
return;
-   inset_reg = i40e_translate_input_set_reg(input_set);
+   inset_reg = 

[dpdk-dev] [PATCH v4] net/i40e: fix the hash filter invalid calculation in X722

2016-10-24 Thread Jeff Guo
When verifying hash filtering on X722, we found that the hash value
reported in the descriptor is incorrect. The root cause is that X722
uses a different hash key word selection than X710/XL710.
This patch fixes it by setting the X722-specific key selection.

Signed-off-by: Jeff Guo 
---
v4:
refine commit log
---
 drivers/net/i40e/i40e_ethdev.c | 60 +-
 1 file changed, 47 insertions(+), 13 deletions(-)

diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index db5f808..ca515dd 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -211,6 +211,14 @@
 #define I40E_REG_INSET_L3_SRC_IP40x00018000ULL
 /* Destination IPv4 address */
 #define I40E_REG_INSET_L3_DST_IP40x0018ULL
+/* Source IPv4 address for X722 */
+#define I40E_X722_REG_INSET_L3_SRC_IP4   0x0006ULL
+/* Destination IPv4 address for X722 */
+#define I40E_X722_REG_INSET_L3_DST_IP4   0x0600ULL
+/* IPv4 Protocol for X722 */
+#define I40E_X722_REG_INSET_L3_IP4_PROTO 0x0010ULL
+/* IPv4 Time to Live for X722 */
+#define I40E_X722_REG_INSET_L3_IP4_TTL   0x0010ULL
 /* IPv4 Type of Service (TOS) */
 #define I40E_REG_INSET_L3_IP4_TOS0x0040ULL
 /* IPv4 Protocol */
@@ -7568,25 +7576,23 @@ i40e_parse_input_set(uint64_t *inset,
  * and vice versa
  */
 static uint64_t
-i40e_translate_input_set_reg(uint64_t input)
+i40e_translate_input_set_reg(enum i40e_mac_type type, uint64_t input)
 {
uint64_t val = 0;
uint16_t i;

-   static const struct {
+   struct inset_map {
uint64_t inset;
uint64_t inset_reg;
-   } inset_map[] = {
+   };
+
+   static const struct inset_map inset_map_common[] = {
{I40E_INSET_DMAC, I40E_REG_INSET_L2_DMAC},
{I40E_INSET_SMAC, I40E_REG_INSET_L2_SMAC},
{I40E_INSET_VLAN_OUTER, I40E_REG_INSET_L2_OUTER_VLAN},
{I40E_INSET_VLAN_INNER, I40E_REG_INSET_L2_INNER_VLAN},
{I40E_INSET_LAST_ETHER_TYPE, I40E_REG_INSET_LAST_ETHER_TYPE},
-   {I40E_INSET_IPV4_SRC, I40E_REG_INSET_L3_SRC_IP4},
-   {I40E_INSET_IPV4_DST, I40E_REG_INSET_L3_DST_IP4},
{I40E_INSET_IPV4_TOS, I40E_REG_INSET_L3_IP4_TOS},
-   {I40E_INSET_IPV4_PROTO, I40E_REG_INSET_L3_IP4_PROTO},
-   {I40E_INSET_IPV4_TTL, I40E_REG_INSET_L3_IP4_TTL},
{I40E_INSET_IPV6_SRC, I40E_REG_INSET_L3_SRC_IP6},
{I40E_INSET_IPV6_DST, I40E_REG_INSET_L3_DST_IP6},
{I40E_INSET_IPV6_TC, I40E_REG_INSET_L3_IP6_TC},
@@ -7615,13 +7621,40 @@ i40e_translate_input_set_reg(uint64_t input)
{I40E_INSET_FLEX_PAYLOAD_W8, I40E_REG_INSET_FLEX_PAYLOAD_WORD8},
};

+/* some different registers map in x722*/
+   static const struct inset_map inset_map_diff_x722[] = {
+   {I40E_INSET_IPV4_SRC, I40E_X722_REG_INSET_L3_SRC_IP4},
+   {I40E_INSET_IPV4_DST, I40E_X722_REG_INSET_L3_DST_IP4},
+   {I40E_INSET_IPV4_PROTO, I40E_X722_REG_INSET_L3_IP4_PROTO},
+   {I40E_INSET_IPV4_TTL, I40E_X722_REG_INSET_L3_IP4_TTL},
+   };
+
+   static const struct inset_map inset_map_diff_not_x722[] = {
+   {I40E_INSET_IPV4_SRC, I40E_REG_INSET_L3_SRC_IP4},
+   {I40E_INSET_IPV4_DST, I40E_REG_INSET_L3_DST_IP4},
+   {I40E_INSET_IPV4_PROTO, I40E_REG_INSET_L3_IP4_PROTO},
+   {I40E_INSET_IPV4_TTL, I40E_REG_INSET_L3_IP4_TTL},
+   };
+
if (input == 0)
return val;

/* Translate input set to register aware inset */
-   for (i = 0; i < RTE_DIM(inset_map); i++) {
-   if (input & inset_map[i].inset)
-   val |= inset_map[i].inset_reg;
+   if (type == I40E_MAC_X722) {
+   for (i = 0; i < RTE_DIM(inset_map_diff_x722); i++) {
+   if (input & inset_map_diff_x722[i].inset)
+   val |= inset_map_diff_x722[i].inset_reg;
+   }
+   } else {
+   for (i = 0; i < RTE_DIM(inset_map_diff_not_x722); i++) {
+   if (input & inset_map_diff_not_x722[i].inset)
+   val |= inset_map_diff_not_x722[i].inset_reg;
+   }
+   }
+
+   for (i = 0; i < RTE_DIM(inset_map_common); i++) {
+   if (input & inset_map_common[i].inset)
+   val |= inset_map_common[i].inset_reg;
}

return val;
@@ -7712,7 +7745,8 @@ i40e_filter_input_set_init(struct i40e_pf *pf)
   I40E_INSET_MASK_NUM_REG);
if (num < 0)
return;
-   inset_reg = i40e_translate_input_set_reg(input_set);
+   inset_reg = 

[dpdk-dev] [PATCH v2] net/i40e: fix fdir configure failed issue in X710

2016-10-24 Thread Jeff Guo
Because some registers, such as I40E_GLQF_FD_PCTYPES, are only supported by
X722, the MAC type must be used to distinguish the behavior of X722 from that
of X710 and other NICs; otherwise X710 functionality would fail.

Fixes: 8c5cb3c11513 ("net/i40e: add packet type translation for X722")
Signed-off-by: Jeff Guo 
---
v2:
refine commit log
decrease some code duplication
---
 drivers/net/i40e/i40e_ethdev.c| 103 +-
 drivers/net/i40e/i40e_ethdev.h|  32 +++-
 drivers/net/i40e/i40e_ethdev_vf.c |  16 --
 drivers/net/i40e/i40e_fdir.c  |  55 
 4 files changed, 113 insertions(+), 93 deletions(-)

diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index 5af0e43..db5f808 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -6164,7 +6164,7 @@ DONE:

 /* Configure hash enable flags for RSS */
 uint64_t
-i40e_config_hena(uint64_t flags)
+i40e_config_hena(uint64_t flags, enum i40e_mac_type type)
 {
uint64_t hena = 0;

@@ -6173,42 +6173,42 @@ i40e_config_hena(uint64_t flags)

if (flags & ETH_RSS_FRAG_IPV4)
hena |= 1ULL << I40E_FILTER_PCTYPE_FRAG_IPV4;
-   if (flags & ETH_RSS_NONFRAG_IPV4_TCP)
-#ifdef X722_SUPPORT
-   hena |= (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_TCP) |
-   (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK);
-#else
-   hena |= 1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_TCP;
-#endif
-   if (flags & ETH_RSS_NONFRAG_IPV4_UDP)
-#ifdef X722_SUPPORT
-   hena |= (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_UDP) |
-   (1ULL << I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP) |
-   (1ULL << I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP);
-#else
-   hena |= 1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_UDP;
-#endif
+   if (flags & ETH_RSS_NONFRAG_IPV4_TCP) {
+   if (type == I40E_MAC_X722) {
+   hena |= (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_TCP) |
+(1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK);
+   } else
+   hena |= 1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_TCP;
+   }
+   if (flags & ETH_RSS_NONFRAG_IPV4_UDP) {
+   if (type == I40E_MAC_X722) {
+   hena |= (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_UDP) |
+(1ULL << I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP) |
+(1ULL << I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP);
+   } else
+   hena |= 1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_UDP;
+   }
if (flags & ETH_RSS_NONFRAG_IPV4_SCTP)
hena |= 1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_SCTP;
if (flags & ETH_RSS_NONFRAG_IPV4_OTHER)
hena |= 1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_OTHER;
if (flags & ETH_RSS_FRAG_IPV6)
hena |= 1ULL << I40E_FILTER_PCTYPE_FRAG_IPV6;
-   if (flags & ETH_RSS_NONFRAG_IPV6_TCP)
-#ifdef X722_SUPPORT
-   hena |= (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_TCP) |
-   (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK);
-#else
-   hena |= 1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_TCP;
-#endif
-   if (flags & ETH_RSS_NONFRAG_IPV6_UDP)
-#ifdef X722_SUPPORT
-   hena |= (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_UDP) |
-   (1ULL << I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP) |
-   (1ULL << I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP);
-#else
-   hena |= 1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_UDP;
-#endif
+   if (flags & ETH_RSS_NONFRAG_IPV6_TCP) {
+   if (type == I40E_MAC_X722) {
+   hena |= (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_TCP) |
+(1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK);
+   } else
+   hena |= 1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_TCP;
+   }
+   if (flags & ETH_RSS_NONFRAG_IPV6_UDP) {
+   if (type == I40E_MAC_X722) {
+   hena |= (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_UDP) |
+(1ULL << I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP) |
+(1ULL << I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP);
+   } else
+   hena |= 1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_UDP;
+   }
if (flags & ETH_RSS_NONFRAG_IPV6_SCTP)
hena |= 1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_SCTP;
if (flags & ETH_RSS_NONFRAG_IPV6_OTHER)
@@ -6282,7 +6282,10 @@ i40e_pf_disable_rss(struct i40e_pf *pf)

hena = (uint64_t)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(0));
hena |= ((uint64_t)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1))) << 32;
-   hena &= ~I40E_RSS_HENA_ALL;
+   if (hw->mac.type == I40E_MAC_X722)
+   hena &= ~I40E_RSS_HENA_ALL_X722;
+   else
+   

[dpdk-dev] why all the other threads except lcore-slave pinned to master lcore?

2016-10-24 Thread Kevin Traynor
On 10/24/2016 12:10 PM, ychen wrote:
> Hi, I am new to DPDK. When I followed the document INSTALL.DPDK.md to
> launch Open vSwitch with DPDK initialized, I found that all the threads are
> pinned to the master lcore except lcore-slave and vfio-sync, but I cannot
> find any code that sets the affinity for these threads.
> Here are my questions:
> 1. Why is vfio-sync pinned to a core that is neither a slave lcore nor the
> master lcore?
> 2. Why are all the other threads pinned to the master lcore? Is anything in
> my setup wrong?

Hi - these are probably more appropriate for the ovs-dev list. I
answered the post you put over there.

> 
> 
> Here is some logs:
> 2016-10-24T10:42:03Z|1|vlog|INFO|opened log file 
> /var/log/openvswitch/ovs-vswitchd.log
> 2016-10-24T10:42:03Z|2|ovs_numa|INFO|Discovered 24 CPU cores on NUMA node 0
> 2016-10-24T10:42:03Z|3|ovs_numa|INFO|Discovered 24 CPU cores on NUMA node 
> 1
> 2016-10-24T10:42:03Z|4|ovs_numa|INFO|Discovered 2 NUMA nodes and 48 CPU 
> cores
> 2016-10-24T10:42:03Z|5|reconnect|INFO|unix:/var/run/openvswitch/db.sock: 
> connecting...
> 2016-10-24T10:42:03Z|6|reconnect|INFO|unix:/var/run/openvswitch/db.sock: 
> connected
> 2016-10-24T10:42:03Z|7|dpdk|INFO|DPDK Enabled, initializing
> 2016-10-24T10:42:03Z|8|dpdk|INFO|No vhost-sock-dir provided - defaulting 
> to /var/run/openvswitch
> 2016-10-24T10:42:03Z|9|dpdk|INFO|EAL ARGS: ovs-vswitchd -c 0xf 
> --socket-mem 1024,1024
> EAL: Detected 48 lcore(s)
> EAL: Probing VFIO support...
> EAL: VFIO support initialized
> PMD: bnxt_rte_pmd_init() called for (null)
> EAL: PCI device :01:00.0 on NUMA socket 0
> EAL:   probe driver: 8086:10fb rte_ixgbe_pmd
> EAL: PCI device :01:00.1 on NUMA socket 0
> EAL:   probe driver: 8086:10fb rte_ixgbe_pmd
> EAL: PCI device :06:00.0 on NUMA socket 0
> EAL:   probe driver: 8086:1521 rte_igb_pmd
> EAL: PCI device :06:00.1 on NUMA socket 0
> EAL:   probe driver: 8086:1521 rte_igb_pmd
> 2016-10-24T10:42:06Z|00010|dpdk|INFO|DPDK pdump packet capture enable
> 
> 
> and the output of the cpu_layout:
> cores =  [0, 1, 2, 3, 4, 5, 8, 9, 10, 11, 12, 13]
> sockets =  [0, 1]
> 
> 
> Socket 0Socket 1
> 
> Core 0  [0, 24] [1, 25] 
> Core 1  [2, 26] [3, 27] 
> Core 2  [4, 28] [5, 29] 
> Core 3  [6, 30] [7, 31] 
> Core 4  [8, 32] [9, 33] 
> Core 5  [10, 34][11, 35]
> Core 8  [12, 36][13, 37]
> Core 9  [14, 38][15, 39]
> Core 10 [16, 40][17, 41]
> Core 11 [18, 42][19, 43]
> Core 12 [20, 44][21, 45]
> Core 13 [22, 46][23, 47] 
> All the threads and their pinned core for vswitchd:
> 28262  28262   0 ovs-vswitchd
>  28263  28262  39 vfio-sync
>  28297  28262   0 eal-intr-thread
>  28298  28262   1 lcore-slave-1
>  28299  28262   2 lcore-slave-2
>  28300  28262   3 lcore-slave-3
>  28301  28262   0 dpdk_watchdog2
>  28302  28262   0 vhost_thread1
>  28303  28262   0 pdump-thread
>  28304  28262   0 ct_clean3
>  28305  28262   0 urcu4
>  28744  28262   0 handler101
>  28745  28262   0 handler100
>  28746  28262   0 handler99
>  28747  28262   0 handler98
>  28748  28262   0 handler95
>  28749  28262   0 handler77
>  28750  28262   0 handler79
>  28751  28262   0 handler80
>  28752  28262   0 handler81
>  28753  28262   0 handler73
>  28756  28262   0 handler92
>  28757  28262   0 handler82
>  28758  28262   0 handler96
>  28759  28262   0 handler71
>  28760  28262   0 handler61
>  28761  28262   0 handler62
>  28762  28262   0 handler83
>  28763  28262   0 handler63
>  28764  28262   0 handler84
>  28765  28262   0 handler93
>  28766  28262   0 handler64
>  28767  28262   0 handler85
>  28768  28262   0 handler74
>  28769  28262   0 handler65
>  28770  28262   0 handler66
>  28771  28262   0 handler78
>  28772  28262   0 handler86
>  28773  28262   0 handler87
>  28774  28262   0 handler97
>  28775  28262   0 handler88
>  28776  28262   0 handler56
>  28777  28262   0 handler76
>  28778  28262   0 handler67
>  28779  28262   0 handler60
>  28780  28262   0 handler68
>  28781  28262   0 revalidator75
>  28782  28262   0 revalidator57
>  28783  28262   0 revalidator89
>  28784  28262   0 revalidator69
>  28785  28262   0 revalidator54
>  28786  28262   0 revalidator90
>  28787  28262   0 revalidator55
>  28788  28262   0 revalidator58
>  28789  28262   0 revalidator59
>  28790  28262   0 revalidator70
>  28791  28262   0 revalidator94
>  28792  28262   0 revalidator91
>  28793  28262   0 revalidator72
>  28827  28262   4 pmd103
>  28829  28262   6 pmd102
> 



[dpdk-dev] why all the other threads except lcore-slave pinned to master lcore?

2016-10-24 Thread ychen
Hi, I am new to DPDK. When I followed the document INSTALL.DPDK.md to launch
Open vSwitch with DPDK initialized, I found that all the threads are pinned
to the master lcore except lcore-slave and vfio-sync, but I cannot find any
code that sets the affinity for these threads.
Here are my questions:
1. Why is vfio-sync pinned to a core that is neither a slave lcore nor the
master lcore?
2. Why are all the other threads pinned to the master lcore? Is anything in
my setup wrong?


Here is some logs:
2016-10-24T10:42:03Z|1|vlog|INFO|opened log file 
/var/log/openvswitch/ovs-vswitchd.log
2016-10-24T10:42:03Z|2|ovs_numa|INFO|Discovered 24 CPU cores on NUMA node 0
2016-10-24T10:42:03Z|3|ovs_numa|INFO|Discovered 24 CPU cores on NUMA node 1
2016-10-24T10:42:03Z|4|ovs_numa|INFO|Discovered 2 NUMA nodes and 48 CPU 
cores
2016-10-24T10:42:03Z|5|reconnect|INFO|unix:/var/run/openvswitch/db.sock: 
connecting...
2016-10-24T10:42:03Z|6|reconnect|INFO|unix:/var/run/openvswitch/db.sock: 
connected
2016-10-24T10:42:03Z|7|dpdk|INFO|DPDK Enabled, initializing
2016-10-24T10:42:03Z|8|dpdk|INFO|No vhost-sock-dir provided - defaulting to 
/var/run/openvswitch
2016-10-24T10:42:03Z|9|dpdk|INFO|EAL ARGS: ovs-vswitchd -c 0xf --socket-mem 
1024,1024
EAL: Detected 48 lcore(s)
EAL: Probing VFIO support...
EAL: VFIO support initialized
PMD: bnxt_rte_pmd_init() called for (null)
EAL: PCI device :01:00.0 on NUMA socket 0
EAL:   probe driver: 8086:10fb rte_ixgbe_pmd
EAL: PCI device :01:00.1 on NUMA socket 0
EAL:   probe driver: 8086:10fb rte_ixgbe_pmd
EAL: PCI device :06:00.0 on NUMA socket 0
EAL:   probe driver: 8086:1521 rte_igb_pmd
EAL: PCI device :06:00.1 on NUMA socket 0
EAL:   probe driver: 8086:1521 rte_igb_pmd
2016-10-24T10:42:06Z|00010|dpdk|INFO|DPDK pdump packet capture enable


and the output of the cpu_layout:
cores =  [0, 1, 2, 3, 4, 5, 8, 9, 10, 11, 12, 13]
sockets =  [0, 1]


Socket 0Socket 1

Core 0  [0, 24] [1, 25] 
Core 1  [2, 26] [3, 27] 
Core 2  [4, 28] [5, 29] 
Core 3  [6, 30] [7, 31] 
Core 4  [8, 32] [9, 33] 
Core 5  [10, 34][11, 35]
Core 8  [12, 36][13, 37]
Core 9  [14, 38][15, 39]
Core 10 [16, 40][17, 41]
Core 11 [18, 42][19, 43]
Core 12 [20, 44][21, 45]
Core 13 [22, 46][23, 47] 
All the threads and their pinned core for vswitchd:
28262  28262   0 ovs-vswitchd
 28263  28262  39 vfio-sync
 28297  28262   0 eal-intr-thread
 28298  28262   1 lcore-slave-1
 28299  28262   2 lcore-slave-2
 28300  28262   3 lcore-slave-3
 28301  28262   0 dpdk_watchdog2
 28302  28262   0 vhost_thread1
 28303  28262   0 pdump-thread
 28304  28262   0 ct_clean3
 28305  28262   0 urcu4
 28744  28262   0 handler101
 28745  28262   0 handler100
 28746  28262   0 handler99
 28747  28262   0 handler98
 28748  28262   0 handler95
 28749  28262   0 handler77
 28750  28262   0 handler79
 28751  28262   0 handler80
 28752  28262   0 handler81
 28753  28262   0 handler73
 28756  28262   0 handler92
 28757  28262   0 handler82
 28758  28262   0 handler96
 28759  28262   0 handler71
 28760  28262   0 handler61
 28761  28262   0 handler62
 28762  28262   0 handler83
 28763  28262   0 handler63
 28764  28262   0 handler84
 28765  28262   0 handler93
 28766  28262   0 handler64
 28767  28262   0 handler85
 28768  28262   0 handler74
 28769  28262   0 handler65
 28770  28262   0 handler66
 28771  28262   0 handler78
 28772  28262   0 handler86
 28773  28262   0 handler87
 28774  28262   0 handler97
 28775  28262   0 handler88
 28776  28262   0 handler56
 28777  28262   0 handler76
 28778  28262   0 handler67
 28779  28262   0 handler60
 28780  28262   0 handler68
 28781  28262   0 revalidator75
 28782  28262   0 revalidator57
 28783  28262   0 revalidator89
 28784  28262   0 revalidator69
 28785  28262   0 revalidator54
 28786  28262   0 revalidator90
 28787  28262   0 revalidator55
 28788  28262   0 revalidator58
 28789  28262   0 revalidator59
 28790  28262   0 revalidator70
 28791  28262   0 revalidator94
 28792  28262   0 revalidator91
 28793  28262   0 revalidator72
 28827  28262   4 pmd103
 28829  28262   6 pmd102



[dpdk-dev] [PATCH v10 6/6] testpmd: use Tx preparation in csum engine

2016-10-24 Thread Tomasz Kulasek
Removed pseudo header calculation for udp/tcp/tso packets from
application and used Tx preparation API for packet preparation and
verification.

Adding this additional step to the csum engine costs about a 3-4% performance
drop on my setup with the ixgbe driver. It is caused mostly by the need to
re-access and modify the packet data.

Signed-off-by: Tomasz Kulasek 
---
 app/test-pmd/csumonly.c |   36 +---
 1 file changed, 13 insertions(+), 23 deletions(-)

diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c
index 57e6ae2..6f33ae9 100644
--- a/app/test-pmd/csumonly.c
+++ b/app/test-pmd/csumonly.c
@@ -112,15 +112,6 @@ struct simple_gre_hdr {
 } __attribute__((__packed__));

 static uint16_t
-get_psd_sum(void *l3_hdr, uint16_t ethertype, uint64_t ol_flags)
-{
-   if (ethertype == _htons(ETHER_TYPE_IPv4))
-   return rte_ipv4_phdr_cksum(l3_hdr, ol_flags);
-   else /* assume ethertype == ETHER_TYPE_IPv6 */
-   return rte_ipv6_phdr_cksum(l3_hdr, ol_flags);
-}
-
-static uint16_t
 get_udptcp_checksum(void *l3_hdr, void *l4_hdr, uint16_t ethertype)
 {
if (ethertype == _htons(ETHER_TYPE_IPv4))
@@ -370,32 +361,24 @@ process_inner_cksums(void *l3_hdr, const struct 
testpmd_offload_info *info,
/* do not recalculate udp cksum if it was 0 */
if (udp_hdr->dgram_cksum != 0) {
udp_hdr->dgram_cksum = 0;
-   if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_UDP_CKSUM) {
+   if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_UDP_CKSUM)
ol_flags |= PKT_TX_UDP_CKSUM;
-   udp_hdr->dgram_cksum = get_psd_sum(l3_hdr,
-   info->ethertype, ol_flags);
-   } else {
+   else
udp_hdr->dgram_cksum =
get_udptcp_checksum(l3_hdr, udp_hdr,
info->ethertype);
-   }
}
} else if (info->l4_proto == IPPROTO_TCP) {
tcp_hdr = (struct tcp_hdr *)((char *)l3_hdr + info->l3_len);
tcp_hdr->cksum = 0;
-   if (tso_segsz) {
+   if (tso_segsz)
ol_flags |= PKT_TX_TCP_SEG;
-   tcp_hdr->cksum = get_psd_sum(l3_hdr, info->ethertype,
-   ol_flags);
-   } else if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_TCP_CKSUM) {
+   else if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_TCP_CKSUM)
ol_flags |= PKT_TX_TCP_CKSUM;
-   tcp_hdr->cksum = get_psd_sum(l3_hdr, info->ethertype,
-   ol_flags);
-   } else {
+   else
tcp_hdr->cksum =
get_udptcp_checksum(l3_hdr, tcp_hdr,
info->ethertype);
-   }
} else if (info->l4_proto == IPPROTO_SCTP) {
sctp_hdr = (struct sctp_hdr *)((char *)l3_hdr + info->l3_len);
sctp_hdr->cksum = 0;
@@ -648,6 +631,7 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
void *l3_hdr = NULL, *outer_l3_hdr = NULL; /* can be IPv4 or IPv6 */
uint16_t nb_rx;
uint16_t nb_tx;
+   uint16_t nb_prep;
uint16_t i;
uint64_t rx_ol_flags, tx_ol_flags;
uint16_t testpmd_ol_flags;
@@ -857,7 +841,13 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
printf("\n");
}
}
-   nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_rx);
+   nb_prep = rte_eth_tx_prep(fs->tx_port, fs->tx_queue, pkts_burst,
+   nb_rx);
+   if (nb_prep != nb_rx)
+   printf("Preparing packet burst to transmit failed: %s\n",
+   rte_strerror(rte_errno));
+
+   nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, 
nb_prep);
/*
 * Retry if necessary
 */
-- 
1.7.9.5



[dpdk-dev] [PATCH v10 5/6] ixgbe: add Tx preparation

2016-10-24 Thread Tomasz Kulasek
Signed-off-by: Tomasz Kulasek 
---
 drivers/net/ixgbe/ixgbe_ethdev.c |3 ++
 drivers/net/ixgbe/ixgbe_ethdev.h |5 +++-
 drivers/net/ixgbe/ixgbe_rxtx.c   |   58 +-
 drivers/net/ixgbe/ixgbe_rxtx.h   |2 ++
 4 files changed, 66 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
index 4ca5747..4c6a8e1 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.c
+++ b/drivers/net/ixgbe/ixgbe_ethdev.c
@@ -517,6 +517,8 @@ static const struct rte_eth_desc_lim tx_desc_lim = {
.nb_max = IXGBE_MAX_RING_DESC,
.nb_min = IXGBE_MIN_RING_DESC,
.nb_align = IXGBE_TXD_ALIGN,
+   .nb_seg_max = IXGBE_TX_MAX_SEG,
+   .nb_mtu_seg_max = IXGBE_TX_MAX_SEG,
 };

 static const struct eth_dev_ops ixgbe_eth_dev_ops = {
@@ -1103,6 +1105,7 @@ eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev)
eth_dev->dev_ops = _eth_dev_ops;
eth_dev->rx_pkt_burst = _recv_pkts;
eth_dev->tx_pkt_burst = _xmit_pkts;
+   eth_dev->tx_pkt_prep = _prep_pkts;

/*
 * For secondary processes, we don't initialise any further as primary
diff --git a/drivers/net/ixgbe/ixgbe_ethdev.h b/drivers/net/ixgbe/ixgbe_ethdev.h
index 4ff6338..e229cf5 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.h
+++ b/drivers/net/ixgbe/ixgbe_ethdev.h
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -396,6 +396,9 @@ uint16_t ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf 
**tx_pkts,
 uint16_t ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);

+uint16_t ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts);
+
 int ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
  struct rte_eth_rss_conf *rss_conf);

diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
index 2ce8234..031414c 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   Copyright 2014 6WIND S.A.
  *   All rights reserved.
  *
@@ -70,6 +70,7 @@
 #include 
 #include 
 #include 
+#include 

 #include "ixgbe_logs.h"
 #include "base/ixgbe_api.h"
@@ -87,6 +88,9 @@
PKT_TX_TCP_SEG | \
PKT_TX_OUTER_IP_CKSUM)

+#define IXGBE_TX_OFFLOAD_NOTSUP_MASK \
+   (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK)
+
 #if 1
 #define RTE_PMD_USE_PREFETCH
 #endif
@@ -905,6 +909,56 @@ end_of_tx:

 /*
  *
+ *  TX prep functions
+ *
+ **/
+/**
+ * Check a burst of packets before it is handed to the ixgbe Tx path.
+ *
+ * Packets are examined in order. Processing stops at the first packet that
+ * fails a check and rte_errno is set to a positive errno value describing
+ * the failure (the caller may pass it to rte_strerror()).
+ *
+ * @param tx_queue
+ *   Tx queue the burst is destined for; used as struct ixgbe_tx_queue *.
+ * @param tx_pkts
+ *   Array of packets to check.
+ * @param nb_pkts
+ *   Number of packets in tx_pkts.
+ * @return
+ *   Number of leading packets that passed every check; equals nb_pkts when
+ *   the whole burst was accepted.
+ */
+uint16_t
+ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+	int i, ret;
+	uint64_t ol_flags;
+	struct rte_mbuf *m;
+	struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
+
+	for (i = 0; i < nb_pkts; i++) {
+		m = tx_pkts[i];
+		ol_flags = m->ol_flags;
+
+		/**
+		 * Check if packet meets requirements for number of segments
+		 *
+		 * NOTE: for ixgbe it's always (40 - WTHRESH) for both TSO and
+		 *       non-TSO
+		 */
+		if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) {
+			/* rte_errno carries positive errno values */
+			rte_errno = EINVAL;
+			return i;
+		}
+
+		/* Any flag outside IXGBE_TX_OFFLOAD_MASK is not supported */
+		if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) {
+			rte_errno = ENOTSUP;
+			return i;
+		}
+
+		/*
+		 * NOTE(review): the two helpers below are assumed to return
+		 * 0 on success and a negative errno on failure, hence the
+		 * negation when storing into rte_errno -- confirm against
+		 * their definitions.
+		 */
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+		ret = rte_validate_tx_offload(m);
+		if (ret != 0) {
+			rte_errno = -ret;
+			return i;
+		}
+#endif
+		ret = rte_phdr_cksum_fix(m);
+		if (ret != 0) {
+			rte_errno = -ret;
+			return i;
+		}
+	}
+
+	return i;
+}
+
+/*
+ *
  *  RX functions
  *
  **/
@@ -2282,6 +2336,7 @@ ixgbe_set_tx_function(struct rte_eth_dev *dev, struct 
ixgbe_tx_queue *txq)
if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS)
&& (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
PMD_INIT_LOG(DEBUG, "Using simple tx code path");
+   dev->tx_pkt_prep = NULL;
 #ifdef RTE_IXGBE_INC_VECTOR
if (txq->tx_rs_thresh 

[dpdk-dev] [PATCH v10 4/6] i40e: add Tx preparation

2016-10-24 Thread Tomasz Kulasek
Signed-off-by: Tomasz Kulasek 
---
 drivers/net/i40e/i40e_ethdev.c |3 ++
 drivers/net/i40e/i40e_rxtx.c   |   72 +++-
 drivers/net/i40e/i40e_rxtx.h   |8 +
 3 files changed, 82 insertions(+), 1 deletion(-)

diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index 5af0e43..dab0d48 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -936,6 +936,7 @@ eth_i40e_dev_init(struct rte_eth_dev *dev)
dev->dev_ops = _eth_dev_ops;
dev->rx_pkt_burst = i40e_recv_pkts;
dev->tx_pkt_burst = i40e_xmit_pkts;
+   dev->tx_pkt_prep = i40e_prep_pkts;

/* for secondary processes, we don't initialise any further as primary
 * has already done this work. Only check we don't need a different
@@ -2629,6 +2630,8 @@ i40e_dev_info_get(struct rte_eth_dev *dev, struct 
rte_eth_dev_info *dev_info)
.nb_max = I40E_MAX_RING_DESC,
.nb_min = I40E_MIN_RING_DESC,
.nb_align = I40E_ALIGN_RING_DESC,
+   .nb_seg_max = I40E_TX_MAX_SEG,
+   .nb_mtu_seg_max = I40E_TX_MAX_MTU_SEG,
};

if (pf->flags & I40E_FLAG_VMDQ) {
diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index 7ae7d9f..7f6d3d8 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -50,6 +50,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 

 #include "i40e_logs.h"
 #include "base/i40e_prototype.h"
@@ -79,6 +81,17 @@
PKT_TX_TCP_SEG | \
PKT_TX_OUTER_IP_CKSUM)

+#define I40E_TX_OFFLOAD_MASK (  \
+   PKT_TX_IP_CKSUM |   \
+   PKT_TX_L4_MASK |\
+   PKT_TX_OUTER_IP_CKSUM | \
+   PKT_TX_TCP_SEG |\
+   PKT_TX_QINQ_PKT |   \
+   PKT_TX_VLAN_PKT)
+
+#define I40E_TX_OFFLOAD_NOTSUP_MASK \
+   (PKT_TX_OFFLOAD_MASK ^ I40E_TX_OFFLOAD_MASK)
+
 static uint16_t i40e_xmit_pkts_simple(void *tx_queue,
  struct rte_mbuf **tx_pkts,
  uint16_t nb_pkts);
@@ -1411,6 +1424,61 @@ i40e_xmit_pkts_simple(void *tx_queue,
return nb_tx;
 }

+/*
+ *
+ *  TX prep functions
+ *
+ **/
+/**
+ * Check a burst of packets before it is handed to the i40e Tx path.
+ *
+ * Packets are examined in order. Processing stops at the first packet that
+ * fails a check and rte_errno is set to a positive errno value describing
+ * the failure (the caller may pass it to rte_strerror()).
+ *
+ * @param tx_queue
+ *   Unused.
+ * @param tx_pkts
+ *   Array of packets to check.
+ * @param nb_pkts
+ *   Number of packets in tx_pkts.
+ * @return
+ *   Number of leading packets that passed every check; equals nb_pkts when
+ *   the whole burst was accepted.
+ */
+uint16_t
+i40e_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
+		uint16_t nb_pkts)
+{
+	int i, ret;
+	uint64_t ol_flags;
+	struct rte_mbuf *m;
+
+	for (i = 0; i < nb_pkts; i++) {
+		m = tx_pkts[i];
+		ol_flags = m->ol_flags;
+
+		/**
+		 * m->nb_segs is uint8_t, so m->nb_segs is always less than
+		 * I40E_TX_MAX_SEG.
+		 * We check only a condition for m->nb_segs >
+		 * I40E_TX_MAX_MTU_SEG.
+		 */
+		if (!(ol_flags & PKT_TX_TCP_SEG)) {
+			if (m->nb_segs > I40E_TX_MAX_MTU_SEG) {
+				/* rte_errno carries positive errno values */
+				rte_errno = EINVAL;
+				return i;
+			}
+		} else if ((m->tso_segsz < I40E_MIN_TSO_MSS) ||
+				(m->tso_segsz > I40E_MAX_TSO_MSS)) {
+			/*
+			 * MSS outside the range (256B - 9674B) are considered
+			 * malicious
+			 */
+			rte_errno = EINVAL;
+			return i;
+		}
+
+		/* Any flag outside I40E_TX_OFFLOAD_MASK is not supported */
+		if (ol_flags & I40E_TX_OFFLOAD_NOTSUP_MASK) {
+			rte_errno = ENOTSUP;
+			return i;
+		}
+
+		/*
+		 * NOTE(review): the two helpers below are assumed to return
+		 * 0 on success and a negative errno on failure, hence the
+		 * negation when storing into rte_errno -- confirm against
+		 * their definitions.
+		 */
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+		ret = rte_validate_tx_offload(m);
+		if (ret != 0) {
+			rte_errno = -ret;
+			return i;
+		}
+#endif
+		ret = rte_phdr_cksum_fix(m);
+		if (ret != 0) {
+			rte_errno = -ret;
+			return i;
+		}
+	}
+	return i;
+}
+
 /*
  * Find the VSI the queue belongs to. 'queue_idx' is the queue index
  * application used, which assume having sequential ones. But from driver's
@@ -2763,9 +2831,11 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
PMD_INIT_LOG(DEBUG, "Simple tx finally be used.");
dev->tx_pkt_burst = i40e_xmit_pkts_simple;
}
+   dev->tx_pkt_prep = NULL;
} else {
PMD_INIT_LOG(DEBUG, "Xmit tx finally be used.");
dev->tx_pkt_burst = i40e_xmit_pkts;
+

[dpdk-dev] [PATCH v10 3/6] fm10k: add Tx preparation

2016-10-24 Thread Tomasz Kulasek
Signed-off-by: Tomasz Kulasek 
---
 drivers/net/fm10k/fm10k.h|6 +
 drivers/net/fm10k/fm10k_ethdev.c |5 
 drivers/net/fm10k/fm10k_rxtx.c   |   50 +-
 3 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/drivers/net/fm10k/fm10k.h b/drivers/net/fm10k/fm10k.h
index 05aa1a2..c6fed21 100644
--- a/drivers/net/fm10k/fm10k.h
+++ b/drivers/net/fm10k/fm10k.h
@@ -69,6 +69,9 @@
 #define FM10K_MAX_RX_DESC  (FM10K_MAX_RX_RING_SZ / sizeof(union fm10k_rx_desc))
 #define FM10K_MAX_TX_DESC  (FM10K_MAX_TX_RING_SZ / sizeof(struct 
fm10k_tx_desc))

+#define FM10K_TX_MAX_SEG UINT8_MAX
+#define FM10K_TX_MAX_MTU_SEG UINT8_MAX
+
 /*
  * byte aligment for HW RX data buffer
  * Datasheet requires RX buffer addresses shall either be 512-byte aligned or
@@ -356,6 +359,9 @@ fm10k_dev_rx_descriptor_done(void *rx_queue, uint16_t 
offset);
 uint16_t fm10k_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);

+uint16_t fm10k_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts);
+
 int fm10k_rxq_vec_setup(struct fm10k_rx_queue *rxq);
 int fm10k_rx_vec_condition_check(struct rte_eth_dev *);
 void fm10k_rx_queue_release_mbufs_vec(struct fm10k_rx_queue *rxq);
diff --git a/drivers/net/fm10k/fm10k_ethdev.c b/drivers/net/fm10k/fm10k_ethdev.c
index c804436..dffb6d1 100644
--- a/drivers/net/fm10k/fm10k_ethdev.c
+++ b/drivers/net/fm10k/fm10k_ethdev.c
@@ -1446,6 +1446,8 @@ fm10k_dev_infos_get(struct rte_eth_dev *dev,
.nb_max = FM10K_MAX_TX_DESC,
.nb_min = FM10K_MIN_TX_DESC,
.nb_align = FM10K_MULT_TX_DESC,
+   .nb_seg_max = FM10K_TX_MAX_SEG,
+   .nb_mtu_seg_max = FM10K_TX_MAX_MTU_SEG,
};

dev_info->speed_capa = ETH_LINK_SPEED_1G | ETH_LINK_SPEED_2_5G |
@@ -2754,8 +2756,10 @@ fm10k_set_tx_function(struct rte_eth_dev *dev)
fm10k_txq_vec_setup(txq);
}
dev->tx_pkt_burst = fm10k_xmit_pkts_vec;
+   dev->tx_pkt_prep = NULL;
} else {
dev->tx_pkt_burst = fm10k_xmit_pkts;
+   dev->tx_pkt_prep = fm10k_prep_pkts;
PMD_INIT_LOG(DEBUG, "Use regular Tx func");
}
 }
@@ -2834,6 +2838,7 @@ eth_fm10k_dev_init(struct rte_eth_dev *dev)
dev->dev_ops = _eth_dev_ops;
dev->rx_pkt_burst = _recv_pkts;
dev->tx_pkt_burst = _xmit_pkts;
+   dev->tx_pkt_prep = _prep_pkts;

/* only initialize in the primary process */
if (rte_eal_process_type() != RTE_PROC_PRIMARY)
diff --git a/drivers/net/fm10k/fm10k_rxtx.c b/drivers/net/fm10k/fm10k_rxtx.c
index 32cc7ff..5fc4d5a 100644
--- a/drivers/net/fm10k/fm10k_rxtx.c
+++ b/drivers/net/fm10k/fm10k_rxtx.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2013-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -35,6 +35,7 @@

 #include 
 #include 
+#include 
 #include "fm10k.h"
 #include "base/fm10k_type.h"

@@ -65,6 +66,15 @@ static inline void dump_rxd(union fm10k_rx_desc *rxd)
 }
 #endif

+#define FM10K_TX_OFFLOAD_MASK (  \
+   PKT_TX_VLAN_PKT |\
+   PKT_TX_IP_CKSUM |\
+   PKT_TX_L4_MASK | \
+   PKT_TX_TCP_SEG)
+
+#define FM10K_TX_OFFLOAD_NOTSUP_MASK \
+   (PKT_TX_OFFLOAD_MASK ^ FM10K_TX_OFFLOAD_MASK)
+
 /* @note: When this function is changed, make corresponding change to
  * fm10k_dev_supported_ptypes_get()
  */
@@ -597,3 +607,41 @@ fm10k_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,

return count;
 }
+
+uint16_t
+fm10k_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts)
+{
+   int i, ret;
+   struct rte_mbuf *m;
+
+   for (i = 0; i < nb_pkts; i++) {
+   m = tx_pkts[i];
+
+   if ((m->ol_flags & PKT_TX_TCP_SEG) &&
+   (m->tso_segsz < FM10K_TSO_MINMSS)) {
+   rte_errno = -EINVAL;
+   return i;
+   }
+
+   if (m->ol_flags & FM10K_TX_OFFLOAD_NOTSUP_MASK) {
+   rte_errno = -ENOTSUP;
+   return i;
+   }
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+   ret = rte_validate_tx_offload(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+#endif
+   ret = rte_phdr_cksum_fix(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+   }
+
+   return i;
+}
-- 
1.7.9.5



[dpdk-dev] [PATCH v10 2/6] e1000: add Tx preparation

2016-10-24 Thread Tomasz Kulasek
Signed-off-by: Tomasz Kulasek 
---
 drivers/net/e1000/e1000_ethdev.h |   11 
 drivers/net/e1000/em_ethdev.c|5 +++-
 drivers/net/e1000/em_rxtx.c  |   48 ++-
 drivers/net/e1000/igb_ethdev.c   |4 +++
 drivers/net/e1000/igb_rxtx.c |   52 +-
 5 files changed, 117 insertions(+), 3 deletions(-)

diff --git a/drivers/net/e1000/e1000_ethdev.h b/drivers/net/e1000/e1000_ethdev.h
index 6c25c8d..bd0f277 100644
--- a/drivers/net/e1000/e1000_ethdev.h
+++ b/drivers/net/e1000/e1000_ethdev.h
@@ -138,6 +138,11 @@
 #define E1000_MISC_VEC_ID   RTE_INTR_VEC_ZERO_OFFSET
 #define E1000_RX_VEC_START  RTE_INTR_VEC_RXTX_OFFSET

+#define IGB_TX_MAX_SEG UINT8_MAX
+#define IGB_TX_MAX_MTU_SEG UINT8_MAX
+#define EM_TX_MAX_SEG  UINT8_MAX
+#define EM_TX_MAX_MTU_SEG  UINT8_MAX
+
 /* structure for interrupt relative data */
 struct e1000_interrupt {
uint32_t flags;
@@ -315,6 +320,9 @@ void eth_igb_tx_init(struct rte_eth_dev *dev);
 uint16_t eth_igb_xmit_pkts(void *txq, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);

+uint16_t eth_igb_prep_pkts(void *txq, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts);
+
 uint16_t eth_igb_recv_pkts(void *rxq, struct rte_mbuf **rx_pkts,
uint16_t nb_pkts);

@@ -376,6 +384,9 @@ void eth_em_tx_init(struct rte_eth_dev *dev);
 uint16_t eth_em_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);

+uint16_t eth_em_prep_pkts(void *txq, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts);
+
 uint16_t eth_em_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
uint16_t nb_pkts);

diff --git a/drivers/net/e1000/em_ethdev.c b/drivers/net/e1000/em_ethdev.c
index 7cf5f0c..17b45cb 100644
--- a/drivers/net/e1000/em_ethdev.c
+++ b/drivers/net/e1000/em_ethdev.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -300,6 +300,7 @@ eth_em_dev_init(struct rte_eth_dev *eth_dev)
eth_dev->dev_ops = _em_ops;
eth_dev->rx_pkt_burst = (eth_rx_burst_t)_em_recv_pkts;
eth_dev->tx_pkt_burst = (eth_tx_burst_t)_em_xmit_pkts;
+   eth_dev->tx_pkt_prep = (eth_tx_prep_t)_em_prep_pkts;

/* for secondary processes, we don't initialise any further as primary
 * has already done this work. Only check we don't need a different
@@ -1067,6 +1068,8 @@ eth_em_infos_get(struct rte_eth_dev *dev, struct 
rte_eth_dev_info *dev_info)
.nb_max = E1000_MAX_RING_DESC,
.nb_min = E1000_MIN_RING_DESC,
.nb_align = EM_TXD_ALIGN,
+   .nb_seg_max = EM_TX_MAX_SEG,
+   .nb_mtu_seg_max = EM_TX_MAX_MTU_SEG,
};

dev_info->speed_capa = ETH_LINK_SPEED_10M_HD | ETH_LINK_SPEED_10M |
diff --git a/drivers/net/e1000/em_rxtx.c b/drivers/net/e1000/em_rxtx.c
index 41f51c0..5bd3c99 100644
--- a/drivers/net/e1000/em_rxtx.c
+++ b/drivers/net/e1000/em_rxtx.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -66,6 +66,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 

 #include "e1000_logs.h"
@@ -77,6 +78,14 @@

 #define E1000_RXDCTL_GRAN  0x0100 /* RXDCTL Granularity */

+#define E1000_TX_OFFLOAD_MASK ( \
+   PKT_TX_IP_CKSUM |   \
+   PKT_TX_L4_MASK |\
+   PKT_TX_VLAN_PKT)
+
+#define E1000_TX_OFFLOAD_NOTSUP_MASK \
+   (PKT_TX_OFFLOAD_MASK ^ E1000_TX_OFFLOAD_MASK)
+
 /**
  * Structure associated with each descriptor of the RX ring of a RX queue.
  */
@@ -618,6 +627,43 @@ end_of_tx:

 /*
  *
+ *  TX prep functions
+ *
+ **/
+uint16_t
+eth_em_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts)
+{
+   int i, ret;
+   struct rte_mbuf *m;
+
+   for (i = 0; i < nb_pkts; i++) {
+   m = tx_pkts[i];
+
+   if (m->ol_flags & E1000_TX_OFFLOAD_NOTSUP_MASK) {
+   rte_errno = -ENOTSUP;
+   return i;
+   }
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+   ret = rte_validate_tx_offload(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+#endif
+   ret = rte_phdr_cksum_fix(m);
+   if (ret != 0) {
+   rte_errno = 

[dpdk-dev] [PATCH v10 1/6] ethdev: add Tx preparation

2016-10-24 Thread Tomasz Kulasek
Added API for `rte_eth_tx_prep`

uint16_t rte_eth_tx_prep(uint8_t port_id, uint16_t queue_id,
struct rte_mbuf **tx_pkts, uint16_t nb_pkts)

Added fields to the `struct rte_eth_desc_lim`:

uint16_t nb_seg_max;
/**< Max number of segments per whole packet. */

uint16_t nb_mtu_seg_max;
/**< Max number of segments per one MTU */

Added functions:

int rte_validate_tx_offload(struct rte_mbuf *m)
to validate general requirements for tx offload set in mbuf of packet
  such as flag completeness. In the current implementation this function
  is called optionally, only when RTE_LIBRTE_ETHDEV_DEBUG is enabled.

int rte_phdr_cksum_fix(struct rte_mbuf *m)
to fix pseudo header checksum for TSO and non-TSO tcp/udp packets
before hardware tx checksum offload.
 - for non-TSO tcp/udp packets full pseudo-header checksum is
   counted and set.
 - for TSO the IP payload length is not included.

PERFORMANCE TESTS
-

This feature was tested with modified csum engine from test-pmd.

The packet checksum preparation was moved from application to Tx
preparation step placed before burst.

We may expect some overhead costs caused by:
1) using additional callback before burst,
2) rescanning burst,
3) additional condition checking (packet validation),
4) worse optimization (e.g. packet data access, etc.)

We tested it using the ixgbe Tx preparation implementation with some parts
disabled to have comparable information about the impact of different
parts of the implementation.

IMPACT:

1) For unimplemented Tx preparation callback the performance impact is
   negligible,
2) For packet condition check without checksum modifications (nb_segs,
   available offloads, etc.) is 14626628/14252168 (~2.62% drop),
3) Full support in ixgbe driver (point 2 + packet checksum
   initialization) is 14060924/13588094 (~3.48% drop)

Signed-off-by: Tomasz Kulasek 
---
 config/common_base|1 +
 lib/librte_ether/rte_ethdev.h |   96 +
 lib/librte_mbuf/rte_mbuf.h|   64 +++
 lib/librte_net/rte_net.h  |   85 
 4 files changed, 246 insertions(+)

diff --git a/config/common_base b/config/common_base
index c7fd3db..619284b 100644
--- a/config/common_base
+++ b/config/common_base
@@ -120,6 +120,7 @@ CONFIG_RTE_MAX_QUEUES_PER_PORT=1024
 CONFIG_RTE_LIBRTE_IEEE1588=n
 CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16
 CONFIG_RTE_ETHDEV_RXTX_CALLBACKS=y
+CONFIG_RTE_ETHDEV_TX_PREP=y

 #
 # Support NIC bypass logic
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 38641e8..c4a8ccd 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -182,6 +182,7 @@ extern "C" {
 #include 
 #include 
 #include 
+#include 
 #include "rte_ether.h"
 #include "rte_eth_ctrl.h"
 #include "rte_dev_info.h"
@@ -699,6 +700,8 @@ struct rte_eth_desc_lim {
uint16_t nb_max;   /**< Max allowed number of descriptors. */
uint16_t nb_min;   /**< Min allowed number of descriptors. */
uint16_t nb_align; /**< Number of descriptors should be aligned to. */
+   uint16_t nb_seg_max; /**< Max number of segments per whole packet. 
*/
+   uint16_t nb_mtu_seg_max; /**< Max number of segments per one MTU */
 };

 /**
@@ -1188,6 +1191,11 @@ typedef uint16_t (*eth_tx_burst_t)(void *txq,
   uint16_t nb_pkts);
 /**< @internal Send output packets on a transmit queue of an Ethernet device. 
*/

+typedef uint16_t (*eth_tx_prep_t)(void *txq,
+  struct rte_mbuf **tx_pkts,
+  uint16_t nb_pkts);
+/**< @internal Prepare output packets on a transmit queue of an Ethernet 
device. */
+
 typedef int (*flow_ctrl_get_t)(struct rte_eth_dev *dev,
   struct rte_eth_fc_conf *fc_conf);
 /**< @internal Get current flow control parameter on an Ethernet device */
@@ -1622,6 +1630,7 @@ struct rte_eth_rxtx_callback {
 struct rte_eth_dev {
eth_rx_burst_t rx_pkt_burst; /**< Pointer to PMD receive function. */
eth_tx_burst_t tx_pkt_burst; /**< Pointer to PMD transmit function. */
+   eth_tx_prep_t tx_pkt_prep; /**< Pointer to PMD transmit prepare 
function. */
struct rte_eth_dev_data *data;  /**< Pointer to device data */
const struct eth_driver *driver;/**< Driver for this device */
const struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */
@@ -2816,6 +2825,93 @@ rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id,
return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, 
nb_pkts);
 }

+/**
+ * Process a burst of output packets on a transmit queue of an Ethernet device.
+ *
+ * The rte_eth_tx_prep() function is invoked to prepare output packets to be
+ * transmitted on the output queue *queue_id* of the Ethernet device designated
+ * by its *port_id*.

[dpdk-dev] [PATCH v10 0/6] add Tx preparation

2016-10-24 Thread Tomasz Kulasek
As discussed in that thread:

http://dpdk.org/ml/archives/dev/2015-September/023603.html

Different NIC models depending on HW offload requested might impose
different requirements on packets to be TX-ed in terms of:

 - Max number of fragments per packet allowed
 - Max number of fragments per TSO segments
 - The way pseudo-header checksum should be pre-calculated
 - L3/L4 header fields filling
 - etc.


MOTIVATION:
---

1) Some work cannot (and should not) be done in rte_eth_tx_burst.
   However, this work is sometimes required, and currently it is left
   to the application.

2) Different hardware may have different requirements for TX offloads,
   other subset can be supported and so on.

3) Some parameters (e.g. number of segments in ixgbe driver) may hang
   the device. These parameters may vary for different devices.

   For example i40e HW allows 8 fragments per packet, but that is after
   TSO segmentation. While ixgbe has a 38-fragment pre-TSO limit.

4) Fields in packet may require different initialization (like e.g. will
   require pseudo-header checksum precalculation, sometimes in a
   different way depending on packet type, and so on). Now application
   needs to care about it.

5) Using an additional API (rte_eth_tx_prep) before rte_eth_tx_burst lets
   the application prepare a packet burst in a form acceptable to the
   specific device.

6) Some additional checks may be done in debug mode keeping tx_burst
   implementation clean.


PROPOSAL:
-

To help user to deal with all these varieties we propose to:

1) Introduce rte_eth_tx_prep() function to do necessary preparations of
   packet burst to be safely transmitted on device for desired HW
   offloads (set/reset checksum field according to the hardware
   requirements) and check HW constraints (number of segments per
   packet, etc).

   Since the limitations and requirements may differ between devices,
   this requires extending the rte_eth_dev structure with a new function
   pointer "tx_pkt_prep", which can be implemented in the driver to
   prepare and verify packets, in a device-specific way, before the
   burst. This should prevent the application from sending malformed
   packets.

2) Also new fields will be introduced in rte_eth_desc_lim: 
   nb_seg_max and nb_mtu_seg_max, providing information about the max
   number of segments in TSO and non-TSO packets accepted by the device.

   This information is useful for the application to avoid creating
   (or to limit) malicious packets.


APPLICATION (CASE OF USE):
--

1) Application should initialize the burst of packets to send, set
   required tx offload flags and required fields, like l2_len, l3_len,
   l4_len, and tso_segsz

2) Application passes burst to the rte_eth_tx_prep to check conditions
   required to send packets through the NIC.

3) The result of rte_eth_tx_prep can be used to send valid packets
   and/or restore invalid if function fails.

e.g.

for (i = 0; i < nb_pkts; i++) {

/* initialize or process packet */

bufs[i]->tso_segsz = 800;
bufs[i]->ol_flags = PKT_TX_TCP_SEG | PKT_TX_IPV4
| PKT_TX_IP_CKSUM;
bufs[i]->l2_len = sizeof(struct ether_hdr);
bufs[i]->l3_len = sizeof(struct ipv4_hdr);
bufs[i]->l4_len = sizeof(struct tcp_hdr);
}

/* Prepare burst of TX packets */
nb_prep = rte_eth_tx_prep(port, 0, bufs, nb_pkts);

if (nb_prep < nb_pkts) {
printf("tx_prep failed\n");

/* nb_prep indicates here first invalid packet. rte_eth_tx_prep
 * can be used on remaining packets to find another ones.
 */

}

/* Send burst of TX packets */
nb_tx = rte_eth_tx_burst(port, 0, bufs, nb_prep);

/* Free any unsent packets. */

v10 changes:
 - moved drivers tx callback check in rte_eth_tx_prep after queue_id check

v9 changes:
 - fixed headers structure fragmentation check
 - moved fragmentation check into rte_validate_tx_offload()

v8 changes:
 - mbuf argument in rte_validate_tx_offload declared as const

v7 changes:
 - comments reworded/added
 - changed errno values returned from Tx prep API
 - added check in rte_phdr_cksum_fix if headers are in the first
   data segment and can be safely modified
 - moved rte_validate_tx_offload to rte_mbuf
 - moved rte_phdr_cksum_fix to rte_net.h
 - removed rte_pkt.h new file as useless

v6 changes:
- added performance impact test results to the patch description

v5 changes:
 - rebased csum engine modification
 - added information to the csum engine about performance tests
 - some performance improvements

v4 changes:
 - tx_prep is now set to default behavior (NULL) for simple/vector path
   in fm10k, i40e and ixgbe drivers to increase performance, when
   Tx offloads are not intentionally available

v3 changes:
 - reworked csum testpmd engine instead adding new one,
 - fixed checksum initialization procedure to include also outer
   

[dpdk-dev] rte_kni_tx_burst() hangs because of no freedescriptors

2016-10-24 Thread yingzhi
Hi Helin,


Thanks for your response, to answer your questions:
1. we send only one packet each time calling rte_kni_tx_burst(), which means 
the last argument is 1.
2. it returns 0 because the free mbuf function inside tx_burst will not free 
any mbuf:


if (txq->nb_tx_free < txq->tx_free_thresh)
ixgbe_tx_free_bufs(txq);



after this operation, the txq->nb_tx_free is not increased and keeps to be "0" 
eventually.


I did some tests today, I commented out this section of ixgbe_rxtx_vec_common.h 
-> ixgbe_tx_free_bufs


status = txq->tx_ring[txq->tx_next_dd].wb.status;
if (!(status & IXGBE_ADVTXD_STAT_DD))
return 0;



After ignoring the DD bit check, our app runs for about 6 hours without issue. I 
suspect there is something wrong in my program related to the DD bit somewhere. 
One possible cause I currently suspect is that, as far as I know, 
rte_pktmbuf_free(mbuf.array[k]) will free the mbuf of the packet and any 
fragmented packets following it. But our application has code such as the 
snippet below:


auto nb_tx = rte_eth_tx_burst(port, queue, mbuf.array, (uint16_t) 
nb_rx);
if (unlikely(nb_tx < nb_rx)) {
for (unsigned k = nb_tx; k < nb_rx; k++) {
rte_pktmbuf_free(mbuf.array[k]);
}
}



In this case, if there are fragmented packets and a failed transmission, this 
may cause an mbuf to be freed multiple times.


Above is just my suspect, need to do some tests later today or tomorrow.


Thanks
-- Original --
From:  "Zhang, Helin";;
Date:  Mon, Oct 24, 2016 11:33 AM
To:  "yingzhi"; 
Cc:  "dev at dpdk.org"; 
Subject:  RE: [dpdk-dev] rte_kni_tx_burst() hangs because of no freedescriptors



Hi Yingzhi

Thank you for the report! The description is not so clear, at least for me.
Please help to narrow down the issue by yourself.
How many packets would it have for calling TX function?
Why it would return 0 after calling TX function? No memory? Or return from 
else? Have you found anything?

Regards,
Helin

> -Original Message-
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of yingzhi
> Sent: Sunday, October 23, 2016 9:30 PM
> To: users; dev at dpdk.org
> Subject: [dpdk-dev] rte_kni_tx_burst() hangs because of no free descriptors
> 
> -
> Hi Experts,
> 
> Background:
> 
> We are using DPDK to develop a LoadBalancer following below logic: When
> a new packet is received:
>  1. if the dst_addr is management IP, forward to KNI. 2. if the dst_addr is in
> VIP list, select backend and forward(modify dst mac address). 3. otherwise
> drop the packet.
> 
> At this stage, we use one single thread for KNI forwarding and another for
> VIP forwarding(forward to eth).
> 
> DPDK version: 16.07
>  NIC: 82599ES 10-Gigabit SFI/SFP+ Network Connection
>  Linux: 14.04.1-Ubuntu x64
> 
> Problem description:
> 
> The program runs correctly for some time (around 2 hours for 400Mb traffic).
> But then it will hang. When the problem happens, rte_eth_tx_burst() will not be able to
> send out any packets(always returns 0). We tracked into that function and
> noticed it is actually calling ixgbe driver's ixgbe_xmit_pkts_vec() function 
> in
> our environment, because we use default tx queue configuration, after
> printing some info, we found if the free function works fine:
>  tx_rs_thresh: 32, tx_free_thresh: 32, nb_tx_free: 31
> 
> it will trigger free and make 32 more free descriptors:
>  tx_rs_thresh: 32, tx_free_thresh: 32, nb_tx_free: 62
> 
> but when something going wrong, it will no longer free anything:
>  tx_rs_thresh: 32, tx_free_thresh: 32, nb_tx_free: 0 tx_rs_thresh: 32,
> tx_free_thresh: 32, nb_tx_free: 0
> 
> It may related with the DD flag of the descriptor but we are not quite sure.
> 
> Our program logic:
> 
> create two mbuf pools on socket 0, one for rx_queue and one for kni. (all
> lcore threads runs on socket0)
> 
> init kni interface with rte_kni_alloc()
> 
> 
> init one NIC interface with
>  rte_eth_dev_configure(); rte_eth_rx_queue_setup();
> rte_eth_tx_queue_setup(); rte_eth_dev_start();
> 
> 
> 
> in the eth main loop: (code is simplified)
>  while(1) { n = rte_eth_rx_burst(packets); for (i = 0; i < n; ++i)
>   { if
> (SEND_TO_KNI) { m = rte_kni_tx_burst(packets[i]); if 
> (m != 1))
> { rte_pktmbuf_free(packets[i]); } }   
>   if (SEND_TO_ETH)
> { // after modify the packet m = 
> rte_eth_tx_burst(packets[i]);
> if (m != 1)) { rte_pktmbuf_free(packets[i]); }
>  } //
> otherwise drop the packet rte_pktmbuf_free(packets[i]); } }
> 
> 
> Please advise if I'm using DPDK in a wrong way. Sorry if I missed something
> basic, I'm new to DPDK.
> 
> Thanks in advance
>  Best regards


[dpdk-dev] [PATCH v5 06/21] eal/soc: introduce very essential SoC infra definitions

2016-10-24 Thread Jan Viktorin
On Mon, 24 Oct 2016 17:29:25 +0530
Shreyansh Jain  wrote:

> From: Jan Viktorin 
> 
> Define initial structures and functions for the SoC infrastructure.
> This patch supports only a very minimal functions for now.
> More features will be added in the following commits.
> 
> Includes rte_device/rte_driver inheritance of
> rte_soc_device/rte_soc_driver.
> 
> Signed-off-by: Jan Viktorin 
> Signed-off-by: Shreyansh Jain 
> Signed-off-by: Hemant Agrawal 
> ---
>  app/test/Makefile   |   1 +
>  app/test/test_soc.c |  90 +
>  lib/librte_eal/common/Makefile  |   2 +-
>  lib/librte_eal/common/eal_private.h |   4 +
>  lib/librte_eal/common/include/rte_soc.h | 138 
> 
>  5 files changed, 234 insertions(+), 1 deletion(-)
>  create mode 100644 app/test/test_soc.c
>  create mode 100644 lib/librte_eal/common/include/rte_soc.h
> 
> diff --git a/app/test/Makefile b/app/test/Makefile

[...]

> +++ b/lib/librte_eal/common/include/rte_soc.h
> @@ -0,0 +1,138 @@

[...]

> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +#include 
> +#include 
> +
> +struct rte_soc_id {
> + const char *compatible; /**< OF compatible specification */
> + uint64_t priv_data; /**< SoC Driver specific data */

Do you expect this to be a pointer?

> +};
> +

[...]

> +
> +/**
> + * Initialization function for the driver called during SoC probing.
> + */
> +typedef int (soc_devinit_t)(struct rte_soc_driver *, struct rte_soc_device 
> *);
> +
> +/**
> + * Uninitialization function for the driver called during hotplugging.
> + */
> +typedef int (soc_devuninit_t)(struct rte_soc_device *);
> +
> +/**
> + * A structure describing a SoC driver.
> + */
> +struct rte_soc_driver {
> + TAILQ_ENTRY(rte_soc_driver) next;  /**< Next in list */
> + struct rte_driver driver;  /**< Inherit core driver. */
> + soc_devinit_t *devinit;/**< Device initialization */
> + soc_devuninit_t *devuninit;/**< Device uninitialization */

Shouldn't those functions be named probe/remove?

> + const struct rte_soc_id *id_table; /**< ID table, NULL terminated */
> +};
> +

[...]

> +#endif



-- 
   Jan Viktorin  E-mail: Viktorin at RehiveTech.com
   System Architect  Web:www.RehiveTech.com
   RehiveTech
   Brno, Czech Republic


[dpdk-dev] PCIe Hot Insert/Remove Support

2016-10-24 Thread Walker, Benjamin
Hi all,

My name is Ben Walker and I'm the technical lead for SPDK (it's like DPDK, but
for storage devices). SPDK relies on DPDK only for the base functionality in the
EAL - memory management, the rings, and the PCI scanning code. A key feature for
storage devices is support for hot insert and remove, so we're currently working
through how best to implement this for a user space driver. While doing this
work, we've run into a few issues with the current DPDK PCI/device/driver
framework that I'd like to discuss with this list. I'm not entirely ramped up on
all of the current activity in this area or what the future plans are, so please
educate me if something is coming that will address our current issues. I'm
working off of the latest commit on the master branch as of today.

Today, there appears to be two lists - one of PCI devices and one of drivers. To
update the list of PCI devices, you call rte_eal_pci_scan(), which scans the PCI
bus. That call does not attempt to load any drivers. One scan is automatically
performed when the eal is first initialized. To add or remove drivers from the
driver list you call rte_eal_driver_register/unregister. To match drivers in the
driver list to devices in the device list, you call rte_eal_pci_probe.

There are a few problems with how the code works for us. First,
rte_eal_pci_scan's algorithm will not correctly detect devices that are in its
internal list but weren't found by the most recent PCI bus scan (i.e. they were
hot removed). DPDK's scan doesn't seem to comprehend hot remove in any way.
Fortunately there is a public API to remove devices from the device list -
rte_eal_pci_detach. That function will automatically unload any drivers
associated with the device and then remove it from the list. There is a similar
call for adding a device to the list - rte_eal_pci_probe_one, which will add a
device to the device list and then automatically match it to drivers. I think if
rte_eal_pci_scan is going to be a public interface (and it is), it needs to
correctly comprehend the removal of PCI devices. Otherwise, make it a private
API that is only called in response to rte_eal_init and only expose the public
probe_one/detach calls for modifying the list of devices. My preference is for
the former, not the latter.

Second, rte_eal_pci_probe will call the driver initialization functions each
time a probe happens, even if the driver has already been successfully loaded.
This tends to crash a lot of the PMDs. It seems to me like rte_eal_pci_probe is
not safe to call more than once during the lifetime of the program, which is a
real challenge when you have multiple users of the PCI framework. For instance,
an application may manage both storage devices using the rte_eal_pci framework
and NICs, and the initialization routine may go something like:

register NIC drivers
rte_eal_probe()
...
register SSD drivers
rte_eal_probe()

This is almost certainly how any real code is going to function because the code
dealing with NICs is unrelated and probably unaware of the code dealing with the
SSDs. It should be fairly trivial to simply not call the probe() callback for a
device if the driver has already been loaded. Is this a reasonable modification
to make?

Thanks,
Ben


[dpdk-dev] [PATCH v5 01/21] eal: generalize PCI kernel driver enum to EAL

2016-10-24 Thread Jan Viktorin
On Mon, 24 Oct 2016 17:29:20 +0530
Shreyansh Jain  wrote:

> From: Jan Viktorin 
> 
> Signed-off-by: Jan Viktorin 
> Signed-off-by: Shreyansh Jain 

I think, there is no reason to prevent merging this. Feel free to add:

Acked-by: Jan Viktorin 


[dpdk-dev] [PATCH v4 11/17] eal/soc: add default scan for Soc devices

2016-10-24 Thread Jan Viktorin
On Mon, 24 Oct 2016 17:38:29 +0530
Shreyansh Jain  wrote:

> Hi Jan,
> 
> On Sunday 16 October 2016 12:42 PM, Shreyansh Jain wrote:
> > Hi Jan,
> >  

[...]

> >>  
> >>> +
> >>> +int
> >>> +rte_eal_soc_scan(void)  
> >>
> >> What about naming it rte_eal_soc_scan_default? This would underline the
> >> fact that this function can be replaced.  
> >
> > Yes, that would be in sync with match default. I will do it.  
> 
> In v5 I have replaced the name with rte_eal_soc_platform_bus(). This is 
> long but it does exactly what the name states - scan for platform bus. 
> This is still a helper.

OK.

> 
> >  
> >>
> >> Second, this is for the 7/17 patch:
> >>
> >> -/* register a driver */
> >>  void
> >>  rte_eal_soc_register(struct rte_soc_driver *driver)
> >>  {
> >> +  /* For a valid soc driver, match and scan function
> >> +   * should be provided.
> >> +   */
> >> +  RTE_VERIFY(driver != NULL);
> >> +  RTE_VERIFY(driver->match_fn != NULL);
> >> +  RTE_VERIFY(driver->scan_fn != NULL);
> >>
> >> What about setting the match_fn and scan_fn to default implementations if
> >> they
> >> are NULL? This would make the standard/default approach easier to use.
> >>
> >>TAILQ_INSERT_TAIL(_driver_list, driver, next);
> >>  }  
> >
> > I am not in favor of a forced default. What if user never intended it - it 
> > would lead to wrong scan being used and only intimation which can provided 
> > to user is a log.
> > Selecting such functions should be a model of PMD - one which is enforced.  
> 
> As mentioned before, I am not in favor of a 'default' implementation. 
> Thus, I would rather call these functions as 'helpers' rather than defaults.

Hmm, OK.

Jan

> 
> [...]
> 
> -
> Shreyansh



-- 
   Jan Viktorin  E-mail: Viktorin at RehiveTech.com
   System Architect  Web:www.RehiveTech.com
   RehiveTech
   Brno, Czech Republic


[dpdk-dev] mbuf changes

2016-10-24 Thread Morten Brørup
First of all: Thanks for a great DPDK Userspace 2016!



Continuing the Userspace discussion about Olivier Matz's proposed mbuf 
changes...



1.

Stephen Hemminger had a noteworthy general comment about keeping metadata for 
the NIC in the appropriate section of the mbuf: Metadata generated by the NIC's 
RX handler belongs in the first cache line, and metadata required by the NIC's 
TX handler belongs in the second cache line. This also means that touching the 
second cache line on ingress should be avoided if possible; and Bruce 
Richardson mentioned that for this reason m->next was zeroed on free().



2.

There seemed to be consensus that the size of m->refcnt should match the size 
of m->port because a packet could be duplicated on all physical ports for L3 
multicast and L2 flooding.

Furthermore, although a single physical machine (i.e. a single server) with 255 
physical ports probably doesn't exist, it might contain more than 255 virtual 
machines with a virtual port each, so it makes sense extending these mbuf 
fields from 8 to 16 bits.



3.

Someone (Bruce Richardson?) suggested moving m->refcnt and m->port to the 
second cache line, which then generated questions from the audience about the 
real life purpose of m->port, and if m->port could be removed from the mbuf 
structure.



4.

I suggested using offset -1 for m->refcnt, so m->refcnt becomes 0 on first 
allocation. This is based on the assumption that other mbuf fields must be 
zeroed at alloc()/free() anyway, so zeroing m->refcnt is cheaper than setting 
it to 1.

Furthermore (regardless of m->refcnt offset), I suggested that it is not 
required to modify m->refcnt when allocating and freeing the mbuf, thus saving 
one write operation on both alloc() and free(). However, this assumes that 
m->refcnt debugging, e.g. underrun detection, is not required.



5.

And here's something new to think about:

m->next already reveals if there are more segments to a packet. Which purpose 
does m->nb_segs serve that is not already covered by m->next?





Med venlig hilsen / kind regards



Morten Brørup

CTO





SmartShare Systems A/S

Tonsbakken 16-18

DK-2740 Skovlunde

Denmark



Office  +45 70 20 00 93

Direct  +45 89 93 50 22

Mobile +45 25 40 82 12



mb at smartsharesystems.com  

www.smartsharesystems.com  





[dpdk-dev] [PATCH v4 11/17] eal/soc: add default scan for Soc devices

2016-10-24 Thread Shreyansh Jain
Hi Jan,

On Sunday 16 October 2016 12:42 PM, Shreyansh Jain wrote:
> Hi Jan,
>
>> -Original Message-
>> From: Jan Viktorin [mailto:viktorin at rehivetech.com]
>> Sent: Sunday, October 16, 2016 6:27 AM
>> To: Shreyansh Jain 
>> Cc: dev at dpdk.org; thomas.monjalon at 6wind.com; david.marchand at 
>> 6wind.com
>> Subject: Re: [PATCH v4 11/17] eal/soc: add default scan for Soc devices
>>
>> On Sat, 15 Oct 2016 19:15:02 +0530
>> Shreyansh Jain  wrote:
>>
>>> From: Jan Viktorin 
>>>
>>> Default implementation which scans the sysfs platform devices hierarchy.
>>> For each device, extract the uevent and convert into rte_soc_device.
>>>
>>> The information populated can then be used in probe to match against
>>> the drivers registered.
>>>
>>> Signed-off-by: Jan Viktorin 
>>> [Shreyansh: restructure commit to be an optional implementation]
>>> Signed-off-by: Shreyansh Jain 
>>
>> [...]
>>
>>> +
>>> +int
>>> +rte_eal_soc_scan(void)
>>
>> What about naming it rte_eal_soc_scan_default? This would underline the
>> fact that this function can be replaced.
>
> Yes, that would be in sync with match default. I will do it.

In v5 I have replaced the name with rte_eal_soc_scan_platform_bus(). This is 
long but it does exactly what the name states - scan the platform bus. 
This is still a helper.

>
>>
>> Second, this is for the 7/17 patch:
>>
>> -/* register a driver */
>>  void
>>  rte_eal_soc_register(struct rte_soc_driver *driver)
>>  {
>> +/* For a valid soc driver, match and scan function
>> + * should be provided.
>> + */
>> +RTE_VERIFY(driver != NULL);
>> +RTE_VERIFY(driver->match_fn != NULL);
>> +RTE_VERIFY(driver->scan_fn != NULL);
>>
>> What about setting the match_fn and scan_fn to default implementations if
>> they
>> are NULL? This would make the standard/default approach easier to use.
>>
>>  TAILQ_INSERT_TAIL(_driver_list, driver, next);
>>  }
>
> I am not in favor of a forced default. What if the user never intended it - it 
> would lead to the wrong scan being used, and the only intimation which can be 
> provided to the user is a log.
> Selecting such functions should be a model of PMD - one which is enforced.

As mentioned before, I am not in favor of a 'default' implementation. 
Thus, I would rather call these functions as 'helpers' rather than defaults.

[...]

-
Shreyansh


[dpdk-dev] [PATCH v5 21/21] eal/crypto: Support rte_soc_driver/device for cryptodev

2016-10-24 Thread Shreyansh Jain
- rte_cryptodev_driver/rte_cryptodev_dev embeds rte_soc_driver/device for
  linking SoC PMDs to crypto devices.
- Add probe and remove functions linked

Signed-off-by: Hemant Agrawal 
Signed-off-by: Shreyansh Jain 
---
 lib/librte_cryptodev/rte_cryptodev.c   | 122 -
 lib/librte_cryptodev/rte_cryptodev.h   |   3 +
 lib/librte_cryptodev/rte_cryptodev_pmd.h   |  18 +++-
 lib/librte_cryptodev/rte_cryptodev_version.map |   2 +
 4 files changed, 140 insertions(+), 5 deletions(-)

diff --git a/lib/librte_cryptodev/rte_cryptodev.c 
b/lib/librte_cryptodev/rte_cryptodev.c
index 127e8d0..77ec9fe 100644
--- a/lib/librte_cryptodev/rte_cryptodev.c
+++ b/lib/librte_cryptodev/rte_cryptodev.c
@@ -422,7 +422,8 @@ rte_cryptodev_pci_probe(struct rte_pci_driver *pci_drv,

int retval;

-   cryptodrv = (struct rte_cryptodev_driver *)pci_drv;
+   cryptodrv = container_of(pci_drv, struct rte_cryptodev_driver,
+pci_drv);
if (cryptodrv == NULL)
return -ENODEV;

@@ -489,7 +490,8 @@ rte_cryptodev_pci_remove(struct rte_pci_device *pci_dev)
if (cryptodev == NULL)
return -ENODEV;

-   cryptodrv = (const struct rte_cryptodev_driver *)pci_dev->driver;
+   cryptodrv = container_of(pci_dev->driver, struct rte_cryptodev_driver,
+pci_drv);
if (cryptodrv == NULL)
return -ENODEV;

@@ -513,6 +515,111 @@ rte_cryptodev_pci_remove(struct rte_pci_device *pci_dev)
return 0;
 }

+
+int
+rte_cryptodev_soc_probe(struct rte_soc_driver *soc_drv,
+ struct rte_soc_device *soc_dev)
+{
+   struct rte_cryptodev_driver *cryptodrv;
+   struct rte_cryptodev *cryptodev;
+
+   char cryptodev_name[RTE_CRYPTODEV_NAME_MAX_LEN];
+
+   int retval;
+
+   cryptodrv = container_of(soc_drv, struct rte_cryptodev_driver,
+soc_drv);
+
+   rte_eal_soc_device_name(_dev->addr, cryptodev_name,
+   sizeof(cryptodev_name));
+
+   cryptodev = rte_cryptodev_pmd_allocate(cryptodev_name,
+  rte_socket_id());
+   if (cryptodev == NULL)
+   return -ENOMEM;
+
+
+   if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+   cryptodev->data->dev_private =
+   rte_zmalloc_socket(
+   "cryptodev private structure",
+   cryptodrv->dev_private_size,
+   RTE_CACHE_LINE_SIZE,
+   rte_socket_id());
+
+   if (cryptodev->data->dev_private == NULL)
+   rte_panic("Cannot allocate memzone for private "
+   "device data");
+   }
+
+   cryptodev->soc_dev = soc_dev;
+   cryptodev->driver = cryptodrv;
+
+   /* init user callbacks */
+   TAILQ_INIT(&(cryptodev->link_intr_cbs));
+
+   /* Invoke PMD device initialization function */
+   retval = (*cryptodrv->cryptodev_init)(cryptodrv, cryptodev);
+   if (retval == 0)
+   return 0;
+
+   CDEV_LOG_ERR("driver %s: cryptodev_init(%s) failed\n",
+   soc_drv->driver.name,
+   soc_dev->addr.name);
+
+   if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+   rte_free(cryptodev->data->dev_private);
+
+   cryptodev->attached = RTE_CRYPTODEV_DETACHED;
+   cryptodev_globals.nb_devs--;
+
+   return -ENXIO;
+}
+
+int
+rte_cryptodev_soc_remove(struct rte_soc_device *soc_dev)
+{
+   const struct rte_cryptodev_driver *cryptodrv;
+   struct rte_cryptodev *cryptodev;
+   char cryptodev_name[RTE_CRYPTODEV_NAME_MAX_LEN];
+   int ret;
+
+   if (soc_dev == NULL)
+   return -EINVAL;
+
+   rte_eal_soc_device_name(_dev->addr, cryptodev_name,
+   sizeof(cryptodev_name));
+
+   cryptodev = rte_cryptodev_pmd_get_named_dev(cryptodev_name);
+   if (cryptodev == NULL)
+   return -ENODEV;
+
+   cryptodrv = container_of(soc_dev->driver,
+   struct rte_cryptodev_driver, soc_drv);
+   if (cryptodrv == NULL)
+   return -ENODEV;
+
+   /* Invoke PMD device uninit function */
+   if (*cryptodrv->cryptodev_uninit) {
+   ret = (*cryptodrv->cryptodev_uninit)(cryptodrv, cryptodev);
+   if (ret)
+   return ret;
+   }
+
+   /* free crypto device */
+   rte_cryptodev_pmd_release_device(cryptodev);
+
+   if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+   rte_free(cryptodev->data->dev_private);
+
+   cryptodev->pci_dev = NULL;
+   cryptodev->soc_dev = NULL;
+   cryptodev->driver = NULL;
+   cryptodev->data = NULL;
+
+   return 0;
+}
+
 uint16_t
 

[dpdk-dev] [PATCH v5 20/21] ether: introduce ethernet dev probe remove

2016-10-24 Thread Shreyansh Jain
From: Jan Viktorin 

Signed-off-by: Jan Viktorin 
Signed-off-by: Shreyansh Jain 
Signed-off-by: Hemant Agrawal 
---
 lib/librte_ether/rte_ethdev.c | 148 +-
 lib/librte_ether/rte_ethdev.h |  31 +
 2 files changed, 177 insertions(+), 2 deletions(-)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index ba9ae1e..78b3fb8 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -325,6 +325,101 @@ rte_eth_dev_pci_remove(struct rte_pci_device *pci_dev)
 }

 int
+rte_eth_dev_soc_probe(struct rte_soc_driver *soc_drv,
+ struct rte_soc_device *soc_dev)
+{
+   struct eth_driver*eth_drv;
+   struct rte_eth_dev *eth_dev;
+   char ethdev_name[RTE_ETH_NAME_MAX_LEN];
+
+   int diag;
+
+   eth_drv = container_of(soc_drv, struct eth_driver, soc_drv);
+
+   rte_eal_soc_device_name(_dev->addr, ethdev_name,
+   sizeof(ethdev_name));
+
+   eth_dev = rte_eth_dev_allocate(ethdev_name);
+   if (eth_dev == NULL)
+   return -ENOMEM;
+
+   if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+   eth_dev->data->dev_private = rte_zmalloc(
+ "ethdev private structure",
+ eth_drv->dev_private_size,
+ RTE_CACHE_LINE_SIZE);
+   if (eth_dev->data->dev_private == NULL)
+   rte_panic("Cannot allocate memzone for private port "
+ "data\n");
+   }
+   eth_dev->soc_dev = soc_dev;
+   eth_dev->driver = eth_drv;
+   eth_dev->data->rx_mbuf_alloc_failed = 0;
+
+   /* init user callbacks */
+   TAILQ_INIT(&(eth_dev->link_intr_cbs));
+
+   /*
+* Set the default MTU.
+*/
+   eth_dev->data->mtu = ETHER_MTU;
+
+   /* Invoke PMD device initialization function */
+   diag = (*eth_drv->eth_dev_init)(eth_dev);
+   if (diag == 0)
+   return 0;
+
+   RTE_PMD_DEBUG_TRACE("driver %s: eth_dev_init(%s) failed\n",
+   soc_drv->driver.name,
+   soc_dev->addr.name);
+   if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+   rte_free(eth_dev->data->dev_private);
+   rte_eth_dev_release_port(eth_dev);
+   return diag;
+}
+
+int
+rte_eth_dev_soc_remove(struct rte_soc_device *soc_dev)
+{
+   const struct eth_driver *eth_drv;
+   struct rte_eth_dev *eth_dev;
+   char ethdev_name[RTE_ETH_NAME_MAX_LEN];
+   int ret;
+
+   if (soc_dev == NULL)
+   return -EINVAL;
+
+   rte_eal_soc_device_name(_dev->addr, ethdev_name,
+   sizeof(ethdev_name));
+
+   eth_dev = rte_eth_dev_allocated(ethdev_name);
+   if (eth_dev == NULL)
+   return -ENODEV;
+
+   eth_drv = container_of(soc_dev->driver, struct eth_driver, soc_drv);
+
+   /* Invoke PMD device uninit function */
+   if (*eth_drv->eth_dev_uninit) {
+   ret = (*eth_drv->eth_dev_uninit)(eth_dev);
+   if (ret)
+   return ret;
+   }
+
+   /* free ether device */
+   rte_eth_dev_release_port(eth_dev);
+
+   if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+   rte_free(eth_dev->data->dev_private);
+
+   eth_dev->soc_dev = NULL;
+   eth_dev->driver = NULL;
+   eth_dev->data = NULL;
+
+   return 0;
+}
+
+
+int
 rte_eth_dev_is_valid_port(uint8_t port_id)
 {
if (port_id >= RTE_MAX_ETHPORTS ||
@@ -1557,6 +1652,7 @@ rte_eth_dev_info_get(uint8_t port_id, struct 
rte_eth_dev_info *dev_info)
RTE_FUNC_PTR_OR_RET(*dev->dev_ops->dev_infos_get);
(*dev->dev_ops->dev_infos_get)(dev, dev_info);
dev_info->pci_dev = dev->pci_dev;
+   dev_info->soc_dev = dev->soc_dev;
dev_info->driver_name = dev->data->drv_name;
dev_info->nb_rx_queues = dev->data->nb_rx_queues;
dev_info->nb_tx_queues = dev->data->nb_tx_queues;
@@ -2534,8 +2630,15 @@ _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
 static inline
 struct rte_intr_handle *eth_dev_get_intr_handle(struct rte_eth_dev *dev)
 {
-   if (dev->pci_dev)
+   if (dev->pci_dev) {
+   RTE_ASSERT(dev->soc_dev == NULL);
return >pci_dev->intr_handle;
+   }
+
+   if (dev->soc_dev) {
+   RTE_ASSERT(dev->pci_dev == NULL);
+   return >soc_dev->intr_handle;
+   }

RTE_ASSERT(0);
return NULL;
@@ -2572,6 +2675,23 @@ rte_eth_dev_rx_intr_ctl(uint8_t port_id, int epfd, int 
op, void *data)
return 0;
 }

+static inline
+const char *eth_dev_get_driver_name(const struct rte_eth_dev *dev)
+{
+   if (dev->pci_dev) {
+   RTE_ASSERT(dev->soc_dev == NULL);
+   return dev->driver->pci_drv.driver.name;
+   }
+
+   if (dev->soc_dev) {
+   

[dpdk-dev] [PATCH v5 18/21] ether: verify we copy info from a PCI device

2016-10-24 Thread Shreyansh Jain
From: Jan Viktorin 

Now that different types of ethdev exist, check for presence of PCI dev
while copying out the info.
Similar would be done for SoC.

Signed-off-by: Jan Viktorin 
Signed-off-by: Shreyansh Jain 
Signed-off-by: Hemant Agrawal 
---
 lib/librte_ether/rte_ethdev.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 9aea048..daa1285 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -3205,6 +3205,8 @@ rte_eth_copy_pci_info(struct rte_eth_dev *eth_dev, struct 
rte_pci_device *pci_de
return;
}

+   RTE_VERIFY(eth_dev->pci_dev != NULL);
+
eth_dev->data->dev_flags = 0;
if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)
eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
-- 
2.7.4



[dpdk-dev] [PATCH v5 17/21] ether: utilize container_of for pci_drv

2016-10-24 Thread Shreyansh Jain
From: Jan Viktorin 

It is not necessary to place the rte_pci_driver at the beginning
of the eth_driver struct anymore as we use the container_of macro
to get the parent pointer.

Signed-off-by: Jan Viktorin 
Signed-off-by: Shreyansh Jain 
Signed-off-by: Hemant Agrawal 
---
 lib/librte_ether/rte_ethdev.c | 4 ++--
 lib/librte_ether/rte_ethdev.h | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 0d9d9c1..9aea048 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -241,7 +241,7 @@ rte_eth_dev_pci_probe(struct rte_pci_driver *pci_drv,

int diag;

-   eth_drv = (struct eth_driver *)pci_drv;
+   eth_drv = container_of(pci_drv, struct eth_driver, pci_drv);

rte_eal_pci_device_name(_dev->addr, ethdev_name,
sizeof(ethdev_name));
@@ -302,7 +302,7 @@ rte_eth_dev_pci_remove(struct rte_pci_device *pci_dev)
if (eth_dev == NULL)
return -ENODEV;

-   eth_drv = (const struct eth_driver *)pci_dev->driver;
+   eth_drv = container_of(pci_dev->driver, struct eth_driver, pci_drv);

/* Invoke PMD device uninit function */
if (*eth_drv->eth_dev_uninit) {
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 38641e8..f893fe0 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -1850,7 +1850,7 @@ typedef int (*eth_dev_uninit_t)(struct rte_eth_dev 
*eth_dev);
  * Each Ethernet driver acts as a PCI driver and is represented by a generic
  * *eth_driver* structure that holds:
  *
- * - An *rte_pci_driver* structure (which must be the first field).
+ * - An *rte_pci_driver* structure.
  *
  * - The *eth_dev_init* function invoked for each matching PCI device.
  *
-- 
2.7.4



[dpdk-dev] [PATCH v5 16/21] eal/soc: additional features for SoC

2016-10-24 Thread Shreyansh Jain
From: Jan Viktorin 

Additional features introduced:
 - Find kernel driver through sysfs bindings
 - Dummy implementation for mapping to kernel driver
 - DMA coherency value from sysfs
 - Numa node number from sysfs
 - Support for updating device during probe if already registered

Signed-off-by: Jan Viktorin 
[Shreyansh: merge multiple patches into single set]
Signed-off-by: Shreyansh Jain 
---
 lib/librte_eal/common/eal_common_soc.c  |  30 
 lib/librte_eal/common/eal_private.h |  23 ++
 lib/librte_eal/common/include/rte_soc.h |  28 +++
 lib/librte_eal/linuxapp/eal/eal_soc.c   | 129 
 4 files changed, 210 insertions(+)

diff --git a/lib/librte_eal/common/eal_common_soc.c 
b/lib/librte_eal/common/eal_common_soc.c
index 44f5559..29c38e0 100644
--- a/lib/librte_eal/common/eal_common_soc.c
+++ b/lib/librte_eal/common/eal_common_soc.c
@@ -114,6 +114,26 @@ rte_eal_soc_probe_one_driver(struct rte_soc_driver *drv,
return ret;
}

+   if (!dev->is_dma_coherent) {
+   if (!(drv->drv_flags & RTE_SOC_DRV_ACCEPT_NONCC)) {
+   RTE_LOG(DEBUG, EAL,
+   "  device is not DMA coherent, skipping\n");
+   return 1;
+   }
+   }
+
+   if (drv->drv_flags & RTE_SOC_DRV_NEED_MAPPING) {
+   /* map resources */
+   ret = rte_eal_soc_map_device(dev);
+   if (ret)
+   return ret;
+   } else if (drv->drv_flags & RTE_SOC_DRV_FORCE_UNBIND
+   && rte_eal_process_type() == RTE_PROC_PRIMARY) {
+   /* unbind */
+   if (soc_unbind_kernel_driver(dev) < 0)
+   return -1;
+   }
+
dev->driver = drv;
RTE_VERIFY(drv->probe != NULL);
return drv->probe(drv, dev);
@@ -166,6 +186,10 @@ rte_eal_soc_detach_dev(struct rte_soc_driver *drv,
if (drv->remove && (drv->remove(dev) < 0))
return -1;  /* negative value is an error */

+   if (drv->drv_flags & RTE_SOC_DRV_NEED_MAPPING)
+   /* unmap resources for devices */
+   rte_eal_soc_unmap_device(dev);
+
/* clear driver structure */
dev->driver = NULL;

@@ -241,6 +265,12 @@ rte_eal_soc_probe_one(const struct rte_soc_addr *addr)
if (addr == NULL)
return -1;

+   /* update current SoC device in global list, kernel bindings might have
+* changed since last time we looked at it.
+*/
+   if (soc_update_device(addr) < 0)
+   goto err_return;
+
TAILQ_FOREACH(dev, _device_list, next) {
if (rte_eal_compare_soc_addr(>addr, addr))
continue;
diff --git a/lib/librte_eal/common/eal_private.h 
b/lib/librte_eal/common/eal_private.h
index d810f9f..30c648d 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -159,6 +159,29 @@ int pci_update_device(const struct rte_pci_addr *addr);
 int pci_unbind_kernel_driver(struct rte_pci_device *dev);

 /**
+ * Update a soc device object by asking the kernel for the latest information.
+ *
+ * This function is private to EAL.
+ *
+ * @param addr
+ *  The SoC address to look for
+ * @return
+ *   - 0 on success.
+ *   - negative on error.
+ */
+int soc_update_device(const struct rte_soc_addr *addr);
+
+/**
+ * Unbind kernel driver for this device
+ *
+ * This function is private to EAL.
+ *
+ * @return
+ *   0 on success, negative on error
+ */
+int soc_unbind_kernel_driver(struct rte_soc_device *dev);
+
+/**
  * Map the PCI resource of a PCI device in virtual memory
  *
  * This function is private to EAL.
diff --git a/lib/librte_eal/common/include/rte_soc.h 
b/lib/librte_eal/common/include/rte_soc.h
index a9b3129..3b8b03f 100644
--- a/lib/librte_eal/common/include/rte_soc.h
+++ b/lib/librte_eal/common/include/rte_soc.h
@@ -46,9 +46,11 @@ extern "C" {

 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
+#include 

 #include 
 #include 
@@ -63,6 +65,14 @@ extern struct soc_device_list soc_device_list;
 TAILQ_HEAD(soc_driver_list, rte_soc_driver); /**< SoC drivers in D-linked Q. */
 TAILQ_HEAD(soc_device_list, rte_soc_device); /**< SoC devices in D-linked Q. */

+#define SOC_MAX_RESOURCE 6
+
+struct rte_soc_resource {
+   uint64_t phys_addr;
+   uint64_t len;
+   void *addr;
+};
+
 struct rte_soc_id {
union {
const char *compatible; /**< OF compatible specification */
@@ -84,8 +94,12 @@ struct rte_soc_device {
struct rte_device device;   /**< Inherit code device */
struct rte_soc_addr addr;   /**< SoC device Location */
struct rte_soc_id *id;  /**< SoC device ID list */
+   struct rte_soc_resource mem_resource[SOC_MAX_RESOURCE];
struct rte_intr_handle intr_handle; /**< Interrupt handle */
struct 

[dpdk-dev] [PATCH v5 15/21] eal/soc: add default scan for Soc devices

2016-10-24 Thread Shreyansh Jain
From: Jan Viktorin 

Default implementation which scans the sysfs platform devices hierarchy.
For each device, extract the uevent and convert into rte_soc_device.

The information populated can then be used in probe to match against
the drivers registered.

Signed-off-by: Jan Viktorin 
[Shreyansh: restructure commit to be an optional implementation]
Signed-off-by: Shreyansh Jain 

--
 v5:
 - Update rte_eal_soc_scan to rte_eal_soc_scan_platform_bus
 - Fix comments over scan and match functions
---
 lib/librte_eal/common/include/rte_soc.h |  16 +-
 lib/librte_eal/linuxapp/eal/eal_soc.c   | 315 
 2 files changed, 329 insertions(+), 2 deletions(-)

diff --git a/lib/librte_eal/common/include/rte_soc.h 
b/lib/librte_eal/common/include/rte_soc.h
index 1f5f81b..a9b3129 100644
--- a/lib/librte_eal/common/include/rte_soc.h
+++ b/lib/librte_eal/common/include/rte_soc.h
@@ -64,7 +64,10 @@ TAILQ_HEAD(soc_driver_list, rte_soc_driver); /**< SoC 
drivers in D-linked Q. */
 TAILQ_HEAD(soc_device_list, rte_soc_device); /**< SoC devices in D-linked Q. */

 struct rte_soc_id {
-   const char *compatible; /**< OF compatible specification */
+   union {
+   const char *compatible; /**< OF compatible specification */
+   char *_compatible;
+   };
uint64_t priv_data; /**< SoC Driver specific data */
 };

@@ -200,7 +203,16 @@ rte_eal_parse_soc_spec(const char *spec, struct 
rte_soc_addr *addr)
 }

 /**
- * Default function for matching the Soc driver with device. Each driver can
+ * Helper function for scanning for new SoC devices on platform bus.
+ *
+ * @return
+ * 0 on success
+ * !0 on failure to scan
+ */
+int rte_eal_soc_scan_platform_bus(void);
+
+/**
+ * Helper function for matching the Soc driver with device. Each driver can
  * either use this function or define their own soc matching function.
  * This function relies on the compatible string extracted from sysfs. But,
  * a SoC might have different way of identifying its devices. Such SoC can
diff --git a/lib/librte_eal/linuxapp/eal/eal_soc.c 
b/lib/librte_eal/linuxapp/eal/eal_soc.c
index 3929a76..d8dfe97 100644
--- a/lib/librte_eal/linuxapp/eal/eal_soc.c
+++ b/lib/librte_eal/linuxapp/eal/eal_soc.c
@@ -48,6 +48,321 @@
 #include 
 #include 

+/** Pathname of SoC devices directory. */
+#define SYSFS_SOC_DEVICES "/sys/bus/platform/devices"
+
+static const char *
+soc_get_sysfs_path(void)
+{
+   const char *path = NULL;
+
+   path = getenv("SYSFS_SOC_DEVICES");
+   if (path == NULL)
+   return SYSFS_SOC_DEVICES;
+
+   return path;
+}
+
+static char *
+dev_read_uevent(const char *dirname)
+{
+   char filename[PATH_MAX];
+   struct stat st;
+   char *buf;
+   ssize_t total = 0;
+   int fd;
+
+   snprintf(filename, sizeof(filename), "%s/uevent", dirname);
+   fd = open(filename, O_RDONLY);
+   if (fd < 0) {
+   RTE_LOG(WARNING, EAL, "Failed to open file %s\n", filename);
+   return strdup("");
+   }
+
+   if (fstat(fd, ) < 0) {
+   RTE_LOG(ERR, EAL, "Failed to stat file %s\n", filename);
+   close(fd);
+   return NULL;
+   }
+
+   if (st.st_size == 0) {
+   close(fd);
+   return strdup("");
+   }
+
+   buf = malloc(st.st_size + 1);
+   if (buf == NULL) {
+   RTE_LOG(ERR, EAL, "Failed to alloc memory to read %s\n",
+   filename);
+   close(fd);
+   return NULL;
+   }
+
+   while (total < st.st_size) {
+   ssize_t rlen = read(fd, buf + total, st.st_size - total);
+   if (rlen < 0) {
+   if (errno == EINTR)
+   continue;
+
+   RTE_LOG(ERR, EAL, "Failed to read file %s\n", filename);
+
+   free(buf);
+   close(fd);
+   return NULL;
+   }
+   if (rlen == 0) /* EOF */
+   break;
+
+   total += rlen;
+   }
+
+   buf[total] = '\0';
+   close(fd);
+
+   return buf;
+}
+
+static const char *
+dev_uevent_find(const char *uevent, const char *key)
+{
+   const size_t keylen = strlen(key);
+   const size_t total = strlen(uevent);
+   const char *p = uevent;
+
+   /* check whether it is the first key */
+   if (!strncmp(uevent, key, keylen))
+   return uevent + keylen;
+
+   /* check 2nd key or further... */
+   do {
+   p = strstr(p, key);
+   if (p == NULL)
+   break;
+
+   if (p[-1] == '\n') /* check we are at a new line */
+   return p + keylen;
+
+   p += keylen; /* skip this one */
+   } while (p - uevent < (ptrdiff_t) total);
+
+   return NULL;
+}
+
+static char *
+strdup_until_nl(const 

[dpdk-dev] [PATCH v5 13/21] eal/soc: add drv_flags

2016-10-24 Thread Shreyansh Jain
From: Jan Viktorin 

The flags are copied from the PCI ones. They should be refactored into a
general set of flags in the future.

Signed-off-by: Jan Viktorin 
Signed-off-by: Shreyansh Jain 
Signed-off-by: Hemant Agrawal 
---
 lib/librte_eal/common/include/rte_soc.h | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/lib/librte_eal/common/include/rte_soc.h 
b/lib/librte_eal/common/include/rte_soc.h
index 90cd6aa..415d409 100644
--- a/lib/librte_eal/common/include/rte_soc.h
+++ b/lib/librte_eal/common/include/rte_soc.h
@@ -123,8 +123,18 @@ struct rte_soc_driver {
soc_scan_t *scan_fn;/**< Callback for scanning SoC bus*/
soc_match_t *match_fn;  /**< Callback to match dev<->drv */
const struct rte_soc_id *id_table; /**< ID table, NULL terminated */
+   uint32_t drv_flags;/**< Control handling of device */
 };

+/** Device needs to map its resources by EAL */
+#define RTE_SOC_DRV_NEED_MAPPING 0x0001
+/** Device needs to be unbound even if no module is provided */
+#define RTE_SOC_DRV_FORCE_UNBIND 0x0004
+/** Device driver supports link state interrupt */
+#define RTE_SOC_DRV_INTR_LSC0x0008
+/** Device driver supports detaching capability */
+#define RTE_SOC_DRV_DETACHABLE  0x0010
+
 /**
  * Utility function to write a SoC device name, this device name can later be
  * used to retrieve the corresponding rte_soc_addr using above functions.
-- 
2.7.4



[dpdk-dev] [PATCH v5 12/21] eal/soc: extend and utilize devargs

2016-10-24 Thread Shreyansh Jain
From: Jan Viktorin 

It is assumed that SoC Devices provided on command line are prefixed with
"soc:". This patch adds parse and attach support for such devices.

Signed-off-by: Jan Viktorin 
Signed-off-by: Shreyansh Jain 
Signed-off-by: Hemant Agrawal 
---
 lib/librte_eal/common/eal_common_dev.c  | 27 +
 lib/librte_eal/common/eal_common_devargs.c  | 17 
 lib/librte_eal/common/eal_common_soc.c  | 61 -
 lib/librte_eal/common/include/rte_devargs.h |  8 
 lib/librte_eal/common/include/rte_soc.h | 24 
 5 files changed, 120 insertions(+), 17 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_dev.c 
b/lib/librte_eal/common/eal_common_dev.c
index 457d227..ebbcf47 100644
--- a/lib/librte_eal/common/eal_common_dev.c
+++ b/lib/librte_eal/common/eal_common_dev.c
@@ -107,17 +107,23 @@ rte_eal_dev_init(void)

 int rte_eal_dev_attach(const char *name, const char *devargs)
 {
-   struct rte_pci_addr addr;
+   struct rte_soc_addr soc_addr;
+   struct rte_pci_addr pci_addr;

if (name == NULL || devargs == NULL) {
RTE_LOG(ERR, EAL, "Invalid device or arguments provided\n");
return -EINVAL;
}

-   if (eal_parse_pci_DomBDF(name, ) == 0) {
-   if (rte_eal_pci_probe_one() < 0)
+   memset(_addr, 0, sizeof(soc_addr));
+   if (rte_eal_parse_soc_spec(name, _addr) == 0) {
+   if (rte_eal_soc_probe_one(_addr) < 0) {
+   free(soc_addr.name);
+   goto err;
+   }
+   } else if (eal_parse_pci_DomBDF(name, _addr) == 0) {
+   if (rte_eal_pci_probe_one(_addr) < 0)
goto err;
-
} else {
if (rte_eal_vdev_init(name, devargs))
goto err;
@@ -132,15 +138,22 @@ err:

 int rte_eal_dev_detach(const char *name)
 {
-   struct rte_pci_addr addr;
+   struct rte_soc_addr soc_addr;
+   struct rte_pci_addr pci_addr;

if (name == NULL) {
RTE_LOG(ERR, EAL, "Invalid device provided.\n");
return -EINVAL;
}

-   if (eal_parse_pci_DomBDF(name, ) == 0) {
-   if (rte_eal_pci_detach() < 0)
+   memset(_addr, 0, sizeof(soc_addr));
+   if (rte_eal_parse_soc_spec(name, _addr) == 0) {
+   if (rte_eal_soc_detach(_addr) < 0) {
+   free(soc_addr.name);
+   goto err;
+   }
+   } else if (eal_parse_pci_DomBDF(name, _addr) == 0) {
+   if (rte_eal_pci_detach(_addr) < 0)
goto err;
} else {
if (rte_eal_vdev_uninit(name))
diff --git a/lib/librte_eal/common/eal_common_devargs.c 
b/lib/librte_eal/common/eal_common_devargs.c
index e403717..e1dae1a 100644
--- a/lib/librte_eal/common/eal_common_devargs.c
+++ b/lib/librte_eal/common/eal_common_devargs.c
@@ -41,6 +41,7 @@
 #include 

 #include 
+#include 
 #include 
 #include "eal_private.h"

@@ -105,6 +106,14 @@ rte_eal_devargs_add(enum rte_devtype devtype, const char 
*devargs_str)
goto fail;

break;
+
+   case RTE_DEVTYPE_WHITELISTED_SOC:
+   case RTE_DEVTYPE_BLACKLISTED_SOC:
+   /* try to parse soc device with prefix "soc:" */
+   if (rte_eal_parse_soc_spec(buf, >soc.addr) != 0)
+   goto fail;
+   break;
+
case RTE_DEVTYPE_VIRTUAL:
/* save driver name */
ret = snprintf(devargs->virt.drv_name,
@@ -166,6 +175,14 @@ rte_eal_devargs_dump(FILE *f)
   devargs->pci.addr.devid,
   devargs->pci.addr.function,
   devargs->args);
+   else if (devargs->type == RTE_DEVTYPE_WHITELISTED_SOC)
+   fprintf(f, "  SoC whitelist %s %s\n",
+  devargs->soc.addr.name,
+  devargs->soc.addr.fdt_path);
+   else if (devargs->type == RTE_DEVTYPE_BLACKLISTED_SOC)
+   fprintf(f, "  SoC blacklist %s %s\n",
+  devargs->soc.addr.name,
+  devargs->soc.addr.fdt_path);
else if (devargs->type == RTE_DEVTYPE_VIRTUAL)
fprintf(f, "  VIRTUAL %s %s\n",
   devargs->virt.drv_name,
diff --git a/lib/librte_eal/common/eal_common_soc.c 
b/lib/librte_eal/common/eal_common_soc.c
index 256cef8..44f5559 100644
--- a/lib/librte_eal/common/eal_common_soc.c
+++ b/lib/librte_eal/common/eal_common_soc.c
@@ -37,6 +37,8 @@

 #include 
 #include 
+#include 
+#include 
 #include 

 #include "eal_private.h"
@@ -70,6 +72,21 @@ rte_eal_soc_match_compat(struct rte_soc_driver *drv,
return 1;
 }

+static struct rte_devargs *soc_devargs_lookup(struct rte_soc_device *dev)
+{
+   

[dpdk-dev] [PATCH v5 11/21] eal/soc: implement probing of drivers

2016-10-24 Thread Shreyansh Jain
Each SoC PMD registers a set of callbacks for scanning its own bus/infra and
matching devices to drivers when probe is called.
This patch introduces the infra for calls to SoC scan on rte_eal_soc_init()
and match on rte_eal_soc_probe().

Patch also adds test case for scan and probe.

Signed-off-by: Jan Viktorin 
Signed-off-by: Shreyansh Jain 
Signed-off-by: Hemant Agrawal 
--
v4:
 - Update test_soc for descriptive test function names
 - Comments over test functions
 - devinit and devuninit --> probe/remove
 - RTE_VERIFY at some places
---
 app/test/test_soc.c | 205 ++-
 lib/librte_eal/bsdapp/eal/rte_eal_version.map   |   4 +
 lib/librte_eal/common/eal_common_soc.c  | 213 +++-
 lib/librte_eal/common/include/rte_soc.h |  83 -
 lib/librte_eal/linuxapp/eal/eal.c   |   5 +
 lib/librte_eal/linuxapp/eal/eal_soc.c   |  21 ++-
 lib/librte_eal/linuxapp/eal/rte_eal_version.map |   4 +
 7 files changed, 523 insertions(+), 12 deletions(-)

diff --git a/app/test/test_soc.c b/app/test/test_soc.c
index ac03e64..b587d5e 100644
--- a/app/test/test_soc.c
+++ b/app/test/test_soc.c
@@ -87,14 +87,65 @@ static int test_compare_addr(void)
  */
 struct test_wrapper {
struct rte_soc_driver soc_drv;
+   struct rte_soc_device soc_dev;
 };

+static int empty_pmd0_probe(struct rte_soc_driver *drv,
+ struct rte_soc_device *dev);
+static int empty_pmd0_remove(struct rte_soc_device *dev);
+
+static void always_find_dev0_cb(void);
+static int match_dev0_by_name(struct rte_soc_driver *drv,
+ struct rte_soc_device *dev);
+
+static void always_find_dev1_cb(void);
+static int match_dev1_by_name(struct rte_soc_driver *drv,
+ struct rte_soc_device *dev);
+
+/**
+ * Dummy probe handler for PMD driver 'pmd0'.
+ *
+ * @param drv
+ * driver object
+ * @param dev
+ * device object
+ * @return
+ * 0 on success
+ */
+static int
+empty_pmd0_probe(struct rte_soc_driver *drv __rte_unused,
+  struct rte_soc_device *dev __rte_unused)
+{
+   return 0;
+}
+
+/**
+ * Remove handler for PMD driver 'pmd0'.
+ *
+ * @param dev
+ * device to remove
+ * @return
+ * 0 on success
+ */
+static int
+empty_pmd0_remove(struct rte_soc_device *dev)
+{
+   /* Release the memory associated with dev->addr.name */
+   free(dev->addr.name);
+
+   return 0;
+}
+
 struct test_wrapper empty_pmd0 = {
.soc_drv = {
.driver = {
.name = "empty_pmd0"
},
-   },
+   .probe = empty_pmd0_probe,
+   .remove = empty_pmd0_remove,
+   .scan_fn = always_find_dev0_cb,
+   .match_fn = match_dev0_by_name,
+   }
 };

 struct test_wrapper empty_pmd1 = {
@@ -102,9 +153,87 @@ struct test_wrapper empty_pmd1 = {
.driver = {
.name = "empty_pmd1"
},
+   .scan_fn = always_find_dev1_cb,
+   .match_fn = match_dev1_by_name,
},
 };

+/**
+ * Bus scan callback of PMD 'pmd0': adds its device 'dev0' to the bus.
+ *
+ * Takes no parameters and returns nothing; safe_strdup() panics if the
+ * device name cannot be allocated.
+ */
+static void
+always_find_dev0_cb(void)
+{
+   /* A real SoC scan would discover devices on its bus; here one fixed
+    * device is named and appended to soc_device_list.
+    */
+   empty_pmd0.soc_dev.addr.name = safe_strdup("empty_pmd0_dev");
+
+   TAILQ_INSERT_TAIL(&soc_device_list, &empty_pmd0.soc_dev, next);
+}
+
+/**
+ * Match device 'dev0' with driver PMD pmd0
+ *
+ * @param drv
+ * Driver against which the matching is done; unused here
+ * @param dev
+ * device to be matched against driver
+ * @return
+ * 0 on successful match
+ * 1 if driver<=>device don't match
+ */
+static int
+match_dev0_by_name(struct rte_soc_driver *drv __rte_unused,
+  struct rte_soc_device *dev)
+{
+   /* An unnamed device, or one carrying another name, is not dev0 */
+   if (!dev->addr.name || strcmp(dev->addr.name, "empty_pmd0_dev"))
+      return 1;
+
+   return 0;
+}
+
+/**
+ * Bus scan callback of PMD 'pmd1': adds its device 'dev1' to the bus.
+ *
+ * Takes no parameters and returns nothing; safe_strdup() panics if the
+ * device name cannot be allocated.
+ */
+static void
+always_find_dev1_cb(void)
+{
+   /* A real SoC scan would discover devices on its bus; here one fixed
+    * device is named and appended to soc_device_list. The name must be
+    * stored in pmd1's own device, the one that is linked below.
+    */
+   empty_pmd1.soc_dev.addr.name = safe_strdup("empty_pmd1_dev");
+
+   TAILQ_INSERT_TAIL(&soc_device_list, &empty_pmd1.soc_dev, next);
+}
+
+/**
+ * Match device 'dev1' with driver PMD pmd1
+ *
+ * @param drv
+ * Driver against which the matching is done; unused here
+ * @param dev
+ * device to be matched against driver
+ * @return
+ * 0 on successful match
+ * 1 if driver<=>device don't match
+ */
+static int
+match_dev1_by_name(struct rte_soc_driver *drv __rte_unused,
+  struct rte_soc_device *dev)
+{
+   /* An unnamed device, or one carrying another name, is not dev1 */
+   if (!dev->addr.name || strcmp(dev->addr.name, "empty_pmd1_dev"))
+      return 1;
+
+   return 0;
+}
+
 static int
 

[dpdk-dev] [PATCH v5 10/21] eal/soc: init SoC infra from EAL

2016-10-24 Thread Shreyansh Jain
From: Jan Viktorin 

Signed-off-by: Jan Viktorin 
Signed-off-by: Shreyansh Jain 
Signed-off-by: Hemant Agrawal 
---
 lib/librte_eal/bsdapp/eal/Makefile|  1 +
 lib/librte_eal/bsdapp/eal/eal.c   |  4 +++
 lib/librte_eal/bsdapp/eal/eal_soc.c   | 46 
 lib/librte_eal/common/eal_private.h   | 10 +++
 lib/librte_eal/linuxapp/eal/Makefile  |  1 +
 lib/librte_eal/linuxapp/eal/eal.c |  3 ++
 lib/librte_eal/linuxapp/eal/eal_soc.c | 56 +++
 7 files changed, 121 insertions(+)
 create mode 100644 lib/librte_eal/bsdapp/eal/eal_soc.c
 create mode 100644 lib/librte_eal/linuxapp/eal/eal_soc.c

diff --git a/lib/librte_eal/bsdapp/eal/Makefile 
b/lib/librte_eal/bsdapp/eal/Makefile
index a15b762..42b3a2b 100644
--- a/lib/librte_eal/bsdapp/eal/Makefile
+++ b/lib/librte_eal/bsdapp/eal/Makefile
@@ -56,6 +56,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_memory.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_hugepage_info.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_thread.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_pci.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_soc.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_debug.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_lcore.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_timer.c
diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c
index 9b93da3..2d62b9d 100644
--- a/lib/librte_eal/bsdapp/eal/eal.c
+++ b/lib/librte_eal/bsdapp/eal/eal.c
@@ -64,6 +64,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -564,6 +565,9 @@ rte_eal_init(int argc, char **argv)
if (rte_eal_pci_init() < 0)
rte_panic("Cannot init PCI\n");

+   if (rte_eal_soc_init() < 0)
+   rte_panic("Cannot init SoC\n");
+
eal_check_mem_on_local_socket();

if (eal_plugins_init() < 0)
diff --git a/lib/librte_eal/bsdapp/eal/eal_soc.c 
b/lib/librte_eal/bsdapp/eal/eal_soc.c
new file mode 100644
index 000..cb297ff
--- /dev/null
+++ b/lib/librte_eal/bsdapp/eal/eal_soc.c
@@ -0,0 +1,46 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 RehiveTech. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of RehiveTech nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+/* Init the SoC EAL subsystem */
+int
+rte_eal_soc_init(void)
+{
+   return 0;
+}
diff --git a/lib/librte_eal/common/eal_private.h 
b/lib/librte_eal/common/eal_private.h
index 0e8d6f7..d810f9f 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -122,6 +122,16 @@ int rte_eal_pci_init(void);
 struct rte_soc_driver;
 struct rte_soc_device;

+/**
+ * Init the SoC infra.
+ *
+ * This function is private to EAL.
+ *
+ * @return
+ *   0 on success, negative on error
+ */
+int rte_eal_soc_init(void);
+
 struct rte_pci_driver;
 struct rte_pci_device;

diff --git a/lib/librte_eal/linuxapp/eal/Makefile 
b/lib/librte_eal/linuxapp/eal/Makefile
index a520477..59e30fa 100644
--- a/lib/librte_eal/linuxapp/eal/Makefile
+++ b/lib/librte_eal/linuxapp/eal/Makefile
@@ -65,6 +65,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_vfio_mp_sync.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_pci.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_pci_uio.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_pci_vfio.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_soc.c
 

[dpdk-dev] [PATCH v5 09/21] eal: introduce command line enable SoC option

2016-10-24 Thread Shreyansh Jain
From: Jan Viktorin 

Support --enable-soc. SoC support is disabled by default.

Signed-off-by: Jan Viktorin 
[Shreyansh: Change --no-soc to --enable-soc; disabled by default]
Signed-off-by: Shreyansh Jain 
Signed-off-by: Hemant Agrawal 
---
 doc/guides/testpmd_app_ug/run_app.rst  | 4 
 lib/librte_eal/common/eal_common_options.c | 5 +
 lib/librte_eal/common/eal_internal_cfg.h   | 1 +
 lib/librte_eal/common/eal_options.h| 2 ++
 4 files changed, 12 insertions(+)

diff --git a/doc/guides/testpmd_app_ug/run_app.rst 
b/doc/guides/testpmd_app_ug/run_app.rst
index d7c5120..4dafe5f 100644
--- a/doc/guides/testpmd_app_ug/run_app.rst
+++ b/doc/guides/testpmd_app_ug/run_app.rst
@@ -156,6 +156,10 @@ See the DPDK Getting Started Guides for more information 
on these options.

 Use malloc instead of hugetlbfs.

+*   ``--enable-soc``
+
+Enable SoC framework support
+

 Testpmd Command-line Options
 
diff --git a/lib/librte_eal/common/eal_common_options.c 
b/lib/librte_eal/common/eal_common_options.c
index 6ca8af1..2156ab3 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -75,6 +75,7 @@ const struct option
 eal_long_options[] = {
{OPT_BASE_VIRTADDR, 1, NULL, OPT_BASE_VIRTADDR_NUM},
{OPT_CREATE_UIO_DEV,0, NULL, OPT_CREATE_UIO_DEV_NUM   },
+   {OPT_ENABLE_SOC,0, NULL, OPT_ENABLE_SOC_NUM   },
{OPT_FILE_PREFIX,   1, NULL, OPT_FILE_PREFIX_NUM  },
{OPT_HELP,  0, NULL, OPT_HELP_NUM },
{OPT_HUGE_DIR,  1, NULL, OPT_HUGE_DIR_NUM },
@@ -843,6 +844,10 @@ eal_parse_common_option(int opt, const char *optarg,
break;

/* long options */
+   case OPT_ENABLE_SOC_NUM:
+   conf->enable_soc = 1;
+   break;
+
case OPT_HUGE_UNLINK_NUM:
conf->hugepage_unlink = 1;
break;
diff --git a/lib/librte_eal/common/eal_internal_cfg.h 
b/lib/librte_eal/common/eal_internal_cfg.h
index 5f1367e..2a6e3ea 100644
--- a/lib/librte_eal/common/eal_internal_cfg.h
+++ b/lib/librte_eal/common/eal_internal_cfg.h
@@ -67,6 +67,7 @@ struct internal_config {
unsigned hugepage_unlink; /**< true to unlink backing files */
volatile unsigned xen_dom0_support; /**< support app running on Xen 
Dom0*/
volatile unsigned no_pci; /**< true to disable PCI */
+   volatile unsigned enable_soc; /**< true to enable SoC */
volatile unsigned no_hpet;/**< true to disable HPET */
volatile unsigned vmware_tsc_map; /**< true to use VMware TSC mapping

* instead of native TSC */
diff --git a/lib/librte_eal/common/eal_options.h 
b/lib/librte_eal/common/eal_options.h
index a881c62..6e679c3 100644
--- a/lib/librte_eal/common/eal_options.h
+++ b/lib/librte_eal/common/eal_options.h
@@ -49,6 +49,8 @@ enum {
OPT_BASE_VIRTADDR_NUM,
 #define OPT_CREATE_UIO_DEV"create-uio-dev"
OPT_CREATE_UIO_DEV_NUM,
+#define OPT_ENABLE_SOC"enable-soc"
+   OPT_ENABLE_SOC_NUM,
 #define OPT_FILE_PREFIX   "file-prefix"
OPT_FILE_PREFIX_NUM,
 #define OPT_HUGE_DIR  "huge-dir"
-- 
2.7.4



[dpdk-dev] [PATCH v5 08/21] eal/soc: implement SoC device list and dump

2016-10-24 Thread Shreyansh Jain
From: Jan Viktorin 

SoC devices would be linked in a separate list (from PCI). This is used for
probe function.
A helper for dumping the device list is added.

Signed-off-by: Jan Viktorin 
Signed-off-by: Shreyansh Jain 
Signed-off-by: Hemant Agrawal 
---
 lib/librte_eal/bsdapp/eal/rte_eal_version.map   |  2 ++
 lib/librte_eal/common/eal_common_soc.c  | 34 +
 lib/librte_eal/common/include/rte_soc.h |  9 +++
 lib/librte_eal/linuxapp/eal/rte_eal_version.map |  2 ++
 4 files changed, 47 insertions(+)

diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map 
b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
index cf6fb8e..86e3cfd 100644
--- a/lib/librte_eal/bsdapp/eal/rte_eal_version.map
+++ b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
@@ -171,11 +171,13 @@ DPDK_16.11 {
rte_eal_dev_attach;
rte_eal_dev_detach;
rte_eal_map_resource;
+   rte_eal_soc_dump;
rte_eal_soc_register;
rte_eal_soc_unregister;
rte_eal_unmap_resource;
rte_eal_vdrv_register;
rte_eal_vdrv_unregister;
+   soc_device_list;
soc_driver_list;

 } DPDK_16.07;
diff --git a/lib/librte_eal/common/eal_common_soc.c 
b/lib/librte_eal/common/eal_common_soc.c
index 56135ed..5dcddc5 100644
--- a/lib/librte_eal/common/eal_common_soc.c
+++ b/lib/librte_eal/common/eal_common_soc.c
@@ -31,6 +31,8 @@
  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */

+#include 
+#include 
 #include 

 #include 
@@ -40,6 +42,38 @@
 /* Global SoC driver list */
 struct soc_driver_list soc_driver_list =
TAILQ_HEAD_INITIALIZER(soc_driver_list);
+struct soc_device_list soc_device_list =
+   TAILQ_HEAD_INITIALIZER(soc_device_list);
+
+/* dump one device */
+static int
+soc_dump_one_device(FILE *f, struct rte_soc_device *dev)
+{
+   int i;
+
+   fprintf(f, "%s", dev->addr.name);
+   fprintf(f, " - fdt_path: %s\n",
+   dev->addr.fdt_path ? dev->addr.fdt_path : "(none)");
+
+   for (i = 0; dev->id && dev->id[i].compatible; ++i)
+   fprintf(f, "   %s\n", dev->id[i].compatible);
+
+   return 0;
+}
+
+/* Dump every device linked in soc_device_list to the stream f.
+ * A NULL stream is silently ignored.
+ */
+void
+rte_eal_soc_dump(FILE *f)
+{
+   struct rte_soc_device *dev = NULL;
+
+   if (!f)
+      return;
+
+   TAILQ_FOREACH(dev, &soc_device_list, next) {
+      soc_dump_one_device(f, dev);
+   }
+}

 /* register a driver */
 void
diff --git a/lib/librte_eal/common/include/rte_soc.h 
b/lib/librte_eal/common/include/rte_soc.h
index d17b20f..4a01af5 100644
--- a/lib/librte_eal/common/include/rte_soc.h
+++ b/lib/librte_eal/common/include/rte_soc.h
@@ -56,8 +56,12 @@ extern "C" {

 extern struct soc_driver_list soc_driver_list;
 /**< Global list of SoC Drivers */
+extern struct soc_device_list soc_device_list;
+/**< Global list of SoC Devices */

 TAILQ_HEAD(soc_driver_list, rte_soc_driver); /**< SoC drivers in D-linked Q. */
+TAILQ_HEAD(soc_device_list, rte_soc_device); /**< SoC devices in D-linked Q. */
+

 struct rte_soc_id {
const char *compatible; /**< OF compatible specification */
@@ -142,6 +146,11 @@ rte_eal_compare_soc_addr(const struct rte_soc_addr *a0,
 }

 /**
+ * Dump discovered SoC devices.
+ */
+void rte_eal_soc_dump(FILE *f);
+
+/**
  * Register a SoC driver.
  */
 void rte_eal_soc_register(struct rte_soc_driver *driver);
diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map 
b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
index ab6b985..0155025 100644
--- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
+++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
@@ -175,11 +175,13 @@ DPDK_16.11 {
rte_eal_dev_attach;
rte_eal_dev_detach;
rte_eal_map_resource;
+   rte_eal_soc_dump;
rte_eal_soc_register;
rte_eal_soc_unregister;
rte_eal_unmap_resource;
rte_eal_vdrv_register;
rte_eal_vdrv_unregister;
+   soc_device_list;
soc_driver_list;

 } DPDK_16.07;
-- 
2.7.4



[dpdk-dev] [PATCH v5 07/21] eal/soc: add SoC PMD register/unregister logic

2016-10-24 Thread Shreyansh Jain
From: Jan Viktorin 

Registration of a SoC driver through a helper RTE_PMD_REGISTER_SOC
(on the lines of RTE_PMD_REGISTER_PCI). soc_driver_list stores all the
registered drivers.

Test case has been introduced to verify the registration and
deregistration.

Signed-off-by: Jan Viktorin 
[Shreyansh: update PMD registration method]
Signed-off-by: Shreyansh Jain 
Signed-off-by: Hemant Agrawal 
---
 app/test/test_soc.c | 111 
 lib/librte_eal/bsdapp/eal/rte_eal_version.map   |   3 +
 lib/librte_eal/common/eal_common_soc.c  |  56 
 lib/librte_eal/common/include/rte_soc.h |  26 ++
 lib/librte_eal/linuxapp/eal/Makefile|   1 +
 lib/librte_eal/linuxapp/eal/rte_eal_version.map |   3 +
 6 files changed, 200 insertions(+)
 create mode 100644 lib/librte_eal/common/eal_common_soc.c

diff --git a/app/test/test_soc.c b/app/test/test_soc.c
index 916a863..ac03e64 100644
--- a/app/test/test_soc.c
+++ b/app/test/test_soc.c
@@ -75,6 +75,108 @@ static int test_compare_addr(void)
free(a2.name);
free(a1.name);
free(a0.name);
+
+   return 0;
+}
+
+/**
+ * Empty PMD driver based on the SoC infra.
+ *
+ * The rte_soc_device is usually wrapped in some higher-level struct
+ * (eth_driver). We simulate such a wrapper with an anonymous struct here.
+ */
+struct test_wrapper {
+   struct rte_soc_driver soc_drv;
+};
+
+struct test_wrapper empty_pmd0 = {
+   .soc_drv = {
+   .driver = {
+   .name = "empty_pmd0"
+   },
+   },
+};
+
+struct test_wrapper empty_pmd1 = {
+   .soc_drv = {
+   .driver = {
+   .name = "empty_pmd1"
+   },
+   },
+};
+
+/* Count the drivers currently linked in soc_driver_list */
+static int
+count_registered_socdrvs(void)
+{
+   int i;
+   struct rte_soc_driver *drv;
+
+   i = 0;
+   TAILQ_FOREACH(drv, &soc_driver_list, next)
+      i += 1;
+
+   return i;
+}
+
+/* Register empty_pmd0 and empty_pmd1, check the driver list after each
+ * step, then unregister both again.
+ *
+ * Returns 0 on success; a failing TEST_ASSERT leaves the function early.
+ */
+static int
+test_register_unregister(void)
+{
+   struct rte_soc_driver *drv;
+   int count;
+
+   rte_eal_soc_register(&empty_pmd0.soc_drv);
+
+   TEST_ASSERT(!TAILQ_EMPTY(&soc_driver_list),
+   "No PMD is present but the empty_pmd0 should be there");
+   drv = TAILQ_FIRST(&soc_driver_list);
+   TEST_ASSERT(!strcmp(drv->driver.name, "empty_pmd0"),
+   "The registered PMD is not empty_pmd0 but '%s'",
+   drv->driver.name);
+
+   rte_eal_soc_register(&empty_pmd1.soc_drv);
+
+   count = count_registered_socdrvs();
+   TEST_ASSERT_EQUAL(count, 2, "Expected 2 PMDs but detected %d", count);
+
+   rte_eal_soc_unregister(&empty_pmd0.soc_drv);
+   count = count_registered_socdrvs();
+   TEST_ASSERT_EQUAL(count, 1, "Expected 1 PMDs but detected %d", count);
+
+   rte_eal_soc_unregister(&empty_pmd1.soc_drv);
+
+   printf("%s has been successful\n", __func__);
+   return 0;
+}
+
+/* save real devices and drivers until the tests finish */
+struct soc_driver_list real_soc_driver_list =
+   TAILQ_HEAD_INITIALIZER(real_soc_driver_list);
+
+/* Move every registered driver from soc_driver_list to
+ * real_soc_driver_list so the tests start with an empty driver list.
+ * Always returns 0.
+ */
+static int test_soc_setup(void)
+{
+   struct rte_soc_driver *drv;
+
+   /* no real drivers for the test */
+   while (!TAILQ_EMPTY(&soc_driver_list)) {
+      drv = TAILQ_FIRST(&soc_driver_list);
+      rte_eal_soc_unregister(drv);
+      TAILQ_INSERT_TAIL(&real_soc_driver_list, drv, next);
+   }
+
+   return 0;
+}
+
+/* Re-register every driver parked in real_soc_driver_list by
+ * test_soc_setup(). Always returns 0.
+ */
+static int test_soc_cleanup(void)
+{
+   struct rte_soc_driver *drv;
+
+   /* bring back real drivers after the test */
+   while (!TAILQ_EMPTY(&real_soc_driver_list)) {
+      drv = TAILQ_FIRST(&real_soc_driver_list);
+      TAILQ_REMOVE(&real_soc_driver_list, drv, next);
+      rte_eal_soc_register(drv);
+   }
+
return 0;
 }

@@ -84,6 +186,15 @@ test_soc(void)
if (test_compare_addr())
return -1;

+   if (test_soc_setup())
+   return -1;
+
+   if (test_register_unregister())
+   return -1;
+
+   if (test_soc_cleanup())
+   return -1;
+
return 0;
 }

diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map 
b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
index 11d9f59..cf6fb8e 100644
--- a/lib/librte_eal/bsdapp/eal/rte_eal_version.map
+++ b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
@@ -171,8 +171,11 @@ DPDK_16.11 {
rte_eal_dev_attach;
rte_eal_dev_detach;
rte_eal_map_resource;
+   rte_eal_soc_register;
+   rte_eal_soc_unregister;
rte_eal_unmap_resource;
rte_eal_vdrv_register;
rte_eal_vdrv_unregister;
+   soc_driver_list;

 } DPDK_16.07;
diff --git a/lib/librte_eal/common/eal_common_soc.c 
b/lib/librte_eal/common/eal_common_soc.c
new file mode 100644
index 000..56135ed
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_soc.c
@@ -0,0 +1,56 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 RehiveTech. All rights 

[dpdk-dev] [PATCH v5 06/21] eal/soc: introduce very essential SoC infra definitions

2016-10-24 Thread Shreyansh Jain
From: Jan Viktorin 

Define initial structures and functions for the SoC infrastructure.
This patch supports only a very minimal set of functions for now.
More features will be added in the following commits.

Includes rte_device/rte_driver inheritance of
rte_soc_device/rte_soc_driver.

Signed-off-by: Jan Viktorin 
Signed-off-by: Shreyansh Jain 
Signed-off-by: Hemant Agrawal 
---
 app/test/Makefile   |   1 +
 app/test/test_soc.c |  90 +
 lib/librte_eal/common/Makefile  |   2 +-
 lib/librte_eal/common/eal_private.h |   4 +
 lib/librte_eal/common/include/rte_soc.h | 138 
 5 files changed, 234 insertions(+), 1 deletion(-)
 create mode 100644 app/test/test_soc.c
 create mode 100644 lib/librte_eal/common/include/rte_soc.h

diff --git a/app/test/Makefile b/app/test/Makefile
index 5be023a..30295af 100644
--- a/app/test/Makefile
+++ b/app/test/Makefile
@@ -77,6 +77,7 @@ APP = test
 #
 SRCS-$(CONFIG_RTE_LIBRTE_CMDLINE) := commands.c
 SRCS-y += test.c
+SRCS-y += test_soc.c
 SRCS-y += resource.c
 SRCS-y += test_resource.c
 test_resource.res: test_resource.c
diff --git a/app/test/test_soc.c b/app/test/test_soc.c
new file mode 100644
index 000..916a863
--- /dev/null
+++ b/app/test/test_soc.c
@@ -0,0 +1,90 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 RehiveTech. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of RehiveTech nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+
+#include "test.h"
+
+static char *safe_strdup(const char *s)
+{
+   char *c = strdup(s);
+
+   if (c == NULL)
+   rte_panic("failed to strdup '%s'\n", s);
+
+   return c;
+}
+
+/* Check rte_eal_compare_soc_addr() on equal and on differing names.
+ *
+ * a0 and a1 carry the same name and a2 a different one; none has an
+ * fdt_path. Returns 0 on success; a failing TEST_ASSERT leaves the
+ * function early.
+ */
+static int test_compare_addr(void)
+{
+   struct rte_soc_addr a0;
+   struct rte_soc_addr a1;
+   struct rte_soc_addr a2;
+
+   a0.name = safe_strdup("ethernet0");
+   a0.fdt_path = NULL;
+
+   a1.name = safe_strdup("ethernet0");
+   a1.fdt_path = NULL;
+
+   a2.name = safe_strdup("ethernet1");
+   a2.fdt_path = NULL;
+
+   TEST_ASSERT(!rte_eal_compare_soc_addr(&a0, &a1),
+   "Failed to compare two soc addresses that equal");
+   TEST_ASSERT(rte_eal_compare_soc_addr(&a0, &a2),
+   "Failed to compare two soc addresses that differs");
+
+   free(a2.name);
+   free(a1.name);
+   free(a0.name);
+   return 0;
+}
+
+static int
+test_soc(void)
+{
+   if (test_compare_addr())
+   return -1;
+
+   return 0;
+}
+
+REGISTER_TEST_COMMAND(soc_autotest, test_soc);
diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
index dfd64aa..b414008 100644
--- a/lib/librte_eal/common/Makefile
+++ b/lib/librte_eal/common/Makefile
@@ -33,7 +33,7 @@ include $(RTE_SDK)/mk/rte.vars.mk

 INC := rte_branch_prediction.h rte_common.h
 INC += rte_debug.h rte_eal.h rte_errno.h rte_launch.h rte_lcore.h
-INC += rte_log.h rte_memory.h rte_memzone.h rte_pci.h
+INC += rte_log.h rte_memory.h rte_memzone.h rte_soc.h rte_pci.h
 INC += rte_per_lcore.h rte_random.h
 INC += rte_tailq.h rte_interrupts.h rte_alarm.h
 INC += rte_string_fns.h rte_version.h
diff --git a/lib/librte_eal/common/eal_private.h 
b/lib/librte_eal/common/eal_private.h
index c8c2131..0e8d6f7 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -36,6 

[dpdk-dev] [PATCH v5 05/21] eal: define container macro

2016-10-24 Thread Shreyansh Jain
From: Jan Viktorin 

Signed-off-by: Jan Viktorin 
Signed-off-by: Shreyansh Jain 
---
 lib/librte_eal/common/include/rte_common.h | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/lib/librte_eal/common/include/rte_common.h 
b/lib/librte_eal/common/include/rte_common.h
index db5ac91..8152bd9 100644
--- a/lib/librte_eal/common/include/rte_common.h
+++ b/lib/librte_eal/common/include/rte_common.h
@@ -331,6 +331,24 @@ rte_bsf32(uint32_t v)
 #define offsetof(TYPE, MEMBER)  __builtin_offsetof (TYPE, MEMBER)
 #endif

+/**
+ * Return pointer to the wrapping struct instance.
+ * Example:
+ *
+ *  struct wrapper {
+ *  ...
+ *  struct child c;
+ *  ...
+ *  };
+ *
+ *  struct child *x = obtain(...);
+ *  struct wrapper *w = container_of(x, struct wrapper, c);
+ */
+#ifndef container_of
+#define container_of(p, type, member) \
+   ((type *) (((char *) (p)) - offsetof(type, member)))
+#endif
+
 #define _RTE_STR(x) #x
 /** Take a macro value and get a string version of it */
 #define RTE_STR(x) _RTE_STR(x)
-- 
2.7.4



[dpdk-dev] [PATCH v5 04/21] eal/linux: generalize PCI kernel driver extraction to EAL

2016-10-24 Thread Shreyansh Jain
From: Jan Viktorin 

Generalize the PCI-specific pci_get_kernel_driver_by_path. The function
is general enough, we have just moved it to eal.c, changed the prefix to
rte_eal and provided it privately to other parts of EAL.

Signed-off-by: Jan Viktorin 
Signed-off-by: Shreyansh Jain 
---
 lib/librte_eal/bsdapp/eal/eal.c   |  7 +++
 lib/librte_eal/common/eal_private.h   | 14 ++
 lib/librte_eal/linuxapp/eal/eal.c | 29 +
 lib/librte_eal/linuxapp/eal/eal_pci.c | 31 +--
 4 files changed, 51 insertions(+), 30 deletions(-)

diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c
index 5271fc2..9b93da3 100644
--- a/lib/librte_eal/bsdapp/eal/eal.c
+++ b/lib/librte_eal/bsdapp/eal/eal.c
@@ -640,3 +640,10 @@ rte_eal_unbind_kernel_driver(const char *devpath 
__rte_unused,
 {
return -ENOTSUP;
 }
+
+int
+rte_eal_get_kernel_driver_by_path(const char *filename __rte_unused,
+ char *dri_name __rte_unused)
+{
+   return -ENOTSUP;
+}
diff --git a/lib/librte_eal/common/eal_private.h 
b/lib/librte_eal/common/eal_private.h
index b0c208a..c8c2131 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -269,6 +269,20 @@ int rte_eal_check_module(const char *module_name);
 int rte_eal_unbind_kernel_driver(const char *devpath, const char *devid);

 /**
+ * Extract the kernel driver name from the absolute path to the driver.
+ *
+ * @param filename  path to the driver ("<path-to-device>/driver")
+ * @param dri_name  target buffer where to place the driver name
+ *  (should be at least PATH_MAX long)
+ *
+ * @return
+ *  -1   on failure
+ *   0   when successful
+ *   1   when there is no such driver
+ */
+int rte_eal_get_kernel_driver_by_path(const char *filename, char *dri_name);
+
+/**
  * Get cpu core_id.
  *
  * This function is private to the EAL.
diff --git a/lib/librte_eal/linuxapp/eal/eal.c 
b/lib/librte_eal/linuxapp/eal/eal.c
index 5f6676d..00af21c 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -969,3 +969,32 @@ error:
fclose(f);
return -1;
 }
+
+int
+rte_eal_get_kernel_driver_by_path(const char *filename, char *dri_name)
+{
+   int count;
+   char path[PATH_MAX];
+   char *name;
+
+   if (!filename || !dri_name)
+   return -1;
+
+   count = readlink(filename, path, PATH_MAX);
+   if (count >= PATH_MAX)
+   return -1;
+
+   /* For device does not have a driver */
+   if (count < 0)
+   return 1;
+
+   path[count] = '\0';
+
+   name = strrchr(path, '/');
+   if (name) {
+   strncpy(dri_name, name + 1, strlen(name + 1) + 1);
+   return 0;
+   }
+
+   return -1;
+}
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c 
b/lib/librte_eal/linuxapp/eal/eal_pci.c
index a03553f..e1cf9e8 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -78,35 +78,6 @@ pci_unbind_kernel_driver(struct rte_pci_device *dev)
return rte_eal_unbind_kernel_driver(devpath, devid);
 }

-static int
-pci_get_kernel_driver_by_path(const char *filename, char *dri_name)
-{
-   int count;
-   char path[PATH_MAX];
-   char *name;
-
-   if (!filename || !dri_name)
-   return -1;
-
-   count = readlink(filename, path, PATH_MAX);
-   if (count >= PATH_MAX)
-   return -1;
-
-   /* For device does not have a driver */
-   if (count < 0)
-   return 1;
-
-   path[count] = '\0';
-
-   name = strrchr(path, '/');
-   if (name) {
-   strncpy(dri_name, name + 1, strlen(name + 1) + 1);
-   return 0;
-   }
-
-   return -1;
-}
-
 /* Map pci device */
 int
 rte_eal_pci_map_device(struct rte_pci_device *dev)
@@ -354,7 +325,7 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t 
bus,

/* parse driver */
snprintf(filename, sizeof(filename), "%s/driver", dirname);
-   ret = pci_get_kernel_driver_by_path(filename, driver);
+   ret = rte_eal_get_kernel_driver_by_path(filename, driver);
if (ret < 0) {
RTE_LOG(ERR, EAL, "Fail to get kernel driver\n");
free(dev);
-- 
2.7.4



[dpdk-dev] [PATCH v5 03/21] eal/linux: generalize PCI kernel unbinding driver to EAL

2016-10-24 Thread Shreyansh Jain
From: Jan Viktorin 

Generalize the PCI-specific pci_unbind_kernel_driver. It is now divided
into two parts. First, determination of the path and string identification
of the device to be unbound. Second, the actual unbind operation which is
generic.

BSD implementation updated as ENOTSUP

Signed-off-by: Jan Viktorin 
Signed-off-by: Shreyansh Jain 
--
Changes since v2:
 - update BSD support for unbind kernel driver
---
 lib/librte_eal/bsdapp/eal/eal.c   |  7 +++
 lib/librte_eal/bsdapp/eal/eal_pci.c   |  4 ++--
 lib/librte_eal/common/eal_private.h   | 13 +
 lib/librte_eal/linuxapp/eal/eal.c | 26 ++
 lib/librte_eal/linuxapp/eal/eal_pci.c | 33 +
 5 files changed, 57 insertions(+), 26 deletions(-)

diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c
index 35e3117..5271fc2 100644
--- a/lib/librte_eal/bsdapp/eal/eal.c
+++ b/lib/librte_eal/bsdapp/eal/eal.c
@@ -633,3 +633,10 @@ rte_eal_process_type(void)
 {
return rte_config.process_type;
 }
+
+int
+rte_eal_unbind_kernel_driver(const char *devpath __rte_unused,
+const char *devid __rte_unused)
+{
+   return -ENOTSUP;
+}
diff --git a/lib/librte_eal/bsdapp/eal/eal_pci.c 
b/lib/librte_eal/bsdapp/eal/eal_pci.c
index 7ed0115..703f034 100644
--- a/lib/librte_eal/bsdapp/eal/eal_pci.c
+++ b/lib/librte_eal/bsdapp/eal/eal_pci.c
@@ -89,11 +89,11 @@

 /* unbind kernel driver for this device */
 int
 pci_unbind_kernel_driver(struct rte_pci_device *dev __rte_unused)
 {
RTE_LOG(ERR, EAL, "RTE_PCI_DRV_FORCE_UNBIND flag is not implemented "
"for BSD\n");
-   return -ENOTSUP;
+   /* No device path or id exists to pass on BSD; the BSD stub of
+    * rte_eal_unbind_kernel_driver() ignores both arguments and
+    * returns -ENOTSUP.
+    */
+   return rte_eal_unbind_kernel_driver(NULL, NULL);
 }

 /* Map pci device */
diff --git a/lib/librte_eal/common/eal_private.h 
b/lib/librte_eal/common/eal_private.h
index 9e7d8f6..b0c208a 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -256,6 +256,19 @@ int rte_eal_alarm_init(void);
 int rte_eal_check_module(const char *module_name);

 /**
+ * Unbind kernel driver bound to the device specified by the given devpath,
+ * and its string identification.
+ *
+ * @param devpath  path to the device directory ("/sys/.../devices/<name>")
+ * @param devid    identification of the device (<name>)
+ *
+ * @return
+ *  -1  unbind has failed
+ *   0  module has been unbound
+ */
+int rte_eal_unbind_kernel_driver(const char *devpath, const char *devid);
+
+/**
  * Get cpu core_id.
  *
  * This function is private to the EAL.
diff --git a/lib/librte_eal/linuxapp/eal/eal.c 
b/lib/librte_eal/linuxapp/eal/eal.c
index 2075282..5f6676d 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -943,3 +943,29 @@ rte_eal_check_module(const char *module_name)
/* Module has been found */
return 1;
 }
+
+int
+rte_eal_unbind_kernel_driver(const char *devpath, const char *devid)
+{
+   char filename[PATH_MAX];
+   FILE *f;
+
+   snprintf(filename, sizeof(filename),
+"%s/driver/unbind", devpath);
+
+   f = fopen(filename, "w");
+   if (f == NULL) /* device was not bound */
+   return 0;
+
+   if (fwrite(devid, strlen(devid), 1, f) == 0) {
+   RTE_LOG(ERR, EAL, "%s(): could not write to %s\n", __func__,
+   filename);
+   goto error;
+   }
+
+   fclose(f);
+   return 0;
+error:
+   fclose(f);
+   return -1;
+}
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c 
b/lib/librte_eal/linuxapp/eal/eal_pci.c
index 876ba38..a03553f 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -59,38 +59,23 @@ int
 pci_unbind_kernel_driver(struct rte_pci_device *dev)
 {
int n;
-   FILE *f;
-   char filename[PATH_MAX];
-   char buf[BUFSIZ];
+   char devpath[PATH_MAX];
+   char devid[BUFSIZ];
struct rte_pci_addr *loc = &dev->addr;

-   /* open /sys/bus/pci/devices/:BB:CC.D/driver */
-   snprintf(filename, sizeof(filename),
-   "%s/" PCI_PRI_FMT "/driver/unbind", pci_get_sysfs_path(),
+   /* devpath /sys/bus/pci/devices/:BB:CC.D */
+   snprintf(devpath, sizeof(devpath),
+   "%s/" PCI_PRI_FMT, pci_get_sysfs_path(),
loc->domain, loc->bus, loc->devid, loc->function);

-   f = fopen(filename, "w");
-   if (f == NULL) /* device was not bound */
-   return 0;
-
-   n = snprintf(buf, sizeof(buf), PCI_PRI_FMT "\n",
+   n = snprintf(devid, sizeof(devid), PCI_PRI_FMT "\n",
 loc->domain, loc->bus, loc->devid, loc->function);
-   if ((n < 0) || (n >= (int)sizeof(buf))) {
+   if ((n < 0) || (n >= (int)sizeof(devid))) {
RTE_LOG(ERR, EAL, "%s(): snprintf failed\n", __func__);
-   goto error;
-  

[dpdk-dev] [PATCH v5 02/21] eal: generalize PCI map/unmap resource to EAL

2016-10-24 Thread Shreyansh Jain
From: Jan Viktorin 

The functions pci_map_resource, pci_unmap_resource are generic so the
pci_* prefix can be omitted. The functions are moved to the
eal_common_dev.c so they can be reused by other infrastructure.

Signed-off-by: Jan Viktorin 
Signed-off-by: Shreyansh Jain 
---
 lib/librte_eal/bsdapp/eal/eal_pci.c |  2 +-
 lib/librte_eal/bsdapp/eal/rte_eal_version.map   |  2 ++
 lib/librte_eal/common/eal_common_dev.c  | 39 +
 lib/librte_eal/common/eal_common_pci.c  | 39 -
 lib/librte_eal/common/eal_common_pci_uio.c  | 16 +-
 lib/librte_eal/common/include/rte_dev.h | 32 
 lib/librte_eal/common/include/rte_pci.h | 32 
 lib/librte_eal/linuxapp/eal/eal_pci_uio.c   |  2 +-
 lib/librte_eal/linuxapp/eal/eal_pci_vfio.c  |  5 ++--
 lib/librte_eal/linuxapp/eal/rte_eal_version.map |  2 ++
 10 files changed, 89 insertions(+), 82 deletions(-)

diff --git a/lib/librte_eal/bsdapp/eal/eal_pci.c 
b/lib/librte_eal/bsdapp/eal/eal_pci.c
index 8b3ed88..7ed0115 100644
--- a/lib/librte_eal/bsdapp/eal/eal_pci.c
+++ b/lib/librte_eal/bsdapp/eal/eal_pci.c
@@ -228,7 +228,7 @@ pci_uio_map_resource_by_index(struct rte_pci_device *dev, 
int res_idx,

/* if matching map is found, then use it */
offset = res_idx * pagesz;
-   mapaddr = pci_map_resource(NULL, fd, (off_t)offset,
+   mapaddr = rte_eal_map_resource(NULL, fd, (off_t)offset,
(size_t)dev->mem_resource[res_idx].len, 0);
close(fd);
if (mapaddr == MAP_FAILED)
diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map 
b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
index 2f81f7c..11d9f59 100644
--- a/lib/librte_eal/bsdapp/eal/rte_eal_version.map
+++ b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
@@ -170,6 +170,8 @@ DPDK_16.11 {
rte_delay_us_callback_register;
rte_eal_dev_attach;
rte_eal_dev_detach;
+   rte_eal_map_resource;
+   rte_eal_unmap_resource;
rte_eal_vdrv_register;
rte_eal_vdrv_unregister;

diff --git a/lib/librte_eal/common/eal_common_dev.c 
b/lib/librte_eal/common/eal_common_dev.c
index 4f3b493..457d227 100644
--- a/lib/librte_eal/common/eal_common_dev.c
+++ b/lib/librte_eal/common/eal_common_dev.c
@@ -36,6 +36,7 @@
 #include 
 #include 
 #include 
+#include 

 #include 
 #include 
@@ -151,3 +152,41 @@ err:
RTE_LOG(ERR, EAL, "Driver cannot detach the device (%s)\n", name);
return -EINVAL;
 }
+
+/* map a particular resource from a file */
+void *
+rte_eal_map_resource(void *requested_addr, int fd, off_t offset, size_t size,
+int additional_flags)
+{
+   void *mapaddr;
+
+   /* Map the Memory resource of device */
+   mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE,
+   MAP_SHARED | additional_flags, fd, offset);
+   if (mapaddr == MAP_FAILED) {
+   RTE_LOG(ERR, EAL, "%s(): cannot mmap(%d, %p, 0x%lx, 0x%lx): %s"
+   " (%p)\n", __func__, fd, requested_addr,
+   (unsigned long)size, (unsigned long)offset,
+   strerror(errno), mapaddr);
+   } else
+   RTE_LOG(DEBUG, EAL, "  Device memory mapped at %p\n", mapaddr);
+
+   return mapaddr;
+}
+
+/* unmap a particular resource */
+void
+rte_eal_unmap_resource(void *requested_addr, size_t size)
+{
+   if (requested_addr == NULL)
+   return;
+
+   /* Unmap the Memory resource of device */
+   if (munmap(requested_addr, size)) {
+   RTE_LOG(ERR, EAL, "%s(): cannot munmap(%p, 0x%lx): %s\n",
+   __func__, requested_addr, (unsigned long)size,
+   strerror(errno));
+   } else
+   RTE_LOG(DEBUG, EAL, "  Device memory unmapped at %p\n",
+   requested_addr);
+}
diff --git a/lib/librte_eal/common/eal_common_pci.c 
b/lib/librte_eal/common/eal_common_pci.c
index 638cd86..464acc1 100644
--- a/lib/librte_eal/common/eal_common_pci.c
+++ b/lib/librte_eal/common/eal_common_pci.c
@@ -67,7 +67,6 @@
 #include 
 #include 
 #include 
-#include 

 #include 
 #include 
@@ -114,44 +113,6 @@ static struct rte_devargs *pci_devargs_lookup(struct 
rte_pci_device *dev)
return NULL;
 }

-/* map a particular resource from a file */
-void *
-pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size,
-int additional_flags)
-{
-   void *mapaddr;
-
-   /* Map the PCI memory resource of device */
-   mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE,
-   MAP_SHARED | additional_flags, fd, offset);
-   if (mapaddr == MAP_FAILED) {
-   RTE_LOG(ERR, EAL, "%s(): cannot mmap(%d, %p, 0x%lx, 0x%lx): %s 
(%p)\n",
-   __func__, fd, requested_addr,
-   (unsigned 

[dpdk-dev] [PATCH v5 01/21] eal: generalize PCI kernel driver enum to EAL

2016-10-24 Thread Shreyansh Jain
From: Jan Viktorin 

Signed-off-by: Jan Viktorin 
Signed-off-by: Shreyansh Jain 

--
Changes since v0:
 - fix compilation error due to missing include
---
 lib/librte_eal/common/include/rte_dev.h | 12 
 lib/librte_eal/common/include/rte_pci.h |  9 -
 2 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/lib/librte_eal/common/include/rte_dev.h 
b/lib/librte_eal/common/include/rte_dev.h
index b3873bd..e73b0fa 100644
--- a/lib/librte_eal/common/include/rte_dev.h
+++ b/lib/librte_eal/common/include/rte_dev.h
@@ -109,6 +109,18 @@ struct rte_mem_resource {
void *addr; /**< Virtual address, NULL when not mapped. */
 };

+/**
+ * Kernel driver passthrough type
+ */
+enum rte_kernel_driver {
+   RTE_KDRV_UNKNOWN = 0,
+   RTE_KDRV_IGB_UIO,
+   RTE_KDRV_VFIO,
+   RTE_KDRV_UIO_GENERIC,
+   RTE_KDRV_NIC_UIO,
+   RTE_KDRV_NONE,
+};
+
 /** Double linked list of device drivers. */
 TAILQ_HEAD(rte_driver_list, rte_driver);
 /** Double linked list of devices. */
diff --git a/lib/librte_eal/common/include/rte_pci.h 
b/lib/librte_eal/common/include/rte_pci.h
index 9ce8847..2c7046f 100644
--- a/lib/librte_eal/common/include/rte_pci.h
+++ b/lib/librte_eal/common/include/rte_pci.h
@@ -135,15 +135,6 @@ struct rte_pci_addr {

 struct rte_devargs;

-enum rte_kernel_driver {
-   RTE_KDRV_UNKNOWN = 0,
-   RTE_KDRV_IGB_UIO,
-   RTE_KDRV_VFIO,
-   RTE_KDRV_UIO_GENERIC,
-   RTE_KDRV_NIC_UIO,
-   RTE_KDRV_NONE,
-};
-
 /**
  * A structure describing a PCI device.
  */
-- 
2.7.4



[dpdk-dev] [PATCH v5 00/21] Introduce SoC device/driver framework for EAL

2016-10-24 Thread Shreyansh Jain
Introduction:
=============

This patch set is direct derivative of Jan's original series [1],[2].

 - This version is based on master HEAD (173511).

 - In this, I am merging the series [11] back. It was initially part
   of this set but I had split considering that those changes in PCI
   were good standalone as well. But, 1) not much feedback was avail-
   able and 2) this patchset is a use-case for those patches making
   it easier to review. Just like what Jan had intended in original
   series.

 - Hereafter, this series is not dependent on any other external patch
   series.

Aim:
====

As of now EAL is primarily focused on PCI initialization/probing.

 rte_eal_init()
  |- rte_eal_pci_init(): Find PCI devices from sysfs
  |- ...
  |- rte_eal_memzone_init()
  |- ...
  `- rte_eal_pci_probe(): Driver<=>Device initialization

This patchset introduces a SoC framework which would enable SoC drivers and
devices to be plugged into EAL, very similar to how PCI drivers/devices are
handled today.

This is a stripped down version of PCI framework which allows the SoC PMDs
to implement their own routines for detecting devices and linking devices to
drivers.

1) Changes to EAL
 rte_eal_init()
  |- rte_eal_pci_init(): Find PCI devices from sysfs
  |- rte_eal_soc_init(): Calls PMDs->scan_fn
  |- ...
  |- rte_eal_memzone_init()
  |- ...
  |- rte_eal_pci_probe(): Driver<=>Device initialization, PMD->devinit()
  `- rte_eal_soc_probe(): Calls PMDs->match_fn and PMDs->devinit();

2) New device/driver structures:
  - rte_soc_driver (inheriting rte_driver)
  - rte_soc_device (inheriting rte_device)
  - rte_eth_dev and eth_driver embedded rte_soc_device and rte_soc_driver,
respectively.

3) The SoC PMDs need to:
 - define rte_soc_driver with necessary scan and match callbacks
 - Register themselves using DRIVER_REGISTER_SOC()
 - Implement respective bus scanning in the scan callbacks to add necessary
   devices to SoC device list
 - Implement necessary eth_dev_init/uninit for ethernet instances

4) Design considerations that are same as PCI:
 - SoC initialization is being done through rte_eal_init(), just after PCI
   initialization is done.
 - As in case of PCI, probe is done after rte_eal_pci_probe() to link the
   devices detected with the drivers registered.
 - Device attach/detach functions are available and have been designed on
   the lines of PCI framework.
 - PMDs register using DRIVER_REGISTER_SOC, very similar to
   DRIVER_REGISTER_PCI for PCI devices.
 - Linked lists of SoC drivers and devices exist independent of the other
   driver/device lists, but by inheriting rte_driver/rte_device, these are
   also part of a global list.

5) Design considerations that are different from PCI:
 - Each driver implements its own scan and match function. PCI uses the BDF
   format to read the device from sysfs, but this _may_not_ be a case for a
   SoC ethernet device.
   = This is an important change from initial proposal by Jan in [2].
   Unlike his attempt to use /sys/bus/platform, this patch relies on the
   PMD to detect the devices. This is because SoC may require specific or
   additional info for device detection. Further, SoC may have embedded
   devices/MACs which require initialization which cannot be covered
   through sysfs parsing.
   `-> Point (6) below is a side note to above.
   = PCI based PMDs rely on EAL's capability to detect devices. This
   proposal puts the onus on PMD to detect devices, add to soc_device_list
   and wait for Probe. Matching, of device<=>driver is again PMD's
   callback.

6) Adding default scan and match helpers for PMDs
 - The design warrants that the PMDs implement their own scan of devices
   on bus, and match routines for probe implementation.
   This patch introduces helpers which can be used by PMDs for scan of
   the platform bus and matching devices against the compatible string
   extracted from the scan.
 - Intention is to make it easier to integrate known SoC which expose
   platform bus compliant information (compat, sys/bus/platform...).
 - PMDs which have deviations from this standard model can implement and
   hook their bus scanning and probe match callbacks while registering
   driver.

Patchset Overview:
==================
 - Patches 0001~0004 are from [11] - moving some PCI specific functions
   and definitions to non-PCI area.
 - Patches 0005~0008 introduce the base infrastructure and test case
 - Patch 0009 is for command line support for no-soc, on lines of no-pci
 - Patch 0010 enables EAL to handle SoC type devices
 - Patch 0011 adds support for scan and probe callbacks and updates the test
   framework with relevant test case.
 - Patch 0012~0014 enable device argument, driver specific flags and
   interrupt handling related basic infra. Subsequent patches build up on
   them.
 - Patch 0015~0016 add support for default function which PMDs can use for
   scanning platform bus. These functions are optional and need to be hooked
   to by PMDs.
 - Patch 0017~0019 makes 

[dpdk-dev] [PATCH v10 0/6] add Tx preparation

2016-10-24 Thread Ananyev, Konstantin
> 
> As discussed in that thread:
> 
> http://dpdk.org/ml/archives/dev/2015-September/023603.html
> 
> Different NIC models depending on HW offload requested might impose
> different requirements on packets to be TX-ed in terms of:
> 
>  - Max number of fragments per packet allowed
>  - Max number of fragments per TSO segments
>  - The way pseudo-header checksum should be pre-calculated
>  - L3/L4 header fields filling
>  - etc.
> 
> 
> MOTIVATION:
> ---
> 
> 1) Some work cannot (and should not) be done in rte_eth_tx_burst.
>However, this work is sometimes required, and now, it's an
>application issue.
> 
> 2) Different hardware may have different requirements for TX offloads,
>other subset can be supported and so on.
> 
> 3) Some parameters (e.g. number of segments in ixgbe driver) may hang
>the device. These parameters may vary for different devices.
> 
>For example i40e HW allows 8 fragments per packet, but that is after
>TSO segmentation. While ixgbe has a 38-fragment pre-TSO limit.
> 
> 4) Fields in packet may require different initialization (like e.g. will
>require pseudo-header checksum precalculation, sometimes in a
>different way depending on packet type, and so on). Now application
>needs to care about it.
> 
> 5) Using an additional API (rte_eth_tx_prep) before rte_eth_tx_burst allows
>the packet burst to be prepared in a form acceptable to the specific device.
> 
> 6) Some additional checks may be done in debug mode keeping tx_burst
>implementation clean.
> 
> 
> PROPOSAL:
> -
> 
> To help user to deal with all these varieties we propose to:
> 
> 1) Introduce rte_eth_tx_prep() function to do necessary preparations of
>packet burst to be safely transmitted on device for desired HW
>offloads (set/reset checksum field according to the hardware
>requirements) and check HW constraints (number of segments per
>packet, etc).
> 
>Since the limitations and requirements may differ between devices, this
>requires extending the rte_eth_dev structure with a new function pointer
>"tx_pkt_prep", which can be implemented in the driver to prepare and
>verify packets, in a device-specific way, before the burst; this should
>prevent the application from sending malformed packets.
> 
> 2) Also new fields will be introduced in rte_eth_desc_lim:
>nb_seg_max and nb_mtu_seg_max, providing an information about max
>segments in TSO and non-TSO packets acceptable by device.
> 
>This information is useful for the application, so that it does not
>create packets the device cannot accept.
> 
> 
> APPLICATION (CASE OF USE):
> --
> 
> 1) Application should initialize the burst of packets to send, set
>required tx offload flags and required fields, like l2_len, l3_len,
>l4_len, and tso_segsz
> 
> 2) Application passes burst to the rte_eth_tx_prep to check conditions
>required to send packets through the NIC.
> 
> 3) The result of rte_eth_tx_prep can be used to send valid packets
>and/or restore invalid if function fails.
> 
> e.g.
> 
>   for (i = 0; i < nb_pkts; i++) {
> 
>   /* initialize or process packet */
> 
>   bufs[i]->tso_segsz = 800;
>   bufs[i]->ol_flags = PKT_TX_TCP_SEG | PKT_TX_IPV4
>   | PKT_TX_IP_CKSUM;
>   bufs[i]->l2_len = sizeof(struct ether_hdr);
>   bufs[i]->l3_len = sizeof(struct ipv4_hdr);
>   bufs[i]->l4_len = sizeof(struct tcp_hdr);
>   }
> 
>   /* Prepare burst of TX packets */
>   nb_prep = rte_eth_tx_prep(port, 0, bufs, nb_pkts);
> 
>   if (nb_prep < nb_pkts) {
>   printf("tx_prep failed\n");
> 
>   /* nb_prep indicates here first invalid packet. rte_eth_tx_prep
>* can be used on remaining packets to find another ones.
>*/
> 
>   }
> 
>   /* Send burst of TX packets */
>   nb_tx = rte_eth_tx_burst(port, 0, bufs, nb_prep);
> 
>   /* Free any unsent packets. */
> 
> v10 changes:
>  - moved drivers tx callback check in rte_eth_tx_prep after queue_id check
> 
> v9 changes:
>  - fixed headers structure fragmentation check
>  - moved fragmentation check into rte_validate_tx_offload()
> 
> v8 changes:
>  - mbuf argument in rte_validate_tx_offload declared as const
> 
> v7 changes:
>  - comments reworded/added
>  - changed errno values returned from Tx prep API
>  - added check in rte_phdr_cksum_fix if headers are in the first
>data segment and can be safely modified
>  - moved rte_validate_tx_offload to rte_mbuf
>  - moved rte_phdr_cksum_fix to rte_net.h
>  - removed rte_pkt.h new file as useless
> 
> v6 changes:
> - added performance impact test results to the patch description
> 
> v5 changes:
>  - rebased csum engine modification
>  - added information to the csum engine about performance tests
>  - some performance improvements
> 
> v4 changes:
>  - tx_prep is now set to default behavior (NULL) for simple/vector path
>in 

[dpdk-dev] mbuf changes

2016-10-24 Thread Bruce Richardson
On Mon, Oct 24, 2016 at 04:11:33PM +, Wiles, Keith wrote:
> 
> > On Oct 24, 2016, at 10:49 AM, Morten Brørup
> > wrote:
> > 
> > First of all: Thanks for a great DPDK Userspace 2016!
> > 
> > 
> > 
> > Continuing the Userspace discussion about Olivier Matz's proposed mbuf
> > changes...

Thanks for keeping the discussion going!
> > 
> > 
> > 
> > 1.
> > 
> > Stephen Hemminger had a noteworthy general comment about keeping metadata
> > for the NIC in the appropriate section of the mbuf: Metadata generated by
> > the NIC's RX handler belongs in the first cache line, and metadata required
> > by the NIC's TX handler belongs in the second cache line. This also means
> > that touching the second cache line on ingress should be avoided if
> > possible; and Bruce Richardson mentioned that for this reason m->next was
> > zeroed on free().
> > 
Thinking about it, I suspect there are more fields we can reset on free
to save time on alloc. Refcnt, as discussed below is one of them, but so
too could be the nb_segs field and possibly others.

> > 
> > 
> > 2.
> > 
> > There seemed to be consensus that the size of m->refcnt should match the 
> > size of m->port because a packet could be duplicated on all physical ports 
> > for L3 multicast and L2 flooding.
> > 
> > Furthermore, although a single physical machine (i.e. a single server) with
> > 255 physical ports probably doesn't exist, it might contain more than 255
> > virtual machines with a virtual port each, so it makes sense extending
> > these mbuf fields from 8 to 16 bits.
> 
> I thought we also talked about removing the m->port from the mbuf as it is 
> not really needed.
> 
Yes, this was mentioned, and also the option of moving the port value to
the second cacheline, but it appears that NXP are using the port value
in their NIC drivers for passing in metadata, so we'd need their
agreement on any move (or removal).

> > 
> > 
> > 
> > 3.
> > 
> > Someone (Bruce Richardson?) suggested moving m->refcnt and m->port to the 
> > second cache line, which then generated questions from the audience about 
> > the real life purpose of m->port, and if m->port could be removed from the 
> > mbuf structure.
> > 
> > 
> > 
> > 4.
> > 
> > I suggested using offset -1 for m->refcnt, so m->refcnt becomes 0 on first 
> > allocation. This is based on the assumption that other mbuf fields must be 
> > zeroed at alloc()/free() anyway, so zeroing m->refcnt is cheaper than 
> > setting it to 1.
> > 
> > Furthermore (regardless of m->refcnt offset), I suggested that it is not 
> > required to modify m->refcnt when allocating and freeing the mbuf, thus 
> > saving one write operation on both alloc() and free(). However, this 
> > assumes that m->refcnt debugging, e.g. underrun detection, is not required.

I don't think it really matters what sentinel value is used for the
refcnt because it can't be blindly assigned on free like other fields.
However, I think 0 as first reference value becomes more awkward
than 1, because we need to deal with underflow. Consider the situation
where we have two references to the mbuf, so refcnt is 1, and both are
freed at the same time. Since the refcnt is not-zero, then both cores
will do an atomic decrement simultaneously giving a refcnt of -1. We can
then set this back to zero before freeing, however, I'd still prefer to
have refcnt be an accurate value so that it always stays positive, and
we can still set it to "one" on free to avoid having to set on alloc.

Also, if we set refcnt on free rather than alloc, it does set itself up
as a good candidate for moving to the second cacheline. Fast-path
processing does not normally update the value.

> > 
> > 
> > 
> > 5.
> > 
> > And here's something new to think about:
> > 
> > m->next already reveals if there are more segments to a packet. Which 
> > purpose does m->nb_segs serve that is not already covered by m->next?

It is duplicate info, but nb_segs can be used to check the validity of
the next pointer without having to read the second mbuf cacheline.

Whether it's worth having is something I'm happy enough to discuss,
though.

One other point I'll mention is that we need to have a discussion on
how/where to add in a timestamp value into the mbuf. Personally, I think
it can be in a union with the sequence number value, but I also suspect
that 32-bits of a timestamp is not going to be enough for many.

Thoughts?

/Bruce


[dpdk-dev] mbuf changes

2016-10-24 Thread Wiles, Keith

> On Oct 24, 2016, at 10:49 AM, Morten Brørup
> wrote:
> 
> First of all: Thanks for a great DPDK Userspace 2016!
> 
> 
> 
> Continuing the Userspace discussion about Olivier Matz's proposed mbuf
> changes...
> 
> 
> 
> 1.
> 
> Stephen Hemminger had a noteworthy general comment about keeping metadata for
> the NIC in the appropriate section of the mbuf: Metadata generated by the
> NIC's RX handler belongs in the first cache line, and metadata required by
> the NIC's TX handler belongs in the second cache line. This also means that
> touching the second cache line on ingress should be avoided if possible; and
> Bruce Richardson mentioned that for this reason m->next was zeroed on free().
> 
> 
> 
> 2.
> 
> There seemed to be consensus that the size of m->refcnt should match the size 
> of m->port because a packet could be duplicated on all physical ports for L3 
> multicast and L2 flooding.
> 
> Furthermore, although a single physical machine (i.e. a single server) with
> 255 physical ports probably doesn't exist, it might contain more than 255
> virtual machines with a virtual port each, so it makes sense extending these
> mbuf fields from 8 to 16 bits.

I thought we also talked about removing the m->port from the mbuf as it is not 
really needed.

> 
> 
> 
> 3.
> 
> Someone (Bruce Richardson?) suggested moving m->refcnt and m->port to the 
> second cache line, which then generated questions from the audience about the 
> real life purpose of m->port, and if m->port could be removed from the mbuf 
> structure.
> 
> 
> 
> 4.
> 
> I suggested using offset -1 for m->refcnt, so m->refcnt becomes 0 on first 
> allocation. This is based on the assumption that other mbuf fields must be 
> zeroed at alloc()/free() anyway, so zeroing m->refcnt is cheaper than setting 
> it to 1.
> 
> Furthermore (regardless of m->refcnt offset), I suggested that it is not 
> required to modify m->refcnt when allocating and freeing the mbuf, thus 
> saving one write operation on both alloc() and free(). However, this assumes 
> that m->refcnt debugging, e.g. underrun detection, is not required.
> 
> 
> 
> 5.
> 
> And here's something new to think about:
> 
> m->next already reveals if there are more segments to a packet. Which purpose 
> does m->nb_segs serve that is not already covered by m->next?
> 
> 
> 
> 
> 
> Med venlig hilsen / kind regards
> 
> 
> 
> Morten Brørup
> 
> CTO
> 
> 
> 
> 
> 
> SmartShare Systems A/S
> 
> Tonsbakken 16-18
> 
> DK-2740 Skovlunde
> 
> Denmark
> 
> 
> 
> Office  +45 70 20 00 93
> 
> Direct  +45 89 93 50 22
> 
> Mobile +45 25 40 82 12
> 
> 
> 
> mb at smartsharesystems.com  
> 
> www.smartsharesystems.com  
> 
> 
> 

Regards,
Keith



[dpdk-dev] [PATCH] Revert "bonding: use existing enslaved device queues"

2016-10-24 Thread Declan Doherty
On 24/10/16 15:51, Jan Blunck wrote:
> On Mon, Oct 24, 2016 at 7:02 AM, Declan Doherty
>  wrote:
>> On 14/10/16 00:37, Eric Kinzie wrote:
>>>
>>> On Wed Oct 12 16:24:21 +0100 2016, Bruce Richardson wrote:

 On Wed, Oct 12, 2016 at 04:24:54PM +0300, Ilya Maximets wrote:
>
> On 07.10.2016 05:02, Eric Kinzie wrote:
>>
>> On Wed Sep 07 15:28:10 +0300 2016, Ilya Maximets wrote:
>>>
>>> This reverts commit 5b7bb2bda5519b7800f814df64d4e015282140e5.
>>>
>>> It is necessary to reconfigure all queues every time because
>>> configuration
>>> can be changed.
>>>
>>> For example, if we're reconfiguring bonding device with new memory
>>> pool,
>>> already configured queues will still use the old one. And if the old
>>> mempool be freed, application likely will panic in attempt to use
>>> freed mempool.
>>>
>>> This happens when we use the bonding device with OVS 2.6 while MTU
>>> reconfiguration:
>>>
>>> PANIC in rte_mempool_get_ops():
>>> assert "(ops_index >= 0) && (ops_index < RTE_MEMPOOL_MAX_OPS_IDX)"
>>> failed
>>>
>>> Cc: 
>>> Signed-off-by: Ilya Maximets 
>>> ---
>>>  drivers/net/bonding/rte_eth_bond_pmd.c | 10 ++
>>>  1 file changed, 2 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c
>>> b/drivers/net/bonding/rte_eth_bond_pmd.c
>>> index b20a272..eb5b6d1 100644
>>> --- a/drivers/net/bonding/rte_eth_bond_pmd.c
>>> +++ b/drivers/net/bonding/rte_eth_bond_pmd.c
>>> @@ -1305,8 +1305,6 @@ slave_configure(struct rte_eth_dev
>>> *bonded_eth_dev,
>>> struct bond_rx_queue *bd_rx_q;
>>> struct bond_tx_queue *bd_tx_q;
>>>
>>> -   uint16_t old_nb_tx_queues = slave_eth_dev->data->nb_tx_queues;
>>> -   uint16_t old_nb_rx_queues = slave_eth_dev->data->nb_rx_queues;
>>> int errval;
>>> uint16_t q_id;
>>>
>>> @@ -1347,9 +1345,7 @@ slave_configure(struct rte_eth_dev
>>> *bonded_eth_dev,
>>> }
>>>
>>> /* Setup Rx Queues */
>>> -   /* Use existing queues, if any */
>>> -   for (q_id = old_nb_rx_queues;
>>> -q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
>>> +   for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues;
>>> q_id++) {
>>> bd_rx_q = (struct bond_rx_queue
>>> *)bonded_eth_dev->data->rx_queues[q_id];
>>>
>>> errval =
>>> rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
>>> @@ -1365,9 +1361,7 @@ slave_configure(struct rte_eth_dev
>>> *bonded_eth_dev,
>>> }
>>>
>>> /* Setup Tx Queues */
>>> -   /* Use existing queues, if any */
>>> -   for (q_id = old_nb_tx_queues;
>>> -q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
>>> +   for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues;
>>> q_id++) {
>>> bd_tx_q = (struct bond_tx_queue
>>> *)bonded_eth_dev->data->tx_queues[q_id];
>>>
>>> errval =
>>> rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
>>> --
>>> 2.7.4
>>>
>>
>> NAK
>>
>> There are still some users of this code.  Let's give them a chance to
>> comment before removing it.
>
>
> Hi Eric,
>
> Are these users in CC-list? If not, could you, please, add them?
> This patch awaits in mail-list already more than a month. I think, it's
> enough
> time period for all who wants to say something. Patch fixes a real bug
> that
> prevent using of DPDK bonding in all applications that reconfigures
> devices
> in runtime including OVS.
>
 Agreed.

 Eric, does reverting this patch cause you problems directly, or is your
 concern
 just with regards to potential impact to others?

 Thanks,
 /Bruce
>>>
>>>
>>> This won't impact me directly.  The users are CCed (different thread)
>>> and I haven't seen any comment, so I no longer have any objection to
>>> reverting this change.
>>>
>>> Eric
>>>
>>
>> As there has been no further objections and this reinstates the original
>> expected behavior of the bonding driver. I'm re-ack'ing for inclusion in
>> release.
>>
>> Acked-by: Declan Doherty 
>
> Ok, I can revert the revert for us.
>
> Do I read this correctly that you are not interested in fixing this properly?!
>
> Thanks,
> Jan
>

Jan, sorry I missed the replies from last week due to the way my mail 
client was filtering the conversation. Let me have another look at this 
and I'll come back to the list.

Thanks
Declan


[dpdk-dev] [PATCH v9 6/6] testpmd: use Tx preparation in csum engine

2016-10-24 Thread Tomasz Kulasek
Removed pseudo header calculation for udp/tcp/tso packets from
application and used Tx preparation API for packet preparation and
verification.

Adding this additional step to the csum engine costs about 3-4% in
performance on my setup with the ixgbe driver. The drop is caused mostly by
the need to re-access and modify packet data.

Signed-off-by: Tomasz Kulasek 
---
 app/test-pmd/csumonly.c |   36 +---
 1 file changed, 13 insertions(+), 23 deletions(-)

diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c
index 57e6ae2..6f33ae9 100644
--- a/app/test-pmd/csumonly.c
+++ b/app/test-pmd/csumonly.c
@@ -112,15 +112,6 @@ struct simple_gre_hdr {
 } __attribute__((__packed__));

 static uint16_t
-get_psd_sum(void *l3_hdr, uint16_t ethertype, uint64_t ol_flags)
-{
-   if (ethertype == _htons(ETHER_TYPE_IPv4))
-   return rte_ipv4_phdr_cksum(l3_hdr, ol_flags);
-   else /* assume ethertype == ETHER_TYPE_IPv6 */
-   return rte_ipv6_phdr_cksum(l3_hdr, ol_flags);
-}
-
-static uint16_t
 get_udptcp_checksum(void *l3_hdr, void *l4_hdr, uint16_t ethertype)
 {
if (ethertype == _htons(ETHER_TYPE_IPv4))
@@ -370,32 +361,24 @@ process_inner_cksums(void *l3_hdr, const struct 
testpmd_offload_info *info,
/* do not recalculate udp cksum if it was 0 */
if (udp_hdr->dgram_cksum != 0) {
udp_hdr->dgram_cksum = 0;
-   if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_UDP_CKSUM) {
+   if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_UDP_CKSUM)
ol_flags |= PKT_TX_UDP_CKSUM;
-   udp_hdr->dgram_cksum = get_psd_sum(l3_hdr,
-   info->ethertype, ol_flags);
-   } else {
+   else
udp_hdr->dgram_cksum =
get_udptcp_checksum(l3_hdr, udp_hdr,
info->ethertype);
-   }
}
} else if (info->l4_proto == IPPROTO_TCP) {
tcp_hdr = (struct tcp_hdr *)((char *)l3_hdr + info->l3_len);
tcp_hdr->cksum = 0;
-   if (tso_segsz) {
+   if (tso_segsz)
ol_flags |= PKT_TX_TCP_SEG;
-   tcp_hdr->cksum = get_psd_sum(l3_hdr, info->ethertype,
-   ol_flags);
-   } else if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_TCP_CKSUM) {
+   else if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_TCP_CKSUM)
ol_flags |= PKT_TX_TCP_CKSUM;
-   tcp_hdr->cksum = get_psd_sum(l3_hdr, info->ethertype,
-   ol_flags);
-   } else {
+   else
tcp_hdr->cksum =
get_udptcp_checksum(l3_hdr, tcp_hdr,
info->ethertype);
-   }
} else if (info->l4_proto == IPPROTO_SCTP) {
sctp_hdr = (struct sctp_hdr *)((char *)l3_hdr + info->l3_len);
sctp_hdr->cksum = 0;
@@ -648,6 +631,7 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
void *l3_hdr = NULL, *outer_l3_hdr = NULL; /* can be IPv4 or IPv6 */
uint16_t nb_rx;
uint16_t nb_tx;
+   uint16_t nb_prep;
uint16_t i;
uint64_t rx_ol_flags, tx_ol_flags;
uint16_t testpmd_ol_flags;
@@ -857,7 +841,13 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
printf("\n");
}
}
-   nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_rx);
+   nb_prep = rte_eth_tx_prep(fs->tx_port, fs->tx_queue, pkts_burst,
+   nb_rx);
+   if (nb_prep != nb_rx)
+   printf("Preparing packet burst to transmit failed: %s\n",
+   rte_strerror(rte_errno));
+
+   nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, 
nb_prep);
/*
 * Retry if necessary
 */
-- 
1.7.9.5



[dpdk-dev] [PATCH v9 5/6] ixgbe: add Tx preparation

2016-10-24 Thread Tomasz Kulasek
Signed-off-by: Tomasz Kulasek 
---
 drivers/net/ixgbe/ixgbe_ethdev.c |3 ++
 drivers/net/ixgbe/ixgbe_ethdev.h |5 +++-
 drivers/net/ixgbe/ixgbe_rxtx.c   |   58 +-
 drivers/net/ixgbe/ixgbe_rxtx.h   |2 ++
 4 files changed, 66 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
index 4ca5747..4c6a8e1 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.c
+++ b/drivers/net/ixgbe/ixgbe_ethdev.c
@@ -517,6 +517,8 @@ static const struct rte_eth_desc_lim tx_desc_lim = {
.nb_max = IXGBE_MAX_RING_DESC,
.nb_min = IXGBE_MIN_RING_DESC,
.nb_align = IXGBE_TXD_ALIGN,
+   .nb_seg_max = IXGBE_TX_MAX_SEG,
+   .nb_mtu_seg_max = IXGBE_TX_MAX_SEG,
 };

 static const struct eth_dev_ops ixgbe_eth_dev_ops = {
@@ -1103,6 +1105,7 @@ eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev)
eth_dev->dev_ops = _eth_dev_ops;
eth_dev->rx_pkt_burst = _recv_pkts;
eth_dev->tx_pkt_burst = _xmit_pkts;
+   eth_dev->tx_pkt_prep = _prep_pkts;

/*
 * For secondary processes, we don't initialise any further as primary
diff --git a/drivers/net/ixgbe/ixgbe_ethdev.h b/drivers/net/ixgbe/ixgbe_ethdev.h
index 4ff6338..e229cf5 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.h
+++ b/drivers/net/ixgbe/ixgbe_ethdev.h
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -396,6 +396,9 @@ uint16_t ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf 
**tx_pkts,
 uint16_t ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);

+uint16_t ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts);
+
 int ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
  struct rte_eth_rss_conf *rss_conf);

diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
index 2ce8234..031414c 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   Copyright 2014 6WIND S.A.
  *   All rights reserved.
  *
@@ -70,6 +70,7 @@
 #include 
 #include 
 #include 
+#include 

 #include "ixgbe_logs.h"
 #include "base/ixgbe_api.h"
@@ -87,6 +88,9 @@
PKT_TX_TCP_SEG | \
PKT_TX_OUTER_IP_CKSUM)

+#define IXGBE_TX_OFFLOAD_NOTSUP_MASK \
+   (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK)
+
 #if 1
 #define RTE_PMD_USE_PREFETCH
 #endif
@@ -905,6 +909,56 @@ end_of_tx:

 /*
  *
+ *  TX prep functions
+ *
+ **/
+uint16_t
+ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+   int i, ret;
+   uint64_t ol_flags;
+   struct rte_mbuf *m;
+   struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
+
+   for (i = 0; i < nb_pkts; i++) {
+   m = tx_pkts[i];
+   ol_flags = m->ol_flags;
+
+   /**
+* Check if packet meets requirements for number of segments
+*
+* NOTE: for ixgbe it's always (40 - WTHRESH) for both TSO and 
non-TSO
+*/
+
+   if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) {
+   rte_errno = -EINVAL;
+   return i;
+   }
+
+   if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) {
+   rte_errno = -ENOTSUP;
+   return i;
+   }
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+   ret = rte_validate_tx_offload(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+#endif
+   ret = rte_phdr_cksum_fix(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+   }
+
+   return i;
+}
+
+/*
+ *
  *  RX functions
  *
  **/
@@ -2282,6 +2336,7 @@ ixgbe_set_tx_function(struct rte_eth_dev *dev, struct 
ixgbe_tx_queue *txq)
if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS)
&& (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
PMD_INIT_LOG(DEBUG, "Using simple tx code path");
+   dev->tx_pkt_prep = NULL;
 #ifdef RTE_IXGBE_INC_VECTOR
if (txq->tx_rs_thresh 

[dpdk-dev] [PATCH v9 4/6] i40e: add Tx preparation

2016-10-24 Thread Tomasz Kulasek
Signed-off-by: Tomasz Kulasek 
---
 drivers/net/i40e/i40e_ethdev.c |3 ++
 drivers/net/i40e/i40e_rxtx.c   |   72 +++-
 drivers/net/i40e/i40e_rxtx.h   |8 +
 3 files changed, 82 insertions(+), 1 deletion(-)

diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index 5af0e43..dab0d48 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -936,6 +936,7 @@ eth_i40e_dev_init(struct rte_eth_dev *dev)
dev->dev_ops = _eth_dev_ops;
dev->rx_pkt_burst = i40e_recv_pkts;
dev->tx_pkt_burst = i40e_xmit_pkts;
+   dev->tx_pkt_prep = i40e_prep_pkts;

/* for secondary processes, we don't initialise any further as primary
 * has already done this work. Only check we don't need a different
@@ -2629,6 +2630,8 @@ i40e_dev_info_get(struct rte_eth_dev *dev, struct 
rte_eth_dev_info *dev_info)
.nb_max = I40E_MAX_RING_DESC,
.nb_min = I40E_MIN_RING_DESC,
.nb_align = I40E_ALIGN_RING_DESC,
+   .nb_seg_max = I40E_TX_MAX_SEG,
+   .nb_mtu_seg_max = I40E_TX_MAX_MTU_SEG,
};

if (pf->flags & I40E_FLAG_VMDQ) {
diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index 7ae7d9f..7f6d3d8 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -50,6 +50,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 

 #include "i40e_logs.h"
 #include "base/i40e_prototype.h"
@@ -79,6 +81,17 @@
PKT_TX_TCP_SEG | \
PKT_TX_OUTER_IP_CKSUM)

+#define I40E_TX_OFFLOAD_MASK (  \
+   PKT_TX_IP_CKSUM |   \
+   PKT_TX_L4_MASK |\
+   PKT_TX_OUTER_IP_CKSUM | \
+   PKT_TX_TCP_SEG |\
+   PKT_TX_QINQ_PKT |   \
+   PKT_TX_VLAN_PKT)
+
+#define I40E_TX_OFFLOAD_NOTSUP_MASK \
+   (PKT_TX_OFFLOAD_MASK ^ I40E_TX_OFFLOAD_MASK)
+
 static uint16_t i40e_xmit_pkts_simple(void *tx_queue,
  struct rte_mbuf **tx_pkts,
  uint16_t nb_pkts);
@@ -1411,6 +1424,61 @@ i40e_xmit_pkts_simple(void *tx_queue,
return nb_tx;
 }

+/*
+ *
+ *  TX prep functions
+ *
+ **/
+uint16_t
+i40e_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts)
+{
+   int i, ret;
+   uint64_t ol_flags;
+   struct rte_mbuf *m;
+
+   for (i = 0; i < nb_pkts; i++) {
+   m = tx_pkts[i];
+   ol_flags = m->ol_flags;
+
+   /**
+* m->nb_segs is uint8_t, so m->nb_segs is always less than
+* I40E_TX_MAX_SEG.
+* We check only a condition for m->nb_segs > 
I40E_TX_MAX_MTU_SEG.
+*/
+   if (!(ol_flags & PKT_TX_TCP_SEG)) {
+   if (m->nb_segs > I40E_TX_MAX_MTU_SEG) {
+   rte_errno = -EINVAL;
+   return i;
+   }
+   } else if ((m->tso_segsz < I40E_MIN_TSO_MSS) ||
+   (m->tso_segsz > I40E_MAX_TSO_MSS)) {
+   /* MSS outside the range (256B - 9674B) are considered 
malicious */
+   rte_errno = -EINVAL;
+   return i;
+   }
+
+   if (ol_flags & I40E_TX_OFFLOAD_NOTSUP_MASK) {
+   rte_errno = -ENOTSUP;
+   return i;
+   }
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+   ret = rte_validate_tx_offload(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+#endif
+   ret = rte_phdr_cksum_fix(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+   }
+   return i;
+}
+
 /*
  * Find the VSI the queue belongs to. 'queue_idx' is the queue index
  * application used, which assume having sequential ones. But from driver's
@@ -2763,9 +2831,11 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
PMD_INIT_LOG(DEBUG, "Simple tx finally be used.");
dev->tx_pkt_burst = i40e_xmit_pkts_simple;
}
+   dev->tx_pkt_prep = NULL;
} else {
PMD_INIT_LOG(DEBUG, "Xmit tx finally be used.");
dev->tx_pkt_burst = i40e_xmit_pkts;
+

[dpdk-dev] [PATCH v9 3/6] fm10k: add Tx preparation

2016-10-24 Thread Tomasz Kulasek
Signed-off-by: Tomasz Kulasek 
---
 drivers/net/fm10k/fm10k.h|6 +
 drivers/net/fm10k/fm10k_ethdev.c |5 
 drivers/net/fm10k/fm10k_rxtx.c   |   50 +-
 3 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/drivers/net/fm10k/fm10k.h b/drivers/net/fm10k/fm10k.h
index 05aa1a2..c6fed21 100644
--- a/drivers/net/fm10k/fm10k.h
+++ b/drivers/net/fm10k/fm10k.h
@@ -69,6 +69,9 @@
 #define FM10K_MAX_RX_DESC  (FM10K_MAX_RX_RING_SZ / sizeof(union fm10k_rx_desc))
 #define FM10K_MAX_TX_DESC  (FM10K_MAX_TX_RING_SZ / sizeof(struct 
fm10k_tx_desc))

+#define FM10K_TX_MAX_SEG UINT8_MAX
+#define FM10K_TX_MAX_MTU_SEG UINT8_MAX
+
 /*
  * byte aligment for HW RX data buffer
  * Datasheet requires RX buffer addresses shall either be 512-byte aligned or
@@ -356,6 +359,9 @@ fm10k_dev_rx_descriptor_done(void *rx_queue, uint16_t 
offset);
 uint16_t fm10k_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);

+uint16_t fm10k_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts);
+
 int fm10k_rxq_vec_setup(struct fm10k_rx_queue *rxq);
 int fm10k_rx_vec_condition_check(struct rte_eth_dev *);
 void fm10k_rx_queue_release_mbufs_vec(struct fm10k_rx_queue *rxq);
diff --git a/drivers/net/fm10k/fm10k_ethdev.c b/drivers/net/fm10k/fm10k_ethdev.c
index c804436..dffb6d1 100644
--- a/drivers/net/fm10k/fm10k_ethdev.c
+++ b/drivers/net/fm10k/fm10k_ethdev.c
@@ -1446,6 +1446,8 @@ fm10k_dev_infos_get(struct rte_eth_dev *dev,
.nb_max = FM10K_MAX_TX_DESC,
.nb_min = FM10K_MIN_TX_DESC,
.nb_align = FM10K_MULT_TX_DESC,
+   .nb_seg_max = FM10K_TX_MAX_SEG,
+   .nb_mtu_seg_max = FM10K_TX_MAX_MTU_SEG,
};

dev_info->speed_capa = ETH_LINK_SPEED_1G | ETH_LINK_SPEED_2_5G |
@@ -2754,8 +2756,10 @@ fm10k_set_tx_function(struct rte_eth_dev *dev)
fm10k_txq_vec_setup(txq);
}
dev->tx_pkt_burst = fm10k_xmit_pkts_vec;
+   dev->tx_pkt_prep = NULL;
} else {
dev->tx_pkt_burst = fm10k_xmit_pkts;
+   dev->tx_pkt_prep = fm10k_prep_pkts;
PMD_INIT_LOG(DEBUG, "Use regular Tx func");
}
 }
@@ -2834,6 +2838,7 @@ eth_fm10k_dev_init(struct rte_eth_dev *dev)
dev->dev_ops = _eth_dev_ops;
dev->rx_pkt_burst = _recv_pkts;
dev->tx_pkt_burst = _xmit_pkts;
+   dev->tx_pkt_prep = _prep_pkts;

/* only initialize in the primary process */
if (rte_eal_process_type() != RTE_PROC_PRIMARY)
diff --git a/drivers/net/fm10k/fm10k_rxtx.c b/drivers/net/fm10k/fm10k_rxtx.c
index 32cc7ff..5fc4d5a 100644
--- a/drivers/net/fm10k/fm10k_rxtx.c
+++ b/drivers/net/fm10k/fm10k_rxtx.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2013-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -35,6 +35,7 @@

 #include 
 #include 
+#include 
 #include "fm10k.h"
 #include "base/fm10k_type.h"

@@ -65,6 +66,15 @@ static inline void dump_rxd(union fm10k_rx_desc *rxd)
 }
 #endif

+#define FM10K_TX_OFFLOAD_MASK (  \
+   PKT_TX_VLAN_PKT |\
+   PKT_TX_IP_CKSUM |\
+   PKT_TX_L4_MASK | \
+   PKT_TX_TCP_SEG)
+
+#define FM10K_TX_OFFLOAD_NOTSUP_MASK \
+   (PKT_TX_OFFLOAD_MASK ^ FM10K_TX_OFFLOAD_MASK)
+
 /* @note: When this function is changed, make corresponding change to
  * fm10k_dev_supported_ptypes_get()
  */
@@ -597,3 +607,41 @@ fm10k_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,

return count;
 }
+
+uint16_t
+fm10k_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts)
+{
+   int i, ret;
+   struct rte_mbuf *m;
+
+   for (i = 0; i < nb_pkts; i++) {
+   m = tx_pkts[i];
+
+   if ((m->ol_flags & PKT_TX_TCP_SEG) &&
+   (m->tso_segsz < FM10K_TSO_MINMSS)) {
+   rte_errno = -EINVAL;
+   return i;
+   }
+
+   if (m->ol_flags & FM10K_TX_OFFLOAD_NOTSUP_MASK) {
+   rte_errno = -ENOTSUP;
+   return i;
+   }
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+   ret = rte_validate_tx_offload(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+#endif
+   ret = rte_phdr_cksum_fix(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+   }
+
+   return i;
+}
-- 
1.7.9.5



[dpdk-dev] [PATCH v9 2/6] e1000: add Tx preparation

2016-10-24 Thread Tomasz Kulasek
Signed-off-by: Tomasz Kulasek 
---
 drivers/net/e1000/e1000_ethdev.h |   11 
 drivers/net/e1000/em_ethdev.c|5 +++-
 drivers/net/e1000/em_rxtx.c  |   48 ++-
 drivers/net/e1000/igb_ethdev.c   |4 +++
 drivers/net/e1000/igb_rxtx.c |   52 +-
 5 files changed, 117 insertions(+), 3 deletions(-)

diff --git a/drivers/net/e1000/e1000_ethdev.h b/drivers/net/e1000/e1000_ethdev.h
index 6c25c8d..bd0f277 100644
--- a/drivers/net/e1000/e1000_ethdev.h
+++ b/drivers/net/e1000/e1000_ethdev.h
@@ -138,6 +138,11 @@
 #define E1000_MISC_VEC_ID   RTE_INTR_VEC_ZERO_OFFSET
 #define E1000_RX_VEC_START  RTE_INTR_VEC_RXTX_OFFSET

+#define IGB_TX_MAX_SEG UINT8_MAX
+#define IGB_TX_MAX_MTU_SEG UINT8_MAX
+#define EM_TX_MAX_SEG  UINT8_MAX
+#define EM_TX_MAX_MTU_SEG  UINT8_MAX
+
 /* structure for interrupt relative data */
 struct e1000_interrupt {
uint32_t flags;
@@ -315,6 +320,9 @@ void eth_igb_tx_init(struct rte_eth_dev *dev);
 uint16_t eth_igb_xmit_pkts(void *txq, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);

+uint16_t eth_igb_prep_pkts(void *txq, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts);
+
 uint16_t eth_igb_recv_pkts(void *rxq, struct rte_mbuf **rx_pkts,
uint16_t nb_pkts);

@@ -376,6 +384,9 @@ void eth_em_tx_init(struct rte_eth_dev *dev);
 uint16_t eth_em_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);

+uint16_t eth_em_prep_pkts(void *txq, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts);
+
 uint16_t eth_em_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
uint16_t nb_pkts);

diff --git a/drivers/net/e1000/em_ethdev.c b/drivers/net/e1000/em_ethdev.c
index 7cf5f0c..17b45cb 100644
--- a/drivers/net/e1000/em_ethdev.c
+++ b/drivers/net/e1000/em_ethdev.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -300,6 +300,7 @@ eth_em_dev_init(struct rte_eth_dev *eth_dev)
eth_dev->dev_ops = _em_ops;
eth_dev->rx_pkt_burst = (eth_rx_burst_t)_em_recv_pkts;
eth_dev->tx_pkt_burst = (eth_tx_burst_t)_em_xmit_pkts;
+   eth_dev->tx_pkt_prep = (eth_tx_prep_t)_em_prep_pkts;

/* for secondary processes, we don't initialise any further as primary
 * has already done this work. Only check we don't need a different
@@ -1067,6 +1068,8 @@ eth_em_infos_get(struct rte_eth_dev *dev, struct 
rte_eth_dev_info *dev_info)
.nb_max = E1000_MAX_RING_DESC,
.nb_min = E1000_MIN_RING_DESC,
.nb_align = EM_TXD_ALIGN,
+   .nb_seg_max = EM_TX_MAX_SEG,
+   .nb_mtu_seg_max = EM_TX_MAX_MTU_SEG,
};

dev_info->speed_capa = ETH_LINK_SPEED_10M_HD | ETH_LINK_SPEED_10M |
diff --git a/drivers/net/e1000/em_rxtx.c b/drivers/net/e1000/em_rxtx.c
index 41f51c0..5bd3c99 100644
--- a/drivers/net/e1000/em_rxtx.c
+++ b/drivers/net/e1000/em_rxtx.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -66,6 +66,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 

 #include "e1000_logs.h"
@@ -77,6 +78,14 @@

 #define E1000_RXDCTL_GRAN  0x0100 /* RXDCTL Granularity */

+#define E1000_TX_OFFLOAD_MASK ( \
+   PKT_TX_IP_CKSUM |   \
+   PKT_TX_L4_MASK |\
+   PKT_TX_VLAN_PKT)
+
+#define E1000_TX_OFFLOAD_NOTSUP_MASK \
+   (PKT_TX_OFFLOAD_MASK ^ E1000_TX_OFFLOAD_MASK)
+
 /**
  * Structure associated with each descriptor of the RX ring of a RX queue.
  */
@@ -618,6 +627,43 @@ end_of_tx:

 /*
  *
+ *  TX prep functions
+ *
+ **/
+uint16_t
+eth_em_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts)
+{
+   int i, ret;
+   struct rte_mbuf *m;
+
+   for (i = 0; i < nb_pkts; i++) {
+   m = tx_pkts[i];
+
+   if (m->ol_flags & E1000_TX_OFFLOAD_NOTSUP_MASK) {
+   rte_errno = -ENOTSUP;
+   return i;
+   }
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+   ret = rte_validate_tx_offload(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+#endif
+   ret = rte_phdr_cksum_fix(m);
+   if (ret != 0) {
+   rte_errno = 

[dpdk-dev] [PATCH v9 1/6] ethdev: add Tx preparation

2016-10-24 Thread Tomasz Kulasek
Added API for `rte_eth_tx_prep`

uint16_t rte_eth_tx_prep(uint8_t port_id, uint16_t queue_id,
struct rte_mbuf **tx_pkts, uint16_t nb_pkts)

Added fields to the `struct rte_eth_desc_lim`:

uint16_t nb_seg_max;
/**< Max number of segments per whole packet. */

uint16_t nb_mtu_seg_max;
/**< Max number of segments per one MTU */

Added functions:

int rte_validate_tx_offload(struct rte_mbuf *m)
to validate general requirements for tx offload set in mbuf of packet
  such as flag completeness. In the current implementation this function
  is called optionally when RTE_LIBRTE_ETHDEV_DEBUG is enabled.

int rte_phdr_cksum_fix(struct rte_mbuf *m)
to fix pseudo header checksum for TSO and non-TSO tcp/udp packets
before hardware tx checksum offload.
 - for non-TSO tcp/udp packets full pseudo-header checksum is
   counted and set.
 - for TSO the IP payload length is not included.

PERFORMANCE TESTS
-----------------

This feature was tested with modified csum engine from test-pmd.

The packet checksum preparation was moved from application to Tx
preparation step placed before burst.

We may expect some overhead costs caused by:
1) using additional callback before burst,
2) rescanning burst,
3) additional condition checking (packet validation),
4) worse optimization (e.g. packet data access, etc.)

We tested it using ixgbe Tx preparation implementation with some parts
disabled to have comparable information about the impact of different
parts of implementation.

IMPACT:

1) For unimplemented Tx preparation callback the performance impact is
   negligible,
2) With packet condition checks only, without checksum modifications
   (nb_segs, available offloads, etc.), the result is
   14626628/14252168 (~2.62% drop),
3) With full support in the ixgbe driver (point 2 + packet checksum
   initialization), the result is 14060924/13588094 (~3.48% drop)

Signed-off-by: Tomasz Kulasek 
---
 config/common_base|1 +
 lib/librte_ether/rte_ethdev.h |   97 +
 lib/librte_mbuf/rte_mbuf.h|   64 +++
 lib/librte_net/rte_net.h  |   85 
 4 files changed, 247 insertions(+)

diff --git a/config/common_base b/config/common_base
index c7fd3db..619284b 100644
--- a/config/common_base
+++ b/config/common_base
@@ -120,6 +120,7 @@ CONFIG_RTE_MAX_QUEUES_PER_PORT=1024
 CONFIG_RTE_LIBRTE_IEEE1588=n
 CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16
 CONFIG_RTE_ETHDEV_RXTX_CALLBACKS=y
+CONFIG_RTE_ETHDEV_TX_PREP=y

 #
 # Support NIC bypass logic
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 38641e8..d548d48 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -182,6 +182,7 @@ extern "C" {
 #include 
 #include 
 #include 
+#include 
 #include "rte_ether.h"
 #include "rte_eth_ctrl.h"
 #include "rte_dev_info.h"
@@ -699,6 +700,8 @@ struct rte_eth_desc_lim {
uint16_t nb_max;   /**< Max allowed number of descriptors. */
uint16_t nb_min;   /**< Min allowed number of descriptors. */
uint16_t nb_align; /**< Number of descriptors should be aligned to. */
+   uint16_t nb_seg_max; /**< Max number of segments per whole packet. 
*/
+   uint16_t nb_mtu_seg_max; /**< Max number of segments per one MTU */
 };

 /**
@@ -1188,6 +1191,11 @@ typedef uint16_t (*eth_tx_burst_t)(void *txq,
   uint16_t nb_pkts);
 /**< @internal Send output packets on a transmit queue of an Ethernet device. 
*/

+typedef uint16_t (*eth_tx_prep_t)(void *txq,
+  struct rte_mbuf **tx_pkts,
+  uint16_t nb_pkts);
+/**< @internal Prepare output packets on a transmit queue of an Ethernet 
device. */
+
 typedef int (*flow_ctrl_get_t)(struct rte_eth_dev *dev,
   struct rte_eth_fc_conf *fc_conf);
 /**< @internal Get current flow control parameter on an Ethernet device */
@@ -1622,6 +1630,7 @@ struct rte_eth_rxtx_callback {
 struct rte_eth_dev {
eth_rx_burst_t rx_pkt_burst; /**< Pointer to PMD receive function. */
eth_tx_burst_t tx_pkt_burst; /**< Pointer to PMD transmit function. */
+   eth_tx_prep_t tx_pkt_prep; /**< Pointer to PMD transmit prepare 
function. */
struct rte_eth_dev_data *data;  /**< Pointer to device data */
const struct eth_driver *driver;/**< Driver for this device */
const struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */
@@ -2816,6 +2825,94 @@ rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id,
return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, 
nb_pkts);
 }

+/**
+ * Process a burst of output packets on a transmit queue of an Ethernet device.
+ *
+ * The rte_eth_tx_prep() function is invoked to prepare output packets to be
+ * transmitted on the output queue *queue_id* of the Ethernet device designated
+ * by its 

[dpdk-dev] [PATCH v9 0/6] add Tx preparation

2016-10-24 Thread Tomasz Kulasek

>From 35b09a978d244092337b6f46fd1309f8c733bb6b Mon Sep 17 00:00:00 2001
From: Tomasz Kulasek 
Date: Fri, 14 Oct 2016 16:10:35 +0200
Subject: [PATCH v6 0/6] add Tx preparation

As discussed in that thread:

http://dpdk.org/ml/archives/dev/2015-September/023603.html

Different NIC models depending on HW offload requested might impose
different requirements on packets to be TX-ed in terms of:

 - Max number of fragments per packet allowed
 - Max number of fragments per TSO segments
 - The way pseudo-header checksum should be pre-calculated
 - L3/L4 header fields filling
 - etc.


MOTIVATION:
-----------

1) Some work cannot (and should not) be done in rte_eth_tx_burst.
   However, this work is sometimes required, and currently it is left
   to the application.

2) Different hardware may have different requirements for TX offloads,
   a different subset of offloads may be supported, and so on.

3) Some parameters (e.g. number of segments in ixgbe driver) may hang
   the device. These parameters may vary between devices.

   For example i40e HW allows 8 fragments per packet, but that is after
   TSO segmentation. While ixgbe has a 38-fragment pre-TSO limit.

4) Fields in packet may require different initialization (e.g.
   pseudo-header checksum precalculation, sometimes done in a
   different way depending on packet type, and so on). Currently the
   application needs to take care of it.

5) Using an additional API (rte_eth_tx_prep) before rte_eth_tx_burst
   allows the packet burst to be prepared in a form acceptable to the
   specific device.

6) Some additional checks may be done in debug mode keeping tx_burst
   implementation clean.


PROPOSAL:
---------

To help user to deal with all these varieties we propose to:

1) Introduce rte_eth_tx_prep() function to do necessary preparations of
   packet burst to be safely transmitted on device for desired HW
   offloads (set/reset checksum field according to the hardware
   requirements) and check HW constraints (number of segments per
   packet, etc).

   Since the limitations and requirements may differ between devices,
   this requires extending the rte_eth_dev structure with a new function
   pointer "tx_pkt_prep", which can be implemented in the driver to
   prepare and verify packets, in a device-specific way, before burst.
   This should prevent the application from sending malformed packets.

2) Also new fields will be introduced in rte_eth_desc_lim: 
   nb_seg_max and nb_mtu_seg_max, providing an information about max
   segments in TSO and non-TSO packets acceptable by device.

   This information is useful for the application to avoid creating
   (or to limit) malicious packets.


APPLICATION (CASE OF USE):
--------------------------

1) Application should initialize the burst of packets to send, set
   required tx offload flags and required fields, like l2_len, l3_len,
   l4_len, and tso_segsz

2) Application passes burst to the rte_eth_tx_prep to check conditions
   required to send packets through the NIC.

3) The result of rte_eth_tx_prep can be used to send valid packets
   and/or restore invalid ones if the function fails.

e.g.

for (i = 0; i < nb_pkts; i++) {

/* initialize or process packet */

bufs[i]->tso_segsz = 800;
bufs[i]->ol_flags = PKT_TX_TCP_SEG | PKT_TX_IPV4
| PKT_TX_IP_CKSUM;
bufs[i]->l2_len = sizeof(struct ether_hdr);
bufs[i]->l3_len = sizeof(struct ipv4_hdr);
bufs[i]->l4_len = sizeof(struct tcp_hdr);
}

/* Prepare burst of TX packets */
nb_prep = rte_eth_tx_prep(port, 0, bufs, nb_pkts);

if (nb_prep < nb_pkts) {
printf("tx_prep failed\n");

/* nb_prep here is the index of the first invalid packet.
 * rte_eth_tx_prep can be called on the remaining packets to find
 * further invalid ones.
 */

}

/* Send burst of TX packets */
nb_tx = rte_eth_tx_burst(port, 0, bufs, nb_prep);

/* Free any unsent packets. */


v9 changes:
 - fixed headers structure fragmentation check
 - moved fragmentation check into rte_validate_tx_offload()

v8 changes:
 - mbuf argument in rte_validate_tx_offload declared as const

v7 changes:
 - comments reworded/added
 - changed errno values returned from Tx prep API
 - added check in rte_phdr_cksum_fix if headers are in the first
   data segment and can be safely modified
 - moved rte_validate_tx_offload to rte_mbuf
 - moved rte_phdr_cksum_fix to rte_net.h
 - removed rte_pkt.h new file as useless

v6 changes:
- added performance impact test results to the patch description

v5 changes:
 - rebased csum engine modification
 - added information to the csum engine about performance tests
 - some performance improvements

v4 changes:
 - tx_prep is now set to default behavior (NULL) for simple/vector path
   in fm10k, i40e and ixgbe drivers to increase performance, when
   Tx offloads are not intentionally available

v3 changes:
 - reworked 

[dpdk-dev] [PATCH v2 0/2] net/ixgbe: fix VF VLAN insert

2016-10-24 Thread Bruce Richardson
On Thu, Oct 20, 2016 at 12:54:52AM +, Lu, Wenzhuo wrote:
> Hi,
> 
> > -Original Message-
> > From: Iremonger, Bernard
> > Sent: Wednesday, October 19, 2016 10:48 PM
> > To: dev at dpdk.org; daniels at research.att.com; Lu, Wenzhuo; az5157 at 
> > att.com
> > Cc: Iremonger, Bernard
> > Subject: [PATCH v2 0/2] net/ixgbe: fix VF VLAN insert
> > 
> > Changes in v2:
> > Add testpmd patch.
> > Update testpmd for change to rte_pmd_ixgbe_set_vf_vlan_insert function.
> > 
> > Bernard Iremonger (1):
> >   app/test_pmd: change to the VF VLAN insert command
> > 
> > E. Scott Daniels (1):
> >   net/ixgbe: fix VLAN insert parameter type and its use
> > 
> >  app/test-pmd/cmdline.c  | 19 +--
> >  doc/guides/testpmd_app_ug/testpmd_funcs.rst |  2 +-
> >  drivers/net/ixgbe/ixgbe_ethdev.c|  8 
> >  drivers/net/ixgbe/rte_pmd_ixgbe.h   |  9 +
> >  4 files changed, 19 insertions(+), 19 deletions(-)
> > 
> > --
> > 2.10.1
> Series-Acked-by: Wenzhuo Lu 
> 
Applied to dpdk-next-net/rel_16_11

/Bruce


[dpdk-dev] [PATCH v4 00/32] net/qede: update qede pmd to 1.2.0.1 and enable by default

2016-10-24 Thread Bruce Richardson
On Tue, Oct 18, 2016 at 09:11:14PM -0700, Rasesh Mody wrote:
> Hi,
> 
> This patch set includes changes to update the base driver, work with
> newer FW 8.10.9.0, adds new features, includes enhancements and code
> cleanup, provides bug fixes and updates documentation for the QEDE
> poll mode driver.
> 
> It enables QEDE PMD in the dpdk config by default. The dependency on
> external library libz has been addressed.
> 
> The patch set updates the QEDE PMD to 1.2.0.1.
> 
> Review comments received for v3 have been addressed.
> 
> Please apply to DPDK tree for v16.11 release.
> 
> Thanks!
> Rasesh
> 
> Harish Patil (14):
>   net/qede: change signature of MCP command API
>   net/qede: serialize access to MFW mbox
>   net/qede: add NIC selftest and query sensor info support
>   net/qede: fix port (re)configuration issue
>   net/qede/base: allow MTU change via vport-update
>   net/qede: add missing 100G link speed capability
>   net/qede: remove unused/dead code
>   net/qede: fixes for VLAN filters
>   net/qede: add enable/disable VLAN filtering
>   net/qede: fix RSS related issues
>   net/qede/base: add support to initiate PF FLR
>   net/qede: skip slowpath polling for 100G VF device
>   net/qede: fix driver version string
>   net/qede: fix status block index for VF queues
> 
> Rasesh Mody (16):
>   net/qede/base: add new init files and rearrange the code
>   net/qede/base: formatting changes
>   net/qede: use FW CONFIG defines as needed
>   net/qede/base: add HSI changes and register defines
>   net/qede/base: add attention formatting string
>   net/qede/base: additional formatting/comment changes
>   net/qede: fix 32 bit compilation
>   net/qede/base: update base driver
>   net/qede/base: rename structure and defines
>   net/qede/base: comment enhancements
>   net/qede/base: add MFW crash dump support
>   net/qede/base: change Rx Tx queue start APIs
>   net/qede: add support for queue statistics
>   net/qede: remove zlib dependency and enable PMD by default
>   doc: update qede pmd documentation
>   net/qede: update driver version
> 
> Sony Chacko (2):
>   net/qede: enable support for unequal number of Rx/Tx queues
>   net/qede: add scatter gather support
> 
Patchset applied to dpdk_next_net/rel_16_11

Thanks,
/Bruce


[dpdk-dev] [PATCH v2] net/mlx5: fix init on secondary process

2016-10-24 Thread Bruce Richardson
On Mon, Oct 17, 2016 at 04:18:59PM +0200, Adrien Mazarguil wrote:
> On Mon, Oct 17, 2016 at 02:52:39PM +0100, Ferruh Yigit wrote:
> > Hi Adrien,
> > 
> > On 10/17/2016 1:56 PM, Olivier Gournet wrote:
> > > Fixes: 1d88ba171942 ("net/mlx5: refactor Tx data path")
> > > Fixes: 21c8bb4928c9 ("net/mlx5: split Tx queue structure")
> > > 
> > > Signed-off-by: Olivier Gournet 
> > 
> > According your comment on previous version of it, I think you have your
> > Ack on this patch, but can you please confirm?
> 
> Yes it's fine, thanks.
> 
> Acked-by: Adrien Mazarguil 
> 
Applied to dpdk-next-net/rel_16_11

/Bruce


[dpdk-dev] [PATCH v8 1/6] ethdev: add Tx preparation

2016-10-24 Thread Kulasek, TomaszX


> -Original Message-
> From: Ananyev, Konstantin
> Sent: Monday, October 24, 2016 14:57
> To: Kulasek, TomaszX ; dev at dpdk.org
> Cc: olivier.matz at 6wind.com
> Subject: RE: [PATCH v8 1/6] ethdev: add Tx preparation
> 
> 
> 
> > -Original Message-
> > From: Kulasek, TomaszX
> > Sent: Monday, October 24, 2016 1:49 PM
> > To: Ananyev, Konstantin ; dev at dpdk.org
> > Cc: olivier.matz at 6wind.com
> > Subject: RE: [PATCH v8 1/6] ethdev: add Tx preparation
> >
> > Hi Konstantin,
> >
> > > -Original Message-
> > > From: Ananyev, Konstantin
> > > Sent: Monday, October 24, 2016 14:15
> > > To: Kulasek, TomaszX ; dev at dpdk.org
> > > Cc: olivier.matz at 6wind.com
> > > Subject: RE: [PATCH v8 1/6] ethdev: add Tx preparation
> > >
> > > Hi Tomasz,
> > >
> >
> > [...]
> >
> > > >
> > > > +/**
> > > > + * Fix pseudo header checksum
> > > > + *
> > > > + * This function fixes pseudo header checksum for TSO and non-TSO
> > > > +tcp/udp in
> > > > + * provided mbufs packet data.
> > > > + *
> > > > + * - for non-TSO tcp/udp packets full pseudo-header checksum is
> > > > +counted
> > > and set
> > > > + *   in packet data,
> > > > + * - for TSO the IP payload length is not included in pseudo
> header.
> > > > + *
> > > > + * This function expects that used headers are in the first data
> > > > +segment of
> > > > + * mbuf, and are not fragmented.
> > > > + *
> > > > + * @param m
> > > > + *   The packet mbuf to be validated.
> > > > + * @return
> > > > + *   0 if checksum is initialized properly
> > > > + */
> > > > +static inline int
> > > > +rte_phdr_cksum_fix(struct rte_mbuf *m) {
> > > > +   struct ipv4_hdr *ipv4_hdr;
> > > > +   struct ipv6_hdr *ipv6_hdr;
> > > > +   struct tcp_hdr *tcp_hdr;
> > > > +   struct udp_hdr *udp_hdr;
> > > > +   uint64_t ol_flags = m->ol_flags;
> > > > +   uint64_t inner_l3_offset = m->l2_len;
> > > > +
> > > > +   if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
> > > > +   inner_l3_offset += m->outer_l2_len + m->outer_l3_len;
> > > > +
> > > > +   /* headers are fragmented */
> > > > +   if (unlikely(rte_pktmbuf_data_len(m) >= inner_l3_offset +
> > > > +m->l3_len
> > > +
> > > > +   m->l4_len))
> > >
> > > Might be better to move that check into rte_validate_tx_offload(),
> > > so it would be called only when TX_DEBUG is on.
> >
> > While unfragmented headers are not general requirements for Tx
> > offloads, and this requirement is for this particular implementation,
> maybe for performance reasons will be better to keep it here, and just add
> #if DEBUG to leave rte_validate_tx_offload more generic.
> 
> Hmm and what is the advantage to pollute that code with more ifdefs?
> Again, why unfragmented headers are not general requirements?
> As long as DPDK pseudo-headear csum calculation routines can't handle
> fragmented case, it pretty much is a general requirement, no?
> Konstantin
> 

Ok, you're right, if we assume that this is general requirement, it should be 
moved.

> >
> > > Another thing, shouldn't it be:
> > > if (rte_pktmbuf_data_len(m) < inner_l3_offset + m->l3_len + m->l4_len)
> ?
> >
> > Yes, it should.
> >
> > > Konstantin
> > >
> >
> > Tomasz


[dpdk-dev] [PATCH 2/2] app/test: use correct offsets in AES perf test

2016-10-24 Thread Jain, Deepak K


> -Original Message-
> From: Trahe, Fiona
> Sent: Monday, October 24, 2016 1:00 PM
> To: dev at dpdk.org
> Cc: De Lara Guarch, Pablo ; Trahe, Fiona
> ; Griffin, John ; Jain,
> Deepak K ; Kusztal, ArkadiuszX
> 
> Subject: [PATCH 2/2] app/test: use correct offsets in AES perf test
> 
> offsets for digest and data need to be adjusted to take prepended IV into
> account
> 
> Signed-off-by: Fiona Trahe 
> ---
>  app/test/test_cryptodev_perf.c | 15 ---
>  1 file changed, 8 insertions(+), 7 deletions(-)
> 
> diff --git a/app/test/test_cryptodev_perf.c
> b/app/test/test_cryptodev_perf.c index e05e11b..53dd8f5 100644
> --- a/app/test/test_cryptodev_perf.c
> +++ b/app/test/test_cryptodev_perf.c
> @@ -2714,26 +2714,27 @@ test_perf_set_crypto_op_aes(struct
> rte_crypto_op *op, struct rte_mbuf *m,
> 
> --
> 2.5.0
Acked-by: Deepak Kumar Jain 


[dpdk-dev] [PATCH 1/2] crypto/qat: rework request builder for performance

2016-10-24 Thread Jain, Deepak K


> -Original Message-
> From: Trahe, Fiona
> Sent: Monday, October 24, 2016 1:00 PM
> To: dev at dpdk.org
> Cc: De Lara Guarch, Pablo ; Trahe, Fiona
> ; Griffin, John ; Jain,
> Deepak K ; Kusztal, ArkadiuszX
> 
> Subject: [PATCH 1/2] crypto/qat: rework request builder for performance
> 
> QAT PMD adjusts the buffer start address and offsets passed to the device
> so that the DMAs in and out of the device are 64-byte aligned.
> This gives more consistent throughput, which had been variable depending
> on how the application set up the mbuf.
> The message builder code had to be considerably re-factored to do this
> efficiently.
> 
> Signed-off-by: Fiona Trahe 
> ---
>  drivers/crypto/qat/qat_adf/icp_qat_hw.h  |   5 +
>  drivers/crypto/qat/qat_adf/qat_algs.h|   1 +
>  drivers/crypto/qat/qat_adf/qat_algs_build_desc.c |   2 +
>  drivers/crypto/qat/qat_crypto.c  | 239 
> ---
>  4 files changed, 176 insertions(+), 71 deletions(-)
> 
> diff --git a/drivers/crypto/qat/qat_adf/icp_qat_hw.h
> b/drivers/crypto/qat/qat_adf/icp_qat_hw.h
> index a08094f..ebe245f 100644
> --- a/drivers/crypto/qat/qat_adf/icp_qat_hw.h
> --
> 2.5.0
Acked-By: Deepak Kumar Jain 


[dpdk-dev] [PATCH 2/2] app/test: use correct offsets in AES perf test

2016-10-24 Thread Fiona Trahe
offsets for digest and data need to be adjusted
to take prepended IV into account

Signed-off-by: Fiona Trahe 
---
 app/test/test_cryptodev_perf.c | 15 ---
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/app/test/test_cryptodev_perf.c b/app/test/test_cryptodev_perf.c
index e05e11b..53dd8f5 100644
--- a/app/test/test_cryptodev_perf.c
+++ b/app/test/test_cryptodev_perf.c
@@ -2714,26 +2714,27 @@ test_perf_set_crypto_op_aes(struct rte_crypto_op *op, 
struct rte_mbuf *m,
}

/* Authentication Parameters */
-   op->sym->auth.digest.data = (uint8_t *)m->buf_addr +
-   (m->data_off + data_len);
-   op->sym->auth.digest.phys_addr = rte_pktmbuf_mtophys_offset(m, 
data_len);
+   op->sym->auth.digest.data = rte_pktmbuf_mtod_offset(m, uint8_t *,
+   AES_CIPHER_IV_LENGTH + data_len);
+   op->sym->auth.digest.phys_addr = rte_pktmbuf_mtophys_offset(m,
+   AES_CIPHER_IV_LENGTH + data_len);
op->sym->auth.digest.length = digest_len;
op->sym->auth.aad.data = aes_iv;
op->sym->auth.aad.length = AES_CIPHER_IV_LENGTH;

/* Cipher Parameters */
-   op->sym->cipher.iv.data = (uint8_t *)m->buf_addr + m->data_off;
+   op->sym->cipher.iv.data = rte_pktmbuf_mtod(m, uint8_t *);
op->sym->cipher.iv.phys_addr = rte_pktmbuf_mtophys(m);
op->sym->cipher.iv.length = AES_CIPHER_IV_LENGTH;

rte_memcpy(op->sym->cipher.iv.data, aes_iv, AES_CIPHER_IV_LENGTH);

/* Data lengths/offsets Parameters */
-   op->sym->auth.data.offset = 0;
+   op->sym->auth.data.offset = AES_CIPHER_IV_LENGTH;
op->sym->auth.data.length = data_len;

-   op->sym->cipher.data.offset = AES_BLOCK_SIZE;
-   op->sym->cipher.data.length = data_len - AES_BLOCK_SIZE;
+   op->sym->cipher.data.offset = AES_CIPHER_IV_LENGTH;
+   op->sym->cipher.data.length = data_len;

op->sym->m_src = m;

-- 
2.5.0



[dpdk-dev] [PATCH 1/2] crypto/qat: rework request builder for performance

2016-10-24 Thread Fiona Trahe
QAT PMD adjusts the buffer start address and offsets passed
to the device so that the DMAs in and out of the device are
64-byte aligned.
This gives more consistent throughput, which had been
variable depending on how the application set up the mbuf.
The message builder code had to be considerably re-factored
to do this efficiently.

Signed-off-by: Fiona Trahe 
---
 drivers/crypto/qat/qat_adf/icp_qat_hw.h  |   5 +
 drivers/crypto/qat/qat_adf/qat_algs.h|   1 +
 drivers/crypto/qat/qat_adf/qat_algs_build_desc.c |   2 +
 drivers/crypto/qat/qat_crypto.c  | 239 ---
 4 files changed, 176 insertions(+), 71 deletions(-)

diff --git a/drivers/crypto/qat/qat_adf/icp_qat_hw.h 
b/drivers/crypto/qat/qat_adf/icp_qat_hw.h
index a08094f..ebe245f 100644
--- a/drivers/crypto/qat/qat_adf/icp_qat_hw.h
+++ b/drivers/crypto/qat/qat_adf/icp_qat_hw.h
@@ -237,6 +237,11 @@ enum icp_qat_hw_cipher_dir {
ICP_QAT_HW_CIPHER_DECRYPT = 1,
 };

+enum icp_qat_hw_auth_op {
+   ICP_QAT_HW_AUTH_VERIFY = 0,
+   ICP_QAT_HW_AUTH_GENERATE = 1,
+};
+
 enum icp_qat_hw_cipher_convert {
ICP_QAT_HW_CIPHER_NO_CONVERT = 0,
ICP_QAT_HW_CIPHER_KEY_CONVERT = 1,
diff --git a/drivers/crypto/qat/qat_adf/qat_algs.h 
b/drivers/crypto/qat/qat_adf/qat_algs.h
index 78a92f3..dcc0df5 100644
--- a/drivers/crypto/qat/qat_adf/qat_algs.h
+++ b/drivers/crypto/qat/qat_adf/qat_algs.h
@@ -98,6 +98,7 @@ struct qat_session {
enum icp_qat_hw_cipher_dir qat_dir;
enum icp_qat_hw_cipher_mode qat_mode;
enum icp_qat_hw_auth_algo qat_hash_alg;
+   enum icp_qat_hw_auth_op auth_op;
struct qat_alg_cd cd;
uint8_t *cd_cur_ptr;
phys_addr_t cd_paddr;
diff --git a/drivers/crypto/qat/qat_adf/qat_algs_build_desc.c 
b/drivers/crypto/qat/qat_adf/qat_algs_build_desc.c
index 0b66b37..8900668 100644
--- a/drivers/crypto/qat/qat_adf/qat_algs_build_desc.c
+++ b/drivers/crypto/qat/qat_adf/qat_algs_build_desc.c
@@ -625,11 +625,13 @@ int qat_alg_aead_session_create_content_desc_auth(struct 
qat_session *cdesc,
ICP_QAT_FW_LA_NO_RET_AUTH_RES);
ICP_QAT_FW_LA_CMP_AUTH_SET(header->serv_specif_flags,
ICP_QAT_FW_LA_CMP_AUTH_RES);
+   cdesc->auth_op = ICP_QAT_HW_AUTH_VERIFY;
} else {
ICP_QAT_FW_LA_RET_AUTH_SET(header->serv_specif_flags,
   ICP_QAT_FW_LA_RET_AUTH_RES);
ICP_QAT_FW_LA_CMP_AUTH_SET(header->serv_specif_flags,
   ICP_QAT_FW_LA_NO_CMP_AUTH_RES);
+   cdesc->auth_op = ICP_QAT_HW_AUTH_GENERATE;
}

/*
diff --git a/drivers/crypto/qat/qat_crypto.c b/drivers/crypto/qat/qat_crypto.c
index f8db12f..798cd98 100644
--- a/drivers/crypto/qat/qat_crypto.c
+++ b/drivers/crypto/qat/qat_crypto.c
@@ -951,6 +951,13 @@ qat_write_hw_desc_entry(struct rte_crypto_op *op, uint8_t 
*out_msg)
struct icp_qat_fw_la_cipher_req_params *cipher_param;
struct icp_qat_fw_la_auth_req_params *auth_param;
register struct icp_qat_fw_la_bulk_req *qat_req;
+   uint8_t do_auth = 0, do_cipher = 0;
+   uint32_t cipher_len = 0, cipher_ofs = 0;
+   uint32_t auth_len = 0, auth_ofs = 0;
+   uint32_t min_ofs = 0;
+   uint32_t digest_appended = 1;
+   uint64_t buf_start = 0;
+

 #ifdef RTE_LIBRTE_PMD_QAT_DEBUG_TX
if (unlikely(op->type != RTE_CRYPTO_OP_TYPE_SYMMETRIC)) {
@@ -975,88 +982,173 @@ qat_write_hw_desc_entry(struct rte_crypto_op *op, 
uint8_t *out_msg)
qat_req = (struct icp_qat_fw_la_bulk_req *)out_msg;
rte_mov128((uint8_t *)qat_req, (const uint8_t *)&(ctx->fw_req));
qat_req->comn_mid.opaque_data = (uint64_t)(uintptr_t)op;
+   cipher_param = (void *)&qat_req->serv_specif_rqpars;
+   auth_param = (void *)((uint8_t *)cipher_param + sizeof(*cipher_param));

-   qat_req->comn_mid.dst_length =
-   qat_req->comn_mid.src_length =
-   rte_pktmbuf_data_len(op->sym->m_src);
+   if (ctx->qat_cmd == ICP_QAT_FW_LA_CMD_HASH_CIPHER ||
+   ctx->qat_cmd == ICP_QAT_FW_LA_CMD_CIPHER_HASH) {
+   do_auth = 1;
+   do_cipher = 1;
+   } else if (ctx->qat_cmd == ICP_QAT_FW_LA_CMD_AUTH) {
+   do_auth = 1;
+   do_cipher = 0;
+   } else if (ctx->qat_cmd == ICP_QAT_FW_LA_CMD_CIPHER) {
+   do_auth = 0;
+   do_cipher = 1;
+   }

-   qat_req->comn_mid.dest_data_addr =
-   qat_req->comn_mid.src_data_addr =
-   rte_pktmbuf_mtophys(op->sym->m_src);
+   if (do_cipher) {

-   if (unlikely(op->sym->m_dst != NULL)) {
-   qat_req->comn_mid.dest_data_addr =
-   rte_pktmbuf_mtophys(op->sym->m_dst);
-   qat_req->comn_mid.dst_length =
-   

[dpdk-dev] [PATCH 0/2] crypto/qat: performance optimisation

2016-10-24 Thread Fiona Trahe
QAT PMD adjusts the buffer start address and offsets passed
to the device so that the DMAs in and out of the device are
64-byte aligned.
This gives more consistent throughput, which had been
variable depending on how the application set up the mbuf.
The message builder code had to be considerably re-factored
to do this efficiently.
Also, the performance tests that did not take IV prepend offsets
into account were corrected.

Fiona Trahe (2):
  crypto/qat: rework request builder for performance
  app/test: use correct offsets in AES perf test

 app/test/test_cryptodev_perf.c   |  15 +-
 drivers/crypto/qat/qat_adf/icp_qat_hw.h  |   5 +
 drivers/crypto/qat/qat_adf/qat_algs.h|   1 +
 drivers/crypto/qat/qat_adf/qat_algs_build_desc.c |   2 +
 drivers/crypto/qat/qat_crypto.c  | 242 ---
 5 files changed, 185 insertions(+), 80 deletions(-)

-- 
2.5.0



[dpdk-dev] [PATCH v8 1/6] ethdev: add Tx preparation

2016-10-24 Thread Ananyev, Konstantin


> -Original Message-
> From: Kulasek, TomaszX
> Sent: Monday, October 24, 2016 1:49 PM
> To: Ananyev, Konstantin ; dev at dpdk.org
> Cc: olivier.matz at 6wind.com
> Subject: RE: [PATCH v8 1/6] ethdev: add Tx preparation
> 
> Hi Konstantin,
> 
> > -Original Message-
> > From: Ananyev, Konstantin
> > Sent: Monday, October 24, 2016 14:15
> > To: Kulasek, TomaszX ; dev at dpdk.org
> > Cc: olivier.matz at 6wind.com
> > Subject: RE: [PATCH v8 1/6] ethdev: add Tx preparation
> >
> > Hi Tomasz,
> >
> 
> [...]
> 
> > >
> > > +/**
> > > + * Fix pseudo header checksum
> > > + *
> > > + * This function fixes pseudo header checksum for TSO and non-TSO
> > > +tcp/udp in
> > > + * provided mbufs packet data.
> > > + *
> > > + * - for non-TSO tcp/udp packets full pseudo-header checksum is counted
> > and set
> > > + *   in packet data,
> > > + * - for TSO the IP payload length is not included in pseudo header.
> > > + *
> > > + * This function expects that used headers are in the first data
> > > +segment of
> > > + * mbuf, and are not fragmented.
> > > + *
> > > + * @param m
> > > + *   The packet mbuf to be validated.
> > > + * @return
> > > + *   0 if checksum is initialized properly
> > > + */
> > > +static inline int
> > > +rte_phdr_cksum_fix(struct rte_mbuf *m) {
> > > + struct ipv4_hdr *ipv4_hdr;
> > > + struct ipv6_hdr *ipv6_hdr;
> > > + struct tcp_hdr *tcp_hdr;
> > > + struct udp_hdr *udp_hdr;
> > > + uint64_t ol_flags = m->ol_flags;
> > > + uint64_t inner_l3_offset = m->l2_len;
> > > +
> > > + if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
> > > + inner_l3_offset += m->outer_l2_len + m->outer_l3_len;
> > > +
> > > + /* headers are fragmented */
> > > + if (unlikely(rte_pktmbuf_data_len(m) >= inner_l3_offset + m->l3_len
> > +
> > > + m->l4_len))
> >
> > Might be better to move that check into rte_validate_tx_offload(), so it
> > would be called only when TX_DEBUG is on.
> 
> While unfragmented headers are not general requirements for Tx offloads, and 
> this requirement is for this particular implementation,
> maybe for performance reasons will be better to keep it here, and just add 
> #if DEBUG to leave rte_validate_tx_offload more generic.

Hmm and what is the advantage to pollute that code with more ifdefs?
Again, why unfragmented headers are not general requirements?
As long as DPDK pseudo-header csum calculation routines can't handle 
fragmented case,
it pretty much is a general requirement, no?
Konstantin

> 
> > Another thing, shouldn't it be:
> > if (rte_pktmbuf_data_len(m) < inner_l3_offset + m->l3_len + m->l4_len) ?
> 
> Yes, it should.
> 
> > Konstantin
> >
> 
> Tomasz


[dpdk-dev] [PATCH v8 1/6] ethdev: add Tx preparation

2016-10-24 Thread Kulasek, TomaszX
Hi Konstantin,

> -Original Message-
> From: Ananyev, Konstantin
> Sent: Monday, October 24, 2016 14:15
> To: Kulasek, TomaszX ; dev at dpdk.org
> Cc: olivier.matz at 6wind.com
> Subject: RE: [PATCH v8 1/6] ethdev: add Tx preparation
> 
> Hi Tomasz,
> 

[...]

> >
> > +/**
> > + * Fix pseudo header checksum
> > + *
> > + * This function fixes pseudo header checksum for TSO and non-TSO
> > +tcp/udp in
> > + * provided mbufs packet data.
> > + *
> > + * - for non-TSO tcp/udp packets full pseudo-header checksum is counted
> and set
> > + *   in packet data,
> > + * - for TSO the IP payload length is not included in pseudo header.
> > + *
> > + * This function expects that used headers are in the first data
> > +segment of
> > + * mbuf, and are not fragmented.
> > + *
> > + * @param m
> > + *   The packet mbuf to be validated.
> > + * @return
> > + *   0 if checksum is initialized properly
> > + */
> > +static inline int
> > +rte_phdr_cksum_fix(struct rte_mbuf *m) {
> > +   struct ipv4_hdr *ipv4_hdr;
> > +   struct ipv6_hdr *ipv6_hdr;
> > +   struct tcp_hdr *tcp_hdr;
> > +   struct udp_hdr *udp_hdr;
> > +   uint64_t ol_flags = m->ol_flags;
> > +   uint64_t inner_l3_offset = m->l2_len;
> > +
> > +   if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
> > +   inner_l3_offset += m->outer_l2_len + m->outer_l3_len;
> > +
> > +   /* headers are fragmented */
> > +   if (unlikely(rte_pktmbuf_data_len(m) >= inner_l3_offset + m->l3_len
> +
> > +   m->l4_len))
> 
> Might be better to move that check into rte_validate_tx_offload(), so it
> would be called only when TX_DEBUG is on.

While unfragmented headers are not general requirements for Tx offloads, and 
this requirement is for this particular implementation, maybe for performance 
reasons will be better to keep it here, and just add #if DEBUG to leave 
rte_validate_tx_offload more generic.

> Another thing, shouldn't it be:
> if (rte_pktmbuf_data_len(m) < inner_l3_offset + m->l3_len + m->l4_len) ?

Yes, it should.

> Konstantin
> 

Tomasz


[dpdk-dev] [PATCH v2 3/3] drivers: register aliases for renamed cryptodev drivers

2016-10-24 Thread Jan Blunck
This registers the legacy names of the driver being renamed in
commit 2f45703c17ac ("drivers: make driver names consistent").

Signed-off-by: Jan Blunck 
Tested-by: Pablo de Lara 
---
 drivers/crypto/aesni_gcm/aesni_gcm_pmd.c   | 1 +
 drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c | 1 +
 drivers/crypto/kasumi/rte_kasumi_pmd.c | 1 +
 drivers/crypto/null/null_crypto_pmd.c  | 1 +
 drivers/crypto/snow3g/rte_snow3g_pmd.c | 1 +
 5 files changed, 5 insertions(+)

diff --git a/drivers/crypto/aesni_gcm/aesni_gcm_pmd.c 
b/drivers/crypto/aesni_gcm/aesni_gcm_pmd.c
index 0b3fd09..dba5e15 100644
--- a/drivers/crypto/aesni_gcm/aesni_gcm_pmd.c
+++ b/drivers/crypto/aesni_gcm/aesni_gcm_pmd.c
@@ -529,6 +529,7 @@ static struct rte_vdev_driver aesni_gcm_pmd_drv = {
 };

 RTE_PMD_REGISTER_VDEV(CRYPTODEV_NAME_AESNI_GCM_PMD, aesni_gcm_pmd_drv);
+RTE_PMD_REGISTER_ALIAS(CRYPTODEV_NAME_AESNI_GCM_PMD, cryptodev_aesni_gcm_pmd);
 RTE_PMD_REGISTER_PARAM_STRING(CRYPTODEV_NAME_AESNI_GCM_PMD,
"max_nb_queue_pairs= "
"max_nb_sessions= "
diff --git a/drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c 
b/drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c
index b936735..f07cd07 100644
--- a/drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c
+++ b/drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c
@@ -720,6 +720,7 @@ static struct rte_vdev_driver cryptodev_aesni_mb_pmd_drv = {
 };

 RTE_PMD_REGISTER_VDEV(CRYPTODEV_NAME_AESNI_MB_PMD, cryptodev_aesni_mb_pmd_drv);
+RTE_PMD_REGISTER_ALIAS(CRYPTODEV_NAME_AESNI_MB_PMD, cryptodev_aesni_mb_pmd);
 RTE_PMD_REGISTER_PARAM_STRING(CRYPTODEV_NAME_AESNI_MB_PMD,
"max_nb_queue_pairs= "
"max_nb_sessions= "
diff --git a/drivers/crypto/kasumi/rte_kasumi_pmd.c 
b/drivers/crypto/kasumi/rte_kasumi_pmd.c
index 11bbf80..b119da2 100644
--- a/drivers/crypto/kasumi/rte_kasumi_pmd.c
+++ b/drivers/crypto/kasumi/rte_kasumi_pmd.c
@@ -656,6 +656,7 @@ static struct rte_vdev_driver cryptodev_kasumi_pmd_drv = {
 };

 RTE_PMD_REGISTER_VDEV(CRYPTODEV_NAME_KASUMI_PMD, cryptodev_kasumi_pmd_drv);
+RTE_PMD_REGISTER_ALIAS(CRYPTODEV_NAME_KASUMI_PMD, cryptodev_kasumi_pmd);
 RTE_PMD_REGISTER_PARAM_STRING(CRYPTODEV_NAME_KASUMI_PMD,
"max_nb_queue_pairs= "
"max_nb_sessions= "
diff --git a/drivers/crypto/null/null_crypto_pmd.c 
b/drivers/crypto/null/null_crypto_pmd.c
index a7d3600..c69606b 100644
--- a/drivers/crypto/null/null_crypto_pmd.c
+++ b/drivers/crypto/null/null_crypto_pmd.c
@@ -274,6 +274,7 @@ static struct rte_vdev_driver cryptodev_null_pmd_drv = {
 };

 RTE_PMD_REGISTER_VDEV(CRYPTODEV_NAME_NULL_PMD, cryptodev_null_pmd_drv);
+RTE_PMD_REGISTER_ALIAS(CRYPTODEV_NAME_NULL_PMD, cryptodev_null_pmd);
 RTE_PMD_REGISTER_PARAM_STRING(CRYPTODEV_NAME_NULL_PMD,
"max_nb_queue_pairs= "
"max_nb_sessions= "
diff --git a/drivers/crypto/snow3g/rte_snow3g_pmd.c 
b/drivers/crypto/snow3g/rte_snow3g_pmd.c
index a794251..3b4292a 100644
--- a/drivers/crypto/snow3g/rte_snow3g_pmd.c
+++ b/drivers/crypto/snow3g/rte_snow3g_pmd.c
@@ -644,6 +644,7 @@ static struct rte_vdev_driver cryptodev_snow3g_pmd_drv = {
 };

 RTE_PMD_REGISTER_VDEV(CRYPTODEV_NAME_SNOW3G_PMD, cryptodev_snow3g_pmd_drv);
+RTE_PMD_REGISTER_ALIAS(CRYPTODEV_NAME_SNOW3G_PMD, cryptodev_snow3g_pmd);
 RTE_PMD_REGISTER_PARAM_STRING(CRYPTODEV_NAME_SNOW3G_PMD,
"max_nb_queue_pairs= "
"max_nb_sessions= "
-- 
2.6.6



[dpdk-dev] [PATCH v2 2/3] drivers: register aliases for renamed VDEV drivers

2016-10-24 Thread Jan Blunck
This registers the legacy names of the driver being renamed in
commit 2f45703c17ac ("drivers: make driver names consistent").

Signed-off-by: Jan Blunck 
---
 drivers/net/af_packet/rte_eth_af_packet.c | 1 +
 drivers/net/bonding/rte_eth_bond_pmd.c| 1 +
 drivers/net/mpipe/mpipe_tilegx.c  | 2 ++
 drivers/net/null/rte_eth_null.c   | 1 +
 drivers/net/pcap/rte_eth_pcap.c   | 1 +
 drivers/net/ring/rte_eth_ring.c   | 1 +
 drivers/net/vhost/rte_eth_vhost.c | 1 +
 drivers/net/virtio/virtio_user_ethdev.c   | 1 +
 drivers/net/xenvirt/rte_eth_xenvirt.c | 1 +
 9 files changed, 10 insertions(+)

diff --git a/drivers/net/af_packet/rte_eth_af_packet.c 
b/drivers/net/af_packet/rte_eth_af_packet.c
index 201c1be..ff45068 100644
--- a/drivers/net/af_packet/rte_eth_af_packet.c
+++ b/drivers/net/af_packet/rte_eth_af_packet.c
@@ -895,6 +895,7 @@ static struct rte_vdev_driver pmd_af_packet_drv = {
 };

 RTE_PMD_REGISTER_VDEV(net_af_packet, pmd_af_packet_drv);
+RTE_PMD_REGISTER_ALIAS(net_af_packet, eth_af_packet);
 RTE_PMD_REGISTER_PARAM_STRING(net_af_packet,
"iface= "
"qpairs= "
diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c 
b/drivers/net/bonding/rte_eth_bond_pmd.c
index 9e38ec9..9df245e 100644
--- a/drivers/net/bonding/rte_eth_bond_pmd.c
+++ b/drivers/net/bonding/rte_eth_bond_pmd.c
@@ -2560,6 +2560,7 @@ static struct rte_vdev_driver bond_drv = {
 };

 RTE_PMD_REGISTER_VDEV(net_bonding, bond_drv);
+RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);

 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
"slave= "
diff --git a/drivers/net/mpipe/mpipe_tilegx.c b/drivers/net/mpipe/mpipe_tilegx.c
index adf299b..f00 100644
--- a/drivers/net/mpipe/mpipe_tilegx.c
+++ b/drivers/net/mpipe/mpipe_tilegx.c
@@ -1632,7 +1632,9 @@ static struct rte_vdev_driver pmd_mpipe_gbe_drv = {
 };

 RTE_PMD_REGISTER_VDEV(net_mpipe_xgbe, pmd_mpipe_xgbe_drv);
+RTE_PMD_REGISTER_ALIAS(net_mpipe_xgbe, xgbe);
 RTE_PMD_REGISTER_VDEV(net_mpipe_gbe, pmd_mpipe_gbe_drv);
+RTE_PMD_REGISTER_ALIAS(net_mpipe_gbe, gbe);

 static void __attribute__((constructor, used))
 mpipe_init_contexts(void)
diff --git a/drivers/net/null/rte_eth_null.c b/drivers/net/null/rte_eth_null.c
index 0b7cc37..836d982 100644
--- a/drivers/net/null/rte_eth_null.c
+++ b/drivers/net/null/rte_eth_null.c
@@ -692,6 +692,7 @@ static struct rte_vdev_driver pmd_null_drv = {
 };

 RTE_PMD_REGISTER_VDEV(net_null, pmd_null_drv);
+RTE_PMD_REGISTER_ALIAS(net_null, eth_null);
 RTE_PMD_REGISTER_PARAM_STRING(net_null,
"size= "
"copy=");
diff --git a/drivers/net/pcap/rte_eth_pcap.c b/drivers/net/pcap/rte_eth_pcap.c
index 0c4711d..0162f44 100644
--- a/drivers/net/pcap/rte_eth_pcap.c
+++ b/drivers/net/pcap/rte_eth_pcap.c
@@ -1065,6 +1065,7 @@ static struct rte_vdev_driver pmd_pcap_drv = {
 };

 RTE_PMD_REGISTER_VDEV(net_pcap, pmd_pcap_drv);
+RTE_PMD_REGISTER_ALIAS(net_pcap, eth_pcap);
 RTE_PMD_REGISTER_PARAM_STRING(net_pcap,
ETH_PCAP_RX_PCAP_ARG "= "
ETH_PCAP_TX_PCAP_ARG "= "
diff --git a/drivers/net/ring/rte_eth_ring.c b/drivers/net/ring/rte_eth_ring.c
index ee1fb76..6d2a8c1 100644
--- a/drivers/net/ring/rte_eth_ring.c
+++ b/drivers/net/ring/rte_eth_ring.c
@@ -629,5 +629,6 @@ static struct rte_vdev_driver pmd_ring_drv = {
 };

 RTE_PMD_REGISTER_VDEV(net_ring, pmd_ring_drv);
+RTE_PMD_REGISTER_ALIAS(net_ring, eth_ring);
 RTE_PMD_REGISTER_PARAM_STRING(net_ring,
ETH_RING_NUMA_NODE_ACTION_ARG "=name:node:action(ATTACH|CREATE)");
diff --git a/drivers/net/vhost/rte_eth_vhost.c 
b/drivers/net/vhost/rte_eth_vhost.c
index 6f58476..766d4ef 100644
--- a/drivers/net/vhost/rte_eth_vhost.c
+++ b/drivers/net/vhost/rte_eth_vhost.c
@@ -1244,6 +1244,7 @@ static struct rte_vdev_driver pmd_vhost_drv = {
 };

 RTE_PMD_REGISTER_VDEV(net_vhost, pmd_vhost_drv);
+RTE_PMD_REGISTER_ALIAS(net_vhost, eth_vhost);
 RTE_PMD_REGISTER_PARAM_STRING(net_vhost,
"iface= "
"queues=");
diff --git a/drivers/net/virtio/virtio_user_ethdev.c 
b/drivers/net/virtio/virtio_user_ethdev.c
index bfdc3d0..406beea 100644
--- a/drivers/net/virtio/virtio_user_ethdev.c
+++ b/drivers/net/virtio/virtio_user_ethdev.c
@@ -479,6 +479,7 @@ static struct rte_vdev_driver virtio_user_driver = {
 };

 RTE_PMD_REGISTER_VDEV(net_virtio_user, virtio_user_driver);
+RTE_PMD_REGISTER_ALIAS(net_virtio_user, virtio_user);
 RTE_PMD_REGISTER_PARAM_STRING(net_virtio_user,
"path= "
"mac= "
diff --git a/drivers/net/xenvirt/rte_eth_xenvirt.c 
b/drivers/net/xenvirt/rte_eth_xenvirt.c
index 5a897b9..c08a056 100644
--- a/drivers/net/xenvirt/rte_eth_xenvirt.c
+++ b/drivers/net/xenvirt/rte_eth_xenvirt.c
@@ -765,5 +765,6 @@ static struct rte_vdev_driver pmd_xenvirt_drv = {
 };

 RTE_PMD_REGISTER_VDEV(net_xenvirt, pmd_xenvirt_drv);
+RTE_PMD_REGISTER_ALIAS(net_xenvirt, eth_xenvirt);
 RTE_PMD_REGISTER_PARAM_STRING(net_xenvirt,
"mac=");
-- 
2.6.6



[dpdk-dev] [PATCH v2 1/3] drivers: add name alias registration for rte_driver

2016-10-24 Thread Jan Blunck
This adds infrastructure for drivers to allow being requested by an alias
so that a renamed driver can still get loaded by its legacy name.

Signed-off-by: Jan Blunck 
Reviewed-by: Maxime Coquelin 
Tested-by: Pablo de Lara 
---
 lib/librte_eal/common/eal_common_vdev.c  | 8 
 lib/librte_eal/common/include/rte_dev.h  | 1 +
 lib/librte_eal/common/include/rte_vdev.h | 5 +
 3 files changed, 14 insertions(+)

diff --git a/lib/librte_eal/common/eal_common_vdev.c 
b/lib/librte_eal/common/eal_common_vdev.c
index 8b05f50..0ff2377 100644
--- a/lib/librte_eal/common/eal_common_vdev.c
+++ b/lib/librte_eal/common/eal_common_vdev.c
@@ -79,6 +79,14 @@ rte_eal_vdev_init(const char *name, const char *args)
return driver->probe(name, args);
}

+   /* Give new names precedence over aliases. */
+   TAILQ_FOREACH(driver, &vdev_driver_list, next) {
+   if (driver->driver.alias &&
+   !strncmp(driver->driver.alias, name,
+   strlen(driver->driver.alias)))
+   return driver->probe(name, args);
+   }
+
RTE_LOG(ERR, EAL, "no driver found for %s\n", name);
return -EINVAL;
 }
diff --git a/lib/librte_eal/common/include/rte_dev.h 
b/lib/librte_eal/common/include/rte_dev.h
index b3873bd..8840380 100644
--- a/lib/librte_eal/common/include/rte_dev.h
+++ b/lib/librte_eal/common/include/rte_dev.h
@@ -149,6 +149,7 @@ void rte_eal_device_remove(struct rte_device *dev);
 struct rte_driver {
TAILQ_ENTRY(rte_driver) next;  /**< Next in list. */
const char *name;   /**< Driver name. */
+   const char *alias;  /**< Driver alias. */
 };

 /**
diff --git a/lib/librte_eal/common/include/rte_vdev.h 
b/lib/librte_eal/common/include/rte_vdev.h
index 97260b2..784e837 100644
--- a/lib/librte_eal/common/include/rte_vdev.h
+++ b/lib/librte_eal/common/include/rte_vdev.h
@@ -83,13 +83,18 @@ void rte_eal_vdrv_unregister(struct rte_vdev_driver 
*driver);

 #define RTE_PMD_REGISTER_VDEV(nm, vdrv)\
 RTE_INIT(vdrvinitfn_ ##vdrv);\
+static const char *vdrvinit_ ## nm ## _alias;\
 static void vdrvinitfn_ ##vdrv(void)\
 {\
(vdrv).driver.name = RTE_STR(nm);\
+   (vdrv).driver.alias = vdrvinit_ ## nm ## _alias;\
rte_eal_vdrv_register(&vdrv);\
 } \
 RTE_PMD_EXPORT_NAME(nm, __COUNTER__)

+#define RTE_PMD_REGISTER_ALIAS(nm, alias)\
+static const char *vdrvinit_ ## nm ## _alias = RTE_STR(alias)
+
 #ifdef __cplusplus
 }
 #endif
-- 
2.6.6



[dpdk-dev] [PATCH v8 1/6] ethdev: add Tx preparation

2016-10-24 Thread Ananyev, Konstantin
Hi Tomasz,

> 
>  /**
> + * Validate general requirements for tx offload in mbuf.
> + *
> + * This function checks correctness and completeness of Tx offload settings.
> + *
> + * @param m
> + *   The packet mbuf to be validated.
> + * @return
> + *   0 if packet is valid
> + */
> +static inline int
> +rte_validate_tx_offload(const struct rte_mbuf *m)
> +{
> + uint64_t ol_flags = m->ol_flags;
> +
> + /* Does packet set any of available offloads? */
> + if (!(ol_flags & PKT_TX_OFFLOAD_MASK))
> + return 0;
> +
> + /* IP checksum can be counted only for IPv4 packet */
> + if ((ol_flags & PKT_TX_IP_CKSUM) && (ol_flags & PKT_TX_IPV6))
> + return -EINVAL;
> +
> + /* IP type not set when required */
> + if (ol_flags & (PKT_TX_L4_MASK | PKT_TX_TCP_SEG))
> + if (!(ol_flags & (PKT_TX_IPV4 | PKT_TX_IPV6)))
> + return -EINVAL;
> +
> + /* Check requirements for TSO packet */
> + if (ol_flags & PKT_TX_TCP_SEG)
> + if ((m->tso_segsz == 0) ||
> + ((ol_flags & PKT_TX_IPV4) &&
> + !(ol_flags & PKT_TX_IP_CKSUM)))
> + return -EINVAL;
> +
> + /* PKT_TX_OUTER_IP_CKSUM set for non outer IPv4 packet. */
> + if ((ol_flags & PKT_TX_OUTER_IP_CKSUM) &&
> + !(ol_flags & PKT_TX_OUTER_IPV4))
> + return -EINVAL;
> +
> + return 0;
> +}
> +
> +/**
>   * Dump an mbuf structure to a file.
>   *
>   * Dump all fields for the given packet mbuf and all its associated
> diff --git a/lib/librte_net/rte_net.h b/lib/librte_net/rte_net.h
> index d4156ae..79669d7 100644
> --- a/lib/librte_net/rte_net.h
> +++ b/lib/librte_net/rte_net.h
> @@ -38,6 +38,11 @@
>  extern "C" {
>  #endif
> 
> +#include 
> +#include 
> +#include 
> +#include 
> +
>  /**
>   * Structure containing header lengths associated to a packet, filled
>   * by rte_net_get_ptype().
> @@ -86,6 +91,91 @@ struct rte_net_hdr_lens {
>  uint32_t rte_net_get_ptype(const struct rte_mbuf *m,
>   struct rte_net_hdr_lens *hdr_lens, uint32_t layers);
> 
> +/**
> + * Fix pseudo header checksum
> + *
> + * This function fixes pseudo header checksum for TSO and non-TSO tcp/udp in
> + * provided mbufs packet data.
> + *
> + * - for non-TSO tcp/udp packets full pseudo-header checksum is counted and 
> set
> + *   in packet data,
> + * - for TSO the IP payload length is not included in pseudo header.
> + *
> + * This function expects that used headers are in the first data segment of
> + * mbuf, and are not fragmented.
> + *
> + * @param m
> + *   The packet mbuf to be validated.
> + * @return
> + *   0 if checksum is initialized properly
> + */
> +static inline int
> +rte_phdr_cksum_fix(struct rte_mbuf *m)
> +{
> + struct ipv4_hdr *ipv4_hdr;
> + struct ipv6_hdr *ipv6_hdr;
> + struct tcp_hdr *tcp_hdr;
> + struct udp_hdr *udp_hdr;
> + uint64_t ol_flags = m->ol_flags;
> + uint64_t inner_l3_offset = m->l2_len;
> +
> + if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
> + inner_l3_offset += m->outer_l2_len + m->outer_l3_len;
> +
> + /* headers are fragmented */
> + if (unlikely(rte_pktmbuf_data_len(m) >= inner_l3_offset + m->l3_len +
> + m->l4_len))

Might be better to move that check into rte_validate_tx_offload(),
so it would be called only when TX_DEBUG is on.
Another thing, shouldn't it be:
if (rte_pktmbuf_data_len(m) < inner_l3_offset + m->l3_len + m->l4_len)
?
Konstantin


> + return -ENOTSUP;
> +
> + if ((ol_flags & PKT_TX_UDP_CKSUM) == PKT_TX_UDP_CKSUM) {
> + if (ol_flags & PKT_TX_IPV4) {
> + ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *,
> + inner_l3_offset);
> +
> + if (ol_flags & PKT_TX_IP_CKSUM)
> + ipv4_hdr->hdr_checksum = 0;
> +
> + udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr +
> + m->l3_len);
> + udp_hdr->dgram_cksum = rte_ipv4_phdr_cksum(ipv4_hdr,
> + ol_flags);
> + } else {
> + ipv6_hdr = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *,
> + inner_l3_offset);
> + /* non-TSO udp */
> + udp_hdr = rte_pktmbuf_mtod_offset(m, struct udp_hdr *,
> + inner_l3_offset + m->l3_len);
> + udp_hdr->dgram_cksum = rte_ipv6_phdr_cksum(ipv6_hdr,
> + ol_flags);
> + }
> + } else if ((ol_flags & PKT_TX_TCP_CKSUM) ||
> + (ol_flags & PKT_TX_TCP_SEG)) {
> + if (ol_flags & PKT_TX_IPV4) {
> + ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *,
> + inner_l3_offset);
> +
> +   

[dpdk-dev] [PATCH] Revert "bonding: use existing enslaved device queues"

2016-10-24 Thread Declan Doherty
On 14/10/16 00:37, Eric Kinzie wrote:
> On Wed Oct 12 16:24:21 +0100 2016, Bruce Richardson wrote:
>> On Wed, Oct 12, 2016 at 04:24:54PM +0300, Ilya Maximets wrote:
>>> On 07.10.2016 05:02, Eric Kinzie wrote:
 On Wed Sep 07 15:28:10 +0300 2016, Ilya Maximets wrote:
> This reverts commit 5b7bb2bda5519b7800f814df64d4e015282140e5.
>
> It is necessary to reconfigure all queues every time because configuration
> can be changed.
>
> For example, if we're reconfiguring bonding device with new memory pool,
> already configured queues will still use the old one. And if the old
> mempool be freed, application likely will panic in attempt to use
> freed mempool.
>
> This happens when we use the bonding device with OVS 2.6 while MTU
> reconfiguration:
>
> PANIC in rte_mempool_get_ops():
> assert "(ops_index >= 0) && (ops_index < RTE_MEMPOOL_MAX_OPS_IDX)" failed
>
> Cc: 
> Signed-off-by: Ilya Maximets 
> ---
>  drivers/net/bonding/rte_eth_bond_pmd.c | 10 ++
>  1 file changed, 2 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c 
> b/drivers/net/bonding/rte_eth_bond_pmd.c
> index b20a272..eb5b6d1 100644
> --- a/drivers/net/bonding/rte_eth_bond_pmd.c
> +++ b/drivers/net/bonding/rte_eth_bond_pmd.c
> @@ -1305,8 +1305,6 @@ slave_configure(struct rte_eth_dev *bonded_eth_dev,
>   struct bond_rx_queue *bd_rx_q;
>   struct bond_tx_queue *bd_tx_q;
>
> - uint16_t old_nb_tx_queues = slave_eth_dev->data->nb_tx_queues;
> - uint16_t old_nb_rx_queues = slave_eth_dev->data->nb_rx_queues;
>   int errval;
>   uint16_t q_id;
>
> @@ -1347,9 +1345,7 @@ slave_configure(struct rte_eth_dev *bonded_eth_dev,
>   }
>
>   /* Setup Rx Queues */
> - /* Use existing queues, if any */
> - for (q_id = old_nb_rx_queues;
> -  q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
> + for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
>   bd_rx_q = (struct bond_rx_queue 
> *)bonded_eth_dev->data->rx_queues[q_id];
>
>   errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, 
> q_id,
> @@ -1365,9 +1361,7 @@ slave_configure(struct rte_eth_dev *bonded_eth_dev,
>   }
>
>   /* Setup Tx Queues */
> - /* Use existing queues, if any */
> - for (q_id = old_nb_tx_queues;
> -  q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
> + for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
>   bd_tx_q = (struct bond_tx_queue 
> *)bonded_eth_dev->data->tx_queues[q_id];
>
>   errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, 
> q_id,
> --
> 2.7.4
>

 NAK

 There are still some users of this code.  Let's give them a chance to
 comment before removing it.
>>>
>>> Hi Eric,
>>>
>>> Are these users in CC-list? If not, could you, please, add them?
>>> This patch awaits in mail-list already more than a month. I think, it's 
>>> enough
>>> time period for all who wants to say something. Patch fixes a real bug that
>>> prevent using of DPDK bonding in all applications that reconfigures devices
>>> in runtime including OVS.
>>>
>> Agreed.
>>
>> Eric, does reverting this patch cause you problems directly, or is your 
>> concern
>> just with regards to potential impact to others?
>>
>> Thanks,
>> /Bruce
>
> This won't impact me directly.  The users are CCed (different thread)
> and I haven't seen any comment, so I no longer have any objection to
> reverting this change.
>
> Eric
>

As there have been no further objections and this reinstates the original 
expected behavior of the bonding driver, I'm re-ack'ing for inclusion in 
the release.

Acked-by: Declan Doherty 


[dpdk-dev] [PATCH] net/mlx5: fix handling of small mbuf sizes

2016-10-24 Thread Raslan Darawsheh
When mbufs are smaller than MRU, multi-segment support must be enabled.

Fixes: 9964b965ad69 ("net/mlx5: re-add Rx scatter support")

Signed-off-by: Raslan Darawsheh 
---
 drivers/net/mlx5/mlx5_rxq.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 4dc5cc3..62253ed 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -946,6 +946,12 @@ rxq_ctrl_setup(struct rte_eth_dev *dev, struct rxq_ctrl 
*rxq_ctrl,
(void)conf; /* Thresholds configuration (ignored). */
/* Enable scattered packets support for this queue if necessary. */
assert(mb_len >= RTE_PKTMBUF_HEADROOM);
+   /* If smaller than MRU, multi-segment support must be enabled. */
+   if (mb_len < (priv->mtu > dev->data->dev_conf.rxmode.max_rx_pkt_len ?
+dev->data->dev_conf.rxmode.max_rx_pkt_len :
+priv->mtu
+))
+   dev->data->dev_conf.rxmode.jumbo_frame = 1;
if ((dev->data->dev_conf.rxmode.jumbo_frame) &&
(dev->data->dev_conf.rxmode.max_rx_pkt_len >
 (mb_len - RTE_PKTMBUF_HEADROOM))) {
-- 
1.9.1



[dpdk-dev] [PATCH] net/mlx5: fix default set for multicast traffic

2016-10-24 Thread Raslan Darawsheh
Remove non-IPv6 multicast traffic with destination MAC 33:33:* from the
default set when not in promiscuous or allmulticast modes.

Fixes: 0497ddaac511 ("mlx5: add special flows for broadcast and IPv6 multicast")

Signed-off-by: Raslan Darawsheh 
---
 drivers/net/mlx5/mlx5_rxmode.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/mlx5/mlx5_rxmode.c b/drivers/net/mlx5/mlx5_rxmode.c
index 173e6e8..4ffe703 100644
--- a/drivers/net/mlx5/mlx5_rxmode.c
+++ b/drivers/net/mlx5/mlx5_rxmode.c
@@ -104,7 +104,6 @@ static const struct special_flow_init special_flow_init[] = 
{
.hash_types =
1 << HASH_RXQ_UDPV6 |
1 << HASH_RXQ_IPV6 |
-   1 << HASH_RXQ_ETH |
0,
.per_vlan = 1,
},
-- 
1.9.1



[dpdk-dev] [PATCH] Revert "bonding: use existing enslaved device queues"

2016-10-24 Thread Jan Blunck
On Wed, Oct 19, 2016 at 5:47 AM, Ilya Maximets  
wrote:
> On 18.10.2016 18:19, Jan Blunck wrote:
>> On Tue, Oct 18, 2016 at 2:49 PM, Ilya Maximets  
>> wrote:
>>> On 18.10.2016 15:28, Jan Blunck wrote:
 If the application already configured queues the PMD should not
 silently claim ownership and reset them.

 What exactly is the problem when changing MTU? This works fine from
 what I can tell.
>>>
>>> Following scenario leads to APP PANIC:
>>>
>>> 1. mempool_1 = rte_mempool_create()
>>> 2. rte_eth_rx_queue_setup(bond0, ..., mempool_1);
>>> 3. rte_eth_dev_start(bond0);
>>> 4. mempool_2 = rte_mempool_create();
>>> 5. rte_eth_dev_stop(bond0);
>>> 6. rte_eth_rx_queue_setup(bond0, ..., mempool_2);
>>> 7. rte_eth_dev_start(bond0);
>>> * RX queues still use 'mempool_1' because reconfiguration doesn't 
>>> affect them. *
>>> 8. rte_mempool_free(mempool_1);
>>> 9. On any rx operation we'll get PANIC because of using freed 
>>> 'mempool_1':
>>>  PANIC in rte_mempool_get_ops():
>>>  assert "(ops_index >= 0) && (ops_index < RTE_MEMPOOL_MAX_OPS_IDX)" 
>>> failed
>>>
>>> You may just start OVS 2.6 with DPDK bonding device and attempt to change 
>>> MTU via 'mtu_request'.
>>> Bug is easily reproducible.
>>>
>>
>> I see. I'm not 100% sure that this is expected to work without leaking the
>> driver's queues though. The driver is allowed to do allocations in
>> its rx_queue_setup() function that are being freed via
>> rx_queue_release() later. But rx_queue_release() is only called if you
>> reconfigure the
>> device with 0 queues. From what I understand there is no other way to
>> reconfigure a device to use another mempool.
>>
>> But ... even that wouldn't work with the bonding driver right now: the
>> bonding master only configures the slaves during startup. I can put
>> that on my todo list.
>>
>> Coming back to your original problem: changing the MTU for the bond
>> does work through rte_eth_dev_set_mtu() for slaves supporting that. In
>> any other case you could (re-)configure rxmode.max_rx_pkt_len (and
>> jumbo_frame / enable_scatter accordingly). This does work without a
>> call to rte_eth_rx_queue_setup().
>
> Thanks for suggestion, but using of rte_eth_dev_set_mtu() without
> reconfiguration will require to have mempools with huge mbufs (9KB)
> for all ports from the start. This is unacceptable because leads to
> significant performance regressions because of fast cache exhausting.
> Also this will require big work to rewrite OVS reconfiguration code
> this way.
> Anyway, it isn't the MTU only problem. Number of rx/tx descriptors
> also can't be changed in runtime.
>
>
> I don't fully understand what the use case for this 'reusing' code is.
> Could you, please, describe situation where this behaviour is necessary?

The device that is added to the bond was used before and therefore
already has allocated queues. Therefore we reuse the existing queues
of the devices instead of borrowing the queues of the bond device. If
the slave is removed from the bond again there is no need to allocate
the queues again.

Hope that clarifies the usecase,
Jan


>
> Best regards, Ilya Maximets.
>
>>>

 On Wed, Sep 7, 2016 at 2:28 PM, Ilya Maximets  
 wrote:
> This reverts commit 5b7bb2bda5519b7800f814df64d4e015282140e5.
>
> It is necessary to reconfigure all queues every time because configuration
> can be changed.
>
> For example, if we're reconfiguring bonding device with new memory pool,
> already configured queues will still use the old one. And if the old
> mempool be freed, application likely will panic in attempt to use
> freed mempool.
>
> This happens when we use the bonding device with OVS 2.6 while MTU
> reconfiguration:
>
> PANIC in rte_mempool_get_ops():
> assert "(ops_index >= 0) && (ops_index < RTE_MEMPOOL_MAX_OPS_IDX)" failed
>
> Cc: 
> Signed-off-by: Ilya Maximets 
> ---
>  drivers/net/bonding/rte_eth_bond_pmd.c | 10 ++
>  1 file changed, 2 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c 
> b/drivers/net/bonding/rte_eth_bond_pmd.c
> index b20a272..eb5b6d1 100644
> --- a/drivers/net/bonding/rte_eth_bond_pmd.c
> +++ b/drivers/net/bonding/rte_eth_bond_pmd.c
> @@ -1305,8 +1305,6 @@ slave_configure(struct rte_eth_dev *bonded_eth_dev,
> struct bond_rx_queue *bd_rx_q;
> struct bond_tx_queue *bd_tx_q;
>
> -   uint16_t old_nb_tx_queues = slave_eth_dev->data->nb_tx_queues;
> -   uint16_t old_nb_rx_queues = slave_eth_dev->data->nb_rx_queues;
> int errval;
> uint16_t q_id;
>
> @@ -1347,9 +1345,7 @@ slave_configure(struct rte_eth_dev *bonded_eth_dev,
> }
>
> /* Setup Rx Queues */
> -   /* Use existing queues, if any */
> -   for (q_id = 

[dpdk-dev] [PATCH] Revert "bonding: use existing enslaved device queues"

2016-10-24 Thread Jan Blunck
On Mon, Oct 24, 2016 at 7:02 AM, Declan Doherty
 wrote:
> On 14/10/16 00:37, Eric Kinzie wrote:
>>
>> On Wed Oct 12 16:24:21 +0100 2016, Bruce Richardson wrote:
>>>
>>> On Wed, Oct 12, 2016 at 04:24:54PM +0300, Ilya Maximets wrote:

 On 07.10.2016 05:02, Eric Kinzie wrote:
>
> On Wed Sep 07 15:28:10 +0300 2016, Ilya Maximets wrote:
>>
>> This reverts commit 5b7bb2bda5519b7800f814df64d4e015282140e5.
>>
>> It is necessary to reconfigure all queues every time because
>> configuration
>> can be changed.
>>
>> For example, if we're reconfiguring bonding device with new memory
>> pool,
>> already configured queues will still use the old one. And if the old
>> mempool be freed, application likely will panic in attempt to use
>> freed mempool.
>>
>> This happens when we use the bonding device with OVS 2.6 while MTU
>> reconfiguration:
>>
>> PANIC in rte_mempool_get_ops():
>> assert "(ops_index >= 0) && (ops_index < RTE_MEMPOOL_MAX_OPS_IDX)"
>> failed
>>
>> Cc: 
>> Signed-off-by: Ilya Maximets 
>> ---
>>  drivers/net/bonding/rte_eth_bond_pmd.c | 10 ++
>>  1 file changed, 2 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c
>> b/drivers/net/bonding/rte_eth_bond_pmd.c
>> index b20a272..eb5b6d1 100644
>> --- a/drivers/net/bonding/rte_eth_bond_pmd.c
>> +++ b/drivers/net/bonding/rte_eth_bond_pmd.c
>> @@ -1305,8 +1305,6 @@ slave_configure(struct rte_eth_dev
>> *bonded_eth_dev,
>> struct bond_rx_queue *bd_rx_q;
>> struct bond_tx_queue *bd_tx_q;
>>
>> -   uint16_t old_nb_tx_queues = slave_eth_dev->data->nb_tx_queues;
>> -   uint16_t old_nb_rx_queues = slave_eth_dev->data->nb_rx_queues;
>> int errval;
>> uint16_t q_id;
>>
>> @@ -1347,9 +1345,7 @@ slave_configure(struct rte_eth_dev
>> *bonded_eth_dev,
>> }
>>
>> /* Setup Rx Queues */
>> -   /* Use existing queues, if any */
>> -   for (q_id = old_nb_rx_queues;
>> -q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
>> +   for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues;
>> q_id++) {
>> bd_rx_q = (struct bond_rx_queue
>> *)bonded_eth_dev->data->rx_queues[q_id];
>>
>> errval =
>> rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
>> @@ -1365,9 +1361,7 @@ slave_configure(struct rte_eth_dev
>> *bonded_eth_dev,
>> }
>>
>> /* Setup Tx Queues */
>> -   /* Use existing queues, if any */
>> -   for (q_id = old_nb_tx_queues;
>> -q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
>> +   for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues;
>> q_id++) {
>> bd_tx_q = (struct bond_tx_queue
>> *)bonded_eth_dev->data->tx_queues[q_id];
>>
>> errval =
>> rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
>> --
>> 2.7.4
>>
>
> NAK
>
> There are still some users of this code.  Let's give them a chance to
> comment before removing it.


 Hi Eric,

 Are these users in CC-list? If not, could you, please, add them?
 This patch awaits in mail-list already more than a month. I think, it's
 enough
 time period for all who wants to say something. Patch fixes a real bug
 that
 prevent using of DPDK bonding in all applications that reconfigures
 devices
 in runtime including OVS.

>>> Agreed.
>>>
>>> Eric, does reverting this patch cause you problems directly, or is your
>>> concern
>>> just with regards to potential impact to others?
>>>
>>> Thanks,
>>> /Bruce
>>
>>
>> This won't impact me directly.  The users are CCed (different thread)
>> and I haven't seen any comment, so I no longer have any objection to
>> reverting this change.
>>
>> Eric
>>
>
> As there has been no further objections and this reinstates the original
> expected behavior of the bonding driver. I'm re-ack'ing for inclusion in
> release.
>
> Acked-by: Declan Doherty 

Ok, I can revert the revert for us.

Do I read this correctly that you are not interested in fixing this properly?!

Thanks,
Jan


[dpdk-dev] [PATCH 2/2] net/i40e: fix VF bonded device link down

2016-10-24 Thread Wu, Jingjing


> -Original Message-
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Qiming Yang
> Sent: Thursday, October 13, 2016 2:07 PM
> To: dev at dpdk.org
> Cc: Yang, Qiming 
> Subject: [dpdk-dev] [PATCH 2/2] net/i40e: fix VF bonded device link down
> 
> Originally, using DPDK as host driver, when VF bonded device
> uses I40E_VIRTCHNL_OP_GET_LINK_STAT to query PF the link status,

If a VF device is used as a slave of a bond device, it will be polled periodically
through an alarm. An interrupt is involved here.
The VF will then send an I40E_VIRTCHNL_OP_GET_LINK_STAT message to the PF to get the
status. The response is handled by the interrupt callback, so an interrupt is involved
here again. That's why the bond device cannot be brought up.

> This patch uses PF to notify link status instead of VF query.
This patch changes that by removing the I40E_VIRTCHNL_OP_GET_LINK_STAT message:
the link status in the VF driver is updated when the PF driver notifies it, and the
VF stores the link status locally. The VF driver just returns the local status when
it is requested.

> Fixes: 5c9222058df7 ("i40e: move to drivers/net/")
The same comment as on your previous patch: this is not the exact commit that
introduced this issue.

Thanks
Jingjing


[dpdk-dev] [PATCH 1/2] net/i40e: fix link status change interrupt

2016-10-24 Thread Wu, Jingjing


> -Original Message-
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Qiming Yang
> Sent: Thursday, October 13, 2016 2:07 PM
> To: dev at dpdk.org
> Cc: Yang, Qiming 
> Subject: [dpdk-dev] [PATCH 1/2] net/i40e: fix link status change interrupt
> 
> Previously, link status interrupt in i40e is achieved by checking
> LINK_STAT_CHANGE_MASK in PFINT_ICR0 register which is provided only
> for diagnostic use. Instead, drivers need to get the link status
> change notification by using LSE (Link Status Event).
> 
> This patch enables LSE and calls LSC callback when the event is
> received. This patch also removes the processing on
> LINK_STAT_CHANGE_MASK.

Good description! Thanks!
> Fixes: 5c9222058df7 ("i40e: move to drivers/net/")

Acked-by: Jingjing Wu  with minor comment:
The commit 5c9222058df7 ("i40e: move to drivers/net/") just moved the i40e
PMD driver code to the current folder. It is not the commit that introduced the issue.
Maybe what you are looking for is 4861cde46116 ("i40e: new poll mode driver")


Thanks
Jingjing


[dpdk-dev] [PATCH] doc: announce ABI change for ethtool app enhance

2016-10-24 Thread Wu, Jingjing


> -Original Message-
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Qiming Yang
> Sent: Sunday, October 9, 2016 11:17 AM
> To: dev at dpdk.org
> Cc: Yang, Qiming 
> Subject: [dpdk-dev] [PATCH] doc: announce ABI change for ethtool app enhance
> 
> This patch adds a notice that the ABI change for ethtool app to
> get the NIC firmware version in the 17.02 release.
> 
> Signed-off-by: Qiming Yang 
> ---
>  doc/guides/rel_notes/deprecation.rst | 4 
>  1 file changed, 4 insertions(+)
> 
> diff --git a/doc/guides/rel_notes/deprecation.rst 
> b/doc/guides/rel_notes/deprecation.rst
> index 845d2aa..60bd7ed 100644
> --- a/doc/guides/rel_notes/deprecation.rst
> +++ b/doc/guides/rel_notes/deprecation.rst
> @@ -62,3 +62,7 @@ Deprecation Notices
>  * API will change for ``rte_port_source_params`` and ``rte_port_sink_params``
>structures. The member ``file_name`` data type will be changed from
>``char *`` to ``const char *``. This change targets release 16.11.
> +
> +* In 17.02 ABI change is planned: the ``rte_eth_dev_info`` structure
> +  will be extended with a new member ``fw_version`` in order to store
> +  the NIC firmware version.
> --
> 2.7.4

Acked-by: Jingjing Wu 



[dpdk-dev] [PATCH] net/i40e: fix the hash filter invalid calculation in X722

2016-10-24 Thread Wu, Jingjing

> -Original Message-
> From: Guo, Jia
> Sent: Thursday, October 20, 2016 10:49 AM
> To: Zhang, Helin ; Wu, Jingjing  intel.com>
> Cc: dev at dpdk.org; Guo, Jia ; Yigit, Ferruh 
> 
> Subject: [PATCH] net/i40e: fix the hash filter invalid calculation in X722
> 
> As X722 extracts IPv4 header to Field Vector different with XL710/X710,
> need to corresponding to modify the fields of IPv4 header in input set
> to map different default Field Vector Table of different NICs.
> Signed-off-by: Jeff Guo 
> ---
> v3:
> remove the x722 macro
> v2:
> fix compile error when x722 macro is not defined and simplify
> the code to avoid duplication.
> ---
>  drivers/net/i40e/i40e_ethdev.c | 60 
> +-
>  1 file changed, 47 insertions(+), 13 deletions(-)
> 

How about changing the commit log to something like:

When verifying the Hash filtering on X722, we found the behavior was not 
expected. For example, the hash value in descriptor is incorrect.
That was because X722 uses different way of hash key word selection comparing 
with X710/XL710.
This patch fixes it by setting X722 specific key selection.

And few minor comments:

If this is not the first patch, please use [PATCH v3] instead of [PATCH].
And the fixes line is missed.

Thanks
Jingjing


[dpdk-dev] [PATCH] net/i40e: fix fdir configure failed issue in X710

2016-10-24 Thread Wu, Jingjing


> -Original Message-
> From: Guo, Jia
> Sent: Thursday, October 20, 2016 10:48 AM
> To: Zhang, Helin ; Wu, Jingjing  intel.com>
> Cc: dev at dpdk.org; Guo, Jia ; Yigit, Ferruh 
> 
> Subject: [PATCH] net/i40e: fix fdir configure failed issue in X710
> 
> The correct way to distinguish the behavior of X722 and X710
> or other NICs should be using the mac type but not X722 macro.
> 
The code change looks fine, but the commit log looks unclear. I just don't know what the
fdir configuration failure is.
Could you describe it in your commit log?
And a minor comment is like

I40E_RSS_HENA_ALL seems a part of I40E_RSS_HENA_ALL_X722, why not

#define I40E_RSS_HENA_ALL_X722  I40E_RSS_HENA_ALL | I40E_FILTER_PCTYPE_XX

Thanks
Jingjing



[dpdk-dev] [PATCH 2/2] examples/tep_term: Fix packet len for multi-seg mbuf

2016-10-24 Thread Tan, Jianfeng


> -Original Message-
> From: Michael Qiu [mailto:qdy220091330 at gmail.com]
> Sent: Tuesday, October 18, 2016 1:49 PM
> To: dev at dpdk.org
> Cc: Tan, Jianfeng; Michael Qiu
> Subject: [PATCH 2/2] examples/tep_term: Fix packet len for multi-seg mbuf
> 
> For multi-seg mbuf, ip->total_length should be pkt_len subtract
> ether len.
> 
> Fixes: 4abe471ed6fc("examples/tep_term: implement VXLAN processing")
> 
> Signed-off-by: Michael Qiu 

Acked-by: Jianfeng Tan 

Thanks,
Jianfeng


[dpdk-dev] [PATCH 1/2] examples/tep_term: Fix l4_len issue

2016-10-24 Thread Tan, Jianfeng


> -Original Message-
> From: Michael Qiu [mailto:qdy220091330 at gmail.com]
> Sent: Tuesday, October 18, 2016 1:49 PM
> To: dev at dpdk.org
> Cc: Tan, Jianfeng; Michael Qiu
> Subject: [PATCH 1/2] examples/tep_term: Fix l4_len issue
> 
> l4_len is not fixed, although mostly it is a fixed value,
> but when guest using iperf to do some tests, the l4_len
> will have another 12 bytes optional fields.
> 
> Fixes: 2bb43bd4350a("examples/tep_term: add TSO offload configuration")
> 
> Signed-off-by: Michael Qiu 

Acked-by: Jianfeng Tan 

Thanks,
Jianfeng


[dpdk-dev] rte_kni_tx_burst() hangs because of no free descriptors

2016-10-24 Thread Zhang, Helin
Hi Yingzhi

Thank you for the report! The description is not so clear, at least for me.
Please help to narrow down the issue by yourself.
How many packets would it have when calling the TX function?
Why would it return 0 after calling the TX function? No memory? Or a return from
somewhere else? Have you found anything?

Regards,
Helin

> -Original Message-
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of yingzhi
> Sent: Sunday, October 23, 2016 9:30 PM
> To: users; dev at dpdk.org
> Subject: [dpdk-dev] rte_kni_tx_burst() hangs because of no free descriptors
> 
> -
> Hi Experts,
> 
> Background:
> 
> We are using DPDK to develop a LoadBalancer following below logic: When
> a new packet is received:
>  1. if the dst_addr is management IP, forward to KNI. 2. if the dst_addr is in
> VIP list, select backend and forward(modify dst mac address). 3. otherwise
> drop the packet.
> 
> At this stage, we use one single thread for KNI forwarding and another for
> VIP forwarding(forward to eth).
> 
> DPDK version: 16.07
>  NIC: 82599ES 10-Gigabit SFI/SFP+ Network Connection
>  Linux: 14.04.1-Ubuntu x64
> 
> Problem description:
> 
> The program runs correctly for some time (around 2 hours for 400Mb traffic).
> But then it will hang. When the problem happens, rte_eth_tx_burst() will not be able to
> send out any packets(always returns 0). We tracked into that function and
> noticed it is actually calling ixgbe driver's ixgbe_xmit_pkts_vec() function 
> in
> our environment, because we use default tx queue configuration, after
> printing some info, we found if the free function works fine:
>  tx_rs_thresh: 32, tx_free_thresh: 32, nb_tx_free: 31
> 
> it will trigger free and make 32 more free descriptors:
>  tx_rs_thresh: 32, tx_free_thresh: 32, nb_tx_free: 62
> 
> but when something going wrong, it will no longer free anything:
>  tx_rs_thresh: 32, tx_free_thresh: 32, nb_tx_free: 0 tx_rs_thresh: 32,
> tx_free_thresh: 32, nb_tx_free: 0
> 
> It may be related to the DD flag of the descriptor but we are not quite sure.
> 
> Our program logic:
> 
> create two mbuf pools on socket 0, one for rx_queue and one for kni. (all
> lcore threads runs on socket0)
> 
> init kni interface with rte_kni_alloc()
> 
> 
> init one NIC interface with
>  rte_eth_dev_configure(); rte_eth_rx_queue_setup();
> rte_eth_tx_queue_setup(); rte_eth_dev_start();
> 
> 
> 
> in the eth main loop: (code is simplified)
>  while(1) { n = rte_eth_rx_burst(packets); for (i = 0; i < n; ++i)
>   { if
> (SEND_TO_KNI) { m = rte_kni_tx_burst(packets[i]); if 
> (m != 1))
> { rte_pktmbuf_free(packets[i]); } }   
>   if (SEND_TO_ETH)
> { // after modify the packet m = 
> rte_eth_tx_burst(packets[i]);
> if (m != 1)) { rte_pktmbuf_free(packets[i]); }
>  } //
> otherwise drop the packet rte_pktmbuf_free(packets[i]); } }
> 
> 
> Please advise if I'm using DPDK in a wrong way. Sorry if I missed something
> basic, I'm new to DPDK.
> 
> Thanks in advance
>  Best regards


[dpdk-dev] [PATCH 1/2] net/i40e: fix link status change interrupt

2016-10-24 Thread Yang, Qiming
Ferruh,
Thank you for your reminder, I will remember to CC to the maintainer in future. 

Jingjing and Helin,
Can you help to review these two patches?

Thanks,
Qiming

-Original Message-
From: Yigit, Ferruh 
Sent: Wednesday, October 19, 2016 6:57 PM
To: Yang, Qiming ; dev at dpdk.org
Cc: Wu, Jingjing 
Subject: Re: [dpdk-dev] [PATCH 1/2] net/i40e: fix link status change interrupt

Hi Qiming,

On 10/13/2016 7:07 AM, Qiming Yang wrote:
> Previously, link status interrupt in i40e is achieved by checking 
> LINK_STAT_CHANGE_MASK in PFINT_ICR0 register which is provided only 
> for diagnostic use. Instead, drivers need to get the link status 
> change notification by using LSE (Link Status Event).
> 
> This patch enables LSE and calls LSC callback when the event is 
> received. This patch also removes the processing on 
> LINK_STAT_CHANGE_MASK.
> 
> Fixes: 5c9222058df7 ("i40e: move to drivers/net/")
> 
> Signed-off-by: Qiming Yang 

CC: Jingjing Wu 

Can you please add the maintainer(s) to CC when sending a patch? Mail traffic on
dpdk.org keeps increasing and it is hard to follow. Adding the maintainer to
CC helps the maintainer a lot.

I guess everybody knows, but just as a reminder, maintainer list kept in
file: http://dpdk.org/browse/dpdk/tree/MAINTAINERS


Thanks,
ferruh



[dpdk-dev] Manual link speed/duplex configuration not working with DPDK

2016-10-24 Thread Lu, Wenzhuo
Hi Ananda,

> -Original Message-
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Ananda
> Sathyanarayana
> Sent: Saturday, October 22, 2016 3:27 AM
> To: dev at dpdk.org
> Cc: ananda at versa-networks.com; Vignesh Chinnakkannu
> Subject: [dpdk-dev] Manual link speed/duplex configuration not working with
> DPDK
> 
> Hi All,
> 
> 
> 
> While testing manual link speed/duplex configuration with DPDK 1.7.1, I
> observed the same issues mentioned by the below post
> 
> http://dpdk.org/ml/archives/dev/2015-January/010834.html. I see the same
> issue with 16.04 as well.
Don't know the history. Seems it's a discussion but not a patch. Guess that's why
it's not accepted.
It looks OK to me. Maybe we can create a patch for it?

> 
> 
> 
> Looks like the above patch is not accepted by the DPDK community yet.  Any
> specific reason ?
> 
> 
> 
> From the code, it looks like, hw->mac.autoneg, variable is used to switch
> between calling either autoneg function or forcing speed/duplex function.
> But this variable is not modified in eth_em_start/eth_igb_start routines (it 
> is
> always set to 1) while forcing the link.
> 
> 
> 
> s32 e1000_setup_copper_link_generic(struct e1000_hw *hw)
> 
> {
> 
> s32 ret_val;
> 
> bool link;
> 
> 
> 
> DEBUGFUNC("e1000_setup_copper_link_generic");
> 
> 
> 
> if (hw->mac.autoneg) {  always set, is not modified
> in eth_em_start/eth_igb_start
> 
> 
> 
> /* Setup autoneg and flow control advertisement and perform
> 
>  * autonegotiation.
> 
>  */
> 
> ret_val = e1000_copper_link_autoneg(hw);
> 
> if (ret_val)
> 
> return ret_val;
> 
> } else {
> 
> /* PHY will be set to 10H, 10F, 100H or 100F
> 
>  * depending on user settings.
> 
>  */
> 
> DEBUGOUT("Forcing Speed and Duplex\n");
> 
> ret_val = hw->phy.ops.force_speed_duplex(hw);
>  Not called at all
> 
> if (ret_val) {
> 
> DEBUGOUT("Error Forcing Speed and Duplex\n");
> 
> return ret_val;
> 
> }
> 
> }
> 
> 
> 
> }
> 
> 
> 
> 
> 
> Thanks,
> 
> Ananda


[dpdk-dev] DPDK-QoS- Using un-used bandwidth within a class

2016-10-24 Thread sreenaath vasudevan
Hi
I am using DPDK QoS and I find something strange. I am not sure if
something is wrong with my config or my understanding of queue weights is
wrong.

In my config, I am using only 1 port and 1 subport and 1 pipe. Within that
pipe, I am using only the last class (C3). Port, subport and pipe are
configured with 100Mbps speed.
C3 is given the entire pipe's TB rate i.e entire bandwidth in essence.
In C3, I am giving relative weights of 1:4:2:2 for the four queues
q0,q1,q2,q3
When no other traffic is coming in to q0,q2,q3, I am pumping ~100Mbps in to
q1. However, I am seeing only 40% of the traffic going through q1. In other
words the max throughput allowed through the queue is based on its weight
and the unused bandwidth is not used.
Cannot the unused bandwidth from q0,q2 and q3 be used for q1?

Note-
Following is the QoS config output spit out by DPDK in syslog


SCHED: Low level config for pipe profile 0:

token bucket: period = 10, credits per period = 1, size = 25000

Traffic classes: period = 125, credits per period = [0, 0, 0, 125000]

Traffic class 3 oversubscription: weight = 0

WRR cost: [1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1], [4, 1, 2, 2]

SCHED: Low level config for subport 0:

Token bucket: period = 10, credits per period = 1, size = 25000

Traffic classes: period = 125, credits per period = [0, 0, 0, 125000]

Traffic class 3 oversubscription: wm min = 0, wm max = 0


-- 
regards
sreenaath