[dpdk-dev] [PATCH v4 2/2] i40e: Enable bad checksum flags in i40e vPMD

2016-10-06 Thread Jeff Shaw
From: Damjan Marion <damar...@cisco.com>

Decode the checksum flags from the rx descriptor, setting
the appropriate bit in the mbuf ol_flags field when the flag
indicates a bad checksum.

Signed-off-by: Damjan Marion 
Signed-off-by: Jeff Shaw 
---
 drivers/net/i40e/i40e_rxtx_vec.c | 48 +++-
 1 file changed, 28 insertions(+), 20 deletions(-)

diff --git a/drivers/net/i40e/i40e_rxtx_vec.c b/drivers/net/i40e/i40e_rxtx_vec.c
index 6c63141..ccd4956 100644
--- a/drivers/net/i40e/i40e_rxtx_vec.c
+++ b/drivers/net/i40e/i40e_rxtx_vec.c
@@ -138,19 +138,14 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 static inline void
 desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts)
 {
-   __m128i vlan0, vlan1, rss;
-   union {
-   uint16_t e[4];
-   uint64_t dword;
-   } vol;
+   __m128i vlan0, vlan1, rss, l3_l4e;

/* mask everything except RSS, flow director and VLAN flags
 * bit2 is for VLAN tag, bit11 for flow director indication
 * bit13:12 for RSS indication.
 */
-   const __m128i rss_vlan_msk = _mm_set_epi16(
-   0x0000, 0x0000, 0x0000, 0x0000,
-   0x3804, 0x3804, 0x3804, 0x3804);
+   const __m128i rss_vlan_msk = _mm_set_epi32(
+   0x1c03804, 0x1c03804, 0x1c03804, 0x1c03804);

/* map rss and vlan type to rss hash and vlan flag */
const __m128i vlan_flags = _mm_set_epi8(0, 0, 0, 0,
@@ -163,23 +158,36 @@ desc_to_olflags_v(__m128i descs[4], struct rte_mbuf 
**rx_pkts)
PKT_RX_RSS_HASH | PKT_RX_FDIR, PKT_RX_RSS_HASH, 0, 0,
0, 0, PKT_RX_FDIR, 0);

-   vlan0 = _mm_unpackhi_epi16(descs[0], descs[1]);
-   vlan1 = _mm_unpackhi_epi16(descs[2], descs[3]);
-   vlan0 = _mm_unpacklo_epi32(vlan0, vlan1);
+   const __m128i l3_l4e_flags = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
+   PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD | 
PKT_RX_IP_CKSUM_BAD,
+   PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD,
+   PKT_RX_EIP_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD,
+   PKT_RX_EIP_CKSUM_BAD,
+   PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD,
+   PKT_RX_L4_CKSUM_BAD,
+   PKT_RX_IP_CKSUM_BAD,
+   0);
+
+   vlan0 = _mm_unpackhi_epi32(descs[0], descs[1]);
+   vlan1 = _mm_unpackhi_epi32(descs[2], descs[3]);
+   vlan0 = _mm_unpacklo_epi64(vlan0, vlan1);

vlan1 = _mm_and_si128(vlan0, rss_vlan_msk);
vlan0 = _mm_shuffle_epi8(vlan_flags, vlan1);

-   rss = _mm_srli_epi16(vlan1, 11);
+   rss = _mm_srli_epi32(vlan1, 11);
rss = _mm_shuffle_epi8(rss_flags, rss);

+   l3_l4e = _mm_srli_epi32(vlan1, 22);
+   l3_l4e = _mm_shuffle_epi8(l3_l4e_flags, l3_l4e);
+
vlan0 = _mm_or_si128(vlan0, rss);
-   vol.dword = _mm_cvtsi128_si64(vlan0);
+   vlan0 = _mm_or_si128(vlan0, l3_l4e);

-   rx_pkts[0]->ol_flags = vol.e[0];
-   rx_pkts[1]->ol_flags = vol.e[1];
-   rx_pkts[2]->ol_flags = vol.e[2];
-   rx_pkts[3]->ol_flags = vol.e[3];
+   rx_pkts[0]->ol_flags = _mm_extract_epi16(vlan0, 0);
+   rx_pkts[1]->ol_flags = _mm_extract_epi16(vlan0, 2);
+   rx_pkts[2]->ol_flags = _mm_extract_epi16(vlan0, 4);
+   rx_pkts[3]->ol_flags = _mm_extract_epi16(vlan0, 6);
 }
 #else
 #define desc_to_olflags_v(desc, rx_pkts) do {} while (0)
@@ -754,7 +762,8 @@ i40e_rx_vec_dev_conf_condition_check(struct rte_eth_dev 
*dev)
 #ifndef RTE_LIBRTE_I40E_RX_OLFLAGS_ENABLE
/* whithout rx ol_flags, no VP flag report */
if (rxmode->hw_vlan_strip != 0 ||
-   rxmode->hw_vlan_extend != 0)
+   rxmode->hw_vlan_extend != 0 ||
+   rxmode->hw_ip_checksum != 0)
return -1;
 #endif

@@ -765,8 +774,7 @@ i40e_rx_vec_dev_conf_condition_check(struct rte_eth_dev 
*dev)
 /* - no csum error report support
 * - no header split support
 */
-   if (rxmode->hw_ip_checksum == 1 ||
-   rxmode->header_split == 1)
+   if (rxmode->header_split == 1)
return -1;

return 0;
-- 
2.1.0



[dpdk-dev] [PATCH v4 1/2] i40e: Add packet_type metadata in the i40e vPMD

2016-10-06 Thread Jeff Shaw
From: Damjan Marion <damar...@cisco.com>

The ptype is decoded from the rx descriptor and stored
in the packet type field in the mbuf using the same function
as the non-vector driver.

Signed-off-by: Damjan Marion 
Signed-off-by: Jeff Shaw 
Acked-by: Qi Zhang 
---

Changes in v2:
 - Add missing reference to i40e_recv_scattered_pkts_vec() when
   querying supported packet types.

Changes in v3:
 - None. (Please ignore this version).

Changes in v4:
 - Fix rss/fdir status mask and shift to get accurate Flow Director Filter
   Match (FLM) indication.

 drivers/net/i40e/i40e_rxtx.c | 567 +--
 drivers/net/i40e/i40e_rxtx.h | 563 ++
 drivers/net/i40e/i40e_rxtx_vec.c |  16 ++
 3 files changed, 582 insertions(+), 564 deletions(-)

diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index 554d167..7433480 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -174,569 +174,6 @@ i40e_get_iee15888_flags(struct rte_mbuf *mb, uint64_t 
qword)
 }
 #endif

-/* For each value it means, datasheet of hardware can tell more details
- *
- * @note: fix i40e_dev_supported_ptypes_get() if any change here.
- */
-static inline uint32_t
-i40e_rxd_pkt_type_mapping(uint8_t ptype)
-{
-   static const uint32_t type_table[UINT8_MAX + 1] __rte_cache_aligned = {
-   /* L2 types */
-   /* [0] reserved */
-   [1] = RTE_PTYPE_L2_ETHER,
-   [2] = RTE_PTYPE_L2_ETHER_TIMESYNC,
-   /* [3] - [5] reserved */
-   [6] = RTE_PTYPE_L2_ETHER_LLDP,
-   /* [7] - [10] reserved */
-   [11] = RTE_PTYPE_L2_ETHER_ARP,
-   /* [12] - [21] reserved */
-
-   /* Non tunneled IPv4 */
-   [22] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_L4_FRAG,
-   [23] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_L4_NONFRAG,
-   [24] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_L4_UDP,
-   /* [25] reserved */
-   [26] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_L4_TCP,
-   [27] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_L4_SCTP,
-   [28] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_L4_ICMP,
-
-   /* IPv4 --> IPv4 */
-   [29] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_TUNNEL_IP |
-   RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_INNER_L4_FRAG,
-   [30] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_TUNNEL_IP |
-   RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_INNER_L4_NONFRAG,
-   [31] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_TUNNEL_IP |
-   RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_INNER_L4_UDP,
-   /* [32] reserved */
-   [33] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_TUNNEL_IP |
-   RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_INNER_L4_TCP,
-   [34] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_TUNNEL_IP |
-   RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_INNER_L4_SCTP,
-   [35] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_TUNNEL_IP |
-   RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_INNER_L4_ICMP,
-
-   /* IPv4 --> IPv6 */
-   [36] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_TUNNEL_IP |
-   RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-   RTE_PTYPE_INNER_L4_FRAG,
-   [37] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_TUNNEL_IP |
-   RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-   RTE_PTYPE_INNER_L4_NONFRAG,
-   [38] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_TUNNEL_IP |
-   RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-   RTE_PTYPE_INNER_L4_UDP,
-   /* [39] reserved */
-   [40] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_TUNNEL_IP |
-   RTE_PTYPE_INNER_L3_IPV6

[dpdk-dev] [PATCH v3 1/2] i40e: Add packet_type metadata in the i40e vPMD

2016-10-06 Thread Jeff Shaw
From: Damjan Marion <damar...@cisco.com>

The ptype is decoded from the rx descriptor and stored
in the packet type field in the mbuf using the same function
as the non-vector driver.

Signed-off-by: Damjan Marion 
Signed-off-by: Jeff Shaw 
Acked-by: Qi Zhang 
---

Changes in v2:
 - Add missing reference to i40e_recv_scattered_pkts_vec() when
   querying supported packet types.

Changes in v3:
 - None.

 drivers/net/i40e/i40e_rxtx.c | 567 +--
 drivers/net/i40e/i40e_rxtx.h | 563 ++
 drivers/net/i40e/i40e_rxtx_vec.c |  16 ++
 3 files changed, 582 insertions(+), 564 deletions(-)

diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index 554d167..7433480 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -174,569 +174,6 @@ i40e_get_iee15888_flags(struct rte_mbuf *mb, uint64_t 
qword)
 }
 #endif

-/* For each value it means, datasheet of hardware can tell more details
- *
- * @note: fix i40e_dev_supported_ptypes_get() if any change here.
- */
-static inline uint32_t
-i40e_rxd_pkt_type_mapping(uint8_t ptype)
-{
-   static const uint32_t type_table[UINT8_MAX + 1] __rte_cache_aligned = {
-   /* L2 types */
-   /* [0] reserved */
-   [1] = RTE_PTYPE_L2_ETHER,
-   [2] = RTE_PTYPE_L2_ETHER_TIMESYNC,
-   /* [3] - [5] reserved */
-   [6] = RTE_PTYPE_L2_ETHER_LLDP,
-   /* [7] - [10] reserved */
-   [11] = RTE_PTYPE_L2_ETHER_ARP,
-   /* [12] - [21] reserved */
-
-   /* Non tunneled IPv4 */
-   [22] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_L4_FRAG,
-   [23] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_L4_NONFRAG,
-   [24] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_L4_UDP,
-   /* [25] reserved */
-   [26] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_L4_TCP,
-   [27] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_L4_SCTP,
-   [28] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_L4_ICMP,
-
-   /* IPv4 --> IPv4 */
-   [29] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_TUNNEL_IP |
-   RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_INNER_L4_FRAG,
-   [30] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_TUNNEL_IP |
-   RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_INNER_L4_NONFRAG,
-   [31] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_TUNNEL_IP |
-   RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_INNER_L4_UDP,
-   /* [32] reserved */
-   [33] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_TUNNEL_IP |
-   RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_INNER_L4_TCP,
-   [34] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_TUNNEL_IP |
-   RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_INNER_L4_SCTP,
-   [35] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_TUNNEL_IP |
-   RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_INNER_L4_ICMP,
-
-   /* IPv4 --> IPv6 */
-   [36] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_TUNNEL_IP |
-   RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-   RTE_PTYPE_INNER_L4_FRAG,
-   [37] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_TUNNEL_IP |
-   RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-   RTE_PTYPE_INNER_L4_NONFRAG,
-   [38] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_TUNNEL_IP |
-   RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-   RTE_PTYPE_INNER_L4_UDP,
-   /* [39] reserved */
-   [40] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_TUNNEL_IP |
-   RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-   RTE_PTYPE_INNER_L4_TCP,
-   [41] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4

[dpdk-dev] [PATCH v2 2/2] i40e: Enable bad checksum flags in i40e vPMD

2016-10-05 Thread Jeff Shaw
From: Damjan Marion <damar...@cisco.com>

Decode the checksum flags from the rx descriptor, setting
the appropriate bit in the mbuf ol_flags field when the flag
indicates a bad checksum.

Signed-off-by: Damjan Marion 
Signed-off-by: Jeff Shaw 
---
 drivers/net/i40e/i40e_rxtx_vec.c | 48 +++-
 1 file changed, 28 insertions(+), 20 deletions(-)

diff --git a/drivers/net/i40e/i40e_rxtx_vec.c b/drivers/net/i40e/i40e_rxtx_vec.c
index 6c63141..d2267ad 100644
--- a/drivers/net/i40e/i40e_rxtx_vec.c
+++ b/drivers/net/i40e/i40e_rxtx_vec.c
@@ -138,19 +138,14 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 static inline void
 desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts)
 {
-   __m128i vlan0, vlan1, rss;
-   union {
-   uint16_t e[4];
-   uint64_t dword;
-   } vol;
+   __m128i vlan0, vlan1, rss, l3_l4e;

/* mask everything except RSS, flow director and VLAN flags
 * bit2 is for VLAN tag, bit11 for flow director indication
 * bit13:12 for RSS indication.
 */
-   const __m128i rss_vlan_msk = _mm_set_epi16(
-   0x0000, 0x0000, 0x0000, 0x0000,
-   0x3804, 0x3804, 0x3804, 0x3804);
+   const __m128i rss_vlan_msk = _mm_set_epi32(
+   0x1c03004, 0x1c03004, 0x1c03004, 0x1c03004);

/* map rss and vlan type to rss hash and vlan flag */
const __m128i vlan_flags = _mm_set_epi8(0, 0, 0, 0,
@@ -163,23 +158,36 @@ desc_to_olflags_v(__m128i descs[4], struct rte_mbuf 
**rx_pkts)
PKT_RX_RSS_HASH | PKT_RX_FDIR, PKT_RX_RSS_HASH, 0, 0,
0, 0, PKT_RX_FDIR, 0);

-   vlan0 = _mm_unpackhi_epi16(descs[0], descs[1]);
-   vlan1 = _mm_unpackhi_epi16(descs[2], descs[3]);
-   vlan0 = _mm_unpacklo_epi32(vlan0, vlan1);
+   const __m128i l3_l4e_flags = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
+   PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD | 
PKT_RX_IP_CKSUM_BAD,
+   PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD,
+   PKT_RX_EIP_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD,
+   PKT_RX_EIP_CKSUM_BAD,
+   PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD,
+   PKT_RX_L4_CKSUM_BAD,
+   PKT_RX_IP_CKSUM_BAD,
+   0);
+
+   vlan0 = _mm_unpackhi_epi32(descs[0], descs[1]);
+   vlan1 = _mm_unpackhi_epi32(descs[2], descs[3]);
+   vlan0 = _mm_unpacklo_epi64(vlan0, vlan1);

vlan1 = _mm_and_si128(vlan0, rss_vlan_msk);
vlan0 = _mm_shuffle_epi8(vlan_flags, vlan1);

-   rss = _mm_srli_epi16(vlan1, 11);
+   rss = _mm_srli_epi32(vlan1, 12);
rss = _mm_shuffle_epi8(rss_flags, rss);

+   l3_l4e = _mm_srli_epi32(vlan1, 22);
+   l3_l4e = _mm_shuffle_epi8(l3_l4e_flags, l3_l4e);
+
vlan0 = _mm_or_si128(vlan0, rss);
-   vol.dword = _mm_cvtsi128_si64(vlan0);
+   vlan0 = _mm_or_si128(vlan0, l3_l4e);

-   rx_pkts[0]->ol_flags = vol.e[0];
-   rx_pkts[1]->ol_flags = vol.e[1];
-   rx_pkts[2]->ol_flags = vol.e[2];
-   rx_pkts[3]->ol_flags = vol.e[3];
+   rx_pkts[0]->ol_flags = _mm_extract_epi16(vlan0, 0);
+   rx_pkts[1]->ol_flags = _mm_extract_epi16(vlan0, 2);
+   rx_pkts[2]->ol_flags = _mm_extract_epi16(vlan0, 4);
+   rx_pkts[3]->ol_flags = _mm_extract_epi16(vlan0, 6);
 }
 #else
 #define desc_to_olflags_v(desc, rx_pkts) do {} while (0)
@@ -754,7 +762,8 @@ i40e_rx_vec_dev_conf_condition_check(struct rte_eth_dev 
*dev)
 #ifndef RTE_LIBRTE_I40E_RX_OLFLAGS_ENABLE
/* whithout rx ol_flags, no VP flag report */
if (rxmode->hw_vlan_strip != 0 ||
-   rxmode->hw_vlan_extend != 0)
+   rxmode->hw_vlan_extend != 0 ||
+   rxmode->hw_ip_checksum != 0)
return -1;
 #endif

@@ -765,8 +774,7 @@ i40e_rx_vec_dev_conf_condition_check(struct rte_eth_dev 
*dev)
 /* - no csum error report support
 * - no header split support
 */
-   if (rxmode->hw_ip_checksum == 1 ||
-   rxmode->header_split == 1)
+   if (rxmode->header_split == 1)
return -1;

return 0;
-- 
2.1.0



[dpdk-dev] [PATCH v2 1/2] i40e: Add packet_type metadata in the i40e vPMD

2016-10-05 Thread Jeff Shaw
From: Damjan Marion <damar...@cisco.com>

The ptype is decoded from the rx descriptor and stored
in the packet type field in the mbuf using the same function
as the non-vector driver.

Signed-off-by: Damjan Marion 
Signed-off-by: Jeff Shaw 
Acked-by: Qi Zhang 
---

Changes in v2:
 - Add missing reference to i40e_recv_scattered_pkts_vec() when
   querying supported packet types.

 drivers/net/i40e/i40e_rxtx.c | 567 +--
 drivers/net/i40e/i40e_rxtx.h | 563 ++
 drivers/net/i40e/i40e_rxtx_vec.c |  16 ++
 3 files changed, 582 insertions(+), 564 deletions(-)

diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index 554d167..7433480 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -174,569 +174,6 @@ i40e_get_iee15888_flags(struct rte_mbuf *mb, uint64_t 
qword)
 }
 #endif

-/* For each value it means, datasheet of hardware can tell more details
- *
- * @note: fix i40e_dev_supported_ptypes_get() if any change here.
- */
-static inline uint32_t
-i40e_rxd_pkt_type_mapping(uint8_t ptype)
-{
-   static const uint32_t type_table[UINT8_MAX + 1] __rte_cache_aligned = {
-   /* L2 types */
-   /* [0] reserved */
-   [1] = RTE_PTYPE_L2_ETHER,
-   [2] = RTE_PTYPE_L2_ETHER_TIMESYNC,
-   /* [3] - [5] reserved */
-   [6] = RTE_PTYPE_L2_ETHER_LLDP,
-   /* [7] - [10] reserved */
-   [11] = RTE_PTYPE_L2_ETHER_ARP,
-   /* [12] - [21] reserved */
-
-   /* Non tunneled IPv4 */
-   [22] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_L4_FRAG,
-   [23] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_L4_NONFRAG,
-   [24] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_L4_UDP,
-   /* [25] reserved */
-   [26] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_L4_TCP,
-   [27] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_L4_SCTP,
-   [28] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_L4_ICMP,
-
-   /* IPv4 --> IPv4 */
-   [29] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_TUNNEL_IP |
-   RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_INNER_L4_FRAG,
-   [30] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_TUNNEL_IP |
-   RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_INNER_L4_NONFRAG,
-   [31] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_TUNNEL_IP |
-   RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_INNER_L4_UDP,
-   /* [32] reserved */
-   [33] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_TUNNEL_IP |
-   RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_INNER_L4_TCP,
-   [34] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_TUNNEL_IP |
-   RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_INNER_L4_SCTP,
-   [35] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_TUNNEL_IP |
-   RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_INNER_L4_ICMP,
-
-   /* IPv4 --> IPv6 */
-   [36] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_TUNNEL_IP |
-   RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-   RTE_PTYPE_INNER_L4_FRAG,
-   [37] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_TUNNEL_IP |
-   RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-   RTE_PTYPE_INNER_L4_NONFRAG,
-   [38] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_TUNNEL_IP |
-   RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-   RTE_PTYPE_INNER_L4_UDP,
-   /* [39] reserved */
-   [40] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_TUNNEL_IP |
-   RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-   RTE_PTYPE_INNER_L4_TCP,
-   [41] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTY

[dpdk-dev] [PATCH v2 2/2] i40e: Enable bad checksum flags in i40e vPMD

2016-10-05 Thread Jeff Shaw
On Wed, Oct 05, 2016 at 04:57:28PM -0700, Chen, Jing D wrote:
> Hi,
> 
> > -Original Message-
> > From: Shaw, Jeffrey B
> > Sent: Wednesday, October 5, 2016 5:13 PM
> > To: dev at dpdk.org
> > Cc: Zhang, Helin ; Wu, Jingjing
> > ; damarion at cisco.com; Zhang, Qi Z
> > ; Chen, Jing D 
> > Subject: [PATCH v2 2/2] i40e: Enable bad checksum flags in i40e vPMD
> > 
> > From: Damjan Marion 
> > 
> > Decode the checksum flags from the rx descriptor, setting the appropriate 
> > bit
> > in the mbuf ol_flags field when the flag indicates a bad checksum.
> > 
> > Signed-off-by: Damjan Marion 
> > Signed-off-by: Jeff Shaw 
> > ---
> >  drivers/net/i40e/i40e_rxtx_vec.c | 48 
> > +++---
> > --
> >  1 file changed, 28 insertions(+), 20 deletions(-)
> > 
> > diff --git a/drivers/net/i40e/i40e_rxtx_vec.c
> > b/drivers/net/i40e/i40e_rxtx_vec.c
> > index 6c63141..d2267ad 100644
> > --- a/drivers/net/i40e/i40e_rxtx_vec.c
> > +++ b/drivers/net/i40e/i40e_rxtx_vec.c
> > @@ -138,19 +138,14 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)  static
> > inline void  desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts) 
> >  {
> > -   __m128i vlan0, vlan1, rss;
> > -   union {
> > -   uint16_t e[4];
> > -   uint64_t dword;
> > -   } vol;
> > +   __m128i vlan0, vlan1, rss, l3_l4e;
> > 
> > /* mask everything except RSS, flow director and VLAN flags
> >  * bit2 is for VLAN tag, bit11 for flow director indication
> >  * bit13:12 for RSS indication.
> >  */
> > -   const __m128i rss_vlan_msk = _mm_set_epi16(
> > -   0x0000, 0x0000, 0x0000, 0x0000,
> > -   0x3804, 0x3804, 0x3804, 0x3804);
> > +   const __m128i rss_vlan_msk = _mm_set_epi32(
> > +   0x1c03004, 0x1c03004, 0x1c03004, 0x1c03004);

Mask is wrong here. Should be 0x1c03804, ..., etc.

> > 
> > /* map rss and vlan type to rss hash and vlan flag */
> > const __m128i vlan_flags = _mm_set_epi8(0, 0, 0, 0, @@ -163,23
> > +158,36 @@ desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts)
> > PKT_RX_RSS_HASH | PKT_RX_FDIR,
> > PKT_RX_RSS_HASH, 0, 0,
> > 0, 0, PKT_RX_FDIR, 0);
> > 
> > -   vlan0 = _mm_unpackhi_epi16(descs[0], descs[1]);
> > -   vlan1 = _mm_unpackhi_epi16(descs[2], descs[3]);
> > -   vlan0 = _mm_unpacklo_epi32(vlan0, vlan1);
> > +   const __m128i l3_l4e_flags = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
> > +   PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
> > | PKT_RX_IP_CKSUM_BAD,
> > +   PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD,
> > +   PKT_RX_EIP_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD,
> > +   PKT_RX_EIP_CKSUM_BAD,
> > +   PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD,
> > +   PKT_RX_L4_CKSUM_BAD,
> > +   PKT_RX_IP_CKSUM_BAD,
> > +   0);
> > +
> > +   vlan0 = _mm_unpackhi_epi32(descs[0], descs[1]);
> > +   vlan1 = _mm_unpackhi_epi32(descs[2], descs[3]);
> > +   vlan0 = _mm_unpacklo_epi64(vlan0, vlan1);
> > 
> > vlan1 = _mm_and_si128(vlan0, rss_vlan_msk);
> > vlan0 = _mm_shuffle_epi8(vlan_flags, vlan1);
> > 
> > -   rss = _mm_srli_epi16(vlan1, 11);
> > +   rss = _mm_srli_epi32(vlan1, 12);
> > rss = _mm_shuffle_epi8(rss_flags, rss);
> 
> My bad. Original code will use bit[13:11] to identify RSS and FDIR flag. Now 
> It masked bit 11 out when creating " rss_vlan_msk" and doing shift above,
> while it still try to use  original "rss_flags"?

Good catch.  I have no idea how you spotted that, and you're right, we should
be shifting by 11, not 12. Also the mask needs to be updated (as you
mentioned to me offline) which I noted above.

Damjan, unless you object I'll send a v3 with an updated rss_vlan_msk and
the 11 bit shift so we also get the Flow Director Filter Match (FLM)
indication.

> 


[dpdk-dev] [PATCH 0/2] Add ptype and xsum handling in i40e rx vpmd

2016-09-01 Thread Jeff Shaw
On Fri, Jul 15, 2016 at 10:26:23PM +0200, Thomas Monjalon wrote:
> 2016-07-14 09:59, Jeff Shaw:
> > Our testing suggests minimal (in some cases zero) impact to core-bound
> > forwarding throughput as measured by testpmd. Throughput increase is
> > observed in l3fwd as now the vpmd can be used with hw_ip_checksum
> > enabled and without needing '--parse-ptype'.
> > 
> > The benefits to applications using this functionality is realized when
> > Ethernet processing and L3/L4 checksum validation can be skipped.
> > 
> > We hope others can also test performance in their applications while
> > conducting a review of this series.
> 
> Thanks for the patches. They need some careful review and are a bit late
> for an integration in 16.07. Thus they are pending for 16.11.

Hello, I noticed there are other patches going into i40e ahead of this
one. Would somebody please review and merge this one if there are no
issues?

Thanks,
Jeff


[dpdk-dev] [PATCH 2/2] i40e: Enable bad checksum flags in i40e vPMD

2016-07-14 Thread Jeff Shaw
From: Damjan Marion <damar...@cisco.com>

Decode the checksum flags from the rx descriptor, setting
the appropriate bit in the mbuf ol_flags field when the flag
indicates a bad checksum.

Signed-off-by: Damjan Marion 
Signed-off-by: Jeff Shaw 
---
 drivers/net/i40e/i40e_rxtx_vec.c | 48 +++-
 1 file changed, 28 insertions(+), 20 deletions(-)

diff --git a/drivers/net/i40e/i40e_rxtx_vec.c b/drivers/net/i40e/i40e_rxtx_vec.c
index e78ac63..ace51df 100644
--- a/drivers/net/i40e/i40e_rxtx_vec.c
+++ b/drivers/net/i40e/i40e_rxtx_vec.c
@@ -138,19 +138,14 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 static inline void
 desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts)
 {
-   __m128i vlan0, vlan1, rss;
-   union {
-   uint16_t e[4];
-   uint64_t dword;
-   } vol;
+   __m128i vlan0, vlan1, rss, l3_l4e;

/* mask everything except RSS, flow director and VLAN flags
 * bit2 is for VLAN tag, bit11 for flow director indication
 * bit13:12 for RSS indication.
 */
-   const __m128i rss_vlan_msk = _mm_set_epi16(
-   0x0000, 0x0000, 0x0000, 0x0000,
-   0x3804, 0x3804, 0x3804, 0x3804);
+   const __m128i rss_vlan_msk = _mm_set_epi32(
+   0x1c03004, 0x1c03004, 0x1c03004, 0x1c03004);

/* map rss and vlan type to rss hash and vlan flag */
const __m128i vlan_flags = _mm_set_epi8(0, 0, 0, 0,
@@ -163,23 +158,36 @@ desc_to_olflags_v(__m128i descs[4], struct rte_mbuf 
**rx_pkts)
PKT_RX_RSS_HASH | PKT_RX_FDIR, PKT_RX_RSS_HASH, 0, 0,
0, 0, PKT_RX_FDIR, 0);

-   vlan0 = _mm_unpackhi_epi16(descs[0], descs[1]);
-   vlan1 = _mm_unpackhi_epi16(descs[2], descs[3]);
-   vlan0 = _mm_unpacklo_epi32(vlan0, vlan1);
+   const __m128i l3_l4e_flags = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
+   PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD | 
PKT_RX_IP_CKSUM_BAD,
+   PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD,
+   PKT_RX_EIP_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD,
+   PKT_RX_EIP_CKSUM_BAD,
+   PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD,
+   PKT_RX_L4_CKSUM_BAD,
+   PKT_RX_IP_CKSUM_BAD,
+   0);
+
+   vlan0 = _mm_unpackhi_epi32(descs[0], descs[1]);
+   vlan1 = _mm_unpackhi_epi32(descs[2], descs[3]);
+   vlan0 = _mm_unpacklo_epi64(vlan0, vlan1);

vlan1 = _mm_and_si128(vlan0, rss_vlan_msk);
vlan0 = _mm_shuffle_epi8(vlan_flags, vlan1);

-   rss = _mm_srli_epi16(vlan1, 11);
+   rss = _mm_srli_epi32(vlan1, 12);
rss = _mm_shuffle_epi8(rss_flags, rss);

+   l3_l4e = _mm_srli_epi32(vlan1, 22);
+   l3_l4e = _mm_shuffle_epi8(l3_l4e_flags, l3_l4e);
+
vlan0 = _mm_or_si128(vlan0, rss);
-   vol.dword = _mm_cvtsi128_si64(vlan0);
+   vlan0 = _mm_or_si128(vlan0, l3_l4e);

-   rx_pkts[0]->ol_flags = vol.e[0];
-   rx_pkts[1]->ol_flags = vol.e[1];
-   rx_pkts[2]->ol_flags = vol.e[2];
-   rx_pkts[3]->ol_flags = vol.e[3];
+   rx_pkts[0]->ol_flags = _mm_extract_epi16(vlan0, 0);
+   rx_pkts[1]->ol_flags = _mm_extract_epi16(vlan0, 2);
+   rx_pkts[2]->ol_flags = _mm_extract_epi16(vlan0, 4);
+   rx_pkts[3]->ol_flags = _mm_extract_epi16(vlan0, 6);
 }
 #else
 #define desc_to_olflags_v(desc, rx_pkts) do {} while (0)
@@ -754,7 +762,8 @@ i40e_rx_vec_dev_conf_condition_check(struct rte_eth_dev 
*dev)
 #ifndef RTE_LIBRTE_I40E_RX_OLFLAGS_ENABLE
/* whithout rx ol_flags, no VP flag report */
if (rxmode->hw_vlan_strip != 0 ||
-   rxmode->hw_vlan_extend != 0)
+   rxmode->hw_vlan_extend != 0 ||
+   rxmode->hw_ip_checksum != 0)
return -1;
 #endif

@@ -765,8 +774,7 @@ i40e_rx_vec_dev_conf_condition_check(struct rte_eth_dev 
*dev)
 /* - no csum error report support
 * - no header split support
 */
-   if (rxmode->hw_ip_checksum == 1 ||
-   rxmode->header_split == 1)
+   if (rxmode->header_split == 1)
return -1;

return 0;
-- 
2.5.0



[dpdk-dev] [PATCH 1/2] i40e: Add packet_type metadata in the i40e vPMD

2016-07-14 Thread Jeff Shaw
From: Damjan Marion <damar...@cisco.com>

The ptype is decoded from the rx descriptor and stored
in the packet type field in the mbuf using the same function
as the non-vector driver.

Signed-off-by: Damjan Marion 
Signed-off-by: Jeff Shaw 
---
 drivers/net/i40e/i40e_rxtx.c | 566 +--
 drivers/net/i40e/i40e_rxtx.h | 563 ++
 drivers/net/i40e/i40e_rxtx_vec.c |  16 ++
 3 files changed, 581 insertions(+), 564 deletions(-)

diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index d3cfb98..2903347 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -174,569 +174,6 @@ i40e_get_iee15888_flags(struct rte_mbuf *mb, uint64_t 
qword)
 }
 #endif

-/* For each value it means, datasheet of hardware can tell more details
- *
- * @note: fix i40e_dev_supported_ptypes_get() if any change here.
- */
-static inline uint32_t
-i40e_rxd_pkt_type_mapping(uint8_t ptype)
-{
-   static const uint32_t type_table[UINT8_MAX + 1] __rte_cache_aligned = {
-   /* L2 types */
-   /* [0] reserved */
-   [1] = RTE_PTYPE_L2_ETHER,
-   [2] = RTE_PTYPE_L2_ETHER_TIMESYNC,
-   /* [3] - [5] reserved */
-   [6] = RTE_PTYPE_L2_ETHER_LLDP,
-   /* [7] - [10] reserved */
-   [11] = RTE_PTYPE_L2_ETHER_ARP,
-   /* [12] - [21] reserved */
-
-   /* Non tunneled IPv4 */
-   [22] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_L4_FRAG,
-   [23] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_L4_NONFRAG,
-   [24] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_L4_UDP,
-   /* [25] reserved */
-   [26] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_L4_TCP,
-   [27] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_L4_SCTP,
-   [28] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_L4_ICMP,
-
-   /* IPv4 --> IPv4 */
-   [29] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_TUNNEL_IP |
-   RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_INNER_L4_FRAG,
-   [30] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_TUNNEL_IP |
-   RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_INNER_L4_NONFRAG,
-   [31] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_TUNNEL_IP |
-   RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_INNER_L4_UDP,
-   /* [32] reserved */
-   [33] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_TUNNEL_IP |
-   RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_INNER_L4_TCP,
-   [34] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_TUNNEL_IP |
-   RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_INNER_L4_SCTP,
-   [35] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_TUNNEL_IP |
-   RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_INNER_L4_ICMP,
-
-   /* IPv4 --> IPv6 */
-   [36] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_TUNNEL_IP |
-   RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-   RTE_PTYPE_INNER_L4_FRAG,
-   [37] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_TUNNEL_IP |
-   RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-   RTE_PTYPE_INNER_L4_NONFRAG,
-   [38] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_TUNNEL_IP |
-   RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-   RTE_PTYPE_INNER_L4_UDP,
-   /* [39] reserved */
-   [40] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_TUNNEL_IP |
-   RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-   RTE_PTYPE_INNER_L4_TCP,
-   [41] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-   RTE_PTYPE_TUNNEL_IP |
-   RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-   RTE_PTYPE_INNER_L4_SCTP,
-   [42] = RTE_PT

[dpdk-dev] [PATCH 0/2] Add ptype and xsum handling in i40e rx vpmd

2016-07-14 Thread Jeff Shaw
Our testing suggests minimal (in some cases zero) impact to core-bound
forwarding throughput as measured by testpmd. Throughput increase is
observed in l3fwd as now the vpmd can be used with hw_ip_checksum
enabled and without needing '--parse-ptype'.

The benefits to applications using this functionality is realized when
Ethernet processing and L3/L4 checksum validation can be skipped.

We hope others can also test performance in their applications while
conducting a review of this series.

Damjan Marion (2):
  i40e: Add packet_type metadata in the i40e vPMD
  i40e: Enable bad checksum flags in i40e vPMD

 drivers/net/i40e/i40e_rxtx.c | 566 +--
 drivers/net/i40e/i40e_rxtx.h | 563 ++
 drivers/net/i40e/i40e_rxtx_vec.c |  64 +++--
 3 files changed, 609 insertions(+), 584 deletions(-)

-- 
2.5.0



[dpdk-dev] [PATCH] doc: fix mailing list address typo.

2016-07-13 Thread Jeff Shaw
The correct mailing list is dev at dpdk.org, not dev at dpkg.org.

Signed-off-by: Jeff Shaw 
---
 doc/guides/contributing/patches.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/guides/contributing/patches.rst 
b/doc/guides/contributing/patches.rst
index 06af91d..16a21a5 100644
--- a/doc/guides/contributing/patches.rst
+++ b/doc/guides/contributing/patches.rst
@@ -22,7 +22,7 @@ The DPDK development process has the following features:
 * Patches are reviewed publicly on the mailing list.
 * Successfully reviewed patches are merged to the master branch of the 
repository.

-The mailing list for DPDK development is `dev at dpkg.org 
<http://dpdk.org/ml/archives/dev/>`_.
+The mailing list for DPDK development is `dev at dpdk.org 
<http://dpdk.org/ml/archives/dev/>`_.
 Contributors will need to `register for the mailing list 
<http://dpdk.org/ml/listinfo/dev>`_ in order to submit patches.
 It is also worth registering for the DPDK `Patchwork 
<http://dpdk.org/dev/patchwork/project/dpdk/list/>`_

-- 
2.5.0



[dpdk-dev] [PATCH] tools: Fix typo in dpdk_nic_bind.py script.

2016-02-08 Thread Jeff Shaw
We should call sys.exit(), not divide sys by exit().

Signed-off-by: Jeff Shaw 
---
 tools/dpdk_nic_bind.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/dpdk_nic_bind.py b/tools/dpdk_nic_bind.py
index f02454e..a1923c5 100755
--- a/tools/dpdk_nic_bind.py
+++ b/tools/dpdk_nic_bind.py
@@ -317,7 +317,7 @@ def unbind_one(dev_id, force):
 f = open(filename, "a")
 except:
 print "Error: unbind failed for %s - Cannot open %s" % (dev_id, 
filename)
-sys/exit(1)
+sys.exit(1)
 f.write(dev_id)
 f.close()

-- 
2.1.0



[dpdk-dev] fm10k_rxtx.c does not compile

2015-02-18 Thread Jeff Shaw
On Wed, Feb 18, 2015 at 05:29:02PM +, Wiles, Keith wrote:
> I believe Jeff @ Intel is going to submit a patch soon.

I sent patches to fix these errors.

Thanks,
Jeff



[dpdk-dev] [PATCH] fm10k: fix clang warning flags

2015-02-18 Thread Jeff Shaw
This commit fixes the following error which was reported when
compiling with clang by removing the option.

error: unknown warning option '-Wno-unused-but-set-variable'

Signed-off-by: Jeff Shaw 
---
 lib/librte_pmd_fm10k/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/librte_pmd_fm10k/Makefile b/lib/librte_pmd_fm10k/Makefile
index 986f4ef..26663ae 100644
--- a/lib/librte_pmd_fm10k/Makefile
+++ b/lib/librte_pmd_fm10k/Makefile
@@ -55,7 +55,7 @@ else ifeq ($(CC), clang)
 #
 CFLAGS_BASE_DRIVER = -Wno-unused-parameter -Wno-unused-value
 CFLAGS_BASE_DRIVER += -Wno-strict-aliasing -Wno-format-extra-args
-CFLAGS_BASE_DRIVER += -Wno-unused-variable -Wno-unused-but-set-variable
+CFLAGS_BASE_DRIVER += -Wno-unused-variable
 CFLAGS_BASE_DRIVER += -Wno-missing-field-initializers

 else
-- 
2.1.0



[dpdk-dev] [PATCH v5 08/17] fm10k: add RX/TX single queue start/stop function

2015-02-13 Thread Jeff Shaw
Hi David, thanks for the review.

On Fri, Feb 13, 2015 at 12:31:16PM +0100, David Marchand wrote:
> Hello,
> 
> On Fri, Feb 13, 2015 at 9:19 AM, Chen Jing D(Mark) 
> wrote:
> 
> [snip]
> 
> +/*
> > + * Verify Rx packet buffer alignment is valid.
> > + *
> > + * Hardware requires specific alignment for Rx packet buffers. At
> > + * least one of the following two conditions must be satisfied.
> > + *  1. Address is 512B aligned
> > + *  2. Address is 8B aligned and buffer does not cross 4K boundary.
> > + *
> > + * Return 1 if buffer alignment satisfies at least one condition,
> > + * otherwise return 0.
> > + *
> > + * Note: Alignment is checked by the driver when the Rx queue is reset. It
> > + *   is assumed that if an entire descriptor ring can be filled with
> > + *   buffers containing valid alignment, then all buffers in that
> > mempool
> > + *   have valid address alignment. It is the responsibility of the
> > user
> > + *   to ensure all buffers have valid alignment, as it is the user who
> > + *   creates the mempool.
> > + * Note: It is assumed the buffer needs only to store a maximum size
> > Ethernet
> > + *   frame.
> > + */
> > +static inline int
> > +fm10k_addr_alignment_valid(struct rte_mbuf *mb)
> > +{
> > +   uint64_t addr = MBUF_DMA_ADDR_DEFAULT(mb);
> > +   uint64_t boundary1, boundary2;
> > +
> > +   /* 512B aligned? */
> > +   if (RTE_ALIGN(addr, 512) == addr)
> > +   return 1;
> > +
> > +   /* 8B aligned, and max Ethernet frame would not cross a 4KB
> > boundary? */
> > +   if (RTE_ALIGN(addr, 8) == addr) {
> > +   boundary1 = RTE_ALIGN_FLOOR(addr, 4096);
> > +   boundary2 = RTE_ALIGN_FLOOR(addr +
> > ETHER_MAX_VLAN_FRAME_LEN,
> > +   4096);
> > +   if (boundary1 == boundary2)
> > +   return 1;
> > +   }
> > +
> > +   /* use RTE_LOG directly to make sure this error is seen */
> > +   RTE_LOG(ERR, PMD, "%s(): Error: Invalid buffer alignment\n",
> > __func__);
> > +
> > +   return 0;
> > +}
> >
> 
> Same comment as before, do not directly use RTE_LOG.
> This is init stuff, you have a PMD_INIT_LOG macro.
Agreed, the comment should be fixed.

> 
> By the way, I need to dig deeper into this, but I can see multiple patches
> ensuring buffer alignment.
> Do we really need to validate this alignment here, if we already validated
> this constraint at the mempool level ?
> 

This is really a sanity check. The buffer alignment needs to be
checked at runtime because a user could modify the alignment. We
provide a check here to be extra safe, and hopefully to fail at
init time rather than later.

There are two ways to satisfy the alignment requirements for the
hardware. Currently the driver implements the 512B alignment, but
it is possible somebody may want to the other 8B alignment w/o
crossing a 4K page boundary.  This sanity check would help catch
any possible issues in the future related to buffer alignment.

-Jeff


[dpdk-dev] [PATCH v4 12/15] fm10k: Add scatter receive function

2015-02-11 Thread Jeff Shaw
On Wed, Feb 11, 2015 at 09:31:35AM +0800, Chen Jing D(Mark) wrote:
>  
> +uint16_t
> +fm10k_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
> + uint16_t nb_pkts)
> +{
> + struct rte_mbuf *mbuf;
> + union fm10k_rx_desc desc;
> + struct fm10k_rx_queue *q = rx_queue;
> + uint16_t count = 0;
> + uint16_t nb_rcv, nb_seg;
> + int alloc = 0;
> + uint16_t next_dd;
> + struct rte_mbuf *first_seg = q->pkt_first_seg;
> + struct rte_mbuf *last_seg = q->pkt_last_seg;
> +
> + next_dd = q->next_dd;
> + nb_rcv = 0;
> +
> + nb_seg = RTE_MIN(nb_pkts, q->alloc_thresh);
> + for (count = 0; count < nb_seg; count++) {
> + mbuf = q->sw_ring[next_dd];
> + desc = q->hw_ring[next_dd];
> + if (!(desc.d.staterr & FM10K_RXD_STATUS_DD))
> + break;
> +#ifdef RTE_LIBRTE_FM10K_DEBUG_RX
> + dump_rxd(&desc);
> +#endif
> +
> + if (++next_dd == q->nb_desc) {
> + next_dd = 0;
> + alloc = 1;
> + }
> +
> + /* Prefetch next mbuf while processing current one. */
> + rte_prefetch0(q->sw_ring[next_dd]);
> +
> + /*
> +  * When next RX descriptor is on a cache-line boundary,
> +  * prefetch the next 4 RX descriptors and the next 8 pointers
> +  * to mbufs.
> +  */
> + if ((next_dd & 0x3) == 0) {
> + rte_prefetch0(&q->hw_ring[next_dd]);
> + rte_prefetch0(&q->sw_ring[next_dd]);
> + }
> +
> + /* Fill data length */
> + rte_pktmbuf_data_len(mbuf) = desc.w.length;
> +
> + /*
> +  * If this is the first buffer of the received packet,
> +  * set the pointer to the first mbuf of the packet and
> +  * initialize its context.
> +  * Otherwise, update the total length and the number of segments
> +  * of the current scattered packet, and update the pointer to
> +  * the last mbuf of the current packet.
> +  */
> + if (!first_seg) {
> + first_seg = mbuf;
> + first_seg->pkt_len = desc.w.length;
> + } else {
> + first_seg->pkt_len =
> + (uint16_t)(first_seg->pkt_len +
> + rte_pktmbuf_data_len(mbuf));
> + first_seg->nb_segs++;
> + last_seg->next = mbuf;
> + }
> +
> + /*
> +  * If this is not the last buffer of the received packet,
> +  * update the pointer to the last mbuf of the current scattered
> +  * packet and continue to parse the RX ring.
> +  */
> + if (!(desc.d.staterr & FM10K_RXD_STATUS_EOP)) {
> + last_seg = mbuf;
> + continue;
> + }
> +
> + first_seg->ol_flags = 0;
> +#ifdef RTE_LIBRTE_FM10K_RX_OLFLAGS_ENABLE
> + rx_desc_to_ol_flags(first_seg, &desc);
> +#endif
> + first_seg->hash.rss = desc.d.rss;
> +
> + /* Prefetch data of first segment, if configured to do so. */
> + rte_packet_prefetch((char *)first_seg->buf_addr +
> + first_seg->data_off);
> +
> + /*
> +  * Store the mbuf address into the next entry of the array
> +  * of returned packets.
> +  */
> + rx_pkts[nb_rcv++] = first_seg;
> +
> + /*
> +  * Setup receipt context for a new packet.
> +  */
> + first_seg = NULL;
> + }
> +
> + q->next_dd = next_dd;
> + q->pkt_first_seg = first_seg;
> + q->pkt_last_seg = last_seg;
> +
> + if ((q->next_dd > q->next_trigger) || (alloc == 1)) {
> + rte_mempool_get_bulk(q->mp, (void **)&q->sw_ring[q->next_alloc],
> + q->alloc_thresh);

Same thing here.  The return value should be checked in case the
mempool runs out of buffers.

> + for (; q->next_alloc <= q->next_trigger; ++q->next_alloc) {
> + mbuf = q->sw_ring[q->next_alloc];
> +
> + /* setup static mbuf fields */
> + fm10k_pktmbuf_reset(mbuf, q->port_id);
> +
> + /* write descriptor */
> + desc.q.pkt_addr = MBUF_DMA_ADDR_DEFAULT(mbuf);
> + desc.q.hdr_addr = MBUF_DMA_ADDR_DEFAULT(mbuf);
> + q->hw_ring[q->next_alloc] = desc;
> + }
> + FM10K_PCI_REG_WRITE(q->tail_ptr, q->next_trigger);
> + q->next_trigger += q->alloc_thresh;
> + if (q->next_trigger >= q->nb_desc) {
> + q->next_trigger = q->alloc_thresh - 1;
> + q->next_alloc = 0;
> +  

[dpdk-dev] [PATCH v4 10/15] fm10k: add receive and transmit function

2015-02-11 Thread Jeff Shaw
On Wed, Feb 11, 2015 at 09:31:33AM +0800, Chen Jing D(Mark) wrote:

> +uint16_t
> +fm10k_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
> + uint16_t nb_pkts)
> +{
> + struct rte_mbuf *mbuf;
> + union fm10k_rx_desc desc;
> + struct fm10k_rx_queue *q = rx_queue;
> + uint16_t count = 0;
> + int alloc = 0;
> + uint16_t next_dd;
> +
> + next_dd = q->next_dd;
> +
> + nb_pkts = RTE_MIN(nb_pkts, q->alloc_thresh);
> + for (count = 0; count < nb_pkts; ++count) {
> + mbuf = q->sw_ring[next_dd];
> + desc = q->hw_ring[next_dd];
> + if (!(desc.d.staterr & FM10K_RXD_STATUS_DD))
> + break;
> +#ifdef RTE_LIBRTE_FM10K_DEBUG_RX
> + dump_rxd(&desc);
> +#endif
> + rte_pktmbuf_pkt_len(mbuf) = desc.w.length;
> + rte_pktmbuf_data_len(mbuf) = desc.w.length;
> +
> + mbuf->ol_flags = 0;
> +#ifdef RTE_LIBRTE_FM10K_RX_OLFLAGS_ENABLE
> + rx_desc_to_ol_flags(mbuf, &desc);
> +#endif
> +
> + mbuf->hash.rss = desc.d.rss;
> +
> + rx_pkts[count] = mbuf;
> + if (++next_dd == q->nb_desc) {
> + next_dd = 0;
> + alloc = 1;
> + }
> +
> + /* Prefetch next mbuf while processing current one. */
> + rte_prefetch0(q->sw_ring[next_dd]);
> +
> + /*
> +  * When next RX descriptor is on a cache-line boundary,
> +  * prefetch the next 4 RX descriptors and the next 8 pointers
> +  * to mbufs.
> +  */
> + if ((next_dd & 0x3) == 0) {
> + rte_prefetch0(&q->hw_ring[next_dd]);
> + rte_prefetch0(&q->sw_ring[next_dd]);
> + }
> + }
> +
> + q->next_dd = next_dd;
> +
> + if ((q->next_dd > q->next_trigger) || (alloc == 1)) {
> + rte_mempool_get_bulk(q->mp, (void **)&q->sw_ring[q->next_alloc],
> + q->alloc_thresh);

The return value should be checked here in case the mempool runs out
of buffers. Thanks Helin for spotting this.  I'm not sure how I missed it
originally.

> + for (; q->next_alloc <= q->next_trigger; ++q->next_alloc) {
> + mbuf = q->sw_ring[q->next_alloc];
> +
> + /* setup static mbuf fields */
> + fm10k_pktmbuf_reset(mbuf, q->port_id);
> +
> + /* write descriptor */
> + desc.q.pkt_addr = MBUF_DMA_ADDR_DEFAULT(mbuf);
> + desc.q.hdr_addr = MBUF_DMA_ADDR_DEFAULT(mbuf);
> + q->hw_ring[q->next_alloc] = desc;
> + }
> + FM10K_PCI_REG_WRITE(q->tail_ptr, q->next_trigger);
> + q->next_trigger += q->alloc_thresh;
> + if (q->next_trigger >= q->nb_desc) {
> + q->next_trigger = q->alloc_thresh - 1;
> + q->next_alloc = 0;
> + }
> + }
> +
> + return count;
> +}
> +

Thanks,
Jeff


[dpdk-dev] [PATCH 00/18] lib/librte_pmd_fm10k : fm10k pmd driver

2015-01-30 Thread Jeff Shaw
On Fri, Jan 30, 2015 at 04:26:33PM -0500, Neil Horman wrote:
> On Fri, Jan 30, 2015 at 01:07:16PM +0800, Chen Jing D(Mark) wrote:
> > From: "Chen Jing D(Mark)" 
> > 
> > The patch set add poll mode driver for the host interface of Intel
> > Red Rock Canyon silicon, which integrates NIC and switch functionalities.
> > The patch set include below features:
> > 
> > 1. Basic RX/TX functions for PF/VF.
> > 2. Interrupt handling mechanism for PF/VF.
> > 3. per queue start/stop functions for PF/VF.
> > 4. Mailbox handling between PF/VF and PF/Switch Manager.
> > 5. Receive Side Scaling (RSS) for PF/VF.
> > 6. Scatter receive function for PF/VF.
> > 7. reta update/query for PF/VF.
> > 8. VLAN filter set for PF.
> > 9. Link status query for PF/VF.
> > 
> > Jeff Shaw (18):
> >   fm10k: add base driver
> >   Change config/ files to add macros for fm10k
> >   fm10k: Add empty fm10k files
> >   fm10k: add fm10k device id
> >   fm10k: Add code to register fm10k pmd PF driver
> >   fm10k: add reta update/requery functions
> >   fm10k: add rx_queue_setup/release function
> >   fm10k: add tx_queue_setup/release function
> >   fm10k: add RX/TX single queue start/stop function
> >   fm10k: add dev start/stop functions
> >   fm10k: add receive and transmit function
> >   fm10k: add PF RSS support
> >   fm10k: Add scatter receive function
> >   fm10k: add function to set vlan
> >   fm10k: Add SRIOV-VF support
> >   fm10k: add PF and VF interrupt handling function
> >   Change lib/Makefile to add fm10k driver into compile list.
> >   Change mk/rte.app.mk to add fm10k lib into link
> > 
> >  config/common_bsdapp|9 +
> >  config/common_linuxapp  |9 +
> >  lib/Makefile|1 +
> >  lib/librte_eal/common/include/rte_pci_dev_ids.h |   22 +
> >  lib/librte_pmd_fm10k/Makefile   |   96 +
> >  lib/librte_pmd_fm10k/SHARED/fm10k_api.c |  327 
> >  lib/librte_pmd_fm10k/SHARED/fm10k_api.h |   60 +
> >  lib/librte_pmd_fm10k/SHARED/fm10k_common.c  |  573 ++
> >  lib/librte_pmd_fm10k/SHARED/fm10k_common.h  |   52 +
> >  lib/librte_pmd_fm10k/SHARED/fm10k_mbx.c | 2186 
> > +++
> >  lib/librte_pmd_fm10k/SHARED/fm10k_mbx.h |  329 
> >  lib/librte_pmd_fm10k/SHARED/fm10k_osdep.h   |  116 ++
> >  lib/librte_pmd_fm10k/SHARED/fm10k_pf.c  | 1877 +++
> >  lib/librte_pmd_fm10k/SHARED/fm10k_pf.h  |  152 ++
> >  lib/librte_pmd_fm10k/SHARED/fm10k_tlv.c |  914 ++
> >  lib/librte_pmd_fm10k/SHARED/fm10k_tlv.h |  199 ++
> >  lib/librte_pmd_fm10k/SHARED/fm10k_type.h|  925 ++
> >  lib/librte_pmd_fm10k/SHARED/fm10k_vf.c  |  586 ++
> >  lib/librte_pmd_fm10k/SHARED/fm10k_vf.h  |   91 +
> >  lib/librte_pmd_fm10k/fm10k.h|  293 +++
> >  lib/librte_pmd_fm10k/fm10k_ethdev.c | 1846 +++
> >  lib/librte_pmd_fm10k/fm10k_logs.h   |   66 +
> >  lib/librte_pmd_fm10k/fm10k_rxtx.c   |  427 +
> >  mk/rte.app.mk   |4 +
> >  24 files changed, 11160 insertions(+), 0 deletions(-)
> >  create mode 100644 lib/librte_pmd_fm10k/Makefile
> >  create mode 100644 lib/librte_pmd_fm10k/SHARED/fm10k_api.c
> >  create mode 100644 lib/librte_pmd_fm10k/SHARED/fm10k_api.h
> >  create mode 100644 lib/librte_pmd_fm10k/SHARED/fm10k_common.c
> >  create mode 100644 lib/librte_pmd_fm10k/SHARED/fm10k_common.h
> >  create mode 100644 lib/librte_pmd_fm10k/SHARED/fm10k_mbx.c
> >  create mode 100644 lib/librte_pmd_fm10k/SHARED/fm10k_mbx.h
> >  create mode 100644 lib/librte_pmd_fm10k/SHARED/fm10k_osdep.h
> >  create mode 100644 lib/librte_pmd_fm10k/SHARED/fm10k_pf.c
> >  create mode 100644 lib/librte_pmd_fm10k/SHARED/fm10k_pf.h
> >  create mode 100644 lib/librte_pmd_fm10k/SHARED/fm10k_tlv.c
> >  create mode 100644 lib/librte_pmd_fm10k/SHARED/fm10k_tlv.h
> >  create mode 100644 lib/librte_pmd_fm10k/SHARED/fm10k_type.h
> >  create mode 100644 lib/librte_pmd_fm10k/SHARED/fm10k_vf.c
> >  create mode 100644 lib/librte_pmd_fm10k/SHARED/fm10k_vf.h
> >  create mode 100644 lib/librte_pmd_fm10k/fm10k.h
> >  create mode 100644 lib/librte_pmd_fm10k/fm10k_ethdev.c
> >  create mode 100644 lib/librte_pmd_fm10k/fm10k_logs.h
> >  create mode 100644 lib/librte_pmd_fm10k/fm10k_rxtx.c
> > 
> 
> Why is there a SHARED directory in the driver?  Are there other drivers that 
> use
> the shared fm10k code?

No, the other poll-mode drivers do not use the shared fm10k code. The
directory is similar to the 'ixgbe' and 'i40e' directories in their
respective PMDs, only that it is named 'SHARED' for the fm10k driver.

-Jeff

> 
> Neil
> 


[dpdk-dev] Intel 82599 tx_conf setting

2014-11-05 Thread Jeff Shaw
On Wed, Nov 05, 2014 at 09:43:43AM +0900, Gyumin wrote:
> Hi
> 
> I've read the Intel 82599 official manual and I found that optimal 
> PTHRESH is the tx descriptor buffer size - N (N is CPU cache line 
> divided by 16).

This is sometimes true, but not always.  I believe you are referring
to section "7.2.3.4.1 Transmit Descriptor Fetch and Write-back Settings"
in the datasheet.  You'll see the PTHRESH, HTHRESH, and WTHRESH parameters
should be tuned to for your workload. You should try a few combinations
of parameters (starting with the defaults) to see which is really optimal
for your application.

> 1. I guess the size of the tx descriptor buffer is 128. Isn't it right?
>Where is the size of the tx descriptor buffer in the official manual?

The wording in the manual may be a bit confusing. You will see the manual
refers to the "on-chip descriptor buffer size".  This is where the NIC
stores descriptors which were fetched from the actual descriptor ring in
host memory.  Section "7.2.3.3 Transmit Descriptor Ring" states that the
size of the on-chip descriptor buffer size per queue is 40.

> 
> 2. What it means that the TX_PTHRESH=36 in the testpmd.c?
>If the size of tx descriptor buffer is 128 then optimal thresholds 
> to minimize latency are pthresh=4(cache line / 16), hthresh=0 and 
> wthresh=0. Is there something I missed?

Since the on-chip descriptor buffer size is 40, it is clear that we have
chosen reasonable defaults since 40 minus 4 is 36. I recommend you test
a few different values to see how these parameters impact the performance
characteristics of your workload.

> 
> 
> Thanks.
You're welcome.

-Jeff


[dpdk-dev] ixgbe_recv_pkts, ixgbe_recv_pkts_bulk_alloc. what is difference?

2014-10-22 Thread Jeff Shaw
On Wed, Oct 22, 2014 at 11:18:17PM +0900, GyuminHwang wrote:
> Hi all
> 
> I have several questions about ixgbe_rxtx.c especially Tx and Rx function.
> What is the difference between ixgbe_recv_pkts and
> ixgbe_recv_pkts_bulk_alloc? I already know the earlier function is
> non-bulk function and the later function is bulk function. But I want to
> know is the mechanism of these two functions, and the role of H/W ring
> and S/W ring in each function.
As you mentioned, the main difference is that the bulk_alloc version allocates 
packet buffers in bulk (using rte_mempool_get_bulk) while the ixgbe_recv_pkts 
function allocates a single buffer at a time to replace the one which was just 
used to receive a frame.  Another major difference with the bulk_alloc version 
is that the descriptor ring (aka H/W ring) is scanned in bulk to determine if 
multiple frames are available to be received.  The resulting performance is 
higher than if operations were done one at a time, as is the case with the 
ixgbe_recv_pkts function.  The drawback of using the bulk_alloc function is 
that it does not support more than one descriptor per frame, so you cannot use 
it if you are configured to receive packets greater than 2KB in size.

The H/W ring is the hardware descriptor ring on the NIC.  This is where 
descriptors are read/written.  There are plenty of details in section 7.1 of 
the Intel(R) 82599 10 Gigabit Ethernet Controller datasheet.  As for the 
software ring, this is where pointers to mbufs are stored.  You can think of 
the h/w ring as storing descriptors, and is used for controlling the NIC 
behavior, while the s/w ring is for storing buffer pointers.  The sw_ring[0] 
contains a pointer to the buffer to be used for hw_ring[0].

-Jeff


[dpdk-dev] DPDK Demos at IDF conference using DDIO

2014-09-25 Thread Jeff Shaw
On Thu, Sep 25, 2014 at 03:19:31PM +, Anjali Kulkarni wrote:
> Hi,
> 
> There were a few DPDK demos at IDF, and from one of them I gathered that you 
> can use DDIO to enhance performance by using certain lookup APIs in DPDK. Can 
> someone shed light on this? Is DDIO enabled by default? It is available only 
> on the v3 chip and needs DPDK 1.7 or how can get the performance boost of 
> DDIO in my application?
> 
> Thanks
> Anjali

Intel(R) Data Direct I/O Technology (Intel(R) DDIO) is a feature introduced 
with the Intel(R) Xeon(R) processor E5 family.
It has been around for several years and is available at least on all Xeon E5 
processors. DDIO is part of the platform, so any DPDK version can take 
advantage of the feature.  There are several papers and videos available on the 
Internet that can provide more details.


Thanks,
Jeff


[dpdk-dev] TCP/IP stack for DPDK

2014-09-09 Thread Jeff Shaw
On Tue, Sep 09, 2014 at 08:49:44AM +0800, zimeiw wrote:
> hi,
> 
> 
> I  have porting major FreeBSD tcp/ip stack to dpdk. new tcp/ip stack is based 
> on dpdk rte_mbuf, rte_ring, rte_memory and rte_table. it is faster to 
> forwarding packets.
> 
> Below feature are ready:
> 
> Netdp initialize
> Ether layer
> ARP
> IP layer
> Routing
> ICMP
> Commands for adding, deleting, showing IP address
> Commands for adding, deleting, showing static route
> Next planning:
> Porting udp to netdp.
> 
> Porting tcp to netdp.
> Porting socket to netdp.
> 
> 
> You can find the code from the link: https://github.com/dpdk-net/netdp
> 
> 
> 
Hi zimeiw, when will you be posting the source code to github? I can only find 
a static lib and some header files.
Thanks,
Jeff


[dpdk-dev] Ability to/impact of running with smaller page sizes

2014-06-30 Thread Jeff Shaw
Hi Matt,

On Mon, Jun 30, 2014 at 05:43:39PM -0500, Matt Laswell wrote:
> Hey Folks,
> 
> In my application, I'm seeing some design considerations in a project I'm
> working on that push me towards the use of smaller memory page sizes.  I'm
> curious - is it possible in practical terms to run DPDK without hugepages?

Yes, but I do not believe an implementation exists.

>  If so, does anybody have any practical experience (or a
> back-of-the-envelop estimate) of how badly such a configuration would hurt
> performance?  For sake of argument, assume that virtually all of the memory
> being used is in pre-allocated mempools (e.g lots of rte_mempool_create(),
> very little rte_malloc().
> 

It is possible, though not recommended if you want "good performance", to use
smaller memory page sizes.  Poor performance results from penalties incurred
due to DTLB misses.  Please consider the following example.

An application pre-allocates several thousand buffers to use for packet
reception and transmission using 4KB pages.  Each buffer contains 2KB worth
of data space, or enough to store the typical maximum Ethernet frame size. 
Since the page size is only 4KB, each DTLB entry can cache a maximum of two
packet buffer address translations.  If the first level DTLB has, for
instance, 64 x 4KB entries, you would only be able to cache about 128 address
translations at any given time (+1,024 if you include the second level DTLB). 
With 32 x 2MB entries, each DTLB entry can cache address translations for 32K
packet buffers at any given time.

If you believe that your application performance will be negatively impacted
by latencies incurred due to DTLB misses, it is recommended to take steps
which would maximize the DTLB hit rate.

Of course, you will not know how this impacts performance for your application
unless it is tried under realistic conditions.  If you end up doing so, could
you please update the list?


Thanks,
Jeff