[dpdk-dev] [PATCH v4 2/2] i40e: Enable bad checksum flags in i40e vPMD
From: Damjan Marion <damar...@cisco.com> Decode the checksum flags from the rx descriptor, setting the appropriate bit in the mbuf ol_flags field when the flag indicates a bad checksum. Signed-off-by: Damjan Marion Signed-off-by: Jeff Shaw --- drivers/net/i40e/i40e_rxtx_vec.c | 48 +++- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/drivers/net/i40e/i40e_rxtx_vec.c b/drivers/net/i40e/i40e_rxtx_vec.c index 6c63141..ccd4956 100644 --- a/drivers/net/i40e/i40e_rxtx_vec.c +++ b/drivers/net/i40e/i40e_rxtx_vec.c @@ -138,19 +138,14 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq) static inline void desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts) { - __m128i vlan0, vlan1, rss; - union { - uint16_t e[4]; - uint64_t dword; - } vol; + __m128i vlan0, vlan1, rss, l3_l4e; /* mask everything except RSS, flow director and VLAN flags * bit2 is for VLAN tag, bit11 for flow director indication * bit13:12 for RSS indication. */ - const __m128i rss_vlan_msk = _mm_set_epi16( - 0x, 0x, 0x, 0x, - 0x3804, 0x3804, 0x3804, 0x3804); + const __m128i rss_vlan_msk = _mm_set_epi32( + 0x1c03804, 0x1c03804, 0x1c03804, 0x1c03804); /* map rss and vlan type to rss hash and vlan flag */ const __m128i vlan_flags = _mm_set_epi8(0, 0, 0, 0, @@ -163,23 +158,36 @@ desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts) PKT_RX_RSS_HASH | PKT_RX_FDIR, PKT_RX_RSS_HASH, 0, 0, 0, 0, PKT_RX_FDIR, 0); - vlan0 = _mm_unpackhi_epi16(descs[0], descs[1]); - vlan1 = _mm_unpackhi_epi16(descs[2], descs[3]); - vlan0 = _mm_unpacklo_epi32(vlan0, vlan1); + const __m128i l3_l4e_flags = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, + PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD, + PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD, + PKT_RX_EIP_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD, + PKT_RX_EIP_CKSUM_BAD, + PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD, + PKT_RX_L4_CKSUM_BAD, + PKT_RX_IP_CKSUM_BAD, + 0); + + vlan0 = _mm_unpackhi_epi32(descs[0], descs[1]); + vlan1 = _mm_unpackhi_epi32(descs[2], 
descs[3]); + vlan0 = _mm_unpacklo_epi64(vlan0, vlan1); vlan1 = _mm_and_si128(vlan0, rss_vlan_msk); vlan0 = _mm_shuffle_epi8(vlan_flags, vlan1); - rss = _mm_srli_epi16(vlan1, 11); + rss = _mm_srli_epi32(vlan1, 11); rss = _mm_shuffle_epi8(rss_flags, rss); + l3_l4e = _mm_srli_epi32(vlan1, 22); + l3_l4e = _mm_shuffle_epi8(l3_l4e_flags, l3_l4e); + vlan0 = _mm_or_si128(vlan0, rss); - vol.dword = _mm_cvtsi128_si64(vlan0); + vlan0 = _mm_or_si128(vlan0, l3_l4e); - rx_pkts[0]->ol_flags = vol.e[0]; - rx_pkts[1]->ol_flags = vol.e[1]; - rx_pkts[2]->ol_flags = vol.e[2]; - rx_pkts[3]->ol_flags = vol.e[3]; + rx_pkts[0]->ol_flags = _mm_extract_epi16(vlan0, 0); + rx_pkts[1]->ol_flags = _mm_extract_epi16(vlan0, 2); + rx_pkts[2]->ol_flags = _mm_extract_epi16(vlan0, 4); + rx_pkts[3]->ol_flags = _mm_extract_epi16(vlan0, 6); } #else #define desc_to_olflags_v(desc, rx_pkts) do {} while (0) @@ -754,7 +762,8 @@ i40e_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev) #ifndef RTE_LIBRTE_I40E_RX_OLFLAGS_ENABLE /* whithout rx ol_flags, no VP flag report */ if (rxmode->hw_vlan_strip != 0 || - rxmode->hw_vlan_extend != 0) + rxmode->hw_vlan_extend != 0 || + rxmode->hw_ip_checksum != 0) return -1; #endif @@ -765,8 +774,7 @@ i40e_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev) /* - no csum error report support * - no header split support */ - if (rxmode->hw_ip_checksum == 1 || - rxmode->header_split == 1) + if (rxmode->header_split == 1) return -1; return 0; -- 2.1.0
[dpdk-dev] [PATCH v4 1/2] i40e: Add packet_type metadata in the i40e vPMD
From: Damjan Marion <damar...@cisco.com> The ptype is decoded from the rx descriptor and stored in the packet type field in the mbuf using the same function as the non-vector driver. Signed-off-by: Damjan Marion Signed-off-by: Jeff Shaw Acked-by: Qi Zhang --- Changes in v2: - Add missing reference to i40e_recv_scattered_pkts_vec() when querying supported packet types. Changes in v3: - None. (Please ignore this version). Changes in v4: - Fix rss/fdir status mask and shift to get accurate Flow Director Filter Match (FLM) indication. drivers/net/i40e/i40e_rxtx.c | 567 +-- drivers/net/i40e/i40e_rxtx.h | 563 ++ drivers/net/i40e/i40e_rxtx_vec.c | 16 ++ 3 files changed, 582 insertions(+), 564 deletions(-) diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c index 554d167..7433480 100644 --- a/drivers/net/i40e/i40e_rxtx.c +++ b/drivers/net/i40e/i40e_rxtx.c @@ -174,569 +174,6 @@ i40e_get_iee15888_flags(struct rte_mbuf *mb, uint64_t qword) } #endif -/* For each value it means, datasheet of hardware can tell more details - * - * @note: fix i40e_dev_supported_ptypes_get() if any change here. 
- */ -static inline uint32_t -i40e_rxd_pkt_type_mapping(uint8_t ptype) -{ - static const uint32_t type_table[UINT8_MAX + 1] __rte_cache_aligned = { - /* L2 types */ - /* [0] reserved */ - [1] = RTE_PTYPE_L2_ETHER, - [2] = RTE_PTYPE_L2_ETHER_TIMESYNC, - /* [3] - [5] reserved */ - [6] = RTE_PTYPE_L2_ETHER_LLDP, - /* [7] - [10] reserved */ - [11] = RTE_PTYPE_L2_ETHER_ARP, - /* [12] - [21] reserved */ - - /* Non tunneled IPv4 */ - [22] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_L4_FRAG, - [23] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_L4_NONFRAG, - [24] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_L4_UDP, - /* [25] reserved */ - [26] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_L4_TCP, - [27] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_L4_SCTP, - [28] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_L4_ICMP, - - /* IPv4 --> IPv4 */ - [29] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_FRAG, - [30] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_NONFRAG, - [31] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_UDP, - /* [32] reserved */ - [33] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_TCP, - [34] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_SCTP, - [35] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_ICMP, - - /* IPv4 --> IPv6 */ - [36] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN 
| - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_FRAG, - [37] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_NONFRAG, - [38] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_UDP, - /* [39] reserved */ - [40] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV6
[dpdk-dev] [PATCH v3 1/2] i40e: Add packet_type metadata in the i40e vPMD
From: Damjan Marion <damar...@cisco.com> The ptype is decoded from the rx descriptor and stored in the packet type field in the mbuf using the same function as the non-vector driver. Signed-off-by: Damjan Marion Signed-off-by: Jeff Shaw Acked-by: Qi Zhang --- Changes in v2: - Add missing reference to i40e_recv_scattered_pkts_vec() when querying supported packet types. Changes in v3: - None. drivers/net/i40e/i40e_rxtx.c | 567 +-- drivers/net/i40e/i40e_rxtx.h | 563 ++ drivers/net/i40e/i40e_rxtx_vec.c | 16 ++ 3 files changed, 582 insertions(+), 564 deletions(-) diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c index 554d167..7433480 100644 --- a/drivers/net/i40e/i40e_rxtx.c +++ b/drivers/net/i40e/i40e_rxtx.c @@ -174,569 +174,6 @@ i40e_get_iee15888_flags(struct rte_mbuf *mb, uint64_t qword) } #endif -/* For each value it means, datasheet of hardware can tell more details - * - * @note: fix i40e_dev_supported_ptypes_get() if any change here. - */ -static inline uint32_t -i40e_rxd_pkt_type_mapping(uint8_t ptype) -{ - static const uint32_t type_table[UINT8_MAX + 1] __rte_cache_aligned = { - /* L2 types */ - /* [0] reserved */ - [1] = RTE_PTYPE_L2_ETHER, - [2] = RTE_PTYPE_L2_ETHER_TIMESYNC, - /* [3] - [5] reserved */ - [6] = RTE_PTYPE_L2_ETHER_LLDP, - /* [7] - [10] reserved */ - [11] = RTE_PTYPE_L2_ETHER_ARP, - /* [12] - [21] reserved */ - - /* Non tunneled IPv4 */ - [22] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_L4_FRAG, - [23] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_L4_NONFRAG, - [24] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_L4_UDP, - /* [25] reserved */ - [26] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_L4_TCP, - [27] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_L4_SCTP, - [28] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_L4_ICMP, - - /* IPv4 --> IPv4 */ - [29] = RTE_PTYPE_L2_ETHER | 
RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_FRAG, - [30] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_NONFRAG, - [31] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_UDP, - /* [32] reserved */ - [33] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_TCP, - [34] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_SCTP, - [35] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_ICMP, - - /* IPv4 --> IPv6 */ - [36] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_FRAG, - [37] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_NONFRAG, - [38] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_UDP, - /* [39] reserved */ - [40] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_TCP, - [41] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4
[dpdk-dev] [PATCH v2 2/2] i40e: Enable bad checksum flags in i40e vPMD
From: Damjan Marion <damar...@cisco.com> Decode the checksum flags from the rx descriptor, setting the appropriate bit in the mbuf ol_flags field when the flag indicates a bad checksum. Signed-off-by: Damjan Marion Signed-off-by: Jeff Shaw --- drivers/net/i40e/i40e_rxtx_vec.c | 48 +++- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/drivers/net/i40e/i40e_rxtx_vec.c b/drivers/net/i40e/i40e_rxtx_vec.c index 6c63141..d2267ad 100644 --- a/drivers/net/i40e/i40e_rxtx_vec.c +++ b/drivers/net/i40e/i40e_rxtx_vec.c @@ -138,19 +138,14 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq) static inline void desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts) { - __m128i vlan0, vlan1, rss; - union { - uint16_t e[4]; - uint64_t dword; - } vol; + __m128i vlan0, vlan1, rss, l3_l4e; /* mask everything except RSS, flow director and VLAN flags * bit2 is for VLAN tag, bit11 for flow director indication * bit13:12 for RSS indication. */ - const __m128i rss_vlan_msk = _mm_set_epi16( - 0x, 0x, 0x, 0x, - 0x3804, 0x3804, 0x3804, 0x3804); + const __m128i rss_vlan_msk = _mm_set_epi32( + 0x1c03004, 0x1c03004, 0x1c03004, 0x1c03004); /* map rss and vlan type to rss hash and vlan flag */ const __m128i vlan_flags = _mm_set_epi8(0, 0, 0, 0, @@ -163,23 +158,36 @@ desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts) PKT_RX_RSS_HASH | PKT_RX_FDIR, PKT_RX_RSS_HASH, 0, 0, 0, 0, PKT_RX_FDIR, 0); - vlan0 = _mm_unpackhi_epi16(descs[0], descs[1]); - vlan1 = _mm_unpackhi_epi16(descs[2], descs[3]); - vlan0 = _mm_unpacklo_epi32(vlan0, vlan1); + const __m128i l3_l4e_flags = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, + PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD, + PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD, + PKT_RX_EIP_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD, + PKT_RX_EIP_CKSUM_BAD, + PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD, + PKT_RX_L4_CKSUM_BAD, + PKT_RX_IP_CKSUM_BAD, + 0); + + vlan0 = _mm_unpackhi_epi32(descs[0], descs[1]); + vlan1 = _mm_unpackhi_epi32(descs[2], 
descs[3]); + vlan0 = _mm_unpacklo_epi64(vlan0, vlan1); vlan1 = _mm_and_si128(vlan0, rss_vlan_msk); vlan0 = _mm_shuffle_epi8(vlan_flags, vlan1); - rss = _mm_srli_epi16(vlan1, 11); + rss = _mm_srli_epi32(vlan1, 12); rss = _mm_shuffle_epi8(rss_flags, rss); + l3_l4e = _mm_srli_epi32(vlan1, 22); + l3_l4e = _mm_shuffle_epi8(l3_l4e_flags, l3_l4e); + vlan0 = _mm_or_si128(vlan0, rss); - vol.dword = _mm_cvtsi128_si64(vlan0); + vlan0 = _mm_or_si128(vlan0, l3_l4e); - rx_pkts[0]->ol_flags = vol.e[0]; - rx_pkts[1]->ol_flags = vol.e[1]; - rx_pkts[2]->ol_flags = vol.e[2]; - rx_pkts[3]->ol_flags = vol.e[3]; + rx_pkts[0]->ol_flags = _mm_extract_epi16(vlan0, 0); + rx_pkts[1]->ol_flags = _mm_extract_epi16(vlan0, 2); + rx_pkts[2]->ol_flags = _mm_extract_epi16(vlan0, 4); + rx_pkts[3]->ol_flags = _mm_extract_epi16(vlan0, 6); } #else #define desc_to_olflags_v(desc, rx_pkts) do {} while (0) @@ -754,7 +762,8 @@ i40e_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev) #ifndef RTE_LIBRTE_I40E_RX_OLFLAGS_ENABLE /* whithout rx ol_flags, no VP flag report */ if (rxmode->hw_vlan_strip != 0 || - rxmode->hw_vlan_extend != 0) + rxmode->hw_vlan_extend != 0 || + rxmode->hw_ip_checksum != 0) return -1; #endif @@ -765,8 +774,7 @@ i40e_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev) /* - no csum error report support * - no header split support */ - if (rxmode->hw_ip_checksum == 1 || - rxmode->header_split == 1) + if (rxmode->header_split == 1) return -1; return 0; -- 2.1.0
[dpdk-dev] [PATCH v2 1/2] i40e: Add packet_type metadata in the i40e vPMD
From: Damjan Marion <damar...@cisco.com> The ptype is decoded from the rx descriptor and stored in the packet type field in the mbuf using the same function as the non-vector driver. Signed-off-by: Damjan Marion Signed-off-by: Jeff Shaw Acked-by: Qi Zhang --- Changes in v2: - Add missing reference to i40e_recv_scattered_pkts_vec() when querying supported packet types. drivers/net/i40e/i40e_rxtx.c | 567 +-- drivers/net/i40e/i40e_rxtx.h | 563 ++ drivers/net/i40e/i40e_rxtx_vec.c | 16 ++ 3 files changed, 582 insertions(+), 564 deletions(-) diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c index 554d167..7433480 100644 --- a/drivers/net/i40e/i40e_rxtx.c +++ b/drivers/net/i40e/i40e_rxtx.c @@ -174,569 +174,6 @@ i40e_get_iee15888_flags(struct rte_mbuf *mb, uint64_t qword) } #endif -/* For each value it means, datasheet of hardware can tell more details - * - * @note: fix i40e_dev_supported_ptypes_get() if any change here. - */ -static inline uint32_t -i40e_rxd_pkt_type_mapping(uint8_t ptype) -{ - static const uint32_t type_table[UINT8_MAX + 1] __rte_cache_aligned = { - /* L2 types */ - /* [0] reserved */ - [1] = RTE_PTYPE_L2_ETHER, - [2] = RTE_PTYPE_L2_ETHER_TIMESYNC, - /* [3] - [5] reserved */ - [6] = RTE_PTYPE_L2_ETHER_LLDP, - /* [7] - [10] reserved */ - [11] = RTE_PTYPE_L2_ETHER_ARP, - /* [12] - [21] reserved */ - - /* Non tunneled IPv4 */ - [22] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_L4_FRAG, - [23] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_L4_NONFRAG, - [24] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_L4_UDP, - /* [25] reserved */ - [26] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_L4_TCP, - [27] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_L4_SCTP, - [28] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_L4_ICMP, - - /* IPv4 --> IPv4 */ - [29] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - 
RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_FRAG, - [30] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_NONFRAG, - [31] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_UDP, - /* [32] reserved */ - [33] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_TCP, - [34] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_SCTP, - [35] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_ICMP, - - /* IPv4 --> IPv6 */ - [36] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_FRAG, - [37] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_NONFRAG, - [38] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_UDP, - /* [39] reserved */ - [40] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_TCP, - [41] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTY
[dpdk-dev] [PATCH v2 2/2] i40e: Enable bad checksum flags in i40e vPMD
On Wed, Oct 05, 2016 at 04:57:28PM -0700, Chen, Jing D wrote: > Hi, > > > -Original Message- > > From: Shaw, Jeffrey B > > Sent: Wednesday, October 5, 2016 5:13 PM > > To: dev at dpdk.org > > Cc: Zhang, Helin ; Wu, Jingjing > > ; damarion at cisco.com; Zhang, Qi Z > > ; Chen, Jing D > > Subject: [PATCH v2 2/2] i40e: Enable bad checksum flags in i40e vPMD > > > > From: Damjan Marion > > > > Decode the checksum flags from the rx descriptor, setting the appropriate > > bit > > in the mbuf ol_flags field when the flag indicates a bad checksum. > > > > Signed-off-by: Damjan Marion > > Signed-off-by: Jeff Shaw > > --- > > drivers/net/i40e/i40e_rxtx_vec.c | 48 > > +++--- > > -- > > 1 file changed, 28 insertions(+), 20 deletions(-) > > > > diff --git a/drivers/net/i40e/i40e_rxtx_vec.c > > b/drivers/net/i40e/i40e_rxtx_vec.c > > index 6c63141..d2267ad 100644 > > --- a/drivers/net/i40e/i40e_rxtx_vec.c > > +++ b/drivers/net/i40e/i40e_rxtx_vec.c > > @@ -138,19 +138,14 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq) static > > inline void desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts) > > { > > - __m128i vlan0, vlan1, rss; > > - union { > > - uint16_t e[4]; > > - uint64_t dword; > > - } vol; > > + __m128i vlan0, vlan1, rss, l3_l4e; > > > > /* mask everything except RSS, flow director and VLAN flags > > * bit2 is for VLAN tag, bit11 for flow director indication > > * bit13:12 for RSS indication. > > */ > > - const __m128i rss_vlan_msk = _mm_set_epi16( > > - 0x, 0x, 0x, 0x, > > - 0x3804, 0x3804, 0x3804, 0x3804); > > + const __m128i rss_vlan_msk = _mm_set_epi32( > > + 0x1c03004, 0x1c03004, 0x1c03004, 0x1c03004); Mask is wrong here. Should be 0x1c03804, ..., etc. 
> > > > /* map rss and vlan type to rss hash and vlan flag */ > > const __m128i vlan_flags = _mm_set_epi8(0, 0, 0, 0, @@ -163,23 > > +158,36 @@ desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts) > > PKT_RX_RSS_HASH | PKT_RX_FDIR, > > PKT_RX_RSS_HASH, 0, 0, > > 0, 0, PKT_RX_FDIR, 0); > > > > - vlan0 = _mm_unpackhi_epi16(descs[0], descs[1]); > > - vlan1 = _mm_unpackhi_epi16(descs[2], descs[3]); > > - vlan0 = _mm_unpacklo_epi32(vlan0, vlan1); > > + const __m128i l3_l4e_flags = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, > > + PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD > > | PKT_RX_IP_CKSUM_BAD, > > + PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD, > > + PKT_RX_EIP_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD, > > + PKT_RX_EIP_CKSUM_BAD, > > + PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD, > > + PKT_RX_L4_CKSUM_BAD, > > + PKT_RX_IP_CKSUM_BAD, > > + 0); > > + > > + vlan0 = _mm_unpackhi_epi32(descs[0], descs[1]); > > + vlan1 = _mm_unpackhi_epi32(descs[2], descs[3]); > > + vlan0 = _mm_unpacklo_epi64(vlan0, vlan1); > > > > vlan1 = _mm_and_si128(vlan0, rss_vlan_msk); > > vlan0 = _mm_shuffle_epi8(vlan_flags, vlan1); > > > > - rss = _mm_srli_epi16(vlan1, 11); > > + rss = _mm_srli_epi32(vlan1, 12); > > rss = _mm_shuffle_epi8(rss_flags, rss); > > My bad. Original code will use bit[13:11] to identify RSS and FDIR flag. Now > It masked bit 11 out when creating " rss_vlan_msk" and doing shift above, > while it still try to use original "rss_flags"? Good catch. I have no idea how you spotted that, and you're right, we should be shifting by 11, not 12. Also the mask needs to be updated (as you mentioned to me offline) which I noted above. Damjan, unless you object I'll send a v3 with an updated rss_vlan_msk and the 11 bit shift so we also get the Flow Director Filter Match (FLM) indication. >
[dpdk-dev] [PATCH 0/2] Add ptype and xsum handling in i40e rx vpmd
On Fri, Jul 15, 2016 at 10:26:23PM +0200, Thomas Monjalon wrote: > 2016-07-14 09:59, Jeff Shaw: > > Our testing suggests minimal (in some cases zero) impact to core-bound > > forwarding throughput as measured by testpmd. Throughput increase is > > observed in l3fwd as now the vpmd can be used with hw_ip_checksum > > enabled and without needing '--parse-ptype'. > > > > The benefits to applications using this functionality is realized when > > Ethernet processing and L3/L4 checksum validation can be skipped. > > > > We hope others can also test performance in their applications while > > conducting a review of this series. > > Thanks for the patches. They need some careful review and are a bit late > for an integration in 16.07. Thus they are pending for 16.11. Hello, I noticed there are other patches going into i40e ahead of this one. Would somebody please review and merge this one if there are no issues? Thanks, Jeff
[dpdk-dev] [PATCH 2/2] i40e: Enable bad checksum flags in i40e vPMD
From: Damjan Marion <damar...@cisco.com> Decode the checksum flags from the rx descriptor, setting the appropriate bit in the mbuf ol_flags field when the flag indicates a bad checksum. Signed-off-by: Damjan Marion Signed-off-by: Jeff Shaw --- drivers/net/i40e/i40e_rxtx_vec.c | 48 +++- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/drivers/net/i40e/i40e_rxtx_vec.c b/drivers/net/i40e/i40e_rxtx_vec.c index e78ac63..ace51df 100644 --- a/drivers/net/i40e/i40e_rxtx_vec.c +++ b/drivers/net/i40e/i40e_rxtx_vec.c @@ -138,19 +138,14 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq) static inline void desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts) { - __m128i vlan0, vlan1, rss; - union { - uint16_t e[4]; - uint64_t dword; - } vol; + __m128i vlan0, vlan1, rss, l3_l4e; /* mask everything except RSS, flow director and VLAN flags * bit2 is for VLAN tag, bit11 for flow director indication * bit13:12 for RSS indication. */ - const __m128i rss_vlan_msk = _mm_set_epi16( - 0x, 0x, 0x, 0x, - 0x3804, 0x3804, 0x3804, 0x3804); + const __m128i rss_vlan_msk = _mm_set_epi32( + 0x1c03004, 0x1c03004, 0x1c03004, 0x1c03004); /* map rss and vlan type to rss hash and vlan flag */ const __m128i vlan_flags = _mm_set_epi8(0, 0, 0, 0, @@ -163,23 +158,36 @@ desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts) PKT_RX_RSS_HASH | PKT_RX_FDIR, PKT_RX_RSS_HASH, 0, 0, 0, 0, PKT_RX_FDIR, 0); - vlan0 = _mm_unpackhi_epi16(descs[0], descs[1]); - vlan1 = _mm_unpackhi_epi16(descs[2], descs[3]); - vlan0 = _mm_unpacklo_epi32(vlan0, vlan1); + const __m128i l3_l4e_flags = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, + PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD, + PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD, + PKT_RX_EIP_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD, + PKT_RX_EIP_CKSUM_BAD, + PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD, + PKT_RX_L4_CKSUM_BAD, + PKT_RX_IP_CKSUM_BAD, + 0); + + vlan0 = _mm_unpackhi_epi32(descs[0], descs[1]); + vlan1 = _mm_unpackhi_epi32(descs[2], 
descs[3]); + vlan0 = _mm_unpacklo_epi64(vlan0, vlan1); vlan1 = _mm_and_si128(vlan0, rss_vlan_msk); vlan0 = _mm_shuffle_epi8(vlan_flags, vlan1); - rss = _mm_srli_epi16(vlan1, 11); + rss = _mm_srli_epi32(vlan1, 12); rss = _mm_shuffle_epi8(rss_flags, rss); + l3_l4e = _mm_srli_epi32(vlan1, 22); + l3_l4e = _mm_shuffle_epi8(l3_l4e_flags, l3_l4e); + vlan0 = _mm_or_si128(vlan0, rss); - vol.dword = _mm_cvtsi128_si64(vlan0); + vlan0 = _mm_or_si128(vlan0, l3_l4e); - rx_pkts[0]->ol_flags = vol.e[0]; - rx_pkts[1]->ol_flags = vol.e[1]; - rx_pkts[2]->ol_flags = vol.e[2]; - rx_pkts[3]->ol_flags = vol.e[3]; + rx_pkts[0]->ol_flags = _mm_extract_epi16(vlan0, 0); + rx_pkts[1]->ol_flags = _mm_extract_epi16(vlan0, 2); + rx_pkts[2]->ol_flags = _mm_extract_epi16(vlan0, 4); + rx_pkts[3]->ol_flags = _mm_extract_epi16(vlan0, 6); } #else #define desc_to_olflags_v(desc, rx_pkts) do {} while (0) @@ -754,7 +762,8 @@ i40e_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev) #ifndef RTE_LIBRTE_I40E_RX_OLFLAGS_ENABLE /* whithout rx ol_flags, no VP flag report */ if (rxmode->hw_vlan_strip != 0 || - rxmode->hw_vlan_extend != 0) + rxmode->hw_vlan_extend != 0 || + rxmode->hw_ip_checksum != 0) return -1; #endif @@ -765,8 +774,7 @@ i40e_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev) /* - no csum error report support * - no header split support */ - if (rxmode->hw_ip_checksum == 1 || - rxmode->header_split == 1) + if (rxmode->header_split == 1) return -1; return 0; -- 2.5.0
[dpdk-dev] [PATCH 1/2] i40e: Add packet_type metadata in the i40e vPMD
From: Damjan Marion <damar...@cisco.com> The ptype is decoded from the rx descriptor and stored in the packet type field in the mbuf using the same function as the non-vector driver. Signed-off-by: Damjan Marion Signed-off-by: Jeff Shaw --- drivers/net/i40e/i40e_rxtx.c | 566 +-- drivers/net/i40e/i40e_rxtx.h | 563 ++ drivers/net/i40e/i40e_rxtx_vec.c | 16 ++ 3 files changed, 581 insertions(+), 564 deletions(-) diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c index d3cfb98..2903347 100644 --- a/drivers/net/i40e/i40e_rxtx.c +++ b/drivers/net/i40e/i40e_rxtx.c @@ -174,569 +174,6 @@ i40e_get_iee15888_flags(struct rte_mbuf *mb, uint64_t qword) } #endif -/* For each value it means, datasheet of hardware can tell more details - * - * @note: fix i40e_dev_supported_ptypes_get() if any change here. - */ -static inline uint32_t -i40e_rxd_pkt_type_mapping(uint8_t ptype) -{ - static const uint32_t type_table[UINT8_MAX + 1] __rte_cache_aligned = { - /* L2 types */ - /* [0] reserved */ - [1] = RTE_PTYPE_L2_ETHER, - [2] = RTE_PTYPE_L2_ETHER_TIMESYNC, - /* [3] - [5] reserved */ - [6] = RTE_PTYPE_L2_ETHER_LLDP, - /* [7] - [10] reserved */ - [11] = RTE_PTYPE_L2_ETHER_ARP, - /* [12] - [21] reserved */ - - /* Non tunneled IPv4 */ - [22] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_L4_FRAG, - [23] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_L4_NONFRAG, - [24] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_L4_UDP, - /* [25] reserved */ - [26] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_L4_TCP, - [27] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_L4_SCTP, - [28] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_L4_ICMP, - - /* IPv4 --> IPv4 */ - [29] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_FRAG, - [30] = RTE_PTYPE_L2_ETHER | 
RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_NONFRAG, - [31] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_UDP, - /* [32] reserved */ - [33] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_TCP, - [34] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_SCTP, - [35] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_ICMP, - - /* IPv4 --> IPv6 */ - [36] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_FRAG, - [37] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_NONFRAG, - [38] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_UDP, - /* [39] reserved */ - [40] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_TCP, - [41] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_SCTP, - [42] = RTE_PT
[dpdk-dev] [PATCH 0/2] Add ptype and xsum handling in i40e rx vpmd
Our testing suggests minimal (in some cases zero) impact to core-bound forwarding throughput as measured by testpmd. Throughput increase is observed in l3fwd as now the vpmd can be used with hw_ip_checksum enabled and without needing '--parse-ptype'. The benefits to applications using this functionality is realized when Ethernet processing and L3/L4 checksum validation can be skipped. We hope others can also test performance in their applications while conducting a review of this series. Damjan Marion (2): i40e: Add packet_type metadata in the i40e vPMD i40e: Enable bad checksum flags in i40e vPMD drivers/net/i40e/i40e_rxtx.c | 566 +-- drivers/net/i40e/i40e_rxtx.h | 563 ++ drivers/net/i40e/i40e_rxtx_vec.c | 64 +++-- 3 files changed, 609 insertions(+), 584 deletions(-) -- 2.5.0
[dpdk-dev] [PATCH] doc: fix mailing list address typo.
The correct mailing list is dev at dpdk.org, not dev at dpkg.org. Signed-off-by: Jeff Shaw --- doc/guides/contributing/patches.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/guides/contributing/patches.rst b/doc/guides/contributing/patches.rst index 06af91d..16a21a5 100644 --- a/doc/guides/contributing/patches.rst +++ b/doc/guides/contributing/patches.rst @@ -22,7 +22,7 @@ The DPDK development process has the following features: * Patches are reviewed publicly on the mailing list. * Successfully reviewed patches are merged to the master branch of the repository. -The mailing list for DPDK development is `dev at dpkg.org <http://dpdk.org/ml/archives/dev/>`_. +The mailing list for DPDK development is `dev at dpdk.org <http://dpdk.org/ml/archives/dev/>`_. Contributors will need to `register for the mailing list <http://dpdk.org/ml/listinfo/dev>`_ in order to submit patches. It is also worth registering for the DPDK `Patchwork <http://dpdk.org/dev/patchwork/project/dpdk/list/>`_ -- 2.5.0
[dpdk-dev] [PATCH] tools: Fix typo in dpdk_nic_bind.py script.
We should call sys.exit(), not divide sys by exit(). Signed-off-by: Jeff Shaw --- tools/dpdk_nic_bind.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/dpdk_nic_bind.py b/tools/dpdk_nic_bind.py index f02454e..a1923c5 100755 --- a/tools/dpdk_nic_bind.py +++ b/tools/dpdk_nic_bind.py @@ -317,7 +317,7 @@ def unbind_one(dev_id, force): f = open(filename, "a") except: print "Error: unbind failed for %s - Cannot open %s" % (dev_id, filename) -sys/exit(1) +sys.exit(1) f.write(dev_id) f.close() -- 2.1.0
[dpdk-dev] fm10k_rxtx.c does not compile
On Wed, Feb 18, 2015 at 05:29:02PM +, Wiles, Keith wrote: > I believe Jeff @ Intel is going to submit a patch soon. I sent patches to fix these errors. Thanks, Jeff
[dpdk-dev] [PATCH] fm10k: fix clang warning flags
This commit fixes the following error which was reported when compiling with clang by removing the option. error: unknown warning option '-Wno-unused-but-set-variable' Signed-off-by: Jeff Shaw --- lib/librte_pmd_fm10k/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/librte_pmd_fm10k/Makefile b/lib/librte_pmd_fm10k/Makefile index 986f4ef..26663ae 100644 --- a/lib/librte_pmd_fm10k/Makefile +++ b/lib/librte_pmd_fm10k/Makefile @@ -55,7 +55,7 @@ else ifeq ($(CC), clang) # CFLAGS_BASE_DRIVER = -Wno-unused-parameter -Wno-unused-value CFLAGS_BASE_DRIVER += -Wno-strict-aliasing -Wno-format-extra-args -CFLAGS_BASE_DRIVER += -Wno-unused-variable -Wno-unused-but-set-variable +CFLAGS_BASE_DRIVER += -Wno-unused-variable CFLAGS_BASE_DRIVER += -Wno-missing-field-initializers else -- 2.1.0
[dpdk-dev] [PATCH v5 08/17] fm10k: add RX/TX single queue start/stop function
Hi David, thanks for the review. On Fri, Feb 13, 2015 at 12:31:16PM +0100, David Marchand wrote: > Hello, > > On Fri, Feb 13, 2015 at 9:19 AM, Chen Jing D(Mark) > wrote: > > [snip] > > +/* > > + * Verify Rx packet buffer alignment is valid. > > + * > > + * Hardware requires specific alignment for Rx packet buffers. At > > + * least one of the following two conditions must be satisfied. > > + * 1. Address is 512B aligned > > + * 2. Address is 8B aligned and buffer does not cross 4K boundary. > > + * > > + * Return 1 if buffer alignment satisfies at least one condition, > > + * otherwise return 0. > > + * > > + * Note: Alignment is checked by the driver when the Rx queue is reset. It > > + * is assumed that if an entire descriptor ring can be filled with > > + * buffers containing valid alignment, then all buffers in that > > mempool > > + * have valid address alignment. It is the responsibility of the > > user > > + * to ensure all buffers have valid alignment, as it is the user who > > + * creates the mempool. > > + * Note: It is assumed the buffer needs only to store a maximum size > > Ethernet > > + * frame. > > + */ > > +static inline int > > +fm10k_addr_alignment_valid(struct rte_mbuf *mb) > > +{ > > + uint64_t addr = MBUF_DMA_ADDR_DEFAULT(mb); > > + uint64_t boundary1, boundary2; > > + > > + /* 512B aligned? */ > > + if (RTE_ALIGN(addr, 512) == addr) > > + return 1; > > + > > + /* 8B aligned, and max Ethernet frame would not cross a 4KB > > boundary? */ > > + if (RTE_ALIGN(addr, 8) == addr) { > > + boundary1 = RTE_ALIGN_FLOOR(addr, 4096); > > + boundary2 = RTE_ALIGN_FLOOR(addr + > > ETHER_MAX_VLAN_FRAME_LEN, > > + 4096); > > + if (boundary1 == boundary2) > > + return 1; > > + } > > + > > + /* use RTE_LOG directly to make sure this error is seen */ > > + RTE_LOG(ERR, PMD, "%s(): Error: Invalid buffer alignment\n", > > __func__); > > + > > + return 0; > > +} > > > > Same comment as before, do not directly use RTE_LOG. 
> This is init stuff, you have a PMD_INIT_LOG macro. Agreed, the comment should be fixed. > > By the way, I need to dig deeper into this, but I can see multiple patches > ensuring buffer alignment. > Do we really need to validate this alignment here, if we already validated > this constraint at the mempool level ? > This is really a sanity check. The buffer alignment needs to be checked at runtime because a user could modify the alignment. We provide a check here to be extra safe, and hopefully to fail at init time rather than later. There are two ways to satisfy the alignment requirements for the hardware. Currently the driver implements the 512B alignment, but it is possible somebody may want to use the other 8B alignment w/o crossing a 4K page boundary. This sanity check would help catch any possible issues in the future related to buffer alignment. -Jeff
[dpdk-dev] [PATCH v4 12/15] fm10k: Add scatter receive function
On Wed, Feb 11, 2015 at 09:31:35AM +0800, Chen Jing D(Mark) wrote: > > +uint16_t > +fm10k_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, > + uint16_t nb_pkts) > +{ > + struct rte_mbuf *mbuf; > + union fm10k_rx_desc desc; > + struct fm10k_rx_queue *q = rx_queue; > + uint16_t count = 0; > + uint16_t nb_rcv, nb_seg; > + int alloc = 0; > + uint16_t next_dd; > + struct rte_mbuf *first_seg = q->pkt_first_seg; > + struct rte_mbuf *last_seg = q->pkt_last_seg; > + > + next_dd = q->next_dd; > + nb_rcv = 0; > + > + nb_seg = RTE_MIN(nb_pkts, q->alloc_thresh); > + for (count = 0; count < nb_seg; count++) { > + mbuf = q->sw_ring[next_dd]; > + desc = q->hw_ring[next_dd]; > + if (!(desc.d.staterr & FM10K_RXD_STATUS_DD)) > + break; > +#ifdef RTE_LIBRTE_FM10K_DEBUG_RX > + dump_rxd(); > +#endif > + > + if (++next_dd == q->nb_desc) { > + next_dd = 0; > + alloc = 1; > + } > + > + /* Prefetch next mbuf while processing current one. */ > + rte_prefetch0(q->sw_ring[next_dd]); > + > + /* > + * When next RX descriptor is on a cache-line boundary, > + * prefetch the next 4 RX descriptors and the next 8 pointers > + * to mbufs. > + */ > + if ((next_dd & 0x3) == 0) { > + rte_prefetch0(>hw_ring[next_dd]); > + rte_prefetch0(>sw_ring[next_dd]); > + } > + > + /* Fill data length */ > + rte_pktmbuf_data_len(mbuf) = desc.w.length; > + > + /* > + * If this is the first buffer of the received packet, > + * set the pointer to the first mbuf of the packet and > + * initialize its context. > + * Otherwise, update the total length and the number of segments > + * of the current scattered packet, and update the pointer to > + * the last mbuf of the current packet. 
> + */ > + if (!first_seg) { > + first_seg = mbuf; > + first_seg->pkt_len = desc.w.length; > + } else { > + first_seg->pkt_len = > + (uint16_t)(first_seg->pkt_len + > + rte_pktmbuf_data_len(mbuf)); > + first_seg->nb_segs++; > + last_seg->next = mbuf; > + } > + > + /* > + * If this is not the last buffer of the received packet, > + * update the pointer to the last mbuf of the current scattered > + * packet and continue to parse the RX ring. > + */ > + if (!(desc.d.staterr & FM10K_RXD_STATUS_EOP)) { > + last_seg = mbuf; > + continue; > + } > + > + first_seg->ol_flags = 0; > +#ifdef RTE_LIBRTE_FM10K_RX_OLFLAGS_ENABLE > + rx_desc_to_ol_flags(first_seg, ); > +#endif > + first_seg->hash.rss = desc.d.rss; > + > + /* Prefetch data of first segment, if configured to do so. */ > + rte_packet_prefetch((char *)first_seg->buf_addr + > + first_seg->data_off); > + > + /* > + * Store the mbuf address into the next entry of the array > + * of returned packets. > + */ > + rx_pkts[nb_rcv++] = first_seg; > + > + /* > + * Setup receipt context for a new packet. > + */ > + first_seg = NULL; > + } > + > + q->next_dd = next_dd; > + q->pkt_first_seg = first_seg; > + q->pkt_last_seg = last_seg; > + > + if ((q->next_dd > q->next_trigger) || (alloc == 1)) { > + rte_mempool_get_bulk(q->mp, (void **)>sw_ring[q->next_alloc], > + q->alloc_thresh); Same thing here. The return value should be checked in case the mempool runs out of buffers. > + for (; q->next_alloc <= q->next_trigger; ++q->next_alloc) { > + mbuf = q->sw_ring[q->next_alloc]; > + > + /* setup static mbuf fields */ > + fm10k_pktmbuf_reset(mbuf, q->port_id); > + > + /* write descriptor */ > + desc.q.pkt_addr = MBUF_DMA_ADDR_DEFAULT(mbuf); > + desc.q.hdr_addr = MBUF_DMA_ADDR_DEFAULT(mbuf); > + q->hw_ring[q->next_alloc] = desc; > + } > + FM10K_PCI_REG_WRITE(q->tail_ptr, q->next_trigger); > + q->next_trigger += q->alloc_thresh; > + if (q->next_trigger >= q->nb_desc) { > + q->next_trigger = q->alloc_thresh - 1; > + q->next_alloc = 0; > +
[dpdk-dev] [PATCH v4 10/15] fm10k: add receive and transmit function
On Wed, Feb 11, 2015 at 09:31:33AM +0800, Chen Jing D(Mark) wrote: > +uint16_t > +fm10k_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, > + uint16_t nb_pkts) > +{ > + struct rte_mbuf *mbuf; > + union fm10k_rx_desc desc; > + struct fm10k_rx_queue *q = rx_queue; > + uint16_t count = 0; > + int alloc = 0; > + uint16_t next_dd; > + > + next_dd = q->next_dd; > + > + nb_pkts = RTE_MIN(nb_pkts, q->alloc_thresh); > + for (count = 0; count < nb_pkts; ++count) { > + mbuf = q->sw_ring[next_dd]; > + desc = q->hw_ring[next_dd]; > + if (!(desc.d.staterr & FM10K_RXD_STATUS_DD)) > + break; > +#ifdef RTE_LIBRTE_FM10K_DEBUG_RX > + dump_rxd(); > +#endif > + rte_pktmbuf_pkt_len(mbuf) = desc.w.length; > + rte_pktmbuf_data_len(mbuf) = desc.w.length; > + > + mbuf->ol_flags = 0; > +#ifdef RTE_LIBRTE_FM10K_RX_OLFLAGS_ENABLE > + rx_desc_to_ol_flags(mbuf, ); > +#endif > + > + mbuf->hash.rss = desc.d.rss; > + > + rx_pkts[count] = mbuf; > + if (++next_dd == q->nb_desc) { > + next_dd = 0; > + alloc = 1; > + } > + > + /* Prefetch next mbuf while processing current one. */ > + rte_prefetch0(q->sw_ring[next_dd]); > + > + /* > + * When next RX descriptor is on a cache-line boundary, > + * prefetch the next 4 RX descriptors and the next 8 pointers > + * to mbufs. > + */ > + if ((next_dd & 0x3) == 0) { > + rte_prefetch0(>hw_ring[next_dd]); > + rte_prefetch0(>sw_ring[next_dd]); > + } > + } > + > + q->next_dd = next_dd; > + > + if ((q->next_dd > q->next_trigger) || (alloc == 1)) { > + rte_mempool_get_bulk(q->mp, (void **)>sw_ring[q->next_alloc], > + q->alloc_thresh); The return value should be checked here in case the mempool runs out of buffers. Thanks Helin for spotting this. I'm not sure how I missed it originally. 
> + for (; q->next_alloc <= q->next_trigger; ++q->next_alloc) { > + mbuf = q->sw_ring[q->next_alloc]; > + > + /* setup static mbuf fields */ > + fm10k_pktmbuf_reset(mbuf, q->port_id); > + > + /* write descriptor */ > + desc.q.pkt_addr = MBUF_DMA_ADDR_DEFAULT(mbuf); > + desc.q.hdr_addr = MBUF_DMA_ADDR_DEFAULT(mbuf); > + q->hw_ring[q->next_alloc] = desc; > + } > + FM10K_PCI_REG_WRITE(q->tail_ptr, q->next_trigger); > + q->next_trigger += q->alloc_thresh; > + if (q->next_trigger >= q->nb_desc) { > + q->next_trigger = q->alloc_thresh - 1; > + q->next_alloc = 0; > + } > + } > + > + return count; > +} > + Thanks, Jeff
[dpdk-dev] [PATCH 00/18] lib/librte_pmd_fm10k : fm10k pmd driver
On Fri, Jan 30, 2015 at 04:26:33PM -0500, Neil Horman wrote: > On Fri, Jan 30, 2015 at 01:07:16PM +0800, Chen Jing D(Mark) wrote: > > From: "Chen Jing D(Mark)" > > > > The patch set add poll mode driver for the host interface of Intel > > Red Rock Canyon silicon, which integrates NIC and switch functionalities. > > The patch set include below features: > > > > 1. Basic RX/TX functions for PF/VF. > > 2. Interrupt handling mechanism for PF/VF. > > 3. per queue start/stop functions for PF/VF. > > 4. Mailbox handling between PF/VF and PF/Switch Manager. > > 5. Receive Side Scaling (RSS) for PF/VF. > > 6. Scatter receive function for PF/VF. > > 7. reta update/query for PF/VF. > > 8. VLAN filter set for PF. > > 9. Link status query for PF/VF. > > > > Jeff Shaw (18): > > fm10k: add base driver > > Change config/ files to add macros for fm10k > > fm10k: Add empty fm10k files > > fm10k: add fm10k device id > > fm10k: Add code to register fm10k pmd PF driver > > fm10k: add reta update/requery functions > > fm10k: add rx_queue_setup/release function > > fm10k: add tx_queue_setup/release function > > fm10k: add RX/TX single queue start/stop function > > fm10k: add dev start/stop functions > > fm10k: add receive and tranmit function > > fm10k: add PF RSS support > > fm10k: Add scatter receive function > > fm10k: add function to set vlan > > fm10k: Add SRIOV-VF support > > fm10k: add PF and VF interrupt handling function > > Change lib/Makefile to add fm10k driver into compile list. 
> > Change mk/rte.app.mk to add fm10k lib into link > > > > config/common_bsdapp|9 + > > config/common_linuxapp |9 + > > lib/Makefile|1 + > > lib/librte_eal/common/include/rte_pci_dev_ids.h | 22 + > > lib/librte_pmd_fm10k/Makefile | 96 + > > lib/librte_pmd_fm10k/SHARED/fm10k_api.c | 327 > > lib/librte_pmd_fm10k/SHARED/fm10k_api.h | 60 + > > lib/librte_pmd_fm10k/SHARED/fm10k_common.c | 573 ++ > > lib/librte_pmd_fm10k/SHARED/fm10k_common.h | 52 + > > lib/librte_pmd_fm10k/SHARED/fm10k_mbx.c | 2186 > > +++ > > lib/librte_pmd_fm10k/SHARED/fm10k_mbx.h | 329 > > lib/librte_pmd_fm10k/SHARED/fm10k_osdep.h | 116 ++ > > lib/librte_pmd_fm10k/SHARED/fm10k_pf.c | 1877 +++ > > lib/librte_pmd_fm10k/SHARED/fm10k_pf.h | 152 ++ > > lib/librte_pmd_fm10k/SHARED/fm10k_tlv.c | 914 ++ > > lib/librte_pmd_fm10k/SHARED/fm10k_tlv.h | 199 ++ > > lib/librte_pmd_fm10k/SHARED/fm10k_type.h| 925 ++ > > lib/librte_pmd_fm10k/SHARED/fm10k_vf.c | 586 ++ > > lib/librte_pmd_fm10k/SHARED/fm10k_vf.h | 91 + > > lib/librte_pmd_fm10k/fm10k.h| 293 +++ > > lib/librte_pmd_fm10k/fm10k_ethdev.c | 1846 +++ > > lib/librte_pmd_fm10k/fm10k_logs.h | 66 + > > lib/librte_pmd_fm10k/fm10k_rxtx.c | 427 + > > mk/rte.app.mk |4 + > > 24 files changed, 11160 insertions(+), 0 deletions(-) > > create mode 100644 lib/librte_pmd_fm10k/Makefile > > create mode 100644 lib/librte_pmd_fm10k/SHARED/fm10k_api.c > > create mode 100644 lib/librte_pmd_fm10k/SHARED/fm10k_api.h > > create mode 100644 lib/librte_pmd_fm10k/SHARED/fm10k_common.c > > create mode 100644 lib/librte_pmd_fm10k/SHARED/fm10k_common.h > > create mode 100644 lib/librte_pmd_fm10k/SHARED/fm10k_mbx.c > > create mode 100644 lib/librte_pmd_fm10k/SHARED/fm10k_mbx.h > > create mode 100644 lib/librte_pmd_fm10k/SHARED/fm10k_osdep.h > > create mode 100644 lib/librte_pmd_fm10k/SHARED/fm10k_pf.c > > create mode 100644 lib/librte_pmd_fm10k/SHARED/fm10k_pf.h > > create mode 100644 lib/librte_pmd_fm10k/SHARED/fm10k_tlv.c > > create mode 100644 lib/librte_pmd_fm10k/SHARED/fm10k_tlv.h > 
> create mode 100644 lib/librte_pmd_fm10k/SHARED/fm10k_type.h > > create mode 100644 lib/librte_pmd_fm10k/SHARED/fm10k_vf.c > > create mode 100644 lib/librte_pmd_fm10k/SHARED/fm10k_vf.h > > create mode 100644 lib/librte_pmd_fm10k/fm10k.h > > create mode 100644 lib/librte_pmd_fm10k/fm10k_ethdev.c > > create mode 100644 lib/librte_pmd_fm10k/fm10k_logs.h > > create mode 100644 lib/librte_pmd_fm10k/fm10k_rxtx.c > > > > Why is there a SHARED directory in the driver? Are there other drivers that > use > the shared fm10k code? No, the other poll-mode drivers do not use the shared fm10k code. The directory is similar to the 'ixgbe' and 'i40e' directories in their respective PMDs, only that it is named 'SHARED' for the fm10k driver. -Jeff > > Neil >
[dpdk-dev] Intel 82599 tx_conf setting
On Wed, Nov 05, 2014 at 09:43:43AM +0900, Gyumin wrote: > Hi > > I've read the Intel 82599 official manual and I found that optimal > PTHRESH is the tx descriptor buffer size - N (N is CPU cache line > divided by 16). This is sometimes true, but not always. I believe you are referring to section "7.2.3.4.1 Transmit Descriptor Fetch and Write-back Settings" in the datasheet. You'll see the PTHRESH, HTHRESH, and WTHRESH parameters should be tuned to for your workload. You should try a few combinations of parameters (starting with the defaults) to see which is really optimal for your application. > 1. I guess the size of the tx descriptor buffer is 128. Isn't it right? >Where is the size of the tx descriptor buffer in the official manual? The wording in the manual may be a bit confusing. You will see the manual refers to the "on-chip descriptor buffer size". This is where the NIC stores descriptors which were fetched from the actual descriptor ring in host memory. Section "7.2.3.3 Transmit Descriptor Ring" states that the size of the on-chip descriptor buffer size per queue is 40. > > 2. What it means that the TX_PTHRESH=36 in the testpmd.c? >If the size of tx descriptor buffer is 128 then optimal thresholds > to minimize latency are pthresh=4(cache line / 16), hthresh=0 and > wthresh=0. Is there something I missed? Since the on-chip descriptor buffer size is 40, it is clear that we have chosen reasonable defaults since 40 minus 4 is 36. I recommend you test a few different values to see how these parameters impact the performance characteristics of your workload. > > > Thanks. You're welcome. -Jeff
[dpdk-dev] ixgbe_recv_pkts, ixgbe_recv_pkts_bulk_alloc. what is difference?
On Wed, Oct 22, 2014 at 11:18:17PM +0900, GyuminHwang wrote: > Hi all > > I have several questions about ixgbe_rxtx.c especially Tx and Rx function. > What is the difference between ixgbe_recv_pkts and > ixgbe_recv_pkts_bulk_alloc? I already know the earlier function is > non-bulk function and the later function is bulk function. But I want to > know is the mechanism of these two functions, and the role of H/W ring > and S/W ring in each function. As you mentioned, the main difference is that the bulk_alloc version allocates packet buffers in bulk (using rte_mempool_get_bulk) while the ixgbe_recv_pkts function allocates a single buffer at a time to replace the one which was just used to receive a frame. Another major difference with the bulk_alloc version is that the descriptor ring (aka H/W ring) is scanned in bulk to determine if multiple frames are available to be received. The resulting performance is higher than if operations were done one at a time, as is the case with the ixgbe_recv_pkts function. The drawback of using the bulk_alloc function is that it does not support more than one descriptor per frame, so you cannot use it if you are configured to receive packets greater than 2KB in size. The H/W ring is the hardware descriptor ring on the NIC. This is where descriptors are read/written. There are plenty of details in section 7.1 of the Intel(R) 82599 10 Gigabit Ethernet Controller datasheet. As for the software ring, this is where pointers to mbufs are stored. You can think of the h/w ring as storing descriptors, and is used for controlling the NIC behavior, while the s/w ring is for storing buffer pointers. The sw_ring[0] contains a pointer to the buffer to be used for hw_ring[0]. -Jeff
[dpdk-dev] DPDK Demos at IDF conference using DDIO
On Thu, Sep 25, 2014 at 03:19:31PM +, Anjali Kulkarni wrote: > Hi, > > There were a few DPDK demos at IDF, and from one of them I gathered that you > can use DDIO to enhance performance by using certain lookup APIs in DPDK. Can > someone shed light on this? Is DDIO enabled by default? It is available only > on the v3 chip and needs DPDK 1.7 or how can get the performance boost of > DDIO in my application? > > Thanks > Anjali Intel(R) Data Direct I/O Technology (Intel(R) DDIO) is a feature introduced with the Intel(R) Xeon(R) processor E5 family. It has been around for several years and is available at least on all Xeon E5 processors. DDIO is part of the platform, so any DPDK version can take advantage of the feature. There are several papers and videos available on the Internet that can provide more details. Thanks, Jeff
[dpdk-dev] TCP/IP stack for DPDK
On Tue, Sep 09, 2014 at 08:49:44AM +0800, zimeiw wrote: > hi, > > > I have porting major FreeBSD tcp/ip stack to dpdk. new tcp/ip stack is based > on dpdk rte_mbuf, rte_ring, rte_memory and rte_table. it is faster to > forwarding packets. > > Below feature are ready: > > Netdp initialize > Ether layer > ARP > IP layer > Routing > ICMP > Commands for adding, deleting, showing IP address > Commands for adding, deleting, showing static route > Next planning: > Porting udp to netdp. > > Porting tcp to netdp. > Porting socket to netdp. > > > You can find the code from the link: https://github.com/dpdk-net/netdp > > > Hi zimeiw, when will you be posting the source code to github? I can only find a static lib and some header files. Thanks, Jeff
[dpdk-dev] Ability to/impact of running with smaller page sizes
Hi Matt, On Mon, Jun 30, 2014 at 05:43:39PM -0500, Matt Laswell wrote: > Hey Folks, > > In my application, I'm seeing some design considerations in a project I'm > working on that push me towards the use of smaller memory page sizes. I'm > curious - is it possible in practical terms to run DPDK without hugepages? Yes, but I do not believe an implementation exists. > If so, does anybody have any practical experience (or a > back-of-the-envelop estimate) of how badly such a configuration would hurt > performance? For sake of argument, assume that virtually all of the memory > being used is in pre-allocated mempools (e.g lots of rte_mempool_create(), > very little rte_malloc(). > It is possible, though not recommended if you want "good performance", to use smaller memory page sizes. Poor performance results from penalties incurred due to DTLB misses. Please consider the following example. An application pre-allocates several thousand buffers to use for packet reception and transmission using 4KB pages. Each buffer contains 2KB worth of data space, or enough to store the typical maximum Ethernet frame size. Since the page size is only 4KB, each DTLB entry can cache a maximum of two packet buffer address translations. If the first level DTLB has, for instance, 64 x 4KB entries, you would only be able to cache about 128 address translations at any given time (+1,024 if you include the second level DTLB). With 32 x 2MB entries, the DTLB can cache address translations for 32K packet buffers at any given time. If you believe that your application performance will be negatively impacted by latencies incurred due to DTLB misses, it is recommended to take steps which would maximize the DTLB hit rate. Of course, you will not know how this impacts performance for your application unless it is tried under realistic conditions. If you end up doing so, could you please update the list? Thanks, Jeff