From: "Chen Jing D(Mark)" <jing.d.c...@intel.com>

Add two functions that use SSE instructions to parse RX descriptors
and fill in the packet_type and ol_flags fields of the mbuf.

Signed-off-by: Chen Jing D(Mark) <jing.d.chen at intel.com>
---
 drivers/net/fm10k/fm10k_rxtx_vec.c |  127 ++++++++++++++++++++++++++++++++++++
 1 files changed, 127 insertions(+), 0 deletions(-)

diff --git a/drivers/net/fm10k/fm10k_rxtx_vec.c b/drivers/net/fm10k/fm10k_rxtx_vec.c
index 75533f9..581a309 100644
--- a/drivers/net/fm10k/fm10k_rxtx_vec.c
+++ b/drivers/net/fm10k/fm10k_rxtx_vec.c
@@ -44,6 +44,133 @@
 #pragma GCC diagnostic ignored "-Wcast-qual"
 #endif

+/* Handling the offload flags (olflags) field takes computation
+ * time when receiving packets. Therefore we provide a flag to disable
+ * the processing of the olflags field when it is not needed. This
+ * gives improved performance, at the cost of losing the offload and
+ * packet type info in the received packet.
+ */
+#ifdef RTE_LIBRTE_FM10K_RX_OLFLAGS_ENABLE
+
+/* VLAN present flag shift: right shift applied before masking with PKT_RX_VLAN_PKT */
+#define VP_SHIFT     (2)
+/* L3 type shift: moves the L3 type field (bits 4 to 6) down to bit 0 */
+#define L3TYPE_SHIFT     (4)
+/* L4 type shift: moves the L4 type field (bits 7 to 9) down to bit 0 */
+#define L4TYPE_SHIFT     (7)
+
+static inline void
+fm10k_desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts)
+{
+       __m128i ptype0, ptype1, vtag0, vtag1;
+       union {
+               uint16_t e[4];
+               uint64_t dword;
+       } vol;
+
+       const __m128i pkttype_msk = _mm_set_epi16(
+                       0x0000, 0x0000, 0x0000, 0x0000,
+                       PKT_RX_VLAN_PKT, PKT_RX_VLAN_PKT,
+                       PKT_RX_VLAN_PKT, PKT_RX_VLAN_PKT);
+
+       /* keep only the rss type: low 4 bits of each of the low four 16-bit lanes */
+       const __m128i rsstype_msk = _mm_set_epi16(
+                       0x0000, 0x0000, 0x0000, 0x0000,
+                       0x000F, 0x000F, 0x000F, 0x000F);
+
+       /* lookup table: byte i is PKT_RX_RSS_HASH for rss types 1-3, 5, 7 and 8, else 0 */
+       const __m128i rss_flags = _mm_set_epi8(0, 0, 0, 0,
+                       0, 0, 0, PKT_RX_RSS_HASH,
+                       PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH, 0,
+                       PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, 0);
+
+       ptype0 = _mm_unpacklo_epi16(descs[0], descs[1]);
+       ptype1 = _mm_unpacklo_epi16(descs[2], descs[3]);
+       vtag0 = _mm_unpackhi_epi16(descs[0], descs[1]);
+       vtag1 = _mm_unpackhi_epi16(descs[2], descs[3]);
+
+       ptype0 = _mm_unpacklo_epi32(ptype0, ptype1);
+       ptype0 = _mm_and_si128(ptype0, rsstype_msk);
+       ptype0 = _mm_shuffle_epi8(rss_flags, ptype0);
+
+       vtag1 = _mm_unpacklo_epi32(vtag0, vtag1);
+       vtag1 = _mm_srli_epi16(vtag1, VP_SHIFT);
+       vtag1 = _mm_and_si128(vtag1, pkttype_msk);
+
+       vtag1 = _mm_or_si128(ptype0, vtag1);
+       vol.dword = _mm_cvtsi128_si64(vtag1);
+
+       rx_pkts[0]->ol_flags = vol.e[0];
+       rx_pkts[1]->ol_flags = vol.e[1];
+       rx_pkts[2]->ol_flags = vol.e[2];
+       rx_pkts[3]->ol_flags = vol.e[3];
+}
+
+static inline void
+fm10k_desc_to_pktype_v(__m128i descs[4], struct rte_mbuf **rx_pkts)
+{
+       __m128i l3l4type0, l3l4type1, l3type, l4type;
+       union {
+               uint16_t e[4];
+               uint64_t dword;
+       } vol;
+
+       /* L3 packet type mask: bits 4 to 6 of each of the low four 16-bit lanes */
+       const __m128i l3type_msk = _mm_set_epi16(
+                       0x0000, 0x0000, 0x0000, 0x0000,
+                       0x0070, 0x0070, 0x0070, 0x0070);
+
+       /* L4 packet type mask: bits 7 to 9 of each of the low four 16-bit lanes */
+       const __m128i l4type_msk = _mm_set_epi16(
+                       0x0000, 0x0000, 0x0000, 0x0000,
+                       0x0380, 0x0380, 0x0380, 0x0380);
+
+       /* convert RRC l3 type to mbuf format: table byte i is the value for type i */
+       const __m128i l3type_flags = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
+                       0, 0, 0, RTE_PTYPE_L3_IPV6_EXT,
+                       RTE_PTYPE_L3_IPV6, RTE_PTYPE_L3_IPV4_EXT,
+                       RTE_PTYPE_L3_IPV4, 0);
+
+       /* Convert RRC l4 type to mbuf format. The RTE_PTYPE_* values are
+        * shifted right by 8 bits so that each table entry fits into 8 bits.
+        */
+       const __m128i l4type_flags = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0,
+                       RTE_PTYPE_TUNNEL_GENEVE >> 8,
+                       RTE_PTYPE_TUNNEL_NVGRE >> 8,
+                       RTE_PTYPE_TUNNEL_VXLAN >> 8,
+                       RTE_PTYPE_TUNNEL_GRE >> 8,
+                       RTE_PTYPE_L4_UDP >> 8,
+                       RTE_PTYPE_L4_TCP >> 8,
+                       0);
+
+       l3l4type0 = _mm_unpacklo_epi16(descs[0], descs[1]);
+       l3l4type1 = _mm_unpacklo_epi16(descs[2], descs[3]);
+       l3l4type0 = _mm_unpacklo_epi32(l3l4type0, l3l4type1);
+
+       l3type = _mm_and_si128(l3l4type0, l3type_msk);
+       l4type = _mm_and_si128(l3l4type0, l4type_msk);
+
+       l3type = _mm_srli_epi16(l3type, L3TYPE_SHIFT);
+       l4type = _mm_srli_epi16(l4type, L4TYPE_SHIFT);
+
+       l3type = _mm_shuffle_epi8(l3type_flags, l3type);
+       /* l4type_flags entries are pre-shifted right by 8; shifted back left below */
+       l4type = _mm_shuffle_epi8(l4type_flags, l4type);
+
+       l4type = _mm_slli_epi16(l4type, 8);
+       l3l4type0 = _mm_or_si128(l3type, l4type);
+       vol.dword = _mm_cvtsi128_si64(l3l4type0);
+
+       rx_pkts[0]->packet_type = vol.e[0];
+       rx_pkts[1]->packet_type = vol.e[1];
+       rx_pkts[2]->packet_type = vol.e[2];
+       rx_pkts[3]->packet_type = vol.e[3];
+}
+#else /* !RTE_LIBRTE_FM10K_RX_OLFLAGS_ENABLE: both parsers expand to no-ops */
+#define fm10k_desc_to_olflags_v(desc, rx_pkts) do {} while (0)
+#define fm10k_desc_to_pktype_v(desc, rx_pkts) do {} while (0)
+#endif /* RTE_LIBRTE_FM10K_RX_OLFLAGS_ENABLE */
+
 int __attribute__((cold))
 fm10k_rxq_vec_setup(struct fm10k_rx_queue *rxq)
 {
-- 
1.7.7.6

Reply via email to