[ovs-dev] [PATCH v6 9/9] mfex-avx512: Add support for tunnel packets in avx512 MFEX.

2022-10-06 Thread Kumar Amber
This patch adds the support needed for the avx512 mfex to
handle tunnel packet types.

Signed-off-by: Kumar Amber 

---
v6:
- Fix minor comments from Cian.
- Deduce magic bits through protocol sizes.
v5:
- Check the metadata IP address to determine whether tunneling is valid,
  as dummy-pmd often passes garbage data to dpif.
---
---
 lib/dpif-netdev-avx512.c  |  32 ++---
 lib/dpif-netdev-extract-avx512.c  | 206 --
 lib/dpif-netdev-private-extract.c |   4 +-
 3 files changed, 187 insertions(+), 55 deletions(-)

diff --git a/lib/dpif-netdev-avx512.c b/lib/dpif-netdev-avx512.c
index 90bc7a57d..ed5c4463a 100644
--- a/lib/dpif-netdev-avx512.c
+++ b/lib/dpif-netdev-avx512.c
@@ -184,15 +184,18 @@ dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
 goto action_stage;
 }
 
-/* Do a batch miniflow extract into keys, but only for outer packets. */
+/* Do a batch miniflow extract into keys. */
 uint32_t mf_mask = 0;
-if (recirc_depth == 0) {
-miniflow_extract_func mfex_func;
-atomic_read_relaxed(>miniflow_extract_opt, _func);
-if (mfex_func) {
-mf_mask = mfex_func(packets, keys, batch_size, in_port, pmd,
-md_is_valid);
-}
+miniflow_extract_func mfex_func;
+atomic_read_relaxed(>miniflow_extract_opt, _func);
+miniflow_extract_func mfex_inner_func;
+atomic_read_relaxed(>miniflow_extract_inner_opt, _inner_func);
+if (md_is_valid && mfex_inner_func) {
+mf_mask = mfex_inner_func(packets, keys, batch_size, in_port, pmd,
+  md_is_valid);
+} else if (!md_is_valid && mfex_func) {
+mf_mask = mfex_func(packets, keys, batch_size, in_port, pmd,
+md_is_valid);
 }
 
 uint32_t iter = lookup_pkts_bitmask;
@@ -207,21 +210,20 @@ dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
 pkt_metadata_prefetch_init(_packets[i + prefetch_ahead]->md);
 }
 
+/* Check the miniflow mask to see if the packet was correctly
+ * classified by vector mfex else do a scalar miniflow extract
+ * for that packet.
+ */
+bool mfex_hit = !!(mf_mask & (UINT32_C(1) << i));
 /* Get packet pointer from bitmask and packet md. */
 struct dp_packet *packet = packets->packets[i];
-if (!md_is_valid) {
+if (!md_is_valid && !mfex_hit) {
 pkt_metadata_init(>md, in_port);
 }
 
 struct dp_netdev_flow *f = NULL;
 struct netdev_flow_key *key = [i];
 
-/* Check the minfiflow mask to see if the packet was correctly
- * classifed by vector mfex else do a scalar miniflow extract
- * for that packet.
- */
-bool mfex_hit = !!(mf_mask & (UINT32_C(1) << i));
-
 /* Check for a partial hardware offload match. */
 if (hwol_enabled && recirc_depth == 0) {
 if (OVS_UNLIKELY(dp_netdev_hw_flow(pmd, packet, ))) {
diff --git a/lib/dpif-netdev-extract-avx512.c b/lib/dpif-netdev-extract-avx512.c
index 833e9bd31..9dfe4a234 100644
--- a/lib/dpif-netdev-extract-avx512.c
+++ b/lib/dpif-netdev-extract-avx512.c
@@ -360,6 +360,66 @@ _mm512_maskz_permutexvar_epi8_selector(__mmask64 k_shuf, 
__m512i v_shuf,
MF_WORD(ipv6_dst, 2) | MF_BIT(tp_src) | MF_BIT(tp_dst))
 #define MF_IPV6_TCP   (MF_IPV6_UDP | MF_BIT(tcp_flags) | MF_BIT(arp_tha.ea[2]))
 
+#define MF_TUNNEL MF_WORD(tunnel, offsetof(struct flow_tnl, metadata) / 8)
+
+#define MF_ETH_TUNNEL (MF_TUNNEL | MF_ETH)
+#define MF_ETH_VLAN_TUNNEL (MF_TUNNEL | MF_ETH_VLAN)
+
+/* Block offsets represent the offsets into the blocks array of miniflow
+ * and are derived experimentally. Scalar miniflow parses the headers
+ * in a fixed order, sequentially and dynamically, so incrementing a
+ * pointer and copying data is enough. In AVX512 the headers are
+ * parsed using pre-defined masks, so we need these magic offsets to write
+ * some of the data items at the correct location in the blocks array
+ * using the magic numbers below.
+ */
+#define BLK_META_DATA_OFFS(offsetof(struct flow_tnl, metadata)   \
+  / sizeof(uint64_t))
+
+/* First two blocks hold the metadata hash and in-port. */
+#define BLK_SIZE_META 2
+/* Block size for vlan. */
+#define BLK_SIZE_VLAN 1
+/* Block Size for handling special offsets TCP flags, IPv6 flags. */
+#define BLK_SIZE  1
+#define BLK_VLAN_PCP  (BLK_SIZE_META + (VLAN_ETH_HEADER_LEN / 8))
+/* Eth block is padded to 2 blocks in scalar. */
+#define BLK_IPv4_TCP_FLAG (BLK_SIZE_META + ((ETH_HEADER_LEN + 2) / 8) \
+  + (IP_HEADER_LEN / 8))
+#define BLK_VLAN_IPv4_TCP_FLAG(BLK_IPv4_TCP_FLAG + BLK_S

[ovs-dev] [PATCH v6 8/9] mfex-study: Modify study func to select outer and inner MFEX funcs.

2022-10-06 Thread Kumar Amber
The MFEX study function is split into outer and inner parts so that
packets in outer and inner flows can be studied independently and each
assigned its own ISA-optimized miniflow extract implementation.

Signed-off-by: Kumar Amber 

---
v6:
- Fix minor comments from Cian.
---
---
 lib/dpif-netdev-extract-study.c | 127 +---
 1 file changed, 82 insertions(+), 45 deletions(-)

diff --git a/lib/dpif-netdev-extract-study.c b/lib/dpif-netdev-extract-study.c
index 71354cc4c..da4734256 100644
--- a/lib/dpif-netdev-extract-study.c
+++ b/lib/dpif-netdev-extract-study.c
@@ -30,7 +30,9 @@ static atomic_uint32_t mfex_study_pkts_count = 
MFEX_MAX_PKT_COUNT;
 /* Struct to hold miniflow study stats. */
 struct study_stats {
 uint32_t pkt_count;
+uint32_t pkt_inner_count;
 uint32_t impl_hitcount[MFEX_IMPL_MAX];
+uint32_t impl_inner_hitcount[MFEX_IMPL_MAX];
 };
 
 /* Define per thread data to hold the study stats. */
@@ -67,6 +69,57 @@ mfex_set_study_pkt_cnt(uint32_t pkt_cmp_count, const char 
*name)
 return -EINVAL;
 }
 
+/* Reset stats so that the study function can be called again for the next
+ * traffic type and an optimal function pointer can be chosen.
+ */
+static inline void
+mfex_reset_stats(uint32_t *impls_hitcount, uint32_t *pkt_cnt) {
+memset(impls_hitcount, 0, sizeof(uint32_t) * MFEX_IMPL_MAX);
+*pkt_cnt = 0;
+}
+
+static inline void
+mfex_study_select_best_impls(struct dpif_miniflow_extract_impl *mfex_funcs,
+ uint32_t pkt_cnt, uint32_t *impls_arr,
+ atomic_uintptr_t *pmd_func, char *name)
+{
+
+uint32_t best_func_index = MFEX_IMPL_START_IDX;
+uint32_t max_hits = 0;
+
+for (int i = MFEX_IMPL_START_IDX; i < MFEX_IMPL_MAX; i++) {
+if (impls_arr[i] > max_hits) {
+max_hits = impls_arr[i];
+best_func_index = i;
+}
+}
+
+/* If at least 50% of the packets hit the implementation,
+ * enable that implementation.
+ */
+if (max_hits >= (mfex_study_pkts_count / 2)) {
+atomic_store_relaxed(pmd_func,
+(uintptr_t) mfex_funcs[best_func_index].extract_func);
+VLOG_INFO("MFEX %s study chose impl %s: (hits %u/%u pkts)",
+  name, mfex_funcs[best_func_index].name, max_hits, pkt_cnt);
+} else {
+/* Set the implementation to null for default miniflow. */
+atomic_store_relaxed(pmd_func,
+(uintptr_t) mfex_funcs[MFEX_IMPL_SCALAR].extract_func);
+VLOG_INFO("Not enough packets matched (%u/%u), disabling"
+  " optimized MFEX.", max_hits, pkt_cnt);
+}
+
+/* In debug mode show stats for all the counters. */
+if (VLOG_IS_DBG_ENABLED()) {
+for (int i = MFEX_IMPL_START_IDX; i < MFEX_IMPL_MAX; i++) {
+VLOG_DBG("MFEX study results for implementation %s:"
+ " (hits %u/%u pkts)", mfex_funcs[i].name,
+ impls_arr[i], pkt_cnt);
+}
+}
+}
+
 uint32_t
 mfex_study_traffic(struct dp_packet_batch *packets,
struct netdev_flow_key *keys,
@@ -76,10 +129,12 @@ mfex_study_traffic(struct dp_packet_batch *packets,
 {
 uint32_t hitmask = 0;
 uint32_t mask = 0;
+uint32_t study_cnt_pkts;
 struct dp_netdev_pmd_thread *pmd = pmd_handle;
 struct dpif_miniflow_extract_impl *miniflow_funcs;
 struct study_stats *stats = mfex_study_get_study_stats_ptr();
 miniflow_funcs = dpif_mfex_impl_info_get();
+atomic_read_relaxed(_study_pkts_count, _cnt_pkts);
 
 /* Run traffic optimized miniflow_extract to collect the hitmask
  * to be compared after certain packets have been hit to choose
@@ -93,7 +148,11 @@ mfex_study_traffic(struct dp_packet_batch *packets,
 hitmask = miniflow_funcs[i].extract_func(packets, keys, keys_size,
  in_port, pmd_handle,
  md_is_valid);
-stats->impl_hitcount[i] += count_1bits(hitmask);
+if (!md_is_valid) {
+stats->impl_hitcount[i] += count_1bits(hitmask);
+} else {
+stats->impl_inner_hitcount[i] += count_1bits(hitmask);
+}
 
 /* If traffic is not classified then we dont overwrite the keys
  * array in minfiflow implementations so its safe to create a
@@ -102,54 +161,32 @@ mfex_study_traffic(struct dp_packet_batch *packets,
 mask |= hitmask;
 }
 
-stats->pkt_count += dp_packet_batch_size(packets);
-
-/* Choose the best implementation after a minimum packets have been
- * processed.
+/* Choose the best miniflow extract implementation to use for inner
+ * and outer packets separately.
  */
-uint32_t study_cnt_pkts;
-atomic_read_relaxed(_study_pkts_count, _cnt_pkts);
-
-if (stats->pkt_count >= study_cnt_pkts) {
-  

[ovs-dev] [PATCH v6 6/9] dpif-mfex: Modify set/get MFEX commands to include inner.

2022-10-06 Thread Kumar Amber
The set command is modified to allow the user to select
different implementations for processing inner packets.
Also, the get command is modified to indicate both the inner
and outer MFEX implementations in use.

$ ovs-appctl dpif-netdev/miniflow-parser-set -pmd 3 study 1024 -recirc

Signed-off-by: Kumar Amber 
Signed-off-by: Cian Ferriter 
Co-authored-by: Cian Ferriter 
---
 Documentation/topics/dpdk/bridge.rst | 24 ++--
 lib/dpif-netdev-private-extract.c| 23 ++-
 lib/dpif-netdev-private-extract.h|  6 +-
 lib/dpif-netdev-private-thread.h |  3 +++
 lib/dpif-netdev.c| 23 +++
 5 files changed, 67 insertions(+), 12 deletions(-)

diff --git a/Documentation/topics/dpdk/bridge.rst 
b/Documentation/topics/dpdk/bridge.rst
index 354f1ced1..dbebea624 100644
--- a/Documentation/topics/dpdk/bridge.rst
+++ b/Documentation/topics/dpdk/bridge.rst
@@ -293,13 +293,21 @@ command also shows whether the CPU supports each 
implementation::
 An implementation can be selected manually by the following command::
 
 $ ovs-appctl dpif-netdev/miniflow-parser-set [-pmd core_id] name \
-  [study_cnt]
+  [study_cnt] [-recirc]
 
-The above command has two optional parameters: ``study_cnt`` and ``core_id``.
-The ``core_id`` sets a particular packet parsing function to a specific
-PMD thread on the core.  The third parameter ``study_cnt``, which is specific
-to ``study`` and ignored by other implementations, means how many packets
-are needed to choose the best implementation.
+The above command has three optional parameters: ``study_cnt``, ``core_id``
+and ``-recirc``. The ``core_id`` sets a particular packet parsing function
+to a specific PMD thread on the core.  The third parameter ``study_cnt``,
+which is specific to ``study`` and ignored by other implementations, means
+how many packets are needed to choose the best implementation. The fourth
+parameter ``-recirc`` indicates to MFEX to use optimized MFEX inner for
+processing tunneled inner packets. The optional ``-recirc`` parameter gives
+flexibility to set different optimized MFEX function on inner and outer,
+when set to study.
+
+Inner and outer MFEX can be selected independently by the following command::
+
+$ ovs-appctl dpif-netdev/miniflow-parser-set study -recirc
 
 Also user can select the ``study`` implementation which studies the traffic for
 a specific number of packets by applying all available implementations of
@@ -322,6 +330,10 @@ following command::
 
 $ ovs-appctl dpif-netdev/miniflow-parser-set -pmd 3 scalar
 
+``study`` can be selected with packet count and explicit PMD selection along
+with the ``-recirc`` option by the following command::
+
+$ ovs-appctl dpif-netdev/miniflow-parser-set -pmd 3 study 1024 -recirc
 
 Actions Implementations (Experimental)
 --
diff --git a/lib/dpif-netdev-private-extract.c 
b/lib/dpif-netdev-private-extract.c
index 1a9b35420..fe0a53c2c 100644
--- a/lib/dpif-netdev-private-extract.c
+++ b/lib/dpif-netdev-private-extract.c
@@ -33,6 +33,8 @@ VLOG_DEFINE_THIS_MODULE(dpif_netdev_extract);
 
 /* Variable to hold the default MFEX implementation. */
 static ATOMIC(miniflow_extract_func) default_mfex_func;
+/* Variable to hold the default MFEX inner implementation. */
+static ATOMIC(miniflow_extract_func) default_mfex_inner_func;
 
 #if MFEX_IMPL_AVX512_CHECK
 static int32_t
@@ -231,16 +233,31 @@ dp_mfex_impl_get_default(void)
 return return_func;
 }
 
+miniflow_extract_func
+dp_mfex_inner_impl_get_default(void)
+{
+miniflow_extract_func return_func;
+atomic_uintptr_t *mfex_func = (void *)_mfex_inner_func;
+
+atomic_read_relaxed(mfex_func, (uintptr_t *) _func);
+
+return return_func;
+}
+
 int
-dp_mfex_impl_set_default_by_name(const char *name)
+dp_mfex_impl_set_default_by_name(const char *name, bool mfex_inner)
 {
 miniflow_extract_func new_default;
 atomic_uintptr_t *mfex_func = (void *)_mfex_func;
+atomic_uintptr_t *mfex_inner_func = (void *)_mfex_inner_func;
 
 int err = dp_mfex_impl_get_by_name(name, _default);
 
 if (!err) {
 atomic_store_relaxed(mfex_func, (uintptr_t) new_default);
+if (mfex_inner) {
+atomic_store_relaxed(mfex_inner_func, (uintptr_t) new_default);
+}
 }
 
 return err;
@@ -268,6 +285,10 @@ dp_mfex_impl_get(struct ds *reply, struct 
dp_netdev_pmd_thread **pmd_list,
 if (pmd->miniflow_extract_opt == mfex_impls[i].extract_func) {
 ds_put_format(reply, "%u,", pmd->core_id);
 }
+if (pmd->miniflow_extract_inner_opt ==
+mfex_impls[i].extract_func) {
+ds_put_format(reply, "%u,", pmd->core_id);
+}
 }
 
 ds_chomp(reply, ',');
diff --git a/lib/dpif-netdev-private-extract.h 
b/lib/dpif-netdev-private-extract.h
index 8a7f9b01a..f5e6d3

[ovs-dev] [PATCH v6 7/9] dpif-mfex: Change MFEX fn pointer prototype to include md_is_valid.

2022-10-06 Thread Kumar Amber
The md_is_valid parameter is passed from DPIF to MFEX to allow MFEX
functions to detect tunneling and decide how to process inner
packets in static, predictable branches.

Signed-off-by: Kumar Amber 
---
 lib/dpif-netdev-avx512.c  |  3 ++-
 lib/dpif-netdev-extract-avx512.c  |  9 +
 lib/dpif-netdev-extract-study.c   |  6 --
 lib/dpif-netdev-private-extract.c |  6 --
 lib/dpif-netdev-private-extract.h | 13 -
 5 files changed, 23 insertions(+), 14 deletions(-)

diff --git a/lib/dpif-netdev-avx512.c b/lib/dpif-netdev-avx512.c
index 9def69a87..90bc7a57d 100644
--- a/lib/dpif-netdev-avx512.c
+++ b/lib/dpif-netdev-avx512.c
@@ -190,7 +190,8 @@ dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
 miniflow_extract_func mfex_func;
 atomic_read_relaxed(>miniflow_extract_opt, _func);
 if (mfex_func) {
-mf_mask = mfex_func(packets, keys, batch_size, in_port, pmd);
+mf_mask = mfex_func(packets, keys, batch_size, in_port, pmd,
+md_is_valid);
 }
 }
 
diff --git a/lib/dpif-netdev-extract-avx512.c b/lib/dpif-netdev-extract-avx512.c
index 4afbed97e..833e9bd31 100644
--- a/lib/dpif-netdev-extract-avx512.c
+++ b/lib/dpif-netdev-extract-avx512.c
@@ -744,6 +744,7 @@ mfex_avx512_process(struct dp_packet_batch *packets,
 uint32_t keys_size OVS_UNUSED,
 odp_port_t in_port,
 void *pmd_handle OVS_UNUSED,
+bool md_is_valid OVS_UNUSED,
 const enum MFEX_PROFILES profile_id,
 const uint32_t use_vbmi OVS_UNUSED)
 {
@@ -978,10 +979,10 @@ __attribute__((__target__("avx512vbmi"))) 
  \
 mfex_avx512_vbmi_##name(struct dp_packet_batch *packets,\
 struct netdev_flow_key *keys, uint32_t keys_size,\
 odp_port_t in_port, struct dp_netdev_pmd_thread \
-*pmd_handle)\
+*pmd_handle, bool md_is_valid)  \
 {   \
 return mfex_avx512_process(packets, keys, keys_size, in_port,   \
-   pmd_handle, profile, 1); \
+   pmd_handle, md_is_valid, profile, 1);\
 }
 #else
 #define VBMI_MFEX_FUNC(name, profile)
@@ -992,10 +993,10 @@ uint32_t  
  \
 mfex_avx512_##name(struct dp_packet_batch *packets, \
struct netdev_flow_key *keys, uint32_t keys_size,\
odp_port_t in_port, struct dp_netdev_pmd_thread  \
-   *pmd_handle) \
+   *pmd_handle, bool md_is_valid)   \
 {   \
 return mfex_avx512_process(packets, keys, keys_size, in_port,   \
-   pmd_handle, profile, 0); \
+   pmd_handle, md_is_valid, profile, 0);\
 }
 
 #define DECLARE_MFEX_FUNC(name, profile)\
diff --git a/lib/dpif-netdev-extract-study.c b/lib/dpif-netdev-extract-study.c
index 69077c844..71354cc4c 100644
--- a/lib/dpif-netdev-extract-study.c
+++ b/lib/dpif-netdev-extract-study.c
@@ -71,7 +71,8 @@ uint32_t
 mfex_study_traffic(struct dp_packet_batch *packets,
struct netdev_flow_key *keys,
uint32_t keys_size, odp_port_t in_port,
-   struct dp_netdev_pmd_thread *pmd_handle)
+   struct dp_netdev_pmd_thread *pmd_handle,
+   bool md_is_valid)
 {
 uint32_t hitmask = 0;
 uint32_t mask = 0;
@@ -90,7 +91,8 @@ mfex_study_traffic(struct dp_packet_batch *packets,
 }
 
 hitmask = miniflow_funcs[i].extract_func(packets, keys, keys_size,
- in_port, pmd_handle);
+ in_port, pmd_handle,
+ md_is_valid);
 stats->impl_hitcount[i] += count_1bits(hitmask);
 
 /* If traffic is not classified then we dont overwrite the keys
diff --git a/lib/dpif-netdev-private-extract.c 
b/lib/dpif-netdev-private-extract.c
index fe0a53c2c..12ac8ecce 100644
--- a/lib/dpif-netdev-private-extract.c
+++ b/lib/dpif-netdev-private-extract.c
@@ -340,7 +340,8 @@ uint32_t
 dpif_miniflow_extract_autovalidator(struct dp_packet_batch *packets,
 struct netdev_flow_key *keys,
 uint32_t keys_size, odp_port_t in_port,
-struct dp_netdev_pmd_thread *pmd_handle)
+  

[ovs-dev] [PATCH v6 5/9] dpif-netdev: Add function pointer for dpif re-circulate.

2022-10-06 Thread Kumar Amber
This patch adds support for selecting the recirculation
implementation based on the DPIF implementation.

Signed-off-by: Kumar Amber 
Signed-off-by: Cian Ferriter 
Co-authored-by: Cian Ferriter 

---
v6:
- Refactor common function from default function.
v3:
- Add description to the dpif recirc function.
- Fix use of return value to fall back to scalar dpif.
---
---
 lib/dpif-netdev-private-dpif.c   | 73 +++-
 lib/dpif-netdev-private-dpif.h   | 18 
 lib/dpif-netdev-private-thread.h |  3 ++
 lib/dpif-netdev.c| 14 +-
 4 files changed, 88 insertions(+), 20 deletions(-)

diff --git a/lib/dpif-netdev-private-dpif.c b/lib/dpif-netdev-private-dpif.c
index 07039b1c2..3c7218ca0 100644
--- a/lib/dpif-netdev-private-dpif.c
+++ b/lib/dpif-netdev-private-dpif.c
@@ -55,18 +55,39 @@ dp_netdev_input_avx512_probe(void)
 static struct dpif_netdev_impl_info_t dpif_impls[] = {
 /* The default scalar C code implementation. */
 [DPIF_NETDEV_IMPL_SCALAR] = { .input_func = dp_netdev_input,
+  .recirc_func = dp_netdev_recirculate,
   .probe = NULL,
   .name = "dpif_scalar", },
 
 #if DPIF_NETDEV_IMPL_AVX512_CHECK
 /* Only available on x86_64 bit builds with SSE 4.2 used for OVS core. */
 [DPIF_NETDEV_IMPL_AVX512] = { .input_func = dp_netdev_input_avx512,
+  .recirc_func = dp_netdev_input_avx512_recirc,
   .probe = dp_netdev_input_avx512_probe,
   .name = "dpif_avx512", },
 #endif
 };
 
 static dp_netdev_input_func default_dpif_func;
+static dp_netdev_recirc_func default_dpif_recirc_func;
+
+static inline int
+dp_netdev_dpif_probe(int dpif_idx)
+{
+/* Configure-time overriding to run test suite on all implementations. */
+#if DPIF_NETDEV_IMPL_AVX512_CHECK
+#ifdef DPIF_AVX512_DEFAULT
+dp_netdev_input_func_probe probe;
+
+/* Check if the compiled default is compatible. */
+probe = dpif_impls[DPIF_NETDEV_IMPL_AVX512].probe;
+if (!probe || !probe()) {
+ dpif_idx = DPIF_NETDEV_IMPL_AVX512;
+}
+#endif
+#endif
+return dpif_idx;
+}
 
 dp_netdev_input_func
 dp_netdev_impl_get_default(void)
@@ -76,18 +97,7 @@ dp_netdev_impl_get_default(void)
 if (!default_dpif_func) {
 int dpif_idx = DPIF_NETDEV_IMPL_SCALAR;
 
-/* Configure-time overriding to run test suite on all implementations. */
-#if DPIF_NETDEV_IMPL_AVX512_CHECK
-#ifdef DPIF_AVX512_DEFAULT
-dp_netdev_input_func_probe probe;
-
-/* Check if the compiled default is compatible. */
-probe = dpif_impls[DPIF_NETDEV_IMPL_AVX512].probe;
-if (!probe || !probe()) {
-dpif_idx = DPIF_NETDEV_IMPL_AVX512;
-}
-#endif
-#endif
+dpif_idx = dp_netdev_dpif_probe(dpif_idx);
 
 VLOG_INFO("Default DPIF implementation is %s.\n",
   dpif_impls[dpif_idx].name);
@@ -97,6 +107,24 @@ dp_netdev_impl_get_default(void)
 return default_dpif_func;
 }
 
+dp_netdev_recirc_func
+dp_netdev_recirc_impl_get_default(void)
+{
+/* For the first call, this will be NULL. Compute the compile time default.
+ */
+if (!default_dpif_recirc_func) {
+int dpif_idx = DPIF_NETDEV_IMPL_SCALAR;
+
+dpif_idx = dp_netdev_dpif_probe(dpif_idx);
+
+VLOG_INFO("Default re-circulate DPIF implementation is %s.\n",
+  dpif_impls[dpif_idx].name);
+default_dpif_recirc_func = dpif_impls[dpif_idx].recirc_func;
+}
+
+return default_dpif_recirc_func;
+}
+
 void
 dp_netdev_impl_get(struct ds *reply, struct dp_netdev_pmd_thread **pmd_list,
size_t n)
@@ -132,10 +160,12 @@ dp_netdev_impl_get(struct ds *reply, struct 
dp_netdev_pmd_thread **pmd_list,
  * returns the function pointer to the one requested by "name".
  */
 static int32_t
-dp_netdev_impl_get_by_name(const char *name, dp_netdev_input_func *out_func)
+dp_netdev_impl_get_by_name(const char *name, dp_netdev_input_func *dpif_func,
+   dp_netdev_recirc_func *dpif_recirc_func)
 {
 ovs_assert(name);
-ovs_assert(out_func);
+ovs_assert(dpif_func);
+ovs_assert(dpif_recirc_func);
 
 uint32_t i;
 
@@ -145,11 +175,13 @@ dp_netdev_impl_get_by_name(const char *name, 
dp_netdev_input_func *out_func)
 if (dpif_impls[i].probe) {
 int probe_err = dpif_impls[i].probe();
 if (probe_err) {
-*out_func = NULL;
+*dpif_func = NULL;
+*dpif_recirc_func = NULL;
 return probe_err;
 }
 }
-*out_func = dpif_impls[i].input_func;
+*dpif_func = dpif_impls[i].input_func;
+*dpif_recirc_func = dpif_impls[i].recirc_func;
 return 0;
 }
 }
@@ -160,12 +192,15 @@ dp_netdev_impl_get_by_name(const char *name, 
dp_netdev_input_func *out_func)
 int32_t
 dp_netdev_impl_set_default_by_name(const char *name)
 {
-dp_netdev

[ovs-dev] [PATCH v6 3/9] dpif-netdev-avx512: Refactor avx512 dpif and create new APIs.

2022-10-06 Thread Kumar Amber
Create new APIs for the avx512 DPIF, enabling one baseline
common code to be specialized into DPIF implementations for
"outer" processing, and "recirc" processing.

Signed-off-by: Kumar Amber 
Signed-off-by: Cian Ferriter 
Co-authored-by: Cian Ferriter 
Acked-by: Sunil Pai G 

---
v4:
- Rebase onto master.
v3:
- Fix comments from Harry.
---
---
 lib/dpif-netdev-avx512.c   | 25 +
 lib/dpif-netdev-private-dpif.c |  6 +++---
 lib/dpif-netdev-private-dpif.h | 14 +++---
 lib/dpif-netdev.c  |  5 ++---
 4 files changed, 37 insertions(+), 13 deletions(-)

diff --git a/lib/dpif-netdev-avx512.c b/lib/dpif-netdev-avx512.c
index 83e7a1394..a36f4f312 100644
--- a/lib/dpif-netdev-avx512.c
+++ b/lib/dpif-netdev-avx512.c
@@ -58,10 +58,10 @@ struct dpif_userdata {
 struct pkt_flow_meta pkt_meta[NETDEV_MAX_BURST];
 };
 
-int32_t
-dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread *pmd,
- struct dp_packet_batch *packets,
- odp_port_t in_port)
+static inline int32_t ALWAYS_INLINE
+dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
+ struct dp_packet_batch *packets,
+ bool md_is_valid OVS_UNUSED, odp_port_t in_port)
 {
 /* Allocate DPIF userdata. */
 if (OVS_UNLIKELY(!pmd->netdev_input_func_userdata)) {
@@ -413,5 +413,22 @@ action_stage:
 return 0;
 }
 
+int32_t
+dp_netdev_input_avx512(struct dp_netdev_pmd_thread *pmd,
+   struct dp_packet_batch *packets,
+   odp_port_t in_port)
+{
+int ret = dp_netdev_input_avx512__(pmd, packets, false, in_port);
+return ret;
+}
+
+int32_t
+dp_netdev_input_avx512_recirc(struct dp_netdev_pmd_thread *pmd,
+  struct dp_packet_batch *packets)
+{
+int ret = dp_netdev_input_avx512__(pmd, packets, true, 0);
+return ret;
+}
+
 #endif
 #endif
diff --git a/lib/dpif-netdev-private-dpif.c b/lib/dpif-netdev-private-dpif.c
index 7f16fa0dc..07039b1c2 100644
--- a/lib/dpif-netdev-private-dpif.c
+++ b/lib/dpif-netdev-private-dpif.c
@@ -40,7 +40,7 @@ enum dpif_netdev_impl_info_idx {
 
 #if DPIF_NETDEV_IMPL_AVX512_CHECK
 static int32_t
-dp_netdev_input_outer_avx512_probe(void)
+dp_netdev_input_avx512_probe(void)
 {
 if (!cpu_has_isa(OVS_CPU_ISA_X86_AVX512F)
 || !cpu_has_isa(OVS_CPU_ISA_X86_BMI2)) {
@@ -60,8 +60,8 @@ static struct dpif_netdev_impl_info_t dpif_impls[] = {
 
 #if DPIF_NETDEV_IMPL_AVX512_CHECK
 /* Only available on x86_64 bit builds with SSE 4.2 used for OVS core. */
-[DPIF_NETDEV_IMPL_AVX512] = { .input_func = dp_netdev_input_outer_avx512,
-  .probe = dp_netdev_input_outer_avx512_probe,
+[DPIF_NETDEV_IMPL_AVX512] = { .input_func = dp_netdev_input_avx512,
+  .probe = dp_netdev_input_avx512_probe,
   .name = "dpif_avx512", },
 #endif
 };
diff --git a/lib/dpif-netdev-private-dpif.h b/lib/dpif-netdev-private-dpif.h
index b3e75b7a2..4465d034f 100644
--- a/lib/dpif-netdev-private-dpif.h
+++ b/lib/dpif-netdev-private-dpif.h
@@ -84,10 +84,18 @@ dp_netdev_input(struct dp_netdev_pmd_thread *pmd,
 struct dp_packet_batch *packets,
 odp_port_t in_port);
 
+int32_t
+dp_netdev_recirculate(struct dp_netdev_pmd_thread *,
+  struct dp_packet_batch *);
+
 /* AVX512 enabled DPIF implementation function. */
 int32_t
-dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread *pmd,
- struct dp_packet_batch *packets,
- odp_port_t in_port);
+dp_netdev_input_avx512(struct dp_netdev_pmd_thread *pmd,
+   struct dp_packet_batch *packets,
+   odp_port_t in_port);
+
+int32_t
+dp_netdev_input_avx512_recirc(struct dp_netdev_pmd_thread *pmd,
+  struct dp_packet_batch *packets);
 
 #endif /* netdev-private.h */
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 92e63599e..a50571dc8 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -545,8 +545,6 @@ static void dp_netdev_execute_actions(struct 
dp_netdev_pmd_thread *pmd,
   const struct flow *flow,
   const struct nlattr *actions,
   size_t actions_len);
-static void dp_netdev_recirculate(struct dp_netdev_pmd_thread *,
-  struct dp_packet_batch *);
 
 static void dp_netdev_disable_upcall(struct dp_netdev *);
 static void dp_netdev_pmd_reload_done(struct dp_netdev_pmd_thread *pmd);
@@ -8492,11 +8490,12 @@ dp_netdev_input(struct dp_netdev_pmd_thread *pmd,
 return 0;
 }
 
-static void
+int32_t
 dp_netdev_recirculate(struct dp_netdev_pmd_thread *pmd,
   struct dp_packet_batch *packets)
 {
 dp_netdev_input__(pmd, packets, true, 0);
+return 0;
 }
 
 struct d

[ovs-dev] [PATCH v6 4/9] dpif-netdev-avx512: Add inner packet handling to dpif.

2022-10-06 Thread Kumar Amber
This patch adds the necessary changes required to support
tunnel packet types in avx512 dpif.

Signed-off-by: Kumar Amber 
Signed-off-by: Cian Ferriter 
Co-authored-by: Cian Ferriter 
Acked-by: Sunil Pai G 

---
v4:
- Rebase onto Simple match.
v3:
- Apply in_port optimization suggested by Harry.
---
---
 lib/dpif-netdev-avx512.c | 38 ++
 1 file changed, 26 insertions(+), 12 deletions(-)

diff --git a/lib/dpif-netdev-avx512.c b/lib/dpif-netdev-avx512.c
index a36f4f312..9def69a87 100644
--- a/lib/dpif-netdev-avx512.c
+++ b/lib/dpif-netdev-avx512.c
@@ -61,7 +61,7 @@ struct dpif_userdata {
 static inline int32_t ALWAYS_INLINE
 dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
  struct dp_packet_batch *packets,
- bool md_is_valid OVS_UNUSED, odp_port_t in_port)
+ bool md_is_valid, odp_port_t in_port)
 {
 /* Allocate DPIF userdata. */
 if (OVS_UNLIKELY(!pmd->netdev_input_func_userdata)) {
@@ -73,6 +73,7 @@ dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
 struct netdev_flow_key *keys = ud->keys;
 struct netdev_flow_key **key_ptrs = ud->key_ptrs;
 struct pkt_flow_meta *pkt_meta = ud->pkt_meta;
+const uint32_t recirc_depth = *recirc_depth_get();
 
 /* The AVX512 DPIF implementation handles rules in a way that is optimized
  * for reducing data-movement between HWOL/EMC/SMC and DPCLS. This is
@@ -106,7 +107,8 @@ dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
 pkt_metadata_prefetch_init(>md);
 }
 
-const bool simple_match_enabled = dp_netdev_simple_match_enabled(pmd,
+const bool simple_match_enabled = !md_is_valid &&
+  dp_netdev_simple_match_enabled(pmd,
  in_port);
 /* Check if EMC or SMC are enabled. */
 struct dfc_cache *cache = >flow_cache;
@@ -182,12 +184,14 @@ dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
 goto action_stage;
 }
 
-/* Do a batch minfilow extract into keys. */
+/* Do a batch miniflow extract into keys, but only for outer packets. */
 uint32_t mf_mask = 0;
-miniflow_extract_func mfex_func;
-atomic_read_relaxed(>miniflow_extract_opt, _func);
-if (mfex_func) {
-mf_mask = mfex_func(packets, keys, batch_size, in_port, pmd);
+if (recirc_depth == 0) {
+miniflow_extract_func mfex_func;
+atomic_read_relaxed(>miniflow_extract_opt, _func);
+if (mfex_func) {
+mf_mask = mfex_func(packets, keys, batch_size, in_port, pmd);
+}
 }
 
 uint32_t iter = lookup_pkts_bitmask;
@@ -204,7 +208,9 @@ dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
 
 /* Get packet pointer from bitmask and packet md. */
 struct dp_packet *packet = packets->packets[i];
-pkt_metadata_init(>md, in_port);
+if (!md_is_valid) {
+pkt_metadata_init(>md, in_port);
+}
 
 struct dp_netdev_flow *f = NULL;
 struct netdev_flow_key *key = [i];
@@ -216,7 +222,7 @@ dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
 bool mfex_hit = !!(mf_mask & (UINT32_C(1) << i));
 
 /* Check for a partial hardware offload match. */
-if (hwol_enabled) {
+if (hwol_enabled && recirc_depth == 0) {
 if (OVS_UNLIKELY(dp_netdev_hw_flow(pmd, packet, ))) {
 /* Packet restoration failed and it was dropped, do not
  * continue processing. */
@@ -249,7 +255,9 @@ dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
 pkt_meta[i].tcp_flags = miniflow_get_tcp_flags(>mf);
 
 key->len = netdev_flow_key_size(miniflow_n_values(>mf));
-key->hash = dpif_netdev_packet_get_rss_hash_orig_pkt(packet, >mf);
+key->hash = (md_is_valid == false)
+? dpif_netdev_packet_get_rss_hash_orig_pkt(packet, >mf)
+: dpif_netdev_packet_get_rss_hash(packet, >mf);
 
 if (emc_enabled) {
 f = emc_lookup(>emc_cache, key);
@@ -287,7 +295,11 @@ dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
  * dpcls_rules[] array.
  */
 if (dpcls_key_idx > 0) {
-struct dpcls *cls = dp_netdev_pmd_lookup_dpcls(pmd, in_port);
+odp_port_t port_no = in_port;
+if (md_is_valid) {
+port_no = packets->packets[0]->md.in_port.odp_port;
+}
+struct dpcls *cls = dp_netdev_pmd_lookup_dpcls(pmd, port_no);
 if (OVS_UNLIKELY(!cls)) {
 return -1;
 }
@@ -353,7 +365,9 @@ dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
 pmd_perf_update_counter(>perf_stats, PMD_STAT_MASKED_LOOKUP,
 dpcls_key_idx);
 action_stage:
-pmd_per

[ovs-dev] [PATCH v6 2/9] dpif-netdev: Refactor hash function to own header.

2022-10-06 Thread Kumar Amber
The refactor allows us to use the hash function across
multiple files; it was previously restricted to
dpif-netdev.c only. This patch enables the use of
the hash function in avx512 dpif.

Signed-off-by: Kumar Amber 
Signed-off-by: Cian Ferriter 
Co-authored-by: Cian Ferriter 
Acked-by: Sunil Pai G 

---
v3:
- Fix minor comments from Harry.
---
---
 lib/dpif-netdev-private-dpcls.h | 23 +++
 lib/dpif-netdev.c   | 22 --
 2 files changed, 23 insertions(+), 22 deletions(-)

diff --git a/lib/dpif-netdev-private-dpcls.h b/lib/dpif-netdev-private-dpcls.h
index 2a9279437..1b37ecb16 100644
--- a/lib/dpif-netdev-private-dpcls.h
+++ b/lib/dpif-netdev-private-dpcls.h
@@ -25,6 +25,7 @@
 
 #include "cmap.h"
 #include "openvswitch/thread.h"
+#include "dpif-netdev-private-dpif.h"
 
 #ifdef  __cplusplus
 extern "C" {
@@ -125,6 +126,28 @@ dpif_netdev_packet_get_rss_hash_orig_pkt(struct dp_packet 
*packet,
 return hash;
 }
 
+static inline uint32_t
+dpif_netdev_packet_get_rss_hash(struct dp_packet *packet,
+const struct miniflow *mf)
+{
+uint32_t hash, recirc_depth;
+
+if (OVS_LIKELY(dp_packet_rss_valid(packet))) {
+hash = dp_packet_get_rss_hash(packet);
+} else {
+hash = miniflow_hash_5tuple(mf, 0);
+dp_packet_set_rss_hash(packet, hash);
+}
+
+/* The RSS hash must account for the recirculation depth to avoid
+ * collisions in the exact match cache */
+recirc_depth = *recirc_depth_get_unsafe();
+if (OVS_UNLIKELY(recirc_depth)) {
+hash = hash_finish(hash, recirc_depth);
+}
+return hash;
+}
+
 /* Allow other implementations to call dpcls_lookup() for subtable search. */
 bool
 dpcls_lookup(struct dpcls *cls, const struct netdev_flow_key *keys[],
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 4866231a3..92e63599e 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -7806,28 +7806,6 @@ dp_netdev_upcall(struct dp_netdev_pmd_thread *pmd, 
struct dp_packet *packet_,
  actions, wc, put_actions, dp->upcall_aux);
 }
 
-static inline uint32_t
-dpif_netdev_packet_get_rss_hash(struct dp_packet *packet,
-const struct miniflow *mf)
-{
-uint32_t hash, recirc_depth;
-
-if (OVS_LIKELY(dp_packet_rss_valid(packet))) {
-hash = dp_packet_get_rss_hash(packet);
-} else {
-hash = miniflow_hash_5tuple(mf, 0);
-dp_packet_set_rss_hash(packet, hash);
-}
-
-/* The RSS hash must account for the recirculation depth to avoid
- * collisions in the exact match cache */
-recirc_depth = *recirc_depth_get_unsafe();
-if (OVS_UNLIKELY(recirc_depth)) {
-hash = hash_finish(hash, recirc_depth);
-}
-return hash;
-}
-
 struct packet_batch_per_flow {
 unsigned int byte_count;
 uint16_t tcp_flags;
-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v6 1/9] dpif-netdev: Refactor per thread recirc data allocation.

2022-10-06 Thread Kumar Amber
The refactor allows us to use *recirc_depth_get() to obtain
the depth across ovs which was previously limited to only
dpif-netdev.c. The patch enables the use of recirc_depth_get()
function in avx512 dpif.

Signed-off-by: Kumar Amber 
Signed-off-by: Cian Ferriter 
Co-authored-by: Cian Ferriter 
Acked-by: Harry van Haaren 
Acked-by: Sunil Pai G 
---
 lib/dpif-netdev-private-dpif.c | 2 ++
 lib/dpif-netdev-private-dpif.h | 5 +
 lib/dpif-netdev.c  | 3 ---
 3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/lib/dpif-netdev-private-dpif.c b/lib/dpif-netdev-private-dpif.c
index ef4cee2ba..7f16fa0dc 100644
--- a/lib/dpif-netdev-private-dpif.c
+++ b/lib/dpif-netdev-private-dpif.c
@@ -31,6 +31,8 @@ VLOG_DEFINE_THIS_MODULE(dpif_netdev_impl);
 #define DPIF_NETDEV_IMPL_AVX512_CHECK (__x86_64__ && HAVE_AVX512F \
 && HAVE_LD_AVX512_GOOD && __SSE4_2__)
 
+DEFINE_EXTERN_PER_THREAD_DATA(recirc_depth, 0);
+
 enum dpif_netdev_impl_info_idx {
 DPIF_NETDEV_IMPL_SCALAR,
 DPIF_NETDEV_IMPL_AVX512
diff --git a/lib/dpif-netdev-private-dpif.h b/lib/dpif-netdev-private-dpif.h
index cf331cec7..b3e75b7a2 100644
--- a/lib/dpif-netdev-private-dpif.h
+++ b/lib/dpif-netdev-private-dpif.h
@@ -18,6 +18,11 @@
 #define DPIF_NETDEV_PRIVATE_DPIF_H 1
 
 #include "openvswitch/types.h"
+#include "ovs-thread.h"
+
+#define MAX_RECIRC_DEPTH 6
+/* Use per thread recirc_depth to prevent recirculation loop. */
+DECLARE_EXTERN_PER_THREAD_DATA(uint32_t, recirc_depth);
 
 /* Forward declarations to avoid including files. */
 struct dp_netdev_pmd_thread;
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index a45b46014..4866231a3 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -98,9 +98,6 @@ VLOG_DEFINE_THIS_MODULE(dpif_netdev);
 #define MIN_TO_MSEC  6
 
 #define FLOW_DUMP_MAX_BATCH 50
-/* Use per thread recirc_depth to prevent recirculation loop. */
-#define MAX_RECIRC_DEPTH 6
-DEFINE_STATIC_PER_THREAD_DATA(uint32_t, recirc_depth, 0)
 
 /* Use instant packet send by default. */
 #define DEFAULT_TX_FLUSH_INTERVAL 0
-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v6 0/9] DPIF + MFEX Inner AVX512

2022-10-06 Thread Kumar Amber
This series of patches introduces optimizations for
supporting tunneled packets in DPIF and MFEX. Along with
the optimizations, the scalar path is refactored so that
its code can be reused across implementations without duplication.

In our tests we have observed a performance gain of
approximately 20~25% over the scalar path.

---
v6:
- Fix minor comments.
- Reworked magic block array and build the offsets from header sizes.
v5:
- Added comments to describe the method for handling MFEX inner.
- Fixed garbage passing of incorrect tunnel values.
---

Kumar Amber (9):
  dpif-netdev: Refactor per thread recirc data allocation.
  dpif-netdev: Refactor hash function to own header.
  dpif-netdev-avx512: Refactor avx512 dpif and create new APIs.
  dpif-netdev-avx512: Add inner packet handling to dpif.
  dpif-netdev: Add function pointer for dpif re-circulate.
  dpif-mfex: Modify set/get MFEX commands to include inner.
  dpif-mfex: Change MFEX fn pointer prototype to include md_is_valid.
  mfex-study: Modify study func to select outer and inner MFEX funcs.
  mfex-avx512: Add support for tunnel packets in avx512 MFEX.

 Documentation/topics/dpdk/bridge.rst |  24 ++-
 lib/dpif-netdev-avx512.c |  72 ++---
 lib/dpif-netdev-extract-avx512.c | 213 +--
 lib/dpif-netdev-extract-study.c  | 133 +++--
 lib/dpif-netdev-private-dpcls.h  |  23 +++
 lib/dpif-netdev-private-dpif.c   |  81 +++---
 lib/dpif-netdev-private-dpif.h   |  37 -
 lib/dpif-netdev-private-extract.c|  33 -
 lib/dpif-netdev-private-extract.h|  19 ++-
 lib/dpif-netdev-private-thread.h |   6 +
 lib/dpif-netdev.c|  67 -
 11 files changed, 526 insertions(+), 182 deletions(-)

-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v5 9/9] mfex-avx512: Add support for tunnel packets in avx512 mfex.

2022-08-25 Thread Kumar Amber
This patch adds the necessary support to avx512 mfex to
support handling of tunnel packet type.

Signed-off-by: Kumar Amber 

---
v5:
- check metadata IP address to find tunneling is valid or not.
  As dummy-pmd often passes garbage data to dpif.
---
---
 lib/dpif-netdev-avx512.c  |  16 +--
 lib/dpif-netdev-extract-avx512.c  | 195 --
 lib/dpif-netdev-private-extract.c |   4 +-
 3 files changed, 170 insertions(+), 45 deletions(-)

diff --git a/lib/dpif-netdev-avx512.c b/lib/dpif-netdev-avx512.c
index 1c3b67b02..d5c61baff 100644
--- a/lib/dpif-netdev-avx512.c
+++ b/lib/dpif-netdev-avx512.c
@@ -185,15 +185,17 @@ dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
 }
 
 /* Do a batch minfilow extract into keys. */
- /* Do a batch minfilow extract into keys, but only for outer packets. */
 uint32_t mf_mask = 0;
-if (recirc_depth == 0) {
-miniflow_extract_func mfex_func;
-atomic_read_relaxed(>miniflow_extract_opt, _func);
-if (mfex_func) {
-mf_mask = mfex_func(packets, keys, batch_size, in_port, pmd,
+miniflow_extract_func mfex_func;
+atomic_read_relaxed(>miniflow_extract_opt, _func);
+miniflow_extract_func mfex_inner_func;
+atomic_read_relaxed(>miniflow_extract_inner_opt, _inner_func);
+if (md_is_valid && mfex_inner_func) {
+mf_mask = mfex_inner_func(packets, keys, batch_size, in_port, pmd,
+  md_is_valid);
+} else if (!md_is_valid && mfex_func) {
+mf_mask = mfex_func(packets, keys, batch_size, in_port, pmd,
 md_is_valid);
-}
 }
 
 uint32_t iter = lookup_pkts_bitmask;
diff --git a/lib/dpif-netdev-extract-avx512.c b/lib/dpif-netdev-extract-avx512.c
index 833e9bd31..4c62bd911 100644
--- a/lib/dpif-netdev-extract-avx512.c
+++ b/lib/dpif-netdev-extract-avx512.c
@@ -360,6 +360,53 @@ _mm512_maskz_permutexvar_epi8_selector(__mmask64 k_shuf, 
__m512i v_shuf,
MF_WORD(ipv6_dst, 2) | MF_BIT(tp_src) | MF_BIT(tp_dst))
 #define MF_IPV6_TCP   (MF_IPV6_UDP | MF_BIT(tcp_flags) | MF_BIT(arp_tha.ea[2]))
 
+#define MF_TUNNEL MF_WORD(tunnel, offsetof(struct flow_tnl, metadata) / 8)
+
+#define MF_ETH_TUNNEL (MF_TUNNEL | MF_ETH)
+#define MF_ETH_VLAN_TUNNEL (MF_TUNNEL | MF_ETH_VLAN)
+
+/* Block offsets represents the offsets into the blocks array of miniflow
+ * and are derived experimentally. Scalar miniflow parses the header
+ * in a fixed order and sequentially in a dynamic fashion thus incrementing
+ * pointer and copying data is enough but in AVX512 since the headers are
+ * parsed using pre-defined masks we need these magic offsets to write
+ * some of the data items at the correct location in the blocks array
+ * using below magic numbers.
+ */
+#define BLK_META_DATA_OFFS9
+#define BLK_IPv4_TCP_FLAG 6
+#define BLK_VLAN_IPv4_TCP_FLAG7
+#define BLK_VLAN_PCP  4
+#define BLK_IPv6_HDR_OFFS 8
+#define BLK_VLAN_IPv6_HDR_OFFS9
+#define BLK_IPv6_TCP_FLAG 9
+#define BLK_VLAN_IPv6_TCP_FLAG10
+#define BLK_L4_UDP_OFFS   9
+#define BLK_L4_TCP_OFFS   10
+#define BLK_VLAN_L4_UDP_OFFS  10
+#define BLK_VLAN_L4_TCP_OFFS  11
+
+/* Below Offsets simply shifts the offsets by 9 blocks as
+ * in the tunneling case the first 9 blocks are reserved and
+ * written with the outer tunnel data.
+ */
+#define BLK_TUN_IPv6_HDR_OFFS (BLK_IPv6_HDR_OFFS + BLK_META_DATA_OFFS)
+#define BLK_TUN_VLAN_IPv6_HDR_OFFS(BLK_VLAN_IPv6_HDR_OFFS + \
+   BLK_META_DATA_OFFS)
+#define BLK_TUN_IPv6_TCP_FLAG (BLK_IPv6_TCP_FLAG + BLK_META_DATA_OFFS)
+#define BLK_TUN_VLAN_IPv6_TCP_FLAG(BLK_VLAN_IPv6_TCP_FLAG + \
+   BLK_META_DATA_OFFS)
+#define BLK_TUN_L4_UDP_OFFS   (BLK_L4_UDP_OFFS + BLK_META_DATA_OFFS)
+#define BLK_TUN_L4_TCP_OFFS   (BLK_L4_TCP_OFFS + BLK_META_DATA_OFFS)
+#define BLK_TUN_VLAN_L4_UDP_OFFS  (BLK_VLAN_L4_UDP_OFFS + \
+   BLK_META_DATA_OFFS)
+#define BLK_TUN_VLAN_L4_TCP_OFFS  (BLK_VLAN_L4_TCP_OFFS + \
+   BLK_META_DATA_OFFS)
+#define BLK_TUN_IPv4_TCP_FLAG (BLK_IPv4_TCP_FLAG + BLK_META_DATA_OFFS)
+#define BLK_TUN_VLAN_PCP  (BLK_VLAN_PCP + BLK_META_DATA_OFFS)
+#define BLK_TUN_VLAN_IPv4_TCP_FLAG(BLK_VLAN_IPv4_TCP_FLAG + \
+   BLK_META_DATA_OFFS)
+
 #define PATTERN_STRIP_IPV6_MASK \
 NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, \
 NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, \
@@ -744,7 +791,7 @@ mfex_avx512_process(struct dp_packet_batch *packets,
 uint32_t keys_size OVS_UNUSED,
  

[ovs-dev] [PATCH v5 8/9] mfex-study: Modify study func to select outer and inner mfex funcs.

2022-08-25 Thread Kumar Amber
The MFEX study function is split into outer and inner parts to allow
packets in outer and inner flows to be studied independently, and a
different ISA-optimized MFEX implementation to be selected for each.

Signed-off-by: Kumar Amber 
---
 lib/dpif-netdev-extract-study.c | 126 +---
 1 file changed, 83 insertions(+), 43 deletions(-)

diff --git a/lib/dpif-netdev-extract-study.c b/lib/dpif-netdev-extract-study.c
index 71354cc4c..03d97c64e 100644
--- a/lib/dpif-netdev-extract-study.c
+++ b/lib/dpif-netdev-extract-study.c
@@ -30,7 +30,9 @@ static atomic_uint32_t mfex_study_pkts_count = 
MFEX_MAX_PKT_COUNT;
 /* Struct to hold miniflow study stats. */
 struct study_stats {
 uint32_t pkt_count;
+uint32_t pkt_inner_count;
 uint32_t impl_hitcount[MFEX_IMPL_MAX];
+uint32_t impl_inner_hitcount[MFEX_IMPL_MAX];
 };
 
 /* Define per thread data to hold the study stats. */
@@ -67,6 +69,58 @@ mfex_set_study_pkt_cnt(uint32_t pkt_cmp_count, const char 
*name)
 return -EINVAL;
 }
 
+
+static inline void
+mfex_reset_stats(uint32_t *impls_hitcount, uint32_t *pkt_cnt) {
+/* Reset stats so that study function can be called again
+ * for next traffic type and optimal function ptr can be
+ * chosen.
+ */
+memset(impls_hitcount, 0, sizeof(uint32_t) * MFEX_IMPL_MAX);
+*pkt_cnt = 0;
+}
+
+static inline void
+mfex_study_select_best_impls(struct dpif_miniflow_extract_impl *mfex_funcs,
+ uint32_t pkt_cnt, uint32_t *impls_arr,
+ atomic_uintptr_t *pmd_func, char *name)
+{
+
+uint32_t best_func_index = MFEX_IMPL_START_IDX;
+uint32_t max_hits = 0;
+
+for (int i = MFEX_IMPL_START_IDX; i < MFEX_IMPL_MAX; i++) {
+if (impls_arr[i] > max_hits) {
+max_hits = impls_arr[i];
+best_func_index = i;
+}
+}
+
+/* If 50% of the packets hit, enable the function. */
+if (max_hits >= (mfex_study_pkts_count / 2)) {
+atomic_store_relaxed(pmd_func,
+(uintptr_t) mfex_funcs[best_func_index].extract_func);
+VLOG_INFO("MFEX %s study chose impl %s: (hits %u/%u pkts)",
+  name, mfex_funcs[best_func_index].name, max_hits,
+  pkt_cnt);
+} else {
+/* Set the implementation to null for default miniflow. */
+atomic_store_relaxed(pmd_func,
+(uintptr_t) mfex_funcs[MFEX_IMPL_SCALAR].extract_func);
+VLOG_INFO("Not enough packets matched (%u/%u), disabling"
+  " optimized MFEX.", max_hits, pkt_cnt);
+}
+
+/* In debug mode show stats for all the counters. */
+if (VLOG_IS_DBG_ENABLED()) {
+for (int i = MFEX_IMPL_START_IDX; i < MFEX_IMPL_MAX; i++) {
+VLOG_DBG("MFEX study results for implementation %s:"
+ " (hits %u/%u pkts)", mfex_funcs[i].name,
+ impls_arr[i], pkt_cnt);
+}
+}
+}
+
 uint32_t
 mfex_study_traffic(struct dp_packet_batch *packets,
struct netdev_flow_key *keys,
@@ -76,10 +130,12 @@ mfex_study_traffic(struct dp_packet_batch *packets,
 {
 uint32_t hitmask = 0;
 uint32_t mask = 0;
+uint32_t study_cnt_pkts;
 struct dp_netdev_pmd_thread *pmd = pmd_handle;
 struct dpif_miniflow_extract_impl *miniflow_funcs;
 struct study_stats *stats = mfex_study_get_study_stats_ptr();
 miniflow_funcs = dpif_mfex_impl_info_get();
+atomic_read_relaxed(_study_pkts_count, _cnt_pkts);
 
 /* Run traffic optimized miniflow_extract to collect the hitmask
  * to be compared after certain packets have been hit to choose
@@ -93,7 +149,11 @@ mfex_study_traffic(struct dp_packet_batch *packets,
 hitmask = miniflow_funcs[i].extract_func(packets, keys, keys_size,
  in_port, pmd_handle,
  md_is_valid);
-stats->impl_hitcount[i] += count_1bits(hitmask);
+if (!md_is_valid) {
+stats->impl_hitcount[i] += count_1bits(hitmask);
+} else {
+stats->impl_inner_hitcount[i] += count_1bits(hitmask);
+}
 
 /* If traffic is not classified then we dont overwrite the keys
  * array in minfiflow implementations so its safe to create a
@@ -102,54 +162,34 @@ mfex_study_traffic(struct dp_packet_batch *packets,
 mask |= hitmask;
 }
 
-stats->pkt_count += dp_packet_batch_size(packets);
-
 /* Choose the best implementation after a minimum packets have been
  * processed.
  */
-uint32_t study_cnt_pkts;
-atomic_read_relaxed(_study_pkts_count, _cnt_pkts);
-
-if (stats->pkt_count >= study_cnt_pkts) {
-uint32_t best_func_index = MFEX_IMPL_START_IDX;
-uint32_t max_hits = 0;
-for (int i = MFEX_IMPL_START_IDX; i < MFEX_IMPL_MAX; i++) {
-if (stats

[ovs-dev] [PATCH v5 7/9] dpif-mfex: Change mfex fn pointer prototype to include md_is_valid.

2022-08-25 Thread Kumar Amber
The md_is_valid parameter is passed from DPIF to MFEX to allow mfex
functions to detect the tunneling and decide the processing of Inner
packets in static predictable branches.

Signed-off-by: Kumar Amber 
---
 lib/dpif-netdev-avx512.c  |  3 ++-
 lib/dpif-netdev-extract-avx512.c  |  9 +
 lib/dpif-netdev-extract-study.c   |  6 --
 lib/dpif-netdev-private-extract.c |  6 --
 lib/dpif-netdev-private-extract.h | 13 -
 5 files changed, 23 insertions(+), 14 deletions(-)

diff --git a/lib/dpif-netdev-avx512.c b/lib/dpif-netdev-avx512.c
index 1db20c1cf..1c3b67b02 100644
--- a/lib/dpif-netdev-avx512.c
+++ b/lib/dpif-netdev-avx512.c
@@ -191,7 +191,8 @@ dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
 miniflow_extract_func mfex_func;
 atomic_read_relaxed(>miniflow_extract_opt, _func);
 if (mfex_func) {
-mf_mask = mfex_func(packets, keys, batch_size, in_port, pmd);
+mf_mask = mfex_func(packets, keys, batch_size, in_port, pmd,
+md_is_valid);
 }
 }
 
diff --git a/lib/dpif-netdev-extract-avx512.c b/lib/dpif-netdev-extract-avx512.c
index 4afbed97e..833e9bd31 100644
--- a/lib/dpif-netdev-extract-avx512.c
+++ b/lib/dpif-netdev-extract-avx512.c
@@ -744,6 +744,7 @@ mfex_avx512_process(struct dp_packet_batch *packets,
 uint32_t keys_size OVS_UNUSED,
 odp_port_t in_port,
 void *pmd_handle OVS_UNUSED,
+bool md_is_valid OVS_UNUSED,
 const enum MFEX_PROFILES profile_id,
 const uint32_t use_vbmi OVS_UNUSED)
 {
@@ -978,10 +979,10 @@ __attribute__((__target__("avx512vbmi"))) 
  \
 mfex_avx512_vbmi_##name(struct dp_packet_batch *packets,\
 struct netdev_flow_key *keys, uint32_t keys_size,\
 odp_port_t in_port, struct dp_netdev_pmd_thread \
-*pmd_handle)\
+*pmd_handle, bool md_is_valid)  \
 {   \
 return mfex_avx512_process(packets, keys, keys_size, in_port,   \
-   pmd_handle, profile, 1); \
+   pmd_handle, md_is_valid, profile, 1);\
 }
 #else
 #define VBMI_MFEX_FUNC(name, profile)
@@ -992,10 +993,10 @@ uint32_t  
  \
 mfex_avx512_##name(struct dp_packet_batch *packets, \
struct netdev_flow_key *keys, uint32_t keys_size,\
odp_port_t in_port, struct dp_netdev_pmd_thread  \
-   *pmd_handle) \
+   *pmd_handle, bool md_is_valid)   \
 {   \
 return mfex_avx512_process(packets, keys, keys_size, in_port,   \
-   pmd_handle, profile, 0); \
+   pmd_handle, md_is_valid, profile, 0);\
 }
 
 #define DECLARE_MFEX_FUNC(name, profile)\
diff --git a/lib/dpif-netdev-extract-study.c b/lib/dpif-netdev-extract-study.c
index 69077c844..71354cc4c 100644
--- a/lib/dpif-netdev-extract-study.c
+++ b/lib/dpif-netdev-extract-study.c
@@ -71,7 +71,8 @@ uint32_t
 mfex_study_traffic(struct dp_packet_batch *packets,
struct netdev_flow_key *keys,
uint32_t keys_size, odp_port_t in_port,
-   struct dp_netdev_pmd_thread *pmd_handle)
+   struct dp_netdev_pmd_thread *pmd_handle,
+   bool md_is_valid)
 {
 uint32_t hitmask = 0;
 uint32_t mask = 0;
@@ -90,7 +91,8 @@ mfex_study_traffic(struct dp_packet_batch *packets,
 }
 
 hitmask = miniflow_funcs[i].extract_func(packets, keys, keys_size,
- in_port, pmd_handle);
+ in_port, pmd_handle,
+ md_is_valid);
 stats->impl_hitcount[i] += count_1bits(hitmask);
 
 /* If traffic is not classified then we dont overwrite the keys
diff --git a/lib/dpif-netdev-private-extract.c 
b/lib/dpif-netdev-private-extract.c
index fe0a53c2c..12ac8ecce 100644
--- a/lib/dpif-netdev-private-extract.c
+++ b/lib/dpif-netdev-private-extract.c
@@ -340,7 +340,8 @@ uint32_t
 dpif_miniflow_extract_autovalidator(struct dp_packet_batch *packets,
 struct netdev_flow_key *keys,
 uint32_t keys_size, odp_port_t in_port,
-struct dp_netdev_pmd_thread *pmd_handle)
+  

[ovs-dev] [PATCH v5 6/9] dpif-mfex: Modify set/get mfex commands to include inner.

2022-08-25 Thread Kumar Amber
The set command in MFEX is changed to allow the user to select
a different optimized MFEX ISA for processing inner packets in case
of tunneling.

$ ovs-appctl dpif-netdev/miniflow-parser-set -pmd 3 study 1024 -recirc

The get command is modified to indicate both the inner and outer MFEXs
in use.

Signed-off-by: Kumar Amber 
Signed-off-by: Cian Ferriter 
Co-authored-by: Cian Ferriter 
---
 Documentation/topics/dpdk/bridge.rst | 18 --
 lib/dpif-netdev-private-extract.c| 23 ++-
 lib/dpif-netdev-private-extract.h|  6 +-
 lib/dpif-netdev-private-thread.h |  3 +++
 lib/dpif-netdev.c| 23 ---
 5 files changed, 62 insertions(+), 11 deletions(-)

diff --git a/Documentation/topics/dpdk/bridge.rst 
b/Documentation/topics/dpdk/bridge.rst
index 354f1ced1..d306417ec 100644
--- a/Documentation/topics/dpdk/bridge.rst
+++ b/Documentation/topics/dpdk/bridge.rst
@@ -293,13 +293,15 @@ command also shows whether the CPU supports each 
implementation::
 An implementation can be selected manually by the following command::
 
 $ ovs-appctl dpif-netdev/miniflow-parser-set [-pmd core_id] name \
-  [study_cnt]
+  [study_cnt] [-recirc]
 
-The above command has two optional parameters: ``study_cnt`` and ``core_id``.
-The ``core_id`` sets a particular packet parsing function to a specific
-PMD thread on the core.  The third parameter ``study_cnt``, which is specific
-to ``study`` and ignored by other implementations, means how many packets
-are needed to choose the best implementation.
+The above command has three optional parameters: ``study_cnt``, ``core_id``
+and ``-recirc``. The ``core_id`` sets a particular packet parsing function
+to a specific PMD thread on the core.  The third parameter ``study_cnt``,
+which is specific to ``study`` and ignored by other implementations, means
+how many packets are needed to choose the best implementation. The fourth
+parameter ``-recirc`` acts as a flag which tells MFEX to use the optimized
+inner MFEX for processing tunneled inner packets.
 
 Also user can select the ``study`` implementation which studies the traffic for
 a specific number of packets by applying all available implementations of
@@ -322,6 +324,10 @@ following command::
 
 $ ovs-appctl dpif-netdev/miniflow-parser-set -pmd 3 scalar
 
+``study`` can be selected with a packet count and explicit PMD selection along
+with the ``-recirc`` flag by the following command::
+
+$ ovs-appctl dpif-netdev/miniflow-parser-set -pmd 3 study 1024 -recirc
 
 Actions Implementations (Experimental)
 --
diff --git a/lib/dpif-netdev-private-extract.c 
b/lib/dpif-netdev-private-extract.c
index 1a9b35420..fe0a53c2c 100644
--- a/lib/dpif-netdev-private-extract.c
+++ b/lib/dpif-netdev-private-extract.c
@@ -33,6 +33,8 @@ VLOG_DEFINE_THIS_MODULE(dpif_netdev_extract);
 
 /* Variable to hold the default MFEX implementation. */
 static ATOMIC(miniflow_extract_func) default_mfex_func;
+/* Variable to hold the default MFEX inner implementation. */
+static ATOMIC(miniflow_extract_func) default_mfex_inner_func;
 
 #if MFEX_IMPL_AVX512_CHECK
 static int32_t
@@ -231,16 +233,31 @@ dp_mfex_impl_get_default(void)
 return return_func;
 }
 
+miniflow_extract_func
+dp_mfex_inner_impl_get_default(void)
+{
+miniflow_extract_func return_func;
+atomic_uintptr_t *mfex_func = (void *)_mfex_inner_func;
+
+atomic_read_relaxed(mfex_func, (uintptr_t *) _func);
+
+return return_func;
+}
+
 int
-dp_mfex_impl_set_default_by_name(const char *name)
+dp_mfex_impl_set_default_by_name(const char *name, bool mfex_inner)
 {
 miniflow_extract_func new_default;
 atomic_uintptr_t *mfex_func = (void *)_mfex_func;
+atomic_uintptr_t *mfex_inner_func = (void *)_mfex_inner_func;
 
 int err = dp_mfex_impl_get_by_name(name, _default);
 
 if (!err) {
 atomic_store_relaxed(mfex_func, (uintptr_t) new_default);
+if (mfex_inner) {
+atomic_store_relaxed(mfex_inner_func, (uintptr_t) new_default);
+}
 }
 
 return err;
@@ -268,6 +285,10 @@ dp_mfex_impl_get(struct ds *reply, struct 
dp_netdev_pmd_thread **pmd_list,
 if (pmd->miniflow_extract_opt == mfex_impls[i].extract_func) {
 ds_put_format(reply, "%u,", pmd->core_id);
 }
+if (pmd->miniflow_extract_inner_opt ==
+mfex_impls[i].extract_func) {
+ds_put_format(reply, "%u,", pmd->core_id);
+}
 }
 
 ds_chomp(reply, ',');
diff --git a/lib/dpif-netdev-private-extract.h 
b/lib/dpif-netdev-private-extract.h
index 8a7f9b01a..f5e6d33c1 100644
--- a/lib/dpif-netdev-private-extract.h
+++ b/lib/dpif-netdev-private-extract.h
@@ -159,8 +159,12 @@ dp_mfex_impl_get_by_name(const char *name, 
miniflow_extract_func *out_func);
  * overridden at runtime. */
 miniflow_extract_func dp_mfex_impl_

[ovs-dev] [PATCH v5 5/9] dpif-netdev: Add function pointer for dpif re-circulate.

2022-08-25 Thread Kumar Amber
The patch adds and re-uses the dpif set command to set the
function pointers to be used to switch between different inner
dpifs.

Signed-off-by: Kumar Amber 
Signed-off-by: Cian Ferriter 
Co-authored-by: Cian Ferriter 

---
v3:
- Add description  to the dpif recirc function.
- Fix use of return value to fall back to scalar dpif.
---
---
 lib/dpif-netdev-private-dpif.c   | 53 +++-
 lib/dpif-netdev-private-dpif.h   | 18 +++
 lib/dpif-netdev-private-thread.h |  3 ++
 lib/dpif-netdev.c| 14 -
 4 files changed, 80 insertions(+), 8 deletions(-)

diff --git a/lib/dpif-netdev-private-dpif.c b/lib/dpif-netdev-private-dpif.c
index 07039b1c2..bf1131c5e 100644
--- a/lib/dpif-netdev-private-dpif.c
+++ b/lib/dpif-netdev-private-dpif.c
@@ -55,18 +55,21 @@ dp_netdev_input_avx512_probe(void)
 static struct dpif_netdev_impl_info_t dpif_impls[] = {
 /* The default scalar C code implementation. */
 [DPIF_NETDEV_IMPL_SCALAR] = { .input_func = dp_netdev_input,
+  .recirc_func = dp_netdev_recirculate,
   .probe = NULL,
   .name = "dpif_scalar", },
 
 #if DPIF_NETDEV_IMPL_AVX512_CHECK
 /* Only available on x86_64 bit builds with SSE 4.2 used for OVS core. */
 [DPIF_NETDEV_IMPL_AVX512] = { .input_func = dp_netdev_input_avx512,
+  .recirc_func = dp_netdev_input_avx512_recirc,
   .probe = dp_netdev_input_avx512_probe,
   .name = "dpif_avx512", },
 #endif
 };
 
 static dp_netdev_input_func default_dpif_func;
+static dp_netdev_recirc_func default_dpif_recirc_func;
 
 dp_netdev_input_func
 dp_netdev_impl_get_default(void)
@@ -97,6 +100,35 @@ dp_netdev_impl_get_default(void)
 return default_dpif_func;
 }
 
+dp_netdev_recirc_func
+dp_netdev_recirc_impl_get_default(void)
+{
+/* For the first call, this will be NULL. Compute the compile time default.
+ */
+if (!default_dpif_recirc_func) {
+int dpif_idx = DPIF_NETDEV_IMPL_SCALAR;
+
+/* Configure-time overriding to run test suite on all implementations. */
+#if DPIF_NETDEV_IMPL_AVX512_CHECK
+#ifdef DPIF_AVX512_DEFAULT
+dp_netdev_input_func_probe probe;
+
+/* Check if the compiled default is compatible. */
+probe = dpif_impls[DPIF_NETDEV_IMPL_AVX512].probe;
+if (!probe || !probe()) {
+dpif_idx = DPIF_NETDEV_IMPL_AVX512;
+}
+#endif
+#endif
+
+VLOG_INFO("Default re-circulate DPIF implementation is %s.\n",
+  dpif_impls[dpif_idx].name);
+default_dpif_recirc_func = dpif_impls[dpif_idx].recirc_func;
+}
+
+return default_dpif_recirc_func;
+}
+
 void
 dp_netdev_impl_get(struct ds *reply, struct dp_netdev_pmd_thread **pmd_list,
size_t n)
@@ -132,10 +164,12 @@ dp_netdev_impl_get(struct ds *reply, struct 
dp_netdev_pmd_thread **pmd_list,
  * returns the function pointer to the one requested by "name".
  */
 static int32_t
-dp_netdev_impl_get_by_name(const char *name, dp_netdev_input_func *out_func)
+dp_netdev_impl_get_by_name(const char *name, dp_netdev_input_func *dpif_func,
+   dp_netdev_recirc_func *dpif_recirc_func)
 {
 ovs_assert(name);
-ovs_assert(out_func);
+ovs_assert(dpif_func);
+ovs_assert(dpif_recirc_func);
 
 uint32_t i;
 
@@ -145,11 +179,13 @@ dp_netdev_impl_get_by_name(const char *name, 
dp_netdev_input_func *out_func)
 if (dpif_impls[i].probe) {
 int probe_err = dpif_impls[i].probe();
 if (probe_err) {
-*out_func = NULL;
+*dpif_func = NULL;
+*dpif_recirc_func = NULL;
 return probe_err;
 }
 }
-*out_func = dpif_impls[i].input_func;
+*dpif_func = dpif_impls[i].input_func;
+*dpif_recirc_func = dpif_impls[i].recirc_func;
 return 0;
 }
 }
@@ -160,12 +196,15 @@ dp_netdev_impl_get_by_name(const char *name, 
dp_netdev_input_func *out_func)
 int32_t
 dp_netdev_impl_set_default_by_name(const char *name)
 {
-dp_netdev_input_func new_default;
+dp_netdev_input_func new_dpif_default;
+dp_netdev_recirc_func new_dpif_recirc_default;
 
-int32_t err = dp_netdev_impl_get_by_name(name, _default);
+int32_t err = dp_netdev_impl_get_by_name(name, _dpif_default,
+ _dpif_recirc_default);
 
 if (!err) {
-default_dpif_func = new_default;
+default_dpif_func = new_dpif_default;
+default_dpif_recirc_func = new_dpif_recirc_default;
 }
 
 return err;
diff --git a/lib/dpif-netdev-private-dpif.h b/lib/dpif-netdev-private-dpif.h
index 46ce4ecf6..250dd5f10 100644
--- a/lib/dpif-netdev-private-dpif.h
+++ b/lib/dpif-netdev-private-dpif.h
@@ -36,6 +36,16 @@ typedef int32_t (*dp_netdev_input_func)(struct 
dp_netdev_pmd_thread *pmd,

[ovs-dev] [PATCH v5 4/9] dpif-netdev-avx512: Add inner packet handling to dpif.

2022-08-25 Thread Kumar Amber
This patch adds the necessary changes required to support
tunnel packet types in avx512 dpif.

Signed-off-by: Kumar Amber 
Signed-off-by: Cian Ferriter 
Co-authored-by: Cian Ferriter 

---
v4:
- Rebase onto Simple match.
v3:
- Apply in_port optimization suggested by Harry.
---
---
 lib/dpif-netdev-avx512.c | 39 ---
 1 file changed, 28 insertions(+), 11 deletions(-)

diff --git a/lib/dpif-netdev-avx512.c b/lib/dpif-netdev-avx512.c
index a36f4f312..1db20c1cf 100644
--- a/lib/dpif-netdev-avx512.c
+++ b/lib/dpif-netdev-avx512.c
@@ -61,7 +61,7 @@ struct dpif_userdata {
 static inline int32_t ALWAYS_INLINE
 dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
  struct dp_packet_batch *packets,
- bool md_is_valid OVS_UNUSED, odp_port_t in_port)
+ bool md_is_valid, odp_port_t in_port)
 {
 /* Allocate DPIF userdata. */
 if (OVS_UNLIKELY(!pmd->netdev_input_func_userdata)) {
@@ -73,6 +73,7 @@ dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
 struct netdev_flow_key *keys = ud->keys;
 struct netdev_flow_key **key_ptrs = ud->key_ptrs;
 struct pkt_flow_meta *pkt_meta = ud->pkt_meta;
+const uint32_t recirc_depth = *recirc_depth_get();
 
 /* The AVX512 DPIF implementation handles rules in a way that is optimized
  * for reducing data-movement between HWOL/EMC/SMC and DPCLS. This is
@@ -106,7 +107,8 @@ dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
 pkt_metadata_prefetch_init(>md);
 }
 
-const bool simple_match_enabled = dp_netdev_simple_match_enabled(pmd,
+const bool simple_match_enabled = !md_is_valid &&
+  dp_netdev_simple_match_enabled(pmd,
  in_port);
 /* Check if EMC or SMC are enabled. */
 struct dfc_cache *cache = >flow_cache;
@@ -183,11 +185,14 @@ dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
 }
 
 /* Do a batch minfilow extract into keys. */
+ /* Do a batch minfilow extract into keys, but only for outer packets. */
 uint32_t mf_mask = 0;
-miniflow_extract_func mfex_func;
-atomic_read_relaxed(>miniflow_extract_opt, _func);
-if (mfex_func) {
-mf_mask = mfex_func(packets, keys, batch_size, in_port, pmd);
+if (recirc_depth == 0) {
+miniflow_extract_func mfex_func;
+atomic_read_relaxed(>miniflow_extract_opt, _func);
+if (mfex_func) {
+mf_mask = mfex_func(packets, keys, batch_size, in_port, pmd);
+}
 }
 
 uint32_t iter = lookup_pkts_bitmask;
@@ -204,7 +209,9 @@ dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
 
 /* Get packet pointer from bitmask and packet md. */
 struct dp_packet *packet = packets->packets[i];
-pkt_metadata_init(>md, in_port);
+if (!md_is_valid) {
+pkt_metadata_init(>md, in_port);
+}
 
 struct dp_netdev_flow *f = NULL;
 struct netdev_flow_key *key = [i];
@@ -216,7 +223,7 @@ dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
 bool mfex_hit = !!(mf_mask & (UINT32_C(1) << i));
 
 /* Check for a partial hardware offload match. */
-if (hwol_enabled) {
+if (hwol_enabled && recirc_depth == 0) {
 if (OVS_UNLIKELY(dp_netdev_hw_flow(pmd, packet, ))) {
 /* Packet restoration failed and it was dropped, do not
  * continue processing. */
@@ -249,7 +256,9 @@ dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
 pkt_meta[i].tcp_flags = miniflow_get_tcp_flags(>mf);
 
 key->len = netdev_flow_key_size(miniflow_n_values(>mf));
-key->hash = dpif_netdev_packet_get_rss_hash_orig_pkt(packet, >mf);
+key->hash = (md_is_valid == false)
+? dpif_netdev_packet_get_rss_hash_orig_pkt(packet, >mf)
+: dpif_netdev_packet_get_rss_hash(packet, >mf);
 
 if (emc_enabled) {
 f = emc_lookup(>emc_cache, key);
@@ -287,7 +296,13 @@ dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
  * dpcls_rules[] array.
  */
 if (dpcls_key_idx > 0) {
-struct dpcls *cls = dp_netdev_pmd_lookup_dpcls(pmd, in_port);
+odp_port_t port_no;
+if (!md_is_valid) {
+port_no = in_port;
+} else {
+port_no = packets->packets[0]->md.in_port.odp_port;
+}
+struct dpcls *cls = dp_netdev_pmd_lookup_dpcls(pmd, port_no);
 if (OVS_UNLIKELY(!cls)) {
 return -1;
 }
@@ -353,7 +368,9 @@ dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
 pmd_perf_update_counter(>perf_stats, PMD_STAT_MASKED_LOOKUP,
 dpcls_key_idx);
 action_stage:
-pmd_perf_update_counter(>perf_stats, PMD_STAT_

[ovs-dev] [PATCH v5 3/9] dpif-netdev-avx512: Refactor avx512 dpif and create new APIs.

2022-08-25 Thread Kumar Amber
Create new APIs for the avx512 DPIF, enabling one baseline
common code to be specialized into DPIF implementations for
"outer" processing, and "recirc" processing.

Signed-off-by: Kumar Amber 
Signed-off-by: Cian Ferriter 
Co-authored-by: Cian Ferriter 

---
v4:
- Rebase onto master.
v3:
- Fix comments from Harry.
---
---
 lib/dpif-netdev-avx512.c   | 25 +
 lib/dpif-netdev-private-dpif.c |  6 +++---
 lib/dpif-netdev-private-dpif.h | 14 +++---
 lib/dpif-netdev.c  |  5 ++---
 4 files changed, 37 insertions(+), 13 deletions(-)

diff --git a/lib/dpif-netdev-avx512.c b/lib/dpif-netdev-avx512.c
index 83e7a1394..a36f4f312 100644
--- a/lib/dpif-netdev-avx512.c
+++ b/lib/dpif-netdev-avx512.c
@@ -58,10 +58,10 @@ struct dpif_userdata {
 struct pkt_flow_meta pkt_meta[NETDEV_MAX_BURST];
 };
 
-int32_t
-dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread *pmd,
- struct dp_packet_batch *packets,
- odp_port_t in_port)
+static inline int32_t ALWAYS_INLINE
+dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
+ struct dp_packet_batch *packets,
+ bool md_is_valid OVS_UNUSED, odp_port_t in_port)
 {
 /* Allocate DPIF userdata. */
 if (OVS_UNLIKELY(!pmd->netdev_input_func_userdata)) {
@@ -413,5 +413,22 @@ action_stage:
 return 0;
 }
 
+int32_t
+dp_netdev_input_avx512(struct dp_netdev_pmd_thread *pmd,
+   struct dp_packet_batch *packets,
+   odp_port_t in_port)
+{
+int ret = dp_netdev_input_avx512__(pmd, packets, false, in_port);
+return ret;
+}
+
+int32_t
+dp_netdev_input_avx512_recirc(struct dp_netdev_pmd_thread *pmd,
+  struct dp_packet_batch *packets)
+{
+int ret = dp_netdev_input_avx512__(pmd, packets, true, 0);
+return ret;
+}
+
 #endif
 #endif
diff --git a/lib/dpif-netdev-private-dpif.c b/lib/dpif-netdev-private-dpif.c
index 7f16fa0dc..07039b1c2 100644
--- a/lib/dpif-netdev-private-dpif.c
+++ b/lib/dpif-netdev-private-dpif.c
@@ -40,7 +40,7 @@ enum dpif_netdev_impl_info_idx {
 
 #if DPIF_NETDEV_IMPL_AVX512_CHECK
 static int32_t
-dp_netdev_input_outer_avx512_probe(void)
+dp_netdev_input_avx512_probe(void)
 {
 if (!cpu_has_isa(OVS_CPU_ISA_X86_AVX512F)
 || !cpu_has_isa(OVS_CPU_ISA_X86_BMI2)) {
@@ -60,8 +60,8 @@ static struct dpif_netdev_impl_info_t dpif_impls[] = {
 
 #if DPIF_NETDEV_IMPL_AVX512_CHECK
 /* Only available on x86_64 bit builds with SSE 4.2 used for OVS core. */
-[DPIF_NETDEV_IMPL_AVX512] = { .input_func = dp_netdev_input_outer_avx512,
-  .probe = dp_netdev_input_outer_avx512_probe,
+[DPIF_NETDEV_IMPL_AVX512] = { .input_func = dp_netdev_input_avx512,
+  .probe = dp_netdev_input_avx512_probe,
   .name = "dpif_avx512", },
 #endif
 };
diff --git a/lib/dpif-netdev-private-dpif.h b/lib/dpif-netdev-private-dpif.h
index b3e75b7a2..46ce4ecf6 100644
--- a/lib/dpif-netdev-private-dpif.h
+++ b/lib/dpif-netdev-private-dpif.h
@@ -86,8 +86,16 @@ dp_netdev_input(struct dp_netdev_pmd_thread *pmd,
 
 /* AVX512 enabled DPIF implementation function. */
 int32_t
-dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread *pmd,
- struct dp_packet_batch *packets,
- odp_port_t in_port);
+dp_netdev_input_avx512(struct dp_netdev_pmd_thread *pmd,
+   struct dp_packet_batch *packets,
+   odp_port_t in_port);
+
+int32_t
+dp_netdev_input_avx512_recirc(struct dp_netdev_pmd_thread *pmd,
+  struct dp_packet_batch *packets);
+
+int32_t
+dp_netdev_recirculate(struct dp_netdev_pmd_thread *,
+  struct dp_packet_batch *);
 
 #endif /* netdev-private.h */
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 92e63599e..a50571dc8 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -545,8 +545,6 @@ static void dp_netdev_execute_actions(struct 
dp_netdev_pmd_thread *pmd,
   const struct flow *flow,
   const struct nlattr *actions,
   size_t actions_len);
-static void dp_netdev_recirculate(struct dp_netdev_pmd_thread *,
-  struct dp_packet_batch *);
 
 static void dp_netdev_disable_upcall(struct dp_netdev *);
 static void dp_netdev_pmd_reload_done(struct dp_netdev_pmd_thread *pmd);
@@ -8492,11 +8490,12 @@ dp_netdev_input(struct dp_netdev_pmd_thread *pmd,
 return 0;
 }
 
-static void
+int32_t
 dp_netdev_recirculate(struct dp_netdev_pmd_thread *pmd,
   struct dp_packet_batch *packets)
 {
 dp_netdev_input__(pmd, packets, true, 0);
+return 0;
 }
 
 struct dp_netdev_execute_aux {
-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v5 2/9] dpif-netdev: Refactor hash function to own header.

2022-08-25 Thread Kumar Amber
The refactor allows us to use the hash function across
multiple files; it was previously restricted to
dpif-netdev.c only.

Signed-off-by: Kumar Amber 
Signed-off-by: Cian Ferriter 
Co-authored-by: Cian Ferriter 

---
v3:
- Fix minor comments from Harry.
---
---
 lib/dpif-netdev-private-dpcls.h | 23 +++
 lib/dpif-netdev.c   | 22 --
 2 files changed, 23 insertions(+), 22 deletions(-)

diff --git a/lib/dpif-netdev-private-dpcls.h b/lib/dpif-netdev-private-dpcls.h
index 2a9279437..1b37ecb16 100644
--- a/lib/dpif-netdev-private-dpcls.h
+++ b/lib/dpif-netdev-private-dpcls.h
@@ -25,6 +25,7 @@
 
 #include "cmap.h"
 #include "openvswitch/thread.h"
+#include "dpif-netdev-private-dpif.h"
 
 #ifdef  __cplusplus
 extern "C" {
@@ -125,6 +126,28 @@ dpif_netdev_packet_get_rss_hash_orig_pkt(struct dp_packet 
*packet,
 return hash;
 }
 
+static inline uint32_t
+dpif_netdev_packet_get_rss_hash(struct dp_packet *packet,
+const struct miniflow *mf)
+{
+uint32_t hash, recirc_depth;
+
+if (OVS_LIKELY(dp_packet_rss_valid(packet))) {
+hash = dp_packet_get_rss_hash(packet);
+} else {
+hash = miniflow_hash_5tuple(mf, 0);
+dp_packet_set_rss_hash(packet, hash);
+}
+
+/* The RSS hash must account for the recirculation depth to avoid
+ * collisions in the exact match cache */
+recirc_depth = *recirc_depth_get_unsafe();
+if (OVS_UNLIKELY(recirc_depth)) {
+hash = hash_finish(hash, recirc_depth);
+}
+return hash;
+}
+
 /* Allow other implementations to call dpcls_lookup() for subtable search. */
 bool
 dpcls_lookup(struct dpcls *cls, const struct netdev_flow_key *keys[],
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 4866231a3..92e63599e 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -7806,28 +7806,6 @@ dp_netdev_upcall(struct dp_netdev_pmd_thread *pmd, 
struct dp_packet *packet_,
  actions, wc, put_actions, dp->upcall_aux);
 }
 
-static inline uint32_t
-dpif_netdev_packet_get_rss_hash(struct dp_packet *packet,
-const struct miniflow *mf)
-{
-uint32_t hash, recirc_depth;
-
-if (OVS_LIKELY(dp_packet_rss_valid(packet))) {
-hash = dp_packet_get_rss_hash(packet);
-} else {
-hash = miniflow_hash_5tuple(mf, 0);
-dp_packet_set_rss_hash(packet, hash);
-}
-
-/* The RSS hash must account for the recirculation depth to avoid
- * collisions in the exact match cache */
-recirc_depth = *recirc_depth_get_unsafe();
-if (OVS_UNLIKELY(recirc_depth)) {
-hash = hash_finish(hash, recirc_depth);
-}
-return hash;
-}
-
 struct packet_batch_per_flow {
 unsigned int byte_count;
 uint16_t tcp_flags;
-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v5 1/9] dpif-netdev: Refactor per thread recirc data allocation.

2022-08-25 Thread Kumar Amber
The refactor allows us to use *recirc_depth_get() to obtain
the recirculation depth across OVS; it was previously limited to
dpif-netdev.c only.

Signed-off-by: Kumar Amber 
Signed-off-by: Cian Ferriter 
Co-authored-by: Cian Ferriter 
Acked-by: Harry van Haaren 
---
 lib/dpif-netdev-private-dpif.c | 2 ++
 lib/dpif-netdev-private-dpif.h | 5 +
 lib/dpif-netdev.c  | 3 ---
 3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/lib/dpif-netdev-private-dpif.c b/lib/dpif-netdev-private-dpif.c
index ef4cee2ba..7f16fa0dc 100644
--- a/lib/dpif-netdev-private-dpif.c
+++ b/lib/dpif-netdev-private-dpif.c
@@ -31,6 +31,8 @@ VLOG_DEFINE_THIS_MODULE(dpif_netdev_impl);
 #define DPIF_NETDEV_IMPL_AVX512_CHECK (__x86_64__ && HAVE_AVX512F \
 && HAVE_LD_AVX512_GOOD && __SSE4_2__)
 
+DEFINE_EXTERN_PER_THREAD_DATA(recirc_depth, 0);
+
 enum dpif_netdev_impl_info_idx {
 DPIF_NETDEV_IMPL_SCALAR,
 DPIF_NETDEV_IMPL_AVX512
diff --git a/lib/dpif-netdev-private-dpif.h b/lib/dpif-netdev-private-dpif.h
index cf331cec7..b3e75b7a2 100644
--- a/lib/dpif-netdev-private-dpif.h
+++ b/lib/dpif-netdev-private-dpif.h
@@ -18,6 +18,11 @@
 #define DPIF_NETDEV_PRIVATE_DPIF_H 1
 
 #include "openvswitch/types.h"
+#include "ovs-thread.h"
+
+#define MAX_RECIRC_DEPTH 6
+/* Use per thread recirc_depth to prevent recirculation loop. */
+DECLARE_EXTERN_PER_THREAD_DATA(uint32_t, recirc_depth);
 
 /* Forward declarations to avoid including files. */
 struct dp_netdev_pmd_thread;
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index a45b46014..4866231a3 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -98,9 +98,6 @@ VLOG_DEFINE_THIS_MODULE(dpif_netdev);
 #define MIN_TO_MSEC  6
 
 #define FLOW_DUMP_MAX_BATCH 50
-/* Use per thread recirc_depth to prevent recirculation loop. */
-#define MAX_RECIRC_DEPTH 6
-DEFINE_STATIC_PER_THREAD_DATA(uint32_t, recirc_depth, 0)
 
 /* Use instant packet send by default. */
 #define DEFAULT_TX_FLUSH_INTERVAL 0
-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v5 0/9] DPIF + MFEX Inner AVX512

2022-08-25 Thread Kumar Amber
This series of patches introduces optimizations for
supporting tunneled packets in DPIF and MFEX. Along with
the optimizations, parts of the scalar path are refactored
so that they can be shared across implementations without duplication.

In our tests we have observed a performance gain of approximately
20~25% over the scalar path.

---
v5:
- Added comments to describe the method for handling MFEX inner.
- Fixed garbage passing of incorrect tunnel values.
---

Kumar Amber (9):
  dpif-netdev: Refactor per thread recirc data allocation.
  dpif-netdev: Refactor hash function to own header.
  dpif-netdev-avx512: Refactor avx512 dpif and create new APIs.
  dpif-netdev-avx512: Add inner packet handling to dpif.
  dpif-netdev: Add function pointer for dpif re-circulate.
  dpif-mfex: Modify set/get mfex commands to include inner.
  dpif-mfex: Change mfex fn pointer prototype to include md_is_valid.
  mfex-study: Modify study func to select outer and inner mfex funcs.
  mfex-avx512: Add support for tunnel packets in avx512 mfex.

 Documentation/topics/dpdk/bridge.rst |  18 ++-
 lib/dpif-netdev-avx512.c |  61 ++--
 lib/dpif-netdev-extract-avx512.c | 202 +--
 lib/dpif-netdev-extract-study.c  | 132 +++--
 lib/dpif-netdev-private-dpcls.h  |  23 +++
 lib/dpif-netdev-private-dpif.c   |  61 ++--
 lib/dpif-netdev-private-dpif.h   |  37 -
 lib/dpif-netdev-private-extract.c|  33 -
 lib/dpif-netdev-private-extract.h|  19 ++-
 lib/dpif-netdev-private-thread.h |   6 +
 lib/dpif-netdev.c|  67 -
 11 files changed, 501 insertions(+), 158 deletions(-)

-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v4 9/9] mfex-avx512: Add support for tunnel packets in avx512 mfex.

2022-08-07 Thread Kumar Amber
This patch adds the necessary support to avx512 mfex to
support handling of tunnel packet type.

Signed-off-by: Kumar Amber 
---
 lib/dpif-netdev-avx512.c  |  16 ++--
 lib/dpif-netdev-extract-avx512.c  | 146 +-
 lib/dpif-netdev-private-extract.c |   4 +-
 3 files changed, 117 insertions(+), 49 deletions(-)

diff --git a/lib/dpif-netdev-avx512.c b/lib/dpif-netdev-avx512.c
index 1c3b67b02..d5c61baff 100644
--- a/lib/dpif-netdev-avx512.c
+++ b/lib/dpif-netdev-avx512.c
@@ -185,15 +185,17 @@ dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
 }
 
 /* Do a batch minfilow extract into keys. */
- /* Do a batch minfilow extract into keys, but only for outer packets. */
 uint32_t mf_mask = 0;
-if (recirc_depth == 0) {
-miniflow_extract_func mfex_func;
-atomic_read_relaxed(>miniflow_extract_opt, _func);
-if (mfex_func) {
-mf_mask = mfex_func(packets, keys, batch_size, in_port, pmd,
+miniflow_extract_func mfex_func;
+atomic_read_relaxed(>miniflow_extract_opt, _func);
+miniflow_extract_func mfex_inner_func;
+atomic_read_relaxed(>miniflow_extract_inner_opt, _inner_func);
+if (md_is_valid && mfex_inner_func) {
+mf_mask = mfex_inner_func(packets, keys, batch_size, in_port, pmd,
+  md_is_valid);
+} else if (!md_is_valid && mfex_func) {
+mf_mask = mfex_func(packets, keys, batch_size, in_port, pmd,
 md_is_valid);
-}
 }
 
 uint32_t iter = lookup_pkts_bitmask;
diff --git a/lib/dpif-netdev-extract-avx512.c b/lib/dpif-netdev-extract-avx512.c
index 833e9bd31..c87480a4e 100644
--- a/lib/dpif-netdev-extract-avx512.c
+++ b/lib/dpif-netdev-extract-avx512.c
@@ -744,7 +744,7 @@ mfex_avx512_process(struct dp_packet_batch *packets,
 uint32_t keys_size OVS_UNUSED,
 odp_port_t in_port,
 void *pmd_handle OVS_UNUSED,
-bool md_is_valid OVS_UNUSED,
+bool md_is_valid,
 const enum MFEX_PROFILES profile_id,
 const uint32_t use_vbmi OVS_UNUSED)
 {
@@ -770,6 +770,11 @@ mfex_avx512_process(struct dp_packet_batch *packets,
 __m128i v_blocks01 = _mm_insert_epi32(v_zeros, odp_to_u32(in_port), 1);
 
 DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
+/* Handle meta-data init in the loop. */
+if (!md_is_valid) {
+pkt_metadata_init(>md, in_port);
+}
+const struct pkt_metadata *md = >md;
 /* If the packet is smaller than the probe size, skip it. */
 const uint32_t size = dp_packet_size(packet);
 if (size < dp_pkt_min_size) {
@@ -808,7 +813,16 @@ mfex_avx512_process(struct dp_packet_batch *packets,
 use_vbmi);
 
 __m512i v_blk0_strip = _mm512_and_si512(v_blk0, v_strp);
-_mm512_storeu_si512([2], v_blk0_strip);
+/* Handle inner meta-data if valid. */
+if (!md_is_valid) {
+_mm512_storeu_si512([2], v_blk0_strip);
+} else {
+__m512i v_tun = _mm512_loadu_si512(>tunnel);
+_mm512_storeu_si512([0], v_tun);
+_mm512_storeu_si512([11], v_blk0_strip);
+blocks[9] = md->dp_hash |
+((uint64_t) odp_to_u32(md->in_port.odp_port) << 32);
+}
 
 /* Perform "post-processing" per profile, handling details not easily
  * handled in the above generic AVX512 code. Examples include TCP flag
@@ -820,38 +834,44 @@ mfex_avx512_process(struct dp_packet_batch *packets,
 break;
 
 case PROFILE_ETH_VLAN_IPV4_TCP: {
-mfex_vlan_pcp(pkt[14], [i].buf[4]);
-
 uint32_t size_from_ipv4 = size - VLAN_ETH_HEADER_LEN;
 struct ip_header *nh = (void *)[VLAN_ETH_HEADER_LEN];
 if (mfex_ipv4_set_l2_pad_size(packet, nh, size_from_ipv4,
   TCP_HEADER_LEN)) {
 continue;
 }
-
 /* Process TCP flags, and store to blocks. */
 const struct tcp_header *tcp = (void *)[38];
-mfex_handle_tcp_flags(tcp, [7]);
+if (!md_is_valid) {
+mfex_vlan_pcp(pkt[14], [i].buf[4]);
+mfex_handle_tcp_flags(tcp, [7]);
+} else {
+mfex_vlan_pcp(pkt[14], [i].buf[13]);
+mfex_handle_tcp_flags(tcp, [16]);
+mf->map.bits[0] = 0x38a001ff;
+}
+
 dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
 } break;
 
 case PROFILE_ETH_VLAN_IPV4_UDP: {
-mfex_vlan_pcp(pkt[14], [i].buf[4]);
-
 uint32_t size_from_ipv4 = size - VLAN

[ovs-dev] [PATCH v4 8/9] mfex-study: Modify study func to select outer and inner mfex funcs.

2022-08-07 Thread Kumar Amber
The MFEX study function is split into outer and inner parts to allow
independent studying of packets in outer and inner flows, so that a
different ISA-optimized MFEX can be selected for each.

Signed-off-by: Kumar Amber 
---
 lib/dpif-netdev-extract-study.c | 126 +---
 1 file changed, 83 insertions(+), 43 deletions(-)

diff --git a/lib/dpif-netdev-extract-study.c b/lib/dpif-netdev-extract-study.c
index 71354cc4c..03d97c64e 100644
--- a/lib/dpif-netdev-extract-study.c
+++ b/lib/dpif-netdev-extract-study.c
@@ -30,7 +30,9 @@ static atomic_uint32_t mfex_study_pkts_count = 
MFEX_MAX_PKT_COUNT;
 /* Struct to hold miniflow study stats. */
 struct study_stats {
 uint32_t pkt_count;
+uint32_t pkt_inner_count;
 uint32_t impl_hitcount[MFEX_IMPL_MAX];
+uint32_t impl_inner_hitcount[MFEX_IMPL_MAX];
 };
 
 /* Define per thread data to hold the study stats. */
@@ -67,6 +69,58 @@ mfex_set_study_pkt_cnt(uint32_t pkt_cmp_count, const char 
*name)
 return -EINVAL;
 }
 
+
+static inline void
+mfex_reset_stats(uint32_t *impls_hitcount, uint32_t *pkt_cnt) {
+/* Reset stats so that study function can be called again
+ * for next traffic type and optimal function ptr can be
+ * chosen.
+ */
+memset(impls_hitcount, 0, sizeof(uint32_t) * MFEX_IMPL_MAX);
+*pkt_cnt = 0;
+}
+
+static inline void
+mfex_study_select_best_impls(struct dpif_miniflow_extract_impl *mfex_funcs,
+ uint32_t pkt_cnt, uint32_t *impls_arr,
+ atomic_uintptr_t *pmd_func, char *name)
+{
+
+uint32_t best_func_index = MFEX_IMPL_START_IDX;
+uint32_t max_hits = 0;
+
+for (int i = MFEX_IMPL_START_IDX; i < MFEX_IMPL_MAX; i++) {
+if (impls_arr[i] > max_hits) {
+max_hits = impls_arr[i];
+best_func_index = i;
+}
+}
+
+/* If 50% of the packets hit, enable the function. */
+if (max_hits >= (mfex_study_pkts_count / 2)) {
+atomic_store_relaxed(pmd_func,
+(uintptr_t) mfex_funcs[best_func_index].extract_func);
+VLOG_INFO("MFEX %s study chose impl %s: (hits %u/%u pkts)",
+  name, mfex_funcs[best_func_index].name, max_hits,
+  pkt_cnt);
+} else {
+/* Set the implementation to null for default miniflow. */
+atomic_store_relaxed(pmd_func,
+(uintptr_t) mfex_funcs[MFEX_IMPL_SCALAR].extract_func);
+VLOG_INFO("Not enough packets matched (%u/%u), disabling"
+  " optimized MFEX.", max_hits, pkt_cnt);
+}
+
+/* In debug mode show stats for all the counters. */
+if (VLOG_IS_DBG_ENABLED()) {
+for (int i = MFEX_IMPL_START_IDX; i < MFEX_IMPL_MAX; i++) {
+VLOG_DBG("MFEX study results for implementation %s:"
+ " (hits %u/%u pkts)", mfex_funcs[i].name,
+ impls_arr[i], pkt_cnt);
+}
+}
+}
+
 uint32_t
 mfex_study_traffic(struct dp_packet_batch *packets,
struct netdev_flow_key *keys,
@@ -76,10 +130,12 @@ mfex_study_traffic(struct dp_packet_batch *packets,
 {
 uint32_t hitmask = 0;
 uint32_t mask = 0;
+uint32_t study_cnt_pkts;
 struct dp_netdev_pmd_thread *pmd = pmd_handle;
 struct dpif_miniflow_extract_impl *miniflow_funcs;
 struct study_stats *stats = mfex_study_get_study_stats_ptr();
 miniflow_funcs = dpif_mfex_impl_info_get();
+atomic_read_relaxed(_study_pkts_count, _cnt_pkts);
 
 /* Run traffic optimized miniflow_extract to collect the hitmask
  * to be compared after certain packets have been hit to choose
@@ -93,7 +149,11 @@ mfex_study_traffic(struct dp_packet_batch *packets,
 hitmask = miniflow_funcs[i].extract_func(packets, keys, keys_size,
  in_port, pmd_handle,
  md_is_valid);
-stats->impl_hitcount[i] += count_1bits(hitmask);
+if (!md_is_valid) {
+stats->impl_hitcount[i] += count_1bits(hitmask);
+} else {
+stats->impl_inner_hitcount[i] += count_1bits(hitmask);
+}
 
 /* If traffic is not classified then we dont overwrite the keys
  * array in minfiflow implementations so its safe to create a
@@ -102,54 +162,34 @@ mfex_study_traffic(struct dp_packet_batch *packets,
 mask |= hitmask;
 }
 
-stats->pkt_count += dp_packet_batch_size(packets);
-
 /* Choose the best implementation after a minimum packets have been
  * processed.
  */
-uint32_t study_cnt_pkts;
-atomic_read_relaxed(_study_pkts_count, _cnt_pkts);
-
-if (stats->pkt_count >= study_cnt_pkts) {
-uint32_t best_func_index = MFEX_IMPL_START_IDX;
-uint32_t max_hits = 0;
-for (int i = MFEX_IMPL_START_IDX; i < MFEX_IMPL_MAX; i++) {
-if (stats

[ovs-dev] [PATCH v4 7/9] dpif-mfex: Change mfex fn pointer prototype to include md_is_valid.

2022-08-07 Thread Kumar Amber
The md_is_valid parameter is passed from DPIF to MFEX to allow mfex
functions to detect the tunneling and decide the processing of Inner
packets in static predictable branches.

Signed-off-by: Kumar Amber 
---
 lib/dpif-netdev-avx512.c  |  3 ++-
 lib/dpif-netdev-extract-avx512.c  |  9 +
 lib/dpif-netdev-extract-study.c   |  6 --
 lib/dpif-netdev-private-extract.c |  6 --
 lib/dpif-netdev-private-extract.h | 13 -
 5 files changed, 23 insertions(+), 14 deletions(-)

diff --git a/lib/dpif-netdev-avx512.c b/lib/dpif-netdev-avx512.c
index 1db20c1cf..1c3b67b02 100644
--- a/lib/dpif-netdev-avx512.c
+++ b/lib/dpif-netdev-avx512.c
@@ -191,7 +191,8 @@ dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
 miniflow_extract_func mfex_func;
 atomic_read_relaxed(>miniflow_extract_opt, _func);
 if (mfex_func) {
-mf_mask = mfex_func(packets, keys, batch_size, in_port, pmd);
+mf_mask = mfex_func(packets, keys, batch_size, in_port, pmd,
+md_is_valid);
 }
 }
 
diff --git a/lib/dpif-netdev-extract-avx512.c b/lib/dpif-netdev-extract-avx512.c
index 4afbed97e..833e9bd31 100644
--- a/lib/dpif-netdev-extract-avx512.c
+++ b/lib/dpif-netdev-extract-avx512.c
@@ -744,6 +744,7 @@ mfex_avx512_process(struct dp_packet_batch *packets,
 uint32_t keys_size OVS_UNUSED,
 odp_port_t in_port,
 void *pmd_handle OVS_UNUSED,
+bool md_is_valid OVS_UNUSED,
 const enum MFEX_PROFILES profile_id,
 const uint32_t use_vbmi OVS_UNUSED)
 {
@@ -978,10 +979,10 @@ __attribute__((__target__("avx512vbmi"))) 
  \
 mfex_avx512_vbmi_##name(struct dp_packet_batch *packets,\
 struct netdev_flow_key *keys, uint32_t keys_size,\
 odp_port_t in_port, struct dp_netdev_pmd_thread \
-*pmd_handle)\
+*pmd_handle, bool md_is_valid)  \
 {   \
 return mfex_avx512_process(packets, keys, keys_size, in_port,   \
-   pmd_handle, profile, 1); \
+   pmd_handle, md_is_valid, profile, 1);\
 }
 #else
 #define VBMI_MFEX_FUNC(name, profile)
@@ -992,10 +993,10 @@ uint32_t  
  \
 mfex_avx512_##name(struct dp_packet_batch *packets, \
struct netdev_flow_key *keys, uint32_t keys_size,\
odp_port_t in_port, struct dp_netdev_pmd_thread  \
-   *pmd_handle) \
+   *pmd_handle, bool md_is_valid)   \
 {   \
 return mfex_avx512_process(packets, keys, keys_size, in_port,   \
-   pmd_handle, profile, 0); \
+   pmd_handle, md_is_valid, profile, 0);\
 }
 
 #define DECLARE_MFEX_FUNC(name, profile)\
diff --git a/lib/dpif-netdev-extract-study.c b/lib/dpif-netdev-extract-study.c
index 69077c844..71354cc4c 100644
--- a/lib/dpif-netdev-extract-study.c
+++ b/lib/dpif-netdev-extract-study.c
@@ -71,7 +71,8 @@ uint32_t
 mfex_study_traffic(struct dp_packet_batch *packets,
struct netdev_flow_key *keys,
uint32_t keys_size, odp_port_t in_port,
-   struct dp_netdev_pmd_thread *pmd_handle)
+   struct dp_netdev_pmd_thread *pmd_handle,
+   bool md_is_valid)
 {
 uint32_t hitmask = 0;
 uint32_t mask = 0;
@@ -90,7 +91,8 @@ mfex_study_traffic(struct dp_packet_batch *packets,
 }
 
 hitmask = miniflow_funcs[i].extract_func(packets, keys, keys_size,
- in_port, pmd_handle);
+ in_port, pmd_handle,
+ md_is_valid);
 stats->impl_hitcount[i] += count_1bits(hitmask);
 
 /* If traffic is not classified then we dont overwrite the keys
diff --git a/lib/dpif-netdev-private-extract.c 
b/lib/dpif-netdev-private-extract.c
index 789cba4c5..f67f08f8b 100644
--- a/lib/dpif-netdev-private-extract.c
+++ b/lib/dpif-netdev-private-extract.c
@@ -342,7 +342,8 @@ uint32_t
 dpif_miniflow_extract_autovalidator(struct dp_packet_batch *packets,
 struct netdev_flow_key *keys,
 uint32_t keys_size, odp_port_t in_port,
-struct dp_netdev_pmd_thread *pmd_handle)
+  

[ovs-dev] [PATCH v4 6/9] dpif-mfex: Modify set/get mfex commands to include inner.

2022-08-07 Thread Kumar Amber
The set command in MFEX is changed to allow the user to select
a different optimized MFEX ISA for processing inner packets in case
of tunneling.

$ ovs-appctl dpif-netdev/miniflow-parser-set -pmd 3 study 1024 -inner

The get command is modified to indicate both the inner and outer MFEXs in
use.

Signed-off-by: Kumar Amber 
Signed-off-by: Cian Ferriter 
Co-authored-by: Cian Ferriter 
---
 Documentation/topics/dpdk/bridge.rst | 18 --
 lib/dpif-netdev-private-extract.c| 23 ++-
 lib/dpif-netdev-private-extract.h|  6 +-
 lib/dpif-netdev-private-thread.h |  3 +++
 lib/dpif-netdev.c| 21 ++---
 5 files changed, 60 insertions(+), 11 deletions(-)

diff --git a/Documentation/topics/dpdk/bridge.rst 
b/Documentation/topics/dpdk/bridge.rst
index 354f1ced1..b7ffb4885 100644
--- a/Documentation/topics/dpdk/bridge.rst
+++ b/Documentation/topics/dpdk/bridge.rst
@@ -293,13 +293,15 @@ command also shows whether the CPU supports each 
implementation::
 An implementation can be selected manually by the following command::
 
 $ ovs-appctl dpif-netdev/miniflow-parser-set [-pmd core_id] name \
-  [study_cnt]
+  [study_cnt] [-recirc]
 
-The above command has two optional parameters: ``study_cnt`` and ``core_id``.
-The ``core_id`` sets a particular packet parsing function to a specific
-PMD thread on the core.  The third parameter ``study_cnt``, which is specific
-to ``study`` and ignored by other implementations, means how many packets
-are needed to choose the best implementation.
+The above command has three optional parameters: ``study_cnt``, ``core_id``
+and ``-inner``. The ``core_id`` sets a particular packet parsing function
+to a specific PMD thread on the core.  The third parameter ``study_cnt``,
+which is specific to ``study`` and ignored by other implementations, means
+how many packets are needed to choose the best implementation. The fourth
+parameter ``-recirc`` acts like flag which indicates to MFEX to use optimized
+MFEX inner for processing tunneled inner packets.
 
 Also user can select the ``study`` implementation which studies the traffic for
 a specific number of packets by applying all available implementations of
@@ -322,6 +324,10 @@ following command::
 
 $ ovs-appctl dpif-netdev/miniflow-parser-set -pmd 3 scalar
 
+``study`` can be selected with packet count and explicit PMD selection along
+with the ``recirc`` by following command::
+
+$ ovs-appctl dpif-netdev/miniflow-parser-set -pmd 3 study 1024 -recirc
 
 Actions Implementations (Experimental)
 --
diff --git a/lib/dpif-netdev-private-extract.c 
b/lib/dpif-netdev-private-extract.c
index 0e6fdbf31..789cba4c5 100644
--- a/lib/dpif-netdev-private-extract.c
+++ b/lib/dpif-netdev-private-extract.c
@@ -33,6 +33,8 @@ VLOG_DEFINE_THIS_MODULE(dpif_netdev_extract);
 
 /* Variable to hold the default MFEX implementation. */
 static ATOMIC(miniflow_extract_func) default_mfex_func;
+/* Variable to hold the default MFEX inner implementation. */
+static ATOMIC(miniflow_extract_func) default_mfex_inner_func;
 
 #if (__x86_64__ && HAVE_AVX512F && HAVE_LD_AVX512_GOOD && HAVE_AVX512BW \
  && __SSE4_2__)
@@ -233,16 +235,31 @@ dp_mfex_impl_get_default(void)
 return return_func;
 }
 
+miniflow_extract_func
+dp_mfex_inner_impl_get_default(void)
+{
+miniflow_extract_func return_func;
+atomic_uintptr_t *mfex_func = (void *)_mfex_inner_func;
+
+atomic_read_relaxed(mfex_func, (uintptr_t *) _func);
+
+return return_func;
+}
+
 int
-dp_mfex_impl_set_default_by_name(const char *name)
+dp_mfex_impl_set_default_by_name(const char *name, bool mfex_inner)
 {
 miniflow_extract_func new_default;
 atomic_uintptr_t *mfex_func = (void *)_mfex_func;
+atomic_uintptr_t *mfex_inner_func = (void *)_mfex_inner_func;
 
 int err = dp_mfex_impl_get_by_name(name, _default);
 
 if (!err) {
 atomic_store_relaxed(mfex_func, (uintptr_t) new_default);
+if (mfex_inner) {
+atomic_store_relaxed(mfex_inner_func, (uintptr_t) new_default);
+}
 }
 
 return err;
@@ -270,6 +287,10 @@ dp_mfex_impl_get(struct ds *reply, struct 
dp_netdev_pmd_thread **pmd_list,
 if (pmd->miniflow_extract_opt == mfex_impls[i].extract_func) {
 ds_put_format(reply, "%u,", pmd->core_id);
 }
+if (pmd->miniflow_extract_inner_opt ==
+mfex_impls[i].extract_func) {
+ds_put_format(reply, "%u,", pmd->core_id);
+}
 }
 
 ds_chomp(reply, ',');
diff --git a/lib/dpif-netdev-private-extract.h 
b/lib/dpif-netdev-private-extract.h
index ff233c35b..1a2490762 100644
--- a/lib/dpif-netdev-private-extract.h
+++ b/lib/dpif-netdev-private-extract.h
@@ -159,8 +159,12 @@ dp_mfex_impl_get_by_name(const char *name, 
miniflow_extract_func 

[ovs-dev] [PATCH v4 5/9] dpif-netdev: Add function pointer for dpif re-circulate.

2022-08-07 Thread Kumar Amber
The patch adds and re-uses the dpif set command to set the
function pointers to be used to switch between different inner
dpifs.

Signed-off-by: Kumar Amber 
Signed-off-by: Cian Ferriter 
Co-authored-by: Cian Ferriter 

---
v3:
- Add description to the dpif recirc function.
- Fix use of return value to fall back to scalar dpif.
---
---
 lib/dpif-netdev-private-dpif.c   | 57 +++-
 lib/dpif-netdev-private-dpif.h   | 18 ++
 lib/dpif-netdev-private-thread.h |  3 ++
 lib/dpif-netdev.c| 19 +--
 4 files changed, 87 insertions(+), 10 deletions(-)

diff --git a/lib/dpif-netdev-private-dpif.c b/lib/dpif-netdev-private-dpif.c
index 7019b0931..51fe219ba 100644
--- a/lib/dpif-netdev-private-dpif.c
+++ b/lib/dpif-netdev-private-dpif.c
@@ -28,6 +28,8 @@
 #include "util.h"
 
 VLOG_DEFINE_THIS_MODULE(dpif_netdev_impl);
+#define DPIF_NETDEV_IMPL_AVX512_CHECK (__x86_64__ && HAVE_AVX512F \
+&& HAVE_LD_AVX512_GOOD && __SSE4_2__)
 
 DEFINE_EXTERN_PER_THREAD_DATA(recirc_depth, 0);
 
@@ -53,18 +55,21 @@ dp_netdev_input_avx512_probe(void)
 static struct dpif_netdev_impl_info_t dpif_impls[] = {
 /* The default scalar C code implementation. */
 [DPIF_NETDEV_IMPL_SCALAR] = { .input_func = dp_netdev_input,
+  .recirc_func = dp_netdev_recirculate,
   .probe = NULL,
   .name = "dpif_scalar", },
 
 #if (__x86_64__ && HAVE_AVX512F && HAVE_LD_AVX512_GOOD && __SSE4_2__)
 /* Only available on x86_64 bit builds with SSE 4.2 used for OVS core. */
 [DPIF_NETDEV_IMPL_AVX512] = { .input_func = dp_netdev_input_avx512,
+  .recirc_func = dp_netdev_input_avx512_recirc,
   .probe = dp_netdev_input_avx512_probe,
   .name = "dpif_avx512", },
 #endif
 };
 
 static dp_netdev_input_func default_dpif_func;
+static dp_netdev_recirc_func default_dpif_recirc_func;
 
 dp_netdev_input_func
 dp_netdev_impl_get_default(void)
@@ -75,7 +80,7 @@ dp_netdev_impl_get_default(void)
 int dpif_idx = DPIF_NETDEV_IMPL_SCALAR;
 
 /* Configure-time overriding to run test suite on all implementations. */
-#if (__x86_64__ && HAVE_AVX512F && HAVE_LD_AVX512_GOOD && __SSE4_2__)
+#if DPIF_NETDEV_IMPL_AVX512_CHECK
 #ifdef DPIF_AVX512_DEFAULT
 dp_netdev_input_func_probe probe;
 
@@ -95,6 +100,35 @@ dp_netdev_impl_get_default(void)
 return default_dpif_func;
 }
 
+dp_netdev_recirc_func
+dp_netdev_recirc_impl_get_default(void)
+{
+/* For the first call, this will be NULL. Compute the compile time default.
+ */
+if (!default_dpif_recirc_func) {
+int dpif_idx = DPIF_NETDEV_IMPL_SCALAR;
+
+/* Configure-time overriding to run test suite on all implementations. */
+#if DPIF_NETDEV_IMPL_AVX512_CHECK
+#ifdef DPIF_AVX512_DEFAULT
+dp_netdev_input_func_probe probe;
+
+/* Check if the compiled default is compatible. */
+probe = dpif_impls[DPIF_NETDEV_IMPL_AVX512].probe;
+if (!probe || !probe()) {
+dpif_idx = DPIF_NETDEV_IMPL_AVX512;
+}
+#endif
+#endif
+
+VLOG_INFO("Default re-circulate DPIF implementation is %s.\n",
+  dpif_impls[dpif_idx].name);
+default_dpif_recirc_func = dpif_impls[dpif_idx].recirc_func;
+}
+
+return default_dpif_recirc_func;
+}
+
 void
 dp_netdev_impl_get(struct ds *reply, struct dp_netdev_pmd_thread **pmd_list,
size_t n)
@@ -130,10 +164,12 @@ dp_netdev_impl_get(struct ds *reply, struct 
dp_netdev_pmd_thread **pmd_list,
  * returns the function pointer to the one requested by "name".
  */
 static int32_t
-dp_netdev_impl_get_by_name(const char *name, dp_netdev_input_func *out_func)
+dp_netdev_impl_get_by_name(const char *name, dp_netdev_input_func *dpif_func,
+   dp_netdev_recirc_func *dpif_recirc_func)
 {
 ovs_assert(name);
-ovs_assert(out_func);
+ovs_assert(dpif_func);
+ovs_assert(dpif_recirc_func);
 
 uint32_t i;
 
@@ -143,11 +179,13 @@ dp_netdev_impl_get_by_name(const char *name, 
dp_netdev_input_func *out_func)
 if (dpif_impls[i].probe) {
 int probe_err = dpif_impls[i].probe();
 if (probe_err) {
-*out_func = NULL;
+*dpif_func = NULL;
+*dpif_recirc_func = NULL;
 return probe_err;
 }
 }
-*out_func = dpif_impls[i].input_func;
+*dpif_func = dpif_impls[i].input_func;
+*dpif_recirc_func = dpif_impls[i].recirc_func;
 return 0;
 }
 }
@@ -158,12 +196,15 @@ dp_netdev_impl_get_by_name(const char *name, 
dp_netdev_input_func *out_func)
 int32_t
 dp_netdev_impl_set_default_by_name(const char *name)
 {
-dp_netdev_input_func new_default;
+dp_netdev_input_func new_dpif_default;
+dp_netdev_recirc_func new_dpif_recirc_default;
 
-i

[ovs-dev] [PATCH v4 4/9] dpif-netdev-avx512: Add inner packet handling to dpif.

2022-08-07 Thread Kumar Amber
This patch adds the necessary changes required to support
tunnel packet types in avx512 dpif.

Signed-off-by: Kumar Amber 
Signed-off-by: Cian Ferriter 
Co-authored-by: Cian Ferriter 

---
v4:
- Rebase onto Simple match.
v3:
- Apply in_port optimization suggested by Harry.
---
---
 lib/dpif-netdev-avx512.c | 39 ---
 1 file changed, 28 insertions(+), 11 deletions(-)

diff --git a/lib/dpif-netdev-avx512.c b/lib/dpif-netdev-avx512.c
index a36f4f312..1db20c1cf 100644
--- a/lib/dpif-netdev-avx512.c
+++ b/lib/dpif-netdev-avx512.c
@@ -61,7 +61,7 @@ struct dpif_userdata {
 static inline int32_t ALWAYS_INLINE
 dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
  struct dp_packet_batch *packets,
- bool md_is_valid OVS_UNUSED, odp_port_t in_port)
+ bool md_is_valid, odp_port_t in_port)
 {
 /* Allocate DPIF userdata. */
 if (OVS_UNLIKELY(!pmd->netdev_input_func_userdata)) {
@@ -73,6 +73,7 @@ dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
 struct netdev_flow_key *keys = ud->keys;
 struct netdev_flow_key **key_ptrs = ud->key_ptrs;
 struct pkt_flow_meta *pkt_meta = ud->pkt_meta;
+const uint32_t recirc_depth = *recirc_depth_get();
 
 /* The AVX512 DPIF implementation handles rules in a way that is optimized
  * for reducing data-movement between HWOL/EMC/SMC and DPCLS. This is
@@ -106,7 +107,8 @@ dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
 pkt_metadata_prefetch_init(>md);
 }
 
-const bool simple_match_enabled = dp_netdev_simple_match_enabled(pmd,
+const bool simple_match_enabled = !md_is_valid &&
+  dp_netdev_simple_match_enabled(pmd,
  in_port);
 /* Check if EMC or SMC are enabled. */
 struct dfc_cache *cache = >flow_cache;
@@ -183,11 +185,14 @@ dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
 }
 
 /* Do a batch minfilow extract into keys. */
+ /* Do a batch minfilow extract into keys, but only for outer packets. */
 uint32_t mf_mask = 0;
-miniflow_extract_func mfex_func;
-atomic_read_relaxed(>miniflow_extract_opt, _func);
-if (mfex_func) {
-mf_mask = mfex_func(packets, keys, batch_size, in_port, pmd);
+if (recirc_depth == 0) {
+miniflow_extract_func mfex_func;
+atomic_read_relaxed(>miniflow_extract_opt, _func);
+if (mfex_func) {
+mf_mask = mfex_func(packets, keys, batch_size, in_port, pmd);
+}
 }
 
 uint32_t iter = lookup_pkts_bitmask;
@@ -204,7 +209,9 @@ dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
 
 /* Get packet pointer from bitmask and packet md. */
 struct dp_packet *packet = packets->packets[i];
-pkt_metadata_init(>md, in_port);
+if (!md_is_valid) {
+pkt_metadata_init(>md, in_port);
+}
 
 struct dp_netdev_flow *f = NULL;
 struct netdev_flow_key *key = [i];
@@ -216,7 +223,7 @@ dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
 bool mfex_hit = !!(mf_mask & (UINT32_C(1) << i));
 
 /* Check for a partial hardware offload match. */
-if (hwol_enabled) {
+if (hwol_enabled && recirc_depth == 0) {
 if (OVS_UNLIKELY(dp_netdev_hw_flow(pmd, packet, ))) {
 /* Packet restoration failed and it was dropped, do not
  * continue processing. */
@@ -249,7 +256,9 @@ dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
 pkt_meta[i].tcp_flags = miniflow_get_tcp_flags(>mf);
 
 key->len = netdev_flow_key_size(miniflow_n_values(>mf));
-key->hash = dpif_netdev_packet_get_rss_hash_orig_pkt(packet, >mf);
+key->hash = (md_is_valid == false)
+? dpif_netdev_packet_get_rss_hash_orig_pkt(packet, >mf)
+: dpif_netdev_packet_get_rss_hash(packet, >mf);
 
 if (emc_enabled) {
 f = emc_lookup(>emc_cache, key);
@@ -287,7 +296,13 @@ dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
  * dpcls_rules[] array.
  */
 if (dpcls_key_idx > 0) {
-struct dpcls *cls = dp_netdev_pmd_lookup_dpcls(pmd, in_port);
+odp_port_t port_no;
+if (!md_is_valid) {
+port_no = in_port;
+} else {
+port_no = packets->packets[0]->md.in_port.odp_port;
+}
+struct dpcls *cls = dp_netdev_pmd_lookup_dpcls(pmd, port_no);
 if (OVS_UNLIKELY(!cls)) {
 return -1;
 }
@@ -353,7 +368,9 @@ dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
 pmd_perf_update_counter(>perf_stats, PMD_STAT_MASKED_LOOKUP,
 dpcls_key_idx);
 action_stage:
-pmd_perf_update_counter(>perf_stats, PMD_STAT_

[ovs-dev] [PATCH v4 3/9] dpif-netdev-avx512: Refactor avx512 dpif and create new APIs.

2022-08-07 Thread Kumar Amber
Create new APIs for the avx512 DPIF, enabling one baseline
common code to be specialized into DPIF implementations for
"outer" processing, and "recirc" processing.

Signed-off-by: Kumar Amber 
Signed-off-by: Cian Ferriter 
Co-authored-by: Cian Ferriter 

---
v4:
- Rebase onto master.
v3:
- Fix comments from Harry.
---
---
 lib/dpif-netdev-avx512.c   | 25 +
 lib/dpif-netdev-private-dpif.c |  6 +++---
 lib/dpif-netdev-private-dpif.h | 14 +++---
 lib/dpif-netdev.c  |  5 ++---
 4 files changed, 37 insertions(+), 13 deletions(-)

diff --git a/lib/dpif-netdev-avx512.c b/lib/dpif-netdev-avx512.c
index 83e7a1394..a36f4f312 100644
--- a/lib/dpif-netdev-avx512.c
+++ b/lib/dpif-netdev-avx512.c
@@ -58,10 +58,10 @@ struct dpif_userdata {
 struct pkt_flow_meta pkt_meta[NETDEV_MAX_BURST];
 };
 
-int32_t
-dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread *pmd,
- struct dp_packet_batch *packets,
- odp_port_t in_port)
+static inline int32_t ALWAYS_INLINE
+dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
+ struct dp_packet_batch *packets,
+ bool md_is_valid OVS_UNUSED, odp_port_t in_port)
 {
 /* Allocate DPIF userdata. */
 if (OVS_UNLIKELY(!pmd->netdev_input_func_userdata)) {
@@ -413,5 +413,22 @@ action_stage:
 return 0;
 }
 
+int32_t
+dp_netdev_input_avx512(struct dp_netdev_pmd_thread *pmd,
+   struct dp_packet_batch *packets,
+   odp_port_t in_port)
+{
+int ret = dp_netdev_input_avx512__(pmd, packets, false, in_port);
+return ret;
+}
+
+int32_t
+dp_netdev_input_avx512_recirc(struct dp_netdev_pmd_thread *pmd,
+  struct dp_packet_batch *packets)
+{
+int ret = dp_netdev_input_avx512__(pmd, packets, true, 0);
+return ret;
+}
+
 #endif
 #endif
diff --git a/lib/dpif-netdev-private-dpif.c b/lib/dpif-netdev-private-dpif.c
index f3cc200e5..7019b0931 100644
--- a/lib/dpif-netdev-private-dpif.c
+++ b/lib/dpif-netdev-private-dpif.c
@@ -38,7 +38,7 @@ enum dpif_netdev_impl_info_idx {
 
 #if (__x86_64__ && HAVE_AVX512F && HAVE_LD_AVX512_GOOD && __SSE4_2__)
 static int32_t
-dp_netdev_input_outer_avx512_probe(void)
+dp_netdev_input_avx512_probe(void)
 {
 if (!cpu_has_isa(OVS_CPU_ISA_X86_AVX512F)
 || !cpu_has_isa(OVS_CPU_ISA_X86_BMI2)) {
@@ -58,8 +58,8 @@ static struct dpif_netdev_impl_info_t dpif_impls[] = {
 
 #if (__x86_64__ && HAVE_AVX512F && HAVE_LD_AVX512_GOOD && __SSE4_2__)
 /* Only available on x86_64 bit builds with SSE 4.2 used for OVS core. */
-[DPIF_NETDEV_IMPL_AVX512] = { .input_func = dp_netdev_input_outer_avx512,
-  .probe = dp_netdev_input_outer_avx512_probe,
+[DPIF_NETDEV_IMPL_AVX512] = { .input_func = dp_netdev_input_avx512,
+  .probe = dp_netdev_input_avx512_probe,
   .name = "dpif_avx512", },
 #endif
 };
diff --git a/lib/dpif-netdev-private-dpif.h b/lib/dpif-netdev-private-dpif.h
index b3e75b7a2..46ce4ecf6 100644
--- a/lib/dpif-netdev-private-dpif.h
+++ b/lib/dpif-netdev-private-dpif.h
@@ -86,8 +86,16 @@ dp_netdev_input(struct dp_netdev_pmd_thread *pmd,
 
 /* AVX512 enabled DPIF implementation function. */
 int32_t
-dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread *pmd,
- struct dp_packet_batch *packets,
- odp_port_t in_port);
+dp_netdev_input_avx512(struct dp_netdev_pmd_thread *pmd,
+   struct dp_packet_batch *packets,
+   odp_port_t in_port);
+
+int32_t
+dp_netdev_input_avx512_recirc(struct dp_netdev_pmd_thread *pmd,
+  struct dp_packet_batch *packets);
+
+int32_t
+dp_netdev_recirculate(struct dp_netdev_pmd_thread *,
+  struct dp_packet_batch *);
 
 #endif /* netdev-private.h */
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 92e63599e..a50571dc8 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -545,8 +545,6 @@ static void dp_netdev_execute_actions(struct 
dp_netdev_pmd_thread *pmd,
   const struct flow *flow,
   const struct nlattr *actions,
   size_t actions_len);
-static void dp_netdev_recirculate(struct dp_netdev_pmd_thread *,
-  struct dp_packet_batch *);
 
 static void dp_netdev_disable_upcall(struct dp_netdev *);
 static void dp_netdev_pmd_reload_done(struct dp_netdev_pmd_thread *pmd);
@@ -8492,11 +8490,12 @@ dp_netdev_input(struct dp_netdev_pmd_thread *pmd,
 return 0;
 }
 
-static void
+int32_t
 dp_netdev_recirculate(struct dp_netdev_pmd_thread *pmd,
   struct dp_packet_batch *packets)
 {
 dp_netdev_input__(pmd, packets, true, 0);
+return 0;
 }
 
 struct dp_netdev_execute_aux

[ovs-dev] [PATCH v4 2/9] dpif-netdev: Refactor hash function to own header.

2022-08-07 Thread Kumar Amber
The refactor allows us to use the hash function across
multiple files, which was earlier restricted to
dpif-netdev.c only.

Signed-off-by: Kumar Amber 
Signed-off-by: Cian Ferriter 
Co-authored-by: Cian Ferriter 

---
v3:
- Fix minor comments from Harry.
---
---
 lib/dpif-netdev-private-dpcls.h | 23 +++
 lib/dpif-netdev.c   | 22 --
 2 files changed, 23 insertions(+), 22 deletions(-)

diff --git a/lib/dpif-netdev-private-dpcls.h b/lib/dpif-netdev-private-dpcls.h
index 2a9279437..1b37ecb16 100644
--- a/lib/dpif-netdev-private-dpcls.h
+++ b/lib/dpif-netdev-private-dpcls.h
@@ -25,6 +25,7 @@
 
 #include "cmap.h"
 #include "openvswitch/thread.h"
+#include "dpif-netdev-private-dpif.h"
 
 #ifdef  __cplusplus
 extern "C" {
@@ -125,6 +126,28 @@ dpif_netdev_packet_get_rss_hash_orig_pkt(struct dp_packet 
*packet,
 return hash;
 }
 
+static inline uint32_t
+dpif_netdev_packet_get_rss_hash(struct dp_packet *packet,
+const struct miniflow *mf)
+{
+uint32_t hash, recirc_depth;
+
+if (OVS_LIKELY(dp_packet_rss_valid(packet))) {
+hash = dp_packet_get_rss_hash(packet);
+} else {
+hash = miniflow_hash_5tuple(mf, 0);
+dp_packet_set_rss_hash(packet, hash);
+}
+
+/* The RSS hash must account for the recirculation depth to avoid
+ * collisions in the exact match cache */
+recirc_depth = *recirc_depth_get_unsafe();
+if (OVS_UNLIKELY(recirc_depth)) {
+hash = hash_finish(hash, recirc_depth);
+}
+return hash;
+}
+
 /* Allow other implementations to call dpcls_lookup() for subtable search. */
 bool
 dpcls_lookup(struct dpcls *cls, const struct netdev_flow_key *keys[],
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 4866231a3..92e63599e 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -7806,28 +7806,6 @@ dp_netdev_upcall(struct dp_netdev_pmd_thread *pmd, 
struct dp_packet *packet_,
  actions, wc, put_actions, dp->upcall_aux);
 }
 
-static inline uint32_t
-dpif_netdev_packet_get_rss_hash(struct dp_packet *packet,
-const struct miniflow *mf)
-{
-uint32_t hash, recirc_depth;
-
-if (OVS_LIKELY(dp_packet_rss_valid(packet))) {
-hash = dp_packet_get_rss_hash(packet);
-} else {
-hash = miniflow_hash_5tuple(mf, 0);
-dp_packet_set_rss_hash(packet, hash);
-}
-
-/* The RSS hash must account for the recirculation depth to avoid
- * collisions in the exact match cache */
-recirc_depth = *recirc_depth_get_unsafe();
-if (OVS_UNLIKELY(recirc_depth)) {
-hash = hash_finish(hash, recirc_depth);
-}
-return hash;
-}
-
 struct packet_batch_per_flow {
 unsigned int byte_count;
 uint16_t tcp_flags;
-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v4 1/9] dpif-netdev: Refactor per thread recirc data allocation.

2022-08-07 Thread Kumar Amber
The refactor allows us to use *recirc_depth_get() to obtain
the depth across ovs which was previously limited to only
dpif-netdev.c.

Signed-off-by: Kumar Amber 
Signed-off-by: Cian Ferriter 
Co-authored-by: Cian Ferriter 
Acked-by: Harry van Haaren 
---
 lib/dpif-netdev-private-dpif.c | 2 ++
 lib/dpif-netdev-private-dpif.h | 5 +
 lib/dpif-netdev.c  | 3 ---
 3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/lib/dpif-netdev-private-dpif.c b/lib/dpif-netdev-private-dpif.c
index 5ae119a30..f3cc200e5 100644
--- a/lib/dpif-netdev-private-dpif.c
+++ b/lib/dpif-netdev-private-dpif.c
@@ -29,6 +29,8 @@
 
 VLOG_DEFINE_THIS_MODULE(dpif_netdev_impl);
 
+DEFINE_EXTERN_PER_THREAD_DATA(recirc_depth, 0);
+
 enum dpif_netdev_impl_info_idx {
 DPIF_NETDEV_IMPL_SCALAR,
 DPIF_NETDEV_IMPL_AVX512
diff --git a/lib/dpif-netdev-private-dpif.h b/lib/dpif-netdev-private-dpif.h
index cf331cec7..b3e75b7a2 100644
--- a/lib/dpif-netdev-private-dpif.h
+++ b/lib/dpif-netdev-private-dpif.h
@@ -18,6 +18,11 @@
 #define DPIF_NETDEV_PRIVATE_DPIF_H 1
 
 #include "openvswitch/types.h"
+#include "ovs-thread.h"
+
+#define MAX_RECIRC_DEPTH 6
+/* Use per thread recirc_depth to prevent recirculation loop. */
+DECLARE_EXTERN_PER_THREAD_DATA(uint32_t, recirc_depth);
 
 /* Forward declarations to avoid including files. */
 struct dp_netdev_pmd_thread;
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index a45b46014..4866231a3 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -98,9 +98,6 @@ VLOG_DEFINE_THIS_MODULE(dpif_netdev);
 #define MIN_TO_MSEC  6
 
 #define FLOW_DUMP_MAX_BATCH 50
-/* Use per thread recirc_depth to prevent recirculation loop. */
-#define MAX_RECIRC_DEPTH 6
-DEFINE_STATIC_PER_THREAD_DATA(uint32_t, recirc_depth, 0)
 
 /* Use instant packet send by default. */
 #define DEFAULT_TX_FLUSH_INTERVAL 0
-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v4 0/9] DPIF + MFEX Inner Vxlan AVX512

2022-08-07 Thread Kumar Amber
This series of patches introduces optimizations for
supporting VXLAN tunneled packets in DPIF and MFEX. Along with
the optimizations, various parts of the scalar path are
refactored so that they can be used across implementations
without duplication.

In our tests we have observed a performance gain of approximately
20~25% over the scalar path.

Kumar Amber (9):
  dpif-netdev: Refactor per thread recirc data allocation.
  dpif-netdev: Refactor hash function to own header.
  dpif-netdev-avx512: Refactor avx512 dpif and create new APIs.
  dpif-netdev-avx512: Add inner packet handling to dpif.
  dpif-netdev: Add function pointer for dpif re-circulate.
  dpif-mfex: Modify set/get mfex commands to include inner.
  dpif-mfex: Change mfex fn pointer prototype to include md_is_valid.
  mfex-study: Modify study func to select outer and inner mfex funcs.
  mfex-avx512: Add support for tunnel packets in avx512 mfex.

 Documentation/topics/dpdk/bridge.rst |  18 ++--
 lib/dpif-netdev-avx512.c |  61 ---
 lib/dpif-netdev-extract-avx512.c | 153 +++
 lib/dpif-netdev-extract-study.c  | 132 +++
 lib/dpif-netdev-private-dpcls.h  |  23 
 lib/dpif-netdev-private-dpif.c   |  65 ++--
 lib/dpif-netdev-private-dpif.h   |  37 ++-
 lib/dpif-netdev-private-extract.c|  33 +-
 lib/dpif-netdev-private-extract.h|  19 ++--
 lib/dpif-netdev-private-thread.h |   6 ++
 lib/dpif-netdev.c|  70 ++--
 11 files changed, 453 insertions(+), 164 deletions(-)

-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v11 4/4] dpif-netdev/mfex: Add ipv6 profile based hashing.

2022-07-01 Thread Kumar Amber
For packets which don't already have a hash calculated,
miniflow_hash_5tuple() calculates the hash of a packet
using the previously built miniflow.

This commit adds IPv6 profile specific hashing which
uses fixed offsets into the packet to improve hashing
performance.

Signed-off-by: Kumar Amber 
Acked-by: Harry van Haaren 
---
 lib/dp-packet.h  | 43 
 lib/dpif-netdev-extract-avx512.c |  8 +++---
 lib/flow.c   |  4 +++
 3 files changed, 51 insertions(+), 4 deletions(-)

diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index bddaa2b5d..eea5a9215 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -1117,6 +1117,49 @@ dp_packet_update_rss_hash_ipv4_tcp_udp(struct dp_packet 
*packet)
 dp_packet_set_rss_hash(packet, hash);
 }
 
+static inline void ALWAYS_INLINE
+dp_packet_update_rss_hash_ipv6_tcp_udp(struct dp_packet *packet)
+{
+if (dp_packet_rss_valid(packet)) {
+return;
+}
+
+const uint8_t *pkt = dp_packet_data(packet);
+const uint16_t l3_ofs = packet->l3_ofs;
+uint32_t ipv6_src_off = offsetof(struct ovs_16aligned_ip6_hdr, ip6_src);
+uint32_t ipv6_dst_off = offsetof(struct ovs_16aligned_ip6_hdr, ip6_dst);
+uint32_t ipv6_proto_off = offsetof(struct ovs_16aligned_ip6_hdr,
+   ip6_ctlun.ip6_un1.ip6_un1_nxt);
+const void *ipv6_src_l = [l3_ofs + ipv6_src_off];
+const void *ipv6_src_h = [l3_ofs + ipv6_src_off + 8];
+const void *ipv6_dst_l = [l3_ofs + ipv6_dst_off];
+const void *ipv6_dst_h = [l3_ofs + ipv6_dst_off + 8];
+const void *l4_ports = [packet->l4_ofs];
+uint64_t ipv6_src_lo, ipv6_src_hi;
+uint64_t ipv6_dst_lo, ipv6_dst_hi;
+uint32_t ports;
+uint32_t hash = 0;
+
+memcpy(_src_lo, ipv6_src_l, sizeof ipv6_src_lo);
+memcpy(_src_hi, ipv6_src_h, sizeof ipv6_src_hi);
+memcpy(_dst_lo, ipv6_dst_l, sizeof ipv6_dst_lo);
+memcpy(_dst_hi, ipv6_dst_h, sizeof ipv6_dst_hi);
+memcpy(, l4_ports, sizeof ports);
+
+/* IPv6 Src and Dst. */
+hash = hash_add64(hash, ipv6_src_lo);
+hash = hash_add64(hash, ipv6_src_hi);
+hash = hash_add64(hash, ipv6_dst_lo);
+hash = hash_add64(hash, ipv6_dst_hi);
+/* IPv6 proto. */
+hash = hash_add(hash, pkt[l3_ofs + ipv6_proto_off]);
+/* L4 ports. */
+hash = hash_add(hash, ports);
+hash = hash_finish(hash, 42);
+
+dp_packet_set_rss_hash(packet, hash);
+}
+
 #ifdef  __cplusplus
 }
 #endif
diff --git a/lib/dpif-netdev-extract-avx512.c b/lib/dpif-netdev-extract-avx512.c
index 3b6bc62e3..4afbed97e 100644
--- a/lib/dpif-netdev-extract-avx512.c
+++ b/lib/dpif-netdev-extract-avx512.c
@@ -887,7 +887,7 @@ mfex_avx512_process(struct dp_packet_batch *packets,
 
 /* Process UDP header. */
 mfex_handle_ipv6_l4((void *)[54], [9]);
-
+dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
 } break;
 
 case PROFILE_ETH_IPV6_TCP: {
@@ -910,7 +910,7 @@ mfex_avx512_process(struct dp_packet_batch *packets,
 continue;
 }
 mfex_handle_tcp_flags(tcp, [9]);
-
+dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
 } break;
 
 case PROFILE_ETH_VLAN_IPV6_TCP: {
@@ -936,7 +936,7 @@ mfex_avx512_process(struct dp_packet_batch *packets,
 continue;
 }
 mfex_handle_tcp_flags(tcp, [10]);
-
+dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
 } break;
 
 case PROFILE_ETH_VLAN_IPV6_UDP: {
@@ -957,7 +957,7 @@ mfex_avx512_process(struct dp_packet_batch *packets,
 
 /* Process UDP header. */
 mfex_handle_ipv6_l4((void *)[58], [10]);
-
+dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
 } break;
 default:
 break;
diff --git a/lib/flow.c b/lib/flow.c
index 8ab9df3fc..b1e1fb34d 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -1019,6 +1019,8 @@ miniflow_extract(struct dp_packet *packet, struct 
miniflow *dst)
 miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
 if (dl_type == htons(ETH_TYPE_IP)) {
 dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
+} else if (dl_type == htons(ETH_TYPE_IPV6)) {
+dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
 }
 }
 }
@@ -1032,6 +1034,8 @@ miniflow_extract(struct dp_packet *packet, struct 
miniflow *dst)
 miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
 if (dl_type == htons(ETH_TYPE_IP)) {
 dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
+} else if (dl_type == htons(ETH_TYPE_IPV6)) {
+dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
 }
 }
 } else if (OVS_LIKELY(nw_proto == IPPROTO_SCTP)) {
-- 

[ovs-dev] [PATCH v11 3/4] dpif-netdev/mfex: Add AVX512 ipv6 traffic profiles

2022-07-01 Thread Kumar Amber
Add AVX512 IPv6 optimized profiles for VLAN/IPv6/UDP,
VLAN/IPv6/TCP, IPv6/UDP and IPv6/TCP.

The MFEX autovalidation test-case already has IPv6 support for
validating against the scalar MFEX.

Signed-off-by: Kumar Amber 
Signed-off-by: Harry van Haaren 
Co-authored-by: Harry van Haaren 

---
v11:
- Rebase on top of David AVX512 ISA runtime check.
v10:
- Rebase on top of Partial Avx512 changes patch.
v9:
- Fix UBSan memory aligned access.
v8:
- Rename defines for packet offsets.
v7:
- Fix length checks for plen.
v5:
- Add variable length checks for IPv6 and TCP.
v4:
- Rebase to master.
v2:
- Fix CI build error.
- Fix check-patch sign-offs.
---
---
 NEWS  |   4 +
 acinclude.m4  |   1 +
 lib/automake.mk   |   5 +-
 lib/dpif-netdev-extract-avx512.c  | 303 +-
 lib/dpif-netdev-private-extract.c |  51 -
 lib/dpif-netdev-private-extract.h |  20 ++
 6 files changed, 380 insertions(+), 4 deletions(-)

diff --git a/NEWS b/NEWS
index 994fdf6a9..860e926b8 100644
--- a/NEWS
+++ b/NEWS
@@ -43,6 +43,10 @@ Post-v2.17.0
  * 'dpif-netdev/subtable-lookup-prio-get' appctl command renamed to
'dpif-netdev/subtable-lookup-info-get' to better reflect its purpose.
The old variant is kept for backward compatibility.
+ * Add AVX512 optimized profiles to miniflow extract for IPv6/UDP and
+   IPv6/TCP.
+ * Add AVX512 optimized profiles to miniflow extract for VLAN/IPv6/UDP
+   and VLAN/IPv6/TCP.
 
 
 v2.17.0 - 17 Feb 2022
diff --git a/acinclude.m4 b/acinclude.m4
index 7b2889a40..b518aa624 100644
--- a/acinclude.m4
+++ b/acinclude.m4
@@ -78,6 +78,7 @@ AC_DEFUN([OVS_CHECK_AVX512], [
   OVS_CHECK_BINUTILS_AVX512
   OVS_CONDITIONAL_CC_OPTION_DEFINE([-mavx512f], [HAVE_AVX512F])
   OVS_CONDITIONAL_CC_OPTION_DEFINE([-mavx512bw], [HAVE_AVX512BW])
+  OVS_CONDITIONAL_CC_OPTION_DEFINE([-mavx512vl], [HAVE_AVX512VL])
   OVS_CONDITIONAL_CC_OPTION_DEFINE([-mavx512vbmi], [HAVE_AVX512VBMI])
   OVS_CONDITIONAL_CC_OPTION_DEFINE([-mavx512vpopcntdq], [HAVE_AVX512VPOPCNTDQ])
 ])
diff --git a/lib/automake.mk b/lib/automake.mk
index 3b9e775d4..1d00cfa20 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -38,11 +38,14 @@ lib_libopenvswitchavx512_la_CFLAGS = \
 lib_libopenvswitchavx512_la_SOURCES = \
lib/dpif-netdev-avx512.c
 if HAVE_AVX512BW
+if HAVE_AVX512VL
 lib_libopenvswitchavx512_la_CFLAGS += \
-   -mavx512bw
+   -mavx512bw \
+   -mavx512vl
 lib_libopenvswitchavx512_la_SOURCES += \
lib/dpif-netdev-extract-avx512.c \
lib/dpif-netdev-lookup-avx512-gather.c
+endif # HAVE_AVX512VL
 endif # HAVE_AVX512BW
 lib_libopenvswitchavx512_la_LDFLAGS = \
-static
diff --git a/lib/dpif-netdev-extract-avx512.c b/lib/dpif-netdev-extract-avx512.c
index ea77b2519..3b6bc62e3 100644
--- a/lib/dpif-netdev-extract-avx512.c
+++ b/lib/dpif-netdev-extract-avx512.c
@@ -48,6 +48,7 @@
 #include "dpif-netdev-private-extract.h"
 #include "dpif-netdev-private-flow.h"
 #include "dp-packet.h"
+#include "packets.h"
 
 /* AVX512-BW level permutex2var_epi8 emulation. */
 static inline __m512i
@@ -169,6 +170,7 @@ _mm512_maskz_permutexvar_epi8_selector(__mmask64 k_shuf, 
__m512i v_shuf,
 #define PATTERN_ETHERTYPE_MASK PATTERN_ETHERTYPE_GEN(0xFF, 0xFF)
 #define PATTERN_ETHERTYPE_IPV4 PATTERN_ETHERTYPE_GEN(0x08, 0x00)
 #define PATTERN_ETHERTYPE_DT1Q PATTERN_ETHERTYPE_GEN(0x81, 0x00)
+#define PATTERN_ETHERTYPE_IPV6 PATTERN_ETHERTYPE_GEN(0x86, 0xDD)
 
 /* VLAN (Dot1Q) patterns and masks. */
 #define PATTERN_DT1Q_MASK   \
@@ -233,6 +235,40 @@ _mm512_maskz_permutexvar_epi8_selector(__mmask64 k_shuf, 
__m512i v_shuf,
   NU, NU, NU, NU, NU, NU, NU, NU, 38, 39, 40, 41, NU, NU, NU, NU, /* TCP */   \
   NU, NU, NU, NU, NU, NU, NU, NU, /* Unused. */
 
+/* Generator for checking IPv6 ver. */
+#define PATTERN_IPV6_GEN(VER_TRC, PROTO)  \
+  VER_TRC, /* Version: 4bits and Traffic class: 4bits. */ \
+  0, 0, 0, /* Traffic class: 4bits and Flow Label: 24bits. */ \
+  0, 0,/* Payload length 16bits. */   \
+  PROTO, 0,/* Next Header 8bits and Hop limit 8bits. */   \
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Src IP: 128bits. */  \
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Dst IP: 128bits. */
+
+#define PATTERN_IPV6_MASK PATTERN_IPV6_GEN(0xF0, 0xFF)
+#define PATTERN_IPV6_UDP PATTERN_IPV6_GEN(0x60, 0x11)
+#define PATTERN_IPV6_TCP PATTERN_IPV6_GEN(0x60, 0x06)
+
+#define PATTERN_IPV6_SHUFFLE  \
+   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, NU, NU, /* Ether */ \
+  22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, /* IPv6 */  \
+  38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, /* IPv6 */  \
+  NU, NU, NU, NU, NU

[ovs-dev] [PATCH v11 2/4] mfex_avx512: Calculate miniflow_bits at compile time.

2022-07-01 Thread Kumar Amber
The patch removes magic numbers from miniflow_bits
and calculates the bits at compile time. This also
makes it easier to handle any ABI changes.

Signed-off-by: Kumar Amber 
---
 lib/dpif-netdev-extract-avx512.c | 21 +
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/lib/dpif-netdev-extract-avx512.c b/lib/dpif-netdev-extract-avx512.c
index 8eec1451c..ea77b2519 100644
--- a/lib/dpif-netdev-extract-avx512.c
+++ b/lib/dpif-netdev-extract-avx512.c
@@ -288,6 +288,19 @@ _mm512_maskz_permutexvar_epi8_selector(__mmask64 k_shuf, 
__m512i v_shuf,
 #define PKT_MIN_ETH_IPV4_TCP  (PKT_OFFSET_IPV4_L4 + TCP_HEADER_LEN)
 #define PKT_MIN_ETH_VLAN_IPV4_TCP (PKT_OFFSET_VLAN_IPV4_L4 + TCP_HEADER_LEN)
 
+/* MF bits. */
+#define MF_BIT(field) (MAP_1 << ((offsetof(struct flow, field) / 8) % \
+   MAP_T_BITS))
+
+#define MF_ETH(MF_BIT(dp_hash) | MF_BIT(in_port) | MF_BIT(packet_type)\
+   | MF_BIT(dl_dst) | MF_BIT(dl_src)| MF_BIT(dl_type))
+
+#define MF_ETH_VLAN   (MF_ETH | MF_BIT(vlans))
+#define MF_IPV4_UDP   (MF_BIT(nw_src) | MF_BIT(ipv6_label) | MF_BIT(tp_src) | \
+   MF_BIT(tp_dst))
+
+#define MF_IPV4_TCP   (MF_IPV4_UDP | MF_BIT(tcp_flags) | MF_BIT(arp_tha.ea[2]))
+
 /* This union allows initializing static data as u8, but easily loading it
  * into AVX512 registers too. The union ensures proper alignment for the zmm.
  */
@@ -385,7 +398,7 @@ static const struct mfex_profile 
mfex_profiles[PROFILE_COUNT] =
 .strip_mask.u8_data = { PATTERN_STRIP_IPV4_MASK },
 .store_kmsk = PATTERN_IPV4_UDP_KMASK,
 
-.mf_bits = { 0x18a0, 0x00040401},
+.mf_bits = { MF_ETH, MF_IPV4_UDP},
 .dp_pkt_offs = {
 0, UINT16_MAX, PKT_OFFSET_L3, PKT_OFFSET_IPV4_L4,
 },
@@ -408,7 +421,7 @@ static const struct mfex_profile 
mfex_profiles[PROFILE_COUNT] =
 .strip_mask.u8_data = { PATTERN_STRIP_IPV4_MASK },
 .store_kmsk = PATTERN_IPV4_TCP_KMASK,
 
-.mf_bits = { 0x18a0, 0x00044401},
+.mf_bits = { MF_ETH, MF_IPV4_TCP},
 .dp_pkt_offs = {
 0, UINT16_MAX, PKT_OFFSET_L3, PKT_OFFSET_IPV4_L4,
 },
@@ -427,7 +440,7 @@ static const struct mfex_profile 
mfex_profiles[PROFILE_COUNT] =
 .strip_mask.u8_data = { PATTERN_STRIP_DOT1Q_IPV4_MASK },
 .store_kmsk = PATTERN_DT1Q_IPV4_UDP_KMASK,
 
-.mf_bits = { 0x38a0, 0x00040401},
+.mf_bits = { MF_ETH_VLAN, MF_IPV4_UDP},
 .dp_pkt_offs = {
 PKT_OFFSET_L2_PAD_SIZE, UINT16_MAX, PKT_OFFSET_VLAN_L3,
 PKT_OFFSET_VLAN_IPV4_L4,
@@ -453,7 +466,7 @@ static const struct mfex_profile 
mfex_profiles[PROFILE_COUNT] =
 .strip_mask.u8_data = { PATTERN_STRIP_DOT1Q_IPV4_MASK },
 .store_kmsk = PATTERN_DT1Q_IPV4_TCP_KMASK,
 
-.mf_bits = { 0x38a0, 0x00044401},
+.mf_bits = { MF_ETH_VLAN, MF_IPV4_TCP},
 .dp_pkt_offs = {
 PKT_OFFSET_L2_PAD_SIZE, UINT16_MAX, PKT_OFFSET_VLAN_L3,
 PKT_OFFSET_VLAN_IPV4_L4,
-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v11 0/4] MFEX Optimizations IPv6 + Hashing Optimizations

2022-07-01 Thread Kumar Amber
The patchset introduces IPv6 optimized MFEX profiles
with AVX512 which can deliver up to a 20% to 30% gain in
performance over the existing scalar data-path.

Hashing optimizations are also included which can further
improve performance by approximately 10%.

The patch also removes the magic numbers for MF bits, packet offsets
and packet lengths.

---
v11:
- Rebase on top of AVX512 runtime ISA check patch.
v10:
- Rebase onto Cian's partial AVX512 build patch.
v9:
- Fix UBSan unaligned memory load.
v8:
- Rename packet offset defines to align with packet struct.
v7:
- Remove magic numbers from AVX512 Profiles.
v5:
- Add Ipv6 and TCP packet length checks.
v4:
- rebase to master.
- use static key lengths for different packet types.
v3:
- rebase to master.
v2:
- fix the CI build.
- fix check-patch for co-author.
---

Kumar Amber (4):
  mfex_avx512: Calculate pkt offsets at compile time.
  mfex_avx512: Calculate miniflow_bits at compile time.
  dpif-netdev/mfex: Add AVX512 ipv6 traffic profiles
  dpif-netdev/mfex: Add ipv6 profile based hashing.

 NEWS  |   4 +
 acinclude.m4  |   1 +
 lib/automake.mk   |   5 +-
 lib/dp-packet.h   |  43 
 lib/dpif-netdev-extract-avx512.c  | 349 +-
 lib/dpif-netdev-private-extract.c |  51 -
 lib/dpif-netdev-private-extract.h |  20 ++
 lib/flow.c|   4 +
 8 files changed, 463 insertions(+), 14 deletions(-)

-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v11 1/4] mfex_avx512: Calculate pkt offsets at compile time.

2022-07-01 Thread Kumar Amber
The patch removes the magic numbers for pkt offsets and
minimum packet length and instead calculates them at
compile time.

Signed-off-by: Kumar Amber 

---
v8:
- Rename offset defines.
---
---
 lib/dpif-netdev-extract-avx512.c | 29 +
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/lib/dpif-netdev-extract-avx512.c b/lib/dpif-netdev-extract-avx512.c
index 502973029..8eec1451c 100644
--- a/lib/dpif-netdev-extract-avx512.c
+++ b/lib/dpif-netdev-extract-avx512.c
@@ -277,6 +277,17 @@ _mm512_maskz_permutexvar_epi8_selector(__mmask64 k_shuf, 
__m512i v_shuf,
 NC, NC, NC, NC, 0xBF, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC,   \
 NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC
 
+#define PKT_OFFSET_L2_PAD_SIZE(ETH_HEADER_LEN)
+#define PKT_OFFSET_L3 (ETH_HEADER_LEN)
+#define PKT_OFFSET_VLAN_L3(ETH_HEADER_LEN + VLAN_HEADER_LEN)
+#define PKT_OFFSET_IPV4_L4(ETH_HEADER_LEN + IP_HEADER_LEN)
+#define PKT_OFFSET_VLAN_IPV4_L4   (PKT_OFFSET_IPV4_L4 + VLAN_HEADER_LEN)
+
+#define PKT_MIN_ETH_IPV4_UDP  (PKT_OFFSET_IPV4_L4 + UDP_HEADER_LEN)
+#define PKT_MIN_ETH_VLAN_IPV4_UDP (PKT_OFFSET_VLAN_IPV4_L4 + UDP_HEADER_LEN)
+#define PKT_MIN_ETH_IPV4_TCP  (PKT_OFFSET_IPV4_L4 + TCP_HEADER_LEN)
+#define PKT_MIN_ETH_VLAN_IPV4_TCP (PKT_OFFSET_VLAN_IPV4_L4 + TCP_HEADER_LEN)
+
 /* This union allows initializing static data as u8, but easily loading it
  * into AVX512 registers too. The union ensures proper alignment for the zmm.
  */
@@ -376,9 +387,9 @@ static const struct mfex_profile 
mfex_profiles[PROFILE_COUNT] =
 
 .mf_bits = { 0x18a0, 0x00040401},
 .dp_pkt_offs = {
-0, UINT16_MAX, 14, 34,
+0, UINT16_MAX, PKT_OFFSET_L3, PKT_OFFSET_IPV4_L4,
 },
-.dp_pkt_min_size = 42,
+.dp_pkt_min_size = PKT_MIN_ETH_IPV4_UDP,
 },
 
 [PROFILE_ETH_IPV4_TCP] = {
@@ -399,9 +410,9 @@ static const struct mfex_profile 
mfex_profiles[PROFILE_COUNT] =
 
 .mf_bits = { 0x18a0, 0x00044401},
 .dp_pkt_offs = {
-0, UINT16_MAX, 14, 34,
+0, UINT16_MAX, PKT_OFFSET_L3, PKT_OFFSET_IPV4_L4,
 },
-.dp_pkt_min_size = 54,
+.dp_pkt_min_size = PKT_MIN_ETH_IPV4_TCP,
 },
 
 [PROFILE_ETH_VLAN_IPV4_UDP] = {
@@ -418,9 +429,10 @@ static const struct mfex_profile 
mfex_profiles[PROFILE_COUNT] =
 
 .mf_bits = { 0x38a0, 0x00040401},
 .dp_pkt_offs = {
-14, UINT16_MAX, 18, 38,
+PKT_OFFSET_L2_PAD_SIZE, UINT16_MAX, PKT_OFFSET_VLAN_L3,
+PKT_OFFSET_VLAN_IPV4_L4,
 },
-.dp_pkt_min_size = 46,
+.dp_pkt_min_size = PKT_MIN_ETH_VLAN_IPV4_UDP,
 },
 
 [PROFILE_ETH_VLAN_IPV4_TCP] = {
@@ -443,9 +455,10 @@ static const struct mfex_profile 
mfex_profiles[PROFILE_COUNT] =
 
 .mf_bits = { 0x38a0, 0x00044401},
 .dp_pkt_offs = {
-14, UINT16_MAX, 18, 38,
+PKT_OFFSET_L2_PAD_SIZE, UINT16_MAX, PKT_OFFSET_VLAN_L3,
+PKT_OFFSET_VLAN_IPV4_L4,
 },
-.dp_pkt_min_size = 58,
+.dp_pkt_min_size = PKT_MIN_ETH_VLAN_IPV4_TCP,
 },
 };
 
-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2] Pmd.at: fix dpcls and dpif configuration test cases.

2022-06-30 Thread Kumar Amber
Without running the set command first, the string matching
fails on the get command because the DPCLS prio value differs
between default builds: with an --enable-autovalidator
build the auto-validator prio is set to 255, and if the build
is scalar then the generic value defaults to 255.

The same problem is seen with dpif, where moving the get
command after the set makes it consistent across all builds.

Fixes: cc0a87b11c (pmd.at: Add test-cases for DPCLS and DPIF commands.)
Signed-off-by: Kumar Amber 
Acked-by: Michael Phelan 
---
 tests/pmd.at | 16 
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/tests/pmd.at b/tests/pmd.at
index e6b173dab..4342c50e0 100644
--- a/tests/pmd.at
+++ b/tests/pmd.at
@@ -1113,15 +1113,15 @@ AT_SETUP([PMD - dpif configuration])
 OVS_VSWITCHD_START([], [], [], [--dummy-numa 0,0])
 AT_CHECK([ovs-vsctl add-port br0 p1 -- set Interface p1 type=dummy-pmd])
 
+AT_CHECK([ovs-appctl dpif-netdev/dpif-impl-set dpif_scalar], [0], [dnl
+DPIF implementation set to dpif_scalar.
+])
+
 AT_CHECK([ovs-vsctl show], [], [stdout])
 AT_CHECK([ovs-appctl dpif-netdev/dpif-impl-get | grep "dpif_scalar"], [], [dnl
   dpif_scalar (pmds: 0)
 ])
 
-AT_CHECK([ovs-appctl dpif-netdev/dpif-impl-set dpif_scalar], [0], [dnl
-DPIF implementation set to dpif_scalar.
-])
-
 OVS_VSWITCHD_STOP
 AT_CLEANUP
 
@@ -1130,14 +1130,6 @@ OVS_VSWITCHD_START([], [], [], [--dummy-numa 0,0])
 AT_CHECK([ovs-vsctl add-port br0 p1 -- set Interface p1 type=dummy-pmd])
 
 AT_CHECK([ovs-vsctl show], [], [stdout])
-AT_CHECK([ovs-appctl dpif-netdev/subtable-lookup-info-get | grep generic], [], 
[dnl
-  generic (Use count: 0, Priority: 1)
-])
-
-AT_CHECK([ovs-appctl dpif-netdev/subtable-lookup-info-get | grep 
autovalidator], [], [dnl
-  autovalidator (Use count: 0, Priority: 0)
-])
-
 AT_CHECK([ovs-appctl dpif-netdev/subtable-lookup-prio-set autovalidator 3], 
[0], [dnl
 Lookup priority change affected 0 dpcls ports and 0 subtables.
 ])
-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v3 4/5] dpif-netdev: Add function pointer for dpif re-circulate.

2022-06-28 Thread Kumar Amber
The patch adds and re-uses the dpif set command to set the
function pointers to be used to switch between different inner
dpifs.

Signed-off-by: Kumar Amber 
Signed-off-by: Cian Ferriter 
Co-authored-by: Cian Ferriter 

---
v3:
- Add description  to the dpif recirc function.
- Fix use of return value to fall back to scalar dpif.
---
---
 lib/dpif-netdev-private-dpif.c   | 57 +++-
 lib/dpif-netdev-private-dpif.h   | 18 ++
 lib/dpif-netdev-private-thread.h |  3 ++
 lib/dpif-netdev.c| 19 +--
 4 files changed, 87 insertions(+), 10 deletions(-)

diff --git a/lib/dpif-netdev-private-dpif.c b/lib/dpif-netdev-private-dpif.c
index 2dc51270a..96bfd4824 100644
--- a/lib/dpif-netdev-private-dpif.c
+++ b/lib/dpif-netdev-private-dpif.c
@@ -27,6 +27,8 @@
 #include "util.h"
 
 VLOG_DEFINE_THIS_MODULE(dpif_netdev_impl);
+#define DPIF_NETDEV_IMPL_AVX512_CHECK (__x86_64__ && HAVE_AVX512F \
+&& HAVE_LD_AVX512_GOOD && __SSE4_2__)
 
 DEFINE_EXTERN_PER_THREAD_DATA(recirc_depth, 0);
 
@@ -39,18 +41,21 @@ enum dpif_netdev_impl_info_idx {
 static struct dpif_netdev_impl_info_t dpif_impls[] = {
 /* The default scalar C code implementation. */
 [DPIF_NETDEV_IMPL_SCALAR] = { .input_func = dp_netdev_input,
+  .recirc_func = dp_netdev_recirculate,
   .probe = NULL,
   .name = "dpif_scalar", },
 
 #if (__x86_64__ && HAVE_AVX512F && HAVE_LD_AVX512_GOOD && __SSE4_2__)
 /* Only available on x86_64 bit builds with SSE 4.2 used for OVS core. */
 [DPIF_NETDEV_IMPL_AVX512] = { .input_func = dp_netdev_input_avx512,
+  .recirc_func = dp_netdev_input_avx512_recirc,
   .probe = dp_netdev_input_avx512_probe,
   .name = "dpif_avx512", },
 #endif
 };
 
 static dp_netdev_input_func default_dpif_func;
+static dp_netdev_recirc_func default_dpif_recirc_func;
 
 dp_netdev_input_func
 dp_netdev_impl_get_default(void)
@@ -61,7 +66,7 @@ dp_netdev_impl_get_default(void)
 int dpif_idx = DPIF_NETDEV_IMPL_SCALAR;
 
 /* Configure-time overriding to run test suite on all implementations. */
-#if (__x86_64__ && HAVE_AVX512F && HAVE_LD_AVX512_GOOD && __SSE4_2__)
+#if DPIF_NETDEV_IMPL_AVX512_CHECK
 #ifdef DPIF_AVX512_DEFAULT
 dp_netdev_input_func_probe probe;
 
@@ -81,6 +86,35 @@ dp_netdev_impl_get_default(void)
 return default_dpif_func;
 }
 
+dp_netdev_recirc_func
+dp_netdev_recirc_impl_get_default(void)
+{
+/* For the first call, this will be NULL. Compute the compile time default.
+ */
+if (!default_dpif_recirc_func) {
+int dpif_idx = DPIF_NETDEV_IMPL_SCALAR;
+
+/* Configure-time overriding to run test suite on all implementations. */
+#if DPIF_NETDEV_IMPL_AVX512_CHECK
+#ifdef DPIF_AVX512_DEFAULT
+dp_netdev_input_func_probe probe;
+
+/* Check if the compiled default is compatible. */
+probe = dpif_impls[DPIF_NETDEV_IMPL_AVX512].probe;
+if (!probe || !probe()) {
+dpif_idx = DPIF_NETDEV_IMPL_AVX512;
+}
+#endif
+#endif
+
+VLOG_INFO("Default re-circulate DPIF implementation is %s.\n",
+  dpif_impls[dpif_idx].name);
+default_dpif_recirc_func = dpif_impls[dpif_idx].recirc_func;
+}
+
+return default_dpif_recirc_func;
+}
+
 void
 dp_netdev_impl_get(struct ds *reply, struct dp_netdev_pmd_thread **pmd_list,
size_t n)
@@ -116,10 +150,12 @@ dp_netdev_impl_get(struct ds *reply, struct 
dp_netdev_pmd_thread **pmd_list,
  * returns the function pointer to the one requested by "name".
  */
 static int32_t
-dp_netdev_impl_get_by_name(const char *name, dp_netdev_input_func *out_func)
+dp_netdev_impl_get_by_name(const char *name, dp_netdev_input_func *dpif_func,
+   dp_netdev_recirc_func *dpif_recirc_func)
 {
 ovs_assert(name);
-ovs_assert(out_func);
+ovs_assert(dpif_func);
+ovs_assert(dpif_recirc_func);
 
 uint32_t i;
 
@@ -129,11 +165,13 @@ dp_netdev_impl_get_by_name(const char *name, 
dp_netdev_input_func *out_func)
 if (dpif_impls[i].probe) {
 int probe_err = dpif_impls[i].probe();
 if (probe_err) {
-*out_func = NULL;
+*dpif_func = NULL;
+*dpif_recirc_func = NULL;
 return probe_err;
 }
 }
-*out_func = dpif_impls[i].input_func;
+*dpif_func = dpif_impls[i].input_func;
+*dpif_recirc_func = dpif_impls[i].recirc_func;
 return 0;
 }
 }
@@ -144,12 +182,15 @@ dp_netdev_impl_get_by_name(const char *name, 
dp_netdev_input_func *out_func)
 int32_t
 dp_netdev_impl_set_default_by_name(const char *name)
 {
-dp_netdev_input_func new_default;
+dp_netdev_input_func new_dpif_default;
+dp_netdev_recirc_func new_dpif_recirc_default;
 
-i

[ovs-dev] [PATCH v3 5/5] dpif-netdev-avx512: Add inner packet handling to dpif.

2022-06-28 Thread Kumar Amber
This patch adds the necessary changes required to support
tunnel packet types in avx512 dpif.

Signed-off-by: Kumar Amber 
Signed-off-by: Cian Ferriter 
Co-authored-by: Cian Ferriter 

---
v3:
- Apply in_port optimization suggested by Harry.
---
---
 lib/dpif-netdev-avx512.c | 25 +++--
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/lib/dpif-netdev-avx512.c b/lib/dpif-netdev-avx512.c
index ecf512651..8a1bf3ad6 100644
--- a/lib/dpif-netdev-avx512.c
+++ b/lib/dpif-netdev-avx512.c
@@ -80,7 +80,7 @@ dp_netdev_input_avx512_probe(void)
 static inline int32_t ALWAYS_INLINE
 dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
  struct dp_packet_batch *packets,
- bool md_is_valid OVS_UNUSED, odp_port_t in_port)
+ bool md_is_valid, odp_port_t in_port)
 {
 /* Allocate DPIF userdata. */
 if (OVS_UNLIKELY(!pmd->netdev_input_func_userdata)) {
@@ -92,6 +92,7 @@ dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
 struct netdev_flow_key *keys = ud->keys;
 struct netdev_flow_key **key_ptrs = ud->key_ptrs;
 struct pkt_flow_meta *pkt_meta = ud->pkt_meta;
+const uint32_t recirc_depth = *recirc_depth_get();
 
 /* The AVX512 DPIF implementation handles rules in a way that is optimized
  * for reducing data-movement between HWOL/EMC/SMC and DPCLS. This is
@@ -179,7 +180,9 @@ dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
 
 /* Get packet pointer from bitmask and packet md. */
 struct dp_packet *packet = packets->packets[i];
-pkt_metadata_init(>md, in_port);
+if (!md_is_valid) {
+pkt_metadata_init(>md, in_port);
+}
 
 struct dp_netdev_flow *f = NULL;
 struct netdev_flow_key *key = [i];
@@ -191,7 +194,7 @@ dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
 bool mfex_hit = !!(mf_mask & (UINT32_C(1) << i));
 
 /* Check for a partial hardware offload match. */
-if (hwol_enabled) {
+if (hwol_enabled && recirc_depth == 0) {
 if (OVS_UNLIKELY(dp_netdev_hw_flow(pmd, packet, ))) {
 /* Packet restoration failed and it was dropped, do not
  * continue processing. */
@@ -224,7 +227,9 @@ dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
 pkt_meta[i].tcp_flags = miniflow_get_tcp_flags(>mf);
 
 key->len = netdev_flow_key_size(miniflow_n_values(>mf));
-key->hash = dpif_netdev_packet_get_rss_hash_orig_pkt(packet, >mf);
+key->hash = (md_is_valid == false)
+? dpif_netdev_packet_get_rss_hash_orig_pkt(packet, >mf)
+: dpif_netdev_packet_get_rss_hash(packet, >mf);
 
 if (emc_enabled) {
 f = emc_lookup(>emc_cache, key);
@@ -262,7 +267,13 @@ dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
  * dpcls_rules[] array.
  */
 if (dpcls_key_idx > 0) {
-struct dpcls *cls = dp_netdev_pmd_lookup_dpcls(pmd, in_port);
+odp_port_t port_no;
+if (!md_is_valid) {
+port_no = in_port;
+} else {
+port_no = packets->packets[0]->md.in_port.odp_port;
+}
+struct dpcls *cls = dp_netdev_pmd_lookup_dpcls(pmd, port_no);
 if (OVS_UNLIKELY(!cls)) {
 return -1;
 }
@@ -318,7 +329,9 @@ dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
 
 /* At this point we don't return error anymore, so commit stats here. */
 uint32_t mfex_hit_cnt = __builtin_popcountll(mf_mask);
-pmd_perf_update_counter(>perf_stats, PMD_STAT_RECV, batch_size);
+pmd_perf_update_counter(>perf_stats,
+md_is_valid ? PMD_STAT_RECIRC : PMD_STAT_RECV,
+batch_size);
 pmd_perf_update_counter(>perf_stats, PMD_STAT_PHWOL_HIT, phwol_hits);
 pmd_perf_update_counter(>perf_stats, PMD_STAT_MFEX_OPT_HIT,
 mfex_hit_cnt);
-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v3 3/5] dpif-netdev-avx512: Refactor avx512 dpif and create new APIs.

2022-06-28 Thread Kumar Amber
Create new APIs for the avx512 DPIF, enabling one baseline
common code to be specialized into DPIF implementations for
"outer" processing, and "recirc" processing.

Signed-off-by: Kumar Amber 
Signed-off-by: Cian Ferriter 
Co-authored-by: Cian Ferriter 

---
v3:
- Fix comments from Harry.
---
---
 lib/dpif-netdev-avx512.c   | 32 +++-
 lib/dpif-netdev-private-dpif.c |  4 ++--
 lib/dpif-netdev-private-dpif.h | 16 
 lib/dpif-netdev.c  |  5 ++---
 4 files changed, 43 insertions(+), 14 deletions(-)

diff --git a/lib/dpif-netdev-avx512.c b/lib/dpif-netdev-avx512.c
index 11d9a0005..ecf512651 100644
--- a/lib/dpif-netdev-avx512.c
+++ b/lib/dpif-netdev-avx512.c
@@ -59,8 +59,13 @@ struct dpif_userdata {
 struct pkt_flow_meta pkt_meta[NETDEV_MAX_BURST];
 };
 
+static int32_t
+dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
+ struct dp_packet_batch *packets,
+ bool md_is_valid, odp_port_t in_port);
+
 int32_t
-dp_netdev_input_outer_avx512_probe(void)
+dp_netdev_input_avx512_probe(void)
 {
 bool avx512f_available = cpu_has_isa(OVS_CPU_ISA_X86_AVX512F);
 bool bmi2_available = cpu_has_isa(OVS_CPU_ISA_X86_BMI2);
@@ -72,10 +77,10 @@ dp_netdev_input_outer_avx512_probe(void)
 return 0;
 }
 
-int32_t
-dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread *pmd,
- struct dp_packet_batch *packets,
- odp_port_t in_port)
+static inline int32_t ALWAYS_INLINE
+dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
+ struct dp_packet_batch *packets,
+ bool md_is_valid OVS_UNUSED, odp_port_t in_port)
 {
 /* Allocate DPIF userdata. */
 if (OVS_UNLIKELY(!pmd->netdev_input_func_userdata)) {
@@ -380,5 +385,22 @@ dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread 
*pmd,
 return 0;
 }
 
+int32_t
+dp_netdev_input_avx512(struct dp_netdev_pmd_thread *pmd,
+   struct dp_packet_batch *packets,
+   odp_port_t in_port)
+{
+int ret = dp_netdev_input_avx512__(pmd, packets, false, in_port);
+return ret;
+}
+
+int32_t
+dp_netdev_input_avx512_recirc(struct dp_netdev_pmd_thread *pmd,
+  struct dp_packet_batch *packets)
+{
+int ret = dp_netdev_input_avx512__(pmd, packets, true, 0);
+return ret;
+}
+
 #endif
 #endif
diff --git a/lib/dpif-netdev-private-dpif.c b/lib/dpif-netdev-private-dpif.c
index 6f8de9094..2dc51270a 100644
--- a/lib/dpif-netdev-private-dpif.c
+++ b/lib/dpif-netdev-private-dpif.c
@@ -44,8 +44,8 @@ static struct dpif_netdev_impl_info_t dpif_impls[] = {
 
 #if (__x86_64__ && HAVE_AVX512F && HAVE_LD_AVX512_GOOD && __SSE4_2__)
 /* Only available on x86_64 bit builds with SSE 4.2 used for OVS core. */
-[DPIF_NETDEV_IMPL_AVX512] = { .input_func = dp_netdev_input_outer_avx512,
-  .probe = dp_netdev_input_outer_avx512_probe,
+[DPIF_NETDEV_IMPL_AVX512] = { .input_func = dp_netdev_input_avx512,
+  .probe = dp_netdev_input_avx512_probe,
   .name = "dpif_avx512", },
 #endif
 };
diff --git a/lib/dpif-netdev-private-dpif.h b/lib/dpif-netdev-private-dpif.h
index 15f1f36b3..37908de9a 100644
--- a/lib/dpif-netdev-private-dpif.h
+++ b/lib/dpif-netdev-private-dpif.h
@@ -74,11 +74,19 @@ dp_netdev_input(struct dp_netdev_pmd_thread *pmd,
 
 /* AVX512 enabled DPIF implementation and probe functions. */
 int32_t
-dp_netdev_input_outer_avx512_probe(void);
+dp_netdev_input_avx512_probe(void);
 
 int32_t
-dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread *pmd,
- struct dp_packet_batch *packets,
- odp_port_t in_port);
+dp_netdev_input_avx512(struct dp_netdev_pmd_thread *pmd,
+   struct dp_packet_batch *packets,
+   odp_port_t in_port);
+
+int32_t
+dp_netdev_input_avx512_recirc(struct dp_netdev_pmd_thread *pmd,
+  struct dp_packet_batch *packets);
+
+int32_t
+dp_netdev_recirculate(struct dp_netdev_pmd_thread *,
+  struct dp_packet_batch *);
 
 #endif /* netdev-private.h */
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index c4e47f715..ea95acde0 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -545,8 +545,6 @@ static void dp_netdev_execute_actions(struct 
dp_netdev_pmd_thread *pmd,
   const struct flow *flow,
   const struct nlattr *actions,
   size_t actions_len);
-static void dp_netdev_recirculate(struct dp_netdev_pmd_thread *,
-  struct dp_packet_batch *);
 
 static void dp_netdev_disable_upcall(struct dp_netdev *);
 static void dp_netdev_pmd_reload_done(struct dp_netdev_pmd_thread *pmd);
@@ -8493,11 +8491,12 @@ dp_netdev_input(struct dp

[ovs-dev] [PATCH v3 1/5] dpif-netdev: Refactor per thread recirc data allocation.

2022-06-28 Thread Kumar Amber
The refactor allows us to use *recirc_depth_get() to obtain
the depth across ovs which was previously limited to only
dpif-netdev.c.

Signed-off-by: Kumar Amber 
Signed-off-by: Cian Ferriter 
Co-authored-by: Cian Ferriter 
Acked-by: Harry van Haaren 
---
 lib/dpif-netdev-private-dpif.c | 2 ++
 lib/dpif-netdev-private-dpif.h | 5 +
 lib/dpif-netdev.c  | 3 ---
 3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/lib/dpif-netdev-private-dpif.c b/lib/dpif-netdev-private-dpif.c
index 84d4ec156..6f8de9094 100644
--- a/lib/dpif-netdev-private-dpif.c
+++ b/lib/dpif-netdev-private-dpif.c
@@ -28,6 +28,8 @@
 
 VLOG_DEFINE_THIS_MODULE(dpif_netdev_impl);
 
+DEFINE_EXTERN_PER_THREAD_DATA(recirc_depth, 0);
+
 enum dpif_netdev_impl_info_idx {
 DPIF_NETDEV_IMPL_SCALAR,
 DPIF_NETDEV_IMPL_AVX512
diff --git a/lib/dpif-netdev-private-dpif.h b/lib/dpif-netdev-private-dpif.h
index 0da639c55..15f1f36b3 100644
--- a/lib/dpif-netdev-private-dpif.h
+++ b/lib/dpif-netdev-private-dpif.h
@@ -18,6 +18,11 @@
 #define DPIF_NETDEV_PRIVATE_DPIF_H 1
 
 #include "openvswitch/types.h"
+#include "ovs-thread.h"
+
+#define MAX_RECIRC_DEPTH 6
+/* Use per thread recirc_depth to prevent recirculation loop. */
+DECLARE_EXTERN_PER_THREAD_DATA(uint32_t, recirc_depth);
 
 /* Forward declarations to avoid including files. */
 struct dp_netdev_pmd_thread;
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index ff57b3961..f65d9ee8c 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -98,9 +98,6 @@ VLOG_DEFINE_THIS_MODULE(dpif_netdev);
 #define MIN_TO_MSEC  6
 
 #define FLOW_DUMP_MAX_BATCH 50
-/* Use per thread recirc_depth to prevent recirculation loop. */
-#define MAX_RECIRC_DEPTH 6
-DEFINE_STATIC_PER_THREAD_DATA(uint32_t, recirc_depth, 0)
 
 /* Use instant packet send by default. */
 #define DEFAULT_TX_FLUSH_INTERVAL 0
-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v3 2/5] dpif-netdev: Refactor hash function to own header.

2022-06-28 Thread Kumar Amber
The refactor allows us to use the hash function across
multiple files, which was earlier restricted to
dpif-netdev.c only.

Signed-off-by: Kumar Amber 
Signed-off-by: Cian Ferriter 
Co-authored-by: Cian Ferriter 

---
v3:
- Fix minor comments from Harry.
---
---
 lib/dpif-netdev-private-dpcls.h | 23 +++
 lib/dpif-netdev.c   | 22 --
 2 files changed, 23 insertions(+), 22 deletions(-)

diff --git a/lib/dpif-netdev-private-dpcls.h b/lib/dpif-netdev-private-dpcls.h
index 2a9279437..1b37ecb16 100644
--- a/lib/dpif-netdev-private-dpcls.h
+++ b/lib/dpif-netdev-private-dpcls.h
@@ -25,6 +25,7 @@
 
 #include "cmap.h"
 #include "openvswitch/thread.h"
+#include "dpif-netdev-private-dpif.h"
 
 #ifdef  __cplusplus
 extern "C" {
@@ -125,6 +126,28 @@ dpif_netdev_packet_get_rss_hash_orig_pkt(struct dp_packet 
*packet,
 return hash;
 }
 
+static inline uint32_t
+dpif_netdev_packet_get_rss_hash(struct dp_packet *packet,
+const struct miniflow *mf)
+{
+uint32_t hash, recirc_depth;
+
+if (OVS_LIKELY(dp_packet_rss_valid(packet))) {
+hash = dp_packet_get_rss_hash(packet);
+} else {
+hash = miniflow_hash_5tuple(mf, 0);
+dp_packet_set_rss_hash(packet, hash);
+}
+
+/* The RSS hash must account for the recirculation depth to avoid
+ * collisions in the exact match cache */
+recirc_depth = *recirc_depth_get_unsafe();
+if (OVS_UNLIKELY(recirc_depth)) {
+hash = hash_finish(hash, recirc_depth);
+}
+return hash;
+}
+
 /* Allow other implementations to call dpcls_lookup() for subtable search. */
 bool
 dpcls_lookup(struct dpcls *cls, const struct netdev_flow_key *keys[],
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index f65d9ee8c..c4e47f715 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -7807,28 +7807,6 @@ dp_netdev_upcall(struct dp_netdev_pmd_thread *pmd, 
struct dp_packet *packet_,
  actions, wc, put_actions, dp->upcall_aux);
 }
 
-static inline uint32_t
-dpif_netdev_packet_get_rss_hash(struct dp_packet *packet,
-const struct miniflow *mf)
-{
-uint32_t hash, recirc_depth;
-
-if (OVS_LIKELY(dp_packet_rss_valid(packet))) {
-hash = dp_packet_get_rss_hash(packet);
-} else {
-hash = miniflow_hash_5tuple(mf, 0);
-dp_packet_set_rss_hash(packet, hash);
-}
-
-/* The RSS hash must account for the recirculation depth to avoid
- * collisions in the exact match cache */
-recirc_depth = *recirc_depth_get_unsafe();
-if (OVS_UNLIKELY(recirc_depth)) {
-hash = hash_finish(hash, recirc_depth);
-}
-return hash;
-}
-
 struct packet_batch_per_flow {
 unsigned int byte_count;
 uint16_t tcp_flags;
-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v3 0/5] DPIF AVX512 Recirculation

2022-06-28 Thread Kumar Amber
The patch adds support for recirculation of packets in AVX512
DPIF which would allow for processing tunneled packets.

---
v3:
* Fix comments from Harry.
V2:
* Split DPIF AVX512 into separate patchset.
---

Kumar Amber (5):
  dpif-netdev: Refactor per thread recirc data allocation.
  dpif-netdev: Refactor hash function to own header.
  dpif-netdev-avx512: Refactor avx512 dpif and create new APIs.
  dpif-netdev: Add function pointer for dpif re-circulate.
  dpif-netdev-avx512: Add inner packet handling to dpif.

 lib/dpif-netdev-avx512.c | 55 +++-
 lib/dpif-netdev-private-dpcls.h  | 23 
 lib/dpif-netdev-private-dpif.c   | 63 +++-
 lib/dpif-netdev-private-dpif.h   | 39 ++--
 lib/dpif-netdev-private-thread.h |  3 ++
 lib/dpif-netdev.c| 49 ++---
 6 files changed, 178 insertions(+), 54 deletions(-)

-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v10 4/4] dpif-netdev/mfex: Add ipv6 profile based hashing.

2022-05-31 Thread Kumar Amber
For packets which don't already have a hash calculated,
miniflow_hash_5tuple() calculates the hash of a packet
using the previously built miniflow.

This commit adds IPv6 profile specific hashing which
uses fixed offsets into the packet to improve hashing
performance.

Signed-off-by: Kumar Amber 
Acked-by: Harry van Haaren 
---
 lib/dp-packet.h  | 43 
 lib/dpif-netdev-extract-avx512.c |  8 +++---
 lib/flow.c   |  4 +++
 3 files changed, 51 insertions(+), 4 deletions(-)

diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index bddaa2b5d..eea5a9215 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -1117,6 +1117,49 @@ dp_packet_update_rss_hash_ipv4_tcp_udp(struct dp_packet 
*packet)
 dp_packet_set_rss_hash(packet, hash);
 }
 
+static inline void ALWAYS_INLINE
+dp_packet_update_rss_hash_ipv6_tcp_udp(struct dp_packet *packet)
+{
+if (dp_packet_rss_valid(packet)) {
+return;
+}
+
+const uint8_t *pkt = dp_packet_data(packet);
+const uint16_t l3_ofs = packet->l3_ofs;
+uint32_t ipv6_src_off = offsetof(struct ovs_16aligned_ip6_hdr, ip6_src);
+uint32_t ipv6_dst_off = offsetof(struct ovs_16aligned_ip6_hdr, ip6_dst);
+uint32_t ipv6_proto_off = offsetof(struct ovs_16aligned_ip6_hdr,
+   ip6_ctlun.ip6_un1.ip6_un1_nxt);
+const void *ipv6_src_l = [l3_ofs + ipv6_src_off];
+const void *ipv6_src_h = [l3_ofs + ipv6_src_off + 8];
+const void *ipv6_dst_l = [l3_ofs + ipv6_dst_off];
+const void *ipv6_dst_h = [l3_ofs + ipv6_dst_off + 8];
+const void *l4_ports = [packet->l4_ofs];
+uint64_t ipv6_src_lo, ipv6_src_hi;
+uint64_t ipv6_dst_lo, ipv6_dst_hi;
+uint32_t ports;
+uint32_t hash = 0;
+
+memcpy(_src_lo, ipv6_src_l, sizeof ipv6_src_lo);
+memcpy(_src_hi, ipv6_src_h, sizeof ipv6_src_hi);
+memcpy(_dst_lo, ipv6_dst_l, sizeof ipv6_dst_lo);
+memcpy(_dst_hi, ipv6_dst_h, sizeof ipv6_dst_hi);
+memcpy(, l4_ports, sizeof ports);
+
+/* IPv6 Src and Dst. */
+hash = hash_add64(hash, ipv6_src_lo);
+hash = hash_add64(hash, ipv6_src_hi);
+hash = hash_add64(hash, ipv6_dst_lo);
+hash = hash_add64(hash, ipv6_dst_hi);
+/* IPv6 proto. */
+hash = hash_add(hash, pkt[l3_ofs + ipv6_proto_off]);
+/* L4 ports. */
+hash = hash_add(hash, ports);
+hash = hash_finish(hash, 42);
+
+dp_packet_set_rss_hash(packet, hash);
+}
+
 #ifdef  __cplusplus
 }
 #endif
diff --git a/lib/dpif-netdev-extract-avx512.c b/lib/dpif-netdev-extract-avx512.c
index 0d540853b..186aa84e5 100644
--- a/lib/dpif-netdev-extract-avx512.c
+++ b/lib/dpif-netdev-extract-avx512.c
@@ -884,7 +884,7 @@ mfex_avx512_process(struct dp_packet_batch *packets,
 
 /* Process UDP header. */
 mfex_handle_ipv6_l4((void *)[54], [9]);
-
+dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
 } break;
 
 case PROFILE_ETH_IPV6_TCP: {
@@ -907,7 +907,7 @@ mfex_avx512_process(struct dp_packet_batch *packets,
 continue;
 }
 mfex_handle_tcp_flags(tcp, [9]);
-
+dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
 } break;
 
 case PROFILE_ETH_VLAN_IPV6_TCP: {
@@ -933,7 +933,7 @@ mfex_avx512_process(struct dp_packet_batch *packets,
 continue;
 }
 mfex_handle_tcp_flags(tcp, [10]);
-
+dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
 } break;
 
 case PROFILE_ETH_VLAN_IPV6_UDP: {
@@ -954,7 +954,7 @@ mfex_avx512_process(struct dp_packet_batch *packets,
 
 /* Process UDP header. */
 mfex_handle_ipv6_l4((void *)[58], [10]);
-
+dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
 } break;
 default:
 break;
diff --git a/lib/flow.c b/lib/flow.c
index 8ab9df3fc..b1e1fb34d 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -1019,6 +1019,8 @@ miniflow_extract(struct dp_packet *packet, struct 
miniflow *dst)
 miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
 if (dl_type == htons(ETH_TYPE_IP)) {
 dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
+} else if (dl_type == htons(ETH_TYPE_IPV6)) {
+dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
 }
 }
 }
@@ -1032,6 +1034,8 @@ miniflow_extract(struct dp_packet *packet, struct 
miniflow *dst)
 miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
 if (dl_type == htons(ETH_TYPE_IP)) {
 dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
+} else if (dl_type == htons(ETH_TYPE_IPV6)) {
+dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
 }
 }
 } else if (OVS_LIKELY(nw_proto == IPPROTO_SCTP)) {
-- 

[ovs-dev] [PATCH v10 3/4] dpif-netdev/mfex: Add AVX512 ipv6 traffic profiles

2022-05-31 Thread Kumar Amber
Add AVX512 Ipv6 optimized profile for vlan/IPv6/UDP and
vlan/IPv6/TCP, IPv6/UDP and IPv6/TCP.

The MFEX autovalidation test-case already has IPv6 support for
validating against the scalar mfex.

Signed-off-by: Kumar Amber 
Signed-off-by: Harry van Haaren 
Co-authored-by: Harry van Haaren 

---
v10:
- Rebase on top of Partial Avx512 changes patch.
v9:
- Fix UBSan misaligned memory access.
v8:
- Rename defines for packet offsets.
v7:
- Fix length checks for plen.
v5:
- Add variable length checks for IPv6 and TCP.
v4:
- Rebase to master.
v2:
- Fix CI build error.
- Fix check-patch sign-offs.
---
---
 NEWS  |   5 +
 acinclude.m4  |   1 +
 lib/automake.mk   |   5 +-
 lib/dpif-netdev-extract-avx512.c  | 303 +-
 lib/dpif-netdev-private-extract.c |  51 -
 lib/dpif-netdev-private-extract.h |  20 ++
 6 files changed, 380 insertions(+), 5 deletions(-)

diff --git a/NEWS b/NEWS
index 9fe3f44f4..db17752a1 100644
--- a/NEWS
+++ b/NEWS
@@ -32,6 +32,11 @@ Post-v2.17.0
- DPDK:
  * OVS validated with DPDK 21.11.1.  It is recommended to use this version
until further releases.
+   - Userspace datapath:
+ * Add AVX512 optimized profiles to miniflow extract for IPv6/UDP and
+   IPv6/TCP.
+ * Add AVX512 optimized profiles to miniflow extract for VLAN/IPv6/UDP
+   and VLAN/IPv6/TCP.
 
 
 v2.17.0 - 17 Feb 2022
diff --git a/acinclude.m4 b/acinclude.m4
index 7b2889a40..b518aa624 100644
--- a/acinclude.m4
+++ b/acinclude.m4
@@ -78,6 +78,7 @@ AC_DEFUN([OVS_CHECK_AVX512], [
   OVS_CHECK_BINUTILS_AVX512
   OVS_CONDITIONAL_CC_OPTION_DEFINE([-mavx512f], [HAVE_AVX512F])
   OVS_CONDITIONAL_CC_OPTION_DEFINE([-mavx512bw], [HAVE_AVX512BW])
+  OVS_CONDITIONAL_CC_OPTION_DEFINE([-mavx512vl], [HAVE_AVX512VL])
   OVS_CONDITIONAL_CC_OPTION_DEFINE([-mavx512vbmi], [HAVE_AVX512VBMI])
   OVS_CONDITIONAL_CC_OPTION_DEFINE([-mavx512vpopcntdq], [HAVE_AVX512VPOPCNTDQ])
 ])
diff --git a/lib/automake.mk b/lib/automake.mk
index cb50578eb..d3e4d475d 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -40,11 +40,14 @@ lib_libopenvswitchavx512_la_SOURCES = \
lib/cpu.h \
lib/dpif-netdev-avx512.c
 if HAVE_AVX512BW
+if HAVE_AVX512VL
 lib_libopenvswitchavx512_la_CFLAGS += \
-   -mavx512bw
+   -mavx512bw \
+   -mavx512vl
 lib_libopenvswitchavx512_la_SOURCES += \
lib/dpif-netdev-extract-avx512.c \
lib/dpif-netdev-lookup-avx512-gather.c
+endif # HAVE_AVX512VL
 endif # HAVE_AVX512BW
 lib_libopenvswitchavx512_la_LDFLAGS = \
-static
diff --git a/lib/dpif-netdev-extract-avx512.c b/lib/dpif-netdev-extract-avx512.c
index 7c897eab3..0d540853b 100644
--- a/lib/dpif-netdev-extract-avx512.c
+++ b/lib/dpif-netdev-extract-avx512.c
@@ -49,6 +49,7 @@
 #include "dpif-netdev-private-extract.h"
 #include "dpif-netdev-private-flow.h"
 #include "dp-packet.h"
+#include "packets.h"
 
 /* AVX512-BW level permutex2var_epi8 emulation. */
 static inline __m512i
@@ -166,6 +167,7 @@ _mm512_maskz_permutexvar_epi8_selector(__mmask64 k_shuf, 
__m512i v_shuf,
 #define PATTERN_ETHERTYPE_MASK PATTERN_ETHERTYPE_GEN(0xFF, 0xFF)
 #define PATTERN_ETHERTYPE_IPV4 PATTERN_ETHERTYPE_GEN(0x08, 0x00)
 #define PATTERN_ETHERTYPE_DT1Q PATTERN_ETHERTYPE_GEN(0x81, 0x00)
+#define PATTERN_ETHERTYPE_IPV6 PATTERN_ETHERTYPE_GEN(0x86, 0xDD)
 
 /* VLAN (Dot1Q) patterns and masks. */
 #define PATTERN_DT1Q_MASK   \
@@ -230,6 +232,40 @@ _mm512_maskz_permutexvar_epi8_selector(__mmask64 k_shuf, 
__m512i v_shuf,
   NU, NU, NU, NU, NU, NU, NU, NU, 38, 39, 40, 41, NU, NU, NU, NU, /* TCP */   \
   NU, NU, NU, NU, NU, NU, NU, NU, /* Unused. */
 
+/* Generator for checking IPv6 ver. */
+#define PATTERN_IPV6_GEN(VER_TRC, PROTO)  \
+  VER_TRC, /* Version: 4bits and Traffic class: 4bits. */ \
+  0, 0, 0, /* Traffic class: 4bits and Flow Label: 24bits. */ \
+  0, 0,/* Payload length 16bits. */   \
+  PROTO, 0,/* Next Header 8bits and Hop limit 8bits. */   \
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Src IP: 128bits. */  \
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Dst IP: 128bits. */
+
+#define PATTERN_IPV6_MASK PATTERN_IPV6_GEN(0xF0, 0xFF)
+#define PATTERN_IPV6_UDP PATTERN_IPV6_GEN(0x60, 0x11)
+#define PATTERN_IPV6_TCP PATTERN_IPV6_GEN(0x60, 0x06)
+
+#define PATTERN_IPV6_SHUFFLE  \
+   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, NU, NU, /* Ether */ \
+  22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, /* IPv6 */  \
+  38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, /* IPv6 */  \
+  NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, /* Unused */
+
+/* VLAN (Dot1Q) patterns 

[ovs-dev] [PATCH v10 2/4] mfex_avx512: Calculate miniflow_bits at compile time.

2022-05-31 Thread Kumar Amber
The patch removes magic numbers from miniflow_bits
and calculates the bits at compile time. This also
makes it easier to handle any ABI changes.

Signed-off-by: Kumar Amber 
---
 lib/dpif-netdev-extract-avx512.c | 21 +
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/lib/dpif-netdev-extract-avx512.c b/lib/dpif-netdev-extract-avx512.c
index a740e0e27..7c897eab3 100644
--- a/lib/dpif-netdev-extract-avx512.c
+++ b/lib/dpif-netdev-extract-avx512.c
@@ -285,6 +285,19 @@ _mm512_maskz_permutexvar_epi8_selector(__mmask64 k_shuf, 
__m512i v_shuf,
 #define PKT_MIN_ETH_IPV4_TCP  (PKT_OFFSET_IPV4_L4 + TCP_HEADER_LEN)
 #define PKT_MIN_ETH_VLAN_IPV4_TCP (PKT_OFFSET_VLAN_IPV4_L4 + TCP_HEADER_LEN)
 
+/* MF bits. */
+#define MF_BIT(field) (MAP_1 << ((offsetof(struct flow, field) / 8) % \
+   MAP_T_BITS))
+
+#define MF_ETH(MF_BIT(dp_hash) | MF_BIT(in_port) | MF_BIT(packet_type)\
+   | MF_BIT(dl_dst) | MF_BIT(dl_src)| MF_BIT(dl_type))
+
+#define MF_ETH_VLAN   (MF_ETH | MF_BIT(vlans))
+#define MF_IPV4_UDP   (MF_BIT(nw_src) | MF_BIT(ipv6_label) | MF_BIT(tp_src) | \
+   MF_BIT(tp_dst))
+
+#define MF_IPV4_TCP   (MF_IPV4_UDP | MF_BIT(tcp_flags) | MF_BIT(arp_tha.ea[2]))
+
 /* This union allows initializing static data as u8, but easily loading it
  * into AVX512 registers too. The union ensures proper alignment for the zmm.
  */
@@ -382,7 +395,7 @@ static const struct mfex_profile 
mfex_profiles[PROFILE_COUNT] =
 .strip_mask.u8_data = { PATTERN_STRIP_IPV4_MASK },
 .store_kmsk = PATTERN_IPV4_UDP_KMASK,
 
-.mf_bits = { 0x18a0, 0x00040401},
+.mf_bits = { MF_ETH, MF_IPV4_UDP},
 .dp_pkt_offs = {
 0, UINT16_MAX, PKT_OFFSET_L3, PKT_OFFSET_IPV4_L4,
 },
@@ -405,7 +418,7 @@ static const struct mfex_profile 
mfex_profiles[PROFILE_COUNT] =
 .strip_mask.u8_data = { PATTERN_STRIP_IPV4_MASK },
 .store_kmsk = PATTERN_IPV4_TCP_KMASK,
 
-.mf_bits = { 0x18a0, 0x00044401},
+.mf_bits = { MF_ETH, MF_IPV4_TCP},
 .dp_pkt_offs = {
 0, UINT16_MAX, PKT_OFFSET_L3, PKT_OFFSET_IPV4_L4,
 },
@@ -424,7 +437,7 @@ static const struct mfex_profile 
mfex_profiles[PROFILE_COUNT] =
 .strip_mask.u8_data = { PATTERN_STRIP_DOT1Q_IPV4_MASK },
 .store_kmsk = PATTERN_DT1Q_IPV4_UDP_KMASK,
 
-.mf_bits = { 0x38a0, 0x00040401},
+.mf_bits = { MF_ETH_VLAN, MF_IPV4_UDP},
 .dp_pkt_offs = {
 PKT_OFFSET_L2_PAD_SIZE, UINT16_MAX, PKT_OFFSET_VLAN_L3,
 PKT_OFFSET_VLAN_IPV4_L4,
@@ -450,7 +463,7 @@ static const struct mfex_profile 
mfex_profiles[PROFILE_COUNT] =
 .strip_mask.u8_data = { PATTERN_STRIP_DOT1Q_IPV4_MASK },
 .store_kmsk = PATTERN_DT1Q_IPV4_TCP_KMASK,
 
-.mf_bits = { 0x38a0, 0x00044401},
+.mf_bits = { MF_ETH_VLAN, MF_IPV4_TCP},
 .dp_pkt_offs = {
 PKT_OFFSET_L2_PAD_SIZE, UINT16_MAX, PKT_OFFSET_VLAN_L3,
 PKT_OFFSET_VLAN_IPV4_L4,
-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v10 1/4] mfex_avx512: Calculate pkt offsets at compile time.

2022-05-31 Thread Kumar Amber
The patch removes the magic numbers used for packet offsets and
minimum packet lengths and instead calculates them at
compile time.

Signed-off-by: Kumar Amber 

---
v8:
- Rename offset defines.
---
---
 lib/dpif-netdev-extract-avx512.c | 29 +
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/lib/dpif-netdev-extract-avx512.c b/lib/dpif-netdev-extract-avx512.c
index 12271be17..a740e0e27 100644
--- a/lib/dpif-netdev-extract-avx512.c
+++ b/lib/dpif-netdev-extract-avx512.c
@@ -274,6 +274,17 @@ _mm512_maskz_permutexvar_epi8_selector(__mmask64 k_shuf, 
__m512i v_shuf,
 NC, NC, NC, NC, 0xBF, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC,   \
 NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC
 
+#define PKT_OFFSET_L2_PAD_SIZE(ETH_HEADER_LEN)
+#define PKT_OFFSET_L3 (ETH_HEADER_LEN)
+#define PKT_OFFSET_VLAN_L3(ETH_HEADER_LEN + VLAN_HEADER_LEN)
+#define PKT_OFFSET_IPV4_L4(ETH_HEADER_LEN + IP_HEADER_LEN)
+#define PKT_OFFSET_VLAN_IPV4_L4   (PKT_OFFSET_IPV4_L4 + VLAN_HEADER_LEN)
+
+#define PKT_MIN_ETH_IPV4_UDP  (PKT_OFFSET_IPV4_L4 + UDP_HEADER_LEN)
+#define PKT_MIN_ETH_VLAN_IPV4_UDP (PKT_OFFSET_VLAN_IPV4_L4 + UDP_HEADER_LEN)
+#define PKT_MIN_ETH_IPV4_TCP  (PKT_OFFSET_IPV4_L4 + TCP_HEADER_LEN)
+#define PKT_MIN_ETH_VLAN_IPV4_TCP (PKT_OFFSET_VLAN_IPV4_L4 + TCP_HEADER_LEN)
+
 /* This union allows initializing static data as u8, but easily loading it
  * into AVX512 registers too. The union ensures proper alignment for the zmm.
  */
@@ -373,9 +384,9 @@ static const struct mfex_profile 
mfex_profiles[PROFILE_COUNT] =
 
 .mf_bits = { 0x18a0, 0x00040401},
 .dp_pkt_offs = {
-0, UINT16_MAX, 14, 34,
+0, UINT16_MAX, PKT_OFFSET_L3, PKT_OFFSET_IPV4_L4,
 },
-.dp_pkt_min_size = 42,
+.dp_pkt_min_size = PKT_MIN_ETH_IPV4_UDP,
 },
 
 [PROFILE_ETH_IPV4_TCP] = {
@@ -396,9 +407,9 @@ static const struct mfex_profile 
mfex_profiles[PROFILE_COUNT] =
 
 .mf_bits = { 0x18a0, 0x00044401},
 .dp_pkt_offs = {
-0, UINT16_MAX, 14, 34,
+0, UINT16_MAX, PKT_OFFSET_L3, PKT_OFFSET_IPV4_L4,
 },
-.dp_pkt_min_size = 54,
+.dp_pkt_min_size = PKT_MIN_ETH_IPV4_TCP,
 },
 
 [PROFILE_ETH_VLAN_IPV4_UDP] = {
@@ -415,9 +426,10 @@ static const struct mfex_profile 
mfex_profiles[PROFILE_COUNT] =
 
 .mf_bits = { 0x38a0, 0x00040401},
 .dp_pkt_offs = {
-14, UINT16_MAX, 18, 38,
+PKT_OFFSET_L2_PAD_SIZE, UINT16_MAX, PKT_OFFSET_VLAN_L3,
+PKT_OFFSET_VLAN_IPV4_L4,
 },
-.dp_pkt_min_size = 46,
+.dp_pkt_min_size = PKT_MIN_ETH_VLAN_IPV4_UDP,
 },
 
 [PROFILE_ETH_VLAN_IPV4_TCP] = {
@@ -440,9 +452,10 @@ static const struct mfex_profile 
mfex_profiles[PROFILE_COUNT] =
 
 .mf_bits = { 0x38a0, 0x00044401},
 .dp_pkt_offs = {
-14, UINT16_MAX, 18, 38,
+PKT_OFFSET_L2_PAD_SIZE, UINT16_MAX, PKT_OFFSET_VLAN_L3,
+PKT_OFFSET_VLAN_IPV4_L4,
 },
-.dp_pkt_min_size = 58,
+.dp_pkt_min_size = PKT_MIN_ETH_VLAN_IPV4_TCP,
 },
 };
 
-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v10 0/4] MFEX Optimizations IPv6 + Hashing Optimizations

2022-05-31 Thread Kumar Amber
The patchset introduces IPv6 optimized MFEX profiles
with AVX512 which can deliver up to a 20% to 30% gain in
performance over the existing scalar data-path.

Hashing optimizations are also included which can further
improve performance by approximately 10%.

The patch also removes the magic numbers for MF bits, packet offsets
and packet lengths.

---
v10:
- Rebase onto Cian's partial AVX512 build patch.
v9:
- Fix UBSan unaligned memory load.
v8:
- Rename packet offset defines to align with packet struct.
v7:
- Remove magic numbers from AVX512 Profiles.
v5:
- Add Ipv6 and TCP packet length checks.
v4:
- rebase to master.
- use static key lengths for different packet types.
v3:
- rebase to master.
v2:
- fix the CI build.
- fix check-patch for co-author.
---

Kumar Amber (4):
  mfex_avx512: Calculate pkt offsets at compile time.
  mfex_avx512: Calculate miniflow_bits at compile time.
  dpif-netdev/mfex: Add AVX512 ipv6 traffic profiles
  dpif-netdev/mfex: Add ipv6 profile based hashing.

 NEWS  |   5 +
 acinclude.m4  |   1 +
 lib/automake.mk   |   5 +-
 lib/dp-packet.h   |  43 
 lib/dpif-netdev-extract-avx512.c  | 349 --
 lib/dpif-netdev-private-extract.c |  51 -
 lib/dpif-netdev-private-extract.h |  20 ++
 lib/flow.c|   4 +
 8 files changed, 463 insertions(+), 15 deletions(-)

-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v9 4/4] dpif-netdev/mfex: Add ipv6 profile based hashing.

2022-05-31 Thread Kumar Amber
For packets which don't already have a hash calculated,
miniflow_hash_5tuple() calculates the hash of a packet
using the previously built miniflow.

This commit adds IPv6 profile specific hashing which
uses fixed offsets into the packet to improve hashing
performance.

Signed-off-by: Kumar Amber 
Acked-by: Harry van Haaren 
---
 lib/dp-packet.h  | 43 
 lib/dpif-netdev-extract-avx512.c |  8 +++---
 lib/flow.c   |  4 +++
 3 files changed, 51 insertions(+), 4 deletions(-)

diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index 7c5da258a..277bb51b8 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -1117,6 +1117,49 @@ dp_packet_update_rss_hash_ipv4_tcp_udp(struct dp_packet 
*packet)
 dp_packet_set_rss_hash(packet, hash);
 }
 
+static inline void ALWAYS_INLINE
+dp_packet_update_rss_hash_ipv6_tcp_udp(struct dp_packet *packet)
+{
+if (dp_packet_rss_valid(packet)) {
+return;
+}
+
+const uint8_t *pkt = dp_packet_data(packet);
+const uint16_t l3_ofs = packet->l3_ofs;
+uint32_t ipv6_src_off = offsetof(struct ovs_16aligned_ip6_hdr, ip6_src);
+uint32_t ipv6_dst_off = offsetof(struct ovs_16aligned_ip6_hdr, ip6_dst);
+uint32_t ipv6_proto_off = offsetof(struct ovs_16aligned_ip6_hdr,
+   ip6_ctlun.ip6_un1.ip6_un1_nxt);
+const void *ipv6_src_l = &pkt[l3_ofs + ipv6_src_off];
+const void *ipv6_src_h = &pkt[l3_ofs + ipv6_src_off + 8];
+const void *ipv6_dst_l = &pkt[l3_ofs + ipv6_dst_off];
+const void *ipv6_dst_h = &pkt[l3_ofs + ipv6_dst_off + 8];
+const void *l4_ports = &pkt[packet->l4_ofs];
+uint64_t ipv6_src_lo, ipv6_src_hi;
+uint64_t ipv6_dst_lo, ipv6_dst_hi;
+uint32_t ports;
+uint32_t hash = 0;
+
+memcpy(&ipv6_src_lo, ipv6_src_l, sizeof ipv6_src_lo);
+memcpy(&ipv6_src_hi, ipv6_src_h, sizeof ipv6_src_hi);
+memcpy(&ipv6_dst_lo, ipv6_dst_l, sizeof ipv6_dst_lo);
+memcpy(&ipv6_dst_hi, ipv6_dst_h, sizeof ipv6_dst_hi);
+memcpy(&ports, l4_ports, sizeof ports);
+
+/* IPv6 Src and Dst. */
+hash = hash_add64(hash, ipv6_src_lo);
+hash = hash_add64(hash, ipv6_src_hi);
+hash = hash_add64(hash, ipv6_dst_lo);
+hash = hash_add64(hash, ipv6_dst_hi);
+/* IPv6 proto. */
+hash = hash_add(hash, pkt[l3_ofs + ipv6_proto_off]);
+/* L4 ports. */
+hash = hash_add(hash, ports);
+hash = hash_finish(hash, 42);
+
+dp_packet_set_rss_hash(packet, hash);
+}
+
 #ifdef  __cplusplus
 }
 #endif
diff --git a/lib/dpif-netdev-extract-avx512.c b/lib/dpif-netdev-extract-avx512.c
index b6b7294fc..de823e32a 100644
--- a/lib/dpif-netdev-extract-avx512.c
+++ b/lib/dpif-netdev-extract-avx512.c
@@ -866,7 +866,7 @@ mfex_avx512_process(struct dp_packet_batch *packets,
 
 /* Process UDP header. */
 mfex_handle_ipv6_l4((void *)&pkt[54], &blocks[9]);
-
+dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
 } break;
 
 case PROFILE_ETH_IPV6_TCP: {
@@ -889,7 +889,7 @@ mfex_avx512_process(struct dp_packet_batch *packets,
 continue;
 }
 mfex_handle_tcp_flags(tcp, &blocks[9]);
-
+dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
 } break;
 
 case PROFILE_ETH_VLAN_IPV6_TCP: {
@@ -915,7 +915,7 @@ mfex_avx512_process(struct dp_packet_batch *packets,
 continue;
 }
 mfex_handle_tcp_flags(tcp, &blocks[10]);
-
+dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
 } break;
 
 case PROFILE_ETH_VLAN_IPV6_UDP: {
@@ -936,7 +936,7 @@ mfex_avx512_process(struct dp_packet_batch *packets,
 
 /* Process UDP header. */
 mfex_handle_ipv6_l4((void *)&pkt[58], &blocks[10]);
-
+dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
 } break;
 default:
 break;
diff --git a/lib/flow.c b/lib/flow.c
index 8ab9df3fc..b1e1fb34d 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -1019,6 +1019,8 @@ miniflow_extract(struct dp_packet *packet, struct 
miniflow *dst)
 miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
 if (dl_type == htons(ETH_TYPE_IP)) {
 dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
+} else if (dl_type == htons(ETH_TYPE_IPV6)) {
+dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
 }
 }
 }
@@ -1032,6 +1034,8 @@ miniflow_extract(struct dp_packet *packet, struct 
miniflow *dst)
 miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
 if (dl_type == htons(ETH_TYPE_IP)) {
 dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
+} else if (dl_type == htons(ETH_TYPE_IPV6)) {
+dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
 }
 }
 } else if (OVS_LIKELY(nw_proto == IPPROTO_SCTP)) {
-- 

[ovs-dev] [PATCH v9 3/4] dpif-netdev/mfex: Add AVX512 ipv6 traffic profiles

2022-05-31 Thread Kumar Amber
Add AVX512 IPv6 optimized profiles for VLAN/IPv6/UDP,
VLAN/IPv6/TCP, IPv6/UDP and IPv6/TCP.

The MFEX autovalidation test-case already has IPv6 support for
validating against the scalar mfex.

Signed-off-by: Kumar Amber 
Signed-off-by: Harry van Haaren 
Co-authored-by: Harry van Haaren 

---
v9:
- Fix UBSan unaligned memory access.
v8:
- Rename defines for packet offsets.
v7:
- Fix length checks for plen.
v5:
- Add variable length checks for IPv6 and TCP.
v4:
- Rebase to master.
v2:
- Fix CI build error.
- Fix check-patch sign-offs.
---
---
 NEWS  |   6 +-
 lib/automake.mk   |   1 +
 lib/dpif-netdev-extract-avx512.c  | 303 +-
 lib/dpif-netdev-private-extract.c |  51 -
 lib/dpif-netdev-private-extract.h |  12 ++
 5 files changed, 368 insertions(+), 5 deletions(-)

diff --git a/NEWS b/NEWS
index eece0d0b2..f963967d8 100644
--- a/NEWS
+++ b/NEWS
@@ -29,7 +29,11 @@ Post-v2.17.0
- Windows:
  * Conntrack support for TCPv6, UDPv6, ICMPv6, FTPv6.
  * IPv6 Geneve tunnel support.
-
+   - Userspace datapath:
+ * Add AVX512 optimized profiles to miniflow extract for IPv6/UDP and
+   IPv6/TCP.
+ * Add AVX512 optimized profiles to miniflow extract for VLAN/IPv6/UDP
+   and VLAN/IPv6/TCP.
 
 v2.17.0 - 17 Feb 2022
 -
diff --git a/lib/automake.mk b/lib/automake.mk
index a23cdc4ad..139220c23 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -33,6 +33,7 @@ lib_libopenvswitchavx512_la_CFLAGS = \
-mavx512f \
-mavx512bw \
-mavx512dq \
+   -mavx512vl \
-mbmi \
-mbmi2 \
-fPIC \
diff --git a/lib/dpif-netdev-extract-avx512.c b/lib/dpif-netdev-extract-avx512.c
index 388df7c42..b6b7294fc 100644
--- a/lib/dpif-netdev-extract-avx512.c
+++ b/lib/dpif-netdev-extract-avx512.c
@@ -49,6 +49,7 @@
 #include "dpif-netdev-private-extract.h"
 #include "dpif-netdev-private-flow.h"
 #include "dp-packet.h"
+#include "packets.h"
 
 /* AVX512-BW level permutex2var_epi8 emulation. */
 static inline __m512i
@@ -138,6 +139,7 @@ _mm512_maskz_permutexvar_epi8_wrap(__mmask64 kmask, __m512i 
idx, __m512i a)
 #define PATTERN_ETHERTYPE_MASK PATTERN_ETHERTYPE_GEN(0xFF, 0xFF)
 #define PATTERN_ETHERTYPE_IPV4 PATTERN_ETHERTYPE_GEN(0x08, 0x00)
 #define PATTERN_ETHERTYPE_DT1Q PATTERN_ETHERTYPE_GEN(0x81, 0x00)
+#define PATTERN_ETHERTYPE_IPV6 PATTERN_ETHERTYPE_GEN(0x86, 0xDD)
 
 /* VLAN (Dot1Q) patterns and masks. */
 #define PATTERN_DT1Q_MASK   \
@@ -202,6 +204,40 @@ _mm512_maskz_permutexvar_epi8_wrap(__mmask64 kmask, 
__m512i idx, __m512i a)
   NU, NU, NU, NU, NU, NU, NU, NU, 38, 39, 40, 41, NU, NU, NU, NU, /* TCP */   \
   NU, NU, NU, NU, NU, NU, NU, NU, /* Unused. */
 
+/* Generator for checking IPv6 ver. */
+#define PATTERN_IPV6_GEN(VER_TRC, PROTO)  \
+  VER_TRC, /* Version: 4bits and Traffic class: 4bits. */ \
+  0, 0, 0, /* Traffic class: 4bits and Flow Label: 24bits. */ \
+  0, 0,/* Payload length 16bits. */   \
+  PROTO, 0,/* Next Header 8bits and Hop limit 8bits. */   \
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Src IP: 128bits. */  \
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Dst IP: 128bits. */
+
+#define PATTERN_IPV6_MASK PATTERN_IPV6_GEN(0xF0, 0xFF)
+#define PATTERN_IPV6_UDP PATTERN_IPV6_GEN(0x60, 0x11)
+#define PATTERN_IPV6_TCP PATTERN_IPV6_GEN(0x60, 0x06)
+
+#define PATTERN_IPV6_SHUFFLE  \
+   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, NU, NU, /* Ether */ \
+  22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, /* IPv6 */  \
+  38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, /* IPv6 */  \
+  NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, /* Unused */
+
+/* VLAN (Dot1Q) patterns and masks. */
+#define PATTERN_DT1Q_MASK \
+  0x00, 0x00, 0xFF, 0xFF,
+#define PATTERN_DT1Q_IPV6 \
+  0x00, 0x00, 0x86, 0xDD,
+
+#define PATTERN_DT1Q_IPV6_SHUFFLE \
+  /* Ether (2 blocks): Note that *VLAN* type is written here. */  \
+  0,  1,  2,  3,  4,  5,  6,  7, 8,  9, 10, 11, 16, 17,  0,  0,   \
+  /* VLAN (1 block): Note that the *EtherHdr->Type* is written here. */   \
+  12, 13, 14, 15, 0, 0, 0, 0, \
+  26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, /* IPv6 */  \
+  42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, /* IPv6 */  \
+  NU, NU, NU, NU, NU, NU, NU, NU, /* Unused */
+
 /* Generation of K-mask bitmask values, to zero out data in result. Note that
  * t

[ovs-dev] [PATCH v9 2/4] mfex_avx512: Calculate miniflow_bits at compile time.

2022-05-31 Thread Kumar Amber
The patch removes magic numbers from miniflow_bits
and calculates the bits at compile time. This also
makes it easier to handle any ABI changes.

Signed-off-by: Kumar Amber 
---
 lib/dpif-netdev-extract-avx512.c | 21 +
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/lib/dpif-netdev-extract-avx512.c b/lib/dpif-netdev-extract-avx512.c
index 211c4cbe4..388df7c42 100644
--- a/lib/dpif-netdev-extract-avx512.c
+++ b/lib/dpif-netdev-extract-avx512.c
@@ -257,6 +257,19 @@ _mm512_maskz_permutexvar_epi8_wrap(__mmask64 kmask, 
__m512i idx, __m512i a)
 #define PKT_MIN_ETH_IPV4_TCP  (PKT_OFFSET_IPV4_L4 + TCP_HEADER_LEN)
 #define PKT_MIN_ETH_VLAN_IPV4_TCP (PKT_OFFSET_VLAN_IPV4_L4 + TCP_HEADER_LEN)
 
+/* MF bits. */
+#define MF_BIT(field) (MAP_1 << ((offsetof(struct flow, field) / 8) % \
+   MAP_T_BITS))
+
+#define MF_ETH(MF_BIT(dp_hash) | MF_BIT(in_port) | MF_BIT(packet_type)\
+   | MF_BIT(dl_dst) | MF_BIT(dl_src)| MF_BIT(dl_type))
+
+#define MF_ETH_VLAN   (MF_ETH | MF_BIT(vlans))
+#define MF_IPV4_UDP   (MF_BIT(nw_src) | MF_BIT(ipv6_label) | MF_BIT(tp_src) | \
+   MF_BIT(tp_dst))
+
+#define MF_IPV4_TCP   (MF_IPV4_UDP | MF_BIT(tcp_flags) | MF_BIT(arp_tha.ea[2]))
+
 /* This union allows initializing static data as u8, but easily loading it
  * into AVX512 registers too. The union ensures proper alignment for the zmm.
  */
@@ -354,7 +367,7 @@ static const struct mfex_profile 
mfex_profiles[PROFILE_COUNT] =
 .strip_mask.u8_data = { PATTERN_STRIP_IPV4_MASK },
 .store_kmsk = PATTERN_IPV4_UDP_KMASK,
 
-.mf_bits = { 0x18a0, 0x00040401},
+.mf_bits = { MF_ETH, MF_IPV4_UDP},
 .dp_pkt_offs = {
 0, UINT16_MAX, PKT_OFFSET_L3, PKT_OFFSET_IPV4_L4,
 },
@@ -377,7 +390,7 @@ static const struct mfex_profile 
mfex_profiles[PROFILE_COUNT] =
 .strip_mask.u8_data = { PATTERN_STRIP_IPV4_MASK },
 .store_kmsk = PATTERN_IPV4_TCP_KMASK,
 
-.mf_bits = { 0x18a0, 0x00044401},
+.mf_bits = { MF_ETH, MF_IPV4_TCP},
 .dp_pkt_offs = {
 0, UINT16_MAX, PKT_OFFSET_L3, PKT_OFFSET_IPV4_L4,
 },
@@ -396,7 +409,7 @@ static const struct mfex_profile 
mfex_profiles[PROFILE_COUNT] =
 .strip_mask.u8_data = { PATTERN_STRIP_DOT1Q_IPV4_MASK },
 .store_kmsk = PATTERN_DT1Q_IPV4_UDP_KMASK,
 
-.mf_bits = { 0x38a0, 0x00040401},
+.mf_bits = { MF_ETH_VLAN, MF_IPV4_UDP},
 .dp_pkt_offs = {
 PKT_OFFSET_L2_PAD_SIZE, UINT16_MAX, PKT_OFFSET_VLAN_L3,
 PKT_OFFSET_VLAN_IPV4_L4,
@@ -422,7 +435,7 @@ static const struct mfex_profile 
mfex_profiles[PROFILE_COUNT] =
 .strip_mask.u8_data = { PATTERN_STRIP_DOT1Q_IPV4_MASK },
 .store_kmsk = PATTERN_DT1Q_IPV4_TCP_KMASK,
 
-.mf_bits = { 0x38a0, 0x00044401},
+.mf_bits = { MF_ETH_VLAN, MF_IPV4_TCP},
 .dp_pkt_offs = {
 PKT_OFFSET_L2_PAD_SIZE, UINT16_MAX, PKT_OFFSET_VLAN_L3,
 PKT_OFFSET_VLAN_IPV4_L4,
-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v9 1/4] mfex_avx512: Calculate pkt offsets at compile time.

2022-05-31 Thread Kumar Amber
The patch removes the magic numbers for packet offsets and
minimum packet length and instead calculates them at
compile time.

Signed-off-by: Kumar Amber 

---
v8:
- Rename offset defines.
---
---
 lib/dpif-netdev-extract-avx512.c | 29 +
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/lib/dpif-netdev-extract-avx512.c b/lib/dpif-netdev-extract-avx512.c
index 6b6fe07db..211c4cbe4 100644
--- a/lib/dpif-netdev-extract-avx512.c
+++ b/lib/dpif-netdev-extract-avx512.c
@@ -246,6 +246,17 @@ _mm512_maskz_permutexvar_epi8_wrap(__mmask64 kmask, 
__m512i idx, __m512i a)
 NC, NC, NC, NC, 0xBF, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC,   \
 NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC
 
+#define PKT_OFFSET_L2_PAD_SIZE(ETH_HEADER_LEN)
+#define PKT_OFFSET_L3 (ETH_HEADER_LEN)
+#define PKT_OFFSET_VLAN_L3(ETH_HEADER_LEN + VLAN_HEADER_LEN)
+#define PKT_OFFSET_IPV4_L4(ETH_HEADER_LEN + IP_HEADER_LEN)
+#define PKT_OFFSET_VLAN_IPV4_L4   (PKT_OFFSET_IPV4_L4 + VLAN_HEADER_LEN)
+
+#define PKT_MIN_ETH_IPV4_UDP  (PKT_OFFSET_IPV4_L4 + UDP_HEADER_LEN)
+#define PKT_MIN_ETH_VLAN_IPV4_UDP (PKT_OFFSET_VLAN_IPV4_L4 + UDP_HEADER_LEN)
+#define PKT_MIN_ETH_IPV4_TCP  (PKT_OFFSET_IPV4_L4 + TCP_HEADER_LEN)
+#define PKT_MIN_ETH_VLAN_IPV4_TCP (PKT_OFFSET_VLAN_IPV4_L4 + TCP_HEADER_LEN)
+
 /* This union allows initializing static data as u8, but easily loading it
  * into AVX512 registers too. The union ensures proper alignment for the zmm.
  */
@@ -345,9 +356,9 @@ static const struct mfex_profile 
mfex_profiles[PROFILE_COUNT] =
 
 .mf_bits = { 0x18a0, 0x00040401},
 .dp_pkt_offs = {
-0, UINT16_MAX, 14, 34,
+0, UINT16_MAX, PKT_OFFSET_L3, PKT_OFFSET_IPV4_L4,
 },
-.dp_pkt_min_size = 42,
+.dp_pkt_min_size = PKT_MIN_ETH_IPV4_UDP,
 },
 
 [PROFILE_ETH_IPV4_TCP] = {
@@ -368,9 +379,9 @@ static const struct mfex_profile 
mfex_profiles[PROFILE_COUNT] =
 
 .mf_bits = { 0x18a0, 0x00044401},
 .dp_pkt_offs = {
-0, UINT16_MAX, 14, 34,
+0, UINT16_MAX, PKT_OFFSET_L3, PKT_OFFSET_IPV4_L4,
 },
-.dp_pkt_min_size = 54,
+.dp_pkt_min_size = PKT_MIN_ETH_IPV4_TCP,
 },
 
 [PROFILE_ETH_VLAN_IPV4_UDP] = {
@@ -387,9 +398,10 @@ static const struct mfex_profile 
mfex_profiles[PROFILE_COUNT] =
 
 .mf_bits = { 0x38a0, 0x00040401},
 .dp_pkt_offs = {
-14, UINT16_MAX, 18, 38,
+PKT_OFFSET_L2_PAD_SIZE, UINT16_MAX, PKT_OFFSET_VLAN_L3,
+PKT_OFFSET_VLAN_IPV4_L4,
 },
-.dp_pkt_min_size = 46,
+.dp_pkt_min_size = PKT_MIN_ETH_VLAN_IPV4_UDP,
 },
 
 [PROFILE_ETH_VLAN_IPV4_TCP] = {
@@ -412,9 +424,10 @@ static const struct mfex_profile 
mfex_profiles[PROFILE_COUNT] =
 
 .mf_bits = { 0x38a0, 0x00044401},
 .dp_pkt_offs = {
-14, UINT16_MAX, 18, 38,
+PKT_OFFSET_L2_PAD_SIZE, UINT16_MAX, PKT_OFFSET_VLAN_L3,
+PKT_OFFSET_VLAN_IPV4_L4,
 },
-.dp_pkt_min_size = 58,
+.dp_pkt_min_size = PKT_MIN_ETH_VLAN_IPV4_TCP,
 },
 };
 
-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v9 0/4] MFEX Optimizations IPv6 + Hashing Optimizations

2022-05-31 Thread Kumar Amber
The patchset introduces IPv6 optimized MFEX profiles
with AVX512 which can deliver up to a 20% to 30% gain in
performance over the existing scalar data-path.

Hashing optimizations are also included which can further
improve performance by approximately 10%.

The patch also removes the magic numbers for MF bits, packet offsets
and packet lengths.

---
v9:
- Fix UBSan unaligned memory load.
v8:
- Rename packet offset defines to align with packet struct.
v7:
- Remove magic numbers from AVX512 Profiles.
v5:
- Add Ipv6 and TCP packet length checks.
v4:
- rebase to master.
- use static key lengths for different packet types.
v3:
- rebase to master.
v2:
- fix the CI build.
- fix check-patch for co-author.
---

Kumar Amber (4):
  mfex_avx512: Calculate pkt offsets at compile time.
  mfex_avx512: Calculate miniflow_bits at compile time.
  dpif-netdev/mfex: Add AVX512 ipv6 traffic profiles
  dpif-netdev/mfex: Add ipv6 profile based hashing.

 NEWS  |   6 +-
 lib/automake.mk   |   1 +
 lib/dp-packet.h   |  43 
 lib/dpif-netdev-extract-avx512.c  | 349 --
 lib/dpif-netdev-private-extract.c |  51 -
 lib/dpif-netdev-private-extract.h |  12 +
 lib/flow.c|   4 +
 7 files changed, 451 insertions(+), 15 deletions(-)

-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v6] tests/mfex: Improve pcap script for mfex tests.

2022-05-26 Thread Kumar Amber
The mfex pcap generation script is improved to produce varied-length
traffic. The hard-coded mfex_test.pcap is removed; the script itself
is now used to generate complex traffic patterns for testing.

Signed-off-by: Kumar Amber 
Acked-by: Cian Ferriter 

---
v6:
- move the pcap generation to individual test folders.
- replace relative file path with full path.
- fix minor nits.
v5:
- fix mac and ip address generation.
v4:
- Fix MAC and L4 ports to a value.
- Generate Ip addresses in fixed range.
v3:
- Fix comments(Eelco).
- Script generates mac/ip/l4_ports in a fixed range.
v2:
- Add huge page test-skip.
- Change core id from 3 to 0 to allow the mfex config test-case
  to run on any system.
---
---
 tests/automake.mk |   1 -
 tests/mfex_fuzzy.py   |  92 --
 tests/pcap/mfex_test.pcap | Bin 416 -> 0 bytes
 tests/system-dpdk.at  |  52 +++--
 4 files changed, 97 insertions(+), 48 deletions(-)
 delete mode 100644 tests/pcap/mfex_test.pcap

diff --git a/tests/automake.mk b/tests/automake.mk
index 34ddda6aa..204e86fac 100644
--- a/tests/automake.mk
+++ b/tests/automake.mk
@@ -146,7 +146,6 @@ $(srcdir)/tests/fuzz-regression-list.at: tests/automake.mk
 
 EXTRA_DIST += $(MFEX_AUTOVALIDATOR_TESTS)
 MFEX_AUTOVALIDATOR_TESTS = \
-   tests/pcap/mfex_test.pcap \
tests/mfex_fuzzy.py
 
 OVSDB_CLUSTER_TESTSUITE_AT = \
diff --git a/tests/mfex_fuzzy.py b/tests/mfex_fuzzy.py
index 3efe1152d..ee2183f8e 100755
--- a/tests/mfex_fuzzy.py
+++ b/tests/mfex_fuzzy.py
@@ -3,30 +3,74 @@
 import sys
 
 from scapy.all import RandMAC, RandIP, PcapWriter, RandIP6, RandShort, fuzz
-from scapy.all import IPv6, Dot1Q, IP, Ether, UDP, TCP
+from scapy.all import IPv6, Dot1Q, IP, Ether, UDP, TCP, random
+
+# Path for the pcap file location.
+path = str(sys.argv[1])
+# The number of packets generated will be size * 8.
+size = int(sys.argv[2])
+# Traffic option is used to choose between fuzzy or simple packet type.
+if len(sys.argv) > 3:
+traffic_opt = str(sys.argv[3])
+else:
+traffic_opt = ""
 
-path = str(sys.argv[1]) + "/pcap/fuzzy.pcap"
 pktdump = PcapWriter(path, append=False, sync=True)
 
-for i in range(0, 2000):
-
-# Generate random protocol bases, use a fuzz() over the combined packet
-# for full fuzzing.
-eth = Ether(src=RandMAC(), dst=RandMAC())
-vlan = Dot1Q()
-ipv4 = IP(src=RandIP(), dst=RandIP())
-ipv6 = IPv6(src=RandIP6(), dst=RandIP6())
-udp = UDP(dport=RandShort(), sport=RandShort())
-tcp = TCP(dport=RandShort(), sport=RandShort())
-
-# IPv4 packets with fuzzing
-pktdump.write(fuzz(eth / ipv4 / udp))
-pktdump.write(fuzz(eth / ipv4 / tcp))
-pktdump.write(fuzz(eth / vlan / ipv4 / udp))
-pktdump.write(fuzz(eth / vlan / ipv4 / tcp))
-
-# IPv6 packets with fuzzing
-pktdump.write(fuzz(eth / ipv6 / udp))
-pktdump.write(fuzz(eth / ipv6 / tcp))
-pktdump.write(fuzz(eth / vlan / ipv6 / udp))
-pktdump.write(fuzz(eth / vlan / ipv6 / tcp))
+pkt = []
+
+for i in range(0, size):
+if traffic_opt == "fuzzy":
+
+eth = Ether(src=RandMAC(), dst=RandMAC())
+vlan = Dot1Q()
+udp = UDP(dport=RandShort(), sport=RandShort())
+ipv4 = IP(src=RandIP(), dst=RandIP(), len=random.randint(0, 100))
+ipv6 = IPv6(src=RandIP6(), dst=RandIP6(), plen=random.randint(0, 100))
+tcp = TCP(dport=RandShort(), sport=RandShort(), flags='S',
+  dataofs=random.randint(0, 15))
+
+# IPv4 packets with fuzzing
+pkt.append(fuzz(eth / ipv4 / udp))
+pkt.append(fuzz(eth / ipv4 / tcp))
+pkt.append(fuzz(eth / vlan / ipv4 / udp))
+pkt.append(fuzz(eth / vlan / ipv4 / tcp))
+
+# IPv6 packets with fuzzing
+pkt.append(fuzz(eth / ipv6 / udp))
+pkt.append(fuzz(eth / ipv6 / tcp))
+pkt.append(fuzz(eth / vlan / ipv6 / udp))
+pkt.append(fuzz(eth / vlan / ipv6 / tcp))
+
+else:
+mac_addr_src = "52:54:00:FF:FF:{:02X}".format(i % 0xff)
+mac_addr_dst = "80:FF:FF:FF:FF:{:02X}".format(i % 0xff)
+src_port = 200 + (i % 20)
+dst_port = 1000 + (i % 20)
+eth = Ether(src=mac_addr_src, dst=mac_addr_dst)
+vlan = Dot1Q(vlan=(i % 10))
+udp = UDP(dport=src_port, sport=dst_port)
+# IPv4 address range limits to 255 and IPv6 limit to 65535
+ipv4_addr_src = "192.168.150." + str((i % 255))
+ipv4_addr_dst = "200.100.198." + str((i % 255))
+ipv6_addr_src = "2001:0db8:85a3:::8a2e:0370:{:04x}" \
+.format(i % 0xffff)
+ipv6_addr_dst = "3021::85a3:::8a2e:0480:{:04x}" \
+.format(i % 0xffff)
+ipv4 = IP(src=ipv4_addr_src, dst=ipv4_addr_dst)
+ipv6 = IPv6(src=ipv6_addr_src, dst=ipv6_addr_dst)
+tcp = TCP(dport=src_port, sport=dst_port, flags='S')
+
+ 

[ovs-dev] [PATCH v8 4/4] dpif-netdev/mfex: Add ipv6 profile based hashing.

2022-05-26 Thread Kumar Amber
For packets which don't already have a hash calculated,
miniflow_hash_5tuple() calculates the hash of a packet
using the previously built miniflow.

This commit adds IPv6 profile specific hashing which
uses fixed offsets into the packet to improve hashing
performance.

Signed-off-by: Kumar Amber 
Acked-by: Harry van Haaren 
---
 lib/dp-packet.h  | 43 
 lib/dpif-netdev-extract-avx512.c |  8 +++---
 lib/flow.c   |  4 +++
 3 files changed, 51 insertions(+), 4 deletions(-)

diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index 7c5da258a..277bb51b8 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -1117,6 +1117,49 @@ dp_packet_update_rss_hash_ipv4_tcp_udp(struct dp_packet 
*packet)
 dp_packet_set_rss_hash(packet, hash);
 }
 
+static inline void ALWAYS_INLINE
+dp_packet_update_rss_hash_ipv6_tcp_udp(struct dp_packet *packet)
+{
+if (dp_packet_rss_valid(packet)) {
+return;
+}
+
+const uint8_t *pkt = dp_packet_data(packet);
+const uint16_t l3_ofs = packet->l3_ofs;
+uint32_t ipv6_src_off = offsetof(struct ovs_16aligned_ip6_hdr, ip6_src);
+uint32_t ipv6_dst_off = offsetof(struct ovs_16aligned_ip6_hdr, ip6_dst);
+uint32_t ipv6_proto_off = offsetof(struct ovs_16aligned_ip6_hdr,
+   ip6_ctlun.ip6_un1.ip6_un1_nxt);
+const void *ipv6_src_l = &pkt[l3_ofs + ipv6_src_off];
+const void *ipv6_src_h = &pkt[l3_ofs + ipv6_src_off + 8];
+const void *ipv6_dst_l = &pkt[l3_ofs + ipv6_dst_off];
+const void *ipv6_dst_h = &pkt[l3_ofs + ipv6_dst_off + 8];
+const void *l4_ports = &pkt[packet->l4_ofs];
+uint64_t ipv6_src_lo, ipv6_src_hi;
+uint64_t ipv6_dst_lo, ipv6_dst_hi;
+uint32_t ports;
+uint32_t hash = 0;
+
+memcpy(&ipv6_src_lo, ipv6_src_l, sizeof ipv6_src_lo);
+memcpy(&ipv6_src_hi, ipv6_src_h, sizeof ipv6_src_hi);
+memcpy(&ipv6_dst_lo, ipv6_dst_l, sizeof ipv6_dst_lo);
+memcpy(&ipv6_dst_hi, ipv6_dst_h, sizeof ipv6_dst_hi);
+memcpy(&ports, l4_ports, sizeof ports);
+
+/* IPv6 Src and Dst. */
+hash = hash_add64(hash, ipv6_src_lo);
+hash = hash_add64(hash, ipv6_src_hi);
+hash = hash_add64(hash, ipv6_dst_lo);
+hash = hash_add64(hash, ipv6_dst_hi);
+/* IPv6 proto. */
+hash = hash_add(hash, pkt[l3_ofs + ipv6_proto_off]);
+/* L4 ports. */
+hash = hash_add(hash, ports);
+hash = hash_finish(hash, 42);
+
+dp_packet_set_rss_hash(packet, hash);
+}
+
 #ifdef  __cplusplus
 }
 #endif
diff --git a/lib/dpif-netdev-extract-avx512.c b/lib/dpif-netdev-extract-avx512.c
index 58532ad49..efeabacbf 100644
--- a/lib/dpif-netdev-extract-avx512.c
+++ b/lib/dpif-netdev-extract-avx512.c
@@ -868,7 +868,7 @@ mfex_avx512_process(struct dp_packet_batch *packets,
 
 /* Process UDP header. */
 mfex_handle_ipv6_l4((void *)&pkt[54], &blocks[9]);
-
+dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
 } break;
 
 case PROFILE_ETH_IPV6_TCP: {
@@ -891,7 +891,7 @@ mfex_avx512_process(struct dp_packet_batch *packets,
 continue;
 }
 mfex_handle_tcp_flags(tcp, &blocks[9]);
-
+dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
 } break;
 
 case PROFILE_ETH_VLAN_IPV6_TCP: {
@@ -917,7 +917,7 @@ mfex_avx512_process(struct dp_packet_batch *packets,
 continue;
 }
 mfex_handle_tcp_flags(tcp, &blocks[10]);
-
+dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
 } break;
 
 case PROFILE_ETH_VLAN_IPV6_UDP: {
@@ -938,7 +938,7 @@ mfex_avx512_process(struct dp_packet_batch *packets,
 
 /* Process UDP header. */
 mfex_handle_ipv6_l4((void *)&pkt[58], &blocks[10]);
-
+dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
 } break;
 default:
 break;
diff --git a/lib/flow.c b/lib/flow.c
index 8ab9df3fc..b1e1fb34d 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -1019,6 +1019,8 @@ miniflow_extract(struct dp_packet *packet, struct 
miniflow *dst)
 miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
 if (dl_type == htons(ETH_TYPE_IP)) {
 dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
+} else if (dl_type == htons(ETH_TYPE_IPV6)) {
+dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
 }
 }
 }
@@ -1032,6 +1034,8 @@ miniflow_extract(struct dp_packet *packet, struct 
miniflow *dst)
 miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
 if (dl_type == htons(ETH_TYPE_IP)) {
 dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
+} else if (dl_type == htons(ETH_TYPE_IPV6)) {
+dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
 }
 }
 } else if (OVS_LIKELY(nw_proto == IPPROTO_SCTP)) {
-- 

[ovs-dev] [PATCH v8 3/4] dpif-netdev/mfex: Add AVX512 ipv6 traffic profiles

2022-05-26 Thread Kumar Amber
Add AVX512 IPv6 optimized profiles for VLAN/IPv6/UDP,
VLAN/IPv6/TCP, IPv6/UDP and IPv6/TCP.

The MFEX autovalidation test-case already has IPv6 support for
validating against the scalar mfex.

Signed-off-by: Kumar Amber 
Signed-off-by: Harry van Haaren 
Co-authored-by: Harry van Haaren 

---
v8:
- Rename defines for packet offsets.
v7:
- Fix length checks for plen.
v5:
- Add variable length checks for IPv6 and TCP.
v4:
- Rebase to master.
v2:
- Fix CI build error.
- Fix check-patch sign-offs.
---
---
 NEWS  |   6 +-
 lib/automake.mk   |   1 +
 lib/dpif-netdev-extract-avx512.c  | 305 +-
 lib/dpif-netdev-private-extract.c |  51 -
 lib/dpif-netdev-private-extract.h |  12 ++
 5 files changed, 370 insertions(+), 5 deletions(-)

diff --git a/NEWS b/NEWS
index eece0d0b2..f963967d8 100644
--- a/NEWS
+++ b/NEWS
@@ -29,7 +29,11 @@ Post-v2.17.0
- Windows:
  * Conntrack support for TCPv6, UDPv6, ICMPv6, FTPv6.
  * IPv6 Geneve tunnel support.
-
+   - Userspace datapath:
+ * Add AVX512 optimized profiles to miniflow extract for IPv6/UDP and
+   IPv6/TCP.
+ * Add AVX512 optimized profiles to miniflow extract for VLAN/IPv6/UDP
+   and VLAN/IPv6/TCP.
 
 v2.17.0 - 17 Feb 2022
 -
diff --git a/lib/automake.mk b/lib/automake.mk
index a23cdc4ad..139220c23 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -33,6 +33,7 @@ lib_libopenvswitchavx512_la_CFLAGS = \
-mavx512f \
-mavx512bw \
-mavx512dq \
+   -mavx512vl \
-mbmi \
-mbmi2 \
-fPIC \
diff --git a/lib/dpif-netdev-extract-avx512.c b/lib/dpif-netdev-extract-avx512.c
index 388df7c42..58532ad49 100644
--- a/lib/dpif-netdev-extract-avx512.c
+++ b/lib/dpif-netdev-extract-avx512.c
@@ -49,6 +49,7 @@
 #include "dpif-netdev-private-extract.h"
 #include "dpif-netdev-private-flow.h"
 #include "dp-packet.h"
+#include "packets.h"
 
 /* AVX512-BW level permutex2var_epi8 emulation. */
 static inline __m512i
@@ -138,6 +139,7 @@ _mm512_maskz_permutexvar_epi8_wrap(__mmask64 kmask, __m512i 
idx, __m512i a)
 #define PATTERN_ETHERTYPE_MASK PATTERN_ETHERTYPE_GEN(0xFF, 0xFF)
 #define PATTERN_ETHERTYPE_IPV4 PATTERN_ETHERTYPE_GEN(0x08, 0x00)
 #define PATTERN_ETHERTYPE_DT1Q PATTERN_ETHERTYPE_GEN(0x81, 0x00)
+#define PATTERN_ETHERTYPE_IPV6 PATTERN_ETHERTYPE_GEN(0x86, 0xDD)
 
 /* VLAN (Dot1Q) patterns and masks. */
 #define PATTERN_DT1Q_MASK   \
@@ -202,6 +204,40 @@ _mm512_maskz_permutexvar_epi8_wrap(__mmask64 kmask, 
__m512i idx, __m512i a)
   NU, NU, NU, NU, NU, NU, NU, NU, 38, 39, 40, 41, NU, NU, NU, NU, /* TCP */   \
   NU, NU, NU, NU, NU, NU, NU, NU, /* Unused. */
 
+/* Generator for checking IPv6 ver. */
+#define PATTERN_IPV6_GEN(VER_TRC, PROTO)  \
+  VER_TRC, /* Version: 4bits and Traffic class: 4bits. */ \
+  0, 0, 0, /* Traffic class: 4bits and Flow Label: 24bits. */ \
+  0, 0,/* Payload length 16bits. */   \
+  PROTO, 0,/* Next Header 8bits and Hop limit 8bits. */   \
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Src IP: 128bits. */  \
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Dst IP: 128bits. */
+
+#define PATTERN_IPV6_MASK PATTERN_IPV6_GEN(0xF0, 0xFF)
+#define PATTERN_IPV6_UDP PATTERN_IPV6_GEN(0x60, 0x11)
+#define PATTERN_IPV6_TCP PATTERN_IPV6_GEN(0x60, 0x06)
+
+#define PATTERN_IPV6_SHUFFLE  \
+   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, NU, NU, /* Ether */ \
+  22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, /* IPv6 */  \
+  38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, /* IPv6 */  \
+  NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, /* Unused */
+
+/* VLAN (Dot1Q) patterns and masks. */
+#define PATTERN_DT1Q_MASK \
+  0x00, 0x00, 0xFF, 0xFF,
+#define PATTERN_DT1Q_IPV6 \
+  0x00, 0x00, 0x86, 0xDD,
+
+#define PATTERN_DT1Q_IPV6_SHUFFLE \
+  /* Ether (2 blocks): Note that *VLAN* type is written here. */  \
+  0,  1,  2,  3,  4,  5,  6,  7, 8,  9, 10, 11, 16, 17,  0,  0,   \
+  /* VLAN (1 block): Note that the *EtherHdr->Type* is written here. */   \
+  12, 13, 14, 15, 0, 0, 0, 0, \
+  26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, /* IPv6 */  \
+  42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, /* IPv6 */  \
+  NU, NU, NU, NU, NU, NU, NU, NU, /* Unused */
+
 /* Generation of K-mask bitmask values, to zero out data in result. Note that
  * these correspond 1:1 to the above "

[ovs-dev] [PATCH v8 2/4] mfex_avx512: Calculate miniflow_bits at compile time.

2022-05-26 Thread Kumar Amber
The patch removes magic numbers from miniflow_bits
and calculates the bits at compile time. This also
makes it easier to handle any ABI changes.

Signed-off-by: Kumar Amber 
---
 lib/dpif-netdev-extract-avx512.c | 21 +
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/lib/dpif-netdev-extract-avx512.c b/lib/dpif-netdev-extract-avx512.c
index 211c4cbe4..388df7c42 100644
--- a/lib/dpif-netdev-extract-avx512.c
+++ b/lib/dpif-netdev-extract-avx512.c
@@ -257,6 +257,19 @@ _mm512_maskz_permutexvar_epi8_wrap(__mmask64 kmask, 
__m512i idx, __m512i a)
 #define PKT_MIN_ETH_IPV4_TCP  (PKT_OFFSET_IPV4_L4 + TCP_HEADER_LEN)
 #define PKT_MIN_ETH_VLAN_IPV4_TCP (PKT_OFFSET_VLAN_IPV4_L4 + TCP_HEADER_LEN)
 
+/* MF bits. */
+#define MF_BIT(field) (MAP_1 << ((offsetof(struct flow, field) / 8) % \
+   MAP_T_BITS))
+
+#define MF_ETH (MF_BIT(dp_hash) | MF_BIT(in_port) | MF_BIT(packet_type)\
+   | MF_BIT(dl_dst) | MF_BIT(dl_src)| MF_BIT(dl_type))
+
+#define MF_ETH_VLAN   (MF_ETH | MF_BIT(vlans))
+#define MF_IPV4_UDP   (MF_BIT(nw_src) | MF_BIT(ipv6_label) | MF_BIT(tp_src) | \
+   MF_BIT(tp_dst))
+
+#define MF_IPV4_TCP   (MF_IPV4_UDP | MF_BIT(tcp_flags) | MF_BIT(arp_tha.ea[2]))
+
 /* This union allows initializing static data as u8, but easily loading it
  * into AVX512 registers too. The union ensures proper alignment for the zmm.
  */
@@ -354,7 +367,7 @@ static const struct mfex_profile 
mfex_profiles[PROFILE_COUNT] =
 .strip_mask.u8_data = { PATTERN_STRIP_IPV4_MASK },
 .store_kmsk = PATTERN_IPV4_UDP_KMASK,
 
-.mf_bits = { 0x18a0, 0x00040401},
+.mf_bits = { MF_ETH, MF_IPV4_UDP},
 .dp_pkt_offs = {
 0, UINT16_MAX, PKT_OFFSET_L3, PKT_OFFSET_IPV4_L4,
 },
@@ -377,7 +390,7 @@ static const struct mfex_profile 
mfex_profiles[PROFILE_COUNT] =
 .strip_mask.u8_data = { PATTERN_STRIP_IPV4_MASK },
 .store_kmsk = PATTERN_IPV4_TCP_KMASK,
 
-.mf_bits = { 0x18a0, 0x00044401},
+.mf_bits = { MF_ETH, MF_IPV4_TCP},
 .dp_pkt_offs = {
 0, UINT16_MAX, PKT_OFFSET_L3, PKT_OFFSET_IPV4_L4,
 },
@@ -396,7 +409,7 @@ static const struct mfex_profile 
mfex_profiles[PROFILE_COUNT] =
 .strip_mask.u8_data = { PATTERN_STRIP_DOT1Q_IPV4_MASK },
 .store_kmsk = PATTERN_DT1Q_IPV4_UDP_KMASK,
 
-.mf_bits = { 0x38a0, 0x00040401},
+.mf_bits = { MF_ETH_VLAN, MF_IPV4_UDP},
 .dp_pkt_offs = {
 PKT_OFFSET_L2_PAD_SIZE, UINT16_MAX, PKT_OFFSET_VLAN_L3,
 PKT_OFFSET_VLAN_IPV4_L4,
@@ -422,7 +435,7 @@ static const struct mfex_profile 
mfex_profiles[PROFILE_COUNT] =
 .strip_mask.u8_data = { PATTERN_STRIP_DOT1Q_IPV4_MASK },
 .store_kmsk = PATTERN_DT1Q_IPV4_TCP_KMASK,
 
-.mf_bits = { 0x38a0, 0x00044401},
+.mf_bits = { MF_ETH_VLAN, MF_IPV4_TCP},
 .dp_pkt_offs = {
 PKT_OFFSET_L2_PAD_SIZE, UINT16_MAX, PKT_OFFSET_VLAN_L3,
 PKT_OFFSET_VLAN_IPV4_L4,
-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v8 1/4] mfex_avx512: Calculate pkt offsets at compile time.

2022-05-26 Thread Kumar Amber
The patch removes magic numbers for packet offsets and
minimum packet length and instead calculates them at
compile time.

Signed-off-by: Kumar Amber 

---
v8:
- Rename offset defines.
---
---
 lib/dpif-netdev-extract-avx512.c | 29 +
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/lib/dpif-netdev-extract-avx512.c b/lib/dpif-netdev-extract-avx512.c
index 6b6fe07db..211c4cbe4 100644
--- a/lib/dpif-netdev-extract-avx512.c
+++ b/lib/dpif-netdev-extract-avx512.c
@@ -246,6 +246,17 @@ _mm512_maskz_permutexvar_epi8_wrap(__mmask64 kmask, 
__m512i idx, __m512i a)
 NC, NC, NC, NC, 0xBF, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC,   \
 NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC
 
+#define PKT_OFFSET_L2_PAD_SIZE (ETH_HEADER_LEN)
+#define PKT_OFFSET_L3 (ETH_HEADER_LEN)
+#define PKT_OFFSET_VLAN_L3(ETH_HEADER_LEN + VLAN_HEADER_LEN)
+#define PKT_OFFSET_IPV4_L4(ETH_HEADER_LEN + IP_HEADER_LEN)
+#define PKT_OFFSET_VLAN_IPV4_L4   (PKT_OFFSET_IPV4_L4 + VLAN_HEADER_LEN)
+
+#define PKT_MIN_ETH_IPV4_UDP  (PKT_OFFSET_IPV4_L4 + UDP_HEADER_LEN)
+#define PKT_MIN_ETH_VLAN_IPV4_UDP (PKT_OFFSET_VLAN_IPV4_L4 + UDP_HEADER_LEN)
+#define PKT_MIN_ETH_IPV4_TCP  (PKT_OFFSET_IPV4_L4 + TCP_HEADER_LEN)
+#define PKT_MIN_ETH_VLAN_IPV4_TCP (PKT_OFFSET_VLAN_IPV4_L4 + TCP_HEADER_LEN)
+
 /* This union allows initializing static data as u8, but easily loading it
  * into AVX512 registers too. The union ensures proper alignment for the zmm.
  */
@@ -345,9 +356,9 @@ static const struct mfex_profile 
mfex_profiles[PROFILE_COUNT] =
 
 .mf_bits = { 0x18a0, 0x00040401},
 .dp_pkt_offs = {
-0, UINT16_MAX, 14, 34,
+0, UINT16_MAX, PKT_OFFSET_L3, PKT_OFFSET_IPV4_L4,
 },
-.dp_pkt_min_size = 42,
+.dp_pkt_min_size = PKT_MIN_ETH_IPV4_UDP,
 },
 
 [PROFILE_ETH_IPV4_TCP] = {
@@ -368,9 +379,9 @@ static const struct mfex_profile 
mfex_profiles[PROFILE_COUNT] =
 
 .mf_bits = { 0x18a0, 0x00044401},
 .dp_pkt_offs = {
-0, UINT16_MAX, 14, 34,
+0, UINT16_MAX, PKT_OFFSET_L3, PKT_OFFSET_IPV4_L4,
 },
-.dp_pkt_min_size = 54,
+.dp_pkt_min_size = PKT_MIN_ETH_IPV4_TCP,
 },
 
 [PROFILE_ETH_VLAN_IPV4_UDP] = {
@@ -387,9 +398,10 @@ static const struct mfex_profile 
mfex_profiles[PROFILE_COUNT] =
 
 .mf_bits = { 0x38a0, 0x00040401},
 .dp_pkt_offs = {
-14, UINT16_MAX, 18, 38,
+PKT_OFFSET_L2_PAD_SIZE, UINT16_MAX, PKT_OFFSET_VLAN_L3,
+PKT_OFFSET_VLAN_IPV4_L4,
 },
-.dp_pkt_min_size = 46,
+.dp_pkt_min_size = PKT_MIN_ETH_VLAN_IPV4_UDP,
 },
 
 [PROFILE_ETH_VLAN_IPV4_TCP] = {
@@ -412,9 +424,10 @@ static const struct mfex_profile 
mfex_profiles[PROFILE_COUNT] =
 
 .mf_bits = { 0x38a0, 0x00044401},
 .dp_pkt_offs = {
-14, UINT16_MAX, 18, 38,
+PKT_OFFSET_L2_PAD_SIZE, UINT16_MAX, PKT_OFFSET_VLAN_L3,
+PKT_OFFSET_VLAN_IPV4_L4,
 },
-.dp_pkt_min_size = 58,
+.dp_pkt_min_size = PKT_MIN_ETH_VLAN_IPV4_TCP,
 },
 };
 
-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v8 0/4] MFEX Optimizations IPv6 + Hashing Optimizations

2022-05-26 Thread Kumar Amber
The patchset introduces IPv6 optimized MFEX profiles
with AVX512 which can deliver up to 20% to 30% gain in
performance over the existing scalar data-path.

Hashing optimizations are also included which can further
improve performance by approximately 10%.

The patch also removes the magic numbers for MF bits, packet offsets
and packet lengths.

---
v8:
- Rename packet offset defines to align with packet struct.
v7:
- Remove magic numbers from AVX512 Profiles.
v5:
- Add Ipv6 and TCP packet length checks.
v4:
- rebase to master.
- use static key lengths for different packet types.
v3:
- rebase to master.
v2:
- fix the CI build.
- fix check-patch for co-author.
---

Kumar Amber (4):
  mfex_avx512: Calculate pkt offsets at compile time.
  mfex_avx512: Calculate miniflow_bits at compile time.
  dpif-netdev/mfex: Add AVX512 ipv6 traffic profiles
  dpif-netdev/mfex: Add ipv6 profile based hashing.

 NEWS  |   6 +-
 lib/automake.mk   |   1 +
 lib/dp-packet.h   |  43 
 lib/dpif-netdev-extract-avx512.c  | 351 --
 lib/dpif-netdev-private-extract.c |  51 -
 lib/dpif-netdev-private-extract.h |  12 +
 lib/flow.c|   4 +
 7 files changed, 453 insertions(+), 15 deletions(-)

-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v5] tests/mfex: Improve pcap script for mfex tests.

2022-05-24 Thread Kumar Amber
The mfex pcap generation script is improved for varied length
traffic and also removes the hard coded mfex_pcap and instead uses
the script itself to generate complex traffic patterns for testing.

Signed-off-by: Kumar Amber 
Acked-by: Cian Ferriter 

---
v5:
- fix mac and ip address generation.
v4:
- Fix MAC and L4 ports to a value.
- Generate Ip addresses in fixed range.
v3:
- Fix comments(Eelco).
- Script generates mac/ip/l4_ports in a fixed range.
v2:
- Add huge page test-skip.
- Change core id to 3 to 0 to allow the mfex config test-case
  to run on any system.
---
---
 tests/automake.mk |   1 -
 tests/mfex_fuzzy.py   |  90 --
 tests/pcap/mfex_test.pcap | Bin 416 -> 0 bytes
 tests/system-dpdk.at  |  53 --
 4 files changed, 97 insertions(+), 47 deletions(-)
 delete mode 100644 tests/pcap/mfex_test.pcap

diff --git a/tests/automake.mk b/tests/automake.mk
index 34ddda6aa..204e86fac 100644
--- a/tests/automake.mk
+++ b/tests/automake.mk
@@ -146,7 +146,6 @@ $(srcdir)/tests/fuzz-regression-list.at: tests/automake.mk
 
 EXTRA_DIST += $(MFEX_AUTOVALIDATOR_TESTS)
 MFEX_AUTOVALIDATOR_TESTS = \
-   tests/pcap/mfex_test.pcap \
tests/mfex_fuzzy.py
 
 OVSDB_CLUSTER_TESTSUITE_AT = \
diff --git a/tests/mfex_fuzzy.py b/tests/mfex_fuzzy.py
index 3efe1152d..5c85e6e29 100755
--- a/tests/mfex_fuzzy.py
+++ b/tests/mfex_fuzzy.py
@@ -3,30 +3,74 @@
 import sys
 
 from scapy.all import RandMAC, RandIP, PcapWriter, RandIP6, RandShort, fuzz
-from scapy.all import IPv6, Dot1Q, IP, Ether, UDP, TCP
+from scapy.all import IPv6, Dot1Q, IP, Ether, UDP, TCP, random
 
+# Relative path for the pcap file location.
 path = str(sys.argv[1]) + "/pcap/fuzzy.pcap"
+# The number of packets generated will be size * 8.
+size = int(sys.argv[2])
+# Traffic option is used to choose between fuzzy or simple packet type.
+if (len(sys.argv) > 3):
+traffic_opt = str(sys.argv[3])
+else:
+traffic_opt = ""
+
 pktdump = PcapWriter(path, append=False, sync=True)
 
-for i in range(0, 2000):
-
-# Generate random protocol bases, use a fuzz() over the combined packet
-# for full fuzzing.
-eth = Ether(src=RandMAC(), dst=RandMAC())
-vlan = Dot1Q()
-ipv4 = IP(src=RandIP(), dst=RandIP())
-ipv6 = IPv6(src=RandIP6(), dst=RandIP6())
-udp = UDP(dport=RandShort(), sport=RandShort())
-tcp = TCP(dport=RandShort(), sport=RandShort())
-
-# IPv4 packets with fuzzing
-pktdump.write(fuzz(eth / ipv4 / udp))
-pktdump.write(fuzz(eth / ipv4 / tcp))
-pktdump.write(fuzz(eth / vlan / ipv4 / udp))
-pktdump.write(fuzz(eth / vlan / ipv4 / tcp))
-
-# IPv6 packets with fuzzing
-pktdump.write(fuzz(eth / ipv6 / udp))
-pktdump.write(fuzz(eth / ipv6 / tcp))
-pktdump.write(fuzz(eth / vlan / ipv6 / udp))
-pktdump.write(fuzz(eth / vlan / ipv6 / tcp))
+pkt = []
+
+for i in range(0, size):
+if traffic_opt == "fuzzy":
+
+eth = Ether(src=RandMAC(), dst=RandMAC())
+vlan = Dot1Q()
+udp = UDP(dport=RandShort(), sport=RandShort())
+ipv4 = IP(src=RandIP(), dst=RandIP(), len=random.randint(0, 100))
+ipv6 = IPv6(src=RandIP6(), dst=RandIP6(), plen=random.randint(0, 100))
+tcp = TCP(dport=RandShort(), sport=RandShort(), flags='S',
+  dataofs=random.randint(0, 15))
+
+# IPv4 packets with fuzzing
+pkt.append(fuzz(eth / ipv4 / udp))
+pkt.append(fuzz(eth / ipv4 / tcp))
+pkt.append(fuzz(eth / vlan / ipv4 / udp))
+pkt.append(fuzz(eth / vlan / ipv4 / tcp))
+
+# IPv6 packets with fuzzing
+pkt.append(fuzz(eth / ipv6 / udp))
+pkt.append(fuzz(eth / ipv6 / tcp))
+pkt.append(fuzz(eth / vlan / ipv6 / udp))
+pkt.append(fuzz(eth / vlan / ipv6 / tcp))
+
+else:
+mac_addr_src = "52:54:00:FF:FF:%02x" % ((i % 0xff),)
+mac_addr_dst = "80:FF:FF:FF:FF:%02x" % ((i % 0xff),)
+src_port = 200 + (i % 20)
+dst_port = 1000 + (i % 20)
+eth = Ether(src=mac_addr_src, dst=mac_addr_dst)
+vlan = Dot1Q(vlan=(i % 10))
+udp = UDP(dport=src_port, sport=dst_port)
+# IPv4 address range limits to 255 and IPv6 limit to 65535
+ipv4_addr_src = "192.168.150." + str((i % 255))
+ipv4_addr_dst = "200.100.198." + str((i % 255))
+ipv6_addr_src = "2001:0db8:85a3:0000:0000:8a2e:0370:{:04x}" \
+.format(i % 0xffff)
+ipv6_addr_dst = "3021:ffff:85a3:ffff:ffff:8a2e:0480:{:04x}" \
+.format(i % 0xffff)
+ipv4 = IP(src=ipv4_addr_src, dst=ipv4_addr_dst)
+ipv6 = IPv6(src=ipv6_addr_src, dst=ipv6_addr_dst)
+tcp = TCP(dport=src_port, sport=dst_port, flags='S')
+
+# IPv4 packets
+pkt.append(eth / ipv4 / udp)
+pkt.append(eth / ipv4 / tcp)
+pkt.append(eth / vlan / ipv4 / udp

[ovs-dev] [PATCH v2 5/5] dpif-netdev-avx512: Add inner packet handling to dpif.

2022-05-24 Thread Kumar Amber
This patch adds the necessary changes required to support
tunnel packet types in avx512 dpif.

Signed-off-by: Kumar Amber 
Signed-off-by: Cian Ferriter 
Co-authored-by: Cian Ferriter 
---
 lib/dpif-netdev-avx512.c | 20 ++--
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/lib/dpif-netdev-avx512.c b/lib/dpif-netdev-avx512.c
index ecf512651..e7140e08c 100644
--- a/lib/dpif-netdev-avx512.c
+++ b/lib/dpif-netdev-avx512.c
@@ -80,7 +80,7 @@ dp_netdev_input_avx512_probe(void)
 static inline int32_t ALWAYS_INLINE
 dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
  struct dp_packet_batch *packets,
- bool md_is_valid OVS_UNUSED, odp_port_t in_port)
+ bool md_is_valid, odp_port_t in_port)
 {
 /* Allocate DPIF userdata. */
 if (OVS_UNLIKELY(!pmd->netdev_input_func_userdata)) {
@@ -92,6 +92,7 @@ dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
 struct netdev_flow_key *keys = ud->keys;
 struct netdev_flow_key **key_ptrs = ud->key_ptrs;
 struct pkt_flow_meta *pkt_meta = ud->pkt_meta;
+const uint32_t recirc_depth = *recirc_depth_get();
 
 /* The AVX512 DPIF implementation handles rules in a way that is optimized
  * for reducing data-movement between HWOL/EMC/SMC and DPCLS. This is
@@ -179,7 +180,9 @@ dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
 
 /* Get packet pointer from bitmask and packet md. */
 struct dp_packet *packet = packets->packets[i];
-pkt_metadata_init(>md, in_port);
+if (!md_is_valid) {
+pkt_metadata_init(>md, in_port);
+}
 
 struct dp_netdev_flow *f = NULL;
 struct netdev_flow_key *key = [i];
@@ -191,7 +194,7 @@ dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
 bool mfex_hit = !!(mf_mask & (UINT32_C(1) << i));
 
 /* Check for a partial hardware offload match. */
-if (hwol_enabled) {
+if (hwol_enabled && recirc_depth == 0) {
 if (OVS_UNLIKELY(dp_netdev_hw_flow(pmd, packet, ))) {
 /* Packet restoration failed and it was dropped, do not
  * continue processing. */
@@ -224,7 +227,9 @@ dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
 pkt_meta[i].tcp_flags = miniflow_get_tcp_flags(>mf);
 
 key->len = netdev_flow_key_size(miniflow_n_values(>mf));
-key->hash = dpif_netdev_packet_get_rss_hash_orig_pkt(packet, >mf);
+key->hash = (md_is_valid == false)
+? dpif_netdev_packet_get_rss_hash_orig_pkt(packet, >mf)
+: dpif_netdev_packet_get_rss_hash(packet, >mf);
 
 if (emc_enabled) {
 f = emc_lookup(>emc_cache, key);
@@ -262,7 +267,8 @@ dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
  * dpcls_rules[] array.
  */
 if (dpcls_key_idx > 0) {
-struct dpcls *cls = dp_netdev_pmd_lookup_dpcls(pmd, in_port);
+odp_port_t port_no = packets->packets[0]->md.in_port.odp_port;
+struct dpcls *cls = dp_netdev_pmd_lookup_dpcls(pmd, port_no);
 if (OVS_UNLIKELY(!cls)) {
 return -1;
 }
@@ -318,7 +324,9 @@ dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
 
 /* At this point we don't return error anymore, so commit stats here. */
 uint32_t mfex_hit_cnt = __builtin_popcountll(mf_mask);
-pmd_perf_update_counter(>perf_stats, PMD_STAT_RECV, batch_size);
+pmd_perf_update_counter(>perf_stats,
+md_is_valid ? PMD_STAT_RECIRC : PMD_STAT_RECV,
+batch_size);
 pmd_perf_update_counter(>perf_stats, PMD_STAT_PHWOL_HIT, phwol_hits);
 pmd_perf_update_counter(>perf_stats, PMD_STAT_MFEX_OPT_HIT,
 mfex_hit_cnt);
-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 4/5] dpif-netdev: Add function pointer for dpif re-circulate.

2022-05-24 Thread Kumar Amber
The patch adds and re-uses the dpif set command to set the
function pointers to be used to switch between different inner
dpifs.

Signed-off-by: Kumar Amber 
Signed-off-by: Cian Ferriter 
Co-authored-by: Cian Ferriter 
---
 lib/dpif-netdev-private-dpif.c   | 53 +++-
 lib/dpif-netdev-private-dpif.h   | 14 +
 lib/dpif-netdev-private-thread.h |  3 ++
 lib/dpif-netdev.c| 22 +++--
 4 files changed, 83 insertions(+), 9 deletions(-)

diff --git a/lib/dpif-netdev-private-dpif.c b/lib/dpif-netdev-private-dpif.c
index 2dc51270a..49e719bde 100644
--- a/lib/dpif-netdev-private-dpif.c
+++ b/lib/dpif-netdev-private-dpif.c
@@ -39,18 +39,21 @@ enum dpif_netdev_impl_info_idx {
 static struct dpif_netdev_impl_info_t dpif_impls[] = {
 /* The default scalar C code implementation. */
 [DPIF_NETDEV_IMPL_SCALAR] = { .input_func = dp_netdev_input,
+  .recirc_func = dp_netdev_recirculate,
   .probe = NULL,
   .name = "dpif_scalar", },
 
 #if (__x86_64__ && HAVE_AVX512F && HAVE_LD_AVX512_GOOD && __SSE4_2__)
 /* Only available on x86_64 bit builds with SSE 4.2 used for OVS core. */
 [DPIF_NETDEV_IMPL_AVX512] = { .input_func = dp_netdev_input_avx512,
+  .recirc_func = dp_netdev_input_avx512_recirc,
   .probe = dp_netdev_input_avx512_probe,
   .name = "dpif_avx512", },
 #endif
 };
 
 static dp_netdev_input_func default_dpif_func;
+static dp_netdev_recirc_func default_dpif_recirc_func;
 
 dp_netdev_input_func
 dp_netdev_impl_get_default(void)
@@ -81,6 +84,35 @@ dp_netdev_impl_get_default(void)
 return default_dpif_func;
 }
 
+dp_netdev_recirc_func
+dp_netdev_recirc_impl_get_default(void)
+{
+/* For the first call, this will be NULL. Compute the compile time default.
+ */
+if (!default_dpif_recirc_func) {
+int dpif_idx = DPIF_NETDEV_IMPL_SCALAR;
+
+/* Configure-time overriding to run test suite on all implementations. */
+#if (__x86_64__ && HAVE_AVX512F && HAVE_LD_AVX512_GOOD && __SSE4_2__)
+#ifdef DPIF_AVX512_DEFAULT
+dp_netdev_input_func_probe probe;
+
+/* Check if the compiled default is compatible. */
+probe = dpif_impls[DPIF_NETDEV_IMPL_AVX512].probe;
+if (!probe || !probe()) {
+dpif_idx = DPIF_NETDEV_IMPL_AVX512;
+}
+#endif
+#endif
+
+VLOG_INFO("Default re-circulate DPIF implementation is %s.\n",
+  dpif_impls[dpif_idx].name);
+default_dpif_recirc_func = dpif_impls[dpif_idx].recirc_func;
+}
+
+return default_dpif_recirc_func;
+}
+
 void
 dp_netdev_impl_get(struct ds *reply, struct dp_netdev_pmd_thread **pmd_list,
size_t n)
@@ -116,10 +148,12 @@ dp_netdev_impl_get(struct ds *reply, struct 
dp_netdev_pmd_thread **pmd_list,
  * returns the function pointer to the one requested by "name".
  */
 static int32_t
-dp_netdev_impl_get_by_name(const char *name, dp_netdev_input_func *out_func)
+dp_netdev_impl_get_by_name(const char *name, dp_netdev_input_func *dpif_func,
+   dp_netdev_recirc_func *dpif_recirc_func)
 {
 ovs_assert(name);
-ovs_assert(out_func);
+ovs_assert(dpif_func);
+ovs_assert(dpif_recirc_func);
 
 uint32_t i;
 
@@ -129,11 +163,13 @@ dp_netdev_impl_get_by_name(const char *name, 
dp_netdev_input_func *out_func)
 if (dpif_impls[i].probe) {
 int probe_err = dpif_impls[i].probe();
 if (probe_err) {
-*out_func = NULL;
+*dpif_func = NULL;
+*dpif_recirc_func = NULL;
 return probe_err;
 }
 }
-*out_func = dpif_impls[i].input_func;
+*dpif_func = dpif_impls[i].input_func;
+*dpif_recirc_func = dpif_impls[i].recirc_func;
 return 0;
 }
 }
@@ -144,12 +180,15 @@ dp_netdev_impl_get_by_name(const char *name, 
dp_netdev_input_func *out_func)
 int32_t
 dp_netdev_impl_set_default_by_name(const char *name)
 {
-dp_netdev_input_func new_default;
+dp_netdev_input_func new_dpif_default;
+dp_netdev_recirc_func new_dpif_recirc_default;
 
-int32_t err = dp_netdev_impl_get_by_name(name, _default);
+int32_t err = dp_netdev_impl_get_by_name(name, _dpif_default,
+ _dpif_recirc_default);
 
 if (!err) {
-default_dpif_func = new_default;
+default_dpif_func = new_dpif_default;
+default_dpif_recirc_func = new_dpif_recirc_default;
 }
 
 return err;
diff --git a/lib/dpif-netdev-private-dpif.h b/lib/dpif-netdev-private-dpif.h
index 37908de9a..2ba032364 100644
--- a/lib/dpif-netdev-private-dpif.h
+++ b/lib/dpif-netdev-private-dpif.h
@@ -36,6 +36,12 @@ typedef int32_t (*dp_netdev_input_func)(struct 
dp_netdev_pmd_thread *pmd,
 struct dp_packet_batch *packets,

[ovs-dev] [PATCH v2 3/5] dpif-netdev-avx512: Refactor avx512 dpif and create new APIs.

2022-05-24 Thread Kumar Amber
This Patch creates new APIs for avx512 dpif.

Signed-off-by: Kumar Amber 
Signed-off-by: Cian Ferriter 
Co-authored-by: Cian Ferriter 
---
 lib/dpif-netdev-avx512.c   | 32 +++-
 lib/dpif-netdev-private-dpif.c |  4 ++--
 lib/dpif-netdev-private-dpif.h | 12 
 3 files changed, 37 insertions(+), 11 deletions(-)

diff --git a/lib/dpif-netdev-avx512.c b/lib/dpif-netdev-avx512.c
index 11d9a0005..ecf512651 100644
--- a/lib/dpif-netdev-avx512.c
+++ b/lib/dpif-netdev-avx512.c
@@ -59,8 +59,13 @@ struct dpif_userdata {
 struct pkt_flow_meta pkt_meta[NETDEV_MAX_BURST];
 };
 
+static int32_t
+dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
+ struct dp_packet_batch *packets,
+ bool md_is_valid, odp_port_t in_port);
+
 int32_t
-dp_netdev_input_outer_avx512_probe(void)
+dp_netdev_input_avx512_probe(void)
 {
 bool avx512f_available = cpu_has_isa(OVS_CPU_ISA_X86_AVX512F);
 bool bmi2_available = cpu_has_isa(OVS_CPU_ISA_X86_BMI2);
@@ -72,10 +77,10 @@ dp_netdev_input_outer_avx512_probe(void)
 return 0;
 }
 
-int32_t
-dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread *pmd,
- struct dp_packet_batch *packets,
- odp_port_t in_port)
+static inline int32_t ALWAYS_INLINE
+dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
+ struct dp_packet_batch *packets,
+ bool md_is_valid OVS_UNUSED, odp_port_t in_port)
 {
 /* Allocate DPIF userdata. */
 if (OVS_UNLIKELY(!pmd->netdev_input_func_userdata)) {
@@ -380,5 +385,22 @@ dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread 
*pmd,
 return 0;
 }
 
+int32_t
+dp_netdev_input_avx512(struct dp_netdev_pmd_thread *pmd,
+   struct dp_packet_batch *packets,
+   odp_port_t in_port)
+{
+int ret = dp_netdev_input_avx512__(pmd, packets, false, in_port);
+return ret;
+}
+
+int32_t
+dp_netdev_input_avx512_recirc(struct dp_netdev_pmd_thread *pmd,
+  struct dp_packet_batch *packets)
+{
+int ret = dp_netdev_input_avx512__(pmd, packets, true, 0);
+return ret;
+}
+
 #endif
 #endif
diff --git a/lib/dpif-netdev-private-dpif.c b/lib/dpif-netdev-private-dpif.c
index 6f8de9094..2dc51270a 100644
--- a/lib/dpif-netdev-private-dpif.c
+++ b/lib/dpif-netdev-private-dpif.c
@@ -44,8 +44,8 @@ static struct dpif_netdev_impl_info_t dpif_impls[] = {
 
 #if (__x86_64__ && HAVE_AVX512F && HAVE_LD_AVX512_GOOD && __SSE4_2__)
 /* Only available on x86_64 bit builds with SSE 4.2 used for OVS core. */
-[DPIF_NETDEV_IMPL_AVX512] = { .input_func = dp_netdev_input_outer_avx512,
-  .probe = dp_netdev_input_outer_avx512_probe,
+[DPIF_NETDEV_IMPL_AVX512] = { .input_func = dp_netdev_input_avx512,
+  .probe = dp_netdev_input_avx512_probe,
   .name = "dpif_avx512", },
 #endif
 };
diff --git a/lib/dpif-netdev-private-dpif.h b/lib/dpif-netdev-private-dpif.h
index 958669b32..37908de9a 100644
--- a/lib/dpif-netdev-private-dpif.h
+++ b/lib/dpif-netdev-private-dpif.h
@@ -74,12 +74,16 @@ dp_netdev_input(struct dp_netdev_pmd_thread *pmd,
 
 /* AVX512 enabled DPIF implementation and probe functions. */
 int32_t
-dp_netdev_input_outer_avx512_probe(void);
+dp_netdev_input_avx512_probe(void);
 
 int32_t
-dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread *pmd,
- struct dp_packet_batch *packets,
- odp_port_t in_port);
+dp_netdev_input_avx512(struct dp_netdev_pmd_thread *pmd,
+   struct dp_packet_batch *packets,
+   odp_port_t in_port);
+
+int32_t
+dp_netdev_input_avx512_recirc(struct dp_netdev_pmd_thread *pmd,
+  struct dp_packet_batch *packets);
 
 int32_t
 dp_netdev_recirculate(struct dp_netdev_pmd_thread *,
-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 2/5] dpif-netdev: Refactor hash function to own header.

2022-05-24 Thread Kumar Amber
The refactor allows us to use the hash function across
multiple files, which was earlier restricted to
dpif-netdev.c only.

Signed-off-by: Kumar Amber 
Signed-off-by: Cian Ferriter 
Co-authored-by: Cian Ferriter 
---
 lib/dpif-netdev-private-dpcls.h | 23 +++
 lib/dpif-netdev-private-dpif.h  |  4 
 lib/dpif-netdev.c   | 27 ++-
 3 files changed, 29 insertions(+), 25 deletions(-)

diff --git a/lib/dpif-netdev-private-dpcls.h b/lib/dpif-netdev-private-dpcls.h
index 0d5da73c7..f13088ce8 100644
--- a/lib/dpif-netdev-private-dpcls.h
+++ b/lib/dpif-netdev-private-dpcls.h
@@ -25,6 +25,7 @@
 
 #include "cmap.h"
 #include "openvswitch/thread.h"
+#include "dpif-netdev-private-dpif.h"
 
 #ifdef  __cplusplus
 extern "C" {
@@ -124,6 +125,28 @@ dpif_netdev_packet_get_rss_hash_orig_pkt(struct dp_packet 
*packet,
 return hash;
 }
 
+static inline uint32_t
+dpif_netdev_packet_get_rss_hash(struct dp_packet *packet,
+const struct miniflow *mf)
+{
+uint32_t hash, recirc_depth;
+
+if (OVS_LIKELY(dp_packet_rss_valid(packet))) {
+hash = dp_packet_get_rss_hash(packet);
+} else {
+hash = miniflow_hash_5tuple(mf, 0);
+dp_packet_set_rss_hash(packet, hash);
+}
+
+/* The RSS hash must account for the recirculation depth to avoid
+ * collisions in the exact match cache */
+recirc_depth = *recirc_depth_get_unsafe();
+if (OVS_UNLIKELY(recirc_depth)) {
+hash = hash_finish(hash, recirc_depth);
+}
+return hash;
+}
+
 /* Allow other implementations to call dpcls_lookup() for subtable search. */
 bool
 dpcls_lookup(struct dpcls *cls, const struct netdev_flow_key *keys[],
diff --git a/lib/dpif-netdev-private-dpif.h b/lib/dpif-netdev-private-dpif.h
index 15f1f36b3..958669b32 100644
--- a/lib/dpif-netdev-private-dpif.h
+++ b/lib/dpif-netdev-private-dpif.h
@@ -81,4 +81,8 @@ dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread *pmd,
  struct dp_packet_batch *packets,
  odp_port_t in_port);
 
+int32_t
+dp_netdev_recirculate(struct dp_netdev_pmd_thread *,
+  struct dp_packet_batch *);
+
 #endif /* netdev-private.h */
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index b8fd926ad..d095291cd 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -544,8 +544,6 @@ static void dp_netdev_execute_actions(struct 
dp_netdev_pmd_thread *pmd,
   const struct flow *flow,
   const struct nlattr *actions,
   size_t actions_len);
-static void dp_netdev_recirculate(struct dp_netdev_pmd_thread *,
-  struct dp_packet_batch *);
 
 static void dp_netdev_disable_upcall(struct dp_netdev *);
 static void dp_netdev_pmd_reload_done(struct dp_netdev_pmd_thread *pmd);
@@ -7792,28 +7790,6 @@ dp_netdev_upcall(struct dp_netdev_pmd_thread *pmd, 
struct dp_packet *packet_,
  actions, wc, put_actions, dp->upcall_aux);
 }
 
-static inline uint32_t
-dpif_netdev_packet_get_rss_hash(struct dp_packet *packet,
-const struct miniflow *mf)
-{
-uint32_t hash, recirc_depth;
-
-if (OVS_LIKELY(dp_packet_rss_valid(packet))) {
-hash = dp_packet_get_rss_hash(packet);
-} else {
-hash = miniflow_hash_5tuple(mf, 0);
-dp_packet_set_rss_hash(packet, hash);
-}
-
-/* The RSS hash must account for the recirculation depth to avoid
- * collisions in the exact match cache */
-recirc_depth = *recirc_depth_get_unsafe();
-if (OVS_UNLIKELY(recirc_depth)) {
-hash = hash_finish(hash, recirc_depth);
-}
-return hash;
-}
-
 struct packet_batch_per_flow {
 unsigned int byte_count;
 uint16_t tcp_flags;
@@ -8500,11 +8476,12 @@ dp_netdev_input(struct dp_netdev_pmd_thread *pmd,
 return 0;
 }
 
-static void
+int32_t
 dp_netdev_recirculate(struct dp_netdev_pmd_thread *pmd,
   struct dp_packet_batch *packets)
 {
 dp_netdev_input__(pmd, packets, true, 0);
+return 0;
 }
 
 struct dp_netdev_execute_aux {
-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 0/5] DPIF AVX512 Recirculation

2022-05-24 Thread Kumar Amber
The patch adds support for recirculation of packets in AVX512
DPIF which would allow for processing tunneled packets.

---
V2:
* Split DPIF AVX512 into separate patchset.
---
Kumar Amber (5):
  dpif-netdev: Refactor per thread recirc data allocation.
  dpif-netdev: Refactor hash function to own header.
  dpif-netdev-avx512: Refactor avx512 dpif and create new APIs.
  dpif-netdev: Add function pointer for dpif re-circulate.
  dpif-netdev-avx512: Add inner packet handling to dpif.

 lib/dpif-netdev-avx512.c | 50 +--
 lib/dpif-netdev-private-dpcls.h  | 23 +
 lib/dpif-netdev-private-dpif.c   | 59 +++-
 lib/dpif-netdev-private-dpif.h   | 35 ---
 lib/dpif-netdev-private-thread.h |  3 ++
 lib/dpif-netdev.c| 52 
 6 files changed, 169 insertions(+), 53 deletions(-)

-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 1/5] dpif-netdev: Refactor per thread recirc data allocation.

2022-05-24 Thread Kumar Amber
The refactor allows us to use *recirc_depth_get() to obtain
the depth across ovs which was previously limited to only
dpif-netdev.c.

Signed-off-by: Kumar Amber 
Signed-off-by: Cian Ferriter 
Co-authored-by: Cian Ferriter 
---
 lib/dpif-netdev-private-dpif.c | 2 ++
 lib/dpif-netdev-private-dpif.h | 5 +
 lib/dpif-netdev.c  | 3 ---
 3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/lib/dpif-netdev-private-dpif.c b/lib/dpif-netdev-private-dpif.c
index 84d4ec156..6f8de9094 100644
--- a/lib/dpif-netdev-private-dpif.c
+++ b/lib/dpif-netdev-private-dpif.c
@@ -28,6 +28,8 @@
 
 VLOG_DEFINE_THIS_MODULE(dpif_netdev_impl);
 
+DEFINE_EXTERN_PER_THREAD_DATA(recirc_depth, 0);
+
 enum dpif_netdev_impl_info_idx {
 DPIF_NETDEV_IMPL_SCALAR,
 DPIF_NETDEV_IMPL_AVX512
diff --git a/lib/dpif-netdev-private-dpif.h b/lib/dpif-netdev-private-dpif.h
index 0da639c55..15f1f36b3 100644
--- a/lib/dpif-netdev-private-dpif.h
+++ b/lib/dpif-netdev-private-dpif.h
@@ -18,6 +18,11 @@
 #define DPIF_NETDEV_PRIVATE_DPIF_H 1
 
 #include "openvswitch/types.h"
+#include "ovs-thread.h"
+
+#define MAX_RECIRC_DEPTH 6
+/* Use per thread recirc_depth to prevent recirculation loop. */
+DECLARE_EXTERN_PER_THREAD_DATA(uint32_t, recirc_depth);
 
 /* Forward declarations to avoid including files. */
 struct dp_netdev_pmd_thread;
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 21277b236..b8fd926ad 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -97,9 +97,6 @@ VLOG_DEFINE_THIS_MODULE(dpif_netdev);
 #define MIN_TO_MSEC  6
 
 #define FLOW_DUMP_MAX_BATCH 50
-/* Use per thread recirc_depth to prevent recirculation loop. */
-#define MAX_RECIRC_DEPTH 6
-DEFINE_STATIC_PER_THREAD_DATA(uint32_t, recirc_depth, 0)
 
 /* Use instant packet send by default. */
 #define DEFAULT_TX_FLUSH_INTERVAL 0
-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v4] tests/mfex: Improve pcap script for mfex tests.

2022-05-24 Thread Kumar Amber
The mfex pcap generation script is improved for varied length
traffic and also removes the hard coded mfex_pcap and instead uses
the script itself to generate complex traffic patterns for testing.

Signed-off-by: Kumar Amber 
Acked-by: Cian Ferriter 

---
v4:
- Fix MAC and L4 ports to a value.
- Generate Ip addresses in fixed range.
v3:
- Fix comments(Eelco).
- Script generates mac/ip/l4_ports in a fixed range.
v2:
- Add huge page test-skip.
- Change core id to 3 to 0 to allow the mfex config test-case
  to run on any system.
---
---
 tests/automake.mk |   1 -
 tests/mfex_fuzzy.py   |  85 +++---
 tests/pcap/mfex_test.pcap | Bin 416 -> 0 bytes
 tests/system-dpdk.at  |  53 +---
 4 files changed, 92 insertions(+), 47 deletions(-)
 delete mode 100644 tests/pcap/mfex_test.pcap

diff --git a/tests/automake.mk b/tests/automake.mk
index 34ddda6aa..204e86fac 100644
--- a/tests/automake.mk
+++ b/tests/automake.mk
@@ -146,7 +146,6 @@ $(srcdir)/tests/fuzz-regression-list.at: tests/automake.mk
 
 EXTRA_DIST += $(MFEX_AUTOVALIDATOR_TESTS)
 MFEX_AUTOVALIDATOR_TESTS = \
-   tests/pcap/mfex_test.pcap \
tests/mfex_fuzzy.py
 
 OVSDB_CLUSTER_TESTSUITE_AT = \
diff --git a/tests/mfex_fuzzy.py b/tests/mfex_fuzzy.py
index 3efe1152d..b4f8796b4 100755
--- a/tests/mfex_fuzzy.py
+++ b/tests/mfex_fuzzy.py
@@ -3,30 +3,69 @@
 import sys
 
 from scapy.all import RandMAC, RandIP, PcapWriter, RandIP6, RandShort, fuzz
-from scapy.all import IPv6, Dot1Q, IP, Ether, UDP, TCP
+from scapy.all import IPv6, Dot1Q, IP, Ether, UDP, TCP, random
 
+# Relative path for the pcap file location.
 path = str(sys.argv[1]) + "/pcap/fuzzy.pcap"
+# The number of packets generated will be size * 8.
+size = int(sys.argv[2])
+# Traffic option is used to choose between fuzzy or simple packet type.
+if (len(sys.argv) > 3):
+traffic_opt = str(sys.argv[3])
+else:
+traffic_opt = ""
+
 pktdump = PcapWriter(path, append=False, sync=True)
 
-for i in range(0, 2000):
-
-# Generate random protocol bases, use a fuzz() over the combined packet
-# for full fuzzing.
-eth = Ether(src=RandMAC(), dst=RandMAC())
-vlan = Dot1Q()
-ipv4 = IP(src=RandIP(), dst=RandIP())
-ipv6 = IPv6(src=RandIP6(), dst=RandIP6())
-udp = UDP(dport=RandShort(), sport=RandShort())
-tcp = TCP(dport=RandShort(), sport=RandShort())
-
-# IPv4 packets with fuzzing
-pktdump.write(fuzz(eth / ipv4 / udp))
-pktdump.write(fuzz(eth / ipv4 / tcp))
-pktdump.write(fuzz(eth / vlan / ipv4 / udp))
-pktdump.write(fuzz(eth / vlan / ipv4 / tcp))
-
-# IPv6 packets with fuzzing
-pktdump.write(fuzz(eth / ipv6 / udp))
-pktdump.write(fuzz(eth / ipv6 / tcp))
-pktdump.write(fuzz(eth / vlan / ipv6 / udp))
-pktdump.write(fuzz(eth / vlan / ipv6 / tcp))
+pkt = []
+
+for i in range(0, size):
+if traffic_opt == "fuzzy":
+
+eth = Ether(src=RandMAC(), dst=RandMAC())
+vlan = Dot1Q()
+udp = UDP(dport=RandShort(), sport=RandShort())
+ipv4 = IP(src=RandIP(), dst=RandIP(), len=random.randint(0, 100))
+ipv6 = IPv6(src=RandIP6(), dst=RandIP6(), plen=random.randint(0, 100))
+tcp = TCP(dport=RandShort(), sport=RandShort(), flags='S',
+  dataofs=random.randint(0, 15))
+
+# IPv4 packets with fuzzing
+pkt.append(fuzz(eth / ipv4 / udp))
+pkt.append(fuzz(eth / ipv4 / tcp))
+pkt.append(fuzz(eth / vlan / ipv4 / udp))
+pkt.append(fuzz(eth / vlan / ipv4 / tcp))
+
+# IPv6 packets with fuzzing
+pkt.append(fuzz(eth / ipv6 / udp))
+pkt.append(fuzz(eth / ipv6 / tcp))
+pkt.append(fuzz(eth / vlan / ipv6 / udp))
+pkt.append(fuzz(eth / vlan / ipv6 / tcp))
+
+else:
+mac_addr = "52:54:00:FF:FF:FF"
+src_port = 200
+dst_port = 1000
+eth = Ether(src=mac_addr, dst=mac_addr)
+vlan = Dot1Q(vlan=(i % 10))
+udp = UDP(dport=src_port, sport=dst_port)
+# IPv4 address range limits to 255
+ipv4_addr = "192.168.150." + str((i % 255))
+ipv6_addr = "2001:0db8:85a3:::8a2e:0370:" + str(i % 0x)
+ipv4 = IP(src=ipv4_addr, dst=ipv4_addr)
+ipv6 = IPv6(src=ipv6_addr, dst=ipv6_addr)
+tcp = TCP(dport=src_port, sport=dst_port, flags='S')
+
+# IPv4 packets
+pkt.append(eth / ipv4 / udp)
+pkt.append(eth / ipv4 / tcp)
+pkt.append(eth / vlan / ipv4 / udp)
+pkt.append(eth / vlan / ipv4 / tcp)
+
+# IPv6 packets
+pkt.append(eth / ipv6 / udp)
+pkt.append(eth / ipv6 / tcp)
+pkt.append(eth / vlan / ipv6 / udp)
+pkt.append(eth / vlan / ipv6 / tcp)
+
+pktdump.write(pkt)
diff --git a/tests/pcap/mfex_test.pcap b/tests/pcap/mfex_test.pcap
deleted file mode 100644
index 
1aac67b8d643ecb016c758cba4cc32212a80f52a..000

[ovs-dev] [PATCH v3] tests/mfex: Improve pcap script for mfex tests.

2022-05-18 Thread Kumar Amber
The mfex pcap generation script is improved for varied length
traffic and also removes the hard coded mfex_pcap and instead uses
the script itself to generate complex traffic patterns for testing.

Signed-off-by: Kumar Amber 
Acked-by: Cian Ferriter 

---
v3:
- Fix comments(Eelco).
- Script generates mac/ip/l4_ports in a fixed range.
v2:
- Add huge page test-skip.
- Change core id from 3 to 0 to allow the mfex config test-case
  to run on any system.
---
---
 tests/automake.mk |   1 -
 tests/mfex_fuzzy.py   |  80 +++---
 tests/pcap/mfex_test.pcap | Bin 416 -> 0 bytes
 tests/system-dpdk.at  |  62 ++---
 4 files changed, 96 insertions(+), 47 deletions(-)
 delete mode 100644 tests/pcap/mfex_test.pcap

diff --git a/tests/automake.mk b/tests/automake.mk
index 34ddda6aa..204e86fac 100644
--- a/tests/automake.mk
+++ b/tests/automake.mk
@@ -146,7 +146,6 @@ $(srcdir)/tests/fuzz-regression-list.at: tests/automake.mk
 
 EXTRA_DIST += $(MFEX_AUTOVALIDATOR_TESTS)
 MFEX_AUTOVALIDATOR_TESTS = \
-   tests/pcap/mfex_test.pcap \
tests/mfex_fuzzy.py
 
 OVSDB_CLUSTER_TESTSUITE_AT = \
diff --git a/tests/mfex_fuzzy.py b/tests/mfex_fuzzy.py
index 3efe1152d..5a15c49e1 100755
--- a/tests/mfex_fuzzy.py
+++ b/tests/mfex_fuzzy.py
@@ -3,30 +3,64 @@
 import sys
 
 from scapy.all import RandMAC, RandIP, PcapWriter, RandIP6, RandShort, fuzz
-from scapy.all import IPv6, Dot1Q, IP, Ether, UDP, TCP
+from scapy.all import IPv6, Dot1Q, IP, Ether, UDP, TCP, random
 
+# Relative path for the pcap file location.
 path = str(sys.argv[1]) + "/pcap/fuzzy.pcap"
+# The number of packets generated will be size * 8.
+size = int(sys.argv[2])
+# Traffic option is used to choose between fuzzy or simple packet type.
+if (len(sys.argv) > 3):
+traffic_opt = str(sys.argv[3])
+else:
+traffic_opt = ""
+
 pktdump = PcapWriter(path, append=False, sync=True)
 
-for i in range(0, 2000):
-
-# Generate random protocol bases, use a fuzz() over the combined packet
-# for full fuzzing.
-eth = Ether(src=RandMAC(), dst=RandMAC())
-vlan = Dot1Q()
-ipv4 = IP(src=RandIP(), dst=RandIP())
-ipv6 = IPv6(src=RandIP6(), dst=RandIP6())
-udp = UDP(dport=RandShort(), sport=RandShort())
-tcp = TCP(dport=RandShort(), sport=RandShort())
-
-# IPv4 packets with fuzzing
-pktdump.write(fuzz(eth / ipv4 / udp))
-pktdump.write(fuzz(eth / ipv4 / tcp))
-pktdump.write(fuzz(eth / vlan / ipv4 / udp))
-pktdump.write(fuzz(eth / vlan / ipv4 / tcp))
-
-# IPv6 packets with fuzzing
-pktdump.write(fuzz(eth / ipv6 / udp))
-pktdump.write(fuzz(eth / ipv6 / tcp))
-pktdump.write(fuzz(eth / vlan / ipv6 / udp))
-pktdump.write(fuzz(eth / vlan / ipv6 / tcp))
+pkt = []
+
+for i in range(0, size):
+if traffic_opt == "fuzzy":
+
+eth = Ether(src=RandMAC(), dst=RandMAC())
+vlan = Dot1Q()
+udp = UDP(dport=RandShort(), sport=RandShort())
+ipv4 = IP(src=RandIP(), dst=RandIP(), len=random.randint(0, 100))
+ipv6 = IPv6(src=RandIP6(), dst=RandIP6(), plen=random.randint(0, 100))
+tcp = TCP(dport=RandShort(), sport=RandShort(), flags='S',
+  dataofs=random.randint(0, 15))
+# IPv4 packets with fuzzing
+pkt.append(fuzz(eth / ipv4 / udp))
+pkt.append(fuzz(eth / ipv4 / tcp))
+pkt.append(fuzz(eth / vlan / ipv4 / udp))
+pkt.append(fuzz(eth / vlan / ipv4 / tcp))
+
+# IPv6 packets with fuzzing
+pkt.append(fuzz(eth / ipv6 / udp))
+pkt.append(fuzz(eth / ipv6 / tcp))
+pkt.append(fuzz(eth / vlan / ipv6 / udp))
+pkt.append(fuzz(eth / vlan / ipv6 / tcp))
+
+else:
+mac_addr = "52:54:00:FF:FF:%02x" % (random.randint(0, 255),)
+src_port = random.randrange(600, 800)
+dst_port = random.randrange(800, 1000)
+eth = Ether(src=mac_addr, dst=mac_addr)
+vlan = Dot1Q(vlan=random.randrange(1, 20))
+udp = UDP(dport=src_port, sport=dst_port)
+ipv4 = IP(src=RandIP()._fix(), dst=RandIP()._fix())
+ipv6 = IPv6(src=RandIP6()._fix(), dst=RandIP6()._fix())
+tcp = TCP(dport=src_port, sport=dst_port, flags='S')
+# IPv4 packets
+pkt.append(eth / ipv4 / udp)
+pkt.append(eth / ipv4 / tcp)
+pkt.append(eth / vlan / ipv4 / udp)
+pkt.append(eth / vlan / ipv4 / tcp)
+
+# IPv6 packets
+pkt.append(eth / ipv6 / udp)
+pkt.append(eth / ipv6 / tcp)
+pkt.append(eth / vlan / ipv6 / udp)
+pkt.append(eth / vlan / ipv6 / tcp)
+
+pktdump.write(pkt)
diff --git a/tests/pcap/mfex_test.pcap b/tests/pcap/mfex_test.pcap
deleted file mode 100644
index 
1aac67b8d643ecb016c758cba4cc32212a80f52a..
GIT binary patch
literal 0
HcmV?d1

literal 416
zcmca|c+)~A1{MYw`2U}Qff2}QK`M68ITRa|G@yFii5$Gfk6YL%z>@uY&}o|
z2s4N<1VH2&7y^V87$)XGOtD~MV

[ovs-dev] [PATCH v1] Pmd.at: fix dpcls and dpif configuration test cases.

2022-05-17 Thread Kumar Amber
Without running the set command first, the string matching
fails on the get command because the DPCLS prio value differs
between default builds: with an --enable-autovalidator build
the auto-validator prio is set to 255, and if the build is
scalar then the generic value defaults to 255.

The same problem is seen with dpif where re-arranging the get
command after set makes it consistent across any builds.

Fixes: cc0a87b11c (pmd.at: Add test-cases for DPCLS and DPIF commands.)
Signed-off-by: Kumar Amber 
---
 tests/pmd.at | 15 ---
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/tests/pmd.at b/tests/pmd.at
index 0a451f33c..3962dd2bd 100644
--- a/tests/pmd.at
+++ b/tests/pmd.at
@@ -1113,15 +1113,15 @@ AT_SETUP([PMD - dpif configuration])
 OVS_VSWITCHD_START([], [], [], [--dummy-numa 0,0])
 AT_CHECK([ovs-vsctl add-port br0 p1 -- set Interface p1 type=dummy-pmd])
 
+AT_CHECK([ovs-appctl dpif-netdev/dpif-impl-set dpif_scalar], [0], [dnl
+DPIF implementation set to dpif_scalar.
+])
+
 AT_CHECK([ovs-vsctl show], [], [stdout])
 AT_CHECK([ovs-appctl dpif-netdev/dpif-impl-get | grep "dpif_scalar"], [], [dnl
   dpif_scalar (pmds: 0)
 ])
 
-AT_CHECK([ovs-appctl dpif-netdev/dpif-impl-set dpif_scalar], [0], [dnl
-DPIF implementation set to dpif_scalar.
-])
-
 OVS_VSWITCHD_STOP
 AT_CLEANUP
 
@@ -1130,13 +1130,6 @@ OVS_VSWITCHD_START([], [], [], [--dummy-numa 0,0])
 AT_CHECK([ovs-vsctl add-port br0 p1 -- set Interface p1 type=dummy-pmd])
 
 AT_CHECK([ovs-vsctl show], [], [stdout])
-AT_CHECK([ovs-appctl dpif-netdev/subtable-lookup-prio-get | grep generic], [], 
[dnl
-  1 : generic
-])
-
-AT_CHECK([ovs-appctl dpif-netdev/subtable-lookup-prio-get | grep 
autovalidator], [], [dnl
-  0 : autovalidator
-])
 
 AT_CHECK([ovs-appctl dpif-netdev/subtable-lookup-prio-set autovalidator 3], 
[0], [dnl
 Lookup priority change affected 0 dpcls ports and 0 subtables.
-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2] tests/mfex: Improve pcap script for mfex tests.

2022-05-11 Thread Kumar Amber
The mfex pcap generation script is improved for varied length
traffic and also removes the hard coded mfex_pcap and instead uses
the script itself to generate complex traffic patterns for testing.

Signed-off-by: Kumar Amber 
Acked-by: Cian Ferriter 

---
v2:
- Add huge page test-skip.
- Change core id from 3 to 0 to allow the mfex config test-case
  to run on any system.
---
---
 tests/automake.mk |   1 -
 tests/mfex_fuzzy.py   |  66 +++---
 tests/pcap/mfex_test.pcap | Bin 416 -> 0 bytes
 tests/system-dpdk.at  |  44 +
 4 files changed, 77 insertions(+), 34 deletions(-)
 delete mode 100644 tests/pcap/mfex_test.pcap

diff --git a/tests/automake.mk b/tests/automake.mk
index 34ddda6aa..204e86fac 100644
--- a/tests/automake.mk
+++ b/tests/automake.mk
@@ -146,7 +146,6 @@ $(srcdir)/tests/fuzz-regression-list.at: tests/automake.mk
 
 EXTRA_DIST += $(MFEX_AUTOVALIDATOR_TESTS)
 MFEX_AUTOVALIDATOR_TESTS = \
-   tests/pcap/mfex_test.pcap \
tests/mfex_fuzzy.py
 
 OVSDB_CLUSTER_TESTSUITE_AT = \
diff --git a/tests/mfex_fuzzy.py b/tests/mfex_fuzzy.py
index 3efe1152d..dbde5fe1b 100755
--- a/tests/mfex_fuzzy.py
+++ b/tests/mfex_fuzzy.py
@@ -3,30 +3,58 @@
 import sys
 
 from scapy.all import RandMAC, RandIP, PcapWriter, RandIP6, RandShort, fuzz
-from scapy.all import IPv6, Dot1Q, IP, Ether, UDP, TCP
+from scapy.all import IPv6, Dot1Q, IP, Ether, UDP, TCP, random
 
+# Relative path for the pcap file location.
 path = str(sys.argv[1]) + "/pcap/fuzzy.pcap"
+# The number of packets generated will be size * 8.
+size = int(sys.argv[2])
+# Traffic option is used to choose between fuzzy or simple packet type.
+traffic_opt = str(sys.argv[3])
+
 pktdump = PcapWriter(path, append=False, sync=True)
 
-for i in range(0, 2000):
+pkt = []
+
+for i in range(0, size):
 
-# Generate random protocol bases, use a fuzz() over the combined packet
-# for full fuzzing.
 eth = Ether(src=RandMAC(), dst=RandMAC())
 vlan = Dot1Q()
-ipv4 = IP(src=RandIP(), dst=RandIP())
-ipv6 = IPv6(src=RandIP6(), dst=RandIP6())
 udp = UDP(dport=RandShort(), sport=RandShort())
-tcp = TCP(dport=RandShort(), sport=RandShort())
-
-# IPv4 packets with fuzzing
-pktdump.write(fuzz(eth / ipv4 / udp))
-pktdump.write(fuzz(eth / ipv4 / tcp))
-pktdump.write(fuzz(eth / vlan / ipv4 / udp))
-pktdump.write(fuzz(eth / vlan / ipv4 / tcp))
-
-# IPv6 packets with fuzzing
-pktdump.write(fuzz(eth / ipv6 / udp))
-pktdump.write(fuzz(eth / ipv6 / tcp))
-pktdump.write(fuzz(eth / vlan / ipv6 / udp))
-pktdump.write(fuzz(eth / vlan / ipv6 / tcp))
+
+if traffic_opt == "fuzzy":
+
+ipv4 = IP(src=RandIP(), dst=RandIP(), len=random.randint(0, 100))
+ipv6 = IPv6(src=RandIP6(), dst=RandIP6(), plen=random.randint(0, 100))
+tcp = TCP(dport=RandShort(), sport=RandShort(), flags='S',
+  dataofs=random.randint(0, 20))
+# IPv4 packets with fuzzing
+pkt.append(fuzz(eth / ipv4 / udp))
+pkt.append(fuzz(eth / ipv4 / tcp))
+pkt.append(fuzz(eth / vlan / ipv4 / udp))
+pkt.append(fuzz(eth / vlan / ipv4 / tcp))
+
+# IPv6 packets with fuzzing
+pkt.append(fuzz(eth / ipv6 / udp))
+pkt.append(fuzz(eth / ipv6 / tcp))
+pkt.append(fuzz(eth / vlan / ipv6 / udp))
+pkt.append(fuzz(eth / vlan / ipv6 / tcp))
+
+else:
+
+ipv4 = IP(src=RandIP(), dst=RandIP())
+ipv6 = IPv6(src=RandIP6(), dst=RandIP6())
+tcp = TCP(dport=RandShort(), sport=RandShort(), flags='S')
+# IPv4 packets
+pkt.append(eth / ipv4 / udp)
+pkt.append(eth / ipv4 / tcp)
+pkt.append(eth / vlan / ipv4 / udp)
+pkt.append(eth / vlan / ipv4 / tcp)
+
+# IPv6 packets
+pkt.append(eth / ipv6 / udp)
+pkt.append(eth / ipv6 / tcp)
+pkt.append(eth / vlan / ipv6 / udp)
+pkt.append(eth / vlan / ipv6 / tcp)
+
+pktdump.write(pkt)
diff --git a/tests/pcap/mfex_test.pcap b/tests/pcap/mfex_test.pcap
deleted file mode 100644
index 
1aac67b8d643ecb016c758cba4cc32212a80f52a..
GIT binary patch
literal 0
HcmV?d1

literal 416
zcmca|c+)~A1{MYw`2U}Qff2}QK`M68ITRa|G@yFii5$Gfk6YL%z>@uY&}o|
z2s4N<1VH2&7y^V87$)XGOtD~MV$cFgfG~zBGGJ2#YtF$KST_NTIwYriok6N4Vm)gX-Q@c^{cp<7_5LgK^UuU{2>VS0RZ!RQ+EIW

diff --git a/tests/system-dpdk.at b/tests/system-dpdk.at
index 7d2715c4a..27ba42954 100644
--- a/tests/system-dpdk.at
+++ b/tests/system-dpdk.at
@@ -226,12 +226,14 @@ dnl 
--
 dnl Add standard DPDK PHY port
 AT_SETUP([OVS-DPDK - MFEX Autovalidator])
 AT_KEYWORDS([dpdk])
-
+OVS_DPDK_PRE_CHECK()
+AT_SKIP_IF([! $PYTHON3 -c "import scapy"], [], [])
+AT_CHECK([$PYTHON3 $srcdir/mfex_fuzzy.py $srcdir 2000 0], [], [stdout])
 OVS_DPDK_START()
 
 dnl Add userspace bridge

[ovs-dev] [PATCH v7] dpcls: Change info-get function to fetch dpcls usage stats.

2022-05-11 Thread Kumar Amber
Modified the dpcls info-get command output to include
the usage count for the different dpcls implementations.

$ovs-appctl dpif-netdev/subtable-lookup-info-get

Available dpcls implementations:
  autovalidator (Use count: 1, Priority: 5)
  generic (Use count: 0, Priority: 1)
  avx512_gather (Use count: 0, Priority: 3)

Test case to verify changes:
1061: PMD - dpcls configuration ok

Signed-off-by: Kumar Amber 
Signed-off-by: Harry van Haaren 
Signed-off-by: Eelco Chaudron 
Co-authored-by: Harry van Haaren 
Co-authored-by: Eelco Chaudron 
Acked-by: Eelco Chaudron 

---
v7:
- Rename dpcls subtable command.
---
---
 Documentation/topics/dpdk/bridge.rst | 20 +++---
 lib/dpif-netdev-lookup.c | 98 +---
 lib/dpif-netdev-lookup.h | 18 -
 lib/dpif-netdev-private-dpcls.h  |  1 +
 lib/dpif-netdev.c| 40 ++--
 tests/pmd.at | 32 -
 6 files changed, 138 insertions(+), 71 deletions(-)

diff --git a/Documentation/topics/dpdk/bridge.rst 
b/Documentation/topics/dpdk/bridge.rst
index ceee91015..1f626c7c2 100644
--- a/Documentation/topics/dpdk/bridge.rst
+++ b/Documentation/topics/dpdk/bridge.rst
@@ -179,11 +179,11 @@ these CPU ISA additions are available, and to allow the 
user to enable them.
 OVS provides multiple implementations of dpcls. The following command enables
 the user to check what implementations are available in a running instance::
 
-$ ovs-appctl dpif-netdev/subtable-lookup-prio-get
-Available lookup functions (priority : name)
-0 : autovalidator
-1 : generic
-0 : avx512_gather
+$ ovs-appctl dpif-netdev/subtable-lookup-info-get
+Available dpcls implementations:
+autovalidator (Use count: 1, Priority: 5)
+generic (Use count: 0, Priority: 1)
+avx512_gather (Use count: 0, Priority: 3)
 
 To set the priority of a lookup function, run the ``prio-set`` command::
 
@@ -195,11 +195,11 @@ above indicates that one subtable of one DPCLS port is 
has changed its lookup
 function due to the command being run. To verify the prioritization, re-run the
 get command, note the updated priority of the ``avx512_gather`` function::
 
-$ ovs-appctl dpif-netdev/subtable-lookup-prio-get
-Available lookup functions (priority : name)
-0 : autovalidator
-1 : generic
-5 : avx512_gather
+$ ovs-appctl dpif-netdev/subtable-lookup-info-get
+Available dpcls implementations:
+autovalidator (Use count: 1, Priority: 5)
+generic (Use count: 0, Priority: 1)
+avx512_gather (Use count: 0, Priority: 3)
 
 If two lookup functions have the same priority, the first one in the list is
 chosen, and the 2nd occurance of that priority is not used. Put in logical
diff --git a/lib/dpif-netdev-lookup.c b/lib/dpif-netdev-lookup.c
index bd0a99abe..e641e4028 100644
--- a/lib/dpif-netdev-lookup.c
+++ b/lib/dpif-netdev-lookup.c
@@ -36,18 +36,21 @@ static struct dpcls_subtable_lookup_info_t 
subtable_lookups[] = {
 { .prio = 0,
 #endif
   .probe = dpcls_subtable_autovalidator_probe,
-  .name = "autovalidator", },
+  .name = "autovalidator",
+  .usage_cnt = ATOMIC_COUNT_INIT(0), },
 
 /* The default scalar C code implementation. */
 { .prio = 1,
   .probe = dpcls_subtable_generic_probe,
-  .name = "generic", },
+  .name = "generic",
+  .usage_cnt = ATOMIC_COUNT_INIT(0), },
 
 #if (__x86_64__ && HAVE_AVX512F && HAVE_LD_AVX512_GOOD && __SSE4_2__)
 /* Only available on x86_64 bit builds with SSE 4.2 used for OVS core. */
 { .prio = 0,
   .probe = dpcls_subtable_avx512_gather_probe,
-  .name = "avx512_gather", },
+  .name = "avx512_gather",
+  .usage_cnt = ATOMIC_COUNT_INIT(0), },
 #else
 /* Disabling AVX512 at compile time, as compile time requirements not met.
  * This could be due to a number of reasons:
@@ -64,7 +67,7 @@ static struct dpcls_subtable_lookup_info_t subtable_lookups[] 
= {
 #endif
 };
 
-int32_t
+int
 dpcls_subtable_lookup_info_get(struct dpcls_subtable_lookup_info_t **out_ptr)
 {
 if (out_ptr == NULL) {
@@ -76,7 +79,7 @@ dpcls_subtable_lookup_info_get(struct 
dpcls_subtable_lookup_info_t **out_ptr)
 }
 
 /* sets the priority of the lookup function with "name". */
-int32_t
+int
 dpcls_subtable_set_prio(const char *name, uint8_t priority)
 {
 for (int i = 0; i < ARRAY_SIZE(subtable_lookups); i++) {
@@ -93,32 +96,81 @@ dpcls_subtable_set_prio(const char *name, uint8_t priority)
 }
 
 dpcls_subtable_lookup_func
-dpcls_subtable_get_best_impl(uint32_t u0_bit_count, uint32_t u1_bit_count)
+dpcls_subtable_get_best_impl(uint32_t u0_bit_count, uint32_t u1_bit_count,
+ struct dpcls_subtable_lookup_info_t **info)
 {
-/* Iter over each subtable impl, a

[ovs-dev] [PATCH v7 4/4] dpif-netdev/mfex: Add ipv6 profile based hashing.

2022-05-05 Thread Kumar Amber
For packets which don't already have a hash calculated,
miniflow_hash_5tuple() calculates the hash of a packet
using the previously built miniflow.

This commit adds IPv6 profile specific hashing which
uses fixed offsets into the packet to improve hashing
performance.

Signed-off-by: Kumar Amber 
---
 lib/dp-packet.h  | 43 
 lib/dpif-netdev-extract-avx512.c |  8 +++---
 lib/flow.c   |  4 +++
 3 files changed, 51 insertions(+), 4 deletions(-)

diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index 7c5da258a..277bb51b8 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -1117,6 +1117,49 @@ dp_packet_update_rss_hash_ipv4_tcp_udp(struct dp_packet 
*packet)
 dp_packet_set_rss_hash(packet, hash);
 }
 
+static inline void ALWAYS_INLINE
+dp_packet_update_rss_hash_ipv6_tcp_udp(struct dp_packet *packet)
+{
+if (dp_packet_rss_valid(packet)) {
+return;
+}
+
+const uint8_t *pkt = dp_packet_data(packet);
+const uint16_t l3_ofs = packet->l3_ofs;
+uint32_t ipv6_src_off = offsetof(struct ovs_16aligned_ip6_hdr, ip6_src);
+uint32_t ipv6_dst_off = offsetof(struct ovs_16aligned_ip6_hdr, ip6_dst);
+uint32_t ipv6_proto_off = offsetof(struct ovs_16aligned_ip6_hdr,
+   ip6_ctlun.ip6_un1.ip6_un1_nxt);
+const void *ipv6_src_l = [l3_ofs + ipv6_src_off];
+const void *ipv6_src_h = [l3_ofs + ipv6_src_off + 8];
+const void *ipv6_dst_l = [l3_ofs + ipv6_dst_off];
+const void *ipv6_dst_h = [l3_ofs + ipv6_dst_off + 8];
+const void *l4_ports = [packet->l4_ofs];
+uint64_t ipv6_src_lo, ipv6_src_hi;
+uint64_t ipv6_dst_lo, ipv6_dst_hi;
+uint32_t ports;
+uint32_t hash = 0;
+
+memcpy(_src_lo, ipv6_src_l, sizeof ipv6_src_lo);
+memcpy(_src_hi, ipv6_src_h, sizeof ipv6_src_hi);
+memcpy(_dst_lo, ipv6_dst_l, sizeof ipv6_dst_lo);
+memcpy(_dst_hi, ipv6_dst_h, sizeof ipv6_dst_hi);
+memcpy(, l4_ports, sizeof ports);
+
+/* IPv6 Src and Dst. */
+hash = hash_add64(hash, ipv6_src_lo);
+hash = hash_add64(hash, ipv6_src_hi);
+hash = hash_add64(hash, ipv6_dst_lo);
+hash = hash_add64(hash, ipv6_dst_hi);
+/* IPv6 proto. */
+hash = hash_add(hash, pkt[l3_ofs + ipv6_proto_off]);
+/* L4 ports. */
+hash = hash_add(hash, ports);
+hash = hash_finish(hash, 42);
+
+dp_packet_set_rss_hash(packet, hash);
+}
+
 #ifdef  __cplusplus
 }
 #endif
diff --git a/lib/dpif-netdev-extract-avx512.c b/lib/dpif-netdev-extract-avx512.c
index 708cf657a..838136e45 100644
--- a/lib/dpif-netdev-extract-avx512.c
+++ b/lib/dpif-netdev-extract-avx512.c
@@ -868,7 +868,7 @@ mfex_avx512_process(struct dp_packet_batch *packets,
 
 /* Process UDP header. */
 mfex_handle_ipv6_l4((void *)[54], [9]);
-
+dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
 } break;
 
 case PROFILE_ETH_IPV6_TCP: {
@@ -891,7 +891,7 @@ mfex_avx512_process(struct dp_packet_batch *packets,
 continue;
 }
 mfex_handle_tcp_flags(tcp, [9]);
-
+dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
 } break;
 
 case PROFILE_ETH_VLAN_IPV6_TCP: {
@@ -917,7 +917,7 @@ mfex_avx512_process(struct dp_packet_batch *packets,
 continue;
 }
 mfex_handle_tcp_flags(tcp, [10]);
-
+dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
 } break;
 
 case PROFILE_ETH_VLAN_IPV6_UDP: {
@@ -938,7 +938,7 @@ mfex_avx512_process(struct dp_packet_batch *packets,
 
 /* Process UDP header. */
 mfex_handle_ipv6_l4((void *)[58], [10]);
-
+dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
 } break;
 default:
 break;
diff --git a/lib/flow.c b/lib/flow.c
index 8ab9df3fc..b1e1fb34d 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -1019,6 +1019,8 @@ miniflow_extract(struct dp_packet *packet, struct 
miniflow *dst)
 miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
 if (dl_type == htons(ETH_TYPE_IP)) {
 dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
+} else if (dl_type == htons(ETH_TYPE_IPV6)) {
+dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
 }
 }
 }
@@ -1032,6 +1034,8 @@ miniflow_extract(struct dp_packet *packet, struct 
miniflow *dst)
 miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
 if (dl_type == htons(ETH_TYPE_IP)) {
 dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
+} else if (dl_type == htons(ETH_TYPE_IPV6)) {
+dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
 }
 }
 } else if (OVS_LIKELY(nw_proto == IPPROTO_SCTP)) {
-- 

[ovs-dev] [PATCH v7 3/4] dpif-netdev/mfex: Add AVX512 ipv6 traffic profiles

2022-05-05 Thread Kumar Amber
Add AVX512 Ipv6 optimized profile for vlan/IPv6/UDP and
vlan/IPv6/TCP, IPv6/UDP and IPv6/TCP.

The MFEX autovalidation test-case already has IPv6 support for
validating against the scalar mfex.

Signed-off-by: Kumar Amber 
Signed-off-by: Harry van Haaren 
Co-authored-by: Harry van Haaren 

---
v7:
- Fix length checks for plen.
v5:
- Add variable length checks for IPv6 and TCP.
v4:
- Rebase to master.
v2:
- Fix CI build error.
- Fix check-patch sign-offs.
---
---
 NEWS  |   6 +-
 lib/automake.mk   |   1 +
 lib/dpif-netdev-extract-avx512.c  | 306 +-
 lib/dpif-netdev-private-extract.c |  51 -
 lib/dpif-netdev-private-extract.h |  12 ++
 5 files changed, 371 insertions(+), 5 deletions(-)

diff --git a/NEWS b/NEWS
index eece0d0b2..f963967d8 100644
--- a/NEWS
+++ b/NEWS
@@ -29,7 +29,11 @@ Post-v2.17.0
- Windows:
  * Conntrack support for TCPv6, UDPv6, ICMPv6, FTPv6.
  * IPv6 Geneve tunnel support.
-
+   - Userspace datapath:
+ * Add AVX512 optimized profiles to miniflow extract for IPv6/UDP and
+   IPv6/TCP.
+ * Add AVX512 optimized profiles to miniflow extract for VLAN/IPv6/UDP
+   and VLAN/IPv6/TCP.
 
 v2.17.0 - 17 Feb 2022
 -
diff --git a/lib/automake.mk b/lib/automake.mk
index a23cdc4ad..139220c23 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -33,6 +33,7 @@ lib_libopenvswitchavx512_la_CFLAGS = \
-mavx512f \
-mavx512bw \
-mavx512dq \
+   -mavx512vl \
-mbmi \
-mbmi2 \
-fPIC \
diff --git a/lib/dpif-netdev-extract-avx512.c b/lib/dpif-netdev-extract-avx512.c
index e77bb3214..708cf657a 100644
--- a/lib/dpif-netdev-extract-avx512.c
+++ b/lib/dpif-netdev-extract-avx512.c
@@ -49,6 +49,7 @@
 #include "dpif-netdev-private-extract.h"
 #include "dpif-netdev-private-flow.h"
 #include "dp-packet.h"
+#include "packets.h"
 
 /* AVX512-BW level permutex2var_epi8 emulation. */
 static inline __m512i
@@ -138,6 +139,7 @@ _mm512_maskz_permutexvar_epi8_wrap(__mmask64 kmask, __m512i 
idx, __m512i a)
 #define PATTERN_ETHERTYPE_MASK PATTERN_ETHERTYPE_GEN(0xFF, 0xFF)
 #define PATTERN_ETHERTYPE_IPV4 PATTERN_ETHERTYPE_GEN(0x08, 0x00)
 #define PATTERN_ETHERTYPE_DT1Q PATTERN_ETHERTYPE_GEN(0x81, 0x00)
+#define PATTERN_ETHERTYPE_IPV6 PATTERN_ETHERTYPE_GEN(0x86, 0xDD)
 
 /* VLAN (Dot1Q) patterns and masks. */
 #define PATTERN_DT1Q_MASK   \
@@ -202,6 +204,40 @@ _mm512_maskz_permutexvar_epi8_wrap(__mmask64 kmask, 
__m512i idx, __m512i a)
   NU, NU, NU, NU, NU, NU, NU, NU, 38, 39, 40, 41, NU, NU, NU, NU, /* TCP */   \
   NU, NU, NU, NU, NU, NU, NU, NU, /* Unused. */
 
+/* Generator for checking IPv6 ver. */
+#define PATTERN_IPV6_GEN(VER_TRC, PROTO)  \
+  VER_TRC, /* Version: 4bits and Traffic class: 4bits. */ \
+  0, 0, 0, /* Traffic class: 4bits and Flow Label: 24bits. */ \
+  0, 0,/* Payload length 16bits. */   \
+  PROTO, 0,/* Next Header 8bits and Hop limit 8bits. */   \
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Src IP: 128bits. */  \
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Dst IP: 128bits. */
+
+#define PATTERN_IPV6_MASK PATTERN_IPV6_GEN(0xF0, 0xFF)
+#define PATTERN_IPV6_UDP PATTERN_IPV6_GEN(0x60, 0x11)
+#define PATTERN_IPV6_TCP PATTERN_IPV6_GEN(0x60, 0x06)
+
+#define PATTERN_IPV6_SHUFFLE  \
+   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, NU, NU, /* Ether */ \
+  22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, /* IPv6 */  \
+  38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, /* IPv6 */  \
+  NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, /* Unused */
+
+/* VLAN (Dot1Q) patterns and masks. */
+#define PATTERN_DT1Q_MASK \
+  0x00, 0x00, 0xFF, 0xFF,
+#define PATTERN_DT1Q_IPV6 \
+  0x00, 0x00, 0x86, 0xDD,
+
+#define PATTERN_DT1Q_IPV6_SHUFFLE \
+  /* Ether (2 blocks): Note that *VLAN* type is written here. */  \
+  0,  1,  2,  3,  4,  5,  6,  7, 8,  9, 10, 11, 16, 17,  0,  0,   \
+  /* VLAN (1 block): Note that the *EtherHdr->Type* is written here. */   \
+  12, 13, 14, 15, 0, 0, 0, 0, \
+  26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, /* IPv6 */  \
+  42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, /* IPv6 */  \
+  NU, NU, NU, NU, NU, NU, NU, NU, /* Unused */
+
 /* Generation of K-mask bitmask values, to zero out data in result. Note that
  * these correspond 1:1 to the above "*_SHUFFLE" values, and bit used must be

[ovs-dev] [PATCH v7 1/4] mfex_avx512: Calculate pkt offsets at compile time.

2022-05-05 Thread Kumar Amber
The patch removes the magic numbers for packet offsets and
minimum packet length and instead calculates them at
compile time.

Signed-off-by: Kumar Amber 
---
 lib/dpif-netdev-extract-avx512.c | 28 
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/lib/dpif-netdev-extract-avx512.c b/lib/dpif-netdev-extract-avx512.c
index 6b6fe07db..6ae15a4db 100644
--- a/lib/dpif-netdev-extract-avx512.c
+++ b/lib/dpif-netdev-extract-avx512.c
@@ -246,6 +246,16 @@ _mm512_maskz_permutexvar_epi8_wrap(__mmask64 kmask, 
__m512i idx, __m512i a)
 NC, NC, NC, NC, 0xBF, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC,   \
 NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC
 
+#define PKT_OFFSET_L2 (ETH_HEADER_LEN)
+#define PKT_OFFSET_L3_VLAN(ETH_HEADER_LEN + VLAN_HEADER_LEN)
+#define PKT_OFFSET_L4_IPv4(ETH_HEADER_LEN + IP_HEADER_LEN)
+#define PKT_OFFSET_L4_VLAN_IPv4   (PKT_OFFSET_L4_IPv4 + VLAN_HEADER_LEN)
+
+#define PKT_MIN_ETH_IPv4_UDP  (PKT_OFFSET_L4_IPv4 + UDP_HEADER_LEN)
+#define PKT_MIN_ETH_VLAN_IPv4_UDP (PKT_OFFSET_L4_VLAN_IPv4 + UDP_HEADER_LEN)
+#define PKT_MIN_ETH_IPv4_TCP  (PKT_OFFSET_L4_IPv4 + TCP_HEADER_LEN)
+#define PKT_MIN_ETH_VLAN_IPv4_TCP (PKT_OFFSET_L4_VLAN_IPv4 + TCP_HEADER_LEN)
+
 /* This union allows initializing static data as u8, but easily loading it
  * into AVX512 registers too. The union ensures proper alignment for the zmm.
  */
@@ -345,9 +355,9 @@ static const struct mfex_profile 
mfex_profiles[PROFILE_COUNT] =
 
 .mf_bits = { 0x18a0, 0x00040401},
 .dp_pkt_offs = {
-0, UINT16_MAX, 14, 34,
+0, UINT16_MAX, PKT_OFFSET_L2, PKT_OFFSET_L4_IPv4,
 },
-.dp_pkt_min_size = 42,
+.dp_pkt_min_size = PKT_MIN_ETH_IPv4_UDP,
 },
 
 [PROFILE_ETH_IPV4_TCP] = {
@@ -368,9 +378,9 @@ static const struct mfex_profile 
mfex_profiles[PROFILE_COUNT] =
 
 .mf_bits = { 0x18a0, 0x00044401},
 .dp_pkt_offs = {
-0, UINT16_MAX, 14, 34,
+0, UINT16_MAX, PKT_OFFSET_L2, PKT_OFFSET_L4_IPv4,
 },
-.dp_pkt_min_size = 54,
+.dp_pkt_min_size = PKT_MIN_ETH_IPv4_TCP,
 },
 
 [PROFILE_ETH_VLAN_IPV4_UDP] = {
@@ -387,9 +397,10 @@ static const struct mfex_profile 
mfex_profiles[PROFILE_COUNT] =
 
 .mf_bits = { 0x38a0, 0x00040401},
 .dp_pkt_offs = {
-14, UINT16_MAX, 18, 38,
+PKT_OFFSET_L2, UINT16_MAX, PKT_OFFSET_L3_VLAN,
+PKT_OFFSET_L4_VLAN_IPv4,
 },
-.dp_pkt_min_size = 46,
+.dp_pkt_min_size = PKT_MIN_ETH_VLAN_IPv4_UDP,
 },
 
 [PROFILE_ETH_VLAN_IPV4_TCP] = {
@@ -412,9 +423,10 @@ static const struct mfex_profile 
mfex_profiles[PROFILE_COUNT] =
 
 .mf_bits = { 0x38a0, 0x00044401},
 .dp_pkt_offs = {
-14, UINT16_MAX, 18, 38,
+PKT_OFFSET_L2, UINT16_MAX, PKT_OFFSET_L3_VLAN,
+PKT_OFFSET_L4_VLAN_IPv4,
 },
-.dp_pkt_min_size = 58,
+.dp_pkt_min_size = PKT_MIN_ETH_VLAN_IPv4_TCP,
 },
 };
 
-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v7 2/4] mfex_avx512: Calculate miniflow_bits at compile time.

2022-05-05 Thread Kumar Amber
The patch removes magic numbers from miniflow_bits
and calculates the bits at compile time. This also
makes it easier to handle any ABI changes.

Signed-off-by: Kumar Amber 
---
 lib/dpif-netdev-extract-avx512.c | 21 +
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/lib/dpif-netdev-extract-avx512.c b/lib/dpif-netdev-extract-avx512.c
index 6ae15a4db..e77bb3214 100644
--- a/lib/dpif-netdev-extract-avx512.c
+++ b/lib/dpif-netdev-extract-avx512.c
@@ -256,6 +256,19 @@ _mm512_maskz_permutexvar_epi8_wrap(__mmask64 kmask, 
__m512i idx, __m512i a)
 #define PKT_MIN_ETH_IPv4_TCP  (PKT_OFFSET_L4_IPv4 + TCP_HEADER_LEN)
 #define PKT_MIN_ETH_VLAN_IPv4_TCP (PKT_OFFSET_L4_VLAN_IPv4 + TCP_HEADER_LEN)
 
+/* MF bits. */
+#define MF_BIT(field) (MAP_1 << ((offsetof(struct flow, field) / 8) % \
+   MAP_T_BITS))
+
+#define MF_ETH(MF_BIT(dp_hash) | MF_BIT(in_port) | MF_BIT(packet_type)\
+   | MF_BIT(dl_dst) | MF_BIT(dl_src)| MF_BIT(dl_type))
+
+#define MF_ETH_VLAN   (MF_ETH | MF_BIT(vlans))
+#define MF_IPV4_UDP   (MF_BIT(nw_src) | MF_BIT(ipv6_label) | MF_BIT(tp_src) | \
+   MF_BIT(tp_dst))
+
+#define MF_IPV4_TCP   (MF_IPV4_UDP | MF_BIT(tcp_flags) | MF_BIT(arp_tha.ea[2]))
+
 /* This union allows initializing static data as u8, but easily loading it
  * into AVX512 registers too. The union ensures proper alignment for the zmm.
  */
@@ -353,7 +366,7 @@ static const struct mfex_profile 
mfex_profiles[PROFILE_COUNT] =
 .strip_mask.u8_data = { PATTERN_STRIP_IPV4_MASK },
 .store_kmsk = PATTERN_IPV4_UDP_KMASK,
 
-.mf_bits = { 0x18a0, 0x00040401},
+.mf_bits = { MF_ETH, MF_IPV4_UDP},
 .dp_pkt_offs = {
 0, UINT16_MAX, PKT_OFFSET_L2, PKT_OFFSET_L4_IPv4,
 },
@@ -376,7 +389,7 @@ static const struct mfex_profile 
mfex_profiles[PROFILE_COUNT] =
 .strip_mask.u8_data = { PATTERN_STRIP_IPV4_MASK },
 .store_kmsk = PATTERN_IPV4_TCP_KMASK,
 
-.mf_bits = { 0x18a0, 0x00044401},
+.mf_bits = { MF_ETH, MF_IPV4_TCP},
 .dp_pkt_offs = {
 0, UINT16_MAX, PKT_OFFSET_L2, PKT_OFFSET_L4_IPv4,
 },
@@ -395,7 +408,7 @@ static const struct mfex_profile 
mfex_profiles[PROFILE_COUNT] =
 .strip_mask.u8_data = { PATTERN_STRIP_DOT1Q_IPV4_MASK },
 .store_kmsk = PATTERN_DT1Q_IPV4_UDP_KMASK,
 
-.mf_bits = { 0x38a0, 0x00040401},
+.mf_bits = { MF_ETH_VLAN, MF_IPV4_UDP},
 .dp_pkt_offs = {
 PKT_OFFSET_L2, UINT16_MAX, PKT_OFFSET_L3_VLAN,
 PKT_OFFSET_L4_VLAN_IPv4,
@@ -421,7 +434,7 @@ static const struct mfex_profile 
mfex_profiles[PROFILE_COUNT] =
 .strip_mask.u8_data = { PATTERN_STRIP_DOT1Q_IPV4_MASK },
 .store_kmsk = PATTERN_DT1Q_IPV4_TCP_KMASK,
 
-.mf_bits = { 0x38a0, 0x00044401},
+.mf_bits = { MF_ETH_VLAN, MF_IPV4_TCP},
 .dp_pkt_offs = {
 PKT_OFFSET_L2, UINT16_MAX, PKT_OFFSET_L3_VLAN,
 PKT_OFFSET_L4_VLAN_IPv4,
-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v7 0/4] MFEX Optimizations IPv6 + Hashing Optimizations

2022-05-05 Thread Kumar Amber
The patchset introduces IPv6 optimized MFEX profiles
with AVX512 which can deliver up to 20% to 30% gain in
performance over the existing scalar data-path.

Hashing optimizations are also included which can further
improve performance by approximately 10%.

The patch also removes the magic numbers for MF bits, packet offsets
and packet lengths.

---
v7:
- Remove magic numbers from AVX512 Profiles.
v5:
- Add Ipv6 and TCP packet length checks.
v4:
- rebase to master.
- use static key lengths for different packet types.
v3:
- rebase to master.
v2:
- fix the CI build.
- fix check-patch for co-author.
---

Kumar Amber (4):
  mfex_avx512: Calculate pkt offsets at compile time.
  mfex_avx512: Calculate miniflow_bits at compile time.
  dpif-netdev/mfex: Add AVX512 ipv6 traffic profiles
  dpif-netdev/mfex: Add ipv6 profile based hashing.

 NEWS  |   6 +-
 lib/automake.mk   |   1 +
 lib/dp-packet.h   |  43 
 lib/dpif-netdev-extract-avx512.c  | 351 --
 lib/dpif-netdev-private-extract.c |  51 -
 lib/dpif-netdev-private-extract.h |  12 +
 lib/flow.c|   4 +
 7 files changed, 453 insertions(+), 15 deletions(-)

-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v3 3/3] flow: Add autovalidator support to miniflow_extract.

2022-04-27 Thread Kumar Amber
The patch adds a flag-based switch that chooses between using
miniflow_extract in the normal pipeline and selecting
mfex_autovalidator in debug and test builds.

The compile time flag used to select autoval can be set using the option:

 ./configure CFLAGS="--enable-mfex-default-autovalidator"

Signed-off-by: Kumar Amber 

---
v3:
- Fix comments from Cian.
---
---
 lib/dpif-netdev-private-extract.c |  8 
 lib/flow.c| 34 ++-
 lib/flow.h|  4 
 3 files changed, 41 insertions(+), 5 deletions(-)

diff --git a/lib/dpif-netdev-private-extract.c 
b/lib/dpif-netdev-private-extract.c
index 42b970e75..bbc0e3c78 100644
--- a/lib/dpif-netdev-private-extract.c
+++ b/lib/dpif-netdev-private-extract.c
@@ -124,8 +124,8 @@ dpif_miniflow_extract_init(void)
 /* For the first call, this will be choosen based on the
  * compile time flag.
  */
-VLOG_INFO("Default MFEX Extract implementation is %s.\n",
-  mfex_impls[mfex_idx].name);
+VLOG_DBG("Default MFEX Extract implementation is %s.\n",
+ mfex_impls[mfex_idx].name);
 atomic_store_relaxed(mfex_func, (uintptr_t) mfex_impls
  [mfex_idx].extract_func);
 }
@@ -251,7 +251,7 @@ dpif_miniflow_extract_autovalidator(struct dp_packet_batch 
*packets,
 /* Run scalar miniflow_extract to get default result. */
 DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
 pkt_metadata_init(>md, in_port);
-miniflow_extract(packet, [i]);
+miniflow_extract_(packet, [i]);
 
 /* Store known good metadata to compare with optimized metadata. */
 good_l2_5_ofs[i] = packet->l2_5_ofs;
@@ -347,7 +347,7 @@ dpif_miniflow_extract_autovalidator(struct dp_packet_batch 
*packets,
 }
 
 /* Having dumped the debug info for the batch, disable autovalidator. */
-if (batch_failed) {
+if (batch_failed && (pmd != NULL)) {
 atomic_store_relaxed(>miniflow_extract_opt, NULL);
 }
 
diff --git a/lib/flow.c b/lib/flow.c
index 086096d5e..16698cedd 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -36,6 +36,8 @@
 #include "openvswitch/match.h"
 #include "dp-packet.h"
 #include "dpif-netdev-private-dpcls.h"
+#include "dpif-netdev-private-dpif.h"
+#include "dpif-netdev-private-extract.h"
 #include "openflow/openflow.h"
 #include "packets.h"
 #include "odp-util.h"
@@ -757,7 +759,7 @@ dump_invalid_packet(struct dp_packet *packet, const char 
*reason)
  *  of interest for the flow, otherwise UINT16_MAX.
  */
 void
-miniflow_extract(struct dp_packet *packet, struct netdev_flow_key *key)
+miniflow_extract_(struct dp_packet *packet, struct netdev_flow_key *key)
 {
 /* Add code to this function (or its callees) to extract new fields. */
 BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42);
@@ -1112,6 +1114,36 @@ miniflow_extract(struct dp_packet *packet, struct 
netdev_flow_key *key)
 key->mf.map = mf.map;
 }
 
+void
+miniflow_extract(struct dp_packet *packet, struct netdev_flow_key *key)
+{
+#ifdef MFEX_AUTOVALIDATOR_DEFAULT
+static struct ovsthread_once once_enable = OVSTHREAD_ONCE_INITIALIZER;
+if (ovsthread_once_start(_enable)) {
+dpif_miniflow_extract_init();
+ovsthread_once_done(_enable);
+}
+struct dp_packet_batch packets;
+const struct pkt_metadata *md = >md;
+dp_packet_batch_init();
+dp_packet_batch_add(, packet);
+const uint32_t recirc_depth = *recirc_depth_get();
+
+/* Currently AVX512 DPIF dont support recirculation
+ * Once the support will be added the condition would
+ * be removed.
+ */
+if (recirc_depth) {
+miniflow_extract_(packet, key);
+} else {
+dpif_miniflow_extract_autovalidator(, key, 1,
+odp_to_u32(md->in_port.odp_port), NULL);
+}
+#else
+miniflow_extract_(packet, key);
+#endif
+}
+
 static ovs_be16
 parse_dl_type(const void **datap, size_t *sizep, ovs_be16 *first_vlan_tci_p)
 {
diff --git a/lib/flow.h b/lib/flow.h
index ba7c3c63a..7b277275f 100644
--- a/lib/flow.h
+++ b/lib/flow.h
@@ -543,6 +543,10 @@ struct pkt_metadata;
  * were extracted. */
 void
 miniflow_extract(struct dp_packet *packet, struct netdev_flow_key *key);
+
+void
+miniflow_extract_(struct dp_packet *packet, struct netdev_flow_key *key);
+
 void miniflow_map_init(struct miniflow *, const struct flow *);
 void flow_wc_map(const struct flow *, struct flowmap *);
 size_t miniflow_alloc(struct miniflow *dsts[], size_t n,
-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v3 2/3] flow: Refactor miniflow_extract into api.

2022-04-27 Thread Kumar Amber
Miniflow extract used to take the ABI parameter struct
miniflow, which was removed and added inside
the struct netdev_flow_key, and in many places temporary
structs were created inside the functions, which could be
cleaned up in favour of a uniform API.

Changing the parameter to key will not affect anything, as
the buff array is still followed by the mf bit map inside
netdev_flow_key; thus there won't be any impact on the offset
calculations which were done earlier.

Signed-off-by: Kumar Amber 

---
v3:
- Fix comments from Cian.
---
---
 lib/dpif-netdev-avx512.c  |  2 +-
 lib/dpif-netdev-private-extract.c |  2 +-
 lib/dpif-netdev.c |  2 +-
 lib/flow.c| 17 -
 lib/flow.h|  4 +++-
 ofproto/ofproto.c | 10 --
 6 files changed, 18 insertions(+), 19 deletions(-)

diff --git a/lib/dpif-netdev-avx512.c b/lib/dpif-netdev-avx512.c
index b7131ba3f..76eeecc9a 100644
--- a/lib/dpif-netdev-avx512.c
+++ b/lib/dpif-netdev-avx512.c
@@ -211,7 +211,7 @@ dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread 
*pmd,
 
 if (!mfex_hit) {
 /* Do a scalar miniflow extract into keys. */
-miniflow_extract(packet, >mf);
+miniflow_extract(packet, key);
 }
 
 /* Cache TCP and byte values for all packets. */
diff --git a/lib/dpif-netdev-private-extract.c 
b/lib/dpif-netdev-private-extract.c
index 4b2f12015..42b970e75 100644
--- a/lib/dpif-netdev-private-extract.c
+++ b/lib/dpif-netdev-private-extract.c
@@ -251,7 +251,7 @@ dpif_miniflow_extract_autovalidator(struct dp_packet_batch 
*packets,
 /* Run scalar miniflow_extract to get default result. */
 DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
 pkt_metadata_init(>md, in_port);
-miniflow_extract(packet, [i].mf);
+miniflow_extract(packet, [i]);
 
 /* Store known good metadata to compare with optimized metadata. */
 good_l2_5_ofs[i] = packet->l2_5_ofs;
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 139e22f38..e4e4c912b 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -8170,7 +8170,7 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd,
 }
 }
 
-miniflow_extract(packet, >mf);
+miniflow_extract(packet, key);
 key->len = 0; /* Not computed yet. */
 key->hash =
 (md_is_valid == false)
diff --git a/lib/flow.c b/lib/flow.c
index dd523c889..086096d5e 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -35,6 +35,7 @@
 #include "jhash.h"
 #include "openvswitch/match.h"
 #include "dp-packet.h"
+#include "dpif-netdev-private-dpcls.h"
 #include "openflow/openflow.h"
 #include "packets.h"
 #include "odp-util.h"
@@ -633,15 +634,13 @@ parse_nsh(const void **datap, size_t *sizep, struct 
ovs_key_nsh *key)
 void
 flow_extract(struct dp_packet *packet, struct flow *flow)
 {
-struct {
-struct miniflow mf;
-uint64_t buf[FLOW_U64S];
-} m;
+
+struct netdev_flow_key key;
 
 COVERAGE_INC(flow_extract);
 
-miniflow_extract(packet, );
-miniflow_expand(, flow);
+miniflow_extract(packet, );
+miniflow_expand(, flow);
 }
 
 static inline bool
@@ -758,7 +757,7 @@ dump_invalid_packet(struct dp_packet *packet, const char 
*reason)
  *  of interest for the flow, otherwise UINT16_MAX.
  */
 void
-miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
+miniflow_extract(struct dp_packet *packet, struct netdev_flow_key *key)
 {
 /* Add code to this function (or its callees) to extract new fields. */
 BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42);
@@ -767,7 +766,7 @@ miniflow_extract(struct dp_packet *packet, struct miniflow 
*dst)
 const void *data = dp_packet_data(packet);
 size_t size = dp_packet_size(packet);
 ovs_be32 packet_type = packet->packet_type;
-uint64_t *values = miniflow_values(dst);
+uint64_t *values = miniflow_values(>mf);
 struct mf_ctx mf = { FLOWMAP_EMPTY_INITIALIZER, values,
  values + FLOW_U64S };
 const char *frame;
@@ -1110,7 +1109,7 @@ miniflow_extract(struct dp_packet *packet, struct 
miniflow *dst)
 }
 }
  out:
-dst->map = mf.map;
+key->mf.map = mf.map;
 }
 
 static ovs_be16
diff --git a/lib/flow.h b/lib/flow.h
index c647ad83c..ba7c3c63a 100644
--- a/lib/flow.h
+++ b/lib/flow.h
@@ -41,6 +41,7 @@ struct dp_packet;
 struct ofputil_port_map;
 struct pkt_metadata;
 struct match;
+struct netdev_flow_key;
 
 /* Some flow fields are mutually exclusive or only appear within the flow
  * pipeline.  IPv6 headers are bigger than IPv4 and MPLS, and IPv6 ND packets
@@ -540,7 +541,8 @@ struct pkt_metadata;
 /* The 'dst' must follow with buffer space for FLOW_U64S 64-bit units.
  * 'dst->map' is ignored on input and set on output to indicate which fields
  * were extracted. */
-void miniflow_extract(st

[ovs-dev] [PATCH v3 1/3] dpif-netdev: Refactor per thread recirc data allocation.

2022-04-27 Thread Kumar Amber
The refactor allows us to use *recirc_depth_get() to obtain
the depth across ovs which was previously limited to only
dpif-netdev.c.

Signed-off-by: Kumar Amber 
Signed-off-by: Cian Ferriter 
Co-authored-by: Cian Ferriter 
---
 lib/dpif-netdev-private-dpif.c | 2 ++
 lib/dpif-netdev-private-dpif.h | 5 +
 lib/dpif-netdev.c  | 3 ---
 3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/lib/dpif-netdev-private-dpif.c b/lib/dpif-netdev-private-dpif.c
index 84d4ec156..6f8de9094 100644
--- a/lib/dpif-netdev-private-dpif.c
+++ b/lib/dpif-netdev-private-dpif.c
@@ -28,6 +28,8 @@
 
 VLOG_DEFINE_THIS_MODULE(dpif_netdev_impl);
 
+DEFINE_EXTERN_PER_THREAD_DATA(recirc_depth, 0);
+
 enum dpif_netdev_impl_info_idx {
 DPIF_NETDEV_IMPL_SCALAR,
 DPIF_NETDEV_IMPL_AVX512
diff --git a/lib/dpif-netdev-private-dpif.h b/lib/dpif-netdev-private-dpif.h
index 0da639c55..15f1f36b3 100644
--- a/lib/dpif-netdev-private-dpif.h
+++ b/lib/dpif-netdev-private-dpif.h
@@ -18,6 +18,11 @@
 #define DPIF_NETDEV_PRIVATE_DPIF_H 1
 
 #include "openvswitch/types.h"
+#include "ovs-thread.h"
+
+#define MAX_RECIRC_DEPTH 6
+/* Use per thread recirc_depth to prevent recirculation loop. */
+DECLARE_EXTERN_PER_THREAD_DATA(uint32_t, recirc_depth);
 
 /* Forward declarations to avoid including files. */
 struct dp_netdev_pmd_thread;
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 676434308..139e22f38 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -97,9 +97,6 @@ VLOG_DEFINE_THIS_MODULE(dpif_netdev);
 #define MIN_TO_MSEC  6
 
 #define FLOW_DUMP_MAX_BATCH 50
-/* Use per thread recirc_depth to prevent recirculation loop. */
-#define MAX_RECIRC_DEPTH 6
-DEFINE_STATIC_PER_THREAD_DATA(uint32_t, recirc_depth, 0)
 
 /* Use instant packet send by default. */
 #define DEFAULT_TX_FLUSH_INTERVAL 0
-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v3 0/3] Miniflow Extract Testing Improvements

2022-04-27 Thread Kumar Amber
The patch-set introduces changes which would improve
the testing of miniflow_extract for AVX512 based
miniflow_extract optimizations without affecting the scalar
code path.

---
v3:
- Fix comments from Cian.
---

Kumar Amber (3):
  dpif-netdev: Refactor per thread recirc data allocation.
  flow: Refactor miniflow_extract into api.
  flow: Add autovalidator support to miniflow_extract.

 lib/dpif-netdev-avx512.c  |  2 +-
 lib/dpif-netdev-private-dpif.c|  2 ++
 lib/dpif-netdev-private-dpif.h|  5 
 lib/dpif-netdev-private-extract.c |  8 ++---
 lib/dpif-netdev.c |  5 +---
 lib/flow.c| 49 +--
 lib/flow.h|  8 -
 ofproto/ofproto.c | 10 +++
 8 files changed, 64 insertions(+), 25 deletions(-)

-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v10] dpif-netdev/mfex: Add ipv4 profile based hashing.

2022-04-12 Thread Kumar Amber
For packets which don't already have a hash calculated,
miniflow_hash_5tuple() calculates the hash of a packet
using the previously built miniflow.

This commit adds IPv4 profile specific hashing which
uses fixed offsets into the packet to improve hashing
performance.

Signed-off-by: Harry van Haaren 
Co-authored-by: Harry van Haaren 
Signed-off-by: Ilya Maximets 
Co-authored-by: Ilya Maximets 
Signed-off-by: Kumar Amber 
Acked-by: Cian Ferriter 
---
v10:
- Make the hashing function generic and independent of
  magic numbers.
v9:
- Use memcpy in place of typecast to fix memory alignment.
v8:
- Fix comments from Cian.
v4:
- Use pre-defined hash length values.
v3:
- Fix check-patch sign-offs.
---
---
---
 NEWS |  3 ++-
 lib/dp-packet.h  | 32 
 lib/dpif-netdev-extract-avx512.c |  6 +-
 lib/flow.c   |  6 ++
 4 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/NEWS b/NEWS
index 8fa57836a..5d3d048f6 100644
--- a/NEWS
+++ b/NEWS
@@ -3,7 +3,8 @@ Post-v2.17.0
- OVSDB:
  * 'relay' service model now supports transaction history, i.e. honors the
'last-txn-id' field in 'monitor_cond_since' requests from clients.
-
+   - Userspace datapath:
+ * Add IPv4 profile based 5tuple hashing optimizations.
 
 v2.17.0 - 17 Feb 2022
 -
diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index ee0805ae6..7c5da258a 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -1085,6 +1085,38 @@ dp_packet_l4_checksum_bad(const struct dp_packet *p)
 DP_PACKET_OL_RX_L4_CKSUM_BAD;
 }
 
+static inline void ALWAYS_INLINE
+dp_packet_update_rss_hash_ipv4_tcp_udp(struct dp_packet *packet)
+{
+if (dp_packet_rss_valid(packet)) {
+return;
+}
+
+const uint8_t *pkt = dp_packet_data(packet);
+const uint16_t l3_ofs = packet->l3_ofs;
+const void *ipv4_src = [l3_ofs + offsetof(struct ip_header, ip_src)];
+const void *ipv4_dst = [l3_ofs + offsetof(struct ip_header, ip_dst)];
+const void *l4_ports = [packet->l4_ofs];
+uint32_t ip_src, ip_dst, ports;
+uint32_t hash = 0;
+
+memcpy(_src, ipv4_src, sizeof ip_src);
+memcpy(_dst, ipv4_dst, sizeof ip_dst);
+memcpy(,  l4_ports, sizeof ports);
+
+/* IPv4 Src and Dst. */
+hash = hash_add(hash, ip_src);
+hash = hash_add(hash, ip_dst);
+/* IPv4 proto. */
+hash = hash_add(hash,
+pkt[l3_ofs + offsetof(struct ip_header, ip_proto)]);
+/* L4 ports. */
+hash = hash_add(hash, ports);
+hash = hash_finish(hash, 42);
+
+dp_packet_set_rss_hash(packet, hash);
+}
+
 #ifdef  __cplusplus
 }
 #endif
diff --git a/lib/dpif-netdev-extract-avx512.c b/lib/dpif-netdev-extract-avx512.c
index c1c1fefb6..fa9148efe 100644
--- a/lib/dpif-netdev-extract-avx512.c
+++ b/lib/dpif-netdev-extract-avx512.c
@@ -48,6 +48,7 @@
 #include "dpif-netdev-private-dpcls.h"
 #include "dpif-netdev-private-extract.h"
 #include "dpif-netdev-private-flow.h"
+#include "dp-packet.h"
 
 /* AVX512-BW level permutex2var_epi8 emulation. */
 static inline __m512i
@@ -577,6 +578,7 @@ mfex_avx512_process(struct dp_packet_batch *packets,
 /* Process TCP flags, and store to blocks. */
 const struct tcp_header *tcp = (void *)[38];
 mfex_handle_tcp_flags(tcp, [7]);
+dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
 } break;
 
 case PROFILE_ETH_VLAN_IPV4_UDP: {
@@ -588,6 +590,7 @@ mfex_avx512_process(struct dp_packet_batch *packets,
   UDP_HEADER_LEN)) {
 continue;
 }
+dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
 } break;
 
 case PROFILE_ETH_IPV4_TCP: {
@@ -602,6 +605,7 @@ mfex_avx512_process(struct dp_packet_batch *packets,
   TCP_HEADER_LEN)) {
 continue;
 }
+dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
 } break;
 
 case PROFILE_ETH_IPV4_UDP: {
@@ -612,7 +616,7 @@ mfex_avx512_process(struct dp_packet_batch *packets,
   UDP_HEADER_LEN)) {
 continue;
 }
-
+dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
 } break;
 default:
 break;
diff --git a/lib/flow.c b/lib/flow.c
index dd523c889..8ab9df3fc 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -1017,6 +1017,9 @@ miniflow_extract(struct dp_packet *packet, struct 
miniflow *dst)
 miniflow_push_be16(mf, tp_dst, tcp->tcp_dst);
 miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
 miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
+if (dl_type == htons(ETH_TYPE_IP)) {
+

[ovs-dev] [PATCH v1] tests/mfex: Improve pcap script for mfex tests.

2022-04-12 Thread Kumar Amber
The mfex pcap generation script is improved to generate varied length
traffic. The hard coded mfex_pcap is also removed; the script itself is
used instead to generate complex traffic patterns for testing.

Signed-off-by: Kumar Amber 
Acked-by: Cian Ferriter 

---
 tests/automake.mk |   1 -
 tests/mfex_fuzzy.py   |  66 +++---
 tests/pcap/mfex_test.pcap | Bin 416 -> 0 bytes
 tests/system-dpdk.at  |  27 
 4 files changed, 67 insertions(+), 27 deletions(-)
 delete mode 100644 tests/pcap/mfex_test.pcap

diff --git a/tests/automake.mk b/tests/automake.mk
index 8a9151f81..507da2ee8 100644
--- a/tests/automake.mk
+++ b/tests/automake.mk
@@ -145,7 +145,6 @@ $(srcdir)/tests/fuzz-regression-list.at: tests/automake.mk
 
 EXTRA_DIST += $(MFEX_AUTOVALIDATOR_TESTS)
 MFEX_AUTOVALIDATOR_TESTS = \
-   tests/pcap/mfex_test.pcap \
tests/mfex_fuzzy.py
 
 OVSDB_CLUSTER_TESTSUITE_AT = \
diff --git a/tests/mfex_fuzzy.py b/tests/mfex_fuzzy.py
index 3efe1152d..dbde5fe1b 100755
--- a/tests/mfex_fuzzy.py
+++ b/tests/mfex_fuzzy.py
@@ -3,30 +3,58 @@
 import sys
 
 from scapy.all import RandMAC, RandIP, PcapWriter, RandIP6, RandShort, fuzz
-from scapy.all import IPv6, Dot1Q, IP, Ether, UDP, TCP
+from scapy.all import IPv6, Dot1Q, IP, Ether, UDP, TCP, random
 
+# Relative path for the pcap file location.
 path = str(sys.argv[1]) + "/pcap/fuzzy.pcap"
+# The number of packets generated will be size * 8.
+size = int(sys.argv[2])
+# Traffic option is used to choose between fuzzy or simple packet type.
+traffic_opt = str(sys.argv[3])
+
 pktdump = PcapWriter(path, append=False, sync=True)
 
-for i in range(0, 2000):
+pkt = []
+
+for i in range(0, size):
 
-# Generate random protocol bases, use a fuzz() over the combined packet
-# for full fuzzing.
 eth = Ether(src=RandMAC(), dst=RandMAC())
 vlan = Dot1Q()
-ipv4 = IP(src=RandIP(), dst=RandIP())
-ipv6 = IPv6(src=RandIP6(), dst=RandIP6())
 udp = UDP(dport=RandShort(), sport=RandShort())
-tcp = TCP(dport=RandShort(), sport=RandShort())
-
-# IPv4 packets with fuzzing
-pktdump.write(fuzz(eth / ipv4 / udp))
-pktdump.write(fuzz(eth / ipv4 / tcp))
-pktdump.write(fuzz(eth / vlan / ipv4 / udp))
-pktdump.write(fuzz(eth / vlan / ipv4 / tcp))
-
-# IPv6 packets with fuzzing
-pktdump.write(fuzz(eth / ipv6 / udp))
-pktdump.write(fuzz(eth / ipv6 / tcp))
-pktdump.write(fuzz(eth / vlan / ipv6 / udp))
-pktdump.write(fuzz(eth / vlan / ipv6 / tcp))
+
+if traffic_opt == "fuzzy":
+
+ipv4 = IP(src=RandIP(), dst=RandIP(), len=random.randint(0, 100))
+ipv6 = IPv6(src=RandIP6(), dst=RandIP6(), plen=random.randint(0, 100))
+tcp = TCP(dport=RandShort(), sport=RandShort(), flags='S',
+  dataofs=random.randint(0, 20))
+# IPv4 packets with fuzzing
+pkt.append(fuzz(eth / ipv4 / udp))
+pkt.append(fuzz(eth / ipv4 / tcp))
+pkt.append(fuzz(eth / vlan / ipv4 / udp))
+pkt.append(fuzz(eth / vlan / ipv4 / tcp))
+
+# IPv6 packets with fuzzing
+pkt.append(fuzz(eth / ipv6 / udp))
+pkt.append(fuzz(eth / ipv6 / tcp))
+pkt.append(fuzz(eth / vlan / ipv6 / udp))
+pkt.append(fuzz(eth / vlan / ipv6 / tcp))
+
+else:
+
+ipv4 = IP(src=RandIP(), dst=RandIP())
+ipv6 = IPv6(src=RandIP6(), dst=RandIP6())
+tcp = TCP(dport=RandShort(), sport=RandShort(), flags='S')
+# IPv4 packets
+pkt.append(eth / ipv4 / udp)
+pkt.append(eth / ipv4 / tcp)
+pkt.append(eth / vlan / ipv4 / udp)
+pkt.append(eth / vlan / ipv4 / tcp)
+
+# IPv6 packets
+pkt.append(eth / ipv6 / udp)
+pkt.append(eth / ipv6 / tcp)
+pkt.append(eth / vlan / ipv6 / udp)
+pkt.append(eth / vlan / ipv6 / tcp)
+
+pktdump.write(pkt)
diff --git a/tests/pcap/mfex_test.pcap b/tests/pcap/mfex_test.pcap
deleted file mode 100644
index 
1aac67b8d643ecb016c758cba4cc32212a80f52a..
GIT binary patch
literal 0
HcmV?d1

literal 416
zcmca|c+)~A1{MYw`2U}Qff2}QK`M68ITRa|G@yFii5$Gfk6YL%z>@uY&}o|
z2s4N<1VH2&7y^V87$)XGOtD~MV$cFgfG~zBGGJ2#YtF$KST_NTIwYriok6N4Vm)gX-Q@c^{cp<7_5LgK^UuU{2>VS0RZ!RQ+EIW

diff --git a/tests/system-dpdk.at b/tests/system-dpdk.at
index 7d2715c4a..1b851cc89 100644
--- a/tests/system-dpdk.at
+++ b/tests/system-dpdk.at
@@ -226,12 +226,13 @@ dnl 
--
 dnl Add standard DPDK PHY port
 AT_SETUP([OVS-DPDK - MFEX Autovalidator])
 AT_KEYWORDS([dpdk])
-
+AT_SKIP_IF([! $PYTHON3 -c "import scapy"], [], [])
+AT_CHECK([$PYTHON3 $srcdir/mfex_fuzzy.py $srcdir 2000 0], [], [stdout])
 OVS_DPDK_START()
 
 dnl Add userspace bridge and attach it to OVS
 AT_CHECK([ovs-vsctl add-br br0 -- set bridge br0 datapath_type=netdev])
-AT_CHECK([ovs-vsctl add-port br0 p1 -- set Interface p1 type=dpdk

[ovs-dev] [PATCH v2] dpcls: Add the dpcls subtable lookup function in flow dump.

2022-04-11 Thread Kumar Amber
The patch adds the subtable lookup name to the existing
dp-extra-info mentioned below:

dp-extra-info:miniflow_bits(18,4), lookup(generic)
dp-extra-info:miniflow_bits(9,4), lookup(avx512_gather)

Suggested-by: Ilya Maximets 
Signed-off-by: Kumar Amber 

---
v2:
- Add RCU protection for ds_extra_info.
- Add string regeneration for dpcls reprobe.
---
---
 lib/dpif-netdev-lookup.c| 39 +
 lib/dpif-netdev-lookup.h| 14 +++-
 lib/dpif-netdev-private-dpcls.h |  3 +++
 lib/dpif-netdev-private-flow.h  |  6 +++--
 lib/dpif-netdev.c   | 26 +-
 5 files changed, 76 insertions(+), 12 deletions(-)

diff --git a/lib/dpif-netdev-lookup.c b/lib/dpif-netdev-lookup.c
index bd0a99abe..e20a45589 100644
--- a/lib/dpif-netdev-lookup.c
+++ b/lib/dpif-netdev-lookup.c
@@ -20,6 +20,8 @@
 
 #include "openvswitch/vlog.h"
 
+#define SUBTABLE_STRING_OFFSET 18
+
 VLOG_DEFINE_THIS_MODULE(dpif_netdev_lookup);
 
 /* Actual list of implementations goes here */
@@ -93,11 +95,11 @@ dpcls_subtable_set_prio(const char *name, uint8_t priority)
 }
 
 dpcls_subtable_lookup_func
-dpcls_subtable_get_best_impl(uint32_t u0_bit_count, uint32_t u1_bit_count)
+dpcls_subtable_get_best_impl(uint32_t u0_bit_count, uint32_t u1_bit_count,
+ const char **name)
 {
 /* Iter over each subtable impl, and get highest priority one. */
 int32_t prio = -1;
-const char *name = NULL;
 dpcls_subtable_lookup_func best_func = NULL;
 
 for (int i = 0; i < ARRAY_SIZE(subtable_lookups); i++) {
@@ -109,16 +111,45 @@ dpcls_subtable_get_best_impl(uint32_t u0_bit_count, 
uint32_t u1_bit_count)
 if (probed_func) {
 best_func = probed_func;
 prio = probed_prio;
-name = subtable_lookups[i].name;
+if (name) {
+*name = subtable_lookups[i].name;
+}
 }
 }
 }
 
 VLOG_DBG("Subtable lookup function '%s' with units (%d,%d), priority %d\n",
- name, u0_bit_count, u1_bit_count, prio);
+ *name, u0_bit_count, u1_bit_count, prio);
 
 /* Programming error - we must always return a valid func ptr. */
 ovs_assert(best_func != NULL);
 
 return best_func;
 }
+
+void
+dpcls_update_flow_dump(struct cmap flow_table,
+   struct dpcls_subtable_lookup_info_t *lookup_funcs,
+   int impls_count)
+{
+struct dp_netdev_flow *flow;
+const char *name = NULL;
+int32_t prio = -1;
+
+for (int i = 0; i < impls_count; i++) {
+int32_t probed_prio = lookup_funcs[i].prio;
+if (probed_prio > prio) {
+name = lookup_funcs[i].name;
+prio = probed_prio;
+}
+}
+CMAP_FOR_EACH (flow, node, _table) {
+struct ds info = DS_EMPTY_INITIALIZER;
+char *extra_info = ovsrcu_get(char *, >dp_extra_info);
+ds_put_cstr(, extra_info);
+ds_truncate(, SUBTABLE_STRING_OFFSET);
+ds_put_format(,",lookup(%s)", name);
+ovsrcu_set(>dp_extra_info, ds_steal_cstr());
+ds_destroy();
+}
+}
diff --git a/lib/dpif-netdev-lookup.h b/lib/dpif-netdev-lookup.h
index 59f51faa0..a8a83d9a9 100644
--- a/lib/dpif-netdev-lookup.h
+++ b/lib/dpif-netdev-lookup.h
@@ -20,6 +20,7 @@
 #include 
 #include "dpif-netdev.h"
 #include "dpif-netdev-private-dpcls.h"
+#include "dpif-netdev-private-flow.h"
 
 /* Function to perform a probe for the subtable bit fingerprint.
  * Returns NULL if not valid, or a valid function pointer to call for this
@@ -66,8 +67,14 @@ struct dpcls_subtable_lookup_info_t {
 
 int32_t dpcls_subtable_set_prio(const char *name, uint8_t priority);
 
+/* Retrieve the best implementation for dpcls subtable.
+ * Name parameter is used to fetch the name of dpcls subtable
+ * selected by the function.
+ * The function can also be called with name as NULL.
+ */
 dpcls_subtable_lookup_func
-dpcls_subtable_get_best_impl(uint32_t u0_bit_count, uint32_t u1_bit_count);
+dpcls_subtable_get_best_impl(uint32_t u0_bit_count, uint32_t u1_bit_count,
+ const char **name);
 
 /* Retrieve the array of lookup implementations for iteration.
  * On error, returns a negative number.
@@ -76,4 +83,9 @@ dpcls_subtable_get_best_impl(uint32_t u0_bit_count, uint32_t 
u1_bit_count);
 int32_t
 dpcls_subtable_lookup_info_get(struct dpcls_subtable_lookup_info_t **out_ptr);
 
+void
+dpcls_update_flow_dump(struct cmap flow_table,
+   struct dpcls_subtable_lookup_info_t *lookup_funcs,
+   int impls_count);
+
 #endif /* dpif-netdev-lookup.h */
diff --git a/lib/dpif-netdev-private-dpcls.h b/lib/dpif-netdev-private-dpcls.h
index 0d5da73c7..d17ef32af 100644
--- a/lib/dpif-netdev-private-dpcls.h
+++ b/lib/dpif-netdev-private-dpcls.h
@@ -88,6 +88,9 @@ struct dpcls

[ovs-dev] [PATCH v9 4/4] tests/mfex: Improve pcap script for mfex tests.

2022-04-06 Thread Kumar Amber
The mfex pcap generation script is improved to generate varied length
traffic. The hard coded mfex_pcap is also removed; the script itself is
used instead to generate complex traffic patterns for testing.

Signed-off-by: Kumar Amber 
Acked-by: Cian Ferriter 
---
v8:
- Reduce IO writes.
--
---
 tests/automake.mk |   1 -
 tests/mfex_fuzzy.py   |  66 +++---
 tests/pcap/mfex_test.pcap | Bin 416 -> 0 bytes
 tests/system-dpdk.at  |  23 +
 4 files changed, 63 insertions(+), 27 deletions(-)
 delete mode 100644 tests/pcap/mfex_test.pcap

diff --git a/tests/automake.mk b/tests/automake.mk
index 8a9151f81..507da2ee8 100644
--- a/tests/automake.mk
+++ b/tests/automake.mk
@@ -145,7 +145,6 @@ $(srcdir)/tests/fuzz-regression-list.at: tests/automake.mk
 
 EXTRA_DIST += $(MFEX_AUTOVALIDATOR_TESTS)
 MFEX_AUTOVALIDATOR_TESTS = \
-   tests/pcap/mfex_test.pcap \
tests/mfex_fuzzy.py
 
 OVSDB_CLUSTER_TESTSUITE_AT = \
diff --git a/tests/mfex_fuzzy.py b/tests/mfex_fuzzy.py
index 3efe1152d..dbde5fe1b 100755
--- a/tests/mfex_fuzzy.py
+++ b/tests/mfex_fuzzy.py
@@ -3,30 +3,58 @@
 import sys
 
 from scapy.all import RandMAC, RandIP, PcapWriter, RandIP6, RandShort, fuzz
-from scapy.all import IPv6, Dot1Q, IP, Ether, UDP, TCP
+from scapy.all import IPv6, Dot1Q, IP, Ether, UDP, TCP, random
 
+# Relative path for the pcap file location.
 path = str(sys.argv[1]) + "/pcap/fuzzy.pcap"
+# The number of packets generated will be size * 8.
+size = int(sys.argv[2])
+# Traffic option is used to choose between fuzzy or simple packet type.
+traffic_opt = str(sys.argv[3])
+
 pktdump = PcapWriter(path, append=False, sync=True)
 
-for i in range(0, 2000):
+pkt = []
+
+for i in range(0, size):
 
-# Generate random protocol bases, use a fuzz() over the combined packet
-# for full fuzzing.
 eth = Ether(src=RandMAC(), dst=RandMAC())
 vlan = Dot1Q()
-ipv4 = IP(src=RandIP(), dst=RandIP())
-ipv6 = IPv6(src=RandIP6(), dst=RandIP6())
 udp = UDP(dport=RandShort(), sport=RandShort())
-tcp = TCP(dport=RandShort(), sport=RandShort())
-
-# IPv4 packets with fuzzing
-pktdump.write(fuzz(eth / ipv4 / udp))
-pktdump.write(fuzz(eth / ipv4 / tcp))
-pktdump.write(fuzz(eth / vlan / ipv4 / udp))
-pktdump.write(fuzz(eth / vlan / ipv4 / tcp))
-
-# IPv6 packets with fuzzing
-pktdump.write(fuzz(eth / ipv6 / udp))
-pktdump.write(fuzz(eth / ipv6 / tcp))
-pktdump.write(fuzz(eth / vlan / ipv6 / udp))
-pktdump.write(fuzz(eth / vlan / ipv6 / tcp))
+
+if traffic_opt == "fuzzy":
+
+ipv4 = IP(src=RandIP(), dst=RandIP(), len=random.randint(0, 100))
+ipv6 = IPv6(src=RandIP6(), dst=RandIP6(), plen=random.randint(0, 100))
+tcp = TCP(dport=RandShort(), sport=RandShort(), flags='S',
+  dataofs=random.randint(0, 20))
+# IPv4 packets with fuzzing
+pkt.append(fuzz(eth / ipv4 / udp))
+pkt.append(fuzz(eth / ipv4 / tcp))
+pkt.append(fuzz(eth / vlan / ipv4 / udp))
+pkt.append(fuzz(eth / vlan / ipv4 / tcp))
+
+# IPv6 packets with fuzzing
+pkt.append(fuzz(eth / ipv6 / udp))
+pkt.append(fuzz(eth / ipv6 / tcp))
+pkt.append(fuzz(eth / vlan / ipv6 / udp))
+pkt.append(fuzz(eth / vlan / ipv6 / tcp))
+
+else:
+
+ipv4 = IP(src=RandIP(), dst=RandIP())
+ipv6 = IPv6(src=RandIP6(), dst=RandIP6())
+tcp = TCP(dport=RandShort(), sport=RandShort(), flags='S')
+# IPv4 packets
+pkt.append(eth / ipv4 / udp)
+pkt.append(eth / ipv4 / tcp)
+pkt.append(eth / vlan / ipv4 / udp)
+pkt.append(eth / vlan / ipv4 / tcp)
+
+# IPv6 packets
+pkt.append(eth / ipv6 / udp)
+pkt.append(eth / ipv6 / tcp)
+pkt.append(eth / vlan / ipv6 / udp)
+pkt.append(eth / vlan / ipv6 / tcp)
+
+pktdump.write(pkt)
diff --git a/tests/pcap/mfex_test.pcap b/tests/pcap/mfex_test.pcap
deleted file mode 100644
index 
1aac67b8d643ecb016c758cba4cc32212a80f52a..
GIT binary patch
literal 0
HcmV?d1

literal 416
zcmca|c+)~A1{MYw`2U}Qff2}QK`M68ITRa|G@yFii5$Gfk6YL%z>@uY&}o|
z2s4N<1VH2&7y^V87$)XGOtD~MV$cFgfG~zBGGJ2#YtF$KST_NTIwYriok6N4Vm)gX-Q@c^{cp<7_5LgK^UuU{2>VS0RZ!RQ+EIW

diff --git a/tests/system-dpdk.at b/tests/system-dpdk.at
index 7d2715c4a..1476e470c 100644
--- a/tests/system-dpdk.at
+++ b/tests/system-dpdk.at
@@ -226,12 +226,13 @@ dnl 
--
 dnl Add standard DPDK PHY port
 AT_SETUP([OVS-DPDK - MFEX Autovalidator])
 AT_KEYWORDS([dpdk])
-
+AT_SKIP_IF([! $PYTHON3 -c "import scapy"], [], [])
+AT_CHECK([$PYTHON3 $srcdir/mfex_fuzzy.py $srcdir 2000 0], [], [stdout])
 OVS_DPDK_START()
 
 dnl Add userspace bridge and attach it to OVS
 AT_CHECK([ovs-vsctl add-br br0 -- set bridge br0 datapath_type=netdev])
-AT_CHECK([ovs-vsctl add-port br0 p

[ovs-dev] [PATCH v9 3/4] dpif-netdev/mfex: Avoid hashing when opt mfex called.

2022-04-06 Thread Kumar Amber
This patch avoids calculating the software hash of the packet again
if the optimized miniflow-extract hit. In cases of scalar miniflow
extract, the normal hashing calculation is performed.

Signed-off-by: Kumar Amber 
Acked-by: Cian Ferriter 
---
 lib/dpif-netdev-avx512.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/dpif-netdev-avx512.c b/lib/dpif-netdev-avx512.c
index b7131ba3f..c68b79f6b 100644
--- a/lib/dpif-netdev-avx512.c
+++ b/lib/dpif-netdev-avx512.c
@@ -212,15 +212,15 @@ dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread 
*pmd,
 if (!mfex_hit) {
 /* Do a scalar miniflow extract into keys. */
 miniflow_extract(packet, >mf);
+key->len = netdev_flow_key_size(miniflow_n_values(>mf));
+key->hash = dpif_netdev_packet_get_rss_hash_orig_pkt(packet,
+ >mf);
 }
 
 /* Cache TCP and byte values for all packets. */
 pkt_meta[i].bytes = dp_packet_size(packet);
 pkt_meta[i].tcp_flags = miniflow_get_tcp_flags(>mf);
 
-key->len = netdev_flow_key_size(miniflow_n_values(>mf));
-key->hash = dpif_netdev_packet_get_rss_hash_orig_pkt(packet, >mf);
-
 if (emc_enabled) {
 f = emc_lookup(>emc_cache, key);
 
-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v9 2/4] dpif-netdev/mfex: Add packet hash check to autovalidator.

2022-04-06 Thread Kumar Amber
This patch adds the scalar hash calls to the autovalidator.
It also adds checks for comparing the scalar hash against
the profile based hash calculated as part of AVX512 MFEX implementations.

The per profile AVX512 optimized hash was added to the autovalidator
in the last commit. The autovalidator was already calling that code,
we just add the checks and scalar hashing in this commit.

Signed-off-by: Kumar Amber 
Acked-by: Cian Ferriter 
---
v8:
- Fix hash validation.
---
---
 lib/dpif-netdev-private-extract.c | 24 +---
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/lib/dpif-netdev-private-extract.c 
b/lib/dpif-netdev-private-extract.c
index 4b2f12015..e8550c1fb 100644
--- a/lib/dpif-netdev-private-extract.c
+++ b/lib/dpif-netdev-private-extract.c
@@ -252,6 +252,9 @@ dpif_miniflow_extract_autovalidator(struct dp_packet_batch 
*packets,
 DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
 pkt_metadata_init(>md, in_port);
 miniflow_extract(packet, [i].mf);
+keys[i].len = netdev_flow_key_size(miniflow_n_values([i].mf));
+keys[i].hash = dpif_netdev_packet_get_rss_hash_orig_pkt(packet,
+[i].mf);
 
 /* Store known good metadata to compare with optimized metadata. */
 good_l2_5_ofs[i] = packet->l2_5_ofs;
@@ -266,10 +269,11 @@ dpif_miniflow_extract_autovalidator(struct 
dp_packet_batch *packets,
 if (!mfex_impls[j].available) {
 continue;
 }
-/* Reset keys and offsets before each implementation. */
+/* Reset keys, offsets and hash before each implementation. */
 memset(test_keys, 0, keys_size * sizeof(struct netdev_flow_key));
 DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
 dp_packet_reset_offsets(packet);
+*dp_packet_ol_flags_ptr(packet) &= ~DP_PACKET_OL_RSS_HASH;
 }
 /* Call optimized miniflow for each batch of packet. */
 uint32_t hit_mask = mfex_impls[j].extract_func(packets, test_keys,
@@ -335,6 +339,15 @@ dpif_miniflow_extract_autovalidator(struct dp_packet_batch 
*packets,
 failed = 1;
 }
 
+/* Check hashes are equal. */
+if ((keys[i].hash != test_keys[i].hash) ||
+(keys[i].len != test_keys[i].len)) {
+ds_put_format(_msg, "Good hash: %d len: %d\tTest hash:%d"
+  " len:%d\n", keys[i].hash, keys[i].len,
+  test_keys[i].hash, test_keys[i].len);
+failed = 1;
+}
+
 if (failed) {
 VLOG_ERR("Autovalidation for %s failed in pkt %d,"
  " disabling.", mfex_impls[j].name, i);
@@ -351,13 +364,10 @@ dpif_miniflow_extract_autovalidator(struct 
dp_packet_batch *packets,
 atomic_store_relaxed(>miniflow_extract_opt, NULL);
 }
 
-/* Preserve packet correctness by storing back the good offsets in
- * packets back. */
+/* Reset all packet values. */
 DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
-packet->l2_5_ofs = good_l2_5_ofs[i];
-packet->l3_ofs = good_l3_ofs[i];
-packet->l4_ofs = good_l4_ofs[i];
-packet->l2_pad_size = good_l2_pad_size[i];
+dp_packet_reset_offsets(packet);
+*dp_packet_ol_flags_ptr(packet) &= ~DP_PACKET_OL_RSS_HASH;
 }
 
 /* Returning zero implies no packets were hit by autovalidation. This
-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v9 1/4] dpif-netdev/mfex: Add ipv4 profile based hashing.

2022-04-06 Thread Kumar Amber
For packets which don't already have a hash calculated,
miniflow_hash_5tuple() calculates the hash of a packet
using the previously built miniflow.

This commit adds IPv4 profile specific hashing which
uses fixed offsets into the packet to improve hashing
performance.

Signed-off-by: Kumar Amber 
Signed-off-by: Harry van Haaren 
Co-authored-by: Harry van Haaren 
Acked-by: Cian Ferriter 
---
v9:
- Use memcpy in place of typecast to fix memory alignment.
v8:
- Fix comments from cian.
v4:
- Use pre-defined hash length values.
v3:
- Fix check-patch sign-offs.
---
---
 NEWS |  2 +
 lib/dpif-netdev-extract-avx512.c | 72 
 2 files changed, 74 insertions(+)

diff --git a/NEWS b/NEWS
index 8fa57836a..db58be457 100644
--- a/NEWS
+++ b/NEWS
@@ -3,6 +3,8 @@ Post-v2.17.0
- OVSDB:
  * 'relay' service model now supports transaction history, i.e. honors the
'last-txn-id' field in 'monitor_cond_since' requests from clients.
+   - Userspace datapath:
+ * Add IPv4 profile based 5tuple hashing optimizations.
 
 
 v2.17.0 - 17 Feb 2022
diff --git a/lib/dpif-netdev-extract-avx512.c b/lib/dpif-netdev-extract-avx512.c
index c1c1fefb6..1e0d4e31a 100644
--- a/lib/dpif-netdev-extract-avx512.c
+++ b/lib/dpif-netdev-extract-avx512.c
@@ -278,6 +278,10 @@ struct mfex_profile {
 uint64_t mf_bits[FLOWMAP_UNITS];
 uint16_t dp_pkt_offs[4];
 uint16_t dp_pkt_min_size;
+
+/* Constant data offsets for hashing. */
+uint8_t hash_pkt_offs[4];
+uint32_t hash_len;
 };
 
 /* Ensure dp_pkt_offs[4] is the correct size as in struct dp_packet. */
@@ -327,6 +331,13 @@ enum MFEX_PROFILES {
 PROFILE_COUNT,
 };
 
+/* Packet offsets for 5 tuple hash function. */
+#define HASH_IPV4 \
+26, 30, 23, 34
+
+#define HASH_DT1Q_IPV4 \
+30, 34, 27, 38
+
 /* Static const instances of profiles. These are compile-time constants,
  * and are specialized into individual miniflow-extract functions.
  * NOTE: Order of the fields is significant, any change in the order must be
@@ -347,6 +358,9 @@ static const struct mfex_profile 
mfex_profiles[PROFILE_COUNT] =
 0, UINT16_MAX, 14, 34,
 },
 .dp_pkt_min_size = 42,
+
+.hash_pkt_offs = { HASH_IPV4 },
+.hash_len = 72,
 },
 
 [PROFILE_ETH_IPV4_TCP] = {
@@ -370,6 +384,9 @@ static const struct mfex_profile 
mfex_profiles[PROFILE_COUNT] =
 0, UINT16_MAX, 14, 34,
 },
 .dp_pkt_min_size = 54,
+
+.hash_pkt_offs = { HASH_IPV4 },
+.hash_len = 80,
 },
 
 [PROFILE_ETH_VLAN_IPV4_UDP] = {
@@ -389,6 +406,9 @@ static const struct mfex_profile 
mfex_profiles[PROFILE_COUNT] =
 14, UINT16_MAX, 18, 38,
 },
 .dp_pkt_min_size = 46,
+
+.hash_pkt_offs = { HASH_DT1Q_IPV4 },
+.hash_len = 80,
 },
 
 [PROFILE_ETH_VLAN_IPV4_TCP] = {
@@ -414,6 +434,9 @@ static const struct mfex_profile 
mfex_profiles[PROFILE_COUNT] =
 14, UINT16_MAX, 18, 38,
 },
 .dp_pkt_min_size = 58,
+
+.hash_pkt_offs = { HASH_DT1Q_IPV4 },
+.hash_len = 88,
 },
 };
 
@@ -467,6 +490,40 @@ mfex_handle_tcp_flags(const struct tcp_header *tcp, 
uint64_t *block)
 *block = ctl_u64 << 32;
 }
 
+static inline void
+mfex_5tuple_hash_ipv4(struct dp_packet *packet, const uint8_t *pkt,
+  struct netdev_flow_key *key,
+  const uint8_t *pkt_offsets)
+{
+if (!dp_packet_rss_valid(packet)) {
+uint32_t hash = 0;
+uint32_t ip_src = 0;
+uint32_t ip_dst = 0;
+uint32_t ports = 0;
+void *ipv4_src = (void *) [pkt_offsets[0]];
+void *ipv4_dst = (void *) [pkt_offsets[1]];
+void *ports_l4 = (void *) [pkt_offsets[3]];
+
+memcpy(_src, ipv4_src, sizeof(uint32_t));
+memcpy(_dst, ipv4_dst, sizeof(uint32_t));
+memcpy(, ports_l4, sizeof(uint32_t));
+
+/* IPv4 Src and Dst. */
+hash = hash_add(hash, ip_src);
+hash = hash_add(hash, ip_dst);
+/* IPv4 proto. */
+hash = hash_add(hash, pkt[pkt_offsets[2]]);
+/* L4 ports. */
+hash = hash_add(hash, ports);
+hash = hash_finish(hash, 42);
+
+dp_packet_set_rss_hash(packet, hash);
+key->hash = hash;
+} else {
+key->hash = dp_packet_get_rss_hash(packet);
+}
+}
+
 /* Generic loop to process any mfex profile. This code is specialized into
  * multiple actual MFEX implementation functions. Its marked ALWAYS_INLINE
  * to ensure the compiler specializes each instance. The code is marked "hot"
@@ -577,6 +634,10 @@ mfex_avx512_process(struct dp_packet_batch *packets,
 /* Process TCP flags, and store to blocks. */
 const struct tcp_header *tcp = (void *)[38];
 mfex_handle_tcp_flags(tcp, [7]);
+
+mfex_5tuple_hash_ipv4(packet, pkt, [i],
+  

[ovs-dev] [PATCH v9 0/4] IPv4 Hashing AVX512 Optimizations

2022-04-06 Thread Kumar Amber
Hashing optimizations are also included, which can further
improve performance by approximately 10%.

---
v9:
- Replace typecast with memcpy to resolve unaligned
  memory access.
v8:
- Optimize mfex_fuzzy script for less IO writes.
v7:
- Split IPv4 hashing into a separate patchset.
v6:
- Reorder Patches in the Patchset.
v5:
- Add Ipv6 and TCP packet length checks.
v4:
- rebase to master.
- use static key lengths for different packet types.
v3:
- rebase to master.
v2:
- fix the CI build.
- fix check-patch for co-author.
---

Kumar Amber (4):
  dpif-netdev/mfex: Add ipv4 profile based hashing.
  dpif-netdev/mfex: Add packet hash check to autovalidator.
  dpif-netdev/mfex: Avoid hashing when opt mfex called.
  tests/mfex: Improve pcap script for mfex tests.

 NEWS  |   2 +
 lib/dpif-netdev-avx512.c  |   6 +--
 lib/dpif-netdev-extract-avx512.c  |  72 ++
 lib/dpif-netdev-private-extract.c |  24 +++---
 tests/automake.mk |   1 -
 tests/mfex_fuzzy.py   |  66 +++
 tests/pcap/mfex_test.pcap | Bin 416 -> 0 bytes
 tests/system-dpdk.at  |  23 +++---
 8 files changed, 157 insertions(+), 37 deletions(-)
 delete mode 100644 tests/pcap/mfex_test.pcap

-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v8 4/4] tests/mfex: Improve pcap script for mfex tests.

2022-04-01 Thread Kumar Amber
The mfex pcap generation script is improved to produce varied-length
traffic. The hard-coded mfex_test.pcap file is removed; the tests now use
the script itself to generate complex traffic patterns.

Signed-off-by: Kumar Amber 

---
v8:
- Reduce IO writes.
--
---
 tests/automake.mk |   1 -
 tests/mfex_fuzzy.py   |  66 +++---
 tests/pcap/mfex_test.pcap | Bin 416 -> 0 bytes
 tests/system-dpdk.at  |  23 +
 4 files changed, 63 insertions(+), 27 deletions(-)
 delete mode 100644 tests/pcap/mfex_test.pcap

diff --git a/tests/automake.mk b/tests/automake.mk
index 8a9151f81..507da2ee8 100644
--- a/tests/automake.mk
+++ b/tests/automake.mk
@@ -145,7 +145,6 @@ $(srcdir)/tests/fuzz-regression-list.at: tests/automake.mk
 
 EXTRA_DIST += $(MFEX_AUTOVALIDATOR_TESTS)
 MFEX_AUTOVALIDATOR_TESTS = \
-   tests/pcap/mfex_test.pcap \
tests/mfex_fuzzy.py
 
 OVSDB_CLUSTER_TESTSUITE_AT = \
diff --git a/tests/mfex_fuzzy.py b/tests/mfex_fuzzy.py
index 3efe1152d..dbde5fe1b 100755
--- a/tests/mfex_fuzzy.py
+++ b/tests/mfex_fuzzy.py
@@ -3,30 +3,58 @@
 import sys
 
 from scapy.all import RandMAC, RandIP, PcapWriter, RandIP6, RandShort, fuzz
-from scapy.all import IPv6, Dot1Q, IP, Ether, UDP, TCP
+from scapy.all import IPv6, Dot1Q, IP, Ether, UDP, TCP, random
 
+# Relative path for the pcap file location.
 path = str(sys.argv[1]) + "/pcap/fuzzy.pcap"
+# The number of packets generated will be size * 8.
+size = int(sys.argv[2])
+# Traffic option is used to choose between fuzzy or simple packet type.
+traffic_opt = str(sys.argv[3])
+
 pktdump = PcapWriter(path, append=False, sync=True)
 
-for i in range(0, 2000):
+pkt = []
+
+for i in range(0, size):
 
-# Generate random protocol bases, use a fuzz() over the combined packet
-# for full fuzzing.
 eth = Ether(src=RandMAC(), dst=RandMAC())
 vlan = Dot1Q()
-ipv4 = IP(src=RandIP(), dst=RandIP())
-ipv6 = IPv6(src=RandIP6(), dst=RandIP6())
 udp = UDP(dport=RandShort(), sport=RandShort())
-tcp = TCP(dport=RandShort(), sport=RandShort())
-
-# IPv4 packets with fuzzing
-pktdump.write(fuzz(eth / ipv4 / udp))
-pktdump.write(fuzz(eth / ipv4 / tcp))
-pktdump.write(fuzz(eth / vlan / ipv4 / udp))
-pktdump.write(fuzz(eth / vlan / ipv4 / tcp))
-
-# IPv6 packets with fuzzing
-pktdump.write(fuzz(eth / ipv6 / udp))
-pktdump.write(fuzz(eth / ipv6 / tcp))
-pktdump.write(fuzz(eth / vlan / ipv6 / udp))
-pktdump.write(fuzz(eth / vlan / ipv6 / tcp))
+
+if traffic_opt == "fuzzy":
+
+ipv4 = IP(src=RandIP(), dst=RandIP(), len=random.randint(0, 100))
+ipv6 = IPv6(src=RandIP6(), dst=RandIP6(), plen=random.randint(0, 100))
+tcp = TCP(dport=RandShort(), sport=RandShort(), flags='S',
+  dataofs=random.randint(0, 20))
+# IPv4 packets with fuzzing
+pkt.append(fuzz(eth / ipv4 / udp))
+pkt.append(fuzz(eth / ipv4 / tcp))
+pkt.append(fuzz(eth / vlan / ipv4 / udp))
+pkt.append(fuzz(eth / vlan / ipv4 / tcp))
+
+# IPv6 packets with fuzzing
+pkt.append(fuzz(eth / ipv6 / udp))
+pkt.append(fuzz(eth / ipv6 / tcp))
+pkt.append(fuzz(eth / vlan / ipv6 / udp))
+pkt.append(fuzz(eth / vlan / ipv6 / tcp))
+
+else:
+
+ipv4 = IP(src=RandIP(), dst=RandIP())
+ipv6 = IPv6(src=RandIP6(), dst=RandIP6())
+tcp = TCP(dport=RandShort(), sport=RandShort(), flags='S')
+# IPv4 packets
+pkt.append(eth / ipv4 / udp)
+pkt.append(eth / ipv4 / tcp)
+pkt.append(eth / vlan / ipv4 / udp)
+pkt.append(eth / vlan / ipv4 / tcp)
+
+# IPv6 packets
+pkt.append(eth / ipv6 / udp)
+pkt.append(eth / ipv6 / tcp)
+pkt.append(eth / vlan / ipv6 / udp)
+pkt.append(eth / vlan / ipv6 / tcp)
+
+pktdump.write(pkt)
diff --git a/tests/pcap/mfex_test.pcap b/tests/pcap/mfex_test.pcap
deleted file mode 100644
index 
1aac67b8d643ecb016c758cba4cc32212a80f52a..
GIT binary patch
literal 0
HcmV?d1

literal 416
zcmca|c+)~A1{MYw`2U}Qff2}QK`M68ITRa|G@yFii5$Gfk6YL%z>@uY&}o|
z2s4N<1VH2&7y^V87$)XGOtD~MV$cFgfG~zBGGJ2#YtF$KST_NTIwYriok6N4Vm)gX-Q@c^{cp<7_5LgK^UuU{2>VS0RZ!RQ+EIW

diff --git a/tests/system-dpdk.at b/tests/system-dpdk.at
index 7d2715c4a..1476e470c 100644
--- a/tests/system-dpdk.at
+++ b/tests/system-dpdk.at
@@ -226,12 +226,13 @@ dnl 
--
 dnl Add standard DPDK PHY port
 AT_SETUP([OVS-DPDK - MFEX Autovalidator])
 AT_KEYWORDS([dpdk])
-
+AT_SKIP_IF([! $PYTHON3 -c "import scapy"], [], [])
+AT_CHECK([$PYTHON3 $srcdir/mfex_fuzzy.py $srcdir 2000 0], [], [stdout])
 OVS_DPDK_START()
 
 dnl Add userspace bridge and attach it to OVS
 AT_CHECK([ovs-vsctl add-br br0 -- set bridge br0 datapath_type=netdev])
-AT_CHECK([ovs-vsctl add-port br0 p1 -- set Interface p1 type

[ovs-dev] [PATCH v8 3/4] dpif-netdev/mfex: Avoid hashing when opt mfex called.

2022-04-01 Thread Kumar Amber
This patch avoids calculating the software hash of the packet again
if the optimized miniflow-extract hit. In cases of scalar miniflow
extract, the normal hashing calculation is performed.

Signed-off-by: Kumar Amber 
---
 lib/dpif-netdev-avx512.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/dpif-netdev-avx512.c b/lib/dpif-netdev-avx512.c
index b7131ba3f..c68b79f6b 100644
--- a/lib/dpif-netdev-avx512.c
+++ b/lib/dpif-netdev-avx512.c
@@ -212,15 +212,15 @@ dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread 
*pmd,
 if (!mfex_hit) {
 /* Do a scalar miniflow extract into keys. */
 miniflow_extract(packet, >mf);
+key->len = netdev_flow_key_size(miniflow_n_values(>mf));
+key->hash = dpif_netdev_packet_get_rss_hash_orig_pkt(packet,
+ >mf);
 }
 
 /* Cache TCP and byte values for all packets. */
 pkt_meta[i].bytes = dp_packet_size(packet);
 pkt_meta[i].tcp_flags = miniflow_get_tcp_flags(>mf);
 
-key->len = netdev_flow_key_size(miniflow_n_values(>mf));
-key->hash = dpif_netdev_packet_get_rss_hash_orig_pkt(packet, >mf);
-
 if (emc_enabled) {
 f = emc_lookup(>emc_cache, key);
 
-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v8 2/4] dpif-netdev/mfex: Add packet hash check to autovalidator.

2022-04-01 Thread Kumar Amber
This patch adds the scalar hash calls to the autovalidator.
It also adds checks for comparing the scalar hash against
the profile based hash calculated as part of AVX512 MFEX implementations.

The per profile AVX512 optimized hash was added to the autovalidator
in the last commit. The autovalidator was already calling that code,
we just add the checks and scalar hashing in this commit.

Signed-off-by: Kumar Amber 

---
v8:
- Fix hash validation.
---
---
 lib/dpif-netdev-private-extract.c | 24 +---
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/lib/dpif-netdev-private-extract.c 
b/lib/dpif-netdev-private-extract.c
index 4b2f12015..e8550c1fb 100644
--- a/lib/dpif-netdev-private-extract.c
+++ b/lib/dpif-netdev-private-extract.c
@@ -252,6 +252,9 @@ dpif_miniflow_extract_autovalidator(struct dp_packet_batch 
*packets,
 DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
 pkt_metadata_init(>md, in_port);
 miniflow_extract(packet, [i].mf);
+keys[i].len = netdev_flow_key_size(miniflow_n_values([i].mf));
+keys[i].hash = dpif_netdev_packet_get_rss_hash_orig_pkt(packet,
+[i].mf);
 
 /* Store known good metadata to compare with optimized metadata. */
 good_l2_5_ofs[i] = packet->l2_5_ofs;
@@ -266,10 +269,11 @@ dpif_miniflow_extract_autovalidator(struct 
dp_packet_batch *packets,
 if (!mfex_impls[j].available) {
 continue;
 }
-/* Reset keys and offsets before each implementation. */
+/* Reset keys, offsets and hash before each implementation. */
 memset(test_keys, 0, keys_size * sizeof(struct netdev_flow_key));
 DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
 dp_packet_reset_offsets(packet);
+*dp_packet_ol_flags_ptr(packet) &= ~DP_PACKET_OL_RSS_HASH;
 }
 /* Call optimized miniflow for each batch of packet. */
 uint32_t hit_mask = mfex_impls[j].extract_func(packets, test_keys,
@@ -335,6 +339,15 @@ dpif_miniflow_extract_autovalidator(struct dp_packet_batch 
*packets,
 failed = 1;
 }
 
+/* Check hashes are equal. */
+if ((keys[i].hash != test_keys[i].hash) ||
+(keys[i].len != test_keys[i].len)) {
+ds_put_format(_msg, "Good hash: %d len: %d\tTest hash:%d"
+  " len:%d\n", keys[i].hash, keys[i].len,
+  test_keys[i].hash, test_keys[i].len);
+failed = 1;
+}
+
 if (failed) {
 VLOG_ERR("Autovalidation for %s failed in pkt %d,"
  " disabling.", mfex_impls[j].name, i);
@@ -351,13 +364,10 @@ dpif_miniflow_extract_autovalidator(struct 
dp_packet_batch *packets,
 atomic_store_relaxed(>miniflow_extract_opt, NULL);
 }
 
-/* Preserve packet correctness by storing back the good offsets in
- * packets back. */
+/* Reset all packet values. */
 DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
-packet->l2_5_ofs = good_l2_5_ofs[i];
-packet->l3_ofs = good_l3_ofs[i];
-packet->l4_ofs = good_l4_ofs[i];
-packet->l2_pad_size = good_l2_pad_size[i];
+dp_packet_reset_offsets(packet);
+*dp_packet_ol_flags_ptr(packet) &= ~DP_PACKET_OL_RSS_HASH;
 }
 
 /* Returning zero implies no packets were hit by autovalidation. This
-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v8 1/4] dpif-netdev/mfex: Add ipv4 profile based hashing.

2022-04-01 Thread Kumar Amber
For packets which don't already have a hash calculated,
miniflow_hash_5tuple() calculates the hash of a packet
using the previously built miniflow.

This commit adds IPv4 profile specific hashing which
uses fixed offsets into the packet to improve hashing
performance.

Signed-off-by: Kumar Amber 
Signed-off-by: Harry van Haaren 
Co-authored-by: Harry van Haaren 

---
v8:
- Fix comments from cian.
v4:
- Use pre-defined hash length values.
v3:
- Fix check-patch sign-offs.
---
---
 NEWS |  2 +
 lib/dpif-netdev-extract-avx512.c | 65 
 2 files changed, 67 insertions(+)

diff --git a/NEWS b/NEWS
index 8fa57836a..db58be457 100644
--- a/NEWS
+++ b/NEWS
@@ -3,6 +3,8 @@ Post-v2.17.0
- OVSDB:
  * 'relay' service model now supports transaction history, i.e. honors the
'last-txn-id' field in 'monitor_cond_since' requests from clients.
+   - Userspace datapath:
+ * Add IPv4 profile based 5tuple hashing optimizations.
 
 
 v2.17.0 - 17 Feb 2022
diff --git a/lib/dpif-netdev-extract-avx512.c b/lib/dpif-netdev-extract-avx512.c
index c1c1fefb6..76e20a6ed 100644
--- a/lib/dpif-netdev-extract-avx512.c
+++ b/lib/dpif-netdev-extract-avx512.c
@@ -278,6 +278,10 @@ struct mfex_profile {
 uint64_t mf_bits[FLOWMAP_UNITS];
 uint16_t dp_pkt_offs[4];
 uint16_t dp_pkt_min_size;
+
+/* Constant data offsets for hashing. */
+uint8_t hash_pkt_offs[4];
+uint32_t hash_len;
 };
 
 /* Ensure dp_pkt_offs[4] is the correct size as in struct dp_packet. */
@@ -327,6 +331,13 @@ enum MFEX_PROFILES {
 PROFILE_COUNT,
 };
 
+/* Packet offsets for 5 tuple hash function. */
+#define HASH_IPV4 \
+26, 30, 23, 34
+
+#define HASH_DT1Q_IPV4 \
+30, 34, 27, 38
+
 /* Static const instances of profiles. These are compile-time constants,
  * and are specialized into individual miniflow-extract functions.
  * NOTE: Order of the fields is significant, any change in the order must be
@@ -347,6 +358,9 @@ static const struct mfex_profile 
mfex_profiles[PROFILE_COUNT] =
 0, UINT16_MAX, 14, 34,
 },
 .dp_pkt_min_size = 42,
+
+.hash_pkt_offs = { HASH_IPV4 },
+.hash_len = 72,
 },
 
 [PROFILE_ETH_IPV4_TCP] = {
@@ -370,6 +384,9 @@ static const struct mfex_profile 
mfex_profiles[PROFILE_COUNT] =
 0, UINT16_MAX, 14, 34,
 },
 .dp_pkt_min_size = 54,
+
+.hash_pkt_offs = { HASH_IPV4 },
+.hash_len = 80,
 },
 
 [PROFILE_ETH_VLAN_IPV4_UDP] = {
@@ -389,6 +406,9 @@ static const struct mfex_profile 
mfex_profiles[PROFILE_COUNT] =
 14, UINT16_MAX, 18, 38,
 },
 .dp_pkt_min_size = 46,
+
+.hash_pkt_offs = { HASH_DT1Q_IPV4 },
+.hash_len = 80,
 },
 
 [PROFILE_ETH_VLAN_IPV4_TCP] = {
@@ -414,6 +434,9 @@ static const struct mfex_profile 
mfex_profiles[PROFILE_COUNT] =
 14, UINT16_MAX, 18, 38,
 },
 .dp_pkt_min_size = 58,
+
+.hash_pkt_offs = { HASH_DT1Q_IPV4 },
+.hash_len = 88,
 },
 };
 
@@ -467,6 +490,33 @@ mfex_handle_tcp_flags(const struct tcp_header *tcp, 
uint64_t *block)
 *block = ctl_u64 << 32;
 }
 
+static inline void
+mfex_5tuple_hash_ipv4(struct dp_packet *packet, const uint8_t *pkt,
+  struct netdev_flow_key *key,
+  const uint8_t *pkt_offsets)
+{
+if (!dp_packet_rss_valid(packet)) {
+uint32_t hash = 0;
+void *ipv4_src = (void *) [pkt_offsets[0]];
+void *ipv4_dst = (void *) [pkt_offsets[1]];
+void *ports_l4 = (void *) [pkt_offsets[3]];
+
+/* IPv4 Src and Dst. */
+hash = hash_add(hash, *(uint32_t *) ipv4_src);
+hash = hash_add(hash, *(uint32_t *) ipv4_dst);
+/* IPv4 proto. */
+hash = hash_add(hash, pkt[pkt_offsets[2]]);
+/* L4 ports. */
+hash = hash_add(hash, *(uint32_t *) ports_l4);
+hash = hash_finish(hash, 42);
+
+dp_packet_set_rss_hash(packet, hash);
+key->hash = hash;
+} else {
+key->hash = dp_packet_get_rss_hash(packet);
+}
+}
+
 /* Generic loop to process any mfex profile. This code is specialized into
  * multiple actual MFEX implementation functions. Its marked ALWAYS_INLINE
  * to ensure the compiler specializes each instance. The code is marked "hot"
@@ -577,6 +627,10 @@ mfex_avx512_process(struct dp_packet_batch *packets,
 /* Process TCP flags, and store to blocks. */
 const struct tcp_header *tcp = (void *)[38];
 mfex_handle_tcp_flags(tcp, [7]);
+
+mfex_5tuple_hash_ipv4(packet, pkt, [i],
+  profile->hash_pkt_offs);
+keys[i].len = profile->hash_len;
 } break;
 
 case PROFILE_ETH_VLAN_IPV4_UDP: {
@@ -588,6 +642,10 @@ mfex_avx512_process(struct dp_packet_batch *packets,
 

[ovs-dev] [PATCH v8 0/4] IPv4 Hashing AVX512 Optimizations

2022-04-01 Thread Kumar Amber
Hashing optimizations are also included, which can further
improve performance by approximately 10%.

---
v8:
- Optimize mfex_fuzzy script for less IO writes.
v7:
- Split IPv4 hashing into a separate patchset.
v6:
- Reorder Patches in the Patchset.
v5:
- Add Ipv6 and TCP packet length checks.
v4:
- rebase to master.
- use static key lengths for different packet types.
v3:
- rebase to master.
v2:
- fix the CI build.
- fix check-patch for co-author.
---

Kumar Amber (4):
  dpif-netdev/mfex: Add ipv4 profile based hashing.
  dpif-netdev/mfex: Add packet hash check to autovalidator.
  dpif-netdev/mfex: Avoid hashing when opt mfex called.
  tests/mfex: Improve pcap script for mfex tests.

 NEWS  |   2 +
 lib/dpif-netdev-avx512.c  |   6 +--
 lib/dpif-netdev-extract-avx512.c  |  65 +
 lib/dpif-netdev-private-extract.c |  24 +++
 tests/automake.mk |   1 -
 tests/mfex_fuzzy.py   |  66 +-
 tests/pcap/mfex_test.pcap | Bin 416 -> 0 bytes
 tests/system-dpdk.at  |  23 +++
 8 files changed, 150 insertions(+), 37 deletions(-)
 delete mode 100644 tests/pcap/mfex_test.pcap

-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v6] dpcls: Change info-get function to fetch dpcls usage stats.

2022-03-29 Thread Kumar Amber
Modified the dpcls info-get command output to include
the use count for the different dpcls implementations.

$ovs-appctl dpif-netdev/subtable-lookup-prio-get

Available dpcls implementations:
  autovalidator (Use count: 1, Priority: 5)
  generic (Use count: 0, Priority: 1)
  avx512_gather (Use count: 0, Priority: 3)

Test case to verify changes:
1021: PMD - dpcls configuration ok

Signed-off-by: Kumar Amber 
Signed-off-by: Harry van Haaren 
Signed-off-by: Eelco Chaudron 
Co-authored-by: Harry van Haaren 
Co-authored-by: Eelco Chaudron 
Acked-by: Eelco Chaudron 
---
v6:
- Rebase to master
v5:
- change the info-incr and decr APIs.
- Reduce the complexity of dpcls stats APIs.
v4:
- Fix comments on the patch.
- Change API from an overloaded method of counting, to returning the
  old and new subtable structs. This allows the caller to identify the
  modified subtable implementations, and update the statistics accordingly.
v3:
- Fix comments on the patch.
- Function API remains same, see discussion on OVS ML here:
  "https://mail.openvswitch.org/pipermail/ovs-dev/2021-October/388737.html"
v2:
- Dependency merged rebased to master.

---
---
 Documentation/topics/dpdk/bridge.rst | 16 ++---
 lib/dpif-netdev-lookup.c | 98 +---
 lib/dpif-netdev-lookup.h | 18 -
 lib/dpif-netdev-private-dpcls.h  |  1 +
 lib/dpif-netdev.c| 38 ++-
 tests/pmd.at | 16 ++---
 6 files changed, 127 insertions(+), 60 deletions(-)

diff --git a/Documentation/topics/dpdk/bridge.rst 
b/Documentation/topics/dpdk/bridge.rst
index ceee91015..314c31a47 100644
--- a/Documentation/topics/dpdk/bridge.rst
+++ b/Documentation/topics/dpdk/bridge.rst
@@ -180,10 +180,10 @@ OVS provides multiple implementations of dpcls. The 
following command enables
 the user to check what implementations are available in a running instance::
 
 $ ovs-appctl dpif-netdev/subtable-lookup-prio-get
-Available lookup functions (priority : name)
-0 : autovalidator
-1 : generic
-0 : avx512_gather
+Available dpcls implementations:
+autovalidator (Use count: 1, Priority: 5)
+generic (Use count: 0, Priority: 1)
+avx512_gather (Use count: 0, Priority: 3)
 
 To set the priority of a lookup function, run the ``prio-set`` command::
 
@@ -196,10 +196,10 @@ function due to the command being run. To verify the 
prioritization, re-run the
 get command, note the updated priority of the ``avx512_gather`` function::
 
 $ ovs-appctl dpif-netdev/subtable-lookup-prio-get
-Available lookup functions (priority : name)
-0 : autovalidator
-1 : generic
-5 : avx512_gather
+Available dpcls implementations:
+autovalidator (Use count: 1, Priority: 5)
+generic (Use count: 0, Priority: 1)
+avx512_gather (Use count: 0, Priority: 3)
 
 If two lookup functions have the same priority, the first one in the list is
 chosen, and the 2nd occurance of that priority is not used. Put in logical
diff --git a/lib/dpif-netdev-lookup.c b/lib/dpif-netdev-lookup.c
index bd0a99abe..e641e4028 100644
--- a/lib/dpif-netdev-lookup.c
+++ b/lib/dpif-netdev-lookup.c
@@ -36,18 +36,21 @@ static struct dpcls_subtable_lookup_info_t 
subtable_lookups[] = {
 { .prio = 0,
 #endif
   .probe = dpcls_subtable_autovalidator_probe,
-  .name = "autovalidator", },
+  .name = "autovalidator",
+  .usage_cnt = ATOMIC_COUNT_INIT(0), },
 
 /* The default scalar C code implementation. */
 { .prio = 1,
   .probe = dpcls_subtable_generic_probe,
-  .name = "generic", },
+  .name = "generic",
+  .usage_cnt = ATOMIC_COUNT_INIT(0), },
 
 #if (__x86_64__ && HAVE_AVX512F && HAVE_LD_AVX512_GOOD && __SSE4_2__)
 /* Only available on x86_64 bit builds with SSE 4.2 used for OVS core. */
 { .prio = 0,
   .probe = dpcls_subtable_avx512_gather_probe,
-  .name = "avx512_gather", },
+  .name = "avx512_gather",
+  .usage_cnt = ATOMIC_COUNT_INIT(0), },
 #else
 /* Disabling AVX512 at compile time, as compile time requirements not met.
  * This could be due to a number of reasons:
@@ -64,7 +67,7 @@ static struct dpcls_subtable_lookup_info_t subtable_lookups[] 
= {
 #endif
 };
 
-int32_t
+int
 dpcls_subtable_lookup_info_get(struct dpcls_subtable_lookup_info_t **out_ptr)
 {
 if (out_ptr == NULL) {
@@ -76,7 +79,7 @@ dpcls_subtable_lookup_info_get(struct 
dpcls_subtable_lookup_info_t **out_ptr)
 }
 
 /* sets the priority of the lookup function with "name". */
-int32_t
+int
 dpcls_subtable_set_prio(const char *name, uint8_t priority)
 {
 for (int i = 0; i < ARRAY_SIZE(subtable_lookups); i++) {
@@ -93,32 +96,81 @@ dpcls_subtable_set_prio(const char *name, uint8_t priority)
 }
 
 dpcls_subtable_lookup_func
-dpcls_subta

[ovs-dev] [PATCH v2 4/4] miniflow_extract: Add autovalidator support to miniflow_extract.

2022-03-27 Thread Kumar Amber
The patch adds the flag based switch between choice of using
miniflow_extract in normal pipeline or select mfex_autovalidator
in debug and test builds.

The autovalidator can be selected at compile time using the option:

 ./configure CFLAGS="--enable-mfex-default-autovalidator"

Signed-off-by: Kumar Amber 
---
 lib/dpif-netdev-private-extract.c |  4 ++--
 lib/flow.c| 24 
 2 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/lib/dpif-netdev-private-extract.c 
b/lib/dpif-netdev-private-extract.c
index 8538d069f..0d7091caa 100644
--- a/lib/dpif-netdev-private-extract.c
+++ b/lib/dpif-netdev-private-extract.c
@@ -124,8 +124,8 @@ dpif_miniflow_extract_init(void)
 /* For the first call, this will be choosen based on the
  * compile time flag.
  */
-VLOG_INFO("Default MFEX Extract implementation is %s.\n",
-  mfex_impls[mfex_idx].name);
+VLOG_DBG("Default MFEX Extract implementation is %s.\n",
+ mfex_impls[mfex_idx].name);
 atomic_store_relaxed(mfex_func, (uintptr_t) mfex_impls
  [mfex_idx].extract_func);
 }
diff --git a/lib/flow.c b/lib/flow.c
index 127de2d7a..ddec31523 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -37,6 +37,7 @@
 #include "dp-packet.h"
 #include "dpif-netdev-private-thread.h"
 #include "dpif-netdev-private-dpcls.h"
+#include "dpif-netdev-private-extract.h"
 #include "openflow/openflow.h"
 #include "packets.h"
 #include "odp-util.h"
@@ -1121,7 +1122,30 @@ miniflow_extract_(struct dp_packet *packet, struct 
netdev_flow_key *key)
 void
 miniflow_extract(struct dp_packet *packet, struct netdev_flow_key *key)
 {
+#ifdef MFEX_AUTOVALIDATOR_DEFAULT
+static struct ovsthread_once once_enable = OVSTHREAD_ONCE_INITIALIZER;
+if (ovsthread_once_start(_enable)) {
+dpif_miniflow_extract_init();
+ovsthread_once_done(_enable);
+}
+struct dp_packet_batch packets;
+const struct pkt_metadata *md = >md;
+dp_packet_batch_init();
+dp_packet_batch_add(, packet);
+const uint32_t recirc_depth = *recirc_depth_get();
+/* Currently AVX512 DPIF dont support recirculation
+ * Once the support will be added the condition would
+ * be removed.
+ */
+if (recirc_depth) {
+miniflow_extract_(packet, key);
+} else {
+dpif_miniflow_extract_autovalidator(, key, 1,
+odp_to_u32(md->in_port.odp_port), NULL);
+}
+#else
 miniflow_extract_(packet, key);
+#endif
 }
 static ovs_be16
 parse_dl_type(const void **datap, size_t *sizep, ovs_be16 *first_vlan_tci_p)
-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 3/4] Miniflow_extract: Refactor miniflow_extract into api.

2022-03-27 Thread Kumar Amber
Miniflow extract used to take the ABI parameter struct
miniflow, which was removed and added inside
struct netdev_flow_key. In many places temporary
structs were created inside functions; these can be
cleaned up in favour of a uniform API.

Signed-off-by: Kumar Amber 
---
 lib/dpif-netdev-avx512.c  |  2 +-
 lib/dpif-netdev-private-extract.c |  2 +-
 lib/dpif-netdev.c |  2 +-
 lib/flow.c| 28 +++-
 lib/flow.h|  6 +-
 ofproto/ofproto.c | 10 --
 6 files changed, 31 insertions(+), 19 deletions(-)

diff --git a/lib/dpif-netdev-avx512.c b/lib/dpif-netdev-avx512.c
index b7131ba3f..76eeecc9a 100644
--- a/lib/dpif-netdev-avx512.c
+++ b/lib/dpif-netdev-avx512.c
@@ -211,7 +211,7 @@ dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread 
*pmd,
 
 if (!mfex_hit) {
 /* Do a scalar miniflow extract into keys. */
-miniflow_extract(packet, >mf);
+miniflow_extract(packet, key);
 }
 
 /* Cache TCP and byte values for all packets. */
diff --git a/lib/dpif-netdev-private-extract.c 
b/lib/dpif-netdev-private-extract.c
index 4b2f12015..8538d069f 100644
--- a/lib/dpif-netdev-private-extract.c
+++ b/lib/dpif-netdev-private-extract.c
@@ -251,7 +251,7 @@ dpif_miniflow_extract_autovalidator(struct dp_packet_batch 
*packets,
 /* Run scalar miniflow_extract to get default result. */
 DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
 pkt_metadata_init(>md, in_port);
-miniflow_extract(packet, [i].mf);
+miniflow_extract_(packet, [i]);
 
 /* Store known good metadata to compare with optimized metadata. */
 good_l2_5_ofs[i] = packet->l2_5_ofs;
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 5fc68bdbe..fd3fe510d 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -8144,7 +8144,7 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd,
 }
 }
 
-miniflow_extract(packet, >mf);
+miniflow_extract(packet, key);
 key->len = 0; /* Not computed yet. */
 key->hash =
 (md_is_valid == false)
diff --git a/lib/flow.c b/lib/flow.c
index dd523c889..127de2d7a 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -35,6 +35,8 @@
 #include "jhash.h"
 #include "openvswitch/match.h"
 #include "dp-packet.h"
+#include "dpif-netdev-private-thread.h"
+#include "dpif-netdev-private-dpcls.h"
 #include "openflow/openflow.h"
 #include "packets.h"
 #include "odp-util.h"
@@ -633,15 +635,18 @@ parse_nsh(const void **datap, size_t *sizep, struct 
ovs_key_nsh *key)
 void
 flow_extract(struct dp_packet *packet, struct flow *flow)
 {
-struct {
-struct miniflow mf;
-uint64_t buf[FLOW_U64S];
-} m;
+
+struct netdev_flow_key key;
 
 COVERAGE_INC(flow_extract);
 
-miniflow_extract(packet, );
-miniflow_expand(, flow);
+/* Changing parameter to key will not affect anything as
+ * buff array is still followed by the mf bit map inside
+ * netdev_flow_key, thus there wont be any impact on offset
+ * calculations which were done earlier.
+ */
+miniflow_extract(packet, );
+miniflow_expand(, flow);
 }
 
 static inline bool
@@ -758,7 +763,7 @@ dump_invalid_packet(struct dp_packet *packet, const char 
*reason)
  *  of interest for the flow, otherwise UINT16_MAX.
  */
 void
-miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
+miniflow_extract_(struct dp_packet *packet, struct netdev_flow_key *key)
 {
 /* Add code to this function (or its callees) to extract new fields. */
 BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42);
@@ -767,7 +772,7 @@ miniflow_extract(struct dp_packet *packet, struct miniflow 
*dst)
 const void *data = dp_packet_data(packet);
 size_t size = dp_packet_size(packet);
 ovs_be32 packet_type = packet->packet_type;
-uint64_t *values = miniflow_values(dst);
+uint64_t *values = miniflow_values(>mf);
 struct mf_ctx mf = { FLOWMAP_EMPTY_INITIALIZER, values,
  values + FLOW_U64S };
 const char *frame;
@@ -1110,9 +1115,14 @@ miniflow_extract(struct dp_packet *packet, struct 
miniflow *dst)
 }
 }
  out:
-dst->map = mf.map;
+key->mf.map = mf.map;
 }
 
+void
+miniflow_extract(struct dp_packet *packet, struct netdev_flow_key *key)
+{
+miniflow_extract_(packet, key);
+}
 static ovs_be16
 parse_dl_type(const void **datap, size_t *sizep, ovs_be16 *first_vlan_tci_p)
 {
diff --git a/lib/flow.h b/lib/flow.h
index c647ad83c..3bfe12f31 100644
--- a/lib/flow.h
+++ b/lib/flow.h
@@ -41,6 +41,7 @@ struct dp_packet;
 struct ofputil_port_map;
 struct pkt_metadata;
 struct match;
+struct netdev_flow_key;
 
 /* Some flow fields are mutually exclusive or only appear within the flow
  * pipeline.  IPv6 headers are bigger than IPv4 and M

[ovs-dev] [PATCH v2 2/4] dpif-netdev: Refactor hashing function.

2022-03-27 Thread Kumar Amber
This change moves the get_rss hashing function to a
header file so that it can be used across ovs; it
was previously limited to dpif-netdev.c.

Signed-off-by: Kumar Amber 
Signed-off-by: Cian Ferriter 
Co-authored-by: Cian Ferriter 
---
 lib/dpif-netdev-private-dpcls.h | 23 +++
 lib/dpif-netdev.c   | 22 --
 2 files changed, 23 insertions(+), 22 deletions(-)

diff --git a/lib/dpif-netdev-private-dpcls.h b/lib/dpif-netdev-private-dpcls.h
index 0d5da73c7..a86ea449b 100644
--- a/lib/dpif-netdev-private-dpcls.h
+++ b/lib/dpif-netdev-private-dpcls.h
@@ -25,6 +25,7 @@
 
 #include "cmap.h"
 #include "openvswitch/thread.h"
+#include "dpif-netdev-private-dpif.h"
 
 #ifdef  __cplusplus
 extern "C" {
@@ -124,6 +125,28 @@ dpif_netdev_packet_get_rss_hash_orig_pkt(struct dp_packet 
*packet,
 return hash;
 }
 
+static inline uint32_t
+dpif_netdev_packet_get_rss_hash(struct dp_packet *packet,
+const struct miniflow *mf)
+{
+uint32_t hash;
+
+if (OVS_LIKELY(dp_packet_rss_valid(packet))) {
+hash = dp_packet_get_rss_hash(packet);
+} else {
+hash = miniflow_hash_5tuple(mf, 0);
+dp_packet_set_rss_hash(packet, hash);
+}
+
+/* The RSS hash must account for the recirculation depth to avoid
+ * collisions in the exact match cache */
+uint32_t recirc_depth = *recirc_depth_get();
+if (OVS_UNLIKELY(recirc_depth)) {
+hash = hash_finish(hash, recirc_depth);
+}
+return hash;
+}
+
 /* Allow other implementations to call dpcls_lookup() for subtable search. */
 bool
 dpcls_lookup(struct dpcls *cls, const struct netdev_flow_key *keys[],
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 5c2123e0c..5fc68bdbe 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -7787,28 +7787,6 @@ dp_netdev_upcall(struct dp_netdev_pmd_thread *pmd, 
struct dp_packet *packet_,
  actions, wc, put_actions, dp->upcall_aux);
 }
 
-static inline uint32_t
-dpif_netdev_packet_get_rss_hash(struct dp_packet *packet,
-const struct miniflow *mf)
-{
-uint32_t hash, recirc_depth;
-
-if (OVS_LIKELY(dp_packet_rss_valid(packet))) {
-hash = dp_packet_get_rss_hash(packet);
-} else {
-hash = miniflow_hash_5tuple(mf, 0);
-dp_packet_set_rss_hash(packet, hash);
-}
-
-/* The RSS hash must account for the recirculation depth to avoid
- * collisions in the exact match cache */
-recirc_depth = *recirc_depth_get_unsafe();
-if (OVS_UNLIKELY(recirc_depth)) {
-hash = hash_finish(hash, recirc_depth);
-}
-return hash;
-}
-
 struct packet_batch_per_flow {
 unsigned int byte_count;
 uint16_t tcp_flags;
-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 1/4] dpif-netdev: Refactor per thread recirc data allocation.

2022-03-27 Thread Kumar Amber
The refactor allows us to use *recirc_depth_get() to obtain
the depth across ovs which was previously limited to only
dpif-netdev.c.

Signed-off-by: Kumar Amber 
Signed-off-by: Cian Ferriter 
Co-authored-by: Cian Ferriter 
---
 lib/dpif-netdev-private-dpif.c | 2 ++
 lib/dpif-netdev-private-dpif.h | 5 +
 lib/dpif-netdev.c  | 3 ---
 3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/lib/dpif-netdev-private-dpif.c b/lib/dpif-netdev-private-dpif.c
index 84d4ec156..6f8de9094 100644
--- a/lib/dpif-netdev-private-dpif.c
+++ b/lib/dpif-netdev-private-dpif.c
@@ -28,6 +28,8 @@
 
 VLOG_DEFINE_THIS_MODULE(dpif_netdev_impl);
 
+DEFINE_EXTERN_PER_THREAD_DATA(recirc_depth, 0);
+
 enum dpif_netdev_impl_info_idx {
 DPIF_NETDEV_IMPL_SCALAR,
 DPIF_NETDEV_IMPL_AVX512
diff --git a/lib/dpif-netdev-private-dpif.h b/lib/dpif-netdev-private-dpif.h
index 0da639c55..15f1f36b3 100644
--- a/lib/dpif-netdev-private-dpif.h
+++ b/lib/dpif-netdev-private-dpif.h
@@ -18,6 +18,11 @@
 #define DPIF_NETDEV_PRIVATE_DPIF_H 1
 
 #include "openvswitch/types.h"
+#include "ovs-thread.h"
+
+#define MAX_RECIRC_DEPTH 6
+/* Use per thread recirc_depth to prevent recirculation loop. */
+DECLARE_EXTERN_PER_THREAD_DATA(uint32_t, recirc_depth);
 
 /* Forward declarations to avoid including files. */
 struct dp_netdev_pmd_thread;
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 88a5459cc..5c2123e0c 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -97,9 +97,6 @@ VLOG_DEFINE_THIS_MODULE(dpif_netdev);
 #define MIN_TO_MSEC  6
 
 #define FLOW_DUMP_MAX_BATCH 50
-/* Use per thread recirc_depth to prevent recirculation loop. */
-#define MAX_RECIRC_DEPTH 6
-DEFINE_STATIC_PER_THREAD_DATA(uint32_t, recirc_depth, 0)
 
 /* Use instant packet send by default. */
 #define DEFAULT_TX_FLUSH_INTERVAL 0
-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 0/4] Miniflow Extract Testing Improvements

2022-03-27 Thread Kumar Amber
The patch-set introduces changes which improve
the testing of miniflow_extract for AVX512-based
miniflow_extract optimizations without affecting the scalar
code path.

Kumar Amber (4):
  dpif-netdev: Refactor per thread recirc data allocation.
  dpif-netdev: Refactor hashing function.
  Miniflow_extract: Refactor miniflow_extract into api.
  miniflow_extract: Add autovalidator support to miniflow_extract.

 lib/dpif-netdev-avx512.c  |  2 +-
 lib/dpif-netdev-private-dpcls.h   | 23 ++
 lib/dpif-netdev-private-dpif.c|  2 ++
 lib/dpif-netdev-private-dpif.h|  5 +++
 lib/dpif-netdev-private-extract.c |  6 ++--
 lib/dpif-netdev.c | 27 +---
 lib/flow.c| 52 +--
 lib/flow.h|  6 +++-
 ofproto/ofproto.c | 10 +++---
 9 files changed, 87 insertions(+), 46 deletions(-)

-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v7 4/4] tests/mfex: Improve pcap script for mfex tests.

2022-03-23 Thread Kumar Amber
The mfex pcap generation script is improved for varied length
traffic and also removes the hard coded mfex_pcap and instead uses
the script itself to generate complex traffic patterns for testing.

Signed-off-by: Kumar Amber 
---
 tests/automake.mk |   1 -
 tests/mfex_fuzzy.py   |  55 +-
 tests/pcap/mfex_test.pcap | Bin 416 -> 0 bytes
 tests/system-dpdk.at  |  23 +++-
 4 files changed, 52 insertions(+), 27 deletions(-)
 delete mode 100644 tests/pcap/mfex_test.pcap

diff --git a/tests/automake.mk b/tests/automake.mk
index 8a9151f81..507da2ee8 100644
--- a/tests/automake.mk
+++ b/tests/automake.mk
@@ -145,7 +145,6 @@ $(srcdir)/tests/fuzz-regression-list.at: tests/automake.mk
 
 EXTRA_DIST += $(MFEX_AUTOVALIDATOR_TESTS)
 MFEX_AUTOVALIDATOR_TESTS = \
-   tests/pcap/mfex_test.pcap \
tests/mfex_fuzzy.py
 
 OVSDB_CLUSTER_TESTSUITE_AT = \
diff --git a/tests/mfex_fuzzy.py b/tests/mfex_fuzzy.py
index 3efe1152d..012eac76f 100755
--- a/tests/mfex_fuzzy.py
+++ b/tests/mfex_fuzzy.py
@@ -3,30 +3,47 @@
 import sys
 
 from scapy.all import RandMAC, RandIP, PcapWriter, RandIP6, RandShort, fuzz
-from scapy.all import IPv6, Dot1Q, IP, Ether, UDP, TCP
+from scapy.all import IPv6, Dot1Q, IP, Ether, UDP, TCP, random
 
 path = str(sys.argv[1]) + "/pcap/fuzzy.pcap"
+size = int(sys.argv[2])
+traffic_opt = str(sys.argv[3])
+
 pktdump = PcapWriter(path, append=False, sync=True)
 
-for i in range(0, 2000):
+for i in range(0, size):
 
-# Generate random protocol bases, use a fuzz() over the combined packet
-# for full fuzzing.
 eth = Ether(src=RandMAC(), dst=RandMAC())
 vlan = Dot1Q()
-ipv4 = IP(src=RandIP(), dst=RandIP())
-ipv6 = IPv6(src=RandIP6(), dst=RandIP6())
+ipv4 = IP(src=RandIP(), dst=RandIP(), len=random.randint(0, 100))
+ipv6 = IPv6(src=RandIP6(), dst=RandIP6(), plen=random.randint(0, 100))
 udp = UDP(dport=RandShort(), sport=RandShort())
-tcp = TCP(dport=RandShort(), sport=RandShort())
-
-# IPv4 packets with fuzzing
-pktdump.write(fuzz(eth / ipv4 / udp))
-pktdump.write(fuzz(eth / ipv4 / tcp))
-pktdump.write(fuzz(eth / vlan / ipv4 / udp))
-pktdump.write(fuzz(eth / vlan / ipv4 / tcp))
-
-# IPv6 packets with fuzzing
-pktdump.write(fuzz(eth / ipv6 / udp))
-pktdump.write(fuzz(eth / ipv6 / tcp))
-pktdump.write(fuzz(eth / vlan / ipv6 / udp))
-pktdump.write(fuzz(eth / vlan / ipv6 / tcp))
+tcp = TCP(dport=RandShort(), sport=RandShort(), flags='S', dataofs=(0, 20))
+
+if traffic_opt == "fuzzy":
+
+# IPv4 packets with fuzzing
+pktdump.write(fuzz(eth / ipv4 / udp))
+pktdump.write(fuzz(eth / ipv4 / tcp))
+pktdump.write(fuzz(eth / vlan / ipv4 / udp))
+pktdump.write(fuzz(eth / vlan / ipv4 / tcp))
+
+# IPv6 packets with fuzzing
+pktdump.write(fuzz(eth / ipv6 / udp))
+pktdump.write(fuzz(eth / ipv6 / tcp))
+pktdump.write(fuzz(eth / vlan / ipv6 / udp))
+pktdump.write(fuzz(eth / vlan / ipv6 / tcp))
+
+else:
+
+# IPv4 packets
+pktdump.write(eth / ipv4 / udp)
+pktdump.write(eth / ipv4 / tcp)
+pktdump.write(eth / vlan / ipv4 / udp)
+pktdump.write(eth / vlan / ipv4 / tcp)
+
+# IPv6 packets
+pktdump.write(eth / ipv6 / udp)
+pktdump.write(eth / ipv6 / tcp)
+pktdump.write(eth / vlan / ipv6 / udp)
+pktdump.write(eth / vlan / ipv6 / tcp)
diff --git a/tests/pcap/mfex_test.pcap b/tests/pcap/mfex_test.pcap
deleted file mode 100644
index 
1aac67b8d643ecb016c758cba4cc32212a80f52a..
GIT binary patch
literal 0
HcmV?d1

literal 416
zcmca|c+)~A1{MYw`2U}Qff2}QK`M68ITRa|G@yFii5$Gfk6YL%z>@uY&}o|
z2s4N<1VH2&7y^V87$)XGOtD~MV$cFgfG~zBGGJ2#YtF$KST_NTIwYriok6N4Vm)gX-Q@c^{cp<7_5LgK^UuU{2>VS0RZ!RQ+EIW

diff --git a/tests/system-dpdk.at b/tests/system-dpdk.at
index 7d2715c4a..1476e470c 100644
--- a/tests/system-dpdk.at
+++ b/tests/system-dpdk.at
@@ -226,12 +226,13 @@ dnl 
--
 dnl Add standard DPDK PHY port
 AT_SETUP([OVS-DPDK - MFEX Autovalidator])
 AT_KEYWORDS([dpdk])
-
+AT_SKIP_IF([! $PYTHON3 -c "import scapy"], [], [])
+AT_CHECK([$PYTHON3 $srcdir/mfex_fuzzy.py $srcdir 2000 0], [], [stdout])
 OVS_DPDK_START()
 
 dnl Add userspace bridge and attach it to OVS
 AT_CHECK([ovs-vsctl add-br br0 -- set bridge br0 datapath_type=netdev])
-AT_CHECK([ovs-vsctl add-port br0 p1 -- set Interface p1 type=dpdk 
options:dpdk-devargs=net_pcap1,rx_pcap=$srcdir/pcap/mfex_test.pcap,infinite_rx=1],
 [], [stdout], [stderr])
+AT_CHECK([ovs-vsctl add-port br0 p1 -- set Interface p1 type=dpdk 
options:dpdk-devargs=net_pcap1,rx_pcap=$srcdir/pcap/fuzzy.pcap,infinite_rx=1], 
[], [stdout], [stderr])
 AT_CHECK([ovs-vsctl show], [], [stdout])
 
 AT_SKIP_IF([! ovs-appctl dpif-netdev/miniflow-parser-get | 

[ovs-dev] [PATCH v7 1/4] dpif-netdev/mfex: Add ipv4 profile based hashing.

2022-03-23 Thread Kumar Amber
This commit adds IPv4 profile specific hashing which
uses fixed offsets into the packet to improve hashing
performance.

Signed-off-by: Kumar Amber 
Signed-off-by: Harry van Haaren 
Co-authored-by: Harry van Haaren 

---
v4:
- Use pre-defined hash length values.
v3:
- Fix check-patch sign-offs.
---
---
 NEWS |  2 +
 lib/dpif-netdev-extract-avx512.c | 65 
 2 files changed, 67 insertions(+)

diff --git a/NEWS b/NEWS
index df633e8e2..2090498bb 100644
--- a/NEWS
+++ b/NEWS
@@ -4,6 +4,8 @@ Post-v2.17.0
  * 'relay' service model now supports transaction history, i.e. honors the
'last-txn-id' field in 'monitor_cond_since' requests from clients.
 
+   - Userspace datapath:
+ * Add IPv4 profile based 5tuple hashing optimizations.
 
 v2.17.0 - xx xxx 
 -
diff --git a/lib/dpif-netdev-extract-avx512.c b/lib/dpif-netdev-extract-avx512.c
index c1c1fefb6..e0db86629 100644
--- a/lib/dpif-netdev-extract-avx512.c
+++ b/lib/dpif-netdev-extract-avx512.c
@@ -278,6 +278,10 @@ struct mfex_profile {
 uint64_t mf_bits[FLOWMAP_UNITS];
 uint16_t dp_pkt_offs[4];
 uint16_t dp_pkt_min_size;
+
+/* Constant data offsets for Hashing. */
+uint8_t hash_pkt_offs[6];
+uint32_t hash_len;
 };
 
 /* Ensure dp_pkt_offs[4] is the correct size as in struct dp_packet. */
@@ -327,6 +331,13 @@ enum MFEX_PROFILES {
 PROFILE_COUNT,
 };
 
+/* Packet offsets for 5 tuple Hash function. */
+#define HASH_IPV4 \
+26, 30, 23, 34, 0, 0
+
+#define HASH_DT1Q_IPV4 \
+30, 34, 27, 38, 0, 0
+
 /* Static const instances of profiles. These are compile-time constants,
  * and are specialized into individual miniflow-extract functions.
  * NOTE: Order of the fields is significant, any change in the order must be
@@ -347,6 +358,9 @@ static const struct mfex_profile 
mfex_profiles[PROFILE_COUNT] =
 0, UINT16_MAX, 14, 34,
 },
 .dp_pkt_min_size = 42,
+
+.hash_pkt_offs = { HASH_IPV4 },
+.hash_len = 72,
 },
 
 [PROFILE_ETH_IPV4_TCP] = {
@@ -370,6 +384,9 @@ static const struct mfex_profile 
mfex_profiles[PROFILE_COUNT] =
 0, UINT16_MAX, 14, 34,
 },
 .dp_pkt_min_size = 54,
+
+.hash_pkt_offs = { HASH_IPV4 },
+.hash_len = 80,
 },
 
 [PROFILE_ETH_VLAN_IPV4_UDP] = {
@@ -389,6 +406,9 @@ static const struct mfex_profile 
mfex_profiles[PROFILE_COUNT] =
 14, UINT16_MAX, 18, 38,
 },
 .dp_pkt_min_size = 46,
+
+.hash_pkt_offs = { HASH_DT1Q_IPV4 },
+.hash_len = 80,
 },
 
 [PROFILE_ETH_VLAN_IPV4_TCP] = {
@@ -414,6 +434,9 @@ static const struct mfex_profile 
mfex_profiles[PROFILE_COUNT] =
 14, UINT16_MAX, 18, 38,
 },
 .dp_pkt_min_size = 58,
+
+.hash_pkt_offs = { HASH_DT1Q_IPV4 },
+.hash_len = 88,
 },
 };
 
@@ -467,6 +490,33 @@ mfex_handle_tcp_flags(const struct tcp_header *tcp, 
uint64_t *block)
 *block = ctl_u64 << 32;
 }
 
+static inline void
+mfex_5tuple_hash_ipv4(struct dp_packet *packet, const uint8_t *pkt,
+  struct netdev_flow_key *key,
+  const uint8_t *pkt_offsets)
+{
+if (!dp_packet_rss_valid(packet)) {
+uint32_t hash = 0;
+void *ipv4_src = (void *) [pkt_offsets[0]];
+void *ipv4_dst = (void *) [pkt_offsets[1]];
+void *ports_l4 = (void *) [pkt_offsets[3]];
+
+/* IPv4 Src and Dst. */
+hash = hash_add(hash, *(uint32_t *) ipv4_src);
+hash = hash_add(hash, *(uint32_t *) ipv4_dst);
+/* IPv4 proto. */
+hash = hash_add(hash, pkt[pkt_offsets[2]]);
+/* L4 ports. */
+hash = hash_add(hash, *(uint32_t *) ports_l4);
+hash = hash_finish(hash, 42);
+
+dp_packet_set_rss_hash(packet, hash);
+key->hash = hash;
+} else {
+key->hash = dp_packet_get_rss_hash(packet);
+}
+}
+
 /* Generic loop to process any mfex profile. This code is specialized into
  * multiple actual MFEX implementation functions. Its marked ALWAYS_INLINE
  * to ensure the compiler specializes each instance. The code is marked "hot"
@@ -577,6 +627,10 @@ mfex_avx512_process(struct dp_packet_batch *packets,
 /* Process TCP flags, and store to blocks. */
 const struct tcp_header *tcp = (void *)[38];
 mfex_handle_tcp_flags(tcp, [7]);
+
+mfex_5tuple_hash_ipv4(packet, pkt, [i],
+  profile->hash_pkt_offs);
+keys[i].len = profile->hash_len;
 } break;
 
 case PROFILE_ETH_VLAN_IPV4_UDP: {
@@ -588,6 +642,10 @@ mfex_avx512_process(struct dp_packet_batch *packets,
   UDP_HEADER_LEN)) {
 continue;
 }
+
+mfex_5tuple_hash_ipv4(packet, pkt, [i],
+

  1   2   3   4   >