From: Yipeng Wang <[email protected]>
This patch adds an AVX2 implementation of the CD lookup. All 16 entries of a
bucket are compared against the lookup key at once. This patch depends
on the first patch.
Evaluation:
We set up the testing environment the same way as for the previous patch. The
results of the AVX2 CD implementation are shown below.
AVX2 data:
1M flows:
no.subtable: 10 20 30
cd-ovs 3895961 3170530 2968555
orig-ovs 2683455 1646227 1240501
speedup 1.45x 1.92x 2.39x
Signed-off-by: Yipeng Wang <yipeng1.wang at intel.com>
Signed-off-by: Charlie Tai <charlie.tai at intel.com>
Co-authored-by: Charlie Tai <charlie.tai at intel.com>
Signed-off-by: Sameh Gobriel <sameh.gobriel at intel.com>
Co-authored-by: Sameh Gobriel <sameh.gobriel at intel.com>
Signed-off-by: Ren Wang <ren.wang at intel.com>
Co-authored-by: Ren Wang <ren.wang at intel.com>
Signed-off-by: Antonio Fischetti <antonio.fischetti at intel.com>
Co-authored-by: Antonio Fischetti <antonio.fischetti at intel.com>
---
lib/dpif-netdev.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 63 insertions(+), 1 deletion(-)
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index c697e78..23b3e42 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -2291,7 +2291,37 @@ cd_lookup_bulk_pipe(struct dpcls *cls, const struct
netdev_flow_key keys[],
rte_prefetch0(prim_bkt1);
rte_prefetch0(sec_bkt1);
+#ifdef __AVX2__
+ prim_hitmask = _mm256_movemask_epi8((__m256i)_mm256_cmpeq_epi16(
+ _mm256_load_si256((__m256i const *)prim_bkt0->sig),
+ _mm256_set1_epi16(temp_sig0)));
+
+ sec_hitmask = _mm256_movemask_epi8((__m256i)_mm256_cmpeq_epi16(
+ _mm256_load_si256((__m256i const *)sec_bkt0->sig),
+ _mm256_set1_epi16(temp_sig0)));
+
+ if (prim_hitmask) {
+ loc = raw_ctz(prim_hitmask) / 2;
+ data[i-1] =
+ prim_bkt0->table_index[loc];
+ if (data[i-1] != 0 && cls->sub_ptrs[data[i-1]] != 0) {
+ hits |= 1 << (i - 1);
+ prim_bkt0 = prim_bkt1;
+ sec_bkt0 = sec_bkt1;
+ temp_sig0 = temp_sig1;
+ continue;
+ }
+ }
+
+ if (sec_hitmask) {
+ loc = raw_ctz(sec_hitmask) / 2;
+ data[i-1] = sec_bkt0->table_index[loc];
+ if (data[i-1] != 0 && cls->sub_ptrs[data[i-1]] != 0) {
+ hits |= 1 << (i - 1);
+ }
+ }
+#else
unsigned int j;
prim_hitmask = 0;
sec_hitmask = 0;
@@ -2320,12 +2350,42 @@ cd_lookup_bulk_pipe(struct dpcls *cls, const struct
netdev_flow_key keys[],
hits |= 1 << (i - 1);
}
}
-
+#endif
prim_bkt0 = prim_bkt1;
sec_bkt0 = sec_bkt1;
temp_sig0 = temp_sig1;
}
+#ifdef __AVX2__
+ prim_hitmask = _mm256_movemask_epi8((__m256i)_mm256_cmpeq_epi16(
+ _mm256_load_si256((__m256i const *)prim_bkt0->sig),
+ _mm256_set1_epi16(temp_sig0)));
+
+
+ sec_hitmask = _mm256_movemask_epi8((__m256i)_mm256_cmpeq_epi16(
+ _mm256_load_si256((__m256i const *)sec_bkt0->sig),
+ _mm256_set1_epi16(temp_sig0)));
+
+ if (prim_hitmask) {
+ loc = raw_ctz(prim_hitmask) / 2;
+ data[i-1] = prim_bkt0->table_index[loc];
+ if (data[i-1] != 0 && cls->sub_ptrs[data[i-1]] != 0) {
+ hits |= 1 << (i - 1);
+ if (hit_mask != NULL) {
+ *hit_mask = hits;
+ }
+ return;
+ }
+ }
+
+ if (sec_hitmask) {
+ loc = raw_ctz(sec_hitmask) / 2;
+ data[i-1] = sec_bkt0->table_index[loc];
+ if (data[i-1] != 0 && cls->sub_ptrs[data[i-1]] != 0) {
+ hits |= 1 << (i - 1);
+ }
+ }
+#else
unsigned int j;
prim_hitmask = 0;
sec_hitmask = 0;
@@ -2355,9 +2415,11 @@ cd_lookup_bulk_pipe(struct dpcls *cls, const struct
netdev_flow_key keys[],
}
}
+#endif
if (hit_mask != NULL) {
*hit_mask = hits;
}
+
}
static int
--
1.9.1
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev