This patch adds an AVX2 implementation of the CD lookup. All 16 signature entries of a bucket are compared against the signature of the lookup key in a single vector operation. This patch depends on the first patch in the series.
CC: Darrell Ball <dball at vmware.com> CC: Jan Scheurich <jan.scheurich at ericsson.com> Signed-off-by: Yipeng Wang <yipeng1.wang at intel.com> Signed-off-by: Antonio Fischetti <antonio.fischetti at intel.com> Co-authored-by: Antonio Fischetti <antonio.fischetti at intel.com> --- evaluation: We set up the testing environment the same way as for the previous patch. The AVX2 CD implementation's results are shown below. AVX2 data: 1M flows: no.subtable: 10 20 30 cd-ovs 3895961 3170530 2968555 orig-ovs 2683455 1646227 1240501 speedup 1.45x 1.92x 2.39x --- lib/dpif-netdev.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 66 insertions(+), 1 deletion(-) diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index ea1d625..78219ba 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -30,6 +30,9 @@ #include <sys/socket.h> #include <sys/stat.h> #include <unistd.h> +#if defined(__AVX2__) +#include <immintrin.h> +#endif #ifdef DPDK_NETDEV #include <rte_cycles.h> @@ -2378,7 +2381,37 @@ cd_lookup_bulk_pipe(struct dpcls *cls, const struct netdev_flow_key keys[], OVS_PREFETCH(prim_bkt1); OVS_PREFETCH(sec_bkt1); +#ifdef __AVX2__ + prim_hitmask = _mm256_movemask_epi8((__m256i)_mm256_cmpeq_epi16( + _mm256_load_si256((__m256i const *)prim_bkt0->sig), + _mm256_set1_epi16(temp_sig0))); + + + sec_hitmask = _mm256_movemask_epi8((__m256i)_mm256_cmpeq_epi16( + _mm256_load_si256((__m256i const *)sec_bkt0->sig), + _mm256_set1_epi16(temp_sig0))); + if (prim_hitmask) { + loc = raw_ctz(prim_hitmask) >> 1; + data[i-1] = + prim_bkt0->table_index[loc]; + if (data[i-1] != 0 && cls->sub_ptrs[data[i-1]] != 0) { + hits |= 1 << (i - 1); + prim_bkt0 = prim_bkt1; + sec_bkt0 = sec_bkt1; + temp_sig0 = temp_sig1; + continue; + } + } + + if (sec_hitmask) { + loc = raw_ctz(sec_hitmask) >> 1; + data[i-1] = sec_bkt0->table_index[loc]; + if (data[i-1] != 0 && cls->sub_ptrs[data[i-1]] != 0) { + hits |= 1 << (i - 1); + } + } +#else unsigned int j; prim_hitmask = 0; sec_hitmask = 0; @@ -2407,12 
+2440,42 @@ cd_lookup_bulk_pipe(struct dpcls *cls, const struct netdev_flow_key keys[], hits |= 1 << (i - 1); } } - +#endif prim_bkt0 = prim_bkt1; sec_bkt0 = sec_bkt1; temp_sig0 = temp_sig1; } +#ifdef __AVX2__ + prim_hitmask = _mm256_movemask_epi8((__m256i)_mm256_cmpeq_epi16( + _mm256_load_si256((__m256i const *)prim_bkt0->sig), + _mm256_set1_epi16(temp_sig0))); + + + sec_hitmask = _mm256_movemask_epi8((__m256i)_mm256_cmpeq_epi16( + _mm256_load_si256((__m256i const *)sec_bkt0->sig), + _mm256_set1_epi16(temp_sig0))); + + if (prim_hitmask) { + loc = raw_ctz(prim_hitmask) >> 1; + data[i-1] = prim_bkt0->table_index[loc]; + if (data[i-1] != 0 && cls->sub_ptrs[data[i-1]] != 0) { + hits |= 1 << (i - 1); + if (hit_mask != NULL) { + *hit_mask = hits; + } + return; + } + } + + if (sec_hitmask) { + loc = raw_ctz(sec_hitmask) >> 1; + data[i-1] = sec_bkt0->table_index[loc]; + if (data[i-1] != 0 && cls->sub_ptrs[data[i-1]] != 0) { + hits |= 1 << (i - 1); + } + } +#else unsigned int j; prim_hitmask = 0; sec_hitmask = 0; @@ -2442,9 +2505,11 @@ cd_lookup_bulk_pipe(struct dpcls *cls, const struct netdev_flow_key keys[], } } +#endif if (hit_mask != NULL) { *hit_mask = hits; } + } static int -- 2.7.4 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev