Checking for AVX512VPOPCNTDQ separately allows the compiler to generate other AVX512 ISA code where supported, even when VPOPCNTDQ itself is not available. This is relevant for GCC, where AVX512VPOPCNTDQ support was added in GCC 7. With GCC 5 and 6, most AVX512 code can still be generated, just without VPOPCNTDQ support.
Signed-off-by: Cian Ferriter <cian.ferri...@intel.com> --- acinclude.m4 | 16 +++++++++++++++- configure.ac | 1 + lib/dpif-netdev-lookup-avx512-gather.c | 10 +++++++++- 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/acinclude.m4 b/acinclude.m4 index 31033edca..d1cc9431c 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -77,7 +77,7 @@ dnl Checks if compiler and binutils supports AVX512. AC_DEFUN([OVS_CHECK_AVX512], [ OVS_CHECK_BINUTILS_AVX512 OVS_CHECK_CC_OPTION( - [-mavx512f -mavx512vpopcntdq], [ovs_have_cc_mavx512f=yes], [ovs_have_cc_mavx512f=no]) + [-mavx512f], [ovs_have_cc_mavx512f=yes], [ovs_have_cc_mavx512f=no]) AM_CONDITIONAL([HAVE_AVX512F], [test $ovs_have_cc_mavx512f = yes]) if test "$ovs_have_cc_mavx512f" = yes; then AC_DEFINE([HAVE_AVX512F], [1], @@ -114,6 +114,20 @@ AC_DEFUN([OVS_CHECK_AVX512VBMI], [ fi ]) +dnl OVS_CHECK_AVX512VPOPCNTDQ +dnl +dnl Checks if compiler supports AVX512VPOPCNTDQ instructions. +AC_DEFUN([OVS_CHECK_AVX512VPOPCNTDQ], [ + OVS_CHECK_CC_OPTION( + [-mavx512vpopcntdq], [ovs_have_cc_mavx512vpopcntdq=yes], + [ovs_have_cc_mavx512vpopcntdq=no]) + AM_CONDITIONAL([HAVE_AVX512VPOPCNTDQ], [test $ovs_have_cc_mavx512vpopcntdq = yes]) + if test "$ovs_have_cc_mavx512vpopcntdq" = yes; then + AC_DEFINE([HAVE_AVX512VPOPCNTDQ], [1], + [Define to 1 if compiler supports AVX512VPOPCNTDQ.]) + fi +]) + dnl OVS_ENABLE_WERROR AC_DEFUN([OVS_ENABLE_WERROR], [AC_ARG_ENABLE( diff --git a/configure.ac b/configure.ac index 12b4010e5..7161299d5 100644 --- a/configure.ac +++ b/configure.ac @@ -189,6 +189,7 @@ OVS_CHECK_MFEX_AUTOVALIDATOR OVS_CHECK_AVX512 OVS_CHECK_AVX512BW_DQ OVS_CHECK_AVX512VBMI +OVS_CHECK_AVX512VPOPCNTDQ AC_ARG_VAR(KARCH, [Kernel Architecture String]) AC_SUBST(KARCH) diff --git a/lib/dpif-netdev-lookup-avx512-gather.c b/lib/dpif-netdev-lookup-avx512-gather.c index 7bc1e9e9a..7fbecfd14 100644 --- a/lib/dpif-netdev-lookup-avx512-gather.c +++ b/lib/dpif-netdev-lookup-avx512-gather.c @@ -85,7 +85,9 @@ _mm512_popcnt_epi64_manual(__m512i 
v_in) * requested ISA level, so fallback to the integer manual implementation. */ static inline __m512i +#if HAVE_AVX512VPOPCNTDQ __attribute__((__target__("avx512vpopcntdq"))) +#endif _mm512_popcnt_epi64_wrapper(__m512i v_in) { #ifdef __AVX512VPOPCNTDQ__ @@ -339,6 +341,12 @@ avx512_lookup_impl(struct dpcls_subtable *subtable, * create two functions for each miniflow signature. This allows the runtime * CPU detection in probe() to select the ideal implementation. */ +#if HAVE_AVX512VPOPCNTDQ +#define VPOPCNTDQ_TARGET __attribute__((__target__("avx512vpopcntdq"))) +#else +#define VPOPCNTDQ_TARGET +#endif + #define DECLARE_OPTIMIZED_LOOKUP_FUNCTION(U0, U1) \ static uint32_t \ dpcls_avx512_gather_mf_##U0##_##U1(struct dpcls_subtable *subtable, \ @@ -351,7 +359,7 @@ avx512_lookup_impl(struct dpcls_subtable *subtable, U0, U1, use_vpop); \ } \ \ - static uint32_t __attribute__((__target__("avx512vpopcntdq"))) \ + static uint32_t VPOPCNTDQ_TARGET \ dpcls_avx512_gather_mf_##U0##_##U1##_vpop(struct dpcls_subtable *subtable,\ uint32_t keys_map, \ const struct netdev_flow_key *keys[], \ -- 2.25.1 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev