On Tue, Apr 18, 2023 at 3:18 PM Haochen Jiang via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> Hi all,
>
> Currently in GCC, the 128 bit intrin for instruction vpclmulqdq is
> under PCLMUL ISA. Because there is no dependency between ISA set PCLMUL
> and VPCLMULQDQ, The 128 bit intrin is not available when we just use
> compiler flag -mvpclmulqdq. But it should according to Intel SDM.
>
> Since VPCLMULQDQ is a VEX/EVEX promotion for PCLMUL, it is natural to
> add dependency between them.
>
> Also, with -mvpclmulqdq, we can use ymm under VEX encoding, so
> VPCLMULQDQ should imply AVX.
>
> Tested on x86_64-pc-linux-gnu. Ok for trunk?
>
> BRs,
> Haochen
>
> gcc/ChangeLog:
>
>         * common/config/i386/i386-common.cc
>         (OPTION_MASK_ISA_VPCLMULQDQ_SET):
>         Add OPTION_MASK_ISA_PCLMUL_SET and OPTION_MASK_ISA_AVX_SET.
>         (OPTION_MASK_ISA_AVX_UNSET):
>         Add OPTION_MASK_ISA_VPCLMULQDQ_UNSET.
>         (OPTION_MASK_ISA_PCLMUL_UNSET): Ditto.
>         * config/i386/i386.md (vpclmulqdqvl): New.
>         * config/i386/sse.md (pclmulqdq): Add evex encoding.
>         * config/i386/vpclmulqdqintrin.h: Remove redudant avx target
>         push.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/vpclmulqdq.c: Add compile test for xmm.
> ---
>  gcc/common/config/i386/i386-common.cc      |  9 ++++++---
>  gcc/config/i386/i386.md                    |  4 +++-
>  gcc/config/i386/sse.md                     | 11 ++++++-----
>  gcc/config/i386/vpclmulqdqintrin.h         |  4 ++--
>  gcc/testsuite/gcc.target/i386/vpclmulqdq.c |  3 +++
>  5 files changed, 20 insertions(+), 11 deletions(-)
>
> diff --git a/gcc/common/config/i386/i386-common.cc 
> b/gcc/common/config/i386/i386-common.cc
> index 315db854862..c7954da8e34 100644
> --- a/gcc/common/config/i386/i386-common.cc
> +++ b/gcc/common/config/i386/i386-common.cc
> @@ -171,7 +171,9 @@ along with GCC; see the file COPYING3.  If not see
>  #define OPTION_MASK_ISA_GFNI_SET OPTION_MASK_ISA_GFNI
>  #define OPTION_MASK_ISA_SHSTK_SET OPTION_MASK_ISA_SHSTK
>  #define OPTION_MASK_ISA2_VAES_SET OPTION_MASK_ISA2_VAES
> -#define OPTION_MASK_ISA_VPCLMULQDQ_SET OPTION_MASK_ISA_VPCLMULQDQ
> +#define OPTION_MASK_ISA_VPCLMULQDQ_SET \
> +  (OPTION_MASK_ISA_VPCLMULQDQ | OPTION_MASK_ISA_PCLMUL_SET \
> +   | OPTION_MASK_ISA_AVX_SET)
>  #define OPTION_MASK_ISA_MOVDIRI_SET OPTION_MASK_ISA_MOVDIRI
>  #define OPTION_MASK_ISA2_MOVDIR64B_SET OPTION_MASK_ISA2_MOVDIR64B
>  #define OPTION_MASK_ISA2_WAITPKG_SET OPTION_MASK_ISA2_WAITPKG
> @@ -211,7 +213,7 @@ along with GCC; see the file COPYING3.  If not see
>  #define OPTION_MASK_ISA_AVX_UNSET \
>    (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
>     | OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET \
> -   | OPTION_MASK_ISA_AVX2_UNSET )
> +   | OPTION_MASK_ISA_AVX2_UNSET | OPTION_MASK_ISA_VPCLMULQDQ_UNSET)
>  #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
>  #define OPTION_MASK_ISA_FXSR_UNSET OPTION_MASK_ISA_FXSR
>  #define OPTION_MASK_ISA_XSAVE_UNSET \
> @@ -314,7 +316,8 @@ along with GCC; see the file COPYING3.  If not see
>
>  #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
>  #define OPTION_MASK_ISA_SHA_UNSET OPTION_MASK_ISA_SHA
> -#define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
> +#define OPTION_MASK_ISA_PCLMUL_UNSET \
> +  (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_VPCLMULQDQ_UNSET)
>  #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
>  #define OPTION_MASK_ISA2_PCONFIG_UNSET OPTION_MASK_ISA2_PCONFIG
>  #define OPTION_MASK_ISA2_WBNOINVD_UNSET OPTION_MASK_ISA2_WBNOINVD
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index ed689b044c3..acc994226e7 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -841,7 +841,7 @@
>                     avx,noavx,avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f,
>                     avx512bw,noavx512bw,avx512dq,noavx512dq,fma_or_avx512vl,
>                     
> avx512vl,noavx512vl,avxvnni,avx512vnnivl,avx512fp16,avxifma,
> -                   avx512ifmavl,avxneconvert,avx512bf16vl"
> +                   avx512ifmavl,avxneconvert,avx512bf16vl,vpclmulqdqvl"
>    (const_string "base"))
>
>  ;; Define instruction set of MMX instructions
> @@ -903,6 +903,8 @@
>          (eq_attr "isa" "avxneconvert") (symbol_ref "TARGET_AVXNECONVERT")
>          (eq_attr "isa" "avx512bf16vl")
>            (symbol_ref "TARGET_AVX512BF16 && TARGET_AVX512VL")
> +        (eq_attr "isa" "vpclmulqdqvl")
> +          (symbol_ref "TARGET_VPCLMULQDQ && TARGET_AVX512VL")
>
>          (eq_attr "mmx_isa" "native")
>            (symbol_ref "!TARGET_MMX_WITH_SSE")
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 26812ab6106..33e281901cf 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -25195,20 +25195,21 @@
>     (set_attr "mode" "TI")])
>
>  (define_insn "pclmulqdq"
> -  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
> -       (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
> -                     (match_operand:V2DI 2 "vector_operand" "xBm,xm")
> +  [(set (match_operand:V2DI 0 "register_operand" "=x,x,v")
> +       (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x,v")
> +                     (match_operand:V2DI 2 "vector_operand" "xBm,xm,vm")
Just change x to Yv instead of introducing a new alternative.
Others LGTM.
>                       (match_operand:SI 3 "const_0_to_255_operand")]
>                      UNSPEC_PCLMUL))]
>    "TARGET_PCLMUL"
>    "@
>     pclmulqdq\t{%3, %2, %0|%0, %2, %3}
> +   vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}
>     vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
> -  [(set_attr "isa" "noavx,avx")
> +  [(set_attr "isa" "noavx,avx,vpclmulqdqvl")
>     (set_attr "type" "sselog1")
>     (set_attr "prefix_extra" "1")
>     (set_attr "length_immediate" "1")
> -   (set_attr "prefix" "orig,vex")
> +   (set_attr "prefix" "orig,vex,evex")
>     (set_attr "mode" "TI")])
>
>  (define_expand "avx_vzeroall"
> diff --git a/gcc/config/i386/vpclmulqdqintrin.h 
> b/gcc/config/i386/vpclmulqdqintrin.h
> index ba93fc4ff9c..2c83b6037a0 100644
> --- a/gcc/config/i386/vpclmulqdqintrin.h
> +++ b/gcc/config/i386/vpclmulqdqintrin.h
> @@ -53,9 +53,9 @@ _mm512_clmulepi64_epi128 (__m512i __A, __m512i __B, const 
> int __C)
>  #pragma GCC pop_options
>  #endif /* __DISABLE_VPCLMULQDQF__ */
>
> -#if !defined(__VPCLMULQDQ__) || !defined(__AVX__)
> +#if !defined(__VPCLMULQDQ__)
>  #pragma GCC push_options
> -#pragma GCC target("vpclmulqdq,avx")
> +#pragma GCC target("vpclmulqdq")
>  #define __DISABLE_VPCLMULQDQ__
>  #endif /* __VPCLMULQDQ__ */
>
> diff --git a/gcc/testsuite/gcc.target/i386/vpclmulqdq.c 
> b/gcc/testsuite/gcc.target/i386/vpclmulqdq.c
> index d93f776803f..27b2fd71ea4 100644
> --- a/gcc/testsuite/gcc.target/i386/vpclmulqdq.c
> +++ b/gcc/testsuite/gcc.target/i386/vpclmulqdq.c
> @@ -2,16 +2,19 @@
>  /* { dg-options "-mvpclmulqdq -mavx512vl -mavx512f -O2" } */
>  /* { dg-final { scan-assembler-times "vpclmulqdq\[ 
> \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[
>  \\t\]+#)" 1 } } */
>  /* { dg-final { scan-assembler-times "vpclmulqdq\[ 
> \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[
>  \\t\]+#)" 1 } } */
> +/* { dg-final { scan-assembler-times "vpclmulqdq\[ 
> \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[
>  \\t\]+#)" 1 } } */
>
>  #include <x86intrin.h>
>
>  volatile __m512i x1, x2;
>  volatile __m256i x3, x4;
> +volatile __m128i x5, x6;
>
>  void extern
>  avx512vl_test (void)
>  {
>      x1 = _mm512_clmulepi64_epi128(x1, x2, 3);
>      x3 = _mm256_clmulepi64_epi128(x3, x4, 3);
> +    x5 = _mm_clmulepi64_si128(x5, x6, 3);
>  }
>
> --
> 2.31.1
>


-- 
BR,
Hongtao

Reply via email to