> -----Original Message----- > From: Jiang, Haochen <haochen.ji...@intel.com> > Sent: Wednesday, April 19, 2023 10:41 AM > To: Hongtao Liu <crazy...@gmail.com> > Cc: gcc-patches@gcc.gnu.org; Liu, Hongtao <hongtao....@intel.com>; > ubiz...@gmail.com > Subject: RE: [PATCH] i386: Share AES xmm intrin with VAES > > > > a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index > > > 33e281901cf..e7d565a8389 100644 > > > --- a/gcc/config/i386/sse.md > > > +++ b/gcc/config/i386/sse.md > > > @@ -25107,67 +25107,71 @@ > > > > > > ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; > > > ;; > > > ;; > > > > > > (define_insn "aesenc" > > > - [(set (match_operand:V2DI 0 "register_operand" "=x,x") > > > - (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x") > > > - (match_operand:V2DI 2 "vector_operand" "xBm,xm")] > > > + [(set (match_operand:V2DI 0 "register_operand" "=x,x,v") > > > + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x,v") > > > + (match_operand:V2DI 2 "vector_operand" > > > + "xBm,xm,vm")] > > > UNSPEC_AESENC))] > > > - "TARGET_AES" > > > + "TARGET_AES || (TARGET_VAES && TARGET_AVX512VL)" > > > "@ > > > aesenc\t{%2, %0|%0, %2} > > > + vaesenc\t{%2, %1, %0|%0, %1, %2} > > > vaesenc\t{%2, %1, %0|%0, %1, %2}" > > > - [(set_attr "isa" "noavx,avx") > > > + [(set_attr "isa" "noavx,aes,avx512vl") > > Shouldn't it be vaes_avx512vl and then remove " || (TARGET_VAES && > > TARGET_AVX512VL)" from condition. > > Since VAES should not imply AES, we need that "|| (TARGET_VAES && > TARGET_AVX512VL)" > > And there is no need to add vaes_avx512vl since the last alternative will only > be hit when there is no aes. When there is no aes, the pattern will need vaes > and avx512vl both or we could not use this pattern. avx512vl here is just > like a > placeholder. Ok, I see, then LGTM. > > BRs, > Haochen > > > Similar for below patterns. > > Others LGTM. > > > (set_attr "type" "sselog1") > > > (set_attr "prefix_extra" "1") > > > - (set_attr "prefix" "orig,vex") > > > - (set_attr "btver2_decode" "double,double") > > > + (set_attr "prefix" "orig,vex,evex") > > > + (set_attr "btver2_decode" "double,double,double") > > > (set_attr "mode" "TI")]) > > > > > > (define_insn "aesenclast" > > > - [(set (match_operand:V2DI 0 "register_operand" "=x,x") > > > - (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x") > > > - (match_operand:V2DI 2 "vector_operand" "xBm,xm")] > > > + [(set (match_operand:V2DI 0 "register_operand" "=x,x,v") > > > + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x,v") > > > + (match_operand:V2DI 2 "vector_operand" > > > + "xBm,xm,vm")] > > > UNSPEC_AESENCLAST))] > > > - "TARGET_AES" > > > + "TARGET_AES || (TARGET_VAES && TARGET_AVX512VL)" > > > "@ > > > aesenclast\t{%2, %0|%0, %2} > > > + vaesenclast\t{%2, %1, %0|%0, %1, %2} > > > vaesenclast\t{%2, %1, %0|%0, %1, %2}" > > > - [(set_attr "isa" "noavx,avx") > > > + [(set_attr "isa" "noavx,aes,avx512vl") > > > (set_attr "type" "sselog1") > > > (set_attr "prefix_extra" "1") > > > - (set_attr "prefix" "orig,vex") > > > - (set_attr "btver2_decode" "double,double") > > > + (set_attr "prefix" "orig,vex,evex") > > > + (set_attr "btver2_decode" "double,double,double") > > > (set_attr "mode" "TI")]) > > > > > > (define_insn "aesdec" > > > - [(set (match_operand:V2DI 0 "register_operand" "=x,x") > > > - (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x") > > > - (match_operand:V2DI 2 "vector_operand" "xBm,xm")] > > > + [(set (match_operand:V2DI 0 "register_operand" "=x,x,v") > > > + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x,v") > > > + (match_operand:V2DI 2 "vector_operand" > > > + "xBm,xm,vm")] > > > UNSPEC_AESDEC))] > > > - "TARGET_AES" > > > + "TARGET_AES || (TARGET_VAES && TARGET_AVX512VL)" > > > "@ > > > aesdec\t{%2, %0|%0, %2} > > > + vaesdec\t{%2, %1, %0|%0, %1, %2} > > > vaesdec\t{%2, %1, %0|%0, %1, %2}" > > > - [(set_attr "isa" "noavx,avx") > > > + [(set_attr "isa" "noavx,aes,avx512vl") > > > (set_attr "type" "sselog1") > > > (set_attr "prefix_extra" "1") > > > - (set_attr "prefix" "orig,vex") > > > - (set_attr "btver2_decode" "double,double") > > > + (set_attr "prefix" "orig,vex,evex") > > > + (set_attr "btver2_decode" "double,double,double") > > > (set_attr "mode" "TI")]) > > > > > > (define_insn "aesdeclast" > > > - [(set (match_operand:V2DI 0 "register_operand" "=x,x") > > > - (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x") > > > - (match_operand:V2DI 2 "vector_operand" "xBm,xm")] > > > + [(set (match_operand:V2DI 0 "register_operand" "=x,x,v") > > > + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x,v") > > > + (match_operand:V2DI 2 "vector_operand" > > > + "xBm,xm,vm")] > > > UNSPEC_AESDECLAST))] > > > - "TARGET_AES" > > > + "TARGET_AES || (TARGET_VAES && TARGET_AVX512VL)" > > > "@ > > > aesdeclast\t{%2, %0|%0, %2} > > > + vaesdeclast\t{%2, %1, %0|%0, %1, %2} > > > vaesdeclast\t{%2, %1, %0|%0, %1, %2}" > > > - [(set_attr "isa" "noavx,avx") > > > + [(set_attr "isa" "noavx,aes,avx512vl") > > > (set_attr "type" "sselog1") > > > (set_attr "prefix_extra" "1") > > > - (set_attr "prefix" "orig,vex") > > > - (set_attr "btver2_decode" "double,double") > > > + (set_attr "prefix" "orig,vex,evex") > > > + (set_attr "btver2_decode" "double,double,double") > > > (set_attr "mode" "TI")]) > > > > > > (define_insn "aesimc" > > > diff --git a/gcc/config/i386/vaesintrin.h > > > b/gcc/config/i386/vaesintrin.h index 0f1cffe71e9..58fc19c9eb3 100644 > > > --- a/gcc/config/i386/vaesintrin.h > > > +++ b/gcc/config/i386/vaesintrin.h > > > @@ -24,9 +24,9 @@ > > > #ifndef __VAESINTRIN_H_INCLUDED > > > #define __VAESINTRIN_H_INCLUDED > > > > > > -#if !defined(__VAES__) || !defined(__AVX__) > > > +#if !defined(__VAES__) > > > #pragma GCC push_options > > > -#pragma GCC target("vaes,avx") > > > +#pragma GCC target("vaes") > > > #define __DISABLE_VAES__ > > > #endif /* __VAES__ */ > > > > > > diff --git a/gcc/config/i386/wmmintrin.h > > > b/gcc/config/i386/wmmintrin.h index ae15cea429e..da314dbd44d 100644 > > > --- a/gcc/config/i386/wmmintrin.h > > > +++ b/gcc/config/i386/wmmintrin.h > > > @@ -40,36 +40,23 @@ > > > > > > /* Performs 1 round of AES decryption of the first m128i using > > > the second m128i as a round key. */ -extern __inline __m128i > > > __attribute__((__gnu_inline__, __always_inline__, __artificial__)) > > > -_mm_aesdec_si128 (__m128i __X, __m128i __Y) -{ > > > - return (__m128i) __builtin_ia32_aesdec128 ((__v2di)__X, > > > (__v2di)__Y); -} > > > +#define _mm_aesdec_si128(X, Y) \ > > > + (__m128i) __builtin_ia32_aesdec128 ((__v2di) (X), (__v2di) (Y)) > > > > > > /* Performs the last round of AES decryption of the first m128i > > > using the second m128i as a round key. */ -extern __inline > > > __m128i __attribute__((__gnu_inline__, __always_inline__, > > > __artificial__)) > > > -_mm_aesdeclast_si128 (__m128i __X, __m128i __Y) -{ > > > - return (__m128i) __builtin_ia32_aesdeclast128 ((__v2di)__X, > > > - (__v2di)__Y); > > > -} > > > +#define _mm_aesdeclast_si128(X, Y) \ > > > + (__m128i) __builtin_ia32_aesdeclast128 ((__v2di) (X), (__v2di) > > > +(Y)) > > > > > > /* Performs 1 round of AES encryption of the first m128i using > > > the second m128i as a round key. */ -extern __inline __m128i > > > __attribute__((__gnu_inline__, __always_inline__, __artificial__)) > > > -_mm_aesenc_si128 (__m128i __X, __m128i __Y) -{ > > > - return (__m128i) __builtin_ia32_aesenc128 ((__v2di)__X, > > > (__v2di)__Y); -} > > > +#define _mm_aesenc_si128(X, Y) \ > > > + (__m128i) __builtin_ia32_aesenc128 ((__v2di) (X), (__v2di) (Y)) > > > > > > /* Performs the last round of AES encryption of the first m128i > > > using the second m128i as a round key. */ -extern __inline > > > __m128i __attribute__((__gnu_inline__, __always_inline__, > > > __artificial__)) > > > -_mm_aesenclast_si128 (__m128i __X, __m128i __Y) -{ > > > - return (__m128i) __builtin_ia32_aesenclast128 ((__v2di)__X, > > > (__v2di)__Y); -} > > > +#define _mm_aesenclast_si128(X, Y) \ > > > + (__m128i) __builtin_ia32_aesenclast128 ((__v2di) (X), (__v2di) > > > +(Y)) > > > > > > /* Performs the InverseMixColumn operation on the source m128i > > > and stores the result into m128i destination. */ diff --git > > > a/gcc/testsuite/gcc.target/i386/avx512fvl-vaes-1.c > > > b/gcc/testsuite/gcc.target/i386/avx512fvl-vaes-1.c > > > index c65b570cd47..f35742ec98b 100644 > > > --- a/gcc/testsuite/gcc.target/i386/avx512fvl-vaes-1.c > > > +++ b/gcc/testsuite/gcc.target/i386/avx512fvl-vaes-1.c > > > @@ -10,10 +10,16 @@ > > > /* { dg-final { scan-assembler-times "vaesenc\[ > > > \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\{\n\]*%ymm\[0- > > 9\]+\[^\{\n\]*%ymm\[0-9\ > > > ]+(?:\n|\[ \\t\]+#)" 1 } } */ > > > /* { dg-final { scan-assembler-times "vaesenclast\[ > > > \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\{\n\]*%ymm\[0- > > 9\]+\[^\{\n\]*%ymm\[0-9\ > > > ]+(?:\n|\[ \\t\]+#)" 1 } } */ > > > > > > +/* { dg-final { scan-assembler-times "vaesdec\[ > > > +\\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\{\n\]*%xmm\[0- > > 9\]+\[^\{\n\]*%xmm\[0-9 > > > +\]+(?:\n|\[ \\t\]+#)" 1 } } */ > > > +/* { dg-final { scan-assembler-times "vaesdeclast\[ > > > +\\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\{\n\]*%xmm\[0- > > 9\]+\[^\{\n\]*%xmm\[0-9 > > > +\]+(?:\n|\[ \\t\]+#)" 1 } } */ > > > +/* { dg-final { scan-assembler-times "vaesenc\[ > > > +\\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\{\n\]*%xmm\[0- > > 9\]+\[^\{\n\]*%xmm\[0-9 > > > +\]+(?:\n|\[ \\t\]+#)" 1 } } */ > > > +/* { dg-final { scan-assembler-times "vaesenclast\[ > > > +\\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\{\n\]*%xmm\[0- > > 9\]+\[^\{\n\]*%xmm\[0-9 > > > +\]+(?:\n|\[ \\t\]+#)" 1 } } */ > > > + > > > #include <immintrin.h> > > > > > > volatile __m512i x,y; > > > volatile __m256i x256, y256; > > > +volatile __m128i x128, y128; > > > > > > void extern > > > avx512f_test (void) > > > @@ -27,4 +33,9 @@ avx512f_test (void) > > > x256 = _mm256_aesdeclast_epi128 (x256, y256); > > > x256 = _mm256_aesenc_epi128 (x256, y256); > > > x256 = _mm256_aesenclast_epi128 (x256, y256); > > > + > > > + x128 = _mm_aesdec_si128 (x128, y128); > > > + x128 = _mm_aesdeclast_si128 (x128, y128); > > > + x128 = _mm_aesenc_si128 (x128, y128); > > > + x128 = _mm_aesenclast_si128 (x128, y128); > > > } > > > diff --git a/gcc/testsuite/gcc.target/i386/pr84335.c > > > b/gcc/testsuite/gcc.target/i386/pr84335.c > > > index c8d2a712f1f..5e45e2b322a 100644 > > > --- a/gcc/testsuite/gcc.target/i386/pr84335.c > > > +++ b/gcc/testsuite/gcc.target/i386/pr84335.c > > > @@ -6,5 +6,5 @@ typedef long long V __attribute__ ((__vector_size__ > > > (16))); V foo (V *a, V *b) { > > > - return __builtin_ia32_aesenc128 (*a, *b); /* { dg-error "needs isa > > option" } */ > > > -} > > > + return __builtin_ia32_aesenc128 (*a, *b); /* { dg-warning "implicit > > declaration of function" } */ > > > +} /* { dg-error > > > "incompatible types when > returning > > type" "" { target *-*-* } .-1 } */ > > > -- > > > 2.31.1 > > > > > > > > > -- > > BR, > > Hongtao
RE: [PATCH] i386: Share AES xmm intrin with VAES
Liu, Hongtao via Gcc-patches Tue, 18 Apr 2023 19:43:31 -0700
- [PATCH] i386: Share AES xmm intrin with VAE... Haochen Jiang via Gcc-patches
- [PATCH] i386: Share AES xmm intrin wit... Haochen Jiang via Gcc-patches
- Re: [PATCH] i386: Share AES xmm intrin... Hongtao Liu via Gcc-patches
- RE: [PATCH] i386: Share AES xmm in... Jiang, Haochen via Gcc-patches
- RE: [PATCH] i386: Share AES xm... Liu, Hongtao via Gcc-patches