On Mon, Jan 27, 2020 at 7:23 PM H.J. Lu <hjl.to...@gmail.com> wrote: > > movaps/movups is one byte shorter than movdqa/movdqu. But it isn't the > case for AVX nor AVX512. We should disable TARGET_SSE_TYPELESS_STORES > for TARGET_AVX. > > gcc/ > > PR target/91461 > * config/i386/i386.h (TARGET_SSE_TYPELESS_STORES): Disable for > TARGET_AVX. > * config/i386/i386.md (*movoi_internal_avx): Remove > TARGET_SSE_TYPELESS_STORES check. > > gcc/testsuite/ > > PR target/91461 > * gcc.target/i386/pr91461-1.c: New test. > * gcc.target/i386/pr91461-2.c: Likewise. > * gcc.target/i386/pr91461-3.c: Likewise. > * gcc.target/i386/pr91461-4.c: Likewise. > * gcc.target/i386/pr91461-5.c: Likewise. > --- > gcc/config/i386/i386.h | 4 +- > gcc/config/i386/i386.md | 4 +- > gcc/testsuite/gcc.target/i386/pr91461-1.c | 66 ++++++++++++++++++++ > gcc/testsuite/gcc.target/i386/pr91461-2.c | 19 ++++++ > gcc/testsuite/gcc.target/i386/pr91461-3.c | 76 +++++++++++++++++++++++ > gcc/testsuite/gcc.target/i386/pr91461-4.c | 21 +++++++ > gcc/testsuite/gcc.target/i386/pr91461-5.c | 17 +++++ > 7 files changed, 203 insertions(+), 4 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/pr91461-1.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr91461-2.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr91461-3.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr91461-4.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr91461-5.c > > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h > index 943e9a5c783..c134b04c5c4 100644 > --- a/gcc/config/i386/i386.h > +++ b/gcc/config/i386/i386.h > @@ -516,8 +516,10 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; > #define TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL \ > ix86_tune_features[X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL] > #define TARGET_SSE_SPLIT_REGS ix86_tune_features[X86_TUNE_SSE_SPLIT_REGS] > +/* NB: movaps/movups is one byte shorter than movdqa/movdqu. But it > + isn't the case for AVX nor AVX512. 
*/ > #define TARGET_SSE_TYPELESS_STORES \ > - ix86_tune_features[X86_TUNE_SSE_TYPELESS_STORES] > + (!TARGET_AVX && ix86_tune_features[X86_TUNE_SSE_TYPELESS_STORES])
This is the wrong place to disable the feature. Uros. > #define TARGET_SSE_LOAD0_BY_PXOR > ix86_tune_features[X86_TUNE_SSE_LOAD0_BY_PXOR] > #define TARGET_MEMORY_MISMATCH_STALL \ > ix86_tune_features[X86_TUNE_MEMORY_MISMATCH_STALL] > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md > index 6e9c9bd2fb6..bb096133880 100644 > --- a/gcc/config/i386/i386.md > +++ b/gcc/config/i386/i386.md > @@ -1980,9 +1980,7 @@ > (and (eq_attr "alternative" "1") > (match_test "TARGET_AVX512VL")) > (const_string "XI") > - (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") > - (and (eq_attr "alternative" "3") > - (match_test "TARGET_SSE_TYPELESS_STORES"))) > + (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") > (const_string "V8SF") > ] > (const_string "OI")))]) > diff --git a/gcc/testsuite/gcc.target/i386/pr91461-1.c > b/gcc/testsuite/gcc.target/i386/pr91461-1.c > new file mode 100644 > index 00000000000..0c94b8e2b76 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr91461-1.c > @@ -0,0 +1,66 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -mavx" } */ > +/* { dg-final { scan-assembler "\tvmovdqa\t" } } */ > +/* { dg-final { scan-assembler "\tvmovdqu\t" } } */ > +/* { dg-final { scan-assembler "\tvmovapd\t" } } */ > +/* { dg-final { scan-assembler "\tvmovupd\t" } } */ > +/* { dg-final { scan-assembler-not "\tvmovaps\t" } } */ > +/* { dg-final { scan-assembler-not "\tvmovups\t" } } */ > + > +#include <immintrin.h> > + > +void > +foo1 (__m128i *p, __m128i x) > +{ > + *p = x; > +} > + > +void > +foo2 (__m128d *p, __m128d x) > +{ > + *p = x; > +} > + > +void > +foo3 (__float128 *p, __float128 x) > +{ > + *p = x; > +} > + > +void > +foo4 (__m128i_u *p, __m128i x) > +{ > + *p = x; > +} > + > +void > +foo5 (__m128d_u *p, __m128d x) > +{ > + *p = x; > +} > + > +typedef __float128 __float128_u __attribute__ ((__aligned__ (1))); > + > +void > +foo6 (__float128_u *p, __float128 x) > +{ > + *p = x; > +} > + > +#ifdef __x86_64__ > +typedef __int128 __int128_u 
__attribute__ ((__aligned__ (1))); > + > +extern __int128 int128; > + > +void > +foo7 (__int128 *p) > +{ > + *p = int128; > +} > + > +void > +foo8 (__int128_u *p) > +{ > + *p = int128; > +} > +#endif > diff --git a/gcc/testsuite/gcc.target/i386/pr91461-2.c > b/gcc/testsuite/gcc.target/i386/pr91461-2.c > new file mode 100644 > index 00000000000..921cfaf9780 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr91461-2.c > @@ -0,0 +1,19 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -mavx" } */ > +/* { dg-final { scan-assembler "\tvmovdqa\t" } } */ > +/* { dg-final { scan-assembler "\tvmovapd\t" } } */ > +/* { dg-final { scan-assembler-not "\tvmovaps\t" } } */ > + > +#include <immintrin.h> > + > +void > +foo1 (__m256i *p, __m256i x) > +{ > + *p = x; > +} > + > +void > +foo2 (__m256d *p, __m256d x) > +{ > + *p = x; > +} > diff --git a/gcc/testsuite/gcc.target/i386/pr91461-3.c > b/gcc/testsuite/gcc.target/i386/pr91461-3.c > new file mode 100644 > index 00000000000..c67a48063bf > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr91461-3.c > @@ -0,0 +1,76 @@ > +/* { dg-do compile { target { ! 
ia32 } } } */ > +/* { dg-options "-O2 -mavx512f -mavx512vl" } */ > +/* { dg-final { scan-assembler-not "\tvmovaps\t" } } */ > +/* { dg-final { scan-assembler-not "\tvmovups\t" } } */ > + > +#include <immintrin.h> > + > +void > +foo1 (__m128i *p, __m128i a) > +{ > + register __m128i x __asm ("xmm16") = a; > + asm volatile ("" : "+v" (x)); > + *p = x; > +} > + > +void > +foo2 (__m128d *p, __m128d a) > +{ > + register __m128d x __asm ("xmm16") = a; > + asm volatile ("" : "+v" (x)); > + *p = x; > +} > + > +void > +foo3 (__float128 *p, __float128 a) > +{ > + register __float128 x __asm ("xmm16") = a; > + asm volatile ("" : "+v" (x)); > + *p = x; > +} > + > +void > +foo4 (__m128i_u *p, __m128i a) > +{ > + register __m128i x __asm ("xmm16") = a; > + asm volatile ("" : "+v" (x)); > + *p = x; > +} > + > +void > +foo5 (__m128d_u *p, __m128d a) > +{ > + register __m128d x __asm ("xmm16") = a; > + asm volatile ("" : "+v" (x)); > + *p = x; > +} > + > +typedef __float128 __float128_u __attribute__ ((__aligned__ (1))); > + > +void > +foo6 (__float128_u *p, __float128 a) > +{ > + register __float128 x __asm ("xmm16") = a; > + asm volatile ("" : "+v" (x)); > + *p = x; > +} > + > +typedef __int128 __int128_u __attribute__ ((__aligned__ (1))); > + > +extern __int128 int128; > + > +void > +foo7 (__int128 *p) > +{ > + register __int128 x __asm ("xmm16") = int128; > + asm volatile ("" : "+v" (x)); > + *p = x; > +} > + > +void > +foo8 (__int128_u *p) > +{ > + register __int128 x __asm ("xmm16") = int128; > + asm volatile ("" : "+v" (x)); > + *p = x; > +} > diff --git a/gcc/testsuite/gcc.target/i386/pr91461-4.c > b/gcc/testsuite/gcc.target/i386/pr91461-4.c > new file mode 100644 > index 00000000000..69df590de3a > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr91461-4.c > @@ -0,0 +1,21 @@ > +/* { dg-do compile { target { ! 
ia32 } } } */ > +/* { dg-options "-O2 -mavx512f -mavx512vl" } */ > +/* { dg-final { scan-assembler-not "\tvmovaps\t" } } */ > + > +#include <immintrin.h> > + > +void > +foo1 (__m256i *p, __m256i a) > +{ > + register __m256i x __asm ("xmm16") = a; > + asm volatile ("" : "+v" (x)); > + *p = x; > +} > + > +void > +foo2 (__m256d *p, __m256d a) > +{ > + register __m256d x __asm ("xmm16") = a; > + asm volatile ("" : "+v" (x)); > + *p = x; > +} > diff --git a/gcc/testsuite/gcc.target/i386/pr91461-5.c > b/gcc/testsuite/gcc.target/i386/pr91461-5.c > new file mode 100644 > index 00000000000..974263042f3 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr91461-5.c > @@ -0,0 +1,17 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -mavx512f" } */ > +/* { dg-final { scan-assembler-not "\tvmovaps\t" } } */ > + > +#include <immintrin.h> > + > +void > +foo1 (__m512i *p, __m512i x) > +{ > + *p = x; > +} > + > +void > +foo2 (__m512d *p, __m512d x) > +{ > + *p = x; > +} > -- > 2.24.1 >