On Mon, Jan 27, 2020 at 7:23 PM H.J. Lu <hjl.to...@gmail.com> wrote:
>
> movaps/movups is one byte shorter than movdqa/movdqu.  But that isn't the
> case for AVX or AVX512.  We should disable TARGET_SSE_TYPELESS_STORES
> for TARGET_AVX.
>
> gcc/
>
>         PR target/91461
>         * config/i386/i386.h (TARGET_SSE_TYPELESS_STORES): Disable for
>         TARGET_AVX.
>         * config/i386/i386.md (*movoi_internal_avx): Remove
>         TARGET_SSE_TYPELESS_STORES check.
>
> gcc/testsuite/
>
>         PR target/91461
>         * gcc.target/i386/pr91461-1.c: New test.
>         * gcc.target/i386/pr91461-2.c: Likewise.
>         * gcc.target/i386/pr91461-3.c: Likewise.
>         * gcc.target/i386/pr91461-4.c: Likewise.
>         * gcc.target/i386/pr91461-5.c: Likewise.
> ---
>  gcc/config/i386/i386.h                    |  4 +-
>  gcc/config/i386/i386.md                   |  4 +-
>  gcc/testsuite/gcc.target/i386/pr91461-1.c | 66 ++++++++++++++++++++
>  gcc/testsuite/gcc.target/i386/pr91461-2.c | 19 ++++++
>  gcc/testsuite/gcc.target/i386/pr91461-3.c | 76 +++++++++++++++++++++++
>  gcc/testsuite/gcc.target/i386/pr91461-4.c | 21 +++++++
>  gcc/testsuite/gcc.target/i386/pr91461-5.c | 17 +++++
>  7 files changed, 203 insertions(+), 4 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr91461-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr91461-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr91461-3.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr91461-4.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr91461-5.c
>
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> index 943e9a5c783..c134b04c5c4 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -516,8 +516,10 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
>  #define TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL \
>         ix86_tune_features[X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL]
>  #define TARGET_SSE_SPLIT_REGS  ix86_tune_features[X86_TUNE_SSE_SPLIT_REGS]
> +/* NB: movaps/movups is one byte shorter than movdqa/movdqu.  But that
> +   isn't the case for AVX or AVX512.  */
>  #define TARGET_SSE_TYPELESS_STORES \
> -       ix86_tune_features[X86_TUNE_SSE_TYPELESS_STORES]
> +       (!TARGET_AVX && ix86_tune_features[X86_TUNE_SSE_TYPELESS_STORES])

This is the wrong place to disable the feature.

Uros.

>  #define TARGET_SSE_LOAD0_BY_PXOR ix86_tune_features[X86_TUNE_SSE_LOAD0_BY_PXOR]
>  #define TARGET_MEMORY_MISMATCH_STALL \
>         ix86_tune_features[X86_TUNE_MEMORY_MISMATCH_STALL]
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index 6e9c9bd2fb6..bb096133880 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -1980,9 +1980,7 @@
>                (and (eq_attr "alternative" "1")
>                     (match_test "TARGET_AVX512VL"))
>                  (const_string "XI")
> -              (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
> -                   (and (eq_attr "alternative" "3")
> -                        (match_test "TARGET_SSE_TYPELESS_STORES")))
> +              (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
>                  (const_string "V8SF")
>               ]
>               (const_string "OI")))])
> diff --git a/gcc/testsuite/gcc.target/i386/pr91461-1.c b/gcc/testsuite/gcc.target/i386/pr91461-1.c
> new file mode 100644
> index 00000000000..0c94b8e2b76
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr91461-1.c
> @@ -0,0 +1,66 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mavx" } */
> +/* { dg-final { scan-assembler "\tvmovdqa\t" } } */
> +/* { dg-final { scan-assembler "\tvmovdqu\t" } } */
> +/* { dg-final { scan-assembler "\tvmovapd\t" } } */
> +/* { dg-final { scan-assembler "\tvmovupd\t" } } */
> +/* { dg-final { scan-assembler-not "\tvmovaps\t" } } */
> +/* { dg-final { scan-assembler-not "\tvmovups\t" } } */
> +
> +#include <immintrin.h>
> +
> +void
> +foo1 (__m128i *p, __m128i x)
> +{
> +  *p = x;
> +}
> +
> +void
> +foo2 (__m128d *p, __m128d x)
> +{
> +  *p = x;
> +}
> +
> +void
> +foo3 (__float128 *p, __float128 x)
> +{
> +  *p = x;
> +}
> +
> +void
> +foo4 (__m128i_u *p, __m128i x)
> +{
> +  *p = x;
> +}
> +
> +void
> +foo5 (__m128d_u *p, __m128d x)
> +{
> +  *p = x;
> +}
> +
> +typedef __float128 __float128_u __attribute__ ((__aligned__ (1)));
> +
> +void
> +foo6 (__float128_u *p, __float128 x)
> +{
> +  *p = x;
> +}
> +
> +#ifdef __x86_64__
> +typedef __int128 __int128_u __attribute__ ((__aligned__ (1)));
> +
> +extern __int128 int128;
> +
> +void
> +foo7 (__int128 *p)
> +{
> +  *p = int128;
> +}
> +
> +void
> +foo8 (__int128_u *p)
> +{
> +  *p = int128;
> +}
> +#endif
> diff --git a/gcc/testsuite/gcc.target/i386/pr91461-2.c b/gcc/testsuite/gcc.target/i386/pr91461-2.c
> new file mode 100644
> index 00000000000..921cfaf9780
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr91461-2.c
> @@ -0,0 +1,19 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mavx" } */
> +/* { dg-final { scan-assembler "\tvmovdqa\t" } } */
> +/* { dg-final { scan-assembler "\tvmovapd\t" } } */
> +/* { dg-final { scan-assembler-not "\tvmovaps\t" } } */
> +
> +#include <immintrin.h>
> +
> +void
> +foo1 (__m256i *p, __m256i x)
> +{
> +  *p = x;
> +}
> +
> +void
> +foo2 (__m256d *p, __m256d x)
> +{
> +  *p = x;
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr91461-3.c b/gcc/testsuite/gcc.target/i386/pr91461-3.c
> new file mode 100644
> index 00000000000..c67a48063bf
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr91461-3.c
> @@ -0,0 +1,76 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mavx512f -mavx512vl" } */
> +/* { dg-final { scan-assembler-not "\tvmovaps\t" } } */
> +/* { dg-final { scan-assembler-not "\tvmovups\t" } } */
> +
> +#include <immintrin.h>
> +
> +void
> +foo1 (__m128i *p, __m128i a)
> +{
> +  register __m128i x __asm ("xmm16") = a;
> +  asm volatile ("" : "+v" (x));
> +  *p = x;
> +}
> +
> +void
> +foo2 (__m128d *p, __m128d a)
> +{
> +  register __m128d x __asm ("xmm16") = a;
> +  asm volatile ("" : "+v" (x));
> +  *p = x;
> +}
> +
> +void
> +foo3 (__float128 *p, __float128 a)
> +{
> +  register __float128 x __asm ("xmm16") = a;
> +  asm volatile ("" : "+v" (x));
> +  *p = x;
> +}
> +
> +void
> +foo4 (__m128i_u *p, __m128i a)
> +{
> +  register __m128i x __asm ("xmm16") = a;
> +  asm volatile ("" : "+v" (x));
> +  *p = x;
> +}
> +
> +void
> +foo5 (__m128d_u *p, __m128d a)
> +{
> +  register __m128d x __asm ("xmm16") = a;
> +  asm volatile ("" : "+v" (x));
> +  *p = x;
> +}
> +
> +typedef __float128 __float128_u __attribute__ ((__aligned__ (1)));
> +
> +void
> +foo6 (__float128_u *p, __float128 a)
> +{
> +  register __float128 x __asm ("xmm16") = a;
> +  asm volatile ("" : "+v" (x));
> +  *p = x;
> +}
> +
> +typedef __int128 __int128_u __attribute__ ((__aligned__ (1)));
> +
> +extern __int128 int128;
> +
> +void
> +foo7 (__int128 *p)
> +{
> +  register __int128 x __asm ("xmm16") = int128;
> +  asm volatile ("" : "+v" (x));
> +  *p = x;
> +}
> +
> +void
> +foo8 (__int128_u *p)
> +{
> +  register __int128 x __asm ("xmm16") = int128;
> +  asm volatile ("" : "+v" (x));
> +  *p = x;
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr91461-4.c b/gcc/testsuite/gcc.target/i386/pr91461-4.c
> new file mode 100644
> index 00000000000..69df590de3a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr91461-4.c
> @@ -0,0 +1,21 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mavx512f -mavx512vl" } */
> +/* { dg-final { scan-assembler-not "\tvmovaps\t" } } */
> +
> +#include <immintrin.h>
> +
> +void
> +foo1 (__m256i *p, __m256i a)
> +{
> +  register __m256i x __asm ("xmm16") = a;
> +  asm volatile ("" : "+v" (x));
> +  *p = x;
> +}
> +
> +void
> +foo2 (__m256d *p, __m256d a)
> +{
> +  register __m256d x __asm ("xmm16") = a;
> +  asm volatile ("" : "+v" (x));
> +  *p = x;
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr91461-5.c b/gcc/testsuite/gcc.target/i386/pr91461-5.c
> new file mode 100644
> index 00000000000..974263042f3
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr91461-5.c
> @@ -0,0 +1,17 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mavx512f" } */
> +/* { dg-final { scan-assembler-not "\tvmovaps\t" } } */
> +
> +#include <immintrin.h>
> +
> +void
> +foo1 (__m512i *p, __m512i x)
> +{
> +  *p = x;
> +}
> +
> +void
> +foo2 (__m512d *p, __m512d x)
> +{
> +  *p = x;
> +}
> --
> 2.24.1
>

Reply via email to