On Mon, Jun 4, 2018 at 3:08 PM, Jakub Jelinek <ja...@redhat.com> wrote:
> Hi!
>
> On Wed, May 23, 2018 at 08:45:19AM +0200, Jakub Jelinek wrote:
>> As mentioned in the PR, vptestm* instructions with the same input operand
>> used twice perform the same comparison as vpcmpeq* against a zero vector,
>> with the advantage that a register holding CONST0_RTX (mode) is not needed.
>>
>> 2018-05-23  Jakub Jelinek  <ja...@redhat.com>
>>
>>       PR target/85832
>>       * config/i386/sse.md (<avx512>_eq<mode>3<mask_scalar_merge_name>_1):
>>       Add (=Yk,v,C) variant using vptestm insn.  Use TARGET_AVX512BW
>>       in test instead of TARGET_AVX512F for VI12_AVX512VL iterator.
>>
>>       * gcc.target/i386/avx512f-pr85832.c: New test.
>>       * gcc.target/i386/avx512vl-pr85832.c: New test.
>>       * gcc.target/i386/avx512bw-pr85832.c: New test.
>>       * gcc.target/i386/avx512vlbw-pr85832.c: New test.
>
> Unfortunately, I neither added an executable testcase nor tested it under
> sde, so I missed that, e.g., vpcmpeqw with a zero vector as one of the
> operands doesn't do what vptestmw with the other argument repeated does; it
> does exactly the opposite.  vpcmpeqw sets bits in the mask register for
> elements that are equal to 0, but vptestmw sets bits in the mask register
> for elements where the AND of the first and second arguments (i.e. the
> argument that is repeated) is non-zero.  Fortunately there is vptestnmw,
> which does what we want.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux + tested on the
> testcase with sde.  Ok for trunk?
>
> 2018-06-04  Jakub Jelinek  <ja...@redhat.com>
>
>         PR target/85832
>         PR target/86036
>         * config/i386/sse.md (<avx512>_eq<mode>3<mask_scalar_merge_name>_1):
>         Use vptestnm rather than vptestm in (=Yk,v,C) variant.
>
>         * gcc.target/i386/avx512f-pr85832.c: Expect vptestnm rather than
>         vptestm.
>         * gcc.target/i386/avx512vl-pr85832.c: Likewise.
>         * gcc.target/i386/avx512vlbw-pr85832.c: Likewise.
>         * gcc.target/i386/avx512bw-pr85832.c: Likewise.
>         * gcc.target/i386/avx512bw-pr86036.c: New test.

OK (I'd say it is an obvious patch).

Thanks,
Uros.

> --- gcc/config/i386/sse.md.jj   2018-05-31 20:53:41.933453308 +0200
> +++ gcc/config/i386/sse.md      2018-06-04 10:29:02.667720644 +0200
> @@ -11287,7 +11287,7 @@ (define_insn "<avx512>_eq<mode>3<mask_sc
>    "TARGET_AVX512BW && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
>    "@
>     vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}
> -   vptestm<ssemodesuffix>\t{%1, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %1}"
> +   vptestnm<ssemodesuffix>\t{%1, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %1}"
>    [(set_attr "type" "ssecmp")
>     (set_attr "prefix_extra" "1")
>     (set_attr "prefix" "evex")
> @@ -11302,7 +11302,7 @@ (define_insn "<avx512>_eq<mode>3<mask_sc
>    "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
>    "@
>     vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}
> -   vptestm<ssemodesuffix>\t{%1, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %1}"
> +   vptestnm<ssemodesuffix>\t{%1, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %1}"
>    [(set_attr "type" "ssecmp")
>     (set_attr "prefix_extra" "1")
>     (set_attr "prefix" "evex")
> --- gcc/testsuite/gcc.target/i386/avx512f-pr85832.c.jj  2018-05-25 14:35:23.123416639 +0200
> +++ gcc/testsuite/gcc.target/i386/avx512f-pr85832.c     2018-06-04 11:00:00.773880446 +0200
> @@ -1,8 +1,8 @@
>  /* PR target/85832 */
>  /* { dg-do compile } */
>  /* { dg-options "-O2 -mavx512f -mno-avx512vl -mno-avx512bw -masm=att" } */
> -/* { dg-final { scan-assembler-times {\mvptestmd\M} 1 } } */
> -/* { dg-final { scan-assembler-times {\mvptestmq\M} 1 } } */
> +/* { dg-final { scan-assembler-times {\mvptestnmd\M} 1 } } */
> +/* { dg-final { scan-assembler-times {\mvptestnmq\M} 1 } } */
>
>  #include <x86intrin.h>
>
> --- gcc/testsuite/gcc.target/i386/avx512vl-pr85832.c.jj 2018-05-25 14:35:23.123416639 +0200
> +++ gcc/testsuite/gcc.target/i386/avx512vl-pr85832.c    2018-06-04 11:00:09.995895313 +0200
> @@ -1,8 +1,8 @@
>  /* PR target/85832 */
>  /* { dg-do compile } */
>  /* { dg-options "-O2 -mavx512vl -mno-avx512bw -masm=att" } */
> -/* { dg-final { scan-assembler-times {\mvptestmd\M} 2 } } */
> -/* { dg-final { scan-assembler-times {\mvptestmq\M} 2 } } */
> +/* { dg-final { scan-assembler-times {\mvptestnmd\M} 2 } } */
> +/* { dg-final { scan-assembler-times {\mvptestnmq\M} 2 } } */
>
>  #include <x86intrin.h>
>
> --- gcc/testsuite/gcc.target/i386/avx512vlbw-pr85832.c.jj       2018-05-25 14:35:23.124416640 +0200
> +++ gcc/testsuite/gcc.target/i386/avx512vlbw-pr85832.c  2018-06-04 11:00:06.020888898 +0200
> @@ -1,8 +1,8 @@
>  /* PR target/85832 */
>  /* { dg-do compile } */
>  /* { dg-options "-O2 -mavx512vl -mavx512bw -masm=att" } */
> -/* { dg-final { scan-assembler-times {\mvptestmb\M} 2 } } */
> -/* { dg-final { scan-assembler-times {\mvptestmw\M} 2 } } */
> +/* { dg-final { scan-assembler-times {\mvptestnmb\M} 2 } } */
> +/* { dg-final { scan-assembler-times {\mvptestnmw\M} 2 } } */
>
>  #include <x86intrin.h>
>
> --- gcc/testsuite/gcc.target/i386/avx512bw-pr85832.c.jj 2018-05-25 14:35:23.124416640 +0200
> +++ gcc/testsuite/gcc.target/i386/avx512bw-pr85832.c    2018-06-04 10:59:53.015867934 +0200
> @@ -1,8 +1,8 @@
>  /* PR target/85832 */
>  /* { dg-do compile } */
>  /* { dg-options "-O2 -mavx512bw -mno-avx512vl -masm=att" } */
> -/* { dg-final { scan-assembler-times {\mvptestmb\M} 1 } } */
> -/* { dg-final { scan-assembler-times {\mvptestmw\M} 1 } } */
> +/* { dg-final { scan-assembler-times {\mvptestnmb\M} 1 } } */
> +/* { dg-final { scan-assembler-times {\mvptestnmw\M} 1 } } */
>
>  #include <x86intrin.h>
>
> --- gcc/testsuite/gcc.target/i386/avx512bw-pr86036.c.jj 2018-06-04 11:04:24.860193859 +0200
> +++ gcc/testsuite/gcc.target/i386/avx512bw-pr86036.c    2018-06-04 11:18:47.618218756 +0200
> @@ -0,0 +1,48 @@
> +/* PR target/86036 */
> +/* { dg-do run } */
> +/* { dg-options "-O -mavx512bw" } */
> +/* { dg-require-effective-target avx512bw } */
> +
> +#define AVX512BW
> +#include "avx512f-helper.h"
> +
> +typedef unsigned short V __attribute__ ((vector_size (64)));
> +
> +__attribute__((noipa)) V
> +foo (V a)
> +{
> +  return a >= 3;
> +}
> +
> +__attribute__((noipa)) V
> +bar (V a)
> +{
> +  return a != 0;
> +}
> +
> +__attribute__((noipa)) V
> +baz (V a)
> +{
> +  return a == 0;
> +}
> +
> +void
> +TEST (void)
> +{
> +  V a = (V) { 3, 17, 2, 0, 9, 1, 2, 3, 0, 0, 0, 3, 3, 3, 3, 3,
> +             9, 16387, 9, 3, 3, 0, 0, 3, 3, 3, 0, 0, 0, 0, 3, 3 };
> +  V b = foo (a);
> +  V c = (V) { -1, -1, 0, 0, -1, 0, 0, -1, 0, 0, 0, -1, -1, -1, -1, -1,
> +             -1, -1, -1, -1, -1, 0, 0, -1, -1, -1, 0, 0, 0, 0, -1, -1 };
> +  if (__builtin_memcmp (&b, &c, sizeof (b)))
> +    abort ();
> +  V d = bar (a);
> +  V e = (V) { -1, -1, -1, 0, -1, -1, -1, -1, 0, 0, 0, -1, -1, -1, -1, -1,
> +             -1, -1, -1, -1, -1, 0, 0, -1, -1, -1, 0, 0, 0, 0, -1, -1 };
> +  if (__builtin_memcmp (&d, &e, sizeof (d)))
> +    abort ();
> +  V f = baz (a);
> +  V g = ~e;
> +  if (__builtin_memcmp (&f, &g, sizeof (f)))
> +    abort ();
> +}
>
>
>         Jakub

Reply via email to