On Mon, Jun 4, 2018 at 3:08 PM, Jakub Jelinek <ja...@redhat.com> wrote: > Hi! > > On Wed, May 23, 2018 at 08:45:19AM +0200, Jakub Jelinek wrote: >> As mentioned in the PR, vptestm* instructions with the same input operand >> used >> twice perform the same comparison as vpcmpeq* against zero vector, with the >> advantage that a register holding CONST0_RTX (mode) is not needed. >> >> 2018-05-23 Jakub Jelinek <ja...@redhat.com> >> >> PR target/85832 >> * config/i386/sse.md (<avx512>_eq<mode>3<mask_scalar_merge_name>_1): >> Add (=Yk,v,C) variant using vptestm insn. Use TARGET_AVX512BW >> in test instead of TARGET_AVX512F for VI12_AVX512VL iterator. >> >> * gcc.target/i386/avx512f-pr85832.c: New test. >> * gcc.target/i386/avx512vl-pr85832.c: New test. >> * gcc.target/i386/avx512bw-pr85832.c: New test. >> * gcc.target/i386/avx512vlbw-pr85832.c: New test. > > I've unfortunately not added an executable testcase nor tested it under sde, > so I missed that, say, vpcmpeqw with a zero vector as one of the operands actually > doesn't do what vptestmw with the other argument repeated does; it does > exactly the opposite: vpcmpeqw sets bits in the mask register for elements > that are equal to 0, but vptestmw sets bits in the mask register for elements > where the bitwise AND of the first arg and the second arg (i.e. the argument that is repeated) > is non-zero. Fortunately there is vptestnmw which does what we want. > > Bootstrapped/regtested on x86_64-linux and i686-linux + tested on the > testcase with sde. Ok for trunk? > > 2018-06-04 Jakub Jelinek <ja...@redhat.com> > > PR target/85832 > PR target/86036 > * config/i386/sse.md (<avx512>_eq<mode>3<mask_scalar_merge_name>_1): > Use vptestnm rather than vptestm in (=Yk,v,C) variant. > > * gcc.target/i386/avx512f-pr85832.c: Expect vptestnm rather than > vptestm. > * gcc.target/i386/avx512vl-pr85832.c: Likewise. > * gcc.target/i386/avx512vlbw-pr85832.c: Likewise. > * gcc.target/i386/avx512bw-pr85832.c: Likewise. 
> * gcc.target/i386/avx512bw-pr86036.c: New test.
OK (I'd say it is obvious patch). Thanks, Uros. > --- gcc/config/i386/sse.md.jj 2018-05-31 20:53:41.933453308 +0200 > +++ gcc/config/i386/sse.md 2018-06-04 10:29:02.667720644 +0200 > @@ -11287,7 +11287,7 @@ (define_insn "<avx512>_eq<mode>3<mask_sc > "TARGET_AVX512BW && !(MEM_P (operands[1]) && MEM_P (operands[2]))" > "@ > vpcmpeq<ssemodesuffix>\t{%2, %1, > %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2} > - vptestm<ssemodesuffix>\t{%1, %1, > %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %1}" > + vptestnm<ssemodesuffix>\t{%1, %1, > %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %1}" > [(set_attr "type" "ssecmp") > (set_attr "prefix_extra" "1") > (set_attr "prefix" "evex") > @@ -11302,7 +11302,7 @@ (define_insn "<avx512>_eq<mode>3<mask_sc > "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))" > "@ > vpcmpeq<ssemodesuffix>\t{%2, %1, > %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2} > - vptestm<ssemodesuffix>\t{%1, %1, > %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %1}" > + vptestnm<ssemodesuffix>\t{%1, %1, > %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %1}" > [(set_attr "type" "ssecmp") > (set_attr "prefix_extra" "1") > (set_attr "prefix" "evex") > --- gcc/testsuite/gcc.target/i386/avx512f-pr85832.c.jj 2018-05-25 > 14:35:23.123416639 +0200 > +++ gcc/testsuite/gcc.target/i386/avx512f-pr85832.c 2018-06-04 > 11:00:00.773880446 +0200 > @@ -1,8 +1,8 @@ > /* PR target/85832 */ > /* { dg-do compile } */ > /* { dg-options "-O2 -mavx512f -mno-avx512vl -mno-avx512bw -masm=att" } */ > -/* { dg-final { scan-assembler-times {\mvptestmd\M} 1 } } */ > -/* { dg-final { scan-assembler-times {\mvptestmq\M} 1 } } */ > +/* { dg-final { scan-assembler-times {\mvptestnmd\M} 1 } } */ > +/* { dg-final { scan-assembler-times {\mvptestnmq\M} 1 } } */ > > #include <x86intrin.h> > > --- gcc/testsuite/gcc.target/i386/avx512vl-pr85832.c.jj 2018-05-25 > 
14:35:23.123416639 +0200 > +++ gcc/testsuite/gcc.target/i386/avx512vl-pr85832.c 2018-06-04 > 11:00:09.995895313 +0200 > @@ -1,8 +1,8 @@ > /* PR target/85832 */ > /* { dg-do compile } */ > /* { dg-options "-O2 -mavx512vl -mno-avx512bw -masm=att" } */ > -/* { dg-final { scan-assembler-times {\mvptestmd\M} 2 } } */ > -/* { dg-final { scan-assembler-times {\mvptestmq\M} 2 } } */ > +/* { dg-final { scan-assembler-times {\mvptestnmd\M} 2 } } */ > +/* { dg-final { scan-assembler-times {\mvptestnmq\M} 2 } } */ > > #include <x86intrin.h> > > --- gcc/testsuite/gcc.target/i386/avx512vlbw-pr85832.c.jj 2018-05-25 > 14:35:23.124416640 +0200 > +++ gcc/testsuite/gcc.target/i386/avx512vlbw-pr85832.c 2018-06-04 > 11:00:06.020888898 +0200 > @@ -1,8 +1,8 @@ > /* PR target/85832 */ > /* { dg-do compile } */ > /* { dg-options "-O2 -mavx512vl -mavx512bw -masm=att" } */ > -/* { dg-final { scan-assembler-times {\mvptestmb\M} 2 } } */ > -/* { dg-final { scan-assembler-times {\mvptestmw\M} 2 } } */ > +/* { dg-final { scan-assembler-times {\mvptestnmb\M} 2 } } */ > +/* { dg-final { scan-assembler-times {\mvptestnmw\M} 2 } } */ > > #include <x86intrin.h> > > --- gcc/testsuite/gcc.target/i386/avx512bw-pr85832.c.jj 2018-05-25 > 14:35:23.124416640 +0200 > +++ gcc/testsuite/gcc.target/i386/avx512bw-pr85832.c 2018-06-04 > 10:59:53.015867934 +0200 > @@ -1,8 +1,8 @@ > /* PR target/85832 */ > /* { dg-do compile } */ > /* { dg-options "-O2 -mavx512bw -mno-avx512vl -masm=att" } */ > -/* { dg-final { scan-assembler-times {\mvptestmb\M} 1 } } */ > -/* { dg-final { scan-assembler-times {\mvptestmw\M} 1 } } */ > +/* { dg-final { scan-assembler-times {\mvptestnmb\M} 1 } } */ > +/* { dg-final { scan-assembler-times {\mvptestnmw\M} 1 } } */ > > #include <x86intrin.h> > > --- gcc/testsuite/gcc.target/i386/avx512bw-pr86036.c.jj 2018-06-04 > 11:04:24.860193859 +0200 > +++ gcc/testsuite/gcc.target/i386/avx512bw-pr86036.c 2018-06-04 > 11:18:47.618218756 +0200 > @@ -0,0 +1,48 @@ > +/* PR target/86036 */ > +/* { 
dg-do run } */ > +/* { dg-options "-O -mavx512bw" } */ > +/* { dg-require-effective-target avx512bw } */ > + > +#define AVX512BW > +#include "avx512f-helper.h" > + > +typedef unsigned short V __attribute__ ((vector_size (64))); > + > +__attribute__((noipa)) V > +foo (V a) > +{ > + return a >= 3; > +} > + > +__attribute__((noipa)) V > +bar (V a) > +{ > + return a != 0; > +} > + > +__attribute__((noipa)) V > +baz (V a) > +{ > + return a == 0; > +} > + > +void > +TEST (void) > +{ > + V a = (V) { 3, 17, 2, 0, 9, 1, 2, 3, 0, 0, 0, 3, 3, 3, 3, 3, > + 9, 16387, 9, 3, 3, 0, 0, 3, 3, 3, 0, 0, 0, 0, 3, 3 }; > + V b = foo (a); > + V c = (V) { -1, -1, 0, 0, -1, 0, 0, -1, 0, 0, 0, -1, -1, -1, -1, -1, > + -1, -1, -1, -1, -1, 0, 0, -1, -1, -1, 0, 0, 0, 0, -1, -1 }; > + if (__builtin_memcmp (&b, &c, sizeof (b))) > + abort (); > + V d = bar (a); > + V e = (V) { -1, -1, -1, 0, -1, -1, -1, -1, 0, 0, 0, -1, -1, -1, -1, -1, > + -1, -1, -1, -1, -1, 0, 0, -1, -1, -1, 0, 0, 0, 0, -1, -1 }; > + if (__builtin_memcmp (&d, &e, sizeof (d))) > + abort (); > + V f = baz (a); > + V g = ~e; > + if (__builtin_memcmp (&f, &g, sizeof (f))) > + abort (); > +} > > > Jakub