> > diff --git a/gcc/testsuite/gcc.target/i386/pr115069.c > b/gcc/testsuite/gcc.target/i386/pr115069.c > > new file mode 100644 > > index 00000000000..c4b48b602ef > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/i386/pr115069.c > > @@ -0,0 +1,78 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-O2 -mavx2" } */ > > +/* { dg-final { scan-assembler-not "vpermq" } } */ > > + > > +#include <stdio.h> > > +#include <unistd.h> > > +#include <stdlib.h> > > +#include <inttypes.h> > > + > > +typedef int8_t stress_vint8_t __attribute__ ((vector_size (16))); > There is no need for such a big testcase; > > typedef char v16qi __attribute__((vector_size(16))); > v16qi > foo (v16qi a, v16qi b) > { > return a * b; > } > > should be enough, with -mavx2 -mno-avx512f.
Yes. I will change to that. Thx, Haochen > > + > > +#define OPS(a, b, c, s, v23, v3) \ > > +do { \ > > + a += b; \ > > + a |= b; \ > > + a -= b; \ > > + a &= ~b; \ > > + a *= c; \ > > + a = ~a; \ > > + a *= s; \ > > + a ^= c; \ > > + a <<= 1; \ > > + b >>= 1; \ > > + b += c; \ > > + a %= v23; \ > > + c /= v3; \ > > + b = b ^ c; \ > > + c = b ^ c; \ > > + b = b ^ c; \ > > +} while (0) > > + > > +volatile uint8_t csum8_put; > > + > > +void stress_vecmath(void) > > +{ > > + const stress_vint8_t v23_8 = { > > + 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, > > + 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17 > > + }; > > + const stress_vint8_t v3_8 = { > > + 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, > > + 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03 > > + }; > > + stress_vint8_t a8 = { > > + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, > > + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 > > + }; > > + stress_vint8_t b8 = { > > + 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, > > + 0x0f, 0x1e, 0x2d, 0x3c, 0x4b, 0x5a, 0x69, 0x78 > > + }; > > + stress_vint8_t c8 = { > > + 0x01, 0x02, 0x03, 0x02, 0x01, 0x02, 0x03, 0x02, > > + 0x03, 0x02, 0x01, 0x02, 0x03, 0x02, 0x01, 0x02 > > + }; > > + stress_vint8_t s8 = { > > + 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02, 0x02, > > + 0x01, 0x01, 0x02, 0x02, 0x01, 0x01, 0x02, 0x02, > > + }; > > + const uint8_t csum8_val = (uint8_t)0x1b; > > + int i; > > + uint8_t csum8; > > + > > + for (i = 1000; i; i--) { > > + OPS(a8, b8, c8, s8, v23_8, v3_8); > > + OPS(a8, b8, c8, s8, v23_8, v3_8); > > + OPS(a8, b8, c8, s8, v23_8, v3_8); > > + OPS(a8, b8, c8, s8, v23_8, v3_8); > > + OPS(a8, b8, c8, s8, v23_8, v3_8); > > + OPS(a8, b8, c8, s8, v23_8, v3_8); > > + } > > + > > + csum8 = a8[0] ^ a8[1] ^ a8[2] ^ a8[3] ^ > > + a8[4] ^ a8[5] ^ a8[6] ^ a8[7] ^ > > + a8[8] ^ a8[9] ^ a8[10] ^ a8[11] ^ > > + a8[12] ^ a8[13] ^ a8[14] ^ a8[15]; > > + csum8_put = csum8; > > +} > > -- > > 2.31.1 > > > > > -- > BR, > Hongtao