--- Begin Message ---
Hi!
When working on the PR, I've noticed we generate terrible code for
V32HImode or V64QImode permutations for -mavx512f -mno-avx512bw.
Generally we can't do much with such permutations, but since PR68655
we can handle at least some of them, namely those expressible using
V16SImode or V8DImode permutations.  That code wasn't reachable though,
because ix86_vectorize_vec_perm_const didn't even try: without
TARGET_AVX512BW it said it can't do anything, and with it that it can do
everything, so no d.testing_p attempts were ever made.
This patch makes it try it for TARGET_AVX512F && !TARGET_AVX512BW.
The first hunk is to avoid an ICE: expand_vec_perm_even_odd_1 asserts d->vmode
isn't V32HImode, because with AVX512BW expand_vec_perm_1 already handles
all permutations, but when we let it through with !TARGET_AVX512BW,
expand_vec_perm_1 doesn't handle it.
If we want, that hunk can be dropped if we implement in
expand_vec_perm_even_odd_1 and its helper the even permutation as
vpmovdw + vpmovdw + vinserti64x4 and odd permutation as
vpsrld $16 + vpsrld $16 + vpmovdw + vpmovdw + vinserti64x4.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
2021-08-10 Jakub Jelinek <ja...@redhat.com>
PR target/80355
* config/i386/i386-expand.c (expand_vec_perm_even_odd): Return false
for V32HImode if d->testing_p and !TARGET_AVX512BW.
(ix86_vectorize_vec_perm_const) <case E_V32HImode, case E_V64QImode>:
If !TARGET_AVX512BW and TARGET_AVX512F and d.testing_p, don't fail
early, but actually check the permutation.
* gcc.target/i386/avx512f-pr80355-2.c: New test.
--- gcc/config/i386/i386-expand.c.jj 2021-08-05 10:26:15.589555028 +0200
+++ gcc/config/i386/i386-expand.c 2021-08-09 14:14:35.466268680 +0200
@@ -20337,6 +20337,11 @@ expand_vec_perm_even_odd (struct expand_
if (d->perm[i] != 2 * i + odd)
return false;
+ if (d->vmode == E_V32HImode
+ && d->testing_p
+ && !TARGET_AVX512BW)
+ return false;
+
return expand_vec_perm_even_odd_1 (d, odd);
}
@@ -20877,16 +20882,16 @@ ix86_vectorize_vec_perm_const (machine_m
return true;
break;
case E_V32HImode:
- if (!TARGET_AVX512BW)
+ if (!TARGET_AVX512F)
return false;
- if (d.testing_p)
+ if (d.testing_p && TARGET_AVX512BW)
/* All implementable with a single vperm[it]2 insn. */
return true;
break;
case E_V64QImode:
- if (!TARGET_AVX512BW)
+ if (!TARGET_AVX512F)
return false;
- if (d.testing_p)
+ if (d.testing_p && TARGET_AVX512BW)
/* Implementable with 2 vperm[it]2, 2 vpshufb and 1 or insn. */
return true;
break;
--- gcc/testsuite/gcc.target/i386/avx512f-pr80355-2.c.jj 2021-08-09 14:24:27.176142589 +0200
+++ gcc/testsuite/gcc.target/i386/avx512f-pr80355-2.c 2021-08-09 14:29:23.308074276 +0200
@@ -0,0 +1,23 @@
+/* PR target/80355 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f -mno-avx512vl -mno-avx512dq -mno-avx512bw" } */
+/* { dg-final { scan-assembler-times "\tvshufi(?:32x4|64x2)\t" 2 } } */
+
+typedef short V __attribute__((vector_size (64)));
+typedef char W __attribute__((vector_size (64)));
+
+W
+f0 (W x)
+{
+ return __builtin_shuffle (x, (W) { 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
58, 59, 60, 61, 62, 63,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
27, 28, 29, 30, 31 });
+}
+
+V
+f1 (V x)
+{
+ return __builtin_shuffle (x, (V) { 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
26, 27, 28, 29, 30, 31,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15 });
+}
Jakub
To declare a filtering error, please use the following link :
https://www.security-mail.net/reporter.php?mid=bf1d.61123f06.99556.0&r=marc.poulhies%40kalray.eu&s=gcc-patches-bounces%2Bmarc.poulhies%3Dkalray.eu%40gcc.gnu.org&o=%5BPATCH%5D+i386%3A+Allow+some+V32HImode+and+V64QImode+permutations+even+without+AVX512BW+%5BPR80355%5D&verdict=C&c=454b4fd04703e9c385a54af0fdd95c58945fbb66
--- End Message ---