https://gcc.gnu.org/g:d551f88f88b821dc80de9d39c5b275a3678b3e31
commit r16-3386-gd551f88f88b821dc80de9d39c5b275a3678b3e31 Author: Jakub Jelinek <ja...@redhat.com> Date: Tue Aug 26 06:43:39 2025 +0200 i386: Fix up recent changes to use GFNI for rotates/shifts [PR121658] The vgf2p8affineqb_<mode><mask_name> pattern uses "register_operand" predicate for the first input operand, so using "general_operand" for the rotate operand passed to it leads to ICEs, and so does the "nonimmediate_operand" in the <insn>v16qi3 define_expand. The following patch fixes it by using "register_operand" in the former case (that pattern is TARGET_GFNI only) and using force_reg in the latter case (the pattern is TARGET_XOP || TARGET_GFNI and for XOP we can handle MEM operand). The rest of the changes are small formatting tweaks or use of const0_rtx instead of GEN_INT (0). 2025-08-26 Jakub Jelinek <ja...@redhat.com> PR target/121658 * config/i386/sse.md (<insn><mode>3 any_shift): Use const0_rtx instead of GEN_INT (0). (cond_<insn><mode> any_shift): Likewise. Formatting fix. (<insn><mode>3 any_rotate): Use register_operand predicate instead of general_operand for match_operand 1. Use const0_rtx instead of GEN_INT (0). (<insn>v16qi3 any_rotate): Use force_reg on operands[1]. Formatting fix. * config/i386/i386.cc (ix86_shift_rotate_cost): Comment formatting fixes. * gcc.target/i386/pr121658.c: New test. Diff: --- gcc/config/i386/i386.cc | 6 +++--- gcc/config/i386/sse.md | 14 ++++++++------ gcc/testsuite/gcc.target/i386/pr121658.c | 11 +++++++++++ 3 files changed, 22 insertions(+), 9 deletions(-) diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 9093f2077346..b2c1acd12dac 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -22104,9 +22104,9 @@ ix86_shift_rotate_cost (const struct processor_costs *cost, case V32QImode: if (TARGET_GFNI && constant_op1) { - /* Use vgf2p8affine. One extra load for the mask, but in a loop - with enough registers it will be moved out. So for now don't - account the constant mask load. This is not quite right + /* Use vgf2p8affine. One extra load for the mask, but in a loop + with enough registers it will be moved out. So for now don't + account the constant mask load. This is not quite right for non loop vectorization. */ extra = 0; return ix86_vec_cost (mode, cost->sse_op) + extra; diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 951ee54589f3..505095040f75 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -26994,7 +26994,7 @@ rtx matrix = ix86_vgf2p8affine_shift_matrix (operands[0], operands[2], <CODE>); emit_insn (gen_vgf2p8affineqb_<mode> (operands[0], operands[1], matrix, - GEN_INT (0))); + const0_rtx)); } else ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]); @@ -27014,20 +27014,21 @@ { rtx matrix = ix86_vgf2p8affine_shift_matrix (operands[0], operands[2], <CODE>); emit_insn (gen_vgf2p8affineqb_<mode>_mask (operands[0], operands[1], matrix, - GEN_INT (0), operands[4], operands[1])); + const0_rtx, operands[4], + operands[1])); DONE; }) (define_expand "<insn><mode>3" [(set (match_operand:VI1_AVX512_3264 0 "register_operand") (any_rotate:VI1_AVX512_3264 - (match_operand:VI1_AVX512_3264 1 "general_operand") + (match_operand:VI1_AVX512_3264 1 "register_operand") (match_operand:SI 2 "const_int_operand")))] "TARGET_GFNI" { rtx matrix = ix86_vgf2p8affine_shift_matrix (operands[0], operands[2], <CODE>); emit_insn (gen_vgf2p8affineqb_<mode> (operands[0], operands[1], matrix, - GEN_INT (0))); + const0_rtx)); DONE; }) @@ -27073,8 +27074,9 @@ else if (TARGET_GFNI && CONST_INT_P (operands[2])) { rtx matrix = ix86_vgf2p8affine_shift_matrix (operands[0], operands[2], <CODE>); - emit_insn (gen_vgf2p8affineqb_v16qi (operands[0], operands[1], matrix, - GEN_INT (0))); + emit_insn (gen_vgf2p8affineqb_v16qi (operands[0], + force_reg (V16QImode, operands[1]), + matrix, const0_rtx)); DONE; } else diff --git a/gcc/testsuite/gcc.target/i386/pr121658.c b/gcc/testsuite/gcc.target/i386/pr121658.c new file mode 100644 index 000000000000..04373161e688 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr121658.c @@ -0,0 +1,11 @@ +/* PR target/121658 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512f -mgfni" } */ + +__attribute__((__vector_size__(64))) unsigned char v; + +void +foo (void) +{ + v = (v << 7) | (v >> 1); +}