https://gcc.gnu.org/g:d551f88f88b821dc80de9d39c5b275a3678b3e31

commit r16-3386-gd551f88f88b821dc80de9d39c5b275a3678b3e31
Author: Jakub Jelinek <ja...@redhat.com>
Date:   Tue Aug 26 06:43:39 2025 +0200

    i386: Fix up recent changes to use GFNI for rotates/shifts [PR121658]
    
    The vgf2p8affineqb_<mode><mask_name> pattern uses "register_operand"
    predicate for the first input operand, so using "general_operand"
    for the rotate operand passed to it leads to ICEs, and so does
    the "nonimmediate_operand" in the <insn>v16qi3 define_expand.
    The following patch fixes it by using "register_operand" in the former
    case (that pattern is TARGET_GFNI only) and using force_reg in
    the latter case (the pattern is TARGET_XOP || TARGET_GFNI and for XOP
    we can handle MEM operand).
    
    The rest of the changes are small formatting tweaks or use of const0_rtx
    instead of GEN_INT (0).
    
    2025-08-26  Jakub Jelinek  <ja...@redhat.com>
    
            PR target/121658
            * config/i386/sse.md (<insn><mode>3 any_shift): Use const0_rtx
            instead of GEN_INT (0).
            (cond_<insn><mode> any_shift): Likewise.  Formatting fix.
            (<insn><mode>3 any_rotate): Use register_operand predicate instead of
            general_operand for match_operand 1.  Use const0_rtx instead of
            GEN_INT (0).
            (<insn>v16qi3 any_rotate): Use force_reg on operands[1].  Formatting
            fix.
            * config/i386/i386.cc (ix86_shift_rotate_cost): Comment formatting
            fixes.
    
            * gcc.target/i386/pr121658.c: New test.

Diff:
---
 gcc/config/i386/i386.cc                  |  6 +++---
 gcc/config/i386/sse.md                   | 14 ++++++++------
 gcc/testsuite/gcc.target/i386/pr121658.c | 11 +++++++++++
 3 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 9093f2077346..b2c1acd12dac 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -22104,9 +22104,9 @@ ix86_shift_rotate_cost (const struct processor_costs *cost,
        case V32QImode:
          if (TARGET_GFNI && constant_op1)
            {
-             /* Use vgf2p8affine. One extra load for the mask, but in a loop
-                with enough registers it will be moved out. So for now don't
-                account the constant mask load. This is not quite right
+             /* Use vgf2p8affine.  One extra load for the mask, but in a loop
+                with enough registers it will be moved out.  So for now don't
+                account the constant mask load.  This is not quite right
                 for non loop vectorization.  */
              extra = 0;
              return ix86_vec_cost (mode, cost->sse_op) + extra;
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 951ee54589f3..505095040f75 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -26994,7 +26994,7 @@
       rtx matrix = ix86_vgf2p8affine_shift_matrix (operands[0], operands[2],
                                                   <CODE>);
       emit_insn (gen_vgf2p8affineqb_<mode> (operands[0], operands[1], matrix,
-                                           GEN_INT (0)));
+                                           const0_rtx));
     }
   else
     ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
@@ -27014,20 +27014,21 @@
 {
   rtx matrix = ix86_vgf2p8affine_shift_matrix (operands[0], operands[2], <CODE>);
   emit_insn (gen_vgf2p8affineqb_<mode>_mask (operands[0], operands[1], matrix,
-               GEN_INT (0), operands[4], operands[1]));
+                                            const0_rtx, operands[4],
+                                            operands[1]));
   DONE;
 })
 
 (define_expand "<insn><mode>3"
   [(set (match_operand:VI1_AVX512_3264 0 "register_operand")
        (any_rotate:VI1_AVX512_3264
-         (match_operand:VI1_AVX512_3264 1 "general_operand")
+         (match_operand:VI1_AVX512_3264 1 "register_operand")
          (match_operand:SI 2 "const_int_operand")))]
   "TARGET_GFNI"
 {
   rtx matrix = ix86_vgf2p8affine_shift_matrix (operands[0], operands[2], <CODE>);
   emit_insn (gen_vgf2p8affineqb_<mode> (operands[0], operands[1], matrix,
-             GEN_INT (0)));
+             const0_rtx));
   DONE;
 })
 
@@ -27073,8 +27074,9 @@
   else if (TARGET_GFNI && CONST_INT_P (operands[2]))
     {
       rtx matrix = ix86_vgf2p8affine_shift_matrix (operands[0], operands[2], <CODE>);
-      emit_insn (gen_vgf2p8affineqb_v16qi (operands[0], operands[1], matrix,
-                 GEN_INT (0)));
+      emit_insn (gen_vgf2p8affineqb_v16qi (operands[0],
+                                          force_reg (V16QImode, operands[1]),
+                                          matrix, const0_rtx));
       DONE;
     }
   else
diff --git a/gcc/testsuite/gcc.target/i386/pr121658.c b/gcc/testsuite/gcc.target/i386/pr121658.c
new file mode 100644
index 000000000000..04373161e688
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr121658.c
@@ -0,0 +1,11 @@
+/* PR target/121658 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f -mgfni" } */
+
+__attribute__((__vector_size__(64))) unsigned char v;
+
+void
+foo (void)
+{
+  v = (v << 7) | (v >> 1);
+}

Reply via email to