Hi!

The following testcase shows that while we nicely optimize away the
useless and? of shift count before rotation for [SD]Imode rotates,
we don't do that for [QH]Imode.

The following patch optimizes that by using the right iterator on those
4 patterns.

Bootstrapped/regtested on x86_64-linux and i686-linux.  Ok for trunk?
Or just GCC12?

2021-03-05  Jakub Jelinek  <ja...@redhat.com>

        PR target/99405
        * config/i386/i386.md (*<insn><mode>3_mask, *<insn><mode>3_mask_1):
        For any_rotate define_insn_split and following splitters, use
        SWI iterator instead of SWI48.

        * gcc.target/i386/pr99405.c: New test.

--- gcc/config/i386/i386.md.jj  2021-03-03 10:02:27.871589603 +0100
+++ gcc/config/i386/i386.md     2021-03-05 14:00:35.768378973 +0100
@@ -11951,9 +11951,9 @@ (define_expand "<insn><mode>3"
 
 ;; Avoid useless masking of count operand.
 (define_insn_and_split "*<insn><mode>3_mask"
-  [(set (match_operand:SWI48 0 "nonimmediate_operand")
-       (any_rotate:SWI48
-         (match_operand:SWI48 1 "nonimmediate_operand")
+  [(set (match_operand:SWI 0 "nonimmediate_operand")
+       (any_rotate:SWI
+         (match_operand:SWI 1 "nonimmediate_operand")
          (subreg:QI
            (and:SI
              (match_operand:SI 2 "register_operand" "c")
@@ -11967,15 +11967,15 @@ (define_insn_and_split "*<insn><mode>3_m
   "&& 1"
   [(parallel
      [(set (match_dup 0)
-          (any_rotate:SWI48 (match_dup 1)
-                            (match_dup 2)))
+          (any_rotate:SWI (match_dup 1)
+                          (match_dup 2)))
       (clobber (reg:CC FLAGS_REG))])]
   "operands[2] = gen_lowpart (QImode, operands[2]);")
 
 (define_split
-  [(set (match_operand:SWI48 0 "register_operand")
-       (any_rotate:SWI48
-         (match_operand:SWI48 1 "const_int_operand")
+  [(set (match_operand:SWI 0 "register_operand")
+       (any_rotate:SWI
+         (match_operand:SWI 1 "const_int_operand")
          (subreg:QI
            (and:SI
              (match_operand:SI 2 "register_operand")
@@ -11984,14 +11984,14 @@ (define_split
    == GET_MODE_BITSIZE (<MODE>mode) - 1"
  [(set (match_dup 4) (match_dup 1))
   (set (match_dup 0)
-       (any_rotate:SWI48 (match_dup 4)
-                        (subreg:QI (match_dup 2) 0)))]
+       (any_rotate:SWI (match_dup 4)
+                      (subreg:QI (match_dup 2) 0)))]
  "operands[4] = gen_reg_rtx (<MODE>mode);")
 
 (define_insn_and_split "*<insn><mode>3_mask_1"
-  [(set (match_operand:SWI48 0 "nonimmediate_operand")
-       (any_rotate:SWI48
-         (match_operand:SWI48 1 "nonimmediate_operand")
+  [(set (match_operand:SWI 0 "nonimmediate_operand")
+       (any_rotate:SWI
+         (match_operand:SWI 1 "nonimmediate_operand")
          (and:QI
            (match_operand:QI 2 "register_operand" "c")
            (match_operand:QI 3 "const_int_operand"))))
@@ -12004,14 +12004,14 @@ (define_insn_and_split "*<insn><mode>3_m
   "&& 1"
   [(parallel
      [(set (match_dup 0)
-          (any_rotate:SWI48 (match_dup 1)
-                            (match_dup 2)))
+          (any_rotate:SWI (match_dup 1)
+                          (match_dup 2)))
       (clobber (reg:CC FLAGS_REG))])])
 
 (define_split
-  [(set (match_operand:SWI48 0 "register_operand")
-       (any_rotate:SWI48
-         (match_operand:SWI48 1 "const_int_operand")
+  [(set (match_operand:SWI 0 "register_operand")
+       (any_rotate:SWI
+         (match_operand:SWI 1 "const_int_operand")
          (and:QI
            (match_operand:QI 2 "register_operand")
            (match_operand:QI 3 "const_int_operand"))))]
@@ -12019,7 +12019,7 @@ (define_split
   == GET_MODE_BITSIZE (<MODE>mode) - 1"
  [(set (match_dup 4) (match_dup 1))
   (set (match_dup 0)
-       (any_rotate:SWI48 (match_dup 4) (match_dup 2)))]
+       (any_rotate:SWI (match_dup 4) (match_dup 2)))]
  "operands[4] = gen_reg_rtx (<MODE>mode);")
 
 ;; Implement rotation using two double-precision
--- gcc/testsuite/gcc.target/i386/pr99405.c.jj  2021-03-05 13:40:20.334860937 
+0100
+++ gcc/testsuite/gcc.target/i386/pr99405.c     2021-03-05 13:39:51.131185009 
+0100
@@ -0,0 +1,23 @@
+/* PR target/99405 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=generic -fomit-frame-pointer" } */
+/* { dg-final { scan-assembler-not "\tand\[bl]\t\\\$" } } */
+
+unsigned char f1 (unsigned char x, unsigned y) { return (x << (y & 7)) | (x >> 
(-y & 7)); }
+unsigned short f2 (unsigned short x, unsigned y) { return (x << (y & 15)) | (x 
>> (-y & 15)); }
+unsigned int f3 (unsigned int x, unsigned y) { return (x << (y & 31)) | (x >> 
(-y & 31)); }
+unsigned char f4 (unsigned char x, unsigned y) { return (x >> (y & 7)) | (x << 
(-y & 7)); }
+unsigned short f5 (unsigned short x, unsigned y) { return (x >> (y & 15)) | (x 
<< (-y & 15)); }
+unsigned int f6 (unsigned int x, unsigned y) { return (x >> (y & 31)) | (x << 
(-y & 31)); }
+unsigned char f7 (unsigned char x, unsigned char y) { unsigned char v = y & 7; 
unsigned char w = -y & 7; return (x << v) | (x >> w); }
+unsigned short f8 (unsigned short x, unsigned char y) { unsigned char v = y & 
15; unsigned char w = -y & 15; return (x << v) | (x >> w); }
+unsigned int f9 (unsigned int x, unsigned char y) { unsigned char v = y & 31; 
unsigned char w = -y & 31; return (x << v) | (x >> w); }
+unsigned char f10 (unsigned char x, unsigned char y) { unsigned char v = y & 
7; unsigned char w = -y & 7; return (x >> v) | (x << w); }
+unsigned short f11 (unsigned short x, unsigned char y) { unsigned char v = y & 
15; unsigned char w = -y & 15; return (x >> v) | (x << w); }
+unsigned int f12 (unsigned int x, unsigned char y) { unsigned char v = y & 31; 
unsigned char w = -y & 31; return (x >> v) | (x << w); }
+#ifdef __x86_64__
+unsigned long long f13 (unsigned long long x, unsigned y) { return (x << (y & 
63)) | (x >> (-y & 63)); }
+unsigned long long f14 (unsigned long long x, unsigned y) { return (x >> (y & 
63)) | (x << (-y & 63)); }
+unsigned long long f15 (unsigned long long x, unsigned char y) { unsigned char 
v = y & 63; unsigned char w = -y & 63; return (x << v) | (x >> w); }
+unsigned long long f16 (unsigned long long x, unsigned char y) { unsigned char 
v = y & 63; unsigned char w = -y & 63; return (x >> v) | (x << w); }
+#endif

        Jakub

Reply via email to