Hi Marc,
Here's version #2 of the patch to recognize bswap32 and bswap64
incorporating your
suggestions and feedback.  The test cases now confirm the transformation is
applied
when int is 32 bits and long is 64 bits, and should pass otherwise; the
patterns now
reuse (more) capturing groups, and the patterns have been made more generic
to allow
the ultimate type to be signed or unsigned (hence there are now two new
gcc.dg tests).

Alas my efforts to allow the input argument to be signed, and use
fold_convert to coerce
it to the correct type before calling __builtin_bswap failed, with the error
messages:
>fold-bswap-2.c: In function 'swap64':
>fold-bswap-2.c:22:1: error: invalid argument to gimple call
>(long unsigned int) x_6(D)
>_12 = __builtin_bswap64 ((long unsigned int) x_6(D));
>during GIMPLE pass: forwprop
>fold-bswap-2.c:22:1: internal compiler error: verify_gimple failed
So I require arguments to be the expected type for now.  If anyone's
sufficiently motivated
to support these cases, this can be done as a follow-up patch.

This revised patch has been tested on x86_64-pc-linux-gnu with a "make
bootstrap"
and "make -k check" with no new failures.
Ok for mainline?

Thanks in advance,
Roger
--

-----Original Message-----
From: Marc Glisse <marc.gli...@inria.fr> 
Sent: 12 August 2020 10:43
To: Roger Sayle <ro...@nextmovesoftware.com>
Cc: 'GCC Patches' <gcc-patches@gcc.gnu.org>
Subject: Re: [PATCH] middle-end: Recognize idioms for bswap32 and bswap64 in
match.pd.

On Wed, 12 Aug 2020, Roger Sayle wrote:

> This patch is inspired by a small code fragment in comment #3 of 
> bugzilla PR rtl-optimization/94804.  That snippet appears almost 
> unrelated to the topic of the PR, but recognizing __builtin_bswap64 
> from two __builtin_bswap32 calls, seems like a clever/useful trick.
> GCC's optabs.c contains the inverse logic to expand bswap64 by IORing 
> two bswap32 calls, so this transformation/canonicalization is safe, 
> even on targets without suitable optab support.  But on x86_64, the 
> swap64 of the test case becomes a single instruction.
>
>
> This patch has been tested on x86_64-pc-linux-gnu with a "make 
> bootstrap" and a "make -k check" with no new failures.
> Ok for mainline?

Your tests seem to assume that int has 32 bits and long 64.

+  (if (operand_equal_p (@0, @2, 0)

Why not reuse @0 instead of introducing @2 in the pattern? Similarly, it may
be a bit shorter to reuse @1 instead of a new @3 (I don't think the tricks
with @@ will be needed here).

+       && types_match (TREE_TYPE (@0), uint64_type_node)

that seems very specific. What goes wrong with a signed type for instance?

+(simplify
+  (bit_ior:c
+    (lshift
+      (convert (BUILT_IN_BSWAP16 (convert (bit_and @0
+                                                  INTEGER_CST@1))))
+      (INTEGER_CST@2))
+    (convert (BUILT_IN_BSWAP16 (convert (rshift @3
+                                               INTEGER_CST@4)))))

I didn't realize we kept this useless bit_and when casting to a smaller
type. We probably get a different pattern on 16-bit targets, but a pattern
they do not match won't hurt them.

--
Marc Glisse
diff --git a/gcc/match.pd b/gcc/match.pd
index c3b8816..c682d3d 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3410,6 +3410,33 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
    (bswap (bitop:c (bswap @0) @1))
    (bitop @0 (bswap @1)))))
 
+/* Recognize ((T)bswap32(x)<<32)|bswap32(x>>32) as bswap64(x).  */
+(simplify
+  (bit_ior:c
+    (lshift (convert (BUILT_IN_BSWAP32 (convert@0 @1)))
+           INTEGER_CST@2)
+    (convert (BUILT_IN_BSWAP32 (convert@3 (rshift @1 @2)))))
+  (if (INTEGRAL_TYPE_P (type)
+       && TYPE_PRECISION (type) == 64
+       && types_match (TREE_TYPE (@1), uint64_type_node)
+       && types_match (TREE_TYPE (@0), uint32_type_node)
+       && types_match (TREE_TYPE (@3), uint32_type_node)
+       && wi::to_widest (@2) == 32)
+    (convert (BUILT_IN_BSWAP64 @1))))
+
+/* Recognize ((T)bswap16(x)<<16)|bswap16(x>>16) as bswap32(x).  */
+(simplify
+  (bit_ior:c
+    (lshift
+      (convert (BUILT_IN_BSWAP16 (convert (bit_and @0 INTEGER_CST@1))))
+      (INTEGER_CST@2))
+    (convert (BUILT_IN_BSWAP16 (convert (rshift @0 @2)))))
+  (if (INTEGRAL_TYPE_P (type)
+       && TYPE_PRECISION (type) == 32
+       && types_match (TREE_TYPE (@0), uint32_type_node)
+       && wi::to_widest (@1) == 65535
+       && wi::to_widest (@2) == 16)
+    (convert (BUILT_IN_BSWAP32 @0))))
 
 /* Combine COND_EXPRs and VEC_COND_EXPRs.  */
 
diff --git a/gcc/testsuite/gcc.dg/fold-bswap-1.c 
b/gcc/testsuite/gcc.dg/fold-bswap-1.c
new file mode 100644
index 0000000..3abb862
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/fold-bswap-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+unsigned int swap32(unsigned int x)
+{
+  if (sizeof(unsigned int)==4 && sizeof(unsigned short)==2) {
+    unsigned int a = __builtin_bswap16(x);
+    x >>= 16;
+    a <<= 16;
+    return __builtin_bswap16(x) | a;
+  } else return __builtin_bswap32(x);
+}
+
+unsigned long swap64(unsigned long x)
+{
+  if (sizeof(unsigned long)==8 && sizeof(unsigned int)==4) {
+    unsigned long a = __builtin_bswap32(x);
+    x >>= 32;
+    a <<= 32;
+    return __builtin_bswap32(x) | a;
+  } else return __builtin_bswap64(x);
+}
+
+/* { dg-final { scan-tree-dump-times "__builtin_bswap32" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "__builtin_bswap64" 1 "optimized" } } */
+
diff --git a/gcc/testsuite/gcc.dg/fold-bswap-2.c 
b/gcc/testsuite/gcc.dg/fold-bswap-2.c
new file mode 100644
index 0000000..a581fd6
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/fold-bswap-2.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+int swap32(unsigned int x)
+{
+  if (sizeof(int)==4 && sizeof(short)==2) {
+    int a = __builtin_bswap16(x);
+    x >>= 16;
+    a <<= 16;
+    return __builtin_bswap16(x) | a;
+  } else return __builtin_bswap32(x);
+}
+
+long swap64(unsigned long x)
+{
+  if (sizeof(long)==8 && sizeof(int)==4) {
+    long a = __builtin_bswap32(x);
+    x >>= 32;
+    a <<= 32;
+    return __builtin_bswap32(x) | a;
+  } else return __builtin_bswap64(x);
+}
+
+/* { dg-final { scan-tree-dump-times "__builtin_bswap32" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "__builtin_bswap64" 1 "optimized" } } */
+

Reply via email to