In code like the following from KVM:

        /* it is a read fault? */
        error_code = (exit_qualification << 2) & PFERR_FETCH_MASK;

it would be nicer to write

        /* it is a read fault? */
        error_code = (exit_qualification & VMX_EPT_READ_FAULT_MASK)
                     ? PFERR_FETCH_MASK : 0;

instead of having to know the difference between the positions of the
source and destination bits.  LLVM already optimizes the latter form
into the former (which is why I am sending this in stage 3...), but GCC
does not, so this patch adds two match.pd patterns to handle it.

The combine.c hunk, on the other hand, is needed to simplify cases that
do not use the ternary operator (the "h" and "i" functions in the
testcases), like this:

        return ((x >> 9) & 1) << 7;

Normally this is simplified just fine to a single shift and an AND.
Here, however, the bit to preserve after (x >> 9 << 7) is the QImode
sign bit, and if_then_else_cond produces a complicated concoction
involving (ne:SI (subreg:QI ...)).  simplify_if_then_else cannot then
reduce it back to the original.  In fact, simplify_if_then_else does
have a similar pattern, but it cannot deal with the subreg.  This is
easily fixed by ZERO_EXTENDing the result from QImode back to the
comparison's mode.  The shift/shift/and or shift/and/shift combination
can then be reduced to shift+and just like for any other bit position.

These forms are not included in the fold-and-rshift-2.c testcase,
because in this case a shift+shift (without the following AND) is a
valid alternative too; and at least on x86 it has the same cost as
shift+and.  Compare:

        movl    %edi, %eax
        sarl    $24, %eax
        andl    $128, %eax
        ret

and

        movl    %edi, %eax
        shrl    $31, %eax
        sall    $7, %eax

Bootstrapped/regtested x86_64-pc-linux-gnu, ok?

Paolo

2016-11-26  Paolo Bonzini  <bonz...@gnu.org>

        * combine.c (simplify_if_then_else): Simplify IF_THEN_ELSE
        that isolates a single bit, even if the condition involves
        subregs.
        * match.pd: Simplify X ? C : 0 where C is a power of 2 and
        X tests a single bit.

2016-11-26  Paolo Bonzini  <bonz...@gnu.org>

        * gcc.dg/fold-and-lshift.c, gcc.dg/fold-and-rshift-1.c,
        gcc.dg/fold-and-rshift-2.c: New testcases.

Index: combine.c
===================================================================
--- combine.c   (revision 242742)
+++ combine.c   (working copy)
@@ -6522,14 +6522,22 @@
       simplify_shift_const (NULL_RTX, ASHIFT, mode,
                            gen_lowpart (mode, XEXP (cond, 0)), i);
 
-  /* (IF_THEN_ELSE (NE REG 0) (0) (8)) is REG for nonzero_bits (REG) == 8.  */
+  /* (IF_THEN_ELSE (NE A 0) C1 0) is A or a zero-extend of A if the only
+     non-zero bit in A is C1.  */
   if (true_code == NE && XEXP (cond, 1) == const0_rtx
       && false_rtx == const0_rtx && CONST_INT_P (true_rtx)
-      && GET_MODE (XEXP (cond, 0)) == mode
+      && INTEGRAL_MODE_P (GET_MODE (XEXP (cond, 0)))
       && (UINTVAL (true_rtx) & GET_MODE_MASK (mode))
-         == nonzero_bits (XEXP (cond, 0), mode)
+         == nonzero_bits (XEXP (cond, 0), GET_MODE (XEXP (cond, 0)))
       && (i = exact_log2 (UINTVAL (true_rtx) & GET_MODE_MASK (mode))) >= 0)
-    return XEXP (cond, 0);
+    {
+      rtx val = XEXP (cond, 0);
+      enum machine_mode val_mode = GET_MODE (val);
+      if (val_mode == mode)
+        return val;
+      else if (GET_MODE_PRECISION (val_mode) < GET_MODE_PRECISION (mode))
+        return simplify_gen_unary (ZERO_EXTEND, mode, val, val_mode);
+    }
 
   return x;
 }
Index: match.pd
===================================================================
--- match.pd    (revision 242742)
+++ match.pd    (working copy)
@@ -2554,6 +2554,19 @@
   (cmp (bit_and@2 @0 integer_pow2p@1) @1)
   (icmp @2 { build_zero_cst (TREE_TYPE (@0)); })))
  
+/* If we have (A & C) != 0 ? D : 0 where C and D are powers of 2,
+   convert this into a shift of (A & C).  */
+(simplify
+ (cond
+  (ne (bit_and@2 @0 integer_pow2p@1) integer_zerop)
+  integer_pow2p@3 integer_zerop)
+ (with {
+    int shift = wi::exact_log2 (@3) - wi::exact_log2 (@1);
+  }
+  (if (shift > 0)
+   (lshift (convert @2) { build_int_cst (integer_type_node, shift); })
+   (convert (rshift @2 { build_int_cst (integer_type_node, -shift); })))))
+
 /* If we have (A & C) != 0 where C is the sign bit of A, convert
    this into A < 0.  Similarly for (A & C) == 0 into A >= 0.  */
 (for cmp (eq ne)
@@ -2568,6 +2581,19 @@
    (with { tree stype = signed_type_for (TREE_TYPE (@0)); }
     (ncmp (convert:stype @0) { build_zero_cst (stype); })))))
 
+/* If we have A < 0 ? C : 0 where C is a power of 2,
+   convert this into a right shift and AND.  */
+(simplify
+ (cond
+  (lt @0 integer_zerop)
+  integer_pow2p@1 integer_zerop)
+ (with {
+    int shift = element_precision (@0) - wi::exact_log2 (@1) - 1;
+  }
+  (bit_and
+   (convert (rshift @0 { build_int_cst (integer_type_node, shift); }))
+   @1)))
+
 /* When the addresses are not directly of decls compare base and offset.
    This implements some remaining parts of fold_comparison address
    comparisons but still no complete part of it.  Still it is good
Index: testsuite/gcc.dg/fold-and-lshift.c
===================================================================
--- testsuite/gcc.dg/fold-and-lshift.c  (revision 0)
+++ testsuite/gcc.dg/fold-and-lshift.c  (working copy)
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-original" } */
+
+int f(int x)
+{
+       return (x << 2) & 128;
+}
+
+int g(int x)
+{
+       return !!(x & 32) << 7;
+}
+
+int h(int x)
+{
+       return ((x >> 5) & 1) << 7;
+}
+
+int i(int x)
+{
+       return (x & 32) >> 5 << 7;
+}
+
+int j(int x)
+{
+       return ((x >> 5) & 1) ? 128 : 0;
+}
+
+int k(int x)
+{
+       return (x & 32) ? 128 : 0;
+}
+
+/* { dg-final { scan-tree-dump-not " \\? " "original" } } */
+/* { dg-final { scan-assembler-not "sarl" { target i?86-*-* x86_64-*-* } } } */
Index: testsuite/gcc.dg/fold-and-rshift-1.c
===================================================================
--- testsuite/gcc.dg/fold-and-rshift-1.c        (revision 0)
+++ testsuite/gcc.dg/fold-and-rshift-1.c        (working copy)
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-original" } */
+
+int f(int x)
+{
+       return (x >> 2) & 128;
+}
+
+int g(int x)
+{
+       return !!(x & 512) << 7;
+}
+
+int h(int x)
+{
+       return ((x >> 9) & 1) << 7;
+}
+
+int i(int x)
+{
+       return (x & 512) >> 9 << 7;
+}
+
+int j(int x)
+{
+       return ((x >> 9) & 1) ? 128 : 0;
+}
+
+int k(int x)
+{
+       return (x & 512) ? 128 : 0;
+}
+
+/* { dg-final { scan-tree-dump-not " \\? " "original" } } */
+/* { dg-final { scan-assembler-not "sall" { target i?86-*-* x86_64-*-* } } } */
Index: testsuite/gcc.dg/fold-and-rshift-2.c
===================================================================
--- testsuite/gcc.dg/fold-and-rshift-2.c        (revision 0)
+++ testsuite/gcc.dg/fold-and-rshift-2.c        (working copy)
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-original" } */
+
+unsigned f(unsigned x)
+{
+       return (x >> 29) & 32;
+}
+
+unsigned g(unsigned x)
+{
+       return !!(x & 0x80000000) << 5;
+}
+
+unsigned j(unsigned x)
+{
+       return ((x >> 31) & 1) ? 32 : 0;
+}
+
+unsigned k(unsigned x)
+{
+       return (x & 0x80000000) ? 32 : 0;
+}
+
+/* { dg-final { scan-tree-dump-not " \\? " "original" } } */
+/* { dg-final { scan-assembler-not "sall" { target i?86-*-* x86_64-*-* } } } */

Reply via email to