In code like the following from KVM: /* it is a read fault? */ error_code = (exit_qualification << 2) & PFERR_FETCH_MASK;
it would be nicer to write /* it is a read fault? */ error_code = (exit_qualification & VMX_EPT_READ_FAULT_MASK) ? PFERR_FETCH_MASK : 0; instead of having to know the difference between the positions of the source and destination bits. LLVM catches the latter just fine (which is why I am sending this in stage 3...), but GCC does not, so this patch adds two patterns to catch it. The combine.c hunk, by contrast, is needed to simplify cases that do not use the ternary operator (the "h" and "i" functions in the testcases) like this: return ((x >> 9) & 1) << 7; Normally this is simplified just fine to a single shift and an AND. Here, however, the bit to preserve after (x >> 9 << 7) is the QImode sign bit, and if_then_else_cond produces a complicated concoction involving (ne:SI (subreg:QI ...)). simplify_if_then_else cannot then reduce it back to the original. In fact, simplify_if_then_else does have a similar pattern, but it cannot deal with the subreg. This is easily fixed by ZERO_EXTENDing the result from QImode back to the comparison's mode. The shift/shift/and or shift/and/shift combination can then be reduced to shift+and just like for any other bit position. These forms are not included in the fold-and-rshift-2.c testcase, because in this case a shift+shift (without the following AND) is a valid alternative too, and at least on x86 it has the same cost as shift+and. Compare: movl %edi, %eax sarl $24, %eax andl $128, %eax ret and movl %edi, %eax shrl $31, %eax sall $7, %eax Bootstrapped/regtested x86_64-pc-linux-gnu, ok? Paolo 2016-11-26 Paolo Bonzini <bonz...@gnu.org> * combine.c (simplify_if_then_else): Simplify IF_THEN_ELSE that isolates a single bit, even if the condition involves subregs. * match.pd: Simplify X ? C : 0 where C is a power of 2 and X tests a single bit. 2016-11-26 Paolo Bonzini <bonz...@gnu.org> * gcc.dg/fold-and-lshift.c, gcc.dg/fold-and-rshift-1.c, gcc.dg/fold-and-rshift-2.c: New testcases. 
Index: combine.c =================================================================== --- combine.c (revision 242742) +++ combine.c (working copy) @@ -6522,14 +6522,22 @@ simplify_shift_const (NULL_RTX, ASHIFT, mode, gen_lowpart (mode, XEXP (cond, 0)), i); - /* (IF_THEN_ELSE (NE REG 0) (0) (8)) is REG for nonzero_bits (REG) == 8. */ + /* (IF_THEN_ELSE (NE A 0) C1 0) is A or a zero-extend of A if the only + non-zero bit in A is C1. */ if (true_code == NE && XEXP (cond, 1) == const0_rtx && false_rtx == const0_rtx && CONST_INT_P (true_rtx) - && GET_MODE (XEXP (cond, 0)) == mode + && INTEGRAL_MODE_P (GET_MODE (XEXP (cond, 0))) && (UINTVAL (true_rtx) & GET_MODE_MASK (mode)) - == nonzero_bits (XEXP (cond, 0), mode) + == nonzero_bits (XEXP (cond, 0), GET_MODE (XEXP (cond, 0))) && (i = exact_log2 (UINTVAL (true_rtx) & GET_MODE_MASK (mode))) >= 0) - return XEXP (cond, 0); + { + rtx val = XEXP (cond, 0); + enum machine_mode val_mode = GET_MODE (val); + if (val_mode == mode) + return val; + else if (GET_MODE_PRECISION (val_mode) < GET_MODE_PRECISION (mode)) + return simplify_gen_unary (ZERO_EXTEND, mode, val, val_mode); + } return x; } Index: match.pd =================================================================== --- match.pd (revision 242742) +++ match.pd (working copy) @@ -2554,6 +2554,19 @@ (cmp (bit_and@2 @0 integer_pow2p@1) @1) (icmp @2 { build_zero_cst (TREE_TYPE (@0)); }))) +/* If we have (A & C) != 0 ? D : 0 where C and D are powers of 2, + convert this into a shift of (A & C). */ +(simplify + (cond + (ne (bit_and@2 @0 integer_pow2p@1) integer_zerop) + integer_pow2p@3 integer_zerop) + (with { + int shift = wi::exact_log2 (@3) - wi::exact_log2 (@1); + } + (if (shift > 0) + (lshift (convert @2) { build_int_cst (integer_type_node, shift); }) + (convert (rshift @2 { build_int_cst (integer_type_node, -shift); }))))) + /* If we have (A & C) != 0 where C is the sign bit of A, convert this into A < 0. Similarly for (A & C) == 0 into A >= 0. 
*/ (for cmp (eq ne) @@ -2568,6 +2581,19 @@ (with { tree stype = signed_type_for (TREE_TYPE (@0)); } (ncmp (convert:stype @0) { build_zero_cst (stype); }))))) +/* If we have A < 0 ? C : 0 where C is a power of 2, + convert this into a right shift and AND. */ +(simplify + (cond + (lt @0 integer_zerop) + integer_pow2p@1 integer_zerop) + (with { + int shift = element_precision (@0) - wi::exact_log2 (@1) - 1; + } + (bit_and + (convert (rshift @0 { build_int_cst (integer_type_node, shift); })) + @1))) + /* When the addresses are not directly of decls compare base and offset. This implements some remaining parts of fold_comparison address comparisons but still no complete part of it. Still it is good Index: testsuite/gcc.dg/fold-and-lshift.c =================================================================== --- testsuite/gcc.dg/fold-and-lshift.c (revision 0) +++ testsuite/gcc.dg/fold-and-lshift.c (working copy) @@ -0,0 +1,35 @@ +/* { dg-do compile } */ +/* { dg-options "-O -fdump-tree-original" } */ + +int f(int x) +{ + return (x << 2) & 128; +} + +int g(int x) +{ + return !!(x & 32) << 7; +} + +int h(int x) +{ + return ((x >> 5) & 1) << 7; +} + +int i(int x) +{ + return (x & 32) >> 5 << 7; +} + +int j(int x) +{ + return ((x >> 5) & 1) ? 128 : 0; +} + +int k(int x) +{ + return (x & 32) ? 128 : 0; +} + +/* { dg-final { scan-tree-dump-not " \\? 
" "original" } } */ +/* { dg-final { scan-assembler-not "sarl" { target i?86-*-* x86_64-*-* } } }" */ Index: testsuite/gcc.dg/fold-and-rshift-1.c =================================================================== --- testsuite/gcc.dg/fold-and-rshift-1.c (revision 0) +++ testsuite/gcc.dg/fold-and-rshift-1.c (working copy) @@ -0,0 +1,35 @@ +/* { dg-do compile } */ +/* { dg-options "-O -fdump-tree-original" } */ + +int f(int x) +{ + return (x >> 2) & 128; +} + +int g(int x) +{ + return !!(x & 512) << 7; +} + +int h(int x) +{ + return ((x >> 9) & 1) << 7; +} + +int i(int x) +{ + return (x & 512) >> 9 << 7; +} + +int j(int x) +{ + return ((x >> 9) & 1) ? 128 : 0; +} + +int k(int x) +{ + return (x & 512) ? 128 : 0; +} + +/* { dg-final { scan-tree-dump-not " \\? " "original" } } */ +/* { dg-final { scan-assembler-not "sall" { target i?86-*-* x86_64-*-* } } }" */ Index: testsuite/gcc.dg/fold-and-rshift-2.c =================================================================== --- testsuite/gcc.dg/fold-and-rshift-2.c (revision 0) +++ testsuite/gcc.dg/fold-and-rshift-2.c (working copy) @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-options "-O -fdump-tree-original" } */ + +unsigned f(unsigned x) +{ + return (x >> 29) & 32; +} + +unsigned g(unsigned x) +{ + return !!(x & 0x80000000) << 5; +} + +unsigned j(unsigned x) +{ + return ((x >> 31) & 1) ? 32 : 0; +} + +unsigned k(unsigned x) +{ + return (x & 0x80000000) ? 32 : 0; +} + +/* { dg-final { scan-tree-dump-not " \\? " "original" } } */ +/* { dg-final { scan-assembler-not "sall" { target i?86-*-* x86_64-*-* } } }" */