Hi! This patch adds a few peephole2s to help optimize if (!--*x) etc. This is something the combiner doesn't and can't easily handle, because in the sequence reg0 = mem1; reg0 {+,-,&,|,^}= x; mem1 = reg0; cc = compare (reg0, 0) the register reg0 is used by both the store and the compare, and there is no dependence between those two insns. So for reg0 = mem1; reg0 {+,-,&,|,^}= x; mem1 = reg0 alone the combiner can't do anything, because reg0 is still needed afterwards and mem1 {+,-,&,|,^}= x doesn't set it; and the compare, being the second user of reg0, doesn't have any LOG_LINKS, and thus try_combine isn't called for it at all.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2011-05-27 Jakub Jelinek <ja...@redhat.com> PR rtl-optimization/49095 * config/i386/predicates.md (plusminuslogic_operator): New predicate. * config/i386/i386.md: Add peepholes for mem {+,-,&,|,^}= x; mem != 0. * gcc.target/i386/pr49095.c: New test. --- gcc/config/i386/predicates.md.jj 2011-05-11 19:39:00.000000000 +0200 +++ gcc/config/i386/predicates.md 2011-05-27 10:55:11.000000000 +0200 @@ -1066,6 +1066,10 @@ (define_predicate "mult_operator" (define_predicate "div_operator" (match_code "div")) +;; Return true if this is a plus, minus, and, ior or xor operation. +(define_predicate "plusminuslogic_operator" + (match_code "plus,minus,and,ior,xor")) + ;; Return true if this is a float extend operation. (define_predicate "float_operator" (match_code "float")) --- gcc/config/i386/i386.md.jj 2011-05-25 16:30:04.000000000 +0200 +++ gcc/config/i386/i386.md 2011-05-27 13:42:11.000000000 +0200 @@ -16852,6 +16852,91 @@ (define_peephole2 (clobber (reg:CC FLAGS_REG))]) (set (match_dup 0) (match_dup 2))]) +;; Attempt to use arith or logical operations with memory outputs with +;; setting of flags. +(define_peephole2 + [(set (match_operand:SWI 0 "register_operand" "") + (match_operand:SWI 1 "memory_operand" "")) + (parallel [(set (match_dup 0) + (match_operator:SWI 3 "plusminuslogic_operator" + [(match_dup 0) + (match_operand:SWI 2 "<nonmemory_operand>" "")])) + (clobber (reg:CC FLAGS_REG))]) + (set (match_dup 1) (match_dup 0)) + (set (reg FLAGS_REG) (compare (match_dup 0) (const_int 0)))] + "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) + && peep2_reg_dead_p (4, operands[0]) + && !reg_overlap_mentioned_p (operands[0], operands[1]) + && ix86_match_ccmode (peep2_next_insn (3), + (GET_CODE (operands[3]) == PLUS + || GET_CODE (operands[3]) == MINUS) + ? 
CCGOCmode : CCNOmode)" + [(parallel [(set (match_dup 4) (match_dup 5)) + (set (match_dup 1) (match_op_dup 3 [(match_dup 1) + (match_dup 2)]))])] + "operands[4] = SET_DEST (PATTERN (peep2_next_insn (3))); + operands[5] = gen_rtx_fmt_ee (GET_CODE (operands[3]), <MODE>mode, + copy_rtx (operands[1]), + copy_rtx (operands[2])); + operands[5] = gen_rtx_COMPARE (GET_MODE (operands[4]), + operands[5], const0_rtx);") + +(define_peephole2 + [(parallel [(set (match_operand:SWI 0 "register_operand" "") + (match_operator:SWI 2 "plusminuslogic_operator" + [(match_dup 0) + (match_operand:SWI 1 "memory_operand" "")])) + (clobber (reg:CC FLAGS_REG))]) + (set (match_dup 1) (match_dup 0)) + (set (reg FLAGS_REG) (compare (match_dup 0) (const_int 0)))] + "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) + && GET_CODE (operands[2]) != MINUS + && peep2_reg_dead_p (3, operands[0]) + && !reg_overlap_mentioned_p (operands[0], operands[1]) + && ix86_match_ccmode (peep2_next_insn (2), + GET_CODE (operands[2]) == PLUS + ? 
CCGOCmode : CCNOmode)" + [(parallel [(set (match_dup 3) (match_dup 4)) + (set (match_dup 1) (match_op_dup 2 [(match_dup 1) + (match_dup 0)]))])] + "operands[3] = SET_DEST (PATTERN (peep2_next_insn (2))); + operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[2]), <MODE>mode, + copy_rtx (operands[1]), + copy_rtx (operands[0])); + operands[4] = gen_rtx_COMPARE (GET_MODE (operands[3]), + operands[4], const0_rtx);") + +(define_peephole2 + [(set (match_operand:SWI12 0 "register_operand" "") + (match_operand:SWI12 1 "memory_operand" "")) + (parallel [(set (match_operand:SI 4 "register_operand" "") + (match_operator:SI 3 "plusminuslogic_operator" + [(match_dup 4) + (match_operand:SI 2 "nonmemory_operand" "")])) + (clobber (reg:CC FLAGS_REG))]) + (set (match_dup 1) (match_dup 0)) + (set (reg FLAGS_REG) (compare (match_dup 0) (const_int 0)))] + "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) + && REG_P (operands[0]) && REG_P (operands[4]) + && REGNO (operands[0]) == REGNO (operands[4]) + && peep2_reg_dead_p (4, operands[0]) + && !reg_overlap_mentioned_p (operands[0], operands[1]) + && ix86_match_ccmode (peep2_next_insn (3), + (GET_CODE (operands[3]) == PLUS + || GET_CODE (operands[3]) == MINUS) + ? CCGOCmode : CCNOmode)" + [(parallel [(set (match_dup 4) (match_dup 5)) + (set (match_dup 1) (match_dup 6))])] + "operands[2] = gen_lowpart (<MODE>mode, operands[2]); + operands[4] = SET_DEST (PATTERN (peep2_next_insn (3))); + operands[5] = gen_rtx_fmt_ee (GET_CODE (operands[3]), <MODE>mode, + copy_rtx (operands[1]), operands[2]); + operands[5] = gen_rtx_COMPARE (GET_MODE (operands[4]), + operands[5], const0_rtx); + operands[6] = gen_rtx_fmt_ee (GET_CODE (operands[3]), <MODE>mode, + copy_rtx (operands[1]), + copy_rtx (operands[2]));") + ;; Attempt to always use XOR for zeroing registers. 
(define_peephole2 [(set (match_operand 0 "register_operand" "") --- gcc/testsuite/gcc.target/i386/pr49095.c.jj 2011-05-27 12:10:39.000000000 +0200 +++ gcc/testsuite/gcc.target/i386/pr49095.c 2011-05-27 13:50:24.000000000 +0200 @@ -0,0 +1,73 @@ +/* PR rtl-optimization/49095 */ +/* { dg-do compile } */ +/* { dg-options "-Os" } */ +/* { dg-options "-Os -mregparm=2" { target ilp32 } } */ + +void foo (void *); + +int * +f1 (int *x) +{ + if (!--*x) + foo (x); + return x; +} + +int +g1 (int x) +{ + if (!--x) + foo ((void *) 0); + return x; +} + +#define F(T, OP, OPN) \ +T * \ +f##T##OPN (T *x, T y) \ +{ \ + *x OP y; \ + if (!*x) \ + foo (x); \ + return x; \ +} \ + \ +T \ +g##T##OPN (T x, T y) \ +{ \ + x OP y; \ + if (!x) \ + foo ((void *) 0); \ + return x; \ +} \ + \ +T * \ +h##T##OPN (T *x) \ +{ \ + *x OP 24; \ + if (!*x) \ + foo (x); \ + return x; \ +} \ + \ +T \ +i##T##OPN (T x, T y) \ +{ \ + x OP 24; \ + if (!x) \ + foo ((void *) 0); \ + return x; \ +} + +#define G(T) \ +F (T, +=, plus) \ +F (T, -=, minus) \ +F (T, &=, and) \ +F (T, |=, or) \ +F (T, ^=, xor) + +G (char) +G (short) +G (int) +G (long) + +/* { dg-final { scan-assembler-not "test\[lq\]" } } */ Jakub