Hello! Back to converting x86 to post-reload compare elimination pass.
Arithmetic operations in x86_64 can implicitly zero extend the result, and set flags according to the non-extended result. Following testcase should exercise both features: --cut here-- void foo (long int, int); void test (unsigned int a, unsigned int b) { long int x = a + b; int f = a + b > 0; foo (x, f); } --cut here-- However, with pre-reload implementation, gcc was able to merge operation + compare, while ignoring implicit extension: test: addl %esi, %edi # 8 *addsi_2/3 setne %sil # 19 *setcc_qi movl %edi, %edi # 11 *zero_extendsidi2_rex64/1 movzbl %sil, %esi # 20 *zero_extendqisi2 jmp foo # 14 *sibcall Unpatched post-reload compare elimination was able to merge operation + zero extension (please note that attached WIP target patch is needed): test: addl %esi, %edi # 7 addsi_1_zext/2 xorl %esi, %esi # 22 *movsi_xor testl %edi, %edi # 23 *cmpsi_ccno_1/1 setne %sil # 24 *setcc_qi_slp jmp foo # 14 *sibcall And patched post-reload compare finally produces optimal code: test: addl %esi, %edi # 7 *addsi_2_zext/2 setne %sil # 19 *setcc_qi movzbl %sil, %esi # 20 *zero_extendqisi2 jmp foo # 14 *sibcall Where the operation looks like: #(insn:TI 7 9 19 2 (parallel [ # (set (reg:DI 5 di) # (zero_extend:DI (plus:SI (reg/v:SI 4 si [orig:64 b ] [64]) # (reg/v:SI 5 di [orig:63 a ] [63])))) # (set (reg:CCZ 17 flags) # (compare:CCZ (plus:SI (reg/v:SI 4 si [orig:64 b ] [64]) # (reg/v:SI 5 di [orig:63 a ] [63])) # (const_int 0 [0]))) # ]) cmpadd.c:5 261 {*addsi_2_zext} # (expr_list:REG_DEAD (reg/v:SI 4 si [orig:64 b ] [64]) # (nil))) addl %esi, %edi # 7 *addsi_2_zext/2 [length = 2] Attached patch teaches post-reload compare optimization how to handle arithmetic operations with implicit extensions. The input is: (insn 7 4 8 2 (parallel [ (set (reg:DI 5 di) (zero_extend:DI (plus:SI (reg/v:SI 4 si [orig:64 b ] [64]) (reg/v:SI 5 di [orig:63 a ] [63])))) (clobber (reg:CC 17 flags)) ]) cmpadd.c:5 253 {addsi_1_zext} (nil)) (insn 8 7 9 2 (set (reg:CCZ 17 flags) (compare:CCZ (reg:SI 5 di [orig:59 D.1714 ] [59]) (const_int 0 [0]))) cmpadd.c:6 2 {*cmpsi_ccno_1} (nil)) It simply checks if REGNOs of registers are the same, the mode of the compared register (against zero!) is checked to the mode of the inner part of the extension instruction. 2012-04-24 Uros Bizjak <ubiz...@gmail.com> * compare-elim.c (try_eliminate_compare): Also handle operands with implicit extensions. Patch is lightly tested on x86_64-pc-linux-gnu, together with attached WIP patch. Opinions? Since it looks quite safe, is it OK for mainline? Uros.
Index: compare-elim.c =================================================================== --- compare-elim.c (revision 186721) +++ compare-elim.c (working copy) @@ -563,10 +563,26 @@ Validate that PREV_CLOBBER itself does in fact refer to IN_A. Do recall that we've already validated the shape of PREV_CLOBBER. */ x = XVECEXP (PATTERN (insn), 0, 0); - if (!rtx_equal_p (SET_DEST (x), in_a)) + if (rtx_equal_p (SET_DEST (x), in_a)) + cmp_src = SET_SRC (x); + + /* Also check operations with implicit extensions, e.g.: + [(set (reg:DI) + (zero_extend:DI (plus:SI (reg:SI)(reg:SI)))) + (set (reg:CCZ flags) + (compare:CCZ + (plus:SI (reg:SI)(reg:SI)) + (const_int 0)))] */ + else if (REG_P (SET_DEST (x)) + && REG_P (in_a) + && REGNO (SET_DEST (x)) == REGNO (in_a) + && (GET_CODE (SET_SRC (x)) == ZERO_EXTEND + || GET_CODE (SET_SRC (x)) == SIGN_EXTEND) + && GET_MODE (XEXP (SET_SRC (x), 0)) == GET_MODE (in_a)) + cmp_src = XEXP (SET_SRC (x), 0); + else return false; - cmp_src = SET_SRC (x); - + /* Determine if we ought to use a different CC_MODE here. */ flags = maybe_select_cc_mode (cmp, cmp_src, cmp->in_b); if (flags == NULL) Index: config/i386/i386.c =================================================================== --- config/i386/i386.c (revision 186721) +++ config/i386/i386.c (working copy) @@ -17861,19 +17861,31 @@ emit_insn (gen_rtx_SET (VOIDmode, dest, x)); } -/* Return TRUE or FALSE depending on whether the first SET in INSN - has source and destination with matching CC modes, and that the +/* Return TRUE or FALSE depending on whether the first SET from COMPARE + in INSN has source and destination with matching CC modes, and that the CC mode is at least as constrained as REQ_MODE. */ bool ix86_match_ccmode (rtx insn, enum machine_mode req_mode) { - rtx set; + rtx pat, set; enum machine_mode set_mode; + int i; - set = PATTERN (insn); - if (GET_CODE (set) == PARALLEL) - set = XVECEXP (set, 0, 0); + pat = PATTERN (insn); + if (GET_CODE (pat) == PARALLEL) + { + for (i = 0; i < XVECLEN (pat, 0); i++) + { + set = XVECEXP (pat, 0, i); + if (GET_CODE (set) == SET + && GET_CODE (SET_SRC (set)) == COMPARE) + break; + } + } + else + set = pat; + gcc_assert (GET_CODE (set) == SET); gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE); @@ -39090,6 +39102,8 @@ #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs #undef TARGET_CC_MODES_COMPATIBLE #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible +#undef TARGET_FLAGS_REGNUM +#define TARGET_FLAGS_REGNUM FLAGS_REG #undef TARGET_MACHINE_DEPENDENT_REORG #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg Index: config/i386/i386.md =================================================================== --- config/i386/i386.md (revision 186769) +++ config/i386/i386.md (working copy) @@ -5808,14 +5808,14 @@ (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]) (define_insn "*add<mode>_2" - [(set (reg FLAGS_REG) + [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>,<r>m,<r>") + (plus:SWI + (match_operand:SWI 1 "nonimmediate_operand" "%0,0,<r>") + (match_operand:SWI 2 "<general_operand>" "<g>,<r><i>,0"))) + (set (reg FLAGS_REG) (compare - (plus:SWI - (match_operand:SWI 1 "nonimmediate_operand" "%0,0,<r>") - (match_operand:SWI 2 "<general_operand>" "<g>,<r><i>,0")) - (const_int 0))) - (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>,<r>m,<r>") - (plus:SWI (match_dup 1) (match_dup 2)))] + (plus:SWI (match_dup 1) (match_dup 2)) + (const_int 0)))] "ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)" { @@ -5857,13 +5857,14 @@ ;; See comment for addsi_1_zext why we do use nonimmediate_operand (define_insn "*addsi_2_zext" - [(set (reg FLAGS_REG) + [(set (match_operand:DI 0 "register_operand" "=r,r") + (zero_extend:DI + (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r") + (match_operand:SI 2 "x86_64_general_operand" "rme,0")))) + (set (reg FLAGS_REG) (compare - (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r") - (match_operand:SI 2 "x86_64_general_operand" "rme,0")) - (const_int 0))) - (set (match_operand:DI 0 "register_operand" "=r,r") - (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))] + (plus:SI (match_dup 1) (match_dup 2)) + (const_int 0)))] "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (PLUS, SImode, operands)" { @@ -6090,7 +6091,7 @@ (match_operand:SWI 2 "<general_operand>" "<g>,0")) (const_int 0))) (clobber (match_scratch:SWI 0 "=<r>,<r>"))] - "ix86_match_ccmode (insn, CCGOCmode) + "0 && ix86_match_ccmode (insn, CCGOCmode) && !(MEM_P (operands[1]) && MEM_P (operands[2]))" { switch (get_attr_type (insn))
Index: i386.md =================================================================== --- i386.md (revision 186769) +++ i386.md (working copy) @@ -5808,14 +5808,14 @@ (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]) (define_insn "*add<mode>_2" - [(set (reg FLAGS_REG) + [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>,<r>m,<r>") + (plus:SWI + (match_operand:SWI 1 "nonimmediate_operand" "%0,0,<r>") + (match_operand:SWI 2 "<general_operand>" "<g>,<r><i>,0"))) + (set (reg FLAGS_REG) (compare - (plus:SWI - (match_operand:SWI 1 "nonimmediate_operand" "%0,0,<r>") - (match_operand:SWI 2 "<general_operand>" "<g>,<r><i>,0")) - (const_int 0))) - (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>,<r>m,<r>") - (plus:SWI (match_dup 1) (match_dup 2)))] + (plus:SWI (match_dup 1) (match_dup 2)) + (const_int 0)))] "ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)" { @@ -5857,13 +5857,14 @@ ;; See comment for addsi_1_zext why we do use nonimmediate_operand (define_insn "*addsi_2_zext" - [(set (reg FLAGS_REG) + [(set (match_operand:DI 0 "register_operand" "=r,r") + (zero_extend:DI + (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r") + (match_operand:SI 2 "x86_64_general_operand" "rme,0")))) + (set (reg FLAGS_REG) (compare - (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r") - (match_operand:SI 2 "x86_64_general_operand" "rme,0")) - (const_int 0))) - (set (match_operand:DI 0 "register_operand" "=r,r") - (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))] + (plus:SI (match_dup 1) (match_dup 2)) + (const_int 0)))] "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (PLUS, SImode, operands)" { @@ -6090,7 +6091,7 @@ (match_operand:SWI 2 "<general_operand>" "<g>,0")) (const_int 0))) (clobber (match_scratch:SWI 0 "=<r>,<r>"))] - "ix86_match_ccmode (insn, CCGOCmode) + "0 && ix86_match_ccmode (insn, CCGOCmode) && !(MEM_P (operands[1]) && MEM_P (operands[2]))" { switch (get_attr_type (insn)) Index: i386.c =================================================================== --- i386.c (revision 186721) +++ i386.c (working copy) @@ -17861,19 +17861,31 @@ emit_insn (gen_rtx_SET (VOIDmode, dest, x)); } -/* Return TRUE or FALSE depending on whether the first SET in INSN - has source and destination with matching CC modes, and that the +/* Return TRUE or FALSE depending on whether the first SET from COMPARE + in INSN has source and destination with matching CC modes, and that the CC mode is at least as constrained as REQ_MODE. */ bool ix86_match_ccmode (rtx insn, enum machine_mode req_mode) { - rtx set; + rtx pat, set; enum machine_mode set_mode; + int i; - set = PATTERN (insn); - if (GET_CODE (set) == PARALLEL) - set = XVECEXP (set, 0, 0); + pat = PATTERN (insn); + if (GET_CODE (pat) == PARALLEL) + { + for (i = 0; i < XVECLEN (pat, 0); i++) + { + set = XVECEXP (pat, 0, i); + if (GET_CODE (set) == SET + && GET_CODE (SET_SRC (set)) == COMPARE) + break; + } + } + else + set = pat; + gcc_assert (GET_CODE (set) == SET); gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE); @@ -39090,6 +39102,8 @@ #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs #undef TARGET_CC_MODES_COMPATIBLE #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible +#undef TARGET_FLAGS_REGNUM +#define TARGET_FLAGS_REGNUM FLAGS_REG #undef TARGET_MACHINE_DEPENDENT_REORG #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg