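The attached patch implements zero-extended cmove patterns. It also merges each pair of mirrored "cmove with mem" peephole2 patterns (the SWI248 pair and the MODEF pair) into a single pattern that handles a memory operand in either arm.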
2015-06-18  Uros Bizjak  <ubiz...@gmail.com>

	* config/i386/i386.md (*movsicc_noc_zext): New insn.
	(zero-extended cmove with mem peephole2): New pattern.
	(cmove with mem peephole2): Merge patterns.

testsuite/ChangeLog:

2015-06-18  Uros Bizjak  <ubiz...@gmail.com>

	* gcc.target/i386/cmov9.c: New test.

Tested on x86_64-linux-gnu {,-m32} and committed to mainline SVN.

Uros.
Index: config/i386/i386.md =================================================================== --- config/i386/i386.md (revision 224625) +++ config/i386/i386.md (working copy) @@ -16746,6 +16746,22 @@ [(set_attr "type" "icmov") (set_attr "mode" "<MODE>")]) +(define_insn "*movsicc_noc_zext" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (if_then_else:DI (match_operator 1 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (zero_extend:DI + (match_operand:SI 2 "nonimmediate_operand" "rm,0")) + (zero_extend:DI + (match_operand:SI 3 "nonimmediate_operand" "0,rm"))))] + "TARGET_64BIT + && TARGET_CMOVE && !(MEM_P (operands[2]) && MEM_P (operands[3]))" + "@ + cmov%O2%C1\t{%2, %k0|%k0, %2} + cmov%O2%c1\t{%3, %k0|%k0, %3}" + [(set_attr "type" "icmov") + (set_attr "mode" "SI")]) + ;; Don't do conditional moves with memory inputs. This splitter helps ;; register starved x86_32 by forcing inputs into registers before reload. (define_split @@ -16797,30 +16813,65 @@ ;; Don't do conditional moves with memory inputs (define_peephole2 - [(match_scratch:SWI248 2 "r") + [(match_scratch:SWI248 4 "r") (set (match_operand:SWI248 0 "register_operand") (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator" [(reg FLAGS_REG) (const_int 0)]) - (match_dup 0) - (match_operand:SWI248 3 "memory_operand")))] + (match_operand:SWI248 2 "nonimmediate_operand") + (match_operand:SWI248 3 "nonimmediate_operand")))] "TARGET_CMOVE && TARGET_AVOID_MEM_OPND_FOR_CMOVE + && (MEM_P (operands[2]) || MEM_P (operands[3])) && optimize_insn_for_speed_p ()" - [(set (match_dup 2) (match_dup 3)) + [(set (match_dup 4) (match_dup 5)) (set (match_dup 0) - (if_then_else:SWI248 (match_dup 1) (match_dup 0) (match_dup 2)))]) + (if_then_else:SWI248 (match_dup 1) (match_dup 2) (match_dup 3)))] +{ + if (MEM_P (operands[2])) + { + operands[5] = operands[2]; + operands[2] = operands[4]; + } + else if (MEM_P (operands[3])) + { + operands[5] = operands[3]; + operands[3] = operands[4]; + } + else + 
gcc_unreachable (); +}) (define_peephole2 - [(match_scratch:SWI248 2 "r") - (set (match_operand:SWI248 0 "register_operand") - (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator" - [(reg FLAGS_REG) (const_int 0)]) - (match_operand:SWI248 3 "memory_operand") - (match_dup 0)))] - "TARGET_CMOVE && TARGET_AVOID_MEM_OPND_FOR_CMOVE + [(match_scratch:SI 4 "r") + (set (match_operand:DI 0 "register_operand") + (if_then_else:DI (match_operator 1 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (zero_extend:DI + (match_operand:SI 2 "nonimmediate_operand")) + (zero_extend:DI + (match_operand:SI 3 "nonimmediate_operand"))))] + "TARGET_64BIT + && TARGET_CMOVE && TARGET_AVOID_MEM_OPND_FOR_CMOVE + && (MEM_P (operands[2]) || MEM_P (operands[3])) && optimize_insn_for_speed_p ()" - [(set (match_dup 2) (match_dup 3)) + [(set (match_dup 4) (match_dup 5)) (set (match_dup 0) - (if_then_else:SWI248 (match_dup 1) (match_dup 2) (match_dup 0)))]) + (if_then_else:DI (match_dup 1) + (zero_extend:DI (match_dup 2)) + (zero_extend:DI (match_dup 3))))] +{ + if (MEM_P (operands[2])) + { + operands[5] = operands[2]; + operands[2] = operands[4]; + } + else if (MEM_P (operands[3])) + { + operands[5] = operands[3]; + operands[3] = operands[4]; + } + else + gcc_unreachable (); +}) (define_expand "mov<mode>cc" [(set (match_operand:X87MODEF 0 "register_operand") @@ -16922,35 +16973,35 @@ ;; Don't do conditional moves with memory inputs (define_peephole2 - [(match_scratch:MODEF 2 "r") + [(match_scratch:MODEF 4 "r") (set (match_operand:MODEF 0 "register_and_not_any_fp_reg_operand") (if_then_else:MODEF (match_operator 1 "fcmov_comparison_operator" [(reg FLAGS_REG) (const_int 0)]) - (match_dup 0) - (match_operand:MODEF 3 "memory_operand")))] + (match_operand:MODEF 2 "nonimmediate_operand") + (match_operand:MODEF 3 "nonimmediate_operand")))] "(<MODE>mode != DFmode || TARGET_64BIT) && TARGET_80387 && TARGET_CMOVE && TARGET_AVOID_MEM_OPND_FOR_CMOVE + && (MEM_P (operands[2]) || MEM_P 
(operands[3])) && optimize_insn_for_speed_p ()" - [(set (match_dup 2) (match_dup 3)) + [(set (match_dup 4) (match_dup 5)) (set (match_dup 0) - (if_then_else:MODEF (match_dup 1) (match_dup 0) (match_dup 2)))]) + (if_then_else:MODEF (match_dup 1) (match_dup 2) (match_dup 3)))] +{ + if (MEM_P (operands[2])) + { + operands[5] = operands[2]; + operands[2] = operands[4]; + } + else if (MEM_P (operands[3])) + { + operands[5] = operands[3]; + operands[3] = operands[4]; + } + else + gcc_unreachable (); +}) -(define_peephole2 - [(match_scratch:MODEF 2 "r") - (set (match_operand:MODEF 0 "register_and_not_any_fp_reg_operand") - (if_then_else:MODEF (match_operator 1 "fcmov_comparison_operator" - [(reg FLAGS_REG) (const_int 0)]) - (match_operand:MODEF 3 "memory_operand") - (match_dup 0)))] - "(<MODE>mode != DFmode || TARGET_64BIT) - && TARGET_80387 && TARGET_CMOVE - && TARGET_AVOID_MEM_OPND_FOR_CMOVE - && optimize_insn_for_speed_p ()" - [(set (match_dup 2) (match_dup 3)) - (set (match_dup 0) - (if_then_else:MODEF (match_dup 1) (match_dup 2) (match_dup 0)))]) - ;; All moves in XOP pcmov instructions are 128 bits and hence we restrict ;; the scalar versions to have only XMM registers as operands. Index: testsuite/gcc.target/i386/cmov9.c =================================================================== --- testsuite/gcc.target/i386/cmov9.c (revision 0) +++ testsuite/gcc.target/i386/cmov9.c (working copy) @@ -0,0 +1,8 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -dp" } */ +/* { dg-final { scan-assembler-not "zero_extendsidi" } } */ + +unsigned long long foo (int a, unsigned int b, unsigned int c) +{ + return a ? b : c; +}