The following testcase:

unsigned
sub_sat (unsigned x, unsigned y)
{
  unsigned res;

  res = x - y;
  res &= -(x >= y);

  return res;
}

currently compiles (-O2) to:

sub_sat:
        movl    %edi, %edx
        xorl    %eax, %eax
        subl    %esi, %edx
        cmpl    %esi, %edi
        setnb   %al
        negl    %eax
        andl    %edx, %eax
        ret

We can expand through the ussub{m}3 optab to use the carry flag from the
subtraction and generate code using the SBB instruction, implementing:

    unsigned res = x - y;
    res &= ~(-(x < y));

sub_sat:
        subl    %esi, %edi
        sbbl    %eax, %eax
        notl    %eax
        andl    %edi, %eax
        ret

        PR target/112600

gcc/ChangeLog:

        * config/i386/i386.md (ussub<mode>3): New expander.
        (sub<mode>_3): Ditto.
        (usadd<mode>3): Use OPTAB_WIDEN instead of OPTAB_DIRECT.

gcc/testsuite/ChangeLog:

        * gcc.target/i386/pr112600-b.c: New test.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Uros.

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index bc2ef819df6..d69bc8d6e48 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -8436,6 +8436,14 @@ (define_expand "usubv<mode>4"
   "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands,
                                        TARGET_APX_NDD);")
 
+(define_expand "sub<mode>_3" ;; Subtract and set FLAGS_REG in one parallel.
+  [(parallel [(set (reg:CC FLAGS_REG) ;; Flags: compare of op 1 with op 2.
+                   (compare:CC
+                     (match_operand:SWI 1 "nonimmediate_operand")
+                     (match_operand:SWI 2 "<general_operand>")))
+              (set (match_operand:SWI 0 "register_operand") ;; op 0 = op 1 - op 2.
+                   (minus:SWI (match_dup 1) (match_dup 2)))])])
+
 (define_insn "*sub<mode>_3"
   [(set (reg FLAGS_REG)
         (compare (match_operand:SWI 1 "nonimmediate_operand" "0,0,rm,r")
@@ -9883,7 +9891,28 @@ (define_expand "usadd<mode>3"
   emit_insn (gen_add<mode>3_cc_overflow_1 (res, operands[1], operands[2]));
   emit_insn (gen_x86_mov<mode>cc_0_m1_neg (msk));
   dst = expand_simple_binop (<MODE>mode, IOR, res, msk,
-                             operands[0], 1, OPTAB_DIRECT);
+                             operands[0], 1, OPTAB_WIDEN);
+
+  if (!rtx_equal_p (dst, operands[0]))
+    emit_move_insn (operands[0], dst);
+  DONE;
+})
+
+(define_expand "ussub<mode>3" ;; Expander for the ussub<mode>3 optab (us_minus).
+  [(set (match_operand:SWI 0 "register_operand")
+        (us_minus:SWI (match_operand:SWI 1 "register_operand")
+                      (match_operand:SWI 2 "<general_operand>")))]
+  ""
+{
+  rtx res = gen_reg_rtx (<MODE>mode); /* Receives op 1 - op 2.  */
+  rtx msk = gen_reg_rtx (<MODE>mode); /* Mask built from the flags.  */
+  rtx dst;
+
+  emit_insn (gen_sub<mode>_3 (res, operands[1], operands[2])); /* Also sets FLAGS_REG.  */
+  emit_insn (gen_x86_mov<mode>cc_0_m1_neg (msk)); /* NOTE(review): presumably -(op 1 < op 2) via SBB; confirm.  */
+  msk = expand_simple_unop (<MODE>mode, NOT, msk, NULL, 1); /* msk = ~msk.  */
+  dst = expand_simple_binop (<MODE>mode, AND, res, msk, /* dst = res & msk.  */
+                             operands[0], 1, OPTAB_WIDEN);
 
   if (!rtx_equal_p (dst, operands[0]))
     emit_move_insn (operands[0], dst);