After having just spent a few days looking through dumps of builtin-overflow-*.c for regressions while testing the patch for the TImode arithmetic PR, I thought I'd go ahead and post a patch to make use of the overflow bit on aarch64.
Consider this queued for stage1. r~
* config/aarch64/aarch64-modes.def (CC_V): New. * config/aarch64/aarch64.c (aarch64_zero_extend_const_eq): New. (aarch64_select_cc_mode): Test for signed overflow using CC_Vmode. (aarch64_get_condition_code_1): Handle CC_Vmode. * config/aarch64/aarch64-protos.h: Update. * config/aarch64/aarch64.md (addv<GPI>4, uaddv<GPI>4): New. (addti3): Create simpler code if low part is already known to be 0. (addvti4, uaddvti4): New. (*add<GPI>3_compareC_cconly_imm): New. (*add<GPI>3_compareC_cconly): New. (*add<GPI>3_compareC_imm): New. (add<GPI>3_compareC): Rename from add<GPI>3_compare1; do not handle constants within this pattern. (*add<GPI>3_compareV_cconly_imm): New. (*add<GPI>3_compareV_cconly): New. (*add<GPI>3_compareV_imm): New. (add<GPI>3_compareV): New. (add<GPI>3_carryinC, add<GPI>3_carryinV): New. (*add<GPI>3_carryinC_zero, *add<GPI>3_carryinV_zero): New. (*add<GPI>3_carryinC, *add<GPI>3_carryinV): New. (subv<GPI>4, usubv<GPI>4): New. (subti3): Handle op1 zero. (subvti4, usubvti4): New. (*sub<GPI>3_compare1_imm): New. (sub<GPI>3_carryinCV): New. (*sub<GPI>3_carryinCV_z1_z2, *sub<GPI>3_carryinCV_z1): New. (*sub<GPI>3_carryinCV_z2, *sub<GPI>3_carryinCV): New. diff --git a/gcc/config/aarch64/aarch64-modes.def b/gcc/config/aarch64/aarch64-modes.def index 7de0b3f..f34345a 100644 --- a/gcc/config/aarch64/aarch64-modes.def +++ b/gcc/config/aarch64/aarch64-modes.def @@ -26,6 +26,7 @@ CC_MODE (CC_SESWP); /* sign-extend LHS (but swap to make it RHS). */ CC_MODE (CC_NZ); /* Only N and Z bits of condition flags are valid. */ CC_MODE (CC_Z); /* Only Z bit of condition flags is valid. */ CC_MODE (CC_C); /* Only C bit of condition flags is valid. */ +CC_MODE (CC_V); /* Only V bit of condition flags is valid. */ /* Half-precision floating point for __fp16. 
*/ FLOAT_MODE (HF, 2, 0); diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index 15fc37d..32cf245 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -289,6 +289,7 @@ void aarch64_declare_function_name (FILE *, const char*, tree); bool aarch64_legitimate_pic_operand_p (rtx); bool aarch64_modes_tieable_p (machine_mode mode1, machine_mode mode2); +bool aarch64_zero_extend_const_eq (machine_mode, rtx, machine_mode, rtx); bool aarch64_move_imm (HOST_WIDE_INT, machine_mode); bool aarch64_mov_operand_p (rtx, machine_mode); int aarch64_simd_attr_length_rglist (enum machine_mode); diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 0c18ab2..191d081 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -1489,6 +1489,16 @@ aarch64_split_simd_move (rtx dst, rtx src) } } +bool +aarch64_zero_extend_const_eq (machine_mode xmode, rtx x, + machine_mode ymode, rtx y) +{ + rtx r = simplify_const_unary_operation (ZERO_EXTEND, xmode, y, ymode); + gcc_assert (r != NULL); + return rtx_equal_p (x, r); +} + + static rtx aarch64_force_temporary (machine_mode mode, rtx x, rtx value) { @@ -4192,6 +4202,13 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y) && GET_CODE (y) == ZERO_EXTEND) return CC_Cmode; + /* A test for signed overflow. */ + if ((GET_MODE (x) == DImode || GET_MODE (x) == TImode) + && code == NE + && GET_CODE (x) == PLUS + && GET_CODE (y) == SIGN_EXTEND) + return CC_Vmode; + /* For everything else, return CCmode. 
*/ return CCmode; } @@ -4300,6 +4317,15 @@ aarch64_get_condition_code_1 (enum machine_mode mode, enum rtx_code comp_code) } break; + case CC_Vmode: + switch (comp_code) + { + case NE: return AARCH64_VS; + case EQ: return AARCH64_VC; + default: return -1; + } + break; + default: return -1; break; diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 363785e..46056f2 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -1703,22 +1703,150 @@ } ) +(define_expand "addv<mode>4" + [(match_operand:GPI 0 "register_operand") + (match_operand:GPI 1 "register_operand") + (match_operand:GPI 2 "register_operand") + (match_operand 3 "")] + "" +{ + emit_insn (gen_add<mode>3_compareV (operands[0], operands[1], operands[2])); + + rtx x; + x = gen_rtx_NE (VOIDmode, gen_rtx_REG (CC_Vmode, CC_REGNUM), const0_rtx); + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, + gen_rtx_LABEL_REF (VOIDmode, operands[3]), + pc_rtx); + emit_jump_insn (gen_rtx_SET (pc_rtx, x)); + DONE; +}) + +(define_expand "uaddv<mode>4" + [(match_operand:GPI 0 "register_operand") + (match_operand:GPI 1 "register_operand") + (match_operand:GPI 2 "register_operand") + (match_operand 3 "")] + "" +{ + emit_insn (gen_add<mode>3_compareC (operands[0], operands[1], operands[2])); + + rtx x; + x = gen_rtx_NE (VOIDmode, gen_rtx_REG (CC_Cmode, CC_REGNUM), const0_rtx); + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, + gen_rtx_LABEL_REF (VOIDmode, operands[3]), + pc_rtx); + emit_jump_insn (gen_rtx_SET (pc_rtx, x)); + DONE; +}) + (define_expand "addti3" [(set (match_operand:TI 0 "register_operand" "") (plus:TI (match_operand:TI 1 "register_operand" "") - (match_operand:TI 2 "register_operand" "")))] + (match_operand:TI 2 "aarch64_reg_or_imm" "")))] "" { - rtx low = gen_reg_rtx (DImode); - emit_insn (gen_adddi3_compare1 (low, gen_lowpart (DImode, operands[1]), - gen_lowpart (DImode, operands[2]))); + rtx l0 = gen_reg_rtx (DImode); + rtx l1 = gen_lowpart (DImode, operands[1]); + rtx l2 = 
simplify_gen_subreg (DImode, operands[2], TImode, + subreg_lowpart_offset (DImode, TImode)); + rtx h0 = gen_reg_rtx (DImode); + rtx h1 = gen_highpart (DImode, operands[1]); + rtx h2 = simplify_gen_subreg (DImode, operands[2], TImode, + subreg_highpart_offset (DImode, TImode)); - rtx high = gen_reg_rtx (DImode); - emit_insn (gen_adddi3_carryin (high, gen_highpart (DImode, operands[1]), - gen_highpart (DImode, operands[2]))); + if (l2 == const0_rtx) + { + l0 = l1; + if (!aarch64_pluslong_operand (h2, DImode)) + h2 = force_reg (DImode, h2); + emit_insn (gen_adddi3 (h0, h1, h2)); + } + else + { + emit_insn (gen_adddi3_compareC (l0, l1, force_reg (DImode, l2))); + emit_insn (gen_adddi3_carryin (h0, h1, force_reg (DImode, h2))); + } - emit_move_insn (gen_lowpart (DImode, operands[0]), low); - emit_move_insn (gen_highpart (DImode, operands[0]), high); + emit_move_insn (gen_lowpart (DImode, operands[0]), l0); + emit_move_insn (gen_highpart (DImode, operands[0]), h0); + DONE; +}) + +(define_expand "addvti4" + [(match_operand:TI 0 "register_operand" "") + (match_operand:TI 1 "register_operand" "") + (match_operand:TI 2 "aarch64_reg_or_imm" "") + (match_operand 3 "")] + "" +{ + rtx l0 = gen_reg_rtx (DImode); + rtx l1 = gen_lowpart (DImode, operands[1]); + rtx l2 = simplify_gen_subreg (DImode, operands[2], TImode, + subreg_lowpart_offset (DImode, TImode)); + rtx h0 = gen_reg_rtx (DImode); + rtx h1 = gen_highpart (DImode, operands[1]); + rtx h2 = simplify_gen_subreg (DImode, operands[2], TImode, + subreg_highpart_offset (DImode, TImode)); + + if (l2 == const0_rtx) + { + l0 = l1; + emit_insn (gen_adddi3_compareV (h0, h1, force_reg (DImode, h2))); + } + else + { + emit_insn (gen_adddi3_compareC (l0, l1, force_reg (DImode, l2))); + emit_insn (gen_adddi3_carryinV (h0, h1, force_reg (DImode, h2))); + } + + emit_move_insn (gen_lowpart (DImode, operands[0]), l0); + emit_move_insn (gen_highpart (DImode, operands[0]), h0); + + rtx x; + x = gen_rtx_NE (VOIDmode, gen_rtx_REG (CC_Vmode, 
CC_REGNUM), const0_rtx); + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, + gen_rtx_LABEL_REF (VOIDmode, operands[3]), + pc_rtx); + emit_jump_insn (gen_rtx_SET (pc_rtx, x)); + DONE; +}) + +(define_expand "uaddvti4" + [(match_operand:TI 0 "register_operand" "") + (match_operand:TI 1 "register_operand" "") + (match_operand:TI 2 "aarch64_reg_or_imm" "") + (match_operand 3 "")] + "" +{ + rtx l0 = gen_reg_rtx (DImode); + rtx l1 = gen_lowpart (DImode, operands[1]); + rtx l2 = simplify_gen_subreg (DImode, operands[2], TImode, + subreg_lowpart_offset (DImode, TImode)); + rtx h0 = gen_reg_rtx (DImode); + rtx h1 = gen_highpart (DImode, operands[1]); + rtx h2 = simplify_gen_subreg (DImode, operands[2], TImode, + subreg_highpart_offset (DImode, TImode)); + + if (l2 == const0_rtx) + { + l0 = l1; + emit_insn (gen_adddi3_compareC (h0, h1, force_reg (DImode, h2))); + } + else + { + emit_insn (gen_adddi3_compareC (l0, l1, force_reg (DImode, l2))); + emit_insn (gen_adddi3_carryinC (h0, h1, force_reg (DImode, h2))); + } + + emit_move_insn (gen_lowpart (DImode, operands[0]), l0); + emit_move_insn (gen_highpart (DImode, operands[0]), h0); + + rtx x; + x = gen_rtx_NE (VOIDmode, gen_rtx_REG (CC_Cmode, CC_REGNUM), const0_rtx); + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, + gen_rtx_LABEL_REF (VOIDmode, operands[3]), + pc_rtx); + emit_jump_insn (gen_rtx_SET (pc_rtx, x)); DONE; }) @@ -1755,42 +1883,129 @@ [(set_attr "type" "alus_sreg,alus_imm,alus_imm")] ) -(define_insn "*add<mode>3_compare1_cconly" +(define_insn "*add<mode>3_compareC_cconly_imm" [(set (reg:CC_C CC_REGNUM) (ne:CC_C (plus:<DWI> - (zero_extend:<DWI> - (match_operand:GPI 0 "aarch64_reg_or_zero" "%rZ,rZ,rZ")) - (zero_extend:<DWI> - (match_operand:GPI 1 "aarch64_plus_operand" "r,I,J"))) + (zero_extend:<DWI> (match_operand:GPI 0 "register_operand" "r,r")) + (match_operand:<DWI> 2 "const_scalar_int_operand" "")) (zero_extend:<DWI> - (plus:GPI (match_dup 0) (match_dup 1)))))] - "" + (plus:GPI + (match_dup 0) + (match_operand:GPI 1 
"aarch64_plus_immediate" "I,J")))))] + "aarch64_zero_extend_const_eq (<DWI>mode, operands[2], + <MODE>mode, operands[1])" "@ cmn\\t%<w>0, %<w>1 - cmn\\t%<w>0, %<w>1 cmp\\t%<w>0, #%n1" - [(set_attr "type" "alus_sreg,alus_imm,alus_imm")] + [(set_attr "type" "alus_imm")] +) + +(define_insn "*add<mode>3_compareC_cconly" + [(set (reg:CC_C CC_REGNUM) + (ne:CC_C + (plus:<DWI> + (zero_extend:<DWI> (match_operand:GPI 0 "register_operand" "r")) + (zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) + (zero_extend:<DWI> (plus:GPI (match_dup 0) (match_dup 1)))))] + "" + "cmn\\t%<w>0, %<w>1" + [(set_attr "type" "alus_sreg")] +) + +(define_insn "*add<mode>3_compareC_imm" + [(set (reg:CC_C CC_REGNUM) + (ne:CC_C + (plus:<DWI> + (zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r,r")) + (match_operand:<DWI> 3 "const_scalar_int_operand" "")) + (zero_extend:<DWI> + (plus:GPI + (match_dup 1) + (match_operand:GPI 2 "aarch64_plus_immediate" "I,J"))))) + (set (match_operand:GPI 0 "register_operand" "=r,r") + (plus:GPI (match_dup 1) (match_dup 2)))] + "aarch64_zero_extend_const_eq (<DWI>mode, operands[3], + <MODE>mode, operands[2])" + "@ + adds\\t%<w>0, %<w>1, %<w>2 + subs\\t%<w>0, %<w>1, #%n2" + [(set_attr "type" "alus_imm")] ) -(define_insn "add<mode>3_compare1" +(define_insn "add<mode>3_compareC" [(set (reg:CC_C CC_REGNUM) (ne:CC_C (plus:<DWI> - (zero_extend:<DWI> - (match_operand:GPI 1 "aarch64_reg_or_zero" "%rZ,rZ,rZ")) - (zero_extend:<DWI> - (match_operand:GPI 2 "aarch64_plus_operand" "r,I,J"))) + (zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r")) + (zero_extend:<DWI> (match_operand:GPI 2 "register_operand" "r"))) (zero_extend:<DWI> (plus:GPI (match_dup 1) (match_dup 2))))) - (set (match_operand:GPI 0 "register_operand" "=r,r,r") + (set (match_operand:GPI 0 "register_operand" "=r") + (plus:GPI (match_dup 1) (match_dup 2)))] + "" + "adds\\t%<w>0, %<w>1, %<w>2" + [(set_attr "type" "alus_sreg")] +) + +;; Note that since we're sign-extending, match 
the immediate in GPI +;; rather than in DWI. Since CONST_INT is modeless, this works fine. +(define_insn "*add<mode>3_compareV_cconly_imm" + [(set (reg:CC_V CC_REGNUM) + (ne:CC_V + (plus:<DWI> + (sign_extend:<DWI> (match_operand:GPI 0 "register_operand" "r,r")) + (match_operand:GPI 1 "aarch64_plus_immediate" "I,J")) + (sign_extend:<DWI> (plus:GPI (match_dup 0) (match_dup 1)))))] + "" + "@ + cmn\\t%<w>0, %<w>1 + cmp\\t%<w>0, #%n1" + [(set_attr "type" "alus_imm")] +) + +(define_insn "*add<mode>3_compareV_cconly" + [(set (reg:CC_V CC_REGNUM) + (ne:CC_V + (plus:<DWI> + (sign_extend:<DWI> (match_operand:GPI 0 "register_operand" "r")) + (sign_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) + (sign_extend:<DWI> (plus:GPI (match_dup 0) (match_dup 1)))))] + "" + "cmn\\t%<w>0, %<w>1" + [(set_attr "type" "alus_sreg")] +) + +(define_insn "*add<mode>3_compareV_imm" + [(set (reg:CC_V CC_REGNUM) + (ne:CC_V + (plus:<DWI> + (sign_extend:<DWI> + (match_operand:GPI 1 "register_operand" "r,r")) + (match_operand:GPI 2 "aarch64_plus_immediate" "I,J")) + (sign_extend:<DWI> + (plus:GPI (match_dup 1) (match_dup 2))))) + (set (match_operand:GPI 0 "register_operand" "=r,r") (plus:GPI (match_dup 1) (match_dup 2)))] "" "@ - adds\\t%<w>0, %<w>1, %<w>2 adds\\t%<w>0, %<w>1, %<w>2 subs\\t%<w>0, %<w>1, #%n2" - [(set_attr "type" "alus_sreg,alus_imm,alus_imm")] + [(set_attr "type" "alus_imm,alus_imm")] +) + +(define_insn "add<mode>3_compareV" + [(set (reg:CC_V CC_REGNUM) + (ne:CC_V + (plus:<DWI> + (sign_extend:<DWI> (match_operand:GPI 1 "register_operand" "r")) + (sign_extend:<DWI> (match_operand:GPI 2 "register_operand" "r"))) + (sign_extend:<DWI> (plus:GPI (match_dup 1) (match_dup 2))))) + (set (match_operand:GPI 0 "register_operand" "=r") + (plus:GPI (match_dup 1) (match_dup 2)))] + "" + "adds\\t%<w>0, %<w>1, %<w>2" + [(set_attr "type" "alus_sreg")] ) (define_insn "*adds_shift_imm_<mode>" @@ -2153,6 +2368,138 @@ [(set_attr "type" "adc_reg")] ) +(define_expand "add<mode>3_carryinC" + 
[(parallel + [(set (match_dup 3) + (ne:CC_C + (plus:<DWI> + (plus:<DWI> + (match_dup 4) + (zero_extend:<DWI> + (match_operand:GPI 1 "register_operand" "r"))) + (zero_extend:<DWI> + (match_operand:GPI 2 "register_operand" "r"))) + (zero_extend:<DWI> + (plus:GPI + (plus:GPI (match_dup 5) (match_dup 1)) + (match_dup 2))))) + (set (match_operand:GPI 0 "register_operand") + (plus:GPI + (plus:GPI (match_dup 5) (match_dup 1)) + (match_dup 2)))])] + "" +{ + operands[3] = gen_rtx_REG (CC_Cmode, CC_REGNUM); + operands[4] = gen_rtx_NE (<DWI>mode, operands[3], const0_rtx); + operands[5] = gen_rtx_NE (<MODE>mode, operands[3], const0_rtx); +}) + +(define_insn "*add<mode>3_carryinC_zero" + [(set (reg:CC_C CC_REGNUM) + (ne:CC_C + (plus:<DWI> + (match_operand:<DWI> 2 "aarch64_carry_operation" "") + (zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) + (zero_extend:<DWI> + (plus:GPI + (match_operand:GPI 3 "aarch64_carry_operation" "") + (match_dup 1))))) + (set (match_operand:GPI 0 "register_operand") + (plus:GPI (match_dup 3) (match_dup 1)))] + "" + "adcs\\t%<w>0, %<w>1, <w>zr" + [(set_attr "type" "adc_reg")] +) + +(define_insn "*add<mode>3_carryinC" + [(set (reg:CC_C CC_REGNUM) + (ne:CC_C + (plus:<DWI> + (plus:<DWI> + (match_operand:<DWI> 3 "aarch64_carry_operation" "") + (zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) + (zero_extend:<DWI> (match_operand:GPI 2 "register_operand" "r"))) + (zero_extend:<DWI> + (plus:GPI + (plus:GPI + (match_operand:GPI 4 "aarch64_carry_operation" "") + (match_dup 1)) + (match_dup 2))))) + (set (match_operand:GPI 0 "register_operand") + (plus:GPI + (plus:GPI (match_dup 4) (match_dup 1)) + (match_dup 2)))] + "" + "adcs\\t%<w>0, %<w>1, %<w>2" + [(set_attr "type" "adc_reg")] +) + +(define_expand "add<mode>3_carryinV" + [(parallel + [(set (reg:CC_V CC_REGNUM) + (ne:CC_V + (plus:<DWI> + (plus:<DWI> + (match_dup 3) + (sign_extend:<DWI> + (match_operand:GPI 1 "register_operand" "r"))) + (sign_extend:<DWI> + (match_operand:GPI 
2 "register_operand" "r"))) + (sign_extend:<DWI> + (plus:GPI + (plus:GPI (match_dup 4) (match_dup 1)) + (match_dup 2))))) + (set (match_operand:GPI 0 "register_operand") + (plus:GPI + (plus:GPI (match_dup 4) (match_dup 1)) + (match_dup 2)))])] + "" +{ + rtx cc = gen_rtx_REG (CC_Cmode, CC_REGNUM); + operands[3] = gen_rtx_NE (<DWI>mode, cc, const0_rtx); + operands[4] = gen_rtx_NE (<MODE>mode, cc, const0_rtx); +}) + +(define_insn "*add<mode>3_carryinV_zero" + [(set (reg:CC_V CC_REGNUM) + (ne:CC_V + (plus:<DWI> + (match_operand:<DWI> 2 "aarch64_carry_operation" "") + (sign_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) + (sign_extend:<DWI> + (plus:GPI + (match_operand:GPI 3 "aarch64_carry_operation" "") + (match_dup 1))))) + (set (match_operand:GPI 0 "register_operand") + (plus:GPI (match_dup 3) (match_dup 1)))] + "" + "adcs\\t%<w>0, %<w>1, <w>zr" + [(set_attr "type" "adc_reg")] +) + +(define_insn "*add<mode>3_carryinV" + [(set (reg:CC_V CC_REGNUM) + (ne:CC_V + (plus:<DWI> + (plus:<DWI> + (match_operand:<DWI> 3 "aarch64_carry_operation" "") + (sign_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) + (sign_extend:<DWI> (match_operand:GPI 2 "register_operand" "r"))) + (sign_extend:<DWI> + (plus:GPI + (plus:GPI + (match_operand:GPI 4 "aarch64_carry_operation" "") + (match_dup 1)) + (match_dup 2))))) + (set (match_operand:GPI 0 "register_operand") + (plus:GPI + (plus:GPI (match_dup 4) (match_dup 1)) + (match_dup 2)))] + "" + "adcs\\t%<w>0, %<w>1, %<w>2" + [(set_attr "type" "adc_reg")] +) + (define_insn "*add_uxt<mode>_shift2" [(set (match_operand:GPI 0 "register_operand" "=rk") (plus:GPI (and:GPI @@ -2249,22 +2596,158 @@ (set_attr "simd" "*,yes")] ) +(define_expand "subv<mode>4" + [(match_operand:GPI 0 "register_operand") + (match_operand:GPI 1 "aarch64_reg_or_zero") + (match_operand:GPI 2 "aarch64_reg_or_zero") + (match_operand 3 "")] + "" +{ + emit_insn (gen_sub<mode>3_compare1 (operands[0], operands[1], operands[2])); + + rtx x; + x = gen_rtx_NE 
(VOIDmode, gen_rtx_REG (CC_Vmode, CC_REGNUM), const0_rtx); + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, + gen_rtx_LABEL_REF (VOIDmode, operands[3]), + pc_rtx); + emit_jump_insn (gen_rtx_SET (pc_rtx, x)); + DONE; +}) + +(define_expand "usubv<mode>4" + [(match_operand:GPI 0 "register_operand") + (match_operand:GPI 1 "aarch64_reg_or_zero") + (match_operand:GPI 2 "aarch64_reg_or_zero") + (match_operand 3 "")] + "" +{ + emit_insn (gen_sub<mode>3_compare1 (operands[0], operands[1], operands[2])); + + rtx x; + x = gen_rtx_LTU (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM), const0_rtx); + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, + gen_rtx_LABEL_REF (VOIDmode, operands[3]), + pc_rtx); + emit_jump_insn (gen_rtx_SET (pc_rtx, x)); + DONE; +}) + (define_expand "subti3" [(set (match_operand:TI 0 "register_operand" "") - (minus:TI (match_operand:TI 1 "register_operand" "") + (minus:TI (match_operand:TI 1 "aarch64_reg_or_zero" "") (match_operand:TI 2 "register_operand" "")))] "" { - rtx low = gen_reg_rtx (DImode); - emit_insn (gen_subdi3_compare1 (low, gen_lowpart (DImode, operands[1]), - gen_lowpart (DImode, operands[2]))); + rtx l0 = gen_reg_rtx (DImode); + rtx l1 = simplify_gen_subreg (DImode, operands[1], TImode, + subreg_lowpart_offset (DImode, TImode)); + rtx l2 = gen_lowpart (DImode, operands[2]); + rtx h0 = gen_reg_rtx (DImode); + rtx h1 = simplify_gen_subreg (DImode, operands[1], TImode, + subreg_highpart_offset (DImode, TImode)); + rtx h2 = gen_highpart (DImode, operands[2]); - rtx high = gen_reg_rtx (DImode); - emit_insn (gen_subdi3_carryin (high, gen_highpart (DImode, operands[1]), - gen_highpart (DImode, operands[2]))); + emit_insn (gen_subdi3_compare1 (l0, l1, l2)); + emit_insn (gen_subdi3_carryin (h0, h1, h2)); - emit_move_insn (gen_lowpart (DImode, operands[0]), low); - emit_move_insn (gen_highpart (DImode, operands[0]), high); + emit_move_insn (gen_lowpart (DImode, operands[0]), l0); + emit_move_insn (gen_highpart (DImode, operands[0]), h0); + DONE; +}) + +(define_expand 
"subvti4" + [(match_operand:TI 0 "register_operand") + (match_operand:TI 1 "aarch64_reg_or_zero") + (match_operand:TI 2 "aarch64_reg_or_imm") + (match_operand 3 "")] + "" +{ + rtx l0 = gen_reg_rtx (DImode); + rtx l1 = simplify_gen_subreg (DImode, operands[1], TImode, + subreg_lowpart_offset (DImode, TImode)); + rtx l2 = simplify_gen_subreg (DImode, operands[2], TImode, + subreg_lowpart_offset (DImode, TImode)); + rtx h0 = gen_reg_rtx (DImode); + rtx h1 = simplify_gen_subreg (DImode, operands[1], TImode, + subreg_highpart_offset (DImode, TImode)); + rtx h2 = simplify_gen_subreg (DImode, operands[2], TImode, + subreg_highpart_offset (DImode, TImode)); + + if (l2 == const0_rtx) + { + l0 = l1; + emit_insn (gen_subdi3_compare1 (h0, h1, force_reg (DImode, h2))); + } + else + { + if (CONST_INT_P (l2)) + { + l2 = force_reg (DImode, GEN_INT (-UINTVAL (l2))); + h2 = force_reg (DImode, h2); + emit_insn (gen_adddi3_compareC (l0, l1, l2)); + } + else + emit_insn (gen_subdi3_compare1 (l0, l1, l2)); + emit_insn (gen_subdi3_carryinCV (h0, force_reg (DImode, h1), h2)); + } + + emit_move_insn (gen_lowpart (DImode, operands[0]), l0); + emit_move_insn (gen_highpart (DImode, operands[0]), h0); + + rtx x; + x = gen_rtx_NE (VOIDmode, gen_rtx_REG (CC_Vmode, CC_REGNUM), const0_rtx); + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, + gen_rtx_LABEL_REF (VOIDmode, operands[3]), + pc_rtx); + emit_jump_insn (gen_rtx_SET (pc_rtx, x)); + DONE; +}) + +(define_expand "usubvti4" + [(match_operand:TI 0 "register_operand") + (match_operand:TI 1 "aarch64_reg_or_zero") + (match_operand:TI 2 "aarch64_reg_or_imm") + (match_operand 3 "")] + "" +{ + rtx l0 = gen_reg_rtx (DImode); + rtx l1 = simplify_gen_subreg (DImode, operands[1], TImode, + subreg_lowpart_offset (DImode, TImode)); + rtx l2 = simplify_gen_subreg (DImode, operands[2], TImode, + subreg_lowpart_offset (DImode, TImode)); + rtx h0 = gen_reg_rtx (DImode); + rtx h1 = simplify_gen_subreg (DImode, operands[1], TImode, + subreg_highpart_offset (DImode, 
TImode)); + rtx h2 = simplify_gen_subreg (DImode, operands[2], TImode, + subreg_highpart_offset (DImode, TImode)); + + if (l2 == const0_rtx) + { + l0 = l1; + emit_insn (gen_subdi3_compare1 (h0, h1, force_reg (DImode, h2))); + } + else + { + if (CONST_INT_P (l2)) + { + l2 = force_reg (DImode, GEN_INT (-UINTVAL (l2))); + h2 = force_reg (DImode, h2); + emit_insn (gen_adddi3_compareC (l0, l1, l2)); + } + else + emit_insn (gen_subdi3_compare1 (l0, l1, l2)); + emit_insn (gen_subdi3_carryinCV (h0, force_reg (DImode, h1), h2)); + } + + emit_move_insn (gen_lowpart (DImode, operands[0]), l0); + emit_move_insn (gen_highpart (DImode, operands[0]), h0); + + rtx x; + x = gen_rtx_LTU(VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM), const0_rtx); + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, + gen_rtx_LABEL_REF (VOIDmode, operands[3]), + pc_rtx); + emit_jump_insn (gen_rtx_SET (pc_rtx, x)); DONE; }) @@ -2293,6 +2776,22 @@ [(set_attr "type" "alus_sreg")] ) +(define_insn "*sub<mode>3_compare1_imm" + [(set (reg:CC CC_REGNUM) + (compare:CC + (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ,rZ") + (match_operand:GPI 2 "aarch64_plus_immediate" "I,J"))) + (set (match_operand:GPI 0 "register_operand" "=r,r") + (plus:GPI + (match_dup 1) + (match_operand:GPI 3 "aarch64_plus_immediate" "J,I")))] + "UINTVAL (operands[2]) == -UINTVAL (operands[3])" + "@ + subs\\t%<w>0, %<w>1, %<w>2 + adds\\t%<w>0, %<w>1, %<w>3" + [(set_attr "type" "alus_imm")] +) + (define_insn "sub<mode>3_compare1" [(set (reg:CC CC_REGNUM) (compare:CC @@ -2520,6 +3019,85 @@ [(set_attr "type" "adc_reg")] ) +(define_expand "sub<mode>3_carryinCV" + [(parallel + [(set (reg:CC CC_REGNUM) + (compare:CC + (sign_extend:<DWI> + (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")) + (plus:<DWI> + (sign_extend:<DWI> + (match_operand:GPI 2 "register_operand" "r")) + (ltu:<DWI> (reg:CC CC_REGNUM) (const_int 0))))) + (set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI + (minus:GPI (match_dup 1) (match_dup 2)) + (ltu:GPI (reg:CC CC_REGNUM) 
(const_int 0))))])] + "" +) + +(define_insn "*sub<mode>3_carryinCV_z1_z2" + [(set (reg:CC CC_REGNUM) + (compare:CC + (const_int 0) + (match_operand:<DWI> 2 "aarch64_borrow_operation" ""))) + (set (match_operand:GPI 0 "register_operand" "=r") + (neg:GPI (match_operand:GPI 1 "aarch64_borrow_operation" "")))] + "" + "sbcs\\t%<w>0, <w>zr, <w>zr" + [(set_attr "type" "adc_reg")] +) + +(define_insn "*sub<mode>3_carryinCV_z1" + [(set (reg:CC CC_REGNUM) + (compare:CC + (const_int 0) + (plus:<DWI> + (sign_extend:<DWI> + (match_operand:GPI 1 "register_operand" "r")) + (match_operand:<DWI> 2 "aarch64_borrow_operation" "")))) + (set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI + (neg:GPI (match_dup 1)) + (match_operand:GPI 3 "aarch64_borrow_operation" "")))] + "" + "sbcs\\t%<w>0, <w>zr, %<w>1" + [(set_attr "type" "adc_reg")] +) + +(define_insn "*sub<mode>3_carryinCV_z2" + [(set (reg:CC CC_REGNUM) + (compare:CC + (sign_extend:<DWI> + (match_operand:GPI 1 "register_operand" "r")) + (match_operand:<DWI> 2 "aarch64_borrow_operation" ""))) + (set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI + (match_dup 1) + (match_operand:GPI 3 "aarch64_borrow_operation" "")))] + "" + "sbcs\\t%<w>0, %<w>1, <w>zr" + [(set_attr "type" "adc_reg")] +) + +(define_insn "*sub<mode>3_carryinCV" + [(set (reg:CC CC_REGNUM) + (compare:CC + (sign_extend:<DWI> + (match_operand:GPI 1 "register_operand" "r")) + (plus:<DWI> + (sign_extend:<DWI> + (match_operand:GPI 2 "register_operand" "r")) + (match_operand:<DWI> 3 "aarch64_borrow_operation" "")))) + (set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI + (minus:GPI (match_dup 1) (match_dup 2)) + (match_operand:GPI 4 "aarch64_borrow_operation" "")))] + "" + "sbcs\\t%<w>0, %<w>1, %<w>2" + [(set_attr "type" "adc_reg")] +) + (define_insn "*sub_uxt<mode>_shift2" [(set (match_operand:GPI 0 "register_operand" "=rk") (minus:GPI (match_operand:GPI 4 "register_operand" "rk")