* config/aarch64/aarch64-modes.def (CC_NV): New.
* config/aarch64/aarch64.c (aarch64_gen_compare_reg): Expand
all of the comparisons for TImode, not just NE.
(aarch64_select_cc_mode): Recognize <su>cmp<GPI>_carryin.
(aarch64_get_condition_code_1): Handle CC_NVmode.
* config/aarch64/aarch64.md (cbranchti4, cstoreti4): New.
(ccmp_iorne<GPI>): New.
(<su_optab>cmp<GPI>_carryin): New.
(*<su_optab>cmp<GPI>_carryin): New.
(*<su_optab>cmp<GPI>_carryin_z1): New.
(*<su_optab>cmp<GPI>_carryin_z2): New.
(*cmp<GPI>_carryin_m2, *ucmp<GPI>_carryin_m2): New.
* config/aarch64/iterators.md (CC_EXTEND): New.
* config/aarch64/predicates.md (const_dword_umax): New.
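
As a rough illustration of the expected expansion (illustrative only,
not part of the patch; final register allocation may differ), an
equality test such as

    int eqti (__int128 x, __int128 y) { return x == y; }

should now go through cstoreti4/cbranchti4 and the new ccmp_iorne
pattern, giving approximately

    cmp   x0, x2
    ccmp  x1, x3, 0, eq
    cset  w0, eq

while an equality test against zero takes the IOR shortcut and, when
used for a branch, is expected to fold to orr plus cbz/cbnz.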
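
Ordered comparisons use the new carry-in compare patterns, which
subtract the high words with sbcs and leave the answer in the flags
(N and V for signed, via the new CC_NVmode; the carry for unsigned,
via CC_NOTCmode).  For example, a signed test like

    int ltti (__int128 x, __int128 y) { return x < y; }

should expand to roughly

    cmp   x0, x2
    sbcs  xzr, x1, x3
    cset  w0, lt

again modulo register allocation.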
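
Constant operands are kept on the right where possible: LE/GT (and
LEU/GTU) against a constant C are rewritten as LT/GE against C+1 so
that no operand swap is needed, and when the low word of the constant
is zero the whole comparison reduces to a single compare of the high
words.  For instance (again only a sketch of the expected output)

    int f (__int128 x) { return x < ((__int128)1 << 64); }

should become just

    cmp   x1, 1
    cset  w0, lt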
---
gcc/config/aarch64/aarch64.c | 164 ++++++++++++++++++++++++---
gcc/config/aarch64/aarch64-modes.def | 1 +
gcc/config/aarch64/aarch64.md | 113 ++++++++++++++++++
gcc/config/aarch64/iterators.md | 3 +
gcc/config/aarch64/predicates.md | 9 ++
5 files changed, 277 insertions(+), 13 deletions(-)
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 837ee6a5e37..6c825b341a0 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -2731,32 +2731,143 @@ rtx
aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
{
machine_mode cmp_mode = GET_MODE (x);
- machine_mode cc_mode;
rtx cc_reg;
if (cmp_mode == TImode)
{
- gcc_assert (code == NE);
+ rtx x_lo, x_hi, y_lo, y_hi, tmp;
+ struct expand_operand ops[2];
- cc_mode = CCmode;
- cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM);
+ x_lo = operand_subword (x, 0, 0, TImode);
+ x_hi = operand_subword (x, 1, 0, TImode);
- rtx x_lo = operand_subword (x, 0, 0, TImode);
- rtx y_lo = operand_subword (y, 0, 0, TImode);
- emit_set_insn (cc_reg, gen_rtx_COMPARE (cc_mode, x_lo, y_lo));
+ if (CONST_SCALAR_INT_P (y))
+ {
+ wide_int y_wide = rtx_mode_t (y, TImode);
- rtx x_hi = operand_subword (x, 1, 0, TImode);
- rtx y_hi = operand_subword (y, 1, 0, TImode);
- emit_insn (gen_ccmpccdi (cc_reg, cc_reg, x_hi, y_hi,
- gen_rtx_EQ (cc_mode, cc_reg, const0_rtx),
- GEN_INT (AARCH64_EQ)));
+ switch (code)
+ {
+ case EQ:
+ case NE:
+ /* For equality, IOR the two halves together. If this gets
+ used for a branch, we expect this to fold to cbz/cbnz;
+ otherwise it's no larger than the cmp+ccmp below. The
+ compare-and-swap splitter calls this after reload, when no new
+ pseudo can be created, so fall through to cmp+ccmp there. */
+ if (y_wide == 0 && can_create_pseudo_p ())
+ {
+ tmp = gen_reg_rtx (DImode);
+ emit_insn (gen_iordi3 (tmp, x_hi, x_lo));
+ emit_insn (gen_cmpdi (tmp, const0_rtx));
+ cc_reg = gen_rtx_REG (CCmode, CC_REGNUM);
+ goto done;
+ }
+ break;
+
+ case LE:
+ case GT:
+ /* Add 1 to Y to convert to LT/GE, which avoids the swap and
+ keeps the constant operand. */
+ if (wi::cmps (y_wide, wi::max_value (TImode, SIGNED)) < 0)
+ {
+ y = immed_wide_int_const (wi::add (y_wide, 1), TImode);
+ code = (code == LE ? LT : GE);
+ }
+ break;
+
+ case LEU:
+ case GTU:
+ /* Add 1 to Y to convert to LTU/GEU, which avoids the swap and
+ keeps the constant operand. */
+ if (wi::cmpu (y_wide, wi::max_value (TImode, UNSIGNED)) < 0)
+ {
+ y = immed_wide_int_const (wi::add (y_wide, 1), TImode);
+ code = (code == LEU ? LTU : GEU);
+ }
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ y_lo = simplify_gen_subreg (DImode, y, TImode,
+ subreg_lowpart_offset (DImode, TImode));
+ y_hi = simplify_gen_subreg (DImode, y, TImode,
+ subreg_highpart_offset (DImode, TImode));
+
+ switch (code)
+ {
+ case LEU:
+ case GTU:
+ case LE:
+ case GT:
+ std::swap (x_lo, y_lo);
+ std::swap (x_hi, y_hi);
+ code = swap_condition (code);
+ break;
+
+ case LTU:
+ case GEU:
+ case LT:
+ case GE:
+ /* If the low word of y is 0, then this is simply a normal
+ compare of the upper words. */
+ if (y_lo == const0_rtx)
+ {
+ if (!aarch64_plus_operand (y_hi, DImode))
+ y_hi = force_reg (DImode, y_hi);
+ return aarch64_gen_compare_reg (code, x_hi, y_hi);
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ /* Emit cmpdi, forcing operands into registers as required. */
+ create_input_operand (&ops[0], x_lo, DImode);
+ create_input_operand (&ops[1], y_lo, DImode);
+ expand_insn (CODE_FOR_cmpdi, 2, ops);
+
+ switch (code)
+ {
+ case EQ:
+ case NE:
+ /* For both EQ and NE, compute (x_lo != y_lo) || (x_hi != y_hi). */
+ create_input_operand (&ops[0], x_hi, DImode);
+ create_input_operand (&ops[1], y_hi, DImode);
+ expand_insn (CODE_FOR_ccmp_iornedi, 2, ops);
+ cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
+ break;
+
+ case LTU:
+ case GEU:
+ create_input_operand (&ops[0], x_hi, DImode);
+ create_input_operand (&ops[1], y_hi, DImode);
+ expand_insn (CODE_FOR_ucmpdi_carryin, 2, ops);
+ cc_reg = gen_rtx_REG (CC_NOTCmode, CC_REGNUM);
+ break;
+
+ case LT:
+ case GE:
+ create_input_operand (&ops[0], x_hi, DImode);
+ create_input_operand (&ops[1], y_hi, DImode);
+ expand_insn (CODE_FOR_cmpdi_carryin, 2, ops);
+ cc_reg = gen_rtx_REG (CC_NVmode, CC_REGNUM);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
}
else
{
- cc_mode = SELECT_CC_MODE (code, x, y);
+ machine_mode cc_mode = SELECT_CC_MODE (code, x, y);
cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM);
emit_set_insn (cc_reg, gen_rtx_COMPARE (cc_mode, x, y));
}
+
+ done:
return gen_rtx_fmt_ee (code, VOIDmode, cc_reg, const0_rtx);
}
@@ -9551,6 +9662,24 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
&& GET_CODE (XEXP (y, 0)) == GET_CODE (x))
return CC_Vmode;
+ /* A test for signed GE/LT comparison with borrow. */
+ if ((mode_x == DImode || mode_x == TImode)
+ && (code == GE || code == LT)
+ && (code_x == SIGN_EXTEND || x == const0_rtx)
+ && ((GET_CODE (y) == PLUS
+ && aarch64_borrow_operation (XEXP (y, 0), mode_x))
+ || aarch64_borrow_operation (y, mode_x)))
+ return CC_NVmode;
+
+ /* A test for unsigned GEU/LTU comparison with borrow. */
+ if ((mode_x == DImode || mode_x == TImode)
+ && (code == GEU || code == LTU)
+ && (code_x == ZERO_EXTEND || x == const0_rtx)
+ && ((GET_CODE (y) == PLUS
+ && aarch64_borrow_operation (XEXP (y, 0), mode_x))
+ || aarch64_borrow_operation (y, mode_x)))
+ return CC_NOTCmode;
+
/* For everything else, return CCmode. */
return CCmode;
}
@@ -9690,6 +9819,15 @@ aarch64_get_condition_code_1 (machine_mode mode, enum rtx_code comp_code)
}
break;
+ case E_CC_NVmode:
+ switch (comp_code)
+ {
+ case GE: return AARCH64_GE;
+ case LT: return AARCH64_LT;
+ default: return -1;
+ }
+ break;
+
default:
return -1;
}
diff --git a/gcc/config/aarch64/aarch64-modes.def b/gcc/config/aarch64/aarch64-modes.def
index 181b7b30dcd..beb5919ab01 100644
--- a/gcc/config/aarch64/aarch64-modes.def
+++ b/gcc/config/aarch64/aarch64-modes.def
@@ -41,6 +41,7 @@ CC_MODE (CC_C); /* C represents unsigned overflow of a simple addition. */
CC_MODE (CC_NOTC); /* !C represents unsigned overflow of subtraction,
as well as our representation of add-with-carry. */
CC_MODE (CC_V); /* Only V bit of condition flags is valid. */
+CC_MODE (CC_NV); /* Only N and V bits of condition flags are valid (signed GE/LT). */
/* Half-precision floating point for __fp16. */
FLOAT_MODE (HF, 2, 0);
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 2b5a6eb510d..e62f79ed6f1 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -471,6 +471,20 @@
operands[2] = const0_rtx;
})
+(define_expand "cbranchti4"
+ [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator"
+ [(match_operand:TI 1 "register_operand")
+ (match_operand:TI 2 "aarch64_reg_or_imm")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ ""
+{
+ operands[0] = aarch64_gen_compare_reg (GET_CODE (operands[0]), operands[1],
+ operands[2]);
+ operands[1] = XEXP (operands[0], 0);
+ operands[2] = const0_rtx;
+})
+
(define_expand "cbranch<mode>4"
[(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator"
[(match_operand:GPF 1 "register_operand")
@@ -569,6 +583,25 @@
[(set_attr "type" "fccmp<s>")]
)
+;; This specialization has the advantage of being able to swap operands.
+;; Use CC_NZ because SELECT_CC_MODE uses that for comparisons against 0.
+(define_insn "ccmp_iorne<mode>"
+ [(set (reg:CC_NZ CC_REGNUM)
+ (compare:CC_NZ
+ (ior:SI
+ (ne:SI (reg:CC CC_REGNUM)
+ (const_int 0))
+ (ne:SI (match_operand:GPI 0 "register_operand" "%r,r,r")
+ (match_operand:GPI 1 "aarch64_ccmp_operand" "r,Uss,Usn")))
+ (const_int 0)))]
+ ""
+ "@
+ ccmp\\t%<w>0, %<w>1, 0, eq
+ ccmp\\t%<w>0, %1, 0, eq
+ ccmn\\t%<w>0, #%n1, 0, eq"
+ [(set_attr "type" "alus_sreg,alus_imm,alus_imm")]
+)
+
;; Expansion of signed mod by a power of 2 using CSNEG.
;; For x0 % n where n is a power of 2 produce:
;; negs x1, x0
@@ -3364,6 +3397,72 @@
[(set_attr "type" "adc_reg")]
)
+(define_expand "<su_optab>cmp<mode>_carryin"
+ [(set (reg:<CC_EXTEND> CC_REGNUM)
+ (compare:<CC_EXTEND>
+ (ANY_EXTEND:<DWI> (match_operand:GPI 0 "register_operand"))
+ (plus:<DWI>
+ (geu:<DWI> (reg:CC_C CC_REGNUM) (const_int 0))
+ (ANY_EXTEND:<DWI> (match_operand:GPI 1 "register_operand")))))]
+ ""
+)
+
+(define_insn "*<su_optab>cmp<mode>_carryin"
+ [(set (reg:<CC_EXTEND> CC_REGNUM)
+ (compare:<CC_EXTEND>
+ (ANY_EXTEND:<DWI> (match_operand:GPI 0 "register_operand" "r"))
+ (plus:<DWI>
+ (match_operand:<DWI> 2 "aarch64_borrow_operation" "")
+ (ANY_EXTEND:<DWI> (match_operand:GPI 1 "register_operand" "r")))))]
+ ""
+ "sbcs\\t<w>zr, %<w>0, %<w>1"
+ [(set_attr "type" "adc_reg")]
+)
+
+(define_insn "*<su_optab>cmp<mode>_carryin_z1"
+ [(set (reg:<CC_EXTEND> CC_REGNUM)
+ (compare:<CC_EXTEND>
+ (const_int 0)
+ (plus:<DWI>
+ (match_operand:<DWI> 1 "aarch64_borrow_operation" "")
+ (ANY_EXTEND:<DWI> (match_operand:GPI 0 "register_operand" "r")))))]
+ ""
+ "sbcs\\t<w>zr, <w>zr, %<w>0"
+ [(set_attr "type" "adc_reg")]
+)
+
+(define_insn "*<su_optab>cmp<mode>_carryin_z2"
+ [(set (reg:<CC_EXTEND> CC_REGNUM)
+ (compare:<CC_EXTEND>
+ (ANY_EXTEND:<DWI> (match_operand:GPI 0 "register_operand" "r"))
+ (match_operand:<DWI> 1 "aarch64_borrow_operation" "")))]
+ ""
+ "sbcs\\t<w>zr, %<w>0, <w>zr"
+ [(set_attr "type" "adc_reg")]
+)
+
+(define_insn "*cmp<mode>_carryin_m2"
+ [(set (reg:CC_NV CC_REGNUM)
+ (compare:CC_NV
+ (sign_extend:<DWI> (match_operand:GPI 0 "register_operand" "r"))
+ (neg:<DWI> (match_operand:<DWI> 1 "aarch64_carry_operation" ""))))]
+ ""
+ "adcs\\t<w>zr, %<w>0, <w>zr"
+ [(set_attr "type" "adc_reg")]
+)
+
+(define_insn "*ucmp<mode>_carryin_m2"
+ [(set (reg:CC_NOTC CC_REGNUM)
+ (compare:CC_NOTC
+ (zero_extend:<DWI> (match_operand:GPI 0 "register_operand" "r"))
+ (plus:<DWI>
+ (match_operand:<DWI> 1 "aarch64_borrow_operation" "")
+ (match_operand:<DWI> 2 "const_dword_umax" ""))))]
+ ""
+ "adcs\\t<w>zr, %<w>0, <w>zr"
+ [(set_attr "type" "adc_reg")]
+)
+
(define_expand "usub<GPI:mode>3_carryinC"
[(parallel
[(set (reg:CC_NOTC CC_REGNUM)
@@ -3985,6 +4084,20 @@
operands[3] = const0_rtx;
})
+(define_expand "cstoreti4"
+ [(set (match_operand:SI 0 "register_operand")
+ (match_operator:SI 1 "aarch64_comparison_operator"
+ [(match_operand:TI 2 "register_operand")
+ (match_operand:TI 3 "aarch64_reg_or_imm")]))]
+ ""
+{
+ operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2],
+ operands[3]);
+ PUT_MODE (operands[1], SImode);
+ operands[2] = XEXP (operands[1], 0);
+ operands[3] = const0_rtx;
+})
+
(define_expand "cstorecc4"
[(set (match_operand:SI 0 "register_operand")
(match_operator 1 "aarch64_comparison_operator_mode"
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 8e434389e59..f6f2e9cefd5 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1907,6 +1907,9 @@
(define_code_attr fix_trunc_optab [(fix "fix_trunc")
(unsigned_fix "fixuns_trunc")])
+;; For double-word comparisons
+(define_code_attr CC_EXTEND [(sign_extend "CC_NV") (zero_extend "CC_NOTC")])
+
;; Optab prefix for sign/zero-extending operations
(define_code_attr su_optab [(sign_extend "") (zero_extend "u")
(div "") (udiv "u")
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index e3572d2f60d..93d068cc69c 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -55,6 +55,15 @@
return rtx_mode_t (op, mode) == (wi::shwi (1, mode) << bits);
})
+;; True for (1 << (GET_MODE_BITSIZE (mode) / 2)) - 1
+;; I.e. the unsigned maximum of the half-width mode, as a double-word value.
+(define_predicate "const_dword_umax"
+ (match_code "const_int,const_wide_int")
+{
+ unsigned bits = GET_MODE_BITSIZE (mode).to_constant () / 2;
+ return rtx_mode_t (op, mode) == wi::sub (wi::shwi (1, mode) << bits, 1);
+})
+
(define_predicate "subreg_lowpart_operator"
(ior (match_code "truncate")
(and (match_code "subreg")
--
2.20.1