Hi Richard,
after giving it a second thought, and seeing that most of the changes to
existing code are not strictly necessary anymore, I figured it could be
easier not changing the current control flow too much like in the
attached patch.
The changes remaining are to "outsource" the maybe_expand_insn part and
making the emit_conditional_move with full comparison and rev_comparsion
externally available.
I suppose straightening of the arguably somewhat baroque parts, we can
defer to a separate patch.
On s390 this works nicely but I haven't yet done a bootstrap on other archs.
Regards
Robin
commit eb50384ee0cdeeefa61ae89bdbb2875500b7ce60
Author: Robin Dapp <rd...@linux.ibm.com>
Date: Wed Nov 27 13:53:40 2019 +0100
ifcvt/optabs: Allow using a CC comparison for emit_conditional_move.
Currently we only ever call emit_conditional_move with the comparison
(as well as its comparands) we got from the jump. Thus, backends are
going to emit a CC comparison for every conditional move that is being
generated instead of re-using the existing CC.
This, combined with emitting temporaries for each conditional move,
causes sky-high costs for conditional moves.
This patch allows to re-use a CC so the costing situation is improved a
bit.
diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
index 6ae883cbdd4..f7765e60548 100644
--- a/gcc/ifcvt.c
+++ b/gcc/ifcvt.c
@@ -772,7 +772,7 @@ static int noce_try_addcc (struct noce_if_info *);
static int noce_try_store_flag_constants (struct noce_if_info *);
static int noce_try_store_flag_mask (struct noce_if_info *);
static rtx noce_emit_cmove (struct noce_if_info *, rtx, enum rtx_code, rtx,
- rtx, rtx, rtx);
+ rtx, rtx, rtx, rtx = NULL, rtx = NULL);
static int noce_try_cmove (struct noce_if_info *);
static int noce_try_cmove_arith (struct noce_if_info *);
static rtx noce_get_alt_condition (struct noce_if_info *, rtx, rtx_insn **);
@@ -1711,7 +1711,8 @@ noce_try_store_flag_mask (struct noce_if_info *if_info)
static rtx
noce_emit_cmove (struct noce_if_info *if_info, rtx x, enum rtx_code code,
- rtx cmp_a, rtx cmp_b, rtx vfalse, rtx vtrue)
+ rtx cmp_a, rtx cmp_b, rtx vfalse, rtx vtrue, rtx cc_cmp,
+ rtx rev_cc_cmp)
{
rtx target ATTRIBUTE_UNUSED;
int unsignedp ATTRIBUTE_UNUSED;
@@ -1743,23 +1744,30 @@ noce_emit_cmove (struct noce_if_info *if_info, rtx x, enum rtx_code code,
end_sequence ();
}
- /* Don't even try if the comparison operands are weird
- except that the target supports cbranchcc4. */
- if (! general_operand (cmp_a, GET_MODE (cmp_a))
- || ! general_operand (cmp_b, GET_MODE (cmp_b)))
- {
- if (!have_cbranchcc4
- || GET_MODE_CLASS (GET_MODE (cmp_a)) != MODE_CC
- || cmp_b != const0_rtx)
- return NULL_RTX;
- }
-
unsignedp = (code == LTU || code == GEU
|| code == LEU || code == GTU);
- target = emit_conditional_move (x, code, cmp_a, cmp_b, VOIDmode,
- vtrue, vfalse, GET_MODE (x),
- unsignedp);
+ if (cc_cmp != NULL_RTX && rev_cc_cmp != NULL_RTX)
+ target = emit_conditional_move (x, cc_cmp, rev_cc_cmp,
+ vtrue, vfalse, GET_MODE (x));
+ else
+ {
+ /* Don't even try if the comparison operands are weird
+ except that the target supports cbranchcc4. */
+ if (! general_operand (cmp_a, GET_MODE (cmp_a))
+ || ! general_operand (cmp_b, GET_MODE (cmp_b)))
+ {
+ if (!have_cbranchcc4
+ || GET_MODE_CLASS (GET_MODE (cmp_a)) != MODE_CC
+ || cmp_b != const0_rtx)
+ return NULL_RTX;
+ }
+
+ target = emit_conditional_move (x, code, cmp_a, cmp_b, VOIDmode,
+ vtrue, vfalse, GET_MODE (x),
+ unsignedp);
+ }
+
if (target)
return target;
diff --git a/gcc/optabs.c b/gcc/optabs.c
index 019bbb62882..25eecf29ed8 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -52,6 +52,9 @@ static void prepare_float_lib_cmp (rtx, rtx, enum rtx_code, rtx *,
static rtx expand_unop_direct (machine_mode, optab, rtx, rtx, int);
static void emit_libcall_block_1 (rtx_insn *, rtx, rtx, rtx, bool);
+static rtx emit_conditional_move (rtx, rtx, rtx, rtx, machine_mode);
+rtx emit_conditional_move (rtx, rtx, rtx, rtx, rtx, machine_mode);
+
/* Debug facility for use in GDB. */
void debug_optab_libfuncs (void);
@@ -4875,6 +4878,7 @@ emit_conditional_move (rtx target, enum rtx_code code, rtx op0, rtx op1,
/* get_condition will prefer to generate LT and GT even if the old
comparison was against zero, so undo that canonicalization here since
comparisons against zero are cheaper. */
+
if (code == LT && op1 == const1_rtx)
code = LE, op1 = const0_rtx;
else if (code == GT && op1 == constm1_rtx)
@@ -4925,18 +4929,10 @@ emit_conditional_move (rtx target, enum rtx_code code, rtx op0, rtx op1,
OPTAB_WIDEN, &comparison, &cmpmode);
if (comparison)
{
- class expand_operand ops[4];
-
- create_output_operand (&ops[0], target, mode);
- create_fixed_operand (&ops[1], comparison);
- create_input_operand (&ops[2], op2, mode);
- create_input_operand (&ops[3], op3, mode);
- if (maybe_expand_insn (icode, 4, ops))
- {
- if (ops[0].value != target)
- convert_move (target, ops[0].value, false);
- return target;
- }
+ rtx res = emit_conditional_move (target, comparison,
+ op2, op3, mode);
+ if (res != NULL_RTX)
+ return res;
}
delete_insns_since (last);
restore_pending_stack_adjust (&save);
@@ -4959,6 +4955,73 @@ emit_conditional_move (rtx target, enum rtx_code code, rtx op0, rtx op1,
}
}
+/* Helper function that, in addition to COMPARISON, also tries
+ the reversed REV_COMPARISON with swapped OP2 and OP3. As opposed
+ to when we pass the specific constituents of a comparison, no
+ additional insns are emitted for it. It might still be necessary
+ to emit more than one insn for the final conditional move, though. */
+
+rtx
+emit_conditional_move (rtx target, rtx comparison, rtx rev_comparison,
+ rtx op2, rtx op3, machine_mode mode)
+{
+ rtx res = emit_conditional_move (target, comparison, op2, op3, mode);
+
+ if (res != NULL_RTX)
+ return res;
+
+ return emit_conditional_move (target, rev_comparison, op3, op2, mode);
+}
+
+/* Helper for emitting a conditional move. */
+
+static rtx
+emit_conditional_move (rtx target, rtx comparison,
+ rtx op2, rtx op3, machine_mode mode)
+{
+ enum insn_code icode;
+
+ if (comparison == NULL_RTX || !COMPARISON_P (comparison))
+ return NULL_RTX;
+
+ /* If the two source operands are identical, that's just a move. */
+ if (rtx_equal_p (op2, op3))
+ {
+ if (!target)
+ target = gen_reg_rtx (mode);
+
+ emit_move_insn (target, op3);
+ return target;
+ }
+
+ if (mode == VOIDmode)
+ mode = GET_MODE (op2);
+
+ icode = direct_optab_handler (movcc_optab, mode);
+
+ if (icode == CODE_FOR_nothing)
+ return NULL_RTX;
+
+ if (!target)
+ target = gen_reg_rtx (mode);
+
+ class expand_operand ops[4];
+
+ create_output_operand (&ops[0], target, mode);
+ create_fixed_operand (&ops[1], comparison);
+ create_input_operand (&ops[2], op2, mode);
+ create_input_operand (&ops[3], op3, mode);
+
+ if (maybe_expand_insn (icode, 4, ops))
+ {
+ if (ops[0].value != target)
+ convert_move (target, ops[0].value, false);
+ return target;
+ }
+
+ return NULL_RTX;
+}
+
/* Emit a conditional negate or bitwise complement using the
negcc or notcc optabs if available. Return NULL_RTX if such operations
diff --git a/gcc/optabs.h b/gcc/optabs.h
index 3bbceff92d9..f853b93f37f 100644
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -281,6 +281,7 @@ extern void emit_indirect_jump (rtx);
/* Emit a conditional move operation. */
rtx emit_conditional_move (rtx, enum rtx_code, rtx, rtx, machine_mode,
rtx, rtx, machine_mode, int);
+rtx emit_conditional_move (rtx, rtx, rtx, rtx, rtx, machine_mode);
/* Emit a conditional negate or bitwise complement operation. */
rtx emit_conditional_neg_or_complement (rtx, rtx_code, machine_mode, rtx,