Richard Henderson <richard.hender...@linaro.org> writes: > The save/restore_stack_nonlocal patterns passed a DImode rtx > to gen_tbranch_neqi3 for a QImode compare. The tbranch expander > did not do what it said on the tin, that is: emit TBNZ. > It only made it as far as AND+CMP+B.cond.
Yeah, that was done to allow ifcombine to have a go first (g:17ae956c0fa6baac3d22764019d5dd5ebf5c2b11). But the original pattern was restricted in g:8e26ac4749c5ddf827e18a846b1010b091f76fa2 to "bit 0 of a QI or HI" that we have now. If all tests pass without the expander then I suppose we at least need a new test to justify the expander's existence. It doesn't look like any other target has implemented the tbranch optab, so if we do remove the expander, I suppose we should remove the optab itself too (separately). The fix for the save/restore_stack_nonlocal mode mismatch is ok either way though. Thanks, Richard > > But since we're seeding r16 with 1, GCSEnabled will clear the > only set bit in r16, so we can use CBNZ instead of TBNZ. > > gcc: > * config/aarch64/aarch64.md (tbranch_<EQL><SHORT>3): Remove. > (save_stack_nonlocal): Use aarch64_gen_compare_zero_and_branch. > (restore_stack_nonlocal): Likewise. > --- > gcc/config/aarch64/aarch64.md | 32 ++++--------------- > .../gcc.target/aarch64/gcs-nonlocal-3.c | 2 +- > 2 files changed, 8 insertions(+), 26 deletions(-) > > diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md > index 3da5bc2122d..9aea00cf45a 100644 > --- a/gcc/config/aarch64/aarch64.md > +++ b/gcc/config/aarch64/aarch64.md > @@ -1017,24 +1017,6 @@ > ;; Test bit and branch > ;; ------------------------------------------------------------------- > > -(define_expand "tbranch_<code><mode>3" > - [(set (pc) (if_then_else (EQL > - (match_operand:SHORT 0 "register_operand") > - (match_operand 1 "const0_operand")) > - (label_ref (match_operand 2 "")) > - (pc)))] > - "" > -{ > - rtx bitvalue = gen_reg_rtx (<ZEROM>mode); > - rtx reg = gen_lowpart (<ZEROM>mode, operands[0]); > - rtx val = gen_int_mode (HOST_WIDE_INT_1U << UINTVAL (operands[1]), > - <MODE>mode); > - emit_insn (gen_and<zerom>3 (bitvalue, reg, val)); > - operands[1] = const0_rtx; > - operands[0] = aarch64_gen_compare_reg (<CODE>, bitvalue, > - operands[1]); > -}) > - > (define_insn "@aarch64_tbz<optab><ALLI:mode><GPI:mode>" > [(set (pc) (if_then_else (EQL > (zero_extract:GPI > @@ -1413,16 +1395,16 @@ > /* Save GCS with code like > mov x16, 1 > chkfeat x16 > - tbnz x16, 0, .L_done > + cbnz x16, .L_done > mrs tmp, gcspr_el0 > str tmp, [%0, 8] > .L_done: */ > > - rtx done_label = gen_label_rtx (); > + auto done_label = gen_label_rtx (); > rtx r16 = gen_rtx_REG (DImode, R16_REGNUM); > emit_move_insn (r16, const1_rtx); > emit_insn (gen_aarch64_chkfeat ()); > - emit_insn (gen_tbranch_neqi3 (r16, const0_rtx, done_label)); > + emit_jump_insn (aarch64_gen_compare_zero_and_branch (NE, r16, > done_label)); > rtx gcs_slot = adjust_address (operands[0], Pmode, GET_MODE_SIZE > (Pmode)); > rtx gcs = gen_reg_rtx (Pmode); > emit_insn (gen_aarch64_load_gcspr (gcs)); > @@ -1445,7 +1427,7 @@ > /* Restore GCS with code like > mov x16, 1 > chkfeat x16 > - tbnz x16, 0, .L_done > + cbnz x16, .L_done > ldr tmp1, [%1, 8] > mrs tmp2, gcspr_el0 > subs tmp2, tmp1, tmp2 > @@ -1456,12 +1438,12 @@ > b.ne .L_loop > .L_done: */ > > - rtx loop_label = gen_label_rtx (); > - rtx done_label = gen_label_rtx (); > + auto loop_label = gen_label_rtx (); > + auto done_label = gen_label_rtx (); > rtx r16 = gen_rtx_REG (DImode, R16_REGNUM); > emit_move_insn (r16, const1_rtx); > emit_insn (gen_aarch64_chkfeat ()); > - emit_insn (gen_tbranch_neqi3 (r16, const0_rtx, done_label)); > + emit_jump_insn (aarch64_gen_compare_zero_and_branch (NE, r16, > done_label)); > rtx gcs_slot = adjust_address (operands[1], Pmode, GET_MODE_SIZE > (Pmode)); > rtx gcs_old = gen_reg_rtx (Pmode); > emit_move_insn (gcs_old, gcs_slot); > diff --git a/gcc/testsuite/gcc.target/aarch64/gcs-nonlocal-3.c > b/gcc/testsuite/gcc.target/aarch64/gcs-nonlocal-3.c > index e2391555150..7a76b14e1e7 100644 > --- a/gcc/testsuite/gcc.target/aarch64/gcs-nonlocal-3.c > +++ b/gcc/testsuite/gcc.target/aarch64/gcs-nonlocal-3.c > @@ -7,7 +7,7 @@ void run (void (*)()); > ** bar.0: > ** ... > ** hint 40 // chkfeat x16 > -** tbnz w16, 0, (\.L[0-9]+) > +** cbnz x16, (\.L[0-9]+) > ** ... > ** mrs (x[0-9]+), s3_3_c2_c5_1 // gcspr_el0 > ** subs x[0-9]+, x[0-9]+, \2