Richard Henderson <richard.hender...@linaro.org> writes:
> The save/restore_stack_nonlocal patterns passed a DImode rtx
> to gen_tbranch_neqi3 for a QImode compare.  The tbranch expander
> did not do what it said on the tin, that is: emit TBNZ.
> It only made it as far as AND+CMP+B.cond.

Yeah, that was done to allow ifcombine to have a go first
(g:17ae956c0fa6baac3d22764019d5dd5ebf5c2b11).  But the original
pattern was restricted in g:8e26ac4749c5ddf827e18a846b1010b091f76fa2
to "bit 0 of a QI or HI" that we have now.

If all tests pass without the expander then I suppose we at least need
a new test to justify the expander's existence.

It doesn't look like any other target has implemented the tbranch optab,
so if we do remove the expander, I suppose we should remove the optab
itself too (separately).

The fix for the save/restore_stack_nonlocal mode mismatch is ok
either way though.

Thanks,
Richard

>
> But since we're seeding r16 with 1, GCSEnabled will clear the
> only set bit in r16, so we can use CBNZ instead of TBNZ.
>
> gcc:
>       * config/aarch64/aarch64.md (tbranch_<EQL><SHORT>3): Remove.
>       (save_stack_nonlocal): Use aarch64_gen_compare_zero_and_branch.
>       (restore_stack_nonlocal): Likewise.
> ---
>  gcc/config/aarch64/aarch64.md                 | 32 ++++---------------
>  .../gcc.target/aarch64/gcs-nonlocal-3.c       |  2 +-
>  2 files changed, 8 insertions(+), 26 deletions(-)
>
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 3da5bc2122d..9aea00cf45a 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -1017,24 +1017,6 @@
>  ;; Test bit and branch
>  ;; -------------------------------------------------------------------
>  
> -(define_expand "tbranch_<code><mode>3"
> -  [(set (pc) (if_then_else (EQL
> -                          (match_operand:SHORT 0 "register_operand")
> -                          (match_operand 1 "const0_operand"))
> -                        (label_ref (match_operand 2 ""))
> -                        (pc)))]
> -  ""
> -{
> -  rtx bitvalue = gen_reg_rtx (<ZEROM>mode);
> -  rtx reg = gen_lowpart (<ZEROM>mode, operands[0]);
> -  rtx val = gen_int_mode (HOST_WIDE_INT_1U << UINTVAL (operands[1]),
> -                       <MODE>mode);
> -  emit_insn (gen_and<zerom>3 (bitvalue, reg, val));
> -  operands[1] = const0_rtx;
> -  operands[0] = aarch64_gen_compare_reg (<CODE>, bitvalue,
> -                                      operands[1]);
> -})
> -
>  (define_insn "@aarch64_tbz<optab><ALLI:mode><GPI:mode>"
>    [(set (pc) (if_then_else (EQL
>                            (zero_extract:GPI
> @@ -1413,16 +1395,16 @@
>        /* Save GCS with code like
>               mov     x16, 1
>               chkfeat x16
> -             tbnz    x16, 0, .L_done
> +             cbnz    x16, .L_done
>               mrs     tmp, gcspr_el0
>               str     tmp, [%0, 8]
>       .L_done:  */
>  
> -      rtx done_label = gen_label_rtx ();
> +      auto done_label = gen_label_rtx ();
>        rtx r16 = gen_rtx_REG (DImode, R16_REGNUM);
>        emit_move_insn (r16, const1_rtx);
>        emit_insn (gen_aarch64_chkfeat ());
> -      emit_insn (gen_tbranch_neqi3 (r16, const0_rtx, done_label));
> +      emit_jump_insn (aarch64_gen_compare_zero_and_branch (NE, r16, 
> done_label));
>        rtx gcs_slot = adjust_address (operands[0], Pmode, GET_MODE_SIZE 
> (Pmode));
>        rtx gcs = gen_reg_rtx (Pmode);
>        emit_insn (gen_aarch64_load_gcspr (gcs));
> @@ -1445,7 +1427,7 @@
>        /* Restore GCS with code like
>               mov     x16, 1
>               chkfeat x16
> -             tbnz    x16, 0, .L_done
> +             cbnz    x16, .L_done
>               ldr     tmp1, [%1, 8]
>               mrs     tmp2, gcspr_el0
>               subs    tmp2, tmp1, tmp2
> @@ -1456,12 +1438,12 @@
>               b.ne    .L_loop
>       .L_done:  */
>  
> -      rtx loop_label = gen_label_rtx ();
> -      rtx done_label = gen_label_rtx ();
> +      auto loop_label = gen_label_rtx ();
> +      auto done_label = gen_label_rtx ();
>        rtx r16 = gen_rtx_REG (DImode, R16_REGNUM);
>        emit_move_insn (r16, const1_rtx);
>        emit_insn (gen_aarch64_chkfeat ());
> -      emit_insn (gen_tbranch_neqi3 (r16, const0_rtx, done_label));
> +      emit_jump_insn (aarch64_gen_compare_zero_and_branch (NE, r16, 
> done_label));
>        rtx gcs_slot = adjust_address (operands[1], Pmode, GET_MODE_SIZE 
> (Pmode));
>        rtx gcs_old = gen_reg_rtx (Pmode);
>        emit_move_insn (gcs_old, gcs_slot);
> diff --git a/gcc/testsuite/gcc.target/aarch64/gcs-nonlocal-3.c 
> b/gcc/testsuite/gcc.target/aarch64/gcs-nonlocal-3.c
> index e2391555150..7a76b14e1e7 100644
> --- a/gcc/testsuite/gcc.target/aarch64/gcs-nonlocal-3.c
> +++ b/gcc/testsuite/gcc.target/aarch64/gcs-nonlocal-3.c
> @@ -7,7 +7,7 @@ void run (void (*)());
>  ** bar.0:
>  **   ...
>  **   hint    40 // chkfeat x16
> -**   tbnz    w16, 0, (\.L[0-9]+)
> +**   cbnz    x16, (\.L[0-9]+)
>  **   ...
>  **   mrs     (x[0-9]+), s3_3_c2_c5_1 // gcspr_el0
>  **   subs    x[0-9]+, x[0-9]+, \2

Reply via email to