Wilco Dijkstra <wilco.dijks...@arm.com> writes:
> Hi Richard,
>
>> But what uses CMP_BRANCH after the patch?  It looked like you renamed
>> all existing uses and didn't add any new ones.
>
> My next patch will be adding uses of it now I've done some benchmarking
> to decide when to turn it on.
>
>>> +      && reg_referenced_p (SET_DEST (curr_set), PATTERN (curr)))
>
>> Looks like this should be prev_set.  But this condition will trigger
>> for any prev_insn that is only being kept around for its effect on the
>> flags, not just CMP/CMN/TST/BICS.  If it's only supposed to be those
>> four insns then I think we should test for them explicitly.
>
> Yes that should have been prev_set indeed. I've tightened up the
> condition to check for integer comparisons (which matches CMP,
> CMN, TST and BICS). Updated patch below.
>
> Cheers,
> Wilco
>
>
> [PATCH v2][AArch64] Add support for fused compare and branch
>
> Add support for fused compare with branch.  Rename the existing
> AARCH64_FUSE_CMP_BRANCH to ALU_BRANCH, and AARCH64_FUSE_ALU_BRANCH
> to ALU_CBZ to make it clear what is being fused.
>
> AArch64 bootstrap OK, OK to commit?
>
> ChangeLog:
>
> 2019-12-03  Wilco Dijkstra  <wdijk...@arm.com>
>
> * config/aarch64/aarch64.c
> (thunderxt88_tunings): Use AARCH64_FUSE_ALU_BRANCH.
> (thunderx_tunings): Likewise.
> (tsv110_tunings): Use AARCH64_FUSE_ALU_BRANCH and AARCH64_FUSE_ALU_CBZ.
> (thunderx2t99_tunings): Likewise.
> (aarch_macro_fusion_pair_p): Add support for AARCH64_FUSE_CMP_BRANCH.
> * config/aarch64/aarch64-fusion-pairs.def: Add ALU_CBZ fusion.

OK, thanks.

Richard

>
> --
>
> diff --git a/gcc/config/aarch64/aarch64-fusion-pairs.def 
> b/gcc/config/aarch64/aarch64-fusion-pairs.def
> index 
> ce4bb92d5c9d1f187c026b1a714e485a2b9f1a74..051009b42b2db4e79a8b302fd3f1b65dedfdba8f
>  100644
> --- a/gcc/config/aarch64/aarch64-fusion-pairs.def
> +++ b/gcc/config/aarch64/aarch64-fusion-pairs.def
> @@ -35,5 +35,6 @@ AARCH64_FUSION_PAIR ("adrp+ldr", ADRP_LDR)
>  AARCH64_FUSION_PAIR ("cmp+branch", CMP_BRANCH)
>  AARCH64_FUSION_PAIR ("aes+aesmc", AES_AESMC)
>  AARCH64_FUSION_PAIR ("alu+branch", ALU_BRANCH)
> +AARCH64_FUSION_PAIR ("alu+cbz", ALU_CBZ)
>
>  #undef AARCH64_FUSION_PAIR
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 
> d0cbe13273f78ebac6f3facc983ca0dec1a43966..942413e0a377603be98a8c547db262eb75baed48
>  100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -915,7 +915,7 @@ static const struct tune_params thunderxt88_tunings =
>    SVE_NOT_IMPLEMENTED, /* sve_width  */
>    6, /* memmov_cost  */
>    2, /* issue_rate  */
> -  AARCH64_FUSE_CMP_BRANCH, /* fusible_ops  */
> +  AARCH64_FUSE_ALU_BRANCH, /* fusible_ops  */
>    "8",/* function_align.  */
>    "8",/* jump_align.  */
>    "8",/* loop_align.  */
> @@ -941,7 +941,7 @@ static const struct tune_params thunderx_tunings =
>    SVE_NOT_IMPLEMENTED, /* sve_width  */
>    6, /* memmov_cost  */
>    2, /* issue_rate  */
> -  AARCH64_FUSE_CMP_BRANCH, /* fusible_ops  */
> +  AARCH64_FUSE_ALU_BRANCH, /* fusible_ops  */
>    "8",/* function_align.  */
>    "8",/* jump_align.  */
>    "8",/* loop_align.  */
> @@ -968,8 +968,8 @@ static const struct tune_params tsv110_tunings =
>    SVE_NOT_IMPLEMENTED, /* sve_width  */
>    4,    /* memmov_cost  */
>    4,    /* issue_rate  */
> -  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH
> -   | AARCH64_FUSE_ALU_BRANCH), /* fusible_ops  */
> +  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_ALU_BRANCH
> +   | AARCH64_FUSE_ALU_CBZ), /* fusible_ops  */
>    "16", /* function_align.  */
>    "4",  /* jump_align.  */
>    "8",  /* loop_align.  */
> @@ -1103,8 +1103,8 @@ static const struct tune_params thunderx2t99_tunings =
>    SVE_NOT_IMPLEMENTED, /* sve_width  */
>    4, /* memmov_cost.  */
>    4, /* issue_rate.  */
> -  (AARCH64_FUSE_CMP_BRANCH | AARCH64_FUSE_AES_AESMC
> -   | AARCH64_FUSE_ALU_BRANCH), /* fusible_ops  */
> +  (AARCH64_FUSE_ALU_BRANCH | AARCH64_FUSE_AES_AESMC
> +   | AARCH64_FUSE_ALU_CBZ), /* fusible_ops  */
>    "16",/* function_align.  */
>    "8",/* jump_align.  */
>    "16",/* loop_align.  */
> @@ -20372,7 +20372,16 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn 
> *curr)
>          }
>      }
>
> +  /* Fuse compare (CMP/CMN/TST/BICS) and conditional branch.  */
>    if (aarch64_fusion_enabled_p (AARCH64_FUSE_CMP_BRANCH)
> +      && prev_set && curr_set && any_condjump_p (curr)
> +      && GET_CODE (SET_SRC (prev_set)) == COMPARE
> +      && SCALAR_INT_MODE_P (GET_MODE (XEXP (SET_SRC (prev_set), 0)))
> +      && reg_referenced_p (SET_DEST (prev_set), PATTERN (curr)))
> +    return true;
> +
> +  /* Fuse flag-setting ALU instructions and conditional branch.  */
> +  if (aarch64_fusion_enabled_p (AARCH64_FUSE_ALU_BRANCH)
>        && any_condjump_p (curr))
>      {
>        unsigned int condreg1, condreg2;
> @@ -20396,9 +20405,10 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn 
> *curr)
>  }
>      }
>
> +  /* Fuse ALU instructions and CBZ/CBNZ.  */
>    if (prev_set
>        && curr_set
> -      && aarch64_fusion_enabled_p (AARCH64_FUSE_ALU_BRANCH)
> +      && aarch64_fusion_enabled_p (AARCH64_FUSE_ALU_CBZ)
>        && any_condjump_p (curr))
>      {
>        /* We're trying to match:

Reply via email to