Wilco Dijkstra <wilco.dijks...@arm.com> writes: > Hi Richard, > >> But what uses CMP_BRANCH after the patch? It looked like you renamed >> all existing uses and didn't add any new ones. > > My next patch will be adding uses of it now I've done some benchmarking > to decide when to turn it on. > >>> + && reg_referenced_p (SET_DEST (curr_set), PATTERN (curr))) > >> Looks like this should be prev_set. But this condition will trigger >> for any prev_insn that is only being kept around for its effect on the >> flags, not just CMP/CMN/TST/BICS. If it's only supposed to be those >> four insns then I think we should test for them explicitly. > > Yes that should have been prev_set indeed. I've tightened up the > condition to check for integer comparisons (which matches CMP, > CMN, TST and BICS). Updated patch below. > > Cheers, > Wilco > > > [PATCH v2][AArch64] Add support for fused compare and branch > > Add support for fused compare with branch. Rename the existing > AARCH64_FUSE_CMP_BRANCH to ALU_BRANCH, and AARCH64_FUSE_ALU_BRANCH > to ALU_CBZ to make it clear what is being fused. > > AArch64 bootstrap OK, OK to commit? > > ChangeLog: > > 2019-12-03 Wilco Dijkstra <wdijk...@arm.com> > > * config/aarch64/aarch64.c > (thunderxt88_tunings): Use AARCH64_FUSE_ALU_BRANCH. > (thunderx_tunings): Likewise. > (tsv110_tunings): Use AARCH64_FUSE_ALU_BRANCH and AARCH64_FUSE_ALU_CBZ. > (thunderx2t99_tunings): Likewise. > (aarch_macro_fusion_pair_p): Add support for AARCH64_FUSE_CMP_BRANCH. > * config/aarch64/aarch64-fusion-pairs.def: Add ALU_CBZ fusion.
OK, thanks. Richard > > -- > > diff --git a/gcc/config/aarch64/aarch64-fusion-pairs.def > b/gcc/config/aarch64/aarch64-fusion-pairs.def > index > ce4bb92d5c9d1f187c026b1a714e485a2b9f1a74..051009b42b2db4e79a8b302fd3f1b65dedfdba8f > 100644 > --- a/gcc/config/aarch64/aarch64-fusion-pairs.def > +++ b/gcc/config/aarch64/aarch64-fusion-pairs.def > @@ -35,5 +35,6 @@ AARCH64_FUSION_PAIR ("adrp+ldr", ADRP_LDR) > AARCH64_FUSION_PAIR ("cmp+branch", CMP_BRANCH) > AARCH64_FUSION_PAIR ("aes+aesmc", AES_AESMC) > AARCH64_FUSION_PAIR ("alu+branch", ALU_BRANCH) > +AARCH64_FUSION_PAIR ("alu+cbz", ALU_CBZ) > > #undef AARCH64_FUSION_PAIR > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c > index > d0cbe13273f78ebac6f3facc983ca0dec1a43966..942413e0a377603be98a8c547db262eb75baed48 > 100644 > --- a/gcc/config/aarch64/aarch64.c > +++ b/gcc/config/aarch64/aarch64.c > @@ -915,7 +915,7 @@ static const struct tune_params thunderxt88_tunings = > SVE_NOT_IMPLEMENTED, /* sve_width */ > 6, /* memmov_cost */ > 2, /* issue_rate */ > - AARCH64_FUSE_CMP_BRANCH, /* fusible_ops */ > + AARCH64_FUSE_ALU_BRANCH, /* fusible_ops */ > "8",/* function_align. */ > "8",/* jump_align. */ > "8",/* loop_align. */ > @@ -941,7 +941,7 @@ static const struct tune_params thunderx_tunings = > SVE_NOT_IMPLEMENTED, /* sve_width */ > 6, /* memmov_cost */ > 2, /* issue_rate */ > - AARCH64_FUSE_CMP_BRANCH, /* fusible_ops */ > + AARCH64_FUSE_ALU_BRANCH, /* fusible_ops */ > "8",/* function_align. */ > "8",/* jump_align. */ > "8",/* loop_align. */ > @@ -968,8 +968,8 @@ static const struct tune_params tsv110_tunings = > SVE_NOT_IMPLEMENTED, /* sve_width */ > 4, /* memmov_cost */ > 4, /* issue_rate */ > - (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH > - | AARCH64_FUSE_ALU_BRANCH), /* fusible_ops */ > + (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_ALU_BRANCH > + | AARCH64_FUSE_ALU_CBZ), /* fusible_ops */ > "16", /* function_align. */ > "4", /* jump_align. */ > "8", /* loop_align. */ > @@ -1103,8 +1103,8 @@ static const struct tune_params thunderx2t99_tunings = > SVE_NOT_IMPLEMENTED, /* sve_width */ > 4, /* memmov_cost. */ > 4, /* issue_rate. */ > - (AARCH64_FUSE_CMP_BRANCH | AARCH64_FUSE_AES_AESMC > - | AARCH64_FUSE_ALU_BRANCH), /* fusible_ops */ > + (AARCH64_FUSE_ALU_BRANCH | AARCH64_FUSE_AES_AESMC > + | AARCH64_FUSE_ALU_CBZ), /* fusible_ops */ > "16",/* function_align. */ > "8",/* jump_align. */ > "16",/* loop_align. */ > @@ -20372,7 +20372,16 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn > *curr) > } > } > > + /* Fuse compare (CMP/CMN/TST/BICS) and conditional branch. */ > if (aarch64_fusion_enabled_p (AARCH64_FUSE_CMP_BRANCH) > + && prev_set && curr_set && any_condjump_p (curr) > + && GET_CODE (SET_SRC (prev_set)) == COMPARE > + && SCALAR_INT_MODE_P (GET_MODE (XEXP (SET_SRC (prev_set), 0))) > + && reg_referenced_p (SET_DEST (prev_set), PATTERN (curr))) > + return true; > + > + /* Fuse flag-setting ALU instructions and conditional branch. */ > + if (aarch64_fusion_enabled_p (AARCH64_FUSE_ALU_BRANCH) > && any_condjump_p (curr)) > { > unsigned int condreg1, condreg2; > @@ -20396,9 +20405,10 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn > *curr) > } > } > > + /* Fuse ALU instructions and CBZ/CBNZ. */ > if (prev_set > && curr_set > - && aarch64_fusion_enabled_p (AARCH64_FUSE_ALU_BRANCH) > + && aarch64_fusion_enabled_p (AARCH64_FUSE_ALU_CBZ) > && any_condjump_p (curr)) > { > /* We're trying to match: