On 27/04/17 18:38, Wilco Dijkstra wrote:
> The aarch_forward_to_shift_is_not_shifted_reg bypass always returns true
> on AArch64 shifted instructions.  This causes the bypass to activate in
> too many cases, resulting in slower execution on Cortex-A53 like reported
> in PR79665.
> 
> This patch uses the arm_no_early_alu_shift_dep condition instead which
> improves the example in PR79665 by ~7%.  Given it is no longer used,
> remove aarch_forward_to_shift_is_not_shifted_reg.
> 
> Passes AArch64 bootstrap and regress. OK for commit?
> 
> ChangeLog:
> 2017-04-27  Wilco Dijkstra  <wdijk...@arm.com>
> 
>       PR target/79665
>       * config/arm/aarch-common.c (arm_no_early_alu_shift_dep):
>       Remove redundant if.
>       (aarch_forward_to_shift_is_not_shifted_reg): Remove.
>       * config/arm/aarch-common-protos.h
> 	(aarch_forward_to_shift_is_not_shifted_reg): Remove.
>       * config/arm/cortex-a53.md: Use arm_no_early_alu_shift_dep in bypass.
> 
> --
> 
> diff --git a/gcc/config/arm/aarch-common-protos.h 
> b/gcc/config/arm/aarch-common-protos.h
> index 
> 7c2bb4c2ed93728efcbd9e2811c09dddd04b37fe..4350d975abbbbd2cda55ac31e0d47971b40fcde5
>  100644
> --- a/gcc/config/arm/aarch-common-protos.h
> +++ b/gcc/config/arm/aarch-common-protos.h
> @@ -25,7 +25,6 @@
>  
>  extern int aarch_accumulator_forwarding (rtx_insn *, rtx_insn *);
>  extern int aarch_crypto_can_dual_issue (rtx_insn *, rtx_insn *);
> -extern int aarch_forward_to_shift_is_not_shifted_reg (rtx_insn *, rtx_insn 
> *);
>  extern bool aarch_rev16_p (rtx);
>  extern bool aarch_rev16_shleft_mask_imm_p (rtx, machine_mode);
>  extern bool aarch_rev16_shright_mask_imm_p (rtx, machine_mode);
> diff --git a/gcc/config/arm/aarch-common.c b/gcc/config/arm/aarch-common.c
> index 
> 742d2ff4c7b779ae07b92f8a800e4667e32c44fb..9da2e382b2a1ecabd56acccc57997dbf626da513
>  100644
> --- a/gcc/config/arm/aarch-common.c
> +++ b/gcc/config/arm/aarch-common.c
> @@ -254,12 +254,7 @@ arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
>      return 0;
>  
>    if ((early_op = arm_find_shift_sub_rtx (op)))
> -    {
> -      if (REG_P (early_op))
> -     early_op = op;
> -
> -      return !reg_overlap_mentioned_p (value, early_op);
> -    }
> +    return !reg_overlap_mentioned_p (value, early_op);
>  
>    return 0;
>  }

This function is used by several aarch32 pipeline description models.
What testing have you given it there?  Are the changes appropriate for
those cores as well?

R.

> @@ -472,38 +467,6 @@ aarch_accumulator_forwarding (rtx_insn *producer, 
> rtx_insn *consumer)
>    return (REGNO (dest) == REGNO (accumulator));
>  }
>  
> -/* Return nonzero if the CONSUMER instruction is some sort of
> -   arithmetic or logic + shift operation, and the register we are
> -   writing in PRODUCER is not used in a register shift by register
> -   operation.  */
> -
> -int
> -aarch_forward_to_shift_is_not_shifted_reg (rtx_insn *producer,
> -                                        rtx_insn *consumer)
> -{
> -  rtx value, op;
> -  rtx early_op;
> -
> -  if (!arm_get_set_operands (producer, consumer, &value, &op))
> -    return 0;
> -
> -  if ((early_op = arm_find_shift_sub_rtx (op)))
> -    {
> -      if (REG_P (early_op))
> -     early_op = op;
> -
> -      /* Any other canonicalisation of a shift is a shift-by-constant
> -      so we don't care.  */
> -      if (GET_CODE (early_op) == ASHIFT)
> -     return (!REG_P (XEXP (early_op, 0))
> -             || !REG_P (XEXP (early_op, 1)));
> -      else
> -     return 1;
> -    }
> -
> -  return 0;
> -}
> -
>  /* Return non-zero if the consumer (a multiply-accumulate instruction)
>     has an accumulator dependency on the result of the producer (a
>     multiplication instruction) and no other dependency on that result.  */
> diff --git a/gcc/config/arm/cortex-a53.md b/gcc/config/arm/cortex-a53.md
> index 
> 7cf5fc5a0cd1d59efd0be3310b78303018138547..5bd0e62a108241ca56b01315908426e3f095fa81
>  100644
> --- a/gcc/config/arm/cortex-a53.md
> +++ b/gcc/config/arm/cortex-a53.md
> @@ -211,7 +211,7 @@ (define_bypass 1 "cortex_a53_alu*"
>  
>  (define_bypass 1 "cortex_a53_alu*"
>                "cortex_a53_alu_shift*"
> -              "aarch_forward_to_shift_is_not_shifted_reg")
> +              "arm_no_early_alu_shift_dep")
>  
>  (define_bypass 2 "cortex_a53_alu*"
>                "cortex_a53_alu_*,cortex_a53_shift*")
> 

Reply via email to