On Wed, May 25, 2022 at 11:39 AM liuhongt via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> Right now, mem_cost for a separate mem alternative is 1 * frequency, which
> is pretty small and caused the unnecessary SSE spill in the PR. I've tried
> to rework the backend cost model, but RA is still not happy with that (it
> regresses somewhere else). I think the root cause is that the cost for a
> separate 'm' alternative is too small, especially considering that the mov
> cost of GPRs is 2 (the default for REGISTER_MOVE_COST). So this patch
> increases mem_cost to 2 * frequency, and also increases the reg_class cost
> by 1 for the 'm' alternative.
>
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Ok for trunk?
>
> gcc/ChangeLog:
>
>         PR target/105513
>         * ira-costs.cc (record_reg_classes): Increase both mem_cost
>         and reg class cost by 1 for separate mem alternative when
>         REG_P (op).
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/pr105513-1.c: New test.
> ---
>  gcc/ira-costs.cc                           | 26 +++++++++++++---------
>  gcc/testsuite/gcc.target/i386/pr105513-1.c | 16 +++++++++++++
>  2 files changed, 31 insertions(+), 11 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr105513-1.c
>
> diff --git a/gcc/ira-costs.cc b/gcc/ira-costs.cc
> index 964c94a06ef..f7b8325e195 100644
> --- a/gcc/ira-costs.cc
> +++ b/gcc/ira-costs.cc
> @@ -625,7 +625,8 @@ record_reg_classes (int n_alts, int n_ops, rtx *ops,
>                           for (k = cost_classes_ptr->num - 1; k >= 0; k--)
>                             {
>                               rclass = cost_classes[k];
> -                             pp_costs[k] = mem_cost[rclass][0] * frequency;
> +                             pp_costs[k] = (mem_cost[rclass][0]
> +                                            + 1) * frequency;
>                             }
>                         }
>                       else
> @@ -648,7 +649,8 @@ record_reg_classes (int n_alts, int n_ops, rtx *ops,
>                           for (k = cost_classes_ptr->num - 1; k >= 0; k--)
>                             {
>                               rclass = cost_classes[k];
> -                             pp_costs[k] = mem_cost[rclass][1] * frequency;
> +                             pp_costs[k] = (mem_cost[rclass][1]
> +                                            + 1) * frequency;
>                             }
>                         }
>                       else
> @@ -670,9 +672,9 @@ record_reg_classes (int n_alts, int n_ops, rtx *ops,
>                           for (k = cost_classes_ptr->num - 1; k >= 0; k--)
>                             {
>                               rclass = cost_classes[k];
> -                             pp_costs[k] = ((mem_cost[rclass][0]
> -                                             + mem_cost[rclass][1])
> -                                            * frequency);
> +                             pp_costs[k] = (mem_cost[rclass][0]
> +                                            + mem_cost[rclass][1]
> +                                            + 2) * frequency;
>                             }
>                         }
>                       else
> @@ -861,7 +863,8 @@ record_reg_classes (int n_alts, int n_ops, rtx *ops,
>                           for (k = cost_classes_ptr->num - 1; k >= 0; k--)
>                             {
>                               rclass = cost_classes[k];
> -                             pp_costs[k] = mem_cost[rclass][0] * frequency;
> +                             pp_costs[k] = (mem_cost[rclass][0]
> +                                            + 1) * frequency;
>                             }
>                         }
>                       else
> @@ -884,7 +887,8 @@ record_reg_classes (int n_alts, int n_ops, rtx *ops,
>                           for (k = cost_classes_ptr->num - 1; k >= 0; k--)
>                             {
>                               rclass = cost_classes[k];
> -                             pp_costs[k] = mem_cost[rclass][1] * frequency;
> +                             pp_costs[k] = (mem_cost[rclass][1]
> +                                            + 1) * frequency;
>                             }
>                         }
>                       else
> @@ -906,9 +910,9 @@ record_reg_classes (int n_alts, int n_ops, rtx *ops,
>                           for (k = cost_classes_ptr->num - 1; k >= 0; k--)
>                             {
>                               rclass = cost_classes[k];
> -                             pp_costs[k] = ((mem_cost[rclass][0]
> -                                             + mem_cost[rclass][1])
> -                                            * frequency);
> +                             pp_costs[k] = (mem_cost[rclass][0]
> +                                            + mem_cost[rclass][1]
> +                                            + 2) * frequency;
>                             }
>                         }
>                       else
> @@ -929,7 +933,7 @@ record_reg_classes (int n_alts, int n_ops, rtx *ops,
>                     /* Although we don't need insn to reload from
>                        memory, still accessing memory is usually more
>                        expensive than a register.  */
> -                   pp->mem_cost = frequency;
> +                   pp->mem_cost = 2 * frequency;
>                   else
>                     /* If the alternative actually allows memory, make
>                        things a bit cheaper since we won't need an
> diff --git a/gcc/testsuite/gcc.target/i386/pr105513-1.c b/gcc/testsuite/gcc.target/i386/pr105513-1.c
> new file mode 100644
> index 00000000000..530f5292252
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr105513-1.c
> @@ -0,0 +1,16 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -msse2 -mtune=skylake -mfpmath=sse" } */
> +/* { dg-final { scan-assembler-not "\\(%rsp\\)" } } */
> +
> +static int as_int(float x)
> +{
> +    return (union{float x; int i;}){x}.i;
> +}
> +
> +float f(double y, float x)
> +{
> +    int i = as_int(x);
> +    if (__builtin_expect(i > 99, 0)) return 0;
> +    if (i*2u < 77) if (i==2) return 0;
> +    return y*x;
> +}
> --
> 2.18.1
>


-- 
BR,
Hongtao

Reply via email to