On Thu, Mar 14, 2019 at 6:19 PM <apin...@marvell.com> wrote:
>
> From: Andrew Pinski <apin...@marvell.com>
>
> Hi,
>   On OcteonTX2, ld1r and ld1 (with a single lane) are split
> into two different micro-ops unlike most other targets.
> This adds three extra costs to the cost table:
> ld1_dup: used for "ld1r {v0.4s}, [x0]"
> merge_dup: used for "dup v0.4s, v0.4s[0]" and "ins v0.4s[0], v0.4s[0]"
> ld1_merge: used for "ld1 {v0.4s}[0], [x0]"
>
> OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions.

Ping?  It has been 1.5 months now.

>
> Thanks,
> Andrew Pinski
>
> ChangeLog:
> * config/arm/aarch-common-protos.h (vector_cost_table):
> Add merge_dup, ld1_merge, and ld1_dup.
> * config/aarch64/aarch64-cost-tables.h (qdf24xx_extra_costs):
> Update for the new fields.
> (thunderx_extra_costs): Likewise.
> (thunderx2t99_extra_costs): Likewise.
> (tsv110_extra_costs): Likewise.
> * config/arm/aarch-cost-tables.h (generic_extra_costs): Likewise.
> (cortexa53_extra_costs): Likewise.
> (cortexa57_extra_costs): Likewise.
> (exynosm1_extra_costs): Likewise.
> (xgene1_extra_costs): Likewise.
> * config/aarch64/aarch64.c (aarch64_rtx_costs): Handle vec_dup of a memory.
> Handle vec_merge of a memory.
>
> Signed-off-by: Andrew Pinski <apin...@marvell.com>
> ---
>  gcc/config/aarch64/aarch64-cost-tables.h | 20 +++++++++++++++----
>  gcc/config/aarch64/aarch64.c             | 22 +++++++++++++++++++++
>  gcc/config/arm/aarch-common-protos.h     |  3 +++
>  gcc/config/arm/aarch-cost-tables.h       | 25 +++++++++++++++++++-----
>  4 files changed, 61 insertions(+), 9 deletions(-)
>
> diff --git a/gcc/config/aarch64/aarch64-cost-tables.h 
> b/gcc/config/aarch64/aarch64-cost-tables.h
> index 5c9442e1b89..9a7c70ba595 100644
> --- a/gcc/config/aarch64/aarch64-cost-tables.h
> +++ b/gcc/config/aarch64/aarch64-cost-tables.h
> @@ -123,7 +123,10 @@ const struct cpu_cost_table qdf24xx_extra_costs =
>    },
>    /* Vector */
>    {
> -    COSTS_N_INSNS (1)  /* alu.  */
> +    COSTS_N_INSNS (1),  /* Alu.  */
> +    COSTS_N_INSNS (1), /* dup_merge.  */
> +    COSTS_N_INSNS (1), /* ld1_merge.  */
> +    COSTS_N_INSNS (1)  /* ld1_dup.  */
>    }
>  };
>
> @@ -227,7 +230,10 @@ const struct cpu_cost_table thunderx_extra_costs =
>    },
>    /* Vector */
>    {
> -    COSTS_N_INSNS (1)  /* Alu.  */
> +    COSTS_N_INSNS (1), /* Alu.  */
> +    COSTS_N_INSNS (1), /* dup_merge.  */
> +    COSTS_N_INSNS (1), /* ld1_merge.  */
> +    COSTS_N_INSNS (1)  /* ld1_dup.  */
>    }
>  };
>
> @@ -330,7 +336,10 @@ const struct cpu_cost_table thunderx2t99_extra_costs =
>    },
>    /* Vector */
>    {
> -    COSTS_N_INSNS (1)  /* Alu.  */
> +    COSTS_N_INSNS (1), /* Alu.  */
> +    COSTS_N_INSNS (1), /* dup_merge.  */
> +    COSTS_N_INSNS (1), /* ld1_merge.  */
> +    COSTS_N_INSNS (1)  /* ld1_dup.  */
>    }
>  };
>
> @@ -434,7 +443,10 @@ const struct cpu_cost_table tsv110_extra_costs =
>    },
>    /* Vector */
>    {
> -    COSTS_N_INSNS (1)  /* alu.  */
> +    COSTS_N_INSNS (1), /* Alu.  */
> +    COSTS_N_INSNS (1), /* dup_merge.  */
> +    COSTS_N_INSNS (1), /* ld1_merge.  */
> +    COSTS_N_INSNS (1)  /* ld1_dup.  */
>    }
>  };
>
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index b38505b0872..dc4d3d39af8 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -10568,6 +10568,28 @@ cost_plus:
>          }
>        break;
>
> +    case VEC_DUPLICATE:
> +      if (!speed)
> +       return false;
> +
> +      if (GET_CODE (XEXP (x, 0)) == MEM)
> +       *cost += extra_cost->vect.ld1_dup;
> +      else
> +       *cost += extra_cost->vect.merge_dup;
> +      return true;
> +
> +    case VEC_MERGE:
> +      if (speed && GET_CODE (XEXP (x, 0)) == VEC_DUPLICATE)
> +       {
> +         if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MEM)
> +           *cost += extra_cost->vect.ld1_merge;
> +         else
> +           *cost += extra_cost->vect.merge_dup;
> +         return true;
> +       }
> +      break;
> +
> +
>      case TRUNCATE:
>
>        /* Decompose <su>muldi3_highpart.  */
> diff --git a/gcc/config/arm/aarch-common-protos.h 
> b/gcc/config/arm/aarch-common-protos.h
> index 11cd5145bbc..dbc1282402a 100644
> --- a/gcc/config/arm/aarch-common-protos.h
> +++ b/gcc/config/arm/aarch-common-protos.h
> @@ -131,6 +131,9 @@ struct fp_cost_table
>  struct vector_cost_table
>  {
>    const int alu;
> +  const int merge_dup;
> +  const int ld1_merge;
> +  const int ld1_dup;
>  };
>
>  struct cpu_cost_table
> diff --git a/gcc/config/arm/aarch-cost-tables.h 
> b/gcc/config/arm/aarch-cost-tables.h
> index bc33efadc6c..a51bc668f56 100644
> --- a/gcc/config/arm/aarch-cost-tables.h
> +++ b/gcc/config/arm/aarch-cost-tables.h
> @@ -121,7 +121,10 @@ const struct cpu_cost_table generic_extra_costs =
>    },
>    /* Vector */
>    {
> -    COSTS_N_INSNS (1)  /* alu.  */
> +    COSTS_N_INSNS (1),  /* alu.  */
> +    COSTS_N_INSNS (1), /* dup_merge.  */
> +    COSTS_N_INSNS (1), /* ld1_merge.  */
> +    COSTS_N_INSNS (1)  /* ld1_dup.  */
>    }
>  };
>
> @@ -224,7 +227,10 @@ const struct cpu_cost_table cortexa53_extra_costs =
>    },
>    /* Vector */
>    {
> -    COSTS_N_INSNS (1)  /* alu.  */
> +    COSTS_N_INSNS (1),  /* alu.  */
> +    COSTS_N_INSNS (1), /* dup_merge.  */
> +    COSTS_N_INSNS (1), /* ld1_merge.  */
> +    COSTS_N_INSNS (1)  /* ld1_dup.  */
>    }
>  };
>
> @@ -327,7 +333,10 @@ const struct cpu_cost_table cortexa57_extra_costs =
>    },
>    /* Vector */
>    {
> -    COSTS_N_INSNS (1)  /* alu.  */
> +    COSTS_N_INSNS (1),  /* alu.  */
> +    COSTS_N_INSNS (1), /* dup_merge.  */
> +    COSTS_N_INSNS (1), /* ld1_merge.  */
> +    COSTS_N_INSNS (1)  /* ld1_dup.  */
>    }
>  };
>
> @@ -430,7 +439,10 @@ const struct cpu_cost_table exynosm1_extra_costs =
>    },
>    /* Vector */
>    {
> -    COSTS_N_INSNS (0)  /* alu.  */
> +    COSTS_N_INSNS (0),  /* alu.  */
> +    COSTS_N_INSNS (0), /* dup_merge.  */
> +    COSTS_N_INSNS (0), /* ld1_merge.  */
> +    COSTS_N_INSNS (0)  /* ld1_dup.  */
>    }
>  };
>
> @@ -533,7 +545,10 @@ const struct cpu_cost_table xgene1_extra_costs =
>    },
>    /* Vector */
>    {
> -    COSTS_N_INSNS (2)  /* alu.  */
> +    COSTS_N_INSNS (2),  /* alu.  */
> +    COSTS_N_INSNS (2), /* dup_merge.  */
> +    COSTS_N_INSNS (2), /* ld1_merge.  */
> +    COSTS_N_INSNS (2)  /* ld1_dup.  */
>    }
>  };
>
> --
> 2.17.1
>

Reply via email to