On Thu, 14 Dec 2023, Juzhe-Zhong wrote:

> Following Richard's suggestion, we should not model address cost in the
> loop vectorizer for select_vl or decrement IV, since the other
> vectorization styles don't do that.
> 
> To make the cost model comparison apples to apples, this patch changes
> the length update cost from 2 to 1, which turns out to give better
> codegen in various cases for RVV.
> 
> Ok for trunk ?

OK with me.

Richard.

>       PR target/111153
> 
> gcc/ChangeLog:
> 
>       * tree-vect-loop.cc (vect_estimate_min_profitable_iters): Remove 
> address cost for select_vl/decrement IV.
> 
> gcc/testsuite/ChangeLog:
> 
>       * gcc.dg/vect/costmodel/riscv/rvv/pr111153.c: Moved to...
>       * gcc.dg/vect/costmodel/riscv/rvv/pr11153-2.c: ...here.
>       * gcc.dg/vect/costmodel/riscv/rvv/pr111153-1.c: New test.
> 
> ---
>  .../vect/costmodel/riscv/rvv/pr111153-1.c      | 18 ++++++++++++++++++
>  .../riscv/rvv/{pr111153.c => pr11153-2.c}      |  4 ++--
>  gcc/tree-vect-loop.cc                          | 10 ++++------
>  3 files changed, 24 insertions(+), 8 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153-1.c
>  rename gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/{pr111153.c => 
> pr11153-2.c} (93%)
> 
> diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153-1.c 
> b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153-1.c
> new file mode 100644
> index 00000000000..51c91f7410c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153-1.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
> -mtune=generic-ooo -ffast-math" } */
> +
> +#define DEF_REDUC_PLUS(TYPE)                                                 
>   \
> +  TYPE __attribute__ ((noinline, noclone))                                   
>   \
> +  reduc_plus_##TYPE (TYPE *__restrict a, int n)                              
>   \
> +  {                                                                          
>   \
> +    TYPE r = 0;                                                              
>   \
> +    for (int i = 0; i < n; ++i)                                              
>   \
> +      r += a[i];                                                             
>   \
> +    return r;                                                                
>   \
> +  }
> +
> +#define TEST_PLUS(T) T (int) T (float)
> +
> +TEST_PLUS (DEF_REDUC_PLUS)
> +
> +/* { dg-final { scan-assembler-not {vsetivli\s+zero,\s*4} } } */
> diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153.c 
> b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr11153-2.c
> similarity index 93%
> rename from gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153.c
> rename to gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr11153-2.c
> index 06e08ec5f2e..d361f1fc7fa 100644
> --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153.c
> +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr11153-2.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
> -mtune=generic-ooo" } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
> -ffast-math" } */
>  
>  #define DEF_REDUC_PLUS(TYPE)                                                 
>   \
>    TYPE __attribute__ ((noinline, noclone))                                   
>   \
> @@ -11,7 +11,7 @@
>      return r;                                                                
>   \
>    }
>  
> -#define TEST_PLUS(T) T (int)
> +#define TEST_PLUS(T) T (int) T (float)
>  
>  TEST_PLUS (DEF_REDUC_PLUS)
>  
> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index 19e38b8637b..7a3db5f098b 100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -4872,12 +4872,10 @@ vect_estimate_min_profitable_iters (loop_vec_info 
> loop_vinfo,
>  
>           unsigned int length_update_cost = 0;
>           if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo))
> -           /* For decrement IV style, we use a single SELECT_VL since
> -              beginning to calculate the number of elements need to be
> -              processed in current iteration, and a SHIFT operation to
> -              compute the next memory address instead of adding vectorization
> -              factor.  */
> -           length_update_cost = 2;
> +           /* For decrement IV style, Each only need a single SELECT_VL
> +              or MIN since beginning to calculate the number of elements
> +              need to be processed in current iteration.  */
> +           length_update_cost = 1;
>           else
>             /* For increment IV stype, Each may need two MINs and one MINUS to
>                update lengths in body for next iteration.  */
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Reply via email to