While working on the cost model, I noticed one case where the dynamic LMUL
cost heuristic doesn't work well.

Before this patch:

foo:
        lui     a4,%hi(.LANCHOR0)
        li      a0,1953
        li      a1,63
        addi    a4,a4,%lo(.LANCHOR0)
        li      a3,64
        vsetvli a2,zero,e32,mf2,ta,ma
        vmv.v.x v5,a0
        vmv.v.x v4,a1
        vid.v   v3
.L2:
        vsetvli a5,a3,e32,mf2,ta,ma
        vadd.vi v2,v3,1
        vadd.vv v1,v3,v5
        mv      a2,a5
        vmacc.vv        v1,v2,v4
        slli    a1,a5,2
        vse32.v v1,0(a4)
        sub     a3,a3,a5
        add     a4,a4,a1
        vsetvli a5,zero,e32,mf2,ta,ma
        vmv.v.x v1,a2
        vadd.vv v3,v3,v1
        bne     a3,zero,.L2
        li      a0,0
        ret

Unexpected: this uses scalable vectors with LMUL = MF2, which wastes
computation resources.

Ideally, we should use LMUL = M8 VLS modes.

The root cause is that the dynamic LMUL heuristic dominates the VLS heuristic.
This patch adapts the cost model heuristic accordingly.

After this patch:

foo:
        lui     a4,%hi(.LANCHOR0)
        addi    a4,a4,%lo(.LANCHOR0)
        li      a3,4096
        li      a5,32
        li      a1,2016
        addi    a2,a4,128
        addiw   a3,a3,-32
        vsetvli zero,a5,e32,m8,ta,ma
        li      a0,0
        vid.v   v8
        vsll.vi v8,v8,6
        vadd.vx v16,v8,a1
        vadd.vx v8,v8,a3
        vse32.v v16,0(a4)
        vse32.v v8,0(a2)
        ret

Tested on both RV32 and RV64 with no regressions.

OK for trunk?

gcc/ChangeLog:

        * config/riscv/riscv-vector-costs.cc (costs::better_main_loop_than_p): 
Minor tweak.

gcc/testsuite/ChangeLog:

        * gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-10.c: Fix test.
        * gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-11.c: Ditto.
        * gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-12.c: Ditto.

---
 gcc/config/riscv/riscv-vector-costs.cc                       | 3 ++-
 .../gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-10.c          | 5 ++---
 .../gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-11.c          | 5 ++---
 .../gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-12.c          | 2 +-
 4 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/gcc/config/riscv/riscv-vector-costs.cc 
b/gcc/config/riscv/riscv-vector-costs.cc
index f4a1a789f23..e53f4a186f3 100644
--- a/gcc/config/riscv/riscv-vector-costs.cc
+++ b/gcc/config/riscv/riscv-vector-costs.cc
@@ -994,7 +994,8 @@ costs::better_main_loop_than_p (const vector_costs 
*uncast_other) const
                     vect_vf_for_cost (other_loop_vinfo));
 
   /* Apply the unrolling heuristic described above m_unrolled_vls_niters.  */
-  if (bool (m_unrolled_vls_stmts) != bool (other->m_unrolled_vls_stmts))
+  if (bool (m_unrolled_vls_stmts) != bool (other->m_unrolled_vls_stmts)
+      && m_cost_type != other->m_cost_type)
     {
       bool this_prefer_unrolled = this->prefer_unrolled_loop ();
       bool other_prefer_unrolled = other->prefer_unrolled_loop ();
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-10.c 
b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-10.c
index 3ddffa37fe4..89a6c678960 100644
--- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-10.c
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-10.c
@@ -3,7 +3,7 @@
 
 #include <stdint-gcc.h>
 
-#define N 40
+#define N 48
 
 int a[N];
 
@@ -22,7 +22,6 @@ foo (){
   return 0;
 }
 
-/* { dg-final { scan-assembler-times 
{vsetivli\s+zero,\s*8,\s*e32,\s*m2,\s*t[au],\s*m[au]} 1 } } */
 /* { dg-final { scan-assembler-times 
{vsetivli\s+zero,\s*16,\s*e32,\s*m4,\s*t[au],\s*m[au]} 1 } } */
-/* { dg-final { scan-assembler-times {vsetivli} 2 } } */
+/* { dg-final { scan-assembler-times {vsetivli} 1 } } */
 /* { dg-final { scan-assembler-not {vsetvli} } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-11.c 
b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-11.c
index 7625ec5c4b1..86732ef2ce5 100644
--- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-11.c
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-11.c
@@ -3,7 +3,7 @@
 
 #include <stdint-gcc.h>
 
-#define N 40
+#define N 64
 
 int a[N];
 
@@ -22,7 +22,6 @@ foo (){
   return 0;
 }
 
-/* { dg-final { scan-assembler-times 
{vsetivli\s+zero,\s*8,\s*e32,\s*m2,\s*t[au],\s*m[au]} 1 } } */
 /* { dg-final { scan-assembler-times 
{vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m8,\s*t[au],\s*m[au]} 1 } } */
-/* { dg-final { scan-assembler-times {vsetivli} 1 } } */
+/* { dg-final { scan-assembler-not {vsetivli} } } */
 /* { dg-final { scan-assembler-times {vsetvli} 1 } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-12.c 
b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-12.c
index 7625ec5c4b1..505c4cd2c40 100644
--- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-12.c
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-12.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 
--param=riscv-autovec-lmul=dynamic -fno-schedule-insns -fno-schedule-insns2" } 
*/
 
 #include <stdint-gcc.h>
 
-- 
2.36.3

Reply via email to