[PATCH v2] RISC-V: Add per-type reduction costs to the vector cost model

Wang Yaduo Fri, 15 May 2026 01:50:33 -0700

Add per-type reduction costs (i8/i16/i32/i64/f16/f32/f64) to the RISC-V
vector cost model, distinguishing between ordered (fold-left) and
unordered (tree) floating-point reductions.  When a reduction is
detected, the per-type cost replaces the default vec_to_scalar_cost,
similar to AArch64.


Ordered reduction costs scale with the number of elements per vector
register (f16=20, f32=10, f64=5): ordered reductions process elements
sequentially, and wider elements mean fewer elements per register.

Existing VLS ordered reduction tests are updated to use
-mrvv-vector-bits=zvl -fno-vect-cost-model to preserve original
vectorization behavior independent of cost model changes.

gcc/ChangeLog:

        * config/riscv/riscv-protos.h (common_vector_cost): Add per-type
        reduction cost fields: reduc_i8_cost, reduc_i16_cost,
        reduc_i32_cost, reduc_i64_cost, reduc_f16_cost, reduc_f32_cost,
        reduc_f64_cost for unordered reductions, and reduc_f16_ordered_cost,
        reduc_f32_ordered_cost, reduc_f64_ordered_cost for ordered
        (fold-left) reductions.
        * config/riscv/riscv.cc (rvv_vla_vector_cost): Initialize reduction
        cost fields.
        (rvv_vls_vector_cost): Likewise.
        * config/riscv/riscv-vector-costs.cc (costs::adjust_stmt_cost): Add
        reduction detection in the vec_to_scalar case using both
        vect_is_reduction(stmt_info) and vect_is_reduction(node).  Apply
        per-type reduction cost based on element mode and reduction kind
        (ordered vs unordered).

gcc/testsuite/ChangeLog:

        * gcc.dg/vect/costmodel/riscv/rvv/reduc_vla_unordered.c: New test.
        * gcc.dg/vect/costmodel/riscv/rvv/reduc_vla_ordered.c: New test.
        * gcc.dg/vect/costmodel/riscv/rvv/reduc_vls_unordered.c: New test.
        * gcc.dg/vect/costmodel/riscv/rvv/reduc_vls_ordered.c: New test.
        * gcc.target/riscv/rvv/autovec/vls/reduc-19.c: Add
        -mrvv-vector-bits=zvl -fno-vect-cost-model, update expected count.
        * gcc.target/riscv/rvv/autovec/vls/reduc-20.c: Likewise.
        * gcc.target/riscv/rvv/autovec/vls/reduc-21.c: Likewise.
        * gcc.target/riscv/rvv/autovec/vls/wred-3.c: Add
        -mrvv-vector-bits=zvl -fno-vect-cost-model.

Signed-off-by: Wang Yaduo <[email protected]>
---
Changes since v1:
- Removed TODO comment from scalable_vector_cost (Robin #1)
- Adjusted ordered reduction costs to 20/10/5 (Robin #2)
- Moved costmodel tests to gcc.dg/vect/costmodel/riscv/rvv/ (Robin #3)
- Split costmodel tests into 4 files: reduc_vla_unordered, reduc_vla_ordered,
  reduc_vls_unordered, reduc_vls_ordered
- Added vect_is_reduction(node) check to cover SLP path
- Updated existing VLS tests (reduc-19/20/21, wred-3) with
  -mrvv-vector-bits=zvl -fno-vect-cost-model to preserve original
  vectorization behavior

Note on VLS test changes: With only -fno-vect-cost-model (without
-mrvv-vector-bits=zvl), the vectorizer prefers VLA vectorization even for
the known-trip-count loops under
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/.  This seems to conflict
with the intent of the VLS tests.  I added -mrvv-vector-bits=zvl to force
VLS mode, but I'm not sure whether this is the preferred approach.  Should
we instead allow VLA selection here, or is -mrvv-vector-bits=zvl the right
fix?

 gcc/config/riscv/riscv-protos.h               | 20 +++++-
 gcc/config/riscv/riscv-vector-costs.cc        | 70 ++++++++++++++++++-
 gcc/config/riscv/riscv.cc                     | 20 ++++++
 .../costmodel/riscv/rvv/reduc_vla_ordered.c   | 35 ++++++++++
 .../costmodel/riscv/rvv/reduc_vla_unordered.c | 34 +++++++++
 .../costmodel/riscv/rvv/reduc_vls_ordered.c   | 38 ++++++++++
 .../costmodel/riscv/rvv/reduc_vls_unordered.c | 33 +++++++++
 .../riscv/rvv/autovec/vls/reduc-19.c          |  4 +-
 .../riscv/rvv/autovec/vls/reduc-20.c          |  4 +-
 .../riscv/rvv/autovec/vls/reduc-21.c          |  4 +-
 .../gcc.target/riscv/rvv/autovec/vls/wred-3.c |  2 +-
 11 files changed, 252 insertions(+), 12 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/reduc_vla_ordered.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/reduc_vla_unordered.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/reduc_vls_ordered.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/reduc_vls_unordered.c

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 8b362e323..631755f02 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -279,6 +279,24 @@ struct common_vector_cost
 
   /* Cost of an unaligned vector store.  */
   const int unalign_store_cost;
+
+  /* Cost of vector reduction operations (unordered / tree reduction).
+     Indexed by element type.  */
+  const int reduc_i8_cost;
+  const int reduc_i16_cost;
+  const int reduc_i32_cost;
+  const int reduc_i64_cost;
+  const int reduc_f16_cost;
+  const int reduc_f32_cost;
+  const int reduc_f64_cost;
+
+  /* Cost of ordered (fold-left / strict) floating-point reductions.
+     These are significantly more expensive than unordered (tree) reductions
+     because RVV ordered reduction instructions (e.g. vfredosum) process
+     elements sequentially.  */
+  const int reduc_f16_ordered_cost;
+  const int reduc_f32_ordered_cost;
+  const int reduc_f64_ordered_cost;
 };
 
 /* scalable vectorization (VLA) specific cost.  */
@@ -288,8 +306,6 @@ struct scalable_vector_cost : common_vector_cost
     : common_vector_cost (base)
   {}
 
-  /* TODO: We will need more other kinds of vector cost for VLA.
-     E.g. fold_left reduction cost, lanes load/store cost, ..., etc.  */
 };
 
 /* Additional costs for register copies.  Cost is for one register.  */
diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc
index e678e0de7..98d3f1d47 100644
--- a/gcc/config/riscv/riscv-vector-costs.cc
+++ b/gcc/config/riscv/riscv-vector-costs.cc
@@ -1292,9 +1292,73 @@ costs::adjust_stmt_cost (enum vect_cost_for_stmt kind, loop_vec_info loop,
        += (FLOAT_TYPE_P (vectype) ? get_fr2vr_cost () : get_gr2vr_cost ());
       break;
     case vec_to_scalar:
-      stmt_cost
-       += (FLOAT_TYPE_P (vectype) ? get_vr2fr_cost () : get_vr2gr_cost ());
-      break;
+      {
+       /* Detect reduction operations and apply type-specific reduction
+          costs.  The vec_to_scalar cost kind represents the reduction
+          operation itself (e.g. vredsum.vs, vfredosum.vs), so we replace
+          the default vec_to_scalar_cost with a more precise per-type cost.
+          For floating-point reductions, distinguish between ordered
+          (fold-left, e.g. vfredosum) and unordered (tree, e.g. vfredusum)
+          reductions since ordered reductions are significantly more
+          expensive due to sequential processing.  */
+       if (vectype
+           && ((stmt_info && vect_is_reduction (stmt_info))
+               || (node && vect_is_reduction (node))))
+         {
+           const common_vector_cost *common_costs
+             = loop && riscv_vla_mode_p (loop->vector_mode)
+               ? costs->vla : costs->vls;
+
+           bool is_ordered = false;
+           if (FLOAT_TYPE_P (vectype) && loop && node)
+             {
+               int reduc_type = vect_reduc_type (m_vinfo, node);
+               is_ordered = (reduc_type == FOLD_LEFT_REDUCTION);
+             }
+
+           int reduc_cost = 0;
+           switch (GET_MODE_INNER (TYPE_MODE (vectype)))
+             {
+             case E_QImode:
+               reduc_cost = common_costs->reduc_i8_cost;
+               break;
+             case E_HImode:
+               reduc_cost = common_costs->reduc_i16_cost;
+               break;
+             case E_SImode:
+               reduc_cost = common_costs->reduc_i32_cost;
+               break;
+             case E_DImode:
+               reduc_cost = common_costs->reduc_i64_cost;
+               break;
+             case E_HFmode:
+             case E_BFmode:
+               reduc_cost = is_ordered
+                            ? common_costs->reduc_f16_ordered_cost
+                            : common_costs->reduc_f16_cost;
+               break;
+             case E_SFmode:
+               reduc_cost = is_ordered
+                            ? common_costs->reduc_f32_ordered_cost
+                            : common_costs->reduc_f32_cost;
+               break;
+             case E_DFmode:
+               reduc_cost = is_ordered
+                            ? common_costs->reduc_f64_ordered_cost
+                            : common_costs->reduc_f64_cost;
+               break;
+             default:
+               break;
+             }
+
+           if (reduc_cost)
+             stmt_cost = reduc_cost;
+         }
+
+       stmt_cost
+         += (FLOAT_TYPE_P (vectype) ? get_vr2fr_cost () : get_vr2gr_cost ());
+       break;
+      }
     case vector_load:
     case vector_store:
        {
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 681b816d2..cb0c7ef3a 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -396,6 +396,16 @@ static const common_vector_cost rvv_vls_vector_cost = {
   1, /* align_store_cost  */
   2, /* unalign_load_cost  */
   2, /* unalign_store_cost  */
+  2, /* reduc_i8_cost  */
+  2, /* reduc_i16_cost  */
+  2, /* reduc_i32_cost  */
+  2, /* reduc_i64_cost  */
+  2, /* reduc_f16_cost  */
+  2, /* reduc_f32_cost  */
+  2, /* reduc_f64_cost  */
+  20, /* reduc_f16_ordered_cost  */
+  10, /* reduc_f32_ordered_cost  */
+  5, /* reduc_f64_ordered_cost  */
 };
 
 /* RVV costs for VLA vector operations.  */
@@ -419,6 +429,16 @@ static const scalable_vector_cost rvv_vla_vector_cost = {
     1, /* align_store_cost  */
     2, /* unalign_load_cost  */
     2, /* unalign_store_cost  */
+    2, /* reduc_i8_cost  */
+    2, /* reduc_i16_cost  */
+    2, /* reduc_i32_cost  */
+    2, /* reduc_i64_cost  */
+    2, /* reduc_f16_cost  */
+    2, /* reduc_f32_cost  */
+    2, /* reduc_f64_cost  */
+    20, /* reduc_f16_ordered_cost  */
+    10, /* reduc_f32_ordered_cost  */
+    5, /* reduc_f64_ordered_cost  */
   },
 };
 
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/reduc_vla_ordered.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/reduc_vla_ordered.c
new file mode 100644
index 000000000..183ea9465
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/reduc_vla_ordered.c
@@ -0,0 +1,35 @@
+/* Verify that the vector cost model handles ordered (fold-left / strict)
+   floating-point reductions for all FP element types (VLA).  */
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh -mabi=lp64d -O3 -mrvv-vector-bits=scalable -fdump-tree-vect-details" } */
+
+#include <stdint-gcc.h>
+
+#define DEF_REDUC_PLUS(TYPE)                           \
+  TYPE __attribute__ ((noinline, noclone))             \
+  reduc_plus_##TYPE (TYPE *restrict a, int n)          \
+  {                                                    \
+    TYPE r = 0;                                                \
+    for (int i = 0; i < n; ++i)                                \
+      r += a[i];                                       \
+    return r;                                          \
+  }
+
+DEF_REDUC_PLUS (_Float16)
+DEF_REDUC_PLUS (float)
+DEF_REDUC_PLUS (double)
+
+/* Without -ffast-math, FP reductions use ordered (fold-left) mode.
+   With high ordered reduction costs, vectorization may be rejected as
+   unprofitable, but the cost model should still compute and report
+   the correct per-type ordered reduction costs in the vect dump.
+
+   Verify the ordered reduction cost is reflected in the cost model dump.
+   For ordered reductions: reduc_f*_ordered_cost + vr2fr (2),
+   where reduc_f*_ordered_cost replaces the default vec_to_scalar_cost.
+   f16: reduc_f16_ordered_cost (20) + vr2fr (2) = 22
+   f32: reduc_f32_ordered_cost (10) + vr2fr (2) = 12
+   f64: reduc_f64_ordered_cost (5)  + vr2fr (2) = 7  */
+/* { dg-final { scan-tree-dump "vec_to_scalar costs 22" "vect" } } */
+/* { dg-final { scan-tree-dump "vec_to_scalar costs 12" "vect" } } */
+/* { dg-final { scan-tree-dump "vec_to_scalar costs 7" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/reduc_vla_unordered.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/reduc_vla_unordered.c
new file mode 100644
index 000000000..07df3adfc
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/reduc_vla_unordered.c
@@ -0,0 +1,34 @@
+/* Verify that the vector cost model handles unordered (tree) reductions
+   for all integer and floating-point element types (VLA).  */
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh -mabi=lp64d -O3 -mrvv-vector-bits=scalable -ffast-math -fdump-tree-vect-details" } */
+
+#include <stdint-gcc.h>
+
+#define DEF_REDUC_PLUS(TYPE)                   \
+TYPE __attribute__ ((noinline, noclone))       \
+reduc_plus_##TYPE (TYPE *restrict a, int n)    \
+{                                              \
+  TYPE r = 0;                                  \
+  for (int i = 0; i < n; ++i)                  \
+    r += a[i];                                 \
+  return r;                                    \
+}
+
+DEF_REDUC_PLUS (int8_t)
+DEF_REDUC_PLUS (int16_t)
+DEF_REDUC_PLUS (int32_t)
+DEF_REDUC_PLUS (int64_t)
+DEF_REDUC_PLUS (_Float16)
+DEF_REDUC_PLUS (float)
+DEF_REDUC_PLUS (double)
+
+/* All loops should be vectorized with the cost model enabled.  */
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 7 "vect" } } */
+/* { dg-final { scan-assembler-times {vredsum\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 4 } } */
+/* { dg-final { scan-assembler-times {vfredusum\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 3 } } */
+
+/* Verify the reduction cost is reflected in the cost model dump.
+   For unordered reductions: reduc_*_cost (2) + vr2gr/vr2fr (2) = 4,
+   where reduc_*_cost replaces the default vec_to_scalar_cost.  */
+/* { dg-final { scan-tree-dump "vec_to_scalar costs 4" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/reduc_vls_ordered.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/reduc_vls_ordered.c
new file mode 100644
index 000000000..b47e5359a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/reduc_vls_ordered.c
@@ -0,0 +1,38 @@
+/* Verify that the vector cost model handles ordered (fold-left / strict)
+   floating-point reductions in VLS mode.  */
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -mrvv-max-lmul=m8 -fdump-tree-vect-details" } */
+
+#include <stdint-gcc.h>
+
+#define DEF_REDUC_PLUS(TYPE, NUM)                      \
+  TYPE __attribute__ ((noinline, noclone))             \
+  reduc_plus_##TYPE##_##NUM (TYPE *restrict a)         \
+  {                                                    \
+    TYPE r = 0;                                                \
+    for (int i = 0; i < NUM; ++i)                      \
+      r += a[i];                                       \
+    return r;                                          \
+  }
+
+/* Without -ffast-math, FP reductions default to ordered.  */
+DEF_REDUC_PLUS (_Float16, 4)
+DEF_REDUC_PLUS (_Float16, 8)
+DEF_REDUC_PLUS (float, 4)
+DEF_REDUC_PLUS (float, 8)
+DEF_REDUC_PLUS (double, 4)
+DEF_REDUC_PLUS (double, 8)
+
+/* { dg-final { scan-assembler-not {csrr} } } */
+
+/* Verify the ordered reduction cost is reflected in the cost model dump.
+   For ordered reductions: reduc_f*_ordered_cost + vr2fr (2),
+   where reduc_f*_ordered_cost replaces the default vec_to_scalar_cost.
+   f16: reduc_f16_ordered_cost (20) + vr2fr (2) = 22
+   f32: reduc_f32_ordered_cost (10) + vr2fr (2) = 12
+   f64: reduc_f64_ordered_cost (5)  + vr2fr (2) = 7
+   Note: ordered FP reductions may not be vectorized with the cost model
+   enabled (cost too high), but the costs are still reported in the dump.  */
+/* { dg-final { scan-tree-dump "vec_to_scalar costs 22" "vect" } } */
+/* { dg-final { scan-tree-dump "vec_to_scalar costs 12" "vect" } } */
+/* { dg-final { scan-tree-dump "vec_to_scalar costs 7" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/reduc_vls_unordered.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/reduc_vls_unordered.c
new file mode 100644
index 000000000..0bfe3cf42
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/reduc_vls_unordered.c
@@ -0,0 +1,33 @@
+/* Verify that the vector cost model handles unordered (tree) reductions
+   for integer types in VLS mode.  */
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -mrvv-max-lmul=m8 -fdump-tree-vect-details" } */
+
+#include <stdint-gcc.h>
+
+#define DEF_REDUC_PLUS(TYPE, NUM)                      \
+  TYPE __attribute__ ((noinline, noclone))             \
+  reduc_plus_##TYPE##_##NUM (TYPE *restrict a)         \
+  {                                                    \
+    TYPE r = 0;                                                \
+    for (int i = 0; i < NUM; ++i)                      \
+      r += a[i];                                       \
+    return r;                                          \
+  }
+
+DEF_REDUC_PLUS (int8_t, 4)
+DEF_REDUC_PLUS (int8_t, 8)
+DEF_REDUC_PLUS (int16_t, 4)
+DEF_REDUC_PLUS (int16_t, 8)
+DEF_REDUC_PLUS (int32_t, 4)
+DEF_REDUC_PLUS (int32_t, 8)
+DEF_REDUC_PLUS (int64_t, 4)
+DEF_REDUC_PLUS (int64_t, 8)
+
+/* { dg-final { scan-assembler-times {vredsum\.vs} 8 } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+
+/* Verify the reduction cost is reflected in the cost model dump.
+   For integer unordered reductions: reduc_i*_cost (2) + vr2gr (2) = 4,
+   where reduc_i*_cost replaces the default vec_to_scalar_cost.  */
+/* { dg-final { scan-tree-dump "vec_to_scalar costs 4" "vect" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/reduc-19.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/reduc-19.c
index 5a4df4824..e8c2b28e0 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/reduc-19.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/reduc-19.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -mrvv-max-lmul=m8 -fdump-tree-optimized-details" } */
+/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -mrvv-max-lmul=m8 -mrvv-vector-bits=zvl -fno-vect-cost-model -fdump-tree-optimized-details" } */
 
 #include "def.h"
 
@@ -14,7 +14,7 @@ DEF_REDUC_PLUS (_Float16, 512)
 DEF_REDUC_PLUS (_Float16, 1024)
 DEF_REDUC_PLUS (_Float16, 2048)
 
-/* { dg-final { scan-assembler-times {vfredosum\.vs} 9 } } */
+/* { dg-final { scan-assembler-times {vfredosum\.vs} 10 } } */
 /* { dg-final { scan-assembler-not {csrr} } } */
 /* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
 /* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/reduc-20.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/reduc-20.c
index daf9c8a32..59f0a3c58 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/reduc-20.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/reduc-20.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -mrvv-max-lmul=m8 -fdump-tree-optimized-details" } */
+/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -mrvv-max-lmul=m8 -mrvv-vector-bits=zvl -fno-vect-cost-model -fdump-tree-optimized-details" } */
 
 #include "def.h"
 
@@ -13,7 +13,7 @@ DEF_REDUC_PLUS (float, 256)
 DEF_REDUC_PLUS (float, 512)
 DEF_REDUC_PLUS (float, 1024)
 
-/* { dg-final { scan-assembler-times {vfredosum\.vs} 8 } } */
+/* { dg-final { scan-assembler-times {vfredosum\.vs} 9 } } */
 /* { dg-final { scan-assembler-not {csrr} } } */
 /* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
 /* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/reduc-21.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/reduc-21.c
index d1b8c2535..f12a95b95 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/reduc-21.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/reduc-21.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -mrvv-max-lmul=m8 -fdump-tree-optimized-details" } */
+/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -mrvv-max-lmul=m8 -mrvv-vector-bits=zvl -fno-vect-cost-model -fdump-tree-optimized-details" } */
 
 #include "def.h"
 
@@ -12,7 +12,7 @@ DEF_REDUC_PLUS (float, 128)
 DEF_REDUC_PLUS (float, 256)
 DEF_REDUC_PLUS (float, 512)
 
-/* { dg-final { scan-assembler-times {vfredosum\.vs} 7 } } */
+/* { dg-final { scan-assembler-times {vfredosum\.vs} 8 } } */
 /* { dg-final { scan-assembler-not {csrr} } } */
 /* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
 /* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/wred-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/wred-3.c
index 6e9456b23..af990d208 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/wred-3.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/wred-3.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -mrvv-max-lmul=m8 -fdump-tree-optimized" } */
+/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -mrvv-max-lmul=m8 -mrvv-vector-bits=zvl -fno-vect-cost-model -fdump-tree-optimized" } */
 
 #include "wred-2.c"
 
-- 
2.47.1

[PATCH v2] RISC-V: Add per-type reduction costs to the vector cost model

Reply via email to