1. This patch sets the scalar_to_vec cost to 2 instead of 1, since a scalar move instruction is slightly more costly than normal RVV instructions (e.g. vadd.vv).
2. Adjust the scalar_to_vec cost accurately according to the splat value. For example, a value like 32872 needs 2 more scalar instructions, so the cost = 2 (scalar instructions) + 2 (scalar move). We adjust the cost like this since these extra instructions are needed in the vectorized code, whereas they are not needed in the scalar code. After this patch, with either -march=rv64gcv_zvl256b or -march=rv64gcv_zvl4096b, we have optimal codegen: lui a5,%hi(a) li a4,19 sb a4,%lo(a)(a5) li a0,0 ret PR target/113281 gcc/ChangeLog: * config/riscv/riscv-vector-costs.cc (adjust_stmt_cost): Adjust scalar_to_vec cost accurately. (costs::add_stmt_cost): Ditto. * config/riscv/riscv.cc: Ditto. * config/riscv/t-riscv: Ditto. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/pr113209.c: Adapt test. * gcc.target/riscv/rvv/autovec/zve32f-1.c: Ditto. * gcc.dg/vect/costmodel/riscv/rvv/pr113281-1.c: New test. * gcc.dg/vect/costmodel/riscv/rvv/pr113281-2.c: New test. --- gcc/config/riscv/riscv-vector-costs.cc | 50 ++++++++++++++++++- gcc/config/riscv/riscv.cc | 4 +- gcc/config/riscv/t-riscv | 2 +- .../vect/costmodel/riscv/rvv/pr113281-1.c | 18 +++++++ .../vect/costmodel/riscv/rvv/pr113281-2.c | 18 +++++++ .../gcc.target/riscv/rvv/autovec/pr113209.c | 2 +- .../gcc.target/riscv/rvv/autovec/zve32f-1.c | 2 +- 7 files changed, 90 insertions(+), 6 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113281-1.c create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113281-2.c diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc index 58ec0b9b503..fc377435e53 100644 --- a/gcc/config/riscv/riscv-vector-costs.cc +++ b/gcc/config/riscv/riscv-vector-costs.cc @@ -42,6 +42,7 @@ along with GCC; see the file COPYING3. If not see #include "backend.h" #include "tree-data-ref.h" #include "tree-ssa-loop-niter.h" +#include "emit-rtl.h" /* This file should be included last. 
*/ #include "riscv-vector-costs.h" @@ -1055,6 +1056,50 @@ costs::better_main_loop_than_p (const vector_costs *uncast_other) const return vector_costs::better_main_loop_than_p (other); } +/* Adjust vectorization cost after calling + targetm.vectorize.builtin_vectorization_cost. For some statement, we would + like to further fine-grain tweak the cost on top of + targetm.vectorize.builtin_vectorization_cost handling which doesn't have any + information on statement operation codes etc. */ + +static unsigned +adjust_stmt_cost (enum vect_cost_for_stmt kind, + struct _stmt_vec_info *stmt_info, int count, int stmt_cost) +{ + gimple *stmt = stmt_info->stmt; + switch (kind) + { + case scalar_to_vec: { + stmt_cost *= count; + gcall *call = dyn_cast<gcall *> (stmt); + /* Adjust cost by counting the scalar value initialization. */ + unsigned int num + = call ? gimple_call_num_args (call) : gimple_num_ops (stmt); + unsigned int start = call ? 0 : 1; + + for (unsigned int i = start; i < num; i++) + { + tree op = call ? gimple_call_arg (call, i) : gimple_op (stmt, i); + if (TREE_CODE (op) == INTEGER_CST) + { + HOST_WIDE_INT value = tree_fits_shwi_p (op) ? tree_to_shwi (op) + : tree_to_uhwi (op); + /* We don't need to count scalar costs if it + is in range of [-16, 15] since we can use + vmv.v.i. */ + if (!IN_RANGE (value, -16, 15)) + stmt_cost += riscv_const_insns (gen_int_mode (value, Pmode)); + } + /* TODO: We don't count CONST_POLY_INT value for now. */ + } + return stmt_cost; + } + default: + break; + } + return count * stmt_cost; +} + unsigned costs::add_stmt_cost (int count, vect_cost_for_stmt kind, stmt_vec_info stmt_info, slp_tree, tree vectype, @@ -1082,9 +1127,12 @@ costs::add_stmt_cost (int count, vect_cost_for_stmt kind, as one iteration of the VLA loop. 
*/ if (where == vect_body && m_unrolled_vls_niters) m_unrolled_vls_stmts += count * m_unrolled_vls_niters; + + if (vectype) + stmt_cost = adjust_stmt_cost (kind, stmt_info, count, stmt_cost); } - return record_stmt_cost (stmt_info, where, count * stmt_cost); + return record_stmt_cost (stmt_info, where, stmt_cost); } void diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index df9799d9c5e..a14fb36817a 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -366,7 +366,7 @@ static const common_vector_cost rvv_vls_vector_cost = { 1, /* gather_load_cost */ 1, /* scatter_store_cost */ 1, /* vec_to_scalar_cost */ - 1, /* scalar_to_vec_cost */ + 2, /* scalar_to_vec_cost */ 1, /* permute_cost */ 1, /* align_load_cost */ 1, /* align_store_cost */ @@ -382,7 +382,7 @@ static const scalable_vector_cost rvv_vla_vector_cost = { 1, /* gather_load_cost */ 1, /* scatter_store_cost */ 1, /* vec_to_scalar_cost */ - 1, /* scalar_to_vec_cost */ + 2, /* scalar_to_vec_cost */ 1, /* permute_cost */ 1, /* align_load_cost */ 1, /* align_store_cost */ diff --git a/gcc/config/riscv/t-riscv b/gcc/config/riscv/t-riscv index 32de6b851c1..fb2bf1c155f 100644 --- a/gcc/config/riscv/t-riscv +++ b/gcc/config/riscv/t-riscv @@ -73,7 +73,7 @@ riscv-vector-costs.o: $(srcdir)/config/riscv/riscv-vector-costs.cc \ $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TARGET_H) $(FUNCTION_H) \ $(TREE_H) basic-block.h $(RTL_H) gimple.h targhooks.h cfgloop.h \ fold-const.h $(TM_P_H) tree-vectorizer.h gimple-iterator.h bitmap.h \ - ssa.h backend.h tree-data-ref.h tree-ssa-loop-niter.h \ + ssa.h backend.h tree-data-ref.h tree-ssa-loop-niter.h emit-rtl.h \ $(srcdir)/config/riscv/riscv-vector-costs.h $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ $(srcdir)/config/riscv/riscv-vector-costs.cc diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113281-1.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113281-1.c new file mode 100644 index 
00000000000..fdf6ed0334b --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113281-1.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvl256b -mabi=lp64d -O3 -ftree-vectorize" } */ + +unsigned char a; + +int main() { + short b = a = 0; + for (; a != 19; a++) + if (a) + b = 32872 >> a; + + if (b == 0) + return 0; + else + return 1; +} + +/* { dg-final { scan-assembler-not {vset} } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113281-2.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113281-2.c new file mode 100644 index 00000000000..706e19116c9 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113281-2.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvl4096b -mabi=lp64d -O3 -ftree-vectorize --param=riscv-autovec-lmul=m8" } */ + +unsigned char a; + +int main() { + short b = a = 0; + for (; a != 19; a++) + if (a) + b = 32872 >> a; + + if (b == 0) + return 0; + else + return 1; +} + +/* { dg-final { scan-assembler-not {vset} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113209.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113209.c index 081ee369394..70aae151000 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113209.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113209.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-march=rv64gcv_zvl256b -mabi=lp64d -O3" } */ +/* { dg-options "-march=rv64gcv_zvl256b -mabi=lp64d -O3 -fno-vect-cost-model" } */ int b, c, d, f, i, a; int e[1] = {0}; diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f-1.c index ab57e89b1cd..3a00327dfed 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f-1.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f-1.c @@ -3,4 +3,4 @@ #include "template-1.h" -/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 2 "vect" } } */ +/* { dg-final { 
scan-tree-dump-times "vectorized 1 loops in function" 3 "vect" } } */ -- 2.36.3