Hi, this has been sitting on my local tree — I've been wanting to post it for a while but somehow forgot.
This patch makes segment loads and stores more expensive. It adds segment_load and segment_store cost fields to the common vector costs and adds handling to adjust_stmt_cost. In the future we could handle this in a more fine-grained manner but let's start somehow. Regtested on rv64. Regards Robin gcc/ChangeLog: * config/riscv/riscv-protos.h (struct common_vector_cost): Add segment_[load/store]_cost. * config/riscv/riscv-vector-costs.cc (costs::adjust_stmt_cost): Handle segment loads/stores. * config/riscv/riscv.cc: Initialize segment_[load/store]_cost to 1. gcc/testsuite/ChangeLog: * gcc.dg/vect/costmodel/riscv/rvv/pr113112-4.c: Expect m4 instead of m2. --- gcc/config/riscv/riscv-protos.h | 4 + gcc/config/riscv/riscv-vector-costs.cc | 127 ++++++++++++------ gcc/config/riscv/riscv.cc | 4 + .../vect/costmodel/riscv/rvv/pr113112-4.c | 4 +- 4 files changed, 95 insertions(+), 44 deletions(-) diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index 80efdf2b7e5..2e8ab9990a8 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -218,6 +218,10 @@ struct common_vector_cost const int gather_load_cost; const int scatter_store_cost; + /* Segment load/store cost. */ + const int segment_load_cost; + const int segment_store_cost; + /* Cost of a vector-to-scalar operation. */ const int vec_to_scalar_cost; diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc index adf9c197df5..d3c12444773 100644 --- a/gcc/config/riscv/riscv-vector-costs.cc +++ b/gcc/config/riscv/riscv-vector-costs.cc @@ -1043,6 +1043,24 @@ costs::better_main_loop_than_p (const vector_costs *uncast_other) const return vector_costs::better_main_loop_than_p (other); } +/* Returns the group size i.e. the number of vectors to be loaded by a + segmented load/store instruction. Return 0 if it is no segmented + load/store. 
*/ +static int +segment_loadstore_group_size (enum vect_cost_for_stmt kind, + stmt_vec_info stmt_info) +{ + if ((kind == vector_load || kind == vector_store) + && STMT_VINFO_DATA_REF (stmt_info)) + { + stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info); + if (stmt_info + && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_LOAD_STORE_LANES) + return DR_GROUP_SIZE (stmt_info); + } + return 0; +} + /* Adjust vectorization cost after calling riscv_builtin_vectorization_cost. For some statement, we would like to further fine-grain tweak the cost on top of riscv_builtin_vectorization_cost handling which doesn't have any @@ -1067,55 +1085,80 @@ costs::adjust_stmt_cost (enum vect_cost_for_stmt kind, loop_vec_info loop, case vector_load: case vector_store: { - /* Unit-stride vector loads and stores do not have offset addressing - as opposed to scalar loads and stores. - If the address depends on a variable we need an additional - add/sub for each load/store in the worst case. */ - if (stmt_info && stmt_info->stmt) + if (stmt_info && stmt_info->stmt && STMT_VINFO_DATA_REF (stmt_info)) { - data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); - class loop *father = stmt_info->stmt->bb->loop_father; - if (!loop && father && !father->inner && father->superloops) + int group_size; + if ((group_size + = segment_loadstore_group_size (kind, stmt_info)) > 1) { - tree ref; - if (TREE_CODE (dr->ref) != MEM_REF - || !(ref = TREE_OPERAND (dr->ref, 0)) - || TREE_CODE (ref) != SSA_NAME) - break; + /* Segment loads and stores. When the group size is > 1 + the vectorizer will add a vector load/store statement for + each vector in the group. Note that STMT_COST is + overwritten here rather than adjusted. */ + if (riscv_v_ext_vector_mode_p (loop->vector_mode)) + stmt_cost + = (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)) + ? costs->vla->segment_load_cost + : costs->vla->segment_store_cost); + else + stmt_cost + = (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)) + ? 
costs->vls->segment_load_cost + : costs->vls->segment_store_cost); + break; + /* TODO: Indexed and ordered/unordered cost. */ + } + else + { + /* Unit-stride vector loads and stores do not have offset + addressing as opposed to scalar loads and stores. + If the address depends on a variable we need an additional + add/sub for each load/store in the worst case. */ + data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); + class loop *father = stmt_info->stmt->bb->loop_father; + if (!loop && father && !father->inner && father->superloops) + { + tree ref; + if (TREE_CODE (dr->ref) != MEM_REF + || !(ref = TREE_OPERAND (dr->ref, 0)) + || TREE_CODE (ref) != SSA_NAME) + break; - if (SSA_NAME_IS_DEFAULT_DEF (ref)) - break; + if (SSA_NAME_IS_DEFAULT_DEF (ref)) + break; - if (memrefs.contains ({ref, cst0})) - break; + if (memrefs.contains ({ref, cst0})) + break; - memrefs.add ({ref, cst0}); + memrefs.add ({ref, cst0}); - /* In case we have not seen REF before and the base address - is a pointer operation try a bit harder. */ - tree base = DR_BASE_ADDRESS (dr); - if (TREE_CODE (base) == POINTER_PLUS_EXPR - || TREE_CODE (base) == POINTER_DIFF_EXPR) - { - /* Deconstruct BASE's first operand. If it is a binary - operation, i.e. a base and an "offset" store this - pair. Only increase the stmt_cost if we haven't seen - it before. */ - tree argp = TREE_OPERAND (base, 1); - typedef std::pair<tree, tree> addr_pair; - addr_pair pair; - if (TREE_CODE_CLASS (TREE_CODE (argp)) == tcc_binary) + /* In case we have not seen REF before and the base + address is a pointer operation try a bit harder. 
*/ + tree base = DR_BASE_ADDRESS (dr); + if (TREE_CODE (base) == POINTER_PLUS_EXPR + || TREE_CODE (base) == POINTER_DIFF_EXPR) { - tree argp0 = tree_strip_nop_conversions - (TREE_OPERAND (argp, 0)); - tree argp1 = TREE_OPERAND (argp, 1); - pair = addr_pair (argp0, argp1); - if (memrefs.contains (pair)) - break; - - memrefs.add (pair); - stmt_cost += builtin_vectorization_cost (scalar_stmt, - NULL_TREE, 0); + /* Deconstruct BASE's first operand. If it is a + binary operation, i.e. a base and an "offset" + store this pair. Only increase the stmt_cost if + we haven't seen it before. */ + tree argp = TREE_OPERAND (base, 1); + typedef std::pair<tree, tree> addr_pair; + addr_pair pair; + if (TREE_CODE_CLASS (TREE_CODE (argp)) == tcc_binary) + { + tree argp0 = tree_strip_nop_conversions + (TREE_OPERAND (argp, 0)); + tree argp1 = TREE_OPERAND (argp, 1); + pair = addr_pair (argp0, argp1); + if (memrefs.contains (pair)) + break; + + memrefs.add (pair); + stmt_cost + += builtin_vectorization_cost (scalar_stmt, + NULL_TREE, 0); + } } } } diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 5e984ee2a55..6c2f0ec34f4 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -365,6 +365,8 @@ static const common_vector_cost rvv_vls_vector_cost = { 1, /* fp_stmt_cost */ 1, /* gather_load_cost */ 1, /* scatter_store_cost */ + 2, /* segment_load_cost */ + 2, /* segment_store_cost */ 1, /* vec_to_scalar_cost */ 1, /* scalar_to_vec_cost */ 1, /* permute_cost */ @@ -381,6 +383,8 @@ static const scalable_vector_cost rvv_vla_vector_cost = { 1, /* fp_stmt_cost */ 1, /* gather_load_cost */ 1, /* scatter_store_cost */ + 2, /* segment_load_cost */ + 2, /* segment_store_cost */ 1, /* vec_to_scalar_cost */ 1, /* scalar_to_vec_cost */ 1, /* permute_cost */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113112-4.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113112-4.c index 5c55a66ed77..bdeedaf5224 100644 --- 
a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113112-4.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113112-4.c @@ -21,8 +21,8 @@ void move_replacements (rtx *x, rtx *y, int n_replacements) } } -/* { dg-final { scan-assembler {e64,m2} } } */ -/* { dg-final { scan-assembler-not {e64,m4} } } */ +/* { dg-final { scan-assembler-not {e64,m2} } } */ +/* { dg-final { scan-assembler {e64,m4} } } */ /* { dg-final { scan-assembler-not {jr} } } */ /* { dg-final { scan-assembler {ret} } } */ /* { dg-final { scan-assembler-not {sp} } } */ -- 2.43.2