https://gcc.gnu.org/g:3c73ba85910beb7224a198fcc2da989785863255
commit 3c73ba85910beb7224a198fcc2da989785863255 Author: Robin Dapp <[email protected]> Date: Wed Dec 10 19:02:11 2025 +0100 RISC-V: -mrvv-max-lmul=conv-dynamic [PR122846]. As discussed in the patchwork sync this patch adds a dynamic LMUL mode that sets the LMUL to the ratio of largest/smallest type size in a loop, with the maximum being LMUL8. This is supposed to imitate what other architectures implicitly do by vec_unpack_hi/lo. I have done cursory testing and obviously more coverage would be preferred. PR target/122846 gcc/ChangeLog: * config/riscv/riscv-opts.h (enum rvv_max_lmul_enum): Add RVV_CONV_DYNAMIC. (TARGET_MAX_LMUL): Ditto. * config/riscv/riscv-string.cc (use_vector_stringop_p): Use LMUL1 for RVV_CONV_DYNAMIC. (expand_rawmemchr): Ditto. (expand_strcmp): Ditto. (check_vectorise_memory_operation): Ditto. * config/riscv/riscv-vector-costs.cc (get_smallest_mode): New function. (compute_lmul_from_conversion_ratio): Calculate LMUL from largest/smallest type. (costs::has_unexpected_spills_p): Split. (costs::compute_live_ranges_and_lmul): Compute smallest type and call new function. (costs::cleanup_live_range_data): New function. (costs::compute_conversion_dynamic_lmul): New function. (costs::record_potential_unexpected_spills): Use new function. (costs::better_main_loop_than_p): Allow appropriate LMUL. * config/riscv/riscv-vector-costs.h: Declare. * config/riscv/riscv.opt: New option -mrvv-max-lmul=conv-dynamic. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/dyn-lmul-conv-1.c: New test. * gcc.target/riscv/rvv/autovec/dyn-lmul-conv-2.c: New test. * gcc.target/riscv/rvv/autovec/pr122846.c: New test. 
(cherry picked from commit 45bd656eb5ce5850a6f7de34850383a326895bb8) Diff: --- gcc/config/riscv/riscv-opts.h | 8 +- gcc/config/riscv/riscv-string.cc | 26 ++- gcc/config/riscv/riscv-vector-costs.cc | 232 ++++++++++++++++----- gcc/config/riscv/riscv-vector-costs.h | 17 +- gcc/config/riscv/riscv.opt | 3 + .../gcc.target/riscv/rvv/autovec/dyn-lmul-conv-1.c | 42 ++++ .../gcc.target/riscv/rvv/autovec/dyn-lmul-conv-2.c | 43 ++++ .../gcc.target/riscv/rvv/autovec/pr122846.c | 14 ++ 8 files changed, 325 insertions(+), 60 deletions(-) diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h index 9b92a965e27f..e806e66e11ec 100644 --- a/gcc/config/riscv/riscv-opts.h +++ b/gcc/config/riscv/riscv-opts.h @@ -86,7 +86,10 @@ enum rvv_max_lmul_enum { RVV_M4 = 4, RVV_M8 = 8, /* For dynamic LMUL, we compare COST start with LMUL8. */ - RVV_DYNAMIC = 9 + RVV_DYNAMIC = 9, + /* For dynamic LMUL based on conversions, set LMUL based on + type size ratio. */ + RVV_CONV_DYNAMIC = 10 }; enum riscv_multilib_select_kind { @@ -155,7 +158,8 @@ enum rvv_vector_bits_enum { /* The maximum LMUL according to user configuration. */ #define TARGET_MAX_LMUL \ - (int) (rvv_max_lmul == RVV_DYNAMIC ? RVV_M8 : rvv_max_lmul) + (int) ((rvv_max_lmul == RVV_DYNAMIC || rvv_max_lmul == RVV_CONV_DYNAMIC) \ + ? RVV_M8 : rvv_max_lmul) /* TLS types. 
*/ enum riscv_tls_type { diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc index d888dac8e5f7..adb3f5429736 100644 --- a/gcc/config/riscv/riscv-string.cc +++ b/gcc/config/riscv/riscv-string.cc @@ -1089,13 +1089,17 @@ use_vector_stringop_p (struct stringop_info &info, HOST_WIDE_INT max_ew, if (!TARGET_VECTOR || !(stringop_strategy & STRATEGY_VECTOR)) return false; + int max_lmul = TARGET_MAX_LMUL; + if (rvv_max_lmul == RVV_CONV_DYNAMIC) + max_lmul = RVV_M1; + if (CONST_INT_P (length_in)) { HOST_WIDE_INT length = INTVAL (length_in); /* If the VLEN and preferred LMUL allow the entire block to be copied in one go then no loop is needed. */ - if (known_le (length, BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL)) + if (known_le (length, BYTES_PER_RISCV_VECTOR * max_lmul)) { need_loop = false; @@ -1130,10 +1134,10 @@ use_vector_stringop_p (struct stringop_info &info, HOST_WIDE_INT max_ew, poly_int64 nunits; if (need_loop) - per_iter = BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL; + per_iter = BYTES_PER_RISCV_VECTOR * max_lmul; else per_iter = length; - /* BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL may not be divisible by + /* BYTES_PER_RISCV_VECTOR * MAX_LMUL may not be divisible by this potential_ew. */ if (!multiple_p (per_iter, potential_ew, &nunits)) continue; @@ -1164,7 +1168,7 @@ use_vector_stringop_p (struct stringop_info &info, HOST_WIDE_INT max_ew, pointless. Still, by choosing a lower LMUL factor that still allows an entire transfer, we can reduce register pressure. 
*/ - for (unsigned lmul = 1; lmul < TARGET_MAX_LMUL; lmul <<= 1) + for (int lmul = 1; lmul < max_lmul; lmul <<= 1) if (known_le (length * BITS_PER_UNIT, TARGET_MIN_VLEN * lmul) && multiple_p (BYTES_PER_RISCV_VECTOR * lmul, potential_ew, &mode_units) @@ -1177,9 +1181,9 @@ use_vector_stringop_p (struct stringop_info &info, HOST_WIDE_INT max_ew, if (vmode != VOIDmode) break; - /* BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL will at least be divisible + /* BYTES_PER_RISCV_VECTOR * MAX_LMUL will at least be divisible by potential_ew 1, so this should succeed eventually. */ - if (multiple_p (BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL, + if (multiple_p (BYTES_PER_RISCV_VECTOR * max_lmul, potential_ew, &mode_units) && riscv_vector::get_vector_mode (elem_mode, mode_units).exists (&vmode)) @@ -1195,7 +1199,7 @@ use_vector_stringop_p (struct stringop_info &info, HOST_WIDE_INT max_ew, } else { - gcc_assert (get_lmul_mode (QImode, TARGET_MAX_LMUL).exists (&vmode)); + gcc_assert (get_lmul_mode (QImode, max_lmul).exists (&vmode)); } /* A memcpy libcall in the worst case takes 3 instructions to prepare the @@ -1356,6 +1360,8 @@ expand_rawmemchr (machine_mode mode, rtx dst, rtx haystack, rtx needle, unsigned int isize = GET_MODE_SIZE (mode).to_constant (); int lmul = TARGET_MAX_LMUL; + if (rvv_max_lmul == RVV_CONV_DYNAMIC) + lmul = RVV_M1; poly_int64 nunits = exact_div (BYTES_PER_RISCV_VECTOR * lmul, isize); machine_mode vmode; @@ -1455,6 +1461,8 @@ expand_strcmp (rtx result, rtx src1, rtx src2, rtx nbytes, machine_mode mode = E_QImode; unsigned int isize = GET_MODE_SIZE (mode).to_constant (); int lmul = TARGET_MAX_LMUL; + if (rvv_max_lmul == RVV_CONV_DYNAMIC) + lmul = RVV_M1; poly_int64 nunits = exact_div (BYTES_PER_RISCV_VECTOR * lmul, isize); machine_mode vmode; @@ -1606,7 +1614,9 @@ check_vectorise_memory_operation (rtx length_in, HOST_WIDE_INT &lmul_out) if (rvv_max_lmul != RVV_DYNAMIC) { lmul_out = TARGET_MAX_LMUL; - return (length <= ((TARGET_MAX_LMUL * TARGET_MIN_VLEN) / 8)); + 
if (rvv_max_lmul == RVV_CONV_DYNAMIC) + lmul_out = RVV_M1; + return (length <= ((lmul_out * TARGET_MIN_VLEN) / 8)); } /* Find smallest lmul large enough for entire op. */ diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc index d4edba57e842..2bded3aff79f 100644 --- a/gcc/config/riscv/riscv-vector-costs.cc +++ b/gcc/config/riscv/riscv-vector-costs.cc @@ -258,6 +258,14 @@ get_biggest_mode (machine_mode mode1, machine_mode mode2) return mode1_size >= mode2_size ? mode1 : mode2; } +static machine_mode +get_smallest_mode (machine_mode mode1, machine_mode mode2) +{ + unsigned int mode1_size = GET_MODE_BITSIZE (mode1).to_constant (); + unsigned int mode2_size = GET_MODE_BITSIZE (mode2).to_constant (); + return mode1_size <= mode2_size ? mode1 : mode2; +} + /* Return true if OP is invariant. */ static bool @@ -361,9 +369,11 @@ machine_mode costs::compute_local_live_ranges ( loop_vec_info loop_vinfo, const hash_map<basic_block, vec<stmt_point>> &program_points_per_bb, - hash_map<basic_block, hash_map<tree, pair>> &live_ranges_per_bb) + hash_map<basic_block, hash_map<tree, pair>> &live_ranges_per_bb, + machine_mode *smallest_mode_out) { machine_mode biggest_mode = QImode; + machine_mode smallest_mode = TImode; class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); if (!program_points_per_bb.is_empty ()) { @@ -394,8 +404,12 @@ costs::compute_local_live_ranges ( if (variable_vectorized_p (loop, program_point.stmt_info, *node, lhs, true)) { - biggest_mode = get_biggest_mode (biggest_mode, - TYPE_MODE (TREE_TYPE (lhs))); + biggest_mode + = get_biggest_mode (biggest_mode, + TYPE_MODE (TREE_TYPE (lhs))); + smallest_mode + = get_smallest_mode (smallest_mode, + TYPE_MODE (TREE_TYPE (lhs))); bool existed_p = false; pair &live_range = live_ranges->get_or_insert (lhs, &existed_p); @@ -415,6 +429,9 @@ costs::compute_local_live_ranges ( biggest_mode = get_biggest_mode (biggest_mode, TYPE_MODE (TREE_TYPE (var))); + smallest_mode + = get_smallest_mode 
(smallest_mode, + TYPE_MODE (TREE_TYPE (var))); bool existed_p = false; pair &live_range = live_ranges->get_or_insert (var, &existed_p); @@ -446,6 +463,8 @@ costs::compute_local_live_ranges ( (*r).second = MAX (point, (*r).second); biggest_mode = get_biggest_mode ( biggest_mode, TYPE_MODE (TREE_TYPE (arg))); + smallest_mode = get_smallest_mode ( + smallest_mode, TYPE_MODE (TREE_TYPE (arg))); } } else @@ -465,8 +484,14 @@ costs::compute_local_live_ranges ( } } if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, "Biggest mode = %s\n", - GET_MODE_NAME (biggest_mode)); + { + dump_printf_loc (MSG_NOTE, vect_location, "Biggest mode = %s\n", + GET_MODE_NAME (biggest_mode)); + dump_printf_loc (MSG_NOTE, vect_location, "Smallest mode = %s\n", + GET_MODE_NAME (smallest_mode)); + } + if (smallest_mode_out) + *smallest_mode_out = smallest_mode; return biggest_mode; } @@ -640,6 +665,25 @@ compute_estimated_lmul (loop_vec_info loop_vinfo, machine_mode mode) return 0; } +/* Compute LMUL based on the ratio of biggest to smallest type size. + This is used for RVV_CONV_DYNAMIC. */ +static int +compute_lmul_from_conversion_ratio (machine_mode biggest_mode, + machine_mode smallest_mode) +{ + gcc_assert (GET_MODE_BITSIZE (biggest_mode).is_constant ()); + gcc_assert (GET_MODE_BITSIZE (smallest_mode).is_constant ()); + + unsigned int biggest_size = GET_MODE_BITSIZE (biggest_mode).to_constant (); + unsigned int smallest_size = GET_MODE_BITSIZE (smallest_mode).to_constant (); + + int lmul = biggest_size / smallest_size; + lmul = std::min (lmul, (int) RVV_M8); + lmul = std::max (lmul, (int) RVV_M1); + + return lmul; +} + /* Update the live ranges according PHI. Loop: @@ -826,56 +870,37 @@ costs::update_local_live_ranges ( } } -/* Compute the maximum live V_REGS. */ -bool -costs::has_unexpected_spills_p (loop_vec_info loop_vinfo) +/* Helper to compute live ranges, modes, and LMUL. 
*/ +void +costs::compute_live_ranges_and_lmul (loop_vec_info loop_vinfo, + hash_map<basic_block, vec<stmt_point>> &program_points_per_bb, + hash_map<basic_block, hash_map<tree, pair>> &live_ranges_per_bb, + machine_mode &biggest_mode, machine_mode &smallest_mode, int &lmul) { - /* Compute local program points. - It's a fast and effective computation. */ - hash_map<basic_block, vec<stmt_point>> program_points_per_bb; compute_local_program_points (loop_vinfo, program_points_per_bb); - /* Compute local live ranges. */ - hash_map<basic_block, hash_map<tree, pair>> live_ranges_per_bb; - machine_mode biggest_mode - = compute_local_live_ranges (loop_vinfo, program_points_per_bb, - live_ranges_per_bb); + smallest_mode = TImode; + biggest_mode = compute_local_live_ranges (loop_vinfo, program_points_per_bb, + live_ranges_per_bb, &smallest_mode); - /* Update live ranges according to PHI. */ update_local_live_ranges (loop_vinfo, program_points_per_bb, live_ranges_per_bb, &biggest_mode); - int lmul = compute_estimated_lmul (loop_vinfo, biggest_mode); + if (rvv_max_lmul == RVV_CONV_DYNAMIC) + lmul = compute_lmul_from_conversion_ratio (biggest_mode, smallest_mode); + else + lmul = compute_estimated_lmul (loop_vinfo, biggest_mode); + gcc_assert (lmul <= RVV_M8); - /* TODO: We calculate the maximum live vars base on current STMTS - sequence. We can support live range shrink if it can give us - big improvement in the future. */ - if (lmul > RVV_M1) - { - if (!live_ranges_per_bb.is_empty ()) - { - unsigned int max_nregs = 0; - for (hash_map<basic_block, hash_map<tree, pair>>::iterator iter - = live_ranges_per_bb.begin (); - iter != live_ranges_per_bb.end (); ++iter) - { - basic_block bb = (*iter).first; - unsigned int max_point - = (*program_points_per_bb.get (bb)).length () + 1; - if ((*iter).second.is_empty ()) - continue; - /* We prefer larger LMUL unless it causes register spillings. 
*/ - unsigned int nregs - = max_number_of_live_regs (loop_vinfo, bb, (*iter).second, - max_point, biggest_mode, lmul); - if (nregs > max_nregs) - max_nregs = nregs; - } - live_ranges_per_bb.empty (); - if (max_nregs > V_REG_NUM) - return true; - } - } +} + +/* Helper to clean up live range data structures. */ +void +costs::cleanup_live_range_data (hash_map<basic_block, vec<stmt_point>> + &program_points_per_bb, + hash_map<basic_block, hash_map<tree, pair>> + &live_ranges_per_bb) +{ if (!program_points_per_bb.is_empty ()) { for (hash_map<basic_block, vec<stmt_point>>::iterator iter @@ -888,7 +913,72 @@ costs::has_unexpected_spills_p (loop_vec_info loop_vinfo) } program_points_per_bb.empty (); } - return false; + live_ranges_per_bb.empty (); +} + +/* Compute LMUL for RVV_CONV_DYNAMIC mode based on conversion ratio. */ +void +costs::compute_conversion_dynamic_lmul (loop_vec_info loop_vinfo) +{ + hash_map<basic_block, vec<stmt_point>> program_points_per_bb; + hash_map<basic_block, hash_map<tree, pair>> live_ranges_per_bb; + machine_mode biggest_mode, smallest_mode; + int lmul; + + compute_live_ranges_and_lmul (loop_vinfo, program_points_per_bb, + live_ranges_per_bb, biggest_mode, + smallest_mode, lmul); + + /* Store the computed LMUL and biggest mode for later comparison + in cost model. */ + m_computed_lmul_from_conv = lmul; + m_biggest_mode_for_conv = biggest_mode; + + cleanup_live_range_data (program_points_per_bb, live_ranges_per_bb); +} + +/* Compute the maximum live V_REGS and check for unexpected spills. 
*/ +bool +costs::has_unexpected_spills_p (loop_vec_info loop_vinfo) +{ + hash_map<basic_block, vec<stmt_point>> program_points_per_bb; + hash_map<basic_block, hash_map<tree, pair>> live_ranges_per_bb; + machine_mode biggest_mode, smallest_mode; + int lmul; + + compute_live_ranges_and_lmul (loop_vinfo, program_points_per_bb, + live_ranges_per_bb, biggest_mode, + smallest_mode, lmul); + + /* TODO: We calculate the maximum live vars base on current STMTS + sequence. We can support live range shrink if it can give us + big improvement in the future. */ + bool has_spills = false; + if (lmul > RVV_M1 && !live_ranges_per_bb.is_empty ()) + { + unsigned int max_nregs = 0; + for (hash_map<basic_block, hash_map<tree, pair>>::iterator iter + = live_ranges_per_bb.begin (); + iter != live_ranges_per_bb.end (); ++iter) + { + basic_block bb = (*iter).first; + unsigned int max_point + = (*program_points_per_bb.get (bb)).length () + 1; + if ((*iter).second.is_empty ()) + continue; + /* We prefer larger LMUL unless it causes register spillings. 
*/ + unsigned int nregs + = max_number_of_live_regs (loop_vinfo, bb, (*iter).second, + max_point, biggest_mode, lmul); + if (nregs > max_nregs) + max_nregs = nregs; + } + if (max_nregs > V_REG_NUM) + has_spills = true; + } + + cleanup_live_range_data (program_points_per_bb, live_ranges_per_bb); + return has_spills; } costs::costs (vec_info *vinfo, bool costing_for_scalar) @@ -938,6 +1028,8 @@ costs::record_potential_unexpected_spills (loop_vec_info loop_vinfo) if (!post_dom_available_p) free_dominance_info (CDI_POST_DOMINATORS); } + else if (rvv_max_lmul == RVV_CONV_DYNAMIC) + compute_conversion_dynamic_lmul (loop_vinfo); } /* Decide whether to use the unrolling heuristic described above @@ -1034,6 +1126,50 @@ costs::better_main_loop_than_p (const vector_costs *uncast_other) const return other_prefer_unrolled; } } + else if (rvv_max_lmul == RVV_CONV_DYNAMIC) + { + if (this->m_computed_lmul_from_conv > 0 + && other->m_computed_lmul_from_conv > 0 + && this->m_biggest_mode_for_conv != VOIDmode) + { + int this_vf = vect_vf_for_cost (this_loop_vinfo); + int other_vf = vect_vf_for_cost (other_loop_vinfo); + + /* Get element size from the biggest mode. */ + unsigned int element_bits + = GET_MODE_BITSIZE (this->m_biggest_mode_for_conv).to_constant (); + + /* Estimate LMUL from VF * element_size / MIN_VLEN. */ + int this_lmul = (this_vf * element_bits) / TARGET_MIN_VLEN; + int other_lmul = (other_vf * element_bits) / TARGET_MIN_VLEN; + + /* Clamp to valid LMUL range. */ + this_lmul = MAX (1, MIN (this_lmul, 8)); + other_lmul = MAX (1, MIN (other_lmul, 8)); + + int target_lmul = this->m_computed_lmul_from_conv; + + /* Prefer the LMUL that exactly matches our computed ratio. 
*/ + if (this_lmul == target_lmul && other_lmul != target_lmul) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "Preferring LMUL=%d loop because it matches" + " conversion ratio (other LMUL=%d)\n", + this_lmul, other_lmul); + return true; + } + else if (this_lmul != target_lmul && other_lmul == target_lmul) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "Preferring other LMUL=%d loop because it" + " matches conversion ratio" + " (this LMUL=%d)\n", other_lmul, this_lmul); + return false; + } + } + } else if (rvv_max_lmul == RVV_DYNAMIC) { if (other->m_has_unexpected_spills_p) diff --git a/gcc/config/riscv/riscv-vector-costs.h b/gcc/config/riscv/riscv-vector-costs.h index b84ceb1d3cf0..89f813c3d98f 100644 --- a/gcc/config/riscv/riscv-vector-costs.h +++ b/gcc/config/riscv/riscv-vector-costs.h @@ -106,6 +106,11 @@ private: bool m_has_unexpected_spills_p = false; void record_potential_unexpected_spills (loop_vec_info); + /* For RVV_CONV_DYNAMIC mode, store the LMUL computed from conversion ratio + and the biggest mode used in the computation. 
*/ + int m_computed_lmul_from_conv = 0; + machine_mode m_biggest_mode_for_conv = VOIDmode; + void compute_local_program_points (vec_info *, hash_map<basic_block, vec<stmt_point>> &); void update_local_live_ranges (vec_info *, @@ -114,9 +119,17 @@ private: machine_mode *); machine_mode compute_local_live_ranges (loop_vec_info, const hash_map<basic_block, vec<stmt_point>> &, - hash_map<basic_block, hash_map<tree, pair>> &); - + hash_map<basic_block, hash_map<tree, pair>> &, + machine_mode * = nullptr); + + void compute_live_ranges_and_lmul (loop_vec_info, + hash_map<basic_block, vec<stmt_point>> &, + hash_map<basic_block, hash_map<tree, pair>> &, + machine_mode &, machine_mode &, int &); + void cleanup_live_range_data (hash_map<basic_block, vec<stmt_point>> &, + hash_map<basic_block, hash_map<tree, pair>> &); bool has_unexpected_spills_p (loop_vec_info); + void compute_conversion_dynamic_lmul (loop_vec_info); bool need_additional_vector_vars_p (stmt_vec_info, slp_tree); void adjust_vect_cost_per_loop (loop_vec_info); diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt index 452062c65008..de7730a89611 100644 --- a/gcc/config/riscv/riscv.opt +++ b/gcc/config/riscv/riscv.opt @@ -313,6 +313,9 @@ Enum(rvv_max_lmul) String(m8) Value(RVV_M8) EnumValue Enum(rvv_max_lmul) String(dynamic) Value(RVV_DYNAMIC) +EnumValue +Enum(rvv_max_lmul) String(conv-dynamic) Value(RVV_CONV_DYNAMIC) + mrvv-max-lmul= Target RejectNegative Joined Enum(rvv_max_lmul) Var(rvv_max_lmul) Init(RVV_M1) -mrvv-max-lmul=<string> Set the RVV LMUL of auto-vectorization. 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/dyn-lmul-conv-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/dyn-lmul-conv-1.c new file mode 100644 index 000000000000..b07bd86f76e4 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/dyn-lmul-conv-1.c @@ -0,0 +1,42 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -mrvv-max-lmul=conv-dynamic" } */ + +void foo2x1 (short *restrict a, char *restrict b, int n) +{ + for (int i = 0; i < n; i++) + a[i] = b[i]; +} + +void foo2x2 (int *restrict a, short *restrict b, int n) +{ + for (int i = 0; i < n; i++) + a[i] = b[i]; +} + +void foo2x3 (long *restrict a, int *restrict b, int n) +{ + for (int i = 0; i < n; i++) + a[i] = b[i]; +} + +void foo4x1 (int *restrict a, char *restrict b, int n) +{ + for (int i = 0; i < n; i++) + a[i] = b[i]; +} + +void foo4x2 (long *restrict a, short *restrict b, int n) +{ + for (int i = 0; i < n; i++) + a[i] = b[i]; +} + +void foo8x (long *restrict a, char *restrict b, int n) +{ + for (int i = 0; i < n; i++) + a[i] = b[i]; +} + +/* { dg-final { scan-assembler-times ",m2," 3 } } */ +/* { dg-final { scan-assembler-times ",m4," 2 } } */ +/* { dg-final { scan-assembler-times ",m8," 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/dyn-lmul-conv-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/dyn-lmul-conv-2.c new file mode 100644 index 000000000000..c37e4dd63f20 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/dyn-lmul-conv-2.c @@ -0,0 +1,43 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -mrvv-max-lmul=conv-dynamic" } */ + +void foo2x1 (unsigned char *restrict a, unsigned short *restrict b, int n) +{ + for (int i = 0; i < n; i++) + a[i] = b[i]; +} + +void foo2x2 (unsigned short *restrict a, unsigned int *restrict b, int n) +{ + for (int i = 0; i < n; i++) + a[i] = b[i]; +} + +void foo2x3 (unsigned int *restrict a, unsigned long *restrict b, int n) +{ + for (int i = 0; i < n; i++) + a[i] = b[i]; 
+} + +void foo4x1 (unsigned char *restrict a, unsigned int *restrict b, int n) +{ + for (int i = 0; i < n; i++) + a[i] = b[i]; +} + +void foo4x2 (unsigned short *restrict a, unsigned long *restrict b, int n) +{ + for (int i = 0; i < n; i++) + a[i] = b[i]; +} + +void foo8x (unsigned char *restrict a, unsigned long *restrict b, int n) +{ + for (int i = 0; i < n; i++) + a[i] = b[i]; +} + +/* { dg-final { scan-assembler-times ",m1," 6 } } */ +/* { dg-final { scan-assembler-times ",m2," 3 } } */ +/* { dg-final { scan-assembler-times ",m4," 1 } } */ +/* { dg-final { scan-assembler-not ",mf2," } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122846.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122846.c new file mode 100644 index 000000000000..7753a66cd96f --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122846.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -mrvv-max-lmul=conv-dynamic" } */ + +int +foo (const char *x, const char *y) +{ + int sum = 0; + for (int i = 0; i < 1024; i++) + sum += x[i] * y[i]; + return sum; +} + +/* One for the initial value, one for the reduction. */ +/* { dg-final { scan-assembler-times ",m4," 2 } } */
