Ack, I am interested in this patch and will review it in the next few days.
On Fri, Dec 12, 2025 at 10:22 PM Robin Dapp <[email protected]> wrote: > > Hi, > > As discussed in the patchwork sync this patch adds a dynamic LMUL mode > that sets the LMUL to the ratio of largest/smallest type size in a loop, > with the maximum being LMUL8. > > This is supposed to imitate what other architectures implicitly do by > vec_unpack_hi/lo. I have done cursory testing and obviously more > coverage would be preferred. > > Regtested on rv64gcv_zvl512b. > > Regards > Robin > > PR target/122846 > > gcc/ChangeLog: > > * config/riscv/riscv-opts.h (enum rvv_max_lmul_enum): Add > RVV_CONV_DYNAMIC. > (TARGET_MAX_LMUL): Ditto. > * config/riscv/riscv-string.cc (use_vector_stringop_p): Use > LMUL1 for RVV_CONV_DYNAMIC. > (expand_rawmemchr): Ditto. > (expand_strcmp): Ditto. > (check_vectorise_memory_operation): Ditto. > * config/riscv/riscv-vector-costs.cc (get_smallest_mode): > New function. > (compute_lmul_from_conversion_ratio): Calculate LMUL from > largest/smallest type. > (costs::has_unexpected_spills_p): Split. > (costs::compute_live_ranges_and_lmul): Compute smallest type and > call new function. > (costs::cleanup_live_range_data): New function. > (costs::compute_conversion_dynamic_lmul): New function. > (costs::record_potential_unexpected_spills): Use new function. > (costs::better_main_loop_than_p): Allow appropriate LMUL. > * config/riscv/riscv-vector-costs.h: Declare. > * config/riscv/riscv.opt: New option > -mrvv-max-lmul=conv-dynamic. > > gcc/testsuite/ChangeLog: > > * gcc.target/riscv/rvv/autovec/dyn-lmul-conv-1.c: New test. > * gcc.target/riscv/rvv/autovec/dyn-lmul-conv-2.c: New test. > * gcc.target/riscv/rvv/autovec/pr122846.c: New test. 
> --- > gcc/config/riscv/riscv-opts.h | 7 +- > gcc/config/riscv/riscv-string.cc | 26 +- > gcc/config/riscv/riscv-vector-costs.cc | 226 ++++++++++++++---- > gcc/config/riscv/riscv-vector-costs.h | 17 +- > gcc/config/riscv/riscv.opt | 3 + > .../riscv/rvv/autovec/dyn-lmul-conv-1.c | 42 ++++ > .../riscv/rvv/autovec/dyn-lmul-conv-2.c | 43 ++++ > .../gcc.target/riscv/rvv/autovec/pr122846.c | 14 ++ > 8 files changed, 320 insertions(+), 58 deletions(-) > create mode 100644 > gcc/testsuite/gcc.target/riscv/rvv/autovec/dyn-lmul-conv-1.c > create mode 100644 > gcc/testsuite/gcc.target/riscv/rvv/autovec/dyn-lmul-conv-2.c > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122846.c > > diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h > index 9b92a965e27..c6a09d59620 100644 > --- a/gcc/config/riscv/riscv-opts.h > +++ b/gcc/config/riscv/riscv-opts.h > @@ -86,7 +86,9 @@ enum rvv_max_lmul_enum { > RVV_M4 = 4, > RVV_M8 = 8, > /* For dynamic LMUL, we compare COST start with LMUL8. */ > - RVV_DYNAMIC = 9 > + RVV_DYNAMIC = 9, > + /* For dynamic LMUL based on conversions, set LMUL based on type size > ratio. */ > + RVV_CONV_DYNAMIC = 10 > }; > > enum riscv_multilib_select_kind { > @@ -155,7 +157,8 @@ enum rvv_vector_bits_enum { > > /* The maximum LMUL according to user configuration. */ > #define TARGET_MAX_LMUL > \ > - (int) (rvv_max_lmul == RVV_DYNAMIC ? RVV_M8 : rvv_max_lmul) > + (int) ((rvv_max_lmul == RVV_DYNAMIC || rvv_max_lmul == RVV_CONV_DYNAMIC) \ > + ? RVV_M8 : rvv_max_lmul) > > /* TLS types. 
*/ > enum riscv_tls_type { > diff --git a/gcc/config/riscv/riscv-string.cc > b/gcc/config/riscv/riscv-string.cc > index c5710e4c896..ac9b19213a0 100644 > --- a/gcc/config/riscv/riscv-string.cc > +++ b/gcc/config/riscv/riscv-string.cc > @@ -1089,13 +1089,17 @@ use_vector_stringop_p (struct stringop_info &info, > HOST_WIDE_INT max_ew, > if (!TARGET_VECTOR || !(stringop_strategy & STRATEGY_VECTOR)) > return false; > > + int max_lmul = TARGET_MAX_LMUL; > + if (rvv_max_lmul == RVV_CONV_DYNAMIC) > + max_lmul = RVV_M1; > + > if (CONST_INT_P (length_in)) > { > HOST_WIDE_INT length = INTVAL (length_in); > > /* If the VLEN and preferred LMUL allow the entire block to be copied > in > one go then no loop is needed. */ > - if (known_le (length, BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL)) > + if (known_le (length, BYTES_PER_RISCV_VECTOR * max_lmul)) > { > need_loop = false; > > @@ -1130,10 +1134,10 @@ use_vector_stringop_p (struct stringop_info &info, > HOST_WIDE_INT max_ew, > poly_int64 nunits; > > if (need_loop) > - per_iter = BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL; > + per_iter = BYTES_PER_RISCV_VECTOR * max_lmul; > else > per_iter = length; > - /* BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL may not be divisible by > + /* BYTES_PER_RISCV_VECTOR * MAX_LMUL may not be divisible by > this potential_ew. */ > if (!multiple_p (per_iter, potential_ew, &nunits)) > continue; > @@ -1164,7 +1168,7 @@ use_vector_stringop_p (struct stringop_info &info, > HOST_WIDE_INT max_ew, > pointless. > Still, by choosing a lower LMUL factor that still allows > an entire transfer, we can reduce register pressure. 
*/ > - for (unsigned lmul = 1; lmul < TARGET_MAX_LMUL; lmul <<= 1) > + for (int lmul = 1; lmul < max_lmul; lmul <<= 1) > if (known_le (length * BITS_PER_UNIT, TARGET_MIN_VLEN * lmul) > && multiple_p (BYTES_PER_RISCV_VECTOR * lmul, > potential_ew, > &mode_units) > @@ -1177,9 +1181,9 @@ use_vector_stringop_p (struct stringop_info &info, > HOST_WIDE_INT max_ew, > if (vmode != VOIDmode) > break; > > - /* BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL will at least be > divisible > + /* BYTES_PER_RISCV_VECTOR * MAX_LMUL will at least be divisible > by potential_ew 1, so this should succeed eventually. */ > - if (multiple_p (BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL, > + if (multiple_p (BYTES_PER_RISCV_VECTOR * max_lmul, > potential_ew, &mode_units) > && riscv_vector::get_vector_mode (elem_mode, > mode_units).exists (&vmode)) > @@ -1195,7 +1199,7 @@ use_vector_stringop_p (struct stringop_info &info, > HOST_WIDE_INT max_ew, > } > else > { > - gcc_assert (get_lmul_mode (QImode, TARGET_MAX_LMUL).exists (&vmode)); > + gcc_assert (get_lmul_mode (QImode, max_lmul).exists (&vmode)); > } > > /* A memcpy libcall in the worst case takes 3 instructions to prepare the > @@ -1356,6 +1360,8 @@ expand_rawmemchr (machine_mode mode, rtx dst, rtx > haystack, rtx needle, > > unsigned int isize = GET_MODE_SIZE (mode).to_constant (); > int lmul = TARGET_MAX_LMUL; > + if (rvv_max_lmul == RVV_CONV_DYNAMIC) > + lmul = RVV_M1; > poly_int64 nunits = exact_div (BYTES_PER_RISCV_VECTOR * lmul, isize); > > machine_mode vmode; > @@ -1455,6 +1461,8 @@ expand_strcmp (rtx result, rtx src1, rtx src2, rtx > nbytes, > machine_mode mode = E_QImode; > unsigned int isize = GET_MODE_SIZE (mode).to_constant (); > int lmul = TARGET_MAX_LMUL; > + if (rvv_max_lmul == RVV_CONV_DYNAMIC) > + lmul = RVV_M1; > poly_int64 nunits = exact_div (BYTES_PER_RISCV_VECTOR * lmul, isize); > > machine_mode vmode; > @@ -1606,7 +1614,9 @@ check_vectorise_memory_operation (rtx length_in, > HOST_WIDE_INT &lmul_out) > if (rvv_max_lmul != 
RVV_DYNAMIC) > { > lmul_out = TARGET_MAX_LMUL; > - return (length <= ((TARGET_MAX_LMUL * TARGET_MIN_VLEN) / 8)); > + if (rvv_max_lmul == RVV_CONV_DYNAMIC) > + lmul_out = RVV_M1; > + return (length <= ((lmul_out * TARGET_MIN_VLEN) / 8)); > } > > /* Find smallest lmul large enough for entire op. */ > diff --git a/gcc/config/riscv/riscv-vector-costs.cc > b/gcc/config/riscv/riscv-vector-costs.cc > index 27ced61e815..41b4e4860b0 100644 > --- a/gcc/config/riscv/riscv-vector-costs.cc > +++ b/gcc/config/riscv/riscv-vector-costs.cc > @@ -258,6 +258,14 @@ get_biggest_mode (machine_mode mode1, machine_mode mode2) > return mode1_size >= mode2_size ? mode1 : mode2; > } > > +static machine_mode > +get_smallest_mode (machine_mode mode1, machine_mode mode2) > +{ > + unsigned int mode1_size = GET_MODE_BITSIZE (mode1).to_constant (); > + unsigned int mode2_size = GET_MODE_BITSIZE (mode2).to_constant (); > + return mode1_size <= mode2_size ? mode1 : mode2; > +} > + > /* Return true if OP is invariant. */ > > static bool > @@ -361,9 +369,11 @@ machine_mode > costs::compute_local_live_ranges ( > loop_vec_info loop_vinfo, > const hash_map<basic_block, vec<stmt_point>> &program_points_per_bb, > - hash_map<basic_block, hash_map<tree, pair>> &live_ranges_per_bb) > + hash_map<basic_block, hash_map<tree, pair>> &live_ranges_per_bb, > + machine_mode *smallest_mode_out) > { > machine_mode biggest_mode = QImode; > + machine_mode smallest_mode = TImode; > class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); > if (!program_points_per_bb.is_empty ()) > { > @@ -396,6 +406,8 @@ costs::compute_local_live_ranges ( > { > biggest_mode = get_biggest_mode (biggest_mode, > TYPE_MODE (TREE_TYPE > (lhs))); > + smallest_mode = get_smallest_mode (smallest_mode, > + TYPE_MODE (TREE_TYPE > (lhs))); > bool existed_p = false; > pair &live_range > = live_ranges->get_or_insert (lhs, &existed_p); > @@ -415,6 +427,9 @@ costs::compute_local_live_ranges ( > biggest_mode > = get_biggest_mode (biggest_mode, > TYPE_MODE 
(TREE_TYPE (var))); > + smallest_mode > + = get_smallest_mode (smallest_mode, > + TYPE_MODE (TREE_TYPE (var))); > bool existed_p = false; > pair &live_range > = live_ranges->get_or_insert (var, &existed_p); > @@ -445,6 +460,8 @@ costs::compute_local_live_ranges ( > (*r).second = MAX (point, (*r).second); > biggest_mode = get_biggest_mode ( > biggest_mode, TYPE_MODE (TREE_TYPE > (arg))); > + smallest_mode = get_smallest_mode ( > + smallest_mode, TYPE_MODE (TREE_TYPE > (arg))); > } > } > else > @@ -464,8 +481,14 @@ costs::compute_local_live_ranges ( > } > } > if (dump_enabled_p ()) > - dump_printf_loc (MSG_NOTE, vect_location, "Biggest mode = %s\n", > - GET_MODE_NAME (biggest_mode)); > + { > + dump_printf_loc (MSG_NOTE, vect_location, "Biggest mode = %s\n", > + GET_MODE_NAME (biggest_mode)); > + dump_printf_loc (MSG_NOTE, vect_location, "Smallest mode = %s\n", > + GET_MODE_NAME (smallest_mode)); > + } > + if (smallest_mode_out) > + *smallest_mode_out = smallest_mode; > return biggest_mode; > } > > @@ -639,6 +662,25 @@ compute_estimated_lmul (loop_vec_info loop_vinfo, > machine_mode mode) > return 0; > } > > +/* Compute LMUL based on the ratio of biggest to smallest type size. > + This is used for RVV_CONV_DYNAMIC. */ > +static int > +compute_lmul_from_conversion_ratio (machine_mode biggest_mode, > + machine_mode smallest_mode) > +{ > + gcc_assert (GET_MODE_BITSIZE (biggest_mode).is_constant ()); > + gcc_assert (GET_MODE_BITSIZE (smallest_mode).is_constant ()); > + > + unsigned int biggest_size = GET_MODE_BITSIZE (biggest_mode).to_constant (); > + unsigned int smallest_size = GET_MODE_BITSIZE (smallest_mode).to_constant > (); > + > + int lmul = biggest_size / smallest_size; > + lmul = std::min (lmul, (int) RVV_M8); > + lmul = std::max (lmul, (int) RVV_M1); > + > + return lmul; > +} > + > /* Update the live ranges according PHI. > > Loop: > @@ -825,56 +867,37 @@ costs::update_local_live_ranges ( > } > } > > -/* Compute the maximum live V_REGS. 
*/ > -bool > -costs::has_unexpected_spills_p (loop_vec_info loop_vinfo) > +/* Helper to compute live ranges, modes, and LMUL. */ > +void > +costs::compute_live_ranges_and_lmul (loop_vec_info loop_vinfo, > + hash_map<basic_block, vec<stmt_point>> &program_points_per_bb, > + hash_map<basic_block, hash_map<tree, pair>> &live_ranges_per_bb, > + machine_mode &biggest_mode, machine_mode &smallest_mode, int &lmul) > { > - /* Compute local program points. > - It's a fast and effective computation. */ > - hash_map<basic_block, vec<stmt_point>> program_points_per_bb; > compute_local_program_points (loop_vinfo, program_points_per_bb); > > - /* Compute local live ranges. */ > - hash_map<basic_block, hash_map<tree, pair>> live_ranges_per_bb; > - machine_mode biggest_mode > - = compute_local_live_ranges (loop_vinfo, program_points_per_bb, > - live_ranges_per_bb); > + smallest_mode = TImode; > + biggest_mode = compute_local_live_ranges (loop_vinfo, > program_points_per_bb, > + live_ranges_per_bb, > &smallest_mode); > > - /* Update live ranges according to PHI. */ > update_local_live_ranges (loop_vinfo, program_points_per_bb, > live_ranges_per_bb, &biggest_mode); > > - int lmul = compute_estimated_lmul (loop_vinfo, biggest_mode); > + if (rvv_max_lmul == RVV_CONV_DYNAMIC) > + lmul = compute_lmul_from_conversion_ratio (biggest_mode, smallest_mode); > + else > + lmul = compute_estimated_lmul (loop_vinfo, biggest_mode); > + > gcc_assert (lmul <= RVV_M8); > - /* TODO: We calculate the maximum live vars base on current STMTS > - sequence. We can support live range shrink if it can give us > - big improvement in the future. 
*/ > - if (lmul > RVV_M1) > - { > - if (!live_ranges_per_bb.is_empty ()) > - { > - unsigned int max_nregs = 0; > - for (hash_map<basic_block, hash_map<tree, pair>>::iterator iter > - = live_ranges_per_bb.begin (); > - iter != live_ranges_per_bb.end (); ++iter) > - { > - basic_block bb = (*iter).first; > - unsigned int max_point > - = (*program_points_per_bb.get (bb)).length () + 1; > - if ((*iter).second.is_empty ()) > - continue; > - /* We prefer larger LMUL unless it causes register spillings. */ > - unsigned int nregs > - = max_number_of_live_regs (loop_vinfo, bb, (*iter).second, > - max_point, biggest_mode, lmul); > - if (nregs > max_nregs) > - max_nregs = nregs; > - } > - live_ranges_per_bb.empty (); > - if (max_nregs > V_REG_NUM) > - return true; > - } > - } > +} > + > +/* Helper to clean up live range data structures. */ > +void > +costs::cleanup_live_range_data (hash_map<basic_block, vec<stmt_point>> > + &program_points_per_bb, > + hash_map<basic_block, hash_map<tree, pair>> > + &live_ranges_per_bb) > +{ > if (!program_points_per_bb.is_empty ()) > { > for (hash_map<basic_block, vec<stmt_point>>::iterator iter > @@ -887,7 +910,72 @@ costs::has_unexpected_spills_p (loop_vec_info loop_vinfo) > } > program_points_per_bb.empty (); > } > - return false; > + live_ranges_per_bb.empty (); > +} > + > +/* Compute LMUL for RVV_CONV_DYNAMIC mode based on conversion ratio. */ > +void > +costs::compute_conversion_dynamic_lmul (loop_vec_info loop_vinfo) > +{ > + hash_map<basic_block, vec<stmt_point>> program_points_per_bb; > + hash_map<basic_block, hash_map<tree, pair>> live_ranges_per_bb; > + machine_mode biggest_mode, smallest_mode; > + int lmul; > + > + compute_live_ranges_and_lmul (loop_vinfo, program_points_per_bb, > + live_ranges_per_bb, biggest_mode, > + smallest_mode, lmul); > + > + /* Store the computed LMUL and biggest mode for later comparison > + in cost model. 
*/ > + m_computed_lmul_from_conv = lmul; > + m_biggest_mode_for_conv = biggest_mode; > + > + cleanup_live_range_data (program_points_per_bb, live_ranges_per_bb); > +} > + > +/* Compute the maximum live V_REGS and check for unexpected spills. */ > +bool > +costs::has_unexpected_spills_p (loop_vec_info loop_vinfo) > +{ > + hash_map<basic_block, vec<stmt_point>> program_points_per_bb; > + hash_map<basic_block, hash_map<tree, pair>> live_ranges_per_bb; > + machine_mode biggest_mode, smallest_mode; > + int lmul; > + > + compute_live_ranges_and_lmul (loop_vinfo, program_points_per_bb, > + live_ranges_per_bb, biggest_mode, > + smallest_mode, lmul); > + > + /* TODO: We calculate the maximum live vars base on current STMTS > + sequence. We can support live range shrink if it can give us > + big improvement in the future. */ > + bool has_spills = false; > + if (lmul > RVV_M1 && !live_ranges_per_bb.is_empty ()) > + { > + unsigned int max_nregs = 0; > + for (hash_map<basic_block, hash_map<tree, pair>>::iterator iter > + = live_ranges_per_bb.begin (); > + iter != live_ranges_per_bb.end (); ++iter) > + { > + basic_block bb = (*iter).first; > + unsigned int max_point > + = (*program_points_per_bb.get (bb)).length () + 1; > + if ((*iter).second.is_empty ()) > + continue; > + /* We prefer larger LMUL unless it causes register spillings. 
*/ > + unsigned int nregs > + = max_number_of_live_regs (loop_vinfo, bb, (*iter).second, > + max_point, biggest_mode, lmul); > + if (nregs > max_nregs) > + max_nregs = nregs; > + } > + if (max_nregs > V_REG_NUM) > + has_spills = true; > + } > + > + cleanup_live_range_data (program_points_per_bb, live_ranges_per_bb); > + return has_spills; > } > > costs::costs (vec_info *vinfo, bool costing_for_scalar) > @@ -937,6 +1025,8 @@ costs::record_potential_unexpected_spills (loop_vec_info > loop_vinfo) > if (!post_dom_available_p) > free_dominance_info (CDI_POST_DOMINATORS); > } > + else if (rvv_max_lmul == RVV_CONV_DYNAMIC) > + compute_conversion_dynamic_lmul (loop_vinfo); > } > > /* Decide whether to use the unrolling heuristic described above > @@ -1033,6 +1123,50 @@ costs::better_main_loop_than_p (const vector_costs > *uncast_other) const > return other_prefer_unrolled; > } > } > + else if (rvv_max_lmul == RVV_CONV_DYNAMIC) > + { > + if (this->m_computed_lmul_from_conv > 0 > + && other->m_computed_lmul_from_conv > 0 > + && this->m_biggest_mode_for_conv != VOIDmode) > + { > + int this_vf = vect_vf_for_cost (this_loop_vinfo); > + int other_vf = vect_vf_for_cost (other_loop_vinfo); > + > + /* Get element size from the biggest mode. */ > + unsigned int element_bits > + = GET_MODE_BITSIZE (this->m_biggest_mode_for_conv).to_constant (); > + > + /* Estimate LMUL from VF * element_size / MIN_VLEN. */ > + int this_lmul = (this_vf * element_bits) / TARGET_MIN_VLEN; > + int other_lmul = (other_vf * element_bits) / TARGET_MIN_VLEN; > + > + /* Clamp to valid LMUL range. */ > + this_lmul = MAX (1, MIN (this_lmul, 8)); > + other_lmul = MAX (1, MIN (other_lmul, 8)); > + > + int target_lmul = this->m_computed_lmul_from_conv; > + > + /* Prefer the LMUL that exactly matches our computed ratio. 
*/ > + if (this_lmul == target_lmul && other_lmul != target_lmul) > + { > + if (dump_enabled_p ()) > + dump_printf_loc (MSG_NOTE, vect_location, > + "Preferring LMUL=%d loop because it matches" > + " conversion ratio (other LMUL=%d)\n", > + this_lmul, other_lmul); > + return true; > + } > + else if (this_lmul != target_lmul && other_lmul == target_lmul) > + { > + if (dump_enabled_p ()) > + dump_printf_loc (MSG_NOTE, vect_location, > + "Preferring other LMUL=%d loop because it > matches" > + " conversion ratio (this LMUL=%d)\n", > + other_lmul, this_lmul); > + return false; > + } > + } > + } > else if (rvv_max_lmul == RVV_DYNAMIC) > { > if (other->m_has_unexpected_spills_p) > diff --git a/gcc/config/riscv/riscv-vector-costs.h > b/gcc/config/riscv/riscv-vector-costs.h > index b84ceb1d3cf..89f813c3d98 100644 > --- a/gcc/config/riscv/riscv-vector-costs.h > +++ b/gcc/config/riscv/riscv-vector-costs.h > @@ -106,6 +106,11 @@ private: > bool m_has_unexpected_spills_p = false; > void record_potential_unexpected_spills (loop_vec_info); > > + /* For RVV_DYNAMIC_CONV mode, store the LMUL computed from conversion ratio > + and the biggest mode used in the computation. 
*/ > + int m_computed_lmul_from_conv = 0; > + machine_mode m_biggest_mode_for_conv = VOIDmode; > + > void compute_local_program_points (vec_info *, > hash_map<basic_block, vec<stmt_point>> > &); > void update_local_live_ranges (vec_info *, > @@ -114,9 +119,17 @@ private: > machine_mode *); > machine_mode compute_local_live_ranges > (loop_vec_info, const hash_map<basic_block, vec<stmt_point>> &, > - hash_map<basic_block, hash_map<tree, pair>> &); > - > + hash_map<basic_block, hash_map<tree, pair>> &, > + machine_mode * = nullptr); > + > + void compute_live_ranges_and_lmul (loop_vec_info, > + hash_map<basic_block, vec<stmt_point>> &, > + hash_map<basic_block, hash_map<tree, > pair>> &, > + machine_mode &, machine_mode &, int &); > + void cleanup_live_range_data (hash_map<basic_block, vec<stmt_point>> &, > + hash_map<basic_block, hash_map<tree, pair>> > &); > bool has_unexpected_spills_p (loop_vec_info); > + void compute_conversion_dynamic_lmul (loop_vec_info); > bool need_additional_vector_vars_p (stmt_vec_info, slp_tree); > > void adjust_vect_cost_per_loop (loop_vec_info); > diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt > index 452062c6500..de7730a8961 100644 > --- a/gcc/config/riscv/riscv.opt > +++ b/gcc/config/riscv/riscv.opt > @@ -313,6 +313,9 @@ Enum(rvv_max_lmul) String(m8) Value(RVV_M8) > EnumValue > Enum(rvv_max_lmul) String(dynamic) Value(RVV_DYNAMIC) > > +EnumValue > +Enum(rvv_max_lmul) String(conv-dynamic) Value(RVV_CONV_DYNAMIC) > + > mrvv-max-lmul= > Target RejectNegative Joined Enum(rvv_max_lmul) Var(rvv_max_lmul) > Init(RVV_M1) > -mrvv-max-lmul=<string> Set the RVV LMUL of auto-vectorization. 
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/dyn-lmul-conv-1.c > b/gcc/testsuite/gcc.target/riscv/rvv/autovec/dyn-lmul-conv-1.c > new file mode 100644 > index 00000000000..b07bd86f76e > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/dyn-lmul-conv-1.c > @@ -0,0 +1,42 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -mrvv-max-lmul=conv-dynamic" > } */ > + > +void foo2x1 (short *restrict a, char *restrict b, int n) > +{ > + for (int i = 0; i < n; i++) > + a[i] = b[i]; > +} > + > +void foo2x2 (int *restrict a, short *restrict b, int n) > +{ > + for (int i = 0; i < n; i++) > + a[i] = b[i]; > +} > + > +void foo2x3 (long *restrict a, int *restrict b, int n) > +{ > + for (int i = 0; i < n; i++) > + a[i] = b[i]; > +} > + > +void foo4x1 (int *restrict a, char *restrict b, int n) > +{ > + for (int i = 0; i < n; i++) > + a[i] = b[i]; > +} > + > +void foo4x2 (long *restrict a, short *restrict b, int n) > +{ > + for (int i = 0; i < n; i++) > + a[i] = b[i]; > +} > + > +void foo8x (long *restrict a, char *restrict b, int n) > +{ > + for (int i = 0; i < n; i++) > + a[i] = b[i]; > +} > + > +/* { dg-final { scan-assembler-times ",m2," 3 } } */ > +/* { dg-final { scan-assembler-times ",m4," 2 } } */ > +/* { dg-final { scan-assembler-times ",m8," 1 } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/dyn-lmul-conv-2.c > b/gcc/testsuite/gcc.target/riscv/rvv/autovec/dyn-lmul-conv-2.c > new file mode 100644 > index 00000000000..c37e4dd63f2 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/dyn-lmul-conv-2.c > @@ -0,0 +1,43 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -mrvv-max-lmul=conv-dynamic" > } */ > + > +void foo2x1 (unsigned char *restrict a, unsigned short *restrict b, int n) > +{ > + for (int i = 0; i < n; i++) > + a[i] = b[i]; > +} > + > +void foo2x2 (unsigned short *restrict a, unsigned int *restrict b, int n) > +{ > + for (int i = 0; i < n; i++) > + 
a[i] = b[i]; > +} > + > +void foo2x3 (unsigned int *restrict a, unsigned long *restrict b, int n) > +{ > + for (int i = 0; i < n; i++) > + a[i] = b[i]; > +} > + > +void foo4x1 (unsigned char *restrict a, unsigned int *restrict b, int n) > +{ > + for (int i = 0; i < n; i++) > + a[i] = b[i]; > +} > + > +void foo4x2 (unsigned short *restrict a, unsigned long *restrict b, int n) > +{ > + for (int i = 0; i < n; i++) > + a[i] = b[i]; > +} > + > +void foo8x (unsigned char *restrict a, unsigned long *restrict b, int n) > +{ > + for (int i = 0; i < n; i++) > + a[i] = b[i]; > +} > + > +/* { dg-final { scan-assembler-times ",m1," 6 } } */ > +/* { dg-final { scan-assembler-times ",m2," 3 } } */ > +/* { dg-final { scan-assembler-times ",m4," 1 } } */ > +/* { dg-final { scan-assembler-not ",mf2," } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122846.c > b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122846.c > new file mode 100644 > index 00000000000..7753a66cd96 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122846.c > @@ -0,0 +1,14 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -mrvv-max-lmul=conv-dynamic" > } */ > + > +int > +foo (const char *x, const char *y) > +{ > + int sum = 0; > + for (int i = 0; i < 1024; i++) > + sum += x[i] * y[i]; > + return sum; > +} > + > +/* One for the initial value, one for the reduction. */ > +/* { dg-final { scan-assembler-times ",m4," 2 } } */ > -- > 2.51.1 >
