https://gcc.gnu.org/g:25a11c51d508370e439d1a9b5643c8aa39afb4b5
commit r16-5564-g25a11c51d508370e439d1a9b5643c8aa39afb4b5 Author: Robin Dapp <[email protected]> Date: Fri Nov 14 15:01:29 2025 +0100 vect: Make SELECT_VL a convert optab. Currently select_vl is a direct optab with its mode always Xmode/Pmode. This does not give us sufficient freedom to enable/disable vsetvl (=SELECT_VL) depending on the vector mode. This patch makes select_vl a convert optab and adjusts the associated IFN functions as well as the query/emit code in the vectorizer. With this patch nothing new is actually exercised yet. This is going to happen in a separate riscv patch that enables "VLS" select_vl. gcc/ChangeLog: * config/riscv/autovec.md (select_vl<mode>): Rename to... (select_vl<V:mode><P:mode>): ...this. * doc/md.texi: Document new behavior. * internal-fn.cc (select_vl_direct): New define. (expand_select_vl_optab_fn): Adjust for convert optab. (direct_select_vl_optab_supported_p): Ditto. * internal-fn.def (SELECT_VL): Ditto. * optabs.def (OPTAB_CD): Add select_vl. (OPTAB_D): Remove select_vl. * tree-vect-loop-manip.cc (vect_set_loop_controls_directly): Adjust for convert select_vl optab. * tree-vect-loop.cc: Ditto. 
Diff: --- gcc/config/riscv/autovec.md | 5 +++-- gcc/doc/md.texi | 17 +++++++++-------- gcc/internal-fn.cc | 5 +++++ gcc/internal-fn.def | 2 +- gcc/optabs.def | 2 +- gcc/tree-vect-loop-manip.cc | 4 +++- gcc/tree-vect-loop.cc | 15 ++++++++++++--- 7 files changed, 34 insertions(+), 16 deletions(-) diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md index 2777c16126e1..cec0113fca32 100644 --- a/gcc/config/riscv/autovec.md +++ b/gcc/config/riscv/autovec.md @@ -1335,10 +1335,11 @@ ;; == SELECT_VL ;; ========================================================================= -(define_expand "select_vl<mode>" +(define_expand "select_vl<V:mode><P:mode>" [(match_operand:P 0 "register_operand") (match_operand:P 1 "vector_length_operand") - (match_operand:P 2 "immediate_operand")] + (match_operand:P 2 "immediate_operand") + (match_operand:V 3)] "TARGET_VECTOR" { riscv_vector::expand_select_vl (operands); diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index ae5d709bd479..6dedca225ae1 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -5273,13 +5273,14 @@ for (i = 1; i < operand3; i++) operand0[i] = operand0[i - 1] && (operand1 + i < operand2); @end smallexample -@cindex @code{select_vl@var{m}} instruction pattern -@item @code{select_vl@var{m}} -Set operand 0 to the number of scalar iterations that should be handled -by one iteration of a vector loop. Operand 1 is the total number of -scalar iterations that the loop needs to process and operand 2 is a -maximum bound on the result (also known as the maximum ``vectorization -factor''). +@cindex @code{select_vl@var{m}@var{n}} instruction pattern +@item @code{select_vl@var{m}@var{n}} +Set operand 0 (of mode @var{n}) to the number of scalar iterations that +should be handled by one iteration of a vector loop. Operand 1 is the +total number of scalar iterations that the loop needs to process and +operand 2 is a maximum bound on the result (also known as the +maximum ``vectorization factor''). 
Operand 3 (of mode @var{m}) is +a dummy parameter to pass the vector mode to be used. The maximum value of operand 0 is given by: @smallexample @@ -5293,7 +5294,7 @@ this, it is generally not useful to define this instruction if it will always calculate the maximum value. This optab is only useful on targets that implement @samp{len_load_@var{m}} -and/or @samp{len_store_@var{m}}. +and/or @samp{len_store_@var{m}} or the associated @samp{_len} variants. @cindex @code{check_raw_ptrs@var{m}} instruction pattern @item @samp{check_raw_ptrs@var{m}} diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc index 514fe98f40d8..13fbd2ce7884 100644 --- a/gcc/internal-fn.cc +++ b/gcc/internal-fn.cc @@ -195,6 +195,7 @@ init_internal_fns () #define check_ptrs_direct { 0, 0, false } #define crc_direct { 1, -1, true } #define reduc_sbool_direct { 0, 0, true } +#define select_vl_direct { 2, 0, false } const direct_internal_fn_info direct_internal_fn_array[IFN_LAST + 1] = { #define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) not_direct, @@ -4183,6 +4184,9 @@ expand_reduc_sbool_optab_fn (internal_fn fn, gcall *stmt, direct_optab optab) #define expand_check_ptrs_optab_fn(FN, STMT, OPTAB) \ expand_direct_optab_fn (FN, STMT, OPTAB, 4) +#define expand_select_vl_optab_fn(FN, STMT, OPTAB) \ + expand_convert_optab_fn (FN, STMT, OPTAB, 3) + /* Expanders for optabs that can use expand_convert_optab_fn. */ #define expand_unary_convert_optab_fn(FN, STMT, OPTAB) \ @@ -4299,6 +4303,7 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types, #define direct_vec_set_optab_supported_p direct_optab_supported_p #define direct_vec_extract_optab_supported_p convert_optab_supported_p #define direct_reduc_sbool_optab_supported_p direct_optab_supported_p +#define direct_select_vl_optab_supported_p convert_optab_supported_p /* Return the optab used by internal function FN. 
*/ diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def index 7874fcfb3dfd..4e83a59880eb 100644 --- a/gcc/internal-fn.def +++ b/gcc/internal-fn.def @@ -250,7 +250,7 @@ DEF_INTERNAL_OPTAB_FN (LEN_STORE, 0, len_store, len_store) DEF_INTERNAL_OPTAB_FN (MASK_LEN_STORE, 0, mask_len_store, mask_len_store) DEF_INTERNAL_OPTAB_FN (WHILE_ULT, ECF_CONST | ECF_NOTHROW, while_ult, while) -DEF_INTERNAL_OPTAB_FN (SELECT_VL, ECF_CONST | ECF_NOTHROW, select_vl, binary) +DEF_INTERNAL_OPTAB_FN (SELECT_VL, ECF_CONST | ECF_NOTHROW, select_vl, select_vl) DEF_INTERNAL_OPTAB_FN (CHECK_RAW_PTRS, ECF_CONST | ECF_NOTHROW, check_raw_ptrs, check_ptrs) DEF_INTERNAL_OPTAB_FN (CHECK_WAR_PTRS, ECF_CONST | ECF_NOTHROW, diff --git a/gcc/optabs.def b/gcc/optabs.def index b6f290a95130..5218b6d6ec0a 100644 --- a/gcc/optabs.def +++ b/gcc/optabs.def @@ -116,6 +116,7 @@ OPTAB_CD (udot_prod_optab, "udot_prod$I$a$b") OPTAB_CD (usdot_prod_optab, "usdot_prod$I$a$b") OPTAB_CD (while_ult_optab, "while_ult$a$b") +OPTAB_CD (select_vl_optab, "select_vl$a$b") OPTAB_NL(add_optab, "add$P$a3", PLUS, "add", '3', gen_int_fp_fixed_libfunc) OPTAB_NX(add_optab, "add$F$a3") @@ -553,6 +554,5 @@ OPTAB_D (len_load_optab, "len_load_$a") OPTAB_D (len_store_optab, "len_store_$a") OPTAB_D (mask_len_strided_load_optab, "mask_len_strided_load_$a") OPTAB_D (mask_len_strided_store_optab, "mask_len_strided_store_$a") -OPTAB_D (select_vl_optab, "select_vl$a") OPTAB_D (andn_optab, "andn$a3") OPTAB_D (iorn_optab, "iorn$a3") diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc index af3f3aff2cc0..a9fe14b31855 100644 --- a/gcc/tree-vect-loop-manip.cc +++ b/gcc/tree-vect-loop-manip.cc @@ -574,8 +574,10 @@ vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo, { create_iv (nitems_total, MINUS_EXPR, step, NULL_TREE, loop, &incr_gsi, insert_after, &index_before_incr, &index_after_incr); + tree vectype = build_zero_cst (rgc->type); tree len = gimple_build (header_seq, IFN_SELECT_VL, iv_type, - 
index_before_incr, nitems_step); + index_before_incr, nitems_step, + vectype); gimple_seq_add_stmt (header_seq, gimple_build_assign (step, len)); } else diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index e013d4f98096..6b6dc206c59e 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -2420,14 +2420,23 @@ start_over: if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo)) { tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo); - if (direct_internal_fn_supported_p (IFN_SELECT_VL, iv_type, - OPTIMIZE_FOR_SPEED) - && LOOP_VINFO_LENS (loop_vinfo).length () == 1 + if (LOOP_VINFO_LENS (loop_vinfo).length () == 1 && LOOP_VINFO_LENS (loop_vinfo)[0].factor == 1 && (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant ())) LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo) = true; + if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo)) + for (auto rgc : LOOP_VINFO_LENS (loop_vinfo)) + if (rgc.type + && !direct_internal_fn_supported_p (IFN_SELECT_VL, + rgc.type, iv_type, + OPTIMIZE_FOR_SPEED)) + { + LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo) = false; + break; + } + /* If any of the SLP instances cover more than a single lane we cannot use .SELECT_VL at the moment, even if the number of lanes is uniform throughout the SLP graph. */
