On Fri, Nov 28, 2025 at 10:10 AM Robin Dapp <[email protected]> wrote:
>
> > Ah, I see. How the code is factored makes this a bit iffy IMO.
> > Can't we refactor things somehow to avoid this "special value"?
>
> Time for a store else value? :-x
>
> Attached is a v2. There is a bit of code duplication now, but I think it
> is not too bad. A quick smoke regtest on riscv64 and x86 was successful;
> the rest of the targets are still running.
I think this is better, but I'd make this partial_store_elidable_p only and,
for loads, replace with the 'else' value? Alternatively, rename it to
partial_load_store_all_lanes_masked_p or so? Btw, I see we're oddly
rejecting any mask != -1 even when len == 0? Likewise, we don't seem to
treat mask == 0 the same as len == 0?

Richard.
> Regards
> Robin
>
>
> [PATCH v2] fold: Elide MASK_LEN_LOAD/STORE with zero length [PR122635].
>
> This patch adds zero-length handling to gimple_fold_partial_store and
> gimple_fold_partial_load. If length + bias is zero, we replace the
> load with its else value and remove a store altogether.
>
> At the same time the patch removes the mask_p argument of
> gimple_fold_partial_load and _store. We can easily get the mask index
> from the IFN.
>
> PR tree-optimization/122635
>
> gcc/ChangeLog:
>
> * gimple-fold.cc (gimple_fold_partial_load_store_mem_ref):
> Remove mask_p argument.  Replace zero-length load with its else value.
> (partial_load_store_elidible_p): New function.
> (gimple_fold_partial_load): Remove mask_p argument.
> (gimple_fold_partial_store): Remove mask_p argument.  Remove store if
> elidable.
> (gimple_fold_call): Adjust callers for removed mask_p argument.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/rvv/autovec/pr122635-1.c: New test.
> * gcc.target/riscv/rvv/autovec/pr122635-2.c: New test.
> * gcc.target/powerpc/p9-vec-length-epil-8.c: Expect two lxvl
> less.
> ---
> gcc/gimple-fold.cc | 116 ++++++++++++------
> .../gcc.target/powerpc/p9-vec-length-epil-8.c | 2 +-
> .../gcc.target/riscv/rvv/autovec/pr122635-1.c | 20 +++
> .../gcc.target/riscv/rvv/autovec/pr122635-2.c | 18 +++
> 4 files changed, 116 insertions(+), 40 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122635-1.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122635-2.c
>
> diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
> index 3fc76313622..60169f92655 100644
> --- a/gcc/gimple-fold.cc
> +++ b/gcc/gimple-fold.cc
> @@ -5757,48 +5757,80 @@ arith_overflowed_p (enum tree_code code, const_tree
> type,
> return wi::min_precision (wres, sign) > TYPE_PRECISION (type);
> }
>
> +/* Check if IFN_{MASK,LEN,MASK_LEN}_LOAD/STORE call CALL is unconditional and
> + has a zero length. If so, it can be safely elided. */
> +
> +static bool
> +partial_load_store_elidible_p (gcall *call)
> +{
> + internal_fn ifn = gimple_call_internal_fn (call);
> +
> + int mask_index = internal_fn_mask_index (ifn);
> + if (mask_index != -1)
> + {
> + tree mask = gimple_call_arg (call, mask_index);
> + if (!integer_all_onesp (mask))
> + return false;
> + }
> +
> + int len_index = internal_fn_len_index (ifn);
> + tree len = gimple_call_arg (call, len_index);
> + tree bias = gimple_call_arg (call, len_index + 1);
> + gcc_assert (TREE_CODE (bias) == INTEGER_CST);
> +
> + if (poly_int_tree_p (len))
> + {
> + poly_widest_int wlen = wi::to_poly_widest (len)
> + + wi::to_widest (bias);
> + return known_eq (wlen, 0);
> + }
> +
> + return false;
> +}
> +
> +
> /* If IFN_{MASK,LEN,MASK_LEN}_LOAD/STORE call CALL is unconditional,
> return a MEM_REF for the memory it references, otherwise return null.
> - VECTYPE is the type of the memory vector. MASK_P indicates it's for
> - MASK if true, otherwise it's for LEN. */
> + VECTYPE is the type of the memory vector. */
>
> static tree
> -gimple_fold_partial_load_store_mem_ref (gcall *call, tree vectype, bool
> mask_p)
> +gimple_fold_partial_load_store_mem_ref (gcall *call, tree vectype)
> {
> tree ptr = gimple_call_arg (call, 0);
> tree alias_align = gimple_call_arg (call, 1);
> if (!tree_fits_uhwi_p (alias_align))
> return NULL_TREE;
>
> - if (mask_p)
> + internal_fn ifn = gimple_call_internal_fn (call);
> + int mask_index = internal_fn_mask_index (ifn);
> + int len_index = internal_fn_len_index (ifn);
> +
> + if (mask_index != -1)
> {
> - tree mask = gimple_call_arg (call, 2);
> + tree mask = gimple_call_arg (call, mask_index);
> if (!integer_all_onesp (mask))
> return NULL_TREE;
> }
> - else
> +
> + if (len_index != -1)
> {
> - internal_fn ifn = gimple_call_internal_fn (call);
> - int len_index = internal_fn_len_index (ifn);
> - tree basic_len = gimple_call_arg (call, len_index);
> - if (!poly_int_tree_p (basic_len))
> + tree len = gimple_call_arg (call, len_index);
> + if (!poly_int_tree_p (len))
> return NULL_TREE;
> tree bias = gimple_call_arg (call, len_index + 1);
> gcc_assert (TREE_CODE (bias) == INTEGER_CST);
> - /* For LEN_LOAD/LEN_STORE/MASK_LEN_LOAD/MASK_LEN_STORE,
> - we don't fold when (bias + len) != VF. */
> - if (maybe_ne (wi::to_poly_widest (basic_len) + wi::to_widest (bias),
> - GET_MODE_NUNITS (TYPE_MODE (vectype))))
> - return NULL_TREE;
> -
> - /* For MASK_LEN_{LOAD,STORE}, we should also check whether
> - the mask is all ones mask. */
> - if (ifn == IFN_MASK_LEN_LOAD || ifn == IFN_MASK_LEN_STORE)
> + poly_widest_int wlen = wi::to_poly_widest (len)
> + + wi::to_widest (bias);
> + if (known_eq (wlen, 0) && internal_fn_else_index (ifn))
> {
> - tree mask = gimple_call_arg (call, internal_fn_mask_index (ifn));
> - if (!integer_all_onesp (mask))
> - return NULL_TREE;
> + /* Length is 0. Replace with the else operand. */
> + int else_index = internal_fn_else_index (ifn);
> + return gimple_call_arg (call, else_index);
> }
> + /* For LEN_LOAD/LEN_STORE/MASK_LEN_LOAD/MASK_LEN_STORE,
> + we don't fold when len + bias != VF. */
> + else if (maybe_ne (wlen, GET_MODE_NUNITS (TYPE_MODE (vectype))))
> + return NULL_TREE;
> }
>
> unsigned HOST_WIDE_INT align = tree_to_uhwi (alias_align);
> @@ -5808,18 +5840,17 @@ gimple_fold_partial_load_store_mem_ref (gcall *call,
> tree vectype, bool mask_p)
> return fold_build2 (MEM_REF, vectype, ptr, offset);
> }
>
> -/* Try to fold IFN_{MASK,LEN}_LOAD call CALL. Return true on success.
> - MASK_P indicates it's for MASK if true, otherwise it's for LEN. */
> +/* Try to fold IFN_{MASK,LEN}_LOAD call CALL. Return true on success. */
>
> static bool
> -gimple_fold_partial_load (gimple_stmt_iterator *gsi, gcall *call, bool
> mask_p)
> +gimple_fold_partial_load (gimple_stmt_iterator *gsi, gcall *call)
> {
> tree lhs = gimple_call_lhs (call);
> if (!lhs)
> return false;
>
> if (tree rhs
> - = gimple_fold_partial_load_store_mem_ref (call, TREE_TYPE (lhs),
> mask_p))
> + = gimple_fold_partial_load_store_mem_ref (call, TREE_TYPE (lhs)))
> {
> gassign *new_stmt = gimple_build_assign (lhs, rhs);
> gimple_set_location (new_stmt, gimple_location (call));
> @@ -5830,17 +5861,16 @@ gimple_fold_partial_load (gimple_stmt_iterator *gsi,
> gcall *call, bool mask_p)
> return false;
> }
>
> -/* Try to fold IFN_{MASK,LEN}_STORE call CALL. Return true on success.
> - MASK_P indicates it's for MASK if true, otherwise it's for LEN. */
> +/* Try to fold IFN_{MASK,LEN}_STORE call CALL. Return true on success. */
>
> static bool
> -gimple_fold_partial_store (gimple_stmt_iterator *gsi, gcall *call,
> - bool mask_p)
> +gimple_fold_partial_store (gimple_stmt_iterator *gsi, gcall *call)
> {
> internal_fn ifn = gimple_call_internal_fn (call);
> +
> tree rhs = gimple_call_arg (call, internal_fn_stored_value_index (ifn));
> if (tree lhs
> - = gimple_fold_partial_load_store_mem_ref (call, TREE_TYPE (rhs),
> mask_p))
> + = gimple_fold_partial_load_store_mem_ref (call, TREE_TYPE (rhs)))
> {
> gassign *new_stmt = gimple_build_assign (lhs, rhs);
> gimple_set_location (new_stmt, gimple_location (call));
> @@ -5848,6 +5878,18 @@ gimple_fold_partial_store (gimple_stmt_iterator *gsi,
> gcall *call,
> gsi_replace (gsi, new_stmt, false);
> return true;
> }
> +
> + /* gimple_fold_partial_load_store_mem_ref can replace a zero-length load
> + with its else value but not a store as those don't have an else value.
> + Therefore remove those here. */
> + if (partial_load_store_elidible_p (call))
> + {
> + unlink_stmt_vdef (call);
> + release_defs (call);
> + gsi_replace (gsi, gimple_build_nop (), true);
> + return true;
> + }
> +
> return false;
> }
>
> @@ -6075,19 +6117,15 @@ gimple_fold_call (gimple_stmt_iterator *gsi, bool
> inplace)
> cplx_result = true;
> uaddc_usubc = true;
> break;
> - case IFN_MASK_LOAD:
> - changed |= gimple_fold_partial_load (gsi, stmt, true);
> - break;
> - case IFN_MASK_STORE:
> - changed |= gimple_fold_partial_store (gsi, stmt, true);
> - break;
> case IFN_LEN_LOAD:
> + case IFN_MASK_LOAD:
> case IFN_MASK_LEN_LOAD:
> - changed |= gimple_fold_partial_load (gsi, stmt, false);
> + changed |= gimple_fold_partial_load (gsi, stmt);
> break;
> case IFN_LEN_STORE:
> + case IFN_MASK_STORE:
> case IFN_MASK_LEN_STORE:
> - changed |= gimple_fold_partial_store (gsi, stmt, false);
> + changed |= gimple_fold_partial_store (gsi, stmt);
> break;
> default:
> break;
> diff --git a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-8.c
> b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-8.c
> index 34a2c8eb11b..5dff0d0ceb9 100644
> --- a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-8.c
> +++ b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-8.c
> @@ -13,5 +13,5 @@
>
> #include "p9-vec-length-8.h"
>
> -/* { dg-final { scan-assembler-times {\mlxvl\M} 16 } } */
> +/* { dg-final { scan-assembler-times {\mlxvl\M} 14 } } */
> /* { dg-final { scan-assembler-times {\mstxvl\M} 7 } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122635-1.c
> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122635-1.c
> new file mode 100644
> index 00000000000..0beb3d70866
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122635-1.c
> @@ -0,0 +1,20 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -march=rv64gcv_zvl256b -mabi=lp64d
> -mrvv-vector-bits=zvl -mno-autovec-segment" } */
> +
> +typedef struct {
> + int a[6];
> + float b[3];
> +} c;
> +
> +int d(c *e) {
> + int f =0;
> + for (; f < 3; f++) {
> + e->a[2 * f] = e->b[f];
> + e->a[2 * f + 1] = -e->a[2 * f];
> + e->a[2 * f] = f + 3 * e->a[2 * f];
> + e->a[2 * f + 1] = f + 3 * e->a[2 * f + 1];
> + }
> + return 0;
> +}
> +
> +/* { dg-final { scan-assembler-not "vsetivli.*zero,0" } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122635-2.c
> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122635-2.c
> new file mode 100644
> index 00000000000..0de69b52cb0
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122635-2.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -march=rv64gcv_zvl256b -mabi=lp64d
> -mrvv-vector-bits=zvl -mno-autovec-segment" } */
> +
> +typedef struct {
> + int A[6];
> + float b[];
> +} a;
> +
> +int b(a *a) {
> + int b = 0;
> + for (; b < 3; b++) {
> + a->A[2 * b] = a->b[b] - b + a->A[2 * b];
> + a->A[2 * b + 1] = b * a->A[2 * b + 1];
> + }
> + return 0;
> +}
> +
> +/* { dg-final { scan-assembler-not "vsetivli.*zero,0" } } */
> --
> 2.51.1
>