modified_between_p and modified_in_p conservatively treat any write to
a register as conflicting with all its subregs, even disjoint ones.
This blocks combine optimizations and leaves redundant register moves.
The issue is visible on RISC-V with -mrvv-vector-bits=zvl and may
affect other architectures.
Use chunk-based analysis to recognize disjoint subreg writes as
non-conflicting. Extract implicit_reg_set_p from reg_set_p to
separate implicit clobber checks from explicit subreg write analysis.
For example, with RISC-V VLS:
Before:
vfadd.vf v1,v15,fa4
vfadd.vf v8,v8,fa5
vmv1r.v v15,v1 # Redundant move
After:
vfadd.vf v8,v8,fa4
vfadd.vf v15,v15,fa5 # Direct operation, no move needed
Changes from v1 (following Richard's comments):
- Switch to `rtx_properties` (with a new `ignore_srcs` flag) to avoid
re-implementing instruction traversal logic.
- Drop the strict mode size check; the chunk-based analysis is already
robust enough for general cases.
- Use `read_modify_subreg_p` to filter out full-register writes early
and remove redundant precondition checks.
Thanks for Richard's helpful and detailed guidance.
gcc/
* rtlanal.h (class rtx_properties): Add ignore_srcs flag.
(rtx_properties::try_to_add_src_1): New prototype.
(rtx_properties::try_to_add_src): Inline wrapper around
try_to_add_src_1.
* rtlanal.cc (rtx_properties::try_to_add_src_1): Rename from
try_to_add_src.
(implicit_reg_set_p): New function.
(reg_set_p): Refactored to call implicit_reg_set_p.
(subreg_write_clobbers_ref_p): New function.
(subreg_modified_by_dest_p): New function.
(subreg_modified_in_p): New function.
(subreg_modified_between_p): New function.
(modified_between_p): Use read_modify_subreg_p up-front and
subreg_modified_between_p.
(modified_in_p): Ditto.
gcc/testsuite/
* gcc.target/riscv/rvv/base/tuple-zvl-subreg.c: New test.
Signed-off-by: Zhongyao Chen <[email protected]>
---
gcc/rtlanal.cc | 188 +++++++++++++++++-
gcc/rtlanal.h | 17 +-
.../riscv/rvv/base/tuple-zvl-subreg.c | 33 +++
3 files changed, 232 insertions(+), 6 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/tuple-zvl-subreg.c
diff --git a/gcc/rtlanal.cc b/gcc/rtlanal.cc
index 88561a54e5a..f79d62371d5 100644
--- a/gcc/rtlanal.cc
+++ b/gcc/rtlanal.cc
@@ -1230,17 +1230,17 @@ reg_set_between_p (const_rtx reg, const rtx_insn
*from_insn,
return false;
}
-/* Return true if REG is set or clobbered inside INSN. */
+/* Return true if REG is modified by an implicit side effect of INSN. */
-bool
-reg_set_p (const_rtx reg, const_rtx insn)
+static bool
+implicit_reg_set_p (const_rtx reg, const_rtx insn)
{
/* After delay slot handling, call and branch insns might be in a
sequence. Check all the elements there. */
if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
{
for (int i = 0; i < XVECLEN (PATTERN (insn), 0); ++i)
- if (reg_set_p (reg, XVECEXP (PATTERN (insn), 0, i)))
+ if (implicit_reg_set_p (reg, XVECEXP (PATTERN (insn), 0, i)))
return true;
return false;
@@ -1277,9 +1277,177 @@ reg_set_p (const_rtx reg, const_rtx insn)
}
}
+ return false;
+}
+
+/* Return true if REG is set or clobbered inside INSN. */
+
+bool
+reg_set_p (const_rtx reg, const_rtx insn)
+{
+ /* After delay slot handling, call and branch insns might be in a
+ sequence. Check all the elements there. */
+ if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
+ {
+ for (int i = 0; i < XVECLEN (PATTERN (insn), 0); ++i)
+ if (reg_set_p (reg, XVECEXP (PATTERN (insn), 0, i)))
+ return true;
+
+ return false;
+ }
+
+ if (implicit_reg_set_p (reg, insn))
+ return true;
+
return set_of (reg, insn) != NULL_RTX;
}
+/* Return true if storing to DEST changes the value of SUBREG_REF, where both
+ are simple subregs of the same register with a known common containing size.
+
+ A subreg store can clobber the whole REGMODE_NATURAL_SIZE chunk that
+ contains it, even when the stored mode itself is smaller. */
+
+static bool
+subreg_write_clobbers_ref_p (const_rtx subreg_ref, const_rtx dest)
+{
+ poly_uint64 chunk_size = REGMODE_NATURAL_SIZE (GET_MODE (SUBREG_REG (dest)));
+ poly_uint64 ref_chunk_size
+ = REGMODE_NATURAL_SIZE (GET_MODE (SUBREG_REG (subreg_ref)));
+ if (maybe_gt (ref_chunk_size, chunk_size))
+ chunk_size = ref_chunk_size;
+ poly_uint64 container_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest)));
+ HOST_WIDE_INT num_chunks;
+ if (!constant_multiple_p (container_size, chunk_size, &num_chunks))
+ return true;
+
+ poly_uint64 ref_start = SUBREG_BYTE (subreg_ref);
+ poly_uint64 ref_size = GET_MODE_SIZE (GET_MODE (subreg_ref));
+ poly_uint64 dest_start = SUBREG_BYTE (dest);
+ poly_uint64 dest_size = GET_MODE_SIZE (GET_MODE (dest));
+
+ for (HOST_WIDE_INT i = 0; i < num_chunks; ++i)
+ {
+ poly_uint64 chunk_start = i * chunk_size;
+ if (ranges_maybe_overlap_p (chunk_start, chunk_size,
+ dest_start, dest_size)
+ && ranges_maybe_overlap_p (chunk_start, chunk_size,
+ ref_start, ref_size))
+ return true;
+ }
+ return false;
+}
+
+/* Return true if DEST modifies SUBREG_REF. */
+
+static bool
+subreg_modified_by_dest_p (const_rtx subreg_ref, const_rtx dest)
+{
+ if (!dest)
+ return false;
+
+ switch (GET_CODE (dest))
+ {
+ case SUBREG:
+ if (REG_P (SUBREG_REG (dest))
+ && REGNO (SUBREG_REG (subreg_ref)) == REGNO (SUBREG_REG (dest)))
+ return subreg_write_clobbers_ref_p (subreg_ref, dest);
+ return reg_overlap_mentioned_p (subreg_ref, dest);
+
+ case STRICT_LOW_PART:
+ case ZERO_EXTRACT:
+ case SIGN_EXTRACT:
+ return subreg_modified_by_dest_p (subreg_ref, XEXP (dest, 0));
+
+ case REG:
+ case SCRATCH:
+ case PC:
+ return reg_overlap_mentioned_p (subreg_ref, dest);
+
+ case PARALLEL:
+ for (int i = XVECLEN (dest, 0) - 1; i >= 0; --i)
+ if (XEXP (XVECEXP (dest, 0, i), 0)
+ && subreg_modified_by_dest_p (subreg_ref,
+ XEXP (XVECEXP (dest, 0, i), 0)))
+ return true;
+ return false;
+
+ default:
+ return false;
+ }
+}
+
+/* Return true if X, a simple partial SUBREG of a REG, is modified in INSN. */
+
+static bool
+subreg_modified_in_p (const_rtx x, const_rtx insn)
+{
+ gcc_checking_assert (SUBREG_P (x) && REG_P (SUBREG_REG (x)));
+
+ if (implicit_reg_set_p (SUBREG_REG (x), insn))
+ return true;
+
+ vec_rtx_properties properties;
+ properties.ignore_srcs = true;
+ if (INSN_P (insn))
+ properties.try_to_add_insn (as_a <const rtx_insn *> (insn), false);
+ else
+ properties.try_to_add_pattern (insn);
+ bool possible_conflict = false;
+ for (auto ref : properties.refs ())
+ if (ref.is_reg () && ref.is_write ()
+ && ref.regno == REGNO (SUBREG_REG (x)))
+ {
+ if (!ref.in_subreg ())
+ return true;
+ possible_conflict = true;
+ }
+
+ if (!possible_conflict)
+ return false;
+
+ subrtx_iterator::array_type array;
+ FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn,
NONCONST)
+ {
+ const_rtx sub = *iter;
+ if (GET_CODE (sub) == SET || GET_CODE (sub) == CLOBBER)
+ {
+ if (subreg_modified_by_dest_p (x, XEXP (sub, 0)))
+ return true;
+ }
+ }
+
+ if (CALL_P (insn))
+ {
+ for (rtx link = CALL_INSN_FUNCTION_USAGE (insn); link;
+ link = XEXP (link, 1))
+ {
+ rtx sub = XEXP (link, 0);
+ if (GET_CODE (sub) == CLOBBER)
+ if (subreg_modified_by_dest_p (x, XEXP (sub, 0)))
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/* Return true if X, a simple partial SUBREG of a REG, is modified
+ between START and END. */
+
+static bool
+subreg_modified_between_p (const_rtx x, const rtx_insn *start,
+ const rtx_insn *end)
+{
+ for (const rtx_insn *insn = NEXT_INSN (start);
+ insn != end;
+ insn = NEXT_INSN (insn))
+ if (subreg_modified_in_p (x, insn))
+ return true;
+
+ return false;
+}
+
/* Similar to reg_set_between_p, but check all registers in X. Return false
only if none of them are modified between START and END. Return true if
X contains a MEM; this routine does use memory aliasing. */
@@ -1319,6 +1487,11 @@ modified_between_p (const_rtx x, const rtx_insn *start,
const rtx_insn *end)
case REG:
return reg_set_between_p (x, start, end);
+ case SUBREG:
+ if (REG_P (SUBREG_REG (x)) && read_modify_subreg_p (x))
+ return subreg_modified_between_p (x, start, end);
+ break;
+
default:
break;
}
@@ -1372,6 +1545,11 @@ modified_in_p (const_rtx x, const_rtx insn)
case REG:
return reg_set_p (x, insn);
+ case SUBREG:
+ if (REG_P (SUBREG_REG (x)) && read_modify_subreg_p (x))
+ return subreg_modified_in_p (x, insn);
+ break;
+
default:
break;
}
@@ -2180,7 +2358,7 @@ rtx_properties::try_to_add_dest (const_rtx x, unsigned
int flags)
This routine accepts all rtxes that can legitimately appear in a SET_SRC.
*/
void
-rtx_properties::try_to_add_src (const_rtx x, unsigned int flags)
+rtx_properties::try_to_add_src_1 (const_rtx x, unsigned int flags)
{
unsigned int base_flags = flags & rtx_obj_flags::STICKY_FLAGS;
subrtx_iterator::array_type array;
diff --git a/gcc/rtlanal.h b/gcc/rtlanal.h
index 04c3b1b9a59..e62c3bf4402 100644
--- a/gcc/rtlanal.h
+++ b/gcc/rtlanal.h
@@ -129,6 +129,7 @@ public:
void try_to_add_reg (const_rtx x, unsigned int flags = 0);
void try_to_add_dest (const_rtx x, unsigned int flags = 0);
+ void try_to_add_src_1 (const_rtx x, unsigned int flags = 0);
void try_to_add_src (const_rtx x, unsigned int flags = 0);
void try_to_add_pattern (const_rtx pat);
void try_to_add_note (const_rtx x);
@@ -160,8 +161,11 @@ public:
volatile_refs_p. */
unsigned int has_volatile_refs : 1;
+ /* True if we should ignore sources and only record destinations. */
+ unsigned int ignore_srcs : 1;
+
/* For future expansion. */
- unsigned int spare : 28;
+ unsigned int spare : 27;
};
inline rtx_properties::rtx_properties ()
@@ -172,6 +176,7 @@ inline rtx_properties::rtx_properties ()
has_call (false),
has_pre_post_modify (false),
has_volatile_refs (false),
+ ignore_srcs (false),
spare (0)
{
}
@@ -185,6 +190,16 @@ rtx_properties::try_to_add_note (const_rtx x)
try_to_add_src (x, rtx_obj_flags::IN_NOTE);
}
+/* Wrapper around try_to_add_src_1 that avoids the walk if IGNORE_SRCS
+ is true. */
+
+inline void
+rtx_properties::try_to_add_src (const_rtx x, unsigned int flags)
+{
+ if (!ignore_srcs)
+ try_to_add_src_1 (x, flags);
+}
+
/* Return true if the rtx has side effects, in the sense of
side_effects_p (except for side_effects_p's special handling
of combine.cc clobbers). */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/tuple-zvl-subreg.c
b/gcc/testsuite/gcc.target/riscv/rvv/base/tuple-zvl-subreg.c
new file mode 100644
index 00000000000..6433b77e6f4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/tuple-zvl-subreg.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvl256b -mabi=lp64d -O3" } */
+/* { dg-additional-options "-mrvv-vector-bits=zvl" } */
+/* { dg-additional-options "-fno-schedule-insns -fno-schedule-insns2" } */
+
+#include <stddef.h>
+#include "riscv_vector.h"
+
+/* Two disjoint tuple field updates should not require whole-register
+ copies when -mrvv-vector-bits=zvl represents the fields as VLS
+ subregs. */
+
+__attribute__ ((noipa))
+void
+foo (float *dst)
+{
+ const size_t vl = 8;
+ const ptrdiff_t stride = (ptrdiff_t) sizeof (*dst);
+ vfloat32m1x8_t tuple = __riscv_vlsseg8e32_v_f32m1x8 (dst, stride, vl);
+
+ tuple = __riscv_vset_v_f32m1_f32m1x8 (
+ tuple, 0,
+ __riscv_vfadd_vf_f32m1 (
+ __riscv_vget_v_f32m1x8_f32m1 (tuple, 0), 1.0f, vl));
+ tuple = __riscv_vset_v_f32m1_f32m1x8 (
+ tuple, 7,
+ __riscv_vfadd_vf_f32m1 (
+ __riscv_vget_v_f32m1x8_f32m1 (tuple, 7), 2.0f, vl));
+
+ __riscv_vssseg8e32_v_f32m1x8 (dst, stride, tuple, vl);
+}
+
+/* { dg-final { scan-assembler-not {vmv[1248]r\.v\s+v[0-9]+,\s*v[0-9]+} } } */
--
2.43.0