modified_between_p and modified_in_p conservatively treat any write to
a register as conflicting with all its subregs, even disjoint ones.
This blocks combine optimizations and leaves redundant register moves.
The issue is visible on RISC-V with -mrvv-vector-bits=zvl and may
affect other architectures as well, though I have not tried to
construct testcases for them.
Use chunk-based analysis to recognize disjoint subreg writes as
non-conflicting. Extract implicit_reg_set_p from reg_set_p to
separate implicit clobber checks from explicit subreg write analysis.
For example, with RISC-V VLS:
Before:
vfadd.vf v1,v15,fa4
vfadd.vf v8,v8,fa5
vmv1r.v v15,v1 # Redundant move
After:
vfadd.vf v8,v8,fa4
vfadd.vf v15,v15,fa5 # Direct operation, no move needed
gcc/
* rtlanal.cc (implicit_reg_set_p): New function.
(reg_set_p): Refactor to call implicit_reg_set_p.
(simple_reg_subreg_p): New function.
(subreg_write_clobbers_ref_p): New function.
(subreg_modified_by_dest_p): New function.
(subreg_modified_by_pattern_p): New function.
(simple_reg_subreg_modified_in_p): New function.
(simple_reg_subreg_modified_between_p): New function.
(modified_between_p): Use chunk-based analysis for subreg writes.
(modified_in_p): Ditto.
gcc/testsuite/
* gcc.target/riscv/rvv/base/tuple-zvl-subreg.c: New test.
Signed-off-by: Zhongyao Chen <[email protected]>
---
gcc/rtlanal.cc | 190 +++++++++++++++++-
.../riscv/rvv/base/tuple-zvl-subreg.c | 33 +++
2 files changed, 219 insertions(+), 4 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/tuple-zvl-subreg.c
diff --git a/gcc/rtlanal.cc b/gcc/rtlanal.cc
index 88561a54e5a..09d23c6e51b 100644
--- a/gcc/rtlanal.cc
+++ b/gcc/rtlanal.cc
@@ -1230,17 +1230,17 @@ reg_set_between_p (const_rtx reg, const rtx_insn *from_insn,
return false;
}
-/* Return true if REG is set or clobbered inside INSN. */
+/* Return true if REG is modified by an implicit side effect of INSN. */
-bool
-reg_set_p (const_rtx reg, const_rtx insn)
+static bool
+implicit_reg_set_p (const_rtx reg, const_rtx insn)
{
/* After delay slot handling, call and branch insns might be in a
sequence. Check all the elements there. */
if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
{
for (int i = 0; i < XVECLEN (PATTERN (insn), 0); ++i)
- if (reg_set_p (reg, XVECEXP (PATTERN (insn), 0, i)))
+ if (implicit_reg_set_p (reg, XVECEXP (PATTERN (insn), 0, i)))
return true;
return false;
@@ -1277,9 +1277,181 @@ reg_set_p (const_rtx reg, const_rtx insn)
}
}
+ return false;
+}
+
+/* Return true if REG is set or clobbered inside INSN. */
+
+bool
+reg_set_p (const_rtx reg, const_rtx insn)
+{
+ /* After delay slot handling, call and branch insns might be in a
+ sequence. Check all the elements there. */
+ if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
+ {
+ for (int i = 0; i < XVECLEN (PATTERN (insn), 0); ++i)
+ if (reg_set_p (reg, XVECEXP (PATTERN (insn), 0, i)))
+ return true;
+
+ return false;
+ }
+
+ if (implicit_reg_set_p (reg, insn))
+ return true;
+
return set_of (reg, insn) != NULL_RTX;
}
+/* Return true if X is a simple non-paradoxical SUBREG of a REG. */
+
+static bool
+simple_reg_subreg_p (const_rtx x)
+{
+ return (SUBREG_P (x)
+ && REG_P (SUBREG_REG (x))
+ && !paradoxical_subreg_p (x));
+}
+
+/* Return true if storing to DEST changes the value of SUBREG_REF, where both
+ are simple subregs of the same register with a known common containing size.
+
+ A subreg store can clobber the whole REGMODE_NATURAL_SIZE chunk that
+ contains it, even when the stored mode itself is smaller. */
+
+static bool
+subreg_write_clobbers_ref_p (const_rtx subreg_ref, const_rtx dest)
+{
+ poly_uint64 chunk_size = REGMODE_NATURAL_SIZE (GET_MODE (SUBREG_REG (dest)));
+ poly_uint64 ref_chunk_size = REGMODE_NATURAL_SIZE (GET_MODE (SUBREG_REG
(subreg_ref)));
+ if (maybe_gt (ref_chunk_size, chunk_size))
+ chunk_size = ref_chunk_size;
+ poly_uint64 container_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest)));
+ HOST_WIDE_INT num_chunks;
+ if (!constant_multiple_p (container_size, chunk_size, &num_chunks))
+ return true;
+
+ poly_uint64 ref_start = SUBREG_BYTE (subreg_ref);
+ poly_uint64 ref_size = GET_MODE_SIZE (GET_MODE (subreg_ref));
+ poly_uint64 dest_start = SUBREG_BYTE (dest);
+ poly_uint64 dest_size = GET_MODE_SIZE (GET_MODE (dest));
+
+ for (HOST_WIDE_INT i = 0; i < num_chunks; ++i)
+ {
+ poly_uint64 chunk_start = i * chunk_size;
+ if (ranges_maybe_overlap_p (chunk_start, chunk_size,
+ dest_start, dest_size)
+ && ranges_maybe_overlap_p (chunk_start, chunk_size,
+ ref_start, ref_size))
+ return true;
+ }
+ return false;
+}
+
+/* Return true if DEST modifies SUBREG_REF. */
+
+static bool
+subreg_modified_by_dest_p (const_rtx subreg_ref, const_rtx dest)
+{
+ if (!dest)
+ return false;
+
+ switch (GET_CODE (dest))
+ {
+ case SUBREG:
+ if (simple_reg_subreg_p (subreg_ref)
+ && simple_reg_subreg_p (dest)
+ && REGNO (SUBREG_REG (subreg_ref)) == REGNO (SUBREG_REG (dest))
+ && known_eq (GET_MODE_SIZE (GET_MODE (SUBREG_REG (subreg_ref))),
+ GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest)))))
+ return subreg_write_clobbers_ref_p (subreg_ref, dest);
+ return reg_overlap_mentioned_p (subreg_ref, dest);
+
+ case STRICT_LOW_PART:
+ case ZERO_EXTRACT:
+ case SIGN_EXTRACT:
+ return subreg_modified_by_dest_p (subreg_ref, XEXP (dest, 0));
+
+ case REG:
+ case SCRATCH:
+ case PC:
+ return reg_overlap_mentioned_p (subreg_ref, dest);
+
+ case PARALLEL:
+ for (int i = XVECLEN (dest, 0) - 1; i >= 0; --i)
+ if (XEXP (XVECEXP (dest, 0, i), 0)
+ && subreg_modified_by_dest_p (subreg_ref,
+ XEXP (XVECEXP (dest, 0, i), 0)))
+ return true;
+ return false;
+
+ default:
+ return false;
+ }
+}
+
+/* Return true if PAT modifies SUBREG_REF. */
+
+static bool
+subreg_modified_by_pattern_p (const_rtx subreg_ref, const_rtx pat)
+{
+ if (GET_CODE (pat) == COND_EXEC)
+ pat = COND_EXEC_CODE (pat);
+
+ switch (GET_CODE (pat))
+ {
+ case SET:
+ case CLOBBER:
+ return subreg_modified_by_dest_p (subreg_ref, SET_DEST (pat));
+
+ case PARALLEL:
+ for (int i = XVECLEN (pat, 0) - 1; i >= 0; --i)
+ if (subreg_modified_by_pattern_p (subreg_ref, XVECEXP (pat, 0, i)))
+ return true;
+ return false;
+
+ case SEQUENCE:
+ for (int i = XVECLEN (pat, 0) - 1; i >= 0; --i)
+ if (subreg_modified_by_pattern_p (subreg_ref,
+ PATTERN (XVECEXP (pat, 0, i))))
+ return true;
+ return false;
+
+ default:
+ return false;
+ }
+}
+
+/* Return true if X, a simple non-paradoxical SUBREG of a REG, is modified in
+ INSN. */
+
+static bool
+simple_reg_subreg_modified_in_p (const_rtx x, const_rtx insn)
+{
+ gcc_checking_assert (simple_reg_subreg_p (x));
+
+ if (implicit_reg_set_p (SUBREG_REG (x), insn))
+ return true;
+
+ return subreg_modified_by_pattern_p (x,
+ INSN_P (insn) ? PATTERN (insn) : insn);
+}
+
+/* Return true if X, a simple non-paradoxical SUBREG of a REG, is modified
+ between START and END. */
+
+static bool
+simple_reg_subreg_modified_between_p (const_rtx x, const rtx_insn *start,
+ const rtx_insn *end)
+{
+ for (const rtx_insn *insn = NEXT_INSN (start);
+ insn != end;
+ insn = NEXT_INSN (insn))
+ if (simple_reg_subreg_modified_in_p (x, insn))
+ return true;
+
+ return false;
+}
+
/* Similar to reg_set_between_p, but check all registers in X. Return false
only if none of them are modified between START and END. Return true if
X contains a MEM; this routine does use memory aliasing. */
@@ -1319,6 +1491,11 @@ modified_between_p (const_rtx x, const rtx_insn *start, const rtx_insn *end)
case REG:
return reg_set_between_p (x, start, end);
+ case SUBREG:
+ if (simple_reg_subreg_p (x))
+ return simple_reg_subreg_modified_between_p (x, start, end);
+ break;
+
default:
break;
}
@@ -1372,6 +1549,11 @@ modified_in_p (const_rtx x, const_rtx insn)
case REG:
return reg_set_p (x, insn);
+ case SUBREG:
+ if (simple_reg_subreg_p (x))
+ return simple_reg_subreg_modified_in_p (x, insn);
+ break;
+
default:
break;
}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/tuple-zvl-subreg.c b/gcc/testsuite/gcc.target/riscv/rvv/base/tuple-zvl-subreg.c
new file mode 100644
index 00000000000..6433b77e6f4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/tuple-zvl-subreg.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvl256b -mabi=lp64d -O3" } */
+/* { dg-additional-options "-mrvv-vector-bits=zvl" } */
+/* { dg-additional-options "-fno-schedule-insns -fno-schedule-insns2" } */
+
+#include <stddef.h>
+#include "riscv_vector.h"
+
+/* Two disjoint tuple field updates should not require whole-register
+ copies when -mrvv-vector-bits=zvl represents the fields as VLS
+ subregs. */
+
+__attribute__ ((noipa))
+void
+foo (float *dst)
+{
+ const size_t vl = 8;
+ const ptrdiff_t stride = (ptrdiff_t) sizeof (*dst);
+ vfloat32m1x8_t tuple = __riscv_vlsseg8e32_v_f32m1x8 (dst, stride, vl);
+
+ tuple = __riscv_vset_v_f32m1_f32m1x8 (
+ tuple, 0,
+ __riscv_vfadd_vf_f32m1 (
+ __riscv_vget_v_f32m1x8_f32m1 (tuple, 0), 1.0f, vl));
+ tuple = __riscv_vset_v_f32m1_f32m1x8 (
+ tuple, 7,
+ __riscv_vfadd_vf_f32m1 (
+ __riscv_vget_v_f32m1x8_f32m1 (tuple, 7), 2.0f, vl));
+
+ __riscv_vssseg8e32_v_f32m1x8 (dst, stride, tuple, vl);
+}
+
+/* { dg-final { scan-assembler-not {vmv[1248]r\.v\s+v[0-9]+,\s*v[0-9]+} } } */
--
2.43.0