modified_between_p and modified_in_p conservatively treat any write to
a register as conflicting with all its subregs, even disjoint ones.

This blocks combine optimizations and leaves redundant register moves.
The issue is visible on RISC-V with -mrvv-vector-bits=zvl and may
affect other architectures.

Use chunk-based analysis to recognize disjoint subreg writes as
non-conflicting. Extract implicit_reg_set_p from reg_set_p to
separate implicit clobber checks from explicit subreg write analysis.

For example, with RISC-V VLS:

  Before:
    vfadd.vf  v1,v15,fa4
    vfadd.vf  v8,v8,fa5
    vmv1r.v   v15,v1        # Redundant move

  After:
    vfadd.vf  v8,v8,fa4
    vfadd.vf  v15,v15,fa5   # Direct operation, no move needed

Changes from v1 (following Richard's comments):
- Switch to `rtx_properties` (with a new `ignore_srcs` flag) to avoid
  re-implementing instruction traversal logic.
- Drop the strict mode size check; the chunk-based analysis is already
  robust enough for general cases.
- Use `read_modify_subreg_p` to filter out full-register writes early
  and remove redundant precondition checks.

Thanks for Richard's helpful and detailed guidance.

gcc/
        * rtlanal.h (class rtx_properties): Add ignore_srcs flag.
        (rtx_properties::try_to_add_src_1): New prototype.
        (rtx_properties::try_to_add_src): Inline wrapper around
        try_to_add_src_1.
        * rtlanal.cc (rtx_properties::try_to_add_src_1): Rename from
        try_to_add_src.
        (implicit_reg_set_p): New function.
        (reg_set_p): Refactored to call implicit_reg_set_p.
        (subreg_write_clobbers_ref_p): New function.
        (subreg_modified_by_dest_p): New function.
        (subreg_modified_in_p): New function.
        (subreg_modified_between_p): New function.
        (modified_between_p): Use read_modify_subreg_p up-front and
        subreg_modified_between_p.
        (modified_in_p): Ditto.

gcc/testsuite/
        * gcc.target/riscv/rvv/base/tuple-zvl-subreg.c: New test.

Signed-off-by: Zhongyao Chen <[email protected]>
---
 gcc/rtlanal.cc                                | 188 +++++++++++++++++-
 gcc/rtlanal.h                                 |  17 +-
 .../riscv/rvv/base/tuple-zvl-subreg.c         |  33 +++
 3 files changed, 232 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/tuple-zvl-subreg.c

diff --git a/gcc/rtlanal.cc b/gcc/rtlanal.cc
index 88561a54e5a..f79d62371d5 100644
--- a/gcc/rtlanal.cc
+++ b/gcc/rtlanal.cc
@@ -1230,17 +1230,17 @@ reg_set_between_p (const_rtx reg, const rtx_insn 
*from_insn,
   return false;
 }
 
-/* Return true if REG is set or clobbered inside INSN.  */
+/* Return true if REG is modified by an implicit side effect of INSN.  */
 
-bool
-reg_set_p (const_rtx reg, const_rtx insn)
+static bool
+implicit_reg_set_p (const_rtx reg, const_rtx insn)
 {
   /* After delay slot handling, call and branch insns might be in a
      sequence.  Check all the elements there.  */
   if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
     {
       for (int i = 0; i < XVECLEN (PATTERN (insn), 0); ++i)
-       if (reg_set_p (reg, XVECEXP (PATTERN (insn), 0, i)))
+       if (implicit_reg_set_p (reg, XVECEXP (PATTERN (insn), 0, i)))
          return true;
 
       return false;
@@ -1277,9 +1277,177 @@ reg_set_p (const_rtx reg, const_rtx insn)
        }
     }
 
+  return false;
+}
+
+/* Return true if REG is set or clobbered inside INSN.  */
+
+bool
+reg_set_p (const_rtx reg, const_rtx insn)
+{
+  /* After delay slot handling, call and branch insns might be in a
+     sequence.  Check all the elements there.  */
+  if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
+    {
+      for (int i = 0; i < XVECLEN (PATTERN (insn), 0); ++i)
+       if (reg_set_p (reg, XVECEXP (PATTERN (insn), 0, i)))
+         return true;
+
+      return false;
+    }
+
+  if (implicit_reg_set_p (reg, insn))
+    return true;
+
   return set_of (reg, insn) != NULL_RTX;
 }
 
+/* Return true if storing to DEST changes the value of SUBREG_REF, where both
+   are simple subregs of the same register with a known common containing size.
+
+   A subreg store can clobber the whole REGMODE_NATURAL_SIZE chunk that
+   contains it, even when the stored mode itself is smaller.  */
+
+static bool
+subreg_write_clobbers_ref_p (const_rtx subreg_ref, const_rtx dest)
+{
+  poly_uint64 chunk_size = REGMODE_NATURAL_SIZE (GET_MODE (SUBREG_REG (dest)));
+  poly_uint64 ref_chunk_size
+    = REGMODE_NATURAL_SIZE (GET_MODE (SUBREG_REG (subreg_ref)));
+  if (maybe_gt (ref_chunk_size, chunk_size))
+    chunk_size = ref_chunk_size;
+  poly_uint64 container_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest)));
+  HOST_WIDE_INT num_chunks;
+  if (!constant_multiple_p (container_size, chunk_size, &num_chunks))
+    return true;
+
+  poly_uint64 ref_start = SUBREG_BYTE (subreg_ref);
+  poly_uint64 ref_size = GET_MODE_SIZE (GET_MODE (subreg_ref));
+  poly_uint64 dest_start = SUBREG_BYTE (dest);
+  poly_uint64 dest_size = GET_MODE_SIZE (GET_MODE (dest));
+
+  for (HOST_WIDE_INT i = 0; i < num_chunks; ++i)
+    {
+      poly_uint64 chunk_start = i * chunk_size;
+      if (ranges_maybe_overlap_p (chunk_start, chunk_size,
+                                 dest_start, dest_size)
+         && ranges_maybe_overlap_p (chunk_start, chunk_size,
+                                    ref_start, ref_size))
+       return true;
+    }
+  return false;
+}
+
+/* Return true if DEST modifies SUBREG_REF.  */
+
+static bool
+subreg_modified_by_dest_p (const_rtx subreg_ref, const_rtx dest)
+{
+  if (!dest)
+    return false;
+
+  switch (GET_CODE (dest))
+    {
+    case SUBREG:
+      if (REG_P (SUBREG_REG (dest))
+         && REGNO (SUBREG_REG (subreg_ref)) == REGNO (SUBREG_REG (dest)))
+       return subreg_write_clobbers_ref_p (subreg_ref, dest);
+      return reg_overlap_mentioned_p (subreg_ref, dest);
+
+    case STRICT_LOW_PART:
+    case ZERO_EXTRACT:
+    case SIGN_EXTRACT:
+      return subreg_modified_by_dest_p (subreg_ref, XEXP (dest, 0));
+
+    case REG:
+    case SCRATCH:
+    case PC:
+      return reg_overlap_mentioned_p (subreg_ref, dest);
+
+    case PARALLEL:
+      for (int i = XVECLEN (dest, 0) - 1; i >= 0; --i)
+       if (XEXP (XVECEXP (dest, 0, i), 0)
+           && subreg_modified_by_dest_p (subreg_ref,
+                                         XEXP (XVECEXP (dest, 0, i), 0)))
+         return true;
+      return false;
+
+    default:
+      return false;
+    }
+}
+
+/* Return true if X, a simple partial SUBREG of a REG, is modified in INSN.  */
+
+static bool
+subreg_modified_in_p (const_rtx x, const_rtx insn)
+{
+  gcc_checking_assert (SUBREG_P (x) && REG_P (SUBREG_REG (x)));
+
+  if (implicit_reg_set_p (SUBREG_REG (x), insn))
+    return true;
+
+  vec_rtx_properties properties;
+  properties.ignore_srcs = true;
+  if (INSN_P (insn))
+    properties.try_to_add_insn (as_a <const rtx_insn *> (insn), false);
+  else
+    properties.try_to_add_pattern (insn);
+  bool possible_conflict = false;
+  for (auto ref : properties.refs ())
+    if (ref.is_reg () && ref.is_write ()
+       && ref.regno == REGNO (SUBREG_REG (x)))
+      {
+       if (!ref.in_subreg ())
+         return true;
+       possible_conflict = true;
+      }
+
+  if (!possible_conflict)
+    return false;
+
+  subrtx_iterator::array_type array;
+  FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, 
NONCONST)
+    {
+      const_rtx sub = *iter;
+      if (GET_CODE (sub) == SET || GET_CODE (sub) == CLOBBER)
+       {
+         if (subreg_modified_by_dest_p (x, XEXP (sub, 0)))
+           return true;
+       }
+    }
+
+  if (CALL_P (insn))
+    {
+      for (rtx link = CALL_INSN_FUNCTION_USAGE (insn); link;
+          link = XEXP (link, 1))
+       {
+         rtx sub = XEXP (link, 0);
+         if (GET_CODE (sub) == CLOBBER)
+           if (subreg_modified_by_dest_p (x, XEXP (sub, 0)))
+             return true;
+       }
+    }
+
+  return false;
+}
+
+/* Return true if X, a simple partial SUBREG of a REG, is modified
+   between START and END.  */
+
+static bool
+subreg_modified_between_p (const_rtx x, const rtx_insn *start,
+                          const rtx_insn *end)
+{
+  for (const rtx_insn *insn = NEXT_INSN (start);
+       insn != end;
+       insn = NEXT_INSN (insn))
+    if (subreg_modified_in_p (x, insn))
+      return true;
+
+  return false;
+}
+
 /* Similar to reg_set_between_p, but check all registers in X.  Return false
    only if none of them are modified between START and END.  Return true if
    X contains a MEM; this routine does use memory aliasing.  */
@@ -1319,6 +1487,11 @@ modified_between_p (const_rtx x, const rtx_insn *start, 
const rtx_insn *end)
     case REG:
       return reg_set_between_p (x, start, end);
 
+    case SUBREG:
+      if (REG_P (SUBREG_REG (x)) && read_modify_subreg_p (x))
+       return subreg_modified_between_p (x, start, end);
+      break;
+
     default:
       break;
     }
@@ -1372,6 +1545,11 @@ modified_in_p (const_rtx x, const_rtx insn)
     case REG:
       return reg_set_p (x, insn);
 
+    case SUBREG:
+      if (REG_P (SUBREG_REG (x)) && read_modify_subreg_p (x))
+       return subreg_modified_in_p (x, insn);
+      break;
+
     default:
       break;
     }
@@ -2180,7 +2358,7 @@ rtx_properties::try_to_add_dest (const_rtx x, unsigned 
int flags)
    This routine accepts all rtxes that can legitimately appear in a SET_SRC.  
*/
 
 void
-rtx_properties::try_to_add_src (const_rtx x, unsigned int flags)
+rtx_properties::try_to_add_src_1 (const_rtx x, unsigned int flags)
 {
   unsigned int base_flags = flags & rtx_obj_flags::STICKY_FLAGS;
   subrtx_iterator::array_type array;
diff --git a/gcc/rtlanal.h b/gcc/rtlanal.h
index 04c3b1b9a59..e62c3bf4402 100644
--- a/gcc/rtlanal.h
+++ b/gcc/rtlanal.h
@@ -129,6 +129,7 @@ public:
 
   void try_to_add_reg (const_rtx x, unsigned int flags = 0);
   void try_to_add_dest (const_rtx x, unsigned int flags = 0);
+  void try_to_add_src_1 (const_rtx x, unsigned int flags = 0);
   void try_to_add_src (const_rtx x, unsigned int flags = 0);
   void try_to_add_pattern (const_rtx pat);
   void try_to_add_note (const_rtx x);
@@ -160,8 +161,11 @@ public:
      volatile_refs_p.  */
   unsigned int has_volatile_refs : 1;
 
+  /* True if we should ignore sources and only record destinations.  */
+  unsigned int ignore_srcs : 1;
+
   /* For future expansion.  */
-  unsigned int spare : 28;
+  unsigned int spare : 27;
 };
 
 inline rtx_properties::rtx_properties ()
@@ -172,6 +176,7 @@ inline rtx_properties::rtx_properties ()
     has_call (false),
     has_pre_post_modify (false),
     has_volatile_refs (false),
+    ignore_srcs (false),
     spare (0)
 {
 }
@@ -185,6 +190,16 @@ rtx_properties::try_to_add_note (const_rtx x)
   try_to_add_src (x, rtx_obj_flags::IN_NOTE);
 }
 
+/* Wrapper around try_to_add_src_1 that avoids the walk if IGNORE_SRCS
+   is true.  */
+
+inline void
+rtx_properties::try_to_add_src (const_rtx x, unsigned int flags)
+{
+  if (!ignore_srcs)
+    try_to_add_src_1 (x, flags);
+}
+
 /* Return true if the rtx has side effects, in the sense of
    side_effects_p (except for side_effects_p's special handling
    of combine.cc clobbers).  */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/tuple-zvl-subreg.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/tuple-zvl-subreg.c
new file mode 100644
index 00000000000..6433b77e6f4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/tuple-zvl-subreg.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvl256b -mabi=lp64d -O3" } */
+/* { dg-additional-options "-mrvv-vector-bits=zvl" } */
+/* { dg-additional-options "-fno-schedule-insns -fno-schedule-insns2" } */
+
+#include <stddef.h>
+#include "riscv_vector.h"
+
+/* Two disjoint tuple field updates should not require whole-register
+   copies when -mrvv-vector-bits=zvl represents the fields as VLS
+   subregs.  */
+
+__attribute__ ((noipa))
+void
+foo (float *dst)
+{
+  const size_t vl = 8;
+  const ptrdiff_t stride = (ptrdiff_t) sizeof (*dst);
+  vfloat32m1x8_t tuple = __riscv_vlsseg8e32_v_f32m1x8 (dst, stride, vl);
+
+  tuple = __riscv_vset_v_f32m1_f32m1x8 (
+    tuple, 0,
+    __riscv_vfadd_vf_f32m1 (
+      __riscv_vget_v_f32m1x8_f32m1 (tuple, 0), 1.0f, vl));
+  tuple = __riscv_vset_v_f32m1_f32m1x8 (
+    tuple, 7,
+    __riscv_vfadd_vf_f32m1 (
+      __riscv_vget_v_f32m1x8_f32m1 (tuple, 7), 2.0f, vl));
+
+  __riscv_vssseg8e32_v_f32m1x8 (dst, stride, tuple, vl);
+}
+
+/* { dg-final { scan-assembler-not {vmv[1248]r\.v\s+v[0-9]+,\s*v[0-9]+} } } */
-- 
2.43.0

Reply via email to