modified_between_p and modified_in_p conservatively treat any write to
a register as conflicting with all its subregs, even disjoint ones.

This blocks combine optimizations and leaves redundant register moves.
The issue is visible on RISC-V with -mrvv-vector-bits=zvl and may
affect other architectures as well, though I have not constructed
testcases for targets other than RISC-V.

Use chunk-based analysis to recognize disjoint subreg writes as
non-conflicting.  Extract implicit_reg_set_p from reg_set_p to
separate implicit clobber checks from explicit subreg write analysis.

For example, with RISC-V VLS:

  Before:
    vfadd.vf  v1,v15,fa4
    vfadd.vf  v8,v8,fa5
    vmv1r.v   v15,v1        # Redundant move

  After:
    vfadd.vf  v8,v8,fa4
    vfadd.vf  v15,v15,fa5   # Direct operation, no move needed

gcc/
        * rtlanal.cc (implicit_reg_set_p): New function.
        (reg_set_p): Refactor to call implicit_reg_set_p.
        (simple_reg_subreg_p): New function.
        (subreg_write_clobbers_ref_p): New function.
        (subreg_modified_by_dest_p): New function.
        (subreg_modified_by_pattern_p): New function.
        (simple_reg_subreg_modified_in_p): New function.
        (simple_reg_subreg_modified_between_p): New function.
        (modified_between_p): Use chunk-based analysis for subreg writes.
        (modified_in_p): Ditto.

gcc/testsuite/
        * gcc.target/riscv/rvv/base/tuple-zvl-subreg.c: New test.

Signed-off-by: Zhongyao Chen <[email protected]>
---
 gcc/rtlanal.cc                                | 190 +++++++++++++++++-
 .../riscv/rvv/base/tuple-zvl-subreg.c         |  33 +++
 2 files changed, 219 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/tuple-zvl-subreg.c

diff --git a/gcc/rtlanal.cc b/gcc/rtlanal.cc
index 88561a54e5a..09d23c6e51b 100644
--- a/gcc/rtlanal.cc
+++ b/gcc/rtlanal.cc
@@ -1230,17 +1230,17 @@ reg_set_between_p (const_rtx reg, const rtx_insn *from_insn,
   return false;
 }
 
-/* Return true if REG is set or clobbered inside INSN.  */
+/* Return true if REG is modified by an implicit side effect of INSN.  */
 
-bool
-reg_set_p (const_rtx reg, const_rtx insn)
+static bool
+implicit_reg_set_p (const_rtx reg, const_rtx insn)
 {
   /* After delay slot handling, call and branch insns might be in a
      sequence.  Check all the elements there.  */
   if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
     {
       for (int i = 0; i < XVECLEN (PATTERN (insn), 0); ++i)
-       if (reg_set_p (reg, XVECEXP (PATTERN (insn), 0, i)))
+       if (implicit_reg_set_p (reg, XVECEXP (PATTERN (insn), 0, i)))
          return true;
 
       return false;
@@ -1277,9 +1277,181 @@ reg_set_p (const_rtx reg, const_rtx insn)
        }
     }
 
+  return false;
+}
+
+/* Return true if REG is set or clobbered inside INSN.  */
+
+bool
+reg_set_p (const_rtx reg, const_rtx insn)
+{
+  /* After delay slot handling, call and branch insns might be in a
+     sequence.  Check all the elements there.  */
+  if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
+    {
+      for (int i = 0; i < XVECLEN (PATTERN (insn), 0); ++i)
+       if (reg_set_p (reg, XVECEXP (PATTERN (insn), 0, i)))
+         return true;
+
+      return false;
+    }
+
+  if (implicit_reg_set_p (reg, insn))
+    return true;
+
   return set_of (reg, insn) != NULL_RTX;
 }
 
+/* Return true if X is a simple non-paradoxical SUBREG of a REG.  */
+
+static bool
+simple_reg_subreg_p (const_rtx x)
+{
+  return (SUBREG_P (x)
+         && REG_P (SUBREG_REG (x))
+         && !paradoxical_subreg_p (x));
+}
+
+/* Return true if storing to DEST changes the value of SUBREG_REF, where both
+   are simple subregs of the same register with a known common containing size.
+
+   A subreg store can clobber the whole REGMODE_NATURAL_SIZE chunk that
+   contains it, even when the stored mode itself is smaller.  */
+
+static bool
+subreg_write_clobbers_ref_p (const_rtx subreg_ref, const_rtx dest)
+{
+  poly_uint64 chunk_size = REGMODE_NATURAL_SIZE (GET_MODE (SUBREG_REG (dest)));
+  poly_uint64 ref_chunk_size = REGMODE_NATURAL_SIZE (GET_MODE (SUBREG_REG (subreg_ref)));
+  if (maybe_gt (ref_chunk_size, chunk_size))
+    chunk_size = ref_chunk_size;
+  poly_uint64 container_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest)));
+  HOST_WIDE_INT num_chunks;
+  if (!constant_multiple_p (container_size, chunk_size, &num_chunks))
+    return true;
+
+  poly_uint64 ref_start = SUBREG_BYTE (subreg_ref);
+  poly_uint64 ref_size = GET_MODE_SIZE (GET_MODE (subreg_ref));
+  poly_uint64 dest_start = SUBREG_BYTE (dest);
+  poly_uint64 dest_size = GET_MODE_SIZE (GET_MODE (dest));
+
+  for (HOST_WIDE_INT i = 0; i < num_chunks; ++i)
+    {
+      poly_uint64 chunk_start = i * chunk_size;
+      if (ranges_maybe_overlap_p (chunk_start, chunk_size,
+                                 dest_start, dest_size)
+         && ranges_maybe_overlap_p (chunk_start, chunk_size,
+                                    ref_start, ref_size))
+       return true;
+    }
+  return false;
+}
+
+/* Return true if DEST modifies SUBREG_REF.  */
+
+static bool
+subreg_modified_by_dest_p (const_rtx subreg_ref, const_rtx dest)
+{
+  if (!dest)
+    return false;
+
+  switch (GET_CODE (dest))
+    {
+    case SUBREG:
+      if (simple_reg_subreg_p (subreg_ref)
+         && simple_reg_subreg_p (dest)
+         && REGNO (SUBREG_REG (subreg_ref)) == REGNO (SUBREG_REG (dest))
+         && known_eq (GET_MODE_SIZE (GET_MODE (SUBREG_REG (subreg_ref))),
+                      GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest)))))
+       return subreg_write_clobbers_ref_p (subreg_ref, dest);
+      return reg_overlap_mentioned_p (subreg_ref, dest);
+
+    case STRICT_LOW_PART:
+    case ZERO_EXTRACT:
+    case SIGN_EXTRACT:
+      return subreg_modified_by_dest_p (subreg_ref, XEXP (dest, 0));
+
+    case REG:
+    case SCRATCH:
+    case PC:
+      return reg_overlap_mentioned_p (subreg_ref, dest);
+
+    case PARALLEL:
+      for (int i = XVECLEN (dest, 0) - 1; i >= 0; --i)
+       if (XEXP (XVECEXP (dest, 0, i), 0)
+           && subreg_modified_by_dest_p (subreg_ref,
+                                         XEXP (XVECEXP (dest, 0, i), 0)))
+         return true;
+      return false;
+
+    default:
+      return false;
+    }
+}
+
+/* Return true if PAT modifies SUBREG_REF.  */
+
+static bool
+subreg_modified_by_pattern_p (const_rtx subreg_ref, const_rtx pat)
+{
+  if (GET_CODE (pat) == COND_EXEC)
+    pat = COND_EXEC_CODE (pat);
+
+  switch (GET_CODE (pat))
+    {
+    case SET:
+    case CLOBBER:
+      return subreg_modified_by_dest_p (subreg_ref, SET_DEST (pat));
+
+    case PARALLEL:
+      for (int i = XVECLEN (pat, 0) - 1; i >= 0; --i)
+       if (subreg_modified_by_pattern_p (subreg_ref, XVECEXP (pat, 0, i)))
+         return true;
+      return false;
+
+    case SEQUENCE:
+      for (int i = XVECLEN (pat, 0) - 1; i >= 0; --i)
+       if (subreg_modified_by_pattern_p (subreg_ref,
+                                         PATTERN (XVECEXP (pat, 0, i))))
+         return true;
+      return false;
+
+    default:
+      return false;
+    }
+}
+
+/* Return true if X, a simple non-paradoxical SUBREG of a REG, is modified in
+   INSN.  */
+
+static bool
+simple_reg_subreg_modified_in_p (const_rtx x, const_rtx insn)
+{
+  gcc_checking_assert (simple_reg_subreg_p (x));
+
+  if (implicit_reg_set_p (SUBREG_REG (x), insn))
+    return true;
+
+  return subreg_modified_by_pattern_p (x,
+                                      INSN_P (insn) ? PATTERN (insn) : insn);
+}
+
+/* Return true if X, a simple non-paradoxical SUBREG of a REG, is modified
+   between START and END.  */
+
+static bool
+simple_reg_subreg_modified_between_p (const_rtx x, const rtx_insn *start,
+                                     const rtx_insn *end)
+{
+  for (const rtx_insn *insn = NEXT_INSN (start);
+       insn != end;
+       insn = NEXT_INSN (insn))
+    if (simple_reg_subreg_modified_in_p (x, insn))
+      return true;
+
+  return false;
+}
+
 /* Similar to reg_set_between_p, but check all registers in X.  Return false
    only if none of them are modified between START and END.  Return true if
    X contains a MEM; this routine does use memory aliasing.  */
@@ -1319,6 +1491,11 @@ modified_between_p (const_rtx x, const rtx_insn *start, const rtx_insn *end)
     case REG:
       return reg_set_between_p (x, start, end);
 
+    case SUBREG:
+      if (simple_reg_subreg_p (x))
+       return simple_reg_subreg_modified_between_p (x, start, end);
+      break;
+
     default:
       break;
     }
@@ -1372,6 +1549,11 @@ modified_in_p (const_rtx x, const_rtx insn)
     case REG:
       return reg_set_p (x, insn);
 
+    case SUBREG:
+      if (simple_reg_subreg_p (x))
+       return simple_reg_subreg_modified_in_p (x, insn);
+      break;
+
     default:
       break;
     }
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/tuple-zvl-subreg.c b/gcc/testsuite/gcc.target/riscv/rvv/base/tuple-zvl-subreg.c
new file mode 100644
index 00000000000..6433b77e6f4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/tuple-zvl-subreg.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvl256b -mabi=lp64d -O3" } */
+/* { dg-additional-options "-mrvv-vector-bits=zvl" } */
+/* { dg-additional-options "-fno-schedule-insns -fno-schedule-insns2" } */
+
+#include <stddef.h>
+#include "riscv_vector.h"
+
+/* Two disjoint tuple field updates should not require whole-register
+   copies when -mrvv-vector-bits=zvl represents the fields as VLS
+   subregs.  */
+
+__attribute__ ((noipa))
+void
+foo (float *dst)
+{
+  const size_t vl = 8;
+  const ptrdiff_t stride = (ptrdiff_t) sizeof (*dst);
+  vfloat32m1x8_t tuple = __riscv_vlsseg8e32_v_f32m1x8 (dst, stride, vl);
+
+  tuple = __riscv_vset_v_f32m1_f32m1x8 (
+    tuple, 0,
+    __riscv_vfadd_vf_f32m1 (
+      __riscv_vget_v_f32m1x8_f32m1 (tuple, 0), 1.0f, vl));
+  tuple = __riscv_vset_v_f32m1_f32m1x8 (
+    tuple, 7,
+    __riscv_vfadd_vf_f32m1 (
+      __riscv_vget_v_f32m1x8_f32m1 (tuple, 7), 2.0f, vl));
+
+  __riscv_vssseg8e32_v_f32m1x8 (dst, stride, tuple, vl);
+}
+
+/* { dg-final { scan-assembler-not {vmv[1248]r\.v\s+v[0-9]+,\s*v[0-9]+} } } */
-- 
2.43.0

Reply via email to