On 2025-09-20 02:33, Andrew Pinski wrote:
On Fri, Sep 19, 2025, 6:22 PM Peter Damianov <peter0...@disroot.org> wrote:

This patch implements folding of aggregate assignments (*dest = *src)
by converting them to scalar MEM_REF operations when the aggregate's
size is a power of two no larger than MOVE_MAX. This enables
vectorization opportunities.
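
For illustration, a minimal sketch of the intended rewrite (the GIMPLE in
the comments is illustrative only; the exact temporaries and the bitwise
type chosen depend on the target):

#include <stdint.h>

struct pixel_4 { uint8_t r, g, b, a; };  /* 4 bytes: a power of two <= MOVE_MAX */

/* Illustrative function, not part of the patch.  */
void copy_one (struct pixel_4 *dest, struct pixel_4 *src)
{
  /* Before folding the statement stays an aggregate copy:
       *dest = *src;
     after folding it becomes roughly one scalar load/store pair:
       _1 = MEM [(char *)src];
       MEM [(char *)dest] = _1;  */
  *dest = *src;
}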

I am not sure this will work with my recent changes to forwprop.  Plus
it might make sra not handle them later on.

Plus, INDIRECT_REF never shows up in GIMPLE.

IIRC it is ldist that should be handling aggregate assignments, getting
them converted into memcpy calls.
It is not really the vectorizer.

Wouldn't converting it into memcpy then lose the stride info of the copying loop?
Does that matter?
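
To make the question concrete, a hypothetical sketch of the two loop shapes
I have in mind (how ldist actually treats these is my assumption, not
something I have verified; the function names are made up):

/* Hypothetical example: fully contiguous copy, which presumably ldist can
   turn into a single memcpy of n bytes.  */
void copy_flat (unsigned char *dest, const unsigned char *src, int n)
{
  for (int i = 0; i < n; i++)
    dest[i] = src[i];
}

/* Hypothetical example: strided row copy.  Rows sit `stride' bytes apart,
   so a memcpy could at best cover one row at a time; the per-iteration
   stride of the outer loop is the information I am asking about.  */
void copy_rows (unsigned char *dest, const unsigned char *src,
                int rows, int row_bytes, int stride)
{
  for (int r = 0; r < rows; r++)
    for (int c = 0; c < row_bytes; c++)
      dest[r * stride + c] = src[r * stride + c];
}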


Thanks,
Andrew

gcc/ChangeLog:

PR tree-optimization/99504
* tree-ssa-forwprop.cc (fold_aggregate_assignment): New function.
Folds aggregate assignments to scalar MEM_REF operations.
(pass_forwprop::execute): Call fold_aggregate_assignment for
applicable assignment statements.

gcc/testsuite/ChangeLog:

PR tree-optimization/99504
* gcc.dg/tree-ssa/forwprop-42.c: New test. Verify that aggregate
assignments of various sizes get folded to scalar MEM_REF
operations.

Signed-off-by: Peter Damianov <peter0...@disroot.org>
---
gcc/testsuite/gcc.dg/tree-ssa/forwprop-42.c |  66 +++++++++
gcc/tree-ssa-forwprop.cc                    | 140 ++++++++++++++++++++
2 files changed, 206 insertions(+)
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/forwprop-42.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-42.c b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-42.c
new file mode 100644
index 00000000000..7fef9821e9e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-42.c
@@ -0,0 +1,66 @@
+/* PR tree-optimization/99504 */
+/* Test that aggregate assignments get folded to scalar MEM_REF operations */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-forwprop1" } */
+
+#include <stdint.h>
+
+struct pixel_4 {
+  uint8_t r, g, b, a;
+};
+
+struct pixel_8 {
+  uint16_t r, g, b, a;
+};
+
+struct pixel_16 {
+  uint32_t r, g, b, a;
+};
+
+struct pixel_32 {
+  uint64_t r, g, b, a;
+};
+
+#ifdef __SIZEOF_INT128__
+struct pixel_64 {
+  __int128 r, g, b, a;
+};
+#endif
+
+void test_4_bytes(struct pixel_4 *dest, struct pixel_4 *src)
+{
+  *dest = *src;
+}
+
+void test_8_bytes(struct pixel_8 *dest, struct pixel_8 *src)
+{
+  *dest = *src;
+}
+
+void test_16_bytes(struct pixel_16 *dest, struct pixel_16 *src)
+{
+  *dest = *src;
+}
+
+void test_32_bytes(struct pixel_32 *dest, struct pixel_32 *src)
+{
+  *dest = *src;
+}
+
+#ifdef __SIZEOF_INT128__
+void test_64_bytes(struct pixel_64 *dest, struct pixel_64 *src)
+{
+  *dest = *src;
+}
+#endif
+
+void copy_pixels(struct pixel_4 *dest, struct pixel_4 *src, int n)
+{
+  for (int i = 0; i < n; i++)
+    dest[i] = src[i];
+}
+
+/* { dg-final { scan-tree-dump-times "MEM\\\[" 10 "forwprop1" } } */
+/* Check that we generate scalar temporaries for the folded assignments */
+/* { dg-final { scan-tree-dump "_\[0-9\]+ = MEM\\\[" "forwprop1" } } */
+/* { dg-final { scan-tree-dump "MEM\\\[.*\] = _\[0-9\]+" "forwprop1" } } */
\ No newline at end of file
diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc
index 43b1c9d696f..3ce94a737c6 100644
--- a/gcc/tree-ssa-forwprop.cc
+++ b/gcc/tree-ssa-forwprop.cc
@@ -205,6 +205,7 @@ struct _vec_perm_simplify_seq
typedef struct _vec_perm_simplify_seq *vec_perm_simplify_seq;

static bool forward_propagate_addr_expr (tree, tree, bool);
+static bool fold_aggregate_assignment (gimple_stmt_iterator *);

/* Set to true if we delete dead edges during the optimization.  */
static bool cfg_changed;
@@ -981,6 +982,141 @@ forward_propagate_addr_expr (tree name, tree rhs, bool parent_single_use_p)
}

+/* Try to optimize aggregate assignments by converting them to scalar
+   MEM_REF operations when profitable for vectorization.
+   This applies the same folding as memcpy to aggregate assignments.  */
+
+static bool
+fold_aggregate_assignment (gimple_stmt_iterator *gsi)
+{
+  gimple *stmt = gsi_stmt (*gsi);
+
+  if (!is_gimple_assign (stmt) || !gimple_assign_single_p (stmt))
+    return false;
+
+  tree lhs = gimple_assign_lhs (stmt);
+  tree rhs = gimple_assign_rhs1 (stmt);
+
+  /* Check if this is an aggregate assignment: *dest = *src
+     where both sides are aggregate types (can be MEM_REF or indirection).  */
+  bool lhs_is_indirect = (TREE_CODE (lhs) == INDIRECT_REF);
+  bool rhs_is_indirect = (TREE_CODE (rhs) == INDIRECT_REF);
+
+  if ((TREE_CODE (lhs) != MEM_REF && !lhs_is_indirect)
+      || (TREE_CODE (rhs) != MEM_REF && !rhs_is_indirect))
+    return false;
+
+  tree lhs_type = TREE_TYPE (lhs);
+  tree rhs_type = TREE_TYPE (rhs);
+
+  if (!AGGREGATE_TYPE_P (lhs_type) || !AGGREGATE_TYPE_P (rhs_type))
+    return false;
+
+  if (!types_compatible_p (lhs_type, rhs_type))
+    return false;
+
+  if (!tree_fits_uhwi_p (TYPE_SIZE_UNIT (lhs_type)))
+    return false;
+
+  unsigned HOST_WIDE_INT ilen = tree_to_uhwi (TYPE_SIZE_UNIT (lhs_type));
+  if (!pow2p_hwi (ilen) || ilen > MOVE_MAX)
+    return false;
+
+  tree lhs_base = TREE_OPERAND (lhs, 0);
+  tree rhs_base = TREE_OPERAND (rhs, 0);
+
+  unsigned int lhs_align = get_pointer_alignment (lhs_base);
+  unsigned int rhs_align = get_pointer_alignment (rhs_base);
+
+  scalar_int_mode imode;
+  machine_mode mode;
+  if (!int_mode_for_size (ilen * BITS_PER_UNIT, 0).exists (&imode)
+      || !bitwise_mode_for_size (ilen * BITS_PER_UNIT).exists (&mode)
+      || !known_eq (GET_MODE_BITSIZE (mode), ilen * BITS_PER_UNIT))
+    return false;
+
+  if ((lhs_align < GET_MODE_ALIGNMENT (mode)
+       && targetm.slow_unaligned_access (mode, lhs_align)
+       && optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
+      || (rhs_align < GET_MODE_ALIGNMENT (mode)
+         && targetm.slow_unaligned_access (mode, rhs_align)
+         && optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing))
+    return false;
+
+  tree type = bitwise_type_for_mode (mode);
+  tree srctype = type;
+  tree desttype = type;
+
+  if (rhs_align < GET_MODE_ALIGNMENT (mode))
+    srctype = build_aligned_type (type, rhs_align);
+  if (lhs_align < GET_MODE_ALIGNMENT (mode))
+    desttype = build_aligned_type (type, lhs_align);
+
+  tree off0 = build_int_cst (build_pointer_type_for_mode (char_type_node,
+                                                         ptr_mode, true), 0);
+
+  tree srcmem, destmem;
+
+  if (rhs_is_indirect)
+    {
+      srcmem = fold_build2 (MEM_REF, srctype, rhs_base, off0);
+    }
+  else
+    {
+      tree rhs_offset = TREE_OPERAND (rhs, 1);
+      srcmem = fold_build2 (MEM_REF, srctype, rhs_base, rhs_offset);
+    }
+
+  if (lhs_is_indirect)
+    {
+      destmem = fold_build2 (MEM_REF, desttype, lhs_base, off0);
+    }
+  else
+    {
+      tree lhs_offset = TREE_OPERAND (lhs, 1);
+      destmem = fold_build2 (MEM_REF, desttype, lhs_base, lhs_offset);
+    }
+  gimple *new_stmt;
+  if (is_gimple_reg_type (srctype))
+    {
+      new_stmt = gimple_build_assign (NULL_TREE, srcmem);
+      tree tmp_var = make_ssa_name (srctype, new_stmt);
+      gimple_assign_set_lhs (new_stmt, tmp_var);
+      gimple_set_vuse (new_stmt, gimple_vuse (stmt));
+      gimple_set_location (new_stmt, gimple_location (stmt));
+      gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
+
+      new_stmt = gimple_build_assign (destmem, tmp_var);
+      gimple_move_vops (new_stmt, stmt);
+      gimple_set_location (new_stmt, gimple_location (stmt));
+      gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
+      gsi_remove (gsi, true);
+    }
+  else
+    {
+      new_stmt = gimple_build_assign (destmem, srcmem);
+      gimple_move_vops (new_stmt, stmt);
+      gimple_set_location (new_stmt, gimple_location (stmt));
+      gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
+      gsi_remove (gsi, true);
+    }
+
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    {
+      fprintf (dump_file,
+              "Converted aggregate assignment to scalar
MEM_REF:\n");
+      fprintf (dump_file, "  Original: ");
+      print_gimple_stmt (dump_file, stmt, 0, dump_flags);
+      fprintf (dump_file, "  Size: %u bytes, Mode: %s\n",
+              (unsigned)ilen, GET_MODE_NAME (mode));
+    }
+
+  statistics_counter_event (cfun, "aggregate assignment to scalar MEM_REF", 1);
+
+  return true;
+}
+
+
/* Helper function for simplify_gimple_switch.  Remove case labels that
   have values outside the range of the new type.  */

@@ -4477,6 +4613,10 @@ pass_forwprop::execute (function *fun)
              if (TREE_CODE (lhs) != SSA_NAME
                  || has_zero_uses (lhs))
                {
+                 if (TREE_CODE (lhs) != SSA_NAME
+                     && fold_aggregate_assignment (&gsi))
+                   continue;
+
                  process_vec_perm_simplify_seq_list (&vec_perm_simplify_seq_list);
                  gsi_next (&gsi);
                  continue;
--
2.39.5
