gcc/ChangeLog:

	PR tree-optimization/99504
	* tree-ssa-forwprop.cc (fold_aggregate_assignment): New function.
	Folds aggregate assignments to scalar MEM_REF operations.
	(pass_forwprop::execute): Call fold_aggregate_assignment for
	applicable assignment statements.

gcc/testsuite/ChangeLog:

	PR tree-optimization/99504
	* gcc.dg/tree-ssa/forwprop-42.c: New test.  Verify that aggregate
	assignments of various sizes get folded to scalar MEM_REF
	operations.

Signed-off-by: Peter Damianov <peter0...@disroot.org>
---
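
For reviewers, here is a rough sketch of the transformation this patch aims
for, using one of the new testcases.  The GIMPLE in the comment below is
hand-written and only illustrative (SSA names and the exact MEM_REF dump
syntax will differ); the new test itself only checks the scan-tree-dump
patterns in forwprop-42.c.

    #include <stdint.h>

    struct pixel_4 { uint8_t r, g, b, a; };

    void test_4_bytes(struct pixel_4 *dest, struct pixel_4 *src)
    {
      *dest = *src;    /* aggregate assignment of a 4-byte struct */
    }

    /* Before forwprop1 this is a single aggregate copy:
         *dest_3(D) = *src_4(D);
       After fold_aggregate_assignment it becomes a scalar load/store pair
       through a bitwise integer type of the same size, roughly:
         _1 = MEM[(char *)src_4(D)];
         MEM[(char *)dest_3(D)] = _1;  */
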
 gcc/testsuite/gcc.dg/tree-ssa/forwprop-42.c |  66 +++++++++
 gcc/tree-ssa-forwprop.cc                    | 140 ++++++++++++++++++++
 2 files changed, 206 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/forwprop-42.c
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-42.c b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-42.c
new file mode 100644
index 00000000000..7fef9821e9e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-42.c
@@ -0,0 +1,66 @@
+/* PR tree-optimization/99504 */
+/* Test that aggregate assignments get folded to scalar MEM_REF operations.  */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-forwprop1" } */
+
+#include <stdint.h>
+
+struct pixel_4 {
+  uint8_t r, g, b, a;
+};
+
+struct pixel_8 {
+  uint16_t r, g, b, a;
+};
+
+struct pixel_16 {
+  uint32_t r, g, b, a;
+};
+
+struct pixel_32 {
+  uint64_t r, g, b, a;
+};
+
+#ifdef __SIZEOF_INT128__
+struct pixel_64 {
+  __int128 r, g, b, a;
+};
+#endif
+
+void test_4_bytes(struct pixel_4 *dest, struct pixel_4 *src)
+{
+  *dest = *src;
+}
+
+void test_8_bytes(struct pixel_8 *dest, struct pixel_8 *src)
+{
+  *dest = *src;
+}
+
+void test_16_bytes(struct pixel_16 *dest, struct pixel_16 *src)
+{
+  *dest = *src;
+}
+
+void test_32_bytes(struct pixel_32 *dest, struct pixel_32 *src)
+{
+  *dest = *src;
+}
+
+#ifdef __SIZEOF_INT128__
+void test_64_bytes(struct pixel_64 *dest, struct pixel_64 *src)
+{
+  *dest = *src;
+}
+#endif
+
+void copy_pixels(struct pixel_4 *dest, struct pixel_4 *src, int n)
+{
+  for (int i = 0; i < n; i++)
+    dest[i] = src[i];
+}
+
+/* { dg-final { scan-tree-dump-times "MEM\\\[" 10 "forwprop1" } } */
+/* Check that we generate scalar temporaries for the folded assignments.  */
+/* { dg-final { scan-tree-dump "_\[0-9\]+ = MEM\\\[" "forwprop1" } } */
+/* { dg-final { scan-tree-dump "MEM\\\[.*\] = _\[0-9\]+" "forwprop1" } } */
diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc
index 43b1c9d696f..3ce94a737c6 100644
--- a/gcc/tree-ssa-forwprop.cc
+++ b/gcc/tree-ssa-forwprop.cc
@@ -205,6 +205,7 @@ struct _vec_perm_simplify_seq
 typedef struct _vec_perm_simplify_seq *vec_perm_simplify_seq;
 
 static bool forward_propagate_addr_expr (tree, tree, bool);
+static bool fold_aggregate_assignment (gimple_stmt_iterator *);
 
 /* Set to true if we delete dead edges during the optimization.  */
 static bool cfg_changed;
@@ -981,6 +982,141 @@ forward_propagate_addr_expr (tree name, tree rhs, bool parent_single_use_p)
 }
 
+/* Try to optimize aggregate assignments by converting them to scalar
+   MEM_REF operations when profitable for vectorization.
+   This applies the same folding as memcpy to aggregate assignments.  */
+
+static bool
+fold_aggregate_assignment (gimple_stmt_iterator *gsi)
+{
+  gimple *stmt = gsi_stmt (*gsi);
+
+  if (!is_gimple_assign (stmt) || !gimple_assign_single_p (stmt))
+    return false;
+
+  tree lhs = gimple_assign_lhs (stmt);
+  tree rhs = gimple_assign_rhs1 (stmt);
+
+  /* Check if this is an aggregate assignment: *dest = *src
+     where both sides are aggregate types (can be MEM_REF or indirection).  */
+  bool lhs_is_indirect = (TREE_CODE (lhs) == INDIRECT_REF);
+  bool rhs_is_indirect = (TREE_CODE (rhs) == INDIRECT_REF);
+
+  if ((TREE_CODE (lhs) != MEM_REF && !lhs_is_indirect)
+      || (TREE_CODE (rhs) != MEM_REF && !rhs_is_indirect))
+    return false;
+
+  tree lhs_type = TREE_TYPE (lhs);
+  tree rhs_type = TREE_TYPE (rhs);
+
+  if (!AGGREGATE_TYPE_P (lhs_type) || !AGGREGATE_TYPE_P (rhs_type))
+    return false;
+
+  if (!types_compatible_p (lhs_type, rhs_type))
+    return false;
+
+  if (!tree_fits_uhwi_p (TYPE_SIZE_UNIT (lhs_type)))
+    return false;
+
+  unsigned HOST_WIDE_INT ilen = tree_to_uhwi (TYPE_SIZE_UNIT (lhs_type));
+  if (!pow2p_hwi (ilen) || ilen > MOVE_MAX)
+    return false;
+
+  tree lhs_base = TREE_OPERAND (lhs, 0);
+  tree rhs_base = TREE_OPERAND (rhs, 0);
+
+  unsigned int lhs_align = get_pointer_alignment (lhs_base);
+  unsigned int rhs_align = get_pointer_alignment (rhs_base);
+
+  scalar_int_mode imode;
+  machine_mode mode;
+  if (!int_mode_for_size (ilen * BITS_PER_UNIT, 0).exists (&imode)
+      || !bitwise_mode_for_size (ilen * BITS_PER_UNIT).exists (&mode)
+      || !known_eq (GET_MODE_BITSIZE (mode), ilen * BITS_PER_UNIT))
+    return false;
+
+  if ((lhs_align < GET_MODE_ALIGNMENT (mode)
+       && targetm.slow_unaligned_access (mode, lhs_align)
+       && optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
+      || (rhs_align < GET_MODE_ALIGNMENT (mode)
+          && targetm.slow_unaligned_access (mode, rhs_align)
+          && optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing))
+    return false;
+
+  tree type = bitwise_type_for_mode (mode);
+  tree srctype = type;
+  tree desttype = type;
+
+  if (rhs_align < GET_MODE_ALIGNMENT (mode))
+    srctype = build_aligned_type (type, rhs_align);
+  if (lhs_align < GET_MODE_ALIGNMENT (mode))
+    desttype = build_aligned_type (type, lhs_align);
+
+  tree off0 = build_int_cst (build_pointer_type_for_mode (char_type_node,
+                                                          ptr_mode, true), 0);
+
+  tree srcmem, destmem;
+
+  if (rhs_is_indirect)
+    {
+      srcmem = fold_build2 (MEM_REF, srctype, rhs_base, off0);
+    }
+  else
+    {
+      tree rhs_offset = TREE_OPERAND (rhs, 1);
+      srcmem = fold_build2 (MEM_REF, srctype, rhs_base, rhs_offset);
+    }
+
+  if (lhs_is_indirect)
+    {
+      destmem = fold_build2 (MEM_REF, desttype, lhs_base, off0);
+    }
+  else
+    {
+      tree lhs_offset = TREE_OPERAND (lhs, 1);
+      destmem = fold_build2 (MEM_REF, desttype, lhs_base, lhs_offset);
+    }
+  gimple *new_stmt;
+  if (is_gimple_reg_type (srctype))
+    {
+      new_stmt = gimple_build_assign (NULL_TREE, srcmem);
+      tree tmp_var = make_ssa_name (srctype, new_stmt);
+      gimple_assign_set_lhs (new_stmt, tmp_var);
+      gimple_set_vuse (new_stmt, gimple_vuse (stmt));
+      gimple_set_location (new_stmt, gimple_location (stmt));
+      gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
+
+      new_stmt = gimple_build_assign (destmem, tmp_var);
+      gimple_move_vops (new_stmt, stmt);
+      gimple_set_location (new_stmt, gimple_location (stmt));
+      gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
+      gsi_remove (gsi, true);
+    }
+  else
+    {
+      new_stmt = gimple_build_assign (destmem, srcmem);
+      gimple_move_vops (new_stmt, stmt);
+      gimple_set_location (new_stmt, gimple_location (stmt));
+      gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
+      gsi_remove (gsi, true);
+    }
+
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    {
+      fprintf (dump_file,
+               "Converted aggregate assignment to scalar MEM_REF:\n");
+      fprintf (dump_file, "  Original: ");
+      print_gimple_stmt (dump_file, stmt, 0, dump_flags);
+      fprintf (dump_file, "  Size: %u bytes, Mode: %s\n",
+               (unsigned) ilen, GET_MODE_NAME (mode));
+    }
+
+  statistics_counter_event (cfun, "aggregate assignment to scalar MEM_REF", 1);
+
+  return true;
+}
+
+
 /* Helper function for simplify_gimple_switch.  Remove case labels that
    have values outside the range of the new type.  */
@@ -4477,6 +4613,10 @@ pass_forwprop::execute (function *fun)
           if (TREE_CODE (lhs) != SSA_NAME
               || has_zero_uses (lhs))
             {
+              if (TREE_CODE (lhs) != SSA_NAME
+                  && fold_aggregate_assignment (&gsi))
+                continue;
+
               process_vec_perm_simplify_seq_list (&vec_perm_simplify_seq_list);
               gsi_next (&gsi);
               continue;
--
2.39.5