This fixes the specific complex arithmetic vectorization failure
in the PR, which is caused by loads/stores of complex type that
the vectorizer does not like.
The patch implements two things.  First, a "late" variant of
gimplify_modify_expr_complex_part in update-address-taken,
applied when we can write the variable into SSA form (which ends
up removing a temporary for the testcase).  Second, splitting
up loads and stores of complex type if the loads all feed
{REAL,IMAG}PART_EXPRs or the store is fed by a single-use
COMPLEX_EXPR.
Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.
Richard.
2015-01-09 Richard Biener <[email protected]>
PR tree-optimization/64410
* tree-ssa.c (non_rewritable_lvalue_p): Allow REALPART/IMAGPART_EXPR
on the LHS.
(execute_update_addresses_taken): Deal with that.
* tree-ssa-forwprop.c (pass_forwprop::execute): Use component-wise
loads/stores for complex variables.
* g++.dg/vect/pr64410.cc: New testcase.
Index: gcc/tree-ssa.c
===================================================================
*** gcc/tree-ssa.c.orig 2015-01-08 14:58:47.058032954 +0100
--- gcc/tree-ssa.c 2015-01-08 15:00:46.253028827 +0100
*************** non_rewritable_lvalue_p (tree lhs)
*** 1340,1345 ****
--- 1340,1352 ----
if (DECL_P (lhs))
return false;
+ /* We can re-write REALPART_EXPR and IMAGPART_EXPR sets in
+ a reasonably efficient manner... */
+ if ((TREE_CODE (lhs) == REALPART_EXPR
+ || TREE_CODE (lhs) == IMAGPART_EXPR)
+ && DECL_P (TREE_OPERAND (lhs, 0)))
+ return false;
+
/* A decl that is wrapped inside a MEM-REF that covers
it full is also rewritable.
??? The following could be relaxed allowing component
*************** execute_update_addresses_taken (void)
*** 1544,1549 ****
--- 1551,1585 ----
tree rhs, *rhsp = gimple_assign_rhs1_ptr (stmt);
tree sym;
+ /* Rewrite LHS IMAG/REALPART_EXPR similar to
+ gimplify_modify_expr_complex_part. */
+ if ((TREE_CODE (lhs) == IMAGPART_EXPR
+ || TREE_CODE (lhs) == REALPART_EXPR)
+ && DECL_P (TREE_OPERAND (lhs, 0))
+ && bitmap_bit_p (suitable_for_renaming,
+ DECL_UID (TREE_OPERAND (lhs, 0))))
+ {
+ tree other = make_ssa_name (TREE_TYPE (lhs));
+ tree lrhs = build1 (TREE_CODE (lhs) == IMAGPART_EXPR
+ ? REALPART_EXPR : IMAGPART_EXPR,
+ TREE_TYPE (other),
+ TREE_OPERAND (lhs, 0));
+ gimple load = gimple_build_assign (other, lrhs);
+ gimple_set_vuse (load, gimple_vuse (stmt));
+ gsi_insert_before (&gsi, load, GSI_SAME_STMT);
+ gimple_assign_set_lhs (stmt, TREE_OPERAND (lhs, 0));
+ gimple_assign_set_rhs_with_ops
+ (&gsi, COMPLEX_EXPR,
+ TREE_CODE (lhs) == IMAGPART_EXPR
+ ? other : gimple_assign_rhs1 (stmt),
+ TREE_CODE (lhs) == IMAGPART_EXPR
+ ? gimple_assign_rhs1 (stmt) : other, NULL_TREE);
+ stmt = gsi_stmt (gsi);
+ unlink_stmt_vdef (stmt);
+ update_stmt (stmt);
+ continue;
+ }
+
/* We shouldn't have any fancy wrapping of
component-refs on the LHS, but look through
VIEW_CONVERT_EXPRs as that is easy. */
Index: gcc/tree-ssa-forwprop.c
===================================================================
*** gcc/tree-ssa-forwprop.c.orig 2015-01-08 13:25:14.892227266 +0100
--- gcc/tree-ssa-forwprop.c 2015-01-09 11:09:50.785517099 +0100
*************** pass_forwprop::execute (function *fun)
*** 2210,2215 ****
--- 2210,2306 ----
else
gsi_next (&gsi);
}
+ else if (TREE_CODE (TREE_TYPE (lhs)) == COMPLEX_TYPE
+ && gimple_assign_load_p (stmt)
+ && !gimple_has_volatile_ops (stmt)
+ && !stmt_can_throw_internal (stmt))
+ {
+ /* Rewrite loads used only in real/imagpart extractions to
+ component-wise loads. */
+ use_operand_p use_p;
+ imm_use_iterator iter;
+ bool rewrite = true;
+ FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
+ {
+ gimple use_stmt = USE_STMT (use_p);
+ if (is_gimple_debug (use_stmt))
+ continue;
+ if (!is_gimple_assign (use_stmt)
+ || (gimple_assign_rhs_code (use_stmt) != REALPART_EXPR
+ && gimple_assign_rhs_code (use_stmt) != IMAGPART_EXPR))
+ {
+ rewrite = false;
+ break;
+ }
+ }
+ if (rewrite)
+ {
+ gimple use_stmt;
+ FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
+ {
+ if (is_gimple_debug (use_stmt))
+ {
+ if (gimple_debug_bind_p (use_stmt))
+ {
+ gimple_debug_bind_reset_value (use_stmt);
+ update_stmt (use_stmt);
+ }
+ continue;
+ }
+
+ tree new_rhs = build1 (gimple_assign_rhs_code (use_stmt),
+ TREE_TYPE (TREE_TYPE (rhs)),
+ unshare_expr (rhs));
+ gimple new_stmt
+ = gimple_build_assign (gimple_assign_lhs (use_stmt),
+ new_rhs);
+
+ gimple_stmt_iterator gsi2 = gsi_for_stmt (use_stmt);
+ unlink_stmt_vdef (use_stmt);
+ gsi_remove (&gsi2, true);
+
+ gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
+ }
+ gsi_remove (&gsi, true);
+ }
+ else
+ gsi_next (&gsi);
+ }
+ else if (code == COMPLEX_EXPR)
+ {
+ /* Rewrite stores of a single-use complex build expression
+ to component-wise stores. */
+ use_operand_p use_p;
+ gimple use_stmt;
+ if (single_imm_use (lhs, &use_p, &use_stmt)
+ && gimple_store_p (use_stmt)
+ && !gimple_has_volatile_ops (use_stmt)
+ && is_gimple_assign (use_stmt))
+ {
+ tree use_lhs = gimple_assign_lhs (use_stmt);
+ tree new_lhs = build1 (REALPART_EXPR,
+ TREE_TYPE (TREE_TYPE (use_lhs)),
+ unshare_expr (use_lhs));
+ gimple new_stmt = gimple_build_assign (new_lhs, rhs);
+ gimple_set_vuse (new_stmt, gimple_vuse (use_stmt));
+ gimple_set_vdef (new_stmt, make_ssa_name (gimple_vop (cfun)));
+ SSA_NAME_DEF_STMT (gimple_vdef (new_stmt)) = new_stmt;
+ gimple_set_vuse (use_stmt, gimple_vdef (new_stmt));
+ gimple_stmt_iterator gsi2 = gsi_for_stmt (use_stmt);
+ gsi_insert_before (&gsi2, new_stmt, GSI_SAME_STMT);
+
+ new_lhs = build1 (IMAGPART_EXPR,
+ TREE_TYPE (TREE_TYPE (use_lhs)),
+ unshare_expr (use_lhs));
+ gimple_assign_set_lhs (use_stmt, new_lhs);
+ gimple_assign_set_rhs1 (use_stmt, gimple_assign_rhs2 (stmt));
+ update_stmt (use_stmt);
+
+ gsi_remove (&gsi, true);
+ }
+ else
+ gsi_next (&gsi);
+ }
else
gsi_next (&gsi);
}
Index: gcc/testsuite/g++.dg/vect/pr64410.cc
===================================================================
*** /dev/null 1970-01-01 00:00:00.000000000 +0000
--- gcc/testsuite/g++.dg/vect/pr64410.cc 2015-01-08 16:10:35.204883792 +0100
***************
*** 0 ****
--- 1,52 ----
+ // { dg-do compile }
+
+ #include <vector>
+ #include <complex>
+ #include <iostream>
+ #include <cstdlib>
+
+ using namespace std;
+
+ int
+ main(int argc, char** argv)
+ {
+ if (argc < 3)
+ {
+ cout << "usage: size N" << endl;
+ return -1;
+ }
+
+ const unsigned int size = atoi(argv[1]);
+ const unsigned int N = atoi(argv[2]);
+
+ cout << "size = " << size << endl;
+ cout << "N = " << N << endl;
+
+ typedef complex<double> cx_double;
+
+ vector< cx_double > A(size);
+ vector< cx_double > B(size);
+ vector< cx_double > C(size);
+
+ cx_double* A_ptr = &A[0];
+ cx_double* B_ptr = &B[0];
+ cx_double* C_ptr = &C[0];
+
+ for (unsigned int i=0; i<size; ++i)
+ {
+ A_ptr[i] = cx_double( (double(rand())/RAND_MAX), (double(rand())/RAND_MAX) );
+ B_ptr[i] = cx_double( (double(rand())/RAND_MAX), (double(rand())/RAND_MAX) );
+ C_ptr[i] = cx_double( double(0), double(0) );
+ }
+
+ for (unsigned int j=0; j<N; ++j)
+ for (unsigned int i=0; i<size; ++i)
+ C_ptr[i] = A_ptr[i] + B_ptr[i];
+
+ cout << C_ptr[0] << endl;
+
+ return 0;
+ }
+
+ // { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" } }
+ // { dg-final { cleanup-tree-dump "vect" } }