On Dienstag, 8. Mai 2018 12:42:33 CEST Richard Biener wrote:
> On Tue, May 8, 2018 at 12:37 PM, Allan Sandfeld Jensen
> <li...@carewolf.com> wrote:
> > I have tried to fix PR85692 that I opened.
>
> Please add a testcase as well. It also helps if you shortly tell what
> the patch does in your mail.

Okay. I have updated the patch with a test case based on my motivating examples. The patch extends the simplification of a vector construction so that it can produce not just a single-source permute instruction, but also a two-source permute instruction.
commit 15c0f6a933d60b085416a59221851b604b955958 Author: Allan Sandfeld Jensen <allan.jen...@qt.io> Date: Tue May 8 13:16:18 2018 +0200
Try two source permute for vector construction

simplify_vector_constructor() was detecting when vector construction could be implemented as a single-source permute, but was not detecting when it could be implemented as a two-source permute. This patch adds the second case.

2018-05-08 Allan Sandfeld Jensen <allan.jen...@qt.io>

gcc/
PR tree-optimization/85692
* tree-ssa-forwprop.c (simplify_vector_constructor): Try two source permute as well.

gcc/testsuite
* gcc.target/i386/pr85692.c: Test that two simple constructions are detected as permute instructions.

diff --git a/gcc/testsuite/gcc.target/i386/pr85692.c b/gcc/testsuite/gcc.target/i386/pr85692.c new file mode 100644 index 00000000000..322c1050161 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr85692.c @@ -0,0 +1,18 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -msse4.1" } */ +/* { dg-final { scan-assembler "unpcklps" } } */ +/* { dg-final { scan-assembler "blendps" } } */ +/* { dg-final { scan-assembler-not "shufps" } } */ +/* { dg-final { scan-assembler-not "unpckhps" } } */ + +typedef float v4sf __attribute__ ((vector_size (16))); + +v4sf unpcklps(v4sf a, v4sf b) +{ + return v4sf{a[0],b[0],a[1],b[1]}; +} + +v4sf blendps(v4sf a, v4sf b) +{ + return v4sf{a[0],b[1],a[2],b[3]}; +} diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c index 58ec6b47a5b..fbee8064160 100644 --- a/gcc/tree-ssa-forwprop.c +++ b/gcc/tree-ssa-forwprop.c @@ -2004,7 +2004,7 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) { gimple *stmt = gsi_stmt (*gsi); gimple *def_stmt; - tree op, op2, orig, type, elem_type; + tree op, op2, orig1, orig2, type, elem_type; unsigned elem_size, i; unsigned HOST_WIDE_INT nelts; enum tree_code code, conv_code; @@ -2022,8 +2022,9 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) elem_type = TREE_TYPE (type); elem_size = TREE_INT_CST_LOW (TYPE_SIZE (elem_type)); - vec_perm_builder sel (nelts, nelts, 1); - orig = NULL; + vec_perm_builder sel (nelts, 2, nelts); + orig1 = NULL; + orig2 
= NULL; conv_code = ERROR_MARK; maybe_ident = true; FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (op), i, elt) @@ -2063,10 +2064,26 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) return false; op1 = gimple_assign_rhs1 (def_stmt); ref = TREE_OPERAND (op1, 0); - if (orig) + if (orig1) { - if (ref != orig) - return false; + if (ref == orig1 || orig2) + { + if (ref != orig1 && ref != orig2) + return false; + } + else + { + if (TREE_CODE (ref) != SSA_NAME) + return false; + if (! VECTOR_TYPE_P (TREE_TYPE (ref)) + || ! useless_type_conversion_p (TREE_TYPE (op1), + TREE_TYPE (TREE_TYPE (ref)))) + return false; + if (TREE_TYPE (orig1) != TREE_TYPE (ref)) + return false; + orig2 = ref; + maybe_ident = false; + } } else { @@ -2076,12 +2093,14 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) || ! useless_type_conversion_p (TREE_TYPE (op1), TREE_TYPE (TREE_TYPE (ref)))) return false; - orig = ref; + orig1 = ref; } unsigned int elt; if (maybe_ne (bit_field_size (op1), elem_size) || !constant_multiple_p (bit_field_offset (op1), elem_size, &elt)) return false; + if (orig2 && ref == orig2) + elt += nelts; if (elt != i) maybe_ident = false; sel.quick_push (elt); @@ -2089,14 +2108,17 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) if (i < nelts) return false; - if (! VECTOR_TYPE_P (TREE_TYPE (orig)) + if (! VECTOR_TYPE_P (TREE_TYPE (orig1)) || maybe_ne (TYPE_VECTOR_SUBPARTS (type), - TYPE_VECTOR_SUBPARTS (TREE_TYPE (orig)))) + TYPE_VECTOR_SUBPARTS (TREE_TYPE (orig1)))) return false; + if (!orig2) + orig2 = orig1; + tree tem; if (conv_code != ERROR_MARK - && (! supportable_convert_operation (conv_code, type, TREE_TYPE (orig), + && (! 
supportable_convert_operation (conv_code, type, TREE_TYPE (orig1), &tem, &conv_code) || conv_code == CALL_EXPR)) return false; @@ -2104,16 +2126,16 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) if (maybe_ident) { if (conv_code == ERROR_MARK) - gimple_assign_set_rhs_from_tree (gsi, orig); + gimple_assign_set_rhs_from_tree (gsi, orig1); else - gimple_assign_set_rhs_with_ops (gsi, conv_code, orig, + gimple_assign_set_rhs_with_ops (gsi, conv_code, orig1, NULL_TREE, NULL_TREE); } else { tree mask_type; - vec_perm_indices indices (sel, 1, nelts); + vec_perm_indices indices (sel, 2, nelts); if (!can_vec_perm_const_p (TYPE_MODE (type), indices)) return false; mask_type @@ -2125,15 +2147,14 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) return false; op2 = vec_perm_indices_to_tree (mask_type, indices); if (conv_code == ERROR_MARK) - gimple_assign_set_rhs_with_ops (gsi, VEC_PERM_EXPR, orig, orig, op2); + gimple_assign_set_rhs_with_ops (gsi, VEC_PERM_EXPR, orig1, orig2, op2); else { gimple *perm - = gimple_build_assign (make_ssa_name (TREE_TYPE (orig)), - VEC_PERM_EXPR, orig, orig, op2); - orig = gimple_assign_lhs (perm); + = gimple_build_assign (make_ssa_name (TREE_TYPE (orig1)), + VEC_PERM_EXPR, orig1, orig2, op2); gsi_insert_before (gsi, perm, GSI_SAME_STMT); - gimple_assign_set_rhs_with_ops (gsi, conv_code, orig, + gimple_assign_set_rhs_with_ops (gsi, conv_code, gimple_assign_lhs (perm), NULL_TREE, NULL_TREE); } }