Hi!

While investigating PR51387 I found that we generate pretty bad code for
e.g. the attached testcase (for f1 even with a bigger N, like 256 for avx2):
after vectorization, cunroll unrolls the loops completely and we end up
with lots of VEC_PACK_TRUNC_EXPR etc. expressions with VECTOR_CST
arguments.  We don't fold them, so we load lots of constants from memory
and then need lots of code to reshuffle them.  This patch adds folding for
these expressions; on this testcase we end up loading the same number of
constants from memory, but no longer need to reshuffle them.

Bootstrapped/regtested on x86_64-linux and i686-linux.  OK for trunk?

2011-12-02  Jakub Jelinek  <ja...@redhat.com>

        * fold-const.c (fold_unary_loc): Fold VEC_UNPACK_LO_EXPR,
        VEC_UNPACK_HI_EXPR, VEC_UNPACK_FLOAT_LO_EXPR and
        VEC_UNPACK_FLOAT_HI_EXPR with VECTOR_CST argument.
        (fold_binary_loc): Fold VEC_PACK_TRUNC_EXPR,
        VEC_PACK_FIX_TRUNC_EXPR, VEC_WIDEN_MULT_LO_EXPR
        and VEC_WIDEN_MULT_HI_EXPR with VECTOR_CST arguments.

        * gcc.dg/vect/vect-122.c: New test.

--- gcc/fold-const.c.jj 2011-12-02 01:52:26.000000000 +0100
+++ gcc/fold-const.c    2011-12-02 17:43:09.246557524 +0100
@@ -7651,6 +7651,8 @@ build_fold_addr_expr_loc (location_t loc
   return build_fold_addr_expr_with_type_loc (loc, t, ptrtype);
 }
 
+static bool vec_cst_ctor_to_array (tree, tree *);
+
 /* Fold a unary expression of code CODE and type TYPE with operand
    OP0.  Return the folded expression if folding is successful.
    Otherwise, return NULL_TREE.  */
@@ -8294,6 +8296,44 @@ fold_unary_loc (location_t loc, enum tre
        }
       return NULL_TREE;
 
+    case VEC_UNPACK_LO_EXPR:
+    case VEC_UNPACK_HI_EXPR:
+    case VEC_UNPACK_FLOAT_LO_EXPR:
+    case VEC_UNPACK_FLOAT_HI_EXPR:
+      {
+       unsigned int nelts = TYPE_VECTOR_SUBPARTS (type), i;
+       tree *elts, vals = NULL_TREE;
+       enum tree_code subcode;
+
+       gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)) == nelts * 2);
+       if (TREE_CODE (arg0) != VECTOR_CST)
+         return NULL_TREE;
+
+       elts = XALLOCAVEC (tree, nelts * 2);
+       if (!vec_cst_ctor_to_array (arg0, elts))
+         return NULL_TREE;
+
+       if ((!BYTES_BIG_ENDIAN) ^ (code == VEC_UNPACK_LO_EXPR
+                                  || code == VEC_UNPACK_FLOAT_LO_EXPR))
+         elts += nelts;
+
+       if (code == VEC_UNPACK_LO_EXPR || code == VEC_UNPACK_HI_EXPR)
+         subcode = NOP_EXPR;
+       else
+         subcode = FLOAT_EXPR;
+
+       for (i = 0; i < nelts; i++)
+         {
+           elts[i] = fold_convert_const (subcode, TREE_TYPE (type), elts[i]);
+           if (elts[i] == NULL_TREE || !CONSTANT_CLASS_P (elts[i]))
+             return NULL_TREE;
+         }
+
+       for (i = 0; i < nelts; i++)
+         vals = tree_cons (NULL_TREE, elts[nelts - i - 1], vals);
+       return build_vector (type, vals);
+      }
+
     default:
       return NULL_TREE;
     } /* switch (code) */
@@ -13498,6 +13538,73 @@ fold_binary_loc (location_t loc,
        }
       return NULL_TREE;
 
+    case VEC_PACK_TRUNC_EXPR:
+    case VEC_PACK_FIX_TRUNC_EXPR:
+      {
+       unsigned int nelts = TYPE_VECTOR_SUBPARTS (type), i;
+       tree *elts, vals = NULL_TREE;
+
+       gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)) == nelts / 2
+                   && TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg1)) == nelts / 2);
+       if (TREE_CODE (arg0) != VECTOR_CST || TREE_CODE (arg1) != VECTOR_CST)
+         return NULL_TREE;
+
+       elts = XALLOCAVEC (tree, nelts);
+       if (!vec_cst_ctor_to_array (arg0, elts)
+           || !vec_cst_ctor_to_array (arg1, elts + nelts / 2))
+         return NULL_TREE;
+
+       for (i = 0; i < nelts; i++)
+         {
+           elts[i] = fold_convert_const (code == VEC_PACK_TRUNC_EXPR
+                                         ? NOP_EXPR : FIX_TRUNC_EXPR,
+                                         TREE_TYPE (type), elts[i]);
+           if (elts[i] == NULL_TREE || !CONSTANT_CLASS_P (elts[i]))
+             return NULL_TREE;
+         }
+
+       for (i = 0; i < nelts; i++)
+         vals = tree_cons (NULL_TREE, elts[nelts - i - 1], vals);
+       return build_vector (type, vals);
+      }
+
+    case VEC_WIDEN_MULT_LO_EXPR:
+    case VEC_WIDEN_MULT_HI_EXPR:
+      {
+       unsigned int nelts = TYPE_VECTOR_SUBPARTS (type), i;
+       tree *elts, vals = NULL_TREE;
+
+       gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)) == nelts * 2
+                   && TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg1)) == nelts * 2);
+       if (TREE_CODE (arg0) != VECTOR_CST || TREE_CODE (arg1) != VECTOR_CST)
+         return NULL_TREE;
+
+       elts = XALLOCAVEC (tree, nelts * 4);
+       if (!vec_cst_ctor_to_array (arg0, elts)
+           || !vec_cst_ctor_to_array (arg1, elts + nelts * 2))
+         return NULL_TREE;
+
+       if ((!BYTES_BIG_ENDIAN) ^ (code == VEC_WIDEN_MULT_LO_EXPR))
+         elts += nelts;
+
+       for (i = 0; i < nelts; i++)
+         {
+           elts[i] = fold_convert_const (NOP_EXPR, TREE_TYPE (type), elts[i]);
+           elts[i + nelts * 2]
+             = fold_convert_const (NOP_EXPR, TREE_TYPE (type),
+                                   elts[i + nelts * 2]);
+           if (elts[i] == NULL_TREE || elts[i + nelts * 2] == NULL_TREE)
+             return NULL_TREE;
+           elts[i] = const_binop (MULT_EXPR, elts[i], elts[i + nelts * 2]);
+           if (elts[i] == NULL_TREE || !CONSTANT_CLASS_P (elts[i]))
+             return NULL_TREE;
+         }
+
+       for (i = 0; i < nelts; i++)
+         vals = tree_cons (NULL_TREE, elts[nelts - i - 1], vals);
+       return build_vector (type, vals);
+      }
+
     default:
       return NULL_TREE;
     } /* switch (code) */
--- gcc/testsuite/gcc.dg/vect/vect-122.c.jj     2011-12-02 17:48:27.182059637 +0100
+++ gcc/testsuite/gcc.dg/vect/vect-122.c        2011-12-02 17:49:05.160880424 +0100
@@ -0,0 +1,75 @@
+/* Found while investigating PR51387: each loop below stores values that
+   depend only on the loop index, and main verifies the stored results.  */
+
+#include "tree-vect.h"
+
+#ifndef N
+#define N 64
+#endif
+
+char a[N];
+float b[N];
+long long l[N], m[N];
+
+/* Store each index, converted to char, into a[].  */
+
+__attribute__((noinline, noclone)) void
+f1 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    a[i] = i;
+}
+
+/* Store each index into b[], converting via double to float.  */
+
+__attribute__((noinline, noclone)) void
+f2 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    b[i] = (double) i;
+}
+
+/* Store the long long product i * (i + 7) into l[].  */
+
+__attribute__((noinline, noclone)) void
+f3 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    l[i] = (long long) i * (i + 7);
+}
+
+/* Store the long long product i * 7 into m[].  */
+
+__attribute__((noinline, noclone)) void
+f4 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    m[i] = (long long) i * 7;
+}
+
+/* Run every loop, then abort on the first element that differs from the
+   value recomputed here.  a[] is compared against (char) i so the check
+   stays valid when N is overridden to exceed the range of char.  */
+
+int
+main (void)
+{
+  int i;
+
+  check_vect ();
+  f1 ();
+  f2 ();
+  f3 ();
+  f4 ();
+  for (i = 0; i < N; i++)
+    if (a[i] != (char) i || b[i] != i
+        || l[i] != i * (i + 7LL) || m[i] != i * 7LL)
+      abort ();
+  return 0;
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */


        Jakub

Reply via email to