On Thu, Apr 04, 2013 at 08:37:47PM +0200, Richard Biener wrote:
> Can you factor out a function that returns
> A proper qimode value if possible or null and
> Use it in both places?

Like this?

2013-04-04  Jakub Jelinek  <ja...@redhat.com>

        * tree-loop-distribution.c (const_with_all_bytes_same): New function.
        (generate_memset_builtin): Only handle integer_all_onesp as -1 val if
        TYPE_PRECISION is equal to mode bitsize.  Use const_with_all_bytes_same
        if possible to compute val.
        (classify_partition): Verify CONSTRUCTOR doesn't have any elts.
        For QImode integers don't require anything about precision.  Use
        const_with_all_bytes_same to find out if the constant doesn't have
        repeated bytes in it.

        * gcc.dg/pr56837.c: New test.

--- gcc/tree-loop-distribution.c.jj     2013-04-04 15:03:28.000000000 +0200
+++ gcc/tree-loop-distribution.c        2013-04-04 20:49:14.295546543 +0200
@@ -297,6 +297,27 @@ build_addr_arg_loc (location_t loc, data
   return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
 }
 
+/* If VAL memory representation contains the same value in all bytes,
+   return that value, otherwise return -1.
+   E.g. for 0x24242424 return 0x24, for IEEE double
+   747708026454360457216.0 return 0x44, etc.  */
+
+static int
+const_with_all_bytes_same (tree val)
+{
+  unsigned char buf[64];
+  int i, len;
+  if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
+    return -1;
+  len = native_encode_expr (val, buf, sizeof (buf));
+  if (len == 0)
+    return -1;
+  for (i = 1; i < len; i++)
+    if (buf[i] != buf[0])
+      return -1;
+  return buf[0];
+}
+
 /* Generate a call to memset for PARTITION in LOOP.  */
 
 static void
@@ -331,11 +352,18 @@ generate_memset_builtin (struct loop *lo
       || real_zerop (val)
       || TREE_CODE (val) == CONSTRUCTOR)
     val = integer_zero_node;
-  else if (integer_all_onesp (val))
+  else if (integer_all_onesp (val)
+          && TYPE_PRECISION (TREE_TYPE (val))
+             == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (val))))
     val = build_int_cst (integer_type_node, -1);
   else
     {
-      if (TREE_CODE (val) == INTEGER_CST)
+      /* Handle constants like 0x15151515 and similarly
+        floating point constants etc. where all bytes are the same.  */
+      int bytev = const_with_all_bytes_same (val);
+      if (bytev != -1)
+       val = build_int_cst (integer_type_node, bytev);
+      else if (TREE_CODE (val) == INTEGER_CST)
        val = fold_convert (integer_type_node, val);
       else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
        {
@@ -944,15 +972,15 @@ classify_partition (loop_p loop, struct
       if (!(integer_zerop (rhs)
            || real_zerop (rhs)
            || (TREE_CODE (rhs) == CONSTRUCTOR
-               && !TREE_CLOBBER_P (rhs))
-           || ((integer_all_onesp (rhs)
-                || (INTEGRAL_TYPE_P (TREE_TYPE (rhs))
-                    && (TYPE_MODE (TREE_TYPE (rhs))
-                        == TYPE_MODE (unsigned_char_type_node))))
-               /* For stores of a non-zero value require that the precision
-                  of the value matches its actual size.  */
+               && !TREE_CLOBBER_P (rhs)
+               && CONSTRUCTOR_NELTS (rhs) == 0)
+           || (integer_all_onesp (rhs)
                && (TYPE_PRECISION (TREE_TYPE (rhs))
-                   == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs)))))))
+                   == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs)))))
+           || (INTEGRAL_TYPE_P (TREE_TYPE (rhs))
+               && (TYPE_MODE (TREE_TYPE (rhs))
+                   == TYPE_MODE (unsigned_char_type_node)))
+           || const_with_all_bytes_same (rhs) != -1))
        return;
       if (TREE_CODE (rhs) == SSA_NAME
          && !SSA_NAME_IS_DEFAULT_DEF (rhs)
--- gcc/testsuite/gcc.dg/pr56837.c.jj   2013-04-04 17:37:58.458675152 +0200
+++ gcc/testsuite/gcc.dg/pr56837.c      2013-04-04 17:36:40.000000000 +0200
@@ -0,0 +1,67 @@
+/* Limit this test to selected targets with IEEE double, 8-byte long long,
+   supported 4x int vectors, 4-byte int.  */
+/* { dg-do compile { target { i?86-*-* x86_64-*-* powerpc*-*-* } } } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+/* { dg-additional-options "-msse2" { target ia32 } } */
+/* { dg-additional-options "-mvsx -maltivec" { target powerpc*-*-* } } */
+
+typedef int V __attribute__((__vector_size__ (16)));
+#define N 1024
+double d[N];
+long long int l[N];
+_Bool b[N];
+_Complex double c[N];
+V v[N];
+
+void
+fd (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    d[i] = 747708026454360457216.0;
+}
+
+void
+fl (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    l[i] = 0x7c7c7c7c7c7c7c7cULL;
+}
+
+void
+fb (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    b[i] = 1;
+}
+
+void
+fc (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    c[i] = 747708026454360457216.0 + 747708026454360457216.0i;
+}
+
+void
+fv (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    v[i] = (V) { 0x12121212, 0x12121212, 0x12121212, 0x12121212 };
+}
+
+/* Look for
+  __builtin_memset (&d, 68, 8192);
+  __builtin_memset (&l, 124, 8192);
+  __builtin_memset (&b, 1, 1024);
+  __builtin_memset (&c, 68, 16384);
+  __builtin_memset (&v, 18, 16384); */
+/* { dg-final { scan-tree-dump-times "memset ..d, 68, 8192.;" 1 "optimized" } 
} */
+/* { dg-final { scan-tree-dump-times "memset ..l, 124, 8192.;" 1 "optimized" } 
} */
+/* { dg-final { scan-tree-dump-times "memset ..b, 1, 1024.;" 1 "optimized" } } 
*/
+/* { dg-final { scan-tree-dump-times "memset ..c, 68, 16384.;" 1 "optimized" } 
} */
+/* { dg-final { scan-tree-dump-times "memset ..v, 18, 16384.;" 1 "optimized" } 
} */
+/* { dg-final { cleanup-tree-dump "optimized" } } */


        Jakub

Reply via email to