This is a respin of patches
https://gcc.gnu.org/ml/gcc-patches/2015-10/msg03266.html and
https://gcc.gnu.org/ml/gcc-patches/2015-10/msg03267.html, which were
"too quickly" approved before concerns with efficiency were pointed out.

I tried to change the hashing just in tree-ssa-dom.c using C++ subclassing, but
couldn't cleanly separate this out from tree-ssa-scopedtables and
tree-ssa-threadedge.c due to use of avail_exprs_stack. So I figured it was
probably appropriate to use the equivalences in jump threading too. Also,
using get_ref_base_and_extent unifies handling of MEM_REFs and ARRAY_REFs
(hence only one patch rather than two).

I've added a couple of testcases that show the improvement in DOM, but in all
cases I had to disable FRE, even PRE, to get any improvement, apart from on
ssa-dom-cse-2.c itself (where on the affected platforms FRE still does not do
the optimization). This makes me wonder if this is the right approach or whether
changing the references output by SRA (as per
https://gcc.gnu.org/ml/gcc-patches/2015-08/msg01490.html , judged as a hack to
SRA to work around limitations in DOM - or is it?) would be better.

Also, this causes gcc to miss vectorization of pr65947-2.c, because the
additional equivalents spotted in dom1 lead to an extra jump-thread (partially
peeling the loop's first iter, as this has become entirely predictable);
loop-header-copying (ch_vect) then kicks in, restoring the loop structure but
leading to:

      i_49 = 1;
      ivtmp_46 = 253;
      if (ivtmp_46 != 0) goto <bb 3>; else goto <bb 8>;

      <bb 3>: <--- OUTSIDE LOOP

      <bb 4>: <--- LOOP HEADER
      # i_53 = PHI <i_42(7), i_49(3)>

and scalar evolution is unable to analyze i_49 (defined outside the loop) even
though it's equal to a constant (analyze_scalar_evolution_1, test of
flow_bb_inside_loop_p).

This is fixed in the next patch...

gcc/ChangeLog:

        * tree-ssa-scopedtables.c (avail_expr_hash): Hash MEM_REF and ARRAY_REF
        using get_ref_base_and_extent.
        (equal_mem_array_ref_p): New.
        (hashable_expr_equal_p): Add call to previous.

gcc/testsuite/ChangeLog:

        * gcc.dg/tree-ssa/ssa-dom-cse-5.c: New.
        * gcc.dg/tree-ssa/ssa-dom-cse-6.c: New.
        * gcc.dg/tree-ssa/ssa-dom-cse-7.c: New.
---
 gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-5.c | 18 +++++++
 gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-6.c | 16 +++++++
 gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-7.c | 19 ++++++++
 gcc/tree-ssa-scopedtables.c                   | 67 +++++++++++++++++++++++++--
 4 files changed, 117 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-5.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-6.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-7.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-5.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-5.c
new file mode 100644
index 0000000..cd38d3e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-5.c
@@ -0,0 +1,18 @@
+/* Test normalization of ARRAY_REF expressions to MEM_REFs in dom.  */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-tree-fre -fdump-tree-dom2" } */
+
+#define N 8
+
+int
+main (int argc, char **argv)
+{
+  int a[N];
+  for (int i = 0; i < N; i++)
+    a[i] = 2*i + 1;
+  int *p = &a[0];
+  __builtin_printf ("%d\n", a[argc]);
+  return *(++p);
+}
+
+/* { dg-final { scan-tree-dump-times "return 3;" 1 "dom2"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-6.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-6.c
new file mode 100644
index 0000000..b0c5138
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-6.c
@@ -0,0 +1,16 @@
+/* Test normalization of ARRAY_REF expressions to MEM_REFs in dom.  */
+/* { dg-do compile } */
+/* { dg-options "-O1 -fno-tree-fre -fdump-tree-dom1" } */
+
+int
+main (int argc, char **argv)
+{
+  union {
+    int a[8];
+    int b[2];
+  } u = { .a = { 1, 42, 3, 4, 5, 6, 7, 8 } };
+  __builtin_printf ("%d\n", u.a[argc]);
+  return u.b[1];
+}
+
+/* { dg-final { scan-assembler-times "return 42;" 1 "dom1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-7.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-7.c
new file mode 100644
index 0000000..1fc4c5e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-7.c
@@ -0,0 +1,19 @@
+/* Test normalization of MEM_REF expressions in dom.  */
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-tree-fre -fno-tree-pre -fdump-tree-optimized" } */
+
+typedef struct {
+  int a[8];
+} foo;
+
+foo f;
+
+int
+test ()
+{
+  foo g = { { 1, 2, 3, 4, 5, 6, 7, 8 } };
+  f=g;
+  return f.a[2];
+}
+
+/* { dg-final { scan-tree-dump-times "return 3;" 1 "optimized" } } */
diff --git a/gcc/tree-ssa-scopedtables.c b/gcc/tree-ssa-scopedtables.c
index 6034f79..8df7125 100644
--- a/gcc/tree-ssa-scopedtables.c
+++ b/gcc/tree-ssa-scopedtables.c
@@ -32,6 +32,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "fold-const.h"
 #include "tree-eh.h"
 #include "internal-fn.h"
+#include "tree-dfa.h"
 
 static bool hashable_expr_equal_p (const struct hashable_expr *,
                                   const struct hashable_expr *);
@@ -209,11 +210,70 @@ avail_expr_hash (class expr_hash_elt *p)
   const struct hashable_expr *expr = p->expr ();
   inchash::hash hstate;
 
+  if (expr->kind == EXPR_SINGLE)
+    {
+      /* T could potentially be a switch index or a goto dest.  */
+      tree t = expr->ops.single.rhs;
+      if (TREE_CODE (t) == MEM_REF || TREE_CODE (t) == ARRAY_REF)
+       {
+         /* Make equivalent statements of both these kinds hash together.
+            Dealing with both MEM_REF and ARRAY_REF allows us not to care
+            about equivalence with other statements not considered here.  */
+         bool reverse;
+         HOST_WIDE_INT offset, size, max_size;
+         tree base = get_ref_base_and_extent (t, &offset, &size, &max_size,
+                                              &reverse);
+         /* Strictly, we could try to normalize variable-sized accesses too,
+           but here we just deal with the common case.  */
+         if (size == max_size)
+           {
+             enum tree_code code = MEM_REF;
+             hstate.add_object (code);
+             inchash::add_expr (base, hstate);
+             hstate.add_object (offset);
+             hstate.add_object (size);
+             return hstate.end ();
+           }
+       }
+    }
+
   inchash::add_hashable_expr (expr, hstate);
 
   return hstate.end ();
 }
 
+/* Compares trees T0 and T1 to see if they are MEM_REF or ARRAY_REFs equivalent
+   to each other.  (That is, they return the value of the same bit of memory.)
+
+   Return TRUE if the two are so equivalent; FALSE if not (which could still
+   mean the two are equivalent by other means).  */
+
+static bool
+equal_mem_array_ref_p (tree t0, tree t1)
+{
+  if (TREE_CODE (t0) != MEM_REF && TREE_CODE (t0) != ARRAY_REF)
+    return false;
+  if (TREE_CODE (t1) != MEM_REF && TREE_CODE (t1) != ARRAY_REF)
+    return false;
+
+  if (!types_compatible_p (TREE_TYPE (t0), TREE_TYPE (t1)))
+    return false;
+  bool rev0;
+  HOST_WIDE_INT off0, sz0, max0;
+  tree base0 = get_ref_base_and_extent (t0, &off0, &sz0, &max0, &rev0);
+
+  bool rev1;
+  HOST_WIDE_INT off1, sz1, max1;
+  tree base1 = get_ref_base_and_extent (t1, &off1, &sz1, &max1, &rev1);
+
+  /* Types were compatible, so these are sanity checks.  */
+  gcc_assert (sz0 == sz1);
+  gcc_assert (max0 == max1);
+  gcc_assert (rev0 == rev1);
+
+  return (off0 == off1) && operand_equal_p (base0, base1, 0);
+}
+
 /* Compare two hashable_expr structures for equivalence.  They are
    considered equivalent when the expressions they denote must
    necessarily be equal.  The logic is intended to follow that of
@@ -246,9 +306,10 @@ hashable_expr_equal_p (const struct hashable_expr *expr0,
   switch (expr0->kind)
     {
     case EXPR_SINGLE:
-      return operand_equal_p (expr0->ops.single.rhs,
-                              expr1->ops.single.rhs, 0);
-
+      return equal_mem_array_ref_p (expr0->ops.single.rhs,
+                                   expr1->ops.single.rhs)
+            || operand_equal_p (expr0->ops.single.rhs,
+                                expr1->ops.single.rhs, 0);
     case EXPR_UNARY:
       if (expr0->ops.unary.op != expr1->ops.unary.op)
         return false;
-- 
1.9.1

Reply via email to