Hi,
GCC vectorizer generates many unnecessary runtime alias checks known at 
compilation time.  For some data-reference pairs, alias relation can be 
resolved at compilation time, in this case, we can simply skip the alias check. 
 For some other data-reference pairs, alias relation can be realized at 
compilation time, in this case, we should return false and simply skip 
vectorizing the loop.  For the second case, the corresponding loop is 
vectorized for nothing because the guard alias condition is bound to false 
anyway.  Vectorizing it not only wastes compilation time, but also slows down 
generated code because GCC fails to resolve these "false" alias check after 
vectorization.  Even in cases it can be resolved (by VRP), GCC fails to cleanup 
all the mess generated in loop versioning.
This looks like a common issue in spec2k6.  For example, in 434.zeusmp/ggen.f, 
there are three loops vectorized but never executed; in 464.h264ref, there are 
loops in which all runtime alias checks are resolved at compilation time thus 
loop versioning is proven unnecessary.  Statistic data also shows that about 
>100 loops are falsely vectorized currently in my build of spec2k6.

This patch is based on 
https://gcc.gnu.org/ml/gcc-patches/2016-06/msg00399.html, bootstrap and test on 
x86_64 and AArch64 (ongoing), is it OK?

Thanks,
bin

2016-06-07  Bin Cheng  <bin.ch...@arm.com>

        * tree-vect-data-refs.c (vect_no_alias_p): New function.
        (vect_prune_runtime_alias_test_list): Call vect_no_alias_p to
        resolve alias checks which are known at compilation time.
        Truncate vector LOOP_VINFO_MAY_ALIAS_DDRS(loop_vinfo) if all
        alias checks are resolved at compilation time.
diff --git a/gcc/testsuite/gcc.dg/vect/vect-35-big-array.c 
b/gcc/testsuite/gcc.dg/vect/vect-35-big-array.c
index 1caca74..ca57a10 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-35-big-array.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-35-big-array.c
@@ -21,7 +21,9 @@ int main1 ()
     }
 
   /* Dependence analysis fails cause s.a and s.b may overlap.
-     Use runtime aliasing test with versioning.  */
+     Try to use runtime aliasing test with versioning, and
+     later versioning/vectorization are skipped because the
+     overlap is proven at compilation time.  */
   for (i = 0; i < N; i++)
     {
       s.a[i] = s.b[i] + 1;
@@ -45,5 +47,5 @@ int main (void)
 }
 
 
-/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect"  { xfail { 
ia64-*-* sparc*-*-* } } } } */
-/* { dg-final { scan-tree-dump-times "can't determine dependence between" 1 
"vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { xfail { 
ia64-*-* sparc*-*-* } } } } */
+/* { dg-final { scan-tree-dump "can't determine dependence between" "vect" } } 
*/
diff --git a/gcc/testsuite/gcc.dg/vect/vect-35.c 
b/gcc/testsuite/gcc.dg/vect/vect-35.c
index edbeb1f..76fe32d 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-35.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-35.c
@@ -21,7 +21,9 @@ int main1 ()
     }
 
   /* Dependence analysis fails cause s.a and s.b may overlap.
-     Use runtime aliasing test with versioning.  */
+     Try to use runtime aliasing test with versioning, and
+     later versioning/vectorization are skipped because the
+     overlap is proven at compilation time.  */
   for (i = 0; i < N; i++)
     {
       s.a[i] = s.b[i] + 1;
@@ -45,5 +47,5 @@ int main (void)
 } 
 
 
-/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect"  { xfail { 
ia64-*-* sparc*-*-* } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { xfail { 
ia64-*-* sparc*-*-* } } } } */
 /* { dg-final { scan-tree-dump "can't determine dependence between" "vect" } } 
*/
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index ba4d637..c70f658 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -2927,6 +2927,54 @@ vect_vfa_segment_size (struct data_reference *dr, tree 
length_factor)
   return segment_length;
 }
 
+/* Function vect_no_alias_p.
+
+   Given data references A and B whose alias relation is known at
+   compilation time, return TRUE if they do not alias to each other;
+   return FALSE if they do.  SEGMENT_LENGTH_A and SEGMENT_LENGTH_B
+   are the memory lengths accessed by A and B respectively.  */
+
+static bool
+vect_no_alias_p (struct data_reference *a, struct data_reference *b,
+                 tree segment_length_a, tree segment_length_b)
+{
+  gcc_assert (TREE_CODE (DR_INIT (a)) == INTEGER_CST
+             && TREE_CODE (DR_INIT (b)) == INTEGER_CST);
+  if (wi::to_widest (DR_INIT (a)) == wi::to_widest (DR_INIT (b)))
+    return false;
+
+  tree seg_a_min = DR_INIT (a);
+  tree seg_a_max = fold_build2 (PLUS_EXPR, TREE_TYPE (seg_a_min),
+                               seg_a_min, segment_length_a);
+  /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
+     bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
+     [a, a+12) */
+  if (tree_int_cst_compare (DR_STEP (a), size_zero_node) < 0)
+    {
+      tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (a)));
+      seg_a_min = fold_build2 (PLUS_EXPR, TREE_TYPE (seg_a_max),
+                              seg_a_max, unit_size);
+      seg_a_max = fold_build2 (PLUS_EXPR, TREE_TYPE (DR_INIT (a)),
+                              DR_INIT (a), unit_size);
+    }
+  tree seg_b_min = DR_INIT (b);
+  tree seg_b_max = fold_build2 (PLUS_EXPR, TREE_TYPE (seg_b_min),
+                               seg_b_min, segment_length_b);
+  if (tree_int_cst_compare (DR_STEP (b), size_zero_node) < 0)
+    {
+      tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (b)));
+      seg_b_min = fold_build2 (PLUS_EXPR, TREE_TYPE (seg_b_max),
+                              seg_b_max, unit_size);
+      seg_b_max = fold_build2 (PLUS_EXPR, TREE_TYPE (DR_INIT (b)),
+                              DR_INIT (b), unit_size);
+    }
+  if (wi::to_widest (seg_a_max) <= wi::to_widest (seg_b_min)
+      || wi::to_widest (seg_b_max) <= wi::to_widest (seg_a_min))
+    return true;
+
+  return false;
+}
+
 /* Function vect_prune_runtime_alias_test_list.
 
    Prune a list of ddrs to be tested at run-time by versioning for alias.
@@ -3019,15 +3067,32 @@ vect_prune_runtime_alias_test_list (loop_vec_info 
loop_vinfo)
       segment_length_a = vect_vfa_segment_size (dr_a, length_factor);
       segment_length_b = vect_vfa_segment_size (dr_b, length_factor);
 
+      comp_res = compare_tree (DR_BASE_ADDRESS (dr_a), DR_BASE_ADDRESS (dr_b));
+      if (comp_res == 0)
+       comp_res = compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
+
+      /* Alias is known at compilation time.  */
+      if (comp_res == 0
+         && TREE_CODE (length_factor) == INTEGER_CST
+         && TREE_CODE (DR_STEP (dr_a)) == INTEGER_CST
+         && TREE_CODE (DR_STEP (dr_b)) == INTEGER_CST)
+       {
+         if (vect_no_alias_p (dr_a, dr_b, segment_length_a, segment_length_b))
+           continue;
+
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_NOTE, vect_location,
+                            "not vectorized: compilation time alias.\n");
+
+         return false;
+       }
+
       dr_with_seg_len_pair_t dr_with_seg_len_pair
          (dr_with_seg_len (dr_a, segment_length_a),
           dr_with_seg_len (dr_b, segment_length_b));
 
       /* Canonicalize pairs by sorting the two DR members.  */
-      comp_res = compare_tree (DR_BASE_ADDRESS (dr_a), DR_BASE_ADDRESS (dr_b));
-      if (comp_res > 0
-          || (comp_res == 0
-              && compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b)) > 0))
+      if (comp_res > 0)
        std::swap (dr_with_seg_len_pair.first, dr_with_seg_len_pair.second);
 
       comp_alias_ddrs.safe_push (dr_with_seg_len_pair);
@@ -3176,7 +3241,19 @@ vect_prune_runtime_alias_test_list (loop_vec_info 
loop_vinfo)
                   may_alias_ddrs.length (), comp_alias_ddrs.length ());
   if ((int) comp_alias_ddrs.length () >
       PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
-    return false;
+    {
+      if (dump_enabled_p ())
+       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                        "number of versioning for alias "
+                        "run-time tests exceeds %d "
+                        "(--param vect-max-version-for-alias-checks)\n",
+                        PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS));
+      return false;
+    }
+
+  /* All alias checks have been resolved at compilation time.  */
+  if (!comp_alias_ddrs.length ())
+    LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).truncate (0);
 
   return true;
 }
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index bc1257c..8759b4c 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -1969,15 +1969,7 @@ start_over:
      since we use grouping information gathered by interleaving analysis.  */
   ok = vect_prune_runtime_alias_test_list (loop_vinfo);
   if (!ok)
-    {
-      if (dump_enabled_p ())
-       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                        "number of versioning for alias "
-                        "run-time tests exceeds %d "
-                        "(--param vect-max-version-for-alias-checks)\n",
-                        PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS));
-      return false;
-    }
+    return false;
 
   /* This pass will decide on using loop versioning and/or loop peeling in
      order to enhance the alignment of data references in the loop.  */

Reply via email to