Re: [google] Refine static branch prediction (iv-compare heuristic)

Dehao Chen Wed, 28 Mar 2012 19:56:12 -0700

Thanks, attached is the updated patch.

Dehao


Index: gcc/testsuite/gcc.dg/predict-3.c
===================================================================
--- gcc/testsuite/gcc.dg/predict-3.c    (revision 185903)
+++ gcc/testsuite/gcc.dg/predict-3.c    (working copy)
@@ -10,10 +10,16 @@
   int i, ret = 0;
   for (i = 0; i <= bound; i++)
     {
+      if (i < bound - 2)
+       global += bar (i);
+      if (i <= bound)
+       global += bar (i);
+      if (i + 1 < bound)
+       global += bar (i);
       if (i != bound)
        global += bar (i);
     }
 }

-/* { dg-final { scan-tree-dump "loop iv compare heuristics"
"profile_estimate"} } */
+/* { dg-final { scan-tree-dump-times "loop iv compare heuristics:
100.0%" 4 "profile_estimate"} } */
 /* { dg-final { cleanup-tree-dump "profile_estimate" } } */
Index: gcc/testsuite/gcc.dg/predict-4.c
===================================================================
--- gcc/testsuite/gcc.dg/predict-4.c    (revision 185903)
+++ gcc/testsuite/gcc.dg/predict-4.c    (working copy)
@@ -15,5 +15,5 @@
     }
 }

-/* { dg-final { scan-tree-dump "loop iv compare heuristics"
"profile_estimate"} } */
+/* { dg-final { scan-tree-dump "loop iv compare heuristics: 50.0%"
"profile_estimate"} } */
 /* { dg-final { cleanup-tree-dump "profile_estimate" } } */
Index: gcc/testsuite/gcc.dg/predict-1.c
===================================================================
--- gcc/testsuite/gcc.dg/predict-1.c    (revision 185903)
+++ gcc/testsuite/gcc.dg/predict-1.c    (working copy)
@@ -10,10 +10,18 @@
   int i, ret = 0;
   for (i = 0; i < bound; i++)
     {
+      if (i > bound)
+       global += bar (i);
+      if (i >= bound + 2)
+       global += bar (i);
       if (i > bound - 2)
        global += bar (i);
+      if (i + 2 > bound)
+       global += bar (i);
+      if (i == 10)
+       global += bar (i);
     }
 }

-/* { dg-final { scan-tree-dump "loop iv compare heuristics"
"profile_estimate"} } */
+/* { dg-final { scan-tree-dump-times "loop iv compare heuristics:
0.0%" 5 "profile_estimate"} } */
 /* { dg-final { cleanup-tree-dump "profile_estimate" } } */
Index: gcc/testsuite/gcc.dg/predict-5.c
===================================================================
--- gcc/testsuite/gcc.dg/predict-5.c    (revision 0)
+++ gcc/testsuite/gcc.dg/predict-5.c    (revision 0)
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-profile_estimate" } */
+
+extern int global;
+
+int bar (int);
+
+void foo (int base, int bound)
+{
+  int i, ret = 0;
+  for (i = base; i <= bound; i++)
+    {
+      if (i > base)
+       global += bar (i);
+      if (i > base + 1)
+       global += bar (i);
+      if (i >= base + 3)
+       global += bar (i);
+      if (i - 2 >= base)
+       global += bar (i);
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "loop iv compare heuristics:
100.0%" 4 "profile_estimate"} } */
+/* { dg-final { cleanup-tree-dump "profile_estimate" } } */
Index: gcc/testsuite/gcc.dg/predict-2.c
===================================================================
--- gcc/testsuite/gcc.dg/predict-2.c    (revision 185903)
+++ gcc/testsuite/gcc.dg/predict-2.c    (working copy)
@@ -5,12 +5,20 @@

 int bar(int);

-void foo (int bound)
+void foo (int base, int bound)
 {
   int i, ret = 0;
-  for (i = 0; i < bound; i++)
+  for (i = base; i < bound; i++)
     {
-      if (i > bound * bound )
+      if (i > bound * bound)
+       global += bar (i);
+      if (i > bound + 10)
+       global += bar (i);
+      if (i <= bound + 10)
+       global += bar (i);
+      if (i > base + 10)
+       global += bar (i);
+      if (i < base - 10)
        global += bar (i);
     }
 }
Index: gcc/testsuite/gcc.dg/predict-6.c
===================================================================
--- gcc/testsuite/gcc.dg/predict-6.c    (revision 0)
+++ gcc/testsuite/gcc.dg/predict-6.c    (revision 0)
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-profile_estimate" } */
+
+extern int global;
+
+int bar (int);
+
+void foo (int base, int bound)
+{
+  int i, ret = 0;
+  for (i = base; i <= bound; i++)
+    {
+      if (i < base)
+       global += bar (i);
+      if (i < base + 1)
+       global += bar (i);
+      if (i <= base + 3)
+       global += bar (i);
+      if (i - 1 < base)
+       global += bar (i);
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "loop iv compare heuristics:
0.0%" 4 "profile_estimate"} } */
+/* { dg-final { cleanup-tree-dump "profile_estimate" } } */
Index: gcc/predict.c
===================================================================
--- gcc/predict.c       (revision 185903)
+++ gcc/predict.c       (working copy)
@@ -1070,6 +1070,8 @@
     bound = get_base_value (bound);
   if (!bound)
     return false;
+  if (TREE_CODE (base) != INTEGER_CST)
+    base = get_base_value (base);

   *loop_invariant = bound;
   *compare_code = code;
@@ -1185,8 +1187,7 @@
       return;
     }

-  if (!expr_coherent_p (loop_bound_var, compare_var)
-      || loop_iv_base_var != compare_base)
+  if (!expr_coherent_p(loop_iv_base_var, compare_base))
     return;

   /* If loop bound, base and compare bound are all constents, we can
@@ -1230,34 +1231,52 @@
       return;
     }

-  if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
-      && (compare_code == LT_EXPR || compare_code == LE_EXPR))
-    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE, TAKEN);
-  else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
-          && (compare_code == GT_EXPR || compare_code == GE_EXPR))
-    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE, TAKEN);
-  else if (loop_bound_code == NE_EXPR)
-    {
-      /* If the loop backedge condition is "(i != bound)", we do
-        the comparison based on the step of IV:
-          * step < 0 : backedge condition is like (i > bound)
-          * step > 0 : backedge condition is like (i < bound)  */
-      gcc_assert (loop_bound_step != 0);
+  if (expr_coherent_p (loop_bound_var, compare_var))
+    {
+      if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
+         && (compare_code == LT_EXPR || compare_code == LE_EXPR))
+       predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE, TAKEN);
+      else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
+              && (compare_code == GT_EXPR || compare_code == GE_EXPR))
+       predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE, TAKEN);
+      else if (loop_bound_code == NE_EXPR)
+       {
+         /* If the loop backedge condition is "(i != bound)", we do
+            the comparison based on the step of IV:
+            * step < 0 : backedge condition is like (i > bound)
+            * step > 0 : backedge condition is like (i < bound)  */
+         gcc_assert (loop_bound_step != 0);
+         if (loop_bound_step > 0
+             && (compare_code == LT_EXPR
+                 || compare_code == LE_EXPR))
+           predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE, TAKEN);
+         else if (loop_bound_step < 0
+                  && (compare_code == GT_EXPR
+                      || compare_code == GE_EXPR))
+           predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE, TAKEN);
+         else
+           predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE, NOT_TAKEN);
+       }
+      else
+       /* The branch is predicted not-taken if loop_bound_code is
+          opposite with compare_code.  */
+       predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE, NOT_TAKEN);
+    }
+  else if (expr_coherent_p (loop_iv_base_var, compare_var))
+    {
+      /* For cases like:
+          for (i = s; i < h; i++)
+            if (i > s + 2) ....
+        The branch should be predicted taken.  */
       if (loop_bound_step > 0
-         && (compare_code == LT_EXPR
-             || compare_code == LE_EXPR))
+         && (compare_code == GT_EXPR || compare_code == GE_EXPR))
        predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE, TAKEN);
       else if (loop_bound_step < 0
-              && (compare_code == GT_EXPR
-                  || compare_code == GE_EXPR))
+              && (compare_code == LT_EXPR || compare_code == LE_EXPR))
        predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE, TAKEN);
       else
        predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE, NOT_TAKEN);
     }
-  else
-    /* The branch is predicted not-taken if loop_bound_code is
-       opposite with compare_code.  */
-    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE, NOT_TAKEN);
 }

 /* Predict edge probabilities by exploiting loop structure.  */

On Thu, Mar 29, 2012 at 8:06 AM, Xinliang David Li <[email protected]> wrote:
> Can the test case be improved so that expected prediction results is
> checked (with the help of more dumping such as blah blah is predicted
> to be taken/not taken with probability xyz) ?
>
> Also the more test cases need to be added to cover more cases >base, >
> base +1, >=base +2, < base+1, <=base+1 etc -- even though expression
> reassociation will normalize them ...
>
> Thanks,
>
> David
>
> On Wed, Mar 28, 2012 at 4:54 PM, Dehao Chen <[email protected]> wrote:
>> Hi,
>>
>> This patch handles the comparison of iv against the loop iv initial
>> value. Previously, we only handle the comparison of iv against its
>> bound.
>>
>> Bootstrapped and passed all regression tests.
>>
>> Ok for google branches?
>>
>> Thanks,
>> Dehao
>>
>> 2012-03-29  Dehao Chen  <[email protected]>
>>
>>        * predict.c (predict_iv_comparison): Add the comparison of induction
>>        variable against its initial value.
>>
>> 2012-03-29  Dehao Chen  <[email protected]>
>>
>>        * gcc.dg/predict-5.c: Check if LOOP_IV_COMPARE static predict
>>        heuristic is working properly.
>>
>> Index: gcc/testsuite/gcc.dg/predict-5.c
>> ===================================================================
>> --- gcc/testsuite/gcc.dg/predict-5.c    (revision 0)
>> +++ gcc/testsuite/gcc.dg/predict-5.c    (revision 0)
>> @@ -0,0 +1,21 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O2 -fdump-tree-profile_estimate" } */
>> +
>> +extern int global;
>> +
>> +int bar(int);
>> +
>> +void foo (int base, int bound)
>> +{
>> +  int i, ret = 0;
>> +  for (i = base; i <= bound; i++)
>> +    {
>> +      if (i > base + 2)
>> +       global += bar (i);
>> +      else
>> +        global += bar (i + 1);
>> +    }
>> +}
>> +
>> +/* { dg-final { scan-tree-dump "loop iv compare heuristics"
>> "profile_estimate"} } */
>> +/* { dg-final { cleanup-tree-dump "profile_estimate" } } */
>> Index: gcc/predict.c
>> ===================================================================
>> --- gcc/predict.c       (revision 185903)
>> +++ gcc/predict.c       (working copy)
>> @@ -1185,8 +1185,7 @@
>>       return;
>>     }
>>
>> -  if (!expr_coherent_p (loop_bound_var, compare_var)
>> -      || loop_iv_base_var != compare_base)
>> +  if (loop_iv_base_var != compare_base)
>>     return;
>>
>>   /* If loop bound, base and compare bound are all constents, we can
>> @@ -1230,34 +1229,52 @@
>>       return;
>>     }
>>
>> -  if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
>> -      && (compare_code == LT_EXPR || compare_code == LE_EXPR))
>> -    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE, TAKEN);
>> -  else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
>> -          && (compare_code == GT_EXPR || compare_code == GE_EXPR))
>> -    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE, TAKEN);
>> -  else if (loop_bound_code == NE_EXPR)
>> -    {
>> -      /* If the loop backedge condition is "(i != bound)", we do
>> -        the comparison based on the step of IV:
>> -          * step < 0 : backedge condition is like (i > bound)
>> -          * step > 0 : backedge condition is like (i < bound)  */
>> -      gcc_assert (loop_bound_step != 0);
>> +  if (expr_coherent_p (loop_bound_var, compare_var))
>> +    {
>> +      if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
>> +         && (compare_code == LT_EXPR || compare_code == LE_EXPR))
>> +       predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE, TAKEN);
>> +      else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
>> +              && (compare_code == GT_EXPR || compare_code == GE_EXPR))
>> +       predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE, TAKEN);
>> +      else if (loop_bound_code == NE_EXPR)
>> +       {
>> +         /* If the loop backedge condition is "(i != bound)", we do
>> +            the comparison based on the step of IV:
>> +            * step < 0 : backedge condition is like (i > bound)
>> +            * step > 0 : backedge condition is like (i < bound)  */
>> +         gcc_assert (loop_bound_step != 0);
>> +         if (loop_bound_step > 0
>> +             && (compare_code == LT_EXPR
>> +                 || compare_code == LE_EXPR))
>> +           predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE, TAKEN);
>> +         else if (loop_bound_step < 0
>> +                  && (compare_code == GT_EXPR
>> +                      || compare_code == GE_EXPR))
>> +           predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE, TAKEN);
>> +         else
>> +           predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE, NOT_TAKEN);
>> +       }
>> +      else
>> +       /* The branch is predicted not-taken if loop_bound_code is
>> +          opposite with compare_code.  */
>> +       predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE, NOT_TAKEN);
>> +    }
>> +  else if (expr_coherent_p (loop_iv_base_var, compare_var))
>> +    {
>> +      /* For cases like:
>> +          for (i = s; i < h; i++)
>> +            if (i > s + 2) ....
>> +        The branch should be predicted taken.  */
>>       if (loop_bound_step > 0
>> -         && (compare_code == LT_EXPR
>> -             || compare_code == LE_EXPR))
>> +         && (compare_code == GT_EXPR || compare_code == GE_EXPR))
>>        predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE, TAKEN);
>>       else if (loop_bound_step < 0
>> -              && (compare_code == GT_EXPR
>> -                  || compare_code == GE_EXPR))
>> +              && (compare_code == LT_EXPR || compare_code == LE_EXPR))
>>        predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE, TAKEN);
>>       else
>>        predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE, NOT_TAKEN);
>>     }
>> -  else
>> -    /* The branch is predicted not-taken if loop_bound_code is
>> -       opposite with compare_code.  */
>> -    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE, NOT_TAKEN);
>>  }
>>
>>  /* Predict edge probabilities by exploiting loop structure.  */

Re: [google] Refine static branch prediction (iv-compare heuristic)

Reply via email to