On 2021/10/21 18:55, Richard Biener wrote:
> On Thu, 21 Oct 2021, Xionghu Luo wrote:
> 
>>
>>
>> On 2021/10/15 13:51, Xionghu Luo via Gcc-patches wrote:
>>>
>>>
>>> On 2021/9/23 20:17, Richard Biener wrote:
>>>> On Wed, 22 Sep 2021, Xionghu Luo wrote:
>>>>
>>>>>
>>>>>
>>>>> On 2021/8/11 17:16, Richard Biener wrote:
>>>>>> On Wed, 11 Aug 2021, Xionghu Luo wrote:
>>>>>>
>>>>>>>
>>>>>>>
>>>>>>> On 2021/8/10 22:47, Richard Biener wrote:
>>>>>>>> On Mon, 9 Aug 2021, Xionghu Luo wrote:
>>>>>>>>
>>>>>>>>> Thanks,
>>>>>>>>>
>>>>>>>>> On 2021/8/6 19:46, Richard Biener wrote:
>>>>>>>>>> On Tue, 3 Aug 2021, Xionghu Luo wrote:
>>>>>>>>>>
>>>>>>>>>>> loop split condition is moved between loop1 and loop2, the split 
>>>>>>>>>>> bb's
>>>>>>>>>>> count and probability should also be duplicated instead of (100% vs
>>>>>>>>>>> INV),
>>>>>>>>>>> secondly, the original loop1 and loop2 count need be propotional 
>>>>>>>>>>> from
>>>>>>>>>>> the
>>>>>>>>>>> original loop.
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> diff base/loop-cond-split-1.c.151t.lsplit
>>>>>>>>>>> patched/loop-cond-split-1.c.151t.lsplit:
>>>>>>>>>>> ...
>>>>>>>>>>>       int prephitmp_16;
>>>>>>>>>>>       int prephitmp_25;
>>>>>>>>>>>
>>>>>>>>>>>       <bb 2> [local count: 118111600]:
>>>>>>>>>>>       if (n_7(D) > 0)
>>>>>>>>>>>         goto <bb 4>; [89.00%]
>>>>>>>>>>>       else
>>>>>>>>>>>         goto <bb 3>; [11.00%]
>>>>>>>>>>>
>>>>>>>>>>>       <bb 3> [local count: 118111600]:
>>>>>>>>>>>       return;
>>>>>>>>>>>
>>>>>>>>>>>       <bb 4> [local count: 105119324]:
>>>>>>>>>>>       pretmp_3 = ga;
>>>>>>>>>>>
>>>>>>>>>>> -  <bb 5> [local count: 955630225]:
>>>>>>>>>>> +  <bb 5> [local count: 315357973]:
>>>>>>>>>>>       # i_13 = PHI <i_10(20), 0(4)>
>>>>>>>>>>>       # prephitmp_12 = PHI <prephitmp_5(20), pretmp_3(4)>
>>>>>>>>>>>       if (prephitmp_12 != 0)
>>>>>>>>>>>         goto <bb 6>; [33.00%]
>>>>>>>>>>>       else
>>>>>>>>>>>         goto <bb 7>; [67.00%]
>>>>>>>>>>>
>>>>>>>>>>> -  <bb 6> [local count: 315357972]:
>>>>>>>>>>> +  <bb 6> [local count: 104068130]:
>>>>>>>>>>>       _2 = do_something ();
>>>>>>>>>>>       ga = _2;
>>>>>>>>>>>
>>>>>>>>>>> -  <bb 7> [local count: 955630225]:
>>>>>>>>>>> +  <bb 7> [local count: 315357973]:
>>>>>>>>>>>       # prephitmp_5 = PHI <prephitmp_12(5), _2(6)>
>>>>>>>>>>>       i_10 = inc (i_13);
>>>>>>>>>>>       if (n_7(D) > i_10)
>>>>>>>>>>>         goto <bb 21>; [89.00%]
>>>>>>>>>>>       else
>>>>>>>>>>>         goto <bb 11>; [11.00%]
>>>>>>>>>>>
>>>>>>>>>>>       <bb 11> [local count: 105119324]:
>>>>>>>>>>>       goto <bb 3>; [100.00%]
>>>>>>>>>>>
>>>>>>>>>>> -  <bb 21> [local count: 850510901]:
>>>>>>>>>>> +  <bb 21> [local count: 280668596]:
>>>>>>>>>>>       if (prephitmp_12 != 0)
>>>>>>>>>>> -    goto <bb 20>; [100.00%]
>>>>>>>>>>> +    goto <bb 20>; [33.00%]
>>>>>>>>>>>       else
>>>>>>>>>>> -    goto <bb 19>; [INV]
>>>>>>>>>>> +    goto <bb 19>; [67.00%]
>>>>>>>>>>>
>>>>>>>>>>> -  <bb 20> [local count: 850510901]:
>>>>>>>>>>> +  <bb 20> [local count: 280668596]:
>>>>>>>>>>>       goto <bb 5>; [100.00%]
>>>>>>>>>>>
>>>>>>>>>>> -  <bb 19> [count: 0]:
>>>>>>>>>>> +  <bb 19> [local count: 70429947]:
>>>>>>>>>>>       # i_23 = PHI <i_10(21)>
>>>>>>>>>>>       # prephitmp_25 = PHI <prephitmp_5(21)>
>>>>>>>>>>>
>>>>>>>>>>> -  <bb 12> [local count: 955630225]:
>>>>>>>>>>> +  <bb 12> [local count: 640272252]:
>>>>>>>>>>>       # i_15 = PHI <i_23(19), i_22(16)>
>>>>>>>>>>>       # prephitmp_16 = PHI <prephitmp_25(19), prephitmp_16(16)>
>>>>>>>>>>>       i_22 = inc (i_15);
>>>>>>>>>>>       if (n_7(D) > i_22)
>>>>>>>>>>>         goto <bb 16>; [89.00%]
>>>>>>>>>>>       else
>>>>>>>>>>>         goto <bb 11>; [11.00%]
>>>>>>>>>>>
>>>>>>>>>>> -  <bb 16> [local count: 850510901]:
>>>>>>>>>>> +  <bb 16> [local count: 569842305]:
>>>>>>>>>>>       goto <bb 12>; [100.00%]
>>>>>>>>>>>
>>>>>>>>>>>     }
>>>>>>>>>>>
>>>>>>>>>>> gcc/ChangeLog:
>>>>>>>>>>>
>>>>>>>>>>>  * tree-ssa-loop-split.c (split_loop): Fix incorrect probability.
>>>>>>>>>>>  (do_split_loop_on_cond): Likewise.
>>>>>>>>>>> ---
>>>>>>>>>>>     gcc/tree-ssa-loop-split.c | 16 ++++++++--------
>>>>>>>>>>>     1 file changed, 8 insertions(+), 8 deletions(-)
>>>>>>>>>>>
>>>>>>>>>>> diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c
>>>>>>>>>>> index 3a09bbc39e5..8e5a7ded0f7 100644
>>>>>>>>>>> --- a/gcc/tree-ssa-loop-split.c
>>>>>>>>>>> +++ b/gcc/tree-ssa-loop-split.c
>>>>>>>>>>> @@ -583,10 +583,10 @@ split_loop (class loop *loop1)
>>>>>>>>>>>      basic_block cond_bb;
>>>>>>>>>
>>>>>>>>>       if (!initial_true)
>>>>>>>>> -       cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond);
>>>>>>>>> +       cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond);
>>>>>>>>> +
>>>>>>>>> +     edge true_edge = EDGE_SUCC (bbs[i], 0)->flags & EDGE_TRUE_VALUE
>>>>>>>>> +                        ? EDGE_SUCC (bbs[i], 0)
>>>>>>>>> +                        : EDGE_SUCC (bbs[i], 1);
>>>>>>>>>
>>>>>>>>>>>     
>>>>>>>>>>>             class loop *loop2 = loop_version (loop1, cond, &cond_bb,
>>>>>>>>>>> -                                      profile_probability::always
>>>>>>>>>>> (),
>>>>>>>>>>> -                                      profile_probability::always
>>>>>>>>>>> (),
>>>>>>>>>>> -                                      profile_probability::always
>>>>>>>>>>> (),
>>>>>>>>>>> -                                      profile_probability::always
>>>>>>>>>>> (),
>>>>>>>>>>> +                                      true_edge->probability,
>>>>>>>>>>> +
>>>>>>>>>>> true_edge->probability.invert (),
>>>>>>>>>>> +                                      true_edge->probability,
>>>>>>>>>>> +
>>>>>>>>>>> true_edge->probability.invert (),
>>>>>>>>>>>             true);
>>>>>>>>>>
>>>>>>>>>> there is no 'true_edge' variable at this point.
>>>>>>>>>
>>>>>>>>> Sorry, missed the above hunk when split the patch.
>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>>      gcc_assert (loop2);
>>>>>>>>>>>     @@ -1486,10 +1486,10 @@ do_split_loop_on_cond (struct loop 
>>>>>>>>>>> *loop1,
>>>>>>>>>>> edge invar_branch)
>>>>>>>>>>>       initialize_original_copy_tables ();
>>>>>>>>>>>     
>>>>>>>>>>>       struct loop *loop2 = loop_version (loop1, boolean_true_node,
>>>>>>>>>>> NULL,
>>>>>>>>>>> -                                profile_probability::always (),
>>>>>>>>>>> -                                profile_probability::never (),
>>>>>>>>>>> -                                profile_probability::always (),
>>>>>>>>>>> -                                profile_probability::always (),
>>>>>>>>>>> +                                invar_branch->probability.invert
>>>>>>>>>>> (),
>>>>>>>>>>> +                                invar_branch->probability,
>>>>>>>>>>> +                                invar_branch->probability.invert
>>>>>>>>>>> (),
>>>>>>>>>>> +                                invar_branch->probability,
>>>>>>>>>>>                                          true);
>>>>>>>>>>>       if (!loop2)
>>>>>>>>>>>         {
>>>>>>>>>>
>>>>>>>>>> The patch introduction seems to talk about do_split_loop_on_cond 
>>>>>>>>>> only.
>>>>>>>>>
>>>>>>>>> split_loop faces similar issue though it sets the two branches to 
>>>>>>>>> 100% vs
>>>>>>>>> 100%
>>>>>>>>> and no scaling which seems also incorrect.
>>>>>>>>>
>>>>>>>>>> Since loop versioning inserts a condition with the passed 
>>>>>>>>>> probabilities
>>>>>>>>>> but in this case a 'boolean_true_node' condition the then and else
>>>>>>>>>> probabilities passed look correct.  It's just the scaling arguments
>>>>>>>>>> that look wrong?  This loop_version call should get a comment as to
>>>>>>>>>> why we are passing probabilities the way we do.
>>>>>>>>>
>>>>>>>>> This optimization is transforming from:
>>>>>>>>>
>>>>>>>>> for (i = 0; i < n; i = inc (i))
>>>>>>>>>       {
>>>>>>>>>         if (ga)
>>>>>>>>>           ga = do_something ();
>>>>>>>>> }
>>>>>>>>>
>>>>>>>>> to:
>>>>>>>>>
>>>>>>>>>     for (i = 0; i < x; i = inc (i))
>>>>>>>>> {
>>>>>>>>>       if (true)
>>>>>>>>>           ga = do_something ();
>>>>>>>>>           if (!ga)
>>>>>>>>>             break;
>>>>>>>>> }
>>>>>>>>>     for (; i < n; i = inc (i))
>>>>>>>>> {
>>>>>>>>>       if (false)
>>>>>>>>>            ga = do_something ();
>>>>>>>>> }
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> `boolean_true_node` is passed in to make the first loop's condition
>>>>>>>>> statement to be 'true', after returning from loop_version, there is a
>>>>>>>>> piece of code forcing the condition in second loop to be false,
>>>>>>>>> and the original condition is moved from *in loop* to *exit edge*
>>>>>>>>> between loop1 and loop2.
>>>>>>>>
>>>>>>>> Yes, one complication is that we use loop_version but do not retain
>>>>>>>> the CFG it creates.  Something like the vectorizers
>>>>>>>> slpeel_tree_duplicate_loop_to_edge_cfg would be a better "fit"
>>>>>>>> but then that code doesn't do any scaling yet.  But then
>>>>>>>> loop_version uses cfg_hook_duplicate_loop_to_header_edge and I suppose
>>>>>>>> we could write a variant that simply doesn't mangle the CFG
>>>>>>>> with a new condition switching between both loops but keeps them
>>>>>>>> executed after each other ...
>>>>>>>>
>>>>>>>> As said, this adds to the confusion and some awkwardness.
>>>>>>>
>>>>>>> Then loop_version in loop split requires two types of variant, one
>>>>>>> is to insert condition to loop preheader for 'split_loops' usage,
>>>>>>> another is to insert condition to loop exit for 'do_split_loop_on_cond'
>>>>>>> usage, it needs one extra function to encapsulate these cfg alterations
>>>>>>> from outside to inside.
>>>>>>>
>>>>>>> unswitching only execute one loop as it only moves the invariant 
>>>>>>> condition
>>>>>>> to first loop's pre-header.  While 'split_loops' and
>>>>>>> 'do_split_loop_on_cond'
>>>>>>> may execute both loops no matter the condition is moved to the first 
>>>>>>> loop's
>>>>>>> preheader or exit.
>>>>>>>
>>>>>>> The condition stmt in loop unswitching is invariant, but it is variant
>>>>>>> in loop splitting, that's why loop unswitching execute only one loop
>>>>>>> but loop splitting executes both loops.
>>>>>>>
>>>>>>> Seems we need two condition arguments for loop_version, one for 
>>>>>>> connecting
>>>>>>> loop1 preheader to loop2 preheader, another one for connecting loop1's 
>>>>>>> exit
>>>>>>> to loop2's header?  Then it will be more generic for both unswitching 
>>>>>>> pass
>>>>>>> and splitting pass.  Looks a bit complicated and conflicted with
>>>>>>> loop_version's
>>>>>>> comments:
>>>>>>>
>>>>>>> /* Main entry point for Loop Versioning transformation.
>>>>>>>
>>>>>>>     This transformation given a condition and a loop, creates
>>>>>>>     -if (condition) { loop_copy1 } else { loop_copy2 }, ... */
>>>>>>>
>>>>>>>
>>>>>>> And this only works for loop split usage, those many other places
>>>>>>> doesn't use loop_version like this?
>>>>>>
>>>>>> Yes, as said if you don't want the above CFG then you probably
>>>>>> shouldn't use loop_version but instead use its building blocks
>>>>>> (and some refactoring of loop_version can make that easier).
>>>>>>
>>>>>> I think splitting wants
>>>>>>
>>>>>>    loop_copy1
>>>>>>    if (condition)
>>>>>>      loop_copy2
>>>>>>
>>>>>> IMHO it would be good to split 'loopify' into the actual loopification
>>>>>> and the scaling.  Basically make the part of loop_version that
>>>>>> copies the loop on the header edge and creates a loop structure for
>>>>>> it separate.
>>>>>>
>>>>>> loop distribution uses slpeel_tree_duplicate_loop_to_edge_cfg
>>>>>> (copy_loop_before).
>>>>>>
>>>>>
>>>>> Unfortunately slpeel_tree_duplicate_loop_to_edge_cfg only supports
>>>>> copying loops with single exit, it would cause many ICEs in it even
>>>>> building GCC stage1 (line 1065, line 1184 due to exit or new_exit
>>>>> is NULL returning from single_exit (loop).).  Seems loop_version is
>>>>> more flexible for loop split.
>>>>
>>>> Hmm, sure - loop_version does not need to do anything special with
>>>> exits since control flow either enters the original or the loop copy.
>>>>
>>>> But slpeel_tree_duplicate_loop_to_edge_cfg intends to create
>>>> control flow that enters _both_ loops, so it needs to have
>>>> an edge from the first loop exit to the second loop entry.
>>>>
>>>> One could extend this to a region copy, copying eventual CFG merges
>>>> of exits and specifying which exit of a SEME region is supposed
>>>> to be connected to the original region entry.
>>>>
>>>> After all that's what loop splitting needs in the end - though not
>>>> sure what exactly it does with more than one exit.
>>>
>>> In tree-ssa-loop-split.c,  split_loop only accepts single exit loop,
>>> the recently added split_loop_on_cond could process multiple exits loop.
>>>
>>> For example, do some changes to the loop-cond-split-1.c,
>>>
>>> int ga;
>>> extern int a;
>>> extern int b;
>>> extern int c;
>>>
>>> void test1 (int n) {
>>>   int i;
>>>   for (i = 0; i < n; i = inc (i)). {
>>>       if (a+3 > 0)
>>>        break;
>>>
>>>       if (ga)
>>>         ga = do_something ();
>>>
>>>       for (int j = 0; j < n; j++)
>>>         {
>>>            b+=5;
>>>            if (b > c) break;
>>>         }
>>>     }
>>> }
>>>
>>> the "if (ga)" will be a new exit edge from loop_copy1 to loop_copy2. 
>>> I am not sure whether it is valuable to do semi-invariant loop split to such
>>> cases with multiple exits, but obviously the split_loop_on_cond is a special
>>> case from split_loop both duplicate loop to 
>>>    if (condition1) {loop_copy1} if (condition2) {loop_copy2}
>>> The only difference is condition1 is true for split_loop_on_cond.
>>>
>>>>
>>>> So there's another set of "loop" copying, gimple_duplicate_sese_region,
>>>> which doesn't actually require a single exit but a single "important"
>>>> exit.  That might match how you treat multiple exits.
>>>
>>> gimple_duplicate_sese_region doesn't handle subloops and latches.  Finally,
>>> I found that loop_version is still much better
>>> than slpeel_tree_duplicate_loop_to_edge_cfg and gimple_duplicate_sese_region
>>> since it could handle all cases like multiple exits/subloops, etc.  I did 
>>> some
>>> refactor to the code to introduce loop_version2 to create duplicated loops
>>> with two input conditions as attached patch, is this reasonable enough?
>>>
>>> I also tried to copy the code in loop_version out of it to don't call 
>>> loop_version
>>> in loop_version2, but it seems useless with many duplicate code and NOT get 
>>> rid
>>> of creating "if (condition1) {loop_copy1}" at first?
>>>
>>>
>>
>> The previous attached patch 
>> 0001-Add-loop_version2-to-support-loop-transformation-wit.patch
>> had issues when connecting two loops, reason is split_loop connect_loops at 
>> *exit* edge,
>> do_split_loop_on_cond connect_loops at latch_edge.  So they have different 
>> CFGs even
>> both with two conditions :(.  It seems not so that useful to also define 
>> another new function
>> 'connect_loops_at_latch' given the limited usage in loop split only?  And 
>> the loop_version2
>> also looks not so general again ...
> 
> All I wanted to say is that none of the current high-level APIs we have
> are a 1:1 match for loop splitting and that they in fact might do
> stuff that's unnecessary or counter-productive.  If inventing a new API
> doesn't sound appealing then maybe refactoring an existing
> (maybe loop_version) to expose re-usable pieces would make more sense.
> 
> For example loop_version currently does lv_adjust_loop_entry_edge
> before it loopifys the copy inserted on the header.  I wonder if
> the condition generation can be done later and thus we can
> have three pieces - 1) duplicating the loop on the entry edge,
> 2) adjusting the CFG to insert a condition branching to either loop,
> 3) from loopify extract the scale_loop_frequencies bits (in fact
> loopify is only used from within cfgloopmanip.c)
> 
> That said, it shouldn't be a requirement to do all this to fix the
> profile for loop splitting it's just that the current situation
> does not help understanding of how the adjustment works.

The attached patch 0002-Refactor-loop_version.patch is to refactor loop_version
according to your above comments.

loopify is moved before condition generation, scale_loop_frequencies is
extracted out of loopify. 

0001-Fix-loop-split-incorrect-count-and-probability.patch is still the initial
version that fixes the probability and frequency issues in loop split, just a
repeat, both passed regression test on P8LE, OK for master?


> 
> Richard.
> 

-- 
Thanks,
Xionghu
From 2b4a48f05fb6eac83ec6153b8f89496ee8807b88 Mon Sep 17 00:00:00 2001
From: Xionghu Luo <luo...@linux.ibm.com>
Date: Tue, 3 Aug 2021 03:44:14 -0500
Subject: [PATCH 1/2] Fix loop split incorrect count and probability

loop split condition is moved between loop1 and loop2, the split bb's
count and probability should also be duplicated instead of (100% vs INV),
secondly, the original loop1 and loop2 count need be propotional from the
original loop.

Regression tested pass, OK for master?

diff base/loop-cond-split-1.c.151t.lsplit  
patched/loop-cond-split-1.c.151t.lsplit:
...
   int prephitmp_16;
   int prephitmp_25;

   <bb 2> [local count: 118111600]:
   if (n_7(D) > 0)
     goto <bb 4>; [89.00%]
   else
     goto <bb 3>; [11.00%]

   <bb 3> [local count: 118111600]:
   return;

   <bb 4> [local count: 105119324]:
   pretmp_3 = ga;

-  <bb 5> [local count: 955630225]:
+  <bb 5> [local count: 315357973]:
   # i_13 = PHI <i_10(20), 0(4)>
   # prephitmp_12 = PHI <prephitmp_5(20), pretmp_3(4)>
   if (prephitmp_12 != 0)
     goto <bb 6>; [33.00%]
   else
     goto <bb 7>; [67.00%]

-  <bb 6> [local count: 315357972]:
+  <bb 6> [local count: 104068130]:
   _2 = do_something ();
   ga = _2;

-  <bb 7> [local count: 955630225]:
+  <bb 7> [local count: 315357973]:
   # prephitmp_5 = PHI <prephitmp_12(5), _2(6)>
   i_10 = inc (i_13);
   if (n_7(D) > i_10)
     goto <bb 21>; [89.00%]
   else
     goto <bb 11>; [11.00%]

   <bb 11> [local count: 105119324]:
   goto <bb 3>; [100.00%]

-  <bb 21> [local count: 850510901]:
+  <bb 21> [local count: 280668596]:
   if (prephitmp_12 != 0)
-    goto <bb 20>; [100.00%]
+    goto <bb 20>; [33.00%]
   else
-    goto <bb 19>; [INV]
+    goto <bb 19>; [67.00%]

-  <bb 20> [local count: 850510901]:
+  <bb 20> [local count: 280668596]:
   goto <bb 5>; [100.00%]

-  <bb 19> [count: 0]:
+  <bb 19> [local count: 70429947]:
   # i_23 = PHI <i_10(21)>
   # prephitmp_25 = PHI <prephitmp_5(21)>

-  <bb 12> [local count: 955630225]:
+  <bb 12> [local count: 640272252]:
   # i_15 = PHI <i_23(19), i_22(16)>
   # prephitmp_16 = PHI <prephitmp_25(19), prephitmp_16(16)>
   i_22 = inc (i_15);
   if (n_7(D) > i_22)
     goto <bb 16>; [89.00%]
   else
     goto <bb 11>; [11.00%]

-  <bb 16> [local count: 850510901]:
+  <bb 16> [local count: 569842305]:
   goto <bb 12>; [100.00%]

 }

gcc/ChangeLog:

        * tree-ssa-loop-split.c (split_loop): Fix incorrect probability.
        (do_split_loop_on_cond): Likewise.
---
 gcc/tree-ssa-loop-split.c | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c
index 3f6ad046623..d30782888f3 100644
--- a/gcc/tree-ssa-loop-split.c
+++ b/gcc/tree-ssa-loop-split.c
@@ -575,7 +575,11 @@ split_loop (class loop *loop1)
                                            stmts2);
        tree cond = build2 (guard_code, boolean_type_node, guard_init, border);
        if (!initial_true)
-         cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond); 
+         cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond);
+
+       edge true_edge = EDGE_SUCC (bbs[i], 0)->flags & EDGE_TRUE_VALUE
+                          ? EDGE_SUCC (bbs[i], 0)
+                          : EDGE_SUCC (bbs[i], 1);
 
        /* Now version the loop, placing loop2 after loop1 connecting
           them, and fix up SSA form for that.  */
@@ -583,10 +587,10 @@ split_loop (class loop *loop1)
        basic_block cond_bb;
 
        class loop *loop2 = loop_version (loop1, cond, &cond_bb,
-                                          profile_probability::always (),
-                                          profile_probability::always (),
-                                          profile_probability::always (),
-                                          profile_probability::always (),
+                                          true_edge->probability,
+                                          true_edge->probability.invert (),
+                                          true_edge->probability,
+                                          true_edge->probability.invert (),
                                           true);
        gcc_assert (loop2);
 
@@ -1486,10 +1490,10 @@ do_split_loop_on_cond (struct loop *loop1, edge 
invar_branch)
   initialize_original_copy_tables ();
 
   struct loop *loop2 = loop_version (loop1, boolean_true_node, NULL,
-                                    profile_probability::always (),
-                                    profile_probability::never (),
-                                    profile_probability::always (),
-                                    profile_probability::always (),
+                                    invar_branch->probability.invert (),
+                                    invar_branch->probability,
+                                    invar_branch->probability.invert (),
+                                    invar_branch->probability,
                                     true);
   if (!loop2)
     {
@@ -1530,6 +1534,9 @@ do_split_loop_on_cond (struct loop *loop1, edge 
invar_branch)
   to_loop1->flags |= true_invar ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE;
   to_loop2->flags |= true_invar ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;
 
+  to_loop1->probability = invar_branch->probability.invert ();
+  to_loop2->probability = invar_branch->probability;
+
   /* Due to introduction of a control flow edge from loop1 latch to loop2
      pre-header, we should update PHIs in loop2 to reflect this connection
      between loop1 and loop2.  */
-- 
2.27.0.90.geebb51ba8c

From e55a70b8d6fe1b0d2bf0905613acaff087bd40e9 Mon Sep 17 00:00:00 2001
From: Xiong Hu Luo <luo...@linux.ibm.com>
Date: Mon, 25 Oct 2021 03:38:55 -0500
Subject: [PATCH 2/2] Refactor loop_version

loop_version currently does lv_adjust_loop_entry_edge
before it loopifys the copy inserted on the header.  This patch moves
the condition generation later and thus we have four pieces to help
understanding of how the adjustment works:
 1) duplicating the loop on the entry edge.
 2) loopify the duplicated new loop.
 3) adjusting the CFG to insert a condition branching to either loop
 with lv_adjust_loop_entry_edge.
 4) From loopify extract the scale_loop_frequencies bits.

I also removed some piece of code seems obviously useless which is not
completely sure:
 - redirect_all_edges since it is false and loopify only called once.
 - extract_cond_bb_edges and lv_flush_pending_stmts (false_edge) as the
 edge is not redirected actually.
 - The unused function loopify is still not removed yet.

gcc/ChangeLog:

        * cfgloopmanip.c (loop_version): Refactor loopify to
        loop_version.  Move condition generation after loopify.
---
 gcc/cfgloopmanip.c | 48 ++++++++++++++++++++++++++++------------------
 1 file changed, 29 insertions(+), 19 deletions(-)

diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c
index 82c242dd720..5d1491b8c1c 100644
--- a/gcc/cfgloopmanip.c
+++ b/gcc/cfgloopmanip.c
@@ -1681,7 +1681,7 @@ loop_version (class loop *loop,
              bool place_after)
 {
   basic_block first_head, second_head;
-  edge entry, latch_edge, true_edge, false_edge;
+  edge entry, latch_edge;
   int irred_flag;
   class loop *nloop;
   basic_block cond_bb;
@@ -1694,7 +1694,7 @@ loop_version (class loop *loop,
   /* Note down head of loop as first_head.  */
   first_head = entry->dest;
 
-  /* Duplicate loop.  */
+  /* 1) Duplicate loop on the entry edge.  */
   if (!cfg_hook_duplicate_loop_to_header_edge (loop, entry, 1,
                                               NULL, NULL, NULL, 0))
     {
@@ -1702,11 +1702,28 @@ loop_version (class loop *loop,
       return NULL;
     }
 
+  /* 2) loopify the duplicated new loop. */
+  latch_edge = single_succ_edge (get_bb_copy (loop->latch));
+  nloop = alloc_loop ();
+  class loop *outer = loop_outer (latch_edge->dest->loop_father);
+  edge new_header_edge = single_pred_edge (get_bb_copy (loop->header));
+  nloop->header = new_header_edge->dest;
+  nloop->latch = latch_edge->src;
+  loop_redirect_edge (latch_edge, nloop->header);
+
+  /* Compute new loop.  */
+  add_loop (nloop, outer);
+  copy_loop_info (loop, nloop);
+  set_loop_copy (loop, nloop);
+
+  /* loopify redirected latch_edge. Update its PENDING_STMTS.  */
+  lv_flush_pending_stmts (latch_edge);
+
   /* After duplication entry edge now points to new loop head block.
      Note down new head as second_head.  */
   second_head = entry->dest;
 
-  /* Split loop entry edge and insert new block with cond expr.  */
+  /* 3) Split loop entry edge and insert new block with cond expr.  */
   cond_bb =  lv_adjust_loop_entry_edge (first_head, second_head,
                                        entry, cond_expr, then_prob, else_prob);
   if (condition_bb)
@@ -1718,24 +1735,17 @@ loop_version (class loop *loop,
       return NULL;
     }
 
-  latch_edge = single_succ_edge (get_bb_copy (loop->latch));
-
-  extract_cond_bb_edges (cond_bb, &true_edge, &false_edge);
-  nloop = loopify (latch_edge,
-                  single_pred_edge (get_bb_copy (loop->header)),
-                  cond_bb, true_edge, false_edge,
-                  false /* Do not redirect all edges.  */,
-                  then_scale, else_scale);
-
-  copy_loop_info (loop, nloop);
-  set_loop_copy (loop, nloop);
+  /* Add cond_bb to appropriate loop.  */
+  if (cond_bb->loop_father)
+    remove_bb_from_loops (cond_bb);
+  add_bb_to_loop (cond_bb, outer);
 
-  /* loopify redirected latch_edge. Update its PENDING_STMTS.  */
-  lv_flush_pending_stmts (latch_edge);
+  /* 4) Scale the original loop and new loop frequency.  */
+  scale_loop_frequencies (loop, then_scale);
+  scale_loop_frequencies (nloop, else_scale);
+  update_dominators_in_loop (loop);
+  update_dominators_in_loop (nloop);
 
-  /* loopify redirected condition_bb's succ edge. Update its PENDING_STMTS.  */
-  extract_cond_bb_edges (cond_bb, &true_edge, &false_edge);
-  lv_flush_pending_stmts (false_edge);
   /* Adjust irreducible flag.  */
   if (irred_flag)
     {
-- 
2.27.0.90.geebb51ba8c

Reply via email to