Hi, this patch adds missing profile update to maybe_optimize_range_tests. Jakub, I hope I got the code right: I think it basically analyzes the chain of conditionals, finds some basic blocks involved in the range testing and then puts all the test into first BB.
The patch fixes gcc.dg/tree-ssa/update-threading.c profile misupdate on power-pc. Curiously enough the code is produced differently for x86_64. I tried to find testcase for x86_64 and found that testsuite/gcc.dg/tree-ssa/reassoc-33.c testsuite/gcc.dg/tree-ssa/reassoc-37.c testsuite/gcc.dg/tree-ssa/reassoc-43.c are testing this function. However sadly neighter of these testcases seems to work as expected. For example in testsuite/gcc.dg/tree-ssa/reassoc-33.c we turn ;; basic block 3, loop depth 0, count 708669600 (estimated locally, freq 0.6600), maybe hot ;; prev block 2, next block 4, flags: (NEW, VISITED) ;; pred: 2 [66.0% (guessed)] count:708669600 (estimated locally, freq 0.6600) (FALSE_VALUE,EXECUTABLE) _4 = a_14(D) == 44; _5 = a_14(D) == 78; _30 = 0; _6 = _4 | _5; if (_30 != 0) goto <bb 7>; [34.00%] else goto <bb 4>; [66.00%] ;; succ: 7 [34.0% (guessed)] count:240947667 (estimated locally, freq 0.2244) (TRUE_VALUE,EXECUTABLE) ;; 4 [66.0% (guessed)] count:467721933 (estimated locally, freq 0.4356) (FALSE_VALUE,EXECUTABLE) to ;; basic block 2, loop depth 0, count 1073741824 (estimated locally, freq 1.0000), maybe hot ;; prev block 0, next block 3, flags: (NEW, VISITED) ;; pred: ENTRY [always] count:1073741824 (estimated locally, freq 1.0000) (FALLTHRU,EXECUTABLE) _18 = (unsigned int) a_14(D); _19 = _18 + 4294967253; _24 = (unsigned int) a_14(D); _25 = _24 + 4294967253; _26 = _25 & 4294967260; _27 = _26 == 0; _20 = _19 <= 3; _1 = a_14(D) == 43; _21 = (unsigned int) a_14(D); _22 = _21 + 4294967221; _23 = _22 <= 3; _2 = a_14(D) == 75; _31 = _27; _3 = _1 | _2; if (_31 != 0) goto <bb 7>; [34.00%] else goto <bb 3>; [66.00%] which replaces later tests ;; basic block 4, loop depth 0, count 467721934 (estimated locally, freq 0.4356), maybe hot ;; prev block 3, next block 5, flags: (NEW, VISITED) ;; pred: 3 [66.0% (guessed)] count:467721933 (estimated locally, freq 0.4356) (FALSE_VALUE,EXECUTABLE) _7 = a_14(D) == 77; _8 = a_14(D) == 46; _29 = 0; _9 = _7 | _8; if (_29 != 0) goto <bb 7>; [34.00%] else goto <bb 5>; [66.00%] ;; basic block 5, loop depth 0, count 308696475 (estimated locally, freq 0.2875), maybe hot ;; prev block 4, next block 6, flags: (NEW, VISITED) ;; pred: 4 [66.0% (guessed)] count:308696475 (estimated locally, freq 0.2875) (FALSE_VALUE,EXECUTABLE) _10 = a_14(D) == 76; _11 = a_14(D) == 45; _28 = 0; _12 = _10 | _11; if (_28 != 0) goto <bb 7>; [50.00%] else goto <bb 6>; [50.00%] ;; succ: 7 [50.0% (guessed)] count:154348238 (estimated locally, freq 0.1437) (TRUE_VALUE,EXECUTABLE) ;; 6 [50.0% (guessed)] count:154348238 (estimated locally, freq 0.1437) (FALSE_VALUE,EXECUTABLE) However BB4 and BB5 is not updated to be unconditional by tree-ssa-reassoc pass and we thus miss the profile update. This happens later in forwprop but at that time it is too late to update the probabilities. So we get: ;; basic block 2, loop depth 0, count 1073741824 (estimated locally, freq 1.0000), maybe hot ;; prev block 0, next block 3, flags: (NEW, VISITED) ;; pred: ENTRY [always] count:1073741824 (estimated locally, freq 1.0000) (FALLTHRU,EXECUTABLE) _24 = (unsigned int) a_14(D); _25 = _24 + 4294967253; _26 = _25 & 4294967260; _27 = _26 == 0; if (_26 == 0) goto <bb 4>; [34.00%] else goto <bb 3>; [66.00%] ;; succ: 4 [34.0% (guessed)] count:365072224 (estimated locally, freq 0.3400) (TRUE_VALUE,EXECUTABLE) ;; 3 [66.0% (guessed)] count:708669600 (estimated locally, freq 0.6600) (FALSE_VALUE,EXECUTABLE) ;; basic block 3, loop depth 0, count 154348237 (estimated locally, freq 0.1437), maybe hot ;; Invalid sum of incoming counts 708669600 (estimated locally, freq 0.6600), should be 154348237 (estimated locally, freq 0.1437) ;; prev block 2, next block 4, flags: (NEW, VISITED) ;; pred: 2 [66.0% (guessed)] count:708669600 (estimated locally, freq 0.6600) (FALSE_VALUE,EXECUTABLE) ;; succ: 4 [always] count:154348237 (estimated locally, freq 0.1437) (FALLTHRU,EXECUTABLE) c.c:12:12 ;; basic block 4, loop depth 0, count 1073741824 (estimated locally, freq 1.0000), maybe hot ;; Invalid sum of incoming counts 519420461 (estimated locally, freq 0.4837), should be 1073741824 (estimated locally, freq 1.0000) ;; prev block 3, next block 1, flags: (NEW, VISITED) ;; pred: 2 [34.0% (guessed)] count:365072224 (estimated locally, freq 0.3400) (TRUE_VALUE,EXECUTABLE) ;; 3 [always] count:154348237 (estimated locally, freq 0.1437) (FALLTHRU,EXECUTABLE) c.c:12:12 # _13 = PHI <b_16(D)(2), c_15(D)(3)> return _13; Jakub, it seems that the code is originally yours. Any idea why those are not turned to constant true or false conditionals? Bootstrapped/regtested x86_64-linux, does it seem to make sense? gcc/ChangeLog: PR tree-optimization/110628 * tree-ssa-reassoc.cc (maybe_optimize_range_tests): Add profile update. diff --git a/gcc/tree-ssa-reassoc.cc b/gcc/tree-ssa-reassoc.cc index eda03bf98a6..a718a0f41f1 100644 --- a/gcc/tree-ssa-reassoc.cc +++ b/gcc/tree-ssa-reassoc.cc @@ -5091,6 +5091,8 @@ maybe_optimize_range_tests (gimple *stmt) if (bb == first_bb) break; } + profile_probability extra_other_prob = profile_probability::never (); + auto_vec<basic_block, 8> bbs; for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++) { if (bbinfo[idx].first_idx < bbinfo[idx].last_idx @@ -5098,6 +5100,8 @@ maybe_optimize_range_tests (gimple *stmt) && ops[bbinfo[idx].first_idx]->op != NULL_TREE) { gcond *cond_stmt = as_a <gcond *> (*gsi_last_bb (bb)); + edge true_e, false_e; + extract_true_false_edges_from_block (bb, &true_e, &false_e); if (idx > max_idx) max_idx = idx; @@ -5108,25 +5112,50 @@ maybe_optimize_range_tests (gimple *stmt) { gimple_cond_make_false (cond_stmt); cfg_cleanup_needed = true; + if (true_e->dest == other_bb) + extra_other_prob += true_e->probability; + false_e->probability = profile_probability::always (); + true_e->probability = profile_probability::never (); } else if (integer_onep (ops[bbinfo[idx].first_idx]->op)) { gimple_cond_make_true (cond_stmt); cfg_cleanup_needed = true; + if (false_e->dest == other_bb) + extra_other_prob += false_e->probability; + true_e->probability = profile_probability::always (); + false_e->probability = profile_probability::never (); } else { + gcc_assert (bb = first_bb); gimple_cond_set_code (cond_stmt, NE_EXPR); gimple_cond_set_lhs (cond_stmt, ops[bbinfo[idx].first_idx]->op); gimple_cond_set_rhs (cond_stmt, boolean_false_node); + if (bb->count.initialized_p () && bb->count.nonzero_p ()) + { + edge e_to_other = true_e->dest == other_bb + ? true_e : false_e; + edge e_to_tests = true_e->dest == other_bb + ? false_e : true_e; + e_to_other->probability += extra_other_prob; + e_to_tests->probability + = e_to_other->probability.invert (); + } } update_stmt (cond_stmt); } if (bb == first_bb) break; + bbs.safe_push (bb); + extra_other_prob *= single_pred_edge (bb)->probability; } + /* Finaly update counts of basic blocks in the chain. */ + for (basic_block bb : bbs) + bb->count = single_pred_edge (bb)->count (); + /* The above changes could result in basic blocks after the first modified one, up to and including last_bb, to be executed even if they would not be in the original program. If the value ranges of