The following patch fixes a few things I noticed when looking at PR42108 again. First of all the current setup of
/* Calculate the loop count. to-from can overflow, so we cast to unsigned. */ doesn't work as advertised (well, it does, for the result and -fwrapv) - it does not consider that signed overflow invokes undefined behavior. Second, originally for the innermost loop in PR42108 we create D.1910 = i; D.1911 = *nnd; D.1912 = *n; k = D.1910; if (D.1912 > 0) { if (D.1911 < D.1910) goto L.6; } else { if (D.1911 > D.1910) goto L.6; } countm1.6 = (unsigned int) ((D.1911 - D.1910) * (D.1912 < 0 ? -1 : 1)) / (unsigned int) ((D.1912 < 0 ? -1 : 1) * D.1912); which ends up emitting a D.1912 < 0 conditional block three times which persists until the first VRP / DOM pass. There isn't actually much value in computing countm1 in a single way - apart from folding for the case of a constant (or known sign) step. We can do even better than that by moving the countm1 computation inside the first step sign test: D.1910 = i; D.1911 = *nnd; D.1912 = *n; k = D.1910; if (D.1912 < 0) { if (D.1911 > D.1910) { goto L.6; } else { countm1.6 = ((unsigned int) D.1910 - (unsigned int) D.1911) / -(unsigned int) D.1912; } } else { if (D.1911 < D.1910) { goto L.6; } else { countm1.6 = ((unsigned int) D.1911 - (unsigned int) D.1910) / (unsigned int) D.1912; } } which avoids all the initial redundant control flow instructions and avoids multiplications / negations. I believe this is what we produced in 4.4 ... (apart from the signedness issue). Note that due to the way niter analysis works neither form helps us very much in that regard when the step sign is not known. This is a regression likely caused by 2009-12-01 Janne Blomqvist <j...@gcc.gnu.org> PR fortran/42131 * trans-stmt.c (gfc_trans_do): Sign test using ternary operator. 2009-11-30 Thomas Koenig <tkoe...@gcc.gnu.org> PR fortran/42131 * trans-stmt.c (gfc_trans_do): Calculate loop count without if statements. which I see I triggered somehow. Only that the end-result isn't much better than the original. I believe we can't really make niter analysis work well with unknown step (or even unknown step sign). With the patch below it still arrives at Analyzing # of iterations of loop 3 exit condition [countm1.6_40, + , 4294967295] != 0 bounds on difference of bases: -4294967295 ... 0 result: # of iterations countm1.6_40, bounded by 4294967295 which is correct and as good as it can get. So in the end this patch fixes initial / unoptimized code generation quality regression and possible issues with undefined signed overflow. Bootstrapped on x86_64-unknown-linux-gnu, regtests running. Ok for trunk? Thanks, Richard. 2013-01-16 Richard Biener <rguent...@suse.de> fortran/ * trans-stmt.c (gfc_trans_do): Conditionally compute countm1 dependent on sign of step, avoids repeated evaluation of step sign test. Avoid undefined overflow issues by using unsigned arithmetic. Index: gcc/fortran/trans-stmt.c =================================================================== *** gcc/fortran/trans-stmt.c (revision 195233) --- gcc/fortran/trans-stmt.c (working copy) *************** gfc_trans_do (gfc_code * code, tree exit *** 1542,1548 **** tree cycle_label; tree exit_label; tree tmp; - tree pos_step; stmtblock_t block; stmtblock_t body; location_t loc; --- 1542,1547 ---- *************** gfc_trans_do (gfc_code * code, tree exit *** 1587,1594 **** || tree_int_cst_equal (step, integer_minus_one_node))) return gfc_trans_simple_do (code, &block, dovar, from, to, step, exit_cond); - pos_step = fold_build2_loc (loc, GT_EXPR, boolean_type_node, step, - build_zero_cst (type)); if (TREE_CODE (type) == INTEGER_TYPE) utype = unsigned_type_for (type); --- 1586,1591 ---- *************** gfc_trans_do (gfc_code * code, tree exit *** 1617,1681 **** /* Initialize loop count and jump to exit label if the loop is empty. This code is executed before we enter the loop body. We generate: - step_sign = sign(1,step); if (step > 0) { if (to < from) goto exit_label; } else { if (to > from) goto exit_label; } ! countm1 = (to*step_sign - from*step_sign) / (step*step_sign); ! ! */ if (TREE_CODE (type) == INTEGER_TYPE) { ! tree pos, neg, step_sign, to2, from2, step2; ! /* Calculate SIGN (1,step), as (step < 0 ? -1 : 1) */ ! ! tmp = fold_build2_loc (loc, LT_EXPR, boolean_type_node, step, ! build_int_cst (TREE_TYPE (step), 0)); ! step_sign = fold_build3_loc (loc, COND_EXPR, type, tmp, ! build_int_cst (type, -1), ! build_int_cst (type, 1)); tmp = fold_build2_loc (loc, LT_EXPR, boolean_type_node, to, from); pos = fold_build3_loc (loc, COND_EXPR, void_type_node, tmp, fold_build1_loc (loc, GOTO_EXPR, void_type_node, exit_label), ! build_empty_stmt (loc)); ! tmp = fold_build2_loc (loc, GT_EXPR, boolean_type_node, to, ! from); neg = fold_build3_loc (loc, COND_EXPR, void_type_node, tmp, fold_build1_loc (loc, GOTO_EXPR, void_type_node, exit_label), ! build_empty_stmt (loc)); ! tmp = fold_build3_loc (loc, COND_EXPR, void_type_node, ! pos_step, pos, neg); ! gfc_add_expr_to_block (&block, tmp); ! ! /* Calculate the loop count. to-from can overflow, so ! we cast to unsigned. */ - to2 = fold_build2_loc (loc, MULT_EXPR, type, step_sign, to); - from2 = fold_build2_loc (loc, MULT_EXPR, type, step_sign, from); - step2 = fold_build2_loc (loc, MULT_EXPR, type, step_sign, step); - step2 = fold_convert (utype, step2); - tmp = fold_build2_loc (loc, MINUS_EXPR, type, to2, from2); - tmp = fold_convert (utype, tmp); - tmp = fold_build2_loc (loc, TRUNC_DIV_EXPR, utype, tmp, step2); - tmp = fold_build2_loc (loc, MODIFY_EXPR, void_type_node, countm1, tmp); gfc_add_expr_to_block (&block, tmp); } else { /* TODO: We could use the same width as the real type. This would probably cause more problems that it solves when we implement "long double" types. */ --- 1614,1680 ---- /* Initialize loop count and jump to exit label if the loop is empty. This code is executed before we enter the loop body. We generate: if (step > 0) { if (to < from) goto exit_label; + countm1 = (to - from) / step; } else { if (to > from) goto exit_label; + countm1 = (from - to) / -step; } ! */ if (TREE_CODE (type) == INTEGER_TYPE) { ! tree pos, neg, tou, fromu, stepu, tmp2; ! /* The distance from FROM to TO cannot always be represented in a signed ! type, thus use unsigned arithmetic, also to avoid any undefined ! overflow issues. */ ! tou = fold_convert (utype, to); ! fromu = fold_convert (utype, from); ! stepu = fold_convert (utype, step); + /* For a positive step, when to < from, exit, otherwise compute + countm1 = ((unsigned)to - (unsigned)from) / (unsigned)step */ tmp = fold_build2_loc (loc, LT_EXPR, boolean_type_node, to, from); + tmp2 = fold_build2_loc (loc, TRUNC_DIV_EXPR, utype, + fold_build2_loc (loc, MINUS_EXPR, utype, + tou, fromu), + stepu); pos = fold_build3_loc (loc, COND_EXPR, void_type_node, tmp, fold_build1_loc (loc, GOTO_EXPR, void_type_node, exit_label), ! fold_build2 (MODIFY_EXPR, void_type_node, ! countm1, tmp2)); ! /* For a negative step, when to > from, exit, otherwise compute ! countm1 = ((unsigned)from - (unsigned)to) / -(unsigned)step */ ! tmp = fold_build2_loc (loc, GT_EXPR, boolean_type_node, to, from); ! tmp2 = fold_build2_loc (loc, TRUNC_DIV_EXPR, utype, ! fold_build2_loc (loc, MINUS_EXPR, utype, ! fromu, tou), ! fold_build1_loc (loc, NEGATE_EXPR, utype, stepu)); neg = fold_build3_loc (loc, COND_EXPR, void_type_node, tmp, fold_build1_loc (loc, GOTO_EXPR, void_type_node, exit_label), ! fold_build2 (MODIFY_EXPR, void_type_node, ! countm1, tmp2)); ! tmp = fold_build2_loc (loc, LT_EXPR, boolean_type_node, step, ! build_int_cst (TREE_TYPE (step), 0)); ! tmp = fold_build3_loc (loc, COND_EXPR, void_type_node, tmp, neg, pos); gfc_add_expr_to_block (&block, tmp); } else { + tree pos_step; + /* TODO: We could use the same width as the real type. This would probably cause more problems that it solves when we implement "long double" types. */ *************** gfc_trans_do (gfc_code * code, tree exit *** 1687,1692 **** --- 1686,1693 ---- /* We need a special check for empty loops: empty = (step > 0 ? to < from : to > from); */ + pos_step = fold_build2_loc (loc, GT_EXPR, boolean_type_node, step, + build_zero_cst (type)); tmp = fold_build3_loc (loc, COND_EXPR, boolean_type_node, pos_step, fold_build2_loc (loc, LT_EXPR, boolean_type_node, to, from),