Re: [PATCH] phiopt: Optimize x < 0 ? ~y : y to (x >> 31) ^ y [PR96928]
On Tue, 5 Jan 2021, Jakub Jelinek wrote: > Hi! > > As requested in the PR, the one's complement abs can be done more > efficiently without cmov or branching. > > Had to change the ifcvt-onecmpl-abs-1.c testcase, we no longer optimize > it in ifcvt, on x86_64 with -m32 we generate in the end the exact same > code, but with -m64: > movl%edi, %eax > - notl%eax > - cmpl%edi, %eax > - cmovl %edi, %eax > + sarl$31, %eax > + xorl%edi, %eax > ret > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? OK. Thanks, Richard. > 2021-01-05 Jakub Jelinek > > PR tree-optimization/96928 > * tree-ssa-phiopt.c (xor_replacement): New function. > (tree_ssa_phiopt_worker): Call it. > > * gcc.dg/tree-ssa/pr96928.c: New test. > * gcc.target/i386/ifcvt-onecmpl-abs-1.c: Remove -fdump-rtl-ce1, > instead of scanning rtl dump for ifcvt message check assembly > for xor instruction. > > --- gcc/tree-ssa-phiopt.c.jj 2021-01-04 10:25:38.638236032 +0100 > +++ gcc/tree-ssa-phiopt.c 2021-01-04 15:29:30.050005505 +0100 > @@ -62,6 +62,8 @@ static bool minmax_replacement (basic_bl > edge, edge, gimple *, tree, tree); > static bool abs_replacement (basic_block, basic_block, >edge, edge, gimple *, tree, tree); > +static bool xor_replacement (basic_block, basic_block, > + edge, edge, gimple *, tree, tree); > static bool cond_removal_in_popcount_clz_ctz_pattern (basic_block, > basic_block, > edge, edge, gimple *, > tree, tree); > @@ -346,6 +348,9 @@ tree_ssa_phiopt_worker (bool do_store_el > else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) > cfgchanged = true; > else if (!early_p > +&& xor_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) > + cfgchanged = true; > + else if (!early_p > && cond_removal_in_popcount_clz_ctz_pattern (bb, bb1, e1, > e2, phi, arg0, > arg1)) > @@ -2097,6 +2102,109 @@ abs_replacement (basic_block cond_bb, ba >/* Note that we optimized this PHI. */ >return true; > } > + > +/* Optimize x < 0 ? ~y : y into (x >> (prec-1)) ^ y. */ > + > +static bool > +xor_replacement (basic_block cond_bb, basic_block middle_bb, > + edge e0 ATTRIBUTE_UNUSED, edge e1, > + gimple *phi, tree arg0, tree arg1) > +{ > + if (!INTEGRAL_TYPE_P (TREE_TYPE (arg1))) > +return false; > + > + /* OTHER_BLOCK must have only one executable statement which must have the > + form arg0 = ~arg1 or arg1 = ~arg0. */ > + > + gimple *assign = last_and_only_stmt (middle_bb); > + /* If we did not find the proper one's complement assignment, then we > cannot > + optimize. */ > + if (assign == NULL) > +return false; > + > + /* If we got here, then we have found the only executable statement > + in OTHER_BLOCK. If it is anything other than arg = ~arg1 or > + arg1 = ~arg0, then we cannot optimize. */ > + if (!is_gimple_assign (assign)) > +return false; > + > + if (gimple_assign_rhs_code (assign) != BIT_NOT_EXPR) > +return false; > + > + tree lhs = gimple_assign_lhs (assign); > + tree rhs = gimple_assign_rhs1 (assign); > + > + /* The assignment has to be arg0 = -arg1 or arg1 = -arg0. */ > + if (!(lhs == arg0 && rhs == arg1) && !(lhs == arg1 && rhs == arg0)) > +return false; > + > + gimple *cond = last_stmt (cond_bb); > + tree result = PHI_RESULT (phi); > + > + /* Only relationals comparing arg[01] against zero are interesting. */ > + enum tree_code cond_code = gimple_cond_code (cond); > + if (cond_code != LT_EXPR && cond_code != GE_EXPR) > +return false; > + > + /* Make sure the conditional is x OP 0. */ > + tree clhs = gimple_cond_lhs (cond); > + if (TREE_CODE (clhs) != SSA_NAME > + || !INTEGRAL_TYPE_P (TREE_TYPE (clhs)) > + || TYPE_UNSIGNED (TREE_TYPE (clhs)) > + || TYPE_PRECISION (TREE_TYPE (clhs)) != TYPE_PRECISION (TREE_TYPE > (arg1)) > + || !integer_zerop (gimple_cond_rhs (cond))) > +return false; > + > + /* We need to know which is the true edge and which is the false > + edge so that we know if have xor or inverted xor. */ > + edge true_edge, false_edge; > + extract_true_false_edges_from_block (cond_bb, _edge, _edge); > + > + /* For GE_EXPR, if the true edge goes to OTHER_BLOCK, then we > + will need to invert the result. Similarly for LT_EXPR if > + the false edge goes to OTHER_BLOCK. */ > + edge e; > + if (cond_code == GE_EXPR) > +e = true_edge; > + else > +e = false_edge; > + > + bool invert = e->dest == middle_bb; > + > + result = duplicate_ssa_name (result, NULL); > + > +
[PATCH] phiopt: Optimize x < 0 ? ~y : y to (x >> 31) ^ y [PR96928]
Hi! As requested in the PR, the one's complement abs can be done more efficiently without cmov or branching. Had to change the ifcvt-onecmpl-abs-1.c testcase, we no longer optimize it in ifcvt, on x86_64 with -m32 we generate in the end the exact same code, but with -m64: movl%edi, %eax - notl%eax - cmpl%edi, %eax - cmovl %edi, %eax + sarl$31, %eax + xorl%edi, %eax ret Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2021-01-05 Jakub Jelinek PR tree-optimization/96928 * tree-ssa-phiopt.c (xor_replacement): New function. (tree_ssa_phiopt_worker): Call it. * gcc.dg/tree-ssa/pr96928.c: New test. * gcc.target/i386/ifcvt-onecmpl-abs-1.c: Remove -fdump-rtl-ce1, instead of scanning rtl dump for ifcvt message check assembly for xor instruction. --- gcc/tree-ssa-phiopt.c.jj2021-01-04 10:25:38.638236032 +0100 +++ gcc/tree-ssa-phiopt.c 2021-01-04 15:29:30.050005505 +0100 @@ -62,6 +62,8 @@ static bool minmax_replacement (basic_bl edge, edge, gimple *, tree, tree); static bool abs_replacement (basic_block, basic_block, edge, edge, gimple *, tree, tree); +static bool xor_replacement (basic_block, basic_block, +edge, edge, gimple *, tree, tree); static bool cond_removal_in_popcount_clz_ctz_pattern (basic_block, basic_block, edge, edge, gimple *, tree, tree); @@ -346,6 +348,9 @@ tree_ssa_phiopt_worker (bool do_store_el else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) cfgchanged = true; else if (!early_p + && xor_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) + cfgchanged = true; + else if (!early_p && cond_removal_in_popcount_clz_ctz_pattern (bb, bb1, e1, e2, phi, arg0, arg1)) @@ -2097,6 +2102,109 @@ abs_replacement (basic_block cond_bb, ba /* Note that we optimized this PHI. */ return true; } + +/* Optimize x < 0 ? ~y : y into (x >> (prec-1)) ^ y. */ + +static bool +xor_replacement (basic_block cond_bb, basic_block middle_bb, +edge e0 ATTRIBUTE_UNUSED, edge e1, +gimple *phi, tree arg0, tree arg1) +{ + if (!INTEGRAL_TYPE_P (TREE_TYPE (arg1))) +return false; + + /* OTHER_BLOCK must have only one executable statement which must have the + form arg0 = ~arg1 or arg1 = ~arg0. */ + + gimple *assign = last_and_only_stmt (middle_bb); + /* If we did not find the proper one's complement assignment, then we cannot + optimize. */ + if (assign == NULL) +return false; + + /* If we got here, then we have found the only executable statement + in OTHER_BLOCK. If it is anything other than arg = ~arg1 or + arg1 = ~arg0, then we cannot optimize. */ + if (!is_gimple_assign (assign)) +return false; + + if (gimple_assign_rhs_code (assign) != BIT_NOT_EXPR) +return false; + + tree lhs = gimple_assign_lhs (assign); + tree rhs = gimple_assign_rhs1 (assign); + + /* The assignment has to be arg0 = -arg1 or arg1 = -arg0. */ + if (!(lhs == arg0 && rhs == arg1) && !(lhs == arg1 && rhs == arg0)) +return false; + + gimple *cond = last_stmt (cond_bb); + tree result = PHI_RESULT (phi); + + /* Only relationals comparing arg[01] against zero are interesting. */ + enum tree_code cond_code = gimple_cond_code (cond); + if (cond_code != LT_EXPR && cond_code != GE_EXPR) +return false; + + /* Make sure the conditional is x OP 0. */ + tree clhs = gimple_cond_lhs (cond); + if (TREE_CODE (clhs) != SSA_NAME + || !INTEGRAL_TYPE_P (TREE_TYPE (clhs)) + || TYPE_UNSIGNED (TREE_TYPE (clhs)) + || TYPE_PRECISION (TREE_TYPE (clhs)) != TYPE_PRECISION (TREE_TYPE (arg1)) + || !integer_zerop (gimple_cond_rhs (cond))) +return false; + + /* We need to know which is the true edge and which is the false + edge so that we know if have xor or inverted xor. */ + edge true_edge, false_edge; + extract_true_false_edges_from_block (cond_bb, _edge, _edge); + + /* For GE_EXPR, if the true edge goes to OTHER_BLOCK, then we + will need to invert the result. Similarly for LT_EXPR if + the false edge goes to OTHER_BLOCK. */ + edge e; + if (cond_code == GE_EXPR) +e = true_edge; + else +e = false_edge; + + bool invert = e->dest == middle_bb; + + result = duplicate_ssa_name (result, NULL); + + gimple_stmt_iterator gsi = gsi_last_bb (cond_bb); + + int prec = TYPE_PRECISION (TREE_TYPE (clhs)); + gimple *new_stmt += gimple_build_assign (make_ssa_name (TREE_TYPE (clhs)), RSHIFT_EXPR, clhs, + build_int_cst (integer_type_node, prec -