Re: [PATCH] phiopt: Optimize x < 0 ? ~y : y to (x >> 31) ^ y [PR96928]

2021-01-05 Thread Richard Biener
On Tue, 5 Jan 2021, Jakub Jelinek wrote:

> Hi!
> 
> As requested in the PR, the one's complement abs can be done more
> efficiently without cmov or branching.
> 
> Had to change the ifcvt-onecmpl-abs-1.c testcase, we no longer optimize
> it in ifcvt, on x86_64 with -m32 we generate in the end the exact same
> code, but with -m64:
>   movl%edi, %eax
> - notl%eax
> - cmpl%edi, %eax
> - cmovl   %edi, %eax
> + sarl$31, %eax
> + xorl%edi, %eax
>   ret
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

Thanks,
Richard.

> 2021-01-05  Jakub Jelinek  
> 
>   PR tree-optimization/96928
>   * tree-ssa-phiopt.c (xor_replacement): New function.
>   (tree_ssa_phiopt_worker): Call it.
> 
>   * gcc.dg/tree-ssa/pr96928.c: New test.
>   * gcc.target/i386/ifcvt-onecmpl-abs-1.c: Remove -fdump-rtl-ce1,
>   instead of scanning rtl dump for ifcvt message check assembly
>   for xor instruction.
> 
> --- gcc/tree-ssa-phiopt.c.jj  2021-01-04 10:25:38.638236032 +0100
> +++ gcc/tree-ssa-phiopt.c 2021-01-04 15:29:30.050005505 +0100
> @@ -62,6 +62,8 @@ static bool minmax_replacement (basic_bl
>   edge, edge, gimple *, tree, tree);
>  static bool abs_replacement (basic_block, basic_block,
>edge, edge, gimple *, tree, tree);
> +static bool xor_replacement (basic_block, basic_block,
> +  edge, edge, gimple *, tree, tree);
>  static bool cond_removal_in_popcount_clz_ctz_pattern (basic_block, 
> basic_block,
> edge, edge, gimple *,
> tree, tree);
> @@ -346,6 +348,9 @@ tree_ssa_phiopt_worker (bool do_store_el
> else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
>   cfgchanged = true;
> else if (!early_p
> +&& xor_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
> + cfgchanged = true;
> +   else if (!early_p
>  && cond_removal_in_popcount_clz_ctz_pattern (bb, bb1, e1,
>   e2, phi, arg0,
>   arg1))
> @@ -2097,6 +2102,109 @@ abs_replacement (basic_block cond_bb, ba
>/* Note that we optimized this PHI.  */
>return true;
>  }
> +
> +/* Optimize x < 0 ? ~y : y into (x >> (prec-1)) ^ y.  */
> +
> +static bool
> +xor_replacement (basic_block cond_bb, basic_block middle_bb,
> +  edge e0 ATTRIBUTE_UNUSED, edge e1,
> +  gimple *phi, tree arg0, tree arg1)
> +{
> +  if (!INTEGRAL_TYPE_P (TREE_TYPE (arg1)))
> +return false;
> +
> +  /* OTHER_BLOCK must have only one executable statement which must have the
> + form arg0 = ~arg1 or arg1 = ~arg0.  */
> +
> +  gimple *assign = last_and_only_stmt (middle_bb);
> +  /* If we did not find the proper one's complement assignment, then we 
> cannot
> + optimize.  */
> +  if (assign == NULL)
> +return false;
> +
> +  /* If we got here, then we have found the only executable statement
> + in OTHER_BLOCK.  If it is anything other than arg = ~arg1 or
> + arg1 = ~arg0, then we cannot optimize.  */
> +  if (!is_gimple_assign (assign))
> +return false;
> +
> +  if (gimple_assign_rhs_code (assign) != BIT_NOT_EXPR)
> +return false;
> +
> +  tree lhs = gimple_assign_lhs (assign);
> +  tree rhs = gimple_assign_rhs1 (assign);
> +
> +  /* The assignment has to be arg0 = -arg1 or arg1 = -arg0.  */
> +  if (!(lhs == arg0 && rhs == arg1) && !(lhs == arg1 && rhs == arg0))
> +return false;
> +
> +  gimple *cond = last_stmt (cond_bb);
> +  tree result = PHI_RESULT (phi);
> +
> +  /* Only relationals comparing arg[01] against zero are interesting.  */
> +  enum tree_code cond_code = gimple_cond_code (cond);
> +  if (cond_code != LT_EXPR && cond_code != GE_EXPR)
> +return false;
> +
> +  /* Make sure the conditional is x OP 0.  */
> +  tree clhs = gimple_cond_lhs (cond);
> +  if (TREE_CODE (clhs) != SSA_NAME
> +  || !INTEGRAL_TYPE_P (TREE_TYPE (clhs))
> +  || TYPE_UNSIGNED (TREE_TYPE (clhs))
> +  || TYPE_PRECISION (TREE_TYPE (clhs)) != TYPE_PRECISION (TREE_TYPE 
> (arg1))
> +  || !integer_zerop (gimple_cond_rhs (cond)))
> +return false;
> +
> +  /* We need to know which is the true edge and which is the false
> + edge so that we know if have xor or inverted xor.  */
> +  edge true_edge, false_edge;
> +  extract_true_false_edges_from_block (cond_bb, _edge, _edge);
> +
> +  /* For GE_EXPR, if the true edge goes to OTHER_BLOCK, then we
> + will need to invert the result.  Similarly for LT_EXPR if
> + the false edge goes to OTHER_BLOCK.  */
> +  edge e;
> +  if (cond_code == GE_EXPR)
> +e = true_edge;
> +  else
> +e = false_edge;
> +
> +  bool invert = e->dest == middle_bb;
> +
> +  result = duplicate_ssa_name (result, NULL);
> +
> +  

[PATCH] phiopt: Optimize x < 0 ? ~y : y to (x >> 31) ^ y [PR96928]

2021-01-05 Thread Jakub Jelinek via Gcc-patches
Hi!

As requested in the PR, the one's complement abs can be done more
efficiently without cmov or branching.

Had to change the ifcvt-onecmpl-abs-1.c testcase, we no longer optimize
it in ifcvt, on x86_64 with -m32 we generate in the end the exact same
code, but with -m64:
movl%edi, %eax
-   notl%eax
-   cmpl%edi, %eax
-   cmovl   %edi, %eax
+   sarl$31, %eax
+   xorl%edi, %eax
ret

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2021-01-05  Jakub Jelinek  

PR tree-optimization/96928
* tree-ssa-phiopt.c (xor_replacement): New function.
(tree_ssa_phiopt_worker): Call it.

* gcc.dg/tree-ssa/pr96928.c: New test.
* gcc.target/i386/ifcvt-onecmpl-abs-1.c: Remove -fdump-rtl-ce1,
instead of scanning rtl dump for ifcvt message check assembly
for xor instruction.

--- gcc/tree-ssa-phiopt.c.jj2021-01-04 10:25:38.638236032 +0100
+++ gcc/tree-ssa-phiopt.c   2021-01-04 15:29:30.050005505 +0100
@@ -62,6 +62,8 @@ static bool minmax_replacement (basic_bl
edge, edge, gimple *, tree, tree);
 static bool abs_replacement (basic_block, basic_block,
 edge, edge, gimple *, tree, tree);
+static bool xor_replacement (basic_block, basic_block,
+edge, edge, gimple *, tree, tree);
 static bool cond_removal_in_popcount_clz_ctz_pattern (basic_block, basic_block,
  edge, edge, gimple *,
  tree, tree);
@@ -346,6 +348,9 @@ tree_ssa_phiopt_worker (bool do_store_el
  else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
cfgchanged = true;
  else if (!early_p
+  && xor_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
+   cfgchanged = true;
+ else if (!early_p
   && cond_removal_in_popcount_clz_ctz_pattern (bb, bb1, e1,
e2, phi, arg0,
arg1))
@@ -2097,6 +2102,109 @@ abs_replacement (basic_block cond_bb, ba
   /* Note that we optimized this PHI.  */
   return true;
 }
+
+/* Optimize x < 0 ? ~y : y into (x >> (prec-1)) ^ y.  */
+
+static bool
+xor_replacement (basic_block cond_bb, basic_block middle_bb,
+edge e0 ATTRIBUTE_UNUSED, edge e1,
+gimple *phi, tree arg0, tree arg1)
+{
+  if (!INTEGRAL_TYPE_P (TREE_TYPE (arg1)))
+return false;
+
+  /* OTHER_BLOCK must have only one executable statement which must have the
+ form arg0 = ~arg1 or arg1 = ~arg0.  */
+
+  gimple *assign = last_and_only_stmt (middle_bb);
+  /* If we did not find the proper one's complement assignment, then we cannot
+ optimize.  */
+  if (assign == NULL)
+return false;
+
+  /* If we got here, then we have found the only executable statement
+ in OTHER_BLOCK.  If it is anything other than arg = ~arg1 or
+ arg1 = ~arg0, then we cannot optimize.  */
+  if (!is_gimple_assign (assign))
+return false;
+
+  if (gimple_assign_rhs_code (assign) != BIT_NOT_EXPR)
+return false;
+
+  tree lhs = gimple_assign_lhs (assign);
+  tree rhs = gimple_assign_rhs1 (assign);
+
+  /* The assignment has to be arg0 = -arg1 or arg1 = -arg0.  */
+  if (!(lhs == arg0 && rhs == arg1) && !(lhs == arg1 && rhs == arg0))
+return false;
+
+  gimple *cond = last_stmt (cond_bb);
+  tree result = PHI_RESULT (phi);
+
+  /* Only relationals comparing arg[01] against zero are interesting.  */
+  enum tree_code cond_code = gimple_cond_code (cond);
+  if (cond_code != LT_EXPR && cond_code != GE_EXPR)
+return false;
+
+  /* Make sure the conditional is x OP 0.  */
+  tree clhs = gimple_cond_lhs (cond);
+  if (TREE_CODE (clhs) != SSA_NAME
+  || !INTEGRAL_TYPE_P (TREE_TYPE (clhs))
+  || TYPE_UNSIGNED (TREE_TYPE (clhs))
+  || TYPE_PRECISION (TREE_TYPE (clhs)) != TYPE_PRECISION (TREE_TYPE (arg1))
+  || !integer_zerop (gimple_cond_rhs (cond)))
+return false;
+
+  /* We need to know which is the true edge and which is the false
+ edge so that we know if have xor or inverted xor.  */
+  edge true_edge, false_edge;
+  extract_true_false_edges_from_block (cond_bb, _edge, _edge);
+
+  /* For GE_EXPR, if the true edge goes to OTHER_BLOCK, then we
+ will need to invert the result.  Similarly for LT_EXPR if
+ the false edge goes to OTHER_BLOCK.  */
+  edge e;
+  if (cond_code == GE_EXPR)
+e = true_edge;
+  else
+e = false_edge;
+
+  bool invert = e->dest == middle_bb;
+
+  result = duplicate_ssa_name (result, NULL);
+
+  gimple_stmt_iterator gsi = gsi_last_bb (cond_bb);
+
+  int prec = TYPE_PRECISION (TREE_TYPE (clhs));
+  gimple *new_stmt
+= gimple_build_assign (make_ssa_name (TREE_TYPE (clhs)), RSHIFT_EXPR, clhs,
+  build_int_cst (integer_type_node, prec -