Hi,
Following the previous discussion, I do some implemention in expr.cc,
This location allows access to tree-level type information while still
enabling queries to target-specific costs. However, I have some concerns
regarding the cost comparison logic.I am currently comparing the cost
of the multiplication directly against the sum of the decomposed
logical operations.
Does this cost heuristic seem reasonable to you?

Thanks and regards,
Dongyan

This patch implements an optimization to transform (a * b) == 0 to
(a == 0) || (b == 0) and (a * b != 0) to (a != 0) && (b != 0)
for signed and unsigned integer.

        PR target/122935

gcc/ChangeLog:

        * expr.cc (strip_widening_cast): New function to look 
        through casting operations.
        (split_mult_eq_zero): New function to check 
        safety and profitability of the transformation using target costs.
        (expand_expr_real_2): Add optimization for (a * b) ==/!= 0.

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/pr122935-1.c: New test.
        * gcc.target/riscv/pr122935-2.c: New test.

---
 gcc/expr.cc                                 | 129 ++++++++++++++++++++
 gcc/testsuite/gcc.target/riscv/pr122935-1.c |  17 +++
 gcc/testsuite/gcc.target/riscv/pr122935-2.c |  11 ++
 3 files changed, 157 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr122935-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr122935-2.c

diff --git a/gcc/expr.cc b/gcc/expr.cc
index 7d84ad9e6fc..26262aef6da 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -9825,6 +9825,119 @@ expr_has_boolean_range (tree exp, gimple *stmt)
   return false;
 }

+/* Helper to strip integer promotions (casts).  */
+static tree
+strip_widening_cast (tree op)
+{
+  if (TREE_CODE (op) == SSA_NAME)
+    {
+      gimple *def = SSA_NAME_DEF_STMT (op);
+      if (def && is_gimple_assign (def))
+        {
+          enum tree_code code = gimple_assign_rhs_code (def);
+          if (CONVERT_EXPR_CODE_P (code))
+            return gimple_assign_rhs1 (def);
+        }
+    }
+  return op;
+}
+
+/* Return true if we should split (a * b) == 0 / !=0. */
+static bool
+split_mult_eq_zero (enum tree_code comp_code, tree op0, tree *res_op0, tree 
*res_op1)
+{
+  tree work_op = op0;
+  while (true)
+    {
+      if (TREE_CODE (work_op) == MULT_EXPR)
+        {
+          *res_op0 = TREE_OPERAND (work_op, 0);
+          *res_op1 = TREE_OPERAND (work_op, 1);
+          break;
+        }
+      else if (TREE_CODE (work_op) == SSA_NAME)
+        {
+          gimple *def = SSA_NAME_DEF_STMT (work_op);
+          if (!def || !is_gimple_assign (def))
+            return false;
+
+          enum tree_code rhs_code = gimple_assign_rhs_code (def);
+          if (rhs_code == MULT_EXPR)
+            {
+              *res_op0 = gimple_assign_rhs1 (def);
+              *res_op1 = gimple_assign_rhs2 (def);
+              break;
+            }
+          else if (CONVERT_EXPR_CODE_P (rhs_code))
+            {
+              work_op = gimple_assign_rhs1 (def);
+              continue;
+            }
+          else
+            return false;
+        }
+      else
+        return false;
+    }
+
+  /* Safety Check */
+  tree type = TREE_TYPE (op0); 
+  bool safe = false;
+
+  /* Check for signed integer overflow undefined behavior. */
+  if (TYPE_OVERFLOW_UNDEFINED (type))
+    safe = true;
+  /* Check for widening multiplication */
+  else if (INTEGRAL_TYPE_P (type))
+    {
+      tree inner_op0 = strip_widening_cast (*res_op0);
+      tree inner_op1 = strip_widening_cast (*res_op1);
+
+      /* If result precision >= sum of operand precisions, no overflow. */
+      if (INTEGRAL_TYPE_P (TREE_TYPE (inner_op0))
+          && INTEGRAL_TYPE_P (TREE_TYPE (inner_op1))
+          && (TYPE_PRECISION (type) 
+              >= (TYPE_PRECISION (TREE_TYPE (inner_op0)) 
+                  + TYPE_PRECISION (TREE_TYPE (inner_op1)))))
+        safe = true;
+    }
+
+  bool speed_p = optimize_insn_for_speed_p ();
+  if (!safe || optimize_size || !speed_p)
+    return false;
+
+  machine_mode mode = TYPE_MODE (type);
+  rtx reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
+  rtx mult_rtx = gen_rtx_MULT (mode, reg, reg);
+  int mult_cost = set_src_cost (mult_rtx, mode, speed_p);
+
+  int logic_cost = 0;
+  int cmp_cost = 0;
+  int logic_op_cost = 0;
+
+  if (comp_code == EQ_EXPR)
+    {
+      rtx eq_rtx = gen_rtx_EQ (mode, reg, const0_rtx);
+      cmp_cost = set_src_cost (eq_rtx, mode, speed_p);
+      rtx ior_rtx = gen_rtx_IOR (mode, reg, reg);
+      logic_op_cost = set_src_cost (ior_rtx, mode, speed_p);
+      logic_cost = 2 * cmp_cost + logic_op_cost;
+    }
+  else /* NE_EXPR */
+    {
+      rtx ne_rtx = gen_rtx_NE (mode, reg, const0_rtx);
+      cmp_cost = set_src_cost (ne_rtx, mode, speed_p);
+      rtx and_rtx = gen_rtx_AND (mode, reg, reg);
+      logic_op_cost = set_src_cost (and_rtx, mode, speed_p);
+      logic_cost = 2 * cmp_cost + logic_op_cost;
+    }
+
+  if (mult_cost > logic_cost)
+    return true;
+
+  return false;  
+}
+
 rtx
 expand_expr_real_2 (const_sepops ops, rtx target, machine_mode tmode,
                    enum expand_modifier modifier)
@@ -10920,6 +11033,22 @@ expand_expr_real_2 (const_sepops ops, rtx target, 
machine_mode tmode,
     case UNEQ_EXPR:
     case LTGT_EXPR:
       {
+        if (ops->op1 && integer_zerop (ops->op1))
+          {
+            tree mult_op0, mult_op1;
+            if (split_mult_eq_zero (code, ops->op0, &mult_op0, &mult_op1))
+              {
+                tree cmp0 = build2 (code, boolean_type_node, 
+                                    mult_op0, build_zero_cst (TREE_TYPE 
(mult_op0)));
+                tree cmp1 = build2 (code, boolean_type_node, 
+                                    mult_op1, build_zero_cst (TREE_TYPE 
(mult_op1)));
+                enum tree_code logic_code = (code == EQ_EXPR) ? BIT_IOR_EXPR : 
BIT_AND_EXPR;
+                
+                tree new_exp = build2 (logic_code, boolean_type_node, cmp0, 
cmp1);
+
+                return expand_expr (new_exp, target, tmode, modifier);
+              }
+          }
        temp = do_store_flag (ops,
                              modifier != EXPAND_STACK_PARM ? target : NULL_RTX,
                              tmode != VOIDmode ? tmode : mode);
diff --git a/gcc/testsuite/gcc.target/riscv/pr122935-1.c 
b/gcc/testsuite/gcc.target/riscv/pr122935-1.c
new file mode 100644
index 00000000000..1cbdfc4daec
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr122935-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=rv64gc -mabi=lp64d" { target { rv64 } } } */
+
+#include <stdbool.h>
+#include <stdint.h>
+
+bool foo1(int32_t a, int32_t b) { return ((int32_t)a * b) == 0; }
+
+bool foo2(uint8_t a, uint8_t b) { return ((uint32_t)a * b) == 0; }
+
+bool foo3(int32_t a, int32_t b) { return ((int32_t)a * b) != 0; }
+
+bool foo4(uint8_t a, uint8_t b) { return ((uint32_t)a * b) != 0; }
+
+/* { dg-final { scan-assembler-times {seqz\t} } } */
+/* { dg-final { scan-assembler-times {snez\t} } } */
+/* { dg-final { scan-assembler-not {mulw} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/pr122935-2.c 
b/gcc/testsuite/gcc.target/riscv/pr122935-2.c
new file mode 100644
index 00000000000..54b02e518ce
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr122935-2.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=rv64gc -mabi=lp64d" { target { rv64 } } } */
+
+#include <stdbool.h>
+#include <stdint.h>
+
+bool foo1(uint32_t a, uint32_t b) { return ((uint32_t)a * b) == 0; }
+
+bool foo2(uint32_t a, uint32_t b) { return ((uint32_t)a * b) != 0; }
+
+/* { dg-final { scan-assembler-times {mulw\t} } } */
--
2.43.0

Reply via email to