On Thu, Dec 4, 2025 at 9:56 AM Dongyan Chen
<[email protected]> wrote:
>
> Hi,
> Following the previous discussion, I do some implemention in expr.cc,
> This location allows access to tree-level type information while still
> enabling queries to target-specific costs. However, I have some concerns
> regarding the cost comparison logic.I am currently comparing the cost
> of the multiplication directly against the sum of the decomposed
> logical operations.
> Does this cost heuristic seem reasonable to you?
>
> Thanks and regards,
> Dongyan
>
> This patch implements an optimization to transform (a * b) == 0 to
> (a == 0) || (b == 0) and (a * b != 0) to (a != 0) && (b != 0)
> for signed and unsigned integer.
>
> PR target/122935
>
> gcc/ChangeLog:
>
> * expr.cc (strip_widening_cast): New function to look
> through casting operations.
> (split_mult_eq_zero): New function to check
> safety and profitability of the transformation using target costs.
> (expand_expr_real_2): Add optimization for (a * b) ==/!= 0.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/pr122935-1.c: New test.
> * gcc.target/riscv/pr122935-2.c: New test.
>
> ---
> gcc/expr.cc | 129 ++++++++++++++++++++
> gcc/testsuite/gcc.target/riscv/pr122935-1.c | 17 +++
> gcc/testsuite/gcc.target/riscv/pr122935-2.c | 11 ++
> 3 files changed, 157 insertions(+)
> create mode 100644 gcc/testsuite/gcc.target/riscv/pr122935-1.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/pr122935-2.c
>
> diff --git a/gcc/expr.cc b/gcc/expr.cc
> index 7d84ad9e6fc..26262aef6da 100644
> --- a/gcc/expr.cc
> +++ b/gcc/expr.cc
> @@ -9825,6 +9825,119 @@ expr_has_boolean_range (tree exp, gimple *stmt)
> return false;
> }
>
> +/* Helper to strip integer promotions (casts). */
> +static tree
> +strip_widening_cast (tree op)
> +{
> + if (TREE_CODE (op) == SSA_NAME)
> + {
> + gimple *def = SSA_NAME_DEF_STMT (op);
> + if (def && is_gimple_assign (def))
During RTL expansion you are not allowed to simply look up SSA_NAME
definitions since we coalesce variables. Instead you need to use
get_gimple_for_ssa_name and expect a NULL return.
> + {
> + enum tree_code code = gimple_assign_rhs_code (def);
> + if (CONVERT_EXPR_CODE_P (code))
> + return gimple_assign_rhs1 (def);
> + }
> + }
> + return op;
> +}
> +
> +/* Return true if we should split (a * b) == 0 / !=0. */
> +static bool
> +split_mult_eq_zero (enum tree_code comp_code, tree op0, tree *res_op0, tree
> *res_op1)
> +{
> + tree work_op = op0;
> + while (true)
> + {
> + if (TREE_CODE (work_op) == MULT_EXPR)
> + {
> + *res_op0 = TREE_OPERAND (work_op, 0);
> + *res_op1 = TREE_OPERAND (work_op, 1);
> + break;
> + }
> + else if (TREE_CODE (work_op) == SSA_NAME)
> + {
> + gimple *def = SSA_NAME_DEF_STMT (work_op);
> + if (!def || !is_gimple_assign (def))
> + return false;
> +
> + enum tree_code rhs_code = gimple_assign_rhs_code (def);
> + if (rhs_code == MULT_EXPR)
> + {
> + *res_op0 = gimple_assign_rhs1 (def);
> + *res_op1 = gimple_assign_rhs2 (def);
> + break;
> + }
> + else if (CONVERT_EXPR_CODE_P (rhs_code))
> + {
> + work_op = gimple_assign_rhs1 (def);
> + continue;
> + }
> + else
> + return false;
> + }
> + else
> + return false;
> + }
> +
> + /* Safety Check */
> + tree type = TREE_TYPE (op0);
> + bool safe = false;
> +
> + /* Check for signed integer overflow undefined behavior. */
> + if (TYPE_OVERFLOW_UNDEFINED (type))
> + safe = true;
> + /* Check for widening multiplication */
> + else if (INTEGRAL_TYPE_P (type))
> + {
> + tree inner_op0 = strip_widening_cast (*res_op0);
> + tree inner_op1 = strip_widening_cast (*res_op1);
> +
> + /* If result precision >= sum of operand precisions, no overflow. */
> + if (INTEGRAL_TYPE_P (TREE_TYPE (inner_op0))
> + && INTEGRAL_TYPE_P (TREE_TYPE (inner_op1))
> + && (TYPE_PRECISION (type)
> + >= (TYPE_PRECISION (TREE_TYPE (inner_op0))
> + + TYPE_PRECISION (TREE_TYPE (inner_op1)))))
> + safe = true;
> + }
Instead of open-coding I'd suggest to add a match pattern to match.pd that
you can invoke here. You'd have to provide a custom valueization function
using get_gimple_for_ssa_name to reject invalid matching.
> + bool speed_p = optimize_insn_for_speed_p ();
> + if (!safe || optimize_size || !speed_p)
The explicit optimize_size check should be redundant.
> + return false;
> +
> + machine_mode mode = TYPE_MODE (type);
> + rtx reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
> + rtx mult_rtx = gen_rtx_MULT (mode, reg, reg);
> + int mult_cost = set_src_cost (mult_rtx, mode, speed_p);
> +
> + int logic_cost = 0;
> + int cmp_cost = 0;
> + int logic_op_cost = 0;
> +
> + if (comp_code == EQ_EXPR)
> + {
> + rtx eq_rtx = gen_rtx_EQ (mode, reg, const0_rtx);
> + cmp_cost = set_src_cost (eq_rtx, mode, speed_p);
> + rtx ior_rtx = gen_rtx_IOR (mode, reg, reg);
> + logic_op_cost = set_src_cost (ior_rtx, mode, speed_p);
> + logic_cost = 2 * cmp_cost + logic_op_cost;
> + }
> + else /* NE_EXPR */
> + {
> + rtx ne_rtx = gen_rtx_NE (mode, reg, const0_rtx);
> + cmp_cost = set_src_cost (ne_rtx, mode, speed_p);
> + rtx and_rtx = gen_rtx_AND (mode, reg, reg);
> + logic_op_cost = set_src_cost (and_rtx, mode, speed_p);
> + logic_cost = 2 * cmp_cost + logic_op_cost;
> + }
Can you check what AVR does for the above? Esp. when
mode is bigger than word_mode.
> +
> + if (mult_cost > logic_cost)
> + return true;
> +
> + return false;
> +}
> +
> rtx
> expand_expr_real_2 (const_sepops ops, rtx target, machine_mode tmode,
> enum expand_modifier modifier)
> @@ -10920,6 +11033,22 @@ expand_expr_real_2 (const_sepops ops, rtx target,
> machine_mode tmode,
> case UNEQ_EXPR:
> case LTGT_EXPR:
> {
> + if (ops->op1 && integer_zerop (ops->op1))
> + {
> + tree mult_op0, mult_op1;
> + if (split_mult_eq_zero (code, ops->op0, &mult_op0, &mult_op1))
> + {
> + tree cmp0 = build2 (code, boolean_type_node,
> + mult_op0, build_zero_cst (TREE_TYPE
> (mult_op0)));
> + tree cmp1 = build2 (code, boolean_type_node,
> + mult_op1, build_zero_cst (TREE_TYPE
> (mult_op1)));
> + enum tree_code logic_code = (code == EQ_EXPR) ? BIT_IOR_EXPR
> : BIT_AND_EXPR;
> +
> + tree new_exp = build2 (logic_code, boolean_type_node, cmp0,
> cmp1);
> +
> + return expand_expr (new_exp, target, tmode, modifier);
> + }
> + }
> temp = do_store_flag (ops,
> modifier != EXPAND_STACK_PARM ? target :
> NULL_RTX,
> tmode != VOIDmode ? tmode : mode);
> diff --git a/gcc/testsuite/gcc.target/riscv/pr122935-1.c
> b/gcc/testsuite/gcc.target/riscv/pr122935-1.c
> new file mode 100644
> index 00000000000..1cbdfc4daec
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/pr122935-1.c
> @@ -0,0 +1,17 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -march=rv64gc -mabi=lp64d" { target { rv64 } } } */
> +
> +#include <stdbool.h>
> +#include <stdint.h>
> +
> +bool foo1(int32_t a, int32_t b) { return ((int32_t)a * b) == 0; }
> +
> +bool foo2(uint8_t a, uint8_t b) { return ((uint32_t)a * b) == 0; }
> +
> +bool foo3(int32_t a, int32_t b) { return ((int32_t)a * b) != 0; }
> +
> +bool foo4(uint8_t a, uint8_t b) { return ((uint32_t)a * b) != 0; }
> +
> +/* { dg-final { scan-assembler-times {seqz\t} } } */
> +/* { dg-final { scan-assembler-times {snez\t} } } */
> +/* { dg-final { scan-assembler-not {mulw} } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/pr122935-2.c
> b/gcc/testsuite/gcc.target/riscv/pr122935-2.c
> new file mode 100644
> index 00000000000..54b02e518ce
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/pr122935-2.c
> @@ -0,0 +1,11 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -march=rv64gc -mabi=lp64d" { target { rv64 } } } */
> +
> +#include <stdbool.h>
> +#include <stdint.h>
> +
> +bool foo1(uint32_t a, uint32_t b) { return ((uint32_t)a * b) == 0; }
> +
> +bool foo2(uint32_t a, uint32_t b) { return ((uint32_t)a * b) != 0; }
> +
> +/* { dg-final { scan-assembler-times {mulw\t} } } */
> --
> 2.43.0
>