https://gcc.gnu.org/g:bcdc17211a70f290d46e5d10164c4bc776a89d1a

commit r16-7304-gbcdc17211a70f290d46e5d10164c4bc776a89d1a
Author: Tamar Christina <[email protected]>
Date:   Thu Feb 5 08:07:33 2026 +0000

    middle-end: use inner variable when determining deferred FMA order [PR123898]
    
    If we defer an FMA creation the code tries to determine the order of the
    operands before deferring.  To do this it compares the operands against the
    result expression (which should contain the multiplication expression).
    
    However, the multiply might be wrapped in a conversion.  This change has us
    strip one level of conversion (the most that convert_mult_to_fma supports
    handling) and only then do the comparison.
    
    We cannot strip ops[0] and ops[1] and store them stripped, since after the
    deferral, if we create an FMA, we need to know the original types, and
    convert_mult_to_fma handles the conversions during FMA creation anyway.
    
    There's probably a similar helper to strip_nop_view_converts but I couldn't
    find one, since many of the stripping helpers are recursive or don't support
    stripping VIEW_CONVERTS.
    
    gcc/ChangeLog:
    
            PR tree-optimization/123898
            * tree-ssa-math-opts.cc (strip_nop_view_converts): New.
            (convert_mult_to_fma): Use it.
    
    gcc/testsuite/ChangeLog:
    
            PR tree-optimization/123898
            * gcc.target/aarch64/sve/pr123898.c: New test.

Diff:
---
 gcc/testsuite/gcc.target/aarch64/sve/pr123898.c | 17 +++++++++++++++++
 gcc/tree-ssa-math-opts.cc                       | 25 +++++++++++++++++++++++--
 2 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr123898.c b/gcc/testsuite/gcc.target/aarch64/sve/pr123898.c
new file mode 100644
index 000000000000..a5741d5058e8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr123898.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 --param avoid-fma-max-bits=512 -march=armv9-a -msve-vector-bits=256 -fdump-tree-widening_mul" } */
+
+typedef __attribute__((__vector_size__(32))) char A;
+typedef __attribute__((__vector_size__(32))) signed char D;
+
+A c;
+char x;
+
+void
+foo(D d)
+{
+  d *= x;
+  c += (A)d;
+}
+
+/* { dg-final { scan-tree-dump-times "\.FMA" 1 "widening_mul" } } */
diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
index 469de10a432a..e3f88f236285 100644
--- a/gcc/tree-ssa-math-opts.cc
+++ b/gcc/tree-ssa-math-opts.cc
@@ -118,6 +118,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-ssa-math-opts.h"
 #include "dbgcnt.h"
 #include "cfghooks.h"
+#include "gimple-match.h"
 
 /* This structure represents one basic block that either computes a
    division, or is a common dominator for basic block that compute a
@@ -3356,6 +3357,26 @@ last_fma_candidate_feeds_initial_phi (fma_deferring_state *state,
   return false;
 }
 
+/* If ARG is an SSA name defined by a conversion or VIEW_CONVERT_EXPR that is
+   a nop conversion (e.g. one that only changes the sign), return the operand
+   of that conversion.  Only one level is stripped.  Otherwise return ARG.  */
+
+static tree
+strip_nop_view_converts (tree arg)
+{
+  if (TREE_CODE (arg) != SSA_NAME)
+    return arg;
+
+  gimple *assign = SSA_NAME_DEF_STMT (arg);
+  gimple_match_op res_op;
+  if (gimple_extract_op (assign, &res_op)
+      && (CONVERT_EXPR_CODE_P (res_op.code) || res_op.code == 
VIEW_CONVERT_EXPR)
+      && tree_nop_conversion_p (TREE_TYPE (res_op.ops[0]), TREE_TYPE (arg)))
+    return res_op.ops[0];
+
+  return arg;
+}
+
 /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
    with uses in additions and subtractions to form fused multiply-add
    operations.  Returns true if successful and MUL_STMT should be removed.
@@ -3616,11 +3637,11 @@ convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2,
            {
              gcc_checking_assert (!state->m_initial_phi);
              gphi *phi;
-             if (ops[0] == result)
+             if (strip_nop_view_converts (ops[0]) == result)
                phi = result_of_phi (ops[1]);
              else
                {
-                 gcc_assert (ops[1] == result);
+                 gcc_assert (strip_nop_view_converts (ops[1]) == result);
                  phi = result_of_phi (ops[0]);
                }

Reply via email to