https://gcc.gnu.org/g:f53f8a859631bef97adba1522a8049a8fce57c1b

commit r15-623-gf53f8a859631bef97adba1522a8049a8fce57c1b
Author: Eric Botcazou <ebotca...@adacore.com>
Date:   Wed May 8 10:07:56 2024 +0200

    Add widening expansion of MULT_HIGHPART_EXPR for integral modes
    
    For integral modes the expansion of MULT_HIGHPART_EXPR requires the presence
    of an {s,u}mul_highpart optab whereas, for vector modes, widening expansion
    is supported.  This adds a widening expansion for integral modes too, which
    is in fact already implemented in expmed_mult_highpart_optab.
    
    gcc/
            * expmed.h (expmed_mult_highpart_optab): Declare.
            * expmed.cc (expmed_mult_highpart_optab): Remove static keyword.
            Do not assume that OP1 is a constant integer.  Fix pasto.
            (expmed_mult_highpart): Pass OP1 narrowed to MODE in all the calls
            to expmed_mult_highpart_optab.
            * optabs-query.cc (can_mult_highpart_p): Use 2 for integer widening
            and shift subsequent values accordingly.
            * optabs.cc (expand_mult_highpart): Call expmed_mult_highpart_optab
            when can_mult_highpart_p returns 2 and adjust to above change.

Diff:
---
 gcc/expmed.cc       | 54 +++++++++++++++++++++++++----------------------------
 gcc/expmed.h        |  2 ++
 gcc/optabs-query.cc | 24 ++++++++++++++++++++----
 gcc/optabs.cc       |  7 +++++--
 4 files changed, 52 insertions(+), 35 deletions(-)

diff --git a/gcc/expmed.cc b/gcc/expmed.cc
index 248940fe4147..50d22762cae0 100644
--- a/gcc/expmed.cc
+++ b/gcc/expmed.cc
@@ -2748,8 +2748,7 @@ static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
 static rtx extract_high_half (scalar_int_mode, rtx);
 static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int);
-static rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx,
-                                      int, int);
+
 /* Compute and return the best algorithm for multiplying by T.
    The algorithm must cost less than cost_limit
    If retval.cost >= COST_LIMIT, no algorithm was found and all
@@ -3856,30 +3855,25 @@ extract_high_half (scalar_int_mode mode, rtx op)
   return convert_modes (mode, wider_mode, op, 0);
 }
 
-/* Like expmed_mult_highpart, but only consider using a multiplication
-   optab.  OP1 is an rtx for the constant operand.  */
+/* Like expmed_mult_highpart, but only consider using multiplication optab.  */
 
-static rtx
+rtx
 expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
                            rtx target, int unsignedp, int max_cost)
 {
-  rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
+  const scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
+  const bool speed = optimize_insn_for_speed_p ();
+  const int size = GET_MODE_BITSIZE (mode);
   optab moptab;
   rtx tem;
-  int size;
-  bool speed = optimize_insn_for_speed_p ();
-
-  scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
-
-  size = GET_MODE_BITSIZE (mode);
 
   /* Firstly, try using a multiplication insn that only generates the needed
      high part of the product, and in the sign flavor of unsignedp.  */
   if (mul_highpart_cost (speed, mode) < max_cost)
     {
       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
-      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
-                         unsignedp, OPTAB_DIRECT);
+      tem = expand_binop (mode, moptab, op0, op1, target, unsignedp,
+                         OPTAB_DIRECT);
       if (tem)
        return tem;
     }
@@ -3892,12 +3886,12 @@ expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
          + 4 * add_cost (speed, mode) < max_cost))
     {
       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
-      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
-                         unsignedp, OPTAB_DIRECT);
+      tem = expand_binop (mode, moptab, op0, op1, target, !unsignedp,
+                         OPTAB_DIRECT);
       if (tem)
        /* We used the wrong signedness.  Adjust the result.  */
-       return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
-                                           tem, unsignedp);
+       return expand_mult_highpart_adjust (mode, tem, op0, op1, tem,
+                                           unsignedp);
     }
 
   /* Try widening multiplication.  */
@@ -3905,8 +3899,8 @@ expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
   if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
       && mul_widen_cost (speed, wider_mode) < max_cost)
     {
-      tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
-                         unsignedp, OPTAB_WIDEN);
+      tem = expand_binop (wider_mode, moptab, op0, op1, NULL_RTX, unsignedp,
+                         OPTAB_WIDEN);
       if (tem)
        return extract_high_half (mode, tem);
     }
@@ -3947,14 +3941,14 @@ expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
          + 2 * shift_cost (speed, mode, size-1)
          + 4 * add_cost (speed, mode) < max_cost))
     {
-      tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
-                         NULL_RTX, ! unsignedp, OPTAB_WIDEN);
+      tem = expand_binop (wider_mode, moptab, op0, op1, NULL_RTX, !unsignedp,
+                         OPTAB_WIDEN);
       if (tem != 0)
        {
          tem = extract_high_half (mode, tem);
          /* We used the wrong signedness.  Adjust the result.  */
-         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
-                                             target, unsignedp);
+         return expand_mult_highpart_adjust (mode, tem, op0, op1, target,
+                                             unsignedp);
        }
     }
 
@@ -3976,18 +3970,19 @@ static rtx
 expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
                      rtx target, int unsignedp, int max_cost)
 {
+  const bool speed = optimize_insn_for_speed_p ();
   unsigned HOST_WIDE_INT cnst1;
   int extra_cost;
   bool sign_adjust = false;
   enum mult_variant variant;
   struct algorithm alg;
-  rtx tem;
-  bool speed = optimize_insn_for_speed_p ();
+  rtx narrow_op1, tem;
 
   /* We can't support modes wider than HOST_BITS_PER_INT.  */
   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
 
   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
+  narrow_op1 = gen_int_mode (INTVAL (op1), mode);
 
   /* We can't optimize modes wider than BITS_PER_WORD.
      ??? We might be able to perform double-word arithmetic if
@@ -3995,7 +3990,7 @@ expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
      synth_mult etc. assume single-word operations.  */
   scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
-    return expmed_mult_highpart_optab (mode, op0, op1, target,
+    return expmed_mult_highpart_optab (mode, op0, narrow_op1, target,
                                       unsignedp, max_cost);
 
   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
@@ -4013,7 +4008,8 @@ expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
     {
       /* See whether the specialized multiplication optabs are
         cheaper than the shift/add version.  */
-      tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
+      tem = expmed_mult_highpart_optab (mode, op0, narrow_op1, target,
+                                       unsignedp,
                                        alg.cost.cost + extra_cost);
       if (tem)
        return tem;
@@ -4028,7 +4024,7 @@ expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
 
       return tem;
     }
-  return expmed_mult_highpart_optab (mode, op0, op1, target,
+  return expmed_mult_highpart_optab (mode, op0, narrow_op1, target,
                                     unsignedp, max_cost);
 }
 
diff --git a/gcc/expmed.h b/gcc/expmed.h
index f5375c84f25a..0a19176b77ab 100644
--- a/gcc/expmed.h
+++ b/gcc/expmed.h
@@ -724,5 +724,7 @@ extern rtx extract_low_bits (machine_mode, machine_mode, rtx);
 extern rtx expand_mult (machine_mode, rtx, rtx, rtx, int, bool = false);
 extern rtx expand_mult_highpart_adjust (scalar_int_mode, rtx, rtx, rtx,
                                        rtx, int);
+extern rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx,
+                                      int, int);
 
 #endif  // EXPMED_H
diff --git a/gcc/optabs-query.cc b/gcc/optabs-query.cc
index e36a1506c790..de145be7075f 100644
--- a/gcc/optabs-query.cc
+++ b/gcc/optabs-query.cc
@@ -502,19 +502,35 @@ find_widening_optab_handler_and_mode (optab op, machine_mode to_mode,
   return CODE_FOR_nothing;
 }
 
-/* Return non-zero if a highpart multiply is supported of can be synthisized.
+/* Return non-zero if a highpart multiply is supported or can be synthesized.
    For the benefit of expand_mult_highpart, the return value is 1 for direct,
-   2 for even/odd widening, and 3 for hi/lo widening.  */
+   2 for integral widening, 3 for even/odd widening, 4 for hi/lo widening.  */
 
 int
 can_mult_highpart_p (machine_mode mode, bool uns_p)
 {
   optab op;
+  scalar_int_mode int_mode;
 
   op = uns_p ? umul_highpart_optab : smul_highpart_optab;
   if (optab_handler (op, mode) != CODE_FOR_nothing)
     return 1;
 
+  /* If the mode is integral, synth from widening or larger operations.  */
+  if (is_a <scalar_int_mode> (mode, &int_mode))
+    {
+      scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (int_mode).require ();
+
+      op = uns_p ? umul_widen_optab : smul_widen_optab;
+      if (convert_optab_handler (op, wider_mode, mode) != CODE_FOR_nothing)
+       return 2;
+
+      /* The test on the size comes from expmed_mult_highpart_optab.  */
+      if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
+         && GET_MODE_BITSIZE (int_mode) - 1 < BITS_PER_WORD)
+       return 2;
+    }
+
   /* If the mode is an integral vector, synth from widening operations.  */
   if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT)
     return 0;
@@ -535,7 +551,7 @@ can_mult_highpart_p (machine_mode mode, bool uns_p)
                            + ((i & 1) ? nunits : 0));
          vec_perm_indices indices (sel, 2, nunits);
          if (can_vec_perm_const_p (mode, mode, indices))
-           return 2;
+           return 3;
        }
     }
 
@@ -551,7 +567,7 @@ can_mult_highpart_p (machine_mode mode, bool uns_p)
            sel.quick_push (2 * i + (BYTES_BIG_ENDIAN ? 0 : 1));
          vec_perm_indices indices (sel, 2, nunits);
          if (can_vec_perm_const_p (mode, mode, indices))
-           return 3;
+           return 4;
        }
     }
 
diff --git a/gcc/optabs.cc b/gcc/optabs.cc
index ce91f94ed43f..e79138845671 100644
--- a/gcc/optabs.cc
+++ b/gcc/optabs.cc
@@ -6751,10 +6751,13 @@ expand_mult_highpart (machine_mode mode, rtx op0, rtx op1,
       return expand_binop (mode, tab1, op0, op1, target, uns_p,
                           OPTAB_LIB_WIDEN);
     case 2:
+      return expmed_mult_highpart_optab (as_a <scalar_int_mode> (mode),
+                                        op0, op1, target, uns_p, INT_MAX);
+    case 3:
       tab1 = uns_p ? vec_widen_umult_even_optab : vec_widen_smult_even_optab;
       tab2 = uns_p ? vec_widen_umult_odd_optab : vec_widen_smult_odd_optab;
       break;
-    case 3:
+    case 4:
       tab1 = uns_p ? vec_widen_umult_lo_optab : vec_widen_smult_lo_optab;
       tab2 = uns_p ? vec_widen_umult_hi_optab : vec_widen_smult_hi_optab;
       if (BYTES_BIG_ENDIAN)
@@ -6783,7 +6786,7 @@ expand_mult_highpart (machine_mode mode, rtx op0, rtx op1,
   m2 = gen_lowpart (mode, eops[0].value);
 
   vec_perm_builder sel;
-  if (method == 2)
+  if (method == 3)
     {
       /* The encoding has 2 interleaved stepped patterns.  */
       sel.new_vector (GET_MODE_NUNITS (mode), 2, 3);

Reply via email to