https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69693

--- Comment #8 from Uroš Bizjak <ubizjak at gmail dot com> ---
(In reply to Uroš Bizjak from comment #3)
> Your patch will just paper over the real issue in this particular testcase.

This can be illustrated with an example from PR89654:

--cut here--
unsigned long long
foo (unsigned long long i, int z)
{
  return i << 3;
}
--cut here--

With the following patch applied:

--cut here--
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index d4c01407f4a2..6142f5272a2e 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1290,6 +1290,16 @@
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "TI")])

+;; Used by STV to load a DI into an xmm register.
+(define_insn "*movdi_to_v2di"
+  [(set (match_operand:V2DI 0 "register_operand" "=x")
+        (subreg:V2DI (match_operand:DI 1 "nonimmediate_operand" "xm") 0))]
+  "!TARGET_64BIT && TARGET_SSE2"
+  "%vmovq\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "DI")])
+
 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
 ;; from memory, we'd prefer to load the memory directly into the %xmm
--cut here--

./cc1 -O2 -m32 -march=skylake indeed creates:

        movl    32(%esp), %eax  # 24    [c=8 l=4]  *movsi_internal/0
        movl    36(%esp), %edx  # 25    [c=8 l=4]  *movsi_internal/0
        movl    %eax, (%esp)    # 26    [c=4 l=3]  *movsi_internal/1
        movl    %edx, 4(%esp)   # 27    [c=4 l=4]  *movsi_internal/1
        vmovq   (%esp), %xmm1   # 21    [c=24 l=5]  *movdi_to_v2di
        vpsllq  $3, %xmm1, %xmm0        # 7     [c=4 l=5]  ashlv2di3/1

but ./cc1 -O2 -m32 -march=skylake-avx512 shows:

        movl    32(%esp), %eax  # 23    [c=8 l=4]  *movsi_internal/0
        movl    36(%esp), %edx  # 24    [c=8 l=4]  *movsi_internal/0
        movl    %eax, (%esp)    # 25    [c=4 l=3]  *movsi_internal/1
        movl    %edx, 4(%esp)   # 26    [c=4 l=4]  *movsi_internal/1
        vpsllq  $3, (%esp), %xmm0       # 7     [c=20 l=6]  *ashlv2di3/1

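where VPSLLQ still loads a full V2DImode (16-byte) operand directly from the
stack, even though only the 8 bytes of the DImode value were stored there.
The new *movdi_to_v2di pattern is not used at all in this case, so the
underlying issue remains.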

Reply via email to