https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69693
--- Comment #8 from Uroš Bizjak <ubizjak at gmail dot com> ---
(In reply to Uroš Bizjak from comment #3)

> Your patch will just paper over the real issue in this particular testcase.

This can be illustrated with an example from PR89654:

--cut here--
unsigned long long
foo (unsigned long long i, int z)
{
  return i << 3;
}
--cut here--

with a patch:

--cut here--
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index d4c01407f4a2..6142f5272a2e 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1290,6 +1290,16 @@
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "TI")])

+;; Used by STV to load a DI into an xmm register.
+(define_insn "*movdi_to_v2di"
+  [(set (match_operand:V2DI 0 "register_operand" "=x")
+	(subreg:V2DI (match_operand:DI 1 "nonimmediate_operand" "xm") 0))]
+  "!TARGET_64BIT && TARGET_SSE2"
+  "%vmovq\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "DI")])
+
 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
 ;; from memory, we'd prefer to load the memory directly into the %xmm
--cut here--

./cc1 -O2 -m32 -march=skylake indeed creates:

	movl	32(%esp), %eax	# 24	[c=8 l=4]  *movsi_internal/0
	movl	36(%esp), %edx	# 25	[c=8 l=4]  *movsi_internal/0
	movl	%eax, (%esp)	# 26	[c=4 l=3]  *movsi_internal/1
	movl	%edx, 4(%esp)	# 27	[c=4 l=4]  *movsi_internal/1
	vmovq	(%esp), %xmm1	# 21	[c=24 l=5]  *movdi_to_v2di
	vpsllq	$3, %xmm1, %xmm0	# 7	[c=4 l=5]  ashlv2di3/1

but ./cc1 -O2 -m32 -march=skylake-avx512 shows:

	movl	32(%esp), %eax	# 23	[c=8 l=4]  *movsi_internal/0
	movl	36(%esp), %edx	# 24	[c=8 l=4]  *movsi_internal/0
	movl	%eax, (%esp)	# 25	[c=4 l=3]  *movsi_internal/1
	movl	%edx, 4(%esp)	# 26	[c=4 l=4]  *movsi_internal/1
	vpsllq	$3, (%esp), %xmm0	# 7	[c=20 l=6]  *ashlv2di3/1

where VPSLLQ still loads V2DImode from the stack.