This patch enhances the i386 backend's stv2 pass to consider the
pattern (zero_extend:DI (mem:SI ...)) to be a candidate for conversion.
Loading an SImode value into an SSE register clears the rest of the
vector, i.e. effectively (v4si){ mem, 0, 0, 0 }, which can be used
to conveniently implement zero-extension to DImode, when performing
V2DImode Scalar-To-Vector (STV) conversion.

Consider the new test case:

long long y,z;
unsigned int p;

void foo()
{
    long long t = p;
    t ^= y;
    z = t;
}

With -m32 -O2 -msse2 this currently generates:

foo:    movl    p, %eax
        xorl    %edx, %edx
        movd    %edx, %xmm1
        movd    %eax, %xmm0
        punpckldq       %xmm1, %xmm0
        movq    y, %xmm1
        pxor    %xmm1, %xmm0
        movq    %xmm0, z
        ret

With this patch we now generate:

foo:    movq    y, %xmm1
        movd    p, %xmm0
        pxor    %xmm1, %xmm0
        movq    %xmm0, z
        ret


This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check, both with and without --target_board=unix{-m32}
with no new failures.  Ok for mainline?


2026-06-30  Roger Sayle  <[email protected]>

gcc/ChangeLog
        * config/i386/i386-features.cc (compute_convert_gain) <ZERO_EXTEND>:
        Provide costs for the new transformation.
        (convert_insn): Implement *zero_extendsidi2 using the backend's
        vec_setv2di_0_zero_extendsi_1 pattern (i.e. movd mem, %xmm).
        (general_scalar_to_vector_candidate_p): Consider the pattern
        (zero_extend:DI (mem:SI ...)) to be a candidate for DImode STV.

gcc/testsuite/ChangeLog
        * gcc.target/i386/sse2-stv-6.c: New test case.


Thanks in advance,
Roger
--

diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
index 10d84f52001..e0fc13efe86 100644
--- a/gcc/config/i386/i386-features.cc
+++ b/gcc/config/i386/i386-features.cc
@@ -863,6 +863,16 @@ general_scalar_chain::compute_convert_gain ()
                }
              break;
 
+           case ZERO_EXTEND:
+             /* mov eax (6 bytes) vs movd xmm0 (8 bytes). */
+             /* mov eax; xor edx,edx (7 bytes).  */
+             if (speed_p)
+               igain += COSTS_N_INSNS (ix86_cost->int_load[2]
+                                       - ix86_cost->sse_load[0]) / 2;
+             else
+               igain += COSTS_N_BYTES (TARGET_64BIT ? -2 : -1);
+             break;
+
            default:
              gcc_unreachable ();
            }
@@ -1588,6 +1598,11 @@ general_scalar_chain::convert_insn (rtx_insn *insn)
        }
       break;
 
+    case ZERO_EXTEND:
+      /* *zero_extendsidi2 becomes *vec_setv2di_0_zero_extendsi_1.  */
+      src = gen_rtx_VEC_CONCAT (V2DImode, src, const0_rtx);
+      break;
+
     default:
       gcc_unreachable ();
     }
@@ -2499,6 +2514,13 @@ general_scalar_to_vector_candidate_p (rtx_insn *insn, enum machine_mode mode)
             && XVECLEN (XEXP (src, 1), 0) == 1
             && CONST_INT_P (XVECEXP (XEXP (src, 1), 0, 0));
 
+    case ZERO_EXTEND:
+      /* *zero_extendsidi2 becomes *vec_setv2di_0_zero_extendsi_1.  */
+      return mode == DImode
+            && REG_P (dst)
+            && GET_MODE (XEXP (src, 0)) == SImode
+            && MEM_P (XEXP (src, 0));
+
     default:
       return false;
     }
diff --git a/gcc/testsuite/gcc.target/i386/sse2-stv-6.c b/gcc/testsuite/gcc.target/i386/sse2-stv-6.c
new file mode 100644
index 00000000000..4e1095418da
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-stv-6.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target ia32 } } */
+/* { dg-options "-m32 -O2 -msse2 -mno-stackrealign" } */
+
+long long y,z;
+unsigned int p;
+
+void foo()
+{
+    long long t = p;
+    t ^= y;
+    z = t;
+}
+
+/* { dg-final { scan-assembler-not "movl" } } */
+/* { dg-final { scan-assembler-not "xorl" } } */
+/* { dg-final { scan-assembler-not "punpckldq" } } */
+

Reply via email to