https://gcc.gnu.org/g:83880beea997e435f56de9cab08929e213732025

commit r17-2059-g83880beea997e435f56de9cab08929e213732025
Author: Roger Sayle <[email protected]>
Date:   Wed Jul 1 14:31:10 2026 +0100

    i386: Handle (zero_extend:DI (mem:SI)) in x86's STV.
    
    This patch enhances the i386 backend's stv2 pass to consider the
    pattern (zero_extend:DI (mem:SI ...)) to be a candidate for conversion.
    Loading an SImode value into an SSE register clears the rest of the
    vector, i.e. effectively (v4si){ mem, 0, 0, 0 }, which can be used
    to conveniently implement zero-extension to DImode, when performing
    V2DImode Scalar-To-Vector (STV) conversion.
    
    Consider the new test case:
    
    long long y,z;
    unsigned int p;
    
    void foo()
    {
        long long t = p;
        t ^= y;
        z = t;
    }
    
    With -m32 -O2 -msse2 this currently generates:
    
    foo:    movl    p, %eax
            xorl    %edx, %edx
            movd    %edx, %xmm1
            movd    %eax, %xmm0
            punpckldq       %xmm1, %xmm0
            movq    y, %xmm1
            pxor    %xmm1, %xmm0
            movq    %xmm0, z
            ret
    
    With this patch we now generate:
    
    foo:    movq    y, %xmm1
            movd    p, %xmm0
            pxor    %xmm1, %xmm0
            movq    %xmm0, z
            ret
    
    2026-07-01  Roger Sayle  <[email protected]>
    
    gcc/ChangeLog
            * config/i386/i386-features.cc (compute_convert_gain) <ZERO_EXTEND>:
            Provide costs for the new transformation.
            (convert_insn): Implement *zero_extendsidi2 using the backend's
            vec_setv2di_0_zero_extendsi_1 pattern (i.e. movd mem, %xmm).
            (general_scalar_to_vector_candidate_p): Consider the pattern
            (zero_extend:DI (mem:SI ...)) to be a candidate for DImode STV.
    
    gcc/testsuite/ChangeLog
            * gcc.target/i386/sse2-stv-6.c: New test case.

Diff:
---
 gcc/config/i386/i386-features.cc           | 22 ++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/sse2-stv-6.c | 17 +++++++++++++++++
 2 files changed, 39 insertions(+)

diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
index 10d84f520016..e0fc13efe86e 100644
--- a/gcc/config/i386/i386-features.cc
+++ b/gcc/config/i386/i386-features.cc
@@ -863,6 +863,16 @@ general_scalar_chain::compute_convert_gain ()
                }
              break;
 
+           case ZERO_EXTEND:
+             /* mov eax (6 bytes) vs movd xmm0 (8 bytes). */
+             /* mov eax; xor edx,edx (7 bytes).  */
+             if (speed_p)
+               igain += COSTS_N_INSNS (ix86_cost->int_load[2]
+                                       - ix86_cost->sse_load[0]) / 2;
+             else
+               igain += COSTS_N_BYTES (TARGET_64BIT ? -2 : -1);
+             break;
+
            default:
              gcc_unreachable ();
            }
@@ -1588,6 +1598,11 @@ general_scalar_chain::convert_insn (rtx_insn *insn)
        }
       break;
 
+    case ZERO_EXTEND:
+      /* *zero_extendsidi2 becomes *vec_setv2di_0_zero_extendsi_1.  */
+      src = gen_rtx_VEC_CONCAT (V2DImode, src, const0_rtx);
+      break;
+
     default:
       gcc_unreachable ();
     }
@@ -2499,6 +2514,13 @@ general_scalar_to_vector_candidate_p (rtx_insn *insn, enum machine_mode mode)
             && XVECLEN (XEXP (src, 1), 0) == 1
             && CONST_INT_P (XVECEXP (XEXP (src, 1), 0, 0));
 
+    case ZERO_EXTEND:
+      /* *zero_extendsidi2 becomes *vec_setv2di_0_zero_extendsi_1.  */
+      return mode == DImode
+            && REG_P (dst)
+            && GET_MODE (XEXP (src, 0)) == SImode
+            && MEM_P (XEXP (src, 0));
+
     default:
       return false;
     }
diff --git a/gcc/testsuite/gcc.target/i386/sse2-stv-6.c b/gcc/testsuite/gcc.target/i386/sse2-stv-6.c
new file mode 100644
index 000000000000..4e1095418daa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-stv-6.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target ia32 } } */
+/* { dg-options "-m32 -O2 -msse2 -mno-stackrealign" } */
+
+long long y,z;
+unsigned int p;
+
+void foo()
+{
+    long long t = p;
+    t ^= y;
+    z = t;
+}
+
+/* { dg-final { scan-assembler-not "movl" } } */
+/* { dg-final { scan-assembler-not "xorl" } } */
+/* { dg-final { scan-assembler-not "punpckldq" } } */
+

Reply via email to