This patch addresses the inefficient return of complex values (with the x86
ABI) where the result is returned to the caller in an integer register.
Currently this results in RTL expansion spilling the value to memory
and reloading it in an integer register.  The patch below recognizes
this case, and composes the real and imaginary parts using shifts and
addition.  The real part always appears first in memory, so it is the lowpart
on little-endian targets, and the highpart on big-endian targets.

Consider the new test case:

_Complex float mem;
_Complex float foo(_Complex float x) { return x; }
_Complex float bar() { return mem; }

Currently, with -O2 GCC generates:

foo:    movss   %xmm0, -8(%rsp)
        shufps  $85, %xmm0, %xmm0
        movss   %xmm0, -4(%rsp)
        movq    -8(%rsp), %xmm0
        ret

bar:    movss   mem(%rip), %xmm0
        movss   %xmm0, -8(%rsp)
        movss   mem+4(%rip), %xmm0
        movss   %xmm0, -4(%rsp)
        movq    -8(%rsp), %xmm0
        ret

With this patch, we now generate:

foo:    ret

bar:    movl    mem+4(%rip), %edx
        movl    mem(%rip), %eax
        salq    $32, %rdx
        addq    %rdx, %rax
        movq    %rax, %xmm0
        ret


For those folks noticing that bar could be improved further, I've
a follow-up patch to the i386's STV2 pass, to perform concatsidi2 in
SSE registers.

This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check, both with and without --target_board=unix{-m32}
with no new failures.  Ok for mainline?


2026-07-01  Roger Sayle  <[email protected]>

gcc/ChangeLog
        PR target/48609
        * expr.cc (emit_group_load_1): When passing a complex value in an
        integer mode of the same size, explicitly construct (hi<<N)+lo to
        avoid spilling to memory before reload.

gcc/testsuite/ChangeLog
        PR target/48609
        * gcc.target/i386/pr48609-2.c: New test case.


Thanks in advance,
Roger
--

diff --git a/gcc/expr.cc b/gcc/expr.cc
index 43931687d63..d10626979ab 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -3062,6 +3062,28 @@ emit_group_load_1 (rtx *tmps, rtx dst, rtx orig_src, tree type,
               && known_eq (bytelen, GET_MODE_SIZE (mode)))
        /* Let emit_move_complex do the bulk of the work.  */
        tmps[i] = src;
+      else if (SCALAR_INT_MODE_P (mode)
+              && COMPLEX_MODE_P (GET_MODE (src))
+              && known_eq (GET_MODE_SIZE (mode),
+                           GET_MODE_SIZE (GET_MODE (src)))
+              && known_eq (bytelen, GET_MODE_SIZE (mode)))
+       {
+         /* When passing a complex value in an integer mode of the same
+            size, explicitly construct (highpart<<isize)+lowpart to
+            avoid spilling to memory before reload.  */
+         rtx tmp = read_complex_part (src, !BYTES_BIG_ENDIAN);
+         scalar_int_mode imode = int_mode_for_mode (GET_MODE (tmp)).require();
+         tmp = gen_lowpart (imode, tmp);
+         tmp = simplify_gen_unary (ZERO_EXTEND, mode, tmp, imode);
+         rtx result = force_reg (mode, tmp);
+         result = expand_shift (LSHIFT_EXPR, mode, result,
+                                GET_MODE_BITSIZE (imode), NULL_RTX, 1);
+         tmp = read_complex_part (src, BYTES_BIG_ENDIAN);
+         tmp = gen_lowpart (imode, tmp);
+         tmp = simplify_gen_unary (ZERO_EXTEND, mode, tmp, imode);
+         result = simplify_gen_binary (PLUS, mode, result, tmp);
+         tmps[i] = force_reg (mode, result);
+       }
       else if (GET_CODE (src) == CONCAT)
        {
          poly_int64 slen = GET_MODE_SIZE (GET_MODE (src));
diff --git a/gcc/testsuite/gcc.target/i386/pr48609-2.c b/gcc/testsuite/gcc.target/i386/pr48609-2.c
new file mode 100644
index 00000000000..f7c0fa06a71
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr48609-2.c
@@ -0,0 +1,17 @@
+/* PR target/48609 */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+
+_Complex float mem;
+
+_Complex float foo(_Complex float x)
+{
+  return x;
+}
+
+_Complex float bar()
+{
+  return mem;
+}
+
+/* { dg-final { scan-assembler-not "%rsp" } } */

Reply via email to