This patch addresses the inefficient return of complex values (with the x86
ABI) where the result is returned to the caller in an integer register.
Currently this results in RTL expansion spilling the value to memory
and reloading it in an integer register. The patch below recognizes
this case, and composes the real and imaginary parts using shifts and
addition. The real part always appears first in memory, so it is the lowpart
on little-endian targets, and the highpart on big-endian targets.
Consider the new test case:
_Complex float mem;
_Complex float foo(_Complex float x) { return x; }
_Complex float bar() { return mem; }
Currently, with -O2 GCC generates:
foo: movss %xmm0, -8(%rsp)
shufps $85, %xmm0, %xmm0
movss %xmm0, -4(%rsp)
movq -8(%rsp), %xmm0
ret
bar: movss mem(%rip), %xmm0
movss %xmm0, -8(%rsp)
movss mem+4(%rip), %xmm0
movss %xmm0, -4(%rsp)
movq -8(%rsp), %xmm0
ret
With this patch, we now generate:
foo: ret
bar: movl mem+4(%rip), %edx
movl mem(%rip), %eax
salq $32, %rdx
addq %rdx, %rax
movq %rax, %xmm0
ret
For those folks noticing that bar could be improved further, I have
a follow-up patch to the i386 STV2 pass to perform concatsidi2 in
SSE registers.
This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check, both with and without --target_board=unix{-m32}
with no new failures. Ok for mainline?
2026-07-01 Roger Sayle <[email protected]>
gcc/ChangeLog
PR target/48609
* expr.cc (emit_group_load_1): When passing a complex value in an
integer mode of the same size, explicitly construct (hi<<N)+lo to
avoid spilling to memory before reload.
gcc/testsuite/ChangeLog
PR target/48609
* gcc.target/i386/pr48609-2.c: New test case.
Thanks in advance,
Roger
--
diff --git a/gcc/expr.cc b/gcc/expr.cc
index 43931687d63..d10626979ab 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -3062,6 +3062,28 @@ emit_group_load_1 (rtx *tmps, rtx dst, rtx orig_src, tree type,
&& known_eq (bytelen, GET_MODE_SIZE (mode)))
/* Let emit_move_complex do the bulk of the work. */
tmps[i] = src;
+ else if (SCALAR_INT_MODE_P (mode)
+ && COMPLEX_MODE_P (GET_MODE (src))
+ && known_eq (GET_MODE_SIZE (mode),
+ GET_MODE_SIZE (GET_MODE (src)))
+ && known_eq (bytelen, GET_MODE_SIZE (mode)))
+ {
+ /* When passing a complex value in an integer mode of the same
+ size, explicitly construct (highpart<<isize)+lowpart to
+ avoid spilling to memory before reload. */
+ rtx tmp = read_complex_part (src, !BYTES_BIG_ENDIAN);
+ scalar_int_mode imode = int_mode_for_mode (GET_MODE (tmp)).require();
+ tmp = gen_lowpart (imode, tmp);
+ tmp = simplify_gen_unary (ZERO_EXTEND, mode, tmp, imode);
+ rtx result = force_reg (mode, tmp);
+ result = expand_shift (LSHIFT_EXPR, mode, result,
+ GET_MODE_BITSIZE (imode), NULL_RTX, 1);
+ tmp = read_complex_part (src, BYTES_BIG_ENDIAN);
+ tmp = gen_lowpart (imode, tmp);
+ tmp = simplify_gen_unary (ZERO_EXTEND, mode, tmp, imode);
+ result = simplify_gen_binary (PLUS, mode, result, tmp);
+ tmps[i] = force_reg (mode, result);
+ }
else if (GET_CODE (src) == CONCAT)
{
poly_int64 slen = GET_MODE_SIZE (GET_MODE (src));
diff --git a/gcc/testsuite/gcc.target/i386/pr48609-2.c b/gcc/testsuite/gcc.target/i386/pr48609-2.c
new file mode 100644
index 00000000000..f7c0fa06a71
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr48609-2.c
@@ -0,0 +1,17 @@
+/* PR target/48609 */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+
+_Complex float mem;
+
+_Complex float foo(_Complex float x)
+{
+ return x;
+}
+
+_Complex float bar()
+{
+ return mem;
+}
+
+/* { dg-final { scan-assembler-not "%rsp" } } */