Hello,
In gcc 9.0, r266355 introduced a change in the ABI for the mingw-w64 (64-bit) target --
"Return AX_REG instead of FIRST_SSE_REG for 4 or 8 byte modes". It is wrong --
functions now return double in rax instead of the xmm0 register. This patch shows
how to change the return register from xmm0 to eax/rax (which is cool).
There are 2 problems:
1) r266355 broke 64-bit gcc 9 for the mingw-w64 target
2) gcc could/should return aggregate types of up to 8 bytes via the EAX register
(not st(0) or xmm0)
I've prepared patches for gcc (branches 7 and 8 + trunk 9) that check that the type
is not an aggregate of up to 8 bytes before returning the value in st(0)/xmm0 -- now it
should behave like C++ in MSVC. (These are svn patches -- please apply them with the -p0 option.)
I think this is not a new problem, so if you have any hints or opinions -- please
share.
My questions are:
Are the patches OK?
Are the changes really needed (returning a struct containing a float in eax)?
Now (with patches) it works like this:
$ cat t.cpp
/* Test case: returns a plain float scalar (not wrapped in an aggregate). */
float fun1(void)
{
return 4.14f;
}
/* Aggregate type whose only member is a single float. */
typedef struct {float x;} Float;
/* Test case: returns the single-float aggregate Float by value,
   with its member x set to 4.14f. */
Float fun2(void)
{
Float v;
v.x = 4.14f;
return v;
}
/* Test case: returns a plain double scalar (not wrapped in an aggregate). */
double fun3(void)
{
return 3.13;
}
/* Aggregate type whose only member is a single double. */
typedef struct {double x;} Double;
/* Test case: returns the single-double aggregate Double by value,
   with its member x set to 3.13. */
Double fun4(void)
{
Double v;
v.x = 3.13;
return v;
}
Mateusz@Mateusz-i7 /c/temp
$ g++ -c -Wall -O2 -o t.o t.cpp
Mateusz@Mateusz-i7 /c/temp
$ objdump -dr t.o
t.o: file format pe-x86-64
Disassembly of section .text:
<_Z4fun1v>:
0: f3 0f 10 05 00 00 00 movss 0x0(%rip),%xmm0 # 8 <_Z4fun1v+0x8>
7: 00
4: R_X86_64_PC32 .rdata
8: c3 retq
9: 0f 1f 80 00 00 00 00 nopl 0x0(%rax)
0010 <_Z4fun2v>:
10: 8b 05 00 00 00 00 mov 0x0(%rip),%eax # 16 <_Z4fun2v+0x6>
12: R_X86_64_PC32 .rdata
16: c3 retq
17: 66 0f 1f 84 00 00 00 nopw 0x0(%rax,%rax,1)
1e: 00 00
0020 <_Z4fun3v>:
20: f2 0f 10 05 08 00 00 movsd 0x8(%rip),%xmm0 # 30 <_Z4fun4v>
27: 00
24: R_X86_64_PC32 .rdata
28: c3 retq
29: 0f 1f 80 00 00 00 00 nopl 0x0(%rax)
0030 <_Z4fun4v>:
30: 48 8b 05 08 00 00 00 mov 0x8(%rip),%rax # 3f <_Z4fun4v+0xf>
33: R_X86_64_PC32 .rdata
37: c3 retq
38: 90 nop
39: 90 nop
3a: 90 nop
3b: 90 nop
3c: 90 nop
3d: 90 nop
3e: 90 nop
3f: 90 nop
Mateusz@Mateusz-i7 /c/temp
$ m32- 900
Mateusz@Mateusz-i7 /c/temp
$ g++ -c -Wall -O2 -o t32.o t.cpp
Mateusz@Mateusz-i7 /c/temp
$ objdump -dr t32.o
t32.o: file format pe-i386
Disassembly of section .text:
<__Z4fun1v>:
0: d9 05 00 00 00 00 flds 0x0
2: dir32 .rdata
6: c3 ret
7: 8d b4 26 00 00 00 00 lea 0x0(%esi,%eiz,1),%esi
e: 66 90 xchg %ax,%ax
0010 <__Z4fun2v>:
10: a1 00 00 00 00 mov 0x0,%eax
11: dir32 .rdata
15: c3 ret
16: 8d b4 26 00 00 00 00 lea 0x0(%esi,%eiz,1),%esi
1d: 8d 76 00 lea 0x0(%esi),%esi
0020 <__Z4fun3v>:
20: dd 05 08 00 00 00 fldl 0x8
22: dir32 .rdata
26: c3 ret
27: 8d b4 26 00 00 00 00 lea 0x0(%esi,%eiz,1),%esi
2e: 66 90 xchg %ax,%ax
0030 <__Z4fun4v>:
30: b8 0a d7 a3 70 mov $0x70a3d70a,%eax
35: ba 3d 0a 09 40 mov $0x40090a3d,%edx
3a: c3 ret
3b: 90 nop
3c: 90 nop
3d: 90 nop
3e: 90 nop
3f: 90 nop
Regards,
Mateusz
Index: gcc/config/i386/i386.c
===
--- gcc/config/i386/i386.c (revision 267306)
+++ gcc/config/i386/i386.c (working copy)
@@ -10553,6 +10553,66 @@
}
static rtx
+function_value_ms_32 (machine_mode orig_mode, machine_mode mode,
+ const_tree fntype, const_tree fn, const_tree valtype)
+{
+ unsigned int regno;
+
+ /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
+ we normally prevent this case when mmx is not available. However
+ some ABIs may require the result to be returned like DImode. */
+ if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
+regno = FIRST_MMX_REG;
+
+ /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
+ we prevent this case when sse is not available. However some ABIs
+ may require the result to be returned like integer TImode. */
+ else i