https://gcc.gnu.org/bugzilla/show_bug.cgi?id=120870
--- Comment #47 from Uroš Bizjak <ubizjak at gmail dot com> ---
The testcase also fails when tail is *not* marked with the preserve_none attribute:
--cut here--
/* External callee taking seven long arguments.  The body is empty; the
   function exists only so that caller has a seven-argument call to make.  */
void
ext (long a, long b, long c, long d, long e, long f, long g)
{
}
/* Target of the musttail call in caller.  It carries no preserve_none
   attribute and its body is empty.  */
void
tail (long a, long b)
{
}
/* Function under test.  It is declared preserve_none, declares a
   32-byte-aligned local array, calls ext with all seven arguments and
   then returns through a musttail call to tail.  */
__attribute__ ((preserve_none))
void
caller (long a, long b, long c, long d, long e, long f, long g)
{
/* x is never read or written; only its alignment request matters.  */
__attribute__ ((aligned (32))) long x[4]; // for stack alignment
ext (a, b, c, d, e, f, g);
/* The musttail attribute demands that this return be a tail call.  */
__attribute__ ((musttail)) return tail (a + b, b + c);
}
/* Test driver: invokes caller once with the constant arguments 1..7.
   Marked noipa and static.  */
__attribute__ ((noipa))
static void
do_test ()
{
caller (1, 2, 3, 4, 5, 6, 7);
}
/* Entry point.  Runs do_test only when __builtin_cpu_supports reports
   "x86-64-v3"; returns 0 whether or not the test ran.  */
int
main (void)
{
if (__builtin_cpu_supports ("x86-64-v3"))
do_test ();
return 0;
}
--cut here--
The issue is in the caller function.
With only this patch, where RBX is returned as the DRAP register for preserve_none:
--cut here--
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index e73c2d7f7d0..db532251f98 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -496,6 +496,14 @@ ix86_profile_before_prologue (void)
return flag_fentry != 0;
}
+/* In no-callee-saved and preserve_none functions, reserve BX_REG for
+ DRAP register.
+
+ FIXME: Why is BX_REG the only working DRAP register in preserve_none
+ functions? Is it because only BX_REG is 0 in CALL_USED_REGISTERS,
+ which has some permanent impact on the register allocator? */
+#define X86_NO_CALLEE_DRAP_REG BX_REG
+
/* Update register usage after having seen the compiler flags. */
static void
@@ -7940,6 +7948,11 @@ pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
static unsigned int
find_drap_reg (void)
{
+ if (cfun->machine->call_saved_registers == TYPE_PRESERVE_NONE
+ || (cfun->machine->call_saved_registers
+ == TYPE_NO_CALLEE_SAVED_REGISTERS))
+ return X86_NO_CALLEE_DRAP_REG;
+
tree decl = cfun->decl;
/* Always use callee-saved register if there are no caller-saved
--cut here--
we get this in _.ira (gcc -O0 -march=x86-64-v3):
(note 10 1 32 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
(insn/f 32 10 2 2 (set (reg:DI 107)
(reg:DI 3 bx)) "pr120870-1.c":17:1 99 {*movdi_internal}
(expr_list:REG_DEAD (reg:DI 3 bx)
(expr_list:REG_CFA_SET_VDRAP (reg:DI 107)
(nil))))
(insn 2 32 3 2 (set (reg/v:DI 100 [ a ])
(reg:DI 40 r12 [ a ])) "pr120870-1.c":17:1 99 {*movdi_internal}
(expr_list:REG_DEAD (reg:DI 40 r12 [ a ])
(nil)))
(insn 3 2 4 2 (set (reg/v:DI 101 [ b ])
(reg:DI 41 r13 [ b ])) "pr120870-1.c":17:1 99 {*movdi_internal}
(expr_list:REG_DEAD (reg:DI 41 r13 [ b ])
(nil)))
where reload does:
(note 10 1 32 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
(insn/f 32 10 3 2 (set (reg:DI 0 ax [107])
(reg:DI 3 bx)) "pr120870-1.c":17:1 99 {*movdi_internal}
(expr_list:REG_CFA_SET_VDRAP (reg:DI 0 ax [107])
(nil)))
(insn 3 32 4 2 (set (reg/v:DI 3 bx [orig:101 b ] [101])
(reg:DI 41 r13 [ b ])) "pr120870-1.c":17:1 99 {*movdi_internal}
(nil))
(insn 3) instantly clobbers RBX. Shouldn't reload avoid the DRAP register?
Prologue and epilogue emit:
(insn/f 34 10 35 2 (set (mem:DI (pre_dec:DI (reg/f:DI 7 sp)) [0 S8 A8])
(reg:DI 3 bx)) "pr120870-1.c":17:1 -1
(nil))
(insn/f 35 34 36 2 (set (reg:DI 3 bx)
(plus:DI (reg/f:DI 7 sp)
(const_int 16 [0x10]))) "pr120870-1.c":17:1 -1
(nil))
...
(insn/f 48 47 49 2 (set (reg/f:DI 7 sp)
(plus:DI (reg:DI 3 bx)
(const_int -16 [0xfffffffffffffff0]))) "pr120870-1.c":21:1 -1
(expr_list:REG_CFA_DEF_CFA (plus:DI (reg/f:DI 7 sp)
(const_int 16 [0x10]))
(nil)))
(insn/f 49 48 25 2 (set (reg:DI 3 bx)
(mem:DI (post_inc:DI (reg/f:DI 7 sp)) [0 S8 A8])) "pr120870-1.c":21:1 -1
(expr_list:REG_CFA_ADJUST_CFA (set (reg/f:DI 7 sp)
(plus:DI (reg/f:DI 7 sp)
(const_int 8 [0x8])))
(nil)))
But this fails to restore the correct RSP in (insn 48) because RBX has been clobbered.
So, should we really mark preserve_none functions with
fixed_regs[X86_NO_CALLEE_DRAP_REG] = 1;
or should the RA avoid the DRAP register by itself?