https://gcc.gnu.org/bugzilla/show_bug.cgi?id=78255

            Bug ID: 78255
           Summary: [5/6/7 regression] Indirect sibling call causing wrong
                    code generation for ARM
           Product: gcc
           Version: 7.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: avieira at gcc dot gnu.org
  Target Milestone: ---

As first reported by Andrew on
https://bugs.launchpad.net/gcc-arm-embedded/+bug/1616992

To reproduce on trunk:
$ cat test.c
#include <string.h>
/* Table of eight function pointers (fun0..fun7), each taking no arguments
   and returning void.  A single global instance, `table`, is defined
   together with the type. */
struct table_s
    {
    void (*fun0)
        ( void );
    void (*fun1)
        ( void );
    void (*fun2)
        ( void );
    void (*fun3)
        ( void );
    void (*fun4)
        ( void );
    void (*fun5)
        ( void );
    void (*fun6)
        ( void );
    void (*fun7)
        ( void );
    } table;

/* Five distinct callbacks used to populate the table.  Each body is a single
   inline-asm "mov r0, r0" (moves r0 onto itself). */
void callback0(){__asm("mov r0, r0 \n\t");}
void callback1(){__asm("mov r0, r0 \n\t");}
void callback2(){__asm("mov r0, r0 \n\t");}
void callback3(){__asm("mov r0, r0 \n\t");}
void callback4(){__asm("mov r0, r0 \n\t");}

/* Reproducer entry point: zero the global table, install callback0..callback4
   into fun0..fun4, then call through table.fun0 as the last statement. */
void test(void) {
    memset(&table, 0, sizeof table);

    /* Empty asm statement whose only effect is to declare r3 as clobbered. */
    asm volatile ("" : : : "r3");

    table.fun0 = callback0;
    table.fun1 = callback1;
    table.fun2 = callback2;
    table.fun3 = callback3;
    table.fun4 = callback4;
    /* Indirect call in tail position; with the command line given in this
       report it is emitted as an indirect register sibling call. */
    table.fun0();
}

$ arm-none-eabi-gcc -S -O2 -mthumb -mcpu=cortex-m3 test.c
$ cat test.s
...
        ldr     r5, .L8+4
        ldr     r3, .L8+8
        ldr     r0, .L8+12
        ldr     r1, .L8+16
        ldr     r2, .L8+20
        str     r5, [r4]
        str     r0, [r4, #4]
        str     r1, [r4, #8]
        str     r2, [r4, #12]
        str     r3, [r4, #16]
        pop     {r3, r4, r5, lr}
        bx      r3      @ indirect register sibling call
...

As reported, r3 is "restored" by the 'pop' in the epilogue before it is used
for the sibling call, so by the time 'bx r3' executes it no longer contains the
address of the function being called.

I believe this is because 'arm_get_frame_offsets' uses the function
'any_sibcall_could_use_r3' to determine whether 'r3' can safely be used to
align the stack. This is done before the address of the sibcall has been
assigned a hard register, so 'any_sibcall_could_use_r3' returns 'false' and
'r3' is pushed and popped in the prologue and epilogue.

Reply via email to