For comparison, here is the fragment of zcopy_tx from the release loader.elf,
up to the call to eventfd() that follows operator new:

Dump of assembler code for function zcopy_tx(int, zmsghdr*):
   0x0000000040100da0 <+0>:     stp     x29, x30, [sp, #-144]!
   0x0000000040100da4 <+4>:     mov     x29, sp
   0x0000000040100da8 <+8>:     stp     x21, x22, [sp, #32]
   0x0000000040100dac <+12>:    mov     x22, x1
   0x0000000040100db0 <+16>:    stp     x19, x20, [sp, #16]
   0x0000000040100db4 <+20>:    mov     w20, w0
   0x0000000040100db8 <+24>:    mov     x0, #0x8                        // #8
   0x0000000040100dbc <+28>:    stp     x23, x24, [sp, #48]
   0x0000000040100dc0 <+32>:    stp     x25, x26, [sp, #64]
   0x0000000040100dc4 <+36>:    stp     x27, x28, [sp, #80]
   0x0000000040100dc8 <+40>:    stp     xzr, xzr, [sp, #104]
   0x0000000040100dcc <+44>:    stp     xzr, xzr, [sp, #120]
   0x0000000040100dd0 <+48>:    str     xzr, [sp, #136]
   0x0000000040100dd4 <+52>:    bl      0x403920e0 <_Znwm>
   0x0000000040100dd8 <+56>:    mov     x23, x0
   0x0000000040100ddc <+60>:    str     x0, [x22, #64]
   0x0000000040100de0 <+64>:    mov     w1, #0x800                      // #2048
   0x0000000040100de4 <+68>:    mov     w0, #0x0                        // #0
   0x0000000040100de8 <+72>:    movk    w1, #0x8, lsl #16
   0x0000000040100dec <+76>:    str     xzr, [x23]
   0x0000000040100df0 <+80>:    bl      0x403520e0 <eventfd(unsigned int, int)>

Now the equivalent fragment from the debug build (the crash happens at PC
0x0000000040111e80 <zcopy_tx+84>):

Dump of assembler code for function zcopy_tx(int, zmsghdr*):
   0x0000000040111e2c <+0>:     stp     x29, x30, [sp, #-208]!
   0x0000000040111e30 <+4>:     mov     x29, sp
   0x0000000040111e34 <+8>:     stp     x19, x20, [sp, #16]
   0x0000000040111e38 <+12>:    str     w0, [sp, #44]
   0x0000000040111e3c <+16>:    str     x1, [sp, #32]
   0x0000000040111e40 <+20>:    str     xzr, [sp, #88]
   0x0000000040111e44 <+24>:    str     xzr, [sp, #96]
   0x0000000040111e48 <+28>:    str     xzr, [sp, #104]
   0x0000000040111e4c <+32>:    str     xzr, [sp, #112]
   0x0000000040111e50 <+36>:    str     xzr, [sp, #120]
   0x0000000040111e54 <+40>:    str     xzr, [sp, #184]
   0x0000000040111e58 <+44>:    ldr     x0, [sp, #32]
   0x0000000040111e5c <+48>:    str     x0, [sp, #176]
   0x0000000040111e60 <+52>:    mov     x0, #0x8                        // #8
   0x0000000040111e64 <+56>:    bl      0x405b7e60 <_Znwm>
   0x0000000040111e68 <+60>:    mov     x19, x0
   0x0000000040111e6c <+64>:    mov     x0, x19
   0x0000000040111e70 <+68>:    bl      0x40112544 <ztx_handle::ztx_handle()>
   0x0000000040111e74 <+72>:    str     x19, [sp, #168]
   0x0000000040111e78 <+76>:    ldr     x0, [sp, #32]
   0x0000000040111e7c <+80>:    ldr     x1, [sp, #168]
   0x0000000040111e80 <+84>:    str     x1, [x0, #64]                   // <-- PC reported in the stack trace
   0x0000000040111e84 <+88>:    mov     w1, #0x800                      // #2048
   0x0000000040111e88 <+92>:    movk    w1, #0x8, lsl #16
   0x0000000040111e8c <+96>:    mov     w0, #0x0                        // #0
   0x0000000040111e90 <+100>:   bl      0x40557c1c <eventfd(unsigned int, int)>

Assembly of _Znwm which I believe is the same in both cases:

Dump of assembler code for function _Znwm:
   0x00000000405b7e60 <+0>: stp x29, x30, [sp, #-32]!
   0x00000000405b7e64 <+4>: cmp x0, #0x0
   0x00000000405b7e68 <+8>: mov x29, sp
   0x00000000405b7e6c <+12>: str x19, [sp, #16]
   0x00000000405b7e70 <+16>: csinc x19, x0, xzr, ne  // ne = any
   0x00000000405b7e74 <+20>: mov x0, x19
   0x00000000405b7e78 <+24>: bl 0x40406f4c <malloc(size_t)>
   0x00000000405b7e7c <+28>: cbz x0, 0x405b7e8c <_Znwm+44>
   0x00000000405b7e80 <+32>: ldr x19, [sp, #16]
   0x00000000405b7e84 <+36>: ldp x29, x30, [sp], #32
   0x00000000405b7e88 <+40>: ret
   0x00000000405b7e8c <+44>: bl 0x405b7e50 <_ZSt15get_new_handlerv>
   0x00000000405b7e90 <+48>: cbz x0, 0x405b7e9c <_Znwm+60>
   0x00000000405b7e94 <+52>: blr x0
   0x00000000405b7e98 <+56>: b 0x405b7e74 <_Znwm+20>
   0x00000000405b7e9c <+60>: mov x0, #0x8                   // #8
   0x00000000405b7ea0 <+64>: bl 0x405b6170 <__cxa_allocate_exception>
   0x00000000405b7ea4 <+68>: adrp x3, 0x40098000
   0x00000000405b7ea8 <+72>: adrp x2, 0x40099000
   0x00000000405b7eac <+76>: adrp x1, 0x40098000
   0x00000000405b7eb0 <+80>: ldr x3, [x3, #1128]
   0x00000000405b7eb4 <+84>: ldr x2, [x2, #928]
   0x00000000405b7eb8 <+88>: add x3, x3, #0x10
   0x00000000405b7ebc <+92>: ldr x1, [x1, #2992]
   0x00000000405b7ec0 <+96>: str x3, [x0]
   0x00000000405b7ec4 <+100>: bl 0x405b76e0 <__cxa_throw>

And ztx_handle::ztx_handle() from the debug build:

Dump of assembler code for function ztx_handle::ztx_handle():
   0x0000000040112544 <+0>: stp x29, x30, [sp, #-32]!
   0x0000000040112548 <+4>: mov x29, sp
   0x000000004011254c <+8>: str x0, [sp, #24]
   0x0000000040112550 <+12>: ldr x0, [sp, #24]
   0x0000000040112554 <+16>: mov x1, #0x0                   // #0
   0x0000000040112558 <+20>: bl 0x4011245c <std::atomic<unsigned long>::atomic(unsigned long)>
   0x000000004011255c <+24>: nop
   0x0000000040112560 <+28>: ldp x29, x30, [sp], #32
   0x0000000040112564 <+32>: ret
End of assembler dump.

On Mon, Feb 15, 2021 at 1:11 PM Avi Kivity <[email protected]> wrote:

>
> On 11/02/2021 07.42, Waldek Kozaczuk wrote:
> > Apart from the TLS issue reported here OSv can be built in the aarch64
> > debug mode.
> >
> > Some of the tests pass as well (as on release) but there are some that
> > seem to fail in a similar way due to possibly wrong compiled code in
> > kernel possibly due to -O0.
> >
> > Here is one example:
> >
> > ./scripts/run.py -e '/tests/tst-bsd-tcp1-zsnd.so' -c 1
> >
> > page fault outside application, addr: 0x0000000000000000
> >
> > [registers]
> >
> > PC: 0x0000000040111e40 <zcopy_tx+84>
> >
> > X00: 0x0000000000000001 X01: 0xffffa0004100f9c0 X02: 0x0000000000000008
> >
> > X03: 0x0000000000000008 X04: 0x0000000000000008 X05: 0x0000000000007001
> >
> > X06: 0x0000000000000000 X07: 0x00000000b71b0000 X08: 0xffff800041782aa0
> >
> > X09: 0x0000000000000000 X10: 0x0000000000000002 X11: 0x0000000000000000
> >
> > X12: 0x2050435420612073 X13: 0x006567617373656d X14: 0x0000000000001af8
> >
> > X15: 0x0000000000000000 X16: 0x000010000005b5d0 X17: 0x0000000040111dec
> >
> > X18: 0x0000000000001120 X19: 0xffffa0004100f9c0 X20: 0x0000000000000190
> >
> > X21: 0x0000000000000001 X22: 0xffff800041782db8 X23: 0x0000000000000001
> >
> > X24: 0xffffa000414c4b80 X25: 0xffff800041793d98 X26: 0xffff800041793da8
> >
> > X27: 0x00002000006ffb00 X28: 0x000010000005a000 X29: 0xffff800041782c10
> >
> > X30: 0x0000000040111e34 SP:0xffff800041782c10 ESR: 0x0000000096000046
> >
> > PSTATE: 0x0000000060000345
> >
> > Aborted
> >
> >
> > [backtrace]
> >
> > 0x00000000400e9e14 <abort(char const*, ...)+288>
> >
> >
> > After connecting with gdb and reconstructing the stacktrace, it looks
> > like this:
> >
> > #0 0x0000000040111e40 in zcopy_tx(s=5, zm=0x1) at
> > bsd/sys/kern/uipc_syscalls.cc:1027
> >
> > #1 0x0000100000037954 in test_bsd_tcp1::tcp_server(this=0x2000006ff988)
> > at /home/wkozaczuk/projects/osv/tests/tst-bsd-tcp1-zsnd.cc:114
> >
> > #2 0x0000100000037a64 in
> > test_bsd_tcp1::run()::{lambda()#1}::operator()()
> > const(__closure=<optimized out>) at
> > /home/wkozaczuk/projects/osv/tests/tst-bsd-tcp1-zsnd.cc:229
> >
> > #3 std::__invoke_impl<void,
> > test_bsd_tcp1::run()::{lambda()#1}&>(std::__invoke_other,
> > test_bsd_tcp1::run()::{lambda()#1}&)(__f=...) at
> > /usr/include/c++/10/bits/invoke.h:60
> >
> > #4 std::__invoke_r<void,
> > test_bsd_tcp1::run()::{lambda()#1}&>(std::__is_invocable&&,
> > (test_bsd_tcp1::run()::{lambda()#1}&)...)(__fn=...) at
> > /usr/include/c++/10/bits/invoke.h:153
> >
> > #5 std::_Function_handler<void (),
> > test_bsd_tcp1::run()::{lambda()#1}>::_M_invoke(std::_Any_data
> > const&)(__functor=...) at /usr/include/c++/10/bits/std_function.h:291
> >
> > #6 0x000000004031cba8 in std::function<void ()>::operator()()
> > const(this=0xffffa0004168d630) at
> > /usr/include/c++/10/bits/std_function.h:622
> >
> > #7 0x000000004043e1cc in sched::thread::main(this=0xffffa0004168d600) at
> > core/sched.cc:1219
> >
> > #8 0x000000004043a188 in sched::thread_main_c(t=0xffffa0004168d600) at
> > arch/aarch64/arch-switch.hh:186
> >
> > #9 0x0000000040439cf4 in sched::thread::switch_to(this=0x0) at
> > arch/aarch64/arch-switch.hh:28
> >
> > #10 0x0000000000000000 in ??()
> >
> > Backtrace stopped: previous frame identical to this frame (corrupt
> > stack?)
> >
> > (gdb) frame 1
> >
> > #1 0x0000100000037954 in test_bsd_tcp1::tcp_server(this=0x2000006ff988)
> > at /home/wkozaczuk/projects/osv/tests/tst-bsd-tcp1-zsnd.cc:114
> >
> > 114 int bytes2 = zcopy_tx(client_s, &zm);
> >
> > (gdb) p client_s
> >
> > $1 = 5
> >
> > (gdb) p &zm
> >
> > $2 = (zmsghdr *) 0xffff800041782d40
> >
> >
> > As you can see the test app calls zcopy_tx() which takes 2 arguments:
> >
> > ssize_t zcopy_tx(int s, struct zmsghdr *zm)
> >
> > the 1st one is int and has value 5 in the caller - the test app - and
> > is received as such
> >
> > in the kernel zcopy_tx.
> >
> >
> > The second one - the address of struct zmsghdr - is problematic. On
> > the caller's side looks OK but when received in the kernel it is wrong
> > - 0x1.
> >
> > Why?
> >
> >
> > I saw another test crashing in a similar way when the caller (another
> > test) would pass 3 arguments to kernel function and 2 of those
> > (non-addresses) were passed correctly but the 3rd one - address one
> > was not.
> >
> >
> > Any ideas what might be going on?
> >
> >
>
>
> Can you provide a disassembly of zcopy_tx? From the start of the
> function until the crash site (there should only be register saves and
> other preamble, and the call to operator new, if I read it correctly).
>
>
> Maybe save zm in some global before calling new, to see if operator new
> is the problem.
>
>

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/CAL9cFfN9gbQ45wdqMc_hsx2tN4a4kBKYME5zuzvanosUim-YtQ%40mail.gmail.com.

Reply via email to