Hi! Could someone help me with the following please?
We have a crash in a v5.2.0-based downstream branch. I don't have a reliable reproducer, nor can I find significant differences from upstream code in the touched functions.

#0  0x00007fc2a8dbfe7f in raise () from /lib64/libc.so.6
#1  0x00007fc2a8daa8b5 in abort () from /lib64/libc.so.6
#2  0x00007fc2a8daa789 in __assert_fail_base.cold.0 () from /lib64/libc.so.6
#3  0x00007fc2a8db8576 in __assert_fail () from /lib64/libc.so.6
#4  0x000055fc1d76e195 in aio_ctx_finalize (source=<optimized out>) at ../util/async.c:343
#5  aio_ctx_finalize (source=0x55fc1f9cebf0) at ../util/async.c:311
#6  0x00007fc2aa335789 in g_source_unref_internal () from /lib64/libglib-2.0.so.0
#7  0x00007fc2aa335a0e in g_source_iter_next () from /lib64/libglib-2.0.so.0
#8  0x00007fc2aa336b35 in g_main_context_unref () from /lib64/libglib-2.0.so.0
#9  0x00007fc2aa338d9c in g_main_loop_unref () from /lib64/libglib-2.0.so.0
#10 0x000055fc1d666094 in iothread_instance_finalize (obj=<optimized out>) at ../iothread.c:145
#11 0x000055fc1d644e19 in object_deinit (type=0x55fc1f7db490, obj=<optimized out>) at ../qom/object.c:671
#12 object_finalize (data=0x55fc1f88da00) at ../qom/object.c:685
#13 object_unref (objptr=0x55fc1f88da00) at ../qom/object.c:1183
#14 0x000055fc1d643365 in object_property_del_child (obj=0x55fc1f9a80e0, child=0x55fc1f88da00) at ../qom/object.c:645
#15 0x000055fc1d644618 in object_unparent (obj=<optimized out>) at ../qom/object.c:664
#16 0x000055fc1d6661d9 in iothread_destroy (iothread=<optimized out>) at ../iothread.c:369
#17 0x000055fc1d6ec5d9 in monitor_cleanup () at ../monitor/monitor.c:670
#18 0x000055fc1d63584b in qemu_cleanup () at ../softmmu/vl.c:4562
#19 0x000055fc1d374307 in main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>) at ../softmmu/main.c:51

(gdb) fr 10
#10 0x000055fc1d666094 in iothread_instance_finalize (obj=<optimized out>) at ../iothread.c:145
145             g_main_loop_unref(iothread->main_loop);
(gdb) list
140             iothread->ctx = NULL;
141         }
142         if (iothread->worker_context) {
143             g_main_context_unref(iothread->worker_context);
144             iothread->worker_context = NULL;
145             g_main_loop_unref(iothread->main_loop);
146             iothread->main_loop = NULL;
147         }
148         qemu_sem_destroy(&iothread->init_done_sem);
149     }
(gdb) p iothread
$24 = (IOThread *) 0x55fc1f88da00
(gdb) p mon_iothread
$25 = (IOThread *) 0x55fc1f88da00
(gdb) p *mon_iothread
$26 = {parent_obj = {class = 0x55fc1f92b4e0, free = 0x7fc2aa33e3a0 <g_free>, properties = 0x55fc1f9ad980, ref = 0, parent = 0x0}, thread = {thread = 140473870030592}, ctx = 0x0, run_gcontext = true, worker_context = 0x0, main_loop = 0x55fc1f9a8300, init_done_sem = {lock = {__data = {__lock = 0, __count = 0, __owner = 0, __nusers = 0, __kind = 0, __spins = 0, __elision = 0, __list = {__prev = 0x0, __next = 0x0}}, __size = '\000' <repeats 39 times>, __align = 0}, cond = {__data = {{__wseq = 3, __wseq32 = {__low = 3, __high = 0}}, {__g1_start = 1, __g1_start32 = {__low = 1, __high = 0}}, __g_refs = {0, 0}, __g_size = {0, 0}, __g1_orig_size = 4, __wrefs = 0, __g_signals = {0, 0}}, __size = "\003\000\000\000\000\000\000\000\001", '\000' <repeats 23 times>, "\004", '\000' <repeats 14 times>, __align = 3}, count = 0, initialized = true}, stopping = true, running = false, thread_id = 10141, poll_max_ns = 32768, poll_grow = 0, poll_shrink = 0}
(gdb) info thr
  Id   Target Id                         Frame
* 1    Thread 0x7fc2a4a19f00 (LWP 10134) 0x000055fc1d666094 in iothread_instance_finalize (obj=<optimized out>) at ../iothread.c:145
  2    Thread 0x7fc2a4a16700 (LWP 10136) 0x00007fc2a8e8002d in syscall () from /lib64/libc.so.6
  3    Thread 0x7fc29e9f8700 (LWP 10143) 0x00007fc2a8f5e65c in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
  4    Thread 0x7fc29e1f7700 (LWP 10144) 0x00007fc2a8f5e65c in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
  5    Thread 0x7fc2027ff700 (LWP 10147) 0x00007fc2a8f5e65c in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
  6    Thread 0x7fc29ffff700 (LWP 10137) 0x00007fc2a8f5e9aa in pthread_cond_timedwait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
  7    Thread 0x7fc20301a700 (LWP 10146) 0x00007fc2a8e7a541 in poll () from /lib64/libc.so.6

So, it's about mon_iothread, and the thread itself has already finished by the time of the crash (this seems correct, as earlier in iothread_instance_finalize() there is iothread_stop(), which does qemu_thread_join()).

Now, what crashed:

(gdb) fr 4
#4  0x000055fc1d76e195 in aio_ctx_finalize (source=<optimized out>) at ../util/async.c:343
343             assert(flags & BH_DELETED);
(gdb) list
338         /* There must be no aio_bh_poll() calls going on */
339         assert(QSIMPLEQ_EMPTY(&ctx->bh_slice_list));
340
341         while ((bh = aio_bh_dequeue(&ctx->bh_list, &flags))) {
342             /* qemu_bh_delete() must have been called on BHs in this AioContext */
343             assert(flags & BH_DELETED);
344
345             g_free(bh);
346         }
347
(gdb) p flags
$1 = 11
(gdb) # BH_ONESHOT | BH_SCHEDULED | BH_PENDING

So, there is an unfinished BH in the context, while the thread itself has already finished.
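Just to double-check my decoding of that value (the flag bits below are how I read the enum in util/async.c; please correct me if the downstream tree differs):

#include <stdio.h>

/* BH flag bits, as I read them in util/async.c */
enum {
    BH_PENDING   = 1 << 0,  /* enqueued, waiting for aio_bh_poll()  */
    BH_SCHEDULED = 1 << 1,  /* invoke the callback                  */
    BH_DELETED   = 1 << 2,  /* delete without invoking the callback */
    BH_ONESHOT   = 1 << 3,  /* delete after invoking the callback   */
};

int main(void)
{
    unsigned flags = 11;    /* the value gdb printed above */

    printf("PENDING=%d SCHEDULED=%d DELETED=%d ONESHOT=%d\n",
           !!(flags & BH_PENDING), !!(flags & BH_SCHEDULED),
           !!(flags & BH_DELETED), !!(flags & BH_ONESHOT));
    /* -> PENDING=1 SCHEDULED=1 DELETED=0 ONESHOT=1, i.e. a scheduled
     * oneshot BH that was never run and never qemu_bh_delete()d, so
     * assert(flags & BH_DELETED) has to fire on it. */
    return 0;
}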
Does the iothread have its own AioContext, or may it use qemu_aio_context?

Looking at iothread_run(), I can't see what prevents creating a BH after the iothread has finished. So, what prevents the following situation?

1. iothread->running is set to false, so we leave the while loop in iothread_run()

2. exactly after that, some other thread schedules a BH into the AioContext of the already finished iothread

(I've tried to model this ordering in the P.S. below.)

-- 
Best regards,
Vladimir
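P.S. To make points 1-2 concrete, here is a tiny standalone toy (plain C11 + pthreads, not QEMU code, all names are made up) that models that ordering and trips an analogous assertion at cleanup:

#include <assert.h>
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool running = true;   /* plays the role of iothread->running */
static atomic_int scheduled_bhs = 0; /* plays the role of ctx->bh_list      */

static void *worker(void *arg)       /* plays the role of iothread_run()    */
{
    (void)arg;
    while (atomic_load(&running)) {
        /* "aio_poll()": consume whatever was scheduled so far */
        atomic_store(&scheduled_bhs, 0);
    }
    return NULL;  /* from here on nobody services the context any more */
}

int main(void)
{
    pthread_t t;
    pthread_create(&t, NULL, worker, NULL);

    /* "iothread_stop()": make the loop exit and join the thread */
    atomic_store(&running, false);
    pthread_join(t, NULL);

    /* Another thread scheduling a BH *after* the loop was left; this is
     * the step I don't see being prevented.  In QEMU terms it would be
     * e.g. aio_bh_schedule_oneshot() on the finished iothread's context. */
    atomic_fetch_add(&scheduled_bhs, 1);

    /* "aio_ctx_finalize()": every BH must be gone by now */
    assert(atomic_load(&scheduled_bhs) == 0);  /* fires, like async.c:343 */
    printf("not reached\n");
    return 0;
}

(Of course the toy only shows the ordering; the real question is what in QEMU is supposed to rule step 2 out.)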