Doing something foolish like

# while true; do killall -HUP libvirtd; sleep 1; done
# virsh create vm.xml

can deadlock libvirtd. Threads of interest are

Thread 4 (Thread 0x7fc13b53e700 (LWP 64084)):
#0 0x00007fc13fba10bf in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/
#1  0x00007fc14310213c in virCondWait (c=0x7fc110017fa8, m=0x7fc110017f80)
    at util/virthread.c:154
#2  0x00007fc1280244e9 in qemuMonitorSend (mon=0x7fc110017f70, 
    at qemu/qemu_monitor.c:1083
#3  0x00007fc12803bf5a in qemuMonitorJSONCommandWithFd (mon=0x7fc110017f70,
    cmd=0x7fc110017700, scm_fd=-1, reply=0x7fc13b53d318) at qemu/qemu_monitor_json.c:305
#4  0x00007fc12803c09c in qemuMonitorJSONCommand (mon=0x7fc110017f70, cmd=0x7fc110017700,
    reply=0x7fc13b53d318) at qemu/qemu_monitor_json.c:335
---Type <return> to continue, or q <return> to quit---
#5  0x00007fc12803f116 in qemuMonitorJSONSetCapabilities (mon=0x7fc110017f70)
    at qemu/qemu_monitor_json.c:1298
#6  0x00007fc128026e14 in qemuMonitorSetCapabilities (mon=0x7fc110017f70)
    at qemu/qemu_monitor.c:1697
#7  0x00007fc127ffe250 in qemuProcessInitMonitor (driver=0x7fc12004e1e0,
    vm=0x7fc110003d00, asyncJob=QEMU_ASYNC_JOB_START) at 
#8 0x00007fc127ffe564 in qemuConnectMonitor (driver=0x7fc12004e1e0, vm=0x7fc110003d00,
    asyncJob=6, logCtxt=0x7fc1100089c0) at qemu/qemu_process.c:1835
#9  0x00007fc127fff386 in qemuProcessWaitForMonitor (driver=0x7fc12004e1e0,
    vm=0x7fc110003d00, asyncJob=6, logCtxt=0x7fc1100089c0) at qemu/qemu_process.c:2180
#10 0x00007fc128009269 in qemuProcessLaunch (conn=0x7fc1100009a0, driver=0x7fc12004e1e0,
    vm=0x7fc110003d00, asyncJob=QEMU_ASYNC_JOB_START, incoming=0x0, 
    vmop=VIR_NETDEV_VPORT_PROFILE_OP_CREATE, flags=17) at 
#11 0x00007fc128009e85 in qemuProcessStart (conn=0x7fc1100009a0, driver=0x7fc12004e1e0, vm=0x7fc110003d00, updatedCPU=0x0, asyncJob=QEMU_ASYNC_JOB_START, migrateFrom=0x0, migrateFd=-1, migratePath=0x0, snapshot=0x0, vmop=VIR_NETDEV_VPORT_PROFILE_OP_CREATE,
    flags=17) at qemu/qemu_process.c:6334
#12 0x00007fc1280552f1 in qemuDomainCreateXML (conn=0x7fc1100009a0,
xml=0x7fc110000ed0 "<!--\nWARNING: THIS IS AN AUTO-GENERATED FILE. CHANGES TO IT ARE LIKELY TO BE\nOVERWRITTEN AND LOST. Changes to this xml configuration should be made using:\n virsh edit sles12sp2-kvm\nor other applicati"..., flags=0) at qemu/qemu_driver.c:1776

Thread 1 (Thread 0x7fc143c66880 (LWP 64081)):
#0 0x00007fc13fb9aac8 in __pthread_rwlock_wrlock_slow () from /lib64/
#1  0x00007fc143101ffa in virRWLockWrite (m=0x7fc143678cc0 <updateLock>)
    at util/virthread.c:122
#2 0x00007fc1431a394f in virNWFilterWriteLockFilterUpdates () at conf/nwfilter_conf.c:159
#3  0x00007fc12a5230a0 in nwfilterStateReload () at 
#4  0x00007fc1433170c2 in virStateReload () at libvirt.c:829
#5 0x0000558c522d5686 in daemonReloadHandler (dmn=0x558c5328b230, sig=0x7ffe0a831e30,
    opaque=0x0) at remote/remote_daemon.c:724
#6  0x00007fc14321e3c7 in virNetDaemonSignalEvent (watch=2, fd=9, events=1,
    opaque=0x558c5328b230) at rpc/virnetdaemon.c:654
#7  0x00007fc143085cc7 in virEventPollDispatchHandles (nfds=11, 
    at util/vireventpoll.c:508
#8  0x00007fc143086586 in virEventPollRunOnce () at util/vireventpoll.c:657
#9  0x00007fc143084312 in virEventRunDefaultImpl () at util/virevent.c:327
#10 0x00007fc14321ecb8 in virNetDaemonRun (dmn=0x558c5328b230) at rpc/virnetdaemon.c:858
#11 0x0000558c522d7add in main (argc=3, argv=0x7ffe0a832758) at remote/remote_daemon.c:1496
(gdb) thr 1
[Switching to thread 1 (Thread 0x7fc143c66880 (LWP 64081))]
#0 0x00007fc13fb9aac8 in __pthread_rwlock_wrlock_slow () from /lib64/
(gdb) f 1
#1  0x00007fc143101ffa in virRWLockWrite (m=0x7fc143678cc0 <updateLock>)
    at util/virthread.c:122
122         pthread_rwlock_wrlock(&m->lock);
(gdb) p updateLock
$1 = {lock = {__data = {__lock = 0, __nr_readers = 1, __readers_wakeup = 0,
__writer_wakeup = 0, __nr_readers_queued = 0, __nr_writers_queued = 1, __writer = 0,
      __shared = 0, __rwelision = 0 '\000', __pad1 = "\000\000\000\000\000\000",
      __pad2 = 0, __flags = 0},
__size = "\000\000\000\000\001", '\000' <repeats 15 times>, "\001", '\000' <repeats 34 times>, __align = 4294967296}}

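Reloading of the nwfilter driver (Thread 1) is stuck waiting for the write lock on updateLock, which already has a reader (Thread 4, from qemuDomainCreateXML) in the critical section. Thread 1 is also the main event loop thread, so while it is blocked the monitor reply that Thread 4 is waiting for in qemuMonitorSend is never dispatched; Thread 4 therefore never releases its read lock, and neither thread can make progress.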

I'm not really sure how best to fix this deadlock. One hack that avoids it is to load the nwfilter driver after the hypervisor drivers, as the patch below does. This is obviously problematic if loading a hypervisor driver requires the nwfilter driver to be loaded already.

Any suggestions on better fixes for this issue?


diff --git a/src/remote/remote_daemon.c b/src/remote/remote_daemon.c
index f8082f62f..7a6655a23 100644
--- a/src/remote/remote_daemon.c
+++ b/src/remote/remote_daemon.c
@@ -327,9 +327,6 @@ static void daemonInitialize(void)
     VIR_DAEMON_LOAD_MODULE(secretRegister, "secret");
-    VIR_DAEMON_LOAD_MODULE(nwfilterRegister, "nwfilter");
 #ifdef WITH_XEN
     VIR_DAEMON_LOAD_MODULE(xenRegister, "xen");
@@ -354,6 +351,9 @@ static void daemonInitialize(void)
 #ifdef WITH_VZ
     VIR_DAEMON_LOAD_MODULE(vzRegister, "vz");
+    VIR_DAEMON_LOAD_MODULE(nwfilterRegister, "nwfilter");

