Both main loop thread and vCPU thread are allowed to call
pause_all_vcpus(), and in general resume_all_vcpus() is called
after it. Two issues live in pause_all_vcpus():

1. There is possibility that during thread T1 waits on
qemu_pause_cond with bql unlocked, other thread has called
pause_all_vcpus() and resume_all_vcpus(), then thread T1 will
stuck, because the condition all_vcpus_paused() is always false.

2. After all_vcpus_paused() has been checked as true, we will
unlock bql to relock replay_mutex. During the bql was unlocked,
the vcpu's state may has been changed by other thread, so we
must retry.

Signed-off-by: Keqian Zhu <zhukeqi...@huawei.com>
---
 system/cpus.c | 29 ++++++++++++++++++++++++-----
 1 file changed, 24 insertions(+), 5 deletions(-)

diff --git a/system/cpus.c b/system/cpus.c
index 68d161d96b..4e41abe23e 100644
--- a/system/cpus.c
+++ b/system/cpus.c
@@ -571,12 +571,14 @@ static bool all_vcpus_paused(void)
     return true;
 }
 
-void pause_all_vcpus(void)
+static void request_pause_all_vcpus(void)
 {
     CPUState *cpu;
 
-    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
     CPU_FOREACH(cpu) {
+        if (cpu->stopped) {
+            continue;
+        }
         if (qemu_cpu_is_self(cpu)) {
             qemu_cpu_stop(cpu, true);
         } else {
@@ -584,6 +586,14 @@ void pause_all_vcpus(void)
             qemu_cpu_kick(cpu);
         }
     }
+}
+
+void pause_all_vcpus(void)
+{
+    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
+
+retry:
+    request_pause_all_vcpus();
 
     /* We need to drop the replay_lock so any vCPU threads woken up
      * can finish their replay tasks
@@ -592,14 +602,23 @@ void pause_all_vcpus(void)
 
     while (!all_vcpus_paused()) {
         qemu_cond_wait(&qemu_pause_cond, &bql);
-        CPU_FOREACH(cpu) {
-            qemu_cpu_kick(cpu);
-        }
+        /* During we waited on qemu_pause_cond the bql was unlocked,
+         * the vcpu's state may has been changed by other thread, so
+         * we must request the pause state on all vcpus again.
+         */
+        request_pause_all_vcpus();
     }
 
     bql_unlock();
     replay_mutex_lock();
     bql_lock();
+
+    /* During the bql was unlocked, the vcpu's state may has been
+     * changed by other thread, so we must retry.
+     */
+    if (!all_vcpus_paused()) {
+        goto retry;
+    }
 }
 
 void cpu_resume(CPUState *cpu)
-- 
2.33.0


Reply via email to