We currently use the 'sem_sync' semaphore on the sending side:

1) to know when the multifd_send_thread() has finished sending the
   MULTIFD_FLAG_SYNC packet;

  This is unnecessary. Multifd sends packets one by one and completion
  is already bound by the 'sem' semaphore. The SYNC packet has nothing
  special that would require it to have a separate semaphore on the
  sending side.

2) to wait for the multifd threads to finish before cleaning up;

   This happens because multifd_send_sync_main() blocks
   ram_save_complete() from finishing until the semaphore is
   posted. This is surprising and not documented.

Clarify the above situation by renaming 'sem_sync' to 'sem_done' and
making the #2 usage the main one. Post to 'sem_sync' only when there's
no more pending_jobs.

Signed-off-by: Fabiano Rosas <faro...@suse.de>
---
I remove the parts about the receiving side. I wasn't sure about them
and we don't need to mix the two. Potentially we need the sem_sync on
the recv to ensure all channels wait before becoming available to read
once again after a FLUSH.
---
 migration/multifd.c    | 76 ++++++++++++++++++++++++------------------
 migration/multifd.h    |  4 +--
 migration/trace-events |  2 +-
 3 files changed, 47 insertions(+), 35 deletions(-)

diff --git a/migration/multifd.c b/migration/multifd.c
index 92ae61a50f..94f4ae5ff8 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -533,7 +533,7 @@ void multifd_save_cleanup(void)
         p->c = NULL;
         qemu_mutex_destroy(&p->mutex);
         qemu_sem_destroy(&p->sem);
-        qemu_sem_destroy(&p->sem_sync);
+        qemu_sem_destroy(&p->sem_done);
         g_free(p->name);
         p->name = NULL;
         multifd_pages_clear(p->pages);
@@ -591,6 +591,44 @@ int multifd_send_sync_main(QEMUFile *f)
         }
     }
 
+    for (i = 0; i < migrate_multifd_channels(); i++) {
+        MultiFDSendParams *p = &multifd_send_state->params[i];
+
+        trace_multifd_send_sync_main_signal(p->id);
+
+        qemu_mutex_lock(&p->mutex);
+
+        if (p->quit) {
+            error_report("%s: channel %d has already quit", __func__, i);
+            qemu_mutex_unlock(&p->mutex);
+            return -1;
+        }
+
+        p->packet_num = multifd_send_state->packet_num++;
+        p->flags |= MULTIFD_FLAG_SYNC;
+        p->pending_job++;
+        qemu_mutex_unlock(&p->mutex);
+        qemu_sem_post(&p->sem);
+    }
+
+    /* wait for all channels to be idle */
+    for (i = 0; i < migrate_multifd_channels(); i++) {
+        MultiFDSendParams *p = &multifd_send_state->params[i];
+
+        /*
+         * Even idle channels will wait for p->sem at the top of the
+         * loop.
+         */
+        qemu_sem_post(&p->sem);
+
+        trace_multifd_send_wait(migrate_multifd_channels() - i);
+        qemu_sem_wait(&p->sem_done);
+
+        qemu_mutex_lock(&p->mutex);
+        assert(!p->pending_job || p->quit);
+        qemu_mutex_unlock(&p->mutex);
+    }
+
     /*
      * When using zero-copy, it's necessary to flush the pages before any of
      * the pages can be sent again, so we'll make sure the new version of the
@@ -601,34 +639,11 @@ int multifd_send_sync_main(QEMUFile *f)
      * to be less frequent, e.g. only after we finished one whole scanning of
      * all the dirty bitmaps.
      */
-
     flush_zero_copy = migrate_zero_copy_send();
 
     for (i = 0; i < migrate_multifd_channels(); i++) {
         MultiFDSendParams *p = &multifd_send_state->params[i];
 
-        trace_multifd_send_sync_main_signal(p->id);
-
-        qemu_mutex_lock(&p->mutex);
-
-        if (p->quit) {
-            error_report("%s: channel %d has already quit", __func__, i);
-            qemu_mutex_unlock(&p->mutex);
-            return -1;
-        }
-
-        p->packet_num = multifd_send_state->packet_num++;
-        p->flags |= MULTIFD_FLAG_SYNC;
-        p->pending_job++;
-        qemu_mutex_unlock(&p->mutex);
-        qemu_sem_post(&p->sem);
-    }
-    for (i = 0; i < migrate_multifd_channels(); i++) {
-        MultiFDSendParams *p = &multifd_send_state->params[i];
-
-        trace_multifd_send_sync_main_wait(p->id);
-        qemu_sem_wait(&p->sem_sync);
-
         if (flush_zero_copy && p->c && (multifd_zero_copy_flush(p->c) < 0)) {
             return -1;
         }
@@ -728,12 +743,9 @@ static void *multifd_send_thread(void *opaque)
             p->pending_job--;
             qemu_mutex_unlock(&p->mutex);
 
-            if (flags & MULTIFD_FLAG_SYNC) {
-                qemu_sem_post(&p->sem_sync);
-            }
         } else {
             qemu_mutex_unlock(&p->mutex);
-            /* sometimes there are spurious wakeups */
+            qemu_sem_post(&p->sem_done);
         }
     }
 
@@ -749,7 +761,7 @@ out:
      * who pay attention to me.
      */
     if (ret != 0) {
-        qemu_sem_post(&p->sem_sync);
+        qemu_sem_post(&p->sem_done);
     }
 
     qemu_mutex_lock(&p->mutex);
@@ -786,7 +798,7 @@ static void multifd_tls_outgoing_handshake(QIOTask *task,
          * is not created, and then tell who pay attention to me.
          */
         p->quit = true;
-        qemu_sem_post(&p->sem_sync);
+        qemu_sem_post(&p->sem_done);
     }
 }
 
@@ -863,7 +875,7 @@ static void 
multifd_new_send_channel_cleanup(MultiFDSendParams *p,
 {
      migrate_set_error(migrate_get_current(), err);
      /* Error happen, we need to tell who pay attention to me */
-     qemu_sem_post(&p->sem_sync);
+     qemu_sem_post(&p->sem_done);
      /*
       * Although multifd_send_thread is not created, but main migration
       * thread need to judge whether it is running, so we need to mark
@@ -915,7 +927,7 @@ int multifd_save_setup(Error **errp)
 
         qemu_mutex_init(&p->mutex);
         qemu_sem_init(&p->sem, 0);
-        qemu_sem_init(&p->sem_sync, 0);
+        qemu_sem_init(&p->sem_done, 0);
         p->quit = false;
         p->pending_job = 0;
         p->id = i;
diff --git a/migration/multifd.h b/migration/multifd.h
index a835643b48..71bd66974d 100644
--- a/migration/multifd.h
+++ b/migration/multifd.h
@@ -90,8 +90,8 @@ typedef struct {
 
     /* sem where to wait for more work */
     QemuSemaphore sem;
-    /* syncs main thread and channels */
-    QemuSemaphore sem_sync;
+    /* channel is done transmitting until more pages are queued */
+    QemuSemaphore sem_done;
 
     /* this mutex protects the following parameters */
     QemuMutex mutex;
diff --git a/migration/trace-events b/migration/trace-events
index ee9c8f4d63..3aef79a951 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -137,7 +137,7 @@ multifd_send(uint8_t id, uint64_t packet_num, uint32_t 
normal, uint32_t flags, u
 multifd_send_error(uint8_t id) "channel %u"
 multifd_send_sync_main(long packet_num) "packet num %ld"
 multifd_send_sync_main_signal(uint8_t id) "channel %u"
-multifd_send_sync_main_wait(uint8_t id) "channel %u"
+multifd_send_wait(uint8_t n) "waiting for %u channels to finish sending"
 multifd_send_terminate_threads(bool error) "error %d"
 multifd_send_thread_end(uint8_t id, uint64_t packets, uint64_t normal_pages) 
"channel %u packets %" PRIu64 " normal pages %"  PRIu64
 multifd_send_thread_start(uint8_t id) "%u"
-- 
2.35.3


Reply via email to