[PATCH v4 09/14] migration/multifd: Prepare to introduce DSA acceleration on the multifd path.

2024-04-24 Thread Hao Xiang
1. Refactor multifd_send_thread function.
2. Introduce the batch task structure in MultiFDSendParams.

Signed-off-by: Hao Xiang 
---
 include/qemu/dsa.h  | 51 +++--
 migration/multifd.c |  5 +
 migration/multifd.h |  2 ++
 util/dsa.c  | 51 ++---
 4 files changed, 99 insertions(+), 10 deletions(-)

diff --git a/include/qemu/dsa.h b/include/qemu/dsa.h
index e002652879..0c36e93016 100644
--- a/include/qemu/dsa.h
+++ b/include/qemu/dsa.h
@@ -2,6 +2,7 @@
 #define QEMU_DSA_H
 
 #include "qemu/error-report.h"
+#include "exec/cpu-common.h"
 #include "qemu/thread.h"
 #include "qemu/queue.h"
 
@@ -42,6 +43,21 @@ typedef struct dsa_batch_task {
 QSIMPLEQ_ENTRY(dsa_batch_task) entry;
 } dsa_batch_task;
 
+#endif
+
+struct batch_task {
+#ifdef CONFIG_DSA_OPT
+/* Address of each page in pages */
+ram_addr_t *addr;
+/* Zero page checking results */
+bool *results;
+/* Batch task DSA specific implementation */
+struct dsa_batch_task *dsa_batch;
+#endif
+};
+
+#ifdef CONFIG_DSA_OPT
+
 /**
  * @brief Initializes DSA devices.
  *
@@ -74,7 +90,7 @@ void dsa_cleanup(void);
 bool dsa_is_running(void);
 
 /**
- * @brief Initializes a buffer zero batch task.
+ * @brief Initializes a buffer zero DSA batch task.
  *
  * @param task A pointer to the batch task to initialize.
  * @param results A pointer to an array of zero page checking results.
@@ -102,9 +118,26 @@ void buffer_zero_batch_task_destroy(struct dsa_batch_task *task);
  * @return Zero if successful, otherwise non-zero.
  */
 int
-buffer_is_zero_dsa_batch_async(struct dsa_batch_task *batch_task,
+buffer_is_zero_dsa_batch_async(struct batch_task *batch_task,
const void **buf, size_t count, size_t len);
 
+/**
+ * @brief Initializes a general buffer zero batch task.
+ *
+ * @param batch_size The number of zero page checking tasks in the batch.
+ * @return A pointer to the general batch task initialized.
+ */
+struct batch_task *
+batch_task_init(int batch_size);
+
+/**
+ * @brief Destroys a general buffer zero batch task.
+ *
+ * @param task A pointer to the general batch task to destroy.
+ */
+void
+batch_task_destroy(struct batch_task *task);
+
 #else
 
 static inline bool dsa_is_running(void)
@@ -128,6 +161,20 @@ static inline void dsa_stop(void) {}
 
 static inline void dsa_cleanup(void) {}
 
+static inline int
+buffer_is_zero_dsa_batch_async(struct batch_task *batch_task,
+   const void **buf, size_t count, size_t len)
+{
+exit(1);
+}
+
+static inline struct batch_task *batch_task_init(int batch_size)
+{
+return NULL;
+}
+
+static inline void batch_task_destroy(struct batch_task *task) {}
+
 #endif
 
 #endif
diff --git a/migration/multifd.c b/migration/multifd.c
index f317bff077..cfd3a92f6c 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -13,6 +13,8 @@
 #include "qemu/osdep.h"
 #include "qemu/cutils.h"
 #include "qemu/rcu.h"
+#include "qemu/dsa.h"
+#include "qemu/memalign.h"
 #include "exec/target_page.h"
 #include "sysemu/sysemu.h"
 #include "exec/ramblock.h"
@@ -780,6 +782,8 @@ static bool multifd_send_cleanup_channel(MultiFDSendParams *p, Error **errp)
 p->name = NULL;
 multifd_pages_clear(p->pages);
 p->pages = NULL;
+batch_task_destroy(p->batch_task);
+p->batch_task = NULL;
 p->packet_len = 0;
 g_free(p->packet);
 p->packet = NULL;
@@ -1172,6 +1176,7 @@ bool multifd_send_setup(void)
 qemu_sem_init(&p->sem_sync, 0);
 p->id = i;
 p->pages = multifd_pages_init(page_count);
+p->batch_task = batch_task_init(page_count);
 
 if (use_packets) {
 p->packet_len = sizeof(MultiFDPacket_t)
diff --git a/migration/multifd.h b/migration/multifd.h
index c9d9b09239..16e27db5e9 100644
--- a/migration/multifd.h
+++ b/migration/multifd.h
@@ -135,6 +135,8 @@ typedef struct {
  * pending_job != 0 -> multifd_channel can use it.
  */
 MultiFDPages_t *pages;
+/* Zero page checking batch task */
+struct batch_task *batch_task;
 
 /* thread local variables. No locking required */
 
diff --git a/util/dsa.c b/util/dsa.c
index 5a2bf33651..4f695e58af 100644
--- a/util/dsa.c
+++ b/util/dsa.c
@@ -802,7 +802,7 @@ buffer_zero_task_init_int(struct dsa_hw_desc *descriptor,
 }
 
 /**
- * @brief Initializes a buffer zero batch task.
+ * @brief Initializes a buffer zero DSA batch task.
  *
  * @param task A pointer to the batch task to initialize.
  * @param results A pointer to an array of zero page checking results.
@@ -1107,29 +1107,64 @@ void dsa_cleanup(void)
  * @return Zero if successful, otherwise non-zero.
  */
 int
-buffer_is_zero_dsa_batch_async(struct dsa_batch_task *batch_task,
+buffer_is_zero_dsa_batch_async(struct batch_tas

[PATCH v4 07/14] util/dsa: Implement DSA task asynchronous submission and wait for completion.

2024-04-24 Thread Hao Xiang
* Add a DSA task completion callback.
* DSA completion thread will call the task's completion callback
on every task/batch task completion.
* DSA submission path to wait for completion.
* Implement CPU fallback if DSA is not able to complete the task.
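
For reference, the submit-and-wait flow added here looks roughly like the
following (a minimal sketch, not the literal patch code; it assumes the
sem_task_complete semaphore and completion_callback fields that
dsa_batch_task gained earlier in this series):

    /* Sketch: posted by the completion thread once polling finishes. */
    static void task_complete_cb(void *opaque)
    {
        struct dsa_batch_task *task = opaque;
        qemu_sem_post(&task->sem_task_complete);
    }

    /* Sketch: submit one buffer-zero task, then block until signaled. */
    static int buffer_zero_dsa_wait_sketch(struct dsa_batch_task *task,
                                           const void *buf, size_t len)
    {
        task->completion_callback = task_complete_cb;
        if (buffer_zero_dsa_async(task, buf, len) != 0) {
            return -1;
        }
        qemu_sem_wait(&task->sem_task_complete);
        return 0;
    }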

Signed-off-by: Hao Xiang 
Signed-off-by: Bryan Zhang 
---
 include/qemu/dsa.h |  14 +
 util/dsa.c | 147 -
 2 files changed, 158 insertions(+), 3 deletions(-)

diff --git a/include/qemu/dsa.h b/include/qemu/dsa.h
index 645e6fc367..e002652879 100644
--- a/include/qemu/dsa.h
+++ b/include/qemu/dsa.h
@@ -91,6 +91,20 @@ buffer_zero_batch_task_init(struct dsa_batch_task *task,
  */
 void buffer_zero_batch_task_destroy(struct dsa_batch_task *task);
 
+/**
+ * @brief Performs buffer zero comparison on a DSA batch task asynchronously.
+ *
+ * @param batch_task A pointer to the batch task.
+ * @param buf An array of memory buffers.
+ * @param count The number of buffers in the array.
+ * @param len The buffer length.
+ *
+ * @return Zero if successful, otherwise non-zero.
+ */
+int
+buffer_is_zero_dsa_batch_async(struct dsa_batch_task *batch_task,
+   const void **buf, size_t count, size_t len);
+
 #else
 
 static inline bool dsa_is_running(void)
diff --git a/util/dsa.c b/util/dsa.c
index 9db4cfcf1d..5a2bf33651 100644
--- a/util/dsa.c
+++ b/util/dsa.c
@@ -473,6 +473,57 @@ poll_completion(struct dsa_completion_record *completion,
 return 0;
 }
 
+/**
+ * @brief Helper function to use CPU to complete a single
+ *zero page checking task.
+ *
+ * @param completion A pointer to a DSA task completion record.
+ * @param descriptor A pointer to a DSA task descriptor.
+ * @param result A pointer to the result of a zero page checking.
+ */
+static void
+task_cpu_fallback_int(struct dsa_completion_record *completion,
+  struct dsa_hw_desc *descriptor, bool *result)
+{
+const uint8_t *buf;
+size_t len;
+
+if (completion->status == DSA_COMP_SUCCESS) {
+return;
+}
+
+/*
+ * DSA was able to partially complete the operation. Check the
+ * result. If we already know this is not a zero page, we can
+ * return now.
+ */
+if (completion->bytes_completed != 0 && completion->result != 0) {
+*result = false;
+return;
+}
+
+/* Let's fallback to use CPU to complete it. */
+buf = (const uint8_t *)descriptor->src_addr;
+len = descriptor->xfer_size;
+*result = buffer_is_zero(buf + completion->bytes_completed,
+ len - completion->bytes_completed);
+}
+
+/**
+ * @brief Use CPU to complete a single zero page checking task.
+ *
+ * @param task A pointer to the task.
+ */
+static void
+task_cpu_fallback(struct dsa_batch_task *task)
+{
+assert(task->task_type == DSA_TASK);
+
+task_cpu_fallback_int(&task->completions[0],
+  &task->descriptors[0],
+  &task->results[0]);
+}
+
 /**
  * @brief Complete a single DSA task in the batch task.
  *
@@ -574,6 +625,47 @@ exit:
 return ret;
 }
 
+/**
+ * @brief Use CPU to complete the zero page checking batch task.
+ *
+ * @param batch_task A pointer to the batch task.
+ */
+static void
+batch_task_cpu_fallback(struct dsa_batch_task *batch_task)
+{
+assert(batch_task->task_type == DSA_BATCH_TASK);
+
+struct dsa_completion_record *batch_completion =
+&batch_task->batch_completion;
+struct dsa_completion_record *completion;
+uint8_t status;
+bool *results = batch_task->results;
+uint32_t count = batch_task->batch_descriptor.desc_count;
+
+/* DSA is able to complete the entire batch task. */
+if (batch_completion->status == DSA_COMP_SUCCESS) {
+assert(count == batch_completion->bytes_completed);
+return;
+}
+
+/*
+ * DSA encounters some error and is not able to complete
+ * the entire batch task. Use CPU fallback.
+ */
+for (int i = 0; i < count; i++) {
+
+completion = &batch_task->completions[i];
+status = completion->status;
+
+assert(status == DSA_COMP_SUCCESS ||
+status == DSA_COMP_PAGE_FAULT_NOBOF);
+
+task_cpu_fallback_int(completion,
+  &batch_task->descriptors[i],
+  &results[i]);
+}
+}
+
 /**
  * @brief Handles an asynchronous DSA batch task completion.
  *
@@ -861,7 +953,6 @@ buffer_zero_batch_task_set(struct dsa_batch_task *batch_task,
  *
  * @return int Zero if successful, otherwise an appropriate error code.
  */
-__attribute__((unused))
 static int
 buffer_zero_dsa_async(struct dsa_batch_task *task,
   const void *buf, size_t len)
@@ -880,7 +971,6 @@ buffer_zero_dsa_async(struct dsa_batch_task *task,
  * @param count The number of buffers.
  * @param len The buffer length.
  */
-__attribute__((unused))
 static int
 buffer_zero_dsa_batch_asyn

[PATCH v4 10/14] migration/multifd: Enable DSA offloading in multifd sender path.

2024-04-24 Thread Hao Xiang
Multifd sender path gets an array of pages queued by the migration
thread. It performs zero page checking on every page in the array.
The pages are classified as either a zero page or a normal page. This
change uses Intel DSA to offload the zero page checking from the CPU to
the DSA accelerator. The sender thread submits a batch of pages to DSA
hardware and waits for the DSA completion thread to signal for work
completion.

Signed-off-by: Hao Xiang 
---
 migration/multifd-zero-page.c | 99 +--
 migration/multifd.c   | 27 +-
 migration/multifd.h   |  1 +
 3 files changed, 120 insertions(+), 7 deletions(-)

diff --git a/migration/multifd-zero-page.c b/migration/multifd-zero-page.c
index e1b8370f88..4f426289e4 100644
--- a/migration/multifd-zero-page.c
+++ b/migration/multifd-zero-page.c
@@ -37,25 +37,83 @@ static void swap_page_offset(ram_addr_t *pages_offset, int a, int b)
 }
 
 /**
- * multifd_send_zero_page_detect: Perform zero page detection on all pages.
+ * zero_page_detect_cpu: Perform zero page detection using CPU.
  *
  * Sorts normal pages before zero pages in p->pages->offset and updates
  * p->pages->normal_num.
  *
  * @param p A pointer to the send params.
  */
-void multifd_send_zero_page_detect(MultiFDSendParams *p)
+static void zero_page_detect_cpu(MultiFDSendParams *p)
 {
 MultiFDPages_t *pages = p->pages;
 RAMBlock *rb = pages->block;
 int i = 0;
 int j = pages->num - 1;
 
-if (!multifd_zero_page_enabled()) {
-pages->normal_num = pages->num;
+/*
+ * Sort the page offset array by moving all normal pages to
+ * the left and all zero pages to the right of the array.
+ */
+while (i <= j) {
+uint64_t offset = pages->offset[i];
+
+if (!buffer_is_zero(rb->host + offset, p->page_size)) {
+i++;
+continue;
+}
+
+swap_page_offset(pages->offset, i, j);
+ram_release_page(rb->idstr, offset);
+j--;
+}
+
+pages->normal_num = i;
+}
+
+
+#ifdef CONFIG_DSA_OPT
+
+static void swap_result(bool *results, int a, int b)
+{
+bool temp;
+
+if (a == b) {
 return;
 }
 
+temp = results[a];
+results[a] = results[b];
+results[b] = temp;
+}
+
+/**
+ * zero_page_detect_dsa: Perform zero page detection using
+ * Intel Data Streaming Accelerator (DSA).
+ *
+ * Sorts normal pages before zero pages in p->pages->offset and updates
+ * p->pages->normal_num.
+ *
+ * @param p A pointer to the send params.
+ */
+static void zero_page_detect_dsa(MultiFDSendParams *p)
+{
+MultiFDPages_t *pages = p->pages;
+RAMBlock *rb = pages->block;
+bool *results = p->batch_task->results;
+
+for (int i = 0; i < p->pages->num; i++) {
+p->batch_task->addr[i] = (ram_addr_t)(rb->host + p->pages->offset[i]);
+}
+
+buffer_is_zero_dsa_batch_async(p->batch_task,
+   (const void **)p->batch_task->addr,
+   p->pages->num,
+   p->page_size);
+
+int i = 0;
+int j = pages->num - 1;
+
 /*
  * Sort the page offset array by moving all normal pages to
  * the left and all zero pages to the right of the array.
@@ -63,11 +121,12 @@ void multifd_send_zero_page_detect(MultiFDSendParams *p)
 while (i <= j) {
 uint64_t offset = pages->offset[i];
 
-if (!buffer_is_zero(rb->host + offset, p->page_size)) {
+if (!results[i]) {
 i++;
 continue;
 }
 
+swap_result(results, i, j);
 swap_page_offset(pages->offset, i, j);
 ram_release_page(rb->idstr, offset);
 j--;
@@ -76,6 +135,15 @@ void multifd_send_zero_page_detect(MultiFDSendParams *p)
 pages->normal_num = i;
 }
 
+#else
+
+static void zero_page_detect_dsa(MultiFDSendParams *p)
+{
+exit(1);
+}
+
+#endif
+
 void multifd_recv_zero_page_process(MultiFDRecvParams *p)
 {
 for (int i = 0; i < p->zero_num; i++) {
@@ -87,3 +155,24 @@ void multifd_recv_zero_page_process(MultiFDRecvParams *p)
 }
 }
 }
+
+/**
+ * multifd_send_zero_page_detect: Perform zero page detection on all pages.
+ *
+ * @param p A pointer to the send params.
+ */
+void multifd_send_zero_page_detect(MultiFDSendParams *p)
+{
+MultiFDPages_t *pages = p->pages;
+
+if (!multifd_zero_page_enabled()) {
+pages->normal_num = pages->num;
+return;
+}
+
+if (dsa_is_running()) {
+zero_page_detect_dsa(p);
+} else {
+zero_page_detect_cpu(p);
+}
+}
diff --git a/migration/multifd.c b/migration/multifd.c
index cfd3a92f6c..7316643d0a 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -818,6 +818,8 @@ void multifd_send_shutdown(void)
 
 multifd_send_terminate_threads();
 
+dsa_clea

[PATCH v4 04/14] util/dsa: Implement DSA task enqueue and dequeue.

2024-04-24 Thread Hao Xiang
* Use a thread-safe queue for DSA task enqueue/dequeue.
* Implement DSA task submission.
* Implement DSA batch task submission.

Signed-off-by: Hao Xiang 
---
 include/qemu/dsa.h |  28 +++
 util/dsa.c | 201 +
 2 files changed, 229 insertions(+)

diff --git a/include/qemu/dsa.h b/include/qemu/dsa.h
index f15c05ee85..37cae8d9d2 100644
--- a/include/qemu/dsa.h
+++ b/include/qemu/dsa.h
@@ -13,6 +13,34 @@
 #include <linux/idxd.h>
 #include "x86intrin.h"
 
+typedef enum DsaTaskType {
+DSA_TASK = 0,
+DSA_BATCH_TASK
+} DsaTaskType;
+
+typedef enum DsaTaskStatus {
+DSA_TASK_READY = 0,
+DSA_TASK_PROCESSING,
+DSA_TASK_COMPLETION
+} DsaTaskStatus;
+
+typedef void (*dsa_completion_fn)(void *);
+
+typedef struct dsa_batch_task {
+struct dsa_hw_desc batch_descriptor;
+struct dsa_hw_desc *descriptors;
+struct dsa_completion_record batch_completion __attribute__((aligned(32)));
+struct dsa_completion_record *completions;
+struct dsa_device_group *group;
+struct dsa_device *device;
+dsa_completion_fn completion_callback;
+QemuSemaphore sem_task_complete;
+DsaTaskType task_type;
+DsaTaskStatus status;
+int batch_size;
+QSIMPLEQ_ENTRY(dsa_batch_task) entry;
+} dsa_batch_task;
+
 /**
  * @brief Initializes DSA devices.
  *
diff --git a/util/dsa.c b/util/dsa.c
index 05bbf8e31a..75739a1af6 100644
--- a/util/dsa.c
+++ b/util/dsa.c
@@ -244,6 +244,205 @@ dsa_device_group_get_next_device(struct dsa_device_group *group)
 return &group->dsa_devices[current];
 }
 
+/**
+ * @brief Empties out the DSA task queue.
+ *
+ * @param group A pointer to the DSA device group.
+ */
+static void
+dsa_empty_task_queue(struct dsa_device_group *group)
+{
+qemu_mutex_lock(&group->task_queue_lock);
+dsa_task_queue *task_queue = &group->task_queue;
+while (!QSIMPLEQ_EMPTY(task_queue)) {
+QSIMPLEQ_REMOVE_HEAD(task_queue, entry);
+}
+qemu_mutex_unlock(&group->task_queue_lock);
+}
+
+/**
+ * @brief Adds a task to the DSA task queue.
+ *
+ * @param group A pointer to the DSA device group.
+ * @param context A pointer to the DSA task to enqueue.
+ *
+ * @return int Zero if successful, otherwise a proper error code.
+ */
+static int
+dsa_task_enqueue(struct dsa_device_group *group,
+ struct dsa_batch_task *task)
+{
+dsa_task_queue *task_queue = &group->task_queue;
+QemuMutex *task_queue_lock = &group->task_queue_lock;
+QemuCond *task_queue_cond = &group->task_queue_cond;
+
+bool notify = false;
+
+qemu_mutex_lock(task_queue_lock);
+
+if (!group->running) {
+error_report("DSA: Tried to queue task to stopped device queue.");
+qemu_mutex_unlock(task_queue_lock);
+return -1;
+}
+
+/* The queue is empty. This enqueue operation is a 0->1 transition. */
+if (QSIMPLEQ_EMPTY(task_queue)) {
+notify = true;
+}
+
+QSIMPLEQ_INSERT_TAIL(task_queue, task, entry);
+
+/* We need to notify the waiter for 0->1 transitions. */
+if (notify) {
+qemu_cond_signal(task_queue_cond);
+}
+
+qemu_mutex_unlock(task_queue_lock);
+
+return 0;
+}
+
+/**
+ * @brief Takes a DSA task out of the task queue.
+ *
+ * @param group A pointer to the DSA device group.
+ * @return dsa_batch_task* The DSA task being dequeued.
+ */
+__attribute__((unused))
+static struct dsa_batch_task *
+dsa_task_dequeue(struct dsa_device_group *group)
+{
+struct dsa_batch_task *task = NULL;
+dsa_task_queue *task_queue = &group->task_queue;
+QemuMutex *task_queue_lock = &group->task_queue_lock;
+QemuCond *task_queue_cond = &group->task_queue_cond;
+
+qemu_mutex_lock(task_queue_lock);
+
+while (true) {
+if (!group->running) {
+goto exit;
+}
+task = QSIMPLEQ_FIRST(task_queue);
+if (task != NULL) {
+break;
+}
+qemu_cond_wait(task_queue_cond, task_queue_lock);
+}
+
+QSIMPLEQ_REMOVE_HEAD(task_queue, entry);
+
+exit:
+qemu_mutex_unlock(task_queue_lock);
+return task;
+}
+
+/**
+ * @brief Submits a DSA work item to the device work queue.
+ *
+ * @param wq A pointer to the DSA work queue's device memory.
+ * @param descriptor A pointer to the DSA work item descriptor.
+ *
+ * @return Zero if successful, non-zero otherwise.
+ */
+static int
+submit_wi_int(void *wq, struct dsa_hw_desc *descriptor)
+{
+uint64_t retry = 0;
+
+_mm_sfence();
+
+while (true) {
+if (_enqcmd(wq, descriptor) == 0) {
+break;
+}
+retry++;
+if (retry > max_retry_count) {
+error_report("Submit work retry %lu times.", retry);
+return -1;
+}
+}
+
+return 0;
+}
+
+/**
+ * @brief Synchronously submits a DSA work item to the
+ *device work queue.
+ *
+ * @param wq A pointer to the DSA work queue's device memory.
+ * @param descriptor A pointer to the DSA work item descriptor.
+ *

[PATCH v4 03/14] util/dsa: Implement DSA device start and stop logic.

2024-04-24 Thread Hao Xiang
* DSA device open and close.
* DSA group contains multiple DSA devices.
* DSA group configure/start/stop/clean.

Signed-off-by: Hao Xiang 
Signed-off-by: Bryan Zhang 
---
 include/qemu/dsa.h |  72 +++
 util/dsa.c | 316 +
 util/meson.build   |   1 +
 3 files changed, 389 insertions(+)
 create mode 100644 include/qemu/dsa.h
 create mode 100644 util/dsa.c

diff --git a/include/qemu/dsa.h b/include/qemu/dsa.h
new file mode 100644
index 00..f15c05ee85
--- /dev/null
+++ b/include/qemu/dsa.h
@@ -0,0 +1,72 @@
+#ifndef QEMU_DSA_H
+#define QEMU_DSA_H
+
+#include "qemu/error-report.h"
+#include "qemu/thread.h"
+#include "qemu/queue.h"
+
+#ifdef CONFIG_DSA_OPT
+
+#pragma GCC push_options
+#pragma GCC target("enqcmd")
+
+#include <linux/idxd.h>
+#include "x86intrin.h"
+
+/**
+ * @brief Initializes DSA devices.
+ *
+ * @param dsa_parameter A list of DSA device path from migration parameter.
+ *
+ * @return int Zero if successful, otherwise non zero.
+ */
+int dsa_init(const char *dsa_parameter);
+
+/**
+ * @brief Start logic to enable using DSA.
+ */
+void dsa_start(void);
+
+/**
+ * @brief Stop the device group and the completion thread.
+ */
+void dsa_stop(void);
+
+/**
+ * @brief Clean up system resources created for DSA offloading.
+ */
+void dsa_cleanup(void);
+
+/**
+ * @brief Check if DSA is running.
+ *
+ * @return True if DSA is running, otherwise false.
+ */
+bool dsa_is_running(void);
+
+#else
+
+static inline bool dsa_is_running(void)
+{
+return false;
+}
+
+static inline int dsa_init(const char *dsa_parameter)
+{
+if (dsa_parameter != NULL && strlen(dsa_parameter) != 0) {
+error_report("DSA not supported.");
+return -1;
+}
+
+return 0;
+}
+
+static inline void dsa_start(void) {}
+
+static inline void dsa_stop(void) {}
+
+static inline void dsa_cleanup(void) {}
+
+#endif
+
+#endif
diff --git a/util/dsa.c b/util/dsa.c
new file mode 100644
index 00..05bbf8e31a
--- /dev/null
+++ b/util/dsa.c
@@ -0,0 +1,316 @@
+/*
+ * Use Intel Data Streaming Accelerator to offload certain background
+ * operations.
+ *
+ * Copyright (c) 2023 Hao Xiang 
+ *Bryan Zhang 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/queue.h"
+#include "qemu/memalign.h"
+#include "qemu/lockable.h"
+#include "qemu/cutils.h"
+#include "qemu/dsa.h"
+#include "qemu/bswap.h"
+#include "qemu/error-report.h"
+#include "qemu/rcu.h"
+
+#ifdef CONFIG_DSA_OPT
+
+#pragma GCC push_options
+#pragma GCC target("enqcmd")
+
+#include <linux/idxd.h>
+#include "x86intrin.h"
+
+#define DSA_WQ_SIZE 4096
+#define MAX_DSA_DEVICES 16
+
+typedef QSIMPLEQ_HEAD(dsa_task_queue, dsa_batch_task) dsa_task_queue;
+
+struct dsa_device {
+void *work_queue;
+};
+
+struct dsa_device_group {
+struct dsa_device *dsa_devices;
+int num_dsa_devices;
+/* The index of the next DSA device to be used. */
+uint32_t device_allocator_index;
+bool running;
+QemuMutex task_queue_lock;
+QemuCond task_queue_cond;
+dsa_task_queue task_queue;
+};
+
+uint64_t max_retry_count;
+static struct dsa_device_group dsa_group;
+
+
+/**
+ * @brief This function opens a DSA device's work queue and
+ *maps the DSA device memory into the current process.
+ *
+ * @param dsa_wq_path A pointer to the DSA device work queue's file path.
+ * @return A pointer to the mapped memory, or MAP_FAILED on failure.
+ */
+static void *
+map_dsa_device(const char *dsa_wq_path)
+{
+void *dsa_device;
+int fd;
+
+fd = open(dsa_wq_path, O_RDWR);
+if (fd < 0) {
+error_report("Open %s failed with errno = %d.",
+dsa_wq_path, errno);
+return MAP_FAILED;
+}
+   

[PATCH v4 00/14] Use Intel DSA accelerator to offload zero page checking in multifd live migration.

2024-04-24 Thread Hao Xiang
|               |multifdsend_1  |898.498    |       |
|               |multifdsend_2  |787.456    |       |
|               |multifdsend_3  |764.537    |       |
|               |multifdsend_4  |785.687    |       |
|               |multifdsend_5  |756.941    |       |
|               |multifdsend_6  |774.084    |       |
|               |multifdsend_7  |782.900    |11154  |
|---------------|---------------|-----------|-------|
|DSA offloading |live_migration |3846.976   |       |
|               |multifdsend_0  |191.880    |       |
|               |multifdsend_1  |166.331    |       |
|               |multifdsend_2  |168.528    |       |
|               |multifdsend_3  |197.831    |       |
|               |multifdsend_4  |169.580    |       |
|               |multifdsend_5  |167.984    |       |
|               |multifdsend_6  |198.042    |       |
|               |multifdsend_7  |170.624    |       |
|               |dsa_completion |3428.669   |8700   |
|---------------|---------------|-----------|-------|

Baseline total runtime is 11154 msec and DSA offloading total runtime is
8700 msec. That is 22% CPU savings.

Latency

|           |total time |down time  |throughput |transferred-ram |total-ram    |
|-----------|-----------|-----------|-----------|----------------|-------------|
|Baseline   |4867 ms    |20 ms      |1.51 mbps  |565 kb          |102400520 kb |
|DSA offload|3888 ms    |18 ms      |1.89 mbps  |565 kb          |102400520 kb |

Total time 20% faster and down time 10% faster.

* Testing:

1. Added unit tests to cover the added code paths in dsa.c.
2. Added integration tests to cover multifd live migration using DSA
offloading.

* Patchset

Apply this patchset on top of commit
85b597413d4370cb168f711192eaef2eb70535ac

Hao Xiang (14):
  meson: Introduce new instruction set enqcmd to the build system.
  util/dsa: Add dependency idxd.
  util/dsa: Implement DSA device start and stop logic.
  util/dsa: Implement DSA task enqueue and dequeue.
  util/dsa: Implement DSA task asynchronous completion thread model.
  util/dsa: Implement zero page checking in DSA task.
  util/dsa: Implement DSA task asynchronous submission and wait for
completion.
  migration/multifd: Add new migration option for multifd DSA
offloading.
  migration/multifd: Prepare to introduce DSA acceleration on the
multifd path.
  migration/multifd: Enable DSA offloading in multifd sender path.
  migration/multifd: Add migration option set packet size.
  migration/multifd: Enable set packet size migration option.
  util/dsa: Add unit test coverage for Intel DSA task submission and
completion.
  migration/multifd: Add integration tests for multifd with Intel DSA
offloading.

 include/qemu/dsa.h |  180 +
 linux-headers/linux/idxd.h |  356 ++
 meson.build|   14 +
 meson_options.txt  |2 +
 migration/migration-hmp-cmds.c |   15 +
 migration/multifd-zero-page.c  |   99 ++-
 migration/multifd-zlib.c   |6 +-
 migration/multifd-zstd.c   |6 +-
 migration/multifd.c|   38 +-
 migration/multifd.h|6 +-
 migration/options.c|   66 ++
 migration/options.h|2 +
 qapi/migration.json|   43 +-
 scripts/meson-buildoptions.sh  |3 +
 tests/qtest/migration-test.c   |   77 ++-
 tests/unit/meson.build |6 +
 tests/unit/test-dsa.c  |  499 ++
 util/dsa.c | 1170 
 util/meson.build   |1 +
 19 files changed, 2568 insertions(+), 21 deletions(-)
 create mode 100644 include/qemu/dsa.h
 create mode 100644 linux-headers/linux/idxd.h
 create mode 100644 tests/unit/test-dsa.c
 create mode 100644 util/dsa.c

-- 
2.30.2




[PATCH v4 11/14] migration/multifd: Add migration option set packet size.

2024-04-24 Thread Hao Xiang
The current multifd packet size is 128 * 4kb. This change adds
an option to set the packet size. Both sender and receiver need
to set the same packet size for the migration to work.
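
For instance (illustrative only; migrate-set-parameters is the standard
QMP command, and the parameter name comes from this patch), both sides
would be given the same value before starting the migration:

    {"execute": "migrate-set-parameters",
     "arguments": {"multifd-packet-size": 4190208}}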

Signed-off-by: Hao Xiang 
---
 migration/options.c | 36 
 migration/options.h |  1 +
 qapi/migration.json | 21 ++---
 3 files changed, 55 insertions(+), 3 deletions(-)

diff --git a/migration/options.c b/migration/options.c
index dc8642df81..a9deb079eb 100644
--- a/migration/options.c
+++ b/migration/options.c
@@ -79,6 +79,12 @@
 #define DEFAULT_MIGRATE_ANNOUNCE_ROUNDS5
 #define DEFAULT_MIGRATE_ANNOUNCE_STEP100
 
+/*
+ * Parameter for multifd packet size.
+ */
+#define DEFAULT_MIGRATE_MULTIFD_PACKET_SIZE (128 * 4 * 1024)
+#define MAX_MIGRATE_MULTIFD_PACKET_SIZE (1023 * 4 * 1024)
+
 #define DEFINE_PROP_MIG_CAP(name, x) \
 DEFINE_PROP_BOOL(name, MigrationState, capabilities[x], false)
 
@@ -184,6 +190,9 @@ Property migration_properties[] = {
ZERO_PAGE_DETECTION_MULTIFD),
 DEFINE_PROP_STRING("multifd-dsa-accel", MigrationState,
parameters.multifd_dsa_accel),
+DEFINE_PROP_SIZE("multifd-packet-size", MigrationState,
+ parameters.multifd_packet_size,
+ DEFAULT_MIGRATE_MULTIFD_PACKET_SIZE),
 
 /* Migration capabilities */
 DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
@@ -879,6 +888,13 @@ int migrate_multifd_channels(void)
 return s->parameters.multifd_channels;
 }
 
+uint64_t migrate_multifd_packet_size(void)
+{
+MigrationState *s = migrate_get_current();
+
+return s->parameters.multifd_packet_size;
+}
+
 MultiFDCompression migrate_multifd_compression(void)
 {
 MigrationState *s = migrate_get_current();
@@ -1031,6 +1047,8 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp)
 params->x_checkpoint_delay = s->parameters.x_checkpoint_delay;
 params->has_block_incremental = true;
 params->block_incremental = s->parameters.block_incremental;
+params->has_multifd_packet_size = true;
+params->multifd_packet_size = s->parameters.multifd_packet_size;
 params->has_multifd_channels = true;
 params->multifd_channels = s->parameters.multifd_channels;
 params->has_multifd_compression = true;
@@ -1094,6 +1112,7 @@ void migrate_params_init(MigrationParameters *params)
 params->has_downtime_limit = true;
 params->has_x_checkpoint_delay = true;
 params->has_block_incremental = true;
+params->has_multifd_packet_size = true;
 params->has_multifd_channels = true;
 params->has_multifd_compression = true;
 params->has_multifd_zlib_level = true;
@@ -1195,6 +1214,17 @@ bool migrate_params_check(MigrationParameters *params, Error **errp)
 
 /* x_checkpoint_delay is now always positive */
 
+if (params->has_multifd_packet_size &&
+((params->multifd_packet_size < DEFAULT_MIGRATE_MULTIFD_PACKET_SIZE) ||
+(params->multifd_packet_size >  MAX_MIGRATE_MULTIFD_PACKET_SIZE) ||
+(params->multifd_packet_size % qemu_target_page_size() != 0))) {
+error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
+"multifd_packet_size",
+"a value between 524288 and 4190208, "
+"must be a multiple of guest VM's page size.");
+return false;
+}
+
 if (params->has_multifd_channels && (params->multifd_channels < 1)) {
 error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
"multifd_channels",
@@ -1374,6 +1404,9 @@ static void migrate_params_test_apply(MigrateSetParameters *params,
 if (params->has_block_incremental) {
 dest->block_incremental = params->block_incremental;
 }
+if (params->has_multifd_packet_size) {
+dest->multifd_packet_size = params->multifd_packet_size;
+}
 if (params->has_multifd_channels) {
 dest->multifd_channels = params->multifd_channels;
 }
@@ -1524,6 +1557,9 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
 " use blockdev-mirror with NBD instead");
 s->parameters.block_incremental = params->block_incremental;
 }
+if (params->has_multifd_packet_size) {
+s->parameters.multifd_packet_size = params->multifd_packet_size;
+}
 if (params->has_multifd_channels) {
 s->parameters.multifd_channels = params->multifd_channels;
 }
diff --git a/migration/options.h b/migration/options.h
index 1cb3393be9..23995e6608 100644
--- a/migration/options.h
+++ b/migration/options.h
@@ -92,6 +92,7 @@ const char *migrate_tls_hostname(void);
 uint64_t migrate_xbzrle_cache_size(void);
 ZeroPageDetection mi

[PATCH v4 02/14] util/dsa: Add dependency idxd.

2024-04-24 Thread Hao Xiang
Idxd is the device driver for DSA (Intel Data Streaming
Accelerator). The driver has been fully functional since Linux
kernel 5.19. This change adds the driver's header file used
for userspace development.

Signed-off-by: Hao Xiang 
---
 linux-headers/linux/idxd.h | 356 +
 1 file changed, 356 insertions(+)
 create mode 100644 linux-headers/linux/idxd.h

diff --git a/linux-headers/linux/idxd.h b/linux-headers/linux/idxd.h
new file mode 100644
index 00..1d553bedbd
--- /dev/null
+++ b/linux-headers/linux/idxd.h
@@ -0,0 +1,356 @@
+/* SPDX-License-Identifier: LGPL-2.1 WITH Linux-syscall-note */
+/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
+#ifndef _USR_IDXD_H_
+#define _USR_IDXD_H_
+
+#ifdef __KERNEL__
+#include 
+#else
+#include 
+#endif
+
+/* Driver command error status */
+enum idxd_scmd_stat {
+   IDXD_SCMD_DEV_ENABLED = 0x80000010,
+   IDXD_SCMD_DEV_NOT_ENABLED = 0x80000020,
+   IDXD_SCMD_WQ_ENABLED = 0x80000021,
+   IDXD_SCMD_DEV_DMA_ERR = 0x80020000,
+   IDXD_SCMD_WQ_NO_GRP = 0x80030000,
+   IDXD_SCMD_WQ_NO_NAME = 0x80040000,
+   IDXD_SCMD_WQ_NO_SVM = 0x80050000,
+   IDXD_SCMD_WQ_NO_THRESH = 0x80060000,
+   IDXD_SCMD_WQ_PORTAL_ERR = 0x80070000,
+   IDXD_SCMD_WQ_RES_ALLOC_ERR = 0x80080000,
+   IDXD_SCMD_PERCPU_ERR = 0x80090000,
+   IDXD_SCMD_DMA_CHAN_ERR = 0x800a0000,
+   IDXD_SCMD_CDEV_ERR = 0x800b0000,
+   IDXD_SCMD_WQ_NO_SWQ_SUPPORT = 0x800c0000,
+   IDXD_SCMD_WQ_NONE_CONFIGURED = 0x800d0000,
+   IDXD_SCMD_WQ_NO_SIZE = 0x800e0000,
+   IDXD_SCMD_WQ_NO_PRIV = 0x800f0000,
+   IDXD_SCMD_WQ_IRQ_ERR = 0x80100000,
+   IDXD_SCMD_WQ_USER_NO_IOMMU = 0x80110000,
+};
+
+#define IDXD_SCMD_SOFTERR_MASK 0x80000000
+#define IDXD_SCMD_SOFTERR_SHIFT 16
+
+/* Descriptor flags */
+#define IDXD_OP_FLAG_FENCE 0x0001
+#define IDXD_OP_FLAG_BOF   0x0002
+#define IDXD_OP_FLAG_CRAV  0x0004
+#define IDXD_OP_FLAG_RCR   0x0008
+#define IDXD_OP_FLAG_RCI   0x0010
+#define IDXD_OP_FLAG_CRSTS 0x0020
+#define IDXD_OP_FLAG_CR0x0080
+#define IDXD_OP_FLAG_CC0x0100
+#define IDXD_OP_FLAG_ADDR1_TCS 0x0200
+#define IDXD_OP_FLAG_ADDR2_TCS 0x0400
+#define IDXD_OP_FLAG_ADDR3_TCS 0x0800
+#define IDXD_OP_FLAG_CR_TCS0x1000
+#define IDXD_OP_FLAG_STORD 0x2000
+#define IDXD_OP_FLAG_DRDBK 0x4000
+#define IDXD_OP_FLAG_DSTS  0x8000
+
+/* IAX */
+#define IDXD_OP_FLAG_RD_SRC2_AECS  0x01
+#define IDXD_OP_FLAG_RD_SRC2_2ND   0x02
+#define IDXD_OP_FLAG_WR_SRC2_AECS_COMP 0x04
+#define IDXD_OP_FLAG_WR_SRC2_AECS_OVFL 0x08
+#define IDXD_OP_FLAG_SRC2_STS  0x10
+#define IDXD_OP_FLAG_CRC_RFC3720   0x20
+
+/* Opcode */
+enum dsa_opcode {
+   DSA_OPCODE_NOOP = 0,
+   DSA_OPCODE_BATCH,
+   DSA_OPCODE_DRAIN,
+   DSA_OPCODE_MEMMOVE,
+   DSA_OPCODE_MEMFILL,
+   DSA_OPCODE_COMPARE,
+   DSA_OPCODE_COMPVAL,
+   DSA_OPCODE_CR_DELTA,
+   DSA_OPCODE_AP_DELTA,
+   DSA_OPCODE_DUALCAST,
+   DSA_OPCODE_CRCGEN = 0x10,
+   DSA_OPCODE_COPY_CRC,
+   DSA_OPCODE_DIF_CHECK,
+   DSA_OPCODE_DIF_INS,
+   DSA_OPCODE_DIF_STRP,
+   DSA_OPCODE_DIF_UPDT,
+   DSA_OPCODE_CFLUSH = 0x20,
+};
+
+enum iax_opcode {
+   IAX_OPCODE_NOOP = 0,
+   IAX_OPCODE_DRAIN = 2,
+   IAX_OPCODE_MEMMOVE,
+   IAX_OPCODE_DECOMPRESS = 0x42,
+   IAX_OPCODE_COMPRESS,
+   IAX_OPCODE_CRC64,
+   IAX_OPCODE_ZERO_DECOMP_32 = 0x48,
+   IAX_OPCODE_ZERO_DECOMP_16,
+   IAX_OPCODE_ZERO_COMP_32 = 0x4c,
+   IAX_OPCODE_ZERO_COMP_16,
+   IAX_OPCODE_SCAN = 0x50,
+   IAX_OPCODE_SET_MEMBER,
+   IAX_OPCODE_EXTRACT,
+   IAX_OPCODE_SELECT,
+   IAX_OPCODE_RLE_BURST,
+   IAX_OPCODE_FIND_UNIQUE,
+   IAX_OPCODE_EXPAND,
+};
+
+/* Completion record status */
+enum dsa_completion_status {
+   DSA_COMP_NONE = 0,
+   DSA_COMP_SUCCESS,
+   DSA_COMP_SUCCESS_PRED,
+   DSA_COMP_PAGE_FAULT_NOBOF,
+   DSA_COMP_PAGE_FAULT_IR,
+   DSA_COMP_BATCH_FAIL,
+   DSA_COMP_BATCH_PAGE_FAULT,
+   DSA_COMP_DR_OFFSET_NOINC,
+   DSA_COMP_DR_OFFSET_ERANGE,
+   DSA_COMP_DIF_ERR,
+   DSA_COMP_BAD_OPCODE = 0x10,
+   DSA_COMP_INVALID_FLAGS,
+   DSA_COMP_NOZERO_RESERVE,
+   DSA_COMP_XFER_ERANGE,
+   DSA_COMP_DESC_CNT_ERANGE,
+   DSA_COMP_DR_ERANGE,
+   DSA_COMP_OVERLAP_BUFFERS,
+   DSA_COMP_DCAST_ERR,
+   DSA_COMP_DESCLIST_ALIGN,
+   DSA_COMP_INT_HANDLE_INVAL,
+   DSA_COMP_CRA_XLAT,
+   DSA_COMP_CRA_ALIGN,
+   DSA_COMP_ADDR_ALIGN,
+   DSA_COMP_PRIV_BAD,
+   DSA_COMP_TRAFFIC_CLASS_CONF,
+   DSA_COMP_PFAULT_RDBA,
+   DSA_COMP_HW_ERR1,
+   DSA_COMP_HW_ERR_DRB,
+   DSA_COMP_TRANSLATION_FAIL,
+};
+
+enum iax_completion_status {
+   IAX_COMP_NONE = 0,
+   IAX_COMP_SUCCESS,
+   IAX_COMP_PAGE_FAULT_IR = 0x04,
+   IAX_COMP_ANALYTICS_ERROR = 0x0a

[PATCH v4 08/14] migration/multifd: Add new migration option for multifd DSA offloading.

2024-04-24 Thread Hao Xiang
Intel DSA offloading is an optional feature that turns on if the
proper hardware and software stack is available. To turn on
DSA offloading in multifd live migration:

multifd-dsa-accel="[dsa_dev_path1] [dsa_dev_path2] ... [dsa_dev_pathX]"

This feature is turned off by default.
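
For example, via the HMP monitor on the source (illustrative; the work
queue paths depend on how DSA was configured on the host):

    (qemu) migrate_set_parameter multifd-dsa-accel "/dev/dsa/wq4.0 /dev/dsa/wq4.1"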

Signed-off-by: Hao Xiang 
---
 migration/migration-hmp-cmds.c |  8 
 migration/options.c| 30 ++
 migration/options.h|  1 +
 qapi/migration.json| 26 +++---
 4 files changed, 62 insertions(+), 3 deletions(-)

diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c
index 7e96ae6ffd..7e9bb278c9 100644
--- a/migration/migration-hmp-cmds.c
+++ b/migration/migration-hmp-cmds.c
@@ -358,6 +358,9 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict)
 monitor_printf(mon, "%s: '%s'\n",
 MigrationParameter_str(MIGRATION_PARAMETER_TLS_AUTHZ),
 params->tls_authz);
+monitor_printf(mon, "%s: '%s'\n",
+MigrationParameter_str(MIGRATION_PARAMETER_MULTIFD_DSA_ACCEL),
+params->multifd_dsa_accel);
 
 if (params->has_block_bitmap_mapping) {
 const BitmapMigrationNodeAliasList *bmnal;
@@ -622,6 +625,11 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict)
 p->has_block_incremental = true;
 visit_type_bool(v, param, &p->block_incremental, &err);
 break;
+case MIGRATION_PARAMETER_MULTIFD_DSA_ACCEL:
+p->multifd_dsa_accel = g_new0(StrOrNull, 1);
+p->multifd_dsa_accel->type = QTYPE_QSTRING;
+visit_type_str(v, param, &p->multifd_dsa_accel->u.s, &err);
+break;
 case MIGRATION_PARAMETER_MULTIFD_CHANNELS:
 p->has_multifd_channels = true;
 visit_type_uint8(v, param, &p->multifd_channels, &err);
diff --git a/migration/options.c b/migration/options.c
index 239f5ecfb4..dc8642df81 100644
--- a/migration/options.c
+++ b/migration/options.c
@@ -182,6 +182,8 @@ Property migration_properties[] = {
 DEFINE_PROP_ZERO_PAGE_DETECTION("zero-page-detection", MigrationState,
parameters.zero_page_detection,
ZERO_PAGE_DETECTION_MULTIFD),
+DEFINE_PROP_STRING("multifd-dsa-accel", MigrationState,
+   parameters.multifd_dsa_accel),
 
 /* Migration capabilities */
 DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
@@ -920,6 +922,13 @@ const char *migrate_tls_creds(void)
 return s->parameters.tls_creds;
 }
 
+const char *migrate_multifd_dsa_accel(void)
+{
+MigrationState *s = migrate_get_current();
+
+return s->parameters.multifd_dsa_accel;
+}
+
 const char *migrate_tls_hostname(void)
 {
 MigrationState *s = migrate_get_current();
@@ -1060,6 +1069,8 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp)
 params->mode = s->parameters.mode;
 params->has_zero_page_detection = true;
 params->zero_page_detection = s->parameters.zero_page_detection;
+params->multifd_dsa_accel = g_strdup(s->parameters.multifd_dsa_accel ?
+ s->parameters.multifd_dsa_accel : "");
 
 return params;
 }
@@ -1068,6 +1079,7 @@ void migrate_params_init(MigrationParameters *params)
 {
 params->tls_hostname = g_strdup("");
 params->tls_creds = g_strdup("");
+params->multifd_dsa_accel = g_strdup("");
 
 /* Set has_* up only for parameter checks */
 params->has_compress_level = true;
@@ -1416,6 +1428,11 @@ static void migrate_params_test_apply(MigrateSetParameters *params,
 if (params->has_zero_page_detection) {
 dest->zero_page_detection = params->zero_page_detection;
 }
+
+if (params->multifd_dsa_accel) {
+assert(params->multifd_dsa_accel->type == QTYPE_QSTRING);
+dest->multifd_dsa_accel = params->multifd_dsa_accel->u.s;
+}
 }
 
 static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
@@ -1570,6 +1587,13 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
 if (params->has_zero_page_detection) {
 s->parameters.zero_page_detection = params->zero_page_detection;
 }
+
+if (params->multifd_dsa_accel) {
+g_free(s->parameters.multifd_dsa_accel);
+assert(params->multifd_dsa_accel->type == QTYPE_QSTRING);
+s->parameters.multifd_dsa_accel =
+g_strdup(params->multifd_dsa_accel->u.s);
+}
 }
 
 void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp)
@@ -1595,6 +1619,12 @@ void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp)
 params->tls_authz->type = QTYPE_QSTRING;
 params->tls_authz->u.s 

[PATCH v4 12/14] migration/multifd: Enable set packet size migration option.

2024-04-24 Thread Hao Xiang
During live migration, if the latency between sender and receiver
is high and bandwidth is also high (a long and fat pipe), using a bigger
packet size can help reduce migration total time. In addition, Intel
DSA offloading performs better with a large batch task. Providing an
option to set the packet size is useful for performance tuning.

Set the option:
migrate_set_parameter multifd-packet-size 4190208

Signed-off-by: Hao Xiang 
---
 migration/migration-hmp-cmds.c | 7 +++
 migration/multifd-zlib.c   | 6 --
 migration/multifd-zstd.c   | 6 --
 migration/multifd.c| 6 --
 migration/multifd.h| 3 ---
 5 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c
index 7e9bb278c9..053ad0283a 100644
--- a/migration/migration-hmp-cmds.c
+++ b/migration/migration-hmp-cmds.c
@@ -338,6 +338,9 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict)
 monitor_printf(mon, "%s: %s\n",
 MigrationParameter_str(MIGRATION_PARAMETER_BLOCK_INCREMENTAL),
 params->block_incremental ? "on" : "off");
+monitor_printf(mon, "%s: %" PRIu64 "\n",
+MigrationParameter_str(MIGRATION_PARAMETER_MULTIFD_PACKET_SIZE),
+params->multifd_packet_size);
 monitor_printf(mon, "%s: %u\n",
 MigrationParameter_str(MIGRATION_PARAMETER_MULTIFD_CHANNELS),
 params->multifd_channels);
@@ -630,6 +633,10 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict)
 p->multifd_dsa_accel->type = QTYPE_QSTRING;
 visit_type_str(v, param, &p->multifd_dsa_accel->u.s, &err);
 break;
+case MIGRATION_PARAMETER_MULTIFD_PACKET_SIZE:
+p->has_multifd_packet_size = true;
+visit_type_size(v, param, &p->multifd_packet_size, &err);
+break;
 case MIGRATION_PARAMETER_MULTIFD_CHANNELS:
 p->has_multifd_channels = true;
 visit_type_uint8(v, param, &p->multifd_channels, &err);
diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c
index 737a9645d2..2880d35841 100644
--- a/migration/multifd-zlib.c
+++ b/migration/multifd-zlib.c
@@ -49,6 +49,7 @@ static int zlib_send_setup(MultiFDSendParams *p, Error **errp)
 struct zlib_data *z = g_new0(struct zlib_data, 1);
 z_stream *zs = &z->zs;
 const char *err_msg;
+uint64_t multifd_packet_size = migrate_multifd_packet_size();
 
 zs->zalloc = Z_NULL;
 zs->zfree = Z_NULL;
@@ -58,7 +59,7 @@ static int zlib_send_setup(MultiFDSendParams *p, Error **errp)
 goto err_free_z;
 }
 /* This is the maximum size of the compressed buffer */
-z->zbuff_len = compressBound(MULTIFD_PACKET_SIZE);
+z->zbuff_len = compressBound(multifd_packet_size);
 z->zbuff = g_try_malloc(z->zbuff_len);
 if (!z->zbuff) {
 err_msg = "out of memory for zbuff";
@@ -193,6 +194,7 @@ out:
  */
 static int zlib_recv_setup(MultiFDRecvParams *p, Error **errp)
 {
+uint64_t multifd_packet_size = migrate_multifd_packet_size();
 struct zlib_data *z = g_new0(struct zlib_data, 1);
 z_stream *zs = &z->zs;
 
@@ -207,7 +209,7 @@ static int zlib_recv_setup(MultiFDRecvParams *p, Error **errp)
 return -1;
 }
 /* To be safe, we reserve twice the size of the packet */
-z->zbuff_len = MULTIFD_PACKET_SIZE * 2;
+z->zbuff_len = multifd_packet_size * 2;
 z->zbuff = g_try_malloc(z->zbuff_len);
 if (!z->zbuff) {
 inflateEnd(zs);
diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c
index 256858df0a..edc738afbb 100644
--- a/migration/multifd-zstd.c
+++ b/migration/multifd-zstd.c
@@ -49,6 +49,7 @@ struct zstd_data {
  */
 static int zstd_send_setup(MultiFDSendParams *p, Error **errp)
 {
+uint64_t multifd_packet_size = migrate_multifd_packet_size();
 struct zstd_data *z = g_new0(struct zstd_data, 1);
 int res;
 
@@ -69,7 +70,7 @@ static int zstd_send_setup(MultiFDSendParams *p, Error **errp)
 return -1;
 }
 /* This is the maximum size of the compressed buffer */
-z->zbuff_len = ZSTD_compressBound(MULTIFD_PACKET_SIZE);
+z->zbuff_len = ZSTD_compressBound(multifd_packet_size);
 z->zbuff = g_try_malloc(z->zbuff_len);
 if (!z->zbuff) {
 ZSTD_freeCStream(z->zcs);
@@ -182,6 +183,7 @@ out:
  */
 static int zstd_recv_setup(MultiFDRecvParams *p, Error **errp)
 {
+uint64_t multifd_packet_size = migrate_multifd_packet_size();
 struct zstd_data *z = g_new0(struct zstd_data, 1);
 int ret;
 
@@ -203,7 +205,7 @@ static int zstd_recv_setup(MultiFDRecvParams *p, Error **errp)
 }
 
 /* To be safe, we reserve twice the size of the packet */
-z->zbuff_len = MULTIFD_PACKET_SIZE * 2;
+z->zbuff_len = multifd_packet_size * 2;
 z->zbuff = g_try_malloc(z->zbuff_len);

[PATCH v4 14/14] migration/multifd: Add integration tests for multifd with Intel DSA offloading.

2024-04-24 Thread Hao Xiang
* Add test case to start and complete multifd live migration with DSA
offloading enabled.
* Add test case to start and cancel multifd live migration with DSA
offloading enabled.
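
A rough sketch of running these locally (illustrative; the exact
accel-config invocation depends on the device numbering and kernel, and
the tests expect a shared user work queue at /dev/dsa/wq4.0):

    # Configure and enable a DSA shared work queue with accel-config
    accel-config config-wq --group-id=0 --mode=shared --type=user \
        --name=wq4.0 dsa4/wq4.0
    accel-config enable-device dsa4
    accel-config enable-wq dsa4/wq4.0

    # Run the migration qtests
    QTEST_QEMU_BINARY=./qemu-system-x86_64 ./tests/qtest/migration-test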

Signed-off-by: Bryan Zhang 
Signed-off-by: Hao Xiang 
---
 tests/qtest/migration-test.c | 77 +++-
 1 file changed, 76 insertions(+), 1 deletion(-)

diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index 5d6d8cd634..354c5f26f8 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -616,6 +616,12 @@ typedef struct {
 bool suspend_me;
 } MigrateStart;
 
+/*
+ * It requires separate steps to configure and enable a DSA device.
+ * This test assumes that the configuration is done already.
+ */
+static const char *dsa_dev_path = "/dev/dsa/wq4.0";
+
 /*
  * A hook that runs after the src and dst QEMUs have been
  * created, but before the migration is started. This can
@@ -3025,7 +3031,7 @@ static void test_multifd_tcp_tls_x509_reject_anon_client(void)
  *
  *  And see that it works
  */
-static void test_multifd_tcp_cancel(void)
+static void test_multifd_tcp_cancel_common(bool use_dsa)
 {
 MigrateStart args = {
 .hide_stderr = true,
@@ -3045,6 +3051,10 @@ static void test_multifd_tcp_cancel(void)
 migrate_set_capability(from, "multifd", true);
 migrate_set_capability(to, "multifd", true);
 
+if (use_dsa) {
+migrate_set_parameter_str(from, "multifd-dsa-accel", dsa_dev_path);
+}
+
 /* Start incoming migration from the 1st socket */
 migrate_incoming_qmp(to, "tcp:127.0.0.1:0", "{}");
 
@@ -3094,6 +3104,48 @@ static void test_multifd_tcp_cancel(void)
 test_migrate_end(from, to2, true);
 }
 
+/*
+ * This test does:
+ *  source   target
+ *   migrate_incoming
+ * migrate
+ * migrate_cancel
+ *   launch another target
+ * migrate
+ *
+ *  And see that it works
+ */
+static void test_multifd_tcp_cancel(void)
+{
+test_multifd_tcp_cancel_common(false);
+}
+
+#ifdef CONFIG_DSA_OPT
+
+static void *test_migrate_precopy_tcp_multifd_start_dsa(QTestState *from,
+QTestState *to)
+{
+migrate_set_parameter_str(from, "multifd-dsa-accel", dsa_dev_path);
+return test_migrate_precopy_tcp_multifd_start_common(from, to, "none");
+}
+
+static void test_multifd_tcp_zero_page_dsa(void)
+{
+MigrateCommon args = {
+.listen_uri = "defer",
+.start_hook = test_migrate_precopy_tcp_multifd_start_dsa,
+};
+
+test_precopy_common(&args);
+}
+
+static void test_multifd_tcp_cancel_dsa(void)
+{
+test_multifd_tcp_cancel_common(true);
+}
+
+#endif
+
 static void calc_dirty_rate(QTestState *who, uint64_t calc_time)
 {
 qtest_qmp_assert_success(who,
@@ -3518,6 +3570,19 @@ static bool kvm_dirty_ring_supported(void)
 #endif
 }
 
+#ifdef CONFIG_DSA_OPT
+static int test_dsa_setup(void)
+{
+int fd;
+fd = open(dsa_dev_path, O_RDWR);
+if (fd < 0) {
+return -1;
+}
+close(fd);
+return 0;
+}
+#endif
+
 int main(int argc, char **argv)
 {
 bool has_kvm, has_tcg;
@@ -3752,6 +3817,16 @@ int main(int argc, char **argv)
test_multifd_tcp_zero_page_legacy);
 migration_test_add("/migration/multifd/tcp/plain/zero-page/none",
test_multifd_tcp_no_zero_page);
+
+#ifdef CONFIG_DSA_OPT
+if (g_str_equal(arch, "x86_64") && test_dsa_setup() == 0) {
+migration_test_add("/migration/multifd/tcp/plain/zero-page/dsa",
+   test_multifd_tcp_zero_page_dsa);
+migration_test_add("/migration/multifd/tcp/plain/cancel/dsa",
+   test_multifd_tcp_cancel_dsa);
+}
+#endif
+
 migration_test_add("/migration/multifd/tcp/plain/cancel",
test_multifd_tcp_cancel);
 migration_test_add("/migration/multifd/tcp/plain/zlib",
-- 
2.30.2




[PATCH v4 05/14] util/dsa: Implement DSA task asynchronous completion thread model.

2024-04-24 Thread Hao Xiang
* Create a dedicated thread for DSA task completion.
* DSA completion thread runs a loop and polls for completed tasks.
* Start and stop the DSA completion thread during DSA device start/stop.

A user space application can directly submit tasks to the Intel DSA
accelerator by writing to DSA's device memory (mapped in user space).
Once a task is submitted, the device starts processing it and writes
the completion status back to the task. A user space application can
poll the task's completion status to check for completion. This change
uses a dedicated thread to perform DSA task completion checking.
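
In outline, the completion thread body behaves like this (a sketch of
the model described above, reusing the dequeue and polling helpers from
this series; the real function also signals sem_init_done and handles
thread teardown):

    static void *dsa_completion_loop_sketch(void *opaque)
    {
        struct dsa_completion_thread *ctx = opaque;
        struct dsa_batch_task *task;

        while (!ctx->stopping) {
            /* Blocks on the task queue condition until work arrives. */
            task = dsa_task_dequeue(ctx->group);
            if (!task) {
                continue;
            }
            if (task->task_type == DSA_TASK) {
                poll_task_completion(task);
            } else {
                poll_batch_task_completion(task);
            }
            /* Wakes up the submitter waiting on this task. */
            task->completion_callback(task);
        }
        return NULL;
    }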

Signed-off-by: Hao Xiang 
---
 include/qemu/dsa.h |   1 +
 util/dsa.c | 274 -
 2 files changed, 274 insertions(+), 1 deletion(-)

diff --git a/include/qemu/dsa.h b/include/qemu/dsa.h
index 37cae8d9d2..2513192a2b 100644
--- a/include/qemu/dsa.h
+++ b/include/qemu/dsa.h
@@ -38,6 +38,7 @@ typedef struct dsa_batch_task {
 DsaTaskType task_type;
 DsaTaskStatus status;
 int batch_size;
+bool *results;
 QSIMPLEQ_ENTRY(dsa_batch_task) entry;
 } dsa_batch_task;
 
diff --git a/util/dsa.c b/util/dsa.c
index 75739a1af6..003c4f47d9 100644
--- a/util/dsa.c
+++ b/util/dsa.c
@@ -44,6 +44,7 @@
 
 #define DSA_WQ_SIZE 4096
 #define MAX_DSA_DEVICES 16
+#define DSA_COMPLETION_THREAD "dsa_completion"
 
 typedef QSIMPLEQ_HEAD(dsa_task_queue, dsa_batch_task) dsa_task_queue;
 
@@ -62,8 +63,18 @@ struct dsa_device_group {
 dsa_task_queue task_queue;
 };
 
+struct dsa_completion_thread {
+bool stopping;
+bool running;
+QemuThread thread;
+int thread_id;
+QemuSemaphore sem_init_done;
+struct dsa_device_group *group;
+};
+
 uint64_t max_retry_count;
 static struct dsa_device_group dsa_group;
+static struct dsa_completion_thread completion_thread;
 
 
 /**
@@ -443,6 +454,265 @@ submit_batch_wi_async(struct dsa_batch_task *batch_task)
 return dsa_task_enqueue(device_group, batch_task);
 }
 
+/**
+ * @brief Poll for the DSA work item completion.
+ *
+ * @param completion A pointer to the DSA work item completion record.
+ * @param opcode The DSA opcode.
+ *
+ * @return Zero if successful, non-zero otherwise.
+ */
+static int
+poll_completion(struct dsa_completion_record *completion,
+enum dsa_opcode opcode)
+{
+uint8_t status;
+uint64_t retry = 0;
+
+while (true) {
+/* The DSA operation completes successfully or fails. */
+status = completion->status;
+if (status == DSA_COMP_SUCCESS ||
+status == DSA_COMP_PAGE_FAULT_NOBOF ||
+status == DSA_COMP_BATCH_PAGE_FAULT ||
+status == DSA_COMP_BATCH_FAIL) {
+break;
+} else if (status != DSA_COMP_NONE) {
+error_report("DSA opcode %d failed with status = %d.",
+opcode, status);
+return 1;
+}
+retry++;
+if (retry > max_retry_count) {
+error_report("DSA wait for completion retry %lu times.", retry);
+return 1;
+}
+_mm_pause();
+}
+
+return 0;
+}
+
+/**
+ * @brief Complete a single DSA task in the batch task.
+ *
+ * @param task A pointer to the batch task structure.
+ *
+ * @return Zero if successful, otherwise non-zero.
+ */
+static int
+poll_task_completion(struct dsa_batch_task *task)
+{
+assert(task->task_type == DSA_TASK);
+
+struct dsa_completion_record *completion = &task->completions[0];
+uint8_t status;
+int ret;
+
+ret = poll_completion(completion, task->descriptors[0].opcode);
+if (ret != 0) {
+goto exit;
+}
+
+status = completion->status;
+if (status == DSA_COMP_SUCCESS) {
+task->results[0] = (completion->result == 0);
+goto exit;
+}
+
+assert(status == DSA_COMP_PAGE_FAULT_NOBOF);
+
+exit:
+return ret;
+}
+
+/**
+ * @brief Poll a batch task status until it completes. If DSA task doesn't
+ *complete properly, use CPU to complete the task.
+ *
+ * @param batch_task A pointer to the DSA batch task.
+ *
+ * @return Zero if successful, otherwise non-zero.
+ */
+static int
+poll_batch_task_completion(struct dsa_batch_task *batch_task)
+{
+struct dsa_completion_record *batch_completion =
+&batch_task->batch_completion;
+struct dsa_completion_record *completion;
+uint8_t batch_status;
+uint8_t status;
+bool *results = batch_task->results;
+uint32_t count = batch_task->batch_descriptor.desc_count;
+int ret;
+
+ret = poll_completion(batch_completion,
+  batch_task->batch_descriptor.opcode);
+if (ret != 0) {
+goto exit;
+}
+
+batch_status = batch_completion->status;
+
+if (batch_status == DSA_COMP_SUCCESS) {
+if (batch_completion->bytes_completed == count) {
+/*
+ * Let's skip checking for each descriptors' comp

[PATCH v4 13/14] util/dsa: Add unit test coverage for Intel DSA task submission and completion.

2024-04-24 Thread Hao Xiang
* Test DSA start and stop path.
* Test DSA configure and cleanup path.
* Test DSA task submission and completion path.

Signed-off-by: Bryan Zhang 
Signed-off-by: Hao Xiang 
---
 tests/unit/meson.build |   6 +
 tests/unit/test-dsa.c  | 499 +
 2 files changed, 505 insertions(+)
 create mode 100644 tests/unit/test-dsa.c

diff --git a/tests/unit/meson.build b/tests/unit/meson.build
index 26c109c968..1d4d48898b 100644
--- a/tests/unit/meson.build
+++ b/tests/unit/meson.build
@@ -49,6 +49,12 @@ tests = {
   'test-interval-tree': [],
 }
 
+if config_host_data.get('CONFIG_DSA_OPT')
+  tests += {
+'test-dsa': [],
+  }
+endif
+
 if have_system or have_tools
   tests += {
 'test-qmp-event': [testqapi],
diff --git a/tests/unit/test-dsa.c b/tests/unit/test-dsa.c
new file mode 100644
index 00..0f2092767d
--- /dev/null
+++ b/tests/unit/test-dsa.c
@@ -0,0 +1,499 @@
+/*
+ * Test DSA functions.
+ *
+ * Copyright (c) 2023 Hao Xiang 
+ * Copyright (c) 2023 Bryan Zhang 
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include "qemu/osdep.h"
+#include "qemu/host-utils.h"
+
+#include "qemu/cutils.h"
+#include "qemu/memalign.h"
+#include "qemu/dsa.h"
+
+/*
+ * TODO Communicate that DSA must be configured to support this batch size.
+ * TODO Alternatively, poke the DSA device to figure out batch size.
+ */
+#define batch_size 128
+#define page_size 4096
+
+#define oversized_batch_size (batch_size + 1)
+#define num_devices 2
+#define max_buffer_size (64 * 1024)
+
+/* TODO Make these not-hardcoded. */
+static const char *path1 = "/dev/dsa/wq4.0";
+static const char *path2 = "/dev/dsa/wq4.0 /dev/dsa/wq4.1";
+
+static struct batch_task *task;
+
+/* A helper for running a single task and checking for correctness. */
+static void do_single_task(void)
+{
+task = batch_task_init(batch_size);
+char buf[page_size];
+char *ptr = buf;
+
+buffer_is_zero_dsa_batch_async(task,
+   (const void **)&ptr,
+   1,
+   page_size);
+g_assert(task->results[0] == buffer_is_zero(buf, page_size));
+
+batch_task_destroy(task);
+}
+
+static void test_single_zero(void)
+{
+g_assert(!dsa_init(path1));
+dsa_start();
+
+task = batch_task_init(batch_size);
+
+char buf[page_size];
+char *ptr = buf;
+
+memset(buf, 0x0, page_size);
+buffer_is_zero_dsa_batch_async(task,
+   (const void **)&ptr,
+   1, page_size);
+g_assert(task->results[0]);
+
+batch_task_destroy(task);
+
+dsa_cleanup();
+}
+
+static void test_single_zero_async(void)
+{
+test_single_zero();
+}
+
+static void test_single_nonzero(void)
+{
+g_assert(!dsa_init(path1));
+dsa_start();
+
+task = batch_task_init(batch_size);
+
+char buf[page_size];
+char *ptr = buf;
+
+memset(buf, 0x1, page_size);
+buffer_is_zero_dsa_batch_async(task,
+   (const void **)&ptr,
+   1, page_size);
+g_assert(!task->results[0]);
+
+batch_task_destroy(task);
+
+dsa_cleanup();
+}
+
+static void test_single_nonzero_async(void)
+{
+test_single_nonzero();
+}
+
+/* count == 0 should return quickly without calling into DSA. */
+static void test_zero_count_async(void)
+{
+char buf[page_size];
+buffer_is_zero_dsa_batch_async(task,
+ (const void **)&buf,
+ 0,
+ page_size);
+}
+
+static void test_null_task_async(void)
+{
+if (g_test_subprocess()) {
+g_assert(!dsa_init(path1));
+
+char buf[page_size * batch_size];
+char *addrs[batch_size];
+for (int i = 0; i < batch_size; i++) {
+addrs[i] = buf + (page_size * i);
+}
+
+buffer_is_zero_dsa_batch_async(NULL, (const void **)addrs,
+  batch_size,
+  page_size);
+} else {
+g_test_trap_subprocess(NULL, 0, 0);
+g_test_trap_assert_failed();
+}
+}
+
+static void test_oversized_batch(void)
+{
+g_assert(!dsa_init(path1));
+dsa_start();
+
+task = batch_ta

[PATCH v4 01/14] meson: Introduce new instruction set enqcmd to the build system.

2024-04-24 Thread Hao Xiang
Enable instruction set enqcmd in build.
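
Usage mirrors the existing AVX512 knobs (both spellings below map to the
same meson option added here):

    ./configure --enable-enqcmd
    # or, with meson directly:
    meson configure -Denqcmd=enabled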

Signed-off-by: Hao Xiang 
---
 meson.build   | 14 ++
 meson_options.txt |  2 ++
 scripts/meson-buildoptions.sh |  3 +++
 3 files changed, 19 insertions(+)

diff --git a/meson.build b/meson.build
index 95cee7046e..9e008ddc34 100644
--- a/meson.build
+++ b/meson.build
config_host_data.set('CONFIG_AVX512BW_OPT', get_option('avx512bw') \
 int main(int argc, char *argv[]) { return bar(argv[0]); }
   '''), error_message: 'AVX512BW not available').allowed())
 
+config_host_data.set('CONFIG_DSA_OPT', get_option('enqcmd') \
  .require(have_cpuid_h, error_message: 'cpuid.h not available, cannot enable ENQCMD') \
+  .require(cc.links('''
+#include <stdint.h>
+#include <cpuid.h>
+#include <immintrin.h>
+static int __attribute__((target("enqcmd"))) bar(void *a) {
+  uint64_t dst[8] = { 0 };
+  uint64_t src[8] = { 0 };
+  return _enqcmd(dst, src);
+}
+int main(int argc, char *argv[]) { return bar(argv[argc - 1]); }
+  '''), error_message: 'ENQCMD not available').allowed())
+
 # For both AArch64 and AArch32, detect if builtins are available.
 config_host_data.set('CONFIG_ARM_AES_BUILTIN', cc.compiles('''
 #include <arm_neon.h>
diff --git a/meson_options.txt b/meson_options.txt
index b5c0bad9e7..63c1bf815b 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -121,6 +121,8 @@ option('avx512f', type: 'feature', value: 'disabled',
description: 'AVX512F optimizations')
 option('avx512bw', type: 'feature', value: 'auto',
description: 'AVX512BW optimizations')
+option('enqcmd', type: 'feature', value: 'disabled',
+   description: 'ENQCMD optimizations')
 option('keyring', type: 'feature', value: 'auto',
description: 'Linux keyring support')
 option('libkeyutils', type: 'feature', value: 'auto',
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
index 5ace33f167..2cdfc84455 100644
--- a/scripts/meson-buildoptions.sh
+++ b/scripts/meson-buildoptions.sh
@@ -93,6 +93,7 @@ meson_options_help() {
   printf "%s\n" '  avx2AVX2 optimizations'
   printf "%s\n" '  avx512bwAVX512BW optimizations'
   printf "%s\n" '  avx512f AVX512F optimizations'
+  printf "%s\n" '  enqcmd  ENQCMD optimizations'
   printf "%s\n" '  blkio   libblkio block device driver'
   printf "%s\n" '  bochs   bochs image format support'
   printf "%s\n" '  bpf eBPF support'
@@ -239,6 +240,8 @@ _meson_option_parse() {
 --disable-avx512bw) printf "%s" -Davx512bw=disabled ;;
 --enable-avx512f) printf "%s" -Davx512f=enabled ;;
 --disable-avx512f) printf "%s" -Davx512f=disabled ;;
+--enable-enqcmd) printf "%s" -Denqcmd=enabled ;;
+--disable-enqcmd) printf "%s" -Denqcmd=disabled ;;
 --enable-gcov) printf "%s" -Db_coverage=true ;;
 --disable-gcov) printf "%s" -Db_coverage=false ;;
 --enable-lto) printf "%s" -Db_lto=true ;;
-- 
2.30.2




[PATCH v4 06/14] util/dsa: Implement zero page checking in DSA task.

2024-04-24 Thread Hao Xiang
Create DSA tasks with operation code DSA_OPCODE_COMPVAL.
Here we create two types of DSA tasks: a single DSA task and
a batch DSA task. A batch DSA task reduces task submission overhead
and hence should be the default option. However, due to the way the DSA
hardware works, a DSA batch task must contain at least two individual
tasks. There are times we need to check a single page, so a single DSA
task submission path is also required (see the sketch below).
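
To make the fallback concrete, here is a minimal sketch of the dispatch
policy described above. submit_wi_async() and submit_batch_wi_async() are
the helpers from the diff below; the wrapper and its count parameter are
illustrative only, not part of the patch:

static int submit_compval_async(struct dsa_batch_task *task, int count)
{
    /*
     * A DSA batch descriptor must reference at least two work items,
     * so a one-page check falls back to the single-task submission.
     */
    if (count < 2) {
        return submit_wi_async(task);
    }
    return submit_batch_wi_async(task);
}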

Signed-off-by: Hao Xiang 
Signed-off-by: Bryan Zhang 
---
 include/qemu/dsa.h |  18 
 util/dsa.c | 247 +
 2 files changed, 244 insertions(+), 21 deletions(-)

diff --git a/include/qemu/dsa.h b/include/qemu/dsa.h
index 2513192a2b..645e6fc367 100644
--- a/include/qemu/dsa.h
+++ b/include/qemu/dsa.h
@@ -73,6 +73,24 @@ void dsa_cleanup(void);
  */
 bool dsa_is_running(void);
 
+/**
+ * @brief Initializes a buffer zero batch task.
+ *
+ * @param task A pointer to the batch task to initialize.
+ * @param results A pointer to an array of zero page checking results.
+ * @param batch_size The number of DSA tasks in the batch.
+ */
+void
+buffer_zero_batch_task_init(struct dsa_batch_task *task,
+bool *results, int batch_size);
+
+/**
+ * @brief Performs the proper cleanup on a DSA batch task.
+ *
+ * @param task A pointer to the batch task to cleanup.
+ */
+void buffer_zero_batch_task_destroy(struct dsa_batch_task *task);
+
 #else
 
 static inline bool dsa_is_running(void)
diff --git a/util/dsa.c b/util/dsa.c
index 003c4f47d9..9db4cfcf1d 100644
--- a/util/dsa.c
+++ b/util/dsa.c
@@ -76,6 +76,7 @@ uint64_t max_retry_count;
 static struct dsa_device_group dsa_group;
 static struct dsa_completion_thread completion_thread;
 
+static void buffer_zero_dsa_completion(void *context);
 
 /**
  * @brief This function opens a DSA device's work queue and
@@ -207,7 +208,6 @@ dsa_device_group_start(struct dsa_device_group *group)
  *
  * @param group A pointer to the DSA device group.
  */
-__attribute__((unused))
 static void
 dsa_device_group_stop(struct dsa_device_group *group)
 {
@@ -243,7 +243,6 @@ dsa_device_group_cleanup(struct dsa_device_group *group)
  * @return struct dsa_device* A pointer to the next available DSA device
  * in the group.
  */
-__attribute__((unused))
 static struct dsa_device *
 dsa_device_group_get_next_device(struct dsa_device_group *group)
 {
@@ -320,7 +319,6 @@ dsa_task_enqueue(struct dsa_device_group *group,
  * @param group A pointer to the DSA device group.
  * @return dsa_batch_task* The DSA task being dequeued.
  */
-__attribute__((unused))
 static struct dsa_batch_task *
 dsa_task_dequeue(struct dsa_device_group *group)
 {
@@ -378,22 +376,6 @@ submit_wi_int(void *wq, struct dsa_hw_desc *descriptor)
 return 0;
 }
 
-/**
- * @brief Synchronously submits a DSA work item to the
- *device work queue.
- *
- * @param wq A pointer to the DSA worjk queue's device memory.
- * @param descriptor A pointer to the DSA work item descriptor.
- *
- * @return int Zero if successful, non-zero otherwise.
- */
-__attribute__((unused))
-static int
-submit_wi(void *wq, struct dsa_hw_desc *descriptor)
-{
-return submit_wi_int(wq, descriptor);
-}
-
 /**
  * @brief Asynchronously submits a DSA work item to the
  *device work queue.
@@ -402,7 +384,6 @@ submit_wi(void *wq, struct dsa_hw_desc *descriptor)
  *
  * @return int Zero if successful, non-zero otherwise.
  */
-__attribute__((unused))
 static int
 submit_wi_async(struct dsa_batch_task *task)
 {
@@ -431,7 +412,6 @@ submit_wi_async(struct dsa_batch_task *task)
  *
  * @return int Zero if successful, non-zero otherwise.
  */
-__attribute__((unused))
 static int
 submit_batch_wi_async(struct dsa_batch_task *batch_task)
 {
@@ -713,6 +693,231 @@ static void dsa_completion_thread_stop(void *opaque)
 qemu_sem_destroy(&thread_context->sem_init_done);
 }
 
+/**
+ * @brief Initializes a buffer zero comparison DSA task.
+ *
+ * @param descriptor A pointer to the DSA task descriptor.
+ * @param completion A pointer to the DSA task completion record.
+ */
+static void
+buffer_zero_task_init_int(struct dsa_hw_desc *descriptor,
+  struct dsa_completion_record *completion)
+{
+descriptor->opcode = DSA_OPCODE_COMPVAL;
+descriptor->flags = IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CRAV;
+descriptor->comp_pattern = (uint64_t)0;
+descriptor->completion_addr = (uint64_t)completion;
+}
+
+/**
+ * @brief Initializes a buffer zero batch task.
+ *
+ * @param task A pointer to the batch task to initialize.
+ * @param results A pointer to an array of zero page checking results.
+ * @param batch_size The number of DSA tasks in the batch.
+ */
+void
+buffer_zero_batch_task_init(struct dsa_batch_task *task,
+bool *results, int batch_size)
+{
+int descriptors_size = sizeof(*task->descriptors) * batch_size;
+memset(task, 0, sizeof(*task));
+
+task->descripto

[PATCH v6 0/7] Introduce multifd zero page checking.

2024-03-11 Thread Hao Xiang
v6 update:
* Make ZERO_PAGE_DETECTION_NONE option work in legacy migration.
* Rebase on top of 7489f7f3f81dcb776df8c1b9a9db281fc21bf05f.

v5 update:
* Move QEMU9.0 -> QEMU8.2 migration backward compatibility handling into
the patch where "multifd" zero page checking becomes the default option.
* A few function renaming according to feedback.
* Fix bug in multifd_send_zero_page_detect.
* Rebase on the new mapped-ram feature.
* Pulled in 2 commits from Fabiano.

v4 update:
* Fix documentation for interface ZeroPageDetection.
* Fix implementation in multifd_send_zero_page_check.
* Rebase on top of c0c6a0e3528b88aaad0b9d333e295707a195587b.

v3 update:
* Change "zero" to "zero-pages" and use type size for "zero-bytes".
* Fixed ZeroPageDetection interface description.
* Move zero page unit tests to its own path.
* Removed some asserts.
* Added backward compatibility support for migration 9.0 -> 8.2.
* Removed fields "zero" and "normal" page address arrays from v2. Now
multifd_zero_page_check_send sorts normal/zero pages in the "offset" array.

v2 update:
* Implement zero-page-detection switch with enumeration "legacy",
"none" and "multifd".
* Move normal/zero pages from MultiFDSendParams to MultiFDPages_t.
* Add zeros and zero_bytes accounting.

This patchset is based on Juan Quintela's old series here
https://lore.kernel.org/all/20220802063907.18882-1-quint...@redhat.com/

In the multifd live migration model, there is a single migration main
thread scanning the page map, queuing the pages to multiple multifd
sender threads. The migration main thread runs zero page checking on
every page before queuing the page to the sender threads. Zero page
checking is a CPU intensive task and hence having a single thread doing
all that doesn't scale well. This change introduces a new function
to run the zero page checking on the multifd sender threads. This
patchset also lays the groundwork for future changes that offload the
zero page checking task to accelerator hardware. A rough sketch of the
sender loop after this series is shown below.
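
Helper names other than multifd_send_zero_page_detect() are made up for
illustration; this is a sketch of the control flow, not the actual code:

static void *multifd_sender_loop_sketch(MultiFDSendParams *p)
{
    while (wait_for_queued_pages(p)) {    /* filled by the main thread  */
        multifd_send_zero_page_detect(p); /* zero check now runs here   */
        send_prepared_packet(p);          /* zero pages send no payload */
    }
    return NULL;
}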

Use two Intel 4th generation Xeon servers for testing.

Architecture:x86_64
CPU(s):  192
Thread(s) per core:  2
Core(s) per socket:  48
Socket(s):   2
NUMA node(s):2
Vendor ID:   GenuineIntel
CPU family:  6
Model:   143
Model name:  Intel(R) Xeon(R) Platinum 8457C
Stepping:8
CPU MHz: 2538.624
CPU max MHz: 3800.0000
CPU min MHz: 800.0000

Perform multifd live migration with below setup:
1. VM has 100GB memory. All pages in the VM are zero pages.
2. Use tcp socket for live migration.
3. Use 4 multifd channels and zero page checking on migration main thread.
4. Use 1/2/4 multifd channels and zero page checking on multifd sender
threads.
5. Record migration total time from sender QEMU console's "info migrate"
command.

+-------------------+----------------+
|zero-page-checking | total-time(ms) |
+-------------------+----------------+
|main-thread        | 9629           |
+-------------------+----------------+
|multifd-1-threads  | 6182           |
+-------------------+----------------+
|multifd-2-threads  | 4643           |
+-------------------+----------------+
|multifd-4-threads  | 4143           |
+-------------------+----------------+

Apply this patchset on top of commit
7489f7f3f81dcb776df8c1b9a9db281fc21bf05f

Fabiano Rosas (2):
  migration/multifd: Allow zero pages in file migration
  migration/multifd: Allow clearing of the file_bmap from multifd

Hao Xiang (5):
  migration/multifd: Add new migration option zero-page-detection.
  migration/multifd: Implement zero page transmission on the multifd
thread.
  migration/multifd: Implement ram_save_target_page_multifd to handle
multifd version of MigrationOps::ram_save_target_page.
  migration/multifd: Enable multifd zero page checking by default.
  migration/multifd: Add new migration test cases for legacy zero page
checking.

 hw/core/machine.c   |  4 +-
 hw/core/qdev-properties-system.c| 10 
 include/hw/qdev-properties-system.h |  4 ++
 migration/file.c|  2 +-
 migration/meson.build   |  1 +
 migration/migration-hmp-cmds.c  |  9 +++
 migration/multifd-zero-page.c   | 87 +++
 migration/multifd-zlib.c| 21 +--
 migration/multifd-zstd.c| 20 +--
 migration/multifd.c | 92 -
 migration/multifd.h | 23 +++-
 migration/options.c | 21 +++
 migration/options.h |  1 +
 migration/ram.c | 51 
 migration/ram.h |  3 +-
 migration/trace-events  |  8 +--
 qapi/migration.json | 38 +++-
 tests/qtest/migration-test.c| 52 
 18 files changed, 399 insertions(+), 48 deletions(-)
 create mode 100644 migration/multifd-zero-page.c

-- 
2.30.2




Re: [PATCH v4 3/7] migration/multifd: Implement ram_save_target_page_multifd to handle multifd version of MigrationOps::ram_save_target_page.

2024-03-11 Thread hao . xiang
March 11, 2024 at 6:20 AM, "Peter Xu"  wrote:



> On Sat, Mar 09, 2024 at 02:06:33AM +0000, hao.xi...@linux.dev wrote:
> > > @@ -1122,10 +1122,6 @@ static int save_zero_page(RAMState *rs, PageSearchStatus *pss,
> > > QEMUFile *file = pss->pss_channel;
> > > int len = 0;
> > >
> > > - if (migrate_zero_page_detection() == ZERO_PAGE_DETECTION_NONE) {
> > > - return 0;
> > > - }
> > >
> > > We need to keep this to disable zero-page-detect on !multifd?
> >
> > So if multifd is enabled, the new parameter takes effect. If multifd is
> > not enabled, zero page checking will always be done in the main thread,
> > which is exactly the behavior it is now. I thought legacy migration was a
> > deprecated feature, so I was trying not to add new stuff to it.
>
> There's no plan to deprecate legacy migration. I think there was a plan to
> make multifd the default, but I haven't thought that all through
> thoroughly yet, and even if it happens it doesn't mean we'll remove the
> legacy migration code.
>
> When you repost, please still make sure this parameter works for both
> multifd and !multifd.
>
> Thanks,
>
> --
> Peter Xu


Sure. Fixed the issue now and reposted a new patchset.




[PATCH v6 1/7] migration/multifd: Allow zero pages in file migration

2024-03-11 Thread Hao Xiang
From: Fabiano Rosas 

Currently, it's an error to have no data pages in the multifd file
migration because zero page detection is done in the migration thread
and zero pages don't reach multifd. This is enforced with the
pages->num assert.

We're about to add zero page detection on the multifd thread. Fix the
file_write_ramblock_iov() to stop considering p->iovs_num=0 an error.

Signed-off-by: Fabiano Rosas 
---
 migration/file.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/migration/file.c b/migration/file.c
index 164b079966..5075f9526f 100644
--- a/migration/file.c
+++ b/migration/file.c
@@ -159,7 +159,7 @@ void file_start_incoming_migration(FileMigrationArgs *file_args, Error **errp)
 int file_write_ramblock_iov(QIOChannel *ioc, const struct iovec *iov,
 int niov, RAMBlock *block, Error **errp)
 {
-ssize_t ret = -1;
+ssize_t ret = 0;
 int i, slice_idx, slice_num;
 uintptr_t base, next, offset;
 size_t len;
-- 
2.30.2




[PATCH v6 5/7] migration/multifd: Implement ram_save_target_page_multifd to handle multifd version of MigrationOps::ram_save_target_page.

2024-03-11 Thread Hao Xiang
From: Hao Xiang 

1. Add a dedicated handler for MigrationOps::ram_save_target_page in
multifd live migration.
2. Refactor ram_save_target_page_legacy so that the legacy and multifd
handlers don't have internal functions calling into each other (a
condensed sketch of the handler selection follows below).
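
The handler selection then becomes a single decision at setup time. The
ram_save_setup() hunk below does exactly this; condensed here for clarity:

migration_ops->ram_save_target_page = migrate_multifd()
                                    ? ram_save_target_page_multifd
                                    : ram_save_target_page_legacy;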

Signed-off-by: Hao Xiang 
Reviewed-by: Fabiano Rosas 
Message-Id: <20240226195654.934709-4-hao.xi...@bytedance.com>
---
 migration/ram.c | 38 +-
 1 file changed, 29 insertions(+), 9 deletions(-)

diff --git a/migration/ram.c b/migration/ram.c
index d1f97cf862..1d52b5c37f 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -2079,7 +2075,6 @@ static bool save_compress_page(RAMState *rs, PageSearchStatus *pss,
  */
 static int ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss)
 {
-RAMBlock *block = pss->block;
 ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
 int res;
 
@@ -2095,17 +2094,33 @@ static int ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss)
 return 1;
 }
 
+return ram_save_page(rs, pss);
+}
+
+/**
+ * ram_save_target_page_multifd: send one target page to multifd workers
+ *
+ * Returns 1 if the page was queued, -1 otherwise.
+ *
+ * @rs: current RAM state
+ * @pss: data about the page we want to send
+ */
+static int ram_save_target_page_multifd(RAMState *rs, PageSearchStatus *pss)
+{
+RAMBlock *block = pss->block;
+ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
+
 /*
- * Do not use multifd in postcopy as one whole host page should be
- * placed.  Meanwhile postcopy requires atomic update of pages, so even
- * if host page size == guest page size the dest guest during run may
- * still see partially copied pages which is data corruption.
+ * While using multifd live migration, we still need to handle zero
+ * page checking on the migration main thread.
  */
-if (migrate_multifd() && !migration_in_postcopy()) {
-return ram_save_multifd_page(block, offset);
+if (migrate_zero_page_detection() == ZERO_PAGE_DETECTION_LEGACY) {
+if (save_zero_page(rs, pss, offset)) {
+return 1;
+}
 }
 
-return ram_save_page(rs, pss);
+return ram_save_multifd_page(block, offset);
 }
 
 /* Should be called before sending a host page */
@@ -3113,7 +3128,12 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
 }
 
 migration_ops = g_malloc0(sizeof(MigrationOps));
-migration_ops->ram_save_target_page = ram_save_target_page_legacy;
+
+if (migrate_multifd()) {
+migration_ops->ram_save_target_page = ram_save_target_page_multifd;
+} else {
+migration_ops->ram_save_target_page = ram_save_target_page_legacy;
+}
 
 bql_unlock();
 ret = multifd_send_sync_main();
-- 
2.30.2




[PATCH v6 6/7] migration/multifd: Enable multifd zero page checking by default.

2024-03-11 Thread Hao Xiang
From: Hao Xiang 

1. Set the default "zero-page-detection" option to "multifd". Zero page
checking can now be done in the multifd threads, and this becomes the
default configuration.
2. Handle QEMU 9.0 -> QEMU 8.2 migration backward compatibility, where
zero page checking is done from the migration main thread (a sketch of
how to request that behaviour explicitly follows below).
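
A sketch of how a test (or management layer) can request the old
behaviour explicitly, using the qtest helper that the test patch in this
series also uses; the hook itself is hypothetical:

/* Hypothetical qtest start hook: force zero page checking back onto
 * the migration main thread. */
static void *start_hook_force_legacy(QTestState *from, QTestState *to)
{
    migrate_set_parameter_str(from, "zero-page-detection", "legacy");
    return NULL;
}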

Signed-off-by: Hao Xiang 
---
 hw/core/machine.c   | 4 +++-
 migration/options.c | 2 +-
 qapi/migration.json | 6 +++---
 3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/hw/core/machine.c b/hw/core/machine.c
index 9ac5d5389a..0e9d646b61 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -32,7 +32,9 @@
 #include "hw/virtio/virtio-net.h"
 #include "audio/audio.h"
 
-GlobalProperty hw_compat_8_2[] = {};
+GlobalProperty hw_compat_8_2[] = {
+{ "migration", "zero-page-detection", "legacy"},
+};
 const size_t hw_compat_8_2_len = G_N_ELEMENTS(hw_compat_8_2);
 
 GlobalProperty hw_compat_8_1[] = {
diff --git a/migration/options.c b/migration/options.c
index 8c849620dd..d61d31be24 100644
--- a/migration/options.c
+++ b/migration/options.c
@@ -181,7 +181,7 @@ Property migration_properties[] = {
   MIG_MODE_NORMAL),
 DEFINE_PROP_ZERO_PAGE_DETECTION("zero-page-detection", MigrationState,
parameters.zero_page_detection,
-   ZERO_PAGE_DETECTION_LEGACY),
+   ZERO_PAGE_DETECTION_MULTIFD),
 
 /* Migration capabilities */
 DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
diff --git a/qapi/migration.json b/qapi/migration.json
index 2684e4e9ac..aa1b39bce1 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -909,7 +909,7 @@
 #(Since 8.2)
 #
 # @zero-page-detection: Whether and how to detect zero pages.
-# See description in @ZeroPageDetection.  Default is 'legacy'.
+# See description in @ZeroPageDetection.  Default is 'multifd'.
 # (since 9.0)
 #
 # Features:
@@ -1106,7 +1106,7 @@
 #(Since 8.2)
 #
 # @zero-page-detection: Whether and how to detect zero pages.
-# See description in @ZeroPageDetection.  Default is 'legacy'.
+# See description in @ZeroPageDetection.  Default is 'multifd'.
 # (since 9.0)
 #
 # Features:
@@ -1339,7 +1339,7 @@
 #(Since 8.2)
 #
 # @zero-page-detection: Whether and how to detect zero pages.
-# See description in @ZeroPageDetection.  Default is 'legacy'.
+# See description in @ZeroPageDetection.  Default is 'multifd'.
 # (since 9.0)
 #
 # Features:
-- 
2.30.2




[PATCH v6 7/7] migration/multifd: Add new migration test cases for legacy zero page checking.

2024-03-11 Thread Hao Xiang
From: Hao Xiang 

Now that zero page checking is done on the multifd sender threads by
default, we still provide an option for backward compatibility. This
change adds a qtest migration test case to set the zero-page-detection
option to "legacy" and run multifd migration with zero page checking on the
migration main thread.

Signed-off-by: Hao Xiang 
Reviewed-by: Peter Xu 
Message-Id: <20240301022829.3390548-6-hao.xi...@bytedance.com>
---
 tests/qtest/migration-test.c | 52 
 1 file changed, 52 insertions(+)

diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index 4023d808f9..71895abb7f 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -2771,6 +2771,24 @@ test_migrate_precopy_tcp_multifd_start(QTestState *from,
 return test_migrate_precopy_tcp_multifd_start_common(from, to, "none");
 }
 
+static void *
+test_migrate_precopy_tcp_multifd_start_zero_page_legacy(QTestState *from,
+QTestState *to)
+{
+test_migrate_precopy_tcp_multifd_start_common(from, to, "none");
+migrate_set_parameter_str(from, "zero-page-detection", "legacy");
+return NULL;
+}
+
+static void *
+test_migration_precopy_tcp_multifd_start_no_zero_page(QTestState *from,
+  QTestState *to)
+{
+test_migrate_precopy_tcp_multifd_start_common(from, to, "none");
+migrate_set_parameter_str(from, "zero-page-detection", "none");
+return NULL;
+}
+
 static void *
 test_migrate_precopy_tcp_multifd_zlib_start(QTestState *from,
 QTestState *to)
@@ -2812,6 +2830,36 @@ static void test_multifd_tcp_none(void)
 test_precopy_common();
 }
 
+static void test_multifd_tcp_zero_page_legacy(void)
+{
+MigrateCommon args = {
+.listen_uri = "defer",
+.start_hook = test_migrate_precopy_tcp_multifd_start_zero_page_legacy,
+/*
+ * Multifd is more complicated than most of the features, it
+ * directly takes guest page buffers when sending, make sure
+ * everything will work alright even if guest page is changing.
+ */
+.live = true,
+};
+test_precopy_common();
+}
+
+static void test_multifd_tcp_no_zero_page(void)
+{
+MigrateCommon args = {
+.listen_uri = "defer",
+.start_hook = test_migration_precopy_tcp_multifd_start_no_zero_page,
+/*
+ * Multifd is more complicated than most of the features, it
+ * directly takes guest page buffers when sending, make sure
+ * everything will work alright even if guest page is changing.
+ */
+.live = true,
+};
+test_precopy_common();
+}
+
 static void test_multifd_tcp_zlib(void)
 {
 MigrateCommon args = {
@@ -3729,6 +3777,10 @@ int main(int argc, char **argv)
 }
 migration_test_add("/migration/multifd/tcp/plain/none",
test_multifd_tcp_none);
+migration_test_add("/migration/multifd/tcp/plain/zero-page/legacy",
+   test_multifd_tcp_zero_page_legacy);
+migration_test_add("/migration/multifd/tcp/plain/zero-page/none",
+   test_multifd_tcp_no_zero_page);
 migration_test_add("/migration/multifd/tcp/plain/cancel",
test_multifd_tcp_cancel);
 migration_test_add("/migration/multifd/tcp/plain/zlib",
-- 
2.30.2




[PATCH v6 3/7] migration/multifd: Add new migration option zero-page-detection.

2024-03-11 Thread Hao Xiang
From: Hao Xiang 

This new parameter controls where the zero page checking runs.
1. If this parameter is set to 'legacy', zero page checking is
done in the migration main thread.
2. If this parameter is set to 'none', zero page checking is disabled.
A minimal sketch of how consumers branch on the value follows below.
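
This mirrors how ram.c reads the option in the hunks below; the wrapper
function is illustrative only:

static bool zero_page_check_on_main_thread(void)
{
    /* 'legacy' keeps the scan on the migration main thread; 'none'
     * disables the buffer_is_zero() scan entirely. */
    return migrate_zero_page_detection() == ZERO_PAGE_DETECTION_LEGACY;
}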

Signed-off-by: Hao Xiang 
Reviewed-by: Peter Xu 
Acked-by: Markus Armbruster 
Message-Id: <20240301022829.3390548-2-hao.xi...@bytedance.com>
---
 hw/core/qdev-properties-system.c| 10 +
 include/hw/qdev-properties-system.h |  4 
 migration/migration-hmp-cmds.c  |  9 
 migration/options.c | 21 ++
 migration/options.h |  1 +
 migration/ram.c |  4 
 qapi/migration.json | 33 ++---
 7 files changed, 79 insertions(+), 3 deletions(-)

diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
index b45e90edb2..71a21bf24e 100644
--- a/hw/core/qdev-properties-system.c
+++ b/hw/core/qdev-properties-system.c
@@ -693,6 +693,16 @@ const PropertyInfo qdev_prop_granule_mode = {
 .set_default_value = qdev_propinfo_set_default_value_enum,
 };
 
+const PropertyInfo qdev_prop_zero_page_detection = {
+.name = "ZeroPageDetection",
+.description = "zero_page_detection values, "
+   "none,legacy",
+.enum_table = &ZeroPageDetection_lookup,
+.get = qdev_propinfo_get_enum,
+.set = qdev_propinfo_set_enum,
+.set_default_value = qdev_propinfo_set_default_value_enum,
+};
+
 /* --- Reserved Region --- */
 
 /*
diff --git a/include/hw/qdev-properties-system.h b/include/hw/qdev-properties-system.h
index 626be87dd3..438f65389f 100644
--- a/include/hw/qdev-properties-system.h
+++ b/include/hw/qdev-properties-system.h
@@ -9,6 +9,7 @@ extern const PropertyInfo qdev_prop_reserved_region;
 extern const PropertyInfo qdev_prop_multifd_compression;
 extern const PropertyInfo qdev_prop_mig_mode;
 extern const PropertyInfo qdev_prop_granule_mode;
+extern const PropertyInfo qdev_prop_zero_page_detection;
 extern const PropertyInfo qdev_prop_losttickpolicy;
 extern const PropertyInfo qdev_prop_blockdev_on_error;
 extern const PropertyInfo qdev_prop_bios_chs_trans;
@@ -50,6 +51,9 @@ extern const PropertyInfo qdev_prop_iothread_vq_mapping_list;
MigMode)
 #define DEFINE_PROP_GRANULE_MODE(_n, _s, _f, _d) \
 DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_granule_mode, GranuleMode)
+#define DEFINE_PROP_ZERO_PAGE_DETECTION(_n, _s, _f, _d) \
+DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_zero_page_detection, \
+   ZeroPageDetection)
 #define DEFINE_PROP_LOSTTICKPOLICY(_n, _s, _f, _d) \
 DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_losttickpolicy, \
 LostTickPolicy)
diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c
index 99b49df5dd..7e96ae6ffd 100644
--- a/migration/migration-hmp-cmds.c
+++ b/migration/migration-hmp-cmds.c
@@ -344,6 +344,11 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict)
 monitor_printf(mon, "%s: %s\n",
 MigrationParameter_str(MIGRATION_PARAMETER_MULTIFD_COMPRESSION),
 MultiFDCompression_str(params->multifd_compression));
+assert(params->has_zero_page_detection);
+monitor_printf(mon, "%s: %s\n",
+MigrationParameter_str(MIGRATION_PARAMETER_ZERO_PAGE_DETECTION),
+qapi_enum_lookup(&ZeroPageDetection_lookup,
+params->zero_page_detection));
 monitor_printf(mon, "%s: %" PRIu64 " bytes\n",
 MigrationParameter_str(MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE),
 params->xbzrle_cache_size);
@@ -634,6 +639,10 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict)
 p->has_multifd_zstd_level = true;
 visit_type_uint8(v, param, &p->multifd_zstd_level, &err);
 break;
+case MIGRATION_PARAMETER_ZERO_PAGE_DETECTION:
+p->has_zero_page_detection = true;
+visit_type_ZeroPageDetection(v, param, &p->zero_page_detection, &err);
+break;
 case MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE:
 p->has_xbzrle_cache_size = true;
 if (!visit_type_size(v, param, &cache_size, &err)) {
diff --git a/migration/options.c b/migration/options.c
index 40eb930940..8c849620dd 100644
--- a/migration/options.c
+++ b/migration/options.c
@@ -179,6 +179,9 @@ Property migration_properties[] = {
 DEFINE_PROP_MIG_MODE("mode", MigrationState,
   parameters.mode,
   MIG_MODE_NORMAL),
+DEFINE_PROP_ZERO_PAGE_DETECTION("zero-page-detection", MigrationState,
+   parameters.zero_page_detection,
+   ZERO_PAGE_DETECTION_LEGACY),
 
 /* Migration capabilities */
 DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
@@ -931,6 +934,13 @@ uint64_t 

[PATCH v6 2/7] migration/multifd: Allow clearing of the file_bmap from multifd

2024-03-11 Thread Hao Xiang
From: Fabiano Rosas 

We currently only need to clear the mapped-ram file bitmap from the
migration thread during save_zero_page.

We're about to add support for zero page detection on the multifd
thread, so allow ramblock_set_file_bmap_atomic() to also clear the
bits.
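
With the extra flag, the multifd path can drop a detected zero page out
of the file bitmap in place. A sketch of the intended call site; the
wrapper is illustrative, with 'block' and 'offset' standing in for
whatever page is being handled:

static void mark_zero_page_sketch(RAMBlock *block, ram_addr_t offset)
{
    /* Page at 'offset' turned out to be zero: clear its bit so the
     * mapped-ram file skips it. Passing true still sets the bit. */
    ramblock_set_file_bmap_atomic(block, offset, false);
}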

Signed-off-by: Fabiano Rosas 
---
 migration/multifd.c | 2 +-
 migration/ram.c | 8 ++--
 migration/ram.h | 3 ++-
 3 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/migration/multifd.c b/migration/multifd.c
index d4a44da559..6b8a78e4ca 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -115,7 +115,7 @@ static void multifd_set_file_bitmap(MultiFDSendParams *p)
 assert(pages->block);
 
 for (int i = 0; i < p->pages->num; i++) {
-ramblock_set_file_bmap_atomic(pages->block, pages->offset[i]);
+ramblock_set_file_bmap_atomic(pages->block, pages->offset[i], true);
 }
 }
 
diff --git a/migration/ram.c b/migration/ram.c
index 003c28e133..f4abc47bbf 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -3150,9 +3150,13 @@ static void ram_save_file_bmap(QEMUFile *f)
 }
 }
 
-void ramblock_set_file_bmap_atomic(RAMBlock *block, ram_addr_t offset)
+void ramblock_set_file_bmap_atomic(RAMBlock *block, ram_addr_t offset, bool set)
 {
-set_bit_atomic(offset >> TARGET_PAGE_BITS, block->file_bmap);
+if (set) {
+set_bit_atomic(offset >> TARGET_PAGE_BITS, block->file_bmap);
+} else {
+clear_bit_atomic(offset >> TARGET_PAGE_BITS, block->file_bmap);
+}
 }
 
 /**
diff --git a/migration/ram.h b/migration/ram.h
index b9ac0da587..08feecaf51 100644
--- a/migration/ram.h
+++ b/migration/ram.h
@@ -75,7 +75,8 @@ bool ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *rb, Error **errp);
 bool ramblock_page_is_discarded(RAMBlock *rb, ram_addr_t start);
 void postcopy_preempt_shutdown_file(MigrationState *s);
 void *postcopy_preempt_thread(void *opaque);
-void ramblock_set_file_bmap_atomic(RAMBlock *block, ram_addr_t offset);
+void ramblock_set_file_bmap_atomic(RAMBlock *block, ram_addr_t offset,
+   bool set);
 
 /* ram cache */
 int colo_init_ram_cache(void);
-- 
2.30.2




[PATCH v6 4/7] migration/multifd: Implement zero page transmission on the multifd thread.

2024-03-11 Thread Hao Xiang
From: Hao Xiang 

1. Add a zero_pages field in MultiFDPacket_t.
2. Implement the zero page detection and handling on the multifd
threads for the non-compression, zlib and zstd compression backends
(a sketch of the resulting send-side invariant follows below).
3. Add a new value 'multifd' to the ZeroPageDetection enumeration.
4. Add zero page counters and update the multifd send/receive tracing
format to track the newly added counters.
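
The send-side contract after detection is a partition of the offset
array; a sketch of the post-condition, with field names taken from the
diff below and the wrapper function itself purely illustrative:

static void zero_page_detect_postcondition_sketch(MultiFDSendParams *p)
{
    multifd_send_zero_page_detect(p);
    /* offset[0 .. normal_num-1]   -> normal pages, payload is sent   */
    /* offset[normal_num .. num-1] -> zero pages, only offsets travel */
    g_assert(p->pages->normal_num <= p->pages->num);
}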

Signed-off-by: Hao Xiang 
Acked-by: Markus Armbruster 
---
 hw/core/qdev-properties-system.c |  2 +-
 migration/meson.build|  1 +
 migration/multifd-zero-page.c| 87 ++
 migration/multifd-zlib.c | 21 ++--
 migration/multifd-zstd.c | 20 +--
 migration/multifd.c  | 90 +++-
 migration/multifd.h  | 23 +++-
 migration/ram.c  |  1 -
 migration/trace-events   |  8 +--
 qapi/migration.json  |  7 ++-
 10 files changed, 228 insertions(+), 32 deletions(-)
 create mode 100644 migration/multifd-zero-page.c

diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
index 71a21bf24e..7eca2f2377 100644
--- a/hw/core/qdev-properties-system.c
+++ b/hw/core/qdev-properties-system.c
@@ -696,7 +696,7 @@ const PropertyInfo qdev_prop_granule_mode = {
 const PropertyInfo qdev_prop_zero_page_detection = {
 .name = "ZeroPageDetection",
 .description = "zero_page_detection values, "
-   "none,legacy",
+   "none,legacy,multifd",
 .enum_table = _lookup,
 .get = qdev_propinfo_get_enum,
 .set = qdev_propinfo_set_enum,
diff --git a/migration/meson.build b/migration/meson.build
index 92b1cc4297..1eeb915ff6 100644
--- a/migration/meson.build
+++ b/migration/meson.build
@@ -22,6 +22,7 @@ system_ss.add(files(
   'migration.c',
   'multifd.c',
   'multifd-zlib.c',
+  'multifd-zero-page.c',
   'ram-compress.c',
   'options.c',
   'postcopy-ram.c',
diff --git a/migration/multifd-zero-page.c b/migration/multifd-zero-page.c
new file mode 100644
index 0000000000..1ba38be636
--- /dev/null
+++ b/migration/multifd-zero-page.c
@@ -0,0 +1,87 @@
+/*
+ * Multifd zero page detection implementation.
+ *
+ * Copyright (c) 2024 Bytedance Inc
+ *
+ * Authors:
+ *  Hao Xiang 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/cutils.h"
+#include "exec/ramblock.h"
+#include "migration.h"
+#include "multifd.h"
+#include "options.h"
+#include "ram.h"
+
+static bool multifd_zero_page_enabled(void)
+{
+return migrate_zero_page_detection() == ZERO_PAGE_DETECTION_MULTIFD;
+}
+
+static void swap_page_offset(ram_addr_t *pages_offset, int a, int b)
+{
+ram_addr_t temp;
+
+if (a == b) {
+return;
+}
+
+temp = pages_offset[a];
+pages_offset[a] = pages_offset[b];
+pages_offset[b] = temp;
+}
+
+/**
+ * multifd_send_zero_page_detect: Perform zero page detection on all pages.
+ *
+ * Sorts normal pages before zero pages in p->pages->offset and updates
+ * p->pages->normal_num.
+ *
+ * @param p A pointer to the send params.
+ */
+void multifd_send_zero_page_detect(MultiFDSendParams *p)
+{
+MultiFDPages_t *pages = p->pages;
+RAMBlock *rb = pages->block;
+int i = 0;
+int j = pages->num - 1;
+
+if (!multifd_zero_page_enabled()) {
+pages->normal_num = pages->num;
+return;
+}
+
+/*
+ * Sort the page offset array by moving all normal pages to
+ * the left and all zero pages to the right of the array.
+ */
+while (i <= j) {
+uint64_t offset = pages->offset[i];
+
+if (!buffer_is_zero(rb->host + offset, p->page_size)) {
+i++;
+continue;
+}
+
+swap_page_offset(pages->offset, i, j);
+ram_release_page(rb->idstr, offset);
+j--;
+}
+
+pages->normal_num = i;
+}
+
+void multifd_recv_zero_page_process(MultiFDRecvParams *p)
+{
+for (int i = 0; i < p->zero_num; i++) {
+void *page = p->host + p->zero[i];
+if (!buffer_is_zero(page, p->page_size)) {
+memset(page, 0, p->page_size);
+}
+}
+}
diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c
index 6120faad65..83c0374380 100644
--- a/migration/multifd-zlib.c
+++ b/migration/multifd-zlib.c
@@ -123,13 +123,15 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp)
 int ret;
 uint32_t i;
 
-multifd_send_prepare_header(p);
+if (!multifd_send_prepare_common(p)) {
+goto out;
+}
 
-for (i = 0; i < pages->num; i++) {
+for (i = 0; i < pages->normal_num; i++) {
 uint32_t available = z->zbuff_len - out_size;
 int flush = Z_NO_FLUSH;
 
-if (i == page

Re: [PATCH v4 7/7] Update maintainer contact for migration multifd zero page checking acceleration.

2024-03-09 Thread hao . xiang
> On Sun, Mar 3, 2024 at 11:34 PM Peter Xu wrote:
>
> > On Fri, Mar 01, 2024 at 02:28:29AM +0000, Hao Xiang wrote:
> > > Add myself to maintain multifd zero page checking acceleration function.
> > >
> > > Signed-off-by: Hao Xiang
> > > ---
> > > MAINTAINERS | 5 +++++
> > > 1 file changed, 5 insertions(+)
> > >
> > > diff --git a/MAINTAINERS b/MAINTAINERS
> > > index 65dfdc9677..b547918e4d 100644
> > > --- a/MAINTAINERS
> > > +++ b/MAINTAINERS
> > > @@ -3414,6 +3414,11 @@ F: tests/migration/
> > > F: util/userfaultfd.c
> > > X: migration/rdma*
> > > +Migration multifd zero page checking acceleration
> > > +M: Hao Xiang
> > > +S: Maintained
> > > +F: migration/multifd-zero-page.c
> > > +
> >
> > Firstly, appreciate a lot for volunteering!
> >
> > My fault to not have made it clear. This file alone so far will be
> > closely related to the multifd core, so whoever maintains migration
> > should look after it. It's also slightly weird to have a separate entry
> > for a file that is tens of LOC if it's already covered by another upper
> > entry.
> >
> > What I worry about is the vendor/library specific parts that will be
> > harder to maintain, and migration maintainers (no matter who does the
> > job in the future) may not always cover those areas. So I was expecting
> > we could have volunteers covering e.g. the QAT / DSA / IAA accelerators.
> > Since all these accelerators will be part of Intel's new chips, another
> > option is an "Intel accelerators" section that covers the vendor
> > specific code and all those areas, no matter whether it's a zero detect
> > accelerator or a HW compressor.
> >
> > I'd suggest we discuss this with Intel people to work out a solid plan
> > later, when we start to merge the HW/LIB specific code. For now I
> > suggest we drop this patch and stick with the feature implementation,
> > to see whether it can catch the train for 9.0. IMHO this is a good
> > feature even without HW accelerators (and I think it's close to ready),
> > so I hope it can still make it.
> >
> > Thanks,
> >
> > --
> > Peter Xu

No worries. I misunderstood you. We can talk about maintenance later on when we
have the accelerator changes ready.



Re: [PATCH v4 2/7] migration/multifd: Implement zero page transmission on the multifd thread.

2024-03-09 Thread hao . xiang

On Mon, Mar 4, 2024 at 10:24 AM Fabiano Rosas wrote:

> Fabiano Rosas writes:
>
> > Peter Xu writes:
> >
> > > On Fri, Mar 01, 2024 at 02:28:24AM +0000, Hao Xiang wrote:
> > >
> > > > -GlobalProperty hw_compat_8_2[] = {};
> > > > +GlobalProperty hw_compat_8_2[] = {
> > > > + { "migration", "zero-page-detection", "legacy"},
> > > > +};
> > >
> > > I hope we can make it for 9.0, then this (and many of the other
> > > places) can be kept as-is. Let's see.. soft-freeze is March 12th.
> > >
> > > One thing to mention is I just sent a pull which has the mapped-ram
> > > feature merged. You may need a rebase onto that, and hopefully
> > > mapped-ram can also use your feature within the same patch when you
> > > repost.
> > >
> > > The key points are:
> > >
> > > - The socket migration is under "use_packets", the mapped-ram is
> > >   under "!use_packets" always.
> > >
> > > - mapped-ram doesn't transmit zero-pages, it just clears the
> > >   corresponding bit in block->file_bmap.
> > >
> > > https://lore.kernel.org/all/20240229153017.2221-1-faro...@suse.de/
> > >
> > > That rebase may or may not need much caution, I apologize for that:
> > > mapped-ram as a feature was discussed for 1+ years, so it was the
> > > plan to merge it (actually, still only part of it) into QEMU 9.0.
> >
> > I started doing that rebase last week and saw issues with a sender
> > thread always getting -EPIPE at the sendmsg() on the regular socket
> > migration. Let's hope it was just me being tired.
> >
> > I'll try to get something ready this week.
>
> @Hao Xiang:
>
> Here's a branch with the rebase. Please include the first two commits
> when you repost:
>
>   migration/multifd: Allow clearing of the file_bmap from multifd
>   migration/multifd: Allow zero pages in file migration
>
> There are also two small additions and some conflict resolution at the
> "Implement zero page..." commit. Make sure you don't miss them.
>
> https://gitlab.com/farosas/qemu/-/commits/migration-multifd-zero-page
>
> Let me know if you encounter any issues.

Sorry about the delay. I have rebased and pulled in the two commits you 
mentioned. Test works fine. I just sent out a new version.

I removed the zero/zero-bytes interface changes out of this patchset but will 
follow up with a separate one.



[PATCH v5 3/7] migration/multifd: Add new migration option zero-page-detection.

2024-03-09 Thread Hao Xiang
From: Hao Xiang 

This new parameter controls where the zero page checking is running.
1. If this parameter is set to 'legacy', zero page checking is
done in the migration main thread.
2. If this parameter is set to 'none', zero page checking is disabled.

Signed-off-by: Hao Xiang 
Reviewed-by: Peter Xu 
Acked-by: Markus Armbruster 
Message-Id: <20240301022829.3390548-2-hao.xi...@bytedance.com>
---
 hw/core/qdev-properties-system.c| 10 +
 include/hw/qdev-properties-system.h |  4 
 migration/migration-hmp-cmds.c  |  9 
 migration/options.c | 21 ++
 migration/options.h |  1 +
 migration/ram.c |  4 
 qapi/migration.json | 33 ++---
 7 files changed, 79 insertions(+), 3 deletions(-)

diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
index 1a396521d5..228e685f52 100644
--- a/hw/core/qdev-properties-system.c
+++ b/hw/core/qdev-properties-system.c
@@ -679,6 +679,16 @@ const PropertyInfo qdev_prop_mig_mode = {
 .set_default_value = qdev_propinfo_set_default_value_enum,
 };
 
+const PropertyInfo qdev_prop_zero_page_detection = {
+.name = "ZeroPageDetection",
+.description = "zero_page_detection values, "
+   "none,legacy",
+.enum_table = &ZeroPageDetection_lookup,
+.get = qdev_propinfo_get_enum,
+.set = qdev_propinfo_set_enum,
+.set_default_value = qdev_propinfo_set_default_value_enum,
+};
+
 /* --- Reserved Region --- */
 
 /*
diff --git a/include/hw/qdev-properties-system.h b/include/hw/qdev-properties-system.h
index 06c359c190..839b170235 100644
--- a/include/hw/qdev-properties-system.h
+++ b/include/hw/qdev-properties-system.h
@@ -8,6 +8,7 @@ extern const PropertyInfo qdev_prop_macaddr;
 extern const PropertyInfo qdev_prop_reserved_region;
 extern const PropertyInfo qdev_prop_multifd_compression;
 extern const PropertyInfo qdev_prop_mig_mode;
+extern const PropertyInfo qdev_prop_zero_page_detection;
 extern const PropertyInfo qdev_prop_losttickpolicy;
 extern const PropertyInfo qdev_prop_blockdev_on_error;
 extern const PropertyInfo qdev_prop_bios_chs_trans;
@@ -47,6 +48,9 @@ extern const PropertyInfo qdev_prop_iothread_vq_mapping_list;
 #define DEFINE_PROP_MIG_MODE(_n, _s, _f, _d) \
 DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_mig_mode, \
MigMode)
+#define DEFINE_PROP_ZERO_PAGE_DETECTION(_n, _s, _f, _d) \
+DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_zero_page_detection, \
+   ZeroPageDetection)
 #define DEFINE_PROP_LOSTTICKPOLICY(_n, _s, _f, _d) \
 DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_losttickpolicy, \
 LostTickPolicy)
diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c
index 99b49df5dd..7e96ae6ffd 100644
--- a/migration/migration-hmp-cmds.c
+++ b/migration/migration-hmp-cmds.c
@@ -344,6 +344,11 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict)
 monitor_printf(mon, "%s: %s\n",
 MigrationParameter_str(MIGRATION_PARAMETER_MULTIFD_COMPRESSION),
 MultiFDCompression_str(params->multifd_compression));
+assert(params->has_zero_page_detection);
+monitor_printf(mon, "%s: %s\n",
+MigrationParameter_str(MIGRATION_PARAMETER_ZERO_PAGE_DETECTION),
+qapi_enum_lookup(&ZeroPageDetection_lookup,
+params->zero_page_detection));
 monitor_printf(mon, "%s: %" PRIu64 " bytes\n",
 MigrationParameter_str(MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE),
 params->xbzrle_cache_size);
@@ -634,6 +639,10 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict)
 p->has_multifd_zstd_level = true;
 visit_type_uint8(v, param, &p->multifd_zstd_level, &err);
 break;
+case MIGRATION_PARAMETER_ZERO_PAGE_DETECTION:
+p->has_zero_page_detection = true;
+visit_type_ZeroPageDetection(v, param, &p->zero_page_detection, &err);
+break;
 case MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE:
 p->has_xbzrle_cache_size = true;
 if (!visit_type_size(v, param, &cache_size, &err)) {
diff --git a/migration/options.c b/migration/options.c
index 40eb930940..8c849620dd 100644
--- a/migration/options.c
+++ b/migration/options.c
@@ -179,6 +179,9 @@ Property migration_properties[] = {
 DEFINE_PROP_MIG_MODE("mode", MigrationState,
   parameters.mode,
   MIG_MODE_NORMAL),
+DEFINE_PROP_ZERO_PAGE_DETECTION("zero-page-detection", MigrationState,
+   parameters.zero_page_detection,
+   ZERO_PAGE_DETECTION_LEGACY),
 
 /* Migration capabilities */
 DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
@@ -931,6 +934,13 @@ uint64_t migrate_xbzrle_cache_size(v

[PATCH v5 6/7] migration/multifd: Enable multifd zero page checking by default.

2024-03-08 Thread Hao Xiang
From: Hao Xiang 

1. Set default "zero-page-detection" option to "multifd". Now
zero page checking can be done in the multifd threads and this
becomes the default configuration.
2. Handle migration QEMU9.0 -> QEMU8.2 compatibility. We provide
backward compatibility where zero page checking is done from the
migration main thread.

Signed-off-by: Hao Xiang 
---
 hw/core/machine.c   | 4 +++-
 migration/options.c | 2 +-
 qapi/migration.json | 6 +++---
 3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/hw/core/machine.c b/hw/core/machine.c
index 9ac5d5389a..0e9d646b61 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -32,7 +32,9 @@
 #include "hw/virtio/virtio-net.h"
 #include "audio/audio.h"
 
-GlobalProperty hw_compat_8_2[] = {};
+GlobalProperty hw_compat_8_2[] = {
+{ "migration", "zero-page-detection", "legacy"},
+};
 const size_t hw_compat_8_2_len = G_N_ELEMENTS(hw_compat_8_2);
 
 GlobalProperty hw_compat_8_1[] = {
diff --git a/migration/options.c b/migration/options.c
index 8c849620dd..d61d31be24 100644
--- a/migration/options.c
+++ b/migration/options.c
@@ -181,7 +181,7 @@ Property migration_properties[] = {
   MIG_MODE_NORMAL),
 DEFINE_PROP_ZERO_PAGE_DETECTION("zero-page-detection", MigrationState,
parameters.zero_page_detection,
-   ZERO_PAGE_DETECTION_LEGACY),
+   ZERO_PAGE_DETECTION_MULTIFD),
 
 /* Migration capabilities */
 DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
diff --git a/qapi/migration.json b/qapi/migration.json
index 2684e4e9ac..aa1b39bce1 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -909,7 +909,7 @@
 #(Since 8.2)
 #
 # @zero-page-detection: Whether and how to detect zero pages.
-# See description in @ZeroPageDetection.  Default is 'legacy'.
+# See description in @ZeroPageDetection.  Default is 'multifd'.
 # (since 9.0)
 #
 # Features:
@@ -1106,7 +1106,7 @@
 #(Since 8.2)
 #
 # @zero-page-detection: Whether and how to detect zero pages.
-# See description in @ZeroPageDetection.  Default is 'legacy'.
+# See description in @ZeroPageDetection.  Default is 'multifd'.
 # (since 9.0)
 #
 # Features:
@@ -1339,7 +1339,7 @@
 #(Since 8.2)
 #
 # @zero-page-detection: Whether and how to detect zero pages.
-# See description in @ZeroPageDetection.  Default is 'legacy'.
+# See description in @ZeroPageDetection.  Default is 'multifd'.
 # (since 9.0)
 #
 # Features:
-- 
2.30.2




[PATCH v5 0/7] Introduce multifd zero page checking.

2024-03-08 Thread Hao Xiang
From: Hao Xiang 

v5 update:
* Move QEMU9.0 -> QEMU8.2 migration backward compatibility handling into
the patch where "multifd" zero page checking becomes the default option.
* A few function renaming according to feedback.
* Fix bug in multifd_send_zero_page_detect.
* Rebase on the new mapped-ram feature.
* Pulled in 2 commits from Fabiano.

v4 update:
* Fix documentation for interface ZeroPageDetection.
* Fix implementation in multifd_send_zero_page_check.
* Rebase on top of c0c6a0e3528b88aaad0b9d333e295707a195587b.

v3 update:
* Change "zero" to "zero-pages" and use type size for "zero-bytes".
* Fixed ZeroPageDetection interface description.
* Move zero page unit tests to its own path.
* Removed some asserts.
* Added backward compatibility support for migration 9.0 -> 8.2.
* Removed fields "zero" and "normal" page address arrays from v2. Now
multifd_zero_page_check_send sorts normal/zero pages in the "offset" array.

v2 update:
* Implement zero-page-detection switch with enumeration "legacy",
"none" and "multifd".
* Move normal/zero pages from MultiFDSendParams to MultiFDPages_t.
* Add zeros and zero_bytes accounting.

This patchset is based on Juan Quintela's old series here
https://lore.kernel.org/all/20220802063907.18882-1-quint...@redhat.com/

In the multifd live migration model, there is a single migration main
thread scanning the page map, queuing the pages to multiple multifd
sender threads. The migration main thread runs zero page checking on
every page before queuing the page to the sender threads. Zero page
checking is a CPU intensive task and hence having a single thread doing
all that doesn't scale well. This change introduces a new function
to run the zero page checking on the multifd sender threads. This
patchset also lays the groundwork for future changes that offload the
zero page checking task to accelerator hardware.

Use two Intel 4th generation Xeon servers for testing.

Architecture:x86_64
CPU(s):  192
Thread(s) per core:  2
Core(s) per socket:  48
Socket(s):   2
NUMA node(s):2
Vendor ID:   GenuineIntel
CPU family:  6
Model:   143
Model name:  Intel(R) Xeon(R) Platinum 8457C
Stepping:8
CPU MHz: 2538.624
CPU max MHz: 3800.0000
CPU min MHz: 800.0000

Perform multifd live migration with below setup:
1. VM has 100GB memory. All pages in the VM are zero pages.
2. Use tcp socket for live migration.
3. Use 4 multifd channels and zero page checking on migration main thread.
4. Use 1/2/4 multifd channels and zero page checking on multifd sender
threads.
5. Record migration total time from sender QEMU console's "info migrate"
command.

+-------------------+----------------+
|zero-page-checking | total-time(ms) |
+-------------------+----------------+
|main-thread        | 9629           |
+-------------------+----------------+
|multifd-1-threads  | 6182           |
+-------------------+----------------+
|multifd-2-threads  | 4643           |
+-------------------+----------------+
|multifd-4-threads  | 4143           |
+-------------------+----------------+

Apply this patchset on top of commit
cbccded4a2b5d685a426a437e25f67d3a375b292

Fabiano Rosas (2):
  migration/multifd: Allow zero pages in file migration
  migration/multifd: Allow clearing of the file_bmap from multifd

Hao Xiang (5):
  migration/multifd: Add new migration option zero-page-detection.
  migration/multifd: Implement zero page transmission on the multifd
thread.
  migration/multifd: Implement ram_save_target_page_multifd to handle
multifd version of MigrationOps::ram_save_target_page.
  migration/multifd: Enable multifd zero page checking by default.
  migration/multifd: Add new migration test cases for legacy zero page
checking.

 hw/core/machine.c   |  4 +-
 hw/core/qdev-properties-system.c| 10 
 include/hw/qdev-properties-system.h |  4 ++
 migration/file.c|  2 +-
 migration/meson.build   |  1 +
 migration/migration-hmp-cmds.c  |  9 +++
 migration/multifd-zero-page.c   | 87 +++
 migration/multifd-zlib.c| 21 +--
 migration/multifd-zstd.c| 20 +--
 migration/multifd.c | 92 -
 migration/multifd.h | 23 +++-
 migration/options.c | 21 +++
 migration/options.h |  1 +
 migration/ram.c | 47 +++
 migration/ram.h |  3 +-
 migration/trace-events  |  8 +--
 qapi/migration.json | 38 +++-
 tests/qtest/migration-test.c| 52 
 18 files changed, 395 insertions(+), 48 deletions(-)
 create mode 100644 migration/multifd-zero-page.c

-- 
2.30.2




[PATCH v5 2/7] migration/multifd: Allow clearing of the file_bmap from multifd

2024-03-08 Thread Hao Xiang
From: Fabiano Rosas 

We currently only need to clear the mapped-ram file bitmap from the
migration thread during save_zero_page.

We're about to add support for zero page detection on the multifd
thread, so allow ramblock_set_file_bmap_atomic() to also clear the
bits.

Signed-off-by: Fabiano Rosas 
---
 migration/multifd.c | 2 +-
 migration/ram.c | 8 ++--
 migration/ram.h | 3 ++-
 3 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/migration/multifd.c b/migration/multifd.c
index d4a44da559..6b8a78e4ca 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -115,7 +115,7 @@ static void multifd_set_file_bitmap(MultiFDSendParams *p)
 assert(pages->block);
 
 for (int i = 0; i < p->pages->num; i++) {
-ramblock_set_file_bmap_atomic(pages->block, pages->offset[i]);
+ramblock_set_file_bmap_atomic(pages->block, pages->offset[i], true);
 }
 }
 
diff --git a/migration/ram.c b/migration/ram.c
index 003c28e133..f4abc47bbf 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -3150,9 +3150,13 @@ static void ram_save_file_bmap(QEMUFile *f)
 }
 }
 
-void ramblock_set_file_bmap_atomic(RAMBlock *block, ram_addr_t offset)
+void ramblock_set_file_bmap_atomic(RAMBlock *block, ram_addr_t offset, bool set)
 {
-set_bit_atomic(offset >> TARGET_PAGE_BITS, block->file_bmap);
+if (set) {
+set_bit_atomic(offset >> TARGET_PAGE_BITS, block->file_bmap);
+} else {
+clear_bit_atomic(offset >> TARGET_PAGE_BITS, block->file_bmap);
+}
 }
 
 /**
diff --git a/migration/ram.h b/migration/ram.h
index b9ac0da587..08feecaf51 100644
--- a/migration/ram.h
+++ b/migration/ram.h
@@ -75,7 +75,8 @@ bool ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *rb, Error **errp);
 bool ramblock_page_is_discarded(RAMBlock *rb, ram_addr_t start);
 void postcopy_preempt_shutdown_file(MigrationState *s);
 void *postcopy_preempt_thread(void *opaque);
-void ramblock_set_file_bmap_atomic(RAMBlock *block, ram_addr_t offset);
+void ramblock_set_file_bmap_atomic(RAMBlock *block, ram_addr_t offset,
+   bool set);
 
 /* ram cache */
 int colo_init_ram_cache(void);
-- 
2.30.2




[PATCH v5 7/7] migration/multifd: Add new migration test cases for legacy zero page checking.

2024-03-08 Thread Hao Xiang
From: Hao Xiang 

Now that zero page checking is done on the multifd sender threads by
default, we still provide an option for backward compatibility. This
change adds a qtest migration test case to set the zero-page-detection
option to "legacy" and run multifd migration with zero page checking on the
migration main thread.

Signed-off-by: Hao Xiang 
Reviewed-by: Peter Xu 
Message-Id: <20240301022829.3390548-6-hao.xi...@bytedance.com>
---
 tests/qtest/migration-test.c | 52 
 1 file changed, 52 insertions(+)

diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index 4023d808f9..71895abb7f 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -2771,6 +2771,24 @@ test_migrate_precopy_tcp_multifd_start(QTestState *from,
 return test_migrate_precopy_tcp_multifd_start_common(from, to, "none");
 }
 
+static void *
+test_migrate_precopy_tcp_multifd_start_zero_page_legacy(QTestState *from,
+QTestState *to)
+{
+test_migrate_precopy_tcp_multifd_start_common(from, to, "none");
+migrate_set_parameter_str(from, "zero-page-detection", "legacy");
+return NULL;
+}
+
+static void *
+test_migration_precopy_tcp_multifd_start_no_zero_page(QTestState *from,
+  QTestState *to)
+{
+test_migrate_precopy_tcp_multifd_start_common(from, to, "none");
+migrate_set_parameter_str(from, "zero-page-detection", "none");
+return NULL;
+}
+
 static void *
 test_migrate_precopy_tcp_multifd_zlib_start(QTestState *from,
 QTestState *to)
@@ -2812,6 +2830,36 @@ static void test_multifd_tcp_none(void)
 test_precopy_common();
 }
 
+static void test_multifd_tcp_zero_page_legacy(void)
+{
+MigrateCommon args = {
+.listen_uri = "defer",
+.start_hook = test_migrate_precopy_tcp_multifd_start_zero_page_legacy,
+/*
+ * Multifd is more complicated than most of the features, it
+ * directly takes guest page buffers when sending, make sure
+ * everything will work alright even if guest page is changing.
+ */
+.live = true,
+};
+test_precopy_common();
+}
+
+static void test_multifd_tcp_no_zero_page(void)
+{
+MigrateCommon args = {
+.listen_uri = "defer",
+.start_hook = test_migration_precopy_tcp_multifd_start_no_zero_page,
+/*
+ * Multifd is more complicated than most of the features, it
+ * directly takes guest page buffers when sending, make sure
+ * everything will work alright even if guest page is changing.
+ */
+.live = true,
+};
+test_precopy_common();
+}
+
 static void test_multifd_tcp_zlib(void)
 {
 MigrateCommon args = {
@@ -3729,6 +3777,10 @@ int main(int argc, char **argv)
 }
 migration_test_add("/migration/multifd/tcp/plain/none",
test_multifd_tcp_none);
+migration_test_add("/migration/multifd/tcp/plain/zero-page/legacy",
+   test_multifd_tcp_zero_page_legacy);
+migration_test_add("/migration/multifd/tcp/plain/zero-page/none",
+   test_multifd_tcp_no_zero_page);
 migration_test_add("/migration/multifd/tcp/plain/cancel",
test_multifd_tcp_cancel);
 migration_test_add("/migration/multifd/tcp/plain/zlib",
-- 
2.30.2




[PATCH v5 4/7] migration/multifd: Implement zero page transmission on the multifd thread.

2024-03-08 Thread Hao Xiang
From: Hao Xiang 

1. Add zero_pages field in MultiFDPacket_t.
2. Implements the zero page detection and handling on the multifd
threads for non-compression, zlib and zstd compression backends.
3. Added a new value 'multifd' in ZeroPageDetection enumeration.
4. Adds zero page counters and updates multifd send/receive tracing
format to track the newly added counters.

Signed-off-by: Hao Xiang 
Acked-by: Markus Armbruster 
---
 hw/core/qdev-properties-system.c |  2 +-
 migration/meson.build|  1 +
 migration/multifd-zero-page.c| 87 ++
 migration/multifd-zlib.c | 21 ++--
 migration/multifd-zstd.c | 20 +--
 migration/multifd.c  | 90 +++-
 migration/multifd.h  | 23 +++-
 migration/ram.c  |  1 -
 migration/trace-events   |  8 +--
 qapi/migration.json  |  7 ++-
 10 files changed, 228 insertions(+), 32 deletions(-)
 create mode 100644 migration/multifd-zero-page.c

diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
index 228e685f52..6e6f68ae1b 100644
--- a/hw/core/qdev-properties-system.c
+++ b/hw/core/qdev-properties-system.c
@@ -682,7 +682,7 @@ const PropertyInfo qdev_prop_mig_mode = {
 const PropertyInfo qdev_prop_zero_page_detection = {
 .name = "ZeroPageDetection",
 .description = "zero_page_detection values, "
-   "none,legacy",
+   "none,legacy,multifd",
 .enum_table = _lookup,
 .get = qdev_propinfo_get_enum,
 .set = qdev_propinfo_set_enum,
diff --git a/migration/meson.build b/migration/meson.build
index 92b1cc4297..1eeb915ff6 100644
--- a/migration/meson.build
+++ b/migration/meson.build
@@ -22,6 +22,7 @@ system_ss.add(files(
   'migration.c',
   'multifd.c',
   'multifd-zlib.c',
+  'multifd-zero-page.c',
   'ram-compress.c',
   'options.c',
   'postcopy-ram.c',
diff --git a/migration/multifd-zero-page.c b/migration/multifd-zero-page.c
new file mode 100644
index 0000000000..1ba38be636
--- /dev/null
+++ b/migration/multifd-zero-page.c
@@ -0,0 +1,87 @@
+/*
+ * Multifd zero page detection implementation.
+ *
+ * Copyright (c) 2024 Bytedance Inc
+ *
+ * Authors:
+ *  Hao Xiang 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/cutils.h"
+#include "exec/ramblock.h"
+#include "migration.h"
+#include "multifd.h"
+#include "options.h"
+#include "ram.h"
+
+static bool multifd_zero_page_enabled(void)
+{
+return migrate_zero_page_detection() == ZERO_PAGE_DETECTION_MULTIFD;
+}
+
+static void swap_page_offset(ram_addr_t *pages_offset, int a, int b)
+{
+ram_addr_t temp;
+
+if (a == b) {
+return;
+}
+
+temp = pages_offset[a];
+pages_offset[a] = pages_offset[b];
+pages_offset[b] = temp;
+}
+
+/**
+ * multifd_send_zero_page_detect: Perform zero page detection on all pages.
+ *
+ * Sorts normal pages before zero pages in p->pages->offset and updates
+ * p->pages->normal_num.
+ *
+ * @param p A pointer to the send params.
+ */
+void multifd_send_zero_page_detect(MultiFDSendParams *p)
+{
+MultiFDPages_t *pages = p->pages;
+RAMBlock *rb = pages->block;
+int i = 0;
+int j = pages->num - 1;
+
+if (!multifd_zero_page_enabled()) {
+pages->normal_num = pages->num;
+return;
+}
+
+/*
+ * Sort the page offset array by moving all normal pages to
+ * the left and all zero pages to the right of the array.
+ */
+while (i <= j) {
+uint64_t offset = pages->offset[i];
+
+if (!buffer_is_zero(rb->host + offset, p->page_size)) {
+i++;
+continue;
+}
+
+swap_page_offset(pages->offset, i, j);
+ram_release_page(rb->idstr, offset);
+j--;
+}
+
+pages->normal_num = i;
+}
+
+void multifd_recv_zero_page_process(MultiFDRecvParams *p)
+{
+for (int i = 0; i < p->zero_num; i++) {
+void *page = p->host + p->zero[i];
+if (!buffer_is_zero(page, p->page_size)) {
+memset(page, 0, p->page_size);
+}
+}
+}
diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c
index 6120faad65..83c0374380 100644
--- a/migration/multifd-zlib.c
+++ b/migration/multifd-zlib.c
@@ -123,13 +123,15 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp)
 int ret;
 uint32_t i;
 
-multifd_send_prepare_header(p);
+if (!multifd_send_prepare_common(p)) {
+goto out;
+}
 
-for (i = 0; i < pages->num; i++) {
+for (i = 0; i < pages->normal_num; i++) {
 uint32_t available = z->zbuff_len - out_size;
 int flush = Z_NO_FLUSH;
 
-if (i == pages->nu

[PATCH v5 5/7] migration/multifd: Implement ram_save_target_page_multifd to handle multifd version of MigrationOps::ram_save_target_page.

2024-03-08 Thread Hao Xiang
From: Hao Xiang 

1. Add a dedicated handler for MigrationOps::ram_save_target_page in
multifd live migration.
2. Refactor ram_save_target_page_legacy so that the legacy and multifd
handlers don't have internal functions calling into each other.

Signed-off-by: Hao Xiang 
Reviewed-by: Fabiano Rosas 
Message-Id: <20240226195654.934709-4-hao.xi...@bytedance.com>
---
 migration/ram.c | 42 +-
 1 file changed, 29 insertions(+), 13 deletions(-)

diff --git a/migration/ram.c b/migration/ram.c
index d1f97cf862..887e20bf5b 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -1140,10 +1140,6 @@ static int save_zero_page(RAMState *rs, PageSearchStatus *pss,
 QEMUFile *file = pss->pss_channel;
 int len = 0;
 
-if (migrate_zero_page_detection() == ZERO_PAGE_DETECTION_NONE) {
-return 0;
-}
-
 if (!buffer_is_zero(p, TARGET_PAGE_SIZE)) {
 return 0;
 }
@@ -2079,7 +2075,6 @@ static bool save_compress_page(RAMState *rs, PageSearchStatus *pss,
  */
 static int ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss)
 {
-RAMBlock *block = pss->block;
 ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
 int res;
 
@@ -2095,17 +2090,33 @@ static int ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss)
 return 1;
 }
 
+return ram_save_page(rs, pss);
+}
+
+/**
+ * ram_save_target_page_multifd: send one target page to multifd workers
+ *
+ * Returns 1 if the page was queued, -1 otherwise.
+ *
+ * @rs: current RAM state
+ * @pss: data about the page we want to send
+ */
+static int ram_save_target_page_multifd(RAMState *rs, PageSearchStatus *pss)
+{
+RAMBlock *block = pss->block;
+ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
+
 /*
- * Do not use multifd in postcopy as one whole host page should be
- * placed.  Meanwhile postcopy requires atomic update of pages, so even
- * if host page size == guest page size the dest guest during run may
- * still see partially copied pages which is data corruption.
+ * While using multifd live migration, we still need to handle zero
+ * page checking on the migration main thread.
  */
-if (migrate_multifd() && !migration_in_postcopy()) {
-return ram_save_multifd_page(block, offset);
+if (migrate_zero_page_detection() == ZERO_PAGE_DETECTION_LEGACY) {
+if (save_zero_page(rs, pss, offset)) {
+return 1;
+}
 }
 
-return ram_save_page(rs, pss);
+return ram_save_multifd_page(block, offset);
 }
 
 /* Should be called before sending a host page */
@@ -3113,7 +3124,12 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
 }
 
 migration_ops = g_malloc0(sizeof(MigrationOps));
-migration_ops->ram_save_target_page = ram_save_target_page_legacy;
+
+if (migrate_multifd()) {
+migration_ops->ram_save_target_page = ram_save_target_page_multifd;
+} else {
+migration_ops->ram_save_target_page = ram_save_target_page_legacy;
+}
 
 bql_unlock();
 ret = multifd_send_sync_main();
-- 
2.30.2




[PATCH v5 1/7] migration/multifd: Allow zero pages in file migration

2024-03-08 Thread Hao Xiang
From: Fabiano Rosas 

Currently, it's an error to have no data pages in the multifd file
migration because zero page detection is done in the migration thread
and zero pages don't reach multifd. This is enforced with the
pages->num assert.

We're about to add zero page detection on the multifd thread. Fix
file_write_ramblock_iov() to stop considering p->iovs_num == 0 an error.
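
Below is a minimal standalone sketch of why the initial return value
matters; write_iov_all() is a hypothetical stand-in for the slice-writing
loop, not the QEMU function itself:

#include <sys/uio.h>
#include <unistd.h>

static ssize_t write_iov_all(int fd, const struct iovec *iov, int niov)
{
    ssize_t total = 0;                  /* was -1 before the fix */

    for (int i = 0; i < niov; i++) {
        ssize_t n = write(fd, iov[i].iov_base, iov[i].iov_len);
        if (n < 0) {
            return -1;                  /* a real I/O error */
        }
        total += n;
    }
    /* With niov == 0 the loop never runs, so the caller sees the
     * initial value: 0 turns "nothing to write" into a successful
     * no-op instead of an error. */
    return total;
}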

Signed-off-by: Fabiano Rosas 
---
 migration/file.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/migration/file.c b/migration/file.c
index 164b079966..5075f9526f 100644
--- a/migration/file.c
+++ b/migration/file.c
@@ -159,7 +159,7 @@ void file_start_incoming_migration(FileMigrationArgs *file_args, Error **errp)
 int file_write_ramblock_iov(QIOChannel *ioc, const struct iovec *iov,
 int niov, RAMBlock *block, Error **errp)
 {
-ssize_t ret = -1;
+ssize_t ret = 0;
 int i, slice_idx, slice_num;
 uintptr_t base, next, offset;
 size_t len;
-- 
2.30.2




Re: [External] Re: [PATCH v4 6/7] migration/multifd: Add zero pages and zero bytes counter to migration status interface.

2024-03-08 Thread hao . xiang
> 
> On Thu, Feb 29, 2024 at 11:40 PM Markus Armbruster  wrote:
> 
> > 
> > Hao Xiang  writes:
> > 
> >  This change extends the MigrationStatus interface to track zero pages
> > 
> >  and zero bytes counter.
> > 
> >  Signed-off-by: Hao Xiang 
> > 
> >  [...]
> > 
> >  diff --git a/qapi/migration.json b/qapi/migration.json
> > 
> >  index ca9561fbf1..03b850bab7 100644
> > 
> >  --- a/qapi/migration.json
> > 
> >  +++ b/qapi/migration.json
> > 
> >  @@ -63,6 +63,10 @@
> > 
> >  # between 0 and @dirty-sync-count * @multifd-channels. (since
> > 
> >  # 7.1)
> > 
> >  #
> > 
> >  +# @zero-pages: number of zero pages (since 9.0)
> > 
> >  +#
> > 
> >  +# @zero-bytes: number of zero bytes sent (since 9.0)
> > 
> >  +#
> > 
> >  Discussion of v3 has led me to believe:
> > 
> >  1. A page is either migrated as a normal page or as a zero page.
> > 
> >  2. The following equations hold:
> > 
> >  @normal-bytes = @normal * @page-size
> > 
> >  @zero-bytes = @zero-pages * @page-size
> > 
> >  3. @zero-pages is the same as @duplicate, with a better name. We intend
> > 
> >  to drop @duplicate eventually.
> > 
> >  If this is correct, I'd like you to
> > 
> >  A. Name it @zero for consistency with @normal. Disregard my advice to
> > 
> >  name it @zero-pages; two consistent bad names are better than one bad
> > 
> >  name, one good name, and inconsistency.
> > 
> >  B. Add @zero and @zero-bytes next to @normal and @normal-bytes.
> > 
> >  C. Deprecate @duplicate (item 3). Separate patch, please.
> > 
> >  D. Consider documenting more clearly what normal and zero pages are
> > 
> >  (item 1), and how @FOO, @FOO-pages and @page-size are related (item
> > 
> >  2). Could be done in a followup patch.

I will move this out of the current patchset and put it into a separate
patchset. I don't fully understand the exact process for deprecating an
interface, so I will need your help and probably a few more versions. I
also read in an earlier conversation that the soft freeze for QEMU 9.0 is
3/12, so hopefully the rest of this patchset can make it.

> > 
> >  # Features:
> > 
> >  #
> > 
> >  # @deprecated: Member @skipped is always zero since 1.5.3
> > 
> >  @@ -81,7 +85,8 @@
> > 
> >  'multifd-bytes': 'uint64', 'pages-per-second': 'uint64',
> > 
> >  'precopy-bytes': 'uint64', 'downtime-bytes': 'uint64',
> > 
> >  'postcopy-bytes': 'uint64',
> > 
> >  - 'dirty-sync-missed-zero-copy': 'uint64' } }
> > 
> >  + 'dirty-sync-missed-zero-copy': 'uint64',
> > 
> >  + 'zero-pages': 'int', 'zero-bytes': 'size' } }
> > 
> >  [...]
> >
>



Re: [External] Re: [PATCH v4 2/7] migration/multifd: Implement zero page transmission on the multifd thread.

2024-03-08 Thread hao . xiang
> 
> On Sun, Mar 3, 2024 at 11:16 PM Peter Xu  wrote:
> 
> > 
> > On Fri, Mar 01, 2024 at 02:28:24AM +, Hao Xiang wrote:
> > 
> >  -GlobalProperty hw_compat_8_2[] = {};
> > 
> >  +GlobalProperty hw_compat_8_2[] = {
> > 
> >  + { "migration", "zero-page-detection", "legacy"},
> > 
> >  +};
> > 
> >  I hope we can make it for 9.0, then this (and many rest places) can be kept
> > 
> >  as-is. Let's see.. soft-freeze is March 12th.
> > 
> >  One thing to mention is I just sent a pull which has mapped-ram feature
> > 
> >  merged. You may need a rebase onto that, and hopefully mapped-ram can also
> > 
> >  use your feature too within the same patch when you repost.
> > 
> >  https://lore.kernel.org/all/20240229153017.2221-1-faro...@suse.de/
> > 
> >  That rebase may or may not need much caution, I apologize for that:
> > 
> >  mapped-ram as a feature was discussed 1+ years, so it was a plan to merge
> > 
> >  it (actually still partly of it) into QEMU 9.0.

Let's see if we can catch that.

> > 
> >  [...]
> > 
> >  +static bool multifd_zero_page(void)
> > 
> >  multifd_zero_page_enabled()?

Changed.

> > 
> >  +{
> > 
> >  + return migrate_zero_page_detection() == ZERO_PAGE_DETECTION_MULTIFD;
> > 
> >  +}
> > 
> >  +
> > 
> >  +static void swap_page_offset(ram_addr_t *pages_offset, int a, int b)
> > 
> >  +{
> > 
> >  + ram_addr_t temp;
> > 
> >  +
> > 
> >  + if (a == b) {
> > 
> >  + return;
> > 
> >  + }
> > 
> >  +
> > 
> >  + temp = pages_offset[a];
> > 
> >  + pages_offset[a] = pages_offset[b];
> > 
> >  + pages_offset[b] = temp;
> > 
> >  +}
> > 
> >  +
> > 
> >  +/**
> > 
> >  + * multifd_send_zero_page_check: Perform zero page detection on all pages.
> > 
> >  + *
> > 
> >  + * Sorts normal pages before zero pages in p->pages->offset and updates
> > 
> >  + * p->pages->normal_num.
> > 
> >  + *
> > 
> >  + * @param p A pointer to the send params.
> > 
> >  Nit: the majority of doc style in QEMU (it seems to me) is:
> > 
> >  @p: pointer to @MultiFDSendParams.
> > 
> >  + */
> > 
> >  +void multifd_send_zero_page_check(MultiFDSendParams *p)
> > 
> >  multifd_send_zero_page_detect()?
> > 
> >  This patch used "check" on both sides, but neither of them is a pure check
> > 
> >  to me. For the other side, maybe multifd_recv_zero_page_process()? As
> > 
> >  that one applies the zero pages.


Renamed.

> > 
> >  +{
> > 
> >  + MultiFDPages_t *pages = p->pages;
> > 
> >  + RAMBlock *rb = pages->block;
> > 
> >  + int i = 0;
> > 
> >  + int j = pages->num - 1;
> > 
> >  +
> > 
> >  + /*
> > 
> >  + * QEMU older than 9.0 don't understand zero page
> > 
> >  + * on multifd channel. This switch is required to
> > 
> >  + * maintain backward compatibility.
> > 
> >  + */
> > 
> >  IMHO we can drop this comment; it is not accurate as the user can disable
> > 
> >  it explicitly through the parameter, then it may not always about 
> > compatibility.

Dropped.

> > 
> >  + if (multifd_zero_page()) {
> > 
> >  Shouldn't this be "!multifd_zero_page_enabled()"?

Thanks for catching this! My bad. Fixed.

> > 
> >  + pages->normal_num = pages->num;
> > 
> >  + return;
> > 
> >  + }
> > 
> >  The rest looks all sane.
> > 
> >  Thanks,
> > 
> >  --
> > 
> >  Peter Xu
> >
>



Re: [PATCH v4 3/7] migration/multifd: Implement ram_save_target_page_multifd to handle multifd version of MigrationOps::ram_save_target_page.

2024-03-08 Thread hao . xiang
> 
> On Sun, Mar 3, 2024 at 11:46 PM Peter Xu  wrote:
> 
> > 
> > On Fri, Mar 01, 2024 at 02:28:25AM +, Hao Xiang wrote:
> > 
> >  1. Add a dedicated handler for MigrationOps::ram_save_target_page in
> > 
> >  multifd live migration.
> > 
> >  2. Refactor ram_save_target_page_legacy so that the legacy and multifd
> > 
> >  handlers don't have internal functions calling into each other.
> > 
> >  Signed-off-by: Hao Xiang 
> > 
> >  Reviewed-by: Fabiano Rosas 
> > 
> >  Message-Id: <20240226195654.934709-4-hao.xi...@bytedance.com>
> > 
> >  ---
> > 
> >  migration/ram.c | 43 ++-
> > 
> >  1 file changed, 30 insertions(+), 13 deletions(-)
> > 
> >  diff --git a/migration/ram.c b/migration/ram.c
> > 
> >  index e1fa229acf..f9d6ea65cc 100644
> > 
> >  --- a/migration/ram.c
> > 
> >  +++ b/migration/ram.c
> > 
> >  @@ -1122,10 +1122,6 @@ static int save_zero_page(RAMState *rs, 
> > PageSearchStatus *pss,
> > 
> >  QEMUFile *file = pss->pss_channel;
> > 
> >  int len = 0;
> > 
> >  - if (migrate_zero_page_detection() == ZERO_PAGE_DETECTION_NONE) {
> > 
> >  - return 0;
> > 
> >  - }
> > 
> >  We need to keep this to disable zero-page-detect on !multifd?

So if multifd is enabled, the new parameter takes effect. If multifd is
not enabled, zero page checking is always done in the main thread, which
is exactly the behavior today. I thought legacy migration was headed for
deprecation, so I am trying not to add new options to it.

> > 
> >  -
> > 
> >  if (!buffer_is_zero(p, TARGET_PAGE_SIZE)) {
> > 
> >  return 0;
> > 
> >  }
> > 
> >  @@ -2045,7 +2041,6 @@ static bool save_compress_page(RAMState *rs, 
> > PageSearchStatus *pss,
> > 
> >  */
> > 
> >  static int ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss)
> > 
> >  {
> > 
> >  - RAMBlock *block = pss->block;
> > 
> >  ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
> > 
> >  int res;
> > 
> >  @@ -2061,17 +2056,34 @@ static int ram_save_target_page_legacy(RAMState 
> > *rs, PageSearchStatus *pss)
> > 
> >  return 1;
> > 
> >  }
> > 
> >  + return ram_save_page(rs, pss);
> > 
> >  +}
> > 
> >  +
> > 
> >  +/**
> > 
> >  + * ram_save_target_page_multifd: send one target page to multifd workers
> > 
> >  + *
> > 
> >  + * Returns 1 if the page was queued, -1 otherwise.
> > 
> >  + *
> > 
> >  + * @rs: current RAM state
> > 
> >  + * @pss: data about the page we want to send
> > 
> >  + */
> > 
> >  +static int ram_save_target_page_multifd(RAMState *rs, PageSearchStatus 
> > *pss)
> > 
> >  +{
> > 
> >  + RAMBlock *block = pss->block;
> > 
> >  + ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
> > 
> >  +
> > 
> >  /*
> > 
> >  - * Do not use multifd in postcopy as one whole host page should be
> > 
> >  - * placed. Meanwhile postcopy requires atomic update of pages, so even
> > 
> >  - * if host page size == guest page size the dest guest during run may
> > 
> >  - * still see partially copied pages which is data corruption.
> > 
> >  + * Backward compatibility support. While using multifd live
> > 
> >  We can also avoid mentioning "compatibility support" here - it's a
> > 
> >  parameter, user can legally set it to anything.

Will drop that.

> > 
> >  + * migration, we still need to handle zero page checking on the
> > 
> >  + * migration main thread.
> > 
> >  */
> > 
> >  - if (migrate_multifd() && !migration_in_postcopy()) {
> > 
> >  - return ram_save_multifd_page(block, offset);
> > 
> >  + if (migrate_zero_page_detection() == ZERO_PAGE_DETECTION_LEGACY) {
> > 
> >  + if (save_zero_page(rs, pss, offset)) {
> > 
> >  + return 1;
> > 
> >  + }
> > 
> >  }
> > 
> >  - return ram_save_page(rs, pss);
> > 
> >  + return ram_save_multifd_page(block, offset);
> > 
> >  }
> > 
> >  /* Should be called before sending a host page */
> > 
> >  @@ -2983,7 +2995,12 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
> > 
> >  }
> > 
> >  migration_ops = g_malloc0(sizeof(MigrationOps));
> > 
> >  - migration_ops->ram_save_target_page = ram_save_target_page_legacy;
> > 
> >  +
> > 
> >  + if (migrate_multifd()) {
> > 
> >  + migration_ops->ram_save_target_page = ram_save_target_page_multifd;
> > 
> >  + } else {
> > 
> >  + migration_ops->ram_save_target_page = ram_save_target_page_legacy;
> > 
> >  + }
> > 
> >  bql_unlock();
> > 
> >  ret = multifd_send_sync_main();
> > 
> >  --
> > 
> >  2.30.2
> > 
> >  --
> > 
> >  Peter Xu
> >
>



Re: [External] Re: [PATCH v3 11/20] util/dsa: Implement DSA task asynchronous submission and wait for completion.

2024-03-08 Thread hao . xiang
> 
> On Fri, Mar 8, 2024 at 2:11 AM Jonathan Cameron
> 
>  wrote:
> 
> > 
> > On Thu, 4 Jan 2024 00:44:43 +
> > 
> >  Hao Xiang  wrote:
> > 
> >  * Add a DSA task completion callback.
> > 
> >  * DSA completion thread will call the tasks's completion callback
> > 
> >  on every task/batch task completion.
> > 
> >  * DSA submission path to wait for completion.
> > 
> >  * Implement CPU fallback if DSA is not able to complete the task.
> > 
> >  Signed-off-by: Hao Xiang 
> > 
> >  Signed-off-by: Bryan Zhang 
> > 
> >  Hi,
> > 
> >  One naming comment inline. You had me confused on how you were handling 
> > async
> > 
> >  processing at where this is used. Answer is that I think you aren't!
> > 
> >  +/**
> > 
> >  + * @brief Performs buffer zero comparison on a DSA batch task 
> > asynchronously.
> > 
> >  The hardware may be doing it asynchronously but unless that
> > 
> >  buffer_zero_dsa_wait() call doesn't do what it's name suggests, this 
> > function
> > 
> >  is wrapping the async hardware related stuff to make it synchronous.
> > 
> >  So name it buffer_is_zero_dsa_batch_sync()!
> > 
> >  Jonathan


Thanks for reviewing this. The first completion model I tried was a busy
loop polling for completion on the submission thread, but it turned out
to have too much unnecessary overhead: with 10 threads all submitting
tasks, we end up with 10 busy loops. I moved the completion work to a
dedicated thread and named the function async! However, the async model
doesn't fit well with the current live migration thread model, so
eventually I added a wait on the submission thread. It was intended to be
async but I agree that it is not currently. I will rename it in the next
revision.
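
For illustration, here is a hedged sketch of that final model: submission
stays asynchronous, a dedicated completion thread signals each finished
task, and the submitter blocks on a condition variable instead of
busy-polling. All names are hypothetical, not the actual util/dsa.c API:

#include <pthread.h>
#include <stdbool.h>

typedef struct {
    pthread_mutex_t lock;
    pthread_cond_t cond;
    bool done;
} hw_task;

/* Runs on the dedicated completion thread when the hardware finishes. */
static void hw_task_complete(hw_task *t)
{
    pthread_mutex_lock(&t->lock);
    t->done = true;
    pthread_cond_signal(&t->cond);
    pthread_mutex_unlock(&t->lock);
}

/* Runs on the submission thread right after submitting: asynchronous
 * under the hood, synchronous from the caller's point of view. */
static void hw_task_wait(hw_task *t)
{
    pthread_mutex_lock(&t->lock);
    while (!t->done) {
        pthread_cond_wait(&t->cond, &t->lock);
    }
    pthread_mutex_unlock(&t->lock);
}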

> > 
> >  + *
> > 
> >  + * @param batch_task A pointer to the batch task.
> > 
> >  + * @param buf An array of memory buffers.
> > 
> >  + * @param count The number of buffers in the array.
> > 
> >  + * @param len The buffer length.
> > 
> >  + *
> > 
> >  + * @return Zero if successful, otherwise non-zero.
> > 
> >  + */
> > 
> >  +int
> > 
> >  +buffer_is_zero_dsa_batch_async(struct dsa_batch_task *batch_task,
> > 
> >  + const void **buf, size_t count, size_t len)
> > 
> >  +{
> > 
> >  + if (count <= 0 || count > batch_task->batch_size) {
> > 
> >  + return -1;
> > 
> >  + }
> > 
> >  +
> > 
> >  + assert(batch_task != NULL);
> > 
> >  + assert(len != 0);
> > 
> >  + assert(buf != NULL);
> > 
> >  +
> > 
> >  + if (count == 1) {
> > 
> >  + /* DSA doesn't take batch operation with only 1 task. */
> > 
> >  + buffer_zero_dsa_async(batch_task, buf[0], len);
> > 
> >  + } else {
> > 
> >  + buffer_zero_dsa_batch_async(batch_task, buf, count, len);
> > 
> >  + }
> > 
> >  +
> > 
> >  + buffer_zero_dsa_wait(batch_task);
> > 
> >  + buffer_zero_cpu_fallback(batch_task);
> > 
> >  +
> > 
> >  + return 0;
> > 
> >  +}
> > 
> >  +
> > 
> >  #endif
> >
>



Re: [External] Re: [PATCH v4 2/7] migration/multifd: Implement zero page transmission on the multifd thread.

2024-03-01 Thread Hao Xiang
On Thu, Feb 29, 2024 at 11:28 PM Markus Armbruster  wrote:
>
> Hao Xiang  writes:
>
> > 1. Add zero_pages field in MultiFDPacket_t.
> > 2. Implements the zero page detection and handling on the multifd
> > threads for non-compression, zlib and zstd compression backends.
> > 3. Added a new value 'multifd' in ZeroPageDetection enumeration.
> > 4. Handle migration QEMU9.0 -> QEMU8.2 compatibility.
> > 5. Adds zero page counters and updates multifd send/receive tracing
> > format to track the newly added counters.
> >
> > Signed-off-by: Hao Xiang 
>
> [...]
>
> > diff --git a/qapi/migration.json b/qapi/migration.json
> > index 8da05dba47..846d0411d5 100644
> > --- a/qapi/migration.json
> > +++ b/qapi/migration.json
> > @@ -671,10 +671,15 @@
> >  #
> >  # @legacy: Perform zero page checking in main migration thread.
> >  #
> > +# @multifd: Perform zero page checking in multifd sender thread.
> > +# This option only takes effect if migration capability multifd
> > +# is set.  Otherwise, it will have the same effect as legacy.
>
> Suggest
>
># @multifd: Perform zero page checking in multifd sender thread if
># multifd migration is enabled, else in the main migration
># thread as for @legacy.
>
> Thoughts?

Sounds good. Will change that.

>
> > +#
> >  # Since: 9.0
> > +#
> >  ##
> >  { 'enum': 'ZeroPageDetection',
> > -  'data': [ 'none', 'legacy' ] }
> > +  'data': [ 'none', 'legacy', 'multifd' ] }
> >
> >  ##
> >  # @BitmapMigrationBitmapAliasTransform:
>
> QAPI schema
> Acked-by: Markus Armbruster 
>



[PATCH v4 3/7] migration/multifd: Implement ram_save_target_page_multifd to handle multifd version of MigrationOps::ram_save_target_page.

2024-02-29 Thread Hao Xiang
1. Add a dedicated handler for MigrationOps::ram_save_target_page in
multifd live migration.
2. Refactor ram_save_target_page_legacy so that the legacy and multifd
handlers don't have internal functions calling into each other.

Signed-off-by: Hao Xiang 
Reviewed-by: Fabiano Rosas 
Message-Id: <20240226195654.934709-4-hao.xi...@bytedance.com>
---
 migration/ram.c | 43 ++-
 1 file changed, 30 insertions(+), 13 deletions(-)

diff --git a/migration/ram.c b/migration/ram.c
index e1fa229acf..f9d6ea65cc 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -1122,10 +1122,6 @@ static int save_zero_page(RAMState *rs, PageSearchStatus *pss,
 QEMUFile *file = pss->pss_channel;
 int len = 0;
 
-if (migrate_zero_page_detection() == ZERO_PAGE_DETECTION_NONE) {
-return 0;
-}
-
 if (!buffer_is_zero(p, TARGET_PAGE_SIZE)) {
 return 0;
 }
@@ -2045,7 +2041,6 @@ static bool save_compress_page(RAMState *rs, PageSearchStatus *pss,
  */
 static int ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss)
 {
-RAMBlock *block = pss->block;
 ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
 int res;
 
@@ -2061,17 +2056,34 @@ static int ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss)
 return 1;
 }
 
+return ram_save_page(rs, pss);
+}
+
+/**
+ * ram_save_target_page_multifd: send one target page to multifd workers
+ *
+ * Returns 1 if the page was queued, -1 otherwise.
+ *
+ * @rs: current RAM state
+ * @pss: data about the page we want to send
+ */
+static int ram_save_target_page_multifd(RAMState *rs, PageSearchStatus *pss)
+{
+RAMBlock *block = pss->block;
+ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
+
 /*
- * Do not use multifd in postcopy as one whole host page should be
- * placed.  Meanwhile postcopy requires atomic update of pages, so even
- * if host page size == guest page size the dest guest during run may
- * still see partially copied pages which is data corruption.
+ * Backward compatibility support. While using multifd live
+ * migration, we still need to handle zero page checking on the
+ * migration main thread.
  */
-if (migrate_multifd() && !migration_in_postcopy()) {
-return ram_save_multifd_page(block, offset);
+if (migrate_zero_page_detection() == ZERO_PAGE_DETECTION_LEGACY) {
+if (save_zero_page(rs, pss, offset)) {
+return 1;
+}
 }
 
-return ram_save_page(rs, pss);
+return ram_save_multifd_page(block, offset);
 }
 
 /* Should be called before sending a host page */
@@ -2983,7 +2995,12 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
 }
 
 migration_ops = g_malloc0(sizeof(MigrationOps));
-migration_ops->ram_save_target_page = ram_save_target_page_legacy;
+
+if (migrate_multifd()) {
+migration_ops->ram_save_target_page = ram_save_target_page_multifd;
+} else {
+migration_ops->ram_save_target_page = ram_save_target_page_legacy;
+}
 
 bql_unlock();
 ret = multifd_send_sync_main();
-- 
2.30.2




[PATCH v4 5/7] migration/multifd: Add new migration test cases for legacy zero page checking.

2024-02-29 Thread Hao Xiang
Now that zero page checking is done on the multifd sender threads by
default, we still provide options for backward compatibility. This
change adds qtest migration test cases that set the zero-page-detection
option to "legacy" (zero page checking on the migration main thread) and
to "none" (zero page checking disabled) and run multifd migration with
them.

Signed-off-by: Hao Xiang 
---
 tests/qtest/migration-test.c | 52 
 1 file changed, 52 insertions(+)

diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index 83512bce85..8a966364b5 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -2660,6 +2660,24 @@ test_migrate_precopy_tcp_multifd_start(QTestState *from,
 return test_migrate_precopy_tcp_multifd_start_common(from, to, "none");
 }
 
+static void *
+test_migrate_precopy_tcp_multifd_start_zero_page_legacy(QTestState *from,
+QTestState *to)
+{
+test_migrate_precopy_tcp_multifd_start_common(from, to, "none");
+migrate_set_parameter_str(from, "zero-page-detection", "legacy");
+return NULL;
+}
+
+static void *
+test_migration_precopy_tcp_multifd_start_no_zero_page(QTestState *from,
+  QTestState *to)
+{
+test_migrate_precopy_tcp_multifd_start_common(from, to, "none");
+migrate_set_parameter_str(from, "zero-page-detection", "none");
+return NULL;
+}
+
 static void *
 test_migrate_precopy_tcp_multifd_zlib_start(QTestState *from,
 QTestState *to)
@@ -2691,6 +2709,36 @@ static void test_multifd_tcp_none(void)
 test_precopy_common(&args);
 }
 
+static void test_multifd_tcp_zero_page_legacy(void)
+{
+MigrateCommon args = {
+.listen_uri = "defer",
+.start_hook = test_migrate_precopy_tcp_multifd_start_zero_page_legacy,
+/*
+ * Multifd is more complicated than most of the features, it
+ * directly takes guest page buffers when sending, make sure
+ * everything will work alright even if guest page is changing.
+ */
+.live = true,
+};
+test_precopy_common(&args);
+}
+
+static void test_multifd_tcp_no_zero_page(void)
+{
+MigrateCommon args = {
+.listen_uri = "defer",
+.start_hook = test_migration_precopy_tcp_multifd_start_no_zero_page,
+/*
+ * Multifd is more complicated than most of the features, it
+ * directly takes guest page buffers when sending, make sure
+ * everything will work alright even if guest page is changing.
+ */
+.live = true,
+};
+test_precopy_common(&args);
+}
+
 static void test_multifd_tcp_zlib(void)
 {
 MigrateCommon args = {
@@ -3592,6 +3640,10 @@ int main(int argc, char **argv)
 }
 migration_test_add("/migration/multifd/tcp/plain/none",
test_multifd_tcp_none);
+migration_test_add("/migration/multifd/tcp/plain/zero-page/legacy",
+   test_multifd_tcp_zero_page_legacy);
+migration_test_add("/migration/multifd/tcp/plain/zero-page/none",
+   test_multifd_tcp_no_zero_page);
 migration_test_add("/migration/multifd/tcp/plain/cancel",
test_multifd_tcp_cancel);
 migration_test_add("/migration/multifd/tcp/plain/zlib",
-- 
2.30.2




[PATCH v4 0/7] Introduce multifd zero page checking.

2024-02-29 Thread Hao Xiang
v4 update:
* Fix documentation for interface ZeroPageDetection.
* Fix implementation in multifd_send_zero_page_check.
* Rebase on top of c0c6a0e3528b88aaad0b9d333e295707a195587b.

v3 update:
* Change "zero" to "zero-pages" and use type size for "zero-bytes".
* Fixed ZeroPageDetection interface description.
* Move zero page unit tests to its own path.
* Removed some asserts.
* Added backward compatibility support for migration 9.0 -> 8.2.
* Removed fields "zero" and "normal" page address arrays from v2. Now
multifd_zero_page_check_send sorts normal/zero pages in the "offset" array.

v2 update:
* Implement zero-page-detection switch with enumeration "legacy",
"none" and "multifd".
* Move normal/zero pages from MultiFDSendParams to MultiFDPages_t.
* Add zeros and zero_bytes accounting.

This patchset is based on Juan Quintela's old series here
https://lore.kernel.org/all/20220802063907.18882-1-quint...@redhat.com/

In the multifd live migration model, there is a single migration main
thread scanning the page map, queuing the pages to multiple multifd
sender threads. The migration main thread runs zero page checking on
every page before queuing the page to the sender threads. Zero page
checking is a CPU intensive task and hence having a single thread doing
all that doesn't scale well. This change introduces a new function
to run the zero page checking on the multifd sender threads. This
patchset also lays the groundwork for future changes that offload the
zero page checking task to hardware accelerators.
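
For reference, here is a self-contained sketch of the in-place partition
the sender threads perform (the same two-pointer scheme as
multifd_send_zero_page_detect() in this series); page_is_zero() is a
stand-in for buffer_is_zero() on the page contents:

#include <stdbool.h>
#include <stdint.h>

static void swap_offsets(uint64_t *off, int a, int b)
{
    if (a != b) {
        uint64_t t = off[a];
        off[a] = off[b];
        off[b] = t;
    }
}

/* Returns normal_num: offsets [0, normal_num) are normal pages and
 * offsets [normal_num, num) are zero pages. */
static int partition_pages(uint64_t *off, int num,
                           bool (*page_is_zero)(uint64_t))
{
    int i = 0, j = num - 1;

    while (i <= j) {
        if (!page_is_zero(off[i])) {
            i++;                     /* normal page: keep it on the left */
            continue;
        }
        swap_offsets(off, i, j);     /* zero page: move it to the right */
        j--;
    }
    return i;
}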

Use two Intel 4th generation Xeon servers for testing.

Architecture:x86_64
CPU(s):  192
Thread(s) per core:  2
Core(s) per socket:  48
Socket(s):   2
NUMA node(s):2
Vendor ID:   GenuineIntel
CPU family:  6
Model:   143
Model name:  Intel(R) Xeon(R) Platinum 8457C
Stepping:8
CPU MHz: 2538.624
CPU max MHz: 3800.0000
CPU min MHz: 800.0000

Perform multifd live migration with below setup:
1. VM has 100GB memory. All pages in the VM are zero pages.
2. Use tcp socket for live migration.
3. Use 4 multifd channels and zero page checking on migration main thread.
4. Use 1/2/4 multifd channels and zero page checking on multifd sender
threads.
5. Record migration total time from sender QEMU console's "info migrate"
command.

+--------------------+----------------+
| zero-page-checking | total-time(ms) |
+--------------------+----------------+
| main-thread        | 9629           |
+--------------------+----------------+
| multifd-1-threads  | 6182           |
+--------------------+----------------+
| multifd-2-threads  | 4643           |
+--------------------+----------------+
| multifd-4-threads  | 4143           |
+--------------------+----------------+

Apply this patchset on top of commit
c0c6a0e3528b88aaad0b9d333e295707a195587b

Hao Xiang (7):
  migration/multifd: Add new migration option zero-page-detection.
  migration/multifd: Implement zero page transmission on the multifd
thread.
  migration/multifd: Implement ram_save_target_page_multifd to handle
multifd version of MigrationOps::ram_save_target_page.
  migration/multifd: Enable multifd zero page checking by default.
  migration/multifd: Add new migration test cases for legacy zero page
checking.
  migration/multifd: Add zero pages and zero bytes counter to migration
status interface.
  Update maintainer contact for migration multifd zero page checking
acceleration.

 MAINTAINERS |  5 ++
 hw/core/machine.c   |  4 +-
 hw/core/qdev-properties-system.c| 10 
 include/hw/qdev-properties-system.h |  4 ++
 migration/meson.build   |  1 +
 migration/migration-hmp-cmds.c  | 13 
 migration/migration.c   |  2 +
 migration/multifd-zero-page.c   | 92 +
 migration/multifd-zlib.c| 21 +--
 migration/multifd-zstd.c| 20 +--
 migration/multifd.c | 83 ++
 migration/multifd.h | 24 +++-
 migration/options.c | 21 +++
 migration/options.h |  1 +
 migration/ram.c | 40 +
 migration/trace-events  |  8 +--
 qapi/migration.json | 53 +++--
 tests/migration/guestperf/engine.py |  2 +
 tests/qtest/migration-test.c| 52 
 19 files changed, 412 insertions(+), 44 deletions(-)
 create mode 100644 migration/multifd-zero-page.c

-- 
2.30.2




[PATCH v4 1/7] migration/multifd: Add new migration option zero-page-detection.

2024-02-29 Thread Hao Xiang
This new parameter controls where the zero page checking is running.
1. If this parameter is set to 'legacy', zero page checking is
done in the migration main thread.
2. If this parameter is set to 'none', zero page checking is disabled.

Signed-off-by: Hao Xiang 
---
 hw/core/qdev-properties-system.c| 10 +
 include/hw/qdev-properties-system.h |  4 
 migration/migration-hmp-cmds.c  |  9 
 migration/options.c | 21 ++
 migration/options.h |  1 +
 migration/ram.c |  4 
 qapi/migration.json | 33 ++---
 7 files changed, 79 insertions(+), 3 deletions(-)

diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
index 1a396521d5..228e685f52 100644
--- a/hw/core/qdev-properties-system.c
+++ b/hw/core/qdev-properties-system.c
@@ -679,6 +679,16 @@ const PropertyInfo qdev_prop_mig_mode = {
 .set_default_value = qdev_propinfo_set_default_value_enum,
 };
 
+const PropertyInfo qdev_prop_zero_page_detection = {
+.name = "ZeroPageDetection",
+.description = "zero_page_detection values, "
+   "none,legacy",
+.enum_table = &ZeroPageDetection_lookup,
+.get = qdev_propinfo_get_enum,
+.set = qdev_propinfo_set_enum,
+.set_default_value = qdev_propinfo_set_default_value_enum,
+};
+
 /* --- Reserved Region --- */
 
 /*
diff --git a/include/hw/qdev-properties-system.h b/include/hw/qdev-properties-system.h
index 06c359c190..839b170235 100644
--- a/include/hw/qdev-properties-system.h
+++ b/include/hw/qdev-properties-system.h
@@ -8,6 +8,7 @@ extern const PropertyInfo qdev_prop_macaddr;
 extern const PropertyInfo qdev_prop_reserved_region;
 extern const PropertyInfo qdev_prop_multifd_compression;
 extern const PropertyInfo qdev_prop_mig_mode;
+extern const PropertyInfo qdev_prop_zero_page_detection;
 extern const PropertyInfo qdev_prop_losttickpolicy;
 extern const PropertyInfo qdev_prop_blockdev_on_error;
 extern const PropertyInfo qdev_prop_bios_chs_trans;
@@ -47,6 +48,9 @@ extern const PropertyInfo qdev_prop_iothread_vq_mapping_list;
 #define DEFINE_PROP_MIG_MODE(_n, _s, _f, _d) \
 DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_mig_mode, \
MigMode)
+#define DEFINE_PROP_ZERO_PAGE_DETECTION(_n, _s, _f, _d) \
+DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_zero_page_detection, \
+   ZeroPageDetection)
 #define DEFINE_PROP_LOSTTICKPOLICY(_n, _s, _f, _d) \
 DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_losttickpolicy, \
 LostTickPolicy)
diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c
index 99b49df5dd..7e96ae6ffd 100644
--- a/migration/migration-hmp-cmds.c
+++ b/migration/migration-hmp-cmds.c
@@ -344,6 +344,11 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict)
 monitor_printf(mon, "%s: %s\n",
 MigrationParameter_str(MIGRATION_PARAMETER_MULTIFD_COMPRESSION),
 MultiFDCompression_str(params->multifd_compression));
+assert(params->has_zero_page_detection);
+monitor_printf(mon, "%s: %s\n",
+MigrationParameter_str(MIGRATION_PARAMETER_ZERO_PAGE_DETECTION),
+qapi_enum_lookup(&ZeroPageDetection_lookup,
+params->zero_page_detection));
 monitor_printf(mon, "%s: %" PRIu64 " bytes\n",
 MigrationParameter_str(MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE),
 params->xbzrle_cache_size);
@@ -634,6 +639,10 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict)
 p->has_multifd_zstd_level = true;
 visit_type_uint8(v, param, &p->multifd_zstd_level, &err);
 break;
+case MIGRATION_PARAMETER_ZERO_PAGE_DETECTION:
+p->has_zero_page_detection = true;
+visit_type_ZeroPageDetection(v, param, &p->zero_page_detection, &err);
+break;
 case MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE:
 p->has_xbzrle_cache_size = true;
 if (!visit_type_size(v, param, &cache_size, &err)) {
diff --git a/migration/options.c b/migration/options.c
index 3e3e0b93b4..3c603391b0 100644
--- a/migration/options.c
+++ b/migration/options.c
@@ -179,6 +179,9 @@ Property migration_properties[] = {
 DEFINE_PROP_MIG_MODE("mode", MigrationState,
   parameters.mode,
   MIG_MODE_NORMAL),
+DEFINE_PROP_ZERO_PAGE_DETECTION("zero-page-detection", MigrationState,
+   parameters.zero_page_detection,
+   ZERO_PAGE_DETECTION_LEGACY),
 
 /* Migration capabilities */
 DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
@@ -903,6 +906,13 @@ uint64_t migrate_xbzrle_cache_size(void)
 return s->parameters.xbzrle_cache_size;
 }
 
+ZeroPageDetection migrate_zero_page_detection(void)
+{
+MigrationState *s = migrate_get_current(

[PATCH v4 2/7] migration/multifd: Implement zero page transmission on the multifd thread.

2024-02-29 Thread Hao Xiang
1. Add zero_pages field in MultiFDPacket_t.
2. Implement the zero page detection and handling on the multifd
threads for non-compression, zlib and zstd compression backends.
3. Add a new value 'multifd' in ZeroPageDetection enumeration.
4. Handle migration QEMU9.0 -> QEMU8.2 compatibility.
5. Add zero page counters and update the multifd send/receive tracing
format to track the newly added counters.

Signed-off-by: Hao Xiang 
---
 hw/core/machine.c|  4 +-
 hw/core/qdev-properties-system.c |  2 +-
 migration/meson.build|  1 +
 migration/multifd-zero-page.c| 92 
 migration/multifd-zlib.c | 21 ++--
 migration/multifd-zstd.c | 20 +--
 migration/multifd.c  | 83 +++-
 migration/multifd.h  | 24 -
 migration/ram.c  |  1 -
 migration/trace-events   |  8 +--
 qapi/migration.json  |  7 ++-
 11 files changed, 230 insertions(+), 33 deletions(-)
 create mode 100644 migration/multifd-zero-page.c

diff --git a/hw/core/machine.c b/hw/core/machine.c
index 9ac5d5389a..0e9d646b61 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -32,7 +32,9 @@
 #include "hw/virtio/virtio-net.h"
 #include "audio/audio.h"
 
-GlobalProperty hw_compat_8_2[] = {};
+GlobalProperty hw_compat_8_2[] = {
+{ "migration", "zero-page-detection", "legacy"},
+};
 const size_t hw_compat_8_2_len = G_N_ELEMENTS(hw_compat_8_2);
 
 GlobalProperty hw_compat_8_1[] = {
diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
index 228e685f52..6e6f68ae1b 100644
--- a/hw/core/qdev-properties-system.c
+++ b/hw/core/qdev-properties-system.c
@@ -682,7 +682,7 @@ const PropertyInfo qdev_prop_mig_mode = {
 const PropertyInfo qdev_prop_zero_page_detection = {
 .name = "ZeroPageDetection",
 .description = "zero_page_detection values, "
-   "none,legacy",
+   "none,legacy,multifd",
 .enum_table = &ZeroPageDetection_lookup,
 .get = qdev_propinfo_get_enum,
 .set = qdev_propinfo_set_enum,
diff --git a/migration/meson.build b/migration/meson.build
index 92b1cc4297..1eeb915ff6 100644
--- a/migration/meson.build
+++ b/migration/meson.build
@@ -22,6 +22,7 @@ system_ss.add(files(
   'migration.c',
   'multifd.c',
   'multifd-zlib.c',
+  'multifd-zero-page.c',
   'ram-compress.c',
   'options.c',
   'postcopy-ram.c',
diff --git a/migration/multifd-zero-page.c b/migration/multifd-zero-page.c
new file mode 100644
index 00..e695f6ff7d
--- /dev/null
+++ b/migration/multifd-zero-page.c
@@ -0,0 +1,92 @@
+/*
+ * Multifd zero page detection implementation.
+ *
+ * Copyright (c) 2024 Bytedance Inc
+ *
+ * Authors:
+ *  Hao Xiang 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/cutils.h"
+#include "exec/ramblock.h"
+#include "migration.h"
+#include "multifd.h"
+#include "options.h"
+#include "ram.h"
+
+static bool multifd_zero_page(void)
+{
+return migrate_zero_page_detection() == ZERO_PAGE_DETECTION_MULTIFD;
+}
+
+static void swap_page_offset(ram_addr_t *pages_offset, int a, int b)
+{
+ram_addr_t temp;
+
+if (a == b) {
+return;
+}
+
+temp = pages_offset[a];
+pages_offset[a] = pages_offset[b];
+pages_offset[b] = temp;
+}
+
+/**
+ * multifd_send_zero_page_check: Perform zero page detection on all pages.
+ *
+ * Sorts normal pages before zero pages in p->pages->offset and updates
+ * p->pages->normal_num.
+ *
+ * @param p A pointer to the send params.
+ */
+void multifd_send_zero_page_check(MultiFDSendParams *p)
+{
+MultiFDPages_t *pages = p->pages;
+RAMBlock *rb = pages->block;
+int i = 0;
+int j = pages->num - 1;
+
+/*
+ * QEMU older than 9.0 don't understand zero page
+ * on multifd channel. This switch is required to
+ * maintain backward compatibility.
+ */
+if (multifd_zero_page()) {
+pages->normal_num = pages->num;
+return;
+}
+
+/*
+ * Sort the page offset array by moving all normal pages to
+ * the left and all zero pages to the right of the array.
+ */
+while (i <= j) {
+uint64_t offset = pages->offset[i];
+
+if (!buffer_is_zero(rb->host + offset, p->page_size)) {
+i++;
+continue;
+}
+
+swap_page_offset(pages->offset, i, j);
+ram_release_page(rb->idstr, offset);
+j--;
+}
+
+pages->normal_num = i;
+}
+
+void multifd_recv_zero_page_check(MultiFDRecvParams *p)
+{
+for (int i = 0; i < p->zero_num; i++) {
+void *page = p->host + p->zero[i];
+if (!buffer_is_zero(page, p->

[PATCH v4 6/7] migration/multifd: Add zero pages and zero bytes counter to migration status interface.

2024-02-29 Thread Hao Xiang
This change extends the MigrationStatus interface to track the zero
pages and zero bytes counters.

Signed-off-by: Hao Xiang 
---
 migration/migration-hmp-cmds.c  |  4 
 migration/migration.c   |  2 ++
 qapi/migration.json | 15 ++-
 tests/migration/guestperf/engine.py |  2 ++
 4 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c
index 7e96ae6ffd..a38ad0255d 100644
--- a/migration/migration-hmp-cmds.c
+++ b/migration/migration-hmp-cmds.c
@@ -111,6 +111,10 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
info->ram->normal);
 monitor_printf(mon, "normal bytes: %" PRIu64 " kbytes\n",
info->ram->normal_bytes >> 10);
+monitor_printf(mon, "zero pages: %" PRIu64 " pages\n",
+   info->ram->zero_pages);
+monitor_printf(mon, "zero bytes: %" PRIu64 " kbytes\n",
+   info->ram->zero_bytes >> 10);
 monitor_printf(mon, "dirty sync count: %" PRIu64 "\n",
info->ram->dirty_sync_count);
 monitor_printf(mon, "page size: %" PRIu64 " kbytes\n",
diff --git a/migration/migration.c b/migration/migration.c
index bab68bcbef..fc4e3ef52d 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1126,6 +1126,8 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s)
 info->ram->skipped = 0;
 info->ram->normal = stat64_get(&mig_stats.normal_pages);
 info->ram->normal_bytes = info->ram->normal * page_size;
+info->ram->zero_pages = stat64_get(&mig_stats.zero_pages);
+info->ram->zero_bytes = info->ram->zero_pages * page_size;
 info->ram->mbps = s->mbps;
 info->ram->dirty_sync_count =
 stat64_get(&mig_stats.dirty_sync_count);
diff --git a/qapi/migration.json b/qapi/migration.json
index ca9561fbf1..03b850bab7 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -63,6 +63,10 @@
 # between 0 and @dirty-sync-count * @multifd-channels.  (since
 # 7.1)
 #
+# @zero-pages: number of zero pages (since 9.0)
+#
+# @zero-bytes: number of zero bytes sent (since 9.0)
+#
 # Features:
 #
 # @deprecated: Member @skipped is always zero since 1.5.3
@@ -81,7 +85,8 @@
'multifd-bytes': 'uint64', 'pages-per-second': 'uint64',
'precopy-bytes': 'uint64', 'downtime-bytes': 'uint64',
'postcopy-bytes': 'uint64',
-   'dirty-sync-missed-zero-copy': 'uint64' } }
+   'dirty-sync-missed-zero-copy': 'uint64',
+   'zero-pages': 'int', 'zero-bytes': 'size' } }
 
 ##
 # @XBZRLECacheStats:
@@ -332,6 +337,8 @@
 #   "duplicate":123,
 #   "normal":123,
 #   "normal-bytes":123456,
+#   "zero-pages":123,
+#   "zero-bytes":123456,
 #   "dirty-sync-count":15
 # }
 #  }
@@ -358,6 +365,8 @@
 # "duplicate":123,
 # "normal":123,
 # "normal-bytes":123456,
+# "zero-pages":123,
+# "zero-bytes":123456,
 # "dirty-sync-count":15
 #  }
 #   }
@@ -379,6 +388,8 @@
 # "duplicate":123,
 # "normal":123,
 # "normal-bytes":123456,
+# "zero-pages":123,
+# "zero-bytes":123456,
 # "dirty-sync-count":15
 #  },
 #  "disk":{
@@ -405,6 +416,8 @@
 # "duplicate":10,
 # "normal":,
 # "normal-bytes":3412992,
+# "zero-pages":,
+# "zero-bytes":3412992,
 # "dirty-sync-count":15
 #  },
 #  "xbzrle-cache":{
diff --git a/tests/migration/guestperf/engine.py b/tests/migration/guestperf/engine.py
index 608d7270f6..693e07c227 100644
--- a/tests/migration/guestperf/engine.py
+++ b/tests/migration/guestperf/engine.py
@@ -92,6 +92,8 @@ def _migrate_progress(self, vm):
 info["ram"].get("skipped", 0),
 info["ram"].get("normal", 0),
 info["ram"].get("normal-bytes", 0),
+info["ram"].get("zero-pages", 0);
+info["ram"].get("zero-bytes", 0);
 info["ram"].get("dirty-pages-rate", 0),
 info["ram"].get("mbps", 0),
 info["ram"].get("dirty-sync-count", 0)
-- 
2.30.2




[PATCH v4 7/7] Update maintainer contact for migration multifd zero page checking acceleration.

2024-02-29 Thread Hao Xiang
Add myself as the maintainer of the multifd zero page checking
acceleration.

Signed-off-by: Hao Xiang 
---
 MAINTAINERS | 5 +
 1 file changed, 5 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 65dfdc9677..b547918e4d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3414,6 +3414,11 @@ F: tests/migration/
 F: util/userfaultfd.c
 X: migration/rdma*
 
+Migration multifd zero page checking acceleration
+M: Hao Xiang 
+S: Maintained
+F: migration/multifd-zero-page.c
+
 RDMA Migration
 R: Li Zhijian 
 R: Peter Xu 
-- 
2.30.2




[PATCH v4 4/7] migration/multifd: Enable multifd zero page checking by default.

2024-02-29 Thread Hao Xiang
Set default "zero-page-detection" option to "multifd". Now zero page
checking can be done in the multifd threads and this becomes the
default configuration. We still provide backward compatibility
where zero page checking is done from the migration main thread.

Signed-off-by: Hao Xiang 
---
 migration/options.c | 2 +-
 qapi/migration.json | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/migration/options.c b/migration/options.c
index 3c603391b0..3c79b6ccd4 100644
--- a/migration/options.c
+++ b/migration/options.c
@@ -181,7 +181,7 @@ Property migration_properties[] = {
   MIG_MODE_NORMAL),
 DEFINE_PROP_ZERO_PAGE_DETECTION("zero-page-detection", MigrationState,
parameters.zero_page_detection,
-   ZERO_PAGE_DETECTION_LEGACY),
+   ZERO_PAGE_DETECTION_MULTIFD),
 
 /* Migration capabilities */
 DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
diff --git a/qapi/migration.json b/qapi/migration.json
index 846d0411d5..ca9561fbf1 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -903,7 +903,7 @@
 #(Since 8.2)
 #
 # @zero-page-detection: Whether and how to detect zero pages.
-# See description in @ZeroPageDetection.  Default is 'legacy'.
+# See description in @ZeroPageDetection.  Default is 'multifd'.
 # (since 9.0)
 #
 # Features:
@@ -1100,7 +1100,7 @@
 #(Since 8.2)
 #
 # @zero-page-detection: Whether and how to detect zero pages.
-# See description in @ZeroPageDetection.  Default is 'legacy'.
+# See description in @ZeroPageDetection.  Default is 'multifd'.
 # (since 9.0)
 #
 # Features:
@@ -1333,7 +1333,7 @@
 #(Since 8.2)
 #
 # @zero-page-detection: Whether and how to detect zero pages.
-# See description in @ZeroPageDetection.  Default is 'legacy'.
+# See description in @ZeroPageDetection.  Default is 'multifd'.
 # (since 9.0)
 #
 # Features:
-- 
2.30.2




Re: [External] Re: [PATCH v3 2/7] migration/multifd: Implement zero page transmission on the multifd thread.

2024-02-29 Thread Hao Xiang
On Wed, Feb 28, 2024 at 11:46 AM Fabiano Rosas  wrote:
>
> Hao Xiang  writes:
>
> > 1. Add zero_pages field in MultiFDPacket_t.
> > 2. Implements the zero page detection and handling on the multifd
> > threads for non-compression, zlib and zstd compression backends.
> > 3. Added a new value 'multifd' in ZeroPageDetection enumeration.
> > 4. Handle migration QEMU9.0 -> QEMU8.2 compatibility.
> > 5. Adds zero page counters and updates multifd send/receive tracing
> > format to track the newly added counters.
> >
> > Signed-off-by: Hao Xiang 
> > ---
> >  hw/core/machine.c|  4 +-
> >  hw/core/qdev-properties-system.c |  2 +-
> >  migration/meson.build|  1 +
> >  migration/multifd-zero-page.c| 78 ++
> >  migration/multifd-zlib.c | 21 ++--
> >  migration/multifd-zstd.c | 20 ++--
> >  migration/multifd.c  | 83 +++-
> >  migration/multifd.h  | 24 -
> >  migration/ram.c  |  1 -
> >  migration/trace-events   |  8 +--
> >  qapi/migration.json  |  5 +-
> >  11 files changed, 214 insertions(+), 33 deletions(-)
> >  create mode 100644 migration/multifd-zero-page.c
> >
> > diff --git a/hw/core/machine.c b/hw/core/machine.c
> > index fb5afdcae4..746da219a4 100644
> > --- a/hw/core/machine.c
> > +++ b/hw/core/machine.c
> > @@ -32,7 +32,9 @@
> >  #include "hw/virtio/virtio-net.h"
> >  #include "audio/audio.h"
> >
> > -GlobalProperty hw_compat_8_2[] = {};
> > +GlobalProperty hw_compat_8_2[] = {
> > +{ "migration", "zero-page-detection", "legacy"},
> > +};
> >  const size_t hw_compat_8_2_len = G_N_ELEMENTS(hw_compat_8_2);
> >
> >  GlobalProperty hw_compat_8_1[] = {
> > diff --git a/hw/core/qdev-properties-system.c 
> > b/hw/core/qdev-properties-system.c
> > index 228e685f52..6e6f68ae1b 100644
> > --- a/hw/core/qdev-properties-system.c
> > +++ b/hw/core/qdev-properties-system.c
> > @@ -682,7 +682,7 @@ const PropertyInfo qdev_prop_mig_mode = {
> >  const PropertyInfo qdev_prop_zero_page_detection = {
> >  .name = "ZeroPageDetection",
> >  .description = "zero_page_detection values, "
> > -   "none,legacy",
> > +   "none,legacy,multifd",
> >  .enum_table = &ZeroPageDetection_lookup,
> >  .get = qdev_propinfo_get_enum,
> >  .set = qdev_propinfo_set_enum,
> > diff --git a/migration/meson.build b/migration/meson.build
> > index 92b1cc4297..1eeb915ff6 100644
> > --- a/migration/meson.build
> > +++ b/migration/meson.build
> > @@ -22,6 +22,7 @@ system_ss.add(files(
> >'migration.c',
> >'multifd.c',
> >'multifd-zlib.c',
> > +  'multifd-zero-page.c',
> >'ram-compress.c',
> >'options.c',
> >'postcopy-ram.c',
> > diff --git a/migration/multifd-zero-page.c b/migration/multifd-zero-page.c
> > new file mode 100644
> > index 00..1650c41b26
> > --- /dev/null
> > +++ b/migration/multifd-zero-page.c
> > @@ -0,0 +1,78 @@
> > +/*
> > + * Multifd zero page detection implementation.
> > + *
> > + * Copyright (c) 2024 Bytedance Inc
> > + *
> > + * Authors:
> > + *  Hao Xiang 
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2 or 
> > later.
> > + * See the COPYING file in the top-level directory.
> > + */
> > +
> > +#include "qemu/osdep.h"
> > +#include "qemu/cutils.h"
> > +#include "exec/ramblock.h"
> > +#include "migration.h"
> > +#include "multifd.h"
> > +#include "options.h"
> > +#include "ram.h"
> > +
> > +static void swap_page_offset(ram_addr_t *pages_offset, int a, int b)
> > +{
> > +ram_addr_t temp;
> > +
> > +if (a == b) {
> > +return;
> > +}
> > +
> > +temp = pages_offset[a];
> > +pages_offset[a] = pages_offset[b];
> > +pages_offset[b] = temp;
> > +}
> > +
> > +/**
> > + * multifd_zero_page_check_send: Perform zero page detection on all pages.
> > + *
> > + * Sort the page offset array by moving all normal pages to
> > + * the left and all zero pages to the right of the array.
>
> Let's move this to the loop as a comment. Here it's better to just
> inform about the side effects:
>
> Sorts no

Re: [External] Re: [PATCH v3 2/7] migration/multifd: Implement zero page transmission on the multifd thread.

2024-02-29 Thread Hao Xiang
On Thu, Feb 29, 2024 at 7:47 AM Fabiano Rosas  wrote:
>
> Markus Armbruster  writes:
>
> > Hao Xiang  writes:
> >
> >> On Wed, Feb 28, 2024 at 1:50 AM Markus Armbruster  
> >> wrote:
> >>>
> >>> Hao Xiang  writes:
> >>>
> >>> > 1. Add zero_pages field in MultiFDPacket_t.
> >>> > 2. Implements the zero page detection and handling on the multifd
> >>> > threads for non-compression, zlib and zstd compression backends.
> >>> > 3. Added a new value 'multifd' in ZeroPageDetection enumeration.
> >>> > 4. Handle migration QEMU9.0 -> QEMU8.2 compatibility.
> >>> > 5. Adds zero page counters and updates multifd send/receive tracing
> >>> > format to track the newly added counters.
> >>> >
> >>> > Signed-off-by: Hao Xiang 
> >>>
> >>> [...]
> >>>
> >>> > diff --git a/qapi/migration.json b/qapi/migration.json
> >>> > index 1e66272f8f..5a1bb8ad62 100644
> >>> > --- a/qapi/migration.json
> >>> > +++ b/qapi/migration.json
> >>> > @@ -660,10 +660,13 @@
> >>> >  #
> >>> >  # @legacy: Perform zero page checking from main migration thread.
> >>> >  #
> >>> > +# @multifd: Perform zero page checking from multifd sender thread.
> >>> > +#
> >>> >  # Since: 9.0
> >>> > +#
> >>> >  ##
> >>> >  { 'enum': 'ZeroPageDetection',
> >>> > -  'data': [ 'none', 'legacy' ] }
> >>> > +  'data': [ 'none', 'legacy', 'multifd' ] }
> >>> >
> >>> >  ##
> >>> >  # @BitmapMigrationBitmapAliasTransform:
> >>>
> >>> What happens when you set "zero-page-detection": "multifd" *without*
> >>> enabling multifd migration?
> >>
> >> Very good question! Right now the behavior is that if "multifd
> >> migration" is not enabled, it goes through the legacy code path and
> >> the "multifd zero page" option is ignored. The legacy path has its own
> >> zero page checking and will run the same way as before. This is for
> >> backward compatibility.
> >
> > We need one of two improvements then:
> >
> > 1. Make "zero-page-detection" reject value "multifd" when multifd
> >migration is not enabled.  Document this: "Requires migration
> >capability @multifd" or similar.
> >
> > 2. Document that "multifd" means multifd only when multifd is enabled,
> >else same as "legacy".
> >
> > I prefer 1., because it's easier to document.  But migration maintainers
> > may have their own preference.  Peter, Fabiano?
>
> I think we need to go with 2 for consistency with the other multifd_*
> parameters. I don't see any validation at options.c.

Yeah, option 1 is hard to do. Someone can also set "multifd" first and
then disable multifd migration; this is an existing problem. I will
update the documentation for "multifd".
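
A condensed sketch with stubbed-out types (not the literal QEMU code) of
why option 2 is the current behavior: the handler is chosen from
migrate_multifd() alone, as ram_save_setup() does in this series, so
zero-page-detection only takes effect on the multifd path:

#include <stdbool.h>

typedef int (*save_page_fn)(void);

static int save_target_page_legacy_stub(void)  { return 1; }
static int save_target_page_multifd_stub(void) { return 1; }

static save_page_fn pick_handler(bool multifd_enabled)
{
    /* The non-multifd path never consults zero-page-detection, so
     * "multifd" there silently behaves like "legacy"; only the
     * multifd handler looks at the parameter later. */
    return multifd_enabled ? save_target_page_multifd_stub
                           : save_target_page_legacy_stub;
}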



Re: [External] Re: [PATCH v3 6/7] migration/multifd: Add zero pages and zero bytes counter to migration status interface.

2024-02-29 Thread Hao Xiang
On Wed, Feb 28, 2024 at 10:01 PM Markus Armbruster  wrote:
>
> Hao Xiang  writes:
>
> > On Wed, Feb 28, 2024 at 1:52 AM Markus Armbruster  wrote:
> >>
> >> Hao Xiang  writes:
> >>
> >> > This change extends the MigrationStatus interface to track zero pages
> >> > and zero bytes counter.
> >> >
> >> > Signed-off-by: Hao Xiang 
> >>
> >> [...]
> >>
> >> > diff --git a/qapi/migration.json b/qapi/migration.json
> >> > index a0a85a0312..171734c07e 100644
> >> > --- a/qapi/migration.json
> >> > +++ b/qapi/migration.json
> >> > @@ -63,6 +63,10 @@
> >> >  # between 0 and @dirty-sync-count * @multifd-channels.  (since
> >> >  # 7.1)
> >> >  #
> >> > +# @zero-pages: number of zero pages (since 9.0)
> >> > +#
> >> > +# @zero-bytes: number of zero bytes sent (since 9.0)
> >> > +#
> >>
> >> Awfully terse.  How are these two related?
> >
> > Sorry I forgot to address the same feedback from the last version.
>
> Happens :)
>
> > zero-pages are the number of pages being detected as all "zero" and
> > hence the payload isn't sent over the network. zero-bytes is basically
> > zero-pages * page_size. It's the number of bytes migrated (but not
> > actually sent through the network) because they are all "zero". These
> > two are related to the existing interface below. normal and
> > normal-bytes are the same representation of pages who are not all
> > "zero" and are actually sent through the network.
> >
> > # @normal: number of normal pages (since 1.2)
> > #
> > # @normal-bytes: number of normal bytes sent (since 1.2)
>
> We also have
>
>   # @duplicate: number of duplicate (zero) pages (since 1.2)
>   #
>   # @skipped: number of skipped zero pages. Always zero, only provided for
>   # compatibility (since 1.5)
>
> Page skipping was introduced in 1.5, and withdrawn in 1.5.3 and 1.6.
> @skipped was formally deprecated in 8.1.  It'll soon be gone, no need to
> worry about it now.
>
> That leaves three values related to pages sent: @normal (and
> @normal-bytes), @duplicate (but no @duplicate-bytes), and @zero-pages
> (and @zero-bytes).
>
> I unwittingly created a naming inconsistency between @normal,
> @duplicate, and @zero-pages when I asked you to rename @zero to
> @zero-pages.
>
> The meaning of the three values is not obvious, and the doc comments
> don't explain them.  Can you, or anybody familiar with migration,
> explain them to me?
>
> MigrationStats return some values as bytes, some as pages, and some as
> both.  I hate that.  Can we standardize on bytes?

I added zero-pages/zero-bytes because I thought they were not there. But
it turns out "duplicate" already serves that purpose. "zero-pages" and
"zero-bytes" are really additional information alongside
"normal/normal-bytes". Peter suggested that if we add them, we can
slowly retire "duplicate" at a later point.
I don't know the historical reason why pages/bytes are reported the way
they are today. The way I understand migration, the granularity of RAM
migration is the page. There are only two types of pages: 1) normal and
2) zero. A zero page's payload is not sent through the network because
we already know what it looks like; only the page offset is sent.
Normal pages are pages that are not zero; the entire page is sent
through the network to the target host. If a user knows the zero/normal
counts, they can already calculate zero-bytes/normal-bytes (zero/normal
* page size), but it's convenient to see both. During development, I
check these counters a lot and they are useful.
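
To make the arithmetic concrete, here is a minimal sketch of the
relationship described above (illustrative only, not the exact QEMU
code; page_size stands for the target page size):

    uint64_t zero_bytes   = zero_pages * page_size;   /* migrated, not sent */
    uint64_t normal_bytes = normal_pages * page_size; /* actually sent      */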

>
> >>
> >> >  # Features:
> >> >  #
> >> >  # @deprecated: Member @skipped is always zero since 1.5.3
> >>
> >> [...]
> >>
>



Re: [External] Re: [PATCH v3 2/7] migration/multifd: Implement zero page transmission on the multifd thread.

2024-02-28 Thread Hao Xiang
On Wed, Feb 28, 2024 at 1:50 AM Markus Armbruster  wrote:
>
> Hao Xiang  writes:
>
> > 1. Add zero_pages field in MultiFDPacket_t.
> > 2. Implements the zero page detection and handling on the multifd
> > threads for non-compression, zlib and zstd compression backends.
> > 3. Added a new value 'multifd' in ZeroPageDetection enumeration.
> > 4. Handle migration QEMU9.0 -> QEMU8.2 compatibility.
> > 5. Adds zero page counters and updates multifd send/receive tracing
> > format to track the newly added counters.
> >
> > Signed-off-by: Hao Xiang 
>
> [...]
>
> > diff --git a/qapi/migration.json b/qapi/migration.json
> > index 1e66272f8f..5a1bb8ad62 100644
> > --- a/qapi/migration.json
> > +++ b/qapi/migration.json
> > @@ -660,10 +660,13 @@
> >  #
> >  # @legacy: Perform zero page checking from main migration thread.
> >  #
> > +# @multifd: Perform zero page checking from multifd sender thread.
> > +#
> >  # Since: 9.0
> > +#
> >  ##
> >  { 'enum': 'ZeroPageDetection',
> > -  'data': [ 'none', 'legacy' ] }
> > +  'data': [ 'none', 'legacy', 'multifd' ] }
> >
> >  ##
> >  # @BitmapMigrationBitmapAliasTransform:
>
> What happens when you set "zero-page-detection": "multifd" *without*
> enabling multifd migration?

Very good question! Right now the behavior is that if "multifd
migration" is not enabled, it goes through the legacy code path and
the "multifd zero page" option is ignored. The legacy path has its own
zero page checking and will run the same way as before. This is for
backward compatibility.
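
The selection can be sketched as follows; this mirrors the hook setup
in ram_save_setup() from this series, where the "multifd" zero page
option only takes effect on the multifd path:

    if (migrate_multifd()) {
        migration_ops->ram_save_target_page = ram_save_target_page_multifd;
    } else {
        /* Legacy path: runs its own zero page checking, same as before. */
        migration_ops->ram_save_target_page = ram_save_target_page_legacy;
    }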
>



Re: [External] Re: [PATCH v3 6/7] migration/multifd: Add zero pages and zero bytes counter to migration status interface.

2024-02-28 Thread Hao Xiang
On Wed, Feb 28, 2024 at 1:52 AM Markus Armbruster  wrote:
>
> Hao Xiang  writes:
>
> > This change extends the MigrationStatus interface to track zero pages
> > and zero bytes counter.
> >
> > Signed-off-by: Hao Xiang 
>
> [...]
>
> > diff --git a/qapi/migration.json b/qapi/migration.json
> > index a0a85a0312..171734c07e 100644
> > --- a/qapi/migration.json
> > +++ b/qapi/migration.json
> > @@ -63,6 +63,10 @@
> >  # between 0 and @dirty-sync-count * @multifd-channels.  (since
> >  # 7.1)
> >  #
> > +# @zero-pages: number of zero pages (since 9.0)
> > +#
> > +# @zero-bytes: number of zero bytes sent (since 9.0)
> > +#
>
> Awfully terse.  How are these two related?

Sorry I forgot to address the same feedback from the last version.
zero-pages are the number of pages being detected as all "zero" and
hence the payload isn't sent over the network. zero-bytes is basically
zero-pages * page_size. It's the number of bytes migrated (but not
actually sent through the network) because they are all "zero". These
two are related to the existing interface below. normal and
normal-bytes are the same representation for pages that are not all
"zero" and are actually sent through the network.

# @normal: number of normal pages (since 1.2)
#
# @normal-bytes: number of normal bytes sent (since 1.2)

>
> >  # Features:
> >  #
> >  # @deprecated: Member @skipped is always zero since 1.5.3
>
> [...]
>



Re: [External] Re: [PATCH v3 1/7] migration/multifd: Add new migration option zero-page-detection.

2024-02-28 Thread Hao Xiang
On Wed, Feb 28, 2024 at 1:43 AM Markus Armbruster  wrote:
>
> Hao Xiang  writes:
>
> > This new parameter controls where the zero page checking is running.
> > 1. If this parameter is set to 'legacy', zero page checking is
> > done in the migration main thread.
> > 2. If this parameter is set to 'none', zero page checking is disabled.
> >
> > Signed-off-by: Hao Xiang 
>
> [...]
>
> > diff --git a/qapi/migration.json b/qapi/migration.json
> > index 5a565d9b8d..1e66272f8f 100644
> > --- a/qapi/migration.json
> > +++ b/qapi/migration.json
> > @@ -653,6 +653,18 @@
> >  { 'enum': 'MigMode',
> >'data': [ 'normal', 'cpr-reboot' ] }
> >
> > +##
> > +# @ZeroPageDetection:
> > +#
> > +# @none: Do not perform zero page checking.
> > +#
> > +# @legacy: Perform zero page checking from main migration thread.
> > +#
> > +# Since: 9.0
> > +##
> > +{ 'enum': 'ZeroPageDetection',
> > +  'data': [ 'none', 'legacy' ] }
> > +
> >  ##
> >  # @BitmapMigrationBitmapAliasTransform:
> >  #
> > @@ -874,6 +886,9 @@
> >  # @mode: Migration mode. See description in @MigMode. Default is 'normal'.
> >  #(Since 8.2)
> >  #
> > +# @zero-page-detection: Whether and how to detect zero pages. More details
> > +# see description in @ZeroPageDetection. Default is 'legacy'.  (since 
> > 9.0)
> > +#
>
> I'm not sure we need to point to the member's type.  If we want to, we
> better fix the prose: "For additional information, see
> @ZeroPageDetection" or similar.

This is mimicking what was done for the "mode" migration option. There
aren't many enumeration types on the interface I can learn from.

Existing code

#
# @mode: Migration mode. See description in @MigMode. Default is 'normal'.
# (Since 8.2)

>
> Two spaces between sentences for consistency, please.  Also, limit line
> length 70 characters.
>
> Together:
>
># @zero-page-detection: Whether and how to detect zero pages.  For
># additional information, see @ZeroPageDetection.  Default is
># 'multifd'.  (since 9.0)
>
> Same for the other two copies.

Will change that.

>
> >  # Features:
> >  #
> >  # @deprecated: Member @block-incremental is deprecated.  Use
> > @@ -907,7 +922,8 @@
> > 'block-bitmap-mapping',
> > { 'name': 'x-vcpu-dirty-limit-period', 'features': ['unstable'] 
> > },
> > 'vcpu-dirty-limit',
> > -   'mode'] }
> > +   'mode',
> > +   'zero-page-detection'] }
> >
> >  ##
> >  # @MigrateSetParameters:
> > @@ -1066,6 +1082,9 @@
> >  # @mode: Migration mode. See description in @MigMode. Default is 'normal'.
> >  #(Since 8.2)
> >  #
> > +# @zero-page-detection: Whether and how to detect zero pages. More details
> > +# see description in @ZeroPageDetection. Default is 'legacy'.  (since 
> > 9.0)
> > +#
> >  # Features:
> >  #
> >  # @deprecated: Member @block-incremental is deprecated.  Use
> > @@ -1119,7 +1138,8 @@
> >  '*x-vcpu-dirty-limit-period': { 'type': 'uint64',
> >  'features': [ 'unstable' ] },
> >  '*vcpu-dirty-limit': 'uint64',
> > -'*mode': 'MigMode'} }
> > +'*mode': 'MigMode',
> > +'*zero-page-detection': 'ZeroPageDetection'} }
> >
> >  ##
> >  # @migrate-set-parameters:
> > @@ -1294,6 +1314,9 @@
> >  # @mode: Migration mode. See description in @MigMode. Default is 'normal'.
> >  #(Since 8.2)
> >  #
> > +# @zero-page-detection: Whether and how to detect zero pages. More details
> > +# see description in @ZeroPageDetection. Default is 'legacy'.  (since 
> > 9.0)
> > +#
> >  # Features:
> >  #
> >  # @deprecated: Member @block-incremental is deprecated.  Use
> > @@ -1344,7 +1367,8 @@
> >  '*x-vcpu-dirty-limit-period': { 'type': 'uint64',
> >  'features': [ 'unstable' ] },
> >  '*vcpu-dirty-limit': 'uint64',
> > -'*mode': 'MigMode'} }
> > +'*mode': 'MigMode',
> > +'*zero-page-detection': 'ZeroPageDetection'} }
> >
> >  ##
> >  # @query-migrate-parameters:
>



[PATCH v3 7/7] Update maintainer contact for migration multifd zero page checking acceleration.

2024-02-26 Thread Hao Xiang
Add myself as the maintainer of the multifd zero page checking
acceleration functionality.

Signed-off-by: Hao Xiang 
---
 MAINTAINERS | 5 +
 1 file changed, 5 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 992799171f..4a4f8f24e0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3413,6 +3413,11 @@ F: tests/migration/
 F: util/userfaultfd.c
 X: migration/rdma*
 
+Migration multifd zero page checking acceleration
+M: Hao Xiang 
+S: Maintained
+F: migration/multifd-zero-page.c
+
 RDMA Migration
 R: Li Zhijian 
 R: Peter Xu 
-- 
2.30.2




[PATCH v3 2/7] migration/multifd: Implement zero page transmission on the multifd thread.

2024-02-26 Thread Hao Xiang
1. Add a zero_pages field in MultiFDPacket_t.
2. Implement zero page detection and handling on the multifd
threads for the non-compression, zlib and zstd compression backends.
3. Add a new value 'multifd' to the ZeroPageDetection enumeration.
4. Handle QEMU 9.0 -> QEMU 8.2 migration compatibility.
5. Add zero page counters and update the multifd send/receive tracing
format to track the newly added counters.

Signed-off-by: Hao Xiang 
---
 hw/core/machine.c|  4 +-
 hw/core/qdev-properties-system.c |  2 +-
 migration/meson.build|  1 +
 migration/multifd-zero-page.c| 78 ++
 migration/multifd-zlib.c | 21 ++--
 migration/multifd-zstd.c | 20 ++--
 migration/multifd.c  | 83 +++-
 migration/multifd.h  | 24 -
 migration/ram.c  |  1 -
 migration/trace-events   |  8 +--
 qapi/migration.json  |  5 +-
 11 files changed, 214 insertions(+), 33 deletions(-)
 create mode 100644 migration/multifd-zero-page.c

diff --git a/hw/core/machine.c b/hw/core/machine.c
index fb5afdcae4..746da219a4 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -32,7 +32,9 @@
 #include "hw/virtio/virtio-net.h"
 #include "audio/audio.h"
 
-GlobalProperty hw_compat_8_2[] = {};
+GlobalProperty hw_compat_8_2[] = {
+{ "migration", "zero-page-detection", "legacy"},
+};
 const size_t hw_compat_8_2_len = G_N_ELEMENTS(hw_compat_8_2);
 
 GlobalProperty hw_compat_8_1[] = {
diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
index 228e685f52..6e6f68ae1b 100644
--- a/hw/core/qdev-properties-system.c
+++ b/hw/core/qdev-properties-system.c
@@ -682,7 +682,7 @@ const PropertyInfo qdev_prop_mig_mode = {
 const PropertyInfo qdev_prop_zero_page_detection = {
 .name = "ZeroPageDetection",
 .description = "zero_page_detection values, "
-   "none,legacy",
+   "none,legacy,multifd",
 .enum_table = &ZeroPageDetection_lookup,
 .get = qdev_propinfo_get_enum,
 .set = qdev_propinfo_set_enum,
diff --git a/migration/meson.build b/migration/meson.build
index 92b1cc4297..1eeb915ff6 100644
--- a/migration/meson.build
+++ b/migration/meson.build
@@ -22,6 +22,7 @@ system_ss.add(files(
   'migration.c',
   'multifd.c',
   'multifd-zlib.c',
+  'multifd-zero-page.c',
   'ram-compress.c',
   'options.c',
   'postcopy-ram.c',
diff --git a/migration/multifd-zero-page.c b/migration/multifd-zero-page.c
new file mode 100644
index 00..1650c41b26
--- /dev/null
+++ b/migration/multifd-zero-page.c
@@ -0,0 +1,78 @@
+/*
+ * Multifd zero page detection implementation.
+ *
+ * Copyright (c) 2024 Bytedance Inc
+ *
+ * Authors:
+ *  Hao Xiang 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/cutils.h"
+#include "exec/ramblock.h"
+#include "migration.h"
+#include "multifd.h"
+#include "options.h"
+#include "ram.h"
+
+static void swap_page_offset(ram_addr_t *pages_offset, int a, int b)
+{
+ram_addr_t temp;
+
+if (a == b) {
+return;
+}
+
+temp = pages_offset[a];
+pages_offset[a] = pages_offset[b];
+pages_offset[b] = temp;
+}
+
+/**
+ * multifd_zero_page_check_send: Perform zero page detection on all pages.
+ *
+ * Sort the page offset array by moving all normal pages to
+ * the left and all zero pages to the right of the array.
+ *
+ * @param p A pointer to the send params.
+ */
+void multifd_zero_page_check_send(MultiFDSendParams *p)
+{
+/*
+ * QEMU versions older than 9.0 don't understand zero pages
+ * on the multifd channel. This switch is required to
+ * maintain backward compatibility.
+ */
+bool use_multifd_zero_page =
+(migrate_zero_page_detection() == ZERO_PAGE_DETECTION_MULTIFD);
+MultiFDPages_t *pages = p->pages;
+RAMBlock *rb = pages->block;
+int index_normal = 0;
+int index_zero = pages->num - 1;
+
+while (index_normal <= index_zero) {
+uint64_t offset = pages->offset[index_normal];
+if (use_multifd_zero_page &&
+buffer_is_zero(rb->host + offset, p->page_size)) {
+swap_page_offset(pages->offset, index_normal, index_zero);
+index_zero--;
+ram_release_page(rb->idstr, offset);
+} else {
+index_normal++;
+pages->normal_num++;
+}
+}
+}
+
+void multifd_zero_page_check_recv(MultiFDRecvParams *p)
+{
+for (int i = 0; i < p->zero_num; i++) {
+void *page = p->host + p->zero[i];
+if (!buffer_is_zero(page, p->page_size)) {
+memset(page, 0, p->page_size);
+}
+}
+}
diff --git a/migration
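
For reference, a self-contained sketch of the two-index partition used
by multifd_zero_page_check_send() above; is_zero[] stands in for
calling buffer_is_zero() on the real page contents, and the offsets are
assumed to index is_zero[] directly:

    #include <stdbool.h>

    static void partition_offsets(unsigned long *offsets, const bool *is_zero,
                                  int num, int *normal_num)
    {
        int index_normal = 0;
        int index_zero = num - 1;

        while (index_normal <= index_zero) {
            if (is_zero[offsets[index_normal]]) {
                /* Move the zero page to the tail. The element swapped
                 * into index_normal is re-examined on the next pass. */
                unsigned long tmp = offsets[index_normal];
                offsets[index_normal] = offsets[index_zero];
                offsets[index_zero] = tmp;
                index_zero--;
            } else {
                index_normal++;
            }
        }
        /* On exit: offsets[0..*normal_num-1] are normal pages and
         * offsets[*normal_num..num-1] are zero pages. */
        *normal_num = index_normal;
    }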

[PATCH v3 0/7] Introduce multifd zero page checking.

2024-02-26 Thread Hao Xiang
v3 update:
* Change "zero" to "zero-pages" and use type size for "zero-bytes".
* Fixed ZeroPageDetection interface description.
* Move zero page unit tests to its own path.
* Removed some asserts.
* Added backward compatibility support for migration 9.0 -> 8.2.
* Removed fields "zero" and "normal" page address arrays from v2. Now
multifd_zero_page_check_send sorts normal/zero pages in the "offset" array.

v2 update:
* Implement zero-page-detection switch with enumeration "legacy",
"none" and "multifd".
* Move normal/zero pages from MultiFDSendParams to MultiFDPages_t.
* Add zeros and zero_bytes accounting.

This patchset is based on Juan Quintela's old series here
https://lore.kernel.org/all/20220802063907.18882-1-quint...@redhat.com/

In the multifd live migration model, there is a single migration main
thread scanning the page map, queuing the pages to multiple multifd
sender threads. The migration main thread runs zero page checking on
every page before queuing the page to the sender threads. Zero page
checking is a CPU intensive task and hence having a single thread doing
all that doesn't scale well. This change introduces a new function
to run the zero page checking on the multifd sender threads. This
patchset also lays the ground work for future changes to offload zero
page checking task to accelerator hardwares.
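
In pseudo-C, the division of labor described above looks roughly like
this (next_dirty_page() is a hypothetical helper standing in for the
page map scan; multifd_queue_page() is the real queueing entry point):

    /* Migration main thread: scan and queue only, no zero page checking. */
    while (next_dirty_page(rs, &block, &offset)) {
        multifd_queue_page(block, offset);
    }

    /* Each multifd sender thread, per batch of queued pages: */
    multifd_zero_page_check_send(p);  /* CPU-heavy check, off the main thread */
    /* ... then send normal page payloads plus the zero page offsets ... */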

Use two Intel 4th generation Xeon servers for testing.

Architecture:x86_64
CPU(s):  192
Thread(s) per core:  2
Core(s) per socket:  48
Socket(s):   2
NUMA node(s):2
Vendor ID:   GenuineIntel
CPU family:  6
Model:   143
Model name:  Intel(R) Xeon(R) Platinum 8457C
Stepping:8
CPU MHz: 2538.624
CPU max MHz: 3800.0000
CPU min MHz: 800.0000

Perform multifd live migration with below setup:
1. VM has 100GB memory. All pages in the VM are zero pages.
2. Use tcp socket for live migration.
3. Use 4 multifd channels and zero page checking on migration main thread.
4. Use 1/2/4 multifd channels and zero page checking on multifd sender
threads.
5. Record migration total time from sender QEMU console's "info migrate"
command.

+--------------------+----------------+
| zero-page-checking | total-time(ms) |
+--------------------+----------------+
| main-thread        | 9629           |
+--------------------+----------------+
| multifd-1-threads  | 6182           |
+--------------------+----------------+
| multifd-2-threads  | 4643           |
+--------------------+----------------+
| multifd-4-threads  | 4143           |
+--------------------+----------------+

Apply this patchset on top of commit
dd88d696ccecc0f3018568f8e281d3d526041e6f

Hao Xiang (7):
  migration/multifd: Add new migration option zero-page-detection.
  migration/multifd: Implement zero page transmission on the multifd
thread.
  migration/multifd: Implement ram_save_target_page_multifd to handle
multifd version of MigrationOps::ram_save_target_page.
  migration/multifd: Enable multifd zero page checking by default.
  migration/multifd: Add new migration test cases for legacy zero page
checking.
  migration/multifd: Add zero pages and zero bytes counter to migration
status interface.
  Update maintainer contact for migration multifd zero page checking
acceleration.

 MAINTAINERS |  5 ++
 hw/core/machine.c   |  4 +-
 hw/core/qdev-properties-system.c| 10 
 include/hw/qdev-properties-system.h |  4 ++
 migration/meson.build   |  1 +
 migration/migration-hmp-cmds.c  | 13 +
 migration/migration.c   |  2 +
 migration/multifd-zero-page.c   | 78 +++
 migration/multifd-zlib.c| 21 ++--
 migration/multifd-zstd.c| 20 +--
 migration/multifd.c | 83 -
 migration/multifd.h | 24 -
 migration/options.c | 21 
 migration/options.h |  1 +
 migration/ram.c | 40 ++
 migration/trace-events  |  8 +--
 qapi/migration.json | 48 +++--
 tests/migration/guestperf/engine.py |  2 +
 tests/qtest/migration-test.c| 52 ++
 19 files changed, 393 insertions(+), 44 deletions(-)
 create mode 100644 migration/multifd-zero-page.c

-- 
2.30.2




[PATCH v3 6/7] migration/multifd: Add zero pages and zero bytes counter to migration status interface.

2024-02-26 Thread Hao Xiang
This change extends the MigrationStatus interface to track the zero
pages and zero bytes counters.

Signed-off-by: Hao Xiang 
---
 migration/migration-hmp-cmds.c  |  4 
 migration/migration.c   |  2 ++
 qapi/migration.json | 15 ++-
 tests/migration/guestperf/engine.py |  2 ++
 4 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c
index 7e96ae6ffd..a38ad0255d 100644
--- a/migration/migration-hmp-cmds.c
+++ b/migration/migration-hmp-cmds.c
@@ -111,6 +111,10 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
info->ram->normal);
 monitor_printf(mon, "normal bytes: %" PRIu64 " kbytes\n",
info->ram->normal_bytes >> 10);
+monitor_printf(mon, "zero pages: %" PRIu64 " pages\n",
+   info->ram->zero_pages);
+monitor_printf(mon, "zero bytes: %" PRIu64 " kbytes\n",
+   info->ram->zero_bytes >> 10);
 monitor_printf(mon, "dirty sync count: %" PRIu64 "\n",
info->ram->dirty_sync_count);
 monitor_printf(mon, "page size: %" PRIu64 " kbytes\n",
diff --git a/migration/migration.c b/migration/migration.c
index ab21de2cad..a99f86f273 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1112,6 +1112,8 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s)
 info->ram->skipped = 0;
 info->ram->normal = stat64_get(&mig_stats.normal_pages);
 info->ram->normal_bytes = info->ram->normal * page_size;
+info->ram->zero_pages = stat64_get(&mig_stats.zero_pages);
+info->ram->zero_bytes = info->ram->zero_pages * page_size;
 info->ram->mbps = s->mbps;
 info->ram->dirty_sync_count =
 stat64_get(&mig_stats.dirty_sync_count);
diff --git a/qapi/migration.json b/qapi/migration.json
index a0a85a0312..171734c07e 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -63,6 +63,10 @@
 # between 0 and @dirty-sync-count * @multifd-channels.  (since
 # 7.1)
 #
+# @zero-pages: number of zero pages (since 9.0)
+#
+# @zero-bytes: number of zero bytes sent (since 9.0)
+#
 # Features:
 #
 # @deprecated: Member @skipped is always zero since 1.5.3
@@ -81,7 +85,8 @@
'multifd-bytes': 'uint64', 'pages-per-second': 'uint64',
'precopy-bytes': 'uint64', 'downtime-bytes': 'uint64',
'postcopy-bytes': 'uint64',
-   'dirty-sync-missed-zero-copy': 'uint64' } }
+   'dirty-sync-missed-zero-copy': 'uint64',
+   'zero-pages': 'int', 'zero-bytes': 'size' } }
 
 ##
 # @XBZRLECacheStats:
@@ -332,6 +337,8 @@
 #   "duplicate":123,
 #   "normal":123,
 #   "normal-bytes":123456,
+#   "zero-pages":123,
+#   "zero-bytes":123456,
 #   "dirty-sync-count":15
 # }
 #  }
@@ -358,6 +365,8 @@
 # "duplicate":123,
 # "normal":123,
 # "normal-bytes":123456,
+# "zero-pages":123,
+# "zero-bytes":123456,
 # "dirty-sync-count":15
 #  }
 #   }
@@ -379,6 +388,8 @@
 # "duplicate":123,
 # "normal":123,
 # "normal-bytes":123456,
+# "zero-pages":123,
+# "zero-bytes":123456,
 # "dirty-sync-count":15
 #  },
 #  "disk":{
@@ -405,6 +416,8 @@
 # "duplicate":10,
 # "normal":,
 # "normal-bytes":3412992,
+# "zero-pages":,
+# "zero-bytes":3412992,
 # "dirty-sync-count":15
 #  },
 #  "xbzrle-cache":{
diff --git a/tests/migration/guestperf/engine.py b/tests/migration/guestperf/engine.py
index 608d7270f6..693e07c227 100644
--- a/tests/migration/guestperf/engine.py
+++ b/tests/migration/guestperf/engine.py
@@ -92,6 +92,8 @@ def _migrate_progress(self, vm):
 info["ram"].get("skipped", 0),
 info["ram"].get("normal", 0),
 info["ram"].get("normal-bytes", 0),
+info["ram"].get("zero-pages", 0);
+info["ram"].get("zero-bytes", 0);
 info["ram"].get("dirty-pages-rate", 0),
 info["ram"].get("mbps", 0),
 info["ram"].get("dirty-sync-count", 0)
-- 
2.30.2




[PATCH v3 3/7] migration/multifd: Implement ram_save_target_page_multifd to handle multifd version of MigrationOps::ram_save_target_page.

2024-02-26 Thread Hao Xiang
1. Add a dedicated handler for MigrationOps::ram_save_target_page in
multifd live migration.
2. Refactor ram_save_target_page_legacy so that the legacy and multifd
handlers don't have internal functions calling into each other.

Signed-off-by: Hao Xiang 
---
 migration/ram.c | 43 ++-
 1 file changed, 30 insertions(+), 13 deletions(-)

diff --git a/migration/ram.c b/migration/ram.c
index 414cd0d753..f60627e11a 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -1123,10 +1123,6 @@ static int save_zero_page(RAMState *rs, PageSearchStatus *pss,
 QEMUFile *file = pss->pss_channel;
 int len = 0;
 
-if (migrate_zero_page_detection() == ZERO_PAGE_DETECTION_NONE) {
-return 0;
-}
-
 if (!buffer_is_zero(p, TARGET_PAGE_SIZE)) {
 return 0;
 }
@@ -2046,7 +2042,6 @@ static bool save_compress_page(RAMState *rs, PageSearchStatus *pss,
  */
 static int ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss)
 {
-RAMBlock *block = pss->block;
 ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
 int res;
 
@@ -2062,17 +2057,34 @@ static int ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss)
 return 1;
 }
 
+return ram_save_page(rs, pss);
+}
+
+/**
+ * ram_save_target_page_multifd: send one target page to multifd workers
+ *
+ * Returns 1 if the page was queued, -1 otherwise.
+ *
+ * @rs: current RAM state
+ * @pss: data about the page we want to send
+ */
+static int ram_save_target_page_multifd(RAMState *rs, PageSearchStatus *pss)
+{
+RAMBlock *block = pss->block;
+ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
+
 /*
- * Do not use multifd in postcopy as one whole host page should be
- * placed.  Meanwhile postcopy requires atomic update of pages, so even
- * if host page size == guest page size the dest guest during run may
- * still see partially copied pages which is data corruption.
+ * Backward compatibility support. While using multifd live
+ * migration, we still need to handle zero page checking on the
+ * migration main thread.
  */
-if (migrate_multifd() && !migration_in_postcopy()) {
-return ram_save_multifd_page(block, offset);
+if (migrate_zero_page_detection() == ZERO_PAGE_DETECTION_LEGACY) {
+if (save_zero_page(rs, pss, offset)) {
+return 1;
+}
 }
 
-return ram_save_page(rs, pss);
+return ram_save_multifd_page(block, offset);
 }
 
 /* Should be called before sending a host page */
@@ -2984,7 +2996,12 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
 }
 
 migration_ops = g_malloc0(sizeof(MigrationOps));
-migration_ops->ram_save_target_page = ram_save_target_page_legacy;
+
+if (migrate_multifd()) {
+migration_ops->ram_save_target_page = ram_save_target_page_multifd;
+} else {
+migration_ops->ram_save_target_page = ram_save_target_page_legacy;
+}
 
 bql_unlock();
 ret = multifd_send_sync_main();
-- 
2.30.2




[PATCH v3 5/7] migration/multifd: Add new migration test cases for legacy zero page checking.

2024-02-26 Thread Hao Xiang
Zero page checking is now done on the multifd sender threads by default,
but we still provide options for backward compatibility. This change adds
qtest migration test cases that set the zero-page-detection option to
"legacy" or "none" and run multifd migration with zero page checking on
the migration main thread or disabled entirely.

Signed-off-by: Hao Xiang 
---
 tests/qtest/migration-test.c | 52 
 1 file changed, 52 insertions(+)

diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index 8a5bb1752e..65b531d871 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -2621,6 +2621,24 @@ test_migrate_precopy_tcp_multifd_start(QTestState *from,
 return test_migrate_precopy_tcp_multifd_start_common(from, to, "none");
 }
 
+static void *
+test_migrate_precopy_tcp_multifd_start_zero_page_legacy(QTestState *from,
+QTestState *to)
+{
+test_migrate_precopy_tcp_multifd_start_common(from, to, "none");
+migrate_set_parameter_str(from, "zero-page-detection", "legacy");
+return NULL;
+}
+
+static void *
+test_migration_precopy_tcp_multifd_start_no_zero_page(QTestState *from,
+  QTestState *to)
+{
+test_migrate_precopy_tcp_multifd_start_common(from, to, "none");
+migrate_set_parameter_str(from, "zero-page-detection", "none");
+return NULL;
+}
+
 static void *
 test_migrate_precopy_tcp_multifd_zlib_start(QTestState *from,
 QTestState *to)
@@ -2652,6 +2670,36 @@ static void test_multifd_tcp_none(void)
 test_precopy_common(&args);
 }
 
+static void test_multifd_tcp_zero_page_legacy(void)
+{
+MigrateCommon args = {
+.listen_uri = "defer",
+.start_hook = test_migrate_precopy_tcp_multifd_start_zero_page_legacy,
+/*
+ * Multifd is more complicated than most of the features, it
+ * directly takes guest page buffers when sending, make sure
+ * everything will work alright even if guest page is changing.
+ */
+.live = true,
+};
+test_precopy_common(&args);
+}
+
+static void test_multifd_tcp_no_zero_page(void)
+{
+MigrateCommon args = {
+.listen_uri = "defer",
+.start_hook = test_migration_precopy_tcp_multifd_start_no_zero_page,
+/*
+ * Multifd is more complicated than most of the features, it
+ * directly takes guest page buffers when sending, make sure
+ * everything will work alright even if guest page is changing.
+ */
+.live = true,
+};
+test_precopy_common(&args);
+}
+
 static void test_multifd_tcp_zlib(void)
 {
 MigrateCommon args = {
@@ -3550,6 +3598,10 @@ int main(int argc, char **argv)
 }
 migration_test_add("/migration/multifd/tcp/plain/none",
test_multifd_tcp_none);
+migration_test_add("/migration/multifd/tcp/plain/zero-page/legacy",
+   test_multifd_tcp_zero_page_legacy);
+migration_test_add("/migration/multifd/tcp/plain/zero-page/none",
+   test_multifd_tcp_no_zero_page);
 migration_test_add("/migration/multifd/tcp/plain/cancel",
test_multifd_tcp_cancel);
 migration_test_add("/migration/multifd/tcp/plain/zlib",
-- 
2.30.2




[PATCH v3 4/7] migration/multifd: Enable multifd zero page checking by default.

2024-02-26 Thread Hao Xiang
Set default "zero-page-detection" option to "multifd". Now zero page
checking can be done in the multifd threads and this becomes the
default configuration. We still provide backward compatibility
where zero page checking is done from the migration main thread.

Signed-off-by: Hao Xiang 
---
 migration/options.c | 2 +-
 qapi/migration.json | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/migration/options.c b/migration/options.c
index 3c603391b0..3c79b6ccd4 100644
--- a/migration/options.c
+++ b/migration/options.c
@@ -181,7 +181,7 @@ Property migration_properties[] = {
   MIG_MODE_NORMAL),
 DEFINE_PROP_ZERO_PAGE_DETECTION("zero-page-detection", MigrationState,
parameters.zero_page_detection,
-   ZERO_PAGE_DETECTION_LEGACY),
+   ZERO_PAGE_DETECTION_MULTIFD),
 
 /* Migration capabilities */
 DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
diff --git a/qapi/migration.json b/qapi/migration.json
index 5a1bb8ad62..a0a85a0312 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -890,7 +890,7 @@
 #(Since 8.2)
 #
 # @zero-page-detection: Whether and how to detect zero pages. More details
-# see description in @ZeroPageDetection. Default is 'legacy'.  (since 9.0)
+# see description in @ZeroPageDetection. Default is 'multifd'.  (since 9.0)
 #
 # Features:
 #
@@ -1086,7 +1086,7 @@
 #(Since 8.2)
 #
 # @zero-page-detection: Whether and how to detect zero pages. More details
-# see description in @ZeroPageDetection. Default is 'legacy'.  (since 9.0)
+# see description in @ZeroPageDetection. Default is 'multifd'.  (since 9.0)
 #
 # Features:
 #
@@ -1318,7 +1318,7 @@
 #(Since 8.2)
 #
 # @zero-page-detection: Whether and how to detect zero pages. More details
-# see description in @ZeroPageDetection. Default is 'legacy'.  (since 9.0)
+# see description in @ZeroPageDetection. Default is 'multifd'.  (since 9.0)
 #
 # Features:
 #
-- 
2.30.2




[PATCH v3 1/7] migration/multifd: Add new migration option zero-page-detection.

2024-02-26 Thread Hao Xiang
This new parameter controls where zero page checking runs.
1. If this parameter is set to 'legacy', zero page checking is
done in the migration main thread.
2. If this parameter is set to 'none', zero page checking is disabled.

Signed-off-by: Hao Xiang 
---
 hw/core/qdev-properties-system.c| 10 ++
 include/hw/qdev-properties-system.h |  4 
 migration/migration-hmp-cmds.c  |  9 +
 migration/options.c | 21 
 migration/options.h |  1 +
 migration/ram.c |  4 
 qapi/migration.json | 30 ++---
 7 files changed, 76 insertions(+), 3 deletions(-)

diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
index 1a396521d5..228e685f52 100644
--- a/hw/core/qdev-properties-system.c
+++ b/hw/core/qdev-properties-system.c
@@ -679,6 +679,16 @@ const PropertyInfo qdev_prop_mig_mode = {
 .set_default_value = qdev_propinfo_set_default_value_enum,
 };
 
+const PropertyInfo qdev_prop_zero_page_detection = {
+.name = "ZeroPageDetection",
+.description = "zero_page_detection values, "
+   "none,legacy",
+.enum_table = &ZeroPageDetection_lookup,
+.get = qdev_propinfo_get_enum,
+.set = qdev_propinfo_set_enum,
+.set_default_value = qdev_propinfo_set_default_value_enum,
+};
+
 /* --- Reserved Region --- */
 
 /*
diff --git a/include/hw/qdev-properties-system.h b/include/hw/qdev-properties-system.h
index 06c359c190..839b170235 100644
--- a/include/hw/qdev-properties-system.h
+++ b/include/hw/qdev-properties-system.h
@@ -8,6 +8,7 @@ extern const PropertyInfo qdev_prop_macaddr;
 extern const PropertyInfo qdev_prop_reserved_region;
 extern const PropertyInfo qdev_prop_multifd_compression;
 extern const PropertyInfo qdev_prop_mig_mode;
+extern const PropertyInfo qdev_prop_zero_page_detection;
 extern const PropertyInfo qdev_prop_losttickpolicy;
 extern const PropertyInfo qdev_prop_blockdev_on_error;
 extern const PropertyInfo qdev_prop_bios_chs_trans;
@@ -47,6 +48,9 @@ extern const PropertyInfo qdev_prop_iothread_vq_mapping_list;
 #define DEFINE_PROP_MIG_MODE(_n, _s, _f, _d) \
 DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_mig_mode, \
MigMode)
+#define DEFINE_PROP_ZERO_PAGE_DETECTION(_n, _s, _f, _d) \
+DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_zero_page_detection, \
+   ZeroPageDetection)
 #define DEFINE_PROP_LOSTTICKPOLICY(_n, _s, _f, _d) \
 DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_losttickpolicy, \
 LostTickPolicy)
diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c
index 99b49df5dd..7e96ae6ffd 100644
--- a/migration/migration-hmp-cmds.c
+++ b/migration/migration-hmp-cmds.c
@@ -344,6 +344,11 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict)
 monitor_printf(mon, "%s: %s\n",
 MigrationParameter_str(MIGRATION_PARAMETER_MULTIFD_COMPRESSION),
 MultiFDCompression_str(params->multifd_compression));
+assert(params->has_zero_page_detection);
+monitor_printf(mon, "%s: %s\n",
+MigrationParameter_str(MIGRATION_PARAMETER_ZERO_PAGE_DETECTION),
+qapi_enum_lookup(&ZeroPageDetection_lookup,
+params->zero_page_detection));
 monitor_printf(mon, "%s: %" PRIu64 " bytes\n",
 MigrationParameter_str(MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE),
 params->xbzrle_cache_size);
@@ -634,6 +639,10 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict)
 p->has_multifd_zstd_level = true;
 visit_type_uint8(v, param, &p->multifd_zstd_level, &err);
 break;
+case MIGRATION_PARAMETER_ZERO_PAGE_DETECTION:
+p->has_zero_page_detection = true;
+visit_type_ZeroPageDetection(v, param, &p->zero_page_detection, &err);
+break;
 case MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE:
 p->has_xbzrle_cache_size = true;
 if (!visit_type_size(v, param, &cache_size, &err)) {
diff --git a/migration/options.c b/migration/options.c
index 3e3e0b93b4..3c603391b0 100644
--- a/migration/options.c
+++ b/migration/options.c
@@ -179,6 +179,9 @@ Property migration_properties[] = {
 DEFINE_PROP_MIG_MODE("mode", MigrationState,
   parameters.mode,
   MIG_MODE_NORMAL),
+DEFINE_PROP_ZERO_PAGE_DETECTION("zero-page-detection", MigrationState,
+   parameters.zero_page_detection,
+   ZERO_PAGE_DETECTION_LEGACY),
 
 /* Migration capabilities */
 DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
@@ -903,6 +906,13 @@ uint64_t migrate_xbzrle_cache_size(void)
 return s->parameters.xbzrle_cache_size;
 }
 
+ZeroPageDetection migrate_zero_page_detection(void)
+{
+MigrationState *s = migrate_get_cu

Re: [External] Re: [PATCH v2 1/7] migration/multifd: Add new migration option zero-page-detection.

2024-02-26 Thread Hao Xiang
On Sun, Feb 25, 2024 at 11:19 PM Wang, Lei  wrote:
>
> On 2/17/2024 6:39, Hao Xiang wrote:
> > This new parameter controls where the zero page checking is running.
> > 1. If this parameter is set to 'legacy', zero page checking is
> > done in the migration main thread.
> > 2. If this parameter is set to 'none', zero page checking is disabled.
> >
> > Signed-off-by: Hao Xiang 
> > ---
> >  hw/core/qdev-properties-system.c| 10 ++
> >  include/hw/qdev-properties-system.h |  4 
> >  migration/migration-hmp-cmds.c  |  9 +
> >  migration/options.c | 21 
> >  migration/options.h |  1 +
> >  migration/ram.c |  4 
> >  qapi/migration.json | 30 ++---
> >  7 files changed, 76 insertions(+), 3 deletions(-)
> >
> > diff --git a/hw/core/qdev-properties-system.c 
> > b/hw/core/qdev-properties-system.c
> > index 1a396521d5..63843f18b5 100644
> > --- a/hw/core/qdev-properties-system.c
> > +++ b/hw/core/qdev-properties-system.c
> > @@ -679,6 +679,16 @@ const PropertyInfo qdev_prop_mig_mode = {
> >  .set_default_value = qdev_propinfo_set_default_value_enum,
> >  };
> >
> > +const PropertyInfo qdev_prop_zero_page_detection = {
> > +.name = "ZeroPageDetection",
> > +.description = "zero_page_detection values, "
> > +   "multifd,legacy,none",
>
> Nit: Maybe multifd/legacy/none?

I changed it to

.description = "zero_page_detection values, "
"none,legacy,multifd",

Since both "," and "/" are used in existing code, I think it would be
fine either way.
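
For completeness, once this series is applied the parameter can be
exercised from the HMP monitor (a usage sketch based on the
hmp_migrate_set_parameter handler added in patch 1):

    (qemu) migrate_set_parameter zero-page-detection legacy
    (qemu) info migrate_parameters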



Re: [External] Re: [PATCH v2 4/7] migration/multifd: Enable zero page checking from multifd threads.

2024-02-24 Thread Hao Xiang
On Thu, Feb 22, 2024 at 10:02 PM Hao Xiang  wrote:
>
> On Thu, Feb 22, 2024 at 6:33 PM Peter Xu  wrote:
> >
> > On Wed, Feb 21, 2024 at 06:06:19PM -0300, Fabiano Rosas wrote:
> > > Hao Xiang  writes:
> > >
> > > > This change adds a dedicated handler for 
> > > > MigrationOps::ram_save_target_page in
> > >
> > > nit: Add a dedicated handler...
> > >
> > > Usually "this patch/change" is used only when necessary to avoid
> > > ambiguity.
> > >
> > > > multifd live migration. Now zero page checking can be done in the 
> > > > multifd threads
> > > > and this becomes the default configuration. We still provide backward 
> > > > compatibility
> > > > where zero page checking is done from the migration main thread.
> > > >
> > > > Signed-off-by: Hao Xiang 
> > > > ---
> > > >  migration/multifd.c |  1 +
> > > >  migration/options.c |  2 +-
> > > >  migration/ram.c | 53 ++---
> > > >  3 files changed, 42 insertions(+), 14 deletions(-)
> > > >
> > > > diff --git a/migration/multifd.c b/migration/multifd.c
> > > > index fbb40ea10b..ef5dad1019 100644
> > > > --- a/migration/multifd.c
> > > > +++ b/migration/multifd.c
> > > > @@ -13,6 +13,7 @@
> > > >  #include "qemu/osdep.h"
> > > >  #include "qemu/cutils.h"
> > >
> > > This include...
> > >
> > > >  #include "qemu/rcu.h"
> > > > +#include "qemu/cutils.h"
> > >
> > > is there already.
> > >
> > > >  #include "exec/target_page.h"
> > > >  #include "sysemu/sysemu.h"
> > > >  #include "exec/ramblock.h"
> > > > diff --git a/migration/options.c b/migration/options.c
> > > > index 3c603391b0..3c79b6ccd4 100644
> > > > --- a/migration/options.c
> > > > +++ b/migration/options.c
> > > > @@ -181,7 +181,7 @@ Property migration_properties[] = {
> > > >MIG_MODE_NORMAL),
> > > >  DEFINE_PROP_ZERO_PAGE_DETECTION("zero-page-detection", 
> > > > MigrationState,
> > > > parameters.zero_page_detection,
> > > > -   ZERO_PAGE_DETECTION_LEGACY),
> > > > +   ZERO_PAGE_DETECTION_MULTIFD),
> > >
> > > I think we'll need something to avoid a 9.0 -> 8.2 migration with this
> > > enabled. Otherwise it will go along happily until we get data corruption
> > > because the new QEMU didn't send any zero pages on the migration thread
> > > and the old QEMU did not look for them in the multifd packet.
> >
> > It could be even worse, as the new QEMU will only attach "normal" pages
> > after the multifd packet, the old QEMU could read more than it could,
> > expecting all pages..
> >
> > >
> > > Perhaps bumping the MULTIFD_VERSION when ZERO_PAGE_DETECTION_MULTIFD is
> > > in use. We'd just need to fix the test in the new QEMU to check
> > > (msg.version > MULTIFD_VERSION) instead of (msg.version != 
> > > MULTIFD_VERSION).
> >
> > IMHO we don't need yet to change MULTIFD_VERSION, what we need is perhaps a
> > compat entry in hw_compat_8_2 setting "zero-page-detection" to "legacy".
> > We should make sure when "legacy" is set, multifd ran the old protocol
> > (zero_num will always be 0, and will be ignored by old QEMUs, IIUC).
> >
> > One more comment is, when repost please consider split this patch into two;
> > The new ram_save_target_page_multifd() hook can be done in another patch,
> > AFAIU.
>
> Sorry, I kept missing this. I will keep telling myself, compatibility
> is king. I will set the hw_compat_8_2 setting and make sure to test
> migration 9.0 -> 8.2 fails with "multifd" option set.
> Will split patches.

So I just want to make sure I am coding the right solution. I added
setting "zero-page-detection" to "legacy" in hw_compat_8_2 and tested
it. The behavior is that if I set machine type to pc-q35-8.2,
zero-page-detection will automatically be set to "legacy". But if I
set the machine type to pc-q35-9.0, zero-page-detection will be the
default value "multifd". However, this doesn't seem to be a hard
requirement because I can still override zero-page-detection to
multifd on ma
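
The mechanism in question can be sketched as follows. This mirrors the
hw_compat_8_2 entry from this series; a compat entry only changes the
default for older machine types, so an explicit command-line or
parameter setting still wins, which is the override behavior observed
above:

    GlobalProperty hw_compat_8_2[] = {
        /* pc-q35-8.2 and older default to the old behavior ... */
        { "migration", "zero-page-detection", "legacy" },
    };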

Re: [External] Re: [PATCH v2 3/7] migration/multifd: Zero page transmission on the multifd thread.

2024-02-24 Thread Hao Xiang
On Thu, Feb 22, 2024 at 9:15 PM Hao Xiang  wrote:
>
> On Thu, Feb 22, 2024 at 6:21 PM Peter Xu  wrote:
> >
> > On Wed, Feb 21, 2024 at 06:04:10PM -0300, Fabiano Rosas wrote:
> > > Hao Xiang  writes:
> > >
> > > > 1. Implements the zero page detection and handling on the multifd
> > > > threads for non-compression, zlib and zstd compression backends.
> > > > 2. Added a new value 'multifd' in ZeroPageDetection enumeration.
> > > > 3. Add proper asserts to ensure pages->normal are used for normal pages
> > > > in all scenarios.
> > > >
> > > > Signed-off-by: Hao Xiang 
> > > > ---
> > > >  migration/meson.build |  1 +
> > > >  migration/multifd-zero-page.c | 59 +++
> > > >  migration/multifd-zlib.c  | 26 ---
> > > >  migration/multifd-zstd.c  | 25 ---
> > > >  migration/multifd.c   | 50 +++--
> > > >  migration/multifd.h   |  7 +
> > > >  qapi/migration.json   |  4 ++-
> > > >  7 files changed, 151 insertions(+), 21 deletions(-)
> > > >  create mode 100644 migration/multifd-zero-page.c
> > > >
> > > > diff --git a/migration/meson.build b/migration/meson.build
> > > > index 92b1cc4297..1eeb915ff6 100644
> > > > --- a/migration/meson.build
> > > > +++ b/migration/meson.build
> > > > @@ -22,6 +22,7 @@ system_ss.add(files(
> > > >'migration.c',
> > > >'multifd.c',
> > > >'multifd-zlib.c',
> > > > +  'multifd-zero-page.c',
> > > >'ram-compress.c',
> > > >'options.c',
> > > >'postcopy-ram.c',
> > > > diff --git a/migration/multifd-zero-page.c 
> > > > b/migration/multifd-zero-page.c
> > > > new file mode 100644
> > > > index 00..f0cd8e2c53
> > > > --- /dev/null
> > > > +++ b/migration/multifd-zero-page.c
> > > > @@ -0,0 +1,59 @@
> > > > +/*
> > > > + * Multifd zero page detection implementation.
> > > > + *
> > > > + * Copyright (c) 2024 Bytedance Inc
> > > > + *
> > > > + * Authors:
> > > > + *  Hao Xiang 
> > > > + *
> > > > + * This work is licensed under the terms of the GNU GPL, version 2 or 
> > > > later.
> > > > + * See the COPYING file in the top-level directory.
> > > > + */
> > > > +
> > > > +#include "qemu/osdep.h"
> > > > +#include "qemu/cutils.h"
> > > > +#include "exec/ramblock.h"
> > > > +#include "migration.h"
> > > > +#include "multifd.h"
> > > > +#include "options.h"
> > > > +#include "ram.h"
> > > > +
> > > > +void multifd_zero_page_check_send(MultiFDSendParams *p)
> > > > +{
> > > > +/*
> > > > + * QEMU older than 9.0 don't understand zero page
> > > > + * on multifd channel. This switch is required to
> > > > + * maintain backward compatibility.
> > > > + */
> > > > +bool use_multifd_zero_page =
> > > > +(migrate_zero_page_detection() == ZERO_PAGE_DETECTION_MULTIFD);
> > > > +MultiFDPages_t *pages = p->pages;
> > > > +RAMBlock *rb = pages->block;
> > > > +
> > > > +assert(pages->num != 0);
> > > > +assert(pages->normal_num == 0);
> > > > +assert(pages->zero_num == 0);
> > >
> > > We can drop these before the final version.
> > >
> > > > +
> > > > +for (int i = 0; i < pages->num; i++) {
> > > > +uint64_t offset = pages->offset[i];
> > > > +if (use_multifd_zero_page &&
> > > > +buffer_is_zero(rb->host + offset, p->page_size)) {
> > > > +pages->zero[pages->zero_num] = offset;
> > > > +pages->zero_num++;
> > > > +ram_release_page(rb->idstr, offset);
> > > > +} else {
> > > > +pages->normal[pages->normal_num] = offset;
> > > > +pages->normal_num++;
> > > > +}
> > > > +}
> > >
> > > I don't think it's super clean to have three arrays offset, zero and
> > &g

Re: [External] Re: [PATCH v2 3/7] migration/multifd: Zero page transmission on the multifd thread.

2024-02-24 Thread Hao Xiang
On Thu, Feb 22, 2024 at 8:38 PM Hao Xiang  wrote:
>
> On Fri, Feb 16, 2024 at 9:08 PM Richard Henderson
>  wrote:
> >
> > On 2/16/24 12:39, Hao Xiang wrote:
> > > +void multifd_zero_page_check_recv(MultiFDRecvParams *p)
> > > +{
> > > +for (int i = 0; i < p->zero_num; i++) {
> > > +void *page = p->host + p->zero[i];
> > > +if (!buffer_is_zero(page, p->page_size)) {
> > > +memset(page, 0, p->page_size);
> > > +}
> > > +}
> > > +}
> >
> > You should not check the buffer is zero here, you should just zero it.
>
> I will fix it in the next version.

I tested unconditionally zeroing all pages, but the performance was
worse than before. In my test case, most pages are zero pages. I think
what happened is that the destination host already has those pages
zeroed, so performing a memcmp is much faster than a memset on all the
zero pages.
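
The effect can be reproduced with a standalone micro-benchmark
(illustrative sketch, not QEMU code; QEMU's buffer_is_zero() is
SIMD-optimized, the naive loop below just demonstrates the idea):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <time.h>

    #define PAGE  4096
    #define PAGES 262144  /* 1 GiB of pages, all zero */

    static bool is_zero(const uint8_t *p, size_t n)
    {
        for (size_t i = 0; i < n; i++) {
            if (p[i]) {
                return false;
            }
        }
        return true;
    }

    int main(void)
    {
        /* calloc gives copy-on-write zero pages, mirroring a destination
         * host whose pages are already zero. */
        uint8_t *buf = calloc(PAGES, PAGE);
        clock_t t0;

        if (!buf) {
            return 1;
        }

        t0 = clock();
        for (size_t i = 0; i < PAGES; i++) {
            uint8_t *page = buf + i * PAGE;
            if (!is_zero(page, PAGE)) {  /* read-only check, no faults */
                memset(page, 0, PAGE);
            }
        }
        printf("check-then-zero: %.3fs\n",
               (double)(clock() - t0) / CLOCKS_PER_SEC);

        t0 = clock();
        for (size_t i = 0; i < PAGES; i++) {
            /* Unconditional writes dirty every page and take the faults. */
            memset(buf + i * PAGE, 0, PAGE);
        }
        printf("unconditional:   %.3fs\n",
               (double)(clock() - t0) / CLOCKS_PER_SEC);

        free(buf);
        return 0;
    }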

>
> >
> >
> > r~



Re: [External] Re: [PATCH v2 4/7] migration/multifd: Enable zero page checking from multifd threads.

2024-02-22 Thread Hao Xiang
On Thu, Feb 22, 2024 at 6:33 PM Peter Xu  wrote:
>
> On Wed, Feb 21, 2024 at 06:06:19PM -0300, Fabiano Rosas wrote:
> > Hao Xiang  writes:
> >
> > > This change adds a dedicated handler for 
> > > MigrationOps::ram_save_target_page in
> >
> > nit: Add a dedicated handler...
> >
> > Usually "this patch/change" is used only when necessary to avoid
> > ambiguity.
> >
> > > multifd live migration. Now zero page checking can be done in the multifd 
> > > threads
> > > and this becomes the default configuration. We still provide backward 
> > > compatibility
> > > where zero page checking is done from the migration main thread.
> > >
> > > Signed-off-by: Hao Xiang 
> > > ---
> > >  migration/multifd.c |  1 +
> > >  migration/options.c |  2 +-
> > >  migration/ram.c | 53 ++---
> > >  3 files changed, 42 insertions(+), 14 deletions(-)
> > >
> > > diff --git a/migration/multifd.c b/migration/multifd.c
> > > index fbb40ea10b..ef5dad1019 100644
> > > --- a/migration/multifd.c
> > > +++ b/migration/multifd.c
> > > @@ -13,6 +13,7 @@
> > >  #include "qemu/osdep.h"
> > >  #include "qemu/cutils.h"
> >
> > This include...
> >
> > >  #include "qemu/rcu.h"
> > > +#include "qemu/cutils.h"
> >
> > is there already.
> >
> > >  #include "exec/target_page.h"
> > >  #include "sysemu/sysemu.h"
> > >  #include "exec/ramblock.h"
> > > diff --git a/migration/options.c b/migration/options.c
> > > index 3c603391b0..3c79b6ccd4 100644
> > > --- a/migration/options.c
> > > +++ b/migration/options.c
> > > @@ -181,7 +181,7 @@ Property migration_properties[] = {
> > >MIG_MODE_NORMAL),
> > >  DEFINE_PROP_ZERO_PAGE_DETECTION("zero-page-detection", 
> > > MigrationState,
> > > parameters.zero_page_detection,
> > > -   ZERO_PAGE_DETECTION_LEGACY),
> > > +   ZERO_PAGE_DETECTION_MULTIFD),
> >
> > I think we'll need something to avoid a 9.0 -> 8.2 migration with this
> > enabled. Otherwise it will go along happily until we get data corruption
> > because the new QEMU didn't send any zero pages on the migration thread
> > and the old QEMU did not look for them in the multifd packet.
>
> It could be even worse, as the new QEMU will only attach "normal" pages
> after the multifd packet, the old QEMU could read more than it could,
> expecting all pages..
>
> >
> > Perhaps bumping the MULTIFD_VERSION when ZERO_PAGE_DETECTION_MULTIFD is
> > in use. We'd just need to fix the test in the new QEMU to check
> > (msg.version > MULTIFD_VERSION) instead of (msg.version != MULTIFD_VERSION).
>
> IMHO we don't need yet to change MULTIFD_VERSION, what we need is perhaps a
> compat entry in hw_compat_8_2 setting "zero-page-detection" to "legacy".
> We should make sure when "legacy" is set, multifd ran the old protocol
> (zero_num will always be 0, and will be ignored by old QEMUs, IIUC).
>
> One more comment is, when repost please consider split this patch into two;
> The new ram_save_target_page_multifd() hook can be done in another patch,
> AFAIU.

Sorry, I kept missing this. I will keep telling myself, compatibility
is king. I will set the hw_compat_8_2 setting and make sure to test
migration 9.0 -> 8.2 fails with "multifd" option set.
Will split patches.

>
> >
> > >
> > >  /* Migration capabilities */
> > >  DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
> > > diff --git a/migration/ram.c b/migration/ram.c
> > > index 5ece9f042e..b088c5a98c 100644
> > > --- a/migration/ram.c
> > > +++ b/migration/ram.c
> > > @@ -1123,10 +1123,6 @@ static int save_zero_page(RAMState *rs, 
> > > PageSearchStatus *pss,
> > >  QEMUFile *file = pss->pss_channel;
> > >  int len = 0;
> > >
> > > -if (migrate_zero_page_detection() != ZERO_PAGE_DETECTION_LEGACY) {
> > > -return 0;
> > > -}
> >
> > How does 'none' work now?
> >
> > > -
> > >  if (!buffer_is_zero(p, TARGET_PAGE_SIZE)) {
> > >  return 0;
> > >  }
> > > @@ -1256,6 +1252,10 @@ static int ram_save_page(RAMState *rs, 
> > > PageSearchStatus *

Re: [External] Re: [PATCH v2 4/7] migration/multifd: Enable zero page checking from multifd threads.

2024-02-22 Thread Hao Xiang
On Wed, Feb 21, 2024 at 1:06 PM Fabiano Rosas  wrote:
>
> Hao Xiang  writes:
>
> > This change adds a dedicated handler for MigrationOps::ram_save_target_page 
> > in
>
> nit: Add a dedicated handler...
>
> Usually "this patch/change" is used only when necessary to avoid
> ambiguity.

Will do.

>
> > multifd live migration. Now zero page checking can be done in the multifd 
> > threads
> > and this becomes the default configuration. We still provide backward 
> > compatibility
> > where zero page checking is done from the migration main thread.
> >
> > Signed-off-by: Hao Xiang 
> > ---
> >  migration/multifd.c |  1 +
> >  migration/options.c |  2 +-
> >  migration/ram.c | 53 ++---
> >  3 files changed, 42 insertions(+), 14 deletions(-)
> >
> > diff --git a/migration/multifd.c b/migration/multifd.c
> > index fbb40ea10b..ef5dad1019 100644
> > --- a/migration/multifd.c
> > +++ b/migration/multifd.c
> > @@ -13,6 +13,7 @@
> >  #include "qemu/osdep.h"
> >  #include "qemu/cutils.h"
>
> This include...
>
> >  #include "qemu/rcu.h"
> > +#include "qemu/cutils.h"
>
> is there already.
>
> >  #include "exec/target_page.h"
> >  #include "sysemu/sysemu.h"
> >  #include "exec/ramblock.h"
> > diff --git a/migration/options.c b/migration/options.c
> > index 3c603391b0..3c79b6ccd4 100644
> > --- a/migration/options.c
> > +++ b/migration/options.c
> > @@ -181,7 +181,7 @@ Property migration_properties[] = {
> >MIG_MODE_NORMAL),
> >  DEFINE_PROP_ZERO_PAGE_DETECTION("zero-page-detection", MigrationState,
> > parameters.zero_page_detection,
> > -   ZERO_PAGE_DETECTION_LEGACY),
> > +   ZERO_PAGE_DETECTION_MULTIFD),
>
> I think we'll need something to avoid a 9.0 -> 8.2 migration with this
> enabled. Otherwise it will go along happily until we get data corruption
> because the new QEMU didn't send any zero pages on the migration thread
> and the old QEMU did not look for them in the multifd packet.
>
> Perhaps bumping the MULTIFD_VERSION when ZERO_PAGE_DETECTION_MULTIFD is
> in use. We'd just need to fix the test in the new QEMU to check
> (msg.version > MULTIFD_VERSION) instead of (msg.version != MULTIFD_VERSION).
>
> >
> >  /* Migration capabilities */
> >  DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
> > diff --git a/migration/ram.c b/migration/ram.c
> > index 5ece9f042e..b088c5a98c 100644
> > --- a/migration/ram.c
> > +++ b/migration/ram.c
> > @@ -1123,10 +1123,6 @@ static int save_zero_page(RAMState *rs, 
> > PageSearchStatus *pss,
> >  QEMUFile *file = pss->pss_channel;
> >  int len = 0;
> >
> > -if (migrate_zero_page_detection() != ZERO_PAGE_DETECTION_LEGACY) {
> > -return 0;
> > -}
>
> How does 'none' work now?

I tested it and all pages are transferred with payload (including the
zero pages).

>
> > -
> >  if (!buffer_is_zero(p, TARGET_PAGE_SIZE)) {
> >  return 0;
> >  }
> > @@ -1256,6 +1252,10 @@ static int ram_save_page(RAMState *rs, 
> > PageSearchStatus *pss)
> >
> >  static int ram_save_multifd_page(RAMBlock *block, ram_addr_t offset)
> >  {
> > +assert(migrate_multifd());
> > +assert(!migrate_compress());
> > +assert(!migration_in_postcopy());
>
> Drop these, please. Keep only the asserts that are likely to trigger
> during development, such as the existing ones at multifd_send_pages.

I think I have gotten enough feedback regarding too many asserts. I will
drop these. assert() is not compiled into release builds, correct?
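
(For reference, the standard C behavior in question: assert() expands
to a no-op when NDEBUG is defined at compile time, e.g.

    #include <assert.h>
    /* Built with -DNDEBUG, the check below compiles away entirely. */
    assert(migrate_multifd());

though QEMU builds generally keep assertions enabled.)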

>
> > +
> >  if (!multifd_queue_page(block, offset)) {
> >  return -1;
> >  }
> > @@ -2046,7 +2046,6 @@ static bool save_compress_page(RAMState *rs, 
> > PageSearchStatus *pss,
> >   */
> >  static int ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss)
> >  {
> > -RAMBlock *block = pss->block;
> >  ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
> >  int res;
> >
> > @@ -2062,17 +2061,40 @@ static int ram_save_target_page_legacy(RAMState 
> > *rs, PageSearchStatus *pss)
> >  return 1;
> >  }
> >
> > +return ram_save_page(rs, pss);
>
> Look at where git put this! Are you using the default diff alg

Re: [External] Re: [PATCH v2 4/7] migration/multifd: Enable zero page checking from multifd threads.

2024-02-22 Thread Hao Xiang
On Wed, Feb 21, 2024 at 8:11 AM Elena Ufimtseva  wrote:
>
>
>
> On Fri, Feb 16, 2024 at 2:42 PM Hao Xiang  wrote:
>>
>> This change adds a dedicated handler for MigrationOps::ram_save_target_page 
>> in
>> multifd live migration. Now zero page checking can be done in the multifd 
>> threads
>> and this becomes the default configuration. We still provide backward 
>> compatibility
>> where zero page checking is done from the migration main thread.
>>
>> Signed-off-by: Hao Xiang 
>> ---
>>  migration/multifd.c |  1 +
>>  migration/options.c |  2 +-
>>  migration/ram.c | 53 ++---
>>  3 files changed, 42 insertions(+), 14 deletions(-)
>>
>> diff --git a/migration/multifd.c b/migration/multifd.c
>> index fbb40ea10b..ef5dad1019 100644
>> --- a/migration/multifd.c
>> +++ b/migration/multifd.c
>> @@ -13,6 +13,7 @@
>>  #include "qemu/osdep.h"
>>  #include "qemu/cutils.h"
>>  #include "qemu/rcu.h"
>> +#include "qemu/cutils.h"
>>  #include "exec/target_page.h"
>>  #include "sysemu/sysemu.h"
>>  #include "exec/ramblock.h"
>> diff --git a/migration/options.c b/migration/options.c
>> index 3c603391b0..3c79b6ccd4 100644
>> --- a/migration/options.c
>> +++ b/migration/options.c
>> @@ -181,7 +181,7 @@ Property migration_properties[] = {
>>MIG_MODE_NORMAL),
>>  DEFINE_PROP_ZERO_PAGE_DETECTION("zero-page-detection", MigrationState,
>> parameters.zero_page_detection,
>> -   ZERO_PAGE_DETECTION_LEGACY),
>> +   ZERO_PAGE_DETECTION_MULTIFD),
>>
>>  /* Migration capabilities */
>>  DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
>> diff --git a/migration/ram.c b/migration/ram.c
>> index 5ece9f042e..b088c5a98c 100644
>> --- a/migration/ram.c
>> +++ b/migration/ram.c
>> @@ -1123,10 +1123,6 @@ static int save_zero_page(RAMState *rs, 
>> PageSearchStatus *pss,
>>  QEMUFile *file = pss->pss_channel;
>>  int len = 0;
>>
>> -if (migrate_zero_page_detection() != ZERO_PAGE_DETECTION_LEGACY) {
>> -return 0;
>> -}
>> -
>>  if (!buffer_is_zero(p, TARGET_PAGE_SIZE)) {
>>  return 0;
>>  }
>> @@ -1256,6 +1252,10 @@ static int ram_save_page(RAMState *rs, 
>> PageSearchStatus *pss)
>>
>>  static int ram_save_multifd_page(RAMBlock *block, ram_addr_t offset)
>>  {
>> +assert(migrate_multifd());
>
> We only call ram_save_multifd_page() if:
>  if (migrate_multifd()) {
> migration_ops->ram_save_target_page = ram_save_target_page_multifd;
> So this assert is not needed.

The point of an assert is to ensure the current function is called
with the correct assumptions. In the future, if someone moves this
function to a different place, we can catch the potential issues.

>
>> +assert(!migrate_compress());
>>
>> +assert(!migration_in_postcopy());
>
> These two are redundant and done before we call in here.
>
>> +
>>  if (!multifd_queue_page(block, offset)) {
>>  return -1;
>>  }
>> @@ -2046,7 +2046,6 @@ static bool save_compress_page(RAMState *rs, 
>> PageSearchStatus *pss,
>>   */
>>  static int ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss)
>>  {
>> -RAMBlock *block = pss->block;
>>  ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
>>  int res;
>>
>> @@ -2062,17 +2061,40 @@ static int ram_save_target_page_legacy(RAMState *rs, 
>> PageSearchStatus *pss)
>>  return 1;
>>  }
>>
>> +return ram_save_page(rs, pss);
>> +}
>> +
>> +/**
>> + * ram_save_target_page_multifd: save one target page
>> + *
>> + * Returns the number of pages written
>> + *
>> + * @rs: current RAM state
>> + * @pss: data about the page we want to send
>> + */
>> +static int ram_save_target_page_multifd(RAMState *rs, PageSearchStatus *pss)
>> +{
>> +RAMBlock *block = pss->block;
>> +ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
>> +
>> +/* Multifd is not compatible with old compression. */
>> +assert(!migrate_compress());
>
> Do we need to check this for every page?
>
>>
>> +/* Multifd is not compabible with postcopy. */
>> +assert(!m

Re: [External] Re: [PATCH v2 3/7] migration/multifd: Zero page transmission on the multifd thread.

2024-02-22 Thread Hao Xiang
On Wed, Feb 21, 2024 at 1:04 PM Fabiano Rosas  wrote:
>
> Hao Xiang  writes:
>
> > 1. Implements the zero page detection and handling on the multifd
> > threads for non-compression, zlib and zstd compression backends.
> > 2. Added a new value 'multifd' in ZeroPageDetection enumeration.
> > 3. Add proper asserts to ensure pages->normal are used for normal pages
> > in all scenarios.
> >
> > Signed-off-by: Hao Xiang 
> > ---
> >  migration/meson.build |  1 +
> >  migration/multifd-zero-page.c | 59 +++
> >  migration/multifd-zlib.c  | 26 ---
> >  migration/multifd-zstd.c  | 25 ---
> >  migration/multifd.c   | 50 +++--
> >  migration/multifd.h   |  7 +
> >  qapi/migration.json   |  4 ++-
> >  7 files changed, 151 insertions(+), 21 deletions(-)
> >  create mode 100644 migration/multifd-zero-page.c
> >
> > diff --git a/migration/meson.build b/migration/meson.build
> > index 92b1cc4297..1eeb915ff6 100644
> > --- a/migration/meson.build
> > +++ b/migration/meson.build
> > @@ -22,6 +22,7 @@ system_ss.add(files(
> >'migration.c',
> >'multifd.c',
> >'multifd-zlib.c',
> > +  'multifd-zero-page.c',
> >'ram-compress.c',
> >'options.c',
> >'postcopy-ram.c',
> > diff --git a/migration/multifd-zero-page.c b/migration/multifd-zero-page.c
> > new file mode 100644
> > index 00..f0cd8e2c53
> > --- /dev/null
> > +++ b/migration/multifd-zero-page.c
> > @@ -0,0 +1,59 @@
> > +/*
> > + * Multifd zero page detection implementation.
> > + *
> > + * Copyright (c) 2024 Bytedance Inc
> > + *
> > + * Authors:
> > + *  Hao Xiang 
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2 or 
> > later.
> > + * See the COPYING file in the top-level directory.
> > + */
> > +
> > +#include "qemu/osdep.h"
> > +#include "qemu/cutils.h"
> > +#include "exec/ramblock.h"
> > +#include "migration.h"
> > +#include "multifd.h"
> > +#include "options.h"
> > +#include "ram.h"
> > +
> > +void multifd_zero_page_check_send(MultiFDSendParams *p)
> > +{
> > +/*
> > + * QEMU versions older than 9.0 don't understand zero pages
> > + * on the multifd channel. This switch is required to
> > + * maintain backward compatibility.
> > + */
> > +bool use_multifd_zero_page =
> > +(migrate_zero_page_detection() == ZERO_PAGE_DETECTION_MULTIFD);
> > +MultiFDPages_t *pages = p->pages;
> > +RAMBlock *rb = pages->block;
> > +
> > +assert(pages->num != 0);
> > +assert(pages->normal_num == 0);
> > +assert(pages->zero_num == 0);
>
> We can drop these before the final version.

Elena has the same concern. I will drop these.

>
> > +
> > +for (int i = 0; i < pages->num; i++) {
> > +uint64_t offset = pages->offset[i];
> > +if (use_multifd_zero_page &&
> > +buffer_is_zero(rb->host + offset, p->page_size)) {
> > +pages->zero[pages->zero_num] = offset;
> > +pages->zero_num++;
> > +ram_release_page(rb->idstr, offset);
> > +} else {
> > +pages->normal[pages->normal_num] = offset;
> > +pages->normal_num++;
> > +}
> > +}
>
> I don't think it's super clean to have three arrays offset, zero and
> normal, all sized for the full packet size. It might be possible to just
> carry a bitmap of non-zero pages along with pages->offset and operate on
> that instead.
>
> What do you think?
>
> Peter, any ideas? Should we just leave this for another time?
>
> > +}
> > +
> > +void multifd_zero_page_check_recv(MultiFDRecvParams *p)
> > +{
> > +for (int i = 0; i < p->zero_num; i++) {
> > +void *page = p->host + p->zero[i];
> > +if (!buffer_is_zero(page, p->page_size)) {
> > +memset(page, 0, p->page_size);
> > +}
> > +}
> > +}
> > diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c
> > index 012e3bdea1..cdfe0fa70e 100644
> > --- a/migration/multifd-zlib.c
> > +++ b/migration/multifd-zlib.c
> > @@ -123,13 +123,20 @@ static int zlib_send_prepare(MultiFDSendParams *p, 
> > Error **errp)
> >

Re: [External] Re: [PATCH v2 3/7] migration/multifd: Zero page transmission on the multifd thread.

2024-02-22 Thread Hao Xiang
On Thu, Feb 22, 2024 at 6:21 PM Peter Xu  wrote:
>
> On Wed, Feb 21, 2024 at 06:04:10PM -0300, Fabiano Rosas wrote:
> > Hao Xiang  writes:
> >
> > > 1. Implements the zero page detection and handling on the multifd
> > > threads for non-compression, zlib and zstd compression backends.
> > > 2. Added a new value 'multifd' in ZeroPageDetection enumeration.
> > > 3. Add proper asserts to ensure pages->normal are used for normal pages
> > > in all scenarios.
> > >
> > > Signed-off-by: Hao Xiang 
> > > ---
> > >  migration/meson.build |  1 +
> > >  migration/multifd-zero-page.c | 59 +++
> > >  migration/multifd-zlib.c  | 26 ---
> > >  migration/multifd-zstd.c  | 25 ---
> > >  migration/multifd.c   | 50 +++--
> > >  migration/multifd.h   |  7 +
> > >  qapi/migration.json   |  4 ++-
> > >  7 files changed, 151 insertions(+), 21 deletions(-)
> > >  create mode 100644 migration/multifd-zero-page.c
> > >
> > > diff --git a/migration/meson.build b/migration/meson.build
> > > index 92b1cc4297..1eeb915ff6 100644
> > > --- a/migration/meson.build
> > > +++ b/migration/meson.build
> > > @@ -22,6 +22,7 @@ system_ss.add(files(
> > >'migration.c',
> > >'multifd.c',
> > >'multifd-zlib.c',
> > > +  'multifd-zero-page.c',
> > >'ram-compress.c',
> > >'options.c',
> > >'postcopy-ram.c',
> > > diff --git a/migration/multifd-zero-page.c b/migration/multifd-zero-page.c
> > > new file mode 100644
> > > index 00..f0cd8e2c53
> > > --- /dev/null
> > > +++ b/migration/multifd-zero-page.c
> > > @@ -0,0 +1,59 @@
> > > +/*
> > > + * Multifd zero page detection implementation.
> > > + *
> > > + * Copyright (c) 2024 Bytedance Inc
> > > + *
> > > + * Authors:
> > > + *  Hao Xiang 
> > > + *
> > > + * This work is licensed under the terms of the GNU GPL, version 2 or 
> > > later.
> > > + * See the COPYING file in the top-level directory.
> > > + */
> > > +
> > > +#include "qemu/osdep.h"
> > > +#include "qemu/cutils.h"
> > > +#include "exec/ramblock.h"
> > > +#include "migration.h"
> > > +#include "multifd.h"
> > > +#include "options.h"
> > > +#include "ram.h"
> > > +
> > > +void multifd_zero_page_check_send(MultiFDSendParams *p)
> > > +{
> > > +/*
> > > + * QEMU versions older than 9.0 don't understand zero pages
> > > + * on the multifd channel. This switch is required to
> > > + * maintain backward compatibility.
> > > + */
> > > +bool use_multifd_zero_page =
> > > +(migrate_zero_page_detection() == ZERO_PAGE_DETECTION_MULTIFD);
> > > +MultiFDPages_t *pages = p->pages;
> > > +RAMBlock *rb = pages->block;
> > > +
> > > +assert(pages->num != 0);
> > > +assert(pages->normal_num == 0);
> > > +assert(pages->zero_num == 0);
> >
> > We can drop these before the final version.
> >
> > > +
> > > +for (int i = 0; i < pages->num; i++) {
> > > +uint64_t offset = pages->offset[i];
> > > +if (use_multifd_zero_page &&
> > > +buffer_is_zero(rb->host + offset, p->page_size)) {
> > > +pages->zero[pages->zero_num] = offset;
> > > +pages->zero_num++;
> > > +ram_release_page(rb->idstr, offset);
> > > +} else {
> > > +pages->normal[pages->normal_num] = offset;
> > > +pages->normal_num++;
> > > +}
> > > +}
> >
> > I don't think it's super clean to have three arrays offset, zero and
> > normal, all sized for the full packet size. It might be possible to just
> > carry a bitmap of non-zero pages along with pages->offset and operate on
> > that instead.
> >
> > What do you think?
> >
> > Peter, any ideas? Should we just leave this for another time?
>
> Yeah I think a bitmap should save quite a few fields indeed, it'll however
> make the latter iteration slightly harder by walking both (offset[],
> bitmap), process the page only if bitmap is set for the offset.
>
> IIUC we per
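
For context, a minimal sketch of the bitmap idea under discussion, with
hypothetical names (this helper and "nonzero_bitmap" are illustrative, not
part of the series):

static void multifd_mark_nonzero_pages(MultiFDSendParams *p,
                                       unsigned long *nonzero_bitmap)
{
    MultiFDPages_t *pages = p->pages;
    RAMBlock *rb = pages->block;

    /* One bit per entry of pages->offset[]; set means "not a zero page". */
    bitmap_zero(nonzero_bitmap, pages->num);

    for (int i = 0; i < pages->num; i++) {
        if (!buffer_is_zero(rb->host + pages->offset[i], p->page_size)) {
            set_bit(i, nonzero_bitmap);
        } else {
            ram_release_page(rb->idstr, pages->offset[i]);
        }
    }
}

The sender would then walk offset[] once, testing the bitmap, instead of
keeping the separate normal[] and zero[] arrays.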

Re: [External] Re: [PATCH v2 3/7] migration/multifd: Zero page transmission on the multifd thread.

2024-02-22 Thread Hao Xiang
On Wed, Feb 21, 2024 at 8:00 AM Elena Ufimtseva  wrote:
>
>
>
> On Fri, Feb 16, 2024 at 2:42 PM Hao Xiang  wrote:
>>
>> 1. Implements the zero page detection and handling on the multifd
>> threads for non-compression, zlib and zstd compression backends.
>> 2. Added a new value 'multifd' in ZeroPageDetection enumeration.
>> 3. Add proper asserts to ensure pages->normal are used for normal pages
>> in all scenarios.
>>
>> Signed-off-by: Hao Xiang 
>> ---
>>  migration/meson.build |  1 +
>>  migration/multifd-zero-page.c | 59 +++
>>  migration/multifd-zlib.c  | 26 ---
>>  migration/multifd-zstd.c  | 25 ---
>>  migration/multifd.c   | 50 +++--
>>  migration/multifd.h   |  7 +
>>  qapi/migration.json   |  4 ++-
>>  7 files changed, 151 insertions(+), 21 deletions(-)
>>  create mode 100644 migration/multifd-zero-page.c
>>
>> diff --git a/migration/meson.build b/migration/meson.build
>> index 92b1cc4297..1eeb915ff6 100644
>> --- a/migration/meson.build
>> +++ b/migration/meson.build
>> @@ -22,6 +22,7 @@ system_ss.add(files(
>>'migration.c',
>>'multifd.c',
>>'multifd-zlib.c',
>> +  'multifd-zero-page.c',
>>'ram-compress.c',
>>'options.c',
>>'postcopy-ram.c',
>> diff --git a/migration/multifd-zero-page.c b/migration/multifd-zero-page.c
>> new file mode 100644
>> index 00..f0cd8e2c53
>> --- /dev/null
>> +++ b/migration/multifd-zero-page.c
>> @@ -0,0 +1,59 @@
>> +/*
>> + * Multifd zero page detection implementation.
>> + *
>> + * Copyright (c) 2024 Bytedance Inc
>> + *
>> + * Authors:
>> + *  Hao Xiang 
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
>> + * See the COPYING file in the top-level directory.
>> + */
>> +
>> +#include "qemu/osdep.h"
>> +#include "qemu/cutils.h"
>> +#include "exec/ramblock.h"
>> +#include "migration.h"
>> +#include "multifd.h"
>> +#include "options.h"
>> +#include "ram.h"
>> +
>> +void multifd_zero_page_check_send(MultiFDSendParams *p)
>> +{
>> +/*
>> + * QEMU versions older than 9.0 don't understand zero pages
>> + * on the multifd channel. This switch is required to
>> + * maintain backward compatibility.
>> + */
>> +bool use_multifd_zero_page =
>> +(migrate_zero_page_detection() == ZERO_PAGE_DETECTION_MULTIFD);
>> +MultiFDPages_t *pages = p->pages;
>> +RAMBlock *rb = pages->block;
>> +
>> +assert(pages->num != 0);
>
>
> Not needed, the check is done right before calling send_prepare.
>
>>
>> +assert(pages->normal_num == 0);
>> +assert(pages->zero_num == 0);
>
>
> Why these asserts are needed?

The idea is that when multifd_zero_page_check_send is called, I want
to make sure zero page checking was not processed on this packet
before. It is perhaps redundant, and the asserts are compiled out in a
free (release) build anyway.

>>
>> +
>>
>> +for (int i = 0; i < pages->num; i++) {
>> +uint64_t offset = pages->offset[i];
>> +if (use_multifd_zero_page &&
>> +buffer_is_zero(rb->host + offset, p->page_size)) {
>> +pages->zero[pages->zero_num] = offset;
>> +pages->zero_num++;
>> +ram_release_page(rb->idstr, offset);
>> +} else {
>> +pages->normal[pages->normal_num] = offset;
>> +pages->normal_num++;
>> +}
>> +}
>> +}
>> +
>> +void multifd_zero_page_check_recv(MultiFDRecvParams *p)
>> +{
>> +for (int i = 0; i < p->zero_num; i++) {
>> +void *page = p->host + p->zero[i];
>> +if (!buffer_is_zero(page, p->page_size)) {
>> +memset(page, 0, p->page_size);
>> +}
>> +}
>> +}
>> diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c
>> index 012e3bdea1..cdfe0fa70e 100644
>> --- a/migration/multifd-zlib.c
>> +++ b/migration/multifd-zlib.c
>> @@ -123,13 +123,20 @@ static int zlib_send_prepare(MultiFDSendParams *p, 
>> Error **errp)
>>  int ret;
>>  uint32_t i;
>>
>> +multifd_zero_page_check_send(p);
>> +
>> +if (!pages->normal_num) {
>> +p->next_packet_size = 0;
>> +goto out;
>> +}

Re: [External] Re: [PATCH v2 3/7] migration/multifd: Zero page transmission on the multifd thread.

2024-02-22 Thread Hao Xiang
On Fri, Feb 16, 2024 at 9:08 PM Richard Henderson
 wrote:
>
> On 2/16/24 12:39, Hao Xiang wrote:
> > +void multifd_zero_page_check_recv(MultiFDRecvParams *p)
> > +{
> > +for (int i = 0; i < p->zero_num; i++) {
> > +void *page = p->host + p->zero[i];
> > +if (!buffer_is_zero(page, p->page_size)) {
> > +memset(page, 0, p->page_size);
> > +}
> > +}
> > +}
>
> You should not check the buffer is zero here, you should just zero it.

I will fix it in the next version.

>
>
> r~
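
For reference, the fix Richard asks for would reduce the receive side to an
unconditional clear; a sketch of what the next version might look like:

void multifd_zero_page_check_recv(MultiFDRecvParams *p)
{
    for (int i = 0; i < p->zero_num; i++) {
        /* Just zero the page; no need to test it first. */
        memset(p->host + p->zero[i], 0, p->page_size);
    }
}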



Re: [External] Re: [PATCH v2 1/7] migration/multifd: Add new migration option zero-page-detection.

2024-02-22 Thread Hao Xiang
On Wed, Feb 21, 2024 at 5:58 AM Elena Ufimtseva  wrote:
>
>
>
> On Fri, Feb 16, 2024 at 2:41 PM Hao Xiang  wrote:
>>
>> This new parameter controls where the zero page checking is running.
>> 1. If this parameter is set to 'legacy', zero page checking is
>> done in the migration main thread.
>> 2. If this parameter is set to 'none', zero page checking is disabled.
>>
>
> Hello Hao
>
> Few questions and comments.
>
> First, the commit message states that the parameter controls where the
> checking is done, but it also controls whether the sending of zero pages
> is done by multifd threads or not.
>
>
>>
>> Signed-off-by: Hao Xiang 
>> ---
>>  hw/core/qdev-properties-system.c| 10 ++
>>  include/hw/qdev-properties-system.h |  4 
>>  migration/migration-hmp-cmds.c  |  9 +
>>  migration/options.c | 21 
>>  migration/options.h |  1 +
>>  migration/ram.c |  4 
>>  qapi/migration.json | 30 ++---
>>  7 files changed, 76 insertions(+), 3 deletions(-)
>>
>> diff --git a/hw/core/qdev-properties-system.c 
>> b/hw/core/qdev-properties-system.c
>> index 1a396521d5..63843f18b5 100644
>> --- a/hw/core/qdev-properties-system.c
>> +++ b/hw/core/qdev-properties-system.c
>> @@ -679,6 +679,16 @@ const PropertyInfo qdev_prop_mig_mode = {
>>  .set_default_value = qdev_propinfo_set_default_value_enum,
>>  };
>>
>> +const PropertyInfo qdev_prop_zero_page_detection = {
>> +.name = "ZeroPageDetection",
>> +.description = "zero_page_detection values, "
>> +   "multifd,legacy,none",
>> +.enum_table = &ZeroPageDetection_lookup,
>> +.get = qdev_propinfo_get_enum,
>> +.set = qdev_propinfo_set_enum,
>> +.set_default_value = qdev_propinfo_set_default_value_enum,
>> +};
>> +
>>  /* --- Reserved Region --- */
>>
>>  /*
>> diff --git a/include/hw/qdev-properties-system.h 
>> b/include/hw/qdev-properties-system.h
>> index 06c359c190..839b170235 100644
>> --- a/include/hw/qdev-properties-system.h
>> +++ b/include/hw/qdev-properties-system.h
>> @@ -8,6 +8,7 @@ extern const PropertyInfo qdev_prop_macaddr;
>>  extern const PropertyInfo qdev_prop_reserved_region;
>>  extern const PropertyInfo qdev_prop_multifd_compression;
>>  extern const PropertyInfo qdev_prop_mig_mode;
>> +extern const PropertyInfo qdev_prop_zero_page_detection;
>>  extern const PropertyInfo qdev_prop_losttickpolicy;
>>  extern const PropertyInfo qdev_prop_blockdev_on_error;
>>  extern const PropertyInfo qdev_prop_bios_chs_trans;
>> @@ -47,6 +48,9 @@ extern const PropertyInfo 
>> qdev_prop_iothread_vq_mapping_list;
>>  #define DEFINE_PROP_MIG_MODE(_n, _s, _f, _d) \
>>  DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_mig_mode, \
>> MigMode)
>> +#define DEFINE_PROP_ZERO_PAGE_DETECTION(_n, _s, _f, _d) \
>> +DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_zero_page_detection, \
>> +   ZeroPageDetection)
>>  #define DEFINE_PROP_LOSTTICKPOLICY(_n, _s, _f, _d) \
>>  DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_losttickpolicy, \
>>  LostTickPolicy)
>> diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c
>> index 99b49df5dd..7e96ae6ffd 100644
>> --- a/migration/migration-hmp-cmds.c
>> +++ b/migration/migration-hmp-cmds.c
>> @@ -344,6 +344,11 @@ void hmp_info_migrate_parameters(Monitor *mon, const 
>> QDict *qdict)
>>  monitor_printf(mon, "%s: %s\n",
>>  MigrationParameter_str(MIGRATION_PARAMETER_MULTIFD_COMPRESSION),
>>  MultiFDCompression_str(params->multifd_compression));
>> +assert(params->has_zero_page_detection);
>
>
> What is the reason to have assert here?

It's just to verify that the option is initialized properly before we
reach here. The same is done for the other options.

>
>>
>> +monitor_printf(mon, "%s: %s\n",
>> +MigrationParameter_str(MIGRATION_PARAMETER_ZERO_PAGE_DETECTION),
>> +qapi_enum_lookup(&ZeroPageDetection_lookup,
>> +params->zero_page_detection));
>>  monitor_printf(mon, "%s: %" PRIu64 " bytes\n",
>>  MigrationParameter_str(MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE),
>>  params->xbzrle_cache_size);
>> @@ -634,6 +639,10 @@ void hmp_migrate_set_parameter(Monitor *mon, const 
>> QDict *q

Re: [External] Re: [PATCH v2 1/7] migration/multifd: Add new migration option zero-page-detection.

2024-02-22 Thread Hao Xiang
On Wed, Feb 21, 2024 at 4:03 AM Markus Armbruster  wrote:
>
> Hao Xiang  writes:
>
> > This new parameter controls where the zero page checking is running.
> > 1. If this parameter is set to 'legacy', zero page checking is
> > done in the migration main thread.
> > 2. If this parameter is set to 'none', zero page checking is disabled.
> >
> > Signed-off-by: Hao Xiang 
>
> [...]
>
> > diff --git a/qapi/migration.json b/qapi/migration.json
> > index 5a565d9b8d..99843a8e95 100644
> > --- a/qapi/migration.json
> > +++ b/qapi/migration.json
> > @@ -653,6 +653,17 @@
> >  { 'enum': 'MigMode',
> >'data': [ 'normal', 'cpr-reboot' ] }
> >
> > +##
> > +# @ZeroPageDetection:
> > +#
> > +# @legacy: Perform zero page checking from main migration thread. (since 
> > 9.0)
> > +#
> > +# @none: Do not perform zero page checking.
> > +#
> > +##
>
> The entire type is since 9.0.  Thus:
>
>##
># @ZeroPageDetection:
>#
># @legacy: Perform zero page checking from main migration thread.
>#
># @none: Do not perform zero page checking.
>#
># Since: 9.0
>##
>
> > +{ 'enum': 'ZeroPageDetection',
> > +  'data': [ 'legacy', 'none' ] }
> > +
> >  ##
> >  # @BitmapMigrationBitmapAliasTransform:
> >  #
> > @@ -874,6 +885,9 @@
> >  # @mode: Migration mode. See description in @MigMode. Default is 'normal'.
> >  #(Since 8.2)
> >  #
> > +# @zero-page-detection: See description in @ZeroPageDetection.
> > +# Default is 'legacy'. (Since 9.0)
>
> The description feels a bit lazy :)
>
> Suggest
>
># @zero-page-detection: Whether and how to detect zero pages.  Default
># is 'legacy'.  (since 9.0)
>
> Same for the other two copies.

I will fix these in the next version.

>
> > +#
> >  # Features:
> >  #
> >  # @deprecated: Member @block-incremental is deprecated.  Use
> > @@ -907,7 +921,8 @@
> > 'block-bitmap-mapping',
> > { 'name': 'x-vcpu-dirty-limit-period', 'features': ['unstable'] 
> > },
> > 'vcpu-dirty-limit',
> > -   'mode'] }
> > +   'mode',
> > +   'zero-page-detection'] }
> >
> >  ##
> >  # @MigrateSetParameters:
> > @@ -1066,6 +1081,10 @@
> >  # @mode: Migration mode. See description in @MigMode. Default is 'normal'.
> >  #(Since 8.2)
> >  #
> > +# @zero-page-detection: See description in @ZeroPageDetection.
> > +# Default is 'legacy'. (Since 9.0)
> > +#
> > +#
> >  # Features:
> >  #
> >  # @deprecated: Member @block-incremental is deprecated.  Use
> > @@ -1119,7 +1138,8 @@
> >  '*x-vcpu-dirty-limit-period': { 'type': 'uint64',
> >  'features': [ 'unstable' ] },
> >  '*vcpu-dirty-limit': 'uint64',
> > -'*mode': 'MigMode'} }
> > +'*mode': 'MigMode',
> > +'*zero-page-detection': 'ZeroPageDetection'} }
> >
> >  ##
> >  # @migrate-set-parameters:
> > @@ -1294,6 +1314,9 @@
> >  # @mode: Migration mode. See description in @MigMode. Default is 'normal'.
> >  #(Since 8.2)
> >  #
> > +# @zero-page-detection: See description in @ZeroPageDetection.
> > +# Default is 'legacy'. (Since 9.0)
> > +#
> >  # Features:
> >  #
> >  # @deprecated: Member @block-incremental is deprecated.  Use
> > @@ -1344,7 +1367,8 @@
> >  '*x-vcpu-dirty-limit-period': { 'type': 'uint64',
> >  'features': [ 'unstable' ] },
> >  '*vcpu-dirty-limit': 'uint64',
> > -'*mode': 'MigMode'} }
> > +'*mode': 'MigMode',
> > +'*zero-page-detection': 'ZeroPageDetection'} }
> >
> >  ##
> >  # @query-migrate-parameters:
>



Re: [External] Re: [PATCH v2 5/7] migration/multifd: Add new migration test cases for legacy zero page checking.

2024-02-22 Thread Hao Xiang
On Wed, Feb 21, 2024 at 12:59 PM Fabiano Rosas  wrote:
>
> Hao Xiang  writes:
>
> > Now that zero page checking is done on the multifd sender threads by
> > default, we still provide an option for backward compatibility. This
> > change adds a qtest migration test case to set the zero-page-detection
> > option to "legacy" and run multifd migration with zero page checking on the
> > migration main thread.
> >
> > Signed-off-by: Hao Xiang 
> > ---
> >  tests/qtest/migration-test.c | 52 
> >  1 file changed, 52 insertions(+)
> >
> > diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
> > index 8a5bb1752e..c27083110a 100644
> > --- a/tests/qtest/migration-test.c
> > +++ b/tests/qtest/migration-test.c
> > @@ -2621,6 +2621,24 @@ test_migrate_precopy_tcp_multifd_start(QTestState 
> > *from,
> >  return test_migrate_precopy_tcp_multifd_start_common(from, to, "none");
> >  }
> >
> > +static void *
> > +test_migrate_precopy_tcp_multifd_start_zero_page_legacy(QTestState *from,
> > +QTestState *to)
> > +{
> > +test_migrate_precopy_tcp_multifd_start_common(from, to, "none");
> > +migrate_set_parameter_str(from, "zero-page-detection", "legacy");
> > +return NULL;
> > +}
> > +
> > +static void *
> > +test_migration_precopy_tcp_multifd_start_no_zero_page(QTestState *from,
> > +  QTestState *to)
> > +{
> > +test_migrate_precopy_tcp_multifd_start_common(from, to, "none");
> > +migrate_set_parameter_str(from, "zero-page-detection", "none");
> > +return NULL;
> > +}
> > +
> >  static void *
> >  test_migrate_precopy_tcp_multifd_zlib_start(QTestState *from,
> >  QTestState *to)
> > @@ -2652,6 +2670,36 @@ static void test_multifd_tcp_none(void)
> >  test_precopy_common();
> >  }
> >
> > +static void test_multifd_tcp_zero_page_legacy(void)
> > +{
> > +MigrateCommon args = {
> > +.listen_uri = "defer",
> > +.start_hook = 
> > test_migrate_precopy_tcp_multifd_start_zero_page_legacy,
> > +/*
> > + * Multifd is more complicated than most of the features, it
> > + * directly takes guest page buffers when sending, make sure
> > + * everything will work alright even if guest page is changing.
> > + */
> > +.live = true,
> > +};
> > +test_precopy_common();
> > +}
> > +
> > +static void test_multifd_tcp_no_zero_page(void)
> > +{
> > +MigrateCommon args = {
> > +.listen_uri = "defer",
> > +.start_hook = 
> > test_migration_precopy_tcp_multifd_start_no_zero_page,
> > +/*
> > + * Multifd is more complicated than most of the features, it
> > + * directly takes guest page buffers when sending, make sure
> > + * everything will work alright even if guest page is changing.
> > + */
> > +.live = true,
> > +};
> > +test_precopy_common();
> > +}
> > +
> >  static void test_multifd_tcp_zlib(void)
> >  {
> >  MigrateCommon args = {
> > @@ -3550,6 +3598,10 @@ int main(int argc, char **argv)
> >  }
> >  migration_test_add("/migration/multifd/tcp/plain/none",
> > test_multifd_tcp_none);
> > +migration_test_add("/migration/multifd/tcp/plain/zero_page_legacy",
> > +   test_multifd_tcp_zero_page_legacy);
> > +migration_test_add("/migration/multifd/tcp/plain/no_zero_page",
> > +   test_multifd_tcp_no_zero_page);
>
> Here it's better to separate the main feature from the states. That way
> we can run only the zero-page tests with:
>
>  migration-test -r /x86_64/migration/multifd/tcp/plain/zero-page
>
> Like so: (also dashes instead of underscores)
> /zero-page/legacy
> /zero-page/none
>

Sounds good.

> >  migration_test_add("/migration/multifd/tcp/plain/cancel",
> > test_multifd_tcp_cancel);
> >  migration_test_add("/migration/multifd/tcp/plain/zlib",
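
With the naming scheme Fabiano suggests, the registrations would become
something like:

migration_test_add("/migration/multifd/tcp/plain/zero-page/legacy",
                   test_multifd_tcp_zero_page_legacy);
migration_test_add("/migration/multifd/tcp/plain/zero-page/none",
                   test_multifd_tcp_no_zero_page);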



Re: [External] Re: [PATCH v2 2/7] migration/multifd: Support for zero pages transmission in multifd format.

2024-02-22 Thread Hao Xiang
On Wed, Feb 21, 2024 at 7:37 AM Elena Ufimtseva  wrote:
>
>
>
> On Fri, Feb 16, 2024 at 2:41 PM Hao Xiang  wrote:
>>
>> This change adds zero page counters and updates multifd send/receive
>> tracing format to track the newly added counters.
>>
>> Signed-off-by: Hao Xiang 
>> ---
>>  migration/multifd.c| 43 ++
>>  migration/multifd.h| 21 -
>>  migration/ram.c|  1 -
>>  migration/trace-events |  8 
>>  4 files changed, 59 insertions(+), 14 deletions(-)
>>
>> diff --git a/migration/multifd.c b/migration/multifd.c
>> index adfe8c9a0a..a33dba40d9 100644
>> --- a/migration/multifd.c
>> +++ b/migration/multifd.c
>> @@ -236,6 +236,8 @@ static void multifd_pages_reset(MultiFDPages_t *pages)
>>   * overwritten later when reused.
>>   */
>>  pages->num = 0;
>> +pages->normal_num = 0;
>> +pages->zero_num = 0;
>>  pages->block = NULL;
>>  }
>>
>>
>> @@ -309,6 +311,8 @@ static MultiFDPages_t *multifd_pages_init(uint32_t n)
>>
>>  pages->allocated = n;
>>  pages->offset = g_new0(ram_addr_t, n);
>> +pages->normal = g_new0(ram_addr_t, n);
>> +pages->zero = g_new0(ram_addr_t, n);
>>
>>
>>  return pages;
>>  }
>> @@ -319,6 +323,10 @@ static void multifd_pages_clear(MultiFDPages_t *pages)
>>  pages->allocated = 0;
>>  g_free(pages->offset);
>>  pages->offset = NULL;
>> +g_free(pages->normal);
>> +pages->normal = NULL;
>> +g_free(pages->zero);
>> +pages->zero = NULL;
>>  g_free(pages);
>>  }
>>
>> @@ -332,6 +340,7 @@ void multifd_send_fill_packet(MultiFDSendParams *p)
>>  packet->flags = cpu_to_be32(p->flags);
>>  packet->pages_alloc = cpu_to_be32(p->pages->allocated);
>>  packet->normal_pages = cpu_to_be32(pages->num);
>> +packet->zero_pages = cpu_to_be32(pages->zero_num);
>>  packet->next_packet_size = cpu_to_be32(p->next_packet_size);
>>
>>  packet_num = qatomic_fetch_inc(&multifd_send_state->packet_num);
>> @@ -350,9 +359,10 @@ void multifd_send_fill_packet(MultiFDSendParams *p)
>>
>>  p->packets_sent++;
>>  p->total_normal_pages += pages->num;
>> +p->total_zero_pages += pages->zero_num;
>>
>> -trace_multifd_send(p->id, packet_num, pages->num, p->flags,
>> -   p->next_packet_size);
>> +trace_multifd_send(p->id, packet_num, pages->num, pages->zero_num,
>> +   p->flags, p->next_packet_size);
>>  }
>>
>>  static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
>> @@ -393,20 +403,29 @@ static int 
>> multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
>>  p->normal_num = be32_to_cpu(packet->normal_pages);
>>  if (p->normal_num > packet->pages_alloc) {
>>  error_setg(errp, "multifd: received packet "
>> -   "with %u pages and expected maximum pages are %u",
>> +   "with %u normal pages and expected maximum pages are %u",
>> p->normal_num, packet->pages_alloc) ;
>>  return -1;
>>  }
>>
>> +p->zero_num = be32_to_cpu(packet->zero_pages);
>> +if (p->zero_num > packet->pages_alloc - p->normal_num) {
>> +error_setg(errp, "multifd: received packet "
>> +   "with %u zero pages and expected maximum zero pages are 
>> %u",
>> +   p->zero_num, packet->pages_alloc - p->normal_num) ;
>> +return -1;
>> +}
>
>
> You could probably combine this check with normal_num against pages_alloc.
>>
>> +
>>  p->next_packet_size = be32_to_cpu(packet->next_packet_size);
>>  p->packet_num = be64_to_cpu(packet->packet_num);
>>  p->packets_recved++;
>>  p->total_normal_pages += p->normal_num;
>> +p->total_zero_pages += p->zero_num;
>>
>> -trace_multifd_recv(p->id, p->packet_num, p->normal_num, p->flags,
>> -   p->next_packet_size);
>> +trace_multifd_recv(p->id, p->packet_num, p->normal_num, p->zero_num,
>> +   p->flags, p->next_packet_size);
>>
>> -if (p->normal_num == 0) {
>> -if (p->normal_num == 0) {
>> +if (p->normal_num == 0 && p->zero_num == 0) {
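
Folding Elena's suggestion in, the two range checks could be combined into
one, along these lines (a sketch; the 64-bit sum guards against wraparound):

p->normal_num = be32_to_cpu(packet->normal_pages);
p->zero_num = be32_to_cpu(packet->zero_pages);
if ((uint64_t)p->normal_num + p->zero_num > packet->pages_alloc) {
    error_setg(errp, "multifd: received packet "
               "with %u normal and %u zero pages, expected maximum %u",
               p->normal_num, p->zero_num, packet->pages_alloc);
    return -1;
}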

[PATCH v2 7/7] Update maintainer contact for migration multifd zero page checking acceleration.

2024-02-16 Thread Hao Xiang
Add myself as maintainer of the multifd zero page checking acceleration.

Signed-off-by: Hao Xiang 
---
 MAINTAINERS | 5 +
 1 file changed, 5 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index a24c2b51b6..3ca407cb58 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3403,6 +3403,11 @@ F: tests/migration/
 F: util/userfaultfd.c
 X: migration/rdma*
 
+Migration multifd zero page checking acceleration
+M: Hao Xiang 
+S: Maintained
+F: migration/multifd-zero-page.c
+
 RDMA Migration
 R: Li Zhijian 
 R: Peter Xu 
-- 
2.30.2




[PATCH v2 6/7] migration/multifd: Add zero pages and zero bytes counter to migration status interface.

2024-02-16 Thread Hao Xiang
This change extends the MigrationStatus interface to track zero pages
and zero bytes counter.

Signed-off-by: Hao Xiang 
---
 migration/migration-hmp-cmds.c  |  4 
 migration/migration.c   |  2 ++
 qapi/migration.json | 15 ++-
 tests/migration/guestperf/engine.py |  2 ++
 4 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c
index 7e96ae6ffd..abe035c9f2 100644
--- a/migration/migration-hmp-cmds.c
+++ b/migration/migration-hmp-cmds.c
@@ -111,6 +111,10 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
info->ram->normal);
 monitor_printf(mon, "normal bytes: %" PRIu64 " kbytes\n",
info->ram->normal_bytes >> 10);
+monitor_printf(mon, "zero: %" PRIu64 " pages\n",
+   info->ram->zero);
+monitor_printf(mon, "zero bytes: %" PRIu64 " kbytes\n",
+   info->ram->zero_bytes >> 10);
 monitor_printf(mon, "dirty sync count: %" PRIu64 "\n",
info->ram->dirty_sync_count);
 monitor_printf(mon, "page size: %" PRIu64 " kbytes\n",
diff --git a/migration/migration.c b/migration/migration.c
index ab21de2cad..1968ea7075 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1112,6 +1112,8 @@ static void populate_ram_info(MigrationInfo *info, 
MigrationState *s)
 info->ram->skipped = 0;
info->ram->normal = stat64_get(&mig_stats.normal_pages);
info->ram->normal_bytes = info->ram->normal * page_size;
+info->ram->zero = stat64_get(&mig_stats.zero_pages);
+info->ram->zero_bytes = info->ram->zero * page_size;
info->ram->mbps = s->mbps;
info->ram->dirty_sync_count =
stat64_get(&mig_stats.dirty_sync_count);
diff --git a/qapi/migration.json b/qapi/migration.json
index e2450b92d4..892875da18 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -63,6 +63,10 @@
 # between 0 and @dirty-sync-count * @multifd-channels.  (since
 # 7.1)
 #
+# @zero: number of zero pages (since 9.0)
+#
+# @zero-bytes: number of zero bytes sent (since 9.0)
+#
 # Features:
 #
 # @deprecated: Member @skipped is always zero since 1.5.3
@@ -81,7 +85,8 @@
'multifd-bytes': 'uint64', 'pages-per-second': 'uint64',
'precopy-bytes': 'uint64', 'downtime-bytes': 'uint64',
'postcopy-bytes': 'uint64',
-   'dirty-sync-missed-zero-copy': 'uint64' } }
+   'dirty-sync-missed-zero-copy': 'uint64',
+   'zero': 'int', 'zero-bytes': 'int' } }
 
 ##
 # @XBZRLECacheStats:
@@ -332,6 +337,8 @@
 #   "duplicate":123,
 #   "normal":123,
 #   "normal-bytes":123456,
+#   "zero":123,
+#   "zero-bytes":123456,
 #   "dirty-sync-count":15
 # }
 #  }
@@ -358,6 +365,8 @@
 # "duplicate":123,
 # "normal":123,
 # "normal-bytes":123456,
+# "zero":123,
+# "zero-bytes":123456,
 # "dirty-sync-count":15
 #  }
 #   }
@@ -379,6 +388,8 @@
 # "duplicate":123,
 # "normal":123,
 # "normal-bytes":123456,
+# "zero":123,
+# "zero-bytes":123456,
 # "dirty-sync-count":15
 #  },
 #  "disk":{
@@ -405,6 +416,8 @@
 # "duplicate":10,
 # "normal":,
 # "normal-bytes":3412992,
+# "zero":,
+# "zero-bytes":3412992,
 # "dirty-sync-count":15
 #  },
 #  "xbzrle-cache":{
diff --git a/tests/migration/guestperf/engine.py 
b/tests/migration/guestperf/engine.py
index 608d7270f6..75315b99b7 100644
--- a/tests/migration/guestperf/engine.py
+++ b/tests/migration/guestperf/engine.py
@@ -92,6 +92,8 @@ def _migrate_progress(self, vm):
 info["ram"].get("skipped", 0),
 info["ram"].get("normal", 0),
 info["ram"].get("normal-bytes", 0),
+info["ram"].get("zero", 0);
+info["ram"].get("zero-bytes", 0);
 info["ram"].get("dirty-pages-rate", 0),
 info["ram"].get("mbps", 0),
 info["ram"].get("dirty-sync-count", 0)
-- 
2.30.2
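
With this patch applied, "info migrate" during a run would additionally
print lines of the form (numbers illustrative):

zero: 25190235 pages
zero bytes: 100760940 kbytes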




[PATCH v2 4/7] migration/multifd: Enable zero page checking from multifd threads.

2024-02-16 Thread Hao Xiang
This change adds a dedicated handler for MigrationOps::ram_save_target_page
in multifd live migration. Now zero page checking can be done in the multifd
threads and this becomes the default configuration. We still provide backward
compatibility where zero page checking is done from the migration main thread.

Signed-off-by: Hao Xiang 
---
 migration/multifd.c |  1 +
 migration/options.c |  2 +-
 migration/ram.c | 53 ++---
 3 files changed, 42 insertions(+), 14 deletions(-)

diff --git a/migration/multifd.c b/migration/multifd.c
index fbb40ea10b..ef5dad1019 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -13,6 +13,7 @@
 #include "qemu/osdep.h"
 #include "qemu/cutils.h"
 #include "qemu/rcu.h"
+#include "qemu/cutils.h"
 #include "exec/target_page.h"
 #include "sysemu/sysemu.h"
 #include "exec/ramblock.h"
diff --git a/migration/options.c b/migration/options.c
index 3c603391b0..3c79b6ccd4 100644
--- a/migration/options.c
+++ b/migration/options.c
@@ -181,7 +181,7 @@ Property migration_properties[] = {
   MIG_MODE_NORMAL),
 DEFINE_PROP_ZERO_PAGE_DETECTION("zero-page-detection", MigrationState,
parameters.zero_page_detection,
-   ZERO_PAGE_DETECTION_LEGACY),
+   ZERO_PAGE_DETECTION_MULTIFD),
 
 /* Migration capabilities */
 DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
diff --git a/migration/ram.c b/migration/ram.c
index 5ece9f042e..b088c5a98c 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -1123,10 +1123,6 @@ static int save_zero_page(RAMState *rs, PageSearchStatus 
*pss,
 QEMUFile *file = pss->pss_channel;
 int len = 0;
 
-if (migrate_zero_page_detection() != ZERO_PAGE_DETECTION_LEGACY) {
-return 0;
-}
-
 if (!buffer_is_zero(p, TARGET_PAGE_SIZE)) {
 return 0;
 }
@@ -1256,6 +1252,10 @@ static int ram_save_page(RAMState *rs, PageSearchStatus 
*pss)
 
 static int ram_save_multifd_page(RAMBlock *block, ram_addr_t offset)
 {
+assert(migrate_multifd());
+assert(!migrate_compress());
+assert(!migration_in_postcopy());
+
 if (!multifd_queue_page(block, offset)) {
 return -1;
 }
@@ -2046,7 +2046,6 @@ static bool save_compress_page(RAMState *rs, 
PageSearchStatus *pss,
  */
 static int ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss)
 {
-RAMBlock *block = pss->block;
 ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
 int res;
 
@@ -2062,17 +2061,40 @@ static int ram_save_target_page_legacy(RAMState *rs, 
PageSearchStatus *pss)
 return 1;
 }
 
+return ram_save_page(rs, pss);
+}
+
+/**
+ * ram_save_target_page_multifd: save one target page
+ *
+ * Returns the number of pages written
+ *
+ * @rs: current RAM state
+ * @pss: data about the page we want to send
+ */
+static int ram_save_target_page_multifd(RAMState *rs, PageSearchStatus *pss)
+{
+RAMBlock *block = pss->block;
+ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
+
+/* Multifd is not compatible with old compression. */
+assert(!migrate_compress());
+
+/* Multifd is not compatible with postcopy. */
+assert(!migration_in_postcopy());
+
 /*
- * Do not use multifd in postcopy as one whole host page should be
- * placed.  Meanwhile postcopy requires atomic update of pages, so even
- * if host page size == guest page size the dest guest during run may
- * still see partially copied pages which is data corruption.
+ * Backward compatibility support. While using multifd live
+ * migration, we still need to handle zero page checking on the
+ * migration main thread.
  */
-if (migrate_multifd() && !migration_in_postcopy()) {
-return ram_save_multifd_page(block, offset);
+if (migrate_zero_page_detection() == ZERO_PAGE_DETECTION_LEGACY) {
+if (save_zero_page(rs, pss, offset)) {
+return 1;
+}
 }
 
-return ram_save_page(rs, pss);
+return ram_save_multifd_page(block, offset);
 }
 
 /* Should be called before sending a host page */
@@ -2984,7 +3006,12 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
 }
 
 migration_ops = g_malloc0(sizeof(MigrationOps));
-migration_ops->ram_save_target_page = ram_save_target_page_legacy;
+
+if (migrate_multifd()) {
+migration_ops->ram_save_target_page = ram_save_target_page_multifd;
+} else {
+migration_ops->ram_save_target_page = ram_save_target_page_legacy;
+}
 
 bql_unlock();
 ret = multifd_send_sync_main();
-- 
2.30.2




[PATCH v2 2/7] migration/multifd: Support for zero pages transmission in multifd format.

2024-02-16 Thread Hao Xiang
This change adds zero page counters and updates multifd send/receive
tracing format to track the newly added counters.

Signed-off-by: Hao Xiang 
---
 migration/multifd.c| 43 ++
 migration/multifd.h| 21 -
 migration/ram.c|  1 -
 migration/trace-events |  8 
 4 files changed, 59 insertions(+), 14 deletions(-)

diff --git a/migration/multifd.c b/migration/multifd.c
index adfe8c9a0a..a33dba40d9 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -236,6 +236,8 @@ static void multifd_pages_reset(MultiFDPages_t *pages)
  * overwritten later when reused.
  */
 pages->num = 0;
+pages->normal_num = 0;
+pages->zero_num = 0;
 pages->block = NULL;
 }
 
@@ -309,6 +311,8 @@ static MultiFDPages_t *multifd_pages_init(uint32_t n)
 
 pages->allocated = n;
 pages->offset = g_new0(ram_addr_t, n);
+pages->normal = g_new0(ram_addr_t, n);
+pages->zero = g_new0(ram_addr_t, n);
 
 return pages;
 }
@@ -319,6 +323,10 @@ static void multifd_pages_clear(MultiFDPages_t *pages)
 pages->allocated = 0;
 g_free(pages->offset);
 pages->offset = NULL;
+g_free(pages->normal);
+pages->normal = NULL;
+g_free(pages->zero);
+pages->zero = NULL;
 g_free(pages);
 }
 
@@ -332,6 +340,7 @@ void multifd_send_fill_packet(MultiFDSendParams *p)
 packet->flags = cpu_to_be32(p->flags);
 packet->pages_alloc = cpu_to_be32(p->pages->allocated);
 packet->normal_pages = cpu_to_be32(pages->num);
+packet->zero_pages = cpu_to_be32(pages->zero_num);
 packet->next_packet_size = cpu_to_be32(p->next_packet_size);
 
packet_num = qatomic_fetch_inc(&multifd_send_state->packet_num);
@@ -350,9 +359,10 @@ void multifd_send_fill_packet(MultiFDSendParams *p)
 
 p->packets_sent++;
 p->total_normal_pages += pages->num;
+p->total_zero_pages += pages->zero_num;
 
-trace_multifd_send(p->id, packet_num, pages->num, p->flags,
-   p->next_packet_size);
+trace_multifd_send(p->id, packet_num, pages->num, pages->zero_num,
+   p->flags, p->next_packet_size);
 }
 
 static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
@@ -393,20 +403,29 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams 
*p, Error **errp)
 p->normal_num = be32_to_cpu(packet->normal_pages);
 if (p->normal_num > packet->pages_alloc) {
 error_setg(errp, "multifd: received packet "
-   "with %u pages and expected maximum pages are %u",
+   "with %u normal pages and expected maximum pages are %u",
p->normal_num, packet->pages_alloc) ;
 return -1;
 }
 
+p->zero_num = be32_to_cpu(packet->zero_pages);
+if (p->zero_num > packet->pages_alloc - p->normal_num) {
+error_setg(errp, "multifd: received packet "
+   "with %u zero pages and expected maximum zero pages are %u",
+   p->zero_num, packet->pages_alloc - p->normal_num) ;
+return -1;
+}
+
 p->next_packet_size = be32_to_cpu(packet->next_packet_size);
 p->packet_num = be64_to_cpu(packet->packet_num);
 p->packets_recved++;
 p->total_normal_pages += p->normal_num;
+p->total_zero_pages += p->zero_num;
 
-trace_multifd_recv(p->id, p->packet_num, p->normal_num, p->flags,
-   p->next_packet_size);
+trace_multifd_recv(p->id, p->packet_num, p->normal_num, p->zero_num,
+   p->flags, p->next_packet_size);
 
-if (p->normal_num == 0) {
+if (p->normal_num == 0 && p->zero_num == 0) {
 return 0;
 }
 
@@ -823,6 +842,8 @@ static void *multifd_send_thread(void *opaque)
 
stat64_add(&mig_stats.multifd_bytes,
p->next_packet_size + p->packet_len);
+stat64_add(&mig_stats.normal_pages, pages->num);
+stat64_add(&mig_stats.zero_pages, pages->zero_num);
 
 multifd_pages_reset(p->pages);
 p->next_packet_size = 0;
@@ -866,7 +887,8 @@ out:
 
 rcu_unregister_thread();
 migration_threads_remove(thread);
-trace_multifd_send_thread_end(p->id, p->packets_sent, 
p->total_normal_pages);
+trace_multifd_send_thread_end(p->id, p->packets_sent, 
p->total_normal_pages,
+  p->total_zero_pages);
 
 return NULL;
 }
@@ -1132,6 +1154,8 @@ static void 
multifd_recv_cleanup_channel(MultiFDRecvParams *p)
 p->iov = NULL;
 g_free(p->normal);
 p->normal = NULL;
+g_free(p->zero);
+p->zero = NULL;
multifd_recv_state->ops->recv_cleanup(p);

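The migration/trace-events hunk from the diffstat is cut off above; judging
from the updated trace_multifd_send()/trace_multifd_recv() call sites, the
trace points presumably gain a zero-pages argument roughly like this:

multifd_send(uint8_t id, uint64_t packet_num, uint32_t normal_pages, uint32_t zero_pages, uint32_t flags, uint32_t next_packet_size) "channel %u packet_num %" PRIu64 " normal pages %u zero pages %u flags 0x%x next packet size %u"
multifd_recv(uint8_t id, uint64_t packet_num, uint32_t normal_pages, uint32_t zero_pages, uint32_t flags, uint32_t next_packet_size) "channel %u packet_num %" PRIu64 " normal pages %u zero pages %u flags 0x%x next packet size %u"
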
[PATCH v2 0/7] Introduce multifd zero page checking.

2024-02-16 Thread Hao Xiang
v2 update:
* Implement zero-page-detection switch with enumeration "legacy",
"none" and "multifd".
* Move normal/zero pages from MultiFDSendParams to MultiFDPages_t.
* Add zeros and zero_bytes accounting.

This patchset is based on Juan Quintela's old series here
https://lore.kernel.org/all/20220802063907.18882-1-quint...@redhat.com/

In the multifd live migration model, there is a single migration main
thread scanning the page map, queuing the pages to multiple multifd
sender threads. The migration main thread runs zero page checking on
every page before queuing the page to the sender threads. Zero page
checking is a CPU intensive task and hence having a single thread doing
all that doesn't scale well. This change introduces a new function
to run the zero page checking on the multifd sender threads. This
patchset also lays the groundwork for future changes that offload the zero
page checking task to accelerator hardware.

Use two Intel 4th generation Xeon servers for testing.

Architecture:x86_64
CPU(s):  192
Thread(s) per core:  2
Core(s) per socket:  48
Socket(s):   2
NUMA node(s):2
Vendor ID:   GenuineIntel
CPU family:  6
Model:   143
Model name:  Intel(R) Xeon(R) Platinum 8457C
Stepping:8
CPU MHz: 2538.624
CPU max MHz: 3800
CPU min MHz: 800

Perform multifd live migration with the setup below:
1. VM has 100GB memory. All pages in the VM are zero pages.
2. Use tcp socket for live migration.
3. Use 4 multifd channels and zero page checking on migration main thread.
4. Use 1/2/4 multifd channels and zero page checking on multifd sender
threads.
5. Record migration total time from sender QEMU console's "info migrate"
command.

+--------------------+----------------+
| zero-page-checking | total-time(ms) |
+--------------------+----------------+
| main-thread        | 9629           |
+--------------------+----------------+
| multifd-1-threads  | 6182           |
+--------------------+----------------+
| multifd-2-threads  | 4643           |
+--------------------+----------------+
| multifd-4-threads  | 4143           |
+--------------------+----------------+
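
For reproduction, the sender side would be driven with standard HMP commands
along these lines (destination address hypothetical):

(qemu) migrate_set_capability multifd on
(qemu) migrate_set_parameter multifd-channels 4
(qemu) migrate_set_parameter zero-page-detection multifd
(qemu) migrate -d tcp:dest-host:4444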

Apply this patchset on top of commit
5767815218efd3cbfd409505ed824d5f356044ae

Hao Xiang (7):
  migration/multifd: Add new migration option zero-page-detection.
  migration/multifd: Support for zero pages transmission in multifd
format.
  migration/multifd: Zero page transmission on the multifd thread.
  migration/multifd: Enable zero page checking from multifd threads.
  migration/multifd: Add new migration test cases for legacy zero page
checking.
  migration/multifd: Add zero pages and zero bytes counter to migration
status interface.
  Update maintainer contact for migration multifd zero page checking
acceleration.

 MAINTAINERS |  5 ++
 hw/core/qdev-properties-system.c| 10 
 include/hw/qdev-properties-system.h |  4 ++
 migration/meson.build   |  1 +
 migration/migration-hmp-cmds.c  | 13 +
 migration/migration.c   |  2 +
 migration/multifd-zero-page.c   | 59 +++
 migration/multifd-zlib.c| 26 +++--
 migration/multifd-zstd.c| 25 ++--
 migration/multifd.c | 90 -
 migration/multifd.h | 28 -
 migration/options.c | 21 +++
 migration/options.h |  1 +
 migration/ram.c | 50 
 migration/trace-events  |  8 +--
 qapi/migration.json | 47 +--
 tests/migration/guestperf/engine.py |  2 +
 tests/qtest/migration-test.c| 52 +
 18 files changed, 399 insertions(+), 45 deletions(-)
 create mode 100644 migration/multifd-zero-page.c

-- 
2.30.2




[PATCH v2 3/7] migration/multifd: Zero page transmission on the multifd thread.

2024-02-16 Thread Hao Xiang
1. Implements the zero page detection and handling on the multifd
threads for non-compression, zlib and zstd compression backends.
2. Added a new value 'multifd' in ZeroPageDetection enumeration.
3. Add proper asserts to ensure pages->normal are used for normal pages
in all scenarios.

Signed-off-by: Hao Xiang 
---
 migration/meson.build |  1 +
 migration/multifd-zero-page.c | 59 +++
 migration/multifd-zlib.c  | 26 ---
 migration/multifd-zstd.c  | 25 ---
 migration/multifd.c   | 50 +++--
 migration/multifd.h   |  7 +
 qapi/migration.json   |  4 ++-
 7 files changed, 151 insertions(+), 21 deletions(-)
 create mode 100644 migration/multifd-zero-page.c

diff --git a/migration/meson.build b/migration/meson.build
index 92b1cc4297..1eeb915ff6 100644
--- a/migration/meson.build
+++ b/migration/meson.build
@@ -22,6 +22,7 @@ system_ss.add(files(
   'migration.c',
   'multifd.c',
   'multifd-zlib.c',
+  'multifd-zero-page.c',
   'ram-compress.c',
   'options.c',
   'postcopy-ram.c',
diff --git a/migration/multifd-zero-page.c b/migration/multifd-zero-page.c
new file mode 100644
index 00..f0cd8e2c53
--- /dev/null
+++ b/migration/multifd-zero-page.c
@@ -0,0 +1,59 @@
+/*
+ * Multifd zero page detection implementation.
+ *
+ * Copyright (c) 2024 Bytedance Inc
+ *
+ * Authors:
+ *  Hao Xiang 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/cutils.h"
+#include "exec/ramblock.h"
+#include "migration.h"
+#include "multifd.h"
+#include "options.h"
+#include "ram.h"
+
+void multifd_zero_page_check_send(MultiFDSendParams *p)
+{
+/*
+ * QEMU versions older than 9.0 don't understand zero pages
+ * on the multifd channel. This switch is required to
+ * maintain backward compatibility.
+ */
+bool use_multifd_zero_page =
+(migrate_zero_page_detection() == ZERO_PAGE_DETECTION_MULTIFD);
+MultiFDPages_t *pages = p->pages;
+RAMBlock *rb = pages->block;
+
+assert(pages->num != 0);
+assert(pages->normal_num == 0);
+assert(pages->zero_num == 0);
+
+for (int i = 0; i < pages->num; i++) {
+uint64_t offset = pages->offset[i];
+if (use_multifd_zero_page &&
+buffer_is_zero(rb->host + offset, p->page_size)) {
+pages->zero[pages->zero_num] = offset;
+pages->zero_num++;
+ram_release_page(rb->idstr, offset);
+} else {
+pages->normal[pages->normal_num] = offset;
+pages->normal_num++;
+}
+}
+}
+
+void multifd_zero_page_check_recv(MultiFDRecvParams *p)
+{
+for (int i = 0; i < p->zero_num; i++) {
+void *page = p->host + p->zero[i];
+if (!buffer_is_zero(page, p->page_size)) {
+memset(page, 0, p->page_size);
+}
+}
+}
diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c
index 012e3bdea1..cdfe0fa70e 100644
--- a/migration/multifd-zlib.c
+++ b/migration/multifd-zlib.c
@@ -123,13 +123,20 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error 
**errp)
 int ret;
 uint32_t i;
 
+multifd_zero_page_check_send(p);
+
+if (!pages->normal_num) {
+p->next_packet_size = 0;
+goto out;
+}
+
 multifd_send_prepare_header(p);
 
-for (i = 0; i < pages->num; i++) {
+for (i = 0; i < pages->normal_num; i++) {
 uint32_t available = z->zbuff_len - out_size;
 int flush = Z_NO_FLUSH;
 
-if (i == pages->num - 1) {
+if (i == pages->normal_num - 1) {
 flush = Z_SYNC_FLUSH;
 }
 
@@ -138,7 +145,7 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error 
**errp)
  * with compression. zlib does not guarantee that this is safe,
  * therefore copy the page before calling deflate().
  */
-memcpy(z->buf, p->pages->block->host + pages->offset[i], p->page_size);
+memcpy(z->buf, p->pages->block->host + pages->normal[i], p->page_size);
 zs->avail_in = p->page_size;
 zs->next_in = z->buf;
 
@@ -172,10 +179,10 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error 
**errp)
 p->iov[p->iovs_num].iov_len = out_size;
 p->iovs_num++;
 p->next_packet_size = out_size;
-p->flags |= MULTIFD_FLAG_ZLIB;
 
+out:
+p->flags |= MULTIFD_FLAG_ZLIB;
 multifd_send_fill_packet(p);
-
 return 0;
 }
 
@@ -261,6 +268,14 @@ static int zlib_recv_pages(MultiFDRecvParams *p, Error 
**errp)
p->id, flags, MULTIFD_FLAG_ZLIB);
 return -1;
 }
+
+multifd_zero_page_check_recv(p);
+
+if (

[PATCH v2 5/7] migration/multifd: Add new migration test cases for legacy zero page checking.

2024-02-16 Thread Hao Xiang
Now that zero page checking is done on the multifd sender threads by
default, we still provide an option for backward compatibility. This
change adds a qtest migration test case to set the zero-page-detection
option to "legacy" and run multifd migration with zero page checking on the
migration main thread.

Signed-off-by: Hao Xiang 
---
 tests/qtest/migration-test.c | 52 
 1 file changed, 52 insertions(+)

diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index 8a5bb1752e..c27083110a 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -2621,6 +2621,24 @@ test_migrate_precopy_tcp_multifd_start(QTestState *from,
 return test_migrate_precopy_tcp_multifd_start_common(from, to, "none");
 }
 
+static void *
+test_migrate_precopy_tcp_multifd_start_zero_page_legacy(QTestState *from,
+QTestState *to)
+{
+test_migrate_precopy_tcp_multifd_start_common(from, to, "none");
+migrate_set_parameter_str(from, "zero-page-detection", "legacy");
+return NULL;
+}
+
+static void *
+test_migration_precopy_tcp_multifd_start_no_zero_page(QTestState *from,
+  QTestState *to)
+{
+test_migrate_precopy_tcp_multifd_start_common(from, to, "none");
+migrate_set_parameter_str(from, "zero-page-detection", "none");
+return NULL;
+}
+
 static void *
 test_migrate_precopy_tcp_multifd_zlib_start(QTestState *from,
 QTestState *to)
@@ -2652,6 +2670,36 @@ static void test_multifd_tcp_none(void)
 test_precopy_common();
 }
 
+static void test_multifd_tcp_zero_page_legacy(void)
+{
+MigrateCommon args = {
+.listen_uri = "defer",
+.start_hook = test_migrate_precopy_tcp_multifd_start_zero_page_legacy,
+/*
+ * Multifd is more complicated than most of the features, it
+ * directly takes guest page buffers when sending, make sure
+ * everything will work alright even if guest page is changing.
+ */
+.live = true,
+};
+test_precopy_common();
+}
+
+static void test_multifd_tcp_no_zero_page(void)
+{
+MigrateCommon args = {
+.listen_uri = "defer",
+.start_hook = test_migration_precopy_tcp_multifd_start_no_zero_page,
+/*
+ * Multifd is more complicated than most of the features, it
+ * directly takes guest page buffers when sending, make sure
+ * everything will work alright even if guest page is changing.
+ */
+.live = true,
+};
+test_precopy_common();
+}
+
 static void test_multifd_tcp_zlib(void)
 {
 MigrateCommon args = {
@@ -3550,6 +3598,10 @@ int main(int argc, char **argv)
 }
 migration_test_add("/migration/multifd/tcp/plain/none",
test_multifd_tcp_none);
+migration_test_add("/migration/multifd/tcp/plain/zero_page_legacy",
+   test_multifd_tcp_zero_page_legacy);
+migration_test_add("/migration/multifd/tcp/plain/no_zero_page",
+   test_multifd_tcp_no_zero_page);
 migration_test_add("/migration/multifd/tcp/plain/cancel",
test_multifd_tcp_cancel);
 migration_test_add("/migration/multifd/tcp/plain/zlib",
-- 
2.30.2




[PATCH v2 1/7] migration/multifd: Add new migration option zero-page-detection.

2024-02-16 Thread Hao Xiang
This new parameter controls where the zero page checking is running.
1. If this parameter is set to 'legacy', zero page checking is
done in the migration main thread.
2. If this parameter is set to 'none', zero page checking is disabled.

Signed-off-by: Hao Xiang 
---
 hw/core/qdev-properties-system.c| 10 ++
 include/hw/qdev-properties-system.h |  4 
 migration/migration-hmp-cmds.c  |  9 +
 migration/options.c | 21 
 migration/options.h |  1 +
 migration/ram.c |  4 
 qapi/migration.json | 30 ++---
 7 files changed, 76 insertions(+), 3 deletions(-)

diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
index 1a396521d5..63843f18b5 100644
--- a/hw/core/qdev-properties-system.c
+++ b/hw/core/qdev-properties-system.c
@@ -679,6 +679,16 @@ const PropertyInfo qdev_prop_mig_mode = {
 .set_default_value = qdev_propinfo_set_default_value_enum,
 };
 
+const PropertyInfo qdev_prop_zero_page_detection = {
+.name = "ZeroPageDetection",
+.description = "zero_page_detection values, "
+   "multifd,legacy,none",
+.enum_table = &ZeroPageDetection_lookup,
+.get = qdev_propinfo_get_enum,
+.set = qdev_propinfo_set_enum,
+.set_default_value = qdev_propinfo_set_default_value_enum,
+};
+
 /* --- Reserved Region --- */
 
 /*
diff --git a/include/hw/qdev-properties-system.h 
b/include/hw/qdev-properties-system.h
index 06c359c190..839b170235 100644
--- a/include/hw/qdev-properties-system.h
+++ b/include/hw/qdev-properties-system.h
@@ -8,6 +8,7 @@ extern const PropertyInfo qdev_prop_macaddr;
 extern const PropertyInfo qdev_prop_reserved_region;
 extern const PropertyInfo qdev_prop_multifd_compression;
 extern const PropertyInfo qdev_prop_mig_mode;
+extern const PropertyInfo qdev_prop_zero_page_detection;
 extern const PropertyInfo qdev_prop_losttickpolicy;
 extern const PropertyInfo qdev_prop_blockdev_on_error;
 extern const PropertyInfo qdev_prop_bios_chs_trans;
@@ -47,6 +48,9 @@ extern const PropertyInfo qdev_prop_iothread_vq_mapping_list;
 #define DEFINE_PROP_MIG_MODE(_n, _s, _f, _d) \
 DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_mig_mode, \
MigMode)
+#define DEFINE_PROP_ZERO_PAGE_DETECTION(_n, _s, _f, _d) \
+DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_zero_page_detection, \
+   ZeroPageDetection)
 #define DEFINE_PROP_LOSTTICKPOLICY(_n, _s, _f, _d) \
 DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_losttickpolicy, \
 LostTickPolicy)
diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c
index 99b49df5dd..7e96ae6ffd 100644
--- a/migration/migration-hmp-cmds.c
+++ b/migration/migration-hmp-cmds.c
@@ -344,6 +344,11 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict 
*qdict)
 monitor_printf(mon, "%s: %s\n",
 MigrationParameter_str(MIGRATION_PARAMETER_MULTIFD_COMPRESSION),
 MultiFDCompression_str(params->multifd_compression));
+assert(params->has_zero_page_detection);
+monitor_printf(mon, "%s: %s\n",
+MigrationParameter_str(MIGRATION_PARAMETER_ZERO_PAGE_DETECTION),
+qapi_enum_lookup(&ZeroPageDetection_lookup,
+params->zero_page_detection));
 monitor_printf(mon, "%s: %" PRIu64 " bytes\n",
 MigrationParameter_str(MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE),
 params->xbzrle_cache_size);
@@ -634,6 +639,10 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict 
*qdict)
 p->has_multifd_zstd_level = true;
visit_type_uint8(v, param, &p->multifd_zstd_level, &err);
 break;
+case MIGRATION_PARAMETER_ZERO_PAGE_DETECTION:
+p->has_zero_page_detection = true;
+visit_type_ZeroPageDetection(v, param, &p->zero_page_detection, &err);
+break;
 case MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE:
 p->has_xbzrle_cache_size = true;
if (!visit_type_size(v, param, &cache_size, &err)) {
diff --git a/migration/options.c b/migration/options.c
index 3e3e0b93b4..3c603391b0 100644
--- a/migration/options.c
+++ b/migration/options.c
@@ -179,6 +179,9 @@ Property migration_properties[] = {
 DEFINE_PROP_MIG_MODE("mode", MigrationState,
   parameters.mode,
   MIG_MODE_NORMAL),
+DEFINE_PROP_ZERO_PAGE_DETECTION("zero-page-detection", MigrationState,
+   parameters.zero_page_detection,
+   ZERO_PAGE_DETECTION_LEGACY),
 
 /* Migration capabilities */
 DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
@@ -903,6 +906,13 @@ uint64_t migrate_xbzrle_cache_size(void)
 return s->parameters.xbzrle_cache_size;
 }
 
+ZeroPageDetection migrate_zero_page_detection(void)
+{
+MigrationState *s = migrate_get_current();
+
+return s->parameters.zero_page_detection;
+}
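
For reference, the new parameter is set like any other migration parameter,
via HMP or QMP:

(qemu) migrate_set_parameter zero-page-detection legacy

{ "execute": "migrate-set-parameters",
  "arguments": { "zero-page-detection": "multifd" } }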

Re: [External] [PATCH v2 05/23] migration/multifd: Drop MultiFDSendParams.normal[] array

2024-02-13 Thread Hao Xiang
On Fri, Feb 9, 2024 at 4:20 AM Fabiano Rosas  wrote:
>
> Hao Xiang  writes:
>
> > On Fri, Feb 2, 2024 at 2:30 AM  wrote:
> >>
> >> From: Peter Xu 
> >>
> >> This array is redundant when p->pages exists.  Now we extended the life of
> >> p->pages to the whole period where pending_job is set, it should be safe to
> >> always use p->pages->offset[] rather than p->normal[].  Drop the array.
> >>
> >> Alongside, the normal_num is also redundant, which is the same to
> >> p->pages->num.
> >
> > Can we not drop p->normal and p->normal_num? They are redundant now but I
> > think they will be needed for multifd zero page checking. In multifd
> > zero page, we find out all zero pages and we sort the normal pages and
> > zero pages in two separate arrays. p->offset is the original array of
> > pages, p->normal will contain the array of normal pages and p->zero
> > will contain the array of zero pages.
>
> We're moving send_fill_packet into send_prepare(), so you should be able
> to do whatever data transformation at send_prepare() and add any fields
> you need into p->pages.
>
> If we keep p->normal we will not be able to switch into an opaque
> payload later on. There should be no mention of pages outside of
> hooks. This is long-term work, but let's avoid blocking it if possible.
>

Got it. I will make the proper changes.

Aside from that, I would like to get your opinions on the zero page
detection interface. Here are the options I am considering:
1) Do zero page detection in send_prepare().
This means no dedicated hook for zero_page_detection(); otherwise we
would be calling a hook from inside a hook. But we will need a new
function multifd_zero_page_check_send(), similar to how we use
multifd_send_fill_packet() now, and it will need to be called by all
send_prepare() implementations.
2) Do zero page detection in a new hook zero_page_detection().
zero_page_detection() would be called before send_prepare(). That seems
like extra complexity, but I can go that route if you think it's
cleaner.

I am leaning towards 1) right now.
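
Concretely, option 1 would have every send_prepare() implementation start
with the zero-page pass. A sketch for the no-compression backend (names per
this series; the exact shape of the iov loop is approximate):

static int nocomp_send_prepare(MultiFDSendParams *p, Error **errp)
{
    MultiFDPages_t *pages = p->pages;

    /* Split the queued offsets into normal[]/zero[] first. */
    multifd_zero_page_check_send(p);

    multifd_send_prepare_header(p);

    for (int i = 0; i < pages->normal_num; i++) {
        p->iov[p->iovs_num].iov_base = pages->block->host + pages->normal[i];
        p->iov[p->iovs_num].iov_len = p->page_size;
        p->iovs_num++;
    }

    p->next_packet_size = pages->normal_num * p->page_size;
    multifd_send_fill_packet(p);
    return 0;
}
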

> >>
> >> This doesn't apply to recv side, because there's no extra buffering on recv
> >> side, so p->normal[] array is still needed.
> >>
> >> Reviewed-by: Fabiano Rosas 
> >> Signed-off-by: Peter Xu 
> >> ---
> >>  migration/multifd.h  |  4 
> >>  migration/multifd-zlib.c |  7 ---
> >>  migration/multifd-zstd.c |  7 ---
> >>  migration/multifd.c  | 33 +
> >>  4 files changed, 21 insertions(+), 30 deletions(-)
> >>
> >> diff --git a/migration/multifd.h b/migration/multifd.h
> >> index 7c040cb85a..3920bdbcf1 100644
> >> --- a/migration/multifd.h
> >> +++ b/migration/multifd.h
> >> @@ -122,10 +122,6 @@ typedef struct {
> >>  struct iovec *iov;
> >>  /* number of iovs used */
> >>  uint32_t iovs_num;
> >> -/* Pages that are not zero */
> >> -ram_addr_t *normal;
> >> -/* num of non zero pages */
> >> -uint32_t normal_num;
> >>  /* used for compression methods */
> >>  void *data;
> >>  }  MultiFDSendParams;
> >> diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c
> >> index 37ce48621e..100809abc1 100644
> >> --- a/migration/multifd-zlib.c
> >> +++ b/migration/multifd-zlib.c
> >> @@ -116,17 +116,18 @@ static void zlib_send_cleanup(MultiFDSendParams *p, 
> >> Error **errp)
> >>   */
> >>  static int zlib_send_prepare(MultiFDSendParams *p, Error **errp)
> >>  {
> >> +MultiFDPages_t *pages = p->pages;
> >>  struct zlib_data *z = p->data;
> >>  z_stream *zs = >zs;
> >>  uint32_t out_size = 0;
> >>  int ret;
> >>  uint32_t i;
> >>
> >> -for (i = 0; i < p->normal_num; i++) {
> >> +for (i = 0; i < pages->num; i++) {
> >>  uint32_t available = z->zbuff_len - out_size;
> >>  int flush = Z_NO_FLUSH;
> >>
> >> -if (i == p->normal_num - 1) {
> >> +if (i == pages->num - 1) {
> >>  flush = Z_SYNC_FLUSH;
> >>  }
> >>
> >> @@ -135,7 +136,7 @@ static int zlib_send_prepare(MultiFDSendParams *p, 
> >> Error **errp)
> >>   * with compression. zlib does not guarantee that this is safe,
> >>   * therefore copy the page before calling deflate().

Re: [External] [PATCH v2 05/23] migration/multifd: Drop MultiFDSendParams.normal[] array

2024-02-08 Thread Hao Xiang
On Fri, Feb 2, 2024 at 2:30 AM  wrote:
>
> From: Peter Xu 
>
> This array is redundant when p->pages exists.  Now that we extended the
> life of p->pages to the whole period where pending_job is set, it should
> be safe to always use p->pages->offset[] rather than p->normal[].  Drop
> the array.
>
> Alongside, the normal_num is also redundant, which is the same as
> p->pages->num.

Can we not drop p->normal and p->normal_num? They are redundant now but I
think they will be needed for multifd zero page checking. In multifd
zero page, we find out all zero pages and we sort the normal pages and
zero pages into two separate arrays. p->offset is the original array of
pages, p->normal will contain the array of normal pages and p->zero
will contain the array of zero pages.
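
For illustration, the partitioning could look like the sketch below (it
later appears in this thread as zero_page_check_send()):

    /* Sketch: split p->pages->offset[] into normal and zero page arrays. */
    for (int i = 0; i < p->pages->num; i++) {
        ram_addr_t offset = p->pages->offset[i];

        if (buffer_is_zero(p->pages->block->host + offset, p->page_size)) {
            p->zero[p->zero_num++] = offset;
        } else {
            p->normal[p->normal_num++] = offset;
        }
    }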

>
> This doesn't apply to recv side, because there's no extra buffering on recv
> side, so p->normal[] array is still needed.
>
> Reviewed-by: Fabiano Rosas 
> Signed-off-by: Peter Xu 
> ---
>  migration/multifd.h  |  4 
>  migration/multifd-zlib.c |  7 ---
>  migration/multifd-zstd.c |  7 ---
>  migration/multifd.c  | 33 +
>  4 files changed, 21 insertions(+), 30 deletions(-)
>
> diff --git a/migration/multifd.h b/migration/multifd.h
> index 7c040cb85a..3920bdbcf1 100644
> --- a/migration/multifd.h
> +++ b/migration/multifd.h
> @@ -122,10 +122,6 @@ typedef struct {
>  struct iovec *iov;
>  /* number of iovs used */
>  uint32_t iovs_num;
> -/* Pages that are not zero */
> -ram_addr_t *normal;
> -/* num of non zero pages */
> -uint32_t normal_num;
>  /* used for compression methods */
>  void *data;
>  }  MultiFDSendParams;
> diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c
> index 37ce48621e..100809abc1 100644
> --- a/migration/multifd-zlib.c
> +++ b/migration/multifd-zlib.c
> @@ -116,17 +116,18 @@ static void zlib_send_cleanup(MultiFDSendParams *p, 
> Error **errp)
>   */
>  static int zlib_send_prepare(MultiFDSendParams *p, Error **errp)
>  {
> +MultiFDPages_t *pages = p->pages;
>  struct zlib_data *z = p->data;
>  z_stream *zs = &z->zs;
>  uint32_t out_size = 0;
>  int ret;
>  uint32_t i;
>
> -for (i = 0; i < p->normal_num; i++) {
> +for (i = 0; i < pages->num; i++) {
>  uint32_t available = z->zbuff_len - out_size;
>  int flush = Z_NO_FLUSH;
>
> -if (i == p->normal_num - 1) {
> +if (i == pages->num - 1) {
>  flush = Z_SYNC_FLUSH;
>  }
>
> @@ -135,7 +136,7 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error 
> **errp)
>   * with compression. zlib does not guarantee that this is safe,
>   * therefore copy the page before calling deflate().
>   */
> -memcpy(z->buf, p->pages->block->host + p->normal[i], p->page_size);
> +memcpy(z->buf, p->pages->block->host + pages->offset[i], 
> p->page_size);
>  zs->avail_in = p->page_size;
>  zs->next_in = z->buf;
>
> diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c
> index b471daadcd..2023edd8cc 100644
> --- a/migration/multifd-zstd.c
> +++ b/migration/multifd-zstd.c
> @@ -113,6 +113,7 @@ static void zstd_send_cleanup(MultiFDSendParams *p, Error 
> **errp)
>   */
>  static int zstd_send_prepare(MultiFDSendParams *p, Error **errp)
>  {
> +MultiFDPages_t *pages = p->pages;
>  struct zstd_data *z = p->data;
>  int ret;
>  uint32_t i;
> @@ -121,13 +122,13 @@ static int zstd_send_prepare(MultiFDSendParams *p, 
> Error **errp)
>  z->out.size = z->zbuff_len;
>  z->out.pos = 0;
>
> -for (i = 0; i < p->normal_num; i++) {
> +for (i = 0; i < pages->num; i++) {
>  ZSTD_EndDirective flush = ZSTD_e_continue;
>
> -if (i == p->normal_num - 1) {
> +if (i == pages->num - 1) {
>  flush = ZSTD_e_flush;
>  }
> -z->in.src = p->pages->block->host + p->normal[i];
> +z->in.src = p->pages->block->host + pages->offset[i];
>  z->in.size = p->page_size;
>  z->in.pos = 0;
>
> diff --git a/migration/multifd.c b/migration/multifd.c
> index 5633ac245a..8bb1fd95cf 100644
> --- a/migration/multifd.c
> +++ b/migration/multifd.c
> @@ -90,13 +90,13 @@ static int nocomp_send_prepare(MultiFDSendParams *p, 
> Error **errp)
>  {
>  MultiFDPages_t *pages = p->pages;
>
> -for (int i = 0; i < p->normal_num; i++) {
> -p->iov[p->iovs_num].iov_base = pages->block->host + p->normal[i];
> +for (int i = 0; i < pages->num; i++) {
> +p->iov[p->iovs_num].iov_base = pages->block->host + pages->offset[i];
>  p->iov[p->iovs_num].iov_len = p->page_size;
>  p->iovs_num++;
>  }
>
> -p->next_packet_size = p->normal_num * p->page_size;
> +p->next_packet_size = pages->num * p->page_size;
>  p->flags |= MULTIFD_FLAG_NOCOMP;
>  return 0;
>  }
> @@ -269,21 +269,22 @@ static void multifd_pages_clear(MultiFDPages_t *pages)
>  static void 

Re: [External] Re: [PATCH 3/6] migration/multifd: Support for zero pages transmission in multifd format.

2024-02-08 Thread Hao Xiang
On Tue, Feb 6, 2024 at 8:25 PM Peter Xu  wrote:
>
> On Tue, Feb 06, 2024 at 11:19:05PM +0000, Hao Xiang wrote:
> > diff --git a/migration/multifd.c b/migration/multifd.c
> > index 25cbc6dc6b..a20d0ed10e 100644
> > --- a/migration/multifd.c
> > +++ b/migration/multifd.c
> > @@ -264,6 +264,7 @@ static void multifd_send_fill_packet(MultiFDSendParams 
> > *p)
> >  packet->flags = cpu_to_be32(p->flags);
> >  packet->pages_alloc = cpu_to_be32(p->pages->allocated);
> >  packet->normal_pages = cpu_to_be32(p->normal_num);
> > +packet->zero_pages = cpu_to_be32(p->zero_num);
>
> This doesn't look right..
>
> If to fill up the zero accounting only, we shouldn't be touching multifd
> packet at all since multifd zero page detection is not yet supported.
>
> We should only reference mig_stats.zero_pages.

p->zero_num will always be 0 because multifd zero page checking is not
yet enabled. The next commit will contain the code to calculate
p->zero_num for each packet. This is just preparation for the next
commit that enables the feature. Zero pages on the main migration thread
go through a different format. We are using the same counter,
mig_stats.zero_pages, to track zero pages for both the legacy zero page
checking and multifd zero page checking. These two are mutually
exclusive, so zero pages will not be double counted.

Let me know if I am missing something.
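
For illustration, a sketch of what the sender-side accounting could look
like once the feature is enabled (assuming the existing stat64_add() and
mig_stats counters; the exact placement is up to the next commit):

    /*
     * Per-packet accounting after zero page detection. The same
     * mig_stats.zero_pages counter serves both the legacy path and the
     * multifd path; the two are mutually exclusive, so nothing is
     * counted twice.
     */
    stat64_add(&mig_stats.normal_pages, p->normal_num);
    stat64_add(&mig_stats.zero_pages, p->zero_num);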

>
> >  packet->next_packet_size = cpu_to_be32(p->next_packet_size);
> >  packet->packet_num = cpu_to_be64(p->packet_num);
>
> --
> Peter Xu
>



Re: [External] RE: Regarding to the recent Intel IAA/DSA/QAT support on migration

2024-02-07 Thread Hao Xiang
On Wed, Feb 7, 2024 at 12:38 AM Liu, Yuan1  wrote:
>
> Thank you very much for your reminder and the rapid updates to the
> multifd function. I will incorporate your suggestions into the next
> version (IAA Accelerated Live Migration solution).
>
> Regarding the QAT and DSA optimization, my colleagues and I have
> already started reviewing and testing them, and it seems like a
> promising optimization direction. I am more than willing to contribute
> further efforts to the long-term maintenance of Intel accelerators in
> live migration.
>
> > -Original Message-
> > From: Peter Xu 
> > Sent: Wednesday, February 7, 2024 4:10 PM
> > To: Bryan Zhang ; Hao Xiang
> > ; Liu, Yuan1 
> > Cc: Fabiano Rosas ; QEMU Devel Mailing List  > de...@nongnu.org>
> > Subject: Regarding to the recent Intel IAA/DSA/QAT support on migration
> >
> > Copy qemu-devel.
> >
> > On Wed, Feb 07, 2024 at 04:07:40PM +0800, Peter Xu wrote:
> > > Hi,
> > >
> > > I'm sending this email just to leave a generic comment to the recent
> > > migration efforts to enable these new Intel technologies.
> > >
> > > The relevant patchsets (latest version so far) we're discussing are:
> > >
> > >   [PATCH v3 0/4] Live Migration Acceleration with IAA Compression
> > >
> > > https://lore.kernel.org/r/20240103112851.908082-1-yuan1@intel.com
> > >
> > >   [PATCH v3 00/20] Use Intel DSA accelerator to offload zero page
> > checking in multifd live migration.
> > >
> > > https://lore.kernel.org/r/20240104004452.324068-1-hao.xiang@bytedance.
> > > com
> > >
> > >   [PATCH 0/5] *** Implement using Intel QAT to offload ZLIB
> > >
> > > https://lore.kernel.org/r/20231231205804.2366509-1-bryan.zhang@bytedan
> > > ce.com
> > >
> > > I want to comment in a generic way since this should apply to all
> > > these
> > > series:
> > >
> > >   - A heads-up that multifd code is rapidly changing recently, I
> > apologize
> > > that you'll need a rebase.  It's just that it's probably much better
> > to
> > > do this before anything lands there.
> > >
> > > IIUC the good thing is we found that send_prepare() doesn't need to
> > be
> > > changed that much, however there's still some change; please refer
> > to
> > > the new code (I'll prepare a pull tomorrow to include most of the
> > > changes, and we should have a major thread race fixed too with
> > Fabiano
> > > & Avihai's help). I hope this will also provide some kind of
> > isolation
> > > to e.g. other works that may touch other areas.  E.g., I hope fixed-
> > ram
> > > won't need to conflict much with any of the above series now.

Thanks for the update. The rebase shouldn't be that bad so no worries.

> > >
> > >   - When posting the new patchset (if there is a plan..), please make
> > sure
> > > we have:
> > >
> > > - Proper unit tests for the new code (probably mostly software
> > >   fallbacks to be tested on the new libraries being introduced; just
> > to
> > >   make sure the new library code paths can get some torture please).
> > >
> > > - Proper documentation for the new code.  Please feel free to start
> > >   creating your own .rst file under docs/devel/migration/, we can
> > try
> > >   to merge them later.  It should help avoid conflicts.  Please
> > also
> > >   link the new file into index.rst there.
> > >
> > >   IMHO the document can contain many things, the important ones
> > could
> > >   start from: who should enable such feature; what one can get from
> > >   having it enabled; what is the HW requirement to enable it; how
> > >   should one tune the new parameters, and so on... some links to the
> > >   technology behinds it would be nice too to be referenced.
> > >
> > > - Try to add new code (especially HW/library based) into new file.
> > >   I see that QPL & QAT already proposed their own files
> > >   (multifd-qpl.c, multifd-qatzip.c) which is great.
> > >
> > >   Xiang, please also consider doing so for the DSA based zero page
> > >   detection.  It can be called multifd-zero-page.c, for example, and
> > >   you can create it when working on the
> > >   offload-zero-page-detect-to-multifd patchset already.

Sounds good.

Re: [External] Re: [PATCH 1/6] migration/multifd: Add new migration option multifd-zero-page.

2024-02-07 Thread Hao Xiang
On Tue, Feb 6, 2024 at 7:45 PM Peter Xu  wrote:
>
> On Tue, Feb 06, 2024 at 11:19:03PM +0000, Hao Xiang wrote:
> > diff --git a/qapi/migration.json b/qapi/migration.json
> > index 819708321d..ff033a0344 100644
> > --- a/qapi/migration.json
> > +++ b/qapi/migration.json
> > @@ -874,6 +874,11 @@
> >  # @mode: Migration mode. See description in @MigMode. Default is 'normal'.
> >  #(Since 8.2)
> >  #
> > +# @multifd-zero-page: Multifd zero page checking. If the parameter is true,
> > +# zero page checking is done on the multifd sender thread. If the parameter
> > +# is false, zero page checking is done on the migration main thread. Default
> > +# is set to true. (Since 9.0)
>
> I replied somewhere before on this, but I can try again..
>
> Do you think it'll be better to introduce a generic parameter for zero page
> detection?
>
>   - "none" if disabled,
>   - "legacy" for main thread,
>   - "multifd" for multifd (software-based).
>
> A string could work, but maybe cleaner to introduce
> @MigrationZeroPageDetector enum?
>
> When you add more, you can keep extending that with the single field
> ("multifd-dsa", etc.).
>
> --
> Peter Xu
>

Sorry I overlooked the previous email. This sounds like a good idea.
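
For reference, the C enum QAPI would generate for such a parameter might
look like this (a sketch of the suggestion; names are illustrative, not
from a posted patch):

    typedef enum ZeroPageDetection {
        ZERO_PAGE_DETECTION_NONE,    /* zero page checking disabled */
        ZERO_PAGE_DETECTION_LEGACY,  /* check on the migration main thread */
        ZERO_PAGE_DETECTION_MULTIFD, /* check on the multifd sender threads */
    } ZeroPageDetection;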



Re: [External] Re: [PATCH 0/6] Introduce multifd zero page checking.

2024-02-07 Thread Hao Xiang
On Tue, Feb 6, 2024 at 7:39 PM Peter Xu  wrote:
>
> On Tue, Feb 06, 2024 at 11:19:02PM +0000, Hao Xiang wrote:
> > This patchset is based on Juan Quintela's old series here
> > https://lore.kernel.org/all/20220802063907.18882-1-quint...@redhat.com/
> >
> > In the multifd live migration model, there is a single migration main
> > thread scanning the page map, queuing the pages to multiple multifd
> > sender threads. The migration main thread runs zero page checking on
> > every page before queuing the page to the sender threads. Zero page
> > checking is a CPU intensive task and hence having a single thread doing
> > all that doesn't scale well. This change introduces a new function
> > to run the zero page checking on the multifd sender threads. This
> > patchset also lays the groundwork for future changes to offload the zero
> > page checking task to accelerator hardware.
> >
> > Use two Intel 4th generation Xeon servers for testing.
> >
> > Architecture:x86_64
> > CPU(s):  192
> > Thread(s) per core:  2
> > Core(s) per socket:  48
> > Socket(s):   2
> > NUMA node(s):2
> > Vendor ID:   GenuineIntel
> > CPU family:  6
> > Model:   143
> > Model name:  Intel(R) Xeon(R) Platinum 8457C
> > Stepping:8
> > CPU MHz: 2538.624
> > CPU max MHz: 3800.0000
> > CPU min MHz: 800.0000
> >
> > Perform multifd live migration with below setup:
> > 1. VM has 100GB memory. All pages in the VM are zero pages.
> > 2. Use tcp socket for live migration.
> > 3. Use 4 multifd channels and zero page checking on migration main thread.
> > 4. Use 1/2/4 multifd channels and zero page checking on multifd sender
> > threads.
> > 5. Record migration total time from sender QEMU console's "info migrate"
> > command.
> > 6. Calculate throughput with "100GB / total time".
> >
> > +--------------------+----------------+------------------+
> > | zero-page-checking | total-time(ms) | throughput(GB/s) |
> > +--------------------+----------------+------------------+
> > | main-thread        | 9629           | 10.38GB/s        |
> > +--------------------+----------------+------------------+
> > | multifd-1-threads  | 6182           | 16.17GB/s        |
> > +--------------------+----------------+------------------+
> > | multifd-2-threads  | 4643           | 21.53GB/s        |
> > +--------------------+----------------+------------------+
> > | multifd-4-threads  | 4143           | 24.13GB/s        |
> > +--------------------+----------------+------------------+
>
> This "throughput" is slightly confusing; I was initially surprised to see a
> large throughput for idle guests.  IMHO the "total-time" would explain.
> Feel free to drop that column if there's a repost.
>
> Did you check why 4 channels mostly already reached the top line?  Is it
> because main thread is already spinning 100%?
>
> Thanks,
>
> --
> Peter Xu

Sure I will drop "throughput" to avoid confusion. In my testing, 1
multifd channel already makes the main thread spin at 100%. So the
total-time is the same across 1/2/4 multifd channels as long as zero
page is run on the main migration thread. Of course, this is based on
the fact that the network is not the bottleneck. One interesting
finding is that multifd 1 channel with multifd zero page has better
performance than multifd 1 channel with main migration thread.
>



Re: [External] Re: Re: [PATCH 2/6] migration/multifd: Add zero pages and zero bytes counter to migration status interface.

2024-02-07 Thread Hao Xiang
On Wed, Feb 7, 2024 at 12:41 AM Jiri Denemark  wrote:
>
> On Wed, Feb 07, 2024 at 12:37:15 +0800, Peter Xu wrote:
> > On Wed, Feb 07, 2024 at 12:13:10PM +0800, Peter Xu wrote:
> > > On Tue, Feb 06, 2024 at 11:19:04PM +, Hao Xiang wrote:
> > > > This change extends the MigrationStats interface to track zero pages
> > > > and zero bytes counters.
> > > >
> > > > Signed-off-by: Hao Xiang 
> > >
> > > Reviewed-by: Peter Xu 
> >
> > I'll need to scratch this, sorry..
> >
> > The issue is I forgot we have "duplicate", which is exactly "zero
> > pages". See:
> >
> > info->ram->duplicate = stat64_get(&mig_stats.zero_pages);
> >
> > If you think the name is too confusing and want a replacement, maybe
> > it's fine and maybe we can do that.  Then we can keep this zero page
> > counter introduced, reporting the same value as "duplicate", then with a
> > follow-up patch to deprecate the "duplicate" parameter.  See an example
> > of how to deprecate in 7b24d326348e1672.
> >
> > One thing I'm not sure about is whether Libvirt will be fine with losing
> > "duplicate" after 2+ QEMU major releases.  Copying Jiri for this.  My
> > understanding is that Libvirt should be keeping an eye on the deprecation
> > list and reacting, but I'd like to double check.
>
> This should not be a big deal as we can internally map either one
> (depending on what QEMU supports) to the same libvirt's field. AFAIK
> there is a consensus on Cc-ing libvirt-devel on patches that deprecate
> QEMU interfaces so that we can update our code in time before the
> deprecated interface is dropped.
>
> BTW, libvirt maps "duplicate" to:
>
> /**
>  * VIR_DOMAIN_JOB_MEMORY_CONSTANT:
>  *
>  * virDomainGetJobStats field: number of pages filled with a constant
>  * byte (all bytes in a single page are identical) transferred since the
>  * beginning of the migration job, as VIR_TYPED_PARAM_ULLONG.
>  *
>  * The most common example of such pages are zero pages, i.e., pages filled
>  * with zero bytes.
>  *
>  * Since: 1.0.3
>  */
> # define VIR_DOMAIN_JOB_MEMORY_CONSTANT  "memory_constant"
>
> Jirka
>

Interesting. I didn't notice the existence of "duplicate" for zero
pages. I do think the name is quite confusing. I will create the
"zero/zero_bytes" counter and a separate commit to deprecate
"duplicate". Will add libvirt devs per instruction above.



[PATCH 4/6] migration/multifd: Zero page transmission on the multifd thread.

2024-02-06 Thread Hao Xiang
This implements the zero page detection and handling on the multifd
threads.

Signed-off-by: Hao Xiang 
---
 migration/multifd.c | 62 +
 migration/multifd.h |  5 
 2 files changed, 62 insertions(+), 5 deletions(-)

diff --git a/migration/multifd.c b/migration/multifd.c
index a20d0ed10e..c031f947c7 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -11,6 +11,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/cutils.h"
 #include "qemu/rcu.h"
 #include "exec/target_page.h"
 #include "sysemu/sysemu.h"
@@ -278,6 +279,12 @@ static void multifd_send_fill_packet(MultiFDSendParams *p)
 
 packet->offset[i] = cpu_to_be64(temp);
 }
+for (i = 0; i < p->zero_num; i++) {
+/* there are architectures where ram_addr_t is 32 bit */
+uint64_t temp = p->zero[i];
+
+packet->offset[p->normal_num + i] = cpu_to_be64(temp);
+}
 }
 
 static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
@@ -360,6 +367,18 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams 
*p, Error **errp)
 p->normal[i] = offset;
 }
 
+for (i = 0; i < p->zero_num; i++) {
+uint64_t offset = be64_to_cpu(packet->offset[p->normal_num + i]);
+
+if (offset > (p->block->used_length - p->page_size)) {
+error_setg(errp, "multifd: offset too long %" PRIu64
+   " (max " RAM_ADDR_FMT ")",
+   offset, p->block->used_length);
+return -1;
+}
+p->zero[i] = offset;
+}
+
 return 0;
 }
 
@@ -658,13 +677,37 @@ int multifd_send_sync_main(void)
 return 0;
 }
 
+static void zero_page_check_send(MultiFDSendParams *p)
+{
+/*
+ * QEMU versions older than 9.0 don't understand zero pages
+ * on the multifd channel. This switch is required to
+ * maintain backward compatibility.
+ */
+bool use_multifd_zero_page = migrate_multifd_zero_page();
+RAMBlock *rb = p->pages->block;
+
+for (int i = 0; i < p->pages->num; i++) {
+uint64_t offset = p->pages->offset[i];
+if (use_multifd_zero_page &&
+buffer_is_zero(rb->host + offset, p->page_size)) {
+p->zero[p->zero_num] = offset;
+p->zero_num++;
+ram_release_page(rb->idstr, offset);
+} else {
+p->normal[p->normal_num] = offset;
+p->normal_num++;
+}
+}
+}
+
 static void *multifd_send_thread(void *opaque)
 {
 MultiFDSendParams *p = opaque;
 MigrationThread *thread = NULL;
 Error *local_err = NULL;
-int ret = 0;
 bool use_zero_copy_send = migrate_zero_copy_send();
+int ret = 0;
 
 thread = migration_threads_add(p->name, qemu_get_thread_id());
 
@@ -699,10 +742,7 @@ static void *multifd_send_thread(void *opaque)
 p->iovs_num = 1;
 }
 
-for (int i = 0; i < p->pages->num; i++) {
-p->normal[p->normal_num] = p->pages->offset[i];
-p->normal_num++;
-}
+zero_page_check_send(p);
 
 if (p->normal_num) {
ret = multifd_send_state->ops->send_prepare(p, &local_err);
@@ -1107,6 +1147,16 @@ void multifd_recv_sync_main(void)
 trace_multifd_recv_sync_main(multifd_recv_state->packet_num);
 }
 
+static void zero_page_check_recv(MultiFDRecvParams *p)
+{
+for (int i = 0; i < p->zero_num; i++) {
+void *page = p->host + p->zero[i];
+if (!buffer_is_zero(page, p->page_size)) {
+memset(page, 0, p->page_size);
+}
+}
+}
+
 static void *multifd_recv_thread(void *opaque)
 {
 MultiFDRecvParams *p = opaque;
@@ -1153,6 +1203,8 @@ static void *multifd_recv_thread(void *opaque)
 }
 }
 
+zero_page_check_recv(p);
+
 if (flags & MULTIFD_FLAG_SYNC) {
qemu_sem_post(&multifd_recv_state->sem_sync);
qemu_sem_wait(&p->sem_sync);
diff --git a/migration/multifd.h b/migration/multifd.h
index 6be9b2f6c1..7448cb1aa9 100644
--- a/migration/multifd.h
+++ b/migration/multifd.h
@@ -53,6 +53,11 @@ typedef struct {
 uint32_t unused32[1];/* Reserved for future use */
 uint64_t unused64[3];/* Reserved for future use */
 char ramblock[256];
+/*
+ * This array contains the pointers to:
+ *  - normal pages (initial normal_pages entries)
+ *  - zero pages (following zero_pages entries)
+ */
 uint64_t offset[];
 } __attribute__((packed)) MultiFDPacket_t;
 
-- 
2.30.2




[PATCH 3/6] migration/multifd: Support for zero pages transmission in multifd format.

2024-02-06 Thread Hao Xiang
This change adds zero page counters and updates multifd send/receive
tracing format to track the newly added counters.

Signed-off-by: Hao Xiang 
---
 migration/migration-hmp-cmds.c |  4 
 migration/multifd.c| 43 ++
 migration/multifd.h| 17 +-
 migration/trace-events |  8 +++
 4 files changed, 57 insertions(+), 15 deletions(-)

diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c
index 8b0c205a41..2dd99b0509 100644
--- a/migration/migration-hmp-cmds.c
+++ b/migration/migration-hmp-cmds.c
@@ -111,6 +111,10 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
info->ram->normal);
 monitor_printf(mon, "normal bytes: %" PRIu64 " kbytes\n",
info->ram->normal_bytes >> 10);
+monitor_printf(mon, "zero: %" PRIu64 " pages\n",
+   info->ram->zero);
+monitor_printf(mon, "zero bytes: %" PRIu64 " kbytes\n",
+   info->ram->zero_bytes >> 10);
 monitor_printf(mon, "dirty sync count: %" PRIu64 "\n",
info->ram->dirty_sync_count);
 monitor_printf(mon, "page size: %" PRIu64 " kbytes\n",
diff --git a/migration/multifd.c b/migration/multifd.c
index 25cbc6dc6b..a20d0ed10e 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -264,6 +264,7 @@ static void multifd_send_fill_packet(MultiFDSendParams *p)
 packet->flags = cpu_to_be32(p->flags);
 packet->pages_alloc = cpu_to_be32(p->pages->allocated);
 packet->normal_pages = cpu_to_be32(p->normal_num);
+packet->zero_pages = cpu_to_be32(p->zero_num);
 packet->next_packet_size = cpu_to_be32(p->next_packet_size);
 packet->packet_num = cpu_to_be64(p->packet_num);
 
@@ -317,18 +318,26 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams 
*p, Error **errp)
 p->normal_num = be32_to_cpu(packet->normal_pages);
 if (p->normal_num > packet->pages_alloc) {
 error_setg(errp, "multifd: received packet "
-   "with %u pages and expected maximum pages are %u",
+   "with %u normal pages and expected maximum pages are %u",
p->normal_num, packet->pages_alloc) ;
 return -1;
 }
 
-p->next_packet_size = be32_to_cpu(packet->next_packet_size);
-p->packet_num = be64_to_cpu(packet->packet_num);
+p->zero_num = be32_to_cpu(packet->zero_pages);
+if (p->zero_num > packet->pages_alloc - p->normal_num) {
+error_setg(errp, "multifd: received packet "
+   "with %u zero pages and expected maximum zero pages are %u",
+   p->zero_num, packet->pages_alloc - p->normal_num) ;
+return -1;
+}
 
-if (p->normal_num == 0) {
+if (p->normal_num == 0 && p->zero_num == 0) {
 return 0;
 }
 
+p->next_packet_size = be32_to_cpu(packet->next_packet_size);
+p->packet_num = be64_to_cpu(packet->packet_num);
+
 /* make sure that ramblock is 0 terminated */
 packet->ramblock[255] = 0;
 p->block = qemu_ram_block_by_name(packet->ramblock);
@@ -430,6 +439,7 @@ static int multifd_send_pages(void)
 p->packet_num = multifd_send_state->packet_num++;
 multifd_send_state->pages = p->pages;
 p->pages = pages;
+
qemu_mutex_unlock(&p->mutex);
qemu_sem_post(&p->sem);
 
@@ -551,6 +561,8 @@ void multifd_save_cleanup(void)
 p->iov = NULL;
 g_free(p->normal);
 p->normal = NULL;
+g_free(p->zero);
+p->zero = NULL;
multifd_send_state->ops->send_cleanup(p, &local_err);
 if (local_err) {
 migrate_set_error(migrate_get_current(), local_err);
@@ -679,6 +691,7 @@ static void *multifd_send_thread(void *opaque)
 uint64_t packet_num = p->packet_num;
 uint32_t flags;
 p->normal_num = 0;
+p->zero_num = 0;
 
 if (use_zero_copy_send) {
 p->iovs_num = 0;
@@ -703,12 +716,13 @@ static void *multifd_send_thread(void *opaque)
 p->flags = 0;
 p->num_packets++;
 p->total_normal_pages += p->normal_num;
+p->total_zero_pages += p->zero_num;
 p->pages->num = 0;
 p->pages->block = NULL;
 qemu_mutex_unlock(>mutex);
 
-trace_multifd_send(p->id, packet_num, p->normal_num, flags,
-   p->next_packet_size);
+trace_multifd_send(p->id, packet_num, p->normal_num, p->zero_num,
+   flags, p->next_packet_size);

[PATCH 6/6] migration/multifd: Add a new migration test case for legacy zero page checking.

2024-02-06 Thread Hao Xiang
Now that zero page checking is done on the multifd sender threads by
default, we still provide an option for backward compatibility. This
change adds a qtest migration test case to set the multifd-zero-page
option to false and run multifd migration with zero page checking on the
migration main thread.

Signed-off-by: Hao Xiang 
---
 tests/qtest/migration-test.c | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index 7675519cfa..2c13df04c3 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -2621,6 +2621,15 @@ test_migrate_precopy_tcp_multifd_start(QTestState *from,
 return test_migrate_precopy_tcp_multifd_start_common(from, to, "none");
 }
 
+static void *
+test_migrate_precopy_tcp_multifd_start_zero_page_legacy(QTestState *from,
+QTestState *to)
+{
+test_migrate_precopy_tcp_multifd_start_common(from, to, "none");
+migrate_set_parameter_bool(from, "multifd-zero-page", false);
+return NULL;
+}
+
 static void *
 test_migrate_precopy_tcp_multifd_zlib_start(QTestState *from,
 QTestState *to)
@@ -2652,6 +2661,21 @@ static void test_multifd_tcp_none(void)
 test_precopy_common(&args);
 }
 
+static void test_multifd_tcp_zero_page_legacy(void)
+{
+MigrateCommon args = {
+.listen_uri = "defer",
+.start_hook = test_migrate_precopy_tcp_multifd_start_zero_page_legacy,
+/*
+ * Multifd is more complicated than most of the features, it
+ * directly takes guest page buffers when sending, make sure
+ * everything will work alright even if guest page is changing.
+ */
+.live = true,
+};
+test_precopy_common(&args);
+}
+
 static void test_multifd_tcp_zlib(void)
 {
 MigrateCommon args = {
@@ -3550,6 +3574,8 @@ int main(int argc, char **argv)
 }
 migration_test_add("/migration/multifd/tcp/plain/none",
test_multifd_tcp_none);
+migration_test_add("/migration/multifd/tcp/plain/zero_page_legacy",
+   test_multifd_tcp_zero_page_legacy);
 migration_test_add("/migration/multifd/tcp/plain/cancel",
test_multifd_tcp_cancel);
 migration_test_add("/migration/multifd/tcp/plain/zlib",
-- 
2.30.2




[PATCH 5/6] migration/multifd: Enable zero page checking from multifd threads.

2024-02-06 Thread Hao Xiang
This change adds a dedicated handler for MigrationOps::ram_save_target_page
in multifd live migration. Now zero page checking can be done in the multifd
threads and this becomes the default configuration. We still provide backward
compatibility where zero page checking is done from the migration main thread.

Signed-off-by: Hao Xiang 
---
 migration/multifd.c |  3 ++-
 migration/ram.c | 49 -
 2 files changed, 42 insertions(+), 10 deletions(-)

diff --git a/migration/multifd.c b/migration/multifd.c
index c031f947c7..c6833ccb07 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -13,6 +13,7 @@
 #include "qemu/osdep.h"
 #include "qemu/cutils.h"
 #include "qemu/rcu.h"
+#include "qemu/cutils.h"
 #include "exec/target_page.h"
 #include "sysemu/sysemu.h"
 #include "exec/ramblock.h"
@@ -458,7 +459,6 @@ static int multifd_send_pages(void)
 p->packet_num = multifd_send_state->packet_num++;
 multifd_send_state->pages = p->pages;
 p->pages = pages;
-
 qemu_mutex_unlock(&p->mutex);
 qemu_sem_post(&p->sem);
 
@@ -733,6 +733,7 @@ static void *multifd_send_thread(void *opaque)
 if (p->pending_job) {
 uint64_t packet_num = p->packet_num;
 uint32_t flags;
+
 p->normal_num = 0;
 p->zero_num = 0;
 
diff --git a/migration/ram.c b/migration/ram.c
index d5b7cd5ac2..e6742c9593 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -1252,6 +1252,10 @@ static int ram_save_page(RAMState *rs, PageSearchStatus 
*pss)
 
 static int ram_save_multifd_page(RAMBlock *block, ram_addr_t offset)
 {
+assert(migrate_multifd());
+assert(!migrate_compress());
+assert(!migration_in_postcopy());
+
 if (multifd_queue_page(block, offset) < 0) {
 return -1;
 }
@@ -2043,7 +2047,6 @@ static bool save_compress_page(RAMState *rs, 
PageSearchStatus *pss,
  */
 static int ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss)
 {
-RAMBlock *block = pss->block;
 ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
 int res;
 
@@ -2059,17 +2062,40 @@ static int ram_save_target_page_legacy(RAMState *rs, 
PageSearchStatus *pss)
 return 1;
 }
 
+return ram_save_page(rs, pss);
+}
+
+/**
+ * ram_save_target_page_multifd: save one target page
+ *
+ * Returns the number of pages written
+ *
+ * @rs: current RAM state
+ * @pss: data about the page we want to send
+ */
+static int ram_save_target_page_multifd(RAMState *rs, PageSearchStatus *pss)
+{
+RAMBlock *block = pss->block;
+ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
+
+/* Multifd is not compatible with old compression. */
+assert(!migrate_compress());
+
+/* Multifd is not compatible with postcopy. */
+assert(!migration_in_postcopy());
+
 /*
- * Do not use multifd in postcopy as one whole host page should be
- * placed.  Meanwhile postcopy requires atomic update of pages, so even
- * if host page size == guest page size the dest guest during run may
- * still see partially copied pages which is data corruption.
+ * Backward compatibility support. While using multifd live
+ * migration, we still need to handle zero page checking on the
+ * migration main thread.
  */
-if (migrate_multifd() && !migration_in_postcopy()) {
-return ram_save_multifd_page(block, offset);
+if (!migrate_multifd_zero_page()) {
+if (save_zero_page(rs, pss, offset)) {
+return 1;
+}
 }
 
-return ram_save_page(rs, pss);
+return ram_save_multifd_page(block, offset);
 }
 
 /* Should be called before sending a host page */
@@ -2981,7 +3007,12 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
 }
 
 migration_ops = g_malloc0(sizeof(MigrationOps));
-migration_ops->ram_save_target_page = ram_save_target_page_legacy;
+
+if (migrate_multifd()) {
+migration_ops->ram_save_target_page = ram_save_target_page_multifd;
+} else {
+migration_ops->ram_save_target_page = ram_save_target_page_legacy;
+}
 
 bql_unlock();
 ret = multifd_send_sync_main();
-- 
2.30.2




[PATCH 2/6] migration/multifd: Add zero pages and zero bytes counter to migration status interface.

2024-02-06 Thread Hao Xiang
This change extends the MigrationStats interface to track zero pages
and zero bytes counters.

Signed-off-by: Hao Xiang 
---
 qapi/migration.json | 15 ++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/qapi/migration.json b/qapi/migration.json
index ff033a0344..69366fe3f4 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -63,6 +63,10 @@
 # between 0 and @dirty-sync-count * @multifd-channels.  (since
 # 7.1)
 #
+# @zero: number of zero pages (since 9.0)
+#
+# @zero-bytes: number of zero bytes sent (since 9.0)
+#
 # Features:
 #
 # @deprecated: Member @skipped is always zero since 1.5.3
@@ -81,7 +85,8 @@
'multifd-bytes': 'uint64', 'pages-per-second': 'uint64',
'precopy-bytes': 'uint64', 'downtime-bytes': 'uint64',
'postcopy-bytes': 'uint64',
-   'dirty-sync-missed-zero-copy': 'uint64' } }
+   'dirty-sync-missed-zero-copy': 'uint64',
+   'zero': 'int', 'zero-bytes': 'int' } }
 
 ##
 # @XBZRLECacheStats:
@@ -332,6 +337,8 @@
 #   "duplicate":123,
 #   "normal":123,
 #   "normal-bytes":123456,
+#   "zero":123,
+#   "zero-bytes":123456,
 #   "dirty-sync-count":15
 # }
 #  }
@@ -358,6 +365,8 @@
 # "duplicate":123,
 # "normal":123,
 # "normal-bytes":123456,
+# "zero":123,
+# "zero-bytes":123456,
 # "dirty-sync-count":15
 #  }
 #   }
@@ -379,6 +388,8 @@
 # "duplicate":123,
 # "normal":123,
 # "normal-bytes":123456,
+# "zero":123,
+# "zero-bytes":123456,
 # "dirty-sync-count":15
 #  },
 #  "disk":{
@@ -405,6 +416,8 @@
 # "duplicate":10,
 # "normal":,
 # "normal-bytes":3412992,
+# "zero":,
+# "zero-bytes":3412992,
 # "dirty-sync-count":15
 #  },
 #  "xbzrle-cache":{
-- 
2.30.2




[PATCH 1/6] migration/multifd: Add new migration option multifd-zero-page.

2024-02-06 Thread Hao Xiang
This new parameter controls where the zero page checking is running. If
this parameter is set to true, zero page checking is done in the multifd
sender threads. If this parameter is set to false, zero page checking is
done in the migration main thread.

Signed-off-by: Hao Xiang 
---
 migration/migration-hmp-cmds.c |  7 +++
 migration/options.c| 20 
 migration/options.h|  1 +
 qapi/migration.json| 24 +---
 4 files changed, 49 insertions(+), 3 deletions(-)

diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c
index 99b49df5dd..8b0c205a41 100644
--- a/migration/migration-hmp-cmds.c
+++ b/migration/migration-hmp-cmds.c
@@ -344,6 +344,9 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict 
*qdict)
 monitor_printf(mon, "%s: %s\n",
 MigrationParameter_str(MIGRATION_PARAMETER_MULTIFD_COMPRESSION),
 MultiFDCompression_str(params->multifd_compression));
+monitor_printf(mon, "%s: %s\n",
+MigrationParameter_str(MIGRATION_PARAMETER_MULTIFD_ZERO_PAGE),
+params->multifd_zero_page ? "on" : "off");
 monitor_printf(mon, "%s: %" PRIu64 " bytes\n",
 MigrationParameter_str(MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE),
 params->xbzrle_cache_size);
@@ -634,6 +637,10 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict 
*qdict)
 p->has_multifd_zstd_level = true;
visit_type_uint8(v, param, &p->multifd_zstd_level, &err);
 break;
+case MIGRATION_PARAMETER_MULTIFD_ZERO_PAGE:
+p->has_multifd_zero_page = true;
+visit_type_bool(v, param, &p->multifd_zero_page, &err);
+break;
 case MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE:
 p->has_xbzrle_cache_size = true;
if (!visit_type_size(v, param, &cache_size, &err)) {
diff --git a/migration/options.c b/migration/options.c
index 3e3e0b93b4..cb18a41267 100644
--- a/migration/options.c
+++ b/migration/options.c
@@ -179,6 +179,8 @@ Property migration_properties[] = {
 DEFINE_PROP_MIG_MODE("mode", MigrationState,
   parameters.mode,
   MIG_MODE_NORMAL),
+DEFINE_PROP_BOOL("multifd-zero-page", MigrationState,
+ parameters.multifd_zero_page, true),
 
 /* Migration capabilities */
 DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
@@ -903,6 +905,13 @@ uint64_t migrate_xbzrle_cache_size(void)
 return s->parameters.xbzrle_cache_size;
 }
 
+bool migrate_multifd_zero_page(void)
+{
+MigrationState *s = migrate_get_current();
+
+return s->parameters.multifd_zero_page;
+}
+
 /* parameter setters */
 
 void migrate_set_block_incremental(bool value)
@@ -1013,6 +1022,8 @@ MigrationParameters *qmp_query_migrate_parameters(Error 
**errp)
 params->vcpu_dirty_limit = s->parameters.vcpu_dirty_limit;
 params->has_mode = true;
 params->mode = s->parameters.mode;
+params->has_multifd_zero_page = true;
+params->multifd_zero_page = s->parameters.multifd_zero_page;
 
 return params;
 }
@@ -1049,6 +1060,7 @@ void migrate_params_init(MigrationParameters *params)
 params->has_x_vcpu_dirty_limit_period = true;
 params->has_vcpu_dirty_limit = true;
 params->has_mode = true;
+params->has_multifd_zero_page = true;
 }
 
 /*
@@ -1350,6 +1362,10 @@ static void 
migrate_params_test_apply(MigrateSetParameters *params,
 if (params->has_mode) {
 dest->mode = params->mode;
 }
+
+if (params->has_multifd_zero_page) {
+dest->multifd_zero_page = params->multifd_zero_page;
+}
 }
 
 static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
@@ -1494,6 +1510,10 @@ static void migrate_params_apply(MigrateSetParameters 
*params, Error **errp)
 if (params->has_mode) {
 s->parameters.mode = params->mode;
 }
+
+if (params->has_multifd_zero_page) {
+s->parameters.multifd_zero_page = params->multifd_zero_page;
+}
 }
 
 void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp)
diff --git a/migration/options.h b/migration/options.h
index 246c160aee..c080a6ba18 100644
--- a/migration/options.h
+++ b/migration/options.h
@@ -93,6 +93,7 @@ const char *migrate_tls_authz(void);
 const char *migrate_tls_creds(void);
 const char *migrate_tls_hostname(void);
 uint64_t migrate_xbzrle_cache_size(void);
+bool migrate_multifd_zero_page(void);
 
 /* parameters setters */
 
diff --git a/qapi/migration.json b/qapi/migration.json
index 819708321d..ff033a0344 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -874,6 +874,11 @@
 # @mode: Migration mode. See description in @MigMode. Default is 'normal'.
 #(Since 8.2)
 #
+# @multifd-zero-page: Multifd zero page checking. If the parameter is true,
+# zero page checking is done on the multifd sender thread. If the parameter
+# is false, zero page checking is done on the migration main thread. Default
+# is set to true. (Since 9.0)

[PATCH 0/6] Introduce multifd zero page checking.

2024-02-06 Thread Hao Xiang
This patchset is based on Juan Quintela's old series here
https://lore.kernel.org/all/20220802063907.18882-1-quint...@redhat.com/

In the multifd live migration model, there is a single migration main
thread scanning the page map, queuing the pages to multiple multifd
sender threads. The migration main thread runs zero page checking on
every page before queuing the page to the sender threads. Zero page
checking is a CPU intensive task and hence having a single thread doing
all that doesn't scale well. This change introduces a new function
to run the zero page checking on the multifd sender threads. This
patchset also lays the groundwork for future changes to offload the zero
page checking task to accelerator hardware.

Use two Intel 4th generation Xeon servers for testing.

Architecture:x86_64
CPU(s):  192
Thread(s) per core:  2
Core(s) per socket:  48
Socket(s):   2
NUMA node(s):2
Vendor ID:   GenuineIntel
CPU family:  6
Model:   143
Model name:  Intel(R) Xeon(R) Platinum 8457C
Stepping:8
CPU MHz: 2538.624
CPU max MHz: 3800.0000
CPU min MHz: 800.0000

Perform multifd live migration with below setup:
1. VM has 100GB memory. All pages in the VM are zero pages.
2. Use tcp socket for live migration.
3. Use 4 multifd channels and zero page checking on migration main thread.
4. Use 1/2/4 multifd channels and zero page checking on multifd sender
threads.
5. Record migration total time from sender QEMU console's "info migrate"
command.
6. Calculate throughput with "100GB / total time".

+--------------------+----------------+------------------+
| zero-page-checking | total-time(ms) | throughput(GB/s) |
+--------------------+----------------+------------------+
| main-thread        | 9629           | 10.38GB/s        |
+--------------------+----------------+------------------+
| multifd-1-threads  | 6182           | 16.17GB/s        |
+--------------------+----------------+------------------+
| multifd-2-threads  | 4643           | 21.53GB/s        |
+--------------------+----------------+------------------+
| multifd-4-threads  | 4143           | 24.13GB/s        |
+--------------------+----------------+------------------+

Apply this patchset on top of commit
39a6e4f87e7b75a45b08d6dc8b8b7c2954c87440

Hao Xiang (6):
  migration/multifd: Add new migration option multifd-zero-page.
  migration/multifd: Add zero pages and zero bytes counter to migration
status interface.
  migration/multifd: Support for zero pages transmission in multifd
format.
  migration/multifd: Zero page transmission on the multifd thread.
  migration/multifd: Enable zero page checking from multifd threads.
  migration/multifd: Add a new migration test case for legacy zero page
checking.

 migration/migration-hmp-cmds.c |  11 
 migration/multifd.c| 106 -
 migration/multifd.h|  22 ++-
 migration/options.c|  20 +++
 migration/options.h|   1 +
 migration/ram.c|  49 ---
 migration/trace-events |   8 +--
 qapi/migration.json|  39 ++--
 tests/qtest/migration-test.c   |  26 
 9 files changed, 249 insertions(+), 33 deletions(-)

-- 
2.30.2




Re: [External] Re: [PATCH v3 03/20] multifd: Zero pages transmission

2024-02-01 Thread Hao Xiang
On Wed, Jan 31, 2024 at 9:22 PM Peter Xu  wrote:
>
> On Thu, Jan 04, 2024 at 12:44:35AM +0000, Hao Xiang wrote:
> > From: Juan Quintela 
> >
> > This implements the zero page detection and handling.
> >
> > Signed-off-by: Juan Quintela 
> > ---
> >  migration/multifd.c | 41 +++--
> >  migration/multifd.h |  5 +
> >  2 files changed, 44 insertions(+), 2 deletions(-)
> >
> > diff --git a/migration/multifd.c b/migration/multifd.c
> > index 5a1f50c7e8..756673029d 100644
> > --- a/migration/multifd.c
> > +++ b/migration/multifd.c
> > @@ -11,6 +11,7 @@
> >   */
> >
> >  #include "qemu/osdep.h"
> > +#include "qemu/cutils.h"
> >  #include "qemu/rcu.h"
> >  #include "exec/target_page.h"
> >  #include "sysemu/sysemu.h"
> > @@ -279,6 +280,12 @@ static void multifd_send_fill_packet(MultiFDSendParams 
> > *p)
> >
> >  packet->offset[i] = cpu_to_be64(temp);
> >  }
> > +for (i = 0; i < p->zero_num; i++) {
> > +/* there are architectures where ram_addr_t is 32 bit */
> > +uint64_t temp = p->zero[i];
> > +
> > +packet->offset[p->normal_num + i] = cpu_to_be64(temp);
> > +}
> >  }
>
> I think changes like this need to be moved into the previous patch.  I got
> quite confused when reading the previous one and only understood what happens
> until now.  Fabiano, if you're going to pick these ones out and post
> separately, please also consider this.  Perhaps squash them together?
>

Discussed with Fabiano on a separate thread here
https://lore.kernel.org/all/CAAYibXi=WB5wfvLFM0b=d9ojf66lb7ftgonzzz-tvk4rbbx...@mail.gmail.com/

I am moving the original multifd zero page checking changes into a
separate patchset. There is some necessary refactoring work on top
of the original series. I will send that out this week.
> --
> Peter Xu
>



Re: [External] Re: [PATCH v3 15/20] migration/multifd: Add test hook to set normal page ratio.

2024-02-01 Thread Hao Xiang
On Wed, Jan 31, 2024 at 9:24 PM Peter Xu  wrote:
>
> On Thu, Jan 04, 2024 at 12:44:47AM +0000, Hao Xiang wrote:
> > +# @multifd-normal-page-ratio: Test hook setting the normal page ratio.
> > +# (Since 8.2)
>
> Please remember to touch all of them to 9.0 when repost, thanks.
>

Will do.

> --
> Peter Xu
>


