Re: [External] Re: [PATCH v3 03/20] multifd: Zero pages transmission

2024-02-01 Thread Hao Xiang
On Wed, Jan 31, 2024 at 9:22 PM Peter Xu  wrote:
>
> On Thu, Jan 04, 2024 at 12:44:35AM +, Hao Xiang wrote:
> > From: Juan Quintela 
> >
> > This implements the zero page detection and handling.
> >
> > Signed-off-by: Juan Quintela 
> > ---
> >  migration/multifd.c | 41 +++--
> >  migration/multifd.h |  5 +
> >  2 files changed, 44 insertions(+), 2 deletions(-)
> >
> > diff --git a/migration/multifd.c b/migration/multifd.c
> > index 5a1f50c7e8..756673029d 100644
> > --- a/migration/multifd.c
> > +++ b/migration/multifd.c
> > @@ -11,6 +11,7 @@
> >   */
> >
> >  #include "qemu/osdep.h"
> > +#include "qemu/cutils.h"
> >  #include "qemu/rcu.h"
> >  #include "exec/target_page.h"
> >  #include "sysemu/sysemu.h"
> > @@ -279,6 +280,12 @@ static void multifd_send_fill_packet(MultiFDSendParams 
> > *p)
> >
> >  packet->offset[i] = cpu_to_be64(temp);
> >  }
> > +for (i = 0; i < p->zero_num; i++) {
> > +/* there are architectures where ram_addr_t is 32 bit */
> > +uint64_t temp = p->zero[i];
> > +
> > +packet->offset[p->normal_num + i] = cpu_to_be64(temp);
> > +}
> >  }
>
> I think changes like this needs to be moved into the previous patch.  I got
> quite confused when reading previous one and only understood what happens
> until now.  Fabiano, if you're going to pick these ones out and post
> separately, please also consider.  Perhaps squashing them together?
>

Discussed with Fabiano on a separate thread here
https://lore.kernel.org/all/CAAYibXi=WB5wfvLFM0b=d9ojf66lb7ftgonzzz-tvk4rbbx...@mail.gmail.com/

I am moving the original multifd zero page checking changes into a
separate patchset. There is some necessary refactoring work on top
of the original series. I will send that out this week.
> --
> Peter Xu
>



Re: [PATCH v3 03/20] multifd: Zero pages transmission

2024-01-31 Thread Peter Xu
On Thu, Jan 04, 2024 at 12:44:35AM +, Hao Xiang wrote:
> From: Juan Quintela 
> 
> This implements the zero page detection and handling.
> 
> Signed-off-by: Juan Quintela 
> ---
>  migration/multifd.c | 41 +++--
>  migration/multifd.h |  5 +
>  2 files changed, 44 insertions(+), 2 deletions(-)
> 
> diff --git a/migration/multifd.c b/migration/multifd.c
> index 5a1f50c7e8..756673029d 100644
> --- a/migration/multifd.c
> +++ b/migration/multifd.c
> @@ -11,6 +11,7 @@
>   */
>  
>  #include "qemu/osdep.h"
> +#include "qemu/cutils.h"
>  #include "qemu/rcu.h"
>  #include "exec/target_page.h"
>  #include "sysemu/sysemu.h"
> @@ -279,6 +280,12 @@ static void multifd_send_fill_packet(MultiFDSendParams 
> *p)
>  
>  packet->offset[i] = cpu_to_be64(temp);
>  }
> +for (i = 0; i < p->zero_num; i++) {
> +/* there are architectures where ram_addr_t is 32 bit */
> +uint64_t temp = p->zero[i];
> +
> +packet->offset[p->normal_num + i] = cpu_to_be64(temp);
> +}
>  }

I think changes like this needs to be moved into the previous patch.  I got
quite confused when reading previous one and only understood what happens
until now.  Fabiano, if you're going to pick these ones out and post
separately, please also consider.  Perhaps squashing them together?

-- 
Peter Xu




Re: [External] Re: [PATCH v3 03/20] multifd: Zero pages transmission

2024-01-22 Thread Hao Xiang
On Sun, Jan 14, 2024 at 11:01 PM Shivam Kumar  wrote:
>
>
>
> > On 04-Jan-2024, at 6:14 AM, Hao Xiang  wrote:
> >
> > From: Juan Quintela 
> >
> > This implements the zero page detection and handling.
> >
> > Signed-off-by: Juan Quintela 
> > ---
> > migration/multifd.c | 41 +++--
> > migration/multifd.h |  5 +
> > 2 files changed, 44 insertions(+), 2 deletions(-)
> >
> > diff --git a/migration/multifd.c b/migration/multifd.c
> > index 5a1f50c7e8..756673029d 100644
> > --- a/migration/multifd.c
> > +++ b/migration/multifd.c
> > @@ -11,6 +11,7 @@
> >  */
> >
> > #include "qemu/osdep.h"
> > +#include "qemu/cutils.h"
> > #include "qemu/rcu.h"
> > #include "exec/target_page.h"
> > #include "sysemu/sysemu.h"
> > @@ -279,6 +280,12 @@ static void multifd_send_fill_packet(MultiFDSendParams 
> > *p)
> >
> > packet->offset[i] = cpu_to_be64(temp);
> > }
> > +for (i = 0; i < p->zero_num; i++) {
> > +/* there are architectures where ram_addr_t is 32 bit */
> > +uint64_t temp = p->zero[i];
> > +
> > +packet->offset[p->normal_num + i] = cpu_to_be64(temp);
> > +}
> > }
> >
> > static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
> > @@ -361,6 +368,18 @@ static int 
> > multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
> > p->normal[i] = offset;
> > }
> >
> > +for (i = 0; i < p->zero_num; i++) {
> > +uint64_t offset = be64_to_cpu(packet->offset[p->normal_num + i]);
> > +
> > +if (offset > (p->block->used_length - p->page_size)) {
> > +error_setg(errp, "multifd: offset too long %" PRIu64
> > +   " (max " RAM_ADDR_FMT ")",
> > +   offset, p->block->used_length);
> > +return -1;
> > +}
> > +p->zero[i] = offset;
> > +}
> > +
> > return 0;
> > }
> >
> > @@ -664,6 +683,8 @@ static void *multifd_send_thread(void *opaque)
> > MultiFDSendParams *p = opaque;
> > MigrationThread *thread = NULL;
> > Error *local_err = NULL;
> > +/* qemu older than 8.2 don't understand zero page on multifd channel */
> > +bool use_zero_page = !migrate_use_main_zero_page();
> > int ret = 0;
> > bool use_zero_copy_send = migrate_zero_copy_send();
> >
> > @@ -689,6 +710,7 @@ static void *multifd_send_thread(void *opaque)
> > qemu_mutex_lock(>mutex);
> >
> > if (p->pending_job) {
> > +RAMBlock *rb = p->pages->block;
> > uint64_t packet_num = p->packet_num;
> > uint32_t flags;
> > p->normal_num = 0;
> > @@ -701,8 +723,16 @@ static void *multifd_send_thread(void *opaque)
> > }
> >
> > for (int i = 0; i < p->pages->num; i++) {
> > -p->normal[p->normal_num] = p->pages->offset[i];
> > -p->normal_num++;
> > +uint64_t offset = p->pages->offset[i];
> > +if (use_zero_page &&
> > +buffer_is_zero(rb->host + offset, p->page_size)) {
> > +p->zero[p->zero_num] = offset;
> > +p->zero_num++;
> > +ram_release_page(rb->idstr, offset);
> > +} else {
> > +p->normal[p->normal_num] = offset;
> > +p->normal_num++;
> > +}
> > }
> >
> > if (p->normal_num) {
> > @@ -1155,6 +1185,13 @@ static void *multifd_recv_thread(void *opaque)
> > }
> > }
> >
> > +for (int i = 0; i < p->zero_num; i++) {
> > +void *page = p->host + p->zero[i];
> > +if (!buffer_is_zero(page, p->page_size)) {
> > +memset(page, 0, p->page_size);
> > +}
> > +}
> > +
> I am wondering if zeroing the zero page on the destination can also be 
> offloaded to DSA. Can it help in reducing cpu consumption on the destination 
> in case of multifd-based migration?

I have that change coded up and I have tested it for performance. It's
not saving as many CPU cycles as I hoped. The problem is that on the
sender side, we run zero page detection on every single page, but on
the receiver side, we only zero the pages if the sender tells us
so. For instance, if all 128 pages in a multifd packet are zero
pages, we can offload zeroing of the entire 128 pages in a
single DSA operation, and that's the best case. In a worse case, if a
multifd packet with 128 pages only has, say, 10 zero pages, we can
offload zeroing of only those 10 pages instead of the entire
128 pages. Considering the DSA software overhead, that is not a good deal.

In the future, if we refactor the multifd path to separate zero pages
into their own packet, it will be a different story. For instance, if
there are 1024 non-contiguous zero pages detected, they will be sent
across multiple packets. With a new packet format, those pages can be
put into the same packet (and we can put more than 

Re: [PATCH v3 03/20] multifd: Zero pages transmission

2024-01-14 Thread Shivam Kumar



> On 04-Jan-2024, at 6:14 AM, Hao Xiang  wrote:
> 
> From: Juan Quintela 
> 
> This implements the zero page detection and handling.
> 
> Signed-off-by: Juan Quintela 
> ---
> migration/multifd.c | 41 +++--
> migration/multifd.h |  5 +
> 2 files changed, 44 insertions(+), 2 deletions(-)
> 
> diff --git a/migration/multifd.c b/migration/multifd.c
> index 5a1f50c7e8..756673029d 100644
> --- a/migration/multifd.c
> +++ b/migration/multifd.c
> @@ -11,6 +11,7 @@
>  */
> 
> #include "qemu/osdep.h"
> +#include "qemu/cutils.h"
> #include "qemu/rcu.h"
> #include "exec/target_page.h"
> #include "sysemu/sysemu.h"
> @@ -279,6 +280,12 @@ static void multifd_send_fill_packet(MultiFDSendParams 
> *p)
> 
> packet->offset[i] = cpu_to_be64(temp);
> }
> +for (i = 0; i < p->zero_num; i++) {
> +/* there are architectures where ram_addr_t is 32 bit */
> +uint64_t temp = p->zero[i];
> +
> +packet->offset[p->normal_num + i] = cpu_to_be64(temp);
> +}
> }
> 
> static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
> @@ -361,6 +368,18 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams 
> *p, Error **errp)
> p->normal[i] = offset;
> }
> 
> +for (i = 0; i < p->zero_num; i++) {
> +uint64_t offset = be64_to_cpu(packet->offset[p->normal_num + i]);
> +
> +if (offset > (p->block->used_length - p->page_size)) {
> +error_setg(errp, "multifd: offset too long %" PRIu64
> +   " (max " RAM_ADDR_FMT ")",
> +   offset, p->block->used_length);
> +return -1;
> +}
> +p->zero[i] = offset;
> +}
> +
> return 0;
> }
> 
> @@ -664,6 +683,8 @@ static void *multifd_send_thread(void *opaque)
> MultiFDSendParams *p = opaque;
> MigrationThread *thread = NULL;
> Error *local_err = NULL;
> +/* qemu older than 8.2 don't understand zero page on multifd channel */
> +bool use_zero_page = !migrate_use_main_zero_page();
> int ret = 0;
> bool use_zero_copy_send = migrate_zero_copy_send();
> 
> @@ -689,6 +710,7 @@ static void *multifd_send_thread(void *opaque)
> qemu_mutex_lock(>mutex);
> 
> if (p->pending_job) {
> +RAMBlock *rb = p->pages->block;
> uint64_t packet_num = p->packet_num;
> uint32_t flags;
> p->normal_num = 0;
> @@ -701,8 +723,16 @@ static void *multifd_send_thread(void *opaque)
> }
> 
> for (int i = 0; i < p->pages->num; i++) {
> -p->normal[p->normal_num] = p->pages->offset[i];
> -p->normal_num++;
> +uint64_t offset = p->pages->offset[i];
> +if (use_zero_page &&
> +buffer_is_zero(rb->host + offset, p->page_size)) {
> +p->zero[p->zero_num] = offset;
> +p->zero_num++;
> +ram_release_page(rb->idstr, offset);
> +} else {
> +p->normal[p->normal_num] = offset;
> +p->normal_num++;
> +}
> }
> 
> if (p->normal_num) {
> @@ -1155,6 +1185,13 @@ static void *multifd_recv_thread(void *opaque)
> }
> }
> 
> +for (int i = 0; i < p->zero_num; i++) {
> +void *page = p->host + p->zero[i];
> +if (!buffer_is_zero(page, p->page_size)) {
> +memset(page, 0, p->page_size);
> +}
> +}
> +
I am wondering if zeroing the zero page on the destination can also be 
offloaded to DSA. Can it help in reducing cpu consumption on the destination in 
case of multifd-based migration?
> if (flags & MULTIFD_FLAG_SYNC) {
> qemu_sem_post(_recv_state->sem_sync);
> qemu_sem_wait(>sem_sync);
> diff --git a/migration/multifd.h b/migration/multifd.h
> index d587b0e19c..13762900d4 100644
> --- a/migration/multifd.h
> +++ b/migration/multifd.h
> @@ -53,6 +53,11 @@ typedef struct {
> uint32_t unused32[1];/* Reserved for future use */
> uint64_t unused64[3];/* Reserved for future use */
> char ramblock[256];
> +/*
> + * This array contains the pointers to:
> + *  - normal pages (initial normal_pages entries)
> + *  - zero pages (following zero_pages entries)
> + */
> uint64_t offset[];
> } __attribute__((packed)) MultiFDPacket_t;
> 
> -- 
> 2.30.2
> 
> 
> 




[PATCH v3 03/20] multifd: Zero pages transmission

2024-01-03 Thread Hao Xiang
From: Juan Quintela 

This implements the zero page detection and handling.

Signed-off-by: Juan Quintela 
---
 migration/multifd.c | 41 +++--
 migration/multifd.h |  5 +
 2 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/migration/multifd.c b/migration/multifd.c
index 5a1f50c7e8..756673029d 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -11,6 +11,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/cutils.h"
 #include "qemu/rcu.h"
 #include "exec/target_page.h"
 #include "sysemu/sysemu.h"
@@ -279,6 +280,12 @@ static void multifd_send_fill_packet(MultiFDSendParams *p)
 
 packet->offset[i] = cpu_to_be64(temp);
 }
+for (i = 0; i < p->zero_num; i++) {
+/* there are architectures where ram_addr_t is 32 bit */
+uint64_t temp = p->zero[i];
+
+packet->offset[p->normal_num + i] = cpu_to_be64(temp);
+}
 }
 
 static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
@@ -361,6 +368,18 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams 
*p, Error **errp)
 p->normal[i] = offset;
 }
 
+for (i = 0; i < p->zero_num; i++) {
+uint64_t offset = be64_to_cpu(packet->offset[p->normal_num + i]);
+
+if (offset > (p->block->used_length - p->page_size)) {
+error_setg(errp, "multifd: offset too long %" PRIu64
+   " (max " RAM_ADDR_FMT ")",
+   offset, p->block->used_length);
+return -1;
+}
+p->zero[i] = offset;
+}
+
 return 0;
 }
 
@@ -664,6 +683,8 @@ static void *multifd_send_thread(void *opaque)
 MultiFDSendParams *p = opaque;
 MigrationThread *thread = NULL;
 Error *local_err = NULL;
+/* qemu older than 8.2 don't understand zero page on multifd channel */
+bool use_zero_page = !migrate_use_main_zero_page();
 int ret = 0;
 bool use_zero_copy_send = migrate_zero_copy_send();
 
@@ -689,6 +710,7 @@ static void *multifd_send_thread(void *opaque)
 qemu_mutex_lock(>mutex);
 
 if (p->pending_job) {
+RAMBlock *rb = p->pages->block;
 uint64_t packet_num = p->packet_num;
 uint32_t flags;
 p->normal_num = 0;
@@ -701,8 +723,16 @@ static void *multifd_send_thread(void *opaque)
 }
 
 for (int i = 0; i < p->pages->num; i++) {
-p->normal[p->normal_num] = p->pages->offset[i];
-p->normal_num++;
+uint64_t offset = p->pages->offset[i];
+if (use_zero_page &&
+buffer_is_zero(rb->host + offset, p->page_size)) {
+p->zero[p->zero_num] = offset;
+p->zero_num++;
+ram_release_page(rb->idstr, offset);
+} else {
+p->normal[p->normal_num] = offset;
+p->normal_num++;
+}
 }
 
 if (p->normal_num) {
@@ -1155,6 +1185,13 @@ static void *multifd_recv_thread(void *opaque)
 }
 }
 
+for (int i = 0; i < p->zero_num; i++) {
+void *page = p->host + p->zero[i];
+if (!buffer_is_zero(page, p->page_size)) {
+memset(page, 0, p->page_size);
+}
+}
+
 if (flags & MULTIFD_FLAG_SYNC) {
 qemu_sem_post(_recv_state->sem_sync);
 qemu_sem_wait(>sem_sync);
diff --git a/migration/multifd.h b/migration/multifd.h
index d587b0e19c..13762900d4 100644
--- a/migration/multifd.h
+++ b/migration/multifd.h
@@ -53,6 +53,11 @@ typedef struct {
 uint32_t unused32[1];/* Reserved for future use */
 uint64_t unused64[3];/* Reserved for future use */
 char ramblock[256];
+/*
+ * This array contains the pointers to:
+ *  - normal pages (initial normal_pages entries)
+ *  - zero pages (following zero_pages entries)
+ */
 uint64_t offset[];
 } __attribute__((packed)) MultiFDPacket_t;
 
-- 
2.30.2