Re: [PATCH v2 3/4] migration/rdma: destination: create the return path after the first accept

2021-05-25 Thread Dr. David Alan Gilbert
* Li Zhijian (lizhij...@cn.fujitsu.com) wrote:
> destination side:
> $ build/qemu-system-x86_64 -enable-kvm -netdev 
> tap,id=hn0,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown -device 
> e1000,netdev=hn0,mac=50:52:54:00:11:22 -boot c -drive 
> if=none,file=./Fedora-rdma-server-migration.qcow2,id=drive-virtio-disk0 
> -device 
> virtio-blk-pci,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,id=virtio-disk0 -m 
> 2048 -smp 2 -device piix3-usb-uhci -device usb-tablet -monitor stdio -vga qxl 
> -spice streaming-video=filter,port=5902,disable-ticketing -incoming 
> rdma:192.168.1.10:
> (qemu) migrate_set_capability postcopy-ram on
> (qemu)
> dest_init RDMA Device opened: kernel name rocep1s0f0 uverbs device name 
> uverbs0, infiniband_verbs class device path 
> /sys/class/infiniband_verbs/uverbs0, infiniband class device path 
> /sys/class/infiniband/rocep1s0f0, transport: (2) Ethernet
> Segmentation fault (core dumped)
> 
>  (gdb) bt
>  #0  qemu_rdma_accept (rdma=0x0) at ../migration/rdma.c:3272
>  #1  rdma_accept_incoming_migration (opaque=0x0) at 
> ../migration/rdma.c:3986
>  #2  0x563c9e51f02a in aio_dispatch_handler
>  (ctx=ctx@entry=0x563ca0606010, node=0x563ca12b2150) at 
> ../util/aio-posix.c:329
>  #3  0x563c9e51f752 in aio_dispatch_handlers (ctx=0x563ca0606010) at  
> ../util/aio-posix.c:372
>  #4  aio_dispatch (ctx=0x563ca0606010) at ../util/aio-posix.c:382
>  #5  0x563c9e4f4d9e in aio_ctx_dispatch (source=,  
> callback=, user_data=)at ../util/async.c:306
>  #6  0x7fe96ef3fa9f in g_main_context_dispatch () at  
> /lib64/libglib-2.0.so.0
>  #7  0x563c9e4ffeb8 in glib_pollfds_poll () at ../util/main-loop.c:231
>  #8  os_host_main_loop_wait (timeout=12188789) at ../util/main-loop.c:254
>  #9  main_loop_wait (nonblocking=nonblocking@entry=0) at 
> ../util/main-loop.c:530
>  #10 0x563c9e3c7211 in qemu_main_loop () at ../softmmu/runstate.c:725
>  #11 0x563c9dfd46fe in main (argc=, argv= out>, envp=) at ../softmmu/main.c:50
> 
> The rdma return path will not be created when the qemu incoming side is
> starting, since migrate_postcopy() is false at that moment; a NULL return
> path rdma is then dereferenced if the user enables postcopy later.
> 
> Signed-off-by: Li Zhijian 

Reviewed-by: Dr. David Alan Gilbert 

> ---
> V2: alloc memory for host_port
> ---
>  migration/rdma.c | 32 +---
>  1 file changed, 21 insertions(+), 11 deletions(-)
> 
> diff --git a/migration/rdma.c b/migration/rdma.c
> index 651534e8255..d829d08d076 100644
> --- a/migration/rdma.c
> +++ b/migration/rdma.c
> @@ -316,6 +316,7 @@ typedef struct RDMALocalBlocks {
>  typedef struct RDMAContext {
>  char *host;
>  int port;
> +char *host_port;
>  
>  RDMAWorkRequestData wr_data[RDMA_WRID_MAX];
>  
> @@ -2392,7 +2393,9 @@ static void qemu_rdma_cleanup(RDMAContext *rdma)
>  rdma->channel = NULL;
>  }
>  g_free(rdma->host);
> +g_free(rdma->host_port);
>  rdma->host = NULL;
> +rdma->host_port = NULL;
>  }
>  
>  
> @@ -2648,6 +2651,7 @@ static void *qemu_rdma_data_init(const char *host_port, 
> Error **errp)
>  if (!inet_parse(addr, host_port, NULL)) {
>  rdma->port = atoi(addr->port);
>  rdma->host = g_strdup(addr->host);
> +rdma->host_port = g_strdup(host_port);
>  } else {
>  ERROR(errp, "bad RDMA migration address '%s'", host_port);
>  g_free(rdma);
> @@ -3276,6 +3280,7 @@ static int qemu_rdma_accept(RDMAContext *rdma)
>  .private_data = ,
>  .private_data_len = sizeof(cap),
>   };
> +RDMAContext *rdma_return_path = NULL;
>  struct rdma_cm_event *cm_event;
>  struct ibv_context *verbs;
>  int ret = -EINVAL;
> @@ -3291,6 +3296,20 @@ static int qemu_rdma_accept(RDMAContext *rdma)
>  goto err_rdma_dest_wait;
>  }
>  
> +/*
> + * initialize the RDMAContext for return path for postcopy after first
> + * connection request reached.
> + */
> +if (migrate_postcopy() && !rdma->is_return_path) {
> +rdma_return_path = qemu_rdma_data_init(rdma->host_port, NULL);
> +if (rdma_return_path == NULL) {
> +rdma_ack_cm_event(cm_event);
> +goto err_rdma_dest_wait;
> +}
> +
> +qemu_rdma_return_path_dest_init(rdma_return_path, rdma);
> +}
> +
>  memcpy(, cm_event->param.conn.private_data, sizeof(cap));
>  
>  network_to_caps();
> @@ -3406,6 +3425,7 @@ static int qemu_rdma_accept(RDMAContext *rdma)
>  err_rdma_dest_wait:
>  rdma->error_state = ret;
>  qemu_rdma_cleanup(rdma);
> +g_free(rdma_return_path);
>  return ret;
>  }
>  
> @@ -4048,17 +4068,6 @@ void rdma_start_incoming_migration(const char 
> *host_port, Error **errp)
>  
>  trace_rdma_start_incoming_migration_after_rdma_listen();
>  
> -

[PATCH v2 3/4] migration/rdma: destination: create the return path after the first accept

2021-05-25 Thread Li Zhijian
destination side:
$ build/qemu-system-x86_64 -enable-kvm -netdev 
tap,id=hn0,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown -device 
e1000,netdev=hn0,mac=50:52:54:00:11:22 -boot c -drive 
if=none,file=./Fedora-rdma-server-migration.qcow2,id=drive-virtio-disk0 -device 
virtio-blk-pci,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,id=virtio-disk0 -m 
2048 -smp 2 -device piix3-usb-uhci -device usb-tablet -monitor stdio -vga qxl 
-spice streaming-video=filter,port=5902,disable-ticketing -incoming 
rdma:192.168.1.10:
(qemu) migrate_set_capability postcopy-ram on
(qemu)
dest_init RDMA Device opened: kernel name rocep1s0f0 uverbs device name 
uverbs0, infiniband_verbs class device path 
/sys/class/infiniband_verbs/uverbs0, infiniband class device path 
/sys/class/infiniband/rocep1s0f0, transport: (2) Ethernet
Segmentation fault (core dumped)

 (gdb) bt
 #0  qemu_rdma_accept (rdma=0x0) at ../migration/rdma.c:3272
 #1  rdma_accept_incoming_migration (opaque=0x0) at ../migration/rdma.c:3986
 #2  0x563c9e51f02a in aio_dispatch_handler
 (ctx=ctx@entry=0x563ca0606010, node=0x563ca12b2150) at 
../util/aio-posix.c:329
 #3  0x563c9e51f752 in aio_dispatch_handlers (ctx=0x563ca0606010) at  
../util/aio-posix.c:372
 #4  aio_dispatch (ctx=0x563ca0606010) at ../util/aio-posix.c:382
 #5  0x563c9e4f4d9e in aio_ctx_dispatch (source=,  
callback=, user_data=)at ../util/async.c:306
 #6  0x7fe96ef3fa9f in g_main_context_dispatch () at  
/lib64/libglib-2.0.so.0
 #7  0x563c9e4ffeb8 in glib_pollfds_poll () at ../util/main-loop.c:231
 #8  os_host_main_loop_wait (timeout=12188789) at ../util/main-loop.c:254
 #9  main_loop_wait (nonblocking=nonblocking@entry=0) at 
../util/main-loop.c:530
 #10 0x563c9e3c7211 in qemu_main_loop () at ../softmmu/runstate.c:725
 #11 0x563c9dfd46fe in main (argc=, argv=, envp=) at ../softmmu/main.c:50

The rdma return path will not be created when the qemu incoming side is
starting, since migrate_postcopy() is false at that moment; a NULL return
path rdma is then dereferenced if the user enables postcopy later.

Signed-off-by: Li Zhijian 
---
V2: alloc memory for host_port
---
 migration/rdma.c | 32 +---
 1 file changed, 21 insertions(+), 11 deletions(-)

diff --git a/migration/rdma.c b/migration/rdma.c
index 651534e8255..d829d08d076 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -316,6 +316,7 @@ typedef struct RDMALocalBlocks {
 typedef struct RDMAContext {
 char *host;
 int port;
+char *host_port;
 
 RDMAWorkRequestData wr_data[RDMA_WRID_MAX];
 
@@ -2392,7 +2393,9 @@ static void qemu_rdma_cleanup(RDMAContext *rdma)
 rdma->channel = NULL;
 }
 g_free(rdma->host);
+g_free(rdma->host_port);
 rdma->host = NULL;
+rdma->host_port = NULL;
 }
 
 
@@ -2648,6 +2651,7 @@ static void *qemu_rdma_data_init(const char *host_port, 
Error **errp)
 if (!inet_parse(addr, host_port, NULL)) {
 rdma->port = atoi(addr->port);
 rdma->host = g_strdup(addr->host);
+rdma->host_port = g_strdup(host_port);
 } else {
 ERROR(errp, "bad RDMA migration address '%s'", host_port);
 g_free(rdma);
@@ -3276,6 +3280,7 @@ static int qemu_rdma_accept(RDMAContext *rdma)
 .private_data = ,
 .private_data_len = sizeof(cap),
  };
+RDMAContext *rdma_return_path = NULL;
 struct rdma_cm_event *cm_event;
 struct ibv_context *verbs;
 int ret = -EINVAL;
@@ -3291,6 +3296,20 @@ static int qemu_rdma_accept(RDMAContext *rdma)
 goto err_rdma_dest_wait;
 }
 
+/*
+ * initialize the RDMAContext for return path for postcopy after first
+ * connection request reached.
+ */
+if (migrate_postcopy() && !rdma->is_return_path) {
+rdma_return_path = qemu_rdma_data_init(rdma->host_port, NULL);
+if (rdma_return_path == NULL) {
+rdma_ack_cm_event(cm_event);
+goto err_rdma_dest_wait;
+}
+
+qemu_rdma_return_path_dest_init(rdma_return_path, rdma);
+}
+
 memcpy(, cm_event->param.conn.private_data, sizeof(cap));
 
 network_to_caps();
@@ -3406,6 +3425,7 @@ static int qemu_rdma_accept(RDMAContext *rdma)
 err_rdma_dest_wait:
 rdma->error_state = ret;
 qemu_rdma_cleanup(rdma);
+g_free(rdma_return_path);
 return ret;
 }
 
@@ -4048,17 +4068,6 @@ void rdma_start_incoming_migration(const char 
*host_port, Error **errp)
 
 trace_rdma_start_incoming_migration_after_rdma_listen();
 
-/* initialize the RDMAContext for return path */
-if (migrate_postcopy()) {
-rdma_return_path = qemu_rdma_data_init(host_port, _err);
-
-if (rdma_return_path == NULL) {
-goto cleanup_rdma;
-}
-
-qemu_rdma_return_path_dest_init(rdma_return_path, rdma);
-}
-