Re: [PATCH v2 4/4] migration/rdma: source: poll cm_event from return path

2021-05-25 Thread Dr. David Alan Gilbert
* Li Zhijian (lizhij...@cn.fujitsu.com) wrote:
> The source side always blocks if postcopy is only enabled on the source
> side, and users are not able to cancel the migration in this case.
> 
> Give the source side a chance to cancel the migration.
> 
> Signed-off-by: Li Zhijian 
> ---
> V2: utilize poll to check cm event
> ---
>  migration/rdma.c | 42 ++
>  1 file changed, 38 insertions(+), 4 deletions(-)
> 
> diff --git a/migration/rdma.c b/migration/rdma.c
> index d829d08d076..f67e21b4f54 100644
> --- a/migration/rdma.c
> +++ b/migration/rdma.c
> @@ -36,6 +36,7 @@
>  #include 
>  #include "trace.h"
>  #include "qom/object.h"
> +#include <poll.h>
>  
>  /*
>   * Print and error on both the Monitor and the Log file.
> @@ -2460,7 +2461,36 @@ err_rdma_source_init:
>  return -1;
>  }
>  
> -static int qemu_rdma_connect(RDMAContext *rdma, Error **errp)
> +static int qemu_get_cm_event_timeout(RDMAContext *rdma,
> + struct rdma_cm_event **cm_event,
> + long msec, Error **errp)
> +{
> +int ret;
> +struct pollfd poll_fd = {
> +.fd = rdma->channel->fd,
> +.events = POLLIN,
> +.revents = 0
> +};
> +
> +do {
> +ret = poll(&poll_fd, 1, msec);
> +} while (ret < 0 && errno == EINTR);
> +
> +if (ret == 0) {
> +ERROR(errp, "poll cm event timeout");
> +return -1;
> +} else if (ret < 0) {
> +ERROR(errp, "failed to pull cm event, errno=%i", errno);

Typo: 'pull' should be 'poll' - I can fix that.

> +return -1;
> +} else if (poll_fd.revents & POLLIN) {
> +return rdma_get_cm_event(rdma->channel, cm_event);
> +} else {
> +ERROR(errp, "no POLLIN event, revent=%x", poll_fd.revents);
> +return -1;
> +}
> +}
> +
> +static int qemu_rdma_connect(RDMAContext *rdma, Error **errp, bool 
> return_path)
>  {
>  RDMACapabilities cap = {
>  .version = RDMA_CONTROL_VERSION_CURRENT,
> @@ -2498,7 +2528,11 @@ static int qemu_rdma_connect(RDMAContext *rdma, Error 
> **errp)
>  goto err_rdma_source_connect;
>  }
>  
> -ret = rdma_get_cm_event(rdma->channel, &cm_event);
> +if (return_path) {
> +ret = qemu_get_cm_event_timeout(rdma, &cm_event, 5000, errp);

A fixed timeout is not a great fix, but I can't think of anything
better; the only alternative would be to register the fd with the main
thread's poll loop and have a callback invoked when the event arrives.

But for now:

Reviewed-by: Dr. David Alan Gilbert 

> +} else {
> +ret = rdma_get_cm_event(rdma->channel, &cm_event);
> +}
>  if (ret) {
>  perror("rdma_get_cm_event after rdma_connect");
>  ERROR(errp, "connecting to destination!");
> @@ -4111,7 +4145,7 @@ void rdma_start_outgoing_migration(void *opaque,
>  }
>  
>  trace_rdma_start_outgoing_migration_after_rdma_source_init();
> -ret = qemu_rdma_connect(rdma, errp);
> +ret = qemu_rdma_connect(rdma, errp, false);
>  
>  if (ret) {
>  goto err;
> @@ -4132,7 +4166,7 @@ void rdma_start_outgoing_migration(void *opaque,
>  goto return_path_err;
>  }
>  
> -ret = qemu_rdma_connect(rdma_return_path, errp);
> +ret = qemu_rdma_connect(rdma_return_path, errp, true);
>  
>  if (ret) {
>  goto return_path_err;
> -- 
> 2.30.2
> 
> 
> 
-- 
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK




[PATCH v2 4/4] migration/rdma: source: poll cm_event from return path

2021-05-25 Thread Li Zhijian
The source side always blocks if postcopy is only enabled on the source
side, and users are not able to cancel the migration in this case.

Give the source side a chance to cancel the migration.

Signed-off-by: Li Zhijian 
---
V2: utilize poll to check cm event
---
 migration/rdma.c | 42 ++
 1 file changed, 38 insertions(+), 4 deletions(-)

diff --git a/migration/rdma.c b/migration/rdma.c
index d829d08d076..f67e21b4f54 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -36,6 +36,7 @@
 #include 
 #include "trace.h"
 #include "qom/object.h"
+#include <poll.h>
 
 /*
  * Print and error on both the Monitor and the Log file.
@@ -2460,7 +2461,36 @@ err_rdma_source_init:
 return -1;
 }
 
-static int qemu_rdma_connect(RDMAContext *rdma, Error **errp)
+static int qemu_get_cm_event_timeout(RDMAContext *rdma,
+ struct rdma_cm_event **cm_event,
+ long msec, Error **errp)
+{
+int ret;
+struct pollfd poll_fd = {
+.fd = rdma->channel->fd,
+.events = POLLIN,
+.revents = 0
+};
+
+do {
+ret = poll(&poll_fd, 1, msec);
+} while (ret < 0 && errno == EINTR);
+
+if (ret == 0) {
+ERROR(errp, "poll cm event timeout");
+return -1;
+} else if (ret < 0) {
+ERROR(errp, "failed to pull cm event, errno=%i", errno);
+return -1;
+} else if (poll_fd.revents & POLLIN) {
+return rdma_get_cm_event(rdma->channel, cm_event);
+} else {
+ERROR(errp, "no POLLIN event, revent=%x", poll_fd.revents);
+return -1;
+}
+}
+
+static int qemu_rdma_connect(RDMAContext *rdma, Error **errp, bool return_path)
 {
 RDMACapabilities cap = {
 .version = RDMA_CONTROL_VERSION_CURRENT,
@@ -2498,7 +2528,11 @@ static int qemu_rdma_connect(RDMAContext *rdma, Error **errp)
 goto err_rdma_source_connect;
 }
 
-ret = rdma_get_cm_event(rdma->channel, &cm_event);
+if (return_path) {
+ret = qemu_get_cm_event_timeout(rdma, &cm_event, 5000, errp);
+} else {
+ret = rdma_get_cm_event(rdma->channel, &cm_event);
+}
 if (ret) {
 perror("rdma_get_cm_event after rdma_connect");
 ERROR(errp, "connecting to destination!");
@@ -4111,7 +4145,7 @@ void rdma_start_outgoing_migration(void *opaque,
 }
 
 trace_rdma_start_outgoing_migration_after_rdma_source_init();
-ret = qemu_rdma_connect(rdma, errp);
+ret = qemu_rdma_connect(rdma, errp, false);
 
 if (ret) {
 goto err;
@@ -4132,7 +4166,7 @@ void rdma_start_outgoing_migration(void *opaque,
 goto return_path_err;
 }
 
-ret = qemu_rdma_connect(rdma_return_path, errp);
+ret = qemu_rdma_connect(rdma_return_path, errp, true);
 
 if (ret) {
 goto return_path_err;
-- 
2.30.2