Re: [Qemu-devel] [PATCH v4 10/28] migration: add reporting of errors for outgoing migration

2016-03-15 Thread Daniel P. Berrange
On Mon, Mar 14, 2016 at 07:57:25PM +, Dr. David Alan Gilbert wrote:
> * Daniel P. Berrange (berra...@redhat.com) wrote:
> > Currently if an application initiates an outgoing migration,
> > it may or may not, get an error reported back on failure. If
> > the error occurs synchronously to the 'migrate' command
> > execution, the client app will see the error message. This
> > is the case for DNS lookup failures. If the error occurs
> > asynchronously to the monitor command though, the error
> > will be thrown away and the client left guessing about
> > what went wrong. This is the case for failure to connect
> > to the TCP server (eg due to wrong port, or firewall
> > rules, or other similar errors).
> > 
> > In the future we'll be adding more scope for errors to
> > happen asynchronously with the TLS protocol handshake.
> > TLS errors are hard to diagnose even when they are well
> > reported, so discarding errors entirely will make it
> > impossible to debug TLS connection problems.
> > 
> > Management apps which do migration are already using
> > 'query-migrate' / 'info migrate' to check up on progress
> > of background migration operations and to see their end
> > status. This is a fine place to also include the error
> > message when things go wrong.
> > 
> > This patch thus adds an 'error-desc' field to the
> > MigrationInfo struct, which will be populated when
> > the 'status' is set to 'failed':
> > 
> > (qemu) migrate -d tcp:localhost:9001
> > (qemu) info migrate
> > capabilities: xbzrle: off rdma-pin-all: off auto-converge: off zero-blocks: 
> > off compress: off events: off x-postcopy-ram: off
> > Migration status: failed (Error connecting to socket: Connection refused)
> > total time: 0 milliseconds
> > 
> > In the HMP, when doing non-detached migration, it is
> > also possible to display this error message directly
> > to the app.
> > 
> > (qemu) migrate tcp:localhost:9001
> > Error connecting to socket: Connection refused
> > 
> > Or with QMP
> > 
> >   {
> > "execute": "query-migrate",
> > "arguments": {}
> >   }
> >   {
> > "return": {
> >   "status": "failed",
> >   "error-desc": "address resolution failed for myhost:9000: No address 
> > associated with hostname"
> > }
> >   }
> > 
> > Signed-off-by: Daniel P. Berrange 
> > ---
> >  hmp.c | 13 -
> >  include/migration/migration.h |  5 -
> >  include/qapi/error.h  |  2 +-
> >  migration/migration.c | 15 ---
> >  migration/rdma.c  | 10 +++---
> >  migration/tcp.c   |  2 +-
> >  migration/unix.c  |  2 +-
> >  qapi-schema.json  |  7 ++-
> >  trace-events  |  2 +-
> >  util/error.c  |  2 +-
> >  10 files changed, 42 insertions(+), 18 deletions(-)
> > 
> > diff --git a/hmp.c b/hmp.c
> > index 5b6084a..7126f17 100644
> > --- a/hmp.c
> > +++ b/hmp.c
> > @@ -34,6 +34,7 @@
> >  #include "ui/console.h"
> >  #include "block/qapi.h"
> >  #include "qemu-io.h"
> > +#include "qemu/error-report.h"
> >  
> >  #ifdef CONFIG_SPICE
> >  #include 
> > @@ -167,8 +168,15 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
> >  }
> >  
> >  if (info->has_status) {
> > -monitor_printf(mon, "Migration status: %s\n",
> > +monitor_printf(mon, "Migration status: %s",
> > MigrationStatus_lookup[info->status]);
> > +if (info->status == MIGRATION_STATUS_FAILED &&
> > +info->has_error_desc) {
> > +monitor_printf(mon, " (%s)\n", info->error_desc);
> > +} else {
> > +monitor_printf(mon, "\n");
> > +}
> > +
> >  monitor_printf(mon, "total time: %" PRIu64 " milliseconds\n",
> > info->total_time);
> >  if (info->has_expected_downtime) {
> > @@ -1532,6 +1540,9 @@ static void hmp_migrate_status_cb(void *opaque)
> >  if (status->is_block_migration) {
> >  monitor_printf(status->mon, "\n");
> >  }
> > +if (info->has_error_desc) {
> > +error_report("%s", info->error_desc);
> > +}
> >  monitor_resume(status->mon);
> >  timer_del(status->timer);
> >  g_free(status);
> > diff --git a/include/migration/migration.h b/include/migration/migration.h
> > index e335380..46c1bbe 100644
> > --- a/include/migration/migration.h
> > +++ b/include/migration/migration.h
> > @@ -171,6 +171,9 @@ struct MigrationState
> >  QSIMPLEQ_HEAD(src_page_requests, MigrationSrcPageRequest) 
> > src_page_requests;
> >  /* The RAMBlock used in the last src_page_request */
> >  RAMBlock *last_req_rb;
> > +
> > +/* The last error that occurred */
> > +Error *error;
> 
> Note that's now 'first error', but other than that comment:
> 
> Reviewed-by: Dr. David Alan Gilbert 
> 
> When you merge this, what will happen with existing libvirt; at the
> 

Re: [Qemu-devel] [PATCH v4 10/28] migration: add reporting of errors for outgoing migration

2016-03-14 Thread Dr. David Alan Gilbert
* Daniel P. Berrange (berra...@redhat.com) wrote:
> Currently if an application initiates an outgoing migration,
> it may or may not, get an error reported back on failure. If
> the error occurs synchronously to the 'migrate' command
> execution, the client app will see the error message. This
> is the case for DNS lookup failures. If the error occurs
> asynchronously to the monitor command though, the error
> will be thrown away and the client left guessing about
> what went wrong. This is the case for failure to connect
> to the TCP server (eg due to wrong port, or firewall
> rules, or other similar errors).
> 
> In the future we'll be adding more scope for errors to
> happen asynchronously with the TLS protocol handshake.
> TLS errors are hard to diagnose even when they are well
> reported, so discarding errors entirely will make it
> impossible to debug TLS connection problems.
> 
> Management apps which do migration are already using
> 'query-migrate' / 'info migrate' to check up on progress
> of background migration operations and to see their end
> status. This is a fine place to also include the error
> message when things go wrong.
> 
> This patch thus adds an 'error-desc' field to the
> MigrationInfo struct, which will be populated when
> the 'status' is set to 'failed':
> 
> (qemu) migrate -d tcp:localhost:9001
> (qemu) info migrate
> capabilities: xbzrle: off rdma-pin-all: off auto-converge: off zero-blocks: 
> off compress: off events: off x-postcopy-ram: off
> Migration status: failed (Error connecting to socket: Connection refused)
> total time: 0 milliseconds
> 
> In the HMP, when doing non-detached migration, it is
> also possible to display this error message directly
> to the app.
> 
> (qemu) migrate tcp:localhost:9001
> Error connecting to socket: Connection refused
> 
> Or with QMP
> 
>   {
> "execute": "query-migrate",
> "arguments": {}
>   }
>   {
> "return": {
>   "status": "failed",
>   "error-desc": "address resolution failed for myhost:9000: No address 
> associated with hostname"
> }
>   }
> 
> Signed-off-by: Daniel P. Berrange 
> ---
>  hmp.c | 13 -
>  include/migration/migration.h |  5 -
>  include/qapi/error.h  |  2 +-
>  migration/migration.c | 15 ---
>  migration/rdma.c  | 10 +++---
>  migration/tcp.c   |  2 +-
>  migration/unix.c  |  2 +-
>  qapi-schema.json  |  7 ++-
>  trace-events  |  2 +-
>  util/error.c  |  2 +-
>  10 files changed, 42 insertions(+), 18 deletions(-)
> 
> diff --git a/hmp.c b/hmp.c
> index 5b6084a..7126f17 100644
> --- a/hmp.c
> +++ b/hmp.c
> @@ -34,6 +34,7 @@
>  #include "ui/console.h"
>  #include "block/qapi.h"
>  #include "qemu-io.h"
> +#include "qemu/error-report.h"
>  
>  #ifdef CONFIG_SPICE
>  #include 
> @@ -167,8 +168,15 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
>  }
>  
>  if (info->has_status) {
> -monitor_printf(mon, "Migration status: %s\n",
> +monitor_printf(mon, "Migration status: %s",
> MigrationStatus_lookup[info->status]);
> +if (info->status == MIGRATION_STATUS_FAILED &&
> +info->has_error_desc) {
> +monitor_printf(mon, " (%s)\n", info->error_desc);
> +} else {
> +monitor_printf(mon, "\n");
> +}
> +
>  monitor_printf(mon, "total time: %" PRIu64 " milliseconds\n",
> info->total_time);
>  if (info->has_expected_downtime) {
> @@ -1532,6 +1540,9 @@ static void hmp_migrate_status_cb(void *opaque)
>  if (status->is_block_migration) {
>  monitor_printf(status->mon, "\n");
>  }
> +if (info->has_error_desc) {
> +error_report("%s", info->error_desc);
> +}
>  monitor_resume(status->mon);
>  timer_del(status->timer);
>  g_free(status);
> diff --git a/include/migration/migration.h b/include/migration/migration.h
> index e335380..46c1bbe 100644
> --- a/include/migration/migration.h
> +++ b/include/migration/migration.h
> @@ -171,6 +171,9 @@ struct MigrationState
>  QSIMPLEQ_HEAD(src_page_requests, MigrationSrcPageRequest) 
> src_page_requests;
>  /* The RAMBlock used in the last src_page_request */
>  RAMBlock *last_req_rb;
> +
> +/* The last error that occurred */
> +Error *error;

Note that's now 'first error', but other than that comment:

Reviewed-by: Dr. David Alan Gilbert 

When you merge this, what will happen with existing libvirt; at the
moment I think something tries to extract the error message out
of the qemu log - but I don't think this ends up in any log
unless something does the query-migrate/info migrate;   will that
happen now, or does it mean we lose diagnostics until libvirt learns
about it?

Dave

>  };
>  
>  void migrate_set_state(int *state, int 

[Qemu-devel] [PATCH v4 10/28] migration: add reporting of errors for outgoing migration

2016-03-11 Thread Daniel P. Berrange
Currently if an application initiates an outgoing migration,
it may or may not, get an error reported back on failure. If
the error occurs synchronously to the 'migrate' command
execution, the client app will see the error message. This
is the case for DNS lookup failures. If the error occurs
asynchronously to the monitor command though, the error
will be thrown away and the client left guessing about
what went wrong. This is the case for failure to connect
to the TCP server (eg due to wrong port, or firewall
rules, or other similar errors).

In the future we'll be adding more scope for errors to
happen asynchronously with the TLS protocol handshake.
TLS errors are hard to diagnose even when they are well
reported, so discarding errors entirely will make it
impossible to debug TLS connection problems.

Management apps which do migration are already using
'query-migrate' / 'info migrate' to check up on progress
of background migration operations and to see their end
status. This is a fine place to also include the error
message when things go wrong.

This patch thus adds an 'error-desc' field to the
MigrationInfo struct, which will be populated when
the 'status' is set to 'failed':

(qemu) migrate -d tcp:localhost:9001
(qemu) info migrate
capabilities: xbzrle: off rdma-pin-all: off auto-converge: off zero-blocks: off 
compress: off events: off x-postcopy-ram: off
Migration status: failed (Error connecting to socket: Connection refused)
total time: 0 milliseconds

In the HMP, when doing non-detached migration, it is
also possible to display this error message directly
to the app.

(qemu) migrate tcp:localhost:9001
Error connecting to socket: Connection refused

Or with QMP

  {
"execute": "query-migrate",
"arguments": {}
  }
  {
"return": {
  "status": "failed",
  "error-desc": "address resolution failed for myhost:9000: No address 
associated with hostname"
}
  }

Signed-off-by: Daniel P. Berrange 
---
 hmp.c | 13 -
 include/migration/migration.h |  5 -
 include/qapi/error.h  |  2 +-
 migration/migration.c | 15 ---
 migration/rdma.c  | 10 +++---
 migration/tcp.c   |  2 +-
 migration/unix.c  |  2 +-
 qapi-schema.json  |  7 ++-
 trace-events  |  2 +-
 util/error.c  |  2 +-
 10 files changed, 42 insertions(+), 18 deletions(-)

diff --git a/hmp.c b/hmp.c
index 5b6084a..7126f17 100644
--- a/hmp.c
+++ b/hmp.c
@@ -34,6 +34,7 @@
 #include "ui/console.h"
 #include "block/qapi.h"
 #include "qemu-io.h"
+#include "qemu/error-report.h"
 
 #ifdef CONFIG_SPICE
 #include 
@@ -167,8 +168,15 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
 }
 
 if (info->has_status) {
-monitor_printf(mon, "Migration status: %s\n",
+monitor_printf(mon, "Migration status: %s",
MigrationStatus_lookup[info->status]);
+if (info->status == MIGRATION_STATUS_FAILED &&
+info->has_error_desc) {
+monitor_printf(mon, " (%s)\n", info->error_desc);
+} else {
+monitor_printf(mon, "\n");
+}
+
 monitor_printf(mon, "total time: %" PRIu64 " milliseconds\n",
info->total_time);
 if (info->has_expected_downtime) {
@@ -1532,6 +1540,9 @@ static void hmp_migrate_status_cb(void *opaque)
 if (status->is_block_migration) {
 monitor_printf(status->mon, "\n");
 }
+if (info->has_error_desc) {
+error_report("%s", info->error_desc);
+}
 monitor_resume(status->mon);
 timer_del(status->timer);
 g_free(status);
diff --git a/include/migration/migration.h b/include/migration/migration.h
index e335380..46c1bbe 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -171,6 +171,9 @@ struct MigrationState
 QSIMPLEQ_HEAD(src_page_requests, MigrationSrcPageRequest) 
src_page_requests;
 /* The RAMBlock used in the last src_page_request */
 RAMBlock *last_req_rb;
+
+/* The last error that occurred */
+Error *error;
 };
 
 void migrate_set_state(int *state, int old_state, int new_state);
@@ -207,7 +210,7 @@ void rdma_start_outgoing_migration(void *opaque, const char 
*host_port, Error **
 
 void rdma_start_incoming_migration(const char *host_port, Error **errp);
 
-void migrate_fd_error(MigrationState *s);
+void migrate_fd_error(MigrationState *s, const Error *error);
 
 void migrate_fd_connect(MigrationState *s);
 
diff --git a/include/qapi/error.h b/include/qapi/error.h
index 02e9dd2..c7e2869 100644
--- a/include/qapi/error.h
+++ b/include/qapi/error.h
@@ -139,7 +139,7 @@ typedef enum ErrorClass {
 /*
  * Get @err's human-readable error message.
  */
-const char *error_get_pretty(Error *err);
+const char *error_get_pretty(const Error *err);
 
 /*
  * Get @err's error class.
diff --git