Re: [Qemu-devel] [PATCH V10 05/20] COLO: Add block replication into colo process

2018-08-17 Thread Dr. David Alan Gilbert
* Zhang Chen (zhangc...@gmail.com) wrote:
> On Tue, Aug 7, 2018 at 10:30 PM Dr. David Alan Gilbert 
> wrote:
> 
> > * Zhang Chen (zhangc...@gmail.com) wrote:
> > > Make sure master start block replication after slave's block
> > > replication started.
> > >
> > > Besides, we need to activate VM's blocks before goes into
> > > COLO state.
> > >
> > > Signed-off-by: zhanghailiang 
> > > Signed-off-by: Li Zhijian 
> > > Signed-off-by: Zhang Chen 
> > > ---
> > >  migration/colo.c  | 43 +++
> > >  migration/migration.c |  9 +
> > >  2 files changed, 52 insertions(+)
> > >
> > > diff --git a/migration/colo.c b/migration/colo.c
> > > index 081df1835f..e06640c3d6 100644
> > > --- a/migration/colo.c
> > > +++ b/migration/colo.c
> > > @@ -27,6 +27,7 @@
> > >  #include "replication.h"
> > >  #include "net/colo-compare.h"
> > >  #include "net/colo.h"
> > > +#include "block/block.h"
> > >
> > >  static bool vmstate_loading;
> > >  static Notifier packets_compare_notifier;
> > > @@ -56,6 +57,7 @@ static void secondary_vm_do_failover(void)
> > >  {
> > >  int old_state;
> > >  MigrationIncomingState *mis = migration_incoming_get_current();
> > > +Error *local_err = NULL;
> > >
> > >  /* Can not do failover during the process of VM's loading VMstate,
> > Or
> > >   * it will break the secondary VM.
> > > @@ -73,6 +75,11 @@ static void secondary_vm_do_failover(void)
> > >  migrate_set_state(>state, MIGRATION_STATUS_COLO,
> > >MIGRATION_STATUS_COMPLETED);
> > >
> > > +replication_stop_all(true, _err);
> > > +if (local_err) {
> > > +error_report_err(local_err);
> > > +}
> > > +
> > >  if (!autostart) {
> > >  error_report("\"-S\" qemu option will be ignored in secondary
> > side");
> > >  /* recover runstate to normal migration finish state */
> > > @@ -110,6 +117,7 @@ static void primary_vm_do_failover(void)
> > >  {
> > >  MigrationState *s = migrate_get_current();
> > >  int old_state;
> > > +Error *local_err = NULL;
> > >
> > >  migrate_set_state(>state, MIGRATION_STATUS_COLO,
> > >MIGRATION_STATUS_COMPLETED);
> > > @@ -133,6 +141,13 @@ static void primary_vm_do_failover(void)
> > >   FailoverStatus_str(old_state));
> > >  return;
> > >  }
> > > +
> > > +replication_stop_all(true, _err);
> > > +if (local_err) {
> > > +error_report_err(local_err);
> > > +local_err = NULL;
> > > +}
> > > +
> > >  /* Notify COLO thread that failover work is finished */
> > >  qemu_sem_post(>colo_exit_sem);
> > >  }
> > > @@ -356,6 +371,11 @@ static int
> > colo_do_checkpoint_transaction(MigrationState *s,
> > >  qemu_savevm_state_header(fb);
> > >  qemu_savevm_state_setup(fb);
> > >  qemu_mutex_lock_iothread();
> > > +replication_do_checkpoint_all(_err);
> > > +if (local_err) {
> > > +qemu_mutex_unlock_iothread();
> > > +goto out;
> > > +}
> >
> > In docs/block-replication.txt it says:
> >   b. replication_do_checkpoint_all()
> >This interface is called after all VM state is transferred to
> >Secondary QEMU. The Disk buffer will be dropped in this interface.
> >The caller must hold the I/O mutex lock if it is in migration/checkpoint
> >thread.
> >
> > but we're making this call before the call below that actually transfers
> > all the state.  Which is right?
> >
> 
> Hi Dave,
> 
> The "docs/block-replication.txt" means we should call the
> replication_do_checkpoint_all() after VM state is transferred in secondary
> node,
> and in primary node this function no need to call before transfers all the
> state.

OK, it might be worth clarifying the docs.

Dave

> Thanks
> Zhang Chen
> 
> 
> 
> >
> > Other than that I think it's OK.
> >
> > Dave
> >
> > >  qemu_savevm_state_complete_precopy(fb, false, false);
> > >  qemu_mutex_unlock_iothread();
> > >
> > > @@ -446,6 +466,12 @@ static void colo_process_checkpoint(MigrationState
> > *s)
> > >  object_unref(OBJECT(bioc));
> > >
> > >  qemu_mutex_lock_iothread();
> > > +replication_start_all(REPLICATION_MODE_PRIMARY, _err);
> > > +if (local_err) {
> > > +qemu_mutex_unlock_iothread();
> > > +goto out;
> > > +}
> > > +
> > >  vm_start();
> > >  qemu_mutex_unlock_iothread();
> > >  trace_colo_vm_state_change("stop", "run");
> > > @@ -585,6 +611,11 @@ void *colo_process_incoming_thread(void *opaque)
> > >  object_unref(OBJECT(bioc));
> > >
> > >  qemu_mutex_lock_iothread();
> > > +replication_start_all(REPLICATION_MODE_SECONDARY, _err);
> > > +if (local_err) {
> > > +qemu_mutex_unlock_iothread();
> > > +goto out;
> > > +}
> > >  vm_start();
> > >  trace_colo_vm_state_change("stop", "run");
> > >  qemu_mutex_unlock_iothread();
> > > @@ -665,6 +696,18 @@ void *colo_process_incoming_thread(void *opaque)
> 

Re: [Qemu-devel] [PATCH V10 05/20] COLO: Add block replication into colo process

2018-08-11 Thread Zhang Chen
On Tue, Aug 7, 2018 at 10:30 PM Dr. David Alan Gilbert 
wrote:

> * Zhang Chen (zhangc...@gmail.com) wrote:
> > Make sure master start block replication after slave's block
> > replication started.
> >
> > Besides, we need to activate VM's blocks before goes into
> > COLO state.
> >
> > Signed-off-by: zhanghailiang 
> > Signed-off-by: Li Zhijian 
> > Signed-off-by: Zhang Chen 
> > ---
> >  migration/colo.c  | 43 +++
> >  migration/migration.c |  9 +
> >  2 files changed, 52 insertions(+)
> >
> > diff --git a/migration/colo.c b/migration/colo.c
> > index 081df1835f..e06640c3d6 100644
> > --- a/migration/colo.c
> > +++ b/migration/colo.c
> > @@ -27,6 +27,7 @@
> >  #include "replication.h"
> >  #include "net/colo-compare.h"
> >  #include "net/colo.h"
> > +#include "block/block.h"
> >
> >  static bool vmstate_loading;
> >  static Notifier packets_compare_notifier;
> > @@ -56,6 +57,7 @@ static void secondary_vm_do_failover(void)
> >  {
> >  int old_state;
> >  MigrationIncomingState *mis = migration_incoming_get_current();
> > +Error *local_err = NULL;
> >
> >  /* Can not do failover during the process of VM's loading VMstate,
> Or
> >   * it will break the secondary VM.
> > @@ -73,6 +75,11 @@ static void secondary_vm_do_failover(void)
> >  migrate_set_state(>state, MIGRATION_STATUS_COLO,
> >MIGRATION_STATUS_COMPLETED);
> >
> > +replication_stop_all(true, _err);
> > +if (local_err) {
> > +error_report_err(local_err);
> > +}
> > +
> >  if (!autostart) {
> >  error_report("\"-S\" qemu option will be ignored in secondary
> side");
> >  /* recover runstate to normal migration finish state */
> > @@ -110,6 +117,7 @@ static void primary_vm_do_failover(void)
> >  {
> >  MigrationState *s = migrate_get_current();
> >  int old_state;
> > +Error *local_err = NULL;
> >
> >  migrate_set_state(>state, MIGRATION_STATUS_COLO,
> >MIGRATION_STATUS_COMPLETED);
> > @@ -133,6 +141,13 @@ static void primary_vm_do_failover(void)
> >   FailoverStatus_str(old_state));
> >  return;
> >  }
> > +
> > +replication_stop_all(true, _err);
> > +if (local_err) {
> > +error_report_err(local_err);
> > +local_err = NULL;
> > +}
> > +
> >  /* Notify COLO thread that failover work is finished */
> >  qemu_sem_post(>colo_exit_sem);
> >  }
> > @@ -356,6 +371,11 @@ static int
> colo_do_checkpoint_transaction(MigrationState *s,
> >  qemu_savevm_state_header(fb);
> >  qemu_savevm_state_setup(fb);
> >  qemu_mutex_lock_iothread();
> > +replication_do_checkpoint_all(_err);
> > +if (local_err) {
> > +qemu_mutex_unlock_iothread();
> > +goto out;
> > +}
>
> In docs/block-replication.txt it says:
>   b. replication_do_checkpoint_all()
>This interface is called after all VM state is transferred to
>Secondary QEMU. The Disk buffer will be dropped in this interface.
>The caller must hold the I/O mutex lock if it is in migration/checkpoint
>thread.
>
> but we're making this call before the call below that actually transfers
> all the state.  Which is right?
>

Hi Dave,

The "docs/block-replication.txt" means we should call the
replication_do_checkpoint_all() after VM state is transferred in secondary
node,
and in primary node this function no need to call before transfers all the
state.

Thanks
Zhang Chen



>
> Other than that I think it's OK.
>
> Dave
>
> >  qemu_savevm_state_complete_precopy(fb, false, false);
> >  qemu_mutex_unlock_iothread();
> >
> > @@ -446,6 +466,12 @@ static void colo_process_checkpoint(MigrationState
> *s)
> >  object_unref(OBJECT(bioc));
> >
> >  qemu_mutex_lock_iothread();
> > +replication_start_all(REPLICATION_MODE_PRIMARY, _err);
> > +if (local_err) {
> > +qemu_mutex_unlock_iothread();
> > +goto out;
> > +}
> > +
> >  vm_start();
> >  qemu_mutex_unlock_iothread();
> >  trace_colo_vm_state_change("stop", "run");
> > @@ -585,6 +611,11 @@ void *colo_process_incoming_thread(void *opaque)
> >  object_unref(OBJECT(bioc));
> >
> >  qemu_mutex_lock_iothread();
> > +replication_start_all(REPLICATION_MODE_SECONDARY, _err);
> > +if (local_err) {
> > +qemu_mutex_unlock_iothread();
> > +goto out;
> > +}
> >  vm_start();
> >  trace_colo_vm_state_change("stop", "run");
> >  qemu_mutex_unlock_iothread();
> > @@ -665,6 +696,18 @@ void *colo_process_incoming_thread(void *opaque)
> >  goto out;
> >  }
> >
> > +replication_get_error_all(_err);
> > +if (local_err) {
> > +qemu_mutex_unlock_iothread();
> > +goto out;
> > +}
> > +/* discard colo disk buffer */
> > +replication_do_checkpoint_all(_err);
> > +if (local_err) {
> > +

Re: [Qemu-devel] [PATCH V10 05/20] COLO: Add block replication into colo process

2018-08-07 Thread Dr. David Alan Gilbert
* Zhang Chen (zhangc...@gmail.com) wrote:
> Make sure master start block replication after slave's block
> replication started.
> 
> Besides, we need to activate VM's blocks before goes into
> COLO state.
> 
> Signed-off-by: zhanghailiang 
> Signed-off-by: Li Zhijian 
> Signed-off-by: Zhang Chen 
> ---
>  migration/colo.c  | 43 +++
>  migration/migration.c |  9 +
>  2 files changed, 52 insertions(+)
> 
> diff --git a/migration/colo.c b/migration/colo.c
> index 081df1835f..e06640c3d6 100644
> --- a/migration/colo.c
> +++ b/migration/colo.c
> @@ -27,6 +27,7 @@
>  #include "replication.h"
>  #include "net/colo-compare.h"
>  #include "net/colo.h"
> +#include "block/block.h"
>  
>  static bool vmstate_loading;
>  static Notifier packets_compare_notifier;
> @@ -56,6 +57,7 @@ static void secondary_vm_do_failover(void)
>  {
>  int old_state;
>  MigrationIncomingState *mis = migration_incoming_get_current();
> +Error *local_err = NULL;
>  
>  /* Can not do failover during the process of VM's loading VMstate, Or
>   * it will break the secondary VM.
> @@ -73,6 +75,11 @@ static void secondary_vm_do_failover(void)
>  migrate_set_state(>state, MIGRATION_STATUS_COLO,
>MIGRATION_STATUS_COMPLETED);
>  
> +replication_stop_all(true, _err);
> +if (local_err) {
> +error_report_err(local_err);
> +}
> +
>  if (!autostart) {
>  error_report("\"-S\" qemu option will be ignored in secondary side");
>  /* recover runstate to normal migration finish state */
> @@ -110,6 +117,7 @@ static void primary_vm_do_failover(void)
>  {
>  MigrationState *s = migrate_get_current();
>  int old_state;
> +Error *local_err = NULL;
>  
>  migrate_set_state(>state, MIGRATION_STATUS_COLO,
>MIGRATION_STATUS_COMPLETED);
> @@ -133,6 +141,13 @@ static void primary_vm_do_failover(void)
>   FailoverStatus_str(old_state));
>  return;
>  }
> +
> +replication_stop_all(true, _err);
> +if (local_err) {
> +error_report_err(local_err);
> +local_err = NULL;
> +}
> +
>  /* Notify COLO thread that failover work is finished */
>  qemu_sem_post(>colo_exit_sem);
>  }
> @@ -356,6 +371,11 @@ static int colo_do_checkpoint_transaction(MigrationState 
> *s,
>  qemu_savevm_state_header(fb);
>  qemu_savevm_state_setup(fb);
>  qemu_mutex_lock_iothread();
> +replication_do_checkpoint_all(_err);
> +if (local_err) {
> +qemu_mutex_unlock_iothread();
> +goto out;
> +}

In docs/block-replication.txt it says:
  b. replication_do_checkpoint_all()
   This interface is called after all VM state is transferred to
   Secondary QEMU. The Disk buffer will be dropped in this interface.
   The caller must hold the I/O mutex lock if it is in migration/checkpoint
   thread.

but we're making this call before the call below that actually transfers
all the state.  Which is right?

Other than that I think it's OK.

Dave

>  qemu_savevm_state_complete_precopy(fb, false, false);
>  qemu_mutex_unlock_iothread();
>  
> @@ -446,6 +466,12 @@ static void colo_process_checkpoint(MigrationState *s)
>  object_unref(OBJECT(bioc));
>  
>  qemu_mutex_lock_iothread();
> +replication_start_all(REPLICATION_MODE_PRIMARY, _err);
> +if (local_err) {
> +qemu_mutex_unlock_iothread();
> +goto out;
> +}
> +
>  vm_start();
>  qemu_mutex_unlock_iothread();
>  trace_colo_vm_state_change("stop", "run");
> @@ -585,6 +611,11 @@ void *colo_process_incoming_thread(void *opaque)
>  object_unref(OBJECT(bioc));
>  
>  qemu_mutex_lock_iothread();
> +replication_start_all(REPLICATION_MODE_SECONDARY, _err);
> +if (local_err) {
> +qemu_mutex_unlock_iothread();
> +goto out;
> +}
>  vm_start();
>  trace_colo_vm_state_change("stop", "run");
>  qemu_mutex_unlock_iothread();
> @@ -665,6 +696,18 @@ void *colo_process_incoming_thread(void *opaque)
>  goto out;
>  }
>  
> +replication_get_error_all(_err);
> +if (local_err) {
> +qemu_mutex_unlock_iothread();
> +goto out;
> +}
> +/* discard colo disk buffer */
> +replication_do_checkpoint_all(_err);
> +if (local_err) {
> +qemu_mutex_unlock_iothread();
> +goto out;
> +}
> +
>  vmstate_loading = false;
>  vm_start();
>  trace_colo_vm_state_change("stop", "run");
> diff --git a/migration/migration.c b/migration/migration.c
> index ce06941706..c97b7660af 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -385,6 +385,7 @@ static void process_incoming_migration_co(void *opaque)
>  MigrationIncomingState *mis = migration_incoming_get_current();
>  PostcopyState ps;
>  int ret;
> +Error *local_err = NULL;
>  
>