Re: [libvirt PATCH v2 08/10] qemu: block: add function to wait for blockcommits and collect results

2020-05-18 Thread Peter Krempa
On Wed, May 06, 2020 at 13:42:24 +0200, Pavel Mores wrote:
> This is the third phase of snapshot deletion.  Blockcommits to delete the
> snapshot have been launched and now we can wait for them to finish, check
> results and report errors if any.
> 
> Signed-off-by: Pavel Mores 
> ---
>  src/qemu/qemu_driver.c | 59 ++
>  1 file changed, 59 insertions(+)
> 
> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
> index 35b7fb69d5..a2629e9002 100644
> --- a/src/qemu/qemu_driver.c
> +++ b/src/qemu/qemu_driver.c
> @@ -16941,6 +16941,65 @@ 
> qemuDomainSnapshotDeleteExternalLaunchJobs(virDomainObjPtr vm,
>  }
>  
>  
> +static int
> +qemuDomainSnapshotDeleteExternalWaitForJobs(virDomainObjPtr vm,
> +virQEMUDriverPtr driver,
> +const virBlockCommitDesc 
> *blockCommitDescs,
> +int numDescs)
> +{
> +size_t i;
> +
> +for (i = 0; i < numDescs; i++) {
> +virDomainDiskDefPtr disk = blockCommitDescs[i].disk;
> +bool isActive = blockCommitDescs[i].isActive;
> +
> +/* wait for the blockcommit job to finish (in particular, reach
> + * one of the finished QEMU_BLOCKJOB_STATE_* states)... */
> +g_autoptr(qemuBlockJobData) job = NULL;
> +
> +if (!(job = qemuBlockJobDiskGetJob(disk))) {
> +virReportError(VIR_ERR_OPERATION_INVALID,
> +_("disk %s does not have an active block job"), 
> disk->dst);
> +return -1;
> +}
> +
> +qemuBlockJobSyncBegin(job);
> +qemuBlockJobUpdate(vm, job, QEMU_ASYNC_JOB_NONE);
> +while (job->state != QEMU_BLOCKJOB_STATE_READY &&
> +   job->state != QEMU_BLOCKJOB_STATE_FAILED &&
> +   job->state != QEMU_BLOCKJOB_STATE_CANCELLED &&
> +   job->state != QEMU_BLOCKJOB_STATE_COMPLETED) {
> +
> +if (virDomainObjWait(vm) < 0)
> +return -1;
> +qemuBlockJobUpdate(vm, job, QEMU_ASYNC_JOB_NONE);
> +}
> +qemuBlockJobSyncEnd(vm, job, QEMU_ASYNC_JOB_NONE);

I'm pretty sure that this will not work for more than one disk. If the
job for the disk with i=2 finishes sooner than the one for i=1 it will
be auto-completed because you set qemuBlockJobSyncBegin(job); after you
started the commit. This creates a race condition between this handler
and the job itself.

All the jobs MUST be marked as 'sync' prior to actually starting them in
qemu if you need to wait/handle them here.

> +
> +if ((isActive && job->state != QEMU_BLOCKJOB_STATE_READY) ||
> +(!isActive && job->state != QEMU_BLOCKJOB_STATE_COMPLETED)) {
> +virReportError(VIR_ERR_OPERATION_INVALID,
> +_("blockcomit job failed for disk %s"), disk->dst);
> +/* TODO Apr 30, 2020: how to handle this?  Bailing out doesn't
> + * seem an obvious option in this case as all blockjobs are now
> + * created and running - if any of them are to fail they will,
> + * regardless of whether we break here.  It might make more
> + * sense to continue and at least report all errors. */

Aborting the jobs would be wrong IMO. I think that a better option is to
wait for all other jobs to complete and report an error. The surrounding
code needs to be able to try re-deleting such a snapshot which will then
ignore the disks which were already commited as there wouldn't be an
recovery path otherwise.

The snapshot metadata must stay intact though.


> +/*return -1;*/
> +}
> +
> +/* ... and pivot if necessary */
> +if (isActive) {
> +if (qemuDomainBlockJobAbortImpl(driver, vm, disk->dst,
> +
> VIR_DOMAIN_BLOCK_JOB_ABORT_PIVOT) < 0)
> +return -1;
> +}
> +}
> +
> +return 0;
> +}
> +
> +
>  static int
>  qemuDomainSnapshotDelete(virDomainSnapshotPtr snapshot,
>   unsigned int flags)
> -- 
> 2.24.1
> 



[libvirt PATCH v2 08/10] qemu: block: add function to wait for blockcommits and collect results

2020-05-06 Thread Pavel Mores
This is the third phase of snapshot deletion.  Blockcommits to delete the
snapshot have been launched and now we can wait for them to finish, check
results and report errors if any.

Signed-off-by: Pavel Mores 
---
 src/qemu/qemu_driver.c | 59 ++
 1 file changed, 59 insertions(+)

diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 35b7fb69d5..a2629e9002 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -16941,6 +16941,65 @@ 
qemuDomainSnapshotDeleteExternalLaunchJobs(virDomainObjPtr vm,
 }
 
 
+static int
+qemuDomainSnapshotDeleteExternalWaitForJobs(virDomainObjPtr vm,
+virQEMUDriverPtr driver,
+const virBlockCommitDesc 
*blockCommitDescs,
+int numDescs)
+{
+size_t i;
+
+for (i = 0; i < numDescs; i++) {
+virDomainDiskDefPtr disk = blockCommitDescs[i].disk;
+bool isActive = blockCommitDescs[i].isActive;
+
+/* wait for the blockcommit job to finish (in particular, reach
+ * one of the finished QEMU_BLOCKJOB_STATE_* states)... */
+g_autoptr(qemuBlockJobData) job = NULL;
+
+if (!(job = qemuBlockJobDiskGetJob(disk))) {
+virReportError(VIR_ERR_OPERATION_INVALID,
+_("disk %s does not have an active block job"), 
disk->dst);
+return -1;
+}
+
+qemuBlockJobSyncBegin(job);
+qemuBlockJobUpdate(vm, job, QEMU_ASYNC_JOB_NONE);
+while (job->state != QEMU_BLOCKJOB_STATE_READY &&
+   job->state != QEMU_BLOCKJOB_STATE_FAILED &&
+   job->state != QEMU_BLOCKJOB_STATE_CANCELLED &&
+   job->state != QEMU_BLOCKJOB_STATE_COMPLETED) {
+
+if (virDomainObjWait(vm) < 0)
+return -1;
+qemuBlockJobUpdate(vm, job, QEMU_ASYNC_JOB_NONE);
+}
+qemuBlockJobSyncEnd(vm, job, QEMU_ASYNC_JOB_NONE);
+
+if ((isActive && job->state != QEMU_BLOCKJOB_STATE_READY) ||
+(!isActive && job->state != QEMU_BLOCKJOB_STATE_COMPLETED)) {
+virReportError(VIR_ERR_OPERATION_INVALID,
+_("blockcomit job failed for disk %s"), disk->dst);
+/* TODO Apr 30, 2020: how to handle this?  Bailing out doesn't
+ * seem an obvious option in this case as all blockjobs are now
+ * created and running - if any of them are to fail they will,
+ * regardless of whether we break here.  It might make more
+ * sense to continue and at least report all errors. */
+/*return -1;*/
+}
+
+/* ... and pivot if necessary */
+if (isActive) {
+if (qemuDomainBlockJobAbortImpl(driver, vm, disk->dst,
+VIR_DOMAIN_BLOCK_JOB_ABORT_PIVOT) 
< 0)
+return -1;
+}
+}
+
+return 0;
+}
+
+
 static int
 qemuDomainSnapshotDelete(virDomainSnapshotPtr snapshot,
  unsigned int flags)
-- 
2.24.1