Re: [Qemu-block] [RFC] transactions: add transaction-wide property

2015-10-20 Thread Fam Zheng
On Thu, 09/24 17:40, John Snow wrote:
> This replaces the per-action property as in Fam's series.
> Instead, we have a transaction-wide property that is shared
> with each action.
> 
> At the action level, if a property supplied transaction-wide
> is disagreeable, we return error and the transaction is aborted.
> 
> RFC:
> 
> Where this makes sense: Any transactional actions that aren't
> prepared to accept this new paradigm of transactional behavior
> can error_setg and return.
> 
> Where this may not make sense: consider the transactions which
> do not use BlockJobs to perform their actions, i.e. they are
> synchronous during the transactional phase. Because they either
> fail or succeed so early, we might say that of course they can
> support this property ...
> 
> ...however, consider the case where we create a new bitmap and
> perform a full backup, using allow_partial=false. If the backup
> fails, we might well expect the bitmap to be deleted because a
> member of the transaction ultimately/eventually failed. However,
> the bitmap creation was not undone because it does not have a
> pending/delayed abort/commit action -- those are only for jobs
> in this implementation.
> 
> How do we fix this?
> 
> (1) We just say "No, you can't use the new block job transaction
> completion mechanic in conjunction with these commands,"
> 
> (2) We make Bitmap creation/resetting small, synchronous blockjobs
> that can join the BlockJobTxn

We could categorize commands as synchronous ones and long running ones, and
make it explicit that only long running jobs are affected by "allow_partial".

> 
> Signed-off-by: John Snow 
> ---
>  blockdev.c | 87 
> --
>  blockjob.c |  2 +-
>  qapi-schema.json   | 15 +++--
>  qapi/block-core.json   | 26 ---
>  qmp-commands.hx|  2 +-
>  tests/qemu-iotests/124 | 12 +++
>  6 files changed, 83 insertions(+), 61 deletions(-)
> 
> diff --git a/blockdev.c b/blockdev.c
> index 45a9fe7..02b1a83 100644
> --- a/blockdev.c
> +++ b/blockdev.c
> @@ -1061,7 +1061,7 @@ static void blockdev_do_action(int kind, void *data, 
> Error **errp)
>  action.data = data;
>  list.value = 
>  list.next = NULL;
> -qmp_transaction(, errp);
> +qmp_transaction(, false, NULL, errp);
>  }
>  
>  void qmp_blockdev_snapshot_sync(bool has_device, const char *device,
> @@ -1286,6 +1286,7 @@ struct BlkActionState {
>  TransactionAction *action;
>  const BlkActionOps *ops;
>  BlockJobTxn *block_job_txn;
> +TransactionProperties *txn_props;
>  QSIMPLEQ_ENTRY(BlkActionState) entry;
>  };
>  
> @@ -1322,6 +1323,12 @@ static void internal_snapshot_prepare(BlkActionState 
> *common,
>  name = internal->name;
>  
>  /* 2. check for validation */
> +if (!common->txn_props->allow_partial) {
> +error_setg(errp,
> +   "internal_snapshot does not support allow_partial = 
> false");
> +return;
> +}
> +
>  blk = blk_by_name(device);
>  if (!blk) {
>  error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
> @@ -1473,6 +1480,12 @@ static void external_snapshot_prepare(BlkActionState 
> *common,
>  }
>  
>  /* start processing */
> +if (!common->txn_props->allow_partial) {
> +error_setg(errp,
> +   "external_snapshot does not support allow_partial = 
> false");
> +return;
> +}
> +
>  state->old_bs = bdrv_lookup_bs(has_device ? device : NULL,
> has_node_name ? node_name : NULL,
> _err);
> @@ -1603,14 +1616,11 @@ static void drive_backup_prepare(BlkActionState 
> *common, Error **errp)
>  DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
>  BlockDriverState *bs;
>  BlockBackend *blk;
> -DriveBackupTxn *backup_txn;
>  DriveBackup *backup;
> -BlockJobTxn *txn = NULL;
>  Error *local_err = NULL;
>  
>  assert(common->action->kind == TRANSACTION_ACTION_KIND_DRIVE_BACKUP);
> -backup_txn = common->action->drive_backup;
> -backup = backup_txn->base;
> +backup = common->action->drive_backup->base;
>  
>  blk = blk_by_name(backup->device);
>  if (!blk) {
> @@ -1624,11 +1634,6 @@ static void drive_backup_prepare(BlkActionState 
> *common, Error **errp)
>  state->aio_context = bdrv_get_aio_context(bs);
>  aio_context_acquire(state->aio_context);
>  
> -if (backup_txn->has_transactional_cancel &&
> -backup_txn->transactional_cancel) {
> -txn = common->block_job_txn;
> -}
> -
>  do_drive_backup(backup->device, backup->target,
>  backup->has_format, backup->format,
>  backup->sync,
> @@ -1637,7 +1642,7 @@ static void drive_backup_prepare(BlkActionState 
> *common, Error **errp)
>  backup->has_bitmap, backup->bitmap,
>  

Re: [Qemu-block] [Qemu-devel] [PATCH 03/17] rbd: add support for getting password from QCryptoSecret object

2015-10-20 Thread Daniel P. Berrange
On Mon, Oct 19, 2015 at 03:57:29PM -0700, Josh Durgin wrote:
> On 10/19/2015 08:09 AM, Daniel P. Berrange wrote:
> >Currently RBD passwords must be provided on the command line
> >via
> >
> >   $QEMU -drive file=rbd:pool/image:id=myname:\
> > key=QVFDVm41aE82SHpGQWhBQXEwTkN2OGp0SmNJY0UrSE9CbE1RMUE=:\
> > auth_supported=cephx
> >
> >This is insecure because the key is visible in the OS process
> >listing.
> >
> >This adds support for an 'authsecret' parameter in the RBD
> >parameters that can be used with the QCryptoSecret object to
> >provide the password via a file:
> >
> >   echo "QVFDVm41aE82SHpGQWhBQXEwTkN2OGp0SmNJY0UrSE9CbE1RMUE=" > poolkey.b64
> >   $QEMU -object secret,id=secret0,file=poolkey.b64,format=base64 \
> >   -drive file=rbd:pool/image:id=myname:\
> >   auth_supported=cephx,authsecret=secret0
> >
> >Signed-off-by: Daniel P. Berrange 
> >---
> 
> Looks good in general, thanks for fixing this! Just one thing to fix
> below.
> 
> >  block/rbd.c | 42 ++
> >  1 file changed, 42 insertions(+)
> >
> >diff --git a/block/rbd.c b/block/rbd.c
> >index a60a19d..0acf777 100644
> >--- a/block/rbd.c
> >+++ b/block/rbd.c
> >@@ -16,6 +16,7 @@
> >  #include "qemu-common.h"
> >  #include "qemu/error-report.h"
> >  #include "block/block_int.h"
> >+#include "crypto/secret.h"
> >
> >  #include 
> >
> >@@ -228,6 +229,23 @@ static char *qemu_rbd_parse_clientname(const char 
> >*conf, char *clientname)
> >  return NULL;
> >  }
> >
> >+
> >+static int qemu_rbd_set_auth(rados_t cluster, const char *secretid,
> >+ Error **errp)
> >+{
> >+gchar *secret = qcrypto_secret_lookup_as_base64(secretid,
> >+errp);
> >+if (!secret) {
> >+return -1;
> >+}
> 
> It looks like this fails if no authsecret is provided. Ceph auth can be
> disabled, so it seems like we should skip the qemu_rbd_set_auth() calls
> in this case.

This failure scenario happens if the user provides a key ID, but the
corresponding QCryptoSecret does not exist. This is a usage error
by the user, so it is appropriate to have it be a fatal error.
In the case that they don't want any auth, they can just leave out
the keyid parameter.

Regards,
Daniel
-- 
|: http://berrange.com  -o-http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org  -o- http://virt-manager.org :|
|: http://autobuild.org   -o- http://search.cpan.org/~danberr/ :|
|: http://entangle-photo.org   -o-   http://live.gnome.org/gtk-vnc :|



Re: [Qemu-block] [PATCH v7 27/39] block: Add blk_remove_bs()

2015-10-20 Thread Kevin Wolf
Am 19.10.2015 um 17:53 hat Max Reitz geschrieben:
> This function removes the BlockDriverState associated with the given
> BlockBackend from that BB and sets the BDS pointer in the BB to NULL.
> 
> Signed-off-by: Max Reitz 
> ---
>  block/block-backend.c  | 12 
>  include/sysemu/block-backend.h |  1 +
>  2 files changed, 13 insertions(+)
> 
> diff --git a/block/block-backend.c b/block/block-backend.c
> index 19fdaae..eb7409c 100644
> --- a/block/block-backend.c
> +++ b/block/block-backend.c
> @@ -334,6 +334,18 @@ void blk_hide_on_behalf_of_hmp_drive_del(BlockBackend 
> *blk)
>  }
>  
>  /*
> + * Disassociates the currently associated BlockDriverState from @blk.
> + */
> +void blk_remove_bs(BlockBackend *blk)
> +{
> +blk_update_root_state(blk);
> +
> +bdrv_unref(blk->bs);
> +blk->bs->blk = NULL;

Use after free?

> +blk->bs = NULL;
> +}

Kevin



Re: [Qemu-block] [PATCH v7 00/39] blockdev: BlockBackend and media

2015-10-20 Thread Kevin Wolf
Am 19.10.2015 um 17:53 hat Max Reitz geschrieben:
> This series reworks a lot regarding BlockBackend and media. Basically,
> it allows empty BlockBackends, that is BBs without a BDS tree.
> 
> Before this series, empty drives are represented by a BlockBackend with
> an empty BDS attached to it (a BDS with a NULL driver). However, now we
> have BlockBackends, thus an empty drive should be represented by a
> BlockBackend without any BDS tree attached to it. This is what this
> series does.

Thanks, applied patches 1-26 to the block branch.

Kevin



Re: [Qemu-block] [Qemu-devel] Question about xen disk unplug support for ahci missed in qemu

2015-10-20 Thread Laszlo Ersek
On 10/20/15 13:59, Stefano Stabellini wrote:
> On Mon, 19 Oct 2015, Laszlo Ersek wrote:
>> On 10/16/15 21:09, Laszlo Ersek wrote:
>>> On 10/16/15 13:34, Fabio Fantoni wrote:
 Il 16/10/2015 12:47, Stefano Stabellini ha scritto:
> On Fri, 16 Oct 2015, Fabio Fantoni wrote:
>> Il 16/10/2015 12:13, Anthony PERARD ha scritto:
>>> On Fri, Oct 16, 2015 at 10:32:44AM +0200, Fabio Fantoni wrote:
 Il 15/10/2015 20:02, Anthony PERARD ha scritto:
> On Thu, Oct 15, 2015 at 06:27:17PM +0200, Fabio Fantoni wrote:
>> Il 14/10/2015 13:06, Stefano Stabellini ha scritto:
>>> I would suggest Fabio to avoid AHCI disks altogether and just use
>>> OVMF
>>> with PV disks only and Anthony's patch to libxl to avoid creating
>>> any
>>> IDE disks: http://marc.info/?l=xen-devel=144482080812353.
>>>
>>> Would that work for you?
>> Thanks for the advice, I tried it:
>> https://github.com/Fantu/Xen/commits/rebase/m2r-testing-4.6
>>
>> I installed W10 pro 64 bit with ide disk, installed the win pv
>> drivers
>> and
>> after changed to xvdX instead hdX, is the only change needed, right?
>> Initial boot is ok (ovmf part about pv disks seems ok) but windows
>> boot
>> fails with problem with pv drivers.
>> In attachment full qemu log with xen_platform trace and domU's xl
>> cfg.
>>
>> Someone have windows domUs with ovmf and pv disks only working?
>> If yes
>> can
>> tell me the difference to understand what can be the problem please?
> When I worked on the PV disk implementation in OVMF, I was able to
> boot
> a Windows 8 with pv disk only.
>
> I don't have access to the guest configuration I was using, but I
> think
> one
> difference would be the viridian setting, I'm pretty sure I did
> not set
> it.
>
 I tried with viridian disabled but did the same thing, looking
 cdrom as
 latest thing before xenbug trace in qemu log I tried also to remove
 it but
 also in this case don't boot correctly, full qemu log in attachment.
 I don't know if is a ovmf thing to improve (like what seems in
 Laszlo and
 Kevin mails) or xen winpv drivers unexpected case, have you tried also
 with
 latest winpv builds? (for exclude regression)
>>> No, I did not tried the latest winpv drivers.
>>>
>>> Sorry I can't help much more than that. When I install this win8 guest
>>> tried
>>> to boot it with pv drivers only, that was more than a year ago. I
>>> have not
>>> check if it's still working. (Also I can not try anything more recent,
>>> right now.)
>>>
>> I did many other tests, retrying with ide first boot working but show pv
>> devices not working, I did another reboot (with ide) and pv devices was
>> working, after I retried with pv (xvdX) and boot correctly.
>> After other tests I found that with empty cdrom device (required for xl
>> cd-insert/cd-eject) boot stop at start (tianocore image), same result
>> with ide
>> instead.
>>  From xl cfg:
>> disk=['/mnt/vm/disks/W10UEFI.disk1.cow-sn1,qcow2,xvda,rw',',raw,xvdb,ro,cdrom']
>>
>> With seabios domU boot also with empty cdrom.
>> In qemu log I found only these that can be related:
>>> xen be: qdisk-51728: error: Could not open image: No such file or
>>> directory
>>> xen be: qdisk-51728: initialise() failed
>> And latest xl dmesg line is:
>>> (d1) Invoking OVMF ...
>> If you need more informations/test tell me and I'll post them.
> Are you saying that without any cdrom drives, it works correctly?
 Yes, I did also another test to be sure, starting with ide, installing
 windows, the pv drivers, rebooting 2 times (with one at boot of time
 boot with ide only and without net and disks pv drivers working) and
 after rebooting with pv disks (xvdX) works.
 With cdrom not empty (with iso) works, with empty not, tried with both
 ide (hdX) and pv (xvdX).
 Empty cdrom not working with ovmf I suppose is ovmf bug or unexpected case.
 About major of winpv drivers problem at boot I suppose can be solved
 improving ovmf and winpv driver removing bad hybrid thing actually, but
 I have too low knowledge to be sure.
 About the problem of pv start after install that requiring at least 2
 reboot can be also a windows 10 problem (only a suppose).

 About empty cdrom with ovmf can be solved please?

>>>
>>> Sorry, I find your problem report impenetrable. :( Please slow down and
>>> try to spend time on punctuation at least.
>>>
>>> For me to make heads or tails of this, I'll need the following:
>>>
>>> - The debug output of an OVMF binary built with the DEBUG_VERBOSE bit

Re: [Qemu-block] [Qemu-devel] Question about xen disk unplug support for ahci missed in qemu

2015-10-20 Thread Stefano Stabellini
On Mon, 19 Oct 2015, Laszlo Ersek wrote:
> On 10/16/15 21:09, Laszlo Ersek wrote:
> > On 10/16/15 13:34, Fabio Fantoni wrote:
> >> Il 16/10/2015 12:47, Stefano Stabellini ha scritto:
> >>> On Fri, 16 Oct 2015, Fabio Fantoni wrote:
>  Il 16/10/2015 12:13, Anthony PERARD ha scritto:
> > On Fri, Oct 16, 2015 at 10:32:44AM +0200, Fabio Fantoni wrote:
> >> Il 15/10/2015 20:02, Anthony PERARD ha scritto:
> >>> On Thu, Oct 15, 2015 at 06:27:17PM +0200, Fabio Fantoni wrote:
>  Il 14/10/2015 13:06, Stefano Stabellini ha scritto:
> > I would suggest Fabio to avoid AHCI disks altogether and just use
> > OVMF
> > with PV disks only and Anthony's patch to libxl to avoid creating
> > any
> > IDE disks: http://marc.info/?l=xen-devel=144482080812353.
> >
> > Would that work for you?
>  Thanks for the advice, I tried it:
>  https://github.com/Fantu/Xen/commits/rebase/m2r-testing-4.6
> 
>  I installed W10 pro 64 bit with ide disk, installed the win pv
>  drivers
>  and
>  after changed to xvdX instead hdX, is the only change needed, right?
>  Initial boot is ok (ovmf part about pv disks seems ok) but windows
>  boot
>  fails with problem with pv drivers.
>  In attachment full qemu log with xen_platform trace and domU's xl
>  cfg.
> 
>  Someone have windows domUs with ovmf and pv disks only working?
>  If yes
>  can
>  tell me the difference to understand what can be the problem please?
> >>> When I worked on the PV disk implementation in OVMF, I was able to
> >>> boot
> >>> a Windows 8 with pv disk only.
> >>>
> >>> I don't have access to the guest configuration I was using, but I
> >>> think
> >>> one
> >>> difference would be the viridian setting, I'm pretty sure I did
> >>> not set
> >>> it.
> >>>
> >> I tried with viridian disabled but did the same thing, looking
> >> cdrom as
> >> latest thing before xenbug trace in qemu log I tried also to remove
> >> it but
> >> also in this case don't boot correctly, full qemu log in attachment.
> >> I don't know if is a ovmf thing to improve (like what seems in
> >> Laszlo and
> >> Kevin mails) or xen winpv drivers unexpected case, have you tried also
> >> with
> >> latest winpv builds? (for exclude regression)
> > No, I did not tried the latest winpv drivers.
> >
> > Sorry I can't help much more than that. When I install this win8 guest
> > tried
> > to boot it with pv drivers only, that was more than a year ago. I
> > have not
> > check if it's still working. (Also I can not try anything more recent,
> > right now.)
> >
>  I did many other tests, retrying with ide first boot working but show pv
>  devices not working, I did another reboot (with ide) and pv devices was
>  working, after I retried with pv (xvdX) and boot correctly.
>  After other tests I found that with empty cdrom device (required for xl
>  cd-insert/cd-eject) boot stop at start (tianocore image), same result
>  with ide
>  instead.
>   From xl cfg:
>  disk=['/mnt/vm/disks/W10UEFI.disk1.cow-sn1,qcow2,xvda,rw',',raw,xvdb,ro,cdrom']
> 
>  With seabios domU boot also with empty cdrom.
>  In qemu log I found only these that can be related:
> > xen be: qdisk-51728: error: Could not open image: No such file or
> > directory
> > xen be: qdisk-51728: initialise() failed
>  And latest xl dmesg line is:
> > (d1) Invoking OVMF ...
>  If you need more informations/test tell me and I'll post them.
> >>> Are you saying that without any cdrom drives, it works correctly?
> >> Yes, I did also another test to be sure, starting with ide, installing
> >> windows, the pv drivers, rebooting 2 times (with one at boot of time
> >> boot with ide only and without net and disks pv drivers working) and
> >> after rebooting with pv disks (xvdX) works.
> >> With cdrom not empty (with iso) works, with empty not, tried with both
> >> ide (hdX) and pv (xvdX).
> >> Empty cdrom not working with ovmf I suppose is ovmf bug or unexpected case.
> >> About major of winpv drivers problem at boot I suppose can be solved
> >> improving ovmf and winpv driver removing bad hybrid thing actually, but
> >> I have too low knowledge to be sure.
> >> About the problem of pv start after install that requiring at least 2
> >> reboot can be also a windows 10 problem (only a suppose).
> >>
> >> About empty cdrom with ovmf can be solved please?
> >>
> > 
> > Sorry, I find your problem report impenetrable. :( Please slow down and
> > try to spend time on punctuation at least.
> > 
> > For me to make heads or tails of this, I'll need the following:
> > 
> > - The debug output of an OVMF binary built with the DEBUG_VERBOSE bit
> > (0x0040) enabled in 

Re: [Qemu-block] [PATCH v4 00/12] block: Protect nested event loop with bdrv_drained_begin and bdrv_drained_end

2015-10-20 Thread Kevin Wolf
Am 20.10.2015 um 07:16 hat Fam Zheng geschrieben:
> v4: Rebase on to master so fix the "bdrv_move_feature_fields" issue.
> 
> v3: Call bdrv_drain unconditionally in bdrv_drained_begin.
> Document the internal I/O implications between bdrv_drain_begin and end.
> 
> The nested aio_poll()'s in block layer has a bug that new r/w requests from
> ioeventfds and nbd exports are processed, which might break the caller's
> semantics (qmp_transaction) or even pointers (bdrv_reopen).

This series conflicts with the first part of Max' "BlockBackend and
media" series which I already merged to my tree. I tried to resolve the
conflict while applying, but while it doesn't seem really hard to do,
it's a bit more involved than what I'm willing to do while applying.

So I'm afraid I have to request another rebase.

Kevin



Re: [Qemu-block] [PATCH 0/3] Add 'blockdev-del' command

2015-10-20 Thread Alberto Garcia
On Mon 19 Oct 2015 05:04:50 PM CEST, Kevin Wolf  wrote:
>> > So we seem to have two criteria to distinguish BDSes:
>> >
>> > 1. Does/Doesn't have a BlockBackend on top.
>> >In the future, multiple BlockBackends are possible, too.
>> 
>> One single BDS attached to multiple BlockBackends at the same time?
>> What's the use case?
>
> For having multiple users of the BDS, e.g. a guest device and an NBD
> server.
>
> Or sometimes maybe just because it's the logical thing to happen:
> Imagine an image B with a backing file A. Both have a BlockBackend on
> them. Do a live commit of B into A. On completion, the BDS B is
> deleted and both BBs point to A.

Can you have a BlockBackend on a BDS that is being used as a backing
file? What is that for? And can you even write to it?

>> > I haven't thought about it enough yet, but it seems to me that we
>> > can't do the BDS/BB aliasing with blockdev-del, but need to interpret
>> > node-name as BDS and id as BB. Perhaps we also shouldn't use a single
>> > 'device' option then, but a union of 'node-name' and 'id' (just like
>> > the same devices were created in blockdev-add).
>> 
>> Having two separate options could make things more clear, indeed.
>
> Note that it doesn't improve things with your generalised rule (if I got
> it right anyway).
>
> So I think these are the options we have:
>
> a) blockdev-del only works on a BDS or empty BB without any references.
>Before deleting a BDS, it must be ejected from the BB; before
>deleting a BB, its BDS must be ejected.
>
> b) Your rule: Same as a), but treating BB and BDS as a unit if the BDS
>is only referenced by the BB.
>
> c) No aliasing. You explicitly refer to a BB or to a BDS. The difference
>from A is that it works without an explicit eject: When a BDS is
>deleted, it is automatically ejected from the BB; and when the BB is
>deleted, the BDS stays around if it still has other references.

I'm currently thinking about d), which tries to maintain the symmetry
with blockdev-add:

- blockdev-del would have two parameters, 'id' and 'node-name', and only
  one of them can be set, so you must choose whether you want to delete
  a backend or a BDS.

- blockdev-add can either create a backend with a BDS, or a BDS alone,
  so:

  - If you created a backend and you try to delete it you can do it
(along with its BDS) as long as neither the backend nor the BDS are
being used (no extra references, no parents). This means that the
operation will fail if there's a BDS that has been created
separately and manually attached to the backend.

  - If you created a BDS alone and you try to delete it you can do it as
long as no one else is using it. This would delete the BDS and only
the BDS (because that's what you create with blockdev-add). If it's
currently attached to a backend then the operation fails.

I think this is what best mirrors blockdev-add. It however has the
following consequence:

blockdev-add id=drive0 node-name=node0 ...

blockdev-del node-name=node0 <-- This fails, node0 is used by drive0

blockdev-del id=drive0   <-- This works and removes drive0 and node0

Berto



Re: [Qemu-block] [Qemu-devel] [RFC] transactions: add transaction-wide property

2015-10-20 Thread John Snow


On 10/19/2015 03:27 AM, Markus Armbruster wrote:
> John Snow  writes:
> 
>> On 10/16/2015 08:23 AM, Stefan Hajnoczi wrote:
>>> On Mon, Oct 12, 2015 at 12:50:20PM -0400, John Snow wrote:
 Ping -- any consensus on how we should implement the "do-or-die"
 argument for transactions that start block jobs? :)

 This patch may look a little hokey in how it boxes arguments, but I can
 re-do it on top of Eric Blake's very official way of boxing arguments,
 when the QAPI dust settles.
>>>
>>> I don't understand what you are trying to do after staring at the email
>>> for 5 minutes.  Maybe the other reviewers hit the same problem and
>>> haven't responded.
>>>
>>> What is the problem you're trying to solve?
>>>
>>> Stefan
>>>
>>
>> Sorry...
>>
>> What I am trying to do is to add the transactional blocker property to
>> the *transaction* command and not as an argument to each individual action.
>>
>> There was some discussion on this so I wanted to just send an RFC to
>> show what I had in mind.
> 
> Was it the discussion on @transactional-cancel?  I'm on record
> supporting it per transaction rather than per action:
> Message-ID: <87mvwd8k9q@blackfin.pond.sub.org>
> http://lists.gnu.org/archive/html/qemu-devel/2015-09/msg05948.html
> 

Yes, this is the patch trying to illustrate that. I wrote it as an RFC
that sits on top of Fam's v7, to highlight the changes between his and
my approaches.

>> This series applies on top of Fam's latest series and moves the
>> arguments from each action to a transaction-wide property.




Re: [Qemu-block] [Qemu-devel] Question about xen disk unplug support for ahci missed in qemu

2015-10-20 Thread Stefano Stabellini
On Tue, 20 Oct 2015, Laszlo Ersek wrote:
> On 10/20/15 13:59, Stefano Stabellini wrote:
> > On Mon, 19 Oct 2015, Laszlo Ersek wrote:
> >> Could that be related to the issue you are experiencing with OVMF?
> >> Because, OVMF implies HVM (or PV-on-HVM), and your report ("empty
> >> paravirt CD-ROM" or some such -- sorry, the report remains unclear)
> >> appears to match the above message.
> >>
> >> Given that this is guest code, shouldn't the same logic be mirrored in
> >> the OVMF guest driver?
> >>
> >> /* do not create a PV cdrom device if we are an HVM guest */
> >>
> >> In other words, given that OVMF implies HVM, shouldn't OVMF too forego
> >> driving a paravirt CD-ROM entirely?
> > 
> > In the case of OVMF I think we can use the PV block interface to access
> > the cdrom, the problem is just that it cannot handle empty cdrom drives
> > at the moment and XenPvBlockFrontInitialization simply returns an error.
> 
> (*)
> 
> > A simple patch like this one should prevent OVMF from getting stuck with
> > an error when an empty cdrom is found:
> > 
> > diff --git a/OvmfPkg/XenPvBlkDxe/BlockFront.c 
> > b/OvmfPkg/XenPvBlkDxe/BlockFront.c
> > index 256ac55..ae7cab9 100644
> > --- a/OvmfPkg/XenPvBlkDxe/BlockFront.c
> > +++ b/OvmfPkg/XenPvBlkDxe/BlockFront.c
> > @@ -186,6 +186,10 @@ XenPvBlockFrontInitialization (
> >}
> >FreePool (DeviceType);
> >  
> > +  Status = XenBusReadUint64 (XenBusIo, "sectors", TRUE, );
> > +  if (Dev->MediaInfo.CdRom && Status != XENSTORE_STATUS_SUCCESS)
> > + return EFI_NO_MEDIA;
> > +
> >Status = XenBusReadUint64 (XenBusIo, "backend-id", FALSE, );
> >if (Status != XENSTORE_STATUS_SUCCESS || Value > MAX_UINT16) {
> >  DEBUG ((EFI_D_ERROR, "XenPvBlk: Failed to get backend-id (%d)\n",
> 
> (1) Directly returning at that point will leak "Dev". I think you should
> set Status, and then goto Error. XenPvBlockFree() under that label will
> free Dev.

Good idea


> (2) I agree that returning an error code here will propagate through
> XenPvBlkDxeDriverBindingStart() to the caller, and ultimately prevent
> the binding. That's probably the right thing to do.
> 
> But, how does it differ from what you wrote above (*):
>
> > [...] the problem is just that it cannot handle empty cdrom drives
> > at the moment and XenPvBlockFrontInitialization simply returns an
> > error.
> 
> If XenPvBlockFrontInitialization() simply returned an error right now,
> then that would achieve the exact same thing as your proposal -- the
> driver wouldn't bind the device. So either your idea wouldn't make a
> difference, or your analysis that XenPvBlockFrontInitialization()
> currently fails is incorrect.
> 
> I think it's the latter though, and that this patch should be tested.

The patch works, but you are right, the analysis of the problem was
wrong: it gets stuck in XenPvBlkWaitForBackendState, rather than
returning an error.


> If it works in your testing, please submit it to
> . (You have to be subscribed to post, sorry
> about that.) Please fix the leak (1), and add the following line to the
> commit message just before your signoff:
> 
> Contributed-under: TianoCore Contribution Agreement 1.0
> 
> What it means is explained in "OvmfPkg/Contributions.txt".

I'll rework the patch and send it to the list.
Thanks,

Stefano



Re: [Qemu-block] [RFC] transactions: add transaction-wide property

2015-10-20 Thread Eric Blake
On 09/24/2015 03:40 PM, John Snow wrote:
> This replaces the per-action property as in Fam's series.
> Instead, we have a transaction-wide property that is shared
> with each action.
> 
> At the action level, if a property supplied transaction-wide
> is disagreeable, we return error and the transaction is aborted.
> 
> RFC:
> 
> Where this makes sense: Any transactional actions that aren't
> prepared to accept this new paradigm of transactional behavior
> can error_setg and return.
> 
> Where this may not make sense: consider the transactions which
> do not use BlockJobs to perform their actions, i.e. they are
> synchronous during the transactional phase. Because they either
> fail or succeed so early, we might say that of course they can
> support this property ...
> 
> ...however, consider the case where we create a new bitmap and
> perform a full backup, using allow_partial=false. If the backup
> fails, we might well expect the bitmap to be deleted because a
> member of the transaction ultimately/eventually failed. However,
> the bitmap creation was not undone because it does not have a
> pending/delayed abort/commit action -- those are only for jobs
> in this implementation.

The classic example is the 'Abort' transaction, which always fails.  Or
put another way, if you run a transaction that includes both the
creation of a new bitmap, and the abort action, what does the abort do
to the bitmap.

> 
> How do we fix this?
> 
> (1) We just say "No, you can't use the new block job transaction
> completion mechanic in conjunction with these commands,"
> 
> (2) We make Bitmap creation/resetting small, synchronous blockjobs
> that can join the BlockJobTxn

I'm not sure I have an opinion on this one yet.

> 
> Signed-off-by: John Snow 
> ---
>  blockdev.c | 87 
> --
>  blockjob.c |  2 +-
>  qapi-schema.json   | 15 +++--
>  qapi/block-core.json   | 26 ---
>  qmp-commands.hx|  2 +-
>  tests/qemu-iotests/124 | 12 +++
>  6 files changed, 83 insertions(+), 61 deletions(-)
> 
> @@ -1322,6 +1323,12 @@ static void internal_snapshot_prepare(BlkActionState 
> *common,
>  name = internal->name;
>  
>  /* 2. check for validation */
> +if (!common->txn_props->allow_partial) {
> +error_setg(errp,
> +   "internal_snapshot does not support allow_partial = 
> false");

Should the error message say 'allow-partial' to match the QMP?

>  
> +/**
> + * Allocate a TransactionProperties structure if necessary, and fill
> + * that structure with desired defaults if they are unset.
> + */
> +static TransactionProperties 
> *get_transaction_properties(TransactionProperties *props)
> +{
> +if (!props) {
> +props = g_new0(TransactionProperties, 1);
> +}
> +
> +if (!props->has_allow_partial) {
> +props->allow_partial = true;
> +}
> +
> +return props;
> +}

If *props is NULL on entry, then allow_partial is true on exit but
has_allow_partial is still false. I guess that means we're relying on
the rest of the code ignoring has_allow_partial because they used this
function to set defaults.

Yeah, having default support built into qapi would make this nicer. I
don't know if we'll have enough time for qapi defaults to make it in 2.5
(the queue is already quite big for merely getting boxed qmp callback
functions in place).  But that's all internal, and won't matter if it
doesn't get added until 2.6, without affecting what behavior the
external user sees.

> +
>  /*
>   * 'Atomic' group operations.  The operations are performed as a set, and if
>   * any fail then we roll back all operations in the group.
>   */
> -void qmp_transaction(TransactionActionList *dev_list, Error **errp)
> +void qmp_transaction(TransactionActionList *dev_list,
> + bool hasTransactionProperties,

Name this has_props, to be more reminiscent of other qapi naming.

> + struct TransactionProperties *props,
> + Error **errp)
>  {

> -block_job_txn = block_job_txn_new();
> +/* Does this transaction get *canceled* as a group on failure? */
> +props = get_transaction_properties(props);
> +if (props->allow_partial == false) {
> +block_job_txn = block_job_txn_new();
> +}
>  

>  }
> +if (!hasTransactionProperties) {
> +g_free(props);

qapi_free_TransactionProperties(props) is probably better.


> +++ b/qapi-schema.json
> @@ -1505,14 +1505,20 @@
>  { 'union': 'TransactionAction',
>'data': {
> 'blockdev-snapshot-sync': 'BlockdevSnapshot',
> -   'drive-backup': 'DriveBackupTxn',
> -   'blockdev-backup': 'BlockdevBackupTxn',
> +   'drive-backup': 'DriveBackup',
> +   'blockdev-backup': 'BlockdevBackup',

I like that we no longer need the special subclasses just for transactions.

> 'abort': 'Abort',
> 'blockdev-snapshot-internal-sync': 

Re: [Qemu-block] [Qemu-devel] [RFC] transactions: add transaction-wide property

2015-10-20 Thread John Snow
So here's the current status of this blob:

- Markus supports the idea of a transaction-wide property, but hasn't
reviewed this particular RFC.
- Eric seemed supportive of a transaction-wide property, but hasn't
chimed in to this thread yet.
- Stefan was not sure what this patch was trying to accomplish.
- Fam appears to think that the concept of transactional properties is
worth pursuing and offered some light review on this RFC.

So let's recap a little bit, in a Markus-style breakdown to help bring
everyone back up to speed, because I would desperately like some input
from the Qapibarons.

== Recap ==

We're trying to add transactional commands to make the incremental
backup QMP primitives (namely dirty bitmap clear and drive-backup with
sync=incremental) useful in an atomic fashion. Currently, you can't
clear a bitmap and start a backup simultaneously, so it's hard to
guarantee the safety of these things.

The primary problem with mixing jobs and transactions is that if a job
later fails, what do we do with the other actions that succeeded?

We need to be able to cancel jobs all together for failures, but we
don't want to break backwards compatibility with workflows that didn't
expect one failed backup to cancel a different backup.

So we need to add a parameter _somewhere_. First attempts put it as a
new parameter into the QMP command, second attempts put it in an
Action-only extension to the QMP command structure.

I propose we stick it on as an argument to the Transaction command
directly. No worrying about if one action has the transactional-cancel
argument and another doesn't, no managing the difference between QMP and
Action versions of command argument structures.

== What this patch is trying to do ==

This RFC patch applies to Fam's V7, and removes his per-action
transactional-cancel arguments and installs instead a transaction-wide
property, named "allow-partial," which I intended to be short for "Allow
partial completion."

The way it does this is meant to emulate a forthcoming feature by Eric
Blake. Instead of adding an "allow-partial" argument directly to
Transactions, it does this:

- Add an optional TransactionProperties structure to Transaction
- Add an optional allow-partial bool to this structure

This way, the transactional arguments are permanently "boxed" and we can
add/remove them at our leisure without disturbing the C prototypes any
further. This may not be necessary after Eric and Markus' qapi/qmp
refactoring decathlon.

The "default" creates allow-partial as "true," the current behavior. Any
Actions that do not support the allow-partial boolean set to false will
report an error immediately during the prepare phase of the transaction.

This way, if this property is set at the transactional level but an
action doesn't "support" or understand that property, it can fail the
transaction to prevent undefined behavior. This restriction can be
potentially lifted in the future on a per-action basis without harm to
backwards compatibility.

This approach is simple and intuitive: you are instructing all actions
as part of a transaction to live or die together. If they are incapable
of complying, they fail up-front and outright. It reduces the C
management burden and it is taxonomically obvious.

From this approach, we leave ourselves expansion options, as well:
We can always add per-action overrides to transaction-wide properties in
the future if we so desire, but don't have to do that now.

== What happens next? ==

I believe Fam has completed work on his contribution to this set and is
handing the reins back over to me. I would like to push for the
transactional property.

I will prepare a new iteration that assumes transaction-wide properties
are the "way to go" with the understanding that if there is significant
ruckus and unhappiness we can always roll back to Fam's last version if
we all get tired of reading these long emails that I type.

== Potential Problems ==

(Because you know I can't help myself.)

As discussed on-list with Fam as a response to his review of this RFC,
there might be room to change this property from a simple boolean to a
tri-state:

sync-cancel: {none, jobs, all}

where none/false implies no change from the current behavior.
all/true implies that all actions are to live and die together.
"jobs" might imply that every action that creates a job is to
participate in the group cancellation, but any action that does not can
ignore it.

This would require some introspection to the client to be meaningful
("Which actions are jobs?") but it is probably the most powerful.

Thinking about it, I think what I'll do is this:

sync-cancel: {none, all}

And if we want "jobs" at a later date, it's just an enum addition away.

== Any questions? ==

:D

--js

On 09/24/2015 05:40 PM, John Snow wrote:
> This replaces the per-action property as in Fam's series.
> Instead, we have a transaction-wide property that is shared
> with each action.
> 
> At the action level, if a 

Re: [Qemu-block] [Qemu-devel] [RFC] transactions: add transaction-wide property

2015-10-20 Thread John Snow
A little bit of cross-talk with my "state of the union" reply and this
review from Eric.

Sorry, everyone!

On 10/20/2015 04:12 PM, Eric Blake wrote:
> On 09/24/2015 03:40 PM, John Snow wrote:
>> This replaces the per-action property as in Fam's series.
>> Instead, we have a transaction-wide property that is shared
>> with each action.
>>
>> At the action level, if a property supplied transaction-wide
>> is disagreeable, we return error and the transaction is aborted.
>>
>> RFC:
>>
>> Where this makes sense: Any transactional actions that aren't
>> prepared to accept this new paradigm of transactional behavior
>> can error_setg and return.
>>
>> Where this may not make sense: consider the transactions which
>> do not use BlockJobs to perform their actions, i.e. they are
>> synchronous during the transactional phase. Because they either
>> fail or succeed so early, we might say that of course they can
>> support this property ...
>>
>> ...however, consider the case where we create a new bitmap and
>> perform a full backup, using allow_partial=false. If the backup
>> fails, we might well expect the bitmap to be deleted because a
>> member of the transaction ultimately/eventually failed. However,
>> the bitmap creation was not undone because it does not have a
>> pending/delayed abort/commit action -- those are only for jobs
>> in this implementation.
> 
> The classic example is the 'Abort' transaction, which always fails.  Or
> put another way, if you run a transaction that includes both the
> creation of a new bitmap, and the abort action, what does the abort do
> to the bitmap.
> 
>>
>> How do we fix this?
>>
>> (1) We just say "No, you can't use the new block job transaction
>> completion mechanic in conjunction with these commands,"
>>
>> (2) We make Bitmap creation/resetting small, synchronous blockjobs
>> that can join the BlockJobTxn
> 
> I'm not sure I have an opinion on this one yet.
> 

As stated in my other mail, I'm leaning towards (1) with a design that
allows us to switch to (2) per-action as we feel like it.

>>
>> Signed-off-by: John Snow 
>> ---
>>  blockdev.c | 87 
>> --
>>  blockjob.c |  2 +-
>>  qapi-schema.json   | 15 +++--
>>  qapi/block-core.json   | 26 ---
>>  qmp-commands.hx|  2 +-
>>  tests/qemu-iotests/124 | 12 +++
>>  6 files changed, 83 insertions(+), 61 deletions(-)
>>
>> @@ -1322,6 +1323,12 @@ static void internal_snapshot_prepare(BlkActionState 
>> *common,
>>  name = internal->name;
>>  
>>  /* 2. check for validation */
>> +if (!common->txn_props->allow_partial) {
>> +error_setg(errp,
>> +   "internal_snapshot does not support allow_partial = 
>> false");
> 
> Should the error message say 'allow-partial' to match the QMP?
> 

It sure should.

>>  
>> +/**
>> + * Allocate a TransactionProperties structure if necessary, and fill
>> + * that structure with desired defaults if they are unset.
>> + */
>> +static TransactionProperties 
>> *get_transaction_properties(TransactionProperties *props)
>> +{
>> +if (!props) {
>> +props = g_new0(TransactionProperties, 1);
>> +}
>> +
>> +if (!props->has_allow_partial) {
>> +props->allow_partial = true;
>> +}
>> +
>> +return props;
>> +}
> 
> If *props is NULL on entry, then allow_partial is true on exit but
> has_allow_partial is still false. I guess that means we're relying on
> the rest of the code ignoring has_allow_partial because they used this
> function to set defaults.
> 
> Yeah, having default support built into qapi would make this nicer. I
> don't know if we'll have enough time for qapi defaults to make it in 2.5
> (the queue is already quite big for merely getting boxed qmp callback
> functions in place).  But that's all internal, and won't matter if it
> doesn't get added until 2.6, without affecting what behavior the
> external user sees.
> 

Yes, this is goofy. I will change it to a "false by default" phrasing
for the property to avoid this.

>> +
>>  /*
>>   * 'Atomic' group operations.  The operations are performed as a set, and if
>>   * any fail then we roll back all operations in the group.
>>   */
>> -void qmp_transaction(TransactionActionList *dev_list, Error **errp)
>> +void qmp_transaction(TransactionActionList *dev_list,
>> + bool hasTransactionProperties,
> 
> Name this has_props, to be more reminiscent of other qapi naming.
> 

Yes.

>> + struct TransactionProperties *props,
>> + Error **errp)
>>  {
> 
>> -block_job_txn = block_job_txn_new();
>> +/* Does this transaction get *canceled* as a group on failure? */
>> +props = get_transaction_properties(props);
>> +if (props->allow_partial == false) {
>> +block_job_txn = block_job_txn_new();
>> +}
>>  
> 
>>  }
>> +if (!hasTransactionProperties) {
>> +g_free(props);

Re: [Qemu-block] [PATCH v4 00/12] block: Protect nested event loop with bdrv_drained_begin and bdrv_drained_end

2015-10-20 Thread Fam Zheng
On Tue, 10/20 16:17, Kevin Wolf wrote:
> Am 20.10.2015 um 07:16 hat Fam Zheng geschrieben:
> > v4: Rebase on to master so fix the "bdrv_move_feature_fields" issue.
> > 
> > v3: Call bdrv_drain unconditionally in bdrv_drained_begin.
> > Document the internal I/O implications between bdrv_drain_begin and end.
> > 
> > The nested aio_poll()'s in block layer has a bug that new r/w requests from
> > ioeventfds and nbd exports are processed, which might break the caller's
> > semantics (qmp_transaction) or even pointers (bdrv_reopen).
> 
> This series conflicts with the first part of Max' "BlockBackend and
> media" series which I already merged to my tree. I tried to resolve the
> conflict while applying, but while it doesn't seem really hard to do,
> it's a bit more involved than what I'm willing to do while applying.
> 
> So I'm afraid I have to request another rebase.

Sure, thanks for taking a look!

Fam



[Qemu-block] [PATCH v5 09/12] block: Add "drained begin/end" for internal snapshot

2015-10-20 Thread Fam Zheng
This ensures the atomicity of the transaction by avoiding processing of
external requests such as those from ioeventfd.

state->bs is assigned right after bdrv_drained_begin. Because it was
used as the flag for deletion or not in abort, now we need a separate
flag - InternalSnapshotState.created.

Signed-off-by: Fam Zheng 
---
 blockdev.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index 52f44b2..92c2d0d 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1370,6 +1370,7 @@ typedef struct InternalSnapshotState {
 BlockDriverState *bs;
 AioContext *aio_context;
 QEMUSnapshotInfo sn;
+bool created;
 } InternalSnapshotState;
 
 static void internal_snapshot_prepare(BlkTransactionState *common,
@@ -1407,6 +1408,8 @@ static void internal_snapshot_prepare(BlkTransactionState 
*common,
 /* AioContext is released in .clean() */
 state->aio_context = blk_get_aio_context(blk);
 aio_context_acquire(state->aio_context);
+state->bs = blk_bs(blk);
+bdrv_drained_begin(state->bs);
 
 if (!blk_is_available(blk)) {
 error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
@@ -1465,7 +1468,7 @@ static void internal_snapshot_prepare(BlkTransactionState 
*common,
 }
 
 /* 4. succeed, mark a snapshot is created */
-state->bs = bs;
+state->created = true;
 }
 
 static void internal_snapshot_abort(BlkTransactionState *common)
@@ -1476,7 +1479,7 @@ static void internal_snapshot_abort(BlkTransactionState 
*common)
 QEMUSnapshotInfo *sn = &state->sn;
 Error *local_error = NULL;
 
-if (!bs) {
+if (!state->created) {
 return;
 }
 
@@ -1497,6 +1500,7 @@ static void internal_snapshot_clean(BlkTransactionState 
*common)
  common, common);
 
 if (state->aio_context) {
+bdrv_drained_end(state->bs);
 aio_context_release(state->aio_context);
 }
 }
-- 
2.4.3




[Qemu-block] [PATCH v5 05/12] block: Introduce "drained begin/end" API

2015-10-20 Thread Fam Zheng
The semantics is that after bdrv_drained_begin(bs), bs will not get new external
requests until the matching bdrv_drained_end(bs).

Signed-off-by: Fam Zheng 
---
 block/io.c| 17 +
 include/block/block.h | 21 +
 include/block/block_int.h |  2 ++
 3 files changed, 40 insertions(+)

diff --git a/block/io.c b/block/io.c
index 2fd7a1d..5ac6256 100644
--- a/block/io.c
+++ b/block/io.c
@@ -2624,3 +2624,20 @@ void bdrv_flush_io_queue(BlockDriverState *bs)
 }
 bdrv_start_throttled_reqs(bs);
 }
+
+void bdrv_drained_begin(BlockDriverState *bs)
+{
+if (!bs->quiesce_counter++) {
+aio_disable_external(bdrv_get_aio_context(bs));
+}
+bdrv_drain(bs);
+}
+
+void bdrv_drained_end(BlockDriverState *bs)
+{
+assert(bs->quiesce_counter > 0);
+if (--bs->quiesce_counter > 0) {
+return;
+}
+aio_enable_external(bdrv_get_aio_context(bs));
+}
diff --git a/include/block/block.h b/include/block/block.h
index 28d903c..6d38b62 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -610,4 +610,25 @@ void bdrv_io_plug(BlockDriverState *bs);
 void bdrv_io_unplug(BlockDriverState *bs);
 void bdrv_flush_io_queue(BlockDriverState *bs);
 
+BlockAcctStats *bdrv_get_stats(BlockDriverState *bs);
+
+/**
+ * bdrv_drained_begin:
+ *
+ * Begin a quiesced section for exclusive access to the BDS, by disabling
+ * external request sources including NBD server and device model. Note that
+ * this doesn't block timers or coroutines from submitting more requests, which
+ * means block_job_pause is still necessary.
+ *
+ * This function can be recursive.
+ */
+void bdrv_drained_begin(BlockDriverState *bs);
+
+/**
+ * bdrv_drained_end:
+ *
+ * End a quiescent section started by bdrv_drained_begin().
+ */
+void bdrv_drained_end(BlockDriverState *bs);
+
 #endif
diff --git a/include/block/block_int.h b/include/block/block_int.h
index e472a03..e317b14 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -448,6 +448,8 @@ struct BlockDriverState {
 /* threshold limit for writes, in bytes. "High water mark". */
 uint64_t write_threshold_offset;
 NotifierWithReturn write_threshold_notifier;
+
+int quiesce_counter;
 };
 
 struct BlockBackendRootState {
-- 
2.4.3




[Qemu-block] [PATCH v5 08/12] block: Add "drained begin/end" for transactional blockdev-backup

2015-10-20 Thread Fam Zheng
Similar to the previous patch, make sure that external events are not
dispatched during transaction operations.

Signed-off-by: Fam Zheng 
---
 blockdev.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/blockdev.c b/blockdev.c
index 0a7848b..52f44b2 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1756,6 +1756,11 @@ static void blockdev_backup_prepare(BlkTransactionState 
*common, Error **errp)
 return;
 }
 
+if (!blk_is_available(blk)) {
+error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, backup->device);
+return;
+}
+
 target = blk_by_name(backup->target);
 if (!target) {
 error_setg(errp, "Device '%s' not found", backup->target);
@@ -1770,6 +1775,8 @@ static void blockdev_backup_prepare(BlkTransactionState 
*common, Error **errp)
 return;
 }
 aio_context_acquire(state->aio_context);
+state->bs = blk_bs(blk);
+bdrv_drained_begin(state->bs);
 
 qmp_blockdev_backup(backup->device, backup->target,
 backup->sync,
@@ -1782,7 +1789,6 @@ static void blockdev_backup_prepare(BlkTransactionState 
*common, Error **errp)
 return;
 }
 
-state->bs = blk_bs(blk);
 state->job = state->bs->job;
 }
 
@@ -1802,6 +1808,7 @@ static void blockdev_backup_clean(BlkTransactionState 
*common)
 BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, 
common);
 
 if (state->aio_context) {
+bdrv_drained_end(state->bs);
 aio_context_release(state->aio_context);
 }
 }
-- 
2.4.3




[Qemu-block] [PATCH v5 07/12] block: Add "drained begin/end" for transactional backup

2015-10-20 Thread Fam Zheng
This ensures the atomicity of the transaction by avoiding processing of
external requests such as those from ioeventfd.

Move the assignment to state->bs up right after bdrv_drained_begin, so
that we can use it in the clean callback. The abort callback will still
check bs->job and state->job, so it's OK.

Signed-off-by: Fam Zheng 
---
 blockdev.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/blockdev.c b/blockdev.c
index e4a5eb4..0a7848b 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1684,9 +1684,16 @@ static void drive_backup_prepare(BlkTransactionState 
*common, Error **errp)
 return;
 }
 
+if (!blk_is_available(blk)) {
+error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, backup->device);
+return;
+}
+
 /* AioContext is released in .clean() */
 state->aio_context = blk_get_aio_context(blk);
 aio_context_acquire(state->aio_context);
+bdrv_drained_begin(blk_bs(blk));
+state->bs = blk_bs(blk);
 
 qmp_drive_backup(backup->device, backup->target,
  backup->has_format, backup->format,
@@ -1702,7 +1709,6 @@ static void drive_backup_prepare(BlkTransactionState 
*common, Error **errp)
 return;
 }
 
-state->bs = blk_bs(blk);
 state->job = state->bs->job;
 }
 
@@ -1722,6 +1728,7 @@ static void drive_backup_clean(BlkTransactionState 
*common)
 DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
 
 if (state->aio_context) {
+bdrv_drained_end(state->bs);
 aio_context_release(state->aio_context);
 }
 }
-- 
2.4.3




[Qemu-block] [PATCH v5 12/12] tests: Add test case for aio_disable_external

2015-10-20 Thread Fam Zheng
Signed-off-by: Fam Zheng 
---
 tests/test-aio.c | 24 
 1 file changed, 24 insertions(+)

diff --git a/tests/test-aio.c b/tests/test-aio.c
index 03cd45d..1623803 100644
--- a/tests/test-aio.c
+++ b/tests/test-aio.c
@@ -374,6 +374,29 @@ static void test_flush_event_notifier(void)
 event_notifier_cleanup(&data.e);
 }
 
+static void test_aio_external_client(void)
+{
+int i, j;
+
+for (i = 1; i < 3; i++) {
+EventNotifierTestData data = { .n = 0, .active = 10, .auto_set = true };
+event_notifier_init(&data.e, false);
+aio_set_event_notifier(ctx, &data.e, true, event_ready_cb);
+event_notifier_set(&data.e);
+for (j = 0; j < i; j++) {
+aio_disable_external(ctx);
+}
+for (j = 0; j < i; j++) {
+assert(!aio_poll(ctx, false));
+assert(event_notifier_test_and_clear(&data.e));
+event_notifier_set(&data.e);
+aio_enable_external(ctx);
+}
+assert(aio_poll(ctx, false));
+event_notifier_cleanup(&data.e);
+}
+}
+
 static void test_wait_event_notifier_noflush(void)
 {
 EventNotifierTestData data = { .n = 0 };
@@ -832,6 +855,7 @@ int main(int argc, char **argv)
 g_test_add_func("/aio/event/wait",  test_wait_event_notifier);
 g_test_add_func("/aio/event/wait/no-flush-cb",  
test_wait_event_notifier_noflush);
 g_test_add_func("/aio/event/flush", test_flush_event_notifier);
+g_test_add_func("/aio/external-client", test_aio_external_client);
 g_test_add_func("/aio/timer/schedule",  test_timer_schedule);
 
 g_test_add_func("/aio-gsource/flush",   test_source_flush);
-- 
2.4.3




[Qemu-block] [PATCH v5 03/12] dataplane: Mark host notifiers' client type as "external"

2015-10-20 Thread Fam Zheng
They will be excluded by type in the nested event loops in block layer,
so that unwanted events won't be processed there.

Signed-off-by: Fam Zheng 
---
 hw/block/dataplane/virtio-blk.c |  5 ++---
 hw/scsi/virtio-scsi-dataplane.c | 18 --
 2 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index f8716bc..c42ddeb 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -283,7 +283,7 @@ void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s)
 
 /* Get this show started by hooking up our callbacks */
 aio_context_acquire(s->ctx);
-aio_set_event_notifier(s->ctx, &s->host_notifier, false,
+aio_set_event_notifier(s->ctx, &s->host_notifier, true,
handle_notify);
 aio_context_release(s->ctx);
 return;
@@ -320,8 +320,7 @@ void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s)
 aio_context_acquire(s->ctx);
 
 /* Stop notifications for new requests from guest */
-aio_set_event_notifier(s->ctx, &s->host_notifier, false,
-   NULL);
+aio_set_event_notifier(s->ctx, &s->host_notifier, true, NULL);
 
 /* Drain and switch bs back to the QEMU main loop */
 blk_set_aio_context(s->conf->conf.blk, qemu_get_aio_context());
diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c
index 21f51df..0d8d71e 100644
--- a/hw/scsi/virtio-scsi-dataplane.c
+++ b/hw/scsi/virtio-scsi-dataplane.c
@@ -60,8 +60,7 @@ static VirtIOSCSIVring *virtio_scsi_vring_init(VirtIOSCSI *s,
 r = g_new(VirtIOSCSIVring, 1);
 r->host_notifier = *virtio_queue_get_host_notifier(vq);
 r->guest_notifier = *virtio_queue_get_guest_notifier(vq);
-aio_set_event_notifier(s->ctx, &r->host_notifier, false,
-   handler);
+aio_set_event_notifier(s->ctx, &r->host_notifier, true, handler);
 
 r->parent = s;
 
@@ -72,8 +71,7 @@ static VirtIOSCSIVring *virtio_scsi_vring_init(VirtIOSCSI *s,
 return r;
 
 fail_vring:
-aio_set_event_notifier(s->ctx, &r->host_notifier, false,
-   NULL);
+aio_set_event_notifier(s->ctx, &r->host_notifier, true, NULL);
 k->set_host_notifier(qbus->parent, n, false);
 g_free(r);
 return NULL;
@@ -165,16 +163,16 @@ static void virtio_scsi_clear_aio(VirtIOSCSI *s)
 
 if (s->ctrl_vring) {
 aio_set_event_notifier(s->ctx, &s->ctrl_vring->host_notifier,
-   false, NULL);
+   true, NULL);
 }
 if (s->event_vring) {
 aio_set_event_notifier(s->ctx, &s->event_vring->host_notifier,
-   false, NULL);
+   true, NULL);
 }
 if (s->cmd_vrings) {
 for (i = 0; i < vs->conf.num_queues && s->cmd_vrings[i]; i++) {
 aio_set_event_notifier(s->ctx, &s->cmd_vrings[i]->host_notifier,
-   false, NULL);
+   true, NULL);
 }
 }
 }
@@ -296,12 +294,12 @@ void virtio_scsi_dataplane_stop(VirtIOSCSI *s)
 aio_context_acquire(s->ctx);
 
 aio_set_event_notifier(s->ctx, &s->ctrl_vring->host_notifier,
-   false, NULL);
+   true, NULL);
 aio_set_event_notifier(s->ctx, &s->event_vring->host_notifier,
-   false, NULL);
+   true, NULL);
 for (i = 0; i < vs->conf.num_queues; i++) {
 aio_set_event_notifier(s->ctx, &s->cmd_vrings[i]->host_notifier,
-   false, NULL);
+   true, NULL);
 }
 
 blk_drain_all(); /* ensure there are no in-flight requests */
-- 
2.4.3




[Qemu-block] [PATCH v5 01/12] aio: Add "is_external" flag for event handlers

2015-10-20 Thread Fam Zheng
All callers pass in false, and the real external ones will switch to
true in coming patches.

Signed-off-by: Fam Zheng 
---
 aio-posix.c |  6 -
 aio-win32.c |  5 
 async.c |  3 ++-
 block/curl.c| 14 +-
 block/iscsi.c   |  9 +++
 block/linux-aio.c   |  5 ++--
 block/nbd-client.c  | 10 ---
 block/nfs.c | 17 +---
 block/sheepdog.c| 38 ++-
 block/ssh.c |  5 ++--
 block/win32-aio.c   |  5 ++--
 hw/block/dataplane/virtio-blk.c |  6 +++--
 hw/scsi/virtio-scsi-dataplane.c | 24 +++--
 include/block/aio.h |  2 ++
 iohandler.c |  3 ++-
 nbd.c   |  4 ++-
 tests/test-aio.c| 58 +++--
 17 files changed, 130 insertions(+), 84 deletions(-)

diff --git a/aio-posix.c b/aio-posix.c
index d477033..f0f9122 100644
--- a/aio-posix.c
+++ b/aio-posix.c
@@ -25,6 +25,7 @@ struct AioHandler
 IOHandler *io_write;
 int deleted;
 void *opaque;
+bool is_external;
 QLIST_ENTRY(AioHandler) node;
 };
 
@@ -43,6 +44,7 @@ static AioHandler *find_aio_handler(AioContext *ctx, int fd)
 
 void aio_set_fd_handler(AioContext *ctx,
 int fd,
+bool is_external,
 IOHandler *io_read,
 IOHandler *io_write,
 void *opaque)
@@ -82,6 +84,7 @@ void aio_set_fd_handler(AioContext *ctx,
 node->io_read = io_read;
 node->io_write = io_write;
 node->opaque = opaque;
+node->is_external = is_external;
 
 node->pfd.events = (io_read ? G_IO_IN | G_IO_HUP | G_IO_ERR : 0);
 node->pfd.events |= (io_write ? G_IO_OUT | G_IO_ERR : 0);
@@ -92,10 +95,11 @@ void aio_set_fd_handler(AioContext *ctx,
 
 void aio_set_event_notifier(AioContext *ctx,
 EventNotifier *notifier,
+bool is_external,
 EventNotifierHandler *io_read)
 {
 aio_set_fd_handler(ctx, event_notifier_get_fd(notifier),
-   (IOHandler *)io_read, NULL, notifier);
+   is_external, (IOHandler *)io_read, NULL, notifier);
 }
 
 bool aio_prepare(AioContext *ctx)
diff --git a/aio-win32.c b/aio-win32.c
index 50a6867..3110d85 100644
--- a/aio-win32.c
+++ b/aio-win32.c
@@ -28,11 +28,13 @@ struct AioHandler {
 GPollFD pfd;
 int deleted;
 void *opaque;
+bool is_external;
 QLIST_ENTRY(AioHandler) node;
 };
 
 void aio_set_fd_handler(AioContext *ctx,
 int fd,
+bool is_external,
 IOHandler *io_read,
 IOHandler *io_write,
 void *opaque)
@@ -86,6 +88,7 @@ void aio_set_fd_handler(AioContext *ctx,
 node->opaque = opaque;
 node->io_read = io_read;
 node->io_write = io_write;
+node->is_external = is_external;
 
 event = event_notifier_get_handle(&ctx->notifier);
 WSAEventSelect(node->pfd.fd, event,
@@ -98,6 +101,7 @@ void aio_set_fd_handler(AioContext *ctx,
 
 void aio_set_event_notifier(AioContext *ctx,
 EventNotifier *e,
+bool is_external,
 EventNotifierHandler *io_notify)
 {
 AioHandler *node;
@@ -133,6 +137,7 @@ void aio_set_event_notifier(AioContext *ctx,
 node->e = e;
 node->pfd.fd = (uintptr_t)event_notifier_get_handle(e);
 node->pfd.events = G_IO_IN;
+node->is_external = is_external;
 QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);
 
 g_source_add_poll(&ctx->source, &node->pfd);
diff --git a/async.c b/async.c
index efce14b..bdc64a3 100644
--- a/async.c
+++ b/async.c
@@ -247,7 +247,7 @@ aio_ctx_finalize(GSource *source)
 }
 qemu_mutex_unlock(&ctx->bh_lock);
 
-aio_set_event_notifier(ctx, &ctx->notifier, NULL);
+aio_set_event_notifier(ctx, &ctx->notifier, false, NULL);
 event_notifier_cleanup(&ctx->notifier);
 rfifolock_destroy(&ctx->lock);
 qemu_mutex_destroy(&ctx->bh_lock);
@@ -329,6 +329,7 @@ AioContext *aio_context_new(Error **errp)
 }
 g_source_set_can_recurse(&ctx->source, true);
 aio_set_event_notifier(ctx, &ctx->notifier,
+   false,
(EventNotifierHandler *)
event_notifier_dummy_cb);
 ctx->thread_pool = NULL;
diff --git a/block/curl.c b/block/curl.c
index 032cc8a..8994182 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -154,18 +154,20 @@ static int curl_sock_cb(CURL *curl, curl_socket_t fd, int 
action,
 DPRINTF("CURL (AIO): Sock action %d on fd %d\n", action, fd);
 switch (action) {
 case CURL_POLL_IN:
- 

[Qemu-block] [PATCH v5 02/12] nbd: Mark fd handlers client type as "external"

2015-10-20 Thread Fam Zheng
So we could distinguish it from internal used fds, thus avoid handling
unwanted events in nested aio polls.

Signed-off-by: Fam Zheng 
---
 nbd.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/nbd.c b/nbd.c
index fbc66be..dab1ebb 100644
--- a/nbd.c
+++ b/nbd.c
@@ -1446,7 +1446,7 @@ static void nbd_set_handlers(NBDClient *client)
 {
 if (client->exp && client->exp->ctx) {
 aio_set_fd_handler(client->exp->ctx, client->sock,
-   false,
+   true,
client->can_read ? nbd_read : NULL,
client->send_coroutine ? nbd_restart_write : NULL,
client);
@@ -1457,7 +1457,7 @@ static void nbd_unset_handlers(NBDClient *client)
 {
 if (client->exp && client->exp->ctx) {
 aio_set_fd_handler(client->exp->ctx, client->sock,
-   false, NULL, NULL, NULL);
+   true, NULL, NULL, NULL);
 }
 }
 
-- 
2.4.3




[Qemu-block] [PATCH v5 00/12] block: Protect nested event loop with bdrv_drained_begin and bdrv_drained_end

2015-10-20 Thread Fam Zheng
v5: Rebase onto Kevin's block tree.

v4: Rebase on to master so fix the "bdrv_move_feature_fields" issue.

v3: Call bdrv_drain unconditionally in bdrv_drained_begin.
Document the internal I/O implications between bdrv_drain_begin and end.

The nested aio_poll()'s in block layer has a bug that new r/w requests from
ioeventfds and nbd exports are processed, which might break the caller's
semantics (qmp_transaction) or even pointers (bdrv_reopen).

Fam Zheng (12):
  aio: Add "is_external" flag for event handlers
  nbd: Mark fd handlers client type as "external"
  dataplane: Mark host notifiers' client type as "external"
  aio: introduce aio_{disable,enable}_external
  block: Introduce "drained begin/end" API
  block: Add "drained begin/end" for transactional external snapshot
  block: Add "drained begin/end" for transactional backup
  block: Add "drained begin/end" for transactional blockdev-backup
  block: Add "drained begin/end" for internal snapshot
  block: Introduce BlockDriver.bdrv_drain callback
  qed: Implement .bdrv_drain
  tests: Add test case for aio_disable_external

 aio-posix.c |  9 -
 aio-win32.c |  8 +++-
 async.c |  3 +-
 block/curl.c| 14 ---
 block/io.c  | 23 +++-
 block/iscsi.c   |  9 ++---
 block/linux-aio.c   |  5 ++-
 block/nbd-client.c  | 10 +++--
 block/nfs.c | 17 -
 block/qed.c |  7 
 block/sheepdog.c| 38 ---
 block/ssh.c |  5 ++-
 block/win32-aio.c   |  5 ++-
 blockdev.c  | 37 ---
 hw/block/dataplane/virtio-blk.c |  5 ++-
 hw/scsi/virtio-scsi-dataplane.c | 22 +++
 include/block/aio.h | 39 
 include/block/block.h   | 26 +
 include/block/block_int.h   |  8 
 iohandler.c |  3 +-
 nbd.c   |  4 +-
 tests/test-aio.c| 82 -
 22 files changed, 286 insertions(+), 93 deletions(-)

-- 
2.4.3




[Qemu-block] [PATCH v5 10/12] block: Introduce BlockDriver.bdrv_drain callback

2015-10-20 Thread Fam Zheng
Drivers can have internal request sources that generate IO, like the
need_check_timer in QED. Since we want quiesced periods that contain
nested event loops in block layer, we need to have a way to disable such
event sources.

Block drivers must implement the "bdrv_drain" callback if it has any
internal sources that can generate I/O activity, like a timer or a
worker thread (even in a library) that can schedule QEMUBH in an
asynchronous callback.

Update the comments of bdrv_drain and bdrv_drained_begin accordingly.

Signed-off-by: Fam Zheng 
---
 block/io.c| 6 +-
 include/block/block.h | 9 +++--
 include/block/block_int.h | 6 ++
 3 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/block/io.c b/block/io.c
index 5ac6256..999df63 100644
--- a/block/io.c
+++ b/block/io.c
@@ -235,7 +235,8 @@ bool bdrv_requests_pending(BlockDriverState *bs)
 }
 
 /*
- * Wait for pending requests to complete on a single BlockDriverState subtree
+ * Wait for pending requests to complete on a single BlockDriverState subtree,
+ * and suspend block driver's internal I/O until next request arrives.
  *
  * Note that unlike bdrv_drain_all(), the caller must hold the BlockDriverState
  * AioContext.
@@ -248,6 +249,9 @@ void bdrv_drain(BlockDriverState *bs)
 {
 bool busy = true;
 
+if (bs->drv && bs->drv->bdrv_drain) {
+bs->drv->bdrv_drain(bs);
+}
 while (busy) {
 /* Keep iterating */
  bdrv_flush_io_queue(bs);
diff --git a/include/block/block.h b/include/block/block.h
index 6d38b62..cd58a32 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -617,8 +617,13 @@ BlockAcctStats *bdrv_get_stats(BlockDriverState *bs);
  *
  * Begin a quiesced section for exclusive access to the BDS, by disabling
  * external request sources including NBD server and device model. Note that
- * this doesn't block timers or coroutines from submitting more requests, which
- * means block_job_pause is still necessary.
+ * this doesn't prevent timers or coroutines from submitting more requests,
+ * which means block_job_pause is still necessary.
+ *
+ * If new I/O requests are submitted after bdrv_drained_begin is called before
+ * bdrv_drained_end, more internal I/O might be going on after the request has
+ * been completed. If you don't want this, you have to issue another bdrv_drain
+ * or use a nested bdrv_drained_begin/end section.
  *
  * This function can be recursive.
  */
diff --git a/include/block/block_int.h b/include/block/block_int.h
index e317b14..73eba05 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -288,6 +288,12 @@ struct BlockDriver {
  */
 int (*bdrv_probe_geometry)(BlockDriverState *bs, HDGeometry *geo);
 
+/**
+ * Drain and stop any internal sources of requests in the driver, and
+ * remain so until the next I/O callback (e.g. bdrv_co_writev) is called.
+ */
+void (*bdrv_drain)(BlockDriverState *bs);
+
 QLIST_ENTRY(BlockDriver) list;
 };
 
-- 
2.4.3




[Qemu-block] [PATCH v5 11/12] qed: Implement .bdrv_drain

2015-10-20 Thread Fam Zheng
The "need_check_timer" is used to clear the "NEED_CHECK" flag in the
image header after a grace period once metadata update has finished. In
compliance to the bdrv_drain semantics we should make sure it remains
deleted once .bdrv_drain is called.

Call the qed_need_check_timer_cb manually to update the header
immediately.

Signed-off-by: Fam Zheng 
---
 block/qed.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/block/qed.c b/block/qed.c
index 5ea05d4..e9dcb4d 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -375,6 +375,12 @@ static void bdrv_qed_attach_aio_context(BlockDriverState 
*bs,
 }
 }
 
+static void bdrv_qed_drain(BlockDriverState *bs)
+{
+qed_cancel_need_check_timer(bs->opaque);
+qed_need_check_timer_cb(bs->opaque);
+}
+
 static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
  Error **errp)
 {
@@ -1676,6 +1682,7 @@ static BlockDriver bdrv_qed = {
 .bdrv_check   = bdrv_qed_check,
 .bdrv_detach_aio_context  = bdrv_qed_detach_aio_context,
 .bdrv_attach_aio_context  = bdrv_qed_attach_aio_context,
+.bdrv_drain   = bdrv_qed_drain,
 };
 
 static void bdrv_qed_init(void)
-- 
2.4.3




[Qemu-block] [PATCH v5 04/12] aio: introduce aio_{disable, enable}_external

2015-10-20 Thread Fam Zheng
Signed-off-by: Fam Zheng 
---
 aio-posix.c |  3 ++-
 aio-win32.c |  3 ++-
 include/block/aio.h | 37 +
 3 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/aio-posix.c b/aio-posix.c
index f0f9122..0467f23 100644
--- a/aio-posix.c
+++ b/aio-posix.c
@@ -261,7 +261,8 @@ bool aio_poll(AioContext *ctx, bool blocking)
 
 /* fill pollfds */
 QLIST_FOREACH(node, >aio_handlers, node) {
-if (!node->deleted && node->pfd.events) {
+if (!node->deleted && node->pfd.events
+&& aio_node_check(ctx, node->is_external)) {
 add_pollfd(node);
 }
 }
diff --git a/aio-win32.c b/aio-win32.c
index 3110d85..43c4c79 100644
--- a/aio-win32.c
+++ b/aio-win32.c
@@ -309,7 +309,8 @@ bool aio_poll(AioContext *ctx, bool blocking)
 /* fill fd sets */
 count = 0;
 QLIST_FOREACH(node, >aio_handlers, node) {
-if (!node->deleted && node->io_notify) {
+if (!node->deleted && node->io_notify
+&& aio_node_check(ctx, node->is_external)) {
 events[count++] = event_notifier_get_handle(node->e);
 }
 }
diff --git a/include/block/aio.h b/include/block/aio.h
index 12f1141..80151d1 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -122,6 +122,8 @@ struct AioContext {
 
 /* TimerLists for calling timers - one per clock type */
 QEMUTimerListGroup tlg;
+
+int external_disable_cnt;
 };
 
 /**
@@ -375,4 +377,39 @@ static inline void aio_timer_init(AioContext *ctx,
  */
 int64_t aio_compute_timeout(AioContext *ctx);
 
+/**
+ * aio_disable_external:
+ * @ctx: the aio context
+ *
+ * Disable further processing of external clients.
+ */
+static inline void aio_disable_external(AioContext *ctx)
+{
+atomic_inc(>external_disable_cnt);
+}
+
+/**
+ * aio_enable_external:
+ * @ctx: the aio context
+ *
+ * Enable the processing of external clients.
+ */
+static inline void aio_enable_external(AioContext *ctx)
+{
+atomic_dec(>external_disable_cnt);
+}
+
+/**
+ * aio_node_check:
+ * @ctx: the aio context
+ * @is_external: Whether or not the checked node is an external event source.
+ *
+ * Check if the node's is_external flag is okay to be polled by the ctx at this
+ * moment. True means green light.
+ */
+static inline bool aio_node_check(AioContext *ctx, bool is_external)
+{
+return !is_external || !atomic_read(>external_disable_cnt);
+}
+
 #endif
-- 
2.4.3




[Qemu-block] [PATCH v5 06/12] block: Add "drained begin/end" for transactional external snapshot

2015-10-20 Thread Fam Zheng
This ensures the atomicity of the transaction by avoiding processing of
external requests such as those from ioeventfd.

Signed-off-by: Fam Zheng 
---
 blockdev.c | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index 35e5719..e4a5eb4 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1569,6 +1569,7 @@ static void external_snapshot_prepare(BlkTransactionState 
*common,
 /* Acquire AioContext now so any threads operating on old_bs stop */
 state->aio_context = bdrv_get_aio_context(state->old_bs);
 aio_context_acquire(state->aio_context);
+bdrv_drained_begin(state->old_bs);
 
 if (!bdrv_is_inserted(state->old_bs)) {
 error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
@@ -1638,8 +1639,6 @@ static void external_snapshot_commit(BlkTransactionState 
*common)
  * don't want to abort all of them if one of them fails the reopen */
 bdrv_reopen(state->old_bs, state->old_bs->open_flags & ~BDRV_O_RDWR,
 NULL);
-
-aio_context_release(state->aio_context);
 }
 
 static void external_snapshot_abort(BlkTransactionState *common)
@@ -1649,7 +1648,14 @@ static void external_snapshot_abort(BlkTransactionState 
*common)
 if (state->new_bs) {
 bdrv_unref(state->new_bs);
 }
+}
+
+static void external_snapshot_clean(BlkTransactionState *common)
+{
+ExternalSnapshotState *state =
+ DO_UPCAST(ExternalSnapshotState, common, common);
 if (state->aio_context) {
+bdrv_drained_end(state->old_bs);
 aio_context_release(state->aio_context);
 }
 }
@@ -1809,6 +1815,7 @@ static const BdrvActionOps actions[] = {
 .prepare  = external_snapshot_prepare,
 .commit   = external_snapshot_commit,
 .abort = external_snapshot_abort,
+.clean = external_snapshot_clean,
 },
 [TRANSACTION_ACTION_KIND_DRIVE_BACKUP] = {
 .instance_size = sizeof(DriveBackupState),
-- 
2.4.3