Re: [Qemu-devel] [PATCH v3 03/23] block: Connect BlockBackend to BlockDriverState

2014-09-30 Thread Kevin Wolf
Am 16.09.2014 um 20:12 hat Markus Armbruster geschrieben:
 The pointer from BlockBackend to BlockDriverState is a strong
 reference, managed with bdrv_ref() / bdrv_unref(), the back-pointer is
 a weak one.
 
 Convenience function blk_new_with_bs() creates a BlockBackend with its
 BlockDriverState.  Callers have to unref both.  The commit after next
 will relieve them of the need to unref the BlockDriverState.
 
 Complication: due to the silly way drive_del works, we need a way to
 hide a BlockBackend, just like bdrv_make_anon().  To emphasize its
 special status, give the function a suitably off-putting name:
 blk_hide_on_behalf_of_do_drive_del().  Unfortunately, hiding turns the
 BlockBackend's name into the empty string.  Can't avoid that without
 breaking the blk->bs->device_name equals blk->name invariant.
 
 The patch adds a memory leak: drive_del while a device model is
 connected leaks the BlockBackend.  Avoiding the leak here is rather
 hairy, but it'll become straightforward in a few commits, so I mark it
 FIXME in the code now, and plug it when it's easy.
 
 Signed-off-by: Markus Armbruster arm...@redhat.com
 ---
  block.c|  10 ++--
  block/block-backend.c  |  71 ++-
  blockdev.c |  21 ---
  hw/block/xen_disk.c|   8 +--
  include/block/block_int.h  |   2 +
  include/sysemu/block-backend.h |   5 ++
  qemu-img.c | 125 
 +++--
  qemu-io.c  |   4 +-
  qemu-nbd.c |   4 +-
  9 files changed, 156 insertions(+), 94 deletions(-)
 
 diff --git a/block.c b/block.c
 index 934881f..7ccf443 100644
 --- a/block.c
 +++ b/block.c
 @@ -2032,7 +2032,7 @@ static void bdrv_move_feature_fields(BlockDriverState 
 *bs_dest,
   * This will modify the BlockDriverState fields, and swap contents
   * between bs_new and bs_old. Both bs_new and bs_old are modified.
   *
 - * bs_new is required to be anonymous.
 + * bs_new must be nameless and not attached to a BlockBackend.
   *
   * This function does not create any image files.
   */
 @@ -2051,8 +2051,9 @@ void bdrv_swap(BlockDriverState *bs_new, 
 BlockDriverState *bs_old)
  QTAILQ_REMOVE(graph_bdrv_states, bs_old, node_list);
  }
  
 -/* bs_new must be anonymous and shouldn't have anything fancy enabled */
 +/* bs_new must be nameless and shouldn't have anything fancy enabled */
 assert(bs_new->device_name[0] == '\0');
 +assert(!bs_new->blk);
 assert(QLIST_EMPTY(bs_new->dirty_bitmaps));
 assert(bs_new->job == NULL);
 assert(bs_new->dev == NULL);
 @@ -2068,8 +2069,9 @@ void bdrv_swap(BlockDriverState *bs_new, 
 BlockDriverState *bs_old)
  bdrv_move_feature_fields(bs_old, bs_new);
  bdrv_move_feature_fields(bs_new, tmp);
  
 -/* bs_new shouldn't be in bdrv_states even after the swap!  */
 +/* bs_new must remain nameless and unattached */
 assert(bs_new->device_name[0] == '\0');
 +assert(!bs_new->blk);

Taking back my R-b: You tricked us, this assertion doesn't hold true.
Easy to reproduce by taking a live snapshot. qemu-iotests case 052
catches it. Didn't you run it?

You probably need to swap bs->blk in bdrv_move_feature_fields().

Kevin



Re: [Qemu-devel] [PATCH v3 03/23] block: Connect BlockBackend to BlockDriverState

2014-09-30 Thread Markus Armbruster
Kevin Wolf kw...@redhat.com writes:

 Am 16.09.2014 um 20:12 hat Markus Armbruster geschrieben:
 The pointer from BlockBackend to BlockDriverState is a strong
 reference, managed with bdrv_ref() / bdrv_unref(), the back-pointer is
 a weak one.
 
 Convenience function blk_new_with_bs() creates a BlockBackend with its
 BlockDriverState.  Callers have to unref both.  The commit after next
 will relieve them of the need to unref the BlockDriverState.
 
 Complication: due to the silly way drive_del works, we need a way to
 hide a BlockBackend, just like bdrv_make_anon().  To emphasize its
 special status, give the function a suitably off-putting name:
 blk_hide_on_behalf_of_do_drive_del().  Unfortunately, hiding turns the
 BlockBackend's name into the empty string.  Can't avoid that without
 breaking the blk-bs-device_name equals blk-name invariant.
 
 The patch adds a memory leak: drive_del while a device model is
 connected leaks the BlockBackend.  Avoiding the leak here is rather
 hairy, but it'll become straightforward in a few commits, so I mark it
 FIXME in the code now, and plug it when it's easy.
 
 Signed-off-by: Markus Armbruster arm...@redhat.com
 ---
  block.c|  10 ++--
  block/block-backend.c  |  71 ++-
  blockdev.c |  21 ---
  hw/block/xen_disk.c|   8 +--
  include/block/block_int.h  |   2 +
  include/sysemu/block-backend.h |   5 ++
  qemu-img.c | 125 
 +++--
  qemu-io.c  |   4 +-
  qemu-nbd.c |   4 +-
  9 files changed, 156 insertions(+), 94 deletions(-)
 
 diff --git a/block.c b/block.c
 index 934881f..7ccf443 100644
 --- a/block.c
 +++ b/block.c
 @@ -2032,7 +2032,7 @@ static void bdrv_move_feature_fields(BlockDriverState 
 *bs_dest,
   * This will modify the BlockDriverState fields, and swap contents
   * between bs_new and bs_old. Both bs_new and bs_old are modified.
   *
 - * bs_new is required to be anonymous.
 + * bs_new must be nameless and not attached to a BlockBackend.
   *
   * This function does not create any image files.
   */
 @@ -2051,8 +2051,9 @@ void bdrv_swap(BlockDriverState *bs_new, 
 BlockDriverState *bs_old)
  QTAILQ_REMOVE(graph_bdrv_states, bs_old, node_list);
  }
  
 -/* bs_new must be anonymous and shouldn't have anything fancy enabled */
 +/* bs_new must be nameless and shouldn't have anything fancy enabled */
  assert(bs_new-device_name[0] == '\0');
 +assert(!bs_new-blk);
  assert(QLIST_EMPTY(bs_new-dirty_bitmaps));
  assert(bs_new-job == NULL);
  assert(bs_new-dev == NULL);
 @@ -2068,8 +2069,9 @@ void bdrv_swap(BlockDriverState *bs_new, 
 BlockDriverState *bs_old)
  bdrv_move_feature_fields(bs_old, bs_new);
  bdrv_move_feature_fields(bs_new, tmp);
  
 -/* bs_new shouldn't be in bdrv_states even after the swap!  */
 +/* bs_new must remain nameless and unattached */
  assert(bs_new-device_name[0] == '\0');
 +assert(!bs_new-blk);

 Taking back my R-b: You tricked us, this assertion doesn't hold true.
 Easy to reproduce by taking a live snapshot. qemu-iotests case 052
 catches it. Didn't you run it?

I run make check-qtest check-block on every commit before I submit.
No idea what went wrong with this one.

 You probably need to swap bs-blk in bdrv_move_feature_fields().

I'll look into it, thanks!



Re: [Qemu-devel] [PATCH v3 03/23] block: Connect BlockBackend to BlockDriverState

2014-09-30 Thread Kevin Wolf
Am 30.09.2014 um 12:56 hat Markus Armbruster geschrieben:
 Kevin Wolf kw...@redhat.com writes:
 
  Am 16.09.2014 um 20:12 hat Markus Armbruster geschrieben:
  The pointer from BlockBackend to BlockDriverState is a strong
  reference, managed with bdrv_ref() / bdrv_unref(), the back-pointer is
  a weak one.
  
  Convenience function blk_new_with_bs() creates a BlockBackend with its
  BlockDriverState.  Callers have to unref both.  The commit after next
  will relieve them of the need to unref the BlockDriverState.
  
  Complication: due to the silly way drive_del works, we need a way to
  hide a BlockBackend, just like bdrv_make_anon().  To emphasize its
  special status, give the function a suitably off-putting name:
  blk_hide_on_behalf_of_do_drive_del().  Unfortunately, hiding turns the
  BlockBackend's name into the empty string.  Can't avoid that without
  breaking the blk-bs-device_name equals blk-name invariant.
  
  The patch adds a memory leak: drive_del while a device model is
  connected leaks the BlockBackend.  Avoiding the leak here is rather
  hairy, but it'll become straightforward in a few commits, so I mark it
  FIXME in the code now, and plug it when it's easy.
  
  Signed-off-by: Markus Armbruster arm...@redhat.com
  ---
   block.c|  10 ++--
   block/block-backend.c  |  71 ++-
   blockdev.c |  21 ---
   hw/block/xen_disk.c|   8 +--
   include/block/block_int.h  |   2 +
   include/sysemu/block-backend.h |   5 ++
   qemu-img.c | 125 
  +++--
   qemu-io.c  |   4 +-
   qemu-nbd.c |   4 +-
   9 files changed, 156 insertions(+), 94 deletions(-)
  
  diff --git a/block.c b/block.c
  index 934881f..7ccf443 100644
  --- a/block.c
  +++ b/block.c
  @@ -2032,7 +2032,7 @@ static void 
  bdrv_move_feature_fields(BlockDriverState *bs_dest,
* This will modify the BlockDriverState fields, and swap contents
* between bs_new and bs_old. Both bs_new and bs_old are modified.
*
  - * bs_new is required to be anonymous.
  + * bs_new must be nameless and not attached to a BlockBackend.
*
* This function does not create any image files.
*/
  @@ -2051,8 +2051,9 @@ void bdrv_swap(BlockDriverState *bs_new, 
  BlockDriverState *bs_old)
   QTAILQ_REMOVE(graph_bdrv_states, bs_old, node_list);
   }
   
  -/* bs_new must be anonymous and shouldn't have anything fancy enabled 
  */
  +/* bs_new must be nameless and shouldn't have anything fancy enabled 
  */
   assert(bs_new-device_name[0] == '\0');
  +assert(!bs_new-blk);
   assert(QLIST_EMPTY(bs_new-dirty_bitmaps));
   assert(bs_new-job == NULL);
   assert(bs_new-dev == NULL);
  @@ -2068,8 +2069,9 @@ void bdrv_swap(BlockDriverState *bs_new, 
  BlockDriverState *bs_old)
   bdrv_move_feature_fields(bs_old, bs_new);
   bdrv_move_feature_fields(bs_new, tmp);
   
  -/* bs_new shouldn't be in bdrv_states even after the swap!  */
  +/* bs_new must remain nameless and unattached */
   assert(bs_new-device_name[0] == '\0');
  +assert(!bs_new-blk);
 
  Taking back my R-b: You tricked us, this assertion doesn't hold true.
  Easy to reproduce by taking a live snapshot. qemu-iotests case 052
  catches it. Didn't you run it?
 
 I run make check-qtest check-block on every commit before I submit.
 No idea what went wrong with this one.

When run for raw, it's only 052 that catches it. For qcow2, I got some
more failures: 039 040 041 051 052 085

I see the problem: Only 039 and 052 are marked as 'quick', i.e. the rest
is already excluded from 'make check-block'. 039 and 052 don't work with
cache=none and 'make check-block' uses -nocache, so those are skipped as
well. I'll send a patch to remove the -nocache option and let it run
with the default options.

Kevin



Re: [Qemu-devel] [PATCH v3 03/23] block: Connect BlockBackend to BlockDriverState

2014-09-30 Thread Markus Armbruster
Kevin Wolf kw...@redhat.com writes:

 Am 30.09.2014 um 12:56 hat Markus Armbruster geschrieben:
 Kevin Wolf kw...@redhat.com writes:
[...]
  Taking back my R-b: You tricked us, this assertion doesn't hold true.
  Easy to reproduce by taking a live snapshot. qemu-iotests case 052
  catches it. Didn't you run it?
 
 I run make check-qtest check-block on every commit before I submit.
 No idea what went wrong with this one.

 When run for raw, it's only 052 that catches it. For qcow2, I got some
 more failures: 039 040 041 051 052 085

 I see the problem: Only 039 and 052 are marked as 'quick', i.e. the rest
 is already excluded from 'make check-block'. 039 and 052 don't work with
 cache=none and 'make check-block' uses -nocache, so those are skipped as
 well.

Yes, that's why I missed it.

   I'll send a patch to remove the -nocache option and let it run
 with the default options.

Appreciated!



Re: [Qemu-devel] [PATCH v3 03/23] block: Connect BlockBackend to BlockDriverState

2014-09-25 Thread Benoît Canet
On Tue, Sep 23, 2014 at 03:36:03PM +0200, Kevin Wolf wrote:
 Do we have a KVM Forum block layer agenda yet? I think this thread could
 already contain a few topics to discuss there.

Being the guy who constantly brings painful issues
(Block filters, Block Backend) back to the table, I think we should also hold a
BoF session (Stefan's idea from a private discussion) about how we can further
tweak and improve the review process.

On some other open source projects as complex as the QEMU block layer, I
experienced a feeling of reactivity while contributing patches; although the new
QEMU block layer review process is starting, we are not there yet.

One idea I have is that we could use this event to establish semi-informal
peer review contracts between contributors, like ISPs do for
bandwidth.

Of course we should be careful to avoid going the academic review circle route,
and leave some room for unknown people.

Best regards

Benoît

 
 Kevin



Re: [Qemu-devel] [PATCH v3 03/23] block: Connect BlockBackend to BlockDriverState

2014-09-23 Thread Kevin Wolf
Am 22.09.2014 um 18:34 hat Markus Armbruster geschrieben:
 Kevin Wolf kw...@redhat.com writes:
 
  Am 16.09.2014 um 20:12 hat Markus Armbruster geschrieben:
  diff --git a/include/block/block_int.h b/include/block/block_int.h
  index 8d86a6c..14e0b7c 100644
  --- a/include/block/block_int.h
  +++ b/include/block/block_int.h
  @@ -324,6 +324,8 @@ struct BlockDriverState {
   BlockDriver *drv; /* NULL means no media */
   void *opaque;
   
  +BlockBackend *blk;  /* owning backend, if any */
  +
   void *dev;  /* attached device model, if any */
   /* TODO change to DeviceState when all users are qdevified */
   const BlockDevOps *dev_ops;
 
  Just to make sure that we agree on where we're going: This makes the
  assumption that a BDS has at most one BB that owns it.
 
 Yes.
 
 Which is not the
  final state that we want to have, so this will have to go away later.
 
 I don't know.  Can you explain why you think we're going to want
 multiple BBs?

We already agreed that we'll have multiple parents for a BDS, for
scenarios like having an NBD server on a snapshot or sharing backing
files, potentially also some block jobs.

The question is whether among these multiple parents we want to have a
limitation to one BlockBackend, forbidding e.g. an NBD server on the
active layer. This would be a problem for live storage migration if we
don't want the NBD server to reuse the same BB as the guest device.

More generally, if we can indirectly have multiple BBs on a single
BDS by putting a filter in between, do we have good reasons to forbid
having them attached directly?

  (Where later isn't necessarily part of this series.)
 
  For now, the use of the field is limited to callbacks and
  bdrv_get_device_name(). Callbacks could always only serve a single
  device, so nothing became worse here.
 
 In *this* patch, member blk is only read in bdrv_swap(), which asserts
 it's null.  Later on in the series, it gets indeed used as you describe.

Yes, my "now" depends on context and either refers to the patch I'm
commenting on or the end of the series. In most cases when I see
something that I feel is worth a closer look, the first thing I
do is to look at the fully applied series.

 PATCH 22 puts it to use for BlockDevOps callbacks.  The patch moves the
 callbacks from BDS to BB.  I hope you'll agree that's where they belong.
 
 Naturally, the *calls* of the callbacks remain where they are, in
 block.c.  They get updated like this:
 
 -   bdrv_dev_FOO(bs, ARGS)
 +   if (bs-blk) {
 +   blk_dev_FOO(bs-blk ARGS)
 +   }

Yes, as I said, this is fine for now. When we allow multiple BBs, we'll
have to turn it into something like notifier lists, but that can wait.

 PATCH 08 uses it to eliminate BDS member device_name[].
 
  I'm not entirely sure about bdrv_get_device_name(), whether it needs to
  go or to be rewritten to get the name of any BB pointing to it (I
  suspect for most callers we want to replace it by something that uses
  node-name by default if there is one and only fall back to BB names if
  there isn't), but that's not an issue to block this patch.
 
 I agree users of bdrv_get_device_name() need to be examined, and the
 ones that really want a BDS name should probably be changed to use the
 BDS name (a.k.a. node-name) and fall back to the BB name.
 
 This series makes this need more visible, by emphasizing the
 distinctness of the two names.
 
 Aside: which one to fall back to if we have multiple BBs?

My first attempt would be "any", and in cases where this isn't good
enough, you can't use a fallback at all.

  What I would consider, however, is adding a TODO comment that tells
  people that this field needs to go and if you need to use it, something
  is wrong with your design (which happens to be true for the existing
  design of some code).
 
 For the device callbacks, we need a way to find the BB.  If multiple BBs
 can sit on top of the same BDS, we need to find the one with a device
 models attached.  Ot even the ones, if we permit that.
 
 Let's discuss this a bit, and depending on what we learn, add a suitable
 comment.  Possibly on top.

Are you sure that nothing else than device models can be interested in
callbacks? I expect that whatever block layer user we have, they will
always be interested in resizes, for example. Media change might also
not be entirely uninteresting, though in most cases what other users
want is probably a blocker.

Kevin



Re: [Qemu-devel] [PATCH v3 03/23] block: Connect BlockBackend to BlockDriverState

2014-09-23 Thread Markus Armbruster
Kevin Wolf kw...@redhat.com writes:

 Am 22.09.2014 um 18:34 hat Markus Armbruster geschrieben:
 Kevin Wolf kw...@redhat.com writes:
 
  Am 16.09.2014 um 20:12 hat Markus Armbruster geschrieben:
  diff --git a/include/block/block_int.h b/include/block/block_int.h
  index 8d86a6c..14e0b7c 100644
  --- a/include/block/block_int.h
  +++ b/include/block/block_int.h
  @@ -324,6 +324,8 @@ struct BlockDriverState {
   BlockDriver *drv; /* NULL means no media */
   void *opaque;
   
  +BlockBackend *blk;  /* owning backend, if any */
  +
   void *dev;  /* attached device model, if any */
   /* TODO change to DeviceState when all users are qdevified */
   const BlockDevOps *dev_ops;
 
  Just to make sure that we agree on where we're going: This makes the
  assumption that a BDS has at most one BB that owns it.
 
 Yes.
 
 Which is not the
  final state that we want to have, so this will have to go away later.
 
 I don't know.  Can you explain why you think we're going to want
 multiple BBs?

 We already agreed that we'll have multiple parents for a BDS, for
 scenarios like having an NBD server on a snapshot or sharing backing
 files, potentially also some block jobs.

We certainly want to provide for multiple users (intentionally vague
language here), such as NBD servers, block jobs, device models.  Should they
share a BB, or does each one need its own BB?

 The question is whether among these multiple parents we want to have a
 limitation to one BlockBackend, forbidding e.g. an NBD server on the
 active layer. This would be a problem for live storage migration if we
 don't want the NBD server to reuse the same BB as the guest device.

 More generally, if we can indirectly have multiple BBs on a single
 BDS by putting a filter in between, do we have good reasons to forbid
 having them attached directly?

Keeping code simple?

Not a valid argument when we *need* multiple BBs, i.e. when the answer
to my prior question is "each one needs its own BB".

  (Where later isn't necessarily part of this series.)
 
  For now, the use of the field is limited to callbacks and
  bdrv_get_device_name(). Callbacks could always only serve a single
  device, so nothing became worse here.
 
 In *this* patch, member blk is only read in bdrv_swap(), which asserts
 it's null.  Later on in the series, it gets indeed used as you describe.

 Yes, my now depends on context and either refers to the patch I'm
 commenting on or the end of the series. In most cases when I see
 something that I feel is worth having a closer look, the first thing I
 do is looking at the fully applied series.

 PATCH 22 puts it to use for BlockDevOps callbacks.  The patch moves the
 callbacks from BDS to BB.  I hope you'll agree that's where they belong.
 
 Naturally, the *calls* of the callbacks remain where they are, in
 block.c.  They get updated like this:
 
 -   bdrv_dev_FOO(bs, ARGS)
 +   if (bs-blk) {
 +   blk_dev_FOO(bs-blk ARGS)
 +   }

 Yes, as I said, this is fine for now. When we allow multiple BBs, we'll
 have to turn it into something like notifier lists, but that can wait.

Okay.

 PATCH 08 uses it to eliminate BDS member device_name[].
 
  I'm not entirely sure about bdrv_get_device_name(), whether it needs to
  go or to be rewritten to get the name of any BB pointing to it (I
  suspect for most callers we want to replace it by something that uses
  node-name by default if there is one and only fall back to BB names if
  there isn't), but that's not an issue to block this patch.
 
 I agree users of bdrv_get_device_name() need to be examined, and the
 ones that really want a BDS name should probably be changed to use the
 BDS name (a.k.a. node-name) and fall back to the BB name.
 
 This series makes this need more visible, by emphasizing the
 distinctness of the two names.
 
 Aside: which one to fall back to if we have multiple BBs?

 My first attempt would be any, and in cases where this isn't good
 enough, you can't use a fallback at all.

This is going to be fun :)

  What I would consider, however, is adding a TODO comment that tells
  people that this field needs to go and if you need to use it, something
  is wrong with your design (which happens to be true for the existing
  design of some code).
 
 For the device callbacks, we need a way to find the BB.  If multiple BBs
 can sit on top of the same BDS, we need to find the one with a device
 models attached.  Ot even the ones, if we permit that.
 
 Let's discuss this a bit, and depending on what we learn, add a suitable
 comment.  Possibly on top.

 Are you sure that nothing else than device models can be interested in
 callbacks? I expect that whatever block layer user we have, they will
 always be interested in resizes, for example. Media change might also
 not be entirely uninteresting, though in most cases what other users
 want is probably a blocker.

I designed 

Re: [Qemu-devel] [PATCH v3 03/23] block: Connect BlockBackend to BlockDriverState

2014-09-23 Thread Kevin Wolf
Am 23.09.2014 um 14:52 hat Markus Armbruster geschrieben:
 Kevin Wolf kw...@redhat.com writes:
 
  Am 22.09.2014 um 18:34 hat Markus Armbruster geschrieben:
  Kevin Wolf kw...@redhat.com writes:
  
   Am 16.09.2014 um 20:12 hat Markus Armbruster geschrieben:
   diff --git a/include/block/block_int.h b/include/block/block_int.h
   index 8d86a6c..14e0b7c 100644
   --- a/include/block/block_int.h
   +++ b/include/block/block_int.h
   @@ -324,6 +324,8 @@ struct BlockDriverState {
BlockDriver *drv; /* NULL means no media */
void *opaque;

   +BlockBackend *blk;  /* owning backend, if any */
   +
void *dev;  /* attached device model, if any */
/* TODO change to DeviceState when all users are qdevified */
const BlockDevOps *dev_ops;
  
   Just to make sure that we agree on where we're going: This makes the
   assumption that a BDS has at most one BB that owns it.
  
  Yes.
  
  Which is not the
   final state that we want to have, so this will have to go away later.
  
  I don't know.  Can you explain why you think we're going to want
  multiple BBs?
 
  We already agreed that we'll have multiple parents for a BDS, for
  scenarios like having an NBD server on a snapshot or sharing backing
  files, potentially also some block jobs.
 
 We certainly want to provide for multiple users (intentionally vague
 language here), such NBD server, block jobs, device models.  Should they
 share a BB, or does each one need its own BB?

I think they should have their own BB, but I can still be convinced
otherwise.

The first reason is that frontends (= BB users, more or less) and
backends are easiest to understand when they come in pairs. Having
multiple frontends for a single backend might be confusing.

Second, if the NBD server doesn't sit at the root but accesses a
backing file, it already has to get its own BB with its own name and
with no device model attached.  Doing the same at the root helps with
consistency.

Third, we'll probably want to have some things like werror/rerror or I/O
accounting handled separately for device models and NBD servers.

If we look at another type of users, we might easily find more reasons,
but for me this is already a pretty strong indicator that shared BBs are
probably not a good idea.

  The question is whether among these multiple parents we want to have a
  limitation to one BlockBackend, forbidding e.g. an NBD server on the
  active layer. This would be a problem for live storage migration if we
  don't want the NBD server to reuse the same BB as the guest device.
 
  More generally, if we can indirectly have multiple BBs on a single
  BDS by putting a filter in between, do we have good reasons to forbid
  having them attached directly?
 
 Keeping code simple?
 
 Not a valid argument when we *need* multiple BBs, i.e. when the answer
 to my prior question is each one needs its own BB.

Are there more places than just the callbacks that would be complicated
by multiple BBs per BDS?

   What I would consider, however, is adding a TODO comment that tells
   people that this field needs to go and if you need to use it, something
   is wrong with your design (which happens to be true for the existing
   design of some code).
  
  For the device callbacks, we need a way to find the BB.  If multiple BBs
  can sit on top of the same BDS, we need to find the one with a device
  models attached.  Ot even the ones, if we permit that.
  
  Let's discuss this a bit, and depending on what we learn, add a suitable
  comment.  Possibly on top.
 
  Are you sure that nothing else than device models can be interested in
  callbacks? I expect that whatever block layer user we have, they will
  always be interested in resizes, for example. Media change might also
  not be entirely uninteresting, though in most cases what other users
  want is probably a blocker.
 
 I designed BlockDevOps for device models only.  If other users emerge,
 it needs a rename, and possibly a rethink.

Very likely to happen in the long run. Block jobs are today blocking
resizes, but that's mostly because they don't have an easy way to
respond to a resize.  Sooner or later someone will want to grow their
images while they are being mirrored (which is a completely reasonable,
even if not trivial, thing to want).

Do we have a KVM Forum block layer agenda yet? I think this thread could
already contain a few topics to discuss there.

Kevin



Re: [Qemu-devel] [PATCH v3 03/23] block: Connect BlockBackend to BlockDriverState

2014-09-23 Thread Markus Armbruster
Kevin Wolf kw...@redhat.com writes:

 Am 23.09.2014 um 14:52 hat Markus Armbruster geschrieben:
 Kevin Wolf kw...@redhat.com writes:
 
  Am 22.09.2014 um 18:34 hat Markus Armbruster geschrieben:
  Kevin Wolf kw...@redhat.com writes:
  
   Am 16.09.2014 um 20:12 hat Markus Armbruster geschrieben:
   diff --git a/include/block/block_int.h b/include/block/block_int.h
   index 8d86a6c..14e0b7c 100644
   --- a/include/block/block_int.h
   +++ b/include/block/block_int.h
   @@ -324,6 +324,8 @@ struct BlockDriverState {
BlockDriver *drv; /* NULL means no media */
void *opaque;

   +BlockBackend *blk;  /* owning backend, if any */
   +
void *dev;  /* attached device model, if any */
/* TODO change to DeviceState when all users are qdevified */
const BlockDevOps *dev_ops;
  
   Just to make sure that we agree on where we're going: This makes the
   assumption that a BDS has at most one BB that owns it.
  
  Yes.
  
  Which is not the
   final state that we want to have, so this will have to go away later.
  
  I don't know.  Can you explain why you think we're going to want
  multiple BBs?
 
  We already agreed that we'll have multiple parents for a BDS, for
  scenarios like having an NBD server on a snapshot or sharing backing
  files, potentially also some block jobs.
 
 We certainly want to provide for multiple users (intentionally vague
 language here), such as NBD server, block jobs, device models.  Should they
 share a BB, or does each one need its own BB?

 I think they should have their own BB, but I can still be convinced
 otherwise.

 The first reason is that frontends (= BB users, more or less) and
 backends are easiest to understand when they come in pairs. Having
 multiple frontends for a single backend might be confusing.

 Second, if the NBD server doesn't sit at the root but accesses a
 backing file, it already has to get its own BB with its own name and
 with no device model attached.  Doing the same at the root helps with
 consistency.

These are valid, but fairly weak.

 Third, we'll probably want to have some things like werror/rerror or I/O
 accounting handled separately for device models and NBD servers.

This one's pretty convincing.

 If we look at another type of users, we might easily find more reasons,
 but for me this is already a pretty strong indicator that shared BBs are
 probably not a good idea.

  The question is whether among these multiple parents we want to have a
  limitation to one BlockBackend, forbidding e.g. an NBD server on the
  active layer. This would be a problem for live storage migration if we
  don't want the NBD server to reuse the same BB as the guest device.
 
  More generally, if we can indirectly have multiple BBs on a single
  BDS by putting a filter in between, do we have good reasons to forbid
  having them attached directly?
 
 Keeping code simple?
 
 Not a valid argument when we *need* multiple BBs, i.e. when the answer
 to my prior question is each one needs its own BB.

 Are there more places than just the callbacks that would be complicated
 by multiple BBs per BDS?

We'll know when we're done lifting stuff from BDS into BB.

   What I would consider, however, is adding a TODO comment that tells
   people that this field needs to go and if you need to use it, something
   is wrong with your design (which happens to be true for the existing
   design of some code).
  
  For the device callbacks, we need a way to find the BB.  If multiple BBs
  can sit on top of the same BDS, we need to find the one with a device
  model attached.  Or even the ones, if we permit that.
  
  Let's discuss this a bit, and depending on what we learn, add a suitable
  comment.  Possibly on top.
 
  Are you sure that nothing else than device models can be interested in
  callbacks? I expect that whatever block layer user we have, they will
  always be interested in resizes, for example. Media change might also
  not be entirely uninteresting, though in most cases what other users
  want is probably a blocker.
 
 I designed BlockDevOps for device models only.  If other users emerge,
 it needs a rename, and possibly a rethink.

 Very likely to happen in the long run. Block jobs are today blocking
 resizes, but that's mostly because they don't have an easy way to
 respond to a resize.  Sooner or later someone will want to grow their
 images while they are being mirrored (which is a completely reasonable,
 even if not trivial, thing to want).

 Do we have a KVM Forum block layer agenda yet? I think this thread could
 already contain a few topics to discuss there.

No agenda yet, as far as I know.



Re: [Qemu-devel] [PATCH v3 03/23] block: Connect BlockBackend to BlockDriverState

2014-09-22 Thread Kevin Wolf
Am 16.09.2014 um 20:12 hat Markus Armbruster geschrieben:
 The pointer from BlockBackend to BlockDriverState is a strong
 reference, managed with bdrv_ref() / bdrv_unref(), the back-pointer is
 a weak one.
 
 Convenience function blk_new_with_bs() creates a BlockBackend with its
 BlockDriverState.  Callers have to unref both.  The commit after next
 will relieve them of the need to unref the BlockDriverState.
 
 Complication: due to the silly way drive_del works, we need a way to
 hide a BlockBackend, just like bdrv_make_anon().  To emphasize its
 special status, give the function a suitably off-putting name:
 blk_hide_on_behalf_of_do_drive_del().  Unfortunately, hiding turns the
 BlockBackend's name into the empty string.  Can't avoid that without
 breaking the blk->bs->device_name equals blk->name invariant.
 
 The patch adds a memory leak: drive_del while a device model is
 connected leaks the BlockBackend.  Avoiding the leak here is rather
 hairy, but it'll become straightforward in a few commits, so I mark it
 FIXME in the code now, and plug it when it's easy.
 
 Signed-off-by: Markus Armbruster arm...@redhat.com

 +/*
 + * Hide @blk.
 + * @blk must not have been hidden already.
 + * Make attached BlockDriverState, if any, anonymous.
 + * Once hidden, @blk is invisible to all functions that don't receive
 + * it as argument.  For example, blk_by_name() won't return it.
 + * Strictly for use by do_drive_del().
 + * TODO get rid of it!
 + */
 +void blk_hide_on_behalf_of_do_drive_del(BlockBackend *blk)
 +{
 +QTAILQ_REMOVE(blk_backends, blk, link);
 +blk->name[0] = 0;

Style nit: I prefer '\0' when dealing with strings.

 +if (blk->bs) {
 +bdrv_make_anon(blk->bs);
 +}
 +}
 diff --git a/blockdev.c b/blockdev.c
 index 583235a..5da6028 100644
 --- a/blockdev.c
 +++ b/blockdev.c
 @@ -228,6 +228,7 @@ void drive_info_del(DriveInfo *dinfo)
  if (dinfo->opts) {
  qemu_opts_del(dinfo->opts);
  }
 +
  g_free(dinfo->id);
  QTAILQ_REMOVE(drives, dinfo, next);
  g_free(dinfo->serial);

This hunk is a rebasing artifact, I guess?

 diff --git a/include/block/block_int.h b/include/block/block_int.h
 index 8d86a6c..14e0b7c 100644
 --- a/include/block/block_int.h
 +++ b/include/block/block_int.h
 @@ -324,6 +324,8 @@ struct BlockDriverState {
  BlockDriver *drv; /* NULL means no media */
  void *opaque;
  
 +BlockBackend *blk;  /* owning backend, if any */
 +
  void *dev;  /* attached device model, if any */
  /* TODO change to DeviceState when all users are qdevified */
  const BlockDevOps *dev_ops;

Just to make sure that we agree on where we're going: This makes the
assumption that a BDS has at most one BB that owns it. Which is not the
final state that we want to have, so this will have to go away later.
(Where later isn't necessarily part of this series.)

For now, the use of the field is limited to callbacks and
bdrv_get_device_name(). Callbacks could always only serve a single
device, so nothing became worse here.

I'm not entirely sure about bdrv_get_device_name(), whether it needs to
go or to be rewritten to get the name of any BB pointing to it (I
suspect for most callers we want to replace it by something that uses
node-name by default if there is one and only fall back to BB names if
there isn't), but that's not an issue to block this patch.

What I would consider, however, is adding a TODO comment that tells
people that this field needs to go and if you need to use it, something
is wrong with your design (which happens to be true for the existing
design of some code).


Nothing critical in this patch, so with or without addressing the
comments:

Reviewed-by: Kevin Wolf kw...@redhat.com



Re: [Qemu-devel] [PATCH v3 03/23] block: Connect BlockBackend to BlockDriverState

2014-09-22 Thread Markus Armbruster
Kevin Wolf kw...@redhat.com writes:

 Am 16.09.2014 um 20:12 hat Markus Armbruster geschrieben:
 The pointer from BlockBackend to BlockDriverState is a strong
 reference, managed with bdrv_ref() / bdrv_unref(), the back-pointer is
 a weak one.
 
 Convenience function blk_new_with_bs() creates a BlockBackend with its
 BlockDriverState.  Callers have to unref both.  The commit after next
 will relieve them of the need to unref the BlockDriverState.
 
 Complication: due to the silly way drive_del works, we need a way to
 hide a BlockBackend, just like bdrv_make_anon().  To emphasize its
 special status, give the function a suitably off-putting name:
 blk_hide_on_behalf_of_do_drive_del().  Unfortunately, hiding turns the
 BlockBackend's name into the empty string.  Can't avoid that without
 breaking the blk->bs->device_name equals blk->name invariant.
 
 The patch adds a memory leak: drive_del while a device model is
 connected leaks the BlockBackend.  Avoiding the leak here is rather
 hairy, but it'll become straightforward in a few commits, so I mark it
 FIXME in the code now, and plug it when it's easy.
 
 Signed-off-by: Markus Armbruster arm...@redhat.com

 +/*
 + * Hide @blk.
 + * @blk must not have been hidden already.
 + * Make attached BlockDriverState, if any, anonymous.
 + * Once hidden, @blk is invisible to all functions that don't receive
 + * it as argument.  For example, blk_by_name() won't return it.
 + * Strictly for use by do_drive_del().
 + * TODO get rid of it!
 + */
 +void blk_hide_on_behalf_of_do_drive_del(BlockBackend *blk)
 +{
 +QTAILQ_REMOVE(blk_backends, blk, link);
 +blk->name[0] = 0;

 Style nit: I prefer '\0' when dealing with strings.

I don't, but if you feel strongly about it, I'll do it your way.

 +if (blk->bs) {
 +bdrv_make_anon(blk->bs);
 +}
 +}
 diff --git a/blockdev.c b/blockdev.c
 index 583235a..5da6028 100644
 --- a/blockdev.c
 +++ b/blockdev.c
 @@ -228,6 +228,7 @@ void drive_info_del(DriveInfo *dinfo)
  if (dinfo->opts) {
  qemu_opts_del(dinfo->opts);
  }
 +
  g_free(dinfo->id);
  QTAILQ_REMOVE(drives, dinfo, next);
  g_free(dinfo->serial);

 This hunk is a rebasing artifact, I guess?

Consider it gone.

 diff --git a/include/block/block_int.h b/include/block/block_int.h
 index 8d86a6c..14e0b7c 100644
 --- a/include/block/block_int.h
 +++ b/include/block/block_int.h
 @@ -324,6 +324,8 @@ struct BlockDriverState {
  BlockDriver *drv; /* NULL means no media */
  void *opaque;
  
 +BlockBackend *blk;  /* owning backend, if any */
 +
  void *dev;  /* attached device model, if any */
  /* TODO change to DeviceState when all users are qdevified */
  const BlockDevOps *dev_ops;

 Just to make sure that we agree on where we're going: This makes the
 assumption that a BDS has at most one BB that owns it.

Yes.

Which is not the
 final state that we want to have, so this will have to go away later.

I don't know.  Can you explain why you think we're going to want
multiple BBs?

 (Where later isn't necessarily part of this series.)

 For now, the use of the field is limited to callbacks and
 bdrv_get_device_name(). Callbacks could always only serve a single
 device, so nothing became worse here.

In *this* patch, member blk is only read in bdrv_swap(), which asserts
it's null.  Later on in the series, it gets indeed used as you describe.

PATCH 22 puts it to use for BlockDevOps callbacks.  The patch moves the
callbacks from BDS to BB.  I hope you'll agree that's where they belong.

Naturally, the *calls* of the callbacks remain where they are, in
block.c.  They get updated like this:

-   bdrv_dev_FOO(bs, ARGS)
+   if (bs->blk) {
+   blk_dev_FOO(bs->blk ARGS)
+   }

PATCH 08 uses it to eliminate BDS member device_name[].

 I'm not entirely sure about bdrv_get_device_name(), whether it needs to
 go or to be rewritten to get the name of any BB pointing to it (I
 suspect for most callers we want to replace it by something that uses
 node-name by default if there is one and only fall back to BB names if
 there isn't), but that's not an issue to block this patch.

I agree users of bdrv_get_device_name() need to be examined, and the
ones that really want a BDS name should probably be changed to use the
BDS name (a.k.a. node-name) and fall back to the BB name.

This series makes this need more visible, by emphasizing the
distinctness of the two names.

Aside: which one to fall back to if we have multiple BBs?

 What I would consider, however, is adding a TODO comment that tells
 people that this field needs to go and if you need to use it, something
 is wrong with your design (which happens to be true for the existing
 design of some code).

For the device callbacks, we need a way to find the BB.  If multiple BBs
can sit on top of the same BDS, we need to find the one with a device
model attached.  Or even the ones, if we 

Re: [Qemu-devel] [PATCH v3 03/23] block: Connect BlockBackend to BlockDriverState

2014-09-20 Thread Max Reitz

On 16.09.2014 20:12, Markus Armbruster wrote:

The pointer from BlockBackend to BlockDriverState is a strong
reference, managed with bdrv_ref() / bdrv_unref(), the back-pointer is
a weak one.

Convenience function blk_new_with_bs() creates a BlockBackend with its
BlockDriverState.  Callers have to unref both.  The commit after next
will relieve them of the need to unref the BlockDriverState.

Complication: due to the silly way drive_del works, we need a way to
hide a BlockBackend, just like bdrv_make_anon().  To emphasize its
special status, give the function a suitably off-putting name:
blk_hide_on_behalf_of_do_drive_del().  Unfortunately, hiding turns the
BlockBackend's name into the empty string.  Can't avoid that without
breaking the blk->bs->device_name equals blk->name invariant.

The patch adds a memory leak: drive_del while a device model is
connected leaks the BlockBackend.  Avoiding the leak here is rather
hairy, but it'll become straightforward in a few commits, so I mark it
FIXME in the code now, and plug it when it's easy.


Good.


Signed-off-by: Markus Armbruster arm...@redhat.com
---
  block.c|  10 ++--
  block/block-backend.c  |  71 ++-
  blockdev.c |  21 ---
  hw/block/xen_disk.c|   8 +--
  include/block/block_int.h  |   2 +
  include/sysemu/block-backend.h |   5 ++
  qemu-img.c | 125 +++--
  qemu-io.c  |   4 +-
  qemu-nbd.c |   4 +-
  9 files changed, 156 insertions(+), 94 deletions(-)


Reviewed-by: Max Reitz mre...@redhat.com



[Qemu-devel] [PATCH v3 03/23] block: Connect BlockBackend to BlockDriverState

2014-09-16 Thread Markus Armbruster
The pointer from BlockBackend to BlockDriverState is a strong
reference, managed with bdrv_ref() / bdrv_unref(), the back-pointer is
a weak one.

Convenience function blk_new_with_bs() creates a BlockBackend with its
BlockDriverState.  Callers have to unref both.  The commit after next
will relieve them of the need to unref the BlockDriverState.

Complication: due to the silly way drive_del works, we need a way to
hide a BlockBackend, just like bdrv_make_anon().  To emphasize its
special status, give the function a suitably off-putting name:
blk_hide_on_behalf_of_do_drive_del().  Unfortunately, hiding turns the
BlockBackend's name into the empty string.  Can't avoid that without
breaking the blk->bs->device_name equals blk->name invariant.

The patch adds a memory leak: drive_del while a device model is
connected leaks the BlockBackend.  Avoiding the leak here is rather
hairy, but it'll become straightforward in a few commits, so I mark it
FIXME in the code now, and plug it when it's easy.

Signed-off-by: Markus Armbruster arm...@redhat.com
---
 block.c|  10 ++--
 block/block-backend.c  |  71 ++-
 blockdev.c |  21 ---
 hw/block/xen_disk.c|   8 +--
 include/block/block_int.h  |   2 +
 include/sysemu/block-backend.h |   5 ++
 qemu-img.c | 125 +++--
 qemu-io.c  |   4 +-
 qemu-nbd.c |   4 +-
 9 files changed, 156 insertions(+), 94 deletions(-)

diff --git a/block.c b/block.c
index 934881f..7ccf443 100644
--- a/block.c
+++ b/block.c
@@ -2032,7 +2032,7 @@ static void bdrv_move_feature_fields(BlockDriverState 
*bs_dest,
  * This will modify the BlockDriverState fields, and swap contents
  * between bs_new and bs_old. Both bs_new and bs_old are modified.
  *
- * bs_new is required to be anonymous.
+ * bs_new must be nameless and not attached to a BlockBackend.
  *
  * This function does not create any image files.
  */
@@ -2051,8 +2051,9 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState 
*bs_old)
 QTAILQ_REMOVE(graph_bdrv_states, bs_old, node_list);
 }
 
-/* bs_new must be anonymous and shouldn't have anything fancy enabled */
+/* bs_new must be nameless and shouldn't have anything fancy enabled */
 assert(bs_new->device_name[0] == '\0');
+assert(!bs_new->blk);
 assert(QLIST_EMPTY(bs_new->dirty_bitmaps));
 assert(bs_new->job == NULL);
 assert(bs_new->dev == NULL);
@@ -2068,8 +2069,9 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState 
*bs_old)
 bdrv_move_feature_fields(bs_old, bs_new);
 bdrv_move_feature_fields(bs_new, tmp);
 
-/* bs_new shouldn't be in bdrv_states even after the swap!  */
+/* bs_new must remain nameless and unattached */
 assert(bs_new->device_name[0] == '\0');
+assert(!bs_new->blk);
 
 /* Check a few fields that should remain attached to the device */
 assert(bs_new->dev == NULL);
@@ -2096,7 +2098,7 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState 
*bs_old)
  * This will modify the BlockDriverState fields, and swap contents
  * between bs_new and bs_top. Both bs_new and bs_top are modified.
  *
- * bs_new is required to be anonymous.
+ * bs_new must be nameless and not attached to a BlockBackend.
  *
  * This function does not create any image files.
  */
diff --git a/block/block-backend.c b/block/block-backend.c
index e89caa9..a12215a 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -16,10 +16,11 @@
 struct BlockBackend {
 char *name;
 int refcnt;
+BlockDriverState *bs;
 QTAILQ_ENTRY(BlockBackend) link; /* for blk_backends */
 };
 
-/* All the BlockBackends */
+/* All the BlockBackends (except for hidden ones) */
 static QTAILQ_HEAD(, BlockBackend) blk_backends =
 QTAILQ_HEAD_INITIALIZER(blk_backends);
 
@@ -47,10 +48,44 @@ BlockBackend *blk_new(const char *name, Error **errp)
 return blk;
 }
 
+/*
+ * Create a new BlockBackend with a new BlockDriverState attached.
+ * Both have a reference count of one.  Caller owns *both* references.
+ * TODO Let caller own only the BlockBackend reference
+ * Otherwise just like blk_new(), which see.
+ */
+BlockBackend *blk_new_with_bs(const char *name, Error **errp)
+{
+BlockBackend *blk;
+BlockDriverState *bs;
+
+blk = blk_new(name, errp);
+if (!blk) {
+return NULL;
+}
+
+bs = bdrv_new_root(name, errp);
+if (!bs) {
+blk_unref(blk);
+return NULL;
+}
+
+blk->bs = bs;
+bs->blk = blk;
+return blk;
+}
+
 static void blk_delete(BlockBackend *blk)
 {
 assert(!blk->refcnt);
-QTAILQ_REMOVE(blk_backends, blk, link);
+if (blk->bs) {
+blk->bs->blk = NULL;
+blk->bs = NULL;
+}
+/* Avoid double-remove after blk_hide_on_behalf_of_do_drive_del() */
+if (blk->name[0]) {
+QTAILQ_REMOVE(blk_backends, blk, link);
+}
 g_free(blk->name);