date:20161014

Re: [Qemu-block] [PATCH] rbd: make the code better readable

2016-10-14 Thread 李秀波



JohnSnow 写到：
>
>
>On 10/14/2016 05:51 AM, Xiubo Li wrote:
>> Make it a bit clear and better readable.
>>
>
>Suggestion: "Make it clearer and more readable."
>
Yes, see the next version.

>>
>>  if (qemu_rbd_set_auth(cluster, secretid, errp) < 0) {
>>  rados_shutdown(cluster);
>
>Did you mean to remove rados_shutdown() here, too?
>
I will fix this.

>>  ret = rbd_create(io_ctx, name, bytes, _order);
>> -rados_ioctx_destroy(io_ctx);
>> -rados_shutdown(cluster);
>>  if (ret < 0) {
>>  error_setg_errno(errp, -ret, "error rbd create");
>> -return ret;
>>  }
>>
>> +rados_ioctx_destroy(io_ctx);
>> +
>> +failed_shutdown:
>
>Since this executes on the non-error pathway too, I might just call
>this
>'shutdown'.
>
Agree.

Thanks very much.

BRS
Xiubo


>> +rados_shutdown(cluster);
>>  return ret;
>>  }
>>
>>
>
>--
>梛s

Re: [Qemu-block] [Qemu-devel] [PATCH v2 3/4] fdc: Move qdev properties to FloppyDrive

2016-10-14 Thread John Snow




On 09/30/2016 03:39 PM, Kevin Wolf wrote:

This makes the FloppyDrive qdev object actually useful: Now that it has
all properties that don't belong to the controller, you can actually
use '-device floppy' and get a working result.

Command line semantics is consistent with CD-ROM drives: By default you
get a single empty floppy drive. You can override it with -drive and
using the same index, but if you use -drive to add a floppy to a
different index, you get both of them. However, as soon as you use any
'-device floppy', even to a different slot, the default drive is
disabled.

Using '-device floppy' without specifying the unit will choose the first
free slot on the controller.

Signed-off-by: Kevin Wolf 
---
 hw/block/fdc.c | 112 ++---
 vl.c   |   1 +
 2 files changed, 85 insertions(+), 28 deletions(-)

diff --git a/hw/block/fdc.c b/hw/block/fdc.c
index 5aa8e52..00c0ec6 100644
--- a/hw/block/fdc.c
+++ b/hw/block/fdc.c
@@ -35,6 +35,7 @@
 #include "qemu/timer.h"
 #include "hw/isa/isa.h"
 #include "hw/sysbus.h"
+#include "hw/block/block.h"
 #include "sysemu/block-backend.h"
 #include "sysemu/blockdev.h"
 #include "sysemu/sysemu.h"
@@ -487,12 +488,18 @@ static const BlockDevOps fd_block_ops = {
  OBJECT_CHECK(FloppyDrive, (obj), TYPE_FLOPPY_DRIVE)

 typedef struct FloppyDrive {
-DeviceState qdev;
-uint32_tunit;
+DeviceState qdev;
+uint32_tunit;
+BlockConf   conf;
+FloppyDriveType type;
 } FloppyDrive;

 static Property floppy_drive_properties[] = {
 DEFINE_PROP_UINT32("unit", FloppyDrive, unit, -1),
+DEFINE_BLOCK_PROPERTIES(FloppyDrive, conf),
+DEFINE_PROP_DEFAULT("drive-type", FloppyDrive, type,
+FLOPPY_DRIVE_TYPE_AUTO, qdev_prop_fdc_drive_type,
+FloppyDriveType),
 DEFINE_PROP_END_OF_LIST(),
 };

@@ -501,6 +508,7 @@ static int floppy_drive_init(DeviceState *qdev)
 FloppyDrive *dev = FLOPPY_DRIVE(qdev);
 FloppyBus *bus = DO_UPCAST(FloppyBus, bus, dev->qdev.parent_bus);
 FDrive *drive;
+int ret;

 if (dev->unit == -1) {
 for (dev->unit = 0; dev->unit < MAX_FD; dev->unit++) {
@@ -517,29 +525,57 @@ static int floppy_drive_init(DeviceState *qdev)
 return -1;
 }

-/* TODO Check whether unit is in use */
-
 drive = get_drv(bus->fdc, dev->unit);
-
 if (drive->blk) {
-if (blk_get_on_error(drive->blk, 0) != BLOCKDEV_ON_ERROR_ENOSPC) {
-error_report("fdc doesn't support drive option werror");
-return -1;
-}
-if (blk_get_on_error(drive->blk, 1) != BLOCKDEV_ON_ERROR_REPORT) {
-error_report("fdc doesn't support drive option rerror");
-return -1;
-}
-} else {
+error_report("Floppy unit %d is in use", dev->unit);
+return -1;
+}
+
+if (!dev->conf.blk) {
 /* Anonymous BlockBackend for an empty drive */
-drive->blk = blk_new();
+dev->conf.blk = blk_new();
+ret = blk_attach_dev(dev->conf.blk, dev);


Missing a 'q' here:   ^


+assert(ret == 0);
 }

-fd_init(drive);
-if (drive->blk) {
-blk_set_dev_ops(drive->blk, _block_ops, drive);
-pick_drive_type(drive);
+blkconf_blocksizes(>conf);
+if (dev->conf.logical_block_size != 512 ||
+dev->conf.physical_block_size != 512)
+{
+error_report("Physical and logical block size must be 512 for floppy");
+return -1;
+}
+
+/* rerror/werror aren't supported by fdc and therefore not even registered
+ * with qdev. So set the defaults manually before they are used in
+ * blkconf_apply_backend_options(). */
+dev->conf.rerror = BLOCKDEV_ON_ERROR_AUTO;
+dev->conf.werror = BLOCKDEV_ON_ERROR_AUTO;
+blkconf_apply_backend_options(>conf);
+
+/* 'enospc' is the default for -drive, 'report' is what blk_new() gives us
+ * for empty drives. */
+if (blk_get_on_error(dev->conf.blk, 0) != BLOCKDEV_ON_ERROR_ENOSPC &&
+blk_get_on_error(dev->conf.blk, 0) != BLOCKDEV_ON_ERROR_REPORT) {
+error_report("fdc doesn't support drive option werror");
+return -1;
 }
+if (blk_get_on_error(dev->conf.blk, 1) != BLOCKDEV_ON_ERROR_REPORT) {
+error_report("fdc doesn't support drive option rerror");
+return -1;
+}
+
+drive->blk = dev->conf.blk;
+drive->fdctrl = bus->fdc;
+
+fd_init(drive);
+blk_set_dev_ops(drive->blk, _block_ops, drive);
+
+/* Keep 'type' qdev property and FDrive->drive in sync */
+drive->drive = dev->type;
+pick_drive_type(drive);
+dev->type = drive->drive;
+
 fd_revalidate(drive);

 return 0;
@@ -808,6 +844,10 @@ struct FDCtrl {
 FloppyBus bus;
 uint8_t num_floppies;
 FDrive drives[MAX_FD];
+struct {
+BlockBackend *blk;
+FloppyDriveType type;
+} qdev_for_drives[MAX_FD];

Re: [Qemu-block] [Qemu-devel] [PATCH v2 2/4] fdc: Add a floppy drive qdev

2016-10-14 Thread John Snow




On 09/30/2016 03:39 PM, Kevin Wolf wrote:

Floppy controllers automatically create two floppy drive devices in qdev
now. (They always created two drives, but managed them only internally.)



Is this commit message out-of-phase now?


Signed-off-by: Kevin Wolf 
---
 hw/block/fdc.c | 151 +
 1 file changed, 120 insertions(+), 31 deletions(-)

diff --git a/hw/block/fdc.c b/hw/block/fdc.c
index a3afb62..5aa8e52 100644
--- a/hw/block/fdc.c
+++ b/hw/block/fdc.c
@@ -60,6 +60,8 @@
 #define FLOPPY_BUS(obj) OBJECT_CHECK(FloppyBus, (obj), TYPE_FLOPPY_BUS)

 typedef struct FDCtrl FDCtrl;
+typedef struct FDrive FDrive;
+static FDrive *get_drv(FDCtrl *fdctrl, int unit);

 typedef struct FloppyBus {
 BusState bus;
@@ -180,7 +182,7 @@ typedef enum FDiskFlags {
 FDISK_DBL_SIDES  = 0x01,
 } FDiskFlags;

-typedef struct FDrive {
+struct FDrive {
 FDCtrl *fdctrl;
 BlockBackend *blk;
 /* Drive status */
@@ -201,7 +203,7 @@ typedef struct FDrive {
 uint8_t media_rate;   /* Data rate of medium*/

 bool media_validated; /* Have we validated the media? */
-} FDrive;
+};


 static FloppyDriveType get_fallback_drive_type(FDrive *drv);
@@ -466,6 +468,100 @@ static void fd_revalidate(FDrive *drv)
 }
 }

+static void fd_change_cb(void *opaque, bool load)
+{
+FDrive *drive = opaque;
+
+drive->media_changed = 1;
+drive->media_validated = false;
+fd_revalidate(drive);
+}
+
+static const BlockDevOps fd_block_ops = {
+.change_media_cb = fd_change_cb,
+};
+
+
+#define TYPE_FLOPPY_DRIVE "floppy"
+#define FLOPPY_DRIVE(obj) \
+ OBJECT_CHECK(FloppyDrive, (obj), TYPE_FLOPPY_DRIVE)
+
+typedef struct FloppyDrive {
+DeviceState qdev;
+uint32_tunit;
+} FloppyDrive;
+
+static Property floppy_drive_properties[] = {
+DEFINE_PROP_UINT32("unit", FloppyDrive, unit, -1),
+DEFINE_PROP_END_OF_LIST(),
+};
+
+static int floppy_drive_init(DeviceState *qdev)
+{
+FloppyDrive *dev = FLOPPY_DRIVE(qdev);
+FloppyBus *bus = DO_UPCAST(FloppyBus, bus, dev->qdev.parent_bus);
+FDrive *drive;
+
+if (dev->unit == -1) {
+for (dev->unit = 0; dev->unit < MAX_FD; dev->unit++) {
+drive = get_drv(bus->fdc, dev->unit);
+if (!drive->blk) {
+break;
+}
+}
+}
+
+if (dev->unit >= MAX_FD) {
+error_report("Can't create floppy unit %d, bus supports only %d units",
+ dev->unit, MAX_FD);
+return -1;
+}
+
+/* TODO Check whether unit is in use */
+


Dear whoever cares about FDC: Save me from the merciless void ...!

(I see you remove this in the next patch, but don't make my heart jump 
like that!)



+drive = get_drv(bus->fdc, dev->unit);
+
+if (drive->blk) {
+if (blk_get_on_error(drive->blk, 0) != BLOCKDEV_ON_ERROR_ENOSPC) {
+error_report("fdc doesn't support drive option werror");
+return -1;
+}
+if (blk_get_on_error(drive->blk, 1) != BLOCKDEV_ON_ERROR_REPORT) {
+error_report("fdc doesn't support drive option rerror");
+return -1;
+}
+} else {
+/* Anonymous BlockBackend for an empty drive */
+drive->blk = blk_new();
+}
+
+fd_init(drive);
+if (drive->blk) {
+blk_set_dev_ops(drive->blk, _block_ops, drive);
+pick_drive_type(drive);
+}
+fd_revalidate(drive);
+
+return 0;
+}
+
+static void floppy_drive_class_init(ObjectClass *klass, void *data)
+{
+DeviceClass *k = DEVICE_CLASS(klass);
+k->init = floppy_drive_init;
+set_bit(DEVICE_CATEGORY_STORAGE, k->categories);
+k->bus_type = TYPE_FLOPPY_BUS;
+k->props = floppy_drive_properties;
+k->desc = "virtual floppy drive";
+}
+
+static const TypeInfo floppy_drive_info = {
+.name = TYPE_FLOPPY_DRIVE,
+.parent = TYPE_DEVICE,
+.instance_size = sizeof(FloppyDrive),
+.class_init = floppy_drive_class_init,
+};
+
 //
 /* Intel 82078 floppy disk controller emulation  */

@@ -1185,9 +1281,9 @@ static inline FDrive *drv3(FDCtrl *fdctrl)
 }
 #endif

-static FDrive *get_cur_drv(FDCtrl *fdctrl)
+static FDrive *get_drv(FDCtrl *fdctrl, int unit)
 {
-switch (fdctrl->cur_drv) {
+switch (unit) {
 case 0: return drv0(fdctrl);
 case 1: return drv1(fdctrl);
 #if MAX_FD == 4
@@ -1198,6 +1294,11 @@ static FDrive *get_cur_drv(FDCtrl *fdctrl)
 }
 }

+static FDrive *get_cur_drv(FDCtrl *fdctrl)
+{
+return get_drv(fdctrl, fdctrl->cur_drv);
+}
+
 /* Status A register : 0x00 (read-only) */
 static uint32_t fdctrl_read_statusA(FDCtrl *fdctrl)
 {
@@ -2357,46 +2458,33 @@ static void fdctrl_result_timer(void *opaque)
 }
 }

-static void fdctrl_change_cb(void *opaque, bool load)
-{
-FDrive *drive = opaque;
-
-drive->media_changed = 1;
-drive->media_validated = false;
-fd_revalidate(drive);

Re: [Qemu-block] [Qemu-devel] [PATCH v2 1/4] fdc: Add a floppy qbus

2016-10-14 Thread John Snow




On 09/30/2016 03:39 PM, Kevin Wolf wrote:

This adds a qbus to the floppy controller that should contain the floppy
drives eventually. At the moment it just exists and is empty.



Not unlike myself.


Signed-off-by: Kevin Wolf 
---
 hw/block/fdc.c | 40 +++-
 1 file changed, 35 insertions(+), 5 deletions(-)

diff --git a/hw/block/fdc.c b/hw/block/fdc.c
index b79873a..a3afb62 100644
--- a/hw/block/fdc.c
+++ b/hw/block/fdc.c
@@ -52,6 +52,33 @@
 }   \
 } while (0)

+
+//
+/* qdev floppy bus  */
+
+#define TYPE_FLOPPY_BUS "Floppy"
+#define FLOPPY_BUS(obj) OBJECT_CHECK(FloppyBus, (obj), TYPE_FLOPPY_BUS)
+
+typedef struct FDCtrl FDCtrl;
+
+typedef struct FloppyBus {
+BusState bus;
+FDCtrl *fdc;
+} FloppyBus;
+
+static const TypeInfo floppy_bus_info = {
+.name = TYPE_FLOPPY_BUS,
+.parent = TYPE_BUS,
+.instance_size = sizeof(FloppyBus),
+};
+
+static void floppy_bus_create(FDCtrl *fdc, FloppyBus *bus, DeviceState *dev)
+{
+qbus_create_inplace(bus, sizeof(FloppyBus), TYPE_FLOPPY_BUS, dev, NULL);
+bus->fdc = fdc;
+}
+
+
 //
 /* Floppy drive emulation   */

@@ -148,8 +175,6 @@ static FDriveSize drive_size(FloppyDriveType drive)
 #define FD_SECTOR_SC   2   /* Sector size code */
 #define FD_RESET_SENSEI_COUNT  4   /* Number of sense interrupts on RESET */

-typedef struct FDCtrl FDCtrl;
-
 /* Floppy disk drive emulation */
 typedef enum FDiskFlags {
 FDISK_DBL_SIDES  = 0x01,
@@ -684,6 +709,7 @@ struct FDCtrl {
 /* Power down config (also with status regB access mode */
 uint8_t pwrd;
 /* Floppy drives */
+FloppyBus bus;
 uint8_t num_floppies;
 FDrive drives[MAX_FD];
 int reset_sensei;
@@ -2442,7 +2468,8 @@ void sun4m_fdctrl_init(qemu_irq irq, hwaddr io_base,
 *fdc_tc = qdev_get_gpio_in(dev, 0);
 }

-static void fdctrl_realize_common(FDCtrl *fdctrl, Error **errp)
+static void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl,
+  Error **errp)
 {
 int i, j;
 static int command_tables_inited = 0;
@@ -2480,6 +2507,8 @@ static void fdctrl_realize_common(FDCtrl *fdctrl, Error 
**errp)
 k->register_channel(fdctrl->dma, fdctrl->dma_chann,
 _transfer_handler, fdctrl);
 }
+
+floppy_bus_create(fdctrl, >bus, dev);
 fdctrl_connect_drives(fdctrl, errp);
 }

@@ -2508,7 +2537,7 @@ static void isabus_fdc_realize(DeviceState *dev, Error 
**errp)
 }

 qdev_set_legacy_instance_id(dev, isa->iobase, 2);
-fdctrl_realize_common(fdctrl, );
+fdctrl_realize_common(dev, fdctrl, );
 if (err != NULL) {
 error_propagate(errp, err);
 return;
@@ -2559,7 +2588,7 @@ static void sysbus_fdc_common_realize(DeviceState *dev, 
Error **errp)
 FDCtrlSysBus *sys = SYSBUS_FDC(dev);
 FDCtrl *fdctrl = >state;

-fdctrl_realize_common(fdctrl, errp);
+fdctrl_realize_common(dev, fdctrl, errp);
 }

 FloppyDriveType isa_fdc_get_drive_type(ISADevice *fdc, int i)
@@ -2744,6 +2773,7 @@ static void fdc_register_types(void)
 type_register_static(_fdc_type_info);
 type_register_static(_fdc_info);
 type_register_static(_fdc_info);
+type_register_static(_bus_info);
 }

 type_init(fdc_register_types)



Reviewed-by: John Snow

Re: [Qemu-block] [Qemu-devel] [PATCHv2 0/2] dma-helpers: explicitly pass alignment into DMA helpers

2016-10-14 Thread John Snow




On 10/14/2016 07:41 AM, Mark Cave-Ayland wrote:

This is a follow-up to the thread at
https://lists.gnu.org/archive/html/qemu-devel/2016-09/msg01396.html which
introduces an explicit alignment to the DMA helpers to facilitate conversion
of the macio controller over to use the now byte-aligned DMA helpers.

Patch 1 introduces an alignment parameter as suggested by Paolo above, whilst
patch 2 performs the conversion for the macio controller.

Signed-off-by: Mark Cave-Ayland 

v2:
- Use QEMU_IS_ALIGNED and QEMU_ALIGN_DOWN macros suggested by Eric
- Add Reviewed-by/Acked-by tags from Eric and John
- Rebase onto master

Mark Cave-Ayland (2):
  dma-helpers: explicitly pass alignment into DMA helpers
  macio: switch over to new byte-aligned DMA helpers

 dma-helpers.c|   21 ++---
 hw/block/nvme.c  |6 +-
 hw/ide/ahci.c|2 +
 hw/ide/core.c|6 +-
 hw/ide/macio.c   |  213 +++---
 hw/scsi/scsi-disk.c  |2 +
 include/sysemu/dma.h |6 +-
 7 files changed, 54 insertions(+), 202 deletions(-)



Did you know: "PATCHv2" apparently confuses our patches scraper?

Anyway:

Thanks, applied to my IDE tree:

https://github.com/jnsnow/qemu/commits/ide
https://github.com/jnsnow/qemu.git

--js

Re: [Qemu-block] bug introduced by "block: Move throttling fields from BDS to BB"

2016-10-14 Thread Paolo Bonzini



On 14/10/2016 16:11, Paolo Bonzini wrote:
> -ThrottleGroup *tg = container_of(blk_bs(blk)->throttle_state,
> - ThrottleGroup, ts);
> +BlockBackendPublic *blkp = blk_get_public(blk);
> +ThrottleGroup *tg = container_of(blkp->throttle_state, ThrottleGroup, 
> ts);
>  BlockBackend *token, *start;
>  
>  start = token = tg->tokens[is_write];
>  
>  /* get next bs round in round robin style */
>  token = throttle_group_next_blk(token);
> -while (token != start && !blk_bs(token)->pending_reqs[is_write]) {
> +while (token != start && !blkp->pending_reqs[is_write]) {
>  token = throttle_group_next_blk(token);
>  }
> 
> 
> blkp isn't updated every time token is updated.

BTW, the simplest fix is probably to introduce a function

static inline bool blk_has_pending_reqs(BlockBackend *blk,
bool is_write)

Paolo

Re: [Qemu-block] [PATCH] rbd: make the code better readable

2016-10-14 Thread John Snow




On 10/14/2016 05:51 AM, Xiubo Li wrote:

Make it a bit clear and better readable.



Suggestion: "Make it clearer and more readable."


Signed-off-by: Xiubo Li 
---
 block/rbd.c | 24 
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/block/rbd.c b/block/rbd.c
index 0a5840d..de8ca1b 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -366,45 +366,45 @@ static int qemu_rbd_create(const char *filename, QemuOpts 
*opts, Error **errp)
 rados_conf_read_file(cluster, NULL);
 } else if (conf[0] != '\0' &&
qemu_rbd_set_conf(cluster, conf, true, _err) < 0) {
-rados_shutdown(cluster);
 error_propagate(errp, local_err);
-return -EIO;
+ret = -EIO;
+goto failed_shutdown;
 }

 if (conf[0] != '\0' &&
 qemu_rbd_set_conf(cluster, conf, false, _err) < 0) {
-rados_shutdown(cluster);
 error_propagate(errp, local_err);
-return -EIO;
+ret = -EIO;
+goto failed_shutdown;
 }

 if (qemu_rbd_set_auth(cluster, secretid, errp) < 0) {
 rados_shutdown(cluster);


Did you mean to remove rados_shutdown() here, too?


-return -EIO;
+ret = -EIO;
+goto failed_shutdown;
 }

 ret = rados_connect(cluster);
 if (ret < 0) {
 error_setg_errno(errp, -ret, "error connecting");
-rados_shutdown(cluster);
-return ret;
+goto failed_shutdown;
 }

 ret = rados_ioctx_create(cluster, pool, _ctx);
 if (ret < 0) {
 error_setg_errno(errp, -ret, "error opening pool %s", pool);
-rados_shutdown(cluster);
-return ret;
+goto failed_shutdown;
 }

 ret = rbd_create(io_ctx, name, bytes, _order);
-rados_ioctx_destroy(io_ctx);
-rados_shutdown(cluster);
 if (ret < 0) {
 error_setg_errno(errp, -ret, "error rbd create");
-return ret;
 }

+rados_ioctx_destroy(io_ctx);
+
+failed_shutdown:


Since this executes on the non-error pathway too, I might just call this 
'shutdown'.



+rados_shutdown(cluster);
 return ret;
 }




--
—js

Re: [Qemu-block] [PATCH 07/22] qcow2-bitmap: introduce auto-loading bitmaps

2016-10-14 Thread Vladimir Sementsov-Ogievskiy


On 01.10.2016 19:26, Max Reitz wrote:

On 30.09.2016 12:53, Vladimir Sementsov-Ogievskiy wrote:

Auto loading bitmaps are bitmaps in Qcow2, with AUTO flag set. They are


...


+goto fail;
+}
+
+/* loop is safe because next entry offset is calculated after conversion to
Should it be s/safe/unsafe/?


loop is safe. _unsafe is related to absence of assert in a loop, as it 
loops through BE data. Bad wording, I'll change it somehow..





+ * cpu format */
+for_each_bitmap_dir_entry_unsafe(e, dir, size) {
+if ((uint8_t *)(e + 1) > dir_end) {
+goto broken_dir;
+}
+
+bitmap_dir_entry_to_cpu(e);
+
+if ((uint8_t *)next_dir_entry(e) > dir_end) {
+goto broken_dir;
+}
+
+if (e->extra_data_size != 0) {
+error_setg(errp, "Can't load bitmap '%.*s' from '%s':"
+   "extra data not supported.", e->name_size,

Full stop again.

Also, I'm not quite sure why you give the device/node name here. You
don't do that anywhere else and I think if we want to emit the
information where something failed, it should be added at some previous
point in the call chain.


For example, how? As I understand, I can emit it only by error_setg, so 
actually it would be better to add node and bitmap name to all 
error_setg in the code.. or create helper function for it.





+   dir_entry_name_notcstr(e),
+   bdrv_get_device_or_node_name(bs));
+goto fail;
+}
+}


...


+if (ret < 0) {
+goto fail;
+}
+}
+s->bitmap_directory_offset = new_offset;
+s->bitmap_directory_size = new_size;
+s->nb_bitmaps = new_nb_bitmaps;
+
+ret = update_header_sync(bs);
+if (ret < 0) {
+goto fail;
+}
+
+if (old_size) {
+qcow2_free_clusters(bs, old_offset, old_size, QCOW2_DISCARD_ALWAYS);
+}
+
+return 0;
+
+fail:
+if (new_offset > 0) {
+qcow2_free_clusters(bs, new_offset, new_size, QCOW2_DISCARD_ALWAYS);
+s->bitmap_directory_offset = old_offset;
+s->bitmap_directory_size = old_size;
+s->nb_bitmaps = old_nb_bitmaps;
+s->autoclear_features = old_autocl;

Why are you restoring the autoclear features? From a quick glance I
can't see any code path that changes this field here, and if there is
one, it probably has a good reason to do so.


hmm.. it's an artefact from future). should be moved to later patch.



--
Best regards,
Vladimir

[Qemu-block] [PATCH v7 15/16] nbd: Implement NBD_CMD_WRITE_ZEROES on server

2016-10-14 Thread Eric Blake

Upstream NBD protocol recently added the ability to efficiently
write zeroes without having to send the zeroes over the wire,
along with a flag to control whether the client wants to allow
a hole.

Note that when it comes to requiring full allocation, vs.
permitting optimizations, the NBD spec intentionally picked a
different sense for the flag; the rules in qemu are:
MAY_UNMAP == 0: must write zeroes
MAY_UNMAP == 1: may use holes if reads will see zeroes

while in NBD, the rules are:
FLAG_NO_HOLE == 1: must write zeroes
FLAG_NO_HOLE == 0: may use holes if reads will see zeroes

In all cases, the 'may use holes' scenario is optional (the
server need not use a hole, and must not use a hole if
subsequent reads would not see zeroes).

Signed-off-by: Eric Blake 

---
v6: rebase, improve commit message
v5: no change
v4: rebase, fix value for constant
v3: abandon NBD_CMD_CLOSE extension, rebase to use blk_pwrite_zeroes
---
 include/block/nbd.h |  8 ++--
 nbd/server.c| 42 --
 2 files changed, 46 insertions(+), 4 deletions(-)

diff --git a/include/block/nbd.h b/include/block/nbd.h
index eea7ef0..3e373f0 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -71,6 +71,7 @@ typedef struct NBDReply NBDReply;
 #define NBD_FLAG_SEND_FUA   (1 << 3)/* Send FUA (Force Unit 
Access) */
 #define NBD_FLAG_ROTATIONAL (1 << 4)/* Use elevator algorithm - 
rotational media */
 #define NBD_FLAG_SEND_TRIM  (1 << 5)/* Send TRIM (discard) */
+#define NBD_FLAG_SEND_WRITE_ZEROES (1 << 6) /* Send WRITE_ZEROES */

 /* New-style handshake (global) flags, sent from server to client, and
control what will happen during handshake phase. */
@@ -96,7 +97,8 @@ typedef struct NBDReply NBDReply;
 #define NBD_REP_ERR_SHUTDOWNNBD_REP_ERR(7)  /* Server shutting down */

 /* Request flags, sent from client to server during transmission phase */
-#define NBD_CMD_FLAG_FUA(1 << 0)
+#define NBD_CMD_FLAG_FUA(1 << 0) /* 'force unit access' during write */
+#define NBD_CMD_FLAG_NO_HOLE(1 << 1) /* don't punch hole on zero run */

 /* Supported request types */
 enum {
@@ -104,7 +106,9 @@ enum {
 NBD_CMD_WRITE = 1,
 NBD_CMD_DISC = 2,
 NBD_CMD_FLUSH = 3,
-NBD_CMD_TRIM = 4
+NBD_CMD_TRIM = 4,
+/* 5 reserved for failed experiment NBD_CMD_CACHE */
+NBD_CMD_WRITE_ZEROES = 6,
 };

 #define NBD_DEFAULT_PORT   10809
diff --git a/nbd/server.c b/nbd/server.c
index 66dc484..ea234ae 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -617,7 +617,8 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData 
*data)
 char buf[8 + 8 + 8 + 128];
 int rc;
 const uint16_t myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM |
-  NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA);
+  NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA |
+  NBD_FLAG_SEND_WRITE_ZEROES);
 bool oldStyle;
 size_t len;

@@ -1147,11 +1148,17 @@ static ssize_t nbd_co_receive_request(NBDRequestData 
*req,
 rc = request->type == NBD_CMD_WRITE ? -ENOSPC : -EINVAL;
 goto out;
 }
-if (request->flags & ~NBD_CMD_FLAG_FUA) {
+if (request->flags & ~(NBD_CMD_FLAG_FUA | NBD_CMD_FLAG_NO_HOLE)) {
 LOG("unsupported flags (got 0x%x)", request->flags);
 rc = -EINVAL;
 goto out;
 }
+if (request->type != NBD_CMD_WRITE_ZEROES &&
+(request->flags & NBD_CMD_FLAG_NO_HOLE)) {
+LOG("unexpected flags (got 0x%x)", request->flags);
+rc = -EINVAL;
+goto out;
+}

 rc = 0;

@@ -1256,6 +1263,37 @@ static void nbd_trip(void *opaque)
 }
 break;

+case NBD_CMD_WRITE_ZEROES:
+TRACE("Request type is WRITE_ZEROES");
+
+if (exp->nbdflags & NBD_FLAG_READ_ONLY) {
+TRACE("Server is read-only, return error");
+reply.error = EROFS;
+goto error_reply;
+}
+
+TRACE("Writing to device");
+
+flags = 0;
+if (request.flags & NBD_CMD_FLAG_FUA) {
+flags |= BDRV_REQ_FUA;
+}
+if (!(request.flags & NBD_CMD_FLAG_NO_HOLE)) {
+flags |= BDRV_REQ_MAY_UNMAP;
+}
+ret = blk_pwrite_zeroes(exp->blk, request.from + exp->dev_offset,
+request.len, flags);
+if (ret < 0) {
+LOG("writing to file failed");
+reply.error = -ret;
+goto error_reply;
+}
+
+if (nbd_co_send_reply(req, , 0) < 0) {
+goto out;
+}
+break;
+
 case NBD_CMD_DISC:
 /* unreachable, thanks to special case in nbd_co_receive_request() */
 abort();
-- 
2.7.4

[Qemu-block] [PATCH v7 11/16] nbd: Less allocation during NBD_OPT_LIST

2016-10-14 Thread Eric Blake

Since we know that the maximum name we are willing to accept
is small enough to stack-allocate, rework the iteration over
NBD_OPT_LIST responses to reuse a stack buffer rather than
allocating every time.  Furthermore, we don't even have to
allocate if we know the server's length doesn't match what
we are searching for.

Signed-off-by: Eric Blake 

---
v6: rebase
v5: alter signature of nbd_receive_list for simpler logic
v4: rebase
v3: tweak commit message
---
 nbd/client.c | 145 +--
 1 file changed, 70 insertions(+), 75 deletions(-)

diff --git a/nbd/client.c b/nbd/client.c
index df7eb9c..f6468db 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -254,19 +254,28 @@ static int nbd_handle_reply_err(QIOChannel *ioc, 
nbd_opt_reply *reply,
 return result;
 }

-static int nbd_receive_list(QIOChannel *ioc, char **name, Error **errp)
+/* Process another portion of the NBD_OPT_LIST reply.  Set *@match if
+ * the current reply matches @want or if the server does not support
+ * NBD_OPT_LIST, otherwise leave @match alone.  Return 0 if iteration
+ * is complete, positive if more replies are expected, or negative
+ * with @errp set if an unrecoverable error occurred. */
+static int nbd_receive_list(QIOChannel *ioc, const char *want, bool *match,
+Error **errp)
 {
 nbd_opt_reply reply;
 uint32_t len;
 uint32_t namelen;
+char name[NBD_MAX_NAME_SIZE + 1];
 int error;

-*name = NULL;
 if (nbd_receive_option_reply(ioc, NBD_OPT_LIST, , errp) < 0) {
 return -1;
 }
 error = nbd_handle_reply_err(ioc, , errp);
 if (error <= 0) {
+/* The server did not support NBD_OPT_LIST, so set *match on
+ * the assumption that any name will be accepted.  */
+*match = true;
 return error;
 }
 len = reply.length;
@@ -277,105 +286,91 @@ static int nbd_receive_list(QIOChannel *ioc, char 
**name, Error **errp)
 nbd_send_opt_abort(ioc);
 return -1;
 }
-} else if (reply.type == NBD_REP_SERVER) {
-if (len < sizeof(namelen) || len > NBD_MAX_BUFFER_SIZE) {
-error_setg(errp, "incorrect option length %" PRIu32, len);
-nbd_send_opt_abort(ioc);
-return -1;
-}
-if (read_sync(ioc, , sizeof(namelen)) != sizeof(namelen)) {
-error_setg(errp, "failed to read option name length");
-nbd_send_opt_abort(ioc);
-return -1;
-}
-namelen = be32_to_cpu(namelen);
-len -= sizeof(namelen);
-if (len < namelen) {
-error_setg(errp, "incorrect option name length");
-nbd_send_opt_abort(ioc);
-return -1;
-}
-if (namelen > NBD_MAX_NAME_SIZE) {
-error_setg(errp, "export name length too long %" PRIu32, namelen);
-nbd_send_opt_abort(ioc);
-return -1;
-}
-
-*name = g_new0(char, namelen + 1);
-if (read_sync(ioc, *name, namelen) != namelen) {
-error_setg(errp, "failed to read export name");
-g_free(*name);
-*name = NULL;
-nbd_send_opt_abort(ioc);
-return -1;
-}
-(*name)[namelen] = '\0';
-len -= namelen;
-if (drop_sync(ioc, len) != len) {
-error_setg(errp, "failed to read export description");
-g_free(*name);
-*name = NULL;
-nbd_send_opt_abort(ioc);
-return -1;
-}
-} else {
+return 0;
+} else if (reply.type != NBD_REP_SERVER) {
 error_setg(errp, "Unexpected reply type %" PRIx32 " expected %x",
reply.type, NBD_REP_SERVER);
 nbd_send_opt_abort(ioc);
 return -1;
 }
+
+if (len < sizeof(namelen) || len > NBD_MAX_BUFFER_SIZE) {
+error_setg(errp, "incorrect option length %" PRIu32, len);
+nbd_send_opt_abort(ioc);
+return -1;
+}
+if (read_sync(ioc, , sizeof(namelen)) != sizeof(namelen)) {
+error_setg(errp, "failed to read option name length");
+nbd_send_opt_abort(ioc);
+return -1;
+}
+namelen = be32_to_cpu(namelen);
+len -= sizeof(namelen);
+if (len < namelen) {
+error_setg(errp, "incorrect option name length");
+nbd_send_opt_abort(ioc);
+return -1;
+}
+if (namelen != strlen(want)) {
+if (drop_sync(ioc, len) != len) {
+error_setg(errp, "failed to skip export name with wrong length");
+nbd_send_opt_abort(ioc);
+return -1;
+}
+return 1;
+}
+
+assert(namelen < sizeof(name));
+if (read_sync(ioc, name, namelen) != namelen) {
+error_setg(errp, "failed to read export name");
+nbd_send_opt_abort(ioc);
+return -1;
+}
+name[namelen] = '\0';
+len -= namelen;
+if (drop_sync(ioc, len) != len) {
+

[Qemu-block] [PATCH v7 12/16] nbd: Support shorter handshake

2016-10-14 Thread Eric Blake

The NBD Protocol allows the server and client to mutually agree
on a shorter handshake (omit the 124 bytes of reserved 0), via
the server advertising NBD_FLAG_NO_ZEROES and the client
acknowledging with NBD_FLAG_C_NO_ZEROES (only possible in
newstyle, whether or not it is fixed newstyle).  It doesn't
shave much off the wire, but we might as well implement it.

Signed-off-by: Eric Blake 
Reviewed-by: Alex Bligh 

---
v6: rebase
v5: no change
v4: rebase
v3: rebase
---
 include/block/nbd.h |  6 --
 nbd/client.c|  8 +++-
 nbd/server.c| 15 +++
 3 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/include/block/nbd.h b/include/block/nbd.h
index b69bf1d..d326308 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -74,11 +74,13 @@ typedef struct NBDReply NBDReply;

 /* New-style handshake (global) flags, sent from server to client, and
control what will happen during handshake phase. */
-#define NBD_FLAG_FIXED_NEWSTYLE (1 << 0)/* Fixed newstyle protocol. */
+#define NBD_FLAG_FIXED_NEWSTYLE   (1 << 0) /* Fixed newstyle protocol. */
+#define NBD_FLAG_NO_ZEROES(1 << 1) /* End handshake without zeroes. */

 /* New-style client flags, sent from client to server to control what happens
during handshake phase. */
-#define NBD_FLAG_C_FIXED_NEWSTYLE   (1 << 0)/* Fixed newstyle protocol. */
+#define NBD_FLAG_C_FIXED_NEWSTYLE (1 << 0) /* Fixed newstyle protocol. */
+#define NBD_FLAG_C_NO_ZEROES  (1 << 1) /* End handshake without zeroes. */

 /* Reply types. */
 #define NBD_REP_ACK (1) /* Data sending finished. */
diff --git a/nbd/client.c b/nbd/client.c
index f6468db..f5e4c74 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -439,6 +439,7 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char 
*name, uint16_t *flags,
 char buf[256];
 uint64_t magic, s;
 int rc;
+bool zeroes = true;

 TRACE("Receiving negotiation tlscreds=%p hostname=%s.",
   tlscreds, hostname ? hostname : "");
@@ -503,6 +504,11 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char 
*name, uint16_t *flags,
 TRACE("Server supports fixed new style");
 clientflags |= NBD_FLAG_C_FIXED_NEWSTYLE;
 }
+if (globalflags & NBD_FLAG_NO_ZEROES) {
+zeroes = false;
+TRACE("Server supports no zeroes");
+clientflags |= NBD_FLAG_C_NO_ZEROES;
+}
 /* client requested flags */
 clientflags = cpu_to_be32(clientflags);
 if (write_sync(ioc, , sizeof(clientflags)) !=
@@ -590,7 +596,7 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char 
*name, uint16_t *flags,
 }

 TRACE("Size is %" PRIu64 ", export flags %" PRIx16, *size, *flags);
-if (drop_sync(ioc, 124) != 124) {
+if (zeroes && drop_sync(ioc, 124) != 124) {
 error_setg(errp, "Failed to read reserved block");
 goto fail;
 }
diff --git a/nbd/server.c b/nbd/server.c
index 3d39292..20f1086 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -81,6 +81,7 @@ struct NBDClient {
 int refcount;
 void (*close)(NBDClient *client);

+bool no_zeroes;
 NBDExport *exp;
 QCryptoTLSCreds *tlscreds;
 char *tlsaclname;
@@ -449,6 +450,11 @@ static int nbd_negotiate_options(NBDClient *client)
 fixedNewstyle = true;
 flags &= ~NBD_FLAG_C_FIXED_NEWSTYLE;
 }
+if (flags & NBD_FLAG_C_NO_ZEROES) {
+TRACE("Client supports no zeroes at handshake end");
+client->no_zeroes = true;
+flags &= ~NBD_FLAG_C_NO_ZEROES;
+}
 if (flags != 0) {
 TRACE("Unknown client flags 0x%" PRIx32 " received", flags);
 return -EIO;
@@ -601,6 +607,7 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData 
*data)
 const uint16_t myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM |
   NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA);
 bool oldStyle;
+size_t len;

 /* Old style negotiation header without options
 [ 0 ..   7]   passwd   ("NBDMAGIC")
@@ -617,7 +624,7 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData 
*data)
 options sent
 [18 ..  25]   size
 [26 ..  27]   export flags
-[28 .. 151]   reserved (0)
+[28 .. 151]   reserved (0, omit if no_zeroes)
  */

 qio_channel_set_blocking(client->ioc, false, NULL);
@@ -636,7 +643,7 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData 
*data)
 stw_be_p(buf + 26, client->exp->nbdflags | myflags);
 } else {
 stq_be_p(buf + 8, NBD_OPTS_MAGIC);
-stw_be_p(buf + 16, NBD_FLAG_FIXED_NEWSTYLE);
+stw_be_p(buf + 16, NBD_FLAG_FIXED_NEWSTYLE | NBD_FLAG_NO_ZEROES);
 }

 if (oldStyle) {
@@ -663,8 +670,8 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData 
*data)
   client->exp->size, client->exp->nbdflags | myflags);
 stq_be_p(buf + 18,

[Qemu-block] [PATCH v7 14/16] nbd: Improve server handling of shutdown requests

2016-10-14 Thread Eric Blake

NBD commit 6d34500b clarified how clients and servers are supposed
to behave before closing a connection. It added NBD_REP_ERR_SHUTDOWN
(for the server to announce it is about to go away during option
haggling, so the client should quit sending NBD_OPT_* other than
NBD_OPT_ABORT) and ESHUTDOWN (for the server to announce it is about
to go away during transmission, so the client should quit sending
NBD_CMD_* other than NBD_CMD_DISC).  It also clarified that
NBD_OPT_ABORT gets a reply, while NBD_CMD_DISC does not.

This patch merely adds the missing reply to NBD_OPT_ABORT and teaches
the client to recognize server errors.  Actually teaching the server
to send NBD_REP_ERR_SHUTDOWN or ESHUTDOWN would require knowing that
the server has been requested to shut down soon (maybe we could do
that by installing a SIGINT handler in qemu-nbd, which transitions
from RUNNING to a new state that waits for the client to react,
rather than just out-right quitting - but that's a bigger task for
another day).

Signed-off-by: Eric Blake 

---
v7: conditionalize ESHUTDOWN for mingw
v6: rebase
v5: no change
v4: new patch
---
 include/block/nbd.h | 13 +
 nbd/nbd-internal.h  |  1 +
 nbd/client.c| 24 
 nbd/server.c| 12 
 4 files changed, 46 insertions(+), 4 deletions(-)

diff --git a/include/block/nbd.h b/include/block/nbd.h
index d326308..eea7ef0 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -83,12 +83,17 @@ typedef struct NBDReply NBDReply;
 #define NBD_FLAG_C_NO_ZEROES  (1 << 1) /* End handshake without zeroes. */

 /* Reply types. */
+#define NBD_REP_ERR(value) ((UINT32_C(1) << 31) | (value))
+
 #define NBD_REP_ACK (1) /* Data sending finished. */
 #define NBD_REP_SERVER  (2) /* Export description. */
-#define NBD_REP_ERR_UNSUP   ((UINT32_C(1) << 31) | 1) /* Unknown option. */
-#define NBD_REP_ERR_POLICY  ((UINT32_C(1) << 31) | 2) /* Server denied */
-#define NBD_REP_ERR_INVALID ((UINT32_C(1) << 31) | 3) /* Invalid length. */
-#define NBD_REP_ERR_TLS_REQD((UINT32_C(1) << 31) | 5) /* TLS required */
+
+#define NBD_REP_ERR_UNSUP   NBD_REP_ERR(1)  /* Unknown option */
+#define NBD_REP_ERR_POLICY  NBD_REP_ERR(2)  /* Server denied */
+#define NBD_REP_ERR_INVALID NBD_REP_ERR(3)  /* Invalid length */
+#define NBD_REP_ERR_PLATFORMNBD_REP_ERR(4)  /* Not compiled in */
+#define NBD_REP_ERR_TLS_REQDNBD_REP_ERR(5)  /* TLS required */
+#define NBD_REP_ERR_SHUTDOWNNBD_REP_ERR(7)  /* Server shutting down */

 /* Request flags, sent from client to server during transmission phase */
 #define NBD_CMD_FLAG_FUA(1 << 0)
diff --git a/nbd/nbd-internal.h b/nbd/nbd-internal.h
index dd57e18..eee20ab 100644
--- a/nbd/nbd-internal.h
+++ b/nbd/nbd-internal.h
@@ -92,6 +92,7 @@
 #define NBD_ENOMEM 12
 #define NBD_EINVAL 22
 #define NBD_ENOSPC 28
+#define NBD_ESHUTDOWN  108

 static inline ssize_t read_sync(QIOChannel *ioc, void *buffer, size_t size)
 {
diff --git a/nbd/client.c b/nbd/client.c
index c6da4c4..d109e26 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -21,6 +21,12 @@
 #include "qapi/error.h"
 #include "nbd-internal.h"

+/* mingw lacks ESHUTDOWN. For this file, we can just fake it to any
+ * value unlikely to collide with any real errno */
+#ifndef ESHUTDOWN
+#define ESHUTDOWN 123456
+#endif
+
 static int nbd_errno_to_system_errno(int err)
 {
 int ret;
@@ -40,6 +46,9 @@ static int nbd_errno_to_system_errno(int err)
 case NBD_ENOSPC:
 ret = ENOSPC;
 break;
+case NBD_ESHUTDOWN:
+ret = ESHUTDOWN;
+break;
 default:
 TRACE("Squashing unexpected error %d to EINVAL", err);
 /* fallthrough */
@@ -239,11 +248,21 @@ static int nbd_handle_reply_err(QIOChannel *ioc, 
nbd_opt_reply *reply,
reply->option);
 break;

+case NBD_REP_ERR_PLATFORM:
+error_setg(errp, "Server lacks support for option %" PRIx32,
+   reply->option);
+break;
+
 case NBD_REP_ERR_TLS_REQD:
 error_setg(errp, "TLS negotiation required before option %" PRIx32,
reply->option);
 break;

+case NBD_REP_ERR_SHUTDOWN:
+error_setg(errp, "Server shutting down before option %" PRIx32,
+   reply->option);
+break;
+
 default:
 error_setg(errp, "Unknown error code when asking for option %" PRIx32,
reply->option);
@@ -784,6 +803,11 @@ ssize_t nbd_receive_reply(QIOChannel *ioc, NBDReply *reply)

 reply->error = nbd_errno_to_system_errno(reply->error);

+if (reply->error == ESHUTDOWN) {
+/* This works even on mingw which lacks a native ESHUTDOWN */
+LOG("server shutting down");
+return -EINVAL;
+}
 TRACE("Got reply: { magic = 0x%" PRIx32 ", .error = % " PRId32
   ", handle = %" PRIu64" }",
   magic, reply->error,

[Qemu-block] [PATCH v7 16/16] nbd: Implement NBD_CMD_WRITE_ZEROES on client

2016-10-14 Thread Eric Blake

Upstream NBD protocol recently added the ability to efficiently
write zeroes without having to send the zeroes over the wire,
along with a flag to control whether the client wants a hole.

The generic block code takes care of falling back to the obvious
write of lots of zeroes if we return -ENOTSUP because the server
does not have WRITE_ZEROES.

Ideally, since NBD_CMD_WRITE_ZEROES does not involve any data
over the wire, we want to support transactions that are much
larger than the normal 32M limit imposed on NBD_CMD_WRITE.  But
the server may still have a limit smaller than UINT_MAX, so
until experimental NBD protocol additions for advertising various
command sizes is finalized (see [1], [2]), for now we just stick to
the same limits as normal writes.

[1] https://github.com/yoe/nbd/blob/extension-info/doc/proto.md
[2] https://sourceforge.net/p/nbd/mailman/message/35081223/

Signed-off-by: Eric Blake 

---
v6: rebase
v5: enhance commit message
v4: rebase to byte-based limits
v3: rebase, tell block layer about our support
---
 block/nbd-client.h |  2 ++
 block/nbd-client.c | 35 +++
 block/nbd.c|  4 
 3 files changed, 41 insertions(+)

diff --git a/block/nbd-client.h b/block/nbd-client.h
index 78e8e57..e51df22 100644
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -48,6 +48,8 @@ int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t 
offset, int count);
 int nbd_client_co_flush(BlockDriverState *bs);
 int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
   uint64_t bytes, QEMUIOVector *qiov, int flags);
+int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
+int count, BdrvRequestFlags flags);
 int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
  uint64_t bytes, QEMUIOVector *qiov, int flags);

diff --git a/block/nbd-client.c b/block/nbd-client.c
index 8e89add..31db557 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -275,6 +275,41 @@ int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t 
offset,
 return -reply.error;
 }

+int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
+int count, BdrvRequestFlags flags)
+{
+ssize_t ret;
+NBDClientSession *client = nbd_get_client_session(bs);
+NBDRequest request = {
+.type = NBD_CMD_WRITE_ZEROES,
+.from = offset,
+.len = count,
+};
+NBDReply reply;
+
+if (!(client->nbdflags & NBD_FLAG_SEND_WRITE_ZEROES)) {
+return -ENOTSUP;
+}
+
+if (flags & BDRV_REQ_FUA) {
+assert(client->nbdflags & NBD_FLAG_SEND_FUA);
+request.flags |= NBD_CMD_FLAG_FUA;
+}
+if (!(flags & BDRV_REQ_MAY_UNMAP)) {
+request.flags |= NBD_CMD_FLAG_NO_HOLE;
+}
+
+nbd_coroutine_start(client, );
+ret = nbd_co_send_request(bs, , NULL);
+if (ret < 0) {
+reply.error = -ret;
+} else {
+nbd_co_receive_reply(client, , , NULL);
+}
+nbd_coroutine_end(client, );
+return -reply.error;
+}
+
 int nbd_client_co_flush(BlockDriverState *bs)
 {
 NBDClientSession *client = nbd_get_client_session(bs);
diff --git a/block/nbd.c b/block/nbd.c
index e227490..6c7bbc8 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -403,6 +403,7 @@ static int nbd_co_flush(BlockDriverState *bs)
 static void nbd_refresh_limits(BlockDriverState *bs, Error **errp)
 {
 bs->bl.max_pdiscard = NBD_MAX_BUFFER_SIZE;
+bs->bl.max_pwrite_zeroes = NBD_MAX_BUFFER_SIZE;
 bs->bl.max_transfer = NBD_MAX_BUFFER_SIZE;
 }

@@ -491,6 +492,7 @@ static BlockDriver bdrv_nbd = {
 .bdrv_file_open = nbd_open,
 .bdrv_co_preadv = nbd_client_co_preadv,
 .bdrv_co_pwritev= nbd_client_co_pwritev,
+.bdrv_co_pwrite_zeroes  = nbd_client_co_pwrite_zeroes,
 .bdrv_close = nbd_close,
 .bdrv_co_flush_to_os= nbd_co_flush,
 .bdrv_co_pdiscard   = nbd_client_co_pdiscard,
@@ -509,6 +511,7 @@ static BlockDriver bdrv_nbd_tcp = {
 .bdrv_file_open = nbd_open,
 .bdrv_co_preadv = nbd_client_co_preadv,
 .bdrv_co_pwritev= nbd_client_co_pwritev,
+.bdrv_co_pwrite_zeroes  = nbd_client_co_pwrite_zeroes,
 .bdrv_close = nbd_close,
 .bdrv_co_flush_to_os= nbd_co_flush,
 .bdrv_co_pdiscard   = nbd_client_co_pdiscard,
@@ -527,6 +530,7 @@ static BlockDriver bdrv_nbd_unix = {
 .bdrv_file_open = nbd_open,
 .bdrv_co_preadv = nbd_client_co_preadv,
 .bdrv_co_pwritev= nbd_client_co_pwritev,
+.bdrv_co_pwrite_zeroes  = nbd_client_co_pwrite_zeroes,
 .bdrv_close = nbd_close,
 .bdrv_co_flush_to_os= nbd_co_flush,
 .bdrv_co_pdiscard   = nbd_client_co_pdiscard,
-- 
2.7.4

[Qemu-block] [PATCH v7 07/16] nbd: Send message along with server NBD_REP_ERR errors

2016-10-14 Thread Eric Blake

The NBD Protocol allows us to send human-readable messages
along with any NBD_REP_ERR error during option negotiation;
make use of this fact for clients that know what to do with
our message.

Signed-off-by: Eric Blake 

---
v6: tweak comments, fix indentation
v5: don't leak 'msg'
v4: new patch
---
 nbd/server.c | 78 +---
 1 file changed, 59 insertions(+), 19 deletions(-)

diff --git a/nbd/server.c b/nbd/server.c
index 14a5bd5..3d39292 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -236,6 +236,38 @@ static int nbd_negotiate_send_rep(QIOChannel *ioc, 
uint32_t type, uint32_t opt)
 return nbd_negotiate_send_rep_len(ioc, type, opt, 0);
 }

+/* Send an error reply.
+ * Return -errno on error, 0 on success. */
+static int GCC_FMT_ATTR(4, 5)
+nbd_negotiate_send_rep_err(QIOChannel *ioc, uint32_t type,
+   uint32_t opt, const char *fmt, ...)
+{
+va_list va;
+char *msg;
+int ret;
+size_t len;
+
+va_start(va, fmt);
+msg = g_strdup_vprintf(fmt, va);
+va_end(va);
+len = strlen(msg);
+assert(len < 4096);
+TRACE("sending error message \"%s\"", msg);
+ret = nbd_negotiate_send_rep_len(ioc, type, opt, len);
+if (ret < 0) {
+goto out;
+}
+if (nbd_negotiate_write(ioc, msg, len) != len) {
+LOG("write failed (error message)");
+ret = -EIO;
+} else {
+ret = 0;
+}
+out:
+g_free(msg);
+return ret;
+}
+
 /* Send a single NBD_REP_SERVER reply to NBD_OPT_LIST, including payload.
  * Return -errno on error, 0 on success. */
 static int nbd_negotiate_send_rep_list(QIOChannel *ioc, NBDExport *exp)
@@ -281,8 +313,9 @@ static int nbd_negotiate_handle_list(NBDClient *client, 
uint32_t length)
 if (nbd_negotiate_drop_sync(client->ioc, length) != length) {
 return -EIO;
 }
-return nbd_negotiate_send_rep(client->ioc,
-  NBD_REP_ERR_INVALID, NBD_OPT_LIST);
+return nbd_negotiate_send_rep_err(client->ioc,
+  NBD_REP_ERR_INVALID, NBD_OPT_LIST,
+  "OPT_LIST should not have length");
 }

 /* For each export, send a NBD_REP_SERVER reply. */
@@ -329,7 +362,8 @@ fail:
 return rc;
 }

-
+/* Handle NBD_OPT_STARTTLS. Return NULL to drop connection, or else the
+ * new channel for all further (now-encrypted) communication. */
 static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,
  uint32_t length)
 {
@@ -343,7 +377,8 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient 
*client,
 if (nbd_negotiate_drop_sync(ioc, length) != length) {
 return NULL;
 }
-nbd_negotiate_send_rep(ioc, NBD_REP_ERR_INVALID, NBD_OPT_STARTTLS);
+nbd_negotiate_send_rep_err(ioc, NBD_REP_ERR_INVALID, NBD_OPT_STARTTLS,
+   "OPT_STARTTLS should not have length");
 return NULL;
 }

@@ -473,13 +508,15 @@ static int nbd_negotiate_options(NBDClient *client)
 return -EINVAL;

 default:
-TRACE("Option 0x%" PRIx32 " not permitted before TLS",
-  clientflags);
 if (nbd_negotiate_drop_sync(client->ioc, length) != length) {
 return -EIO;
 }
-ret = nbd_negotiate_send_rep(client->ioc, NBD_REP_ERR_TLS_REQD,
- clientflags);
+ret = nbd_negotiate_send_rep_err(client->ioc,
+ NBD_REP_ERR_TLS_REQD,
+ clientflags,
+ "Option 0x%" PRIx32
+ "not permitted before TLS",
+ clientflags);
 if (ret < 0) {
 return ret;
 }
@@ -505,27 +542,30 @@ static int nbd_negotiate_options(NBDClient *client)
 return -EIO;
 }
 if (client->tlscreds) {
-TRACE("TLS already enabled");
-ret = nbd_negotiate_send_rep(client->ioc,
- NBD_REP_ERR_INVALID,
- clientflags);
+ret = nbd_negotiate_send_rep_err(client->ioc,
+ NBD_REP_ERR_INVALID,
+ clientflags,
+ "TLS already enabled");
 } else {
-TRACE("TLS not configured");
-ret = nbd_negotiate_send_rep(client->ioc,
-

[Qemu-block] [PATCH v7 10/16] nbd: Let client skip portions of server reply

2016-10-14 Thread Eric Blake

The server has a nice helper function nbd_negotiate_drop_sync()
which lets it easily ignore fluff from the client (such as the
payload to an unknown option request).  We can't quite make it
common, since it depends on nbd_negotiate_read() which handles
coroutine magic, but we can copy the idea into the client where
we have places where we want to ignore data (such as the
description tacked on the end of NBD_REP_SERVER).

Signed-off-by: Eric Blake 

---
v6: rebase
v5: no change
v4: rebase
v3: rebase
---
 nbd/client.c | 47 +--
 1 file changed, 33 insertions(+), 14 deletions(-)

diff --git a/nbd/client.c b/nbd/client.c
index a3e1e7a..df7eb9c 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -75,6 +75,32 @@ static QTAILQ_HEAD(, NBDExport) exports = 
QTAILQ_HEAD_INITIALIZER(exports);

 */

+/* Discard length bytes from channel.  Return -errno on failure, or
+ * the amount of bytes consumed. */
+static ssize_t drop_sync(QIOChannel *ioc, size_t size)
+{
+ssize_t ret, dropped = size;
+char small[1024];
+char *buffer;
+
+buffer = sizeof(small) < size ? small : g_malloc(MIN(65536, size));
+while (size > 0) {
+ret = read_sync(ioc, buffer, MIN(65536, size));
+if (ret < 0) {
+goto cleanup;
+}
+assert(ret <= size);
+size -= ret;
+}
+ret = dropped;
+
+ cleanup:
+if (buffer != small) {
+g_free(buffer);
+}
+return ret;
+}
+
 /* Send an option request.
  *
  * The request is for option @opt, with @data containing @len bytes of
@@ -285,19 +311,12 @@ static int nbd_receive_list(QIOChannel *ioc, char **name, 
Error **errp)
 }
 (*name)[namelen] = '\0';
 len -= namelen;
-if (len) {
-char *buf = g_malloc(len + 1);
-if (read_sync(ioc, buf, len) != len) {
-error_setg(errp, "failed to read export description");
-g_free(*name);
-g_free(buf);
-*name = NULL;
-nbd_send_opt_abort(ioc);
-return -1;
-}
-buf[len] = '\0';
-TRACE("Ignoring export description: %s", buf);
-g_free(buf);
+if (drop_sync(ioc, len) != len) {
+error_setg(errp, "failed to read export description");
+g_free(*name);
+*name = NULL;
+nbd_send_opt_abort(ioc);
+return -1;
 }
 } else {
 error_setg(errp, "Unexpected reply type %" PRIx32 " expected %x",
@@ -576,7 +595,7 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char 
*name, uint16_t *flags,
 }

 TRACE("Size is %" PRIu64 ", export flags %" PRIx16, *size, *flags);
-if (read_sync(ioc, , 124) != 124) {
+if (drop_sync(ioc, 124) != 124) {
 error_setg(errp, "Failed to read reserved block");
 goto fail;
 }
-- 
2.7.4

[Qemu-block] [PATCH v7 06/16] nbd: Share common reply-sending code in server

2016-10-14 Thread Eric Blake

Rather than open-coding NBD_REP_SERVER, reuse the code we
already have by adding a length parameter.  Additionally,
the refactoring will make adding NBD_OPT_GO in a later patch
easier.

Signed-off-by: Eric Blake 

---
v6: improve (and add) function comments
v5: no change
v4: no change
v3: rebase to changes earlier in series
---
 nbd/server.c | 52 +++-
 1 file changed, 27 insertions(+), 25 deletions(-)

diff --git a/nbd/server.c b/nbd/server.c
index 8e0ad78..14a5bd5 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -196,12 +196,15 @@ static ssize_t nbd_negotiate_drop_sync(QIOChannel *ioc, 
size_t size)

 */

-static int nbd_negotiate_send_rep(QIOChannel *ioc, uint32_t type, uint32_t opt)
+/* Send a reply header, including length, but no payload.
+ * Return -errno on error, 0 on success. */
+static int nbd_negotiate_send_rep_len(QIOChannel *ioc, uint32_t type,
+  uint32_t opt, uint32_t len)
 {
 uint64_t magic;
-uint32_t len;

-TRACE("Reply opt=%" PRIx32 " type=%" PRIx32, type, opt);
+TRACE("Reply opt=%" PRIx32 " type=%" PRIx32 " len=%" PRIu32,
+  type, opt, len);

 magic = cpu_to_be64(NBD_REP_MAGIC);
 if (nbd_negotiate_write(ioc, , sizeof(magic)) != sizeof(magic)) {
@@ -218,7 +221,7 @@ static int nbd_negotiate_send_rep(QIOChannel *ioc, uint32_t 
type, uint32_t opt)
 LOG("write failed (rep type)");
 return -EINVAL;
 }
-len = cpu_to_be32(0);
+len = cpu_to_be32(len);
 if (nbd_negotiate_write(ioc, , sizeof(len)) != sizeof(len)) {
 LOG("write failed (rep data length)");
 return -EINVAL;
@@ -226,37 +229,32 @@ static int nbd_negotiate_send_rep(QIOChannel *ioc, 
uint32_t type, uint32_t opt)
 return 0;
 }

+/* Send a reply header with default 0 length.
+ * Return -errno on error, 0 on success. */
+static int nbd_negotiate_send_rep(QIOChannel *ioc, uint32_t type, uint32_t opt)
+{
+return nbd_negotiate_send_rep_len(ioc, type, opt, 0);
+}
+
+/* Send a single NBD_REP_SERVER reply to NBD_OPT_LIST, including payload.
+ * Return -errno on error, 0 on success. */
 static int nbd_negotiate_send_rep_list(QIOChannel *ioc, NBDExport *exp)
 {
-uint64_t magic;
 size_t name_len, desc_len;
-uint32_t opt, type, len;
+uint32_t len;
 const char *name = exp->name ? exp->name : "";
 const char *desc = exp->description ? exp->description : "";
+int rc;

 TRACE("Advertising export name '%s' description '%s'", name, desc);
 name_len = strlen(name);
 desc_len = strlen(desc);
-magic = cpu_to_be64(NBD_REP_MAGIC);
-if (nbd_negotiate_write(ioc, , sizeof(magic)) != sizeof(magic)) {
-LOG("write failed (magic)");
-return -EINVAL;
- }
-opt = cpu_to_be32(NBD_OPT_LIST);
-if (nbd_negotiate_write(ioc, , sizeof(opt)) != sizeof(opt)) {
-LOG("write failed (opt)");
-return -EINVAL;
-}
-type = cpu_to_be32(NBD_REP_SERVER);
-if (nbd_negotiate_write(ioc, , sizeof(type)) != sizeof(type)) {
-LOG("write failed (reply type)");
-return -EINVAL;
-}
-len = cpu_to_be32(name_len + desc_len + sizeof(len));
-if (nbd_negotiate_write(ioc, , sizeof(len)) != sizeof(len)) {
-LOG("write failed (length)");
-return -EINVAL;
+len = name_len + desc_len + sizeof(len);
+rc = nbd_negotiate_send_rep_len(ioc, NBD_REP_SERVER, NBD_OPT_LIST, len);
+if (rc < 0) {
+return rc;
 }
+
 len = cpu_to_be32(name_len);
 if (nbd_negotiate_write(ioc, , sizeof(len)) != sizeof(len)) {
 LOG("write failed (name length)");
@@ -273,6 +271,8 @@ static int nbd_negotiate_send_rep_list(QIOChannel *ioc, 
NBDExport *exp)
 return 0;
 }

+/* Process the NBD_OPT_LIST command, with a potential series of replies.
+ * Return -errno on error, 0 on success. */
 static int nbd_negotiate_handle_list(NBDClient *client, uint32_t length)
 {
 NBDExport *exp;
@@ -381,6 +381,8 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient 
*client,
 }


+/* Process all NBD_OPT_* client option commands.
+ * Return -errno on error, 0 on success. */
 static int nbd_negotiate_options(NBDClient *client)
 {
 uint32_t flags;
-- 
2.7.4

[Qemu-block] [PATCH v7 05/16] nbd: Rename struct nbd_request and nbd_reply

2016-10-14 Thread Eric Blake

Our coding convention prefers CamelCase names, and we already
have other existing structs with NBDFoo naming.  Let's be
consistent, before later patches add even more structs.

Signed-off-by: Eric Blake 
---
v6: new patch
---
 block/nbd-client.h  |  2 +-
 include/block/nbd.h | 10 ++
 block/nbd-client.c  | 28 ++--
 nbd/client.c|  4 ++--
 nbd/server.c| 12 ++--
 5 files changed, 29 insertions(+), 27 deletions(-)

diff --git a/block/nbd-client.h b/block/nbd-client.h
index a84a478..78e8e57 100644
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -29,7 +29,7 @@ typedef struct NBDClientSession {
 int in_flight;

 Coroutine *recv_coroutine[MAX_NBD_REQUESTS];
-struct nbd_reply reply;
+NBDReply reply;

 bool is_unix;
 } NBDClientSession;
diff --git a/include/block/nbd.h b/include/block/nbd.h
index 5fe2670..a33581b 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -29,18 +29,20 @@
 /* Note: these are _NOT_ the same as the network representation of an NBD
  * request and reply!
  */
-struct nbd_request {
+struct NBDRequest {
 uint64_t handle;
 uint64_t from;
 uint32_t len;
 uint16_t flags;
 uint16_t type;
 };
+typedef struct NBDRequest NBDRequest;

-struct nbd_reply {
+struct NBDReply {
 uint64_t handle;
 uint32_t error;
 };
+typedef struct NBDReply NBDReply;

 /* Transmission (export) flags: sent from server to client during handshake,
but describe what will happen during transmission */
@@ -101,8 +103,8 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char 
*name, uint16_t *flags,
   QIOChannel **outioc,
   off_t *size, Error **errp);
 int nbd_init(int fd, QIOChannelSocket *sioc, uint16_t flags, off_t size);
-ssize_t nbd_send_request(QIOChannel *ioc, struct nbd_request *request);
-ssize_t nbd_receive_reply(QIOChannel *ioc, struct nbd_reply *reply);
+ssize_t nbd_send_request(QIOChannel *ioc, NBDRequest *request);
+ssize_t nbd_receive_reply(QIOChannel *ioc, NBDReply *reply);
 int nbd_client(int fd);
 int nbd_disconnect(int fd);

diff --git a/block/nbd-client.c b/block/nbd-client.c
index c94608a..8e89add 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -116,7 +116,7 @@ static void nbd_restart_write(void *opaque)
 }

 static int nbd_co_send_request(BlockDriverState *bs,
-   struct nbd_request *request,
+   NBDRequest *request,
QEMUIOVector *qiov)
 {
 NBDClientSession *s = nbd_get_client_session(bs);
@@ -168,8 +168,8 @@ static int nbd_co_send_request(BlockDriverState *bs,
 }

 static void nbd_co_receive_reply(NBDClientSession *s,
- struct nbd_request *request,
- struct nbd_reply *reply,
+ NBDRequest *request,
+ NBDReply *reply,
  QEMUIOVector *qiov)
 {
 int ret;
@@ -196,7 +196,7 @@ static void nbd_co_receive_reply(NBDClientSession *s,
 }

 static void nbd_coroutine_start(NBDClientSession *s,
-   struct nbd_request *request)
+NBDRequest *request)
 {
 /* Poor man semaphore.  The free_sema is locked when no other request
  * can be accepted, and unlocked after receiving one reply.  */
@@ -210,7 +210,7 @@ static void nbd_coroutine_start(NBDClientSession *s,
 }

 static void nbd_coroutine_end(NBDClientSession *s,
-struct nbd_request *request)
+  NBDRequest *request)
 {
 int i = HANDLE_TO_INDEX(s, request->handle);
 s->recv_coroutine[i] = NULL;
@@ -223,12 +223,12 @@ int nbd_client_co_preadv(BlockDriverState *bs, uint64_t 
offset,
  uint64_t bytes, QEMUIOVector *qiov, int flags)
 {
 NBDClientSession *client = nbd_get_client_session(bs);
-struct nbd_request request = {
+NBDRequest request = {
 .type = NBD_CMD_READ,
 .from = offset,
 .len = bytes,
 };
-struct nbd_reply reply;
+NBDReply reply;
 ssize_t ret;

 assert(bytes <= NBD_MAX_BUFFER_SIZE);
@@ -249,12 +249,12 @@ int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t 
offset,
   uint64_t bytes, QEMUIOVector *qiov, int flags)
 {
 NBDClientSession *client = nbd_get_client_session(bs);
-struct nbd_request request = {
+NBDRequest request = {
 .type = NBD_CMD_WRITE,
 .from = offset,
 .len = bytes,
 };
-struct nbd_reply reply;
+NBDReply reply;
 ssize_t ret;

 if (flags & BDRV_REQ_FUA) {
@@ -278,8 +278,8 @@ int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t 
offset,
 int nbd_client_co_flush(BlockDriverState *bs)
 {
 NBDClientSession *client = nbd_get_client_session(bs);
-struct nbd_request request = { .type = NBD_CMD_FLUSH };
-struct nbd_reply reply;
+NBDRequest request =

[Qemu-block] [PATCH v7 09/16] nbd: Let server know when client gives up negotiation

2016-10-14 Thread Eric Blake

The NBD spec says that a client should send NBD_OPT_ABORT
rather than just dropping the connection, if the client doesn't
like something the server sent during option negotiation.  This
is a best-effort attempt only, and can only be done in places
where we know the server is still in sync with what we've sent,
whether or not we've read everything the server has sent.
Technically, the server then has to reply with NBD_REP_ACK, but
it's not worth complicating the client to wait around for that
reply.

Signed-off-by: Eric Blake 

---
v6: rebase
v5: no change
v4: new patch
---
 nbd/client.c | 30 ++
 1 file changed, 30 insertions(+)

diff --git a/nbd/client.c b/nbd/client.c
index f7a2d6e..a3e1e7a 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -111,6 +111,19 @@ static int nbd_send_option_request(QIOChannel *ioc, 
uint32_t opt,
 return 0;
 }

+/* Send NBD_OPT_ABORT as a courtesy to let the server know that we are
+ * not going to attempt further negotiation. */
+static void nbd_send_opt_abort(QIOChannel *ioc)
+{
+/* Technically, a compliant server is supposed to reply to us; but
+ * older servers disconnected instead. At any rate, we're allowed
+ * to disconnect without waiting for the server reply, so we don't
+ * even care if the request makes it to the server, let alone
+ * waiting around for whether the server replies. */
+nbd_send_option_request(ioc, NBD_OPT_ABORT, 0, NULL, NULL);
+}
+
+
 /* Receive the header of an option reply, which should match the given
  * opt.  Read through the length field, but NOT the length bytes of
  * payload. Return 0 if successful, -1 with errp set if it is
@@ -121,6 +134,7 @@ static int nbd_receive_option_reply(QIOChannel *ioc, 
uint32_t opt,
 QEMU_BUILD_BUG_ON(sizeof(*reply) != 20);
 if (read_sync(ioc, reply, sizeof(*reply)) != sizeof(*reply)) {
 error_setg(errp, "failed to read option reply");
+nbd_send_opt_abort(ioc);
 return -1;
 }
 be64_to_cpus(>magic);
@@ -133,11 +147,13 @@ static int nbd_receive_option_reply(QIOChannel *ioc, 
uint32_t opt,

 if (reply->magic != NBD_REP_MAGIC) {
 error_setg(errp, "Unexpected option reply magic");
+nbd_send_opt_abort(ioc);
 return -1;
 }
 if (reply->option != opt) {
 error_setg(errp, "Unexpected option type %x expected %x",
reply->option, opt);
+nbd_send_opt_abort(ioc);
 return -1;
 }
 return 0;
@@ -206,6 +222,9 @@ static int nbd_handle_reply_err(QIOChannel *ioc, 
nbd_opt_reply *reply,

  cleanup:
 g_free(msg);
+if (result < 0) {
+nbd_send_opt_abort(ioc);
+}
 return result;
 }

@@ -229,25 +248,30 @@ static int nbd_receive_list(QIOChannel *ioc, char **name, 
Error **errp)
 if (reply.type == NBD_REP_ACK) {
 if (len != 0) {
 error_setg(errp, "length too long for option end");
+nbd_send_opt_abort(ioc);
 return -1;
 }
 } else if (reply.type == NBD_REP_SERVER) {
 if (len < sizeof(namelen) || len > NBD_MAX_BUFFER_SIZE) {
 error_setg(errp, "incorrect option length %" PRIu32, len);
+nbd_send_opt_abort(ioc);
 return -1;
 }
 if (read_sync(ioc, , sizeof(namelen)) != sizeof(namelen)) {
 error_setg(errp, "failed to read option name length");
+nbd_send_opt_abort(ioc);
 return -1;
 }
 namelen = be32_to_cpu(namelen);
 len -= sizeof(namelen);
 if (len < namelen) {
 error_setg(errp, "incorrect option name length");
+nbd_send_opt_abort(ioc);
 return -1;
 }
 if (namelen > NBD_MAX_NAME_SIZE) {
 error_setg(errp, "export name length too long %" PRIu32, namelen);
+nbd_send_opt_abort(ioc);
 return -1;
 }

@@ -256,6 +280,7 @@ static int nbd_receive_list(QIOChannel *ioc, char **name, 
Error **errp)
 error_setg(errp, "failed to read export name");
 g_free(*name);
 *name = NULL;
+nbd_send_opt_abort(ioc);
 return -1;
 }
 (*name)[namelen] = '\0';
@@ -267,6 +292,7 @@ static int nbd_receive_list(QIOChannel *ioc, char **name, 
Error **errp)
 g_free(*name);
 g_free(buf);
 *name = NULL;
+nbd_send_opt_abort(ioc);
 return -1;
 }
 buf[len] = '\0';
@@ -276,6 +302,7 @@ static int nbd_receive_list(QIOChannel *ioc, char **name, 
Error **errp)
 } else {
 error_setg(errp, "Unexpected reply type %" PRIx32 " expected %x",
reply.type, NBD_REP_SERVER);
+nbd_send_opt_abort(ioc);
 return -1;
 }
 return 1;
@@ -325,6 +352,7 @@ static int nbd_receive_query_exports(QIOChannel *ioc,

 if (!foundExport) {
 error_setg(errp, "No export with name '%s'

[Qemu-block] [PATCH v7 01/16] nbd: Add qemu-nbd -D for human-readable description

2016-10-14 Thread Eric Blake

The NBD protocol allows servers to advertise a human-readable
description alongside an export name during NBD_OPT_LIST.  Add
an option to pass through the user's string to the NBD client.

Doing this also makes it easier to test commit 200650d4, which
is the client counterpart of receiving the description.

Signed-off-by: Eric Blake 

---
v6: rebase to latest
v5: rebase to latest
v4: rebase to latest
---
 include/block/nbd.h |  1 +
 nbd/nbd-internal.h  |  5 +++--
 nbd/server.c| 34 ++
 qemu-nbd.c  | 12 +++-
 qemu-nbd.texi   |  5 -
 5 files changed, 45 insertions(+), 12 deletions(-)

diff --git a/include/block/nbd.h b/include/block/nbd.h
index 80610ff..fd58390 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -115,6 +115,7 @@ BlockBackend *nbd_export_get_blockdev(NBDExport *exp);

 NBDExport *nbd_export_find(const char *name);
 void nbd_export_set_name(NBDExport *exp, const char *name);
+void nbd_export_set_description(NBDExport *exp, const char *description);
 void nbd_export_close_all(void);

 void nbd_client_new(NBDExport *exp,
diff --git a/nbd/nbd-internal.h b/nbd/nbd-internal.h
index 93a6ca8..7e78064 100644
--- a/nbd/nbd-internal.h
+++ b/nbd/nbd-internal.h
@@ -104,9 +104,10 @@ static inline ssize_t read_sync(QIOChannel *ioc, void 
*buffer, size_t size)
 return nbd_wr_syncv(ioc, , 1, size, true);
 }

-static inline ssize_t write_sync(QIOChannel *ioc, void *buffer, size_t size)
+static inline ssize_t write_sync(QIOChannel *ioc, const void *buffer,
+ size_t size)
 {
-struct iovec iov = { .iov_base = buffer, .iov_len = size };
+struct iovec iov = { .iov_base = (void *) buffer, .iov_len = size };

 return nbd_wr_syncv(ioc, , 1, size, false);
 }
diff --git a/nbd/server.c b/nbd/server.c
index 472f584..319827b 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -61,6 +61,7 @@ struct NBDExport {

 BlockBackend *blk;
 char *name;
+char *description;
 off_t dev_offset;
 off_t size;
 uint16_t nbdflags;
@@ -129,7 +130,8 @@ static ssize_t nbd_negotiate_read(QIOChannel *ioc, void 
*buffer, size_t size)

 }

-static ssize_t nbd_negotiate_write(QIOChannel *ioc, void *buffer, size_t size)
+static ssize_t nbd_negotiate_write(QIOChannel *ioc, const void *buffer,
+   size_t size)
 {
 ssize_t ret;
 guint watch;
@@ -225,11 +227,15 @@ static int nbd_negotiate_send_rep(QIOChannel *ioc, 
uint32_t type, uint32_t opt)

 static int nbd_negotiate_send_rep_list(QIOChannel *ioc, NBDExport *exp)
 {
-uint64_t magic, name_len;
+uint64_t magic;
+size_t name_len, desc_len;
 uint32_t opt, type, len;
+const char *name = exp->name ? exp->name : "";
+const char *desc = exp->description ? exp->description : "";

-TRACE("Advertising export name '%s'", exp->name ? exp->name : "");
-name_len = strlen(exp->name);
+TRACE("Advertising export name '%s' description '%s'", name, desc);
+name_len = strlen(name);
+desc_len = strlen(desc);
 magic = cpu_to_be64(NBD_REP_MAGIC);
 if (nbd_negotiate_write(ioc, , sizeof(magic)) != sizeof(magic)) {
 LOG("write failed (magic)");
@@ -245,18 +251,22 @@ static int nbd_negotiate_send_rep_list(QIOChannel *ioc, 
NBDExport *exp)
 LOG("write failed (reply type)");
 return -EINVAL;
 }
-len = cpu_to_be32(name_len + sizeof(len));
+len = cpu_to_be32(name_len + desc_len + sizeof(len));
 if (nbd_negotiate_write(ioc, , sizeof(len)) != sizeof(len)) {
 LOG("write failed (length)");
 return -EINVAL;
 }
 len = cpu_to_be32(name_len);
 if (nbd_negotiate_write(ioc, , sizeof(len)) != sizeof(len)) {
-LOG("write failed (length)");
+LOG("write failed (name length)");
 return -EINVAL;
 }
-if (nbd_negotiate_write(ioc, exp->name, name_len) != name_len) {
-LOG("write failed (buffer)");
+if (nbd_negotiate_write(ioc, name, name_len) != name_len) {
+LOG("write failed (name buffer)");
+return -EINVAL;
+}
+if (nbd_negotiate_write(ioc, desc, desc_len) != desc_len) {
+LOG("write failed (description buffer)");
 return -EINVAL;
 }
 return 0;
@@ -893,6 +903,12 @@ void nbd_export_set_name(NBDExport *exp, const char *name)
 nbd_export_put(exp);
 }

+void nbd_export_set_description(NBDExport *exp, const char *description)
+{
+g_free(exp->description);
+exp->description = g_strdup(description);
+}
+
 void nbd_export_close(NBDExport *exp)
 {
 NBDClient *client, *next;
@@ -902,6 +918,7 @@ void nbd_export_close(NBDExport *exp)
 client_close(client);
 }
 nbd_export_set_name(exp, NULL);
+nbd_export_set_description(exp, NULL);
 nbd_export_put(exp);
 }

@@ -920,6 +937,7 @@ void nbd_export_put(NBDExport *exp)

 if (--exp->refcount == 0) {
 assert(exp->name == NULL);
+assert(exp->description ==

[Qemu-block] [PATCH v7 08/16] nbd: Share common option-sending code in client

2016-10-14 Thread Eric Blake

Rather than open-coding each option request, it's easier to
have common helper functions do the work.  That in turn requires
having convenient packed types for handling option requests
and replies.

Signed-off-by: Eric Blake 

---
v6: comment and formatting tweaks
v5: no change
v4: rebase
v3: rebase, tweak a debug message
---
 include/block/nbd.h |  25 +-
 nbd/nbd-internal.h  |   2 +-
 nbd/client.c| 255 ++--
 3 files changed, 131 insertions(+), 151 deletions(-)

diff --git a/include/block/nbd.h b/include/block/nbd.h
index a33581b..b69bf1d 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -26,15 +26,34 @@
 #include "io/channel-socket.h"
 #include "crypto/tlscreds.h"

-/* Note: these are _NOT_ the same as the network representation of an NBD
+/* Handshake phase structs - this struct is passed on the wire */
+
+struct nbd_option {
+uint64_t magic; /* NBD_OPTS_MAGIC */
+uint32_t option; /* NBD_OPT_* */
+uint32_t length;
+} QEMU_PACKED;
+typedef struct nbd_option nbd_option;
+
+struct nbd_opt_reply {
+uint64_t magic; /* NBD_REP_MAGIC */
+uint32_t option; /* NBD_OPT_* */
+uint32_t type; /* NBD_REP_* */
+uint32_t length;
+} QEMU_PACKED;
+typedef struct nbd_opt_reply nbd_opt_reply;
+
+/* Transmission phase structs
+ *
+ * Note: these are _NOT_ the same as the network representation of an NBD
  * request and reply!
  */
 struct NBDRequest {
 uint64_t handle;
 uint64_t from;
 uint32_t len;
-uint16_t flags;
-uint16_t type;
+uint16_t flags; /* NBD_CMD_FLAG_* */
+uint16_t type; /* NBD_CMD_* */
 };
 typedef struct NBDRequest NBDRequest;

diff --git a/nbd/nbd-internal.h b/nbd/nbd-internal.h
index 99e5157..dd57e18 100644
--- a/nbd/nbd-internal.h
+++ b/nbd/nbd-internal.h
@@ -62,7 +62,7 @@
 #define NBD_REPLY_MAGIC 0x67446698
 #define NBD_OPTS_MAGIC  0x49484156454F5054LL
 #define NBD_CLIENT_MAGIC0x420281861253LL
-#define NBD_REP_MAGIC   0x3e889045565a9LL
+#define NBD_REP_MAGIC   0x0003e889045565a9LL

 #define NBD_SET_SOCK_IO(0xab, 0)
 #define NBD_SET_BLKSIZE _IO(0xab, 1)
diff --git a/nbd/client.c b/nbd/client.c
index 86e29dc..f7a2d6e 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -75,64 +75,128 @@ static QTAILQ_HEAD(, NBDExport) exports = 
QTAILQ_HEAD_INITIALIZER(exports);

 */

+/* Send an option request.
+ *
+ * The request is for option @opt, with @data containing @len bytes of
+ * additional payload for the request (@len may be -1 to treat @data as
+ * a C string; and @data may be NULL if @len is 0).
+ * Return 0 if successful, -1 with errp set if it is impossible to
+ * continue. */
+static int nbd_send_option_request(QIOChannel *ioc, uint32_t opt,
+   uint32_t len, const char *data,
+   Error **errp)
+{
+nbd_option req;
+QEMU_BUILD_BUG_ON(sizeof(req) != 16);

-/* If type represents success, return 1 without further action.
- * If type represents an error reply, consume the rest of the packet on ioc.
- * Then return 0 for unsupported (so the client can fall back to
- * other approaches), or -1 with errp set for other errors.
+if (len == -1) {
+req.length = len = strlen(data);
+}
+TRACE("Sending option request %" PRIu32", len %" PRIu32, opt, len);
+
+stq_be_p(, NBD_OPTS_MAGIC);
+stl_be_p(, opt);
+stl_be_p(, len);
+
+if (write_sync(ioc, , sizeof(req)) != sizeof(req)) {
+error_setg(errp, "Failed to send option request header");
+return -1;
+}
+
+if (len && write_sync(ioc, (char *) data, len) != len) {
+error_setg(errp, "Failed to send option request data");
+return -1;
+}
+
+return 0;
+}
+
+/* Receive the header of an option reply, which should match the given
+ * opt.  Read through the length field, but NOT the length bytes of
+ * payload. Return 0 if successful, -1 with errp set if it is
+ * impossible to continue. */
+static int nbd_receive_option_reply(QIOChannel *ioc, uint32_t opt,
+nbd_opt_reply *reply, Error **errp)
+{
+QEMU_BUILD_BUG_ON(sizeof(*reply) != 20);
+if (read_sync(ioc, reply, sizeof(*reply)) != sizeof(*reply)) {
+error_setg(errp, "failed to read option reply");
+return -1;
+}
+be64_to_cpus(>magic);
+be32_to_cpus(>option);
+be32_to_cpus(>type);
+be32_to_cpus(>length);
+
+TRACE("Received option reply %" PRIx32", type %" PRIx32", len %" PRIu32,
+  reply->option, reply->type, reply->length);
+
+if (reply->magic != NBD_REP_MAGIC) {
+error_setg(errp, "Unexpected option reply magic");
+return -1;
+}
+if (reply->option != opt) {
+error_setg(errp, "Unexpected option type %x expected %x",
+   reply->option, opt);
+return -1;
+}
+return 0;
+}
+
+/* If reply represents success, return 1 without

[Qemu-block] [PATCH v7 02/16] nbd: Treat flags vs. command type as separate fields

2016-10-14 Thread Eric Blake

Current upstream NBD documents that requests have a 16-bit flags,
followed by a 16-bit type integer; although older versions mentioned
only a 32-bit field with masking to find flags.  Since the protocol
is in network order (big-endian over the wire), the ABI is unchanged;
but dealing with the flags as a separate field rather than masking
will make it easier to add support for upcoming NBD extensions that
increase the number of both flags and commands.

Improve some comments in nbd.h based on the current upstream
NBD protocol (https://github.com/yoe/nbd/blob/master/doc/proto.md),
and touch some nearby code to keep checkpatch.pl happy.

Signed-off-by: Eric Blake 

---
v6: no change
v5: no change
v4: rebase to earlier changes
v3: rebase to other changes earlier in series
---
 include/block/nbd.h | 18 --
 nbd/nbd-internal.h  |  4 ++--
 block/nbd-client.c  |  9 +++--
 nbd/client.c|  9 ++---
 nbd/server.c| 39 +++
 5 files changed, 42 insertions(+), 37 deletions(-)

diff --git a/include/block/nbd.h b/include/block/nbd.h
index fd58390..5fe2670 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -1,4 +1,5 @@
 /*
+ *  Copyright (C) 2016 Red Hat, Inc.
  *  Copyright (C) 2005  Anthony Liguori 
  *
  *  Network Block Device
@@ -32,7 +33,8 @@ struct nbd_request {
 uint64_t handle;
 uint64_t from;
 uint32_t len;
-uint32_t type;
+uint16_t flags;
+uint16_t type;
 };

 struct nbd_reply {
@@ -40,6 +42,8 @@ struct nbd_reply {
 uint32_t error;
 };

+/* Transmission (export) flags: sent from server to client during handshake,
+   but describe what will happen during transmission */
 #define NBD_FLAG_HAS_FLAGS  (1 << 0)/* Flags are there */
 #define NBD_FLAG_READ_ONLY  (1 << 1)/* Device is read-only */
 #define NBD_FLAG_SEND_FLUSH (1 << 2)/* Send FLUSH */
@@ -47,10 +51,12 @@ struct nbd_reply {
 #define NBD_FLAG_ROTATIONAL (1 << 4)/* Use elevator algorithm - 
rotational media */
 #define NBD_FLAG_SEND_TRIM  (1 << 5)/* Send TRIM (discard) */

-/* New-style global flags. */
+/* New-style handshake (global) flags, sent from server to client, and
+   control what will happen during handshake phase. */
 #define NBD_FLAG_FIXED_NEWSTYLE (1 << 0)/* Fixed newstyle protocol. */

-/* New-style client flags. */
+/* New-style client flags, sent from client to server to control what happens
+   during handshake phase. */
 #define NBD_FLAG_C_FIXED_NEWSTYLE   (1 << 0)/* Fixed newstyle protocol. */

 /* Reply types. */
@@ -61,10 +67,10 @@ struct nbd_reply {
 #define NBD_REP_ERR_INVALID ((UINT32_C(1) << 31) | 3) /* Invalid length. */
 #define NBD_REP_ERR_TLS_REQD((UINT32_C(1) << 31) | 5) /* TLS required */

+/* Request flags, sent from client to server during transmission phase */
+#define NBD_CMD_FLAG_FUA(1 << 0)

-#define NBD_CMD_MASK_COMMAND   0x
-#define NBD_CMD_FLAG_FUA   (1 << 16)
-
+/* Supported request types */
 enum {
 NBD_CMD_READ = 0,
 NBD_CMD_WRITE = 1,
diff --git a/nbd/nbd-internal.h b/nbd/nbd-internal.h
index 7e78064..99e5157 100644
--- a/nbd/nbd-internal.h
+++ b/nbd/nbd-internal.h
@@ -53,10 +53,10 @@
 /* This is all part of the "official" NBD API.
  *
  * The most up-to-date documentation is available at:
- * https://github.com/yoe/nbd/blob/master/doc/proto.txt
+ * https://github.com/yoe/nbd/blob/master/doc/proto.md
  */

-#define NBD_REQUEST_SIZE(4 + 4 + 8 + 8 + 4)
+#define NBD_REQUEST_SIZE(4 + 2 + 2 + 8 + 8 + 4)
 #define NBD_REPLY_SIZE  (4 + 4 + 8)
 #define NBD_REQUEST_MAGIC   0x25609513
 #define NBD_REPLY_MAGIC 0x67446698
diff --git a/block/nbd-client.c b/block/nbd-client.c
index 2cf3237..7e9c3ec 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -1,6 +1,7 @@
 /*
  * QEMU Block driver for  NBD
  *
+ * Copyright (C) 2016 Red Hat, Inc.
  * Copyright (C) 2008 Bull S.A.S.
  * Author: Laurent Vivier 
  *
@@ -258,7 +259,7 @@ int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t 
offset,

 if (flags & BDRV_REQ_FUA) {
 assert(client->nbdflags & NBD_FLAG_SEND_FUA);
-request.type |= NBD_CMD_FLAG_FUA;
+request.flags |= NBD_CMD_FLAG_FUA;
 }

 assert(bytes <= NBD_MAX_BUFFER_SIZE);
@@ -343,11 +344,7 @@ void nbd_client_attach_aio_context(BlockDriverState *bs,
 void nbd_client_close(BlockDriverState *bs)
 {
 NbdClientSession *client = nbd_get_client_session(bs);
-struct nbd_request request = {
-.type = NBD_CMD_DISC,
-.from = 0,
-.len = 0
-};
+struct nbd_request request = { .type = NBD_CMD_DISC };

 if (client->ioc == NULL) {
 return;
diff --git a/nbd/client.c b/nbd/client.c
index a92f1e2..7c172ed 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -1,4 +1,5 @@
 /*
+ *  Copyright (C) 2016 Red Hat, Inc.
  *  Copyright (C) 2005

[Qemu-block] [PATCH v7 04/16] nbd: Rename NbdClientSession to NBDClientSession

2016-10-14 Thread Eric Blake

It's better to use consistent capitalization of the namespace
used for NBD functions; we have more instances of NBD* than
Nbd*.

Signed-off-by: Eric Blake 

---
v6: new patch
---
 block/nbd-client.h |  6 +++---
 block/nbd-client.c | 26 +-
 block/nbd.c|  4 ++--
 3 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/block/nbd-client.h b/block/nbd-client.h
index 044aca4..a84a478 100644
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -17,7 +17,7 @@

 #define MAX_NBD_REQUESTS16

-typedef struct NbdClientSession {
+typedef struct NBDClientSession {
 QIOChannelSocket *sioc; /* The master data channel */
 QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */
 uint16_t nbdflags;
@@ -32,9 +32,9 @@ typedef struct NbdClientSession {
 struct nbd_reply reply;

 bool is_unix;
-} NbdClientSession;
+} NBDClientSession;

-NbdClientSession *nbd_get_client_session(BlockDriverState *bs);
+NBDClientSession *nbd_get_client_session(BlockDriverState *bs);

 int nbd_client_init(BlockDriverState *bs,
 QIOChannelSocket *sock,
diff --git a/block/nbd-client.c b/block/nbd-client.c
index 7e9c3ec..c94608a 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -33,7 +33,7 @@
 #define HANDLE_TO_INDEX(bs, handle) ((handle) ^ ((uint64_t)(intptr_t)bs))
 #define INDEX_TO_HANDLE(bs, index)  ((index)  ^ ((uint64_t)(intptr_t)bs))

-static void nbd_recv_coroutines_enter_all(NbdClientSession *s)
+static void nbd_recv_coroutines_enter_all(NBDClientSession *s)
 {
 int i;

@@ -46,7 +46,7 @@ static void nbd_recv_coroutines_enter_all(NbdClientSession *s)

 static void nbd_teardown_connection(BlockDriverState *bs)
 {
-NbdClientSession *client = nbd_get_client_session(bs);
+NBDClientSession *client = nbd_get_client_session(bs);

 if (!client->ioc) { /* Already closed */
 return;
@@ -68,7 +68,7 @@ static void nbd_teardown_connection(BlockDriverState *bs)
 static void nbd_reply_ready(void *opaque)
 {
 BlockDriverState *bs = opaque;
-NbdClientSession *s = nbd_get_client_session(bs);
+NBDClientSession *s = nbd_get_client_session(bs);
 uint64_t i;
 int ret;

@@ -119,7 +119,7 @@ static int nbd_co_send_request(BlockDriverState *bs,
struct nbd_request *request,
QEMUIOVector *qiov)
 {
-NbdClientSession *s = nbd_get_client_session(bs);
+NBDClientSession *s = nbd_get_client_session(bs);
 AioContext *aio_context;
 int rc, ret, i;

@@ -167,7 +167,7 @@ static int nbd_co_send_request(BlockDriverState *bs,
 return rc;
 }

-static void nbd_co_receive_reply(NbdClientSession *s,
+static void nbd_co_receive_reply(NBDClientSession *s,
  struct nbd_request *request,
  struct nbd_reply *reply,
  QEMUIOVector *qiov)
@@ -195,7 +195,7 @@ static void nbd_co_receive_reply(NbdClientSession *s,
 }
 }

-static void nbd_coroutine_start(NbdClientSession *s,
+static void nbd_coroutine_start(NBDClientSession *s,
struct nbd_request *request)
 {
 /* Poor man semaphore.  The free_sema is locked when no other request
@@ -209,7 +209,7 @@ static void nbd_coroutine_start(NbdClientSession *s,
 /* s->recv_coroutine[i] is set as soon as we get the send_lock.  */
 }

-static void nbd_coroutine_end(NbdClientSession *s,
+static void nbd_coroutine_end(NBDClientSession *s,
 struct nbd_request *request)
 {
 int i = HANDLE_TO_INDEX(s, request->handle);
@@ -222,7 +222,7 @@ static void nbd_coroutine_end(NbdClientSession *s,
 int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
  uint64_t bytes, QEMUIOVector *qiov, int flags)
 {
-NbdClientSession *client = nbd_get_client_session(bs);
+NBDClientSession *client = nbd_get_client_session(bs);
 struct nbd_request request = {
 .type = NBD_CMD_READ,
 .from = offset,
@@ -248,7 +248,7 @@ int nbd_client_co_preadv(BlockDriverState *bs, uint64_t 
offset,
 int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
   uint64_t bytes, QEMUIOVector *qiov, int flags)
 {
-NbdClientSession *client = nbd_get_client_session(bs);
+NBDClientSession *client = nbd_get_client_session(bs);
 struct nbd_request request = {
 .type = NBD_CMD_WRITE,
 .from = offset,
@@ -277,7 +277,7 @@ int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t 
offset,

 int nbd_client_co_flush(BlockDriverState *bs)
 {
-NbdClientSession *client = nbd_get_client_session(bs);
+NBDClientSession *client = nbd_get_client_session(bs);
 struct nbd_request request = { .type = NBD_CMD_FLUSH };
 struct nbd_reply reply;
 ssize_t ret;
@@ -302,7 +302,7 @@ int nbd_client_co_flush(BlockDriverState *bs)

 int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
 {
-NbdClientSession

[Qemu-block] [PATCH v7 00/16] nbd: efficient write zeroes

2016-10-14 Thread Eric Blake

Also available as a tag at:
git fetch git://repo.or.cz/qemu/ericb.git nbd-zero-v7

v5 was here, but missed 2.7 freeze:
https://lists.gnu.org/archive/html/qemu-devel/2016-07/msg04053.html

Since then, I've rebased the series, and the bulk of the changes
were to use consistent NBDFoo CamelCase naming, as well as to
improve the commit messages for questions raised on v5.

v6 was here, with no human review yet:
https://lists.gnu.org/archive/html/qemu-devel/2016-10/msg03048.html

Since then, I addressed the buildbot complaints.

001/16:[] [--] 'nbd: Add qemu-nbd -D for human-readable description'
002/16:[] [--] 'nbd: Treat flags vs. command type as separate fields'
003/16:[] [--] 'nbd: Rename NBDRequest to NBDRequestData'
004/16:[] [--] 'nbd: Rename NbdClientSession to NBDClientSession'
005/16:[] [--] 'nbd: Rename struct nbd_request and nbd_reply'
006/16:[] [--] 'nbd: Share common reply-sending code in server'
007/16:[] [--] 'nbd: Send message along with server NBD_REP_ERR errors'
008/16:[] [--] 'nbd: Share common option-sending code in client'
009/16:[] [--] 'nbd: Let server know when client gives up negotiation'
010/16:[] [--] 'nbd: Let client skip portions of server reply'
011/16:[] [--] 'nbd: Less allocation during NBD_OPT_LIST'
012/16:[] [--] 'nbd: Support shorter handshake'
013/16:[down] 'nbd: Refactor conversion to errno to silence checkpatch'
014/16:[0012] [FC] 'nbd: Improve server handling of shutdown requests'
015/16:[] [--] 'nbd: Implement NBD_CMD_WRITE_ZEROES on server'
016/16:[] [--] 'nbd: Implement NBD_CMD_WRITE_ZEROES on client'

Eric Blake (16):
  nbd: Add qemu-nbd -D for human-readable description
  nbd: Treat flags vs. command type as separate fields
  nbd: Rename NBDRequest to NBDRequestData
  nbd: Rename NbdClientSession to NBDClientSession
  nbd: Rename struct nbd_request and nbd_reply
  nbd: Share common reply-sending code in server
  nbd: Send message along with server NBD_REP_ERR errors
  nbd: Share common option-sending code in client
  nbd: Let server know when client gives up negotiation
  nbd: Let client skip portions of server reply
  nbd: Less allocation during NBD_OPT_LIST
  nbd: Support shorter handshake
  nbd: Refactor conversion to errno to silence checkpatch
  nbd: Improve server handling of shutdown requests
  nbd: Implement NBD_CMD_WRITE_ZEROES on server
  nbd: Implement NBD_CMD_WRITE_ZEROES on client

 block/nbd-client.h  |  10 +-
 include/block/nbd.h |  73 ++--
 nbd/nbd-internal.h  |  12 +-
 block/nbd-client.c  |  96 ++
 block/nbd.c |   8 +-
 nbd/client.c| 510 
 nbd/server.c| 296 --
 qemu-nbd.c  |  12 +-
 qemu-nbd.texi   |   5 +-
 9 files changed, 638 insertions(+), 384 deletions(-)

-- 
2.7.4

[Qemu-block] [PATCH v7 03/16] nbd: Rename NBDRequest to NBDRequestData

2016-10-14 Thread Eric Blake

We have both 'struct NBDRequest' and 'struct nbd_request'; making
it confusing to see which does what.  Furthermore, we want to
rename nbd_request to align with our normal CamelCase naming
conventions.  So, rename the struct which is used to associate
the data received during request callbacks, while leaving the
shorter name for the description of the request sent over the
wire in the NBD protocol.

Signed-off-by: Eric Blake 

---
v6: new patch
---
 nbd/server.c | 20 ++--
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/nbd/server.c b/nbd/server.c
index 2e84d51..78c0419 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -47,10 +47,10 @@ static int system_errno_to_nbd_errno(int err)

 /* Definitions for opaque data types */

-typedef struct NBDRequest NBDRequest;
+typedef struct NBDRequestData NBDRequestData;

-struct NBDRequest {
-QSIMPLEQ_ENTRY(NBDRequest) entry;
+struct NBDRequestData {
+QSIMPLEQ_ENTRY(NBDRequestData) entry;
 NBDClient *client;
 uint8_t *data;
 bool complete;
@@ -759,21 +759,21 @@ static void client_close(NBDClient *client)
 }
 }

-static NBDRequest *nbd_request_get(NBDClient *client)
+static NBDRequestData *nbd_request_get(NBDClient *client)
 {
-NBDRequest *req;
+NBDRequestData *req;

 assert(client->nb_requests <= MAX_NBD_REQUESTS - 1);
 client->nb_requests++;
 nbd_update_can_read(client);

-req = g_new0(NBDRequest, 1);
+req = g_new0(NBDRequestData, 1);
 nbd_client_get(client);
 req->client = client;
 return req;
 }

-static void nbd_request_put(NBDRequest *req)
+static void nbd_request_put(NBDRequestData *req)
 {
 NBDClient *client = req->client;

@@ -975,7 +975,7 @@ void nbd_export_close_all(void)
 }
 }

-static ssize_t nbd_co_send_reply(NBDRequest *req, struct nbd_reply *reply,
+static ssize_t nbd_co_send_reply(NBDRequestData *req, struct nbd_reply *reply,
  int len)
 {
 NBDClient *client = req->client;
@@ -1011,7 +1011,7 @@ static ssize_t nbd_co_send_reply(NBDRequest *req, struct 
nbd_reply *reply,
  * and any other negative value to report an error to the client
  * (although the caller may still need to disconnect after reporting
  * the error).  */
-static ssize_t nbd_co_receive_request(NBDRequest *req,
+static ssize_t nbd_co_receive_request(NBDRequestData *req,
   struct nbd_request *request)
 {
 NBDClient *client = req->client;
@@ -1105,7 +1105,7 @@ static void nbd_trip(void *opaque)
 {
 NBDClient *client = opaque;
 NBDExport *exp = client->exp;
-NBDRequest *req;
+NBDRequestData *req;
 struct nbd_request request;
 struct nbd_reply reply;
 ssize_t ret;
-- 
2.7.4

Re: [Qemu-block] [PATCH] hw/block/nvme: Simplify if-statements a little bit

2016-10-14 Thread Stefan Hajnoczi

On Wed, Oct 12, 2016 at 05:18:40PM +0200, Thomas Huth wrote:
> The condition  '!A || (A && B)' is equivalent to '!A || B'.
> 
> Buglink: https://bugs.launchpad.net/qemu/+bug/1464611
> Signed-off-by: Thomas Huth 
> ---
>  hw/block/nvme.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)

Reviewed-by: Stefan Hajnoczi 


signature.asc
Description: PGP signature

Re: [Qemu-block] [PATCH 7/7] blockjobs: fix documentation

2016-10-14 Thread Kevin Wolf

Am 14.10.2016 um 00:57 hat John Snow geschrieben:
> (Trivial)
> 
> Fix wrong function names in documentation.
> 
> Signed-off-by: John Snow 

Reviewed-by: Kevin Wolf

Re: [Qemu-block] [PATCH 5/7] Blockjobs: Internalize user_pause logic

2016-10-14 Thread Kevin Wolf

Am 14.10.2016 um 00:57 hat John Snow geschrieben:
> BlockJobs will begin hiding their state in preparation for some
> refactorings anyway, so let's internalize the user_pause mechanism
> instead of leaving it to callers to correctly manage.
> 
> Signed-off-by: John Snow 

Reviewed-by: Kevin Wolf

Re: [Qemu-block] [Qemu-devel][PATCH] qemu-img: fix failed qemu-img command return zero exit code defeat

2016-10-14 Thread Stefan Hajnoczi

On Mon, Oct 10, 2016 at 11:07:09AM +0800, Xu Tian wrote:
> If backing file can not open when do image rebase, flag 'ret' not
> assign a non-zero value, then qemu-img process exit with code zero.
> Assign value '-1' to flag 'ret' after report error message to fix
> this defeat.

I suggest rewording the commit description:

img_rebase() returns 0 (success) when opening the backing file fails
because 'ret' isn't set.

> BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1383012
> 
> Signed-off-by: Xu Tian 
> ---
>  qemu-img.c | 2 ++
>  1 file changed, 2 insertions(+)

Reviewed-by: Stefan Hajnoczi 


signature.asc
Description: PGP signature

Re: [Qemu-block] [PATCH v5 1/1] block: improve error handling in raw_open

2016-10-14 Thread Stefan Hajnoczi

On Tue, Oct 11, 2016 at 04:12:35PM +0200, Halil Pasic wrote:
> Make raw_open for POSIX more consistent in handling errors by setting
> the error object also when qemu_open fails. The error object was set
> generally set in case of errors, but I guess this case was overlooked.
> Do the same for win32.
> 
> Signed-off-by: Halil Pasic 
> Reviewed-by: Sascha Silbe 
> Tested-by: Marc Hartmayer  (POSIX only)
> 
> ---
> 
> Stumbled upon this (POSIX) while testing VMs with too many SCSI disks in
> respect to my nofile limit. When open hits the nofile limit while trying
> to hotplug yet another SCSI disk via libvirt we end up with no adequate
> error message (one stating too many files). Sadly this patch in not
> sufficient to fix this problem because drive_new (/qemu/blockdev.c)
> handles errors using error_report_err which is documented as not to be
> used in QMP context.
> 
> The win32 part was not tested, and the sole reason I touched it is
> to not introduce unnecessary divergence.
> 
> v4 -> v5:
> * fix qemu-iotests by adding the filename to the message

This patch doesn't modify any iotests golden master files.  Does this
mean the iotests output is unchanged?

> v3 -> v4:
> * rebased on current master
> v2 -> v3:
> * first save errno then error_setg_errno
> v1 -> v2:
> * fixed win32 by the correct error_setg_*
> * use the original errno consequently
> ---
>  block/raw-posix.c | 1 +
>  block/raw-win32.c | 1 +
>  2 files changed, 2 insertions(+)

Reviewed-by: Stefan Hajnoczi 


signature.asc
Description: PGP signature

Re: [Qemu-block] block/nfs: Fine grained runtime options in nfs

2016-10-14 Thread Stefan Hajnoczi

On Mon, Oct 10, 2016 at 10:39:30AM +0530, Ashijeet Acharya wrote:
> Hi all,
> 
> I was working on trying to add blockdev-add compatibility for the nfs
> block driver but before that runtime options need to be separated into
> various options rather than just a simple "filename" option.
> 
> I have added the following until now:
> a) host
> b) port (not sure about this one, do we just use a default port number?)
> c) export
> d) path (path to the file)
> 
> I have matched these with the URI but still let me know if i have
> missed anyone :)
> 
> Now, in order to parse the uri for different runtime options, I have
> made two new functions nfs_parse_filename() and nfs_parse_uri() which
> is pretty similar to the way how other network block drivers do it.
> 
> Currently we parse the uri in a nfs_client_open() function which takes
> 'const char *filename' as one of its parameters but I dont think
> that's the right way anymore because we pass 'qemu_opt_get(opts,
> "filename")' which is invalid due to no runtime option named
> "filename" available anymore. Right?
> 
> While parsing uri we check for the query parameters inside a 'for
> loop', so I have moved that too inside
> 
> nfs_parse_uri(const char *filename, QDict *options, Error **errp)
> 
> but the problem is there is no struct NFSClient parameter here, so I
> cannot fill up its important fields while parsing the query
> parameters. I cannot do the same inside nfs_client_open() because I no
> longer parse the uri over there.OR CAN I? A completely different
> approach will work too :)
> 
> I can attach a pastebin link containing a raw patch if you want to get
> a clear view but I am afraid it doesn't compile at the moment due to
> the problems mentioned above.

Please post the code and annotate the relevant places where you are
stuck.

Stefan


signature.asc
Description: PGP signature

Re: [Qemu-block] [PATCH 6/7] blockjobs: split interface into public/private, Part 1

2016-10-14 Thread Kevin Wolf

Am 14.10.2016 um 00:57 hat John Snow geschrieben:
> To make it a little more obvious which functions are intended to be
> public interface and which are intended to be for use only by jobs
> themselves, split the interface into "public" and "private" files.
> 
> Convert blockjobs (e.g. block/backup) to using the private interface.
> Leave blockdev and others on the public interface.
> 
> There are remaining uses of private state by qemu-img, and several
> cases in blockdev.c and block/io.c where we grab job->blk for the
> purposes of acquiring an AIOContext.
> 
> These will be corrected in future patches.
> 
> Signed-off-by: John Snow 

Reviewed-by: Kevin Wolf

Re: [Qemu-block] [PATCH 4/7] blockjob: centralize QMP event emissions

2016-10-14 Thread Kevin Wolf

Am 14.10.2016 um 00:56 hat John Snow geschrieben:
> There's no reason to leave this to blockdev; we can do it in blockjobs
> directly and get rid of an extra callback for most users.
> 
> All non-internal events, even those created outside of QMP, will
> consistently emit events.
> 
> Signed-off-by: John Snow 

Reviewed-by: Kevin Wolf

Re: [Qemu-block] [PATCH 12/18] iothread: detach all block devices before stopping them

2016-10-14 Thread Paolo Bonzini



On 14/10/2016 16:50, Fam Zheng wrote:
>> > +BdrvNextIterator it;
>> > +
>> > +for (bs = bdrv_first(); bs; bs = bdrv_next()) {
>> > +AioContext *ctx = bdrv_get_aio_context(bs);
> I have a strong feeling that we should 'continue' if ctx ==
> qemu_get_aio_context() - otherwise a lot of unnecessary (and somehow
> complicated) code will always run, even if user has no iothread.
> 
> Fam
> 
>> > +aio_context_acquire(ctx);
>> > +bdrv_set_aio_context(bs, qemu_get_aio_context());
>> > +aio_context_release(ctx);
>> > +}

Sounds good.

Paolo

Re: [Qemu-block] [PATCH 15/18] block: only call aio_poll on the current thread's AioContext

2016-10-14 Thread Fam Zheng

On Thu, 10/13 19:34, Paolo Bonzini wrote:
> diff --git a/include/block/block_int.h b/include/block/block_int.h
> index 11f877b..0516f62 100644
> --- a/include/block/block_int.h
> +++ b/include/block/block_int.h
> @@ -470,6 +470,7 @@ struct BlockDriverState {
>  NotifierWithReturnList before_write_notifiers;
>  
>  /* number of in-flight requests; overall and serialising */
> +bool wakeup;

This should probably be move above the in-flight comment.

Fam

>  unsigned int in_flight;
>  unsigned int serialising_in_flight;
>  
> -- 
> 2.7.4
> 
>

[Qemu-block] [PATCH] rbd: make the code better readable

2016-10-14 Thread Xiubo Li

Make it a bit clear and better readable.

Signed-off-by: Xiubo Li 
---
 block/rbd.c | 24 
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/block/rbd.c b/block/rbd.c
index 0a5840d..de8ca1b 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -366,45 +366,45 @@ static int qemu_rbd_create(const char *filename, QemuOpts 
*opts, Error **errp)
 rados_conf_read_file(cluster, NULL);
 } else if (conf[0] != '\0' &&
qemu_rbd_set_conf(cluster, conf, true, _err) < 0) {
-rados_shutdown(cluster);
 error_propagate(errp, local_err);
-return -EIO;
+ret = -EIO;
+goto failed_shutdown;
 }
 
 if (conf[0] != '\0' &&
 qemu_rbd_set_conf(cluster, conf, false, _err) < 0) {
-rados_shutdown(cluster);
 error_propagate(errp, local_err);
-return -EIO;
+ret = -EIO;
+goto failed_shutdown;
 }
 
 if (qemu_rbd_set_auth(cluster, secretid, errp) < 0) {
 rados_shutdown(cluster);
-return -EIO;
+ret = -EIO;
+goto failed_shutdown;
 }
 
 ret = rados_connect(cluster);
 if (ret < 0) {
 error_setg_errno(errp, -ret, "error connecting");
-rados_shutdown(cluster);
-return ret;
+goto failed_shutdown;
 }
 
 ret = rados_ioctx_create(cluster, pool, _ctx);
 if (ret < 0) {
 error_setg_errno(errp, -ret, "error opening pool %s", pool);
-rados_shutdown(cluster);
-return ret;
+goto failed_shutdown;
 }
 
 ret = rbd_create(io_ctx, name, bytes, _order);
-rados_ioctx_destroy(io_ctx);
-rados_shutdown(cluster);
 if (ret < 0) {
 error_setg_errno(errp, -ret, "error rbd create");
-return ret;
 }
 
+rados_ioctx_destroy(io_ctx);
+
+failed_shutdown:
+rados_shutdown(cluster);
 return ret;
 }
 
-- 
1.8.3.1

Re: [Qemu-block] [PATCH 12/18] iothread: detach all block devices before stopping them

2016-10-14 Thread Fam Zheng

On Thu, 10/13 19:34, Paolo Bonzini wrote:
> Soon bdrv_drain will not call aio_poll itself on iothreads.  If block
> devices are left hanging off the iothread's AioContext, there will be no
> one to do I/O for those poor devices.
> 
> Signed-off-by: Paolo Bonzini 
> ---
>  iothread.c | 10 ++
>  1 file changed, 10 insertions(+)
> 
> diff --git a/iothread.c b/iothread.c
> index 62c8796..8153e21 100644
> --- a/iothread.c
> +++ b/iothread.c
> @@ -16,6 +16,7 @@
>  #include "qom/object_interfaces.h"
>  #include "qemu/module.h"
>  #include "block/aio.h"
> +#include "block/block.h"
>  #include "sysemu/iothread.h"
>  #include "qmp-commands.h"
>  #include "qemu/error-report.h"
> @@ -199,6 +200,15 @@ IOThreadInfoList *qmp_query_iothreads(Error **errp)
>  void iothread_stop_all(void)
>  {
>  Object *container = object_get_objects_root();
> +BlockDriverState *bs;
> +BdrvNextIterator it;
> +
> +for (bs = bdrv_first(); bs; bs = bdrv_next()) {
> +AioContext *ctx = bdrv_get_aio_context(bs);

I have a strong feeling that we should 'continue' if ctx ==
qemu_get_aio_context() - otherwise a lot of unnecessary (and somehow
complicated) code will always run, even if user has no iothread.

Fam

> +aio_context_acquire(ctx);
> +bdrv_set_aio_context(bs, qemu_get_aio_context());
> +aio_context_release(ctx);
> +}
>  
>  object_child_foreach(container, iothread_stop, NULL);
>  }
> -- 
> 2.7.4
> 
>

Re: [Qemu-block] [PATCH 2/7] blockjobs: Allow creating internal jobs

2016-10-14 Thread Kevin Wolf

Am 14.10.2016 um 00:56 hat John Snow geschrieben:
> Add the ability to create jobs without an ID.
> 
> Signed-off-by: John Snow 

Reviewed-by: Kevin Wolf

[Qemu-block] bug introduced by "block: Move throttling fields from BDS to BB"

2016-10-14 Thread Paolo Bonzini

Here is next_throttle_token:

-ThrottleGroup *tg = container_of(blk_bs(blk)->throttle_state,
- ThrottleGroup, ts);
+BlockBackendPublic *blkp = blk_get_public(blk);
+ThrottleGroup *tg = container_of(blkp->throttle_state, ThrottleGroup, ts);
 BlockBackend *token, *start;
 
 start = token = tg->tokens[is_write];
 
 /* get next bs round in round robin style */
 token = throttle_group_next_blk(token);
-while (token != start && !blk_bs(token)->pending_reqs[is_write]) {
+while (token != start && !blkp->pending_reqs[is_write]) {
 token = throttle_group_next_blk(token);
 }


blkp isn't updated every time token is updated.

The same bug occurs in schedule_next_request.  This patch was only
in 2.7.x.

Thanks,

Paolo

[Qemu-block] [PATCH v11 03/19] block: Add block_job_add_bdrv()

2016-10-14 Thread Alberto Garcia

When a block job is created on a certain BlockDriverState, operations
are blocked there while the job exists. However, some block jobs may
involve additional BDSs, which must be blocked separately when the job
is created and unblocked manually afterwards.

This patch adds block_job_add_bdrv(), that simplifies this process by
keeping a list of BDSs that are involved in the specified block job.

Signed-off-by: Alberto Garcia 
---
 blockjob.c   | 17 +++--
 include/block/blockjob.h | 14 ++
 2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/blockjob.c b/blockjob.c
index 43fecbe..c67c46d 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -117,6 +117,13 @@ static void block_job_detach_aio_context(void *opaque)
 block_job_unref(job);
 }
 
+void block_job_add_bdrv(BlockJob *job, BlockDriverState *bs)
+{
+job->nodes = g_slist_prepend(job->nodes, bs);
+bdrv_ref(bs);
+bdrv_op_block_all(bs, job->blocker);
+}
+
 void *block_job_create(const char *job_id, const BlockJobDriver *driver,
BlockDriverState *bs, int64_t speed,
BlockCompletionFunc *cb, void *opaque, Error **errp)
@@ -154,7 +161,7 @@ void *block_job_create(const char *job_id, const 
BlockJobDriver *driver,
 job = g_malloc0(driver->instance_size);
 error_setg(>blocker, "block device is in use by block job: %s",
BlockJobType_lookup[driver->job_type]);
-bdrv_op_block_all(bs, job->blocker);
+block_job_add_bdrv(job, bs);
 bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker);
 
 job->driver= driver;
@@ -193,9 +200,15 @@ void block_job_ref(BlockJob *job)
 void block_job_unref(BlockJob *job)
 {
 if (--job->refcnt == 0) {
+GSList *l;
 BlockDriverState *bs = blk_bs(job->blk);
 bs->job = NULL;
-bdrv_op_unblock_all(bs, job->blocker);
+for (l = job->nodes; l; l = l->next) {
+bs = l->data;
+bdrv_op_unblock_all(bs, job->blocker);
+bdrv_unref(bs);
+}
+g_slist_free(job->nodes);
 blk_remove_aio_context_notifier(job->blk,
 block_job_attached_aio_context,
 block_job_detach_aio_context, job);
diff --git a/include/block/blockjob.h b/include/block/blockjob.h
index 4ddb4ae..bab91b1 100644
--- a/include/block/blockjob.h
+++ b/include/block/blockjob.h
@@ -181,6 +181,9 @@ struct BlockJob {
 /** Block other operations when block job is running */
 Error *blocker;
 
+/** BlockDriverStates that are involved in this block job */
+GSList *nodes;
+
 /** The opaque value that is passed to the completion function.  */
 void *opaque;
 
@@ -246,6 +249,17 @@ void *block_job_create(const char *job_id, const 
BlockJobDriver *driver,
BlockCompletionFunc *cb, void *opaque, Error **errp);
 
 /**
+ * block_job_add_bdrv:
+ * @job: A block job
+ * @bs: A BlockDriverState that is involved in @job
+ *
+ * Add @bs to the list of BlockDriverState that are involved in
+ * @job. This means that all operations will be blocked on @bs while
+ * @job exists.
+ */
+void block_job_add_bdrv(BlockJob *job, BlockDriverState *bs);
+
+/**
  * block_job_sleep_ns:
  * @job: The job that calls the function.
  * @clock: The clock to sleep on.
-- 
2.9.3

[Qemu-block] [PATCH v11 10/19] block: Add QMP support for streaming to an intermediate layer

2016-10-14 Thread Alberto Garcia

This patch makes the 'device' parameter of the 'block-stream' command
accept a node name that is not a root node.

In addition to that, operation blockers will be checked in all
intermediate nodes between the top and the base node.

Signed-off-by: Alberto Garcia 
---
 blockdev.c   | 15 +--
 qapi/block-core.json | 10 +++---
 2 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index f13fc69..c01776a 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2937,7 +2937,7 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
   bool has_on_error, BlockdevOnError on_error,
   Error **errp)
 {
-BlockDriverState *bs;
+BlockDriverState *bs, *iter;
 BlockDriverState *base_bs = NULL;
 AioContext *aio_context;
 Error *local_err = NULL;
@@ -2947,7 +2947,7 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 on_error = BLOCKDEV_ON_ERROR_REPORT;
 }
 
-bs = qmp_get_root_bs(device, errp);
+bs = bdrv_lookup_bs(device, device, errp);
 if (!bs) {
 return;
 }
@@ -2955,10 +2955,6 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 aio_context = bdrv_get_aio_context(bs);
 aio_context_acquire(aio_context);
 
-if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_STREAM, errp)) {
-goto out;
-}
-
 if (has_base) {
 base_bs = bdrv_find_backing_image(bs, base);
 if (base_bs == NULL) {
@@ -2969,6 +2965,13 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 base_name = base;
 }
 
+/* Check for op blockers in the whole chain between bs and base */
+for (iter = bs; iter && iter != base_bs; iter = backing_bs(iter)) {
+if (bdrv_op_is_blocked(iter, BLOCK_OP_TYPE_STREAM, errp)) {
+goto out;
+}
+}
+
 /* if we are streaming the entire chain, the result will have no backing
  * file, and specifying one is therefore an error */
 if (base_bs == NULL && has_backing_file) {
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 1b7aa1b..cb9 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1464,6 +1464,10 @@
 # with query-block-jobs.  The operation can be stopped before it has completed
 # using the block-job-cancel command.
 #
+# The node that receives the data is called the top image, can be located in
+# any part of the chain (but always above the base image; see below) and can be
+# specified using its device or node name.
+#
 # If a base file is specified then sectors are not copied from that base file 
and
 # its backing chain.  When streaming completes the image file will have the 
base
 # file as its backing file.  This can be used to stream a subset of the backing
@@ -1475,12 +1479,12 @@
 # @job-id: #optional identifier for the newly-created block job. If
 #  omitted, the device name will be used. (Since 2.7)
 #
-# @device: the device name or node-name of a root node
+# @device: the device or node name of the top image
 #
 # @base:   #optional the common backing file name
 #
-# @backing-file: #optional The backing file string to write into the active
-#  layer. This filename is not validated.
+# @backing-file: #optional The backing file string to write into the top
+#  image. This filename is not validated.
 #
 #  If a pathname string is such that it cannot be
 #  resolved by QEMU, that means that subsequent QMP or
-- 
2.9.3

[Qemu-block] [PATCH v11 06/19] block: Check blockers in all nodes involved in a block-commit job

2016-10-14 Thread Alberto Garcia

qmp_block_commit() checks for op blockers in the active and
destination (base) images. However all nodes between top_bs and base
are also involved, and they are removed from the chain afterwards.

In addition to that, if top_bs is not the active layer then top_bs's
overlay also needs to be checked because it's involved in the job (its
backing image string needs to be updated to point to 'base').

This patch checks that none of those nodes are blocked.

Signed-off-by: Alberto Garcia 
Reviewed-by: Kevin Wolf 
---
 blockdev.c | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index 07ec733..f13fc69 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -3001,6 +3001,7 @@ void qmp_block_commit(bool has_job_id, const char 
*job_id, const char *device,
   Error **errp)
 {
 BlockDriverState *bs;
+BlockDriverState *iter;
 BlockDriverState *base_bs, *top_bs;
 AioContext *aio_context;
 Error *local_err = NULL;
@@ -3067,8 +3068,10 @@ void qmp_block_commit(bool has_job_id, const char 
*job_id, const char *device,
 
 assert(bdrv_get_aio_context(base_bs) == aio_context);
 
-if (bdrv_op_is_blocked(base_bs, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) {
-goto out;
+for (iter = top_bs; iter != backing_bs(base_bs); iter = backing_bs(iter)) {
+if (bdrv_op_is_blocked(iter, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) {
+goto out;
+}
 }
 
 /* Do not allow attempts to commit an image into itself */
@@ -3086,6 +3089,10 @@ void qmp_block_commit(bool has_job_id, const char 
*job_id, const char *device,
 commit_active_start(has_job_id ? job_id : NULL, bs, base_bs, speed,
 on_error, block_job_cb, bs, _err, false);
 } else {
+BlockDriverState *overlay_bs = bdrv_find_overlay(bs, top_bs);
+if (bdrv_op_is_blocked(overlay_bs, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) 
{
+goto out;
+}
 commit_start(has_job_id ? job_id : NULL, bs, base_bs, top_bs, speed,
  on_error, block_job_cb, bs,
  has_backing_file ? backing_file : NULL, _err);
-- 
2.9.3

[Qemu-block] [PATCH v11 14/19] qemu-iotests: Test overlapping stream and commit operations

2016-10-14 Thread Alberto Garcia

These test cases check that it's not possible to perform two
block-stream or block-commit operations if there are nodes involved in
both.

Signed-off-by: Alberto Garcia 
---
 tests/qemu-iotests/030 | 89 ++
 tests/qemu-iotests/030.out |  4 +--
 2 files changed, 91 insertions(+), 2 deletions(-)

diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030
index e487473..e3bded8 100755
--- a/tests/qemu-iotests/030
+++ b/tests/qemu-iotests/030
@@ -228,6 +228,95 @@ class TestParallelOps(iotests.QMPTestCase):
  qemu_io('-f', iotests.imgfmt, '-c', 'map', 
self.imgs[i-1]),
  'image file map does not match backing file after 
streaming')
 
+# Test that it's not possible to perform two block-stream
+# operations if there are nodes involved in both.
+def test_overlapping_1(self):
+self.assert_no_active_block_jobs()
+
+# Set a speed limit to make sure that this job blocks the rest
+result = self.vm.qmp('block-stream', device='node4', 
job_id='stream-node4', base=self.imgs[1], speed=1024*1024)
+self.assert_qmp(result, 'return', {})
+
+result = self.vm.qmp('block-stream', device='node5', 
job_id='stream-node5', base=self.imgs[2])
+self.assert_qmp(result, 'error/class', 'GenericError')
+
+result = self.vm.qmp('block-stream', device='node3', 
job_id='stream-node3', base=self.imgs[2])
+self.assert_qmp(result, 'error/class', 'GenericError')
+
+result = self.vm.qmp('block-stream', device='node4', 
job_id='stream-node4-v2')
+self.assert_qmp(result, 'error/class', 'GenericError')
+
+# block-commit should also fail if it touches nodes used by the stream 
job
+result = self.vm.qmp('block-commit', device='drive0', 
base=self.imgs[4], job_id='commit-node4')
+self.assert_qmp(result, 'error/class', 'GenericError')
+
+result = self.vm.qmp('block-commit', device='drive0', 
base=self.imgs[1], top=self.imgs[3], job_id='commit-node1')
+self.assert_qmp(result, 'error/class', 'GenericError')
+
+# This fails because it needs to modify the backing string in node2, 
which is blocked
+result = self.vm.qmp('block-commit', device='drive0', 
base=self.imgs[0], top=self.imgs[1], job_id='commit-node0')
+self.assert_qmp(result, 'error/class', 'GenericError')
+
+self.wait_until_completed(drive='stream-node4')
+self.assert_no_active_block_jobs()
+
+# Similar to test_overlapping_1, but with block-commit
+# blocking the other jobs
+def test_overlapping_2(self):
+self.assertLessEqual(9, self.num_imgs)
+self.assert_no_active_block_jobs()
+
+# Set a speed limit to make sure that this job blocks the rest
+result = self.vm.qmp('block-commit', device='drive0', 
top=self.imgs[5], base=self.imgs[3], job_id='commit-node3', speed=1024*1024)
+self.assert_qmp(result, 'return', {})
+
+result = self.vm.qmp('block-stream', device='node3', 
job_id='stream-node3')
+self.assert_qmp(result, 'error/class', 'GenericError')
+
+result = self.vm.qmp('block-stream', device='node6', 
base=self.imgs[2], job_id='stream-node6')
+self.assert_qmp(result, 'error/class', 'GenericError')
+
+result = self.vm.qmp('block-stream', device='node4', 
base=self.imgs[2], job_id='stream-node4')
+self.assert_qmp(result, 'error/class', 'GenericError')
+
+result = self.vm.qmp('block-stream', device='node6', 
base=self.imgs[4], job_id='stream-node6-v2')
+self.assert_qmp(result, 'error/class', 'GenericError')
+
+# This fails because block-commit needs to block node6, the overlay of 
the 'top' image
+result = self.vm.qmp('block-stream', device='node7', 
base=self.imgs[5], job_id='stream-node6-v3')
+self.assert_qmp(result, 'error/class', 'GenericError')
+
+# This fails because block-commit currently blocks the active layer 
even if it's not used
+result = self.vm.qmp('block-stream', device='drive0', 
base=self.imgs[5], job_id='stream-drive0')
+self.assert_qmp(result, 'error/class', 'GenericError')
+
+self.wait_until_completed(drive='commit-node3')
+
+# Similar to test_overlapping_2, but here block-commit doesn't use the 
'top' parameter.
+# Internally this uses a mirror block job, hence the separate test case.
+def test_overlapping_3(self):
+self.assertLessEqual(8, self.num_imgs)
+self.assert_no_active_block_jobs()
+
+# Set a speed limit to make sure that this job blocks the rest
+result = self.vm.qmp('block-commit', device='drive0', 
base=self.imgs[3], job_id='commit-drive0', speed=1024*1024)
+self.assert_qmp(result, 'return', {})
+
+result = self.vm.qmp('block-stream', device='node5', 
base=self.imgs[3], job_id='stream-node6')
+self.assert_qmp(result, 'error/class',

[Qemu-block] [PATCH v11 16/19] qemu-iotests: Add iotests.supports_quorum()

2016-10-14 Thread Alberto Garcia

There's many tests that need Quorum support in order to run. At the
moment each test implements its own check to see if Quorum is
enabled. This patch centralizes all those checks in a new function
called iotests.supports_quorum().

Signed-off-by: Alberto Garcia 
---
 tests/qemu-iotests/041| 27 ---
 tests/qemu-iotests/139|  3 ++-
 tests/qemu-iotests/iotests.py |  5 -
 3 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041
index d1e1ad8..b4f3748 100755
--- a/tests/qemu-iotests/041
+++ b/tests/qemu-iotests/041
@@ -761,9 +761,6 @@ class TestRepairQuorum(iotests.QMPTestCase):
 image_len = 1 * 1024 * 1024 # MB
 IMAGES = [ quorum_img1, quorum_img2, quorum_img3 ]
 
-def has_quorum(self):
-return 'quorum' in iotests.qemu_img_pipe('--help')
-
 def setUp(self):
 self.vm = iotests.VM()
 
@@ -784,7 +781,7 @@ class TestRepairQuorum(iotests.QMPTestCase):
 #assemble the quorum block device from the individual files
 args = { "options" : { "driver": "quorum", "node-name": "quorum0",
  "vote-threshold": 2, "children": [ "img0", "img1", "img2" ] } 
}
-if self.has_quorum():
+if iotests.supports_quorum():
 result = self.vm.qmp("blockdev-add", **args)
 self.assert_qmp(result, 'return', {})
 
@@ -799,7 +796,7 @@ class TestRepairQuorum(iotests.QMPTestCase):
 pass
 
 def test_complete(self):
-if not self.has_quorum():
+if not iotests.supports_quorum():
 return
 
 self.assert_no_active_block_jobs()
@@ -818,7 +815,7 @@ class TestRepairQuorum(iotests.QMPTestCase):
 'target image does not match source after mirroring')
 
 def test_cancel(self):
-if not self.has_quorum():
+if not iotests.supports_quorum():
 return
 
 self.assert_no_active_block_jobs()
@@ -835,7 +832,7 @@ class TestRepairQuorum(iotests.QMPTestCase):
 self.vm.shutdown()
 
 def test_cancel_after_ready(self):
-if not self.has_quorum():
+if not iotests.supports_quorum():
 return
 
 self.assert_no_active_block_jobs()
@@ -854,7 +851,7 @@ class TestRepairQuorum(iotests.QMPTestCase):
 'target image does not match source after mirroring')
 
 def test_pause(self):
-if not self.has_quorum():
+if not iotests.supports_quorum():
 return
 
 self.assert_no_active_block_jobs()
@@ -884,7 +881,7 @@ class TestRepairQuorum(iotests.QMPTestCase):
 'target image does not match source after mirroring')
 
 def test_medium_not_found(self):
-if not self.has_quorum():
+if not iotests.supports_quorum():
 return
 
 if iotests.qemu_default_machine != 'pc':
@@ -898,7 +895,7 @@ class TestRepairQuorum(iotests.QMPTestCase):
 self.assert_qmp(result, 'error/class', 'GenericError')
 
 def test_image_not_found(self):
-if not self.has_quorum():
+if not iotests.supports_quorum():
 return
 
 result = self.vm.qmp('drive-mirror', job_id='job0', device='quorum0',
@@ -908,7 +905,7 @@ class TestRepairQuorum(iotests.QMPTestCase):
 self.assert_qmp(result, 'error/class', 'GenericError')
 
 def test_device_not_found(self):
-if not self.has_quorum():
+if not iotests.supports_quorum():
 return
 
 result = self.vm.qmp('drive-mirror', job_id='job0',
@@ -919,7 +916,7 @@ class TestRepairQuorum(iotests.QMPTestCase):
 self.assert_qmp(result, 'error/class', 'GenericError')
 
 def test_wrong_sync_mode(self):
-if not self.has_quorum():
+if not iotests.supports_quorum():
 return
 
 result = self.vm.qmp('drive-mirror', device='quorum0', job_id='job0',
@@ -929,7 +926,7 @@ class TestRepairQuorum(iotests.QMPTestCase):
 self.assert_qmp(result, 'error/class', 'GenericError')
 
 def test_no_node_name(self):
-if not self.has_quorum():
+if not iotests.supports_quorum():
 return
 
 result = self.vm.qmp('drive-mirror', job_id='job0', device='quorum0',
@@ -938,7 +935,7 @@ class TestRepairQuorum(iotests.QMPTestCase):
 self.assert_qmp(result, 'error/class', 'GenericError')
 
 def test_nonexistent_replaces(self):
-if not self.has_quorum():
+if not iotests.supports_quorum():
 return
 
 result = self.vm.qmp('drive-mirror', job_id='job0', device='quorum0',
@@ -947,7 +944,7 @@ class TestRepairQuorum(iotests.QMPTestCase):
 self.assert_qmp(result, 'error/class', 'GenericError')
 
 def test_after_a_quorum_snapshot(self):
-if not self.has_quorum():
+if not iotests.supports_quorum():
 return
 
 result = self.vm.qmp('blockdev-snapshot-sync', node_name='img1',
diff

[Qemu-block] [PATCH v11 09/19] block: Support streaming to an intermediate layer

2016-10-14 Thread Alberto Garcia

This makes sure that the image we are streaming into is open in
read-write mode during the operation.

Operation blockers are also set in all intermediate nodes, since they
will be removed from the chain afterwards.

Finally, this also unblocks the stream operation in backing files.

Signed-off-by: Alberto Garcia 
---
 block.c|  4 +++-
 block/stream.c | 24 
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/block.c b/block.c
index adbecd0..1ef36c9 100644
--- a/block.c
+++ b/block.c
@@ -1428,9 +1428,11 @@ void bdrv_set_backing_hd(BlockDriverState *bs, 
BlockDriverState *backing_hd)
 backing_hd->drv ? backing_hd->drv->format_name : "");
 
 bdrv_op_block_all(backing_hd, bs->backing_blocker);
-/* Otherwise we won't be able to commit due to check in bdrv_commit */
+/* Otherwise we won't be able to commit or stream */
 bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
 bs->backing_blocker);
+bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_STREAM,
+bs->backing_blocker);
 /*
  * We do backup in 3 ways:
  * 1. drive backup
diff --git a/block/stream.c b/block/stream.c
index 3187481..b8ab89a 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -37,6 +37,7 @@ typedef struct StreamBlockJob {
 BlockDriverState *base;
 BlockdevOnError on_error;
 char *backing_file_str;
+int bs_flags;
 } StreamBlockJob;
 
 static int coroutine_fn stream_populate(BlockBackend *blk,
@@ -81,6 +82,11 @@ static void stream_complete(BlockJob *job, void *opaque)
 bdrv_set_backing_hd(bs, base);
 }
 
+/* Reopen the image back in read-only mode if necessary */
+if (s->bs_flags != bdrv_get_flags(bs)) {
+bdrv_reopen(bs, s->bs_flags, NULL);
+}
+
 g_free(s->backing_file_str);
 block_job_completed(>common, data->ret);
 g_free(data);
@@ -220,6 +226,8 @@ void stream_start(const char *job_id, BlockDriverState *bs,
   BlockCompletionFunc *cb, void *opaque, Error **errp)
 {
 StreamBlockJob *s;
+BlockDriverState *iter;
+int orig_bs_flags;
 
 s = block_job_create(job_id, _job_driver, bs, speed,
  cb, opaque, errp);
@@ -227,8 +235,24 @@ void stream_start(const char *job_id, BlockDriverState *bs,
 return;
 }
 
+/* Make sure that the image is opened in read-write mode */
+orig_bs_flags = bdrv_get_flags(bs);
+if (!(orig_bs_flags & BDRV_O_RDWR)) {
+if (bdrv_reopen(bs, orig_bs_flags | BDRV_O_RDWR, errp) != 0) {
+block_job_unref(>common);
+return;
+}
+}
+
+/* Block all intermediate nodes between bs and base, because they
+ * will disappear from the chain after this operation */
+for (iter = backing_bs(bs); iter && iter != base; iter = backing_bs(iter)) 
{
+block_job_add_bdrv(>common, iter);
+}
+
 s->base = base;
 s->backing_file_str = g_strdup(backing_file_str);
+s->bs_flags = orig_bs_flags;
 
 s->on_error = on_error;
 s->common.co = qemu_coroutine_create(stream_run, s);
-- 
2.9.3

[Qemu-block] [PATCH v11 11/19] docs: Document how to stream to an intermediate layer

2016-10-14 Thread Alberto Garcia

Signed-off-by: Alberto Garcia 
---
 docs/live-block-ops.txt | 36 +---
 1 file changed, 25 insertions(+), 11 deletions(-)

diff --git a/docs/live-block-ops.txt b/docs/live-block-ops.txt
index a257087..2211d14 100644
--- a/docs/live-block-ops.txt
+++ b/docs/live-block-ops.txt
@@ -4,15 +4,20 @@ LIVE BLOCK OPERATIONS
 High level description of live block operations. Note these are not
 supported for use with the raw format at the moment.
 
+Note also that this document is incomplete and it currently only
+covers the 'stream' operation. Other operations supported by QEMU such
+as 'commit', 'mirror' and 'backup' are not described here yet. Please
+refer to the qapi/block-core.json file for an overview of those.
+
 Snapshot live merge
 ===
 
 Given a snapshot chain, described in this document in the following
 format:
 
-[A] -> [B] -> [C] -> [D]
+[A] <- [B] <- [C] <- [D] <- [E]
 
-Where the rightmost object ([D] in the example) described is the current
+Where the rightmost object ([E] in the example) described is the current
 image which the guest OS has write access to. To the left of it is its base
 image, and so on accordingly until the leftmost image, which has no
 base.
@@ -21,11 +26,14 @@ The snapshot live merge operation transforms such a chain 
into a
 smaller one with fewer elements, such as this transformation relative
 to the first example:
 
-[A] -> [D]
+[A] <- [E]
 
-Currently only forward merge with target being the active image is
-supported, that is, data copy is performed in the right direction with
-destination being the rightmost image.
+Data is copied in the right direction with destination being the
+rightmost image, but any other intermediate image can be specified
+instead. In this example data is copied from [C] into [D], so [D] can
+be backed by [B]:
+
+[A] <- [B] <- [D] <- [E]
 
 The operation is implemented in QEMU through image streaming facilities.
 
@@ -35,14 +43,20 @@ streaming operation completes it raises a QMP event. 
'block_stream'
 copies data from the backing file(s) into the active image. When finished,
 it adjusts the backing file pointer.
 
-The 'base' parameter specifies an image which data need not be streamed from.
-This image will be used as the backing file for the active image when the
-operation is finished.
+The 'base' parameter specifies an image which data need not be
+streamed from. This image will be used as the backing file for the
+destination image when the operation is finished.
 
-In the example above, the command would be:
+In the first example above, the command would be:
 
-(qemu) block_stream virtio0 A
+(qemu) block_stream virtio0 file-A.img
 
+In order to specify a destination image different from the active
+(rightmost) one we can use its node name instead.
+
+In the second example above, the command would be:
+
+(qemu) block_stream node-D file-B.img
 
 Live block copy
 ===
-- 
2.9.3

[Qemu-block] [PATCH v11 17/19] qemu-iotests: Test streaming to a Quorum child

2016-10-14 Thread Alberto Garcia

Quorum children are special in the sense that they're not directly
attached to a block backend but they're not used as backing images
either. However the intermediate block streaming code supports
streaming to them. This is a test case for that scenario.

Signed-off-by: Alberto Garcia 
---
 tests/qemu-iotests/030 | 56 ++
 tests/qemu-iotests/030.out |  4 ++--
 2 files changed, 58 insertions(+), 2 deletions(-)

diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030
index d88823a..6736004 100755
--- a/tests/qemu-iotests/030
+++ b/tests/qemu-iotests/030
@@ -347,6 +347,62 @@ class TestParallelOps(iotests.QMPTestCase):
 
 self.assert_no_active_block_jobs()
 
+class TestQuorum(iotests.QMPTestCase):
+num_children = 3
+children = []
+backing = []
+
+def setUp(self):
+opts = ['driver=quorum', 'vote-threshold=2']
+
+# Initialize file names and command-line options
+for i in range(self.num_children):
+child_img = os.path.join(iotests.test_dir, 'img-%d.img' % i)
+backing_img = os.path.join(iotests.test_dir, 'backing-%d.img' % i)
+self.children.append(child_img)
+self.backing.append(backing_img)
+qemu_img('create', '-f', iotests.imgfmt, backing_img, '1M')
+qemu_io('-f', iotests.imgfmt,
+'-c', 'write -P 0x55 0 1024', backing_img)
+qemu_img('create', '-f', iotests.imgfmt,
+ '-o', 'backing_file=%s' % backing_img, child_img)
+opts.append("children.%d.file.filename=%s" % (i, child_img))
+opts.append("children.%d.node-name=node%d" % (i, i))
+
+# Attach the drive to the VM
+self.vm = iotests.VM()
+self.vm.add_drive(path = None, opts = ','.join(opts))
+self.vm.launch()
+
+def tearDown(self):
+self.vm.shutdown()
+for img in self.children:
+os.remove(img)
+for img in self.backing:
+os.remove(img)
+
+def test_stream_quorum(self):
+if not iotests.supports_quorum():
+return
+
+self.assertNotEqual(qemu_io('-f', iotests.imgfmt, '-c', 'map', 
self.children[0]),
+qemu_io('-f', iotests.imgfmt, '-c', 'map', 
self.backing[0]),
+'image file map matches backing file before 
streaming')
+
+self.assert_no_active_block_jobs()
+
+result = self.vm.qmp('block-stream', device='node0', 
job_id='stream-node0')
+self.assert_qmp(result, 'return', {})
+
+self.wait_until_completed(drive='stream-node0')
+
+self.assert_no_active_block_jobs()
+self.vm.shutdown()
+
+self.assertEqual(qemu_io('-f', iotests.imgfmt, '-c', 'map', 
self.children[0]),
+ qemu_io('-f', iotests.imgfmt, '-c', 'map', 
self.backing[0]),
+ 'image file map does not match backing file after 
streaming')
+
 class TestSmallerBackingFile(iotests.QMPTestCase):
 backing_len = 1 * 1024 * 1024 # MB
 image_len = 2 * backing_len
diff --git a/tests/qemu-iotests/030.out b/tests/qemu-iotests/030.out
index 3a89159..c6a10f8 100644
--- a/tests/qemu-iotests/030.out
+++ b/tests/qemu-iotests/030.out
@@ -1,5 +1,5 @@
-
+.
 --
-Ran 20 tests
+Ran 21 tests
 
 OK
-- 
2.9.3

[Qemu-block] [PATCH v11 07/19] block: Block all nodes involved in the block-commit operation

2016-10-14 Thread Alberto Garcia

After a successful block-commit operation all nodes between top and
base are removed from the backing chain, and top's overlay needs to
be updated to point to base. Because of that we should prevent other
block jobs from messing with them.

This patch blocks all operations in these nodes in commit_start().

Signed-off-by: Alberto Garcia 
---
 block/commit.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/block/commit.c b/block/commit.c
index 9f67a8b..1500d7c 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -216,6 +216,7 @@ void commit_start(const char *job_id, BlockDriverState *bs,
 BlockReopenQueue *reopen_queue = NULL;
 int orig_overlay_flags;
 int orig_base_flags;
+BlockDriverState *iter;
 BlockDriverState *overlay_bs;
 Error *local_err = NULL;
 
@@ -260,6 +261,19 @@ void commit_start(const char *job_id, BlockDriverState *bs,
 }
 
 
+/* Block all nodes between top and base, because they will
+ * disappear from the chain after this operation. */
+assert(bdrv_chain_contains(top, base));
+for (iter = top; iter != backing_bs(base); iter = backing_bs(iter)) {
+block_job_add_bdrv(>common, iter);
+}
+/* overlay_bs must be blocked because it needs to be modified to
+ * update the backing image string, but if it's the root node then
+ * don't block it again */
+if (bs != overlay_bs) {
+block_job_add_bdrv(>common, overlay_bs);
+}
+
 s->base = blk_new();
 blk_insert_bs(s->base, base);
 
-- 
2.9.3

[Qemu-block] [PATCH v11 19/19] qemu-iotests: Test the 'base-node' parameter of 'block-stream'

2016-10-14 Thread Alberto Garcia

The block-stream command has traditionally used the 'base' parameter
to indicate the image to copy the data from. This test checks that the
'base-node' parameter can also be used for the same purpose.

Signed-off-by: Alberto Garcia 
---
 tests/qemu-iotests/030 | 33 +
 tests/qemu-iotests/030.out |  4 ++--
 2 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030
index 6736004..44729f5 100755
--- a/tests/qemu-iotests/030
+++ b/tests/qemu-iotests/030
@@ -347,6 +347,39 @@ class TestParallelOps(iotests.QMPTestCase):
 
 self.assert_no_active_block_jobs()
 
+# Test the base_node parameter
+def test_stream_base_node_name(self):
+self.assert_no_active_block_jobs()
+
+self.assertNotEqual(qemu_io('-f', iotests.imgfmt, '-c', 'map', 
self.imgs[4]),
+qemu_io('-f', iotests.imgfmt, '-c', 'map', 
self.imgs[3]),
+'image file map matches backing file before 
streaming')
+
+# Error: the base node does not exist
+result = self.vm.qmp('block-stream', device='node4', base_node='none', 
job_id='stream')
+self.assert_qmp(result, 'error/class', 'GenericError')
+
+# Error: the base node is not a backing file of the top node
+result = self.vm.qmp('block-stream', device='node4', 
base_node='node6', job_id='stream')
+self.assert_qmp(result, 'error/class', 'GenericError')
+
+# Error: the base node is the same as the top node
+result = self.vm.qmp('block-stream', device='node4', 
base_node='node4', job_id='stream')
+self.assert_qmp(result, 'error/class', 'GenericError')
+
+# Success: the base node is a backing file of the top node
+result = self.vm.qmp('block-stream', device='node4', 
base_node='node2', job_id='stream')
+self.assert_qmp(result, 'return', {})
+
+self.wait_until_completed(drive='stream')
+
+self.assert_no_active_block_jobs()
+self.vm.shutdown()
+
+self.assertEqual(qemu_io('-f', iotests.imgfmt, '-c', 'map', 
self.imgs[4]),
+ qemu_io('-f', iotests.imgfmt, '-c', 'map', 
self.imgs[3]),
+ 'image file map matches backing file after streaming')
+
 class TestQuorum(iotests.QMPTestCase):
 num_children = 3
 children = []
diff --git a/tests/qemu-iotests/030.out b/tests/qemu-iotests/030.out
index c6a10f8..84bfd63 100644
--- a/tests/qemu-iotests/030.out
+++ b/tests/qemu-iotests/030.out
@@ -1,5 +1,5 @@
-.
+..
 --
-Ran 21 tests
+Ran 22 tests
 
 OK
-- 
2.9.3

[Qemu-block] [PATCH v11 12/19] qemu-iotests: Test streaming to an intermediate layer

2016-10-14 Thread Alberto Garcia

This adds test_stream_intermediate(), similar to test_stream() but
streams to the intermediate image instead.

It also removes the usage of blkdebug, which is unnecessary for this
test.

Signed-off-by: Alberto Garcia 
---
 tests/qemu-iotests/030 | 21 -
 tests/qemu-iotests/030.out |  4 ++--
 2 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030
index 107049b..367ecf8 100755
--- a/tests/qemu-iotests/030
+++ b/tests/qemu-iotests/030
@@ -36,7 +36,7 @@ class TestSingleDrive(iotests.QMPTestCase):
 qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % 
mid_img, test_img)
 qemu_io('-f', 'raw', '-c', 'write -P 0x1 0 512', backing_img)
 qemu_io('-f', iotests.imgfmt, '-c', 'write -P 0x1 524288 512', mid_img)
-self.vm = iotests.VM().add_drive("blkdebug::" + test_img)
+self.vm = iotests.VM().add_drive(test_img, "backing.node-name=mid")
 self.vm.launch()
 
 def tearDown(self):
@@ -60,6 +60,25 @@ class TestSingleDrive(iotests.QMPTestCase):
  qemu_io('-f', iotests.imgfmt, '-c', 'map', test_img),
  'image file map does not match backing file after 
streaming')
 
+def test_stream_intermediate(self):
+self.assert_no_active_block_jobs()
+
+self.assertNotEqual(qemu_io('-f', 'raw', '-c', 'map', backing_img),
+qemu_io('-f', iotests.imgfmt, '-c', 'map', 
mid_img),
+'image file map matches backing file before 
streaming')
+
+result = self.vm.qmp('block-stream', device='mid', job_id='stream-mid')
+self.assert_qmp(result, 'return', {})
+
+self.wait_until_completed(drive='stream-mid')
+
+self.assert_no_active_block_jobs()
+self.vm.shutdown()
+
+self.assertEqual(qemu_io('-f', 'raw', '-c', 'map', backing_img),
+ qemu_io('-f', iotests.imgfmt, '-c', 'map', mid_img),
+ 'image file map does not match backing file after 
streaming')
+
 def test_stream_pause(self):
 self.assert_no_active_block_jobs()
 
diff --git a/tests/qemu-iotests/030.out b/tests/qemu-iotests/030.out
index 6323079..96961ed 100644
--- a/tests/qemu-iotests/030.out
+++ b/tests/qemu-iotests/030.out
@@ -1,5 +1,5 @@
-..
+...
 --
-Ran 14 tests
+Ran 15 tests
 
 OK
-- 
2.9.3

[Qemu-block] [PATCH v11 00/19] Support streaming to an intermediate layer

2016-10-14 Thread Alberto Garcia

Hi all,

here's version 11 of the series.

See below for the list of changes. The most important one is, I think,
the addition of the 'base-node' parameter to 'block-stream'. I decided
to go for this one rather than writing a new 'blockdev-stream' command
because (a) it's not obvious to me that introducing a new command for
such a small API change is justified in this case, and (b) it didn't
seem to me that there was a very clear agreement on the mailing list,
so I decided to go for the easier alternative.

Anyway I don't have a problem with adding a new command if that's what
most people prefer. If everything else in the series is fine I can
change that in v12.

Berto

Changes:

v11:
- Patches 1-2: Add bdrv_drain_all_{begin,end}() and use these
  functions in bdrv_reopen_multiple() instead of pausing all block
  jobs.
- Patch 3: Don't unblock BLOCK_OP_TYPE_DATAPLANE in
  block_job_add_bdrv()
- Patch 7: In commit_start(), don't call block_job_add_bdrv() twice on
  the active node.
- Patch 10: In qmp_block_stream(), don't check twice for op blockers
  in 'bs'.
- Patch 11: Clarify that live-block-ops.txt is incomplete and it only
  covers the block-stream operation.
- Patches 18-19: Add a 'base-node' parameter to 'block-stream', and
  iotests for it.

v10: https://lists.gnu.org/archive/html/qemu-devel/2016-10/msg01047.html
- Rebase the code.
- Pause block jobs during bdrv_reopen().
- Allow jobs in parallel again, and modify jobs to block all involved
  nodes, not just the root one.
- Add more tests.

v9: https://lists.gnu.org/archive/html/qemu-devel/2016-04/msg00411.html
- Rebase the code
- Block the active layer in order to forbid other block jobs in the
  same chain.
- Split the patch that adds block_job_next() into 4 (new patches 1-4).
- Replace the test that performs several block-stream operations in
  parallel with one that check that they're forbidden.
- Remove patches that have already been merged.

v8: https://lists.gnu.org/archive/html/qemu-devel/2015-06/msg05754.html
- Rebased on top of Stefan's block branch (0a35bce416)
- The loop that pauses the block jobs in bdrv_drain_all() is now split
  in two: one that iterates the list of block jobs to stop them, and
  one that iterates the root bds in order to get the aio contexts.

v7: https://lists.gnu.org/archive/html/qemu-devel/2015-05/msg02580.html
- Rebased against the current master
- Updated bdrv_drain_all() to use the new block_job_next() API.

v6: https://lists.gnu.org/archive/html/qemu-devel/2015-04/msg03046.html
- fix the no-op test following Max's suggestions

v5: https://lists.gnu.org/archive/html/qemu-devel/2015-04/msg03006.html
- Fix a few typos
- Minor documentation updates
- Update test_stream_partial() to test no-ops
- New test case: test_stream_parallel()
- New test case: test_stream_overlapping()

v4: https://lists.gnu.org/archive/html/qemu-devel/2015-04/msg01878.html
- Refactor find_block_job to use the error from bdrv_lookup_bs()
- Don't use QERR_DEVICE_IN_USE in block_job_create() since we can be
  dealing with nodes now.
- Fix @device comment in the BlockJobInfo documentation
- stream_start(): simplify the bdrv_reopen() call and use
  bdrv_get_device_or_node_name() for error messages.
- Use a different variable name for BlockDriverState *i
- Documentation fixes in docs/live-block-ops.txt
- Update iotest 30 since now test_device_not_found() returns
  GenericError
- Fix test case test_stream_partial()
- Add new test case test_stream_intermediate()
- Fix typos

v3: https://lists.gnu.org/archive/html/qemu-devel/2015-04/msg00806.html
- Keep a list of block jobs and make qmp_query_block_jobs() iterate
  over it.

v2: https://lists.gnu.org/archive/html/qemu-devel/2015-03/msg04798.html
- The 'block-stream' command does not have a 'node-name' parameter
  anymore and reuses 'device' for that purpose.
- Block jobs can now be owned by any intermediate node, and not just
  by the ones at the root. query-block-jobs is updated to reflect that
  change.
- The 'device' parameter of all 'block-job-*' commands can now take a
  node name.
- The BlockJobInfo type and all BLOCK_JOB_* events report the node
  name in the 'device' field if the node does not have a device name.
- All intermediate nodes are blocked (and checked for blockers) during
  the streaming operation.

v1: https://lists.gnu.org/archive/html/qemu-devel/2015-02/msg04116.html

backport-diff against v10:
Key:
[] : patches are identical
[] : number of functional differences between upstream/downstream patch
[down] : patch is downstream-only
The flags [FC] indicate (F)unctional and (C)ontextual differences, respectively

001/19:[down] 'block: Add bdrv_drain_all_{begin,end}()'
002/19:[0022] [FC] 'block: Pause all jobs during bdrv_reopen_multiple()'
003/19:[0006] [FC] 'block: Add block_job_add_bdrv()'
004/19:[] [--] 'block: Use block_job_add_bdrv() in mirror_start_job()'
005/19:[] [--] 'block: Use block_job_add_bdrv() in backup_start()'
006/19:[] [--] 'block: Check

[Qemu-block] [PATCH v11 08/19] block: Block all intermediate nodes in commit_active_start()

2016-10-14 Thread Alberto Garcia

When block-commit is launched without the top parameter, it uses
internally a mirror block job. In that case all intermediate nodes
between the active and base nodes must be blocked as well.

Signed-off-by: Alberto Garcia 
Reviewed-by: Kevin Wolf 
---
 block/mirror.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/block/mirror.c b/block/mirror.c
index 9b5159f..a5b71b7 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -965,6 +965,14 @@ static void mirror_start_job(const char *job_id, 
BlockDriverState *bs,
 }
 
 block_job_add_bdrv(>common, target);
+/* In commit_active_start() all intermediate nodes disappear, so
+ * any jobs in them must be blocked */
+if (bdrv_chain_contains(bs, target)) {
+BlockDriverState *iter;
+for (iter = backing_bs(bs); iter != target; iter = backing_bs(iter)) {
+block_job_add_bdrv(>common, iter);
+}
+}
 
 s->common.co = qemu_coroutine_create(mirror_run, s);
 trace_mirror_start(bs, s, s->common.co, opaque);
-- 
2.9.3

[Qemu-block] [PATCH v11 15/19] qemu-iotests: Test block-stream and block-commit in parallel

2016-10-14 Thread Alberto Garcia

As with test_stream_parallel(), we allow mixing block-stream and
block-commit operations in the same backing chain as long as there's
no overlap among the involved nodes.

Signed-off-by: Alberto Garcia 
---
 tests/qemu-iotests/030 | 30 ++
 tests/qemu-iotests/030.out |  4 ++--
 2 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030
index e3bded8..d88823a 100755
--- a/tests/qemu-iotests/030
+++ b/tests/qemu-iotests/030
@@ -315,6 +315,36 @@ class TestParallelOps(iotests.QMPTestCase):
 self.assert_qmp(result, 'return', {})
 
 self.wait_until_completed(drive='commit-drive0')
+
+# Test a block-stream and a block-commit job in parallel
+def test_stream_commit(self):
+self.assertLessEqual(8, self.num_imgs)
+self.assert_no_active_block_jobs()
+
+# Stream from node0 into node2
+result = self.vm.qmp('block-stream', device='node2', job_id='node2')
+self.assert_qmp(result, 'return', {})
+
+# Commit from the active layer into node3
+result = self.vm.qmp('block-commit', device='drive0', 
top=self.imgs[5], base=self.imgs[3])
+self.assert_qmp(result, 'return', {})
+
+# Wait for all jobs to be finished.
+pending_jobs = ['node2', 'drive0']
+while len(pending_jobs) > 0:
+for event in self.vm.get_qmp_events(wait=True):
+if event['event'] == 'BLOCK_JOB_COMPLETED':
+node_name = self.dictpath(event, 'data/device')
+self.assertTrue(node_name in pending_jobs)
+self.assert_qmp_absent(event, 'data/error')
+pending_jobs.remove(node_name)
+if event['event'] == 'BLOCK_JOB_READY':
+self.assert_qmp(event, 'data/device', 'drive0')
+self.assert_qmp(event, 'data/type', 'commit')
+self.assert_qmp_absent(event, 'data/error')
+self.assertTrue('drive0' in pending_jobs)
+self.vm.qmp('block-job-complete', device='drive0')
+
 self.assert_no_active_block_jobs()
 
 class TestSmallerBackingFile(iotests.QMPTestCase):
diff --git a/tests/qemu-iotests/030.out b/tests/qemu-iotests/030.out
index 4176bb9..3a89159 100644
--- a/tests/qemu-iotests/030.out
+++ b/tests/qemu-iotests/030.out
@@ -1,5 +1,5 @@
-...
+
 --
-Ran 19 tests
+Ran 20 tests
 
 OK
-- 
2.9.3

[Qemu-block] [PATCH v11 04/19] block: Use block_job_add_bdrv() in mirror_start_job()

2016-10-14 Thread Alberto Garcia

Use block_job_add_bdrv() instead of blocking all operations in
mirror_start_job() and unblocking them in mirror_exit().

Signed-off-by: Alberto Garcia 
---
 block/mirror.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/block/mirror.c b/block/mirror.c
index f9d1fec..9b5159f 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -526,7 +526,6 @@ static void mirror_exit(BlockJob *job, void *opaque)
 aio_context_release(replace_aio_context);
 }
 g_free(s->replaces);
-bdrv_op_unblock_all(target_bs, s->common.blocker);
 blk_unref(s->target);
 block_job_completed(>common, data->ret);
 g_free(data);
@@ -965,7 +964,7 @@ static void mirror_start_job(const char *job_id, 
BlockDriverState *bs,
 return;
 }
 
-bdrv_op_block_all(target, s->common.blocker);
+block_job_add_bdrv(>common, target);
 
 s->common.co = qemu_coroutine_create(mirror_run, s);
 trace_mirror_start(bs, s, s->common.co, opaque);
-- 
2.9.3

[Qemu-block] [PATCH v11 18/19] block: Add 'base-node' parameter to the 'block-stream' command

2016-10-14 Thread Alberto Garcia

The way to specify the node from which to copy data in the
block-stream operation is by using the 'base' parameter. This
parameter however takes a file name, not a node name.

Since we want to be able to perform this operation using only node
names, this patch adds a new 'base-node' parameter.

Signed-off-by: Alberto Garcia 
---
 blockdev.c| 21 +
 docs/qmp-commands.txt |  7 +--
 hmp.c |  2 +-
 qapi/block-core.json  |  8 ++--
 4 files changed, 33 insertions(+), 5 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index c01776a..4ed718d 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2932,6 +2932,7 @@ static void block_job_cb(void *opaque, int ret)
 
 void qmp_block_stream(bool has_job_id, const char *job_id, const char *device,
   bool has_base, const char *base,
+  bool has_base_node, const char *base_node,
   bool has_backing_file, const char *backing_file,
   bool has_speed, int64_t speed,
   bool has_on_error, BlockdevOnError on_error,
@@ -2955,6 +2956,12 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 aio_context = bdrv_get_aio_context(bs);
 aio_context_acquire(aio_context);
 
+if (has_base && has_base_node) {
+error_setg(errp, "'base' and 'base-node' cannot be specified "
+   "at the same time");
+goto out;
+}
+
 if (has_base) {
 base_bs = bdrv_find_backing_image(bs, base);
 if (base_bs == NULL) {
@@ -2965,6 +2972,20 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 base_name = base;
 }
 
+if (has_base_node) {
+base_bs = bdrv_lookup_bs(NULL, base_node, errp);
+if (!base_bs) {
+goto out;
+}
+if (bs == base_bs || !bdrv_chain_contains(bs, base_bs)) {
+error_setg(errp, "Node '%s' is not a backing image of '%s'",
+   base_node, device);
+goto out;
+}
+assert(bdrv_get_aio_context(base_bs) == aio_context);
+base_name = base_bs->filename;
+}
+
 /* Check for op blockers in the whole chain between bs and base */
 for (iter = bs; iter && iter != base_bs; iter = backing_bs(iter)) {
 if (bdrv_op_is_blocked(iter, BLOCK_OP_TYPE_STREAM, errp)) {
diff --git a/docs/qmp-commands.txt b/docs/qmp-commands.txt
index 7f652e0..7409865 100644
--- a/docs/qmp-commands.txt
+++ b/docs/qmp-commands.txt
@@ -740,8 +740,11 @@ Arguments:
 - "job-id": Identifier for the newly-created block job. If omitted,
 the device name will be used. (json-string, optional)
 - "device": The device name or node-name of a root node (json-string)
-- "base": The file name of the backing image above which copying starts
-  (json-string, optional)
+- "base": The file name of the backing image above which copying starts.
+  It cannot be set if 'base-node' is also set (json-string, optional)
+- "base-node": the node name of the backing image above which copying starts.
+   It cannot be set if 'base' is also set.
+   (json-string, optional) (Since 2.8)
 - "backing-file": The backing file string to write into the active layer. This
   filename is not validated.
 
diff --git a/hmp.c b/hmp.c
index 42bef84..0731266 100644
--- a/hmp.c
+++ b/hmp.c
@@ -1526,7 +1526,7 @@ void hmp_block_stream(Monitor *mon, const QDict *qdict)
 int64_t speed = qdict_get_try_int(qdict, "speed", 0);
 
 qmp_block_stream(false, NULL, device, base != NULL, base, false, NULL,
- qdict_haskey(qdict, "speed"), speed,
+ false, NULL, qdict_haskey(qdict, "speed"), speed,
  true, BLOCKDEV_ON_ERROR_REPORT, );
 
 hmp_handle_error(mon, );
diff --git a/qapi/block-core.json b/qapi/block-core.json
index cb9..fa1e0ec 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1481,7 +1481,11 @@
 #
 # @device: the device or node name of the top image
 #
-# @base:   #optional the common backing file name
+# @base:   #optional the common backing file name.
+#It cannot be set if @base-node is also set.
+#
+# @base-node: #optional the node name of the backing file.
+#   It cannot be set if @base is also set. (Since 2.8)
 #
 # @backing-file: #optional The backing file string to write into the top
 #  image. This filename is not validated.
@@ -1508,7 +1512,7 @@
 ##
 { 'command': 'block-stream',
   'data': { '*job-id': 'str', 'device': 'str', '*base': 'str',
-'*backing-file': 'str', '*speed': 'int',
+'*base-node': 'str', '*backing-file': 'str', '*speed': 'int',
 '*on-error': 'BlockdevOnError' } }
 
 ##
-- 
2.9.3

[Qemu-block] [PATCH v11 01/19] block: Add bdrv_drain_all_{begin, end}()

2016-10-14 Thread Alberto Garcia

bdrv_drain_all() doesn't allow the caller to do anything after all
pending requests have been completed but before block jobs are
resumed.

This patch splits bdrv_drain_all() into _begin() and _end() for that
purpose. It also adds aio_{disable,enable}_external() calls to disable
external clients in the meantime.

Signed-off-by: Alberto Garcia 
---
 block/io.c| 24 +---
 include/block/block.h |  2 ++
 2 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/block/io.c b/block/io.c
index b136c89..9418f8b 100644
--- a/block/io.c
+++ b/block/io.c
@@ -275,8 +275,11 @@ void bdrv_drain(BlockDriverState *bs)
  *
  * This function does not flush data to disk, use bdrv_flush_all() for that
  * after calling this function.
+ *
+ * This pauses all block jobs and disables external clients. It must
+ * be paired with bdrv_drain_all_end().
  */
-void bdrv_drain_all(void)
+void bdrv_drain_all_begin(void)
 {
 /* Always run first iteration so any pending completion BHs run */
 bool busy = true;
@@ -300,6 +303,7 @@ void bdrv_drain_all(void)
 bdrv_parent_drained_begin(bs);
 bdrv_io_unplugged_begin(bs);
 bdrv_drain_recurse(bs);
+aio_disable_external(aio_context);
 aio_context_release(aio_context);
 
 if (!g_slist_find(aio_ctxs, aio_context)) {
@@ -333,17 +337,25 @@ void bdrv_drain_all(void)
 }
 }
 
+g_slist_free(aio_ctxs);
+}
+
+void bdrv_drain_all_end(void)
+{
+BlockDriverState *bs;
+BdrvNextIterator it;
+BlockJob *job = NULL;
+
 for (bs = bdrv_first(); bs; bs = bdrv_next()) {
 AioContext *aio_context = bdrv_get_aio_context(bs);
 
 aio_context_acquire(aio_context);
+aio_enable_external(aio_context);
 bdrv_io_unplugged_end(bs);
 bdrv_parent_drained_end(bs);
 aio_context_release(aio_context);
 }
-g_slist_free(aio_ctxs);
 
-job = NULL;
 while ((job = block_job_next(job))) {
 AioContext *aio_context = blk_get_aio_context(job->blk);
 
@@ -353,6 +365,12 @@ void bdrv_drain_all(void)
 }
 }
 
+void bdrv_drain_all(void)
+{
+bdrv_drain_all_begin();
+bdrv_drain_all_end();
+}
+
 /**
  * Remove an active request from the tracked requests list
  *
diff --git a/include/block/block.h b/include/block/block.h
index 107c603..301d713 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -338,6 +338,8 @@ int bdrv_flush_all(void);
 void bdrv_close_all(void);
 void bdrv_drain(BlockDriverState *bs);
 void coroutine_fn bdrv_co_drain(BlockDriverState *bs);
+void bdrv_drain_all_begin(void);
+void bdrv_drain_all_end(void);
 void bdrv_drain_all(void);
 
 int bdrv_pdiscard(BlockDriverState *bs, int64_t offset, int count);
-- 
2.9.3

[Qemu-block] [PATCH v11 13/19] qemu-iotests: Test block-stream operations in parallel

2016-10-14 Thread Alberto Garcia

This test case checks that it's possible to launch several stream
operations in parallel in the same snapshot chain, each one involving
a different set of nodes.

Signed-off-by: Alberto Garcia 
---
 tests/qemu-iotests/030 | 80 ++
 tests/qemu-iotests/030.out |  4 +--
 2 files changed, 82 insertions(+), 2 deletions(-)

diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030
index 367ecf8..e487473 100755
--- a/tests/qemu-iotests/030
+++ b/tests/qemu-iotests/030
@@ -148,6 +148,86 @@ class TestSingleDrive(iotests.QMPTestCase):
 self.assert_qmp(result, 'error/class', 'GenericError')
 
 
+class TestParallelOps(iotests.QMPTestCase):
+num_ops = 4 # Number of parallel block-stream operations
+num_imgs = num_ops * 2 + 1
+image_len = num_ops * 1024 * 1024
+imgs = []
+
+def setUp(self):
+opts = []
+self.imgs = []
+
+# Initialize file names and command-line options
+for i in range(self.num_imgs):
+img_depth = self.num_imgs - i - 1
+opts.append("backing." * img_depth + "node-name=node%d" % i)
+self.imgs.append(os.path.join(iotests.test_dir, 'img-%d.img' % i))
+
+# Create all images
+iotests.create_image(self.imgs[0], self.image_len)
+for i in range(1, self.num_imgs):
+qemu_img('create', '-f', iotests.imgfmt,
+ '-o', 'backing_file=%s' % self.imgs[i-1], self.imgs[i])
+
+# Put data into the images we are copying data from
+for i in range(self.num_imgs / 2):
+img_index = i * 2 + 1
+# Alternate between 512k and 1M.
+# This way jobs will not finish in the same order they were created
+num_kb = 512 + 512 * (i % 2)
+qemu_io('-f', iotests.imgfmt,
+'-c', 'write -P %d %d %d' % (i, i*1024*1024, num_kb * 
1024),
+self.imgs[img_index])
+
+# Attach the drive to the VM
+self.vm = iotests.VM()
+self.vm.add_drive(self.imgs[-1], ','.join(opts))
+self.vm.launch()
+
+def tearDown(self):
+self.vm.shutdown()
+for img in self.imgs:
+os.remove(img)
+
+# Test that it's possible to run several block-stream operations
+# in parallel in the same snapshot chain
+def test_stream_parallel(self):
+self.assert_no_active_block_jobs()
+
+# Check that the maps don't match before the streaming operations
+for i in range(2, self.num_imgs, 2):
+self.assertNotEqual(qemu_io('-f', iotests.imgfmt, '-c', 'map', 
self.imgs[i]),
+qemu_io('-f', iotests.imgfmt, '-c', 'map', 
self.imgs[i-1]),
+'image file map matches backing file before 
streaming')
+
+# Create all streaming jobs
+pending_jobs = []
+for i in range(2, self.num_imgs, 2):
+node_name = 'node%d' % i
+job_id = 'stream-%s' % node_name
+pending_jobs.append(job_id)
+result = self.vm.qmp('block-stream', device=node_name, 
job_id=job_id, base=self.imgs[i-2], speed=512*1024)
+self.assert_qmp(result, 'return', {})
+
+# Wait for all jobs to be finished.
+while len(pending_jobs) > 0:
+for event in self.vm.get_qmp_events(wait=True):
+if event['event'] == 'BLOCK_JOB_COMPLETED':
+job_id = self.dictpath(event, 'data/device')
+self.assertTrue(job_id in pending_jobs)
+self.assert_qmp_absent(event, 'data/error')
+pending_jobs.remove(job_id)
+
+self.assert_no_active_block_jobs()
+self.vm.shutdown()
+
+# Check that all maps match now
+for i in range(2, self.num_imgs, 2):
+self.assertEqual(qemu_io('-f', iotests.imgfmt, '-c', 'map', 
self.imgs[i]),
+ qemu_io('-f', iotests.imgfmt, '-c', 'map', 
self.imgs[i-1]),
+ 'image file map does not match backing file after 
streaming')
+
 class TestSmallerBackingFile(iotests.QMPTestCase):
 backing_len = 1 * 1024 * 1024 # MB
 image_len = 2 * backing_len
diff --git a/tests/qemu-iotests/030.out b/tests/qemu-iotests/030.out
index 96961ed..b6f2576 100644
--- a/tests/qemu-iotests/030.out
+++ b/tests/qemu-iotests/030.out
@@ -1,5 +1,5 @@
-...
+
 --
-Ran 15 tests
+Ran 16 tests
 
 OK
-- 
2.9.3

[Qemu-block] [PATCH v11 02/19] block: Pause all jobs during bdrv_reopen_multiple()

2016-10-14 Thread Alberto Garcia

When a BlockDriverState is about to be reopened it can trigger certain
operations that need to write to disk. During this process a different
block job can be woken up. If that block job completes and also needs
to call bdrv_reopen() it can happen that it needs to do it on the same
BlockDriverState that is still in the process of being reopened.

This can have fatal consequences, like in this example:

  1) Block job A starts and sleeps after a while.
  2) Block job B starts and tries to reopen node1 (a qcow2 file).
  3) Reopening node1 means flushing and replacing its qcow2 cache.
  4) While the qcow2 cache is being flushed, job A wakes up.
  5) Job A completes and reopens node1, replacing its cache.
  6) Job B resumes, but the cache that was being flushed no longer
 exists.

This patch splits the bdrv_drain_all() call to keep all block jobs
paused during bdrv_reopen_multiple(), so that step 4 can never happen
and the operation is safe.

Note that this scenario can only happen if both bdrv_reopen() calls
are made by block jobs on the same backing chain. Otherwise there's no
chance that the same BlockDriverState appears in both reopen queues.

Signed-off-by: Alberto Garcia 
---
 block.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/block.c b/block.c
index 7f3e7bc..adbecd0 100644
--- a/block.c
+++ b/block.c
@@ -2090,7 +2090,7 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, 
Error **errp)
 
 assert(bs_queue != NULL);
 
-bdrv_drain_all();
+bdrv_drain_all_begin();
 
 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
 if (bdrv_reopen_prepare(_entry->state, bs_queue, _err)) {
@@ -2120,6 +2120,9 @@ cleanup:
 g_free(bs_entry);
 }
 g_free(bs_queue);
+
+bdrv_drain_all_end();
+
 return ret;
 }
 
-- 
2.9.3

[Qemu-block] [PATCHv2 1/2] dma-helpers: explicitly pass alignment into DMA helpers

2016-10-14 Thread Mark Cave-Ayland

The hard-coded default alignment is BDRV_SECTOR_SIZE, however this is not
necessarily the case for all platforms. Use this as the default alignment for
all current callers.

Signed-off-by: Mark Cave-Ayland 
Reviewed-by: Eric Blake 
Acked-by: John Snow 
---
 dma-helpers.c|   21 -
 hw/block/nvme.c  |6 --
 hw/ide/ahci.c|2 ++
 hw/ide/core.c|6 +++---
 hw/scsi/scsi-disk.c  |2 ++
 include/sysemu/dma.h |6 +++---
 6 files changed, 26 insertions(+), 17 deletions(-)

diff --git a/dma-helpers.c b/dma-helpers.c
index 9defc10..6f9d47c 100644
--- a/dma-helpers.c
+++ b/dma-helpers.c
@@ -73,6 +73,7 @@ typedef struct {
 AioContext *ctx;
 BlockAIOCB *acb;
 QEMUSGList *sg;
+uint32_t align;
 uint64_t offset;
 DMADirection dir;
 int sg_cur_index;
@@ -160,8 +161,9 @@ static void dma_blk_cb(void *opaque, int ret)
 return;
 }
 
-if (dbs->iov.size & ~BDRV_SECTOR_MASK) {
-qemu_iovec_discard_back(>iov, dbs->iov.size & ~BDRV_SECTOR_MASK);
+if (!QEMU_IS_ALIGNED(dbs->iov.size, dbs->align)) {
+qemu_iovec_discard_back(>iov,
+QEMU_ALIGN_DOWN(dbs->iov.size, dbs->align));
 }
 
 dbs->acb = dbs->io_func(dbs->offset, >iov,
@@ -199,7 +201,7 @@ static const AIOCBInfo dma_aiocb_info = {
 };
 
 BlockAIOCB *dma_blk_io(AioContext *ctx,
-QEMUSGList *sg, uint64_t offset,
+QEMUSGList *sg, uint64_t offset, uint32_t align,
 DMAIOFunc *io_func, void *io_func_opaque,
 BlockCompletionFunc *cb,
 void *opaque, DMADirection dir)
@@ -212,6 +214,7 @@ BlockAIOCB *dma_blk_io(AioContext *ctx,
 dbs->sg = sg;
 dbs->ctx = ctx;
 dbs->offset = offset;
+dbs->align = align;
 dbs->sg_cur_index = 0;
 dbs->sg_cur_byte = 0;
 dbs->dir = dir;
@@ -234,11 +237,11 @@ BlockAIOCB *dma_blk_read_io_func(int64_t offset, 
QEMUIOVector *iov,
 }
 
 BlockAIOCB *dma_blk_read(BlockBackend *blk,
- QEMUSGList *sg, uint64_t offset,
+ QEMUSGList *sg, uint64_t offset, uint32_t align,
  void (*cb)(void *opaque, int ret), void *opaque)
 {
-return dma_blk_io(blk_get_aio_context(blk),
-  sg, offset, dma_blk_read_io_func, blk, cb, opaque,
+return dma_blk_io(blk_get_aio_context(blk), sg, offset, align,
+  dma_blk_read_io_func, blk, cb, opaque,
   DMA_DIRECTION_FROM_DEVICE);
 }
 
@@ -252,11 +255,11 @@ BlockAIOCB *dma_blk_write_io_func(int64_t offset, 
QEMUIOVector *iov,
 }
 
 BlockAIOCB *dma_blk_write(BlockBackend *blk,
-  QEMUSGList *sg, uint64_t offset,
+  QEMUSGList *sg, uint64_t offset, uint32_t align,
   void (*cb)(void *opaque, int ret), void *opaque)
 {
-return dma_blk_io(blk_get_aio_context(blk),
-  sg, offset, dma_blk_write_io_func, blk, cb, opaque,
+return dma_blk_io(blk_get_aio_context(blk), sg, offset, align,
+  dma_blk_write_io_func, blk, cb, opaque,
   DMA_DIRECTION_TO_DEVICE);
 }
 
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index cef3bb4..b380142 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -258,8 +258,10 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, 
NvmeCmd *cmd,
 req->has_sg = true;
 dma_acct_start(n->conf.blk, >acct, >qsg, acct);
 req->aiocb = is_write ?
-dma_blk_write(n->conf.blk, >qsg, data_offset, nvme_rw_cb, req) :
-dma_blk_read(n->conf.blk, >qsg, data_offset, nvme_rw_cb, req);
+dma_blk_write(n->conf.blk, >qsg, data_offset, BDRV_SECTOR_SIZE,
+  nvme_rw_cb, req) :
+dma_blk_read(n->conf.blk, >qsg, data_offset, BDRV_SECTOR_SIZE,
+ nvme_rw_cb, req);
 
 return NVME_NO_COMPLETE;
 }
diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index 63ead21..3c19bda 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -1009,6 +1009,7 @@ static void execute_ncq_command(NCQTransferState *ncq_tfs)
_tfs->sglist, BLOCK_ACCT_READ);
 ncq_tfs->aiocb = dma_blk_read(ide_state->blk, _tfs->sglist,
   ncq_tfs->lba << BDRV_SECTOR_BITS,
+  BDRV_SECTOR_SIZE,
   ncq_cb, ncq_tfs);
 break;
 case WRITE_FPDMA_QUEUED:
@@ -1022,6 +1023,7 @@ static void execute_ncq_command(NCQTransferState *ncq_tfs)
_tfs->sglist, BLOCK_ACCT_WRITE);
 ncq_tfs->aiocb = dma_blk_write(ide_state->blk, _tfs->sglist,
ncq_tfs->lba << BDRV_SECTOR_BITS,
+   BDRV_SECTOR_SIZE,
ncq_cb, ncq_tfs);
 break;
 default:
diff --git a/hw/ide/core.c b/hw/ide/core.c
index 7291677..43709e5 100644

[Qemu-block] [PATCHv2 2/2] macio: switch over to new byte-aligned DMA helpers

2016-10-14 Thread Mark Cave-Ayland

Now that the DMA helpers are byte-aligned they can be called directly from
the macio routines rather than emulating byte-aligned accesses via multiple
block-level accesses.

Signed-off-by: Mark Cave-Ayland 
Reviewed-by: Eric Blake 
Reviewed-by: John Snow 
---
 hw/ide/macio.c |  213 
 1 file changed, 28 insertions(+), 185 deletions(-)

diff --git a/hw/ide/macio.c b/hw/ide/macio.c
index 76f97c2..9742c00 100644
--- a/hw/ide/macio.c
+++ b/hw/ide/macio.c
@@ -52,187 +52,6 @@ static const int debug_macio = 0;
 
 #define MACIO_PAGE_SIZE 4096
 
-/*
- * Unaligned DMA read/write access functions required for OS X/Darwin which
- * don't perform DMA transactions on sector boundaries. These functions are
- * modelled on bdrv_co_preadv()/bdrv_co_pwritev() and so should be easy to
- * remove if the unaligned block APIs are ever exposed.
- */
-
-static void pmac_dma_read(BlockBackend *blk,
-  int64_t offset, unsigned int bytes,
-  void (*cb)(void *opaque, int ret), void *opaque)
-{
-DBDMA_io *io = opaque;
-MACIOIDEState *m = io->opaque;
-IDEState *s = idebus_active_if(>bus);
-dma_addr_t dma_addr;
-int64_t sector_num;
-int nsector;
-uint64_t align = BDRV_SECTOR_SIZE;
-size_t head_bytes, tail_bytes;
-
-qemu_iovec_destroy(>iov);
-qemu_iovec_init(>iov, io->len / MACIO_PAGE_SIZE + 1);
-
-sector_num = (offset >> 9);
-nsector = (io->len >> 9);
-
-MACIO_DPRINTF("--- DMA read transfer (0x%" HWADDR_PRIx ",0x%x): "
-  "sector_num: %" PRId64 ", nsector: %d\n", io->addr, io->len,
-  sector_num, nsector);
-
-dma_addr = io->addr;
-io->dir = DMA_DIRECTION_FROM_DEVICE;
-io->dma_len = io->len;
-io->dma_mem = dma_memory_map(_space_memory, dma_addr, >dma_len,
- io->dir);
-
-if (offset & (align - 1)) {
-head_bytes = offset & (align - 1);
-
-MACIO_DPRINTF("--- DMA unaligned head: sector %" PRId64 ", "
-  "discarding %zu bytes\n", sector_num, head_bytes);
-
-qemu_iovec_add(>iov, >head_remainder, head_bytes);
-
-bytes += offset & (align - 1);
-offset = offset & ~(align - 1);
-}
-
-qemu_iovec_add(>iov, io->dma_mem, io->len);
-
-if ((offset + bytes) & (align - 1)) {
-tail_bytes = (offset + bytes) & (align - 1);
-
-MACIO_DPRINTF("--- DMA unaligned tail: sector %" PRId64 ", "
-  "discarding bytes %zu\n", sector_num, tail_bytes);
-
-qemu_iovec_add(>iov, >tail_remainder, align - tail_bytes);
-bytes = ROUND_UP(bytes, align);
-}
-
-s->io_buffer_size -= io->len;
-s->io_buffer_index += io->len;
-
-io->len = 0;
-
-MACIO_DPRINTF("--- Block read transfer - sector_num: %" PRIx64 "  "
-  "nsector: %x\n", (offset >> 9), (bytes >> 9));
-
-s->bus->dma->aiocb = blk_aio_preadv(blk, offset, >iov, 0, cb, io);
-}
-
-static void pmac_dma_write(BlockBackend *blk,
- int64_t offset, int bytes,
- void (*cb)(void *opaque, int ret), void *opaque)
-{
-DBDMA_io *io = opaque;
-MACIOIDEState *m = io->opaque;
-IDEState *s = idebus_active_if(>bus);
-dma_addr_t dma_addr;
-int64_t sector_num;
-int nsector;
-uint64_t align = BDRV_SECTOR_SIZE;
-size_t head_bytes, tail_bytes;
-bool unaligned_head = false, unaligned_tail = false;
-
-qemu_iovec_destroy(>iov);
-qemu_iovec_init(>iov, io->len / MACIO_PAGE_SIZE + 1);
-
-sector_num = (offset >> 9);
-nsector = (io->len >> 9);
-
-MACIO_DPRINTF("--- DMA write transfer (0x%" HWADDR_PRIx ",0x%x): "
-  "sector_num: %" PRId64 ", nsector: %d\n", io->addr, io->len,
-  sector_num, nsector);
-
-dma_addr = io->addr;
-io->dir = DMA_DIRECTION_TO_DEVICE;
-io->dma_len = io->len;
-io->dma_mem = dma_memory_map(_space_memory, dma_addr, >dma_len,
- io->dir);
-
-if (offset & (align - 1)) {
-head_bytes = offset & (align - 1);
-sector_num = ((offset & ~(align - 1)) >> 9);
-
-MACIO_DPRINTF("--- DMA unaligned head: pre-reading head sector %"
-  PRId64 "\n", sector_num);
-
-blk_pread(s->blk, (sector_num << 9), >head_remainder, align);
-
-qemu_iovec_add(>iov, >head_remainder, head_bytes);
-qemu_iovec_add(>iov, io->dma_mem, io->len);
-
-bytes += offset & (align - 1);
-offset = offset & ~(align - 1);
-
-unaligned_head = true;
-}
-
-if ((offset + bytes) & (align - 1)) {
-tail_bytes = (offset + bytes) & (align - 1);
-sector_num = (((offset + bytes) & ~(align - 1)) >> 9);
-
-MACIO_DPRINTF("--- DMA unaligned tail: pre-reading tail sector %"
-  PRId64 "\n", sector_num);
-
-

[Qemu-block] [PATCHv2 0/2] dma-helpers: explicitly pass alignment into DMA helpers

2016-10-14 Thread Mark Cave-Ayland

This is a follow-up to the thread at
https://lists.gnu.org/archive/html/qemu-devel/2016-09/msg01396.html which
introduces an explicit alignment to the DMA helpers to facilitate conversion
of the macio controller over to use the now byte-aligned DMA helpers.

Patch 1 introduces an alignment parameter as suggested by Paolo above, whilst
patch 2 performs the conversion for the macio controller.

Signed-off-by: Mark Cave-Ayland 

v2:
- Use QEMU_IS_ALIGNED and QEMU_ALIGN_DOWN macros suggested by Eric
- Add Reviewed-by/Acked-by tags from Eric and John
- Rebase onto master

Mark Cave-Ayland (2):
  dma-helpers: explicitly pass alignment into DMA helpers
  macio: switch over to new byte-aligned DMA helpers

 dma-helpers.c|   21 ++---
 hw/block/nvme.c  |6 +-
 hw/ide/ahci.c|2 +
 hw/ide/core.c|6 +-
 hw/ide/macio.c   |  213 +++---
 hw/scsi/scsi-disk.c  |2 +
 include/sysemu/dma.h |6 +-
 7 files changed, 54 insertions(+), 202 deletions(-)

-- 
1.7.10.4

Re: [Qemu-block] [PATCH v2 0/4] fdc: Use separate qdev device for drives

2016-10-14 Thread Kevin Wolf

Am 30.09.2016 um 21:39 hat Kevin Wolf geschrieben:
> We have been complaining for a long time about how the floppy controller and
> floppy drives are combined in a single qdev device and how this makes the
> device awkward to work with because it behaves different from all other block
> devices.
> 
> The latest reason to complain was when I noticed that using qdev device names
> in QMP commands (e.g. for media change) doesn't really work when only the
> controller is a qdev device, but the drives aren't.
> 
> So I decided to have a go at it, and this is the result.
> 
> It doesn't actually change any of the inner workings of the floppy controller,
> but it wires things up differently on the qdev layer so that a floppy
> controller now exposes a bus on which the floppy drives sit. This results in a
> structure that is similar to IDE where the actual drive state is still in the
> controller and the qdev device basically just contains the qdev properties -
> not pretty, but quite workable.
> 
> The commit message of patch 3 explains how to use it. In short, there is a
> '-device floppy' now and it does what you would expect if you ever used 
> ide-cd.
> 
> The other problem is old command lines, especially those using things like
> '-global isa-fdc,driveA=...'. In order to keep them working, we need to 
> forward
> the property to an internally created floppy drive device. This is a bit like
> usb-storage, which we know is ugly, but works well enough in practice. The 
> good
> news here is that in contrast to usb-storage, the floppy controller only does
> the forwarding for legacy configurations; as soon as you start using '-device
> floppy', it doesn't happen any more.
> 
> So as you may have expected, this conversion doesn't result in a perfect
> device, but I think it's definitely an improvement over the old state. I hope
> you like it despite the warts. :-)
> 
> v2:
> - Added patch 4 (qemu-iotests case for floppy config on the command line)
> - Patch 2: Create a floppy device only if a BlockBackend exists instead of
>   always creating two of them
> - Patch 2: Initialise drive->fdctrl even if no drive is attached, it is
>   accessed anyway during migration
> - Patch 3: Keep 'type' qdev property and FDrive->drive in sync
> - Patch 3: Removed if with condition that is always true

ping (freeze coming closer...)

Re: [Qemu-block] [PATCH 07/18] block: introduce bdrv_poll_while and bdrv_wakeup

2016-10-14 Thread Paolo Bonzini



On 14/10/2016 12:42, Fam Zheng wrote:
>> > diff --git a/block/qed.c b/block/qed.c
>> > index 1a7ef0a..dcb5fb9 100644
>> > --- a/block/qed.c
>> > +++ b/block/qed.c
>> > @@ -354,7 +354,9 @@ static void qed_start_need_check_timer(BDRVQEDState *s)
>> >  static void qed_cancel_need_check_timer(BDRVQEDState *s)
>> >  {
>> >  trace_qed_cancel_need_check_timer(s);
>> > -timer_del(s->need_check_timer);
>> > +if (s->need_check_timer) {
>> > +timer_del(s->need_check_timer);
>> > +}
>> >  }
> 
> This belongs to a separate patch, or deserves an explanation in the commit
> message.

It probably belongs in no patch, actually.

Paolo

Re: [Qemu-block] [PATCH 07/18] block: introduce bdrv_poll_while and bdrv_wakeup

2016-10-14 Thread Fam Zheng

On Thu, 10/13 19:34, Paolo Bonzini wrote:
> We want the BDS event loop to run exclusively in the iothread that
> owns the BDS's AioContext.  This function and macro provides the
> synchronization between the two event loops.
> 
> Signed-off-by: Paolo Bonzini 
> ---
>  block/block-backend.c |  7 +--
>  block/io.c| 47 
> +++
>  block/qed-table.c | 16 
>  block/qed.c   |  4 +++-
>  include/block/block.h |  9 +
>  include/block/block_int.h |  1 +
>  6 files changed, 29 insertions(+), 55 deletions(-)
> 
> diff --git a/block/block-backend.c b/block/block-backend.c
> index 234df1e..c5c2597 100644
> --- a/block/block-backend.c
> +++ b/block/block-backend.c
> @@ -878,7 +878,6 @@ static int blk_prw(BlockBackend *blk, int64_t offset, 
> uint8_t *buf,
> int64_t bytes, CoroutineEntry co_entry,
> BdrvRequestFlags flags)
>  {
> -AioContext *aio_context;
>  QEMUIOVector qiov;
>  struct iovec iov;
>  Coroutine *co;
> @@ -900,11 +899,7 @@ static int blk_prw(BlockBackend *blk, int64_t offset, 
> uint8_t *buf,
>  
>  co = qemu_coroutine_create(co_entry, );
>  qemu_coroutine_enter(co);
> -
> -aio_context = blk_get_aio_context(blk);
> -while (rwco.ret == NOT_DONE) {
> -aio_poll(aio_context, true);
> -}
> +bdrv_poll_while(blk_bs(blk), rwco.ret == NOT_DONE);

Can we make it "BDRV_POLL_WHILE"? With lower case the mental model of a reader
would be "evaluate rwco.ret == NOT_DONE" first before doing the poll.

>  
>  return rwco.ret;
>  }
> diff --git a/block/io.c b/block/io.c
> index afec968..7d3dcfc 100644
> --- a/block/io.c
> +++ b/block/io.c
> @@ -156,23 +156,12 @@ bool bdrv_requests_pending(BlockDriverState *bs)
>  return false;
>  }
>  
> -static bool bdrv_drain_poll(BlockDriverState *bs)
> -{
> -bool waited = false;
> -
> -while (atomic_read(>in_flight) > 0) {
> -aio_poll(bdrv_get_aio_context(bs), true);
> -waited = true;
> -}
> -return waited;
> -}
> -
>  static bool bdrv_drain_recurse(BlockDriverState *bs)
>  {
>  BdrvChild *child;
>  bool waited;
>  
> -waited = bdrv_drain_poll(bs);
> +waited = bdrv_poll_while(bs, atomic_read(>in_flight) > 0);
>  
>  if (bs->drv && bs->drv->bdrv_drain) {
>  bs->drv->bdrv_drain(bs);
> @@ -485,9 +474,14 @@ void bdrv_inc_in_flight(BlockDriverState *bs)
>  atomic_inc(>in_flight);
>  }
>  
> +void bdrv_wakeup(BlockDriverState *bs)
> +{
> +}
> +
>  void bdrv_dec_in_flight(BlockDriverState *bs)
>  {
>  atomic_dec(>in_flight);
> +bdrv_wakeup(bs);
>  }
>  
>  static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
> @@ -597,13 +591,9 @@ static int bdrv_prwv_co(BdrvChild *child, int64_t offset,
>  /* Fast-path if already in coroutine context */
>  bdrv_rw_co_entry();
>  } else {
> -AioContext *aio_context = bdrv_get_aio_context(child->bs);
> -
>  co = qemu_coroutine_create(bdrv_rw_co_entry, );
>  qemu_coroutine_enter(co);
> -while (rwco.ret == NOT_DONE) {
> -aio_poll(aio_context, true);
> -}
> +bdrv_poll_while(child->bs, rwco.ret == NOT_DONE);
>  }
>  return rwco.ret;
>  }
> @@ -1845,14 +1835,10 @@ int64_t bdrv_get_block_status_above(BlockDriverState 
> *bs,
>  /* Fast-path if already in coroutine context */
>  bdrv_get_block_status_above_co_entry();
>  } else {
> -AioContext *aio_context = bdrv_get_aio_context(bs);
> -
>  co = qemu_coroutine_create(bdrv_get_block_status_above_co_entry,
> );
>  qemu_coroutine_enter(co);
> -while (!data.done) {
> -aio_poll(aio_context, true);
> -}
> +bdrv_poll_while(bs, !data.done);
>  }
>  return data.ret;
>  }
> @@ -2411,13 +2397,9 @@ int bdrv_flush(BlockDriverState *bs)
>  /* Fast-path if already in coroutine context */
>  bdrv_flush_co_entry(_co);
>  } else {
> -AioContext *aio_context = bdrv_get_aio_context(bs);
> -
>  co = qemu_coroutine_create(bdrv_flush_co_entry, _co);
>  qemu_coroutine_enter(co);
> -while (flush_co.ret == NOT_DONE) {
> -aio_poll(aio_context, true);
> -}
> +bdrv_poll_while(bs, flush_co.ret == NOT_DONE);
>  }
>  
>  return flush_co.ret;
> @@ -2543,13 +2525,9 @@ int bdrv_pdiscard(BlockDriverState *bs, int64_t 
> offset, int count)
>  /* Fast-path if already in coroutine context */
>  bdrv_pdiscard_co_entry();
>  } else {
> -AioContext *aio_context = bdrv_get_aio_context(bs);
> -
>  co = qemu_coroutine_create(bdrv_pdiscard_co_entry, );
>  qemu_coroutine_enter(co);
> -while (rwco.ret == NOT_DONE) {
> -aio_poll(aio_context, true);
> -}
> +

Re: [Qemu-block] [PATCH 06/18] qed: Implement .bdrv_drain

2016-10-14 Thread Paolo Bonzini



On 14/10/2016 12:33, Fam Zheng wrote:
>> > +bdrv_aio_flush(s->bs->file->bs, qed_clear_need_check, s);
> If this one has to change, what about the other bdrv_aio_flush(s->bs, ...) 
> down
> in this call path:
> 
> qed_need_check_timer_cb
> qed_clear_need_check
> qed_write_header
> qed_flush_after_clear_need_check

It was really just for clarity, so I guess I can change all four of them
in a separate patch.

Paolo

Re: [Qemu-block] [PATCH 03/18] mirror: use bdrv_drained_begin/bdrv_drained_end

2016-10-14 Thread Paolo Bonzini



On 14/10/2016 11:43, Fam Zheng wrote:
> On Thu, 10/13 19:34, Paolo Bonzini wrote:
>> Ensure that there are no changes between the last check to
>> bdrv_get_dirty_count and the switch to the target.
>>
>> There is already a bdrv_drained_end call, we only need to ensure
>> that bdrv_drained_begin is not called twice.
>>
>> Cc: qemu-sta...@nongnu.org
> 
> Cc stable? I don't see an existing bug here, can you explain?

Hmm, I was not sure that mirror was safe for dataplane devices without
the drained section, but it looks like there is no "hole".  So no need
to Cc stable.

>> @@ -802,9 +812,10 @@ immediate_exit:
>>  
>>  data = g_malloc(sizeof(*data));
>>  data->ret = ret;
>> -/* Before we switch to target in mirror_exit, make sure data doesn't
>> - * change. */
>> -bdrv_drained_begin(bs);
>> +
>> +if (need_drain) {
> 
> Not sure whether this if block is necessary (i.e. when !(cnt == 0 &&
> should_complete)), but it certainly doesn't hurt.

Yes, the alternative is to have something similar, to skip the
bdrv_drained_end, in mirror_exit.

Paolo

>> +bdrv_drained_begin(bs);
>> +}
>>  block_job_defer_to_main_loop(>common, mirror_exit, data);
>>  }
>>  
>> -- 
>> 2.7.4
>>
>>
> 
> Fam
>

Re: [Qemu-block] [PATCH 03/18] mirror: use bdrv_drained_begin/bdrv_drained_end

2016-10-14 Thread Fam Zheng

On Thu, 10/13 19:34, Paolo Bonzini wrote:
> Ensure that there are no changes between the last check to
> bdrv_get_dirty_count and the switch to the target.
> 
> There is already a bdrv_drained_end call, we only need to ensure
> that bdrv_drained_begin is not called twice.
> 
> Cc: qemu-sta...@nongnu.org

Cc stable? I don't see an existing bug here, can you explain?

> Signed-off-by: Paolo Bonzini 
> ---
>  block/mirror.c | 35 +++
>  1 file changed, 23 insertions(+), 12 deletions(-)
> 
> diff --git a/block/mirror.c b/block/mirror.c
> index bd1963d..8cd69aa 100644
> --- a/block/mirror.c
> +++ b/block/mirror.c
> @@ -622,6 +622,7 @@ static void coroutine_fn mirror_run(void *opaque)
>  MirrorExitData *data;
>  BlockDriverState *bs = blk_bs(s->common.blk);
>  BlockDriverState *target_bs = blk_bs(s->target);
> +bool need_drain = true;
>  int64_t length;
>  BlockDriverInfo bdi;
>  char backing_filename[2]; /* we only need 2 characters because we are 
> only
> @@ -756,11 +757,26 @@ static void coroutine_fn mirror_run(void *opaque)
>   * source has dirty data to copy!
>   *
>   * Note that I/O can be submitted by the guest while
> - * mirror_populate runs.
> + * mirror_populate runs, so pause it now.  Before deciding
> + * whether to switch to target check one last time if I/O has
> + * come in the meanwhile, and if not flush the data to disk.
>   */
>  trace_mirror_before_drain(s, cnt);
> -bdrv_co_drain(bs);
> +
> +bdrv_drained_begin(bs);
>  cnt = bdrv_get_dirty_count(s->dirty_bitmap);
> +if (cnt > 0) {
> +bdrv_drained_end(bs);
> +continue;
> +}
> +
> +/* The two disks are in sync.  Exit and report successful
> + * completion.
> + */
> +assert(QLIST_EMPTY(>tracked_requests));
> +s->common.cancelled = false;
> +need_drain = false;
> +break;
>  }
>  
>  ret = 0;
> @@ -773,13 +789,6 @@ static void coroutine_fn mirror_run(void *opaque)
>  } else if (!should_complete) {
>  delay_ns = (s->in_flight == 0 && cnt == 0 ? SLICE_TIME : 0);
>  block_job_sleep_ns(>common, QEMU_CLOCK_REALTIME, delay_ns);
> -} else if (cnt == 0) {
> -/* The two disks are in sync.  Exit and report successful
> - * completion.
> - */
> -assert(QLIST_EMPTY(>tracked_requests));
> -s->common.cancelled = false;
> -break;
>  }
>  s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
>  }
> @@ -791,6 +800,7 @@ immediate_exit:
>   * the target is a copy of the source.
>   */
>  assert(ret < 0 || (!s->synced && 
> block_job_is_cancelled(>common)));
> +assert(need_drain);
>  mirror_wait_for_all_io(s);
>  }
>  
> @@ -802,9 +812,10 @@ immediate_exit:
>  
>  data = g_malloc(sizeof(*data));
>  data->ret = ret;
> -/* Before we switch to target in mirror_exit, make sure data doesn't
> - * change. */
> -bdrv_drained_begin(bs);
> +
> +if (need_drain) {

Not sure whether this if block is necessary (i.e. when !(cnt == 0 &&
should_complete)), but it certainly doesn't hurt.

> +bdrv_drained_begin(bs);
> +}
>  block_job_defer_to_main_loop(>common, mirror_exit, data);
>  }
>  
> -- 
> 2.7.4
> 
> 

Fam

Re: [Qemu-block] [PATCH v4 06/12] block/nbd: Accept SocketAddress

2016-10-14 Thread Ashijeet Acharya

On Thu, Oct 13, 2016 at 5:12 PM, Kevin Wolf  wrote:
> Am 28.09.2016 um 22:55 hat Max Reitz geschrieben:
>> Add a new option "address" to the NBD block driver which accepts a
>> SocketAddress.
>>
>> "path", "host" and "port" are still supported as legacy options and are
>> mapped to their corresponding SocketAddress representation.
>>
>> Signed-off-by: Max Reitz 
>
> Not opposed in principle to your change, but we should try to keep the
> naming consistent between NBD and the other block drivers, notably the
> SSH work that is currently going on.
>
> This patch uses 'address' for the SockAddress, the proposed SSH patch
> uses 'server'. I don't mind too much which one we choose, though I like
> 'server' a bit better. Anyway, we should choose one and stick to it in
> all drivers.
>
>>  block/nbd.c   | 166 
>> ++
>>  tests/qemu-iotests/051.out|   4 +-
>>  tests/qemu-iotests/051.pc.out |   4 +-
>>  3 files changed, 106 insertions(+), 68 deletions(-)
>>
>> diff --git a/block/nbd.c b/block/nbd.c
>> index cdab20f..449f94e 100644
>> --- a/block/nbd.c
>> +++ b/block/nbd.c
>> @@ -32,6 +32,9 @@
>>  #include "qemu/uri.h"
>>  #include "block/block_int.h"
>>  #include "qemu/module.h"
>> +#include "qapi-visit.h"
>> +#include "qapi/qmp-input-visitor.h"
>> +#include "qapi/qmp-output-visitor.h"
>>  #include "qapi/qmp/qdict.h"
>>  #include "qapi/qmp/qjson.h"
>>  #include "qapi/qmp/qint.h"
>> @@ -44,7 +47,8 @@ typedef struct BDRVNBDState {
>>  NbdClientSession client;
>>
>>  /* For nbd_refresh_filename() */
>> -char *path, *host, *port, *export, *tlscredsid;
>> +SocketAddress *saddr;
>> +char *export, *tlscredsid;
>>  } BDRVNBDState;
>>
>>  static int nbd_parse_uri(const char *filename, QDict *options)
>> @@ -131,7 +135,9 @@ static bool nbd_has_filename_options_conflict(QDict 
>> *options, Error **errp)
>>  if (!strcmp(e->key, "host") ||
>>  !strcmp(e->key, "port") ||
>>  !strcmp(e->key, "path") ||
>> -!strcmp(e->key, "export"))
>> +!strcmp(e->key, "export") ||
>> +!strcmp(e->key, "address") ||
>> +!strncmp(e->key, "address.", 8))
>
> strstart()?
>
>>  {
>>  error_setg(errp, "Option '%s' cannot be used with a file name",
>> e->key);
>> @@ -205,50 +211,67 @@ out:
>>  g_free(file);
>>  }
>>
>> -static SocketAddress *nbd_config(BDRVNBDState *s, QemuOpts *opts, Error 
>> **errp)
>> +static bool nbd_process_legacy_socket_options(QDict *output_options,
>> +  QemuOpts *legacy_opts,
>> +  Error **errp)
>>  {
>> -SocketAddress *saddr;
>> -
>> -s->path = g_strdup(qemu_opt_get(opts, "path"));
>> -s->host = g_strdup(qemu_opt_get(opts, "host"));
>> -s->port = g_strdup(qemu_opt_get(opts, "port"));
>> -
>> -if (!s->path == !s->host) {
>> -if (s->path) {
>> -error_setg(errp, "path and host may not be used at the same 
>> time");
>> -} else {
>> -error_setg(errp, "one of path and host must be specified");
>> +const char *path = qemu_opt_get(legacy_opts, "path");
>> +const char *host = qemu_opt_get(legacy_opts, "host");
>> +const char *port = qemu_opt_get(legacy_opts, "port");

I am working on a similar task for the SSH block driver, and in my
'ssh_process_legacy_socket_options' I get the @host and @port fields
still pointing to NULL even after qemu_opt_get(). To clarify things a
bit more I tried to debug the same on your patch using gdb and faced
the same issue. So in your patch, ultimately the control flows
directly to the last statement i.e. 'return true;' and none of the
'if' conditions get satisfied. Reverting the patches and using the
same command-line seems to overcome the issue. Command line I used:

$ ./bin/qemu-system-x86_64 -cdrom nbd://localhost/precise-5.7.1.iso

I can be wrong so I advise debugging yourself once and correcting me
if I am wrong.

Link to my patch:

https://lists.gnu.org/archive/html/qemu-devel/2016-10/msg02139.html

Thanks
Ashijeet

> Kevin

67 matches

Mail list logo