[Qemu-devel] [PATCH v9 01/12] VMDK: introduce VmdkExtent

2011-07-12 Thread Fam Zheng
Introduce a VmdkExtent array in BDRVVmdkState so that it can hold multiple
image extents, to support images that span multiple files.

Signed-off-by: Fam Zheng 
---
 block/vmdk.c |  348 +-
 1 files changed, 246 insertions(+), 102 deletions(-)

diff --git a/block/vmdk.c b/block/vmdk.c
index 922b23d..3b78583 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -60,7 +60,11 @@ typedef struct {
 
 #define L2_CACHE_SIZE 16
 
-typedef struct BDRVVmdkState {
+typedef struct VmdkExtent {
+BlockDriverState *file;
+bool flat;
+int64_t sectors;
+int64_t end_sector;
 int64_t l1_table_offset;
 int64_t l1_backup_table_offset;
 uint32_t *l1_table;
@@ -74,7 +78,13 @@ typedef struct BDRVVmdkState {
 uint32_t l2_cache_counts[L2_CACHE_SIZE];
 
 unsigned int cluster_sectors;
+} VmdkExtent;
+
+typedef struct BDRVVmdkState {
 uint32_t parent_cid;
+int num_extents;
+/* Extent array with num_extents entries, ascend ordered by address */
+VmdkExtent *extents;
 } BDRVVmdkState;
 
 typedef struct VmdkMetaData {
@@ -105,6 +115,19 @@ static int vmdk_probe(const uint8_t *buf, int buf_size, 
const char *filename)
 #define DESC_SIZE 20*SECTOR_SIZE   // 20 sectors of 512 bytes each
 #define HEADER_SIZE 512// first sector of 512 bytes
 
+static void vmdk_free_extents(BlockDriverState *bs)
+{
+int i;
+BDRVVmdkState *s = bs->opaque;
+
+for (i = 0; i < s->num_extents; i++) {
+qemu_free(s->extents[i].l1_table);
+qemu_free(s->extents[i].l2_cache);
+qemu_free(s->extents[i].l1_backup_table);
+}
+qemu_free(s->extents);
+}
+
 static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
 {
 char desc[DESC_SIZE];
@@ -358,11 +381,50 @@ static int vmdk_parent_open(BlockDriverState *bs)
 return 0;
 }
 
+/* Create and append extent to the extent array. Return the added VmdkExtent
+ * address. return NULL if allocation failed. */
+static VmdkExtent *vmdk_add_extent(BlockDriverState *bs,
+   BlockDriverState *file, bool flat, int64_t sectors,
+   int64_t l1_offset, int64_t l1_backup_offset,
+   uint32_t l1_size,
+   int l2_size, unsigned int cluster_sectors)
+{
+VmdkExtent *extent;
+BDRVVmdkState *s = bs->opaque;
+
+s->extents = qemu_realloc(s->extents,
+  (s->num_extents + 1) * sizeof(VmdkExtent));
+extent = &s->extents[s->num_extents];
+s->num_extents++;
+
+memset(extent, 0, sizeof(VmdkExtent));
+extent->file = file;
+extent->flat = flat;
+extent->sectors = sectors;
+extent->l1_table_offset = l1_offset;
+extent->l1_backup_table_offset = l1_backup_offset;
+extent->l1_size = l1_size;
+extent->l1_entry_sectors = l2_size * cluster_sectors;
+extent->l2_size = l2_size;
+extent->cluster_sectors = cluster_sectors;
+
+if (s->num_extents > 1) {
+extent->end_sector = (*(extent - 1)).end_sector + extent->sectors;
+} else {
+extent->end_sector = extent->sectors;
+}
+bs->total_sectors = extent->end_sector;
+return extent;
+}
+
+
 static int vmdk_open(BlockDriverState *bs, int flags)
 {
 BDRVVmdkState *s = bs->opaque;
 uint32_t magic;
-int l1_size, i;
+int i;
+uint32_t l1_size, l1_entry_sectors;
+VmdkExtent *extent = NULL;
 
 if (bdrv_pread(bs->file, 0, &magic, sizeof(magic)) != sizeof(magic))
 goto fail;
@@ -370,32 +432,34 @@ static int vmdk_open(BlockDriverState *bs, int flags)
 magic = be32_to_cpu(magic);
 if (magic == VMDK3_MAGIC) {
 VMDK3Header header;
-
-if (bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header)) != 
sizeof(header))
+if (bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header))
+!= sizeof(header)) {
 goto fail;
-s->cluster_sectors = le32_to_cpu(header.granularity);
-s->l2_size = 1 << 9;
-s->l1_size = 1 << 6;
-bs->total_sectors = le32_to_cpu(header.disk_sectors);
-s->l1_table_offset = le32_to_cpu(header.l1dir_offset) << 9;
-s->l1_backup_table_offset = 0;
-s->l1_entry_sectors = s->l2_size * s->cluster_sectors;
+}
+extent = vmdk_add_extent(bs, bs->file, false,
+  le32_to_cpu(header.disk_sectors),
+  le32_to_cpu(header.l1dir_offset) << 9, 0,
+  1 << 6, 1 << 9, le32_to_cpu(header.granularity));
 } else if (magic == VMDK4_MAGIC) {
 VMDK4Header header;
-
-if (bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header)) != 
sizeof(header))
+if (bdrv_pread(bs->file, sizeof(magic), &head

[Qemu-devel] [PATCH v9 12/12] block: add bdrv_get_allocated_file_size() operation

2011-07-12 Thread Fam Zheng
qemu-img.c wants to count the allocated file size of an image. Previously it
counted a single bs->file using 'stat' or the Windows API. As VMDK introduces
multiple file support, the operation becomes format specific as well as
platform specific.

The functions are moved to block/raw-{posix,win32}.c and qemu-img.c calls
bdrv_get_allocated_file_size to count the bs. VMDK code is also added
to count its own extents.

Signed-off-by: Fam Zheng 
---
 block.c   |   19 +++
 block.h   |1 +
 block/raw-posix.c |   21 +
 block/raw-win32.c |   29 +
 block/vmdk.c  |   24 
 block_int.h   |1 +
 qemu-img.c|   31 +--
 7 files changed, 96 insertions(+), 30 deletions(-)

diff --git a/block.c b/block.c
index 24a25d5..9549b9e 100644
--- a/block.c
+++ b/block.c
@@ -1147,6 +1147,25 @@ int bdrv_truncate(BlockDriverState *bs, int64_t offset)
 }
 
 /**
+ * Length of a allocated file in bytes. Sparse files are counted by actual
+ * allocated space. Return < 0 if error or unknown.
+ */
+int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
+{
+BlockDriver *drv = bs->drv;
+if (!drv) {
+return -ENOMEDIUM;
+}
+if (drv->bdrv_get_allocated_file_size) {
+return drv->bdrv_get_allocated_file_size(bs);
+}
+if (bs->file) {
+return bdrv_get_allocated_file_size(bs->file);
+}
+return -ENOTSUP;
+}
+
+/**
  * Length of a file in bytes. Return < 0 if error or unknown.
  */
 int64_t bdrv_getlength(BlockDriverState *bs)
diff --git a/block.h b/block.h
index 859d1d9..59cc410 100644
--- a/block.h
+++ b/block.h
@@ -89,6 +89,7 @@ int bdrv_write_sync(BlockDriverState *bs, int64_t sector_num,
 const uint8_t *buf, int nb_sectors);
 int bdrv_truncate(BlockDriverState *bs, int64_t offset);
 int64_t bdrv_getlength(BlockDriverState *bs);
+int64_t bdrv_get_allocated_file_size(BlockDriverState *bs);
 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr);
 void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int 
*psecs);
 int bdrv_commit(BlockDriverState *bs);
diff --git a/block/raw-posix.c b/block/raw-posix.c
index 4cd7d7a..911cc0d 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -791,6 +791,17 @@ static int64_t raw_getlength(BlockDriverState *bs)
 }
 #endif
 
+static int64_t raw_get_allocated_file_size(BlockDriverState *bs)
+{
+struct stat st;
+BDRVRawState *s = bs->opaque;
+
+if (fstat(s->fd, &st) < 0) {
+return -errno;
+}
+return (int64_t)st.st_blocks * 512;
+}
+
 static int raw_create(const char *filename, QEMUOptionParameter *options)
 {
 int fd;
@@ -886,6 +897,8 @@ static BlockDriver bdrv_file = {
 
 .bdrv_truncate = raw_truncate,
 .bdrv_getlength = raw_getlength,
+.bdrv_get_allocated_file_size
+= raw_get_allocated_file_size,
 
 .create_options = raw_create_options,
 };
@@ -1154,6 +1167,8 @@ static BlockDriver bdrv_host_device = {
 .bdrv_read  = raw_read,
 .bdrv_write = raw_write,
 .bdrv_getlength= raw_getlength,
+.bdrv_get_allocated_file_size
+= raw_get_allocated_file_size,
 
 /* generic scsi device */
 #ifdef __linux__
@@ -1269,6 +1284,8 @@ static BlockDriver bdrv_host_floppy = {
 .bdrv_read  = raw_read,
 .bdrv_write = raw_write,
 .bdrv_getlength= raw_getlength,
+.bdrv_get_allocated_file_size
+= raw_get_allocated_file_size,
 
 /* removable device support */
 .bdrv_is_inserted   = floppy_is_inserted,
@@ -1366,6 +1383,8 @@ static BlockDriver bdrv_host_cdrom = {
 .bdrv_read  = raw_read,
 .bdrv_write = raw_write,
 .bdrv_getlength = raw_getlength,
+.bdrv_get_allocated_file_size
+= raw_get_allocated_file_size,
 
 /* removable device support */
 .bdrv_is_inserted   = cdrom_is_inserted,
@@ -1489,6 +1508,8 @@ static BlockDriver bdrv_host_cdrom = {
 .bdrv_read  = raw_read,
 .bdrv_write = raw_write,
 .bdrv_getlength = raw_getlength,
+.bdrv_get_allocated_file_size
+= raw_get_allocated_file_size,
 
 /* removable device support */
 .bdrv_is_inserted   = cdrom_is_inserted,
diff --git a/block/raw-win32.c b/block/raw-win32.c
index 56bd719..91067e7 100644
--- a/block/raw-win32.c
+++ b/block/raw-win32.c
@@ -213,6 +213,31 @@ static int64_t raw_getlength(BlockDriverState *bs)
 return l.QuadPart;
 }
 
+static int64_t raw_get_allocated_file_size(BlockDriverState *bs)
+{
+typedef DWORD (WINAPI * get_compressed_t)(const char *filename,
+  DWORD * high);
+get_compressed_t get_compressed;
+struct _stati64 st;
+const char *filename = bs->filename;
+/* WinNT suppor

[Qemu-devel] [PATCH v9 02/12] VMDK: bugfix, align offset to cluster in get_whole_cluster

2011-07-12 Thread Fam Zheng
In get_whole_cluster, the offset is not aligned to the cluster boundary when
reading from backing_hd. When the first write to the child is not at a cluster
boundary, data from the wrong address in the parent is copied to the child.

Signed-off-by: Fam Zheng 
---
 block/vmdk.c |8 +---
 1 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/block/vmdk.c b/block/vmdk.c
index 3b78583..03a4619 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -514,21 +514,23 @@ static int get_whole_cluster(BlockDriverState *bs,
 /* 128 sectors * 512 bytes each = grain size 64KB */
 uint8_t  whole_grain[extent->cluster_sectors * 512];
 
-// we will be here if it's first write on non-exist grain(cluster).
-// try to read from parent image, if exist
+/* we will be here if it's first write on non-exist grain(cluster).
+ * try to read from parent image, if exist */
 if (bs->backing_hd) {
 int ret;
 
 if (!vmdk_is_cid_valid(bs))
 return -1;
 
+/* floor offset to cluster */
+offset -= offset % (extent->cluster_sectors * 512);
 ret = bdrv_read(bs->backing_hd, offset >> 9, whole_grain,
 extent->cluster_sectors);
 if (ret < 0) {
 return -1;
 }
 
-//Write grain only into the active image
+/* Write grain only into the active image */
 ret = bdrv_write(extent->file, cluster_offset, whole_grain,
 extent->cluster_sectors);
 if (ret < 0) {



[Qemu-devel] [PATCH v9 09/12] VMDK: open/read/write for monolithicFlat image

2011-07-12 Thread Fam Zheng
Parse the vmdk descriptor file and open a monolithicFlat image.
Read/write the flat extent.

Signed-off-by: Fam Zheng 
---
 block/vmdk.c |  171 +-
 1 files changed, 158 insertions(+), 13 deletions(-)

diff --git a/block/vmdk.c b/block/vmdk.c
index f637d98..93ac289 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -65,6 +65,7 @@ typedef struct VmdkExtent {
 bool flat;
 int64_t sectors;
 int64_t end_sector;
+int64_t flat_start_offset;
 int64_t l1_table_offset;
 int64_t l1_backup_table_offset;
 uint32_t *l1_table;
@@ -407,9 +408,10 @@ fail:
 static int vmdk_parent_open(BlockDriverState *bs)
 {
 char *p_name;
-char desc[DESC_SIZE];
+char desc[DESC_SIZE + 1];
 BDRVVmdkState *s = bs->opaque;
 
+desc[DESC_SIZE] = '\0';
 if (bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE) != DESC_SIZE) {
 return -1;
 }
@@ -584,6 +586,144 @@ static int vmdk_open_vmdk4(BlockDriverState *bs, int 
flags)
 return ret;
 }
 
+/* find an option value out of descriptor file */
+static int vmdk_parse_description(const char *desc, const char *opt_name,
+char *buf, int buf_size)
+{
+char *opt_pos, *opt_end;
+const char *end = desc + strlen(desc);
+
+opt_pos = strstr(desc, opt_name);
+if (!opt_pos) {
+return -1;
+}
+/* Skip "=\"" following opt_name */
+opt_pos += strlen(opt_name) + 2;
+if (opt_pos >= end) {
+return -1;
+}
+opt_end = opt_pos;
+while (opt_end < end && *opt_end != '"') {
+opt_end++;
+}
+if (opt_end == end || buf_size < opt_end - opt_pos + 1) {
+return -1;
+}
+pstrcpy(buf, opt_end - opt_pos + 1, opt_pos);
+return 0;
+}
+
+static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
+const char *desc_file_path)
+{
+int ret;
+char access[11];
+char type[11];
+char fname[512];
+const char *p = desc;
+int64_t sectors = 0;
+int64_t flat_offset;
+
+while (*p) {
+/* parse extent line:
+ * RW [size in sectors] FLAT "file-name.vmdk" OFFSET
+ * or
+ * RW [size in sectors] SPARSE "file-name.vmdk"
+ */
+flat_offset = -1;
+ret = sscanf(p, "%10s %lld %10s %511s %lld",
+access, &sectors, type, fname, &flat_offset);
+if (ret < 4 || strcmp(access, "RW")) {
+goto next_line;
+} else if (!strcmp(type, "FLAT")) {
+if (ret != 5 || flat_offset < 0) {
+return -EINVAL;
+}
+} else if (ret != 4) {
+return -EINVAL;
+}
+
+/* trim the quotation marks around */
+if (fname[0] == '"') {
+memmove(fname, fname + 1, strlen(fname));
+if (strlen(fname) <= 1 || fname[strlen(fname) - 1] != '"') {
+return -EINVAL;
+}
+fname[strlen(fname) - 1] = '\0';
+}
+if (sectors <= 0 ||
+(strcmp(type, "FLAT") && strcmp(type, "SPARSE")) ||
+(strcmp(access, "RW"))) {
+goto next_line;
+}
+
+/* save to extents array */
+if (!strcmp(type, "FLAT")) {
+/* FLAT extent */
+char extent_path[PATH_MAX];
+BlockDriverState *extent_file;
+VmdkExtent *extent;
+
+path_combine(extent_path, sizeof(extent_path),
+desc_file_path, fname);
+ret = bdrv_file_open(&extent_file, extent_path, bs->open_flags);
+if (ret) {
+return ret;
+}
+extent = vmdk_add_extent(bs, extent_file, true, sectors,
+0, 0, 0, 0, sectors);
+extent->flat_start_offset = flat_offset;
+} else {
+/* SPARSE extent, not supported for now */
+fprintf(stderr,
+"VMDK: Not supported extent type \"%s\""".\n", type);
+return -ENOTSUP;
+}
+next_line:
+/* move to next line */
+while (*p && *p != '\n') {
+p++;
+}
+p++;
+}
+return 0;
+}
+
+static int vmdk_open_desc_file(BlockDriverState *bs, int flags)
+{
+int ret;
+char buf[2048];
+char ct[128];
+BDRVVmdkState *s = bs->opaque;
+
+ret = bdrv_pread(bs->file, 0, buf, sizeof(buf));
+if (ret < 0) {
+return ret;
+}
+buf[2047] = '\0';
+if (vmdk_parse_description(buf, "createType", ct, sizeof(ct))) {
+return -EINVAL;
+}
+if (strcmp(ct, "monolithicFlat")) {
+fprintf(stderr,
+"VMDK: Not supported image type \"%s\""&

[Qemu-devel] [PATCH v9 04/12] VMDK: separate vmdk_open by format version

2011-07-12 Thread Fam Zheng
Separate vmdk_open by subformats to:
* vmdk_open_vmdk3
* vmdk_open_vmdk4

Signed-off-by: Fam Zheng 
---
 block/vmdk.c |  178 -
 1 files changed, 112 insertions(+), 66 deletions(-)

diff --git a/block/vmdk.c b/block/vmdk.c
index f8a815c..6d7b497 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -458,67 +458,20 @@ static VmdkExtent *vmdk_add_extent(BlockDriverState *bs,
 return extent;
 }
 
-
-static int vmdk_open(BlockDriverState *bs, int flags)
+static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent)
 {
-BDRVVmdkState *s = bs->opaque;
-uint32_t magic;
-int i;
-uint32_t l1_size, l1_entry_sectors;
-VmdkExtent *extent = NULL;
-
-if (bdrv_pread(bs->file, 0, &magic, sizeof(magic)) != sizeof(magic))
-goto fail;
-
-magic = be32_to_cpu(magic);
-if (magic == VMDK3_MAGIC) {
-VMDK3Header header;
-if (bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header))
-!= sizeof(header)) {
-goto fail;
-}
-extent = vmdk_add_extent(bs, bs->file, false,
-  le32_to_cpu(header.disk_sectors),
-  le32_to_cpu(header.l1dir_offset) << 9, 0,
-  1 << 6, 1 << 9, le32_to_cpu(header.granularity));
-} else if (magic == VMDK4_MAGIC) {
-VMDK4Header header;
-if (bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header))
-!= sizeof(header)) {
-goto fail;
-}
-l1_entry_sectors = le32_to_cpu(header.num_gtes_per_gte)
-* le64_to_cpu(header.granularity);
-l1_size = (le64_to_cpu(header.capacity) + l1_entry_sectors - 1)
-/ l1_entry_sectors;
-extent = vmdk_add_extent(bs, bs->file, false,
-  le64_to_cpu(header.capacity),
-  le64_to_cpu(header.gd_offset) << 9,
-  le64_to_cpu(header.rgd_offset) << 9,
-  l1_size,
-  le32_to_cpu(header.num_gtes_per_gte),
-  le64_to_cpu(header.granularity));
-if (extent->l1_entry_sectors <= 0) {
-goto fail;
-}
-// try to open parent images, if exist
-if (vmdk_parent_open(bs) != 0)
-goto fail;
-// write the CID once after the image creation
-s->parent_cid = vmdk_read_cid(bs,1);
-} else {
-goto fail;
-}
+int ret;
+int l1_size, i;
 
 /* read the L1 table */
 l1_size = extent->l1_size * sizeof(uint32_t);
 extent->l1_table = qemu_malloc(l1_size);
-if (bdrv_pread(bs->file,
-extent->l1_table_offset,
-extent->l1_table,
-l1_size)
-!= l1_size) {
-goto fail;
+ret = bdrv_pread(extent->file,
+extent->l1_table_offset,
+extent->l1_table,
+l1_size);
+if (ret < 0) {
+goto fail_l1;
 }
 for (i = 0; i < extent->l1_size; i++) {
 le32_to_cpus(&extent->l1_table[i]);
@@ -526,12 +479,12 @@ static int vmdk_open(BlockDriverState *bs, int flags)
 
 if (extent->l1_backup_table_offset) {
 extent->l1_backup_table = qemu_malloc(l1_size);
-if (bdrv_pread(bs->file,
-extent->l1_backup_table_offset,
-extent->l1_backup_table,
-l1_size)
-!= l1_size) {
-goto fail;
+ret = bdrv_pread(extent->file,
+extent->l1_backup_table_offset,
+extent->l1_backup_table,
+l1_size);
+if (ret < 0) {
+goto fail_l1b;
 }
 for (i = 0; i < extent->l1_size; i++) {
 le32_to_cpus(&extent->l1_backup_table[i]);
@@ -541,9 +494,102 @@ static int vmdk_open(BlockDriverState *bs, int flags)
 extent->l2_cache =
 qemu_malloc(extent->l2_size * L2_CACHE_SIZE * sizeof(uint32_t));
 return 0;
+ fail_l1b:
+qemu_free(extent->l1_backup_table);
+ fail_l1:
+qemu_free(extent->l1_table);
+return ret;
+}
+
+static int vmdk_open_vmdk3(BlockDriverState *bs, int flags)
+{
+int ret;
+uint32_t magic;
+VMDK3Header header;
+VmdkExtent *extent;
+
+ret = bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header));
+if (ret < 0) {
+goto fail;
+}
+extent = vmdk_add_extent(bs,
+ bs->file, false,
+ le32_to_cpu(header.disk_sectors),
+ le32_to_cpu(header.l1dir_offset) << 9,
+ 0, 1 << 6, 1 << 9,
+ le32_t

[Qemu-devel] [PATCH v9 07/12] VMDK: move 'static' cid_update flag to bs field

2011-07-12 Thread Fam Zheng
Cid_update is the flag for updating CID on first write after opening the
image. This should be per image open rather than per program life cycle,
so change it from static var of vmdk_write to a field in BDRVVmdkState.

Signed-off-by: Fam Zheng 
---
 block/vmdk.c |6 +++---
 1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/block/vmdk.c b/block/vmdk.c
index f6d2986..8dc58a8 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -82,6 +82,7 @@ typedef struct VmdkExtent {
 
 typedef struct BDRVVmdkState {
 int desc_offset;
+bool cid_updated;
 uint32_t parent_cid;
 int num_extents;
 /* Extent array with num_extents entries, ascend ordered by address */
@@ -853,7 +854,6 @@ static int vmdk_write(BlockDriverState *bs, int64_t 
sector_num,
 int n;
 int64_t index_in_cluster;
 uint64_t cluster_offset;
-static int cid_update = 0;
 VmdkMetaData m_data;
 
 if (sector_num > bs->total_sectors) {
@@ -900,9 +900,9 @@ static int vmdk_write(BlockDriverState *bs, int64_t 
sector_num,
 buf += n * 512;
 
 // update CID on the first write every time the virtual disk is opened
-if (!cid_update) {
+if (!s->cid_updated) {
 vmdk_write_cid(bs, time(NULL));
-cid_update++;
+s->cid_updated = true;
 }
 }
 return 0;



[Qemu-devel] [PATCH v9 11/12] VMDK: fix coding style

2011-07-12 Thread Fam Zheng
Conform coding style in vmdk.c to pass scripts/checkpatch.pl checks.

Signed-off-by: Fam Zheng 
---
 block/vmdk.c |   78 +++---
 1 files changed, 47 insertions(+), 31 deletions(-)

diff --git a/block/vmdk.c b/block/vmdk.c
index e7bea1f..aa05a3b 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -102,8 +102,9 @@ static int vmdk_probe(const uint8_t *buf, int buf_size, 
const char *filename)
 {
 uint32_t magic;
 
-if (buf_size < 4)
+if (buf_size < 4) {
 return 0;
+}
 magic = be32_to_cpu(*(uint32_t *)buf);
 if (magic == VMDK3_MAGIC ||
 magic == VMDK4_MAGIC) {
@@ -193,9 +194,10 @@ static uint32_t vmdk_read_cid(BlockDriverState *bs, int 
parent)
 cid_str_size = sizeof("CID");
 }
 
-if ((p_name = strstr(desc,cid_str)) != NULL) {
+p_name = strstr(desc, cid_str);
+if (p_name != NULL) {
 p_name += cid_str_size;
-sscanf(p_name,"%x",&cid);
+sscanf(p_name, "%x", &cid);
 }
 
 return cid;
@@ -212,9 +214,10 @@ static int vmdk_write_cid(BlockDriverState *bs, uint32_t 
cid)
 return -EIO;
 }
 
-tmp_str = strstr(desc,"parentCID");
+tmp_str = strstr(desc, "parentCID");
 pstrcpy(tmp_desc, sizeof(tmp_desc), tmp_str);
-if ((p_name = strstr(desc,"CID")) != NULL) {
+p_name = strstr(desc, "CID");
+if (p_name != NULL) {
 p_name += sizeof("CID");
 snprintf(p_name, sizeof(desc) - (p_name - desc), "%x\n", cid);
 pstrcat(desc, sizeof(desc), tmp_desc);
@@ -234,13 +237,14 @@ static int vmdk_is_cid_valid(BlockDriverState *bs)
 uint32_t cur_pcid;
 
 if (p_bs) {
-cur_pcid = vmdk_read_cid(p_bs,0);
-if (s->parent_cid != cur_pcid)
-// CID not valid
+cur_pcid = vmdk_read_cid(p_bs, 0);
+if (s->parent_cid != cur_pcid) {
+/* CID not valid */
 return 0;
+}
 }
 #endif
-// CID valid
+/* CID valid */
 return 1;
 }
 
@@ -255,14 +259,18 @@ static int vmdk_parent_open(BlockDriverState *bs)
 return -1;
 }
 
-if ((p_name = strstr(desc,"parentFileNameHint")) != NULL) {
+p_name = strstr(desc, "parentFileNameHint");
+if (p_name != NULL) {
 char *end_name;
 
 p_name += sizeof("parentFileNameHint") + 1;
-if ((end_name = strchr(p_name,'\"')) == NULL)
+end_name = strchr(p_name, '\"');
+if (end_name == NULL) {
 return -1;
-if ((end_name - p_name) > sizeof (bs->backing_file) - 1)
+}
+if ((end_name - p_name) > sizeof(bs->backing_file) - 1) {
 return -1;
+}
 
 pstrcpy(bs->backing_file, end_name - p_name + 1, p_name);
 }
@@ -595,8 +603,9 @@ static int get_whole_cluster(BlockDriverState *bs,
 if (bs->backing_hd) {
 int ret;
 
-if (!vmdk_is_cid_valid(bs))
+if (!vmdk_is_cid_valid(bs)) {
 return -1;
+}
 
 /* floor offset to cluster */
 offset -= offset % (extent->cluster_sectors * 512);
@@ -655,8 +664,9 @@ static int get_cluster_offset(BlockDriverState *bs,
 int min_index, i, j;
 uint32_t min_count, *l2_table, tmp = 0;
 
-if (m_data)
+if (m_data) {
 m_data->valid = 0;
+}
 if (extent->flat) {
 *cluster_offset = extent->flat_start_offset;
 return 0;
@@ -712,7 +722,7 @@ static int get_cluster_offset(BlockDriverState *bs,
 return -1;
 }
 
-// Avoid the L2 tables update for the images that have snapshots.
+/* Avoid the L2 tables update for the images that have snapshots. */
 *cluster_offset = bdrv_getlength(extent->file);
 bdrv_truncate(
 extent->file,
@@ -729,8 +739,9 @@ static int get_cluster_offset(BlockDriverState *bs,
  * or inappropriate VM shutdown.
  */
 if (get_whole_cluster(
-bs, extent, *cluster_offset, offset, allocate) == -1)
+bs, extent, *cluster_offset, offset, allocate) == -1) {
 return -1;
+}
 
 if (m_data) {
 m_data->offset = tmp;
@@ -780,8 +791,9 @@ static int vmdk_is_allocated(BlockDriverState *bs, int64_t 
sector_num,
 
 index_in_cluster = sector_num % extent->cluster_sectors;
 n = extent->cluster_sectors - index_in_cluster;
-if (n > nb_sectors)
+if (n > nb_sectors) {
 n = nb_sectors;
+}
 *pnum = n;
 return ret;
 }
@@ -805,16 +817,19 @@ static int vmdk_read(BlockDriverState *bs, int64_t 
sector_num,
 sector_num << 9, 0, &cluster_offset);
 index_in_cluster = sector_num % extent->cluster_sectors;
 n = extent->cluster_sectors - index_in_

[Qemu-devel] [PATCH v9 10/12] VMDK: create different subformats

2011-07-12 Thread Fam Zheng
Add create option 'format', with enums:
monolithicSparse
monolithicFlat
twoGbMaxExtentSparse
twoGbMaxExtentFlat
Each creates a subformat image file. The default is monolithicSparse.

Signed-off-by: Fam Zheng 
---
 block/vmdk.c |  502 +++--
 block_int.h  |1 +
 2 files changed, 274 insertions(+), 229 deletions(-)

diff --git a/block/vmdk.c b/block/vmdk.c
index 93ac289..e7bea1f 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -156,8 +156,9 @@ static int vmdk_probe(const uint8_t *buf, int buf_size, 
const char *filename)
 #define CHECK_CID 1
 
 #define SECTOR_SIZE 512
-#define DESC_SIZE 20*SECTOR_SIZE   // 20 sectors of 512 bytes each
-#define HEADER_SIZE 512// first sector of 512 bytes
+#define DESC_SIZE (20 * SECTOR_SIZE)/* 20 sectors of 512 bytes each */
+#define BUF_SIZE 4096
+#define HEADER_SIZE 512 /* first sector of 512 bytes */
 
 static void vmdk_free_extents(BlockDriverState *bs)
 {
@@ -243,168 +244,6 @@ static int vmdk_is_cid_valid(BlockDriverState *bs)
 return 1;
 }
 
-static int vmdk_snapshot_create(const char *filename, const char *backing_file)
-{
-int snp_fd, p_fd;
-int ret;
-uint32_t p_cid;
-char *p_name, *gd_buf, *rgd_buf;
-const char *real_filename, *temp_str;
-VMDK4Header header;
-uint32_t gde_entries, gd_size;
-int64_t gd_offset, rgd_offset, capacity, gt_size;
-char p_desc[DESC_SIZE], s_desc[DESC_SIZE], hdr[HEADER_SIZE];
-static const char desc_template[] =
-"# Disk DescriptorFile\n"
-"version=1\n"
-"CID=%x\n"
-"parentCID=%x\n"
-"createType=\"monolithicSparse\"\n"
-"parentFileNameHint=\"%s\"\n"
-"\n"
-"# Extent description\n"
-"RW %u SPARSE \"%s\"\n"
-"\n"
-"# The Disk Data Base \n"
-"#DDB\n"
-"\n";
-
-snp_fd = open(filename, O_RDWR | O_CREAT | O_TRUNC | O_BINARY | 
O_LARGEFILE, 0644);
-if (snp_fd < 0)
-return -errno;
-p_fd = open(backing_file, O_RDONLY | O_BINARY | O_LARGEFILE);
-if (p_fd < 0) {
-close(snp_fd);
-return -errno;
-}
-
-/* read the header */
-if (lseek(p_fd, 0x0, SEEK_SET) == -1) {
-ret = -errno;
-goto fail;
-}
-if (read(p_fd, hdr, HEADER_SIZE) != HEADER_SIZE) {
-ret = -errno;
-goto fail;
-}
-
-/* write the header */
-if (lseek(snp_fd, 0x0, SEEK_SET) == -1) {
-ret = -errno;
-goto fail;
-}
-if (write(snp_fd, hdr, HEADER_SIZE) == -1) {
-ret = -errno;
-goto fail;
-}
-
-memset(&header, 0, sizeof(header));
-memcpy(&header,&hdr[4], sizeof(header)); // skip the VMDK4_MAGIC
-
-if (ftruncate(snp_fd, header.grain_offset << 9)) {
-ret = -errno;
-goto fail;
-}
-/* the descriptor offset = 0x200 */
-if (lseek(p_fd, 0x200, SEEK_SET) == -1) {
-ret = -errno;
-goto fail;
-}
-if (read(p_fd, p_desc, DESC_SIZE) != DESC_SIZE) {
-ret = -errno;
-goto fail;
-}
-
-if ((p_name = strstr(p_desc,"CID")) != NULL) {
-p_name += sizeof("CID");
-sscanf(p_name,"%x",&p_cid);
-}
-
-real_filename = filename;
-if ((temp_str = strrchr(real_filename, '\\')) != NULL)
-real_filename = temp_str + 1;
-if ((temp_str = strrchr(real_filename, '/')) != NULL)
-real_filename = temp_str + 1;
-if ((temp_str = strrchr(real_filename, ':')) != NULL)
-real_filename = temp_str + 1;
-
-snprintf(s_desc, sizeof(s_desc), desc_template, p_cid, p_cid, backing_file,
- (uint32_t)header.capacity, real_filename);
-
-/* write the descriptor */
-if (lseek(snp_fd, 0x200, SEEK_SET) == -1) {
-ret = -errno;
-goto fail;
-}
-if (write(snp_fd, s_desc, strlen(s_desc)) == -1) {
-ret = -errno;
-goto fail;
-}
-
-gd_offset = header.gd_offset * SECTOR_SIZE; // offset of GD table
-rgd_offset = header.rgd_offset * SECTOR_SIZE;   // offset of RGD table
-capacity = header.capacity * SECTOR_SIZE;   // Extent size
-/*
- * Each GDE span 32M disk, means:
- * 512 GTE per GT, each GTE points to grain
- */
-gt_size = (int64_t)header.num_gtes_per_gte * header.granularity * 
SECTOR_SIZE;
-if (!gt_size) {
-ret = -EINVAL;
-goto fail;
-}
-gde_entries = (uint32_t)(capacity / gt_size);  // number of gde/rgde
-gd_size = gde_entries * sizeof(uint32_t);
-
-/* write RGD */
-rgd_buf = qemu_malloc(gd_size);
-if (lseek(p_fd, rgd_offset, SEEK_SET) == -1) {
-ret = -errno;
-goto fail_rgd;
-}
-if (read(p_fd, rgd_buf, gd_size) != gd_size) {

[Qemu-devel] [PATCH v9 09/12] VMDK: open/read/write for monolithicFlat image

2011-07-18 Thread Fam Zheng
Parse the vmdk descriptor file and open a monolithicFlat image.
Read/write the flat extent.

Signed-off-by: Fam Zheng 
---
 block/vmdk.c |  171 +-
 1 files changed, 158 insertions(+), 13 deletions(-)

diff --git a/block/vmdk.c b/block/vmdk.c
index f637d98..e1fb962 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -65,6 +65,7 @@ typedef struct VmdkExtent {
 bool flat;
 int64_t sectors;
 int64_t end_sector;
+int64_t flat_start_offset;
 int64_t l1_table_offset;
 int64_t l1_backup_table_offset;
 uint32_t *l1_table;
@@ -407,9 +408,10 @@ fail:
 static int vmdk_parent_open(BlockDriverState *bs)
 {
 char *p_name;
-char desc[DESC_SIZE];
+char desc[DESC_SIZE + 1];
 BDRVVmdkState *s = bs->opaque;
 
+desc[DESC_SIZE] = '\0';
 if (bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE) != DESC_SIZE) {
 return -1;
 }
@@ -584,6 +586,144 @@ static int vmdk_open_vmdk4(BlockDriverState *bs, int 
flags)
 return ret;
 }
 
+/* find an option value out of descriptor file */
+static int vmdk_parse_description(const char *desc, const char *opt_name,
+char *buf, int buf_size)
+{
+char *opt_pos, *opt_end;
+const char *end = desc + strlen(desc);
+
+opt_pos = strstr(desc, opt_name);
+if (!opt_pos) {
+return -1;
+}
+/* Skip "=\"" following opt_name */
+opt_pos += strlen(opt_name) + 2;
+if (opt_pos >= end) {
+return -1;
+}
+opt_end = opt_pos;
+while (opt_end < end && *opt_end != '"') {
+opt_end++;
+}
+if (opt_end == end || buf_size < opt_end - opt_pos + 1) {
+return -1;
+}
+pstrcpy(buf, opt_end - opt_pos + 1, opt_pos);
+return 0;
+}
+
+static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
+const char *desc_file_path)
+{
+int ret;
+char access[11];
+char type[11];
+char fname[512];
+const char *p = desc;
+int64_t sectors = 0;
+int64_t flat_offset;
+
+while (*p) {
+/* parse extent line:
+ * RW [size in sectors] FLAT "file-name.vmdk" OFFSET
+ * or
+ * RW [size in sectors] SPARSE "file-name.vmdk"
+ */
+flat_offset = -1;
+ret = sscanf(p, "%10s %" SCNd64 " %10s %511s %" SCNd64,
+access, &sectors, type, fname, &flat_offset);
+if (ret < 4 || strcmp(access, "RW")) {
+goto next_line;
+} else if (!strcmp(type, "FLAT")) {
+if (ret != 5 || flat_offset < 0) {
+return -EINVAL;
+}
+} else if (ret != 4) {
+return -EINVAL;
+}
+
+/* trim the quotation marks around */
+if (fname[0] == '"') {
+memmove(fname, fname + 1, strlen(fname));
+if (strlen(fname) <= 1 || fname[strlen(fname) - 1] != '"') {
+return -EINVAL;
+}
+fname[strlen(fname) - 1] = '\0';
+}
+if (sectors <= 0 ||
+(strcmp(type, "FLAT") && strcmp(type, "SPARSE")) ||
+(strcmp(access, "RW"))) {
+goto next_line;
+}
+
+/* save to extents array */
+if (!strcmp(type, "FLAT")) {
+/* FLAT extent */
+char extent_path[PATH_MAX];
+BlockDriverState *extent_file;
+VmdkExtent *extent;
+
+path_combine(extent_path, sizeof(extent_path),
+desc_file_path, fname);
+ret = bdrv_file_open(&extent_file, extent_path, bs->open_flags);
+if (ret) {
+return ret;
+}
+extent = vmdk_add_extent(bs, extent_file, true, sectors,
+0, 0, 0, 0, sectors);
+extent->flat_start_offset = flat_offset;
+} else {
+/* SPARSE extent, not supported for now */
+fprintf(stderr,
+"VMDK: Not supported extent type \"%s\""".\n", type);
+return -ENOTSUP;
+}
+next_line:
+/* move to next line */
+while (*p && *p != '\n') {
+p++;
+}
+p++;
+}
+return 0;
+}
+
+static int vmdk_open_desc_file(BlockDriverState *bs, int flags)
+{
+int ret;
+char buf[2048];
+char ct[128];
+BDRVVmdkState *s = bs->opaque;
+
+ret = bdrv_pread(bs->file, 0, buf, sizeof(buf));
+if (ret < 0) {
+return ret;
+}
+buf[2047] = '\0';
+if (vmdk_parse_description(buf, "createType", ct, sizeof(ct))) {
+return -EINVAL;
+}
+if (strcmp(ct, "monolithicFlat")) {
+fprintf(stderr,
+"VMDK: Not supported image type \

[Qemu-devel] [PATCH v9 10/12] VMDK: create different subformats

2011-07-18 Thread Fam Zheng
Add create option 'format', with enums:
monolithicSparse
monolithicFlat
twoGbMaxExtentSparse
twoGbMaxExtentFlat
Each creates a subformat image file. The default is monolithicSparse.

Signed-off-by: Fam Zheng 
---
 block/vmdk.c |  503 +++--
 block_int.h  |1 +
 2 files changed, 275 insertions(+), 229 deletions(-)

diff --git a/block/vmdk.c b/block/vmdk.c
index e1fb962..b53c5f5 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -156,8 +156,9 @@ static int vmdk_probe(const uint8_t *buf, int buf_size, 
const char *filename)
 #define CHECK_CID 1
 
 #define SECTOR_SIZE 512
-#define DESC_SIZE 20*SECTOR_SIZE   // 20 sectors of 512 bytes each
-#define HEADER_SIZE 512// first sector of 512 bytes
+#define DESC_SIZE (20 * SECTOR_SIZE)/* 20 sectors of 512 bytes each */
+#define BUF_SIZE 4096
+#define HEADER_SIZE 512 /* first sector of 512 bytes */
 
 static void vmdk_free_extents(BlockDriverState *bs)
 {
@@ -243,168 +244,6 @@ static int vmdk_is_cid_valid(BlockDriverState *bs)
 return 1;
 }
 
-static int vmdk_snapshot_create(const char *filename, const char *backing_file)
-{
-int snp_fd, p_fd;
-int ret;
-uint32_t p_cid;
-char *p_name, *gd_buf, *rgd_buf;
-const char *real_filename, *temp_str;
-VMDK4Header header;
-uint32_t gde_entries, gd_size;
-int64_t gd_offset, rgd_offset, capacity, gt_size;
-char p_desc[DESC_SIZE], s_desc[DESC_SIZE], hdr[HEADER_SIZE];
-static const char desc_template[] =
-"# Disk DescriptorFile\n"
-"version=1\n"
-"CID=%x\n"
-"parentCID=%x\n"
-"createType=\"monolithicSparse\"\n"
-"parentFileNameHint=\"%s\"\n"
-"\n"
-"# Extent description\n"
-"RW %u SPARSE \"%s\"\n"
-"\n"
-"# The Disk Data Base \n"
-"#DDB\n"
-"\n";
-
-snp_fd = open(filename, O_RDWR | O_CREAT | O_TRUNC | O_BINARY | 
O_LARGEFILE, 0644);
-if (snp_fd < 0)
-return -errno;
-p_fd = open(backing_file, O_RDONLY | O_BINARY | O_LARGEFILE);
-if (p_fd < 0) {
-close(snp_fd);
-return -errno;
-}
-
-/* read the header */
-if (lseek(p_fd, 0x0, SEEK_SET) == -1) {
-ret = -errno;
-goto fail;
-}
-if (read(p_fd, hdr, HEADER_SIZE) != HEADER_SIZE) {
-ret = -errno;
-goto fail;
-}
-
-/* write the header */
-if (lseek(snp_fd, 0x0, SEEK_SET) == -1) {
-ret = -errno;
-goto fail;
-}
-if (write(snp_fd, hdr, HEADER_SIZE) == -1) {
-ret = -errno;
-goto fail;
-}
-
-memset(&header, 0, sizeof(header));
-memcpy(&header,&hdr[4], sizeof(header)); // skip the VMDK4_MAGIC
-
-if (ftruncate(snp_fd, header.grain_offset << 9)) {
-ret = -errno;
-goto fail;
-}
-/* the descriptor offset = 0x200 */
-if (lseek(p_fd, 0x200, SEEK_SET) == -1) {
-ret = -errno;
-goto fail;
-}
-if (read(p_fd, p_desc, DESC_SIZE) != DESC_SIZE) {
-ret = -errno;
-goto fail;
-}
-
-if ((p_name = strstr(p_desc,"CID")) != NULL) {
-p_name += sizeof("CID");
-sscanf(p_name,"%x",&p_cid);
-}
-
-real_filename = filename;
-if ((temp_str = strrchr(real_filename, '\\')) != NULL)
-real_filename = temp_str + 1;
-if ((temp_str = strrchr(real_filename, '/')) != NULL)
-real_filename = temp_str + 1;
-if ((temp_str = strrchr(real_filename, ':')) != NULL)
-real_filename = temp_str + 1;
-
-snprintf(s_desc, sizeof(s_desc), desc_template, p_cid, p_cid, backing_file,
- (uint32_t)header.capacity, real_filename);
-
-/* write the descriptor */
-if (lseek(snp_fd, 0x200, SEEK_SET) == -1) {
-ret = -errno;
-goto fail;
-}
-if (write(snp_fd, s_desc, strlen(s_desc)) == -1) {
-ret = -errno;
-goto fail;
-}
-
-gd_offset = header.gd_offset * SECTOR_SIZE; // offset of GD table
-rgd_offset = header.rgd_offset * SECTOR_SIZE;   // offset of RGD table
-capacity = header.capacity * SECTOR_SIZE;   // Extent size
-/*
- * Each GDE span 32M disk, means:
- * 512 GTE per GT, each GTE points to grain
- */
-gt_size = (int64_t)header.num_gtes_per_gte * header.granularity * 
SECTOR_SIZE;
-if (!gt_size) {
-ret = -EINVAL;
-goto fail;
-}
-gde_entries = (uint32_t)(capacity / gt_size);  // number of gde/rgde
-gd_size = gde_entries * sizeof(uint32_t);
-
-/* write RGD */
-rgd_buf = qemu_malloc(gd_size);
-if (lseek(p_fd, rgd_offset, SEEK_SET) == -1) {
-ret = -errno;
-goto fail_rgd;
-}
-if (read(p_fd, rgd_buf, gd_size) != gd_size) {

Re: [Qemu-devel] [PATCH v9 00/12] Adding VMDK monolithic flat support

2011-07-18 Thread Fam Zheng
Hi,

Resent [09/12] and [10/12].

On Mon, Jul 18, 2011 at 10:52 PM, Kevin Wolf  wrote:
> Am 12.07.2011 13:56, schrieb Fam Zheng:
>> Changes from v8:
>>     09/12: remove duplicated sscanf
>>     10/12: change option name to 'subformat', change commit message typo,
>>             factor common parts of creating, and other small improvements
>>
>> Fam Zheng (12):
>>   VMDK: introduce VmdkExtent
>>   VMDK: bugfix, align offset to cluster in get_whole_cluster
>>   VMDK: probe for monolithicFlat images
>>   VMDK: separate vmdk_open by format version
>>   VMDK: add field BDRVVmdkState.desc_offset
>>   VMDK: flush multiple extents
>>   VMDK: move 'static' cid_update flag to bs field
>>   VMDK: change get_cluster_offset return type
>>   VMDK: open/read/write for monolithicFlat image
>>   VMDK: create different subformats
>>   VMDK: fix coding style
>>   block: add bdrv_get_allocated_file_size() operation
>>
>>  block.c           |   19 +
>>  block.h           |    1 +
>>  block/raw-posix.c |   21 +
>>  block/raw-win32.c |   29 ++
>>  block/vmdk.c      | 1296 
>> -
>>  block_int.h       |    2 +
>>  qemu-img.c        |   31 +--
>>  7 files changed, 964 insertions(+), 435 deletions(-)
>
> This doesn't build for me:
>
> cc1: warnings being treated as errors
> block/vmdk.c: In function 'vmdk_parse_extents':
> block/vmdk.c:482: error: format '%lld' expects type 'long long int *',
> but argument 4 has type 'int64_t *'
> block/vmdk.c:482: error: format '%lld' expects type 'long long int *',
> but argument 7 has type 'int64_t *'
> block/vmdk.c: In function 'vmdk_create_extent':
> block/vmdk.c:1024: error: invalid storage class for function
> 'filename_decompose'
> [...]
>
> The following patch makes it compile. I haven't tried figuring out which
> patches need to be fixed, but should be easy enough for you. Please
> resend only those patches that need to be changed, I have already merged
> everything (plus my fixup) and will only update it in the block branch.
>
> Kevin
>
>
> diff --git a/block/vmdk.c b/block/vmdk.c
> index 63d7605..37478d2 100644
> --- a/block/vmdk.c
> +++ b/block/vmdk.c
> @@ -478,7 +478,7 @@ static int vmdk_parse_extents(const char *desc,
> BlockDriverState *bs,
>          * RW [size in sectors] SPARSE "file-name.vmdk"
>          */
>         flat_offset = -1;
> -        ret = sscanf(p, "%10s %lld %10s %511s %lld",
> +        ret = sscanf(p, "%10s %" SCNd64 " %10s %511s %" SCNd64,
>                 access, &sectors, type, fname, &flat_offset);
>         if (ret < 4 || strcmp(access, "RW")) {
>             goto next_line;
> @@ -927,6 +927,7 @@ static int vmdk_create_extent(const char *filename,
> int64_t filesize, bool flat)
>         0644);
>     if (fd < 0) {
>         return -errno;
> +    }
>     if (flat) {
>         ret = ftruncate(fd, filesize);
>         if (ret < 0) {
>



-- 
Best regards!
Fam Zheng



[Qemu-devel] [PATCH] VMDK: fix leak of extent_file

2011-09-18 Thread Fam Zheng
Release extent_file on error in vmdk_parse_extents. Added closing files
in freeing extents.

Signed-off-by: Fam Zheng 
---
 block/vmdk.c |   14 ++
 1 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/block/vmdk.c b/block/vmdk.c
index 6c8edfc..5d16ec4 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -179,11 +179,16 @@ static void vmdk_free_extents(BlockDriverState *bs)
 {
 int i;
 BDRVVmdkState *s = bs->opaque;
+VmdkExtent *e;
 
 for (i = 0; i < s->num_extents; i++) {
-g_free(s->extents[i].l1_table);
-g_free(s->extents[i].l2_cache);
-g_free(s->extents[i].l1_backup_table);
+e = &s->extents[i];
+g_free(e->l1_table);
+g_free(e->l2_cache);
+g_free(e->l1_backup_table);
+if (e->file != bs->file) {
+bdrv_delete(e->file);
+}
 }
 g_free(s->extents);
 }
@@ -619,12 +624,13 @@ static int vmdk_open_desc_file(BlockDriverState *bs, int 
flags,
 s->desc_offset = 0;
 ret = vmdk_parse_extents(buf, bs, bs->file->filename);
 if (ret) {
+vmdk_free_extents(bs);
 return ret;
 }
 
 /* try to open parent images, if exist */
 if (vmdk_parent_open(bs)) {
-g_free(s->extents);
+vmdk_free_extents(bs);
 return -EINVAL;
 }
 s->parent_cid = vmdk_read_cid(bs, 1);



Re: [Qemu-devel] [PATCHv2] block: introduce BDRV_O_SEQUENTIAL

2014-03-24 Thread Fam Zheng
On Fri, 03/21 12:49, Peter Lieven wrote:
> this patch introduces a new flag to indicate that we are going to sequentially
> read from a file and do not plan to reread/reuse the data after it has been 
> read.
> 
> The current use of this flag is to open the source(s) of a qemu-img convert
> process. If a protocol from block/raw-posix.c is used posix_fadvise is 
> utilized
> to advise to the kernel that we are going to read sequentially from the
> file and a POSIX_FADV_DONTNEED advise is issued after each write to indicate
> that there is no advantage keeping the blocks in the buffers.
> 
> Consider the following test case that was created to confirm the behaviour of
> the new flag:
> 
> A 10G logical volume was created and filled with random data.
> Then the logical volume was exported via qemu-img convert to an iscsi target.
> Before the export was started all caches of the linux kernel where dropped.
> 
> Old behavior:
>  - The convert process took 3m45s and the buffer cache grew up to 9.67 GB 
> close
>to the end of the conversion. After qemu-img terminated all the buffers 
> were
>freed by the kernel.
> 
> New behavior with the -N switch:
>  - The convert process took 3m43s and the buffer cache grew up to 15.48 MB 
> close
>to the end with some small peaks up to 30 MB durine the conversion.

s/durine/during/

The patch looks OK, and I have no objection with this flag. But I'm still
curious about the use case: Host page cache growing is not the real problem,
I'm not fully persuaded by the commit message because I still don't know _what_
useful cache would be dropped (if you don't empty the kernel cache before
starting). I don't think all 9.67 GB buffer will be filled by data from this
volume, so the question is how to measure the real, effective performance
impact?

> 
> Signed-off-by: Peter Lieven 
> ---
> v1->v2: - added test example to commit msg
> - added -N knob to qemu-img
> 
>  block/raw-posix.c |   14 ++
>  include/block/block.h |1 +
>  qemu-img-cmds.hx  |4 ++--
>  qemu-img.c|   16 +---
>  qemu-img.texi |9 -
>  5 files changed, 38 insertions(+), 6 deletions(-)
> 
> diff --git a/block/raw-posix.c b/block/raw-posix.c
> index 1688e16..08f7209 100644
> --- a/block/raw-posix.c
> +++ b/block/raw-posix.c
> @@ -444,6 +444,13 @@ static int raw_open_common(BlockDriverState *bs, QDict 
> *options,
>  }
>  #endif
>  
> +#ifdef POSIX_FADV_SEQUENTIAL
> +if (bs->open_flags & BDRV_O_SEQUENTIAL &&
> +!(bs->open_flags & BDRV_O_NOCACHE)) {
> +posix_fadvise(s->fd, 0, 0, POSIX_FADV_SEQUENTIAL);
> +}
> +#endif
> +
>  ret = 0;
>  fail:
>  qemu_opts_del(opts);
> @@ -913,6 +920,13 @@ static int aio_worker(void *arg)
>  ret = aiocb->aio_nbytes;
>  }
>  if (ret == aiocb->aio_nbytes) {
> +#ifdef POSIX_FADV_DONTNEED
> +if (aiocb->bs->open_flags & BDRV_O_SEQUENTIAL &&
> +!(aiocb->bs->open_flags & BDRV_O_NOCACHE)) {
> +posix_fadvise(aiocb->aio_fildes, aiocb->aio_offset,
> +  aiocb->aio_nbytes, POSIX_FADV_DONTNEED);
> +}
> +#endif

I'm not familiar with posix_fadvise; can we do this on the whole file at once
in raw_open_common, like POSIX_FADV_SEQUENTIAL?

Thanks,
Fam



Re: [Qemu-devel] [Qemu-stable] [PATCH 1/1] virtio-blk: Use a req pool instead of malloc/free

2014-03-27 Thread Fam Zheng
Please send this patch to qemu-devel@nongnu.org.

On Tue, 03/25 16:42, Li, Zhen-Hua wrote:
> From: "Li, ZhenHua" 
> 
> In virtio-blk module, when there is new request, new req structure
> will be created by malloc.  Use a req pool instead of this, will increase

s/Use/Using/

> performance;
> 
> Increacement: about 5% to 10%.

s/Increacement/Increment/

> 
> Signed-off-by: Li, ZhenHua 
> ---
>  hw/block/virtio-blk.c |   87 
> ++---
>  1 file changed, 75 insertions(+), 12 deletions(-)
> 
> diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
> index 8a568e5..da5b570 100644
> --- a/hw/block/virtio-blk.c
> +++ b/hw/block/virtio-blk.c
> @@ -39,6 +39,70 @@ typedef struct VirtIOBlockReq
>  BlockAcctCookie acct;
>  } VirtIOBlockReq;
>  
> +#define POOL_PAGE 512
> +static VirtIOBlockReq * * req_pool;
> +static char * req_pool_used;

Why char *? It looks like a bool array.

> +static unsigned long req_pool_size = 0;

Bad coding style (and below), please use scripts/checkpatch.pl to check your
patch before posting.

> +
> +static void remalloc_reqs(void){

Bikeshedding, but remalloc? realloc?

> +unsigned long old_size = req_pool_size;
> +unsigned long int i;
> +char * old_used = req_pool_used;
> +VirtIOBlockReq * * old_pool = req_pool;
> +
> +req_pool_size += POOL_PAGE;
> +req_pool_used = (char * )malloc(req_pool_size * sizeof(char));

Please use g_malloc0 and friends. No need for an explicit type cast.

Thanks,
Fam

> +req_pool =  (VirtIOBlockReq * * )malloc(req_pool_size * 
> sizeof(VirtIOBlockReq *));
> +
> +if(old_size != 0){
> +memcpy(req_pool_used, old_used, old_size*(sizeof(char)));
> +memcpy(req_pool, old_pool, old_size*(sizeof(VirtIOBlockReq *)));
> +}
> +for(i=old_size; i<req_pool_size; i++){
> + req_pool[i] = (VirtIOBlockReq *)malloc(sizeof(VirtIOBlockReq));
> + req_pool_used[i] = 0;
> +}
> +
> +if(old_size != 0){
> + free(old_used);
> + free(old_pool);
> +}
> +}
> +static VirtIOBlockReq * req_pool_get_new(void){
> +unsigned long int i;
> +char * used;
> +VirtIOBlockReq * * req;
> +
> +if(req_pool_size == 0){
> +remalloc_reqs();
> +}
> +for(i=0, used=req_pool_used, req=req_pool; 
> +i + if(*used == 0){
> +*used = 1;
> +return *req;
> +}
> +}
> +remalloc_reqs();
> +req_pool_used[req_pool_size-POOL_PAGE] = 1;
> +*req = req_pool[req_pool_size-POOL_PAGE];
> +return *req;
> +}
> +
> +static void virtio_blk_free_request(VirtIOBlockReq *req0){
> +unsigned long int i;
> +char * used;
> +VirtIOBlockReq * * req;
> +
> +for(i=0, used=req_pool_used, req=req_pool; 
> +i + if(*req == req0){
> +*used = 0;
> +}
> +}
> +}
> +
> +
>  static void virtio_blk_req_complete(VirtIOBlockReq *req, int status)
>  {
>  VirtIOBlock *s = req->dev;
> @@ -63,7 +127,7 @@ static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, 
> int error,
>  } else if (action == BDRV_ACTION_REPORT) {
>  virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
>  bdrv_acct_done(s->bs, &req->acct);
> -g_free(req);
> +virtio_blk_free_request(req);
>  }
>  
>  bdrv_error_action(s->bs, action, is_read, error);
> @@ -84,7 +148,7 @@ static void virtio_blk_rw_complete(void *opaque, int ret)
>  
>  virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
>  bdrv_acct_done(req->dev->bs, &req->acct);
> -g_free(req);
> +virtio_blk_free_request(req);
>  }
>  
>  static void virtio_blk_flush_complete(void *opaque, int ret)
> @@ -99,25 +163,24 @@ static void virtio_blk_flush_complete(void *opaque, int 
> ret)
>  
>  virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
>  bdrv_acct_done(req->dev->bs, &req->acct);
> -g_free(req);
> +virtio_blk_free_request(req);
>  }
> -
>  static VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s)
>  {
> -VirtIOBlockReq *req = g_malloc(sizeof(*req));
> +VirtIOBlockReq *req ;
> +req = req_pool_get_new();
>  req->dev = s;
>  req->qiov.size = 0;
>  req->next = NULL;
>  return req;
>  }
> -
>  static VirtIOBlockReq *virtio_blk_get_request(VirtIOBlock *s)
>  {
>  VirtIOBlockReq *req = virtio_blk_alloc_request(s);
>  
>  if (req != NULL) {
>  if (!virtqueue_pop(s->vq, &req->elem)) {
> -g_free(req);
> +virtio_blk_free_request(req);
>  return NULL;
>  }
>  }
> @@ -142,7 +205,7 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
>   */
>  if (req->elem.out_num < 2 || req->elem.in_num < 3) {
>  virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
> -g_free(req);
> +virtio_blk_free_request(req);
>  return;
>  }
>  
> @@ -232,7 +295,7 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
>  stl_p(&req->scsi->data_len, hdr.dxfer_len);
>  
>  virtio_blk_req_complete(req, 

[Qemu-devel] [PATCH v4 0/9] QMP: Introduce incremental drive-backup with in-memory dirty bitmap

2014-03-27 Thread Fam Zheng
v4: [08/09] qmp: Add support of "dirty-bitmap" sync mode for drive-backup
Comment text fixes. (Eric)

v3:
Address Benoit's comments.

[01/09] qapi: Add optional field "name" to block dirty bitmap
Don't split line.

[03/09] block: Handle error of bdrv_getlength in bdrv_create_dirty_bitmap
Add reviewed-by.

[04/09] block: Introduce bdrv_dirty_bitmap_granularity()
Add reviewed-by.

[05/09] hbitmap: Add hbitmap_copy
Fix size calculation.

[08/09] qmp: Add support of "dirty-bitmap" sync mode for drive-backup
Fix typo in commit message.
Add comment for sync_bitmap_gran.
    Add (Since 2.1).

Thanks,
Fam


Fam Zheng (9):
  qapi: Add optional field "name" to block dirty bitmap
  qmp: Add dirty-bitmap-add and dirty-bitmap-remove
  block: Handle error of bdrv_getlength in bdrv_create_dirty_bitmap
  block: Introduce bdrv_dirty_bitmap_granularity()
  hbitmap: Add hbitmap_copy
  block: Add bdrv_copy_dirty_bitmap and bdrv_reset_dirty_bitmap
  qmp: Add dirty-bitmap-enable and dirty-bitmap-disable
  qmp: Add support of "dirty-bitmap" sync mode for drive-backup
  qapi: Add transaction support to dirty-bitmap-{add,disable}

 block-migration.c |   3 +-
 block.c   |  89 ++-
 block/backup.c|  53 +-
 block/mirror.c|   6 +-
 blockdev.c| 181 +-
 hmp.c |   4 +-
 include/block/block.h |  16 +++-
 include/block/block_int.h |   3 +
 include/qemu/hbitmap.h|   8 ++
 qapi-schema.json  | 117 --
 qmp-commands.hx   |  66 -
 util/hbitmap.c|  16 
 12 files changed, 544 insertions(+), 18 deletions(-)

-- 
1.9.1




[Qemu-devel] [PATCH v4 1/9] qapi: Add optional field "name" to block dirty bitmap

2014-03-27 Thread Fam Zheng
This field will be set for user created dirty bitmap. Also pass in an
error pointer to bdrv_create_dirty_bitmap, so when a name is already
taken on this BDS, it can report an error message. This is not a global
check: two BDSes can have dirty bitmaps with a common name.

Implemented bdrv_find_dirty_bitmap to find a dirty bitmap by name, will
be used later when other QMP commands want to reference dirty bitmap by
name.

Add bdrv_dirty_bitmap_make_anon. This unsets the name of dirty bitmap.

Signed-off-by: Fam Zheng 
---
 block-migration.c |  3 ++-
 block.c   | 33 -
 block/mirror.c|  2 +-
 include/block/block.h |  8 +++-
 qapi-schema.json  |  4 +++-
 5 files changed, 45 insertions(+), 5 deletions(-)

diff --git a/block-migration.c b/block-migration.c
index 897fdba..e6e016a 100644
--- a/block-migration.c
+++ b/block-migration.c
@@ -315,7 +315,8 @@ static void set_dirty_tracking(void)
 BlkMigDevState *bmds;
 
 QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
-bmds->dirty_bitmap = bdrv_create_dirty_bitmap(bmds->bs, BLOCK_SIZE);
+bmds->dirty_bitmap = bdrv_create_dirty_bitmap(bmds->bs, BLOCK_SIZE,
+  NULL, NULL);
 }
 }
 
diff --git a/block.c b/block.c
index acb70fd..3f880d6 100644
--- a/block.c
+++ b/block.c
@@ -52,6 +52,7 @@
 
 struct BdrvDirtyBitmap {
 HBitmap *bitmap;
+char *name;
 QLIST_ENTRY(BdrvDirtyBitmap) list;
 };
 
@@ -5079,18 +5080,45 @@ bool bdrv_qiov_is_aligned(BlockDriverState *bs, 
QEMUIOVector *qiov)
 return true;
 }
 
-BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int 
granularity)
+BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
+{
+BdrvDirtyBitmap *bm;
+QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
+if (!strcmp(name, bm->name)) {
+return bm;
+}
+}
+return NULL;
+}
+
+void bdrv_dirty_bitmap_make_anon(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
+{
+g_free(bitmap->name);
+bitmap->name = NULL;
+}
+
+BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
+  int granularity,
+  const char *name,
+  Error **errp)
 {
 int64_t bitmap_size;
 BdrvDirtyBitmap *bitmap;
 
 assert((granularity & (granularity - 1)) == 0);
 
+if (name && bdrv_find_dirty_bitmap(bs, name)) {
+error_setg(errp, "Bitmap already exists: %s", name);
+return NULL;
+}
 granularity >>= BDRV_SECTOR_BITS;
 assert(granularity);
 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS);
 bitmap = g_malloc0(sizeof(BdrvDirtyBitmap));
 bitmap->bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1);
+if (name) {
+bitmap->name = g_strdup(name);
+}
 QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
 return bitmap;
 }
@@ -5102,6 +5130,7 @@ void bdrv_release_dirty_bitmap(BlockDriverState *bs, 
BdrvDirtyBitmap *bitmap)
 if (bm == bitmap) {
 QLIST_REMOVE(bitmap, list);
 hbitmap_free(bitmap->bitmap);
+g_free(bitmap->name);
 g_free(bitmap);
 return;
 }
@@ -5120,6 +5149,8 @@ BlockDirtyInfoList 
*bdrv_query_dirty_bitmaps(BlockDriverState *bs)
 info->count = bdrv_get_dirty_count(bs, bm);
 info->granularity =
 ((int64_t) BDRV_SECTOR_SIZE << hbitmap_granularity(bm->bitmap));
+info->has_name = bm->name[0] != '\0';
+info->name = g_strdup(bm->name);
 entry->value = info;
 *plist = entry;
 plist = &entry->next;
diff --git a/block/mirror.c b/block/mirror.c
index 0ef41f9..9b73cd9 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -605,7 +605,7 @@ static void mirror_start_job(BlockDriverState *bs, 
BlockDriverState *target,
 s->granularity = granularity;
 s->buf_size = MAX(buf_size, granularity);
 
-s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity);
+s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
 bdrv_set_enable_write_cache(s->target, true);
 bdrv_set_on_error(s->target, on_target_error, on_target_error);
 bdrv_iostatus_enable(s->target);
diff --git a/include/block/block.h b/include/block/block.h
index 1ed55d8..aa30b2a 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -428,7 +428,13 @@ bool bdrv_qiov_is_aligned(BlockDriverState *bs, 
QEMUIOVector *qiov);
 
 struct HBitmapIter;
 typedef struct BdrvDirtyBitmap BdrvDirtyBitmap;
-BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int 
granularity);
+BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
+ 

[Qemu-devel] [PATCH v4 4/9] block: Introduce bdrv_dirty_bitmap_granularity()

2014-03-27 Thread Fam Zheng
This returns the granularity (in sectors) of dirty bitmap.

Signed-off-by: Fam Zheng 
Reviewed-by: Benoit Canet 
---
 block.c   | 6 ++
 include/block/block.h | 2 ++
 2 files changed, 8 insertions(+)

diff --git a/block.c b/block.c
index b5265bb..6b82bf0 100644
--- a/block.c
+++ b/block.c
@@ -5173,6 +5173,12 @@ int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap 
*bitmap, int64_t sector
 }
 }
 
+int bdrv_dirty_bitmap_granularity(BlockDriverState *bs,
+  BdrvDirtyBitmap *bitmap)
+{
+return hbitmap_granularity(bitmap->bitmap);
+}
+
 void bdrv_dirty_iter_init(BlockDriverState *bs,
   BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
 {
diff --git a/include/block/block.h b/include/block/block.h
index aa30b2a..ed85f85 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -437,6 +437,8 @@ BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState 
*bs,
 void bdrv_dirty_bitmap_make_anon(BlockDriverState *bs, BdrvDirtyBitmap 
*bitmap);
 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap);
 BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs);
+int bdrv_dirty_bitmap_granularity(BlockDriverState *bs,
+  BdrvDirtyBitmap *bitmap);
 int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t 
sector);
 void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors);
 void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int 
nr_sectors);
-- 
1.9.1




[Qemu-devel] [PATCH v4 7/9] qmp: Add dirty-bitmap-enable and dirty-bitmap-disable

2014-03-27 Thread Fam Zheng
This allows putting the dirty bitmap into a disabled state where no more
writes will be tracked.

It will be used before backup or writing to persistent file.

Signed-off-by: Fam Zheng 
---
 block.c   | 15 +++
 blockdev.c| 44 
 include/block/block.h |  2 ++
 qapi-schema.json  | 32 
 qmp-commands.hx   | 10 ++
 5 files changed, 103 insertions(+)

diff --git a/block.c b/block.c
index 0abc593..f396f92 100644
--- a/block.c
+++ b/block.c
@@ -55,6 +55,7 @@ struct BdrvDirtyBitmap {
 int64_t size;
 int64_t granularity;
 char *name;
+bool enabled;
 QLIST_ENTRY(BdrvDirtyBitmap) list;
 };
 
@@ -5150,6 +5151,7 @@ BdrvDirtyBitmap 
*bdrv_create_dirty_bitmap(BlockDriverState *bs,
 if (name) {
 bitmap->name = g_strdup(name);
 }
+bitmap->enabled = true;
 QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
 return bitmap;
 }
@@ -5168,6 +5170,16 @@ void bdrv_release_dirty_bitmap(BlockDriverState *bs, 
BdrvDirtyBitmap *bitmap)
 }
 }
 
+void bdrv_disable_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
+{
+bitmap->enabled = false;
+}
+
+void bdrv_enable_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
+{
+bitmap->enabled = true;
+}
+
 BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
 {
 BdrvDirtyBitmap *bm;
@@ -5216,6 +5228,9 @@ void bdrv_set_dirty(BlockDriverState *bs, int64_t 
cur_sector,
 {
 BdrvDirtyBitmap *bitmap;
 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
+if (!bitmap->enabled) {
+continue;
+}
 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
 }
 }
diff --git a/blockdev.c b/blockdev.c
index 662c950..aa3ee55 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1773,6 +1773,50 @@ void qmp_dirty_bitmap_remove(const char *device, const 
char *name,
 bdrv_release_dirty_bitmap(bs, bitmap);
 }
 
+void qmp_dirty_bitmap_enable(const char *device, const char *name,
+ bool has_granularity, int64_t granularity,
+ Error **errp)
+{
+BlockDriverState *bs;
+BdrvDirtyBitmap *bitmap;
+
+bs = bdrv_find(device);
+if (!bs) {
+error_set(errp, QERR_DEVICE_NOT_FOUND, device);
+return;
+}
+
+bitmap = bdrv_find_dirty_bitmap(bs, name);
+if (!bitmap) {
+error_setg(errp, "Dirty bitmap not found: %s", name);
+return;
+}
+
+bdrv_enable_dirty_bitmap(bs, bitmap);
+}
+
+void qmp_dirty_bitmap_disable(const char *device, const char *name,
+  bool has_granularity, int64_t granularity,
+  Error **errp)
+{
+BlockDriverState *bs;
+BdrvDirtyBitmap *bitmap;
+
+bs = bdrv_find(device);
+if (!bs) {
+error_set(errp, QERR_DEVICE_NOT_FOUND, device);
+return;
+}
+
+bitmap = bdrv_find_dirty_bitmap(bs, name);
+if (!bitmap) {
+error_setg(errp, "Dirty bitmap not found: %s", name);
+return;
+}
+
+bdrv_disable_dirty_bitmap(bs, bitmap);
+}
+
 int do_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data)
 {
 const char *id = qdict_get_str(qdict, "id");
diff --git a/include/block/block.h b/include/block/block.h
index a29169f..eef28df 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -440,6 +440,8 @@ BdrvDirtyBitmap *bdrv_copy_dirty_bitmap(BlockDriverState 
*bs,
 const BdrvDirtyBitmap *bitmap,
 const char *name);
 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap);
+void bdrv_disable_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap);
+void bdrv_enable_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap);
 BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs);
 int bdrv_dirty_bitmap_granularity(BlockDriverState *bs,
   BdrvDirtyBitmap *bitmap);
diff --git a/qapi-schema.json b/qapi-schema.json
index 56f16a9..cf1cc8a 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -2254,6 +2254,38 @@
   'data': { 'device': 'str', 'name': 'str' } }
 
 ##
+# @dirty-bitmap-enable
+#
+# Enable a dirty bitmap on the device
+#
+# Setting granularity has no effect here.
+#
+# Returns: nothing on success
+#  If @device is not a valid block device, DeviceNotFound
+#  If @name is not found, GenericError with an explaining message
+#
+# Since 2.1
+##
+{'command': 'dirty-bitmap-enable',
+  'data': 'DirtyBitmap' }
+
+##
+# @dirty-bitmap-disable
+#
+# Disable a dirty bitmap on the device
+#
+# Setting granularity has no effect here.
+#
+# Returns: nothing on success
+#  If @device is not a valid block 

[Qemu-devel] [PATCH v4 8/9] qmp: Add support of "dirty-bitmap" sync mode for drive-backup

2014-03-27 Thread Fam Zheng
For "dirty-bitmap" sync mode, the block job will iterate through the
given dirty bitmap to decide if a sector needs backup (backup all the
dirty clusters and skip clean ones), just as allocation conditions of
"top" sync mode.

There are two bitmap use modes for sync=dirty-bitmap:

 - reset: backup job makes a copy of bitmap and resets the original
   one.
 - consume: backup job makes the original anonymous (invisible to user)
   and releases it after use.

Signed-off-by: Fam Zheng 
---
 block/backup.c| 53 ++-
 block/mirror.c|  4 
 blockdev.c|  9 +++-
 hmp.c |  4 +++-
 include/block/block_int.h |  3 +++
 qapi-schema.json  | 32 
 qmp-commands.hx   |  7 ---
 7 files changed, 102 insertions(+), 10 deletions(-)

diff --git a/block/backup.c b/block/backup.c
index 15a2e55..24b8d2c 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -37,6 +37,10 @@ typedef struct CowRequest {
 typedef struct BackupBlockJob {
 BlockJob common;
 BlockDriverState *target;
+/* bitmap for sync=dirty-bitmap */
+BdrvDirtyBitmap *sync_bitmap;
+/* dirty bitmap granularity */
+int sync_bitmap_gran;
 MirrorSyncMode sync_mode;
 RateLimit limit;
 BlockdevOnError on_source_error;
@@ -263,7 +267,7 @@ static void coroutine_fn backup_run(void *opaque)
 job->common.busy = true;
 }
 } else {
-/* Both FULL and TOP SYNC_MODE's require copying.. */
+/* FULL, TOP and DIRTY_BITMAP SYNC_MODE's require copying.. */
 for (; start < end; start++) {
 bool error_is_read;
 
@@ -317,7 +321,21 @@ static void coroutine_fn backup_run(void *opaque)
 if (alloced == 0) {
 continue;
 }
+} else if (job->sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP) {
+int i, dirty = 0;
+for (i = 0; i < BACKUP_SECTORS_PER_CLUSTER;
+ i += job->sync_bitmap_gran) {
+if (bdrv_get_dirty(bs, job->sync_bitmap,
+start * BACKUP_SECTORS_PER_CLUSTER + i)) {
+dirty = 1;
+break;
+}
+}
+if (!dirty) {
+continue;
+}
 }
+
 /* FULL sync mode we copy the whole drive. */
 ret = backup_do_cow(bs, start * BACKUP_SECTORS_PER_CLUSTER,
 BACKUP_SECTORS_PER_CLUSTER, &error_is_read);
@@ -341,6 +359,9 @@ static void coroutine_fn backup_run(void *opaque)
 qemu_co_rwlock_wrlock(&job->flush_rwlock);
 qemu_co_rwlock_unlock(&job->flush_rwlock);
 
+if (job->sync_bitmap) {
+bdrv_release_dirty_bitmap(bs, job->sync_bitmap);
+}
 hbitmap_free(job->bitmap);
 
 bdrv_iostatus_disable(target);
@@ -351,12 +372,15 @@ static void coroutine_fn backup_run(void *opaque)
 
 void backup_start(BlockDriverState *bs, BlockDriverState *target,
   int64_t speed, MirrorSyncMode sync_mode,
+  BdrvDirtyBitmap *sync_bitmap,
+  BitmapUseMode bitmap_mode,
   BlockdevOnError on_source_error,
   BlockdevOnError on_target_error,
   BlockDriverCompletionFunc *cb, void *opaque,
   Error **errp)
 {
 int64_t len;
+BdrvDirtyBitmap *original;
 
 assert(bs);
 assert(target);
@@ -369,6 +393,28 @@ void backup_start(BlockDriverState *bs, BlockDriverState 
*target,
 return;
 }
 
+if (sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP && !sync_bitmap) {
+error_setg(errp, "must provide a valid bitmap name for 
\"dirty-bitmap\""
+ "sync mode");
+return;
+}
+
+if (sync_bitmap) {
+switch (bitmap_mode) {
+case BITMAP_USE_MODE_RESET:
+original = sync_bitmap;
+sync_bitmap = bdrv_copy_dirty_bitmap(bs, sync_bitmap, NULL);
+bdrv_reset_dirty_bitmap(bs, original);
+break;
+case BITMAP_USE_MODE_CONSUME:
+bdrv_dirty_bitmap_make_anon(bs, sync_bitmap);
+break;
+default:
+assert(0);
+}
+bdrv_disable_dirty_bitmap(bs, sync_bitmap);
+}
+
 len = bdrv_getlength(bs);
 if (len < 0) {
 error_setg_errno(errp, -len, "unable to get length for '%s'",
@@ -386,6 +432,11 @@ void backup_start(BlockDriverState *bs, BlockDriverState 
*target,
 job->on_target_error = on_target_error;
 job->target = target;
 job->sync_mode = sync_mode;
+job->sync_bitmap = sync_bitmap;
+if (sync_bitmap) {
+job->sync_bitmap_gran =
+bdrv_dirty_bitmap_granularity(bs, job-&

[Qemu-devel] [PATCH v4 9/9] qapi: Add transaction support to dirty-bitmap-{add, disable}

2014-03-27 Thread Fam Zheng
This adds dirty-bitmap-add and dirty-bitmap-disable to transactions.
With this, user can stop a dirty bitmap, start backup of it, and start
another dirty bitmap atomically, so that the dirty bitmap is tracked
incrementally and we don't miss any write.

Signed-off-by: Fam Zheng 
---
 blockdev.c   | 68 
 qapi-schema.json |  4 +++-
 2 files changed, 71 insertions(+), 1 deletion(-)

diff --git a/blockdev.c b/blockdev.c
index 4120dee..38dabe6 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1411,6 +1411,64 @@ static void drive_backup_abort(BlkTransactionState 
*common)
 }
 }
 
+static void dirty_bitmap_add_prepare(BlkTransactionState *common, Error **errp)
+{
+DirtyBitmap *action;
+Error *local_err = NULL;
+
+action = common->action->dirty_bitmap_add;
+qmp_dirty_bitmap_add(action->device, action->name, false, 0, &local_err);
+if (error_is_set(&local_err)) {
+error_propagate(errp, local_err);
+}
+}
+
+static void dirty_bitmap_add_abort(BlkTransactionState *common)
+{
+DirtyBitmap *action;
+BdrvDirtyBitmap *bm;
+BlockDriverState *bs;
+
+action = common->action->dirty_bitmap_add;
+bs = bdrv_find(action->device);
+if (bs) {
+bm = bdrv_find_dirty_bitmap(bs, action->name);
+if (bm) {
+bdrv_release_dirty_bitmap(bs, bm);
+}
+}
+}
+
+static void dirty_bitmap_disable_prepare(BlkTransactionState *common,
+ Error **errp)
+{
+DirtyBitmap *action;
+Error *local_err = NULL;
+
+action = common->action->dirty_bitmap_disable;
+qmp_dirty_bitmap_disable(action->device, action->name,
+ false, 0, &local_err);
+if (error_is_set(&local_err)) {
+error_propagate(errp, local_err);
+}
+}
+
+static void dirty_bitmap_disable_abort(BlkTransactionState *common)
+{
+DirtyBitmap *action;
+BdrvDirtyBitmap *bitmap;
+BlockDriverState *bs;
+
+action = common->action->dirty_bitmap_disable;
+bs = bdrv_find(action->device);
+if (bs) {
+bitmap = bdrv_find_dirty_bitmap(bs, action->name);
+if (bitmap) {
+bdrv_enable_dirty_bitmap(bs, bitmap);
+}
+}
+}
+
 static void abort_prepare(BlkTransactionState *common, Error **errp)
 {
 error_setg(errp, "Transaction aborted using Abort action");
@@ -1443,6 +1501,16 @@ static const BdrvActionOps actions[] = {
 .prepare  = internal_snapshot_prepare,
 .abort = internal_snapshot_abort,
 },
+[TRANSACTION_ACTION_KIND_DIRTY_BITMAP_ADD] = {
+.instance_size = sizeof(BlkTransactionState),
+.prepare = dirty_bitmap_add_prepare,
+.abort = dirty_bitmap_add_abort,
+},
+[TRANSACTION_ACTION_KIND_DIRTY_BITMAP_DISABLE] = {
+.instance_size = sizeof(BlkTransactionState),
+.prepare = dirty_bitmap_disable_prepare,
+.abort = dirty_bitmap_disable_abort,
+},
 };
 
 /*
diff --git a/qapi-schema.json b/qapi-schema.json
index d99041c..48f7c15 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -1994,7 +1994,9 @@
'blockdev-snapshot-sync': 'BlockdevSnapshot',
'drive-backup': 'DriveBackup',
'abort': 'Abort',
-   'blockdev-snapshot-internal-sync': 'BlockdevSnapshotInternal'
+   'blockdev-snapshot-internal-sync': 'BlockdevSnapshotInternal',
+   'dirty-bitmap-add': 'DirtyBitmap',
+   'dirty-bitmap-disable': 'DirtyBitmap'
} }
 
 ##
-- 
1.9.1




[Qemu-devel] [PATCH v4 2/9] qmp: Add dirty-bitmap-add and dirty-bitmap-remove

2014-03-27 Thread Fam Zheng
The new command pair is added to manage user created dirty bitmap. The
dirty bitmap's name is mandatory and must be unique for the same device,
but different devices can have bitmaps with the same names.

Signed-off-by: Fam Zheng 
---
 blockdev.c   | 60 
 qapi-schema.json | 45 ++
 qmp-commands.hx  | 49 +
 3 files changed, 154 insertions(+)

diff --git a/blockdev.c b/blockdev.c
index c3422a1..662c950 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1713,6 +1713,66 @@ void qmp_block_set_io_throttle(const char *device, 
int64_t bps, int64_t bps_rd,
 }
 }
 
+void qmp_dirty_bitmap_add(const char *device, const char *name,
+  bool has_granularity, int64_t granularity,
+  Error **errp)
+{
+BlockDriverState *bs;
+BdrvDirtyBitmap *bitmap;
+
+bs = bdrv_find(device);
+if (!bs) {
+error_set(errp, QERR_DEVICE_NOT_FOUND, device);
+return;
+}
+
+if (!name || name[0] == '\0') {
+error_setg(errp, "Bitmap name cannot be empty");
+return;
+}
+if (has_granularity) {
+if (granularity & (granularity - 1)) {
+error_setg(errp, "Granularity must be power of 2");
+return;
+}
+} else {
+granularity = 65536;
+}
+
+bitmap = bdrv_create_dirty_bitmap(bs, granularity, name, errp);
+if (!bitmap) {
+return;
+}
+}
+
+void qmp_dirty_bitmap_remove(const char *device, const char *name,
+ Error **errp)
+{
+BlockDriverState *bs;
+BdrvDirtyBitmap *bitmap;
+
+bs = bdrv_find(device);
+if (!bs) {
+error_set(errp, QERR_DEVICE_NOT_FOUND, device);
+return;
+}
+
+if (!name || name[0] == '\0') {
+error_setg(errp, "Bitmap name cannot be empty");
+return;
+}
+bitmap = bdrv_find_dirty_bitmap(bs, name);
+if (!bitmap) {
+error_setg(errp, "Dirty bitmap not found: %s", name);
+return;
+}
+
+/* Make it invisible to user in case the following
+ * bdrv_release_dirty_bitmap doens't free it because of refcnt */
+bdrv_dirty_bitmap_make_anon(bs, bitmap);
+bdrv_release_dirty_bitmap(bs, bitmap);
+}
+
 int do_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data)
 {
 const char *id = qdict_get_str(qdict, "id");
diff --git a/qapi-schema.json b/qapi-schema.json
index 506c242..56f16a9 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -2209,6 +2209,51 @@
 '*on-target-error': 'BlockdevOnError' } }
 
 ##
+# @DirtyBitmap
+#
+# @device: name of device which the bitmap is tracking
+#
+# @name: name of the dirty bitmap
+#
+# @granularity: #optional the bitmap granularity, default is 64k for
+#   dirty-bitmap-add
+#
+# Since 2.1
+##
+{ 'type': 'DirtyBitmap',
+  'data': { 'device': 'str', 'name': 'str', '*granularity': 'int' } }
+
+##
+# @dirty-bitmap-add
+#
+# Create a dirty bitmap with a name on the device
+#
+# Returns: nothing on success
+#  If @device is not a valid block device, DeviceNotFound
+#  If @name is already taken, GenericError with an explaining message
+#
+# Since 2.1
+##
+{'command': 'dirty-bitmap-add',
+  'data': 'DirtyBitmap' }
+
+##
+# @dirty-bitmap-remove
+#
+# Remove a dirty bitmap on the device
+#
+# Setting granularity has no effect here.
+#
+# Returns: nothing on success
+#  If @device is not a valid block device, DeviceNotFound
+#  If @name is not found, GenericError with an explaining message
+#
+# Since 2.1
+##
+{'command': 'dirty-bitmap-remove',
+  'data': { 'device': 'str', 'name': 'str' } }
+
+##
 # @migrate_cancel
 #
 # Cancel the current executing migration process.
diff --git a/qmp-commands.hx b/qmp-commands.hx
index ed3ab92..b74f6ed 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -1185,6 +1185,55 @@ Example:
 EQMP
 
 {
+.name   = "dirty-bitmap-add",
+.args_type  = "device:B,name:s,granularity:i?",
+.mhandler.cmd_new = qmp_marshal_input_dirty_bitmap_add,
+},
+{
+.name   = "dirty-bitmap-remove",
+.args_type  = "device:B,name:s",
+.mhandler.cmd_new = qmp_marshal_input_dirty_bitmap_remove,
+},
+
+SQMP
+
+dirty-bitmap-add
+
+
+Create a dirty bitmap with a name on the device, and start tracking the writes.
+
+Arguments:
+
+- "device": device name to create dirty bitmap (json-string)
+- "name": name of the new dirty bitmap (json-string)
+- "granularity": granul

[Qemu-devel] [PATCH v4 5/9] hbitmap: Add hbitmap_copy

2014-03-27 Thread Fam Zheng
This makes a deep copy of an HBitmap.

Signed-off-by: Fam Zheng 
---
 include/qemu/hbitmap.h |  8 
 util/hbitmap.c | 16 
 2 files changed, 24 insertions(+)

diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h
index 550d7ce..b645cfc 100644
--- a/include/qemu/hbitmap.h
+++ b/include/qemu/hbitmap.h
@@ -65,6 +65,14 @@ struct HBitmapIter {
 HBitmap *hbitmap_alloc(uint64_t size, int granularity);
 
 /**
+ * hbitmap_copy:
+ * @bitmap: The original bitmap to copy.
+ *
+ * Copy a HBitmap.
+ */
+HBitmap *hbitmap_copy(const HBitmap *bitmap);
+
+/**
  * hbitmap_empty:
  * @hb: HBitmap to operate on.
  *
diff --git a/util/hbitmap.c b/util/hbitmap.c
index d936831..d906c06 100644
--- a/util/hbitmap.c
+++ b/util/hbitmap.c
@@ -400,3 +400,19 @@ HBitmap *hbitmap_alloc(uint64_t size, int granularity)
 hb->levels[0][0] |= 1UL << (BITS_PER_LONG - 1);
 return hb;
 }
+
+HBitmap *hbitmap_copy(const HBitmap *bitmap)
+{
+int i;
+int64_t size;
+HBitmap *hb = g_memdup(bitmap, sizeof(struct HBitmap));
+
+size = bitmap->size;
+for (i = HBITMAP_LEVELS; i-- > 0; ) {
+size = MAX((size + BITS_PER_LONG - 1) >> BITS_PER_LEVEL, 1);
+hb->levels[i] = g_memdup(bitmap->levels[i],
+ size * sizeof(unsigned long));
+}
+
+return hb;
+}
-- 
1.9.1




[Qemu-devel] [PATCH v4 6/9] block: Add bdrv_copy_dirty_bitmap and bdrv_reset_dirty_bitmap

2014-03-27 Thread Fam Zheng
Signed-off-by: Fam Zheng 
---
 block.c   | 30 --
 include/block/block.h |  4 
 2 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/block.c b/block.c
index 6b82bf0..0abc593 100644
--- a/block.c
+++ b/block.c
@@ -52,6 +52,8 @@
 
 struct BdrvDirtyBitmap {
 HBitmap *bitmap;
+int64_t size;
+int64_t granularity;
 char *name;
 QLIST_ENTRY(BdrvDirtyBitmap) list;
 };
@@ -5097,6 +5099,29 @@ void bdrv_dirty_bitmap_make_anon(BlockDriverState *bs, 
BdrvDirtyBitmap *bitmap)
 bitmap->name = NULL;
 }
 
+BdrvDirtyBitmap *bdrv_copy_dirty_bitmap(BlockDriverState *bs,
+const BdrvDirtyBitmap *bitmap,
+const char *name)
+{
+BdrvDirtyBitmap *new_bitmap;
+
+new_bitmap = g_memdup(bitmap, sizeof(BdrvDirtyBitmap));
+new_bitmap->bitmap = hbitmap_copy(bitmap->bitmap);
+if (name) {
+new_bitmap->name = g_strdup(name);
+}
+QLIST_INSERT_HEAD(&bs->dirty_bitmaps, new_bitmap, list);
+return new_bitmap;
+}
+
+void bdrv_reset_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
+{
+HBitmap *original = bitmap->bitmap;
+
+bitmap->bitmap = hbitmap_alloc(bitmap->size, bitmap->granularity);
+hbitmap_free(original);
+}
+
 BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
   int granularity,
   const char *name,
@@ -5118,9 +5143,10 @@ BdrvDirtyBitmap 
*bdrv_create_dirty_bitmap(BlockDriverState *bs,
 error_setg(errp, "could not get length of device");
 return NULL;
 }
-bitmap_size >>= BDRV_SECTOR_BITS;
 bitmap = g_malloc0(sizeof(BdrvDirtyBitmap));
-bitmap->bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1);
+bitmap->size = bitmap_size >> BDRV_SECTOR_BITS;
+bitmap->granularity = ffs(granularity) - 1;
+bitmap->bitmap = hbitmap_alloc(bitmap->size, bitmap->granularity);
 if (name) {
 bitmap->name = g_strdup(name);
 }
diff --git a/include/block/block.h b/include/block/block.h
index ed85f85..a29169f 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -435,6 +435,10 @@ BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState 
*bs,
 BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs,
 const char *name);
 void bdrv_dirty_bitmap_make_anon(BlockDriverState *bs, BdrvDirtyBitmap 
*bitmap);
+void bdrv_reset_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap);
+BdrvDirtyBitmap *bdrv_copy_dirty_bitmap(BlockDriverState *bs,
+const BdrvDirtyBitmap *bitmap,
+const char *name);
 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap);
 BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs);
 int bdrv_dirty_bitmap_granularity(BlockDriverState *bs,
-- 
1.9.1




[Qemu-devel] [PATCH v4 3/9] block: Handle error of bdrv_getlength in bdrv_create_dirty_bitmap

2014-03-27 Thread Fam Zheng
bdrv_getlength could fail, check the return value before using it.

Signed-off-by: Fam Zheng 
Reviewed-by: Benoit Canet 
---
 block.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/block.c b/block.c
index 3f880d6..b5265bb 100644
--- a/block.c
+++ b/block.c
@@ -5113,7 +5113,12 @@ BdrvDirtyBitmap 
*bdrv_create_dirty_bitmap(BlockDriverState *bs,
 }
 granularity >>= BDRV_SECTOR_BITS;
 assert(granularity);
-bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS);
+bitmap_size = bdrv_getlength(bs);
+if (bitmap_size < 0) {
+error_setg(errp, "could not get length of device");
+return NULL;
+}
+bitmap_size >>= BDRV_SECTOR_BITS;
 bitmap = g_malloc0(sizeof(BdrvDirtyBitmap));
 bitmap->bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1);
 if (name) {
-- 
1.9.1




Re: [Qemu-devel] [PATCH] Fix for qemu-img info to supply FORMAT values for SPARSE extents

2014-03-31 Thread Fam Zheng
On Mon, 03/31 12:05, Shwetha Mathangi Chandra Choodamani wrote:
> This patch fixes the qemu-img info bug to return the right format of an 
> extent. Changes applicable to both VMDK3 and VMDK4 type headers.
> Signed-off-by: Shwetha Mathangi Chandra Choodamani 
> 
> ---
>  block/vmdk.c |   42 ++
>  1 file changed, 42 insertions(+)
> 
> diff --git a/block/vmdk.c b/block/vmdk.c
> index b69988d..81fcb92 100644
> --- a/block/vmdk.c
> +++ b/block/vmdk.c
> @@ -515,6 +515,28 @@ static int vmdk_open_vmfs_sparse(BlockDriverState *bs,
>le32_to_cpu(header.granularity),
>&extent,
>errp);
> +char access[11];
> +char type[11];
> +char fname[512];
> +int64_t sectors = 0;
> +int64_t flat_offset;
> +int64_t size;
> +size = bdrv_getlength(file);
> +char *buf;

Please put variable declarations in the beginning of code block.

> +buf = g_malloc0(size + 1);

buf is allocated and incremented, but the buffer is never released.

> +bdrv_pread(file, sizeof(magic), buf, size);
> +while (strcmp(access, "RW")) {

access is used before initialization. Does this work?

> +while (*buf) {
> +sscanf(buf, "%10s %" SCNd64 " %10s \"%511[^\n\r\"]\" %" SCNd64,
> + access, &sectors, type, fname, &flat_offset);

Better to align the parameters with first line:

  access, &sectors, ...);


However I don't think VMDK3 needs this fix, what we care here is
monolithicSparse, which is specifically VMDK4.

> +if (*buf == '\n') {
> +buf++;
> +break;
> +}
> +buf++;
> +}
> +}
> +extent->type = g_strdup(type);
>  if (ret < 0) {
>  return ret;
>  }
> @@ -566,6 +588,12 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
>  VmdkExtent *extent;
>  BDRVVmdkState *s = bs->opaque;
>  int64_t l1_backup_offset = 0;
> +char access[11];
> +char type[11];
> +char fname[512];
> +int64_t sectors = 0;
> +int64_t flat_offset;
> +
>  
>  ret = bdrv_pread(file, sizeof(magic), &header, sizeof(header));
>  if (ret < 0) {
> @@ -589,6 +617,19 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
>  
>  if (!s->create_type) {
>  s->create_type = g_strdup("monolithicSparse");

For VMDK4, this duplicates with vmdk_parse_extents(). I think this patch can be
simplified by moving this if block after vmdk_add_extent, where the "extent"
variable is set and we can set its type field safely.

And please fix in the same way for streamOptimized.

Thanks,
Fam

> +uint64_t desc_offset = le64_to_cpu(header.desc_offset);
> +char *buf = vmdk_read_desc(file, desc_offset<<9, errp);
> +while (strcmp(access, "RW")) {
> +while (*buf) {
> +sscanf(buf, "%10s %" SCNd64 " %10s \"%511[^\n\r\"]\" %" 
> SCNd64,
> + access, &sectors, type, fname, &flat_offset);
> +if (*buf == '\n') {
> +buf++;
> +break;
> +}
> +buf++;
> +}
> +}
>  }
>  
>  if (le64_to_cpu(header.gd_offset) == VMDK4_GD_AT_END) {
> @@ -697,6 +738,7 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
>  extent->has_marker = le32_to_cpu(header.flags) & VMDK4_FLAG_MARKER;
>  extent->version = le32_to_cpu(header.version);
>  extent->has_zero_grain = le32_to_cpu(header.flags) & 
> VMDK4_FLAG_ZERO_GRAIN;
> +extent->type = g_strdup(type);
>  ret = vmdk_init_tables(bs, extent, errp);
>  if (ret) {
>  /* free extent allocated by vmdk_add_extent */
> -- 
> 1.7.9.5
> 



Re: [Qemu-devel] [PATCHv2] block: optimize zero writes with bdrv_write_zeroes

2014-04-01 Thread Fam Zheng
   if (bs->detect_zeroes == BDRV_DETECT_ZEROES_UNMAP) {
> +flags |= BDRV_REQ_MAY_UNMAP;
> +}
> +}
> +
>  if (ret < 0) {
>  /* Do nothing, write notifier decided to fail this request */
>  } else if (flags & BDRV_REQ_ZERO_WRITE) {
> diff --git a/include/block/block_int.h b/include/block/block_int.h
> index cd5bc73..7a3013a 100644
> --- a/include/block/block_int.h
> +++ b/include/block/block_int.h
> @@ -271,6 +271,17 @@ typedef struct BlockLimits {
>  } BlockLimits;
>  
>  /*
> + * Different operation modes for automatic zero detection
> + * to speed the write operation up with bdrv_write_zeroes.
> + */
> +typedef enum {
> +BDRV_DETECT_ZEROES_OFF   = 0x0,
> +BDRV_DETECT_ZEROES_ON= 0x1,
> +/* also set the BDRV_MAY_UNMAP flag with bdrv_write_zeroes */
> +BDRV_DETECT_ZEROES_UNMAP = 0x2,
> +} BdrvDetectZeroes;
> +
> +/*
>   * Note: the function bdrv_append() copies and swaps contents of
>   * BlockDriverStates, so if you add new fields to this struct, please
>   * inspect bdrv_append() to determine if the new fields need to be
> @@ -365,6 +376,7 @@ struct BlockDriverState {
>  BlockJob *job;
>  
>  QDict *options;
> +BdrvDetectZeroes detect_zeroes;
>  };
>  
>  int get_tmp_filename(char *filename, int size);
> diff --git a/include/qemu-common.h b/include/qemu-common.h
> index c8a58a8..574da73 100644
> --- a/include/qemu-common.h
> +++ b/include/qemu-common.h
> @@ -330,6 +330,7 @@ void qemu_iovec_concat(QEMUIOVector *dst,
>  void qemu_iovec_concat_iov(QEMUIOVector *dst,
> struct iovec *src_iov, unsigned int src_cnt,
> size_t soffset, size_t sbytes);
> +bool qemu_iovec_is_zero(QEMUIOVector *qiov);
>  void qemu_iovec_destroy(QEMUIOVector *qiov);
>  void qemu_iovec_reset(QEMUIOVector *qiov);
>  size_t qemu_iovec_to_buf(QEMUIOVector *qiov, size_t offset,
> diff --git a/qemu-options.hx b/qemu-options.hx
> index ee5437b..f824d9b 100644
> --- a/qemu-options.hx
> +++ b/qemu-options.hx
> @@ -410,6 +410,7 @@ DEF("drive", HAS_ARG, QEMU_OPTION_drive,
>  "   
> [,cache=writethrough|writeback|none|directsync|unsafe][,format=f]\n"
>  "   [,serial=s][,addr=A][,id=name][,aio=threads|native]\n"
>  "   [,readonly=on|off][,copy-on-read=on|off]\n"
> +"   [,detect-zeroes=on|off|unmap]\n"
>  "   [[,bps=b]|[[,bps_rd=r][,bps_wr=w]]]\n"
>  "   [[,iops=i]|[[,iops_rd=r][,iops_wr=w]]]\n"
>  "   [[,bps_max=bm]|[[,bps_rd_max=rm][,bps_wr_max=wm]]]\n"
> @@ -470,6 +471,11 @@ Open drive @option{file} as read-only. Guest write 
> attempts will fail.
>  @item copy-on-read=@var{copy-on-read}
>  @var{copy-on-read} is "on" or "off" and enables whether to copy read backing
>  file sectors into the image file.
> +@item detect-zeroes=@var{detect-zeroes}
> +@var{detect-zeroes} is "off", "on" or "unmap" and enables the automatic
> +conversion of plain zero writes by the OS to driver specific optimized
> +zero write commands. If "unmap" is chosen and @var{discard} is "on"
> +a zero write may even be converted to an UNMAP operation.
>  @end table
>  
>  By default, the @option{cache=writeback} mode is used. It will report data
> diff --git a/util/iov.c b/util/iov.c
> index 6569b5a..0b17392 100644
> --- a/util/iov.c
> +++ b/util/iov.c
> @@ -335,6 +335,27 @@ void qemu_iovec_concat(QEMUIOVector *dst,
>  qemu_iovec_concat_iov(dst, src->iov, src->niov, soffset, sbytes);
>  }
>  
> +/*
> + *  check if the contents of the iovecs is all zero

contents of ... are? content of ... is? Other than that,

Reviewed-by: Fam Zheng 

> + */
> +bool qemu_iovec_is_zero(QEMUIOVector *qiov)
> +{
> +int i;
> +for (i = 0; i < qiov->niov; i++) {
> +size_t offs = qiov->iov[i].iov_len & ~(4 * sizeof(long) - 1);
> +uint8_t *ptr = qiov->iov[i].iov_base;
> +if (offs && !buffer_is_zero(qiov->iov[i].iov_base, offs)) {
> +return false;
> +}
> +for (; offs < qiov->iov[i].iov_len; offs++) {
> +if (ptr[offs]) {
> +return false;
> +}
> +}
> +}
> +return true;
> +}
> +
>  void qemu_iovec_destroy(QEMUIOVector *qiov)
>  {
>  assert(qiov->nalloc != -1);
> -- 
> 1.7.9.5
> 



Re: [Qemu-devel] [PATCH v4 1/9] qapi: Add optional field "name" to block dirty bitmap

2014-04-01 Thread Fam Zheng
On Thu, 03/27 16:30, Stefan Hajnoczi wrote:
> On Thu, Mar 27, 2014 at 05:09:40PM +0800, Fam Zheng wrote:
> > @@ -5079,18 +5080,45 @@ bool bdrv_qiov_is_aligned(BlockDriverState *bs, 
> > QEMUIOVector *qiov)
> >  return true;
> >  }
> >  
> > -BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int 
> > granularity)
> > +BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char 
> > *name)
> > +{
> > +BdrvDirtyBitmap *bm;
> > +QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
> > +if (!strcmp(name, bm->name)) {
> > +return bm;
> > +}
> > +}
> > +return NULL;
> > +}
> > +
> > +void bdrv_dirty_bitmap_make_anon(BlockDriverState *bs, BdrvDirtyBitmap 
> > *bitmap)
> > +{
> > +g_free(bitmap->name);
> > +bitmap->name = NULL;
> > +}
> 
> This looks dangerous since strcmp() does not check for NULL pointers.  I
> guess you need to add a check to bdrv_find_dirty_bitmap().

OK.

> 
> > @@ -5120,6 +5149,8 @@ BlockDirtyInfoList 
> > *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
> >  info->count = bdrv_get_dirty_count(bs, bm);
> >  info->granularity =
> >  ((int64_t) BDRV_SECTOR_SIZE << 
> > hbitmap_granularity(bm->bitmap));
> > +info->has_name = bm->name[0] != '\0';
> > +info->name = g_strdup(bm->name);
> 
> This looks dangerous too.  What if ->name is NULL?

Will add a check here.

Thanks,
Fam



Re: [Qemu-devel] [PATCH v4 2/9] qmp: Add dirty-bitmap-add and dirty-bitmap-remove

2014-04-01 Thread Fam Zheng
On Thu, 03/27 16:43, Stefan Hajnoczi wrote:
> On Thu, Mar 27, 2014 at 05:09:41PM +0800, Fam Zheng wrote:
> > @@ -1713,6 +1713,66 @@ void qmp_block_set_io_throttle(const char *device, 
> > int64_t bps, int64_t bps_rd,
> >  }
> >  }
> >  
> > +void qmp_dirty_bitmap_add(const char *device, const char *name,
> > +  bool has_granularity, int64_t granularity,
> > +  Error **errp)
> > +{
> > +BlockDriverState *bs;
> > +BdrvDirtyBitmap *bitmap;
> > +
> > +bs = bdrv_find(device);
> > +if (!bs) {
> > +error_set(errp, QERR_DEVICE_NOT_FOUND, device);
> > +return;
> > +}
> > +
> > +if (!name || name[0] == '\0') {
> > +error_setg(errp, "Bitmap name cannot be empty");
> > +return;
> > +}
> > +if (has_granularity) {
> > +if (granularity & (granularity - 1)) {
> > +error_setg(errp, "Granularity must be power of 2");
> > +return;
> > +}
> 
> granularity must be non-zero, otherwise bdrv_create_dirty_bitmap() hits
> an assertion failure.
> 
> It should probably also be at least 512.

Sure, adding a check.

> 
> > +} else {
> > +granularity = 65536;
> > +}
> > +
> > +bitmap = bdrv_create_dirty_bitmap(bs, granularity, name, errp);
> > +if (!bitmap) {
> > +return;
> > +}
> 
> Useless error return.

Removing.

> 
> > +}
> > +
> > +void qmp_dirty_bitmap_remove(const char *device, const char *name,
> > + Error **errp)
> > +{
> > +BlockDriverState *bs;
> > +BdrvDirtyBitmap *bitmap;
> > +
> > +bs = bdrv_find(device);
> > +if (!bs) {
> > +error_set(errp, QERR_DEVICE_NOT_FOUND, device);
> > +return;
> > +}
> > +
> > +if (!name || name[0] == '\0') {
> > +error_setg(errp, "Bitmap name cannot be empty");
> > +return;
> > +}
> > +bitmap = bdrv_find_dirty_bitmap(bs, name);
> > +if (!bitmap) {
> > +error_setg(errp, "Dirty bitmap not found: %s", name);
> > +return;
> > +}
> > +
> > +/* Make it invisible to user in case the following
> > + * bdrv_release_dirty_bitmap doens't free it because of refcnt */
> 
> "doesn't"

Thanks,
Fam



Re: [Qemu-devel] [PATCH v4 2/9] qmp: Add dirty-bitmap-add and dirty-bitmap-remove

2014-04-01 Thread Fam Zheng
On Thu, 03/27 16:39, Dr. David Alan Gilbert wrote:
> Hi Fam,
>   Could you make this something like block-dirty-bitmap  - the RAM migration
> also has a dirty bitmap, and it would just make it clearer.

Good idea. Thanks, Dave.

Fam



Re: [Qemu-devel] [PATCH v4 2/9] qmp: Add dirty-bitmap-add and dirty-bitmap-remove

2014-04-01 Thread Fam Zheng
On Thu, 03/27 10:41, Eric Blake wrote:
> On 03/27/2014 03:09 AM, Fam Zheng wrote:
> > The new command pair is added to manage user created dirty bitmap. The
> > dirty bitmap's name is mandatory and must be unique for the same device,
> > but different devices can have bitmaps with the same names.
> > 
> > Signed-off-by: Fam Zheng 
> > ---
> 
> > +++ b/qapi-schema.json
> > @@ -2209,6 +2209,51 @@
> >  '*on-target-error': 'BlockdevOnError' } }
> >  
> >  ##
> > +# @DirtyBitmap
> > +#
> > +# @device: name of device which the bitmap is tracking
> > +#
> > +# @name: name of the dirty bitmap
> > +#
> > +# @granularity: #optional the bitmap granularity, default is 64k for
> > +#   dirty-bitmap-add
> 
> Optional, but only affects dirty-bitmap-add.  You later document...
> 
> > +# @dirty-bitmap-remove
> > +#
> > +# Remove a dirty bitmap on the device
> > +#
> > +# Setting granularity has no effect here.
> 
> ...that it is silently ignored where it can't be used here, and again in
> 7/9 for both dirty-bitmap-disable and dirty-bitmap-enable.
> 
> I think it would be smarter to do:
> 
> { 'type': 'DirtyBitmap',
>   'data': { 'device': 'str', 'name': 'str' } }
> 
> {'command': 'dirty-bitmap-add',
>   'data': { 'map': 'DirtyBitmap', '*granularity': 'int' } }
> 
> Or:
> 
> { 'type': 'DirtyBitmap',
>   'data': { 'device': 'str', 'name': 'str' } }
> { 'type': 'DirtyBitmapGranularity',
>   'base': 'DirtyBitmap',
>   'data': { '*granularity': 'int' } }
> {'command': 'dirty-bitmap-add',
>   'data': 'DirtyBitmapGranularity' }
> 
> 
> which says that the 'DirtyBitmap' struct has no optional members, and
> instead of silently ignoring an optional member in 3 commands, we
> instead write the one command that takes the optional argument when we
> actually care about it.
> 

Yes, taking the latter one since a type is needed for transaction support.
Thanks,

Fam



Re: [Qemu-devel] [PATCH v4 7/9] qmp: Add dirty-bitmap-enable and dirty-bitmap-disable

2014-04-01 Thread Fam Zheng
On Thu, 03/27 10:46, Eric Blake wrote:
> On 03/27/2014 03:09 AM, Fam Zheng wrote:
>  Also, is there a counterpart query- command that I can use to
> see the current state of a named dirty bitmap and whether it is
> currently enabled, so that this isn't a write-only interface?
> 

Will add the enable status into query-block information in another patch.

Thanks,
Fam



Re: [Qemu-devel] [PATCH v2] Fix for qemu-img info to supply FORMAT values for SPARSE extents

2014-04-01 Thread Fam Zheng
On Tue, 04/01 17:49, Shwetha Mathangi Chandra Choodamani wrote:
> This patch fixes the bug in qemu-img info that wouldn't populate the extent 
> type for default formats.
> The extent type has now been set where necessary. This is the second version 
> in the series after inputs
> from Fam Zheng(f...@redhat.com).
> 
> Signed-off-by: Shwetha Mathangi Chandra Choodamani 
> 
> ---
>  block/vmdk.c |   53 +
>  1 file changed, 49 insertions(+), 4 deletions(-)
> 
> diff --git a/block/vmdk.c b/block/vmdk.c
> index b69988d..d4a37ad 100644
> --- a/block/vmdk.c
> +++ b/block/vmdk.c
> @@ -499,6 +499,18 @@ static int vmdk_open_vmfs_sparse(BlockDriverState *bs,
>  VMDK3Header header;
>  VmdkExtent *extent;
>  

IIRC VMFS always comes with a description file, so we won't miss the extent
type and this part is not necessary.

> +char access[11];
> +char type[11];
> +char fname[512];
> +int64_t sectors = 0;
> +int64_t flat_offset;
> +int64_t size;
> +size = bdrv_getlength(file);
> +char *buf;
> +buf = g_malloc0(size + 1);
> +bdrv_pread(file, sizeof(magic), buf, size);
> +sscanf(buf, "%10s %" SCNd64 " %10s \"%511[^\n\r\"]\" %" SCNd64,
> +access, &sectors, type, fname, &flat_offset);
>  ret = bdrv_pread(file, sizeof(magic), &header, sizeof(header));
>  if (ret < 0) {
>  error_setg_errno(errp, -ret,
> @@ -515,6 +527,18 @@ static int vmdk_open_vmfs_sparse(BlockDriverState *bs,
>le32_to_cpu(header.granularity),
>&extent,
>errp);
> +while (strcmp(access, "RW")) {
> +while (*buf) {
> +sscanf(buf, "%10s %" SCNd64 " %10s \"%511[^\n\r\"]\" %" SCNd64,
> +access, &sectors, type, fname, &flat_offset);
> +if (*buf == '\n') {
> +buf++;
> +break;
> +}
> +buf++;
> +}
> +}
> +extent->type = g_strdup(type);
>  if (ret < 0) {
>  return ret;
>  }
> @@ -523,6 +547,7 @@ static int vmdk_open_vmfs_sparse(BlockDriverState *bs,
>  /* free extent allocated by vmdk_add_extent */
>  vmdk_free_last_extent(bs);
>  }
> +g_free(buf);
>  return ret;
>  }
>  
> @@ -566,7 +591,11 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
>  VmdkExtent *extent;
>  BDRVVmdkState *s = bs->opaque;
>  int64_t l1_backup_offset = 0;
> -
> +char access[11];
> +char type[11];
> +char fname[512];
> +int64_t sectors = 0;
> +int64_t flat_offset;
>  ret = bdrv_pread(file, sizeof(magic), &header, sizeof(header));
>  if (ret < 0) {
>  error_setg_errno(errp, -ret,
> @@ -586,11 +615,9 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
>  return ret;
>  }
>  }
> -

Unnecessary blank line change.

>  if (!s->create_type) {
>  s->create_type = g_strdup("monolithicSparse");
>  }
> -

Same here.

>  if (le64_to_cpu(header.gd_offset) == VMDK4_GD_AT_END) {
>  /*
>   * The footer takes precedence over the header, so read it in. The
> @@ -694,6 +721,25 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
>  g_free(s->create_type);
>  s->create_type = g_strdup("streamOptimized");
>  }
> +
> +if (!extent->type) {
> +uint64_t desc_offset = le64_to_cpu(header.desc_offset);
> +char *buf = vmdk_read_desc(file, desc_offset<<9, errp);
> +sscanf(buf, "%10s %" SCNd64 " %10s \"%511[^\n\r\"]\" %" SCNd64,
> +access, &sectors, type, fname, &flat_offset);
> +while (strcmp(access, "RW")) {
> +while (*buf) {
> +sscanf(buf, "%10s %" SCNd64 " %10s \"%511[^\n\r\"]\" %" 
> SCNd64,
> +access, &sectors, type, fname, &flat_offset);
> +if (*buf == '\n') {
> +buf++;
> +break;
> +}
> +buf++;
> +}
> +}
> +}

No. Why do you need to read the description?

Here we already know the extent type by looking at s->create_type, it would be
"monolithicSparse" or "streamOptimized" in the cases that we are trying to fix
in this patch, so no need to parse the description text.

Just set extent->type according to s->create_type.

> +extent->type = g_strdup(type);


>  extent->has_marker = le32_to_cpu(header.flags) & VMDK4_FLAG_MARKER;
>  extent->version = le32_to_cpu(header.version);
>  extent->has_zero_grain = le32_to_cpu(header.flags) & 
> VMDK4_FLAG_ZERO_GRAIN;
> @@ -711,7 +757,6 @@ static int vmdk_parse_description(const char *desc, const 
> char *opt_name,
>  {
>  char *opt_pos, *opt_end;
>  const char *end = desc + strlen(desc);
> -

Blank line change, please drop it.

Fam

>  opt_pos = strstr(desc, opt_name);
>  if (!opt_pos) {
>  return VMDK_ERROR;
> -- 
> 1.7.9.5
> 
> 



Re: [Qemu-devel] [PATCH] qcow2: Patch for shrinking qcow2 disk image

2014-04-01 Thread Fam Zheng
On Tue, 04/01 10:06, Jun Lee wrote:
> Signed-off-by: Jun Li 
> 
> This patch can make sure the data still existing after shrinking. And only 
> discard the unused (guest) clusters. If shrinking to the size which stored 
> data, It will return an error and will not do any change.
> As this patch can support shrinking, so changed the func name of 
> qcow2_grow_l1_table to qcow2_truncate_l1_table.

Signed-off-by line is missing.

> ---
>  block/qcow2-cluster.c  | 20 ++--
>  block/qcow2-snapshot.c |  2 +-
>  block/qcow2.c  |  8 +---
>  block/qcow2.h  |  2 +-
>  4 files changed, 17 insertions(+), 15 deletions(-)
> 
> diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
> index 9499df9..70e61ea 100644
> --- a/block/qcow2-cluster.c
> +++ b/block/qcow2-cluster.c
> @@ -29,7 +29,7 @@
>  #include "block/qcow2.h"
>  #include "trace.h"
>  
> -int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
> +int qcow2_truncate_l1_table(BlockDriverState *bs, uint64_t min_size,
>  bool exact_size)

Parameter alignment is changed.

>  {
>  BDRVQcowState *s = bs->opaque;
> @@ -39,9 +39,6 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t 
> min_size,
>  int64_t new_l1_table_offset, new_l1_size;
>  uint8_t data[12];
>  
> -if (min_size <= s->l1_size)
> -return 0;
> -
>  if (exact_size) {
>  new_l1_size = min_size;
>  } else {
> @@ -66,7 +63,18 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t 
> min_size,
>  
>  new_l1_size2 = sizeof(uint64_t) * new_l1_size;
>  new_l1_table = g_malloc0(align_offset(new_l1_size2, 512));
> -memcpy(new_l1_table, s->l1_table, s->l1_size * sizeof(uint64_t));
> +
> +/* shrinking the image */
> +if (min_size <= s->l1_size) {
> +if (s->l1_table[new_l1_size] != 0) {

Is this safe as advertised? Do we need to scan all the dropped l1 entries,
instead of only the first one?

> +error_report("Could not shrink to this size, "
> +"it will destory image data");

Better to align by quote mark.

> +return -ENOTSUP;
> +}
> +memcpy(new_l1_table, s->l1_table, new_l1_size2);
> +}
> +
> +   memcpy(new_l1_table, s->l1_table, s->l1_size * sizeof(uint64_t));

Redundant (and wrong size) memcpy for shrinking case?

Fam

>  
>  /* write new table (align to cluster) */
>  BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ALLOC_TABLE);
> @@ -559,7 +567,7 @@ static int get_cluster_table(BlockDriverState *bs, 
> uint64_t offset,
>  
>  l1_index = offset >> (s->l2_bits + s->cluster_bits);
>  if (l1_index >= s->l1_size) {
> -ret = qcow2_grow_l1_table(bs, l1_index + 1, false);
> +ret = qcow2_truncate_l1_table(bs, l1_index + 1, false);
>  if (ret < 0) {
>  return ret;
>  }
> diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c
> index 2fc6320..ab16c52 100644
> --- a/block/qcow2-snapshot.c
> +++ b/block/qcow2-snapshot.c
> @@ -491,7 +491,7 @@ int qcow2_snapshot_goto(BlockDriverState *bs, const char 
> *snapshot_id)
>   * L1 table of the snapshot. If the snapshot L1 table is smaller, the
>   * current one must be padded with zeros.
>   */
> -ret = qcow2_grow_l1_table(bs, sn->l1_size, true);
> +ret = qcow2_truncate_l1_table(bs, sn->l1_size, true);
>  if (ret < 0) {
>  goto fail;
>  }
> diff --git a/block/qcow2.c b/block/qcow2.c
> index b9dc960..4797879 100644
> --- a/block/qcow2.c
> +++ b/block/qcow2.c
> @@ -1764,14 +1764,8 @@ static int qcow2_truncate(BlockDriverState *bs, 
> int64_t offset)
>  return -ENOTSUP;
>  }
>  
> -/* shrinking is currently not supported */
> -if (offset < bs->total_sectors * 512) {
> -error_report("qcow2 doesn't support shrinking images yet");
> -return -ENOTSUP;
> -}
> -
>  new_l1_size = size_to_l1(s, offset);
> -ret = qcow2_grow_l1_table(bs, new_l1_size, true);
> +ret = qcow2_truncate_l1_table(bs, new_l1_size, true);
>  if (ret < 0) {
>  return ret;
>  }
> diff --git a/block/qcow2.h b/block/qcow2.h
> index 0b0eac8..298d84e 100644
> --- a/block/qcow2.h
> +++ b/block/qcow2.h
> @@ -455,7 +455,7 @@ int qcow2_pre_write_overlap_check(BlockDriverState *bs, 
> int ign, int64_t offset,
>int64_t size);
>  
>  /* qcow2-cluster.c functions */
> -int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
> +int qcow2_truncate_l1_table(BlockDriverState *bs, uint64_t min_size,
>  bool exact_size);
>  int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index);
>  void qcow2_l2_cache_reset(BlockDriverState *bs);
> -- 
> 1.8.3.1
> 



[Qemu-devel] [PATCH] qemu-iotests: Improve and make use of QMPTestCase.wait_until_completed()

2014-04-01 Thread Fam Zheng
This eliminates code duplication.

Signed-off-by: Fam Zheng 
---
 tests/qemu-iotests/030| 50 +--
 tests/qemu-iotests/056|  9 +---
 tests/qemu-iotests/iotests.py |  5 +++--
 3 files changed, 9 insertions(+), 55 deletions(-)

diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030
index 59a34f7..8cb61fd 100755
--- a/tests/qemu-iotests/030
+++ b/tests/qemu-iotests/030
@@ -50,15 +50,7 @@ class TestSingleDrive(iotests.QMPTestCase):
 result = self.vm.qmp('block-stream', device='drive0')
 self.assert_qmp(result, 'return', {})
 
-completed = False
-while not completed:
-for event in self.vm.get_qmp_events(wait=True):
-if event['event'] == 'BLOCK_JOB_COMPLETED':
-self.assert_qmp(event, 'data/type', 'stream')
-self.assert_qmp(event, 'data/device', 'drive0')
-self.assert_qmp(event, 'data/offset', self.image_len)
-self.assert_qmp(event, 'data/len', self.image_len)
-completed = True
+self.wait_until_completed()
 
 self.assert_no_active_block_jobs()
 self.vm.shutdown()
@@ -89,15 +81,7 @@ class TestSingleDrive(iotests.QMPTestCase):
 self.assert_qmp(result, 'return', {})
 
 self.vm.resume_drive('drive0')
-completed = False
-while not completed:
-for event in self.vm.get_qmp_events(wait=True):
-if event['event'] == 'BLOCK_JOB_COMPLETED':
-self.assert_qmp(event, 'data/type', 'stream')
-self.assert_qmp(event, 'data/device', 'drive0')
-self.assert_qmp(event, 'data/offset', self.image_len)
-self.assert_qmp(event, 'data/len', self.image_len)
-completed = True
+self.wait_until_completed()
 
 self.assert_no_active_block_jobs()
 self.vm.shutdown()
@@ -112,15 +96,7 @@ class TestSingleDrive(iotests.QMPTestCase):
 result = self.vm.qmp('block-stream', device='drive0', base=mid_img)
 self.assert_qmp(result, 'return', {})
 
-completed = False
-while not completed:
-for event in self.vm.get_qmp_events(wait=True):
-if event['event'] == 'BLOCK_JOB_COMPLETED':
-self.assert_qmp(event, 'data/type', 'stream')
-self.assert_qmp(event, 'data/device', 'drive0')
-self.assert_qmp(event, 'data/offset', self.image_len)
-self.assert_qmp(event, 'data/len', self.image_len)
-completed = True
+self.wait_until_completed()
 
 self.assert_no_active_block_jobs()
 self.vm.shutdown()
@@ -152,15 +128,7 @@ class TestSmallerBackingFile(iotests.QMPTestCase):
 result = self.vm.qmp('block-stream', device='drive0')
 self.assert_qmp(result, 'return', {})
 
-completed = False
-while not completed:
-for event in self.vm.get_qmp_events(wait=True):
-if event['event'] == 'BLOCK_JOB_COMPLETED':
-self.assert_qmp(event, 'data/type', 'stream')
-self.assert_qmp(event, 'data/device', 'drive0')
-self.assert_qmp(event, 'data/offset', self.image_len)
-self.assert_qmp(event, 'data/len', self.image_len)
-completed = True
+self.wait_until_completed()
 
 self.assert_no_active_block_jobs()
 self.vm.shutdown()
@@ -442,15 +410,7 @@ class TestSetSpeed(iotests.QMPTestCase):
 result = self.vm.qmp('block-job-set-speed', device='drive0', speed=8 * 
1024 * 1024)
 self.assert_qmp(result, 'return', {})
 
-completed = False
-while not completed:
-for event in self.vm.get_qmp_events(wait=True):
-if event['event'] == 'BLOCK_JOB_COMPLETED':
-self.assert_qmp(event, 'data/type', 'stream')
-self.assert_qmp(event, 'data/device', 'drive0')
-self.assert_qmp(event, 'data/offset', self.image_len)
-self.assert_qmp(event, 'data/len', self.image_len)
-completed = True
+self.wait_until_completed()
 
 self.assert_no_active_block_jobs()
 
diff --git a/tests/qemu-iotests/056 b/tests/qemu-iotests/056
index 6389342..54e4bd0 1007

Re: [Qemu-devel] [PATCH v17 00/14] Drop in_use from BlockDriverState and enable point-in-time snapshot exporting over NBD

2014-04-01 Thread Fam Zheng
On Mon, 03/10 15:25, Fam Zheng wrote:
> v17: Rebase to current master.

Ping?



[Qemu-devel] [PATCH] block: Handle error of bdrv_getlength in bdrv_create_dirty_bitmap

2014-04-01 Thread Fam Zheng
bdrv_getlength could fail; check the return value before using it.

Signed-off-by: Fam Zheng 
---
 block-migration.c | 28 
 block.c   | 10 --
 block/mirror.c|  5 -
 include/block/block.h |  3 ++-
 4 files changed, 38 insertions(+), 8 deletions(-)

diff --git a/block-migration.c b/block-migration.c
index 897fdba..62cd597 100644
--- a/block-migration.c
+++ b/block-migration.c
@@ -310,13 +310,26 @@ static int mig_save_device_bulk(QEMUFile *f, 
BlkMigDevState *bmds)
 
 /* Called with iothread lock taken.  */
 
-static void set_dirty_tracking(void)
+static int set_dirty_tracking(void)
 {
 BlkMigDevState *bmds;
 
 QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
-bmds->dirty_bitmap = bdrv_create_dirty_bitmap(bmds->bs, BLOCK_SIZE);
+bmds->dirty_bitmap = bdrv_create_dirty_bitmap(bmds->bs, BLOCK_SIZE,
+  NULL);
+if (!bmds->dirty_bitmap) {
+goto fail;
+}
+}
+return 0;
+
+fail:
+QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
+if (bmds->dirty_bitmap) {
+bdrv_release_dirty_bitmap(bmds->bs, bmds->dirty_bitmap);
+}
 }
+return -1;
 }
 
 static void unset_dirty_tracking(void)
@@ -611,10 +624,17 @@ static int block_save_setup(QEMUFile *f, void *opaque)
 block_mig_state.submitted, block_mig_state.transferred);
 
 qemu_mutex_lock_iothread();
-init_blk_migration(f);
 
 /* start track dirty blocks */
-set_dirty_tracking();
+ret = set_dirty_tracking();
+
+if (ret) {
+qemu_mutex_unlock_iothread();
+return ret;
+}
+
+init_blk_migration(f);
+
 qemu_mutex_unlock_iothread();
 
 ret = flush_blks(f);
diff --git a/block.c b/block.c
index acb70fd..93006de 100644
--- a/block.c
+++ b/block.c
@@ -5079,7 +5079,8 @@ bool bdrv_qiov_is_aligned(BlockDriverState *bs, 
QEMUIOVector *qiov)
 return true;
 }
 
-BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int 
granularity)
+BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int 
granularity,
+  Error **errp)
 {
 int64_t bitmap_size;
 BdrvDirtyBitmap *bitmap;
@@ -5088,7 +5089,12 @@ BdrvDirtyBitmap 
*bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity)
 
 granularity >>= BDRV_SECTOR_BITS;
 assert(granularity);
-bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS);
+bitmap_size = bdrv_getlength(bs);
+if (bitmap_size < 0) {
+error_setg(errp, "could not get length of device");
+return NULL;
+}
+bitmap_size >>= BDRV_SECTOR_BITS;
 bitmap = g_malloc0(sizeof(BdrvDirtyBitmap));
 bitmap->bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1);
 QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
diff --git a/block/mirror.c b/block/mirror.c
index 0ef41f9..2618c37 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -605,7 +605,10 @@ static void mirror_start_job(BlockDriverState *bs, 
BlockDriverState *target,
 s->granularity = granularity;
 s->buf_size = MAX(buf_size, granularity);
 
-s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity);
+s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity, errp);
+if (!s->dirty_bitmap) {
+return;
+}
 bdrv_set_enable_write_cache(s->target, true);
 bdrv_set_on_error(s->target, on_target_error, on_target_error);
 bdrv_iostatus_enable(s->target);
diff --git a/include/block/block.h b/include/block/block.h
index 1ed55d8..8e70a57 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -428,7 +428,8 @@ bool bdrv_qiov_is_aligned(BlockDriverState *bs, 
QEMUIOVector *qiov);
 
 struct HBitmapIter;
 typedef struct BdrvDirtyBitmap BdrvDirtyBitmap;
-BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int 
granularity);
+BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int 
granularity,
+  Error **errp);
 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap);
 BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs);
 int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t 
sector);
-- 
1.9.1




Re: [Qemu-devel] [PATCH] qcow2: Patch for shrinking qcow2 disk image

2014-04-03 Thread Fam Zheng
On Thu, 04/03 07:28, Jun Lee wrote:
> > @@ -66,7 +63,18 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t 
> > min_size,
> >  
> >  new_l1_size2 = sizeof(uint64_t) * new_l1_size;
> >  new_l1_table = g_malloc0(align_offset(new_l1_size2, 512));
> > -memcpy(new_l1_table, s->l1_table, s->l1_size * sizeof(uint64_t));
> > +
> > +/* shrinking the image */
> > +if (min_size <= s->l1_size) {
> > +if (s->l1_table[new_l1_size] != 0) {
> 
> Is this safe as advertised? Do we need to scan all the dropped l1 entries,
> instead of only the first one?
> -As the dropped l1 entries will not contain any value data.

I don't understand, how do you make sure of that without checking it?

Fam



[Qemu-devel] [PATCH for-2.0] iscsi: Don't set error if already set in iscsi_do_inquiry

2014-04-04 Thread Fam Zheng
This eliminates the possible assertion failure in error_setg().

Signed-off-by: Fam Zheng 
---
 block/iscsi.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/block/iscsi.c b/block/iscsi.c
index 21c18a3..64a509f 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -1101,8 +1101,10 @@ static struct scsi_task *iscsi_do_inquiry(struct 
iscsi_context *iscsi, int lun,
 return task;
 
 fail:
-error_setg(errp, "iSCSI: Inquiry command failed : %s",
-   iscsi_get_error(iscsi));
+if (!error_is_set(errp)) {
+error_setg(errp, "iSCSI: Inquiry command failed : %s",
+   iscsi_get_error(iscsi));
+}
 if (task != NULL) {
 scsi_free_scsi_task(task);
 }
-- 
1.9.1




Re: [Qemu-devel] why we build only some optional modules as modules?

2014-04-05 Thread Fam Zheng
On Sat, 04/05 18:29, Michael Tokarev wrote:
> A quick (hopefully) question.
> 
> Why build-as-modules only enabled for a few optional modules only,
> why not build as modules everything, to reduce common executable
> size?  Or is it not a concern anymore at our disk sizes? :)
> 

The original demand was to reduce package dependencies of distribution QEMU,
hence not everything is (planned to be) modularized.

But nothing prevents turning other things into modules as well,
AFAICT.

Thanks,
Fam



Re: [Qemu-devel] why we build only some optional modules as modules?

2014-04-07 Thread Fam Zheng
On Sun, 04/06 10:17, Michael Tokarev wrote:
> 06.04.2014 04:52, Fam Zheng wrote:
> > On Sat, 04/05 18:29, Michael Tokarev wrote:
> >> A quick (hopefully) question.
> >>
> >> Why build-as-modules only enabled for a few optional modules only,
> >> why not build as modules everything, to reduce common executable
> >> size?  Or is it not a concern anymore at our disk sizes? :)
> > 
> > The original demand was to reduce package dependencies of distribution QEMU,
> > hence not everything is (planned to be) modularized.
> 
> Please note that current situation/implementation has one detail which
> prevents this usage: when any module is missing, qemu complains at
> startup about this.  So at this stage, the original goal is not met. ;)

True but I don't know either, my original patch didn't complain about this. :)

> 
> > But there is no blocking reason to turn other things to modules as well,
> > AFAICT.
> 
> I think so, too, yes.
> 

And it would be good if you have a reason to do it.

Fam



Re: [Qemu-devel] [PATCH v17 02/14] block: Introduce op_blockers to BlockDriverState

2014-04-07 Thread Fam Zheng
On Sun, 04/06 19:49, Jeff Cody wrote:
> On Mon, Mar 10, 2014 at 03:25:58PM +0800, Fam Zheng wrote:
> > BlockDriverState.op_blockers is an array of lists with BLOCK_OP_TYPE_MAX
> > elements. Each list is a list of blockers of an operation type
> > (BlockOpType), that marks this BDS as currently blocked for a certain
> > type of operation with reason errors stored in the list. The rule of
> > usage is:
> > 
> >  * BDS user who wants to take an operation should check if there's any
> >blocker of the type with bdrv_op_is_blocked().
> > 
> >  * BDS user who wants to block certain types of operation, should call
> >bdrv_op_block (or bdrv_op_block_all to block all types of operations,
> >which is similar to the existing bdrv_set_in_use()).
> >
> >  * A blocker is only referenced by op_blockers, so the lifecycle is
> >managed by caller, and shouldn't be lost until unblock, so typically
> >a caller does these:
> > 
> >- Allocate a blocker with error_setg or similar, call bdrv_op_block()
> >  to block some operations.
> >- Hold the blocker, do his job.
> >- Unblock operations that it blocked, with the same reason pointer
> >  passed to bdrv_op_unblock().
> >- Release the blocker with error_free().
> 
> Is there a reason to assume there will be atypical usages that don't
> follow these steps?  If not, could the Error reason resource be
> allocated inside the block() operation if non-NULL, and freed inside
> the unblock() operations?

Could work as well. It's just that the current interface follows the style
of the migration blocker.

Thanks,
Fam



Re: [Qemu-devel] [PATCH v17 10/14] qmp: Add command 'blockdev-backup'

2014-04-08 Thread Fam Zheng
On Mon, 04/07 15:07, Eric Blake wrote:
> On 03/10/2014 01:26 AM, Fam Zheng wrote:
> > Similar to drive-backup, but this command uses a device id as target
> > instead of creating/opening an image file.
> > 
> > Also add blocker on target bs, since the target is also a named device
> > now.
> > 
> > Add check and report error for bs == target which became possible but is
> > an illegal case with introduction of blockdev-backup.
> > 
> > Signed-off-by: Fam Zheng 
> > ---
> >  block/backup.c   | 26 ++
> >  blockdev.c   | 47 +++
> >  qapi-schema.json | 49 +
> >  qmp-commands.hx  | 44 
> >  4 files changed, 166 insertions(+)
> 
> Reviewing just QAPI portion:
> 
> 
> > +++ b/qapi-schema.json
> > @@ -1919,6 +1919,40 @@
> >  '*on-target-error': 'BlockdevOnError' } }
> >  
> >  ##
> > +# @BlockdevBackup
> > +#
> > +# @device: the name of the device which should be copied.
> > +#
> > +# @target: the name of the backup target device.
> > +#
> > +# @sync: what parts of the disk image should be copied to the destination
> > +#(all the disk, only the sectors allocated in the topmost image, or
> > +#only new I/O).
> > +#
> > +# @speed: #optional the maximum speed, in bytes per second.
> > +#
> > +# @on-source-error: #optional the action to take on an error on the source,
> > +#   default 'report'.  'stop' and 'enospc' can only be used
> > +#   if the block device supports io-status (see BlockInfo).
> > +#
> > +# @on-target-error: #optional the action to take on an error on the target,
> > +#   default 'report' (no limitations, since this applies to
> > +#   a different block device than @device).
> > +#
> > +# Note that @on-source-error and @on-target-error only affect background 
> > I/O.
> > +# If an error occurs during a guest write request, the device's 
> > rerror/werror
> > +# actions will be used.
> > +#
> > +# Since: 2.0
> 
> 2.1 now
> 
> > +##
> > +{ 'type': 'BlockdevBackup',
> > +  'data': { 'device': 'str', 'target': 'str',
> > +'sync': 'MirrorSyncMode',
> > +'*speed': 'int',
> > +'*on-source-error': 'BlockdevOnError',
> > +'*on-target-error': 'BlockdevOnError' } }
> > +
> 
> Looks reasonable
> 
> 
> > +#  If @device or @target is not a valid block device, 
> > DeviceNotFound.
> > +#
> > +# Since 2.0
> > +##
> > +{ 'command': 'blockdev-backup', 'data': 'BlockdevBackup' }
> 
> Another case of 2.1

Yes, thanks!

Fam



Re: [Qemu-devel] [PATCH v17 06/14] block: Add backing_blocker in BlockDriverState

2014-04-08 Thread Fam Zheng
On Sun, 04/06 20:31, Jeff Cody wrote:
> On Mon, Mar 10, 2014 at 03:26:02PM +0800, Fam Zheng wrote:
> > This makes use of op_blocker and blocks all the operations except for
> > commit target, on each BlockDriverState->backing_hd.
> > 
> > The asserts for op_blocker in bdrv_swap are removed because with this
> > change, the target of block commit has at least the backing blocker of
> > its child, so the assertion is not true. Callers should do their check.
> > 
> > Signed-off-by: Fam Zheng 
> > ---
> >  block.c   | 24 
> >  block/mirror.c|  1 +
> >  include/block/block_int.h |  3 +++
> >  3 files changed, 24 insertions(+), 4 deletions(-)
> > 
> > diff --git a/block.c b/block.c
> > index 64738dc..95247c8 100644
> > --- a/block.c
> > +++ b/block.c
> > @@ -1050,16 +1050,33 @@ fail:
> >  void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState 
> > *backing_hd)
> >  {
> >  
> > +if (bs->backing_hd) {
> > +assert(error_is_set(&bs->backing_blocker));
> > +bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
> > +} else if (backing_hd) {
> > +error_setg(&bs->backing_blocker,
> > +   "device is used as backing hd of '%s'",
> > +   bs->device_name);
> > +}
> > +
> >  bs->backing_hd = backing_hd;
> >  if (!backing_hd) {
> >  bs->backing_file[0] = '\0';
> >  bs->backing_format[0] = '\0';
> > +if (error_is_set(&bs->backing_blocker)) {
> > +error_free(bs->backing_blocker);
> > +}
> >  goto out;
> >  }
> >  bs->open_flags &= ~BDRV_O_NO_BACKING;
> >  pstrcpy(bs->backing_file, sizeof(bs->backing_file), 
> > backing_hd->filename);
> >  pstrcpy(bs->backing_format, sizeof(bs->backing_format),
> >  backing_hd->drv ? backing_hd->drv->format_name : "");
> > +
> > +bdrv_op_block_all(bs->backing_hd, bs->backing_blocker);
> > +/* Otherwise we won't be able to commit due to check in bdrv_commit */
> > +bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT,
> > +bs->backing_blocker);
> >  out:
> >  bdrv_refresh_limits(bs);
> >  }
> > @@ -1699,8 +1716,9 @@ void bdrv_close(BlockDriverState *bs)
> >  
> >  if (bs->drv) {
> >  if (bs->backing_hd) {
> > -bdrv_unref(bs->backing_hd);
> > -bs->backing_hd = NULL;
> > +BlockDriverState *backing_hd = bs->backing_hd;
> > +bdrv_set_backing_hd(bs, NULL);
> > +bdrv_unref(backing_hd);
> >  }
> >  bs->drv->bdrv_close(bs);
> >  g_free(bs->opaque);
> > @@ -1908,7 +1926,6 @@ void bdrv_swap(BlockDriverState *bs_new, 
> > BlockDriverState *bs_old)
> >  assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
> >  assert(bs_new->job == NULL);
> >  assert(bs_new->dev == NULL);
> > -assert(bdrv_op_blocker_is_empty(bs_new));
> >  assert(bs_new->io_limits_enabled == false);
> >  assert(!throttle_have_timer(&bs_new->throttle_state));
> >  
> > @@ -1927,7 +1944,6 @@ void bdrv_swap(BlockDriverState *bs_new, 
> > BlockDriverState *bs_old)
> >  /* Check a few fields that should remain attached to the device */
> >  assert(bs_new->dev == NULL);
> >  assert(bs_new->job == NULL);
> > -assert(bdrv_op_blocker_is_empty(bs_new));
> 
> Do we want to unswap the blocker field inside bdrv_move_feature_fields()
> now?

Conceptually a BDS's blocker is a "feature field" (whatever it is) that
shouldn't be swapped by bdrv_swap(), just like .device_name and .refcnt. So it
has been in bdrv_move_feature_fields() since it was introduced, in patch 02.

Are you seeing any issue with this?

Fam

> 
> >  assert(bs_new->io_limits_enabled == false);
> >  assert(!throttle_have_timer(&bs_new->throttle_state));
> >  
> > diff --git a/block/mirror.c b/block/mirror.c
> > index dd5ee05..6dc84e8 100644
> > --- a/block/mirror.c
> > +++ b/block/mirror.c
> > @@ -487,6 +487,7 @@ immediate_exit:
> >   * trigger the unref from the top one */
> >  BlockDriverState *p = s->base->backing_hd;
> >  s->base->backing_hd = NULL;
> > +bdrv_op_unblock_all(p, s->base->backing_blocker);
> >  bdrv_unref(p);
> >  }
> >  }
> > diff --git a/include/block/block_int.h b/include/block/block_int.h
> > index 1d3f76f..1f4f78b 100644
> > --- a/include/block/block_int.h
> > +++ b/include/block/block_int.h
> > @@ -369,6 +369,9 @@ struct BlockDriverState {
> >  BlockJob *job;
> >  
> >  QDict *options;
> > +
> > +/* The error object in use for blocking operations on backing_hd */
> > +Error *backing_blocker;
> >  };
> >  
> >  int get_tmp_filename(char *filename, int size);
> > -- 
> > 1.9.0
> > 
> > 
> 



Re: [Qemu-devel] [PATCH v17 08/14] block: Support dropping active in bdrv_drop_intermediate

2014-04-08 Thread Fam Zheng
On Tue, 04/08 10:15, Markus Armbruster wrote:
> Jeff Cody  writes:
> 
> > On Mon, Mar 10, 2014 at 03:26:04PM +0800, Fam Zheng wrote:
> >> Dropping intermediate could be useful both for commit and stream, and
> >> BDS refcnt plus bdrv_swap could do most of the job nicely. It also needs
> >> to work with op blockers.
> >> 
> >> Signed-off-by: Fam Zheng 
> >> ---
> >>  block.c| 139 
> >> -
> >>  block/commit.c |   2 +-
> >>  2 files changed, 70 insertions(+), 71 deletions(-)
> >> 
> >> diff --git a/block.c b/block.c
> >> index 05f7766..0af7c62 100644
> >> --- a/block.c
> >> +++ b/block.c
> >> @@ -2503,115 +2503,114 @@ BlockDriverState 
> >> *bdrv_find_overlay(BlockDriverState *active,
> >>  return overlay;
> >>  }
> >>  
> >> -typedef struct BlkIntermediateStates {
> >> -BlockDriverState *bs;
> >> -QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
> >> -} BlkIntermediateStates;
> >> -
> >> -
> >>  /*
> >> - * Drops images above 'base' up to and including 'top', and sets the image
> >> - * above 'top' to have base as its backing file.
> >> + * Drops images above 'base' up to and including 'top', and sets new 
> >> 'base' as
> >> + * backing_hd of top's overlay (the image orignally has 'top' as backing 
> >> file).
> >> + * top's overlay may be NULL if 'top' is active, no such update needed.
> >> + * Requires that the top's overlay to 'top' is opened r/w.
> >> + *
> >> + * 1) This will convert the following chain:
> >> + *
> >> + * ... <- base <- ... <- top <- overlay <-... <- active
> >>   *
> >> - * Requires that the overlay to 'top' is opened r/w, so that the backing 
> >> file
> >> - * information in 'bs' can be properly updated.
> >> + * to
> >> + *
> >> + * ... <- base <- overlay <- active
> >> + *
> >> + * 2) It is allowed for bottom==base, in which case it converts:
> >>   *
> >> - * E.g., this will convert the following chain:
> >> - * bottom <- base <- intermediate <- top <- active
> >> + * base <- ... <- top <- overlay <- ... <- active
> >>   *
> >>   * to
> >>   *
> >> - * bottom <- base <- active
> >> + * base <- overlay <- active
> >>   *
> >> - * It is allowed for bottom==base, in which case it converts:
> >> + * 2) It also allows active==top, in which case it converts:
> >>   *
> >> - * base <- intermediate <- top <- active
> >> + * ... <- base <- ... <- top (active)
> >>   *
> >>   * to
> >>   *
> >> - * base <- active
> >> + * ... <- base == active == top
> >> + *
> >> + * i.e. only base and lower remains: *top == *base when return.
> >> + *
> >> + * 3) If base==NULL, it will drop all the BDS below overlay and set its
> >> + * backing_hd to NULL. I.e.:
> >>   *
> >> - * Error conditions:
> >> - *  if active == top, that is considered an error
> >> + * base(NULL) <- ... <- overlay <- ... <- active
> >> + *
> >> + * to
> >> + *
> >> + * overlay <- ... <- active
> >>   *
> >>   */
> >>  int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState 
> >> *top,
> >> BlockDriverState *base)
> >>  {
> >> -BlockDriverState *intermediate;
> >> -BlockDriverState *base_bs = NULL;
> >> -BlockDriverState *new_top_bs = NULL;
> >> -BlkIntermediateStates *intermediate_state, *next;
> >> -int ret = -EIO;
> >> -
> >> -QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) 
> >> states_to_delete;
> >> -QSIMPLEQ_INIT(&states_to_delete);
> >> +BlockDriverState *drop_start, *overlay, *bs;
> >> +int ret = -EINVAL;
> >>  
> >> -if (!top->drv || !base->drv) {
> >> +assert(active);
> >> +assert(top);
> >> +/* Verify that top is in backing chain of active */
> >> +bs = active;
> >> +while (bs && bs != top) {
> >> +

[Qemu-devel] [PATCH] iscsi: Remember to set ret for iscsi_open in error case

2014-04-09 Thread Fam Zheng
Signed-off-by: Fam Zheng 
---
 block/iscsi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/block/iscsi.c b/block/iscsi.c
index 64a509f..f425573 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -1233,6 +1233,7 @@ static int iscsi_open(BlockDriverState *bs, QDict 
*options, int flags,
 iscsi_readcapacity_sync(iscsilun, &local_err);
 if (local_err != NULL) {
 error_propagate(errp, local_err);
+ret = -EINVAL;
 goto out;
 }
 bs->total_sectors = sector_lun2qemu(iscsilun->num_blocks, iscsilun);
-- 
1.9.1




Re: [Qemu-devel] [PATCH 1/4] block: qemu-iotests - add common.qemu, for bash-controlled qemu tests

2014-04-09 Thread Fam Zheng
On Mon, 03/17 21:24, Jeff Cody wrote:
> +# Launch a QEMU process.
> +#
> +# Input parameters:
> +# $qemu_comm_method: set this variable to 'monitor' (case insensitive)
> +#to use the QEMU HMP monitor for communication.
> +#Otherwise, the default of QMP is used.
> +# Returns:
> +# $QEMU_HANDLE: set to a handle value to communicate with this QEMU instance.
> +#
> +function _launch_qemu()
> +{
> +local comm=
> +local fifo_out=
> +local fifo_in=
> +
> +if (shopt -s nocasematch; [[ "${qemu_comm_method}" == "monitor" ]])
> +then
> +comm="-monitor stdio -qmp none"
> +else
> +local qemu_comm_method="qmp"
> +comm="-monitor none -qmp stdio"
> +fi
> +
> +fifo_out=${QEMU_FIFO_OUT}_${_QEMU_HANDLE}
> +fifo_in=${QEMU_FIFO_IN}_${_QEMU_HANDLE}
> +mkfifo "${fifo_out}"
> +mkfifo "${fifo_in}"
> +
> +"${QEMU}" -nographic -serial none ${comm} "${@}" 2>&1 \
> + >"${fifo_out}" \
> + <"${fifo_in}" &

Shall we use '-machine accel=qtest' as we do in iotests.py (to run no guest
code)?  The patch below makes a big difference to 067's stability and run
time in my case:

diff --git a/tests/qemu-iotests/067 b/tests/qemu-iotests/067
index d025192..a379a3b 100755
--- a/tests/qemu-iotests/067
+++ b/tests/qemu-iotests/067
@@ -39,7 +39,7 @@ _supported_os Linux
 function do_run_qemu()
 {
 echo Testing: "$@"
-$QEMU -nographic -qmp stdio -serial none "$@"
+$QEMU -nographic -machine accel=qtest -qmp stdio -serial none "$@"
 echo
 }


Fam

> +QEMU_PID[${_QEMU_HANDLE}]=$!
> +
> +if [ "${BASH_VERSINFO[0]}" -ge "4" ] && [ "${BASH_VERSINFO[1]}" -ge "1" ]
> +then
> +# bash >= 4.1 required for automatic fd
> +exec {_out_fd}<"${fifo_out}"
> +exec {_in_fd}>"${fifo_in}"
> +else
> +let _out_fd++
> +let _in_fd++
> +eval "exec ${_out_fd}<'${fifo_out}'"
> +eval "exec ${_in_fd}>'${fifo_in}'"
> +fi
> +
> +QEMU_OUT[${_QEMU_HANDLE}]=${_out_fd}
> +QEMU_IN[${_QEMU_HANDLE}]=${_in_fd}
> +
> +if [ "${qemu_comm_method}" == "qmp" ]
> +then
> +# Don't print response, since it has version information in it
> +silent=yes _timed_wait_for ${_QEMU_HANDLE} "capabilities"
> +fi
> +QEMU_HANDLE=${_QEMU_HANDLE}
> +let _QEMU_HANDLE++
> +}



Re: [Qemu-devel] [PATCH v17 12/14] block: Add blockdev-backup to transaction

2014-04-09 Thread Fam Zheng
On Mon, 04/07 15:11, Eric Blake wrote:
> On 03/10/2014 01:26 AM, Fam Zheng wrote:
> > Signed-off-by: Fam Zheng 
> > ---
> >  blockdev.c   | 48 
> >  qapi-schema.json |  1 +
> >  2 files changed, 49 insertions(+)
> > 
> 
> > +++ b/qapi-schema.json
> > @@ -1972,6 +1972,7 @@
> >'data': {
> > 'blockdev-snapshot-sync': 'BlockdevSnapshot',
> > 'drive-backup': 'DriveBackup',
> > +   'blockdev-backup': 'BlockdevBackup',
> > 'abort': 'Abort',
> > 'blockdev-snapshot-internal-sync': 'BlockdevSnapshotInternal'
> > } }
> 
> It might be nice to mention that the union was expanded in 2.1;
> something like:
> 
> # A discriminated record of operations that can be performed with
> # @transaction.
> #
> # Since 1.1, blockdev-backup since 2.1
> ##
> { 'union': 'TransactionAction',
> 
> Of course, this is a pre-existing problem, as we've added other actions
> since 1.1 (blockdev-snapshot-internal-sync in 1.7, for example), so it
> could be justified as a separate patch.  But it can't hurt to start
> being more thorough in our docs.
> 

OK, I'll add this to the comment.

Thanks,
Fam



Re: [Qemu-devel] [PATCH v17 06/14] block: Add backing_blocker in BlockDriverState

2014-04-09 Thread Fam Zheng
On Wed, 04/09 14:29, Jeff Cody wrote:
> On Mon, Mar 10, 2014 at 03:26:02PM +0800, Fam Zheng wrote:
> > This makes use of op_blocker and blocks all the operations except for
> > commit target, on each BlockDriverState->backing_hd.
> > 
> > The asserts for op_blocker in bdrv_swap are removed because with this
> > change, the target of block commit has at least the backing blocker of
> > its child, so the assertion is not true. Callers should do their check.
> > 
> > Signed-off-by: Fam Zheng 
> > ---
> >  block.c   | 24 
> >  block/mirror.c|  1 +
> >  include/block/block_int.h |  3 +++
> >  3 files changed, 24 insertions(+), 4 deletions(-)
> > 
> > diff --git a/block.c b/block.c
> > index 64738dc..95247c8 100644
> > --- a/block.c
> > +++ b/block.c
> > @@ -1050,16 +1050,33 @@ fail:
> >  void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState 
> > *backing_hd)
> >  {
> >  
> > +if (bs->backing_hd) {
> > +assert(error_is_set(&bs->backing_blocker));
> > +bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
> > +} else if (backing_hd) {
> > +error_setg(&bs->backing_blocker,
> > +   "device is used as backing hd of '%s'",
> > +   bs->device_name);
> > +}
> > +
> >  bs->backing_hd = backing_hd;
> >  if (!backing_hd) {
> >  bs->backing_file[0] = '\0';
> >  bs->backing_format[0] = '\0';
> > +if (error_is_set(&bs->backing_blocker)) {
> > +error_free(bs->backing_blocker);
> > +}
> >  goto out;
> >  }
> >  bs->open_flags &= ~BDRV_O_NO_BACKING;
> >  pstrcpy(bs->backing_file, sizeof(bs->backing_file), 
> > backing_hd->filename);
> >  pstrcpy(bs->backing_format, sizeof(bs->backing_format),
> >  backing_hd->drv ? backing_hd->drv->format_name : "");
> > +
> > +bdrv_op_block_all(bs->backing_hd, bs->backing_blocker);
> > +/* Otherwise we won't be able to commit due to check in bdrv_commit */
> > +bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT,
> > +bs->backing_blocker);
> >  out:
> >  bdrv_refresh_limits(bs);
> >  }
> > @@ -1699,8 +1716,9 @@ void bdrv_close(BlockDriverState *bs)
> >  
> >  if (bs->drv) {
> >  if (bs->backing_hd) {
> > -bdrv_unref(bs->backing_hd);
> > -bs->backing_hd = NULL;
> > +BlockDriverState *backing_hd = bs->backing_hd;
> > +bdrv_set_backing_hd(bs, NULL);
> > +bdrv_unref(backing_hd);
> >  }
> >  bs->drv->bdrv_close(bs);
> >  g_free(bs->opaque);
> > @@ -1908,7 +1926,6 @@ void bdrv_swap(BlockDriverState *bs_new, 
> > BlockDriverState *bs_old)
> >  assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
> >  assert(bs_new->job == NULL);
> >  assert(bs_new->dev == NULL);
> > -assert(bdrv_op_blocker_is_empty(bs_new));
> >  assert(bs_new->io_limits_enabled == false);
> >  assert(!throttle_have_timer(&bs_new->throttle_state));
> >  
> > @@ -1927,7 +1944,6 @@ void bdrv_swap(BlockDriverState *bs_new, 
> > BlockDriverState *bs_old)
> >  /* Check a few fields that should remain attached to the device */
> >  assert(bs_new->dev == NULL);
> >  assert(bs_new->job == NULL);
> > -assert(bdrv_op_blocker_is_empty(bs_new));
> >  assert(bs_new->io_limits_enabled == false);
> >  assert(!throttle_have_timer(&bs_new->throttle_state));
> >  
> > diff --git a/block/mirror.c b/block/mirror.c
> > index dd5ee05..6dc84e8 100644
> > --- a/block/mirror.c
> > +++ b/block/mirror.c
> > @@ -487,6 +487,7 @@ immediate_exit:
> >   * trigger the unref from the top one */
> >  BlockDriverState *p = s->base->backing_hd;
> >  s->base->backing_hd = NULL;
> > +bdrv_op_unblock_all(p, s->base->backing_blocker);
> >  bdrv_unref(p);
> 
> FYI, this is what I changed in my testing, to try out the active layer
> case in bdrv_drop_intermediate().  Since you'll need to respin anyway,
> might as well clean this up to use the updated
> bdrv_drop_intermediate():

Good idea!

However bdrv_drop_intermediate() do

[Qemu-devel] [PATCH v18 00/15] Drop in_use from BlockDriverState and enable point-in-time snapshot exporting over NBD

2014-04-09 Thread Fam Zheng
v18: Address review comments from Jeff and Eric. Rebased to current master.
 Side by side diff from v17: http://bit.ly/1oO2Fvt

[01/15] block: Add BlockOpType enum
Add Jeff's reviewed-by.

[02/15] block: Introduce op_blockers to BlockDriverState
Add Jeff's reviewed-by.

[03/15] block: Replace in_use with operation blocker
Add Jeff's reviewed-by.

[04/15] block: Move op_blocker check from block_job_create to its caller
Add Jeff's reviewed-by.

[05/15] block: Add bdrv_set_backing_hd()
Don't unset bs->backing_file and bs->backing_format when
backing_hd==NULL, because qcow2_close() will save these into image
header.

[08/15] block: Support dropping active in bdrv_drop_intermediate
Swap parameters for bdrv_swap:
bdrv_swap(active, base); -> bdrv_swap(base, active);
Use bdrv_set_backing_hd().

[10/15] commit: Use bdrv_drop_intermediate
New. (Jeff)

[11/15] qmp: Add command 'blockdev-backup'
Since 2.0 -> Since 2.1. (Eric)

[13/15] block: Add blockdev-backup to transaction
Comment "Since 2.1" for blockdev-backup. (Eric)

[15/15] qemu-iotests: Image fleecing test case 089
Case number 083 -> 089.

Fam Zheng (15):
  block: Add BlockOpType enum
  block: Introduce op_blockers to BlockDriverState
  block: Replace in_use with operation blocker
  block: Move op_blocker check from block_job_create to its caller
  block: Add bdrv_set_backing_hd()
  block: Add backing_blocker in BlockDriverState
  block: Parse "backing" option to reference existing BDS
  block: Support dropping active in bdrv_drop_intermediate
  stream: Use bdrv_drop_intermediate and drop close_unused_images
  commit: Use bdrv_drop_intermediate
  qmp: Add command 'blockdev-backup'
  block: Allow backup on referenced named BlockDriverState
  block: Add blockdev-backup to transaction
  qemu-iotests: Test blockdev-backup in 055
  qemu-iotests: Image fleecing test case 089

 block-migration.c   |   7 +-
 block.c | 312 +++-
 block/backup.c  |  26 
 block/commit.c  |   2 +-
 block/mirror.c  |   9 +-
 block/stream.c  |  42 +-
 blockdev.c  | 122 ++--
 blockjob.c  |  14 +-
 hw/block/dataplane/virtio-blk.c |  18 ++-
 include/block/block.h   |  29 +++-
 include/block/block_int.h   |   9 +-
 include/block/blockjob.h|   3 +
 qapi-schema.json|  52 +++
 qmp-commands.hx |  44 ++
 tests/qemu-iotests/055  | 275 +--
 tests/qemu-iotests/055.out  |   4 +-
 tests/qemu-iotests/089  |  99 +
 tests/qemu-iotests/089.out  |   5 +
 tests/qemu-iotests/group|   1 +
 19 files changed, 856 insertions(+), 217 deletions(-)
 create mode 100755 tests/qemu-iotests/089
 create mode 100644 tests/qemu-iotests/089.out

-- 
1.9.1




[Qemu-devel] [PATCH v18 05/15] block: Add bdrv_set_backing_hd()

2014-04-09 Thread Fam Zheng
These are the common but non-trivial steps to assign or change the
backing_hd of a BDS.

Signed-off-by: Fam Zheng 
---
 block.c   | 37 +++--
 include/block/block.h |  1 +
 2 files changed, 24 insertions(+), 14 deletions(-)

diff --git a/block.c b/block.c
index c9cae4e..7859340 100644
--- a/block.c
+++ b/block.c
@@ -1052,6 +1052,21 @@ fail:
 return ret;
 }
 
+void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
+{
+
+bs->backing_hd = backing_hd;
+if (!backing_hd) {
+goto out;
+}
+bs->open_flags &= ~BDRV_O_NO_BACKING;
+pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
+pstrcpy(bs->backing_format, sizeof(bs->backing_format),
+backing_hd->drv ? backing_hd->drv->format_name : "");
+out:
+bdrv_refresh_limits(bs);
+}
+
 /*
  * Opens the backing file for a BlockDriverState if not yet open
  *
@@ -1065,6 +1080,7 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict 
*options, Error **errp)
 char backing_filename[PATH_MAX];
 int back_flags, ret;
 BlockDriver *back_drv = NULL;
+BlockDriverState *backing_hd;
 Error *local_err = NULL;
 
 if (bs->backing_hd != NULL) {
@@ -1088,6 +1104,8 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict 
*options, Error **errp)
sizeof(backing_filename));
 }
 
+backing_hd = bdrv_new("");
+
 if (bs->backing_format[0] != '\0') {
 back_drv = bdrv_find_format(bs->backing_format);
 }
@@ -1096,23 +1114,19 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict 
*options, Error **errp)
 back_flags = bs->open_flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT |
 BDRV_O_COPY_ON_READ);
 
-assert(bs->backing_hd == NULL);
-ret = bdrv_open(&bs->backing_hd,
+ret = bdrv_open(&backing_hd,
 *backing_filename ? backing_filename : NULL, NULL, options,
 back_flags, back_drv, &local_err);
 if (ret < 0) {
-bs->backing_hd = NULL;
+bdrv_unref(backing_hd);
+backing_hd = NULL;
 bs->open_flags |= BDRV_O_NO_BACKING;
 error_setg(errp, "Could not open backing file: %s",
error_get_pretty(local_err));
 error_free(local_err);
 return ret;
 }
-
-if (bs->backing_hd->file) {
-pstrcpy(bs->backing_file, sizeof(bs->backing_file),
-bs->backing_hd->file->filename);
-}
+bdrv_set_backing_hd(bs, backing_hd);
 
 /* Recalculate the BlockLimits with the backing file */
 bdrv_refresh_limits(bs);
@@ -1964,12 +1978,7 @@ void bdrv_append(BlockDriverState *bs_new, 
BlockDriverState *bs_top)
 
 /* The contents of 'tmp' will become bs_top, as we are
  * swapping bs_new and bs_top contents. */
-bs_top->backing_hd = bs_new;
-bs_top->open_flags &= ~BDRV_O_NO_BACKING;
-pstrcpy(bs_top->backing_file, sizeof(bs_top->backing_file),
-bs_new->filename);
-pstrcpy(bs_top->backing_format, sizeof(bs_top->backing_format),
-bs_new->drv ? bs_new->drv->format_name : "");
+bdrv_set_backing_hd(bs_top, bs_new);
 }
 
 static void bdrv_delete(BlockDriverState *bs)
diff --git a/include/block/block.h b/include/block/block.h
index 89d9405..b76e4af 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -208,6 +208,7 @@ int bdrv_parse_discard_flags(const char *mode, int *flags);
 int bdrv_open_image(BlockDriverState **pbs, const char *filename,
 QDict *options, const char *bdref_key, int flags,
 bool allow_none, Error **errp);
+void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd);
 int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp);
 void bdrv_append_temp_snapshot(BlockDriverState *bs, Error **errp);
 int bdrv_open(BlockDriverState **pbs, const char *filename,
-- 
1.9.1




[Qemu-devel] [PATCH v18 01/15] block: Add BlockOpType enum

2014-04-09 Thread Fam Zheng
This adds the enum of all the operations that can be taken on a block
device.

Signed-off-by: Fam Zheng 
Reviewed-by: Benoit Canet 
Reviewed-by: Jeff Cody 
---
 include/block/block.h | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/include/block/block.h b/include/block/block.h
index b3230a2..e6814aa 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -154,6 +154,25 @@ typedef struct BDRVReopenState {
 void *opaque;
 } BDRVReopenState;
 
+/*
+ * Block operation types
+ */
+typedef enum BlockOpType {
+BLOCK_OP_TYPE_BACKUP_SOURCE,
+BLOCK_OP_TYPE_BACKUP_TARGET,
+BLOCK_OP_TYPE_CHANGE,
+BLOCK_OP_TYPE_COMMIT,
+BLOCK_OP_TYPE_DATAPLANE,
+BLOCK_OP_TYPE_DRIVE_DEL,
+BLOCK_OP_TYPE_EJECT,
+BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT,
+BLOCK_OP_TYPE_INTERNAL_SNAPSHOT,
+BLOCK_OP_TYPE_INTERNAL_SNAPSHOT_DELETE,
+BLOCK_OP_TYPE_MIRROR,
+BLOCK_OP_TYPE_RESIZE,
+BLOCK_OP_TYPE_STREAM,
+BLOCK_OP_TYPE_MAX,
+} BlockOpType;
 
 void bdrv_iostatus_enable(BlockDriverState *bs);
 void bdrv_iostatus_reset(BlockDriverState *bs);
-- 
1.9.1




[Qemu-devel] [PATCH v18 09/15] stream: Use bdrv_drop_intermediate and drop close_unused_images

2014-04-09 Thread Fam Zheng
This reuses the new bdrv_drop_intermediate.

Signed-off-by: Fam Zheng 
---
 block/stream.c | 42 +-
 1 file changed, 1 insertion(+), 41 deletions(-)

diff --git a/block/stream.c b/block/stream.c
index dd0b4ac..1b348a2 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -32,7 +32,6 @@ typedef struct StreamBlockJob {
 RateLimit limit;
 BlockDriverState *base;
 BlockdevOnError on_error;
-char backing_file_id[1024];
 } StreamBlockJob;
 
 static int coroutine_fn stream_populate(BlockDriverState *bs,
@@ -51,34 +50,6 @@ static int coroutine_fn stream_populate(BlockDriverState *bs,
 return bdrv_co_copy_on_readv(bs, sector_num, nb_sectors, &qiov);
 }
 
-static void close_unused_images(BlockDriverState *top, BlockDriverState *base,
-const char *base_id)
-{
-BlockDriverState *intermediate;
-intermediate = top->backing_hd;
-
-/* Must assign before bdrv_delete() to prevent traversing dangling pointer
- * while we delete backing image instances.
- */
-top->backing_hd = base;
-
-while (intermediate) {
-BlockDriverState *unused;
-
-/* reached base */
-if (intermediate == base) {
-break;
-}
-
-unused = intermediate;
-intermediate = intermediate->backing_hd;
-unused->backing_hd = NULL;
-bdrv_unref(unused);
-}
-
-bdrv_refresh_limits(top);
-}
-
 static void coroutine_fn stream_run(void *opaque)
 {
 StreamBlockJob *s = opaque;
@@ -184,15 +155,7 @@ wait:
 ret = error;
 
 if (!block_job_is_cancelled(&s->common) && sector_num == end && ret == 0) {
-const char *base_id = NULL, *base_fmt = NULL;
-if (base) {
-base_id = s->backing_file_id;
-if (base->drv) {
-base_fmt = base->drv->format_name;
-}
-}
-ret = bdrv_change_backing_file(bs, base_id, base_fmt);
-close_unused_images(bs, base, base_id);
+ret = bdrv_drop_intermediate(bs, bs->backing_hd, base);
 }
 
 qemu_vfree(buf);
@@ -237,9 +200,6 @@ void stream_start(BlockDriverState *bs, BlockDriverState 
*base,
 }
 
 s->base = base;
-if (base_id) {
-pstrcpy(s->backing_file_id, sizeof(s->backing_file_id), base_id);
-}
 
 s->on_error = on_error;
 s->common.co = qemu_coroutine_create(stream_run);
-- 
1.9.1




[Qemu-devel] [PATCH v18 07/15] block: Parse "backing" option to reference existing BDS

2014-04-09 Thread Fam Zheng
Now it's safe to allow reference for backing_hd in the interface.

Signed-off-by: Fam Zheng 
---
 block.c | 27 +--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/block.c b/block.c
index 6f803fa..beacf09 100644
--- a/block.c
+++ b/block.c
@@ -1401,12 +1401,35 @@ int bdrv_open(BlockDriverState **pbs, const char 
*filename,
 /* If there is a backing file, use it */
 if ((flags & BDRV_O_NO_BACKING) == 0) {
 QDict *backing_options;
+const char *backing_name;
+BlockDriverState *backing_hd;
 
+backing_name = qdict_get_try_str(options, "backing");
 qdict_extract_subqdict(options, &backing_options, "backing.");
-ret = bdrv_open_backing_file(bs, backing_options, &local_err);
-if (ret < 0) {
+
+if (backing_name && qdict_size(backing_options)) {
+error_setg(&local_err,
+   "Option \"backing\" and \"backing.*\" cannot be "
+   "used together");
+ret = -EINVAL;
 goto close_and_fail;
 }
+if (backing_name) {
+backing_hd = bdrv_find(backing_name);
+if (!backing_hd) {
+error_set(&local_err, QERR_DEVICE_NOT_FOUND, backing_name);
+ret = -ENOENT;
+goto close_and_fail;
+}
+qdict_del(options, "backing");
+bdrv_set_backing_hd(bs, backing_hd);
+bdrv_ref(backing_hd);
+} else {
+ret = bdrv_open_backing_file(bs, backing_options, &local_err);
+if (ret < 0) {
+goto close_and_fail;
+}
+}
 }
 
 /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
-- 
1.9.1




[Qemu-devel] [PATCH v18 06/15] block: Add backing_blocker in BlockDriverState

2014-04-09 Thread Fam Zheng
This makes use of op_blocker and blocks all the operations except for
commit target, on each BlockDriverState->backing_hd.

The asserts for op_blocker in bdrv_swap are removed because with this
change, the target of block commit has at least the backing blocker of
its child, so the assertion is not true. Callers should do their check.

Signed-off-by: Fam Zheng 
---
 block.c   | 24 
 block/mirror.c|  1 +
 include/block/block_int.h |  3 +++
 3 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/block.c b/block.c
index 7859340..6f803fa 100644
--- a/block.c
+++ b/block.c
@@ -1055,14 +1055,31 @@ fail:
 void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
 {
 
+if (bs->backing_hd) {
+assert(error_is_set(&bs->backing_blocker));
+bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
+} else if (backing_hd) {
+error_setg(&bs->backing_blocker,
+   "device is used as backing hd of '%s'",
+   bs->device_name);
+}
+
 bs->backing_hd = backing_hd;
 if (!backing_hd) {
+if (error_is_set(&bs->backing_blocker)) {
+error_free(bs->backing_blocker);
+}
 goto out;
 }
 bs->open_flags &= ~BDRV_O_NO_BACKING;
 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
 pstrcpy(bs->backing_format, sizeof(bs->backing_format),
 backing_hd->drv ? backing_hd->drv->format_name : "");
+
+bdrv_op_block_all(bs->backing_hd, bs->backing_blocker);
+/* Otherwise we won't be able to commit due to check in bdrv_commit */
+bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT,
+bs->backing_blocker);
 out:
 bdrv_refresh_limits(bs);
 }
@@ -1717,8 +1734,9 @@ void bdrv_close(BlockDriverState *bs)
 
 if (bs->drv) {
 if (bs->backing_hd) {
-bdrv_unref(bs->backing_hd);
-bs->backing_hd = NULL;
+BlockDriverState *backing_hd = bs->backing_hd;
+bdrv_set_backing_hd(bs, NULL);
+bdrv_unref(backing_hd);
 }
 bs->drv->bdrv_close(bs);
 g_free(bs->opaque);
@@ -1926,7 +1944,6 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState 
*bs_old)
 assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
 assert(bs_new->job == NULL);
 assert(bs_new->dev == NULL);
-assert(bdrv_op_blocker_is_empty(bs_new));
 assert(bs_new->io_limits_enabled == false);
 assert(!throttle_have_timer(&bs_new->throttle_state));
 
@@ -1945,7 +1962,6 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState 
*bs_old)
 /* Check a few fields that should remain attached to the device */
 assert(bs_new->dev == NULL);
 assert(bs_new->job == NULL);
-assert(bdrv_op_blocker_is_empty(bs_new));
 assert(bs_new->io_limits_enabled == false);
 assert(!throttle_have_timer(&bs_new->throttle_state));
 
diff --git a/block/mirror.c b/block/mirror.c
index 0ef41f9..22ec5be 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -496,6 +496,7 @@ immediate_exit:
  * trigger the unref from the top one */
 BlockDriverState *p = s->base->backing_hd;
 s->base->backing_hd = NULL;
+bdrv_op_unblock_all(p, s->base->backing_blocker);
 bdrv_unref(p);
 }
 }
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 025ee02..594df74 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -369,6 +369,9 @@ struct BlockDriverState {
 BlockJob *job;
 
 QDict *options;
+
+/* The error object in use for blocking operations on backing_hd */
+Error *backing_blocker;
 };
 
 int get_tmp_filename(char *filename, int size);
-- 
1.9.1




[Qemu-devel] [PATCH v18 11/15] qmp: Add command 'blockdev-backup'

2014-04-09 Thread Fam Zheng
Similar to drive-backup, but this command uses a device id as target
instead of creating/opening an image file.

Also add blocker on target bs, since the target is also a named device
now.

Add a check and report an error for bs == target, which became possible
with the introduction of blockdev-backup but is an illegal case.

Signed-off-by: Fam Zheng 
---
 block/backup.c   | 26 ++
 blockdev.c   | 47 +++
 qapi-schema.json | 49 +
 qmp-commands.hx  | 44 
 4 files changed, 166 insertions(+)

diff --git a/block/backup.c b/block/backup.c
index 15a2e55..ea46340 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -344,6 +344,7 @@ static void coroutine_fn backup_run(void *opaque)
 hbitmap_free(job->bitmap);
 
 bdrv_iostatus_disable(target);
+bdrv_op_unblock_all(target, job->common.blocker);
 bdrv_unref(target);
 
 block_job_completed(&job->common, ret);
@@ -362,6 +363,11 @@ void backup_start(BlockDriverState *bs, BlockDriverState 
*target,
 assert(target);
 assert(cb);
 
+if (bs == target) {
+error_setg(errp, "Source and target cannot be the same");
+return;
+}
+
 if ((on_source_error == BLOCKDEV_ON_ERROR_STOP ||
  on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
 !bdrv_iostatus_is_enabled(bs)) {
@@ -369,6 +375,24 @@ void backup_start(BlockDriverState *bs, BlockDriverState 
*target,
 return;
 }
 
+if (!bdrv_is_inserted(bs)) {
+error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, bs->device_name);
+return;
+}
+
+if (!bdrv_is_inserted(target)) {
+error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, target->device_name);
+return;
+}
+
+if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) {
+return;
+}
+
+if (bdrv_op_is_blocked(target, BLOCK_OP_TYPE_BACKUP_TARGET, errp)) {
+return;
+}
+
 len = bdrv_getlength(bs);
 if (len < 0) {
 error_setg_errno(errp, -len, "unable to get length for '%s'",
@@ -382,6 +406,8 @@ void backup_start(BlockDriverState *bs, BlockDriverState 
*target,
 return;
 }
 
+bdrv_op_block_all(target, job->common.blocker);
+
 job->on_source_error = on_source_error;
 job->on_target_error = on_target_error;
 job->target = target;
diff --git a/blockdev.c b/blockdev.c
index d68fd2b..f241455 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1969,6 +1969,8 @@ void qmp_drive_backup(const char *device, const char 
*target,
 return;
 }
 
+/* Although backup_run has this check too, we need to use bs->drv below, so
+ * do an early check redundantly. */
 if (!bdrv_is_inserted(bs)) {
 error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
 return;
@@ -1985,6 +1987,7 @@ void qmp_drive_backup(const char *device, const char 
*target,
 }
 }
 
+/* Early check to avoid creating target */
 if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) {
 return;
 }
@@ -2047,6 +2050,50 @@ BlockDeviceInfoList *qmp_query_named_block_nodes(Error 
**errp)
 return bdrv_named_nodes_list();
 }
 
+void qmp_blockdev_backup(const char *device, const char *target,
+ enum MirrorSyncMode sync,
+ bool has_speed, int64_t speed,
+ bool has_on_source_error,
+ BlockdevOnError on_source_error,
+ bool has_on_target_error,
+ BlockdevOnError on_target_error,
+ Error **errp)
+{
+BlockDriverState *bs;
+BlockDriverState *target_bs;
+Error *local_err = NULL;
+
+if (!has_speed) {
+speed = 0;
+}
+if (!has_on_source_error) {
+on_source_error = BLOCKDEV_ON_ERROR_REPORT;
+}
+if (!has_on_target_error) {
+on_target_error = BLOCKDEV_ON_ERROR_REPORT;
+}
+
+bs = bdrv_find(device);
+if (!bs) {
+error_set(errp, QERR_DEVICE_NOT_FOUND, device);
+return;
+}
+
+target_bs = bdrv_find(target);
+if (!target_bs) {
+error_set(errp, QERR_DEVICE_NOT_FOUND, target);
+return;
+}
+
+bdrv_ref(target_bs);
+backup_start(bs, target_bs, speed, sync, on_source_error, on_target_error,
+ block_job_cb, bs, &local_err);
+if (local_err != NULL) {
+bdrv_unref(target_bs);
+error_propagate(errp, local_err);
+}
+}
+
 #define DEFAULT_MIRROR_BUF_SIZE   (10 << 20)
 
 void qmp_drive_mirror(const char *device, const char *target,
diff --git a/qapi-schema.json b/qapi-schema.json
index 391356f..fe54b90 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -1948,6 +1948,40 @@
 '*on-target-error': 'BlockdevOnError' } }
 
 ##
+# @BlockdevBackup
+

[Qemu-devel] [PATCH v18 15/15] qemu-iotests: Image fleecing test case 089

2014-04-09 Thread Fam Zheng
This tests the workflow of creating a lightweight point-in-time snapshot
with blockdev-backup command, and exporting it with built-in NBD server.

The test verifies that post-snapshot writes to the original device
don't change the data seen through the NBD target.

Signed-off-by: Fam Zheng 
---
 tests/qemu-iotests/089 | 99 ++
 tests/qemu-iotests/089.out |  5 +++
 tests/qemu-iotests/group   |  1 +
 3 files changed, 105 insertions(+)
 create mode 100755 tests/qemu-iotests/089
 create mode 100644 tests/qemu-iotests/089.out

diff --git a/tests/qemu-iotests/089 b/tests/qemu-iotests/089
new file mode 100755
index 000..8be32d7
--- /dev/null
+++ b/tests/qemu-iotests/089
@@ -0,0 +1,99 @@
+#!/usr/bin/env python
+#
+# Tests for image fleecing (point in time snapshot export to NBD)
+#
+# Copyright (C) 2014 Red Hat, Inc.
+#
+# Based on 055.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import time
+import os
+import iotests
+from iotests import qemu_img, qemu_io
+
+test_img = os.path.join(iotests.test_dir, 'test.img')
+target_img = os.path.join(iotests.test_dir, 'target.img')
+nbd_sock = os.path.join(iotests.test_dir, 'nbd.sock')
+
+class TestImageFleecing(iotests.QMPTestCase):
+image_len = 64 * 1024 * 1024 # MB
+
+def setUp(self):
+# Write data to the image so we can compare later
+qemu_img('create', '-f', iotests.imgfmt, test_img, 
str(TestImageFleecing.image_len))
+self.patterns = [
+("0x5d", "0", "64k"),
+("0xd5", "1M", "64k"),
+("0xdc", "32M", "64k"),
+("0xdc", "67043328", "64k")]
+
+for p in self.patterns:
+qemu_io('-c', 'write -P%s %s %s' % p, test_img)
+
+qemu_img('create', '-f', iotests.imgfmt, target_img, 
str(TestImageFleecing.image_len))
+
+self.vm = iotests.VM().add_drive(test_img)
+self.vm.launch()
+
+self.overwrite_patterns = [
+("0xa0", "0", "64k"),
+("0x0a", "1M", "64k"),
+("0x55", "32M", "64k"),
+("0x56", "67043328", "64k")]
+
+self.nbd_uri = "nbd+unix:///drive1?socket=%s" % nbd_sock
+
+def tearDown(self):
+self.vm.shutdown()
+os.remove(test_img)
+os.remove(target_img)
+
+def verify_patterns(self):
+for p in self.patterns:
+self.assertEqual(-1, qemu_io(self.nbd_uri, '-c', 'read -P%s %s %s' 
% p).find("verification failed"),
+ "Failed to verify pattern: %s %s %s" % p)
+
+def test_image_fleecing(self):
+result = self.vm.qmp("blockdev-add", **{"options": {
+"driver": "qcow2",
+"id": "drive1",
+"file": {
+"driver": "file",
+"filename": target_img,
+},
+"backing": "drive0",
+}})
+self.assert_qmp(result, 'return', {})
+result = self.vm.qmp("nbd-server-start", **{"addr": { "type": "unix", 
"data": { "path": nbd_sock } } })
+self.assert_qmp(result, 'return', {})
+result = self.vm.qmp("blockdev-backup", device="drive0", 
target="drive1", sync="none")
+self.assert_qmp(result, 'return', {})
+result = self.vm.qmp("nbd-server-add", device="drive1")
+self.assert_qmp(result, 'return', {})
+
+self.verify_patterns()
+
+for p in self.overwrite_patterns:
+self.vm.hmp_qemu_io("drive0", "write -P%s %s %s" % p)
+
+self.verify_patterns()
+
+self.cancel_and_wait(resume=True)
+self.assert_no_active_block_jobs()
+
+if __name__ == '__main__':
+iotests.main(supported_fmts=['raw', 'qcow2'])
diff --git a/tests/qemu-iotests/089.out b/tests/qemu-iotests/089.out
new file mode 100644
index 000..ae1213e
--- /dev/null
+++ b/tests/qemu-iotests/089.out
@@ -0,0 +1,5 @@
+.
+--
+Ran 1 tests
+
+OK
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index 864643d..e975e93 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -95,3 +95,4 @@
 086 rw auto quick
 087 rw auto
 088 rw auto
+089 rw auto quick
-- 
1.9.1




[Qemu-devel] [PATCH v18 14/15] qemu-iotests: Test blockdev-backup in 055

2014-04-09 Thread Fam Zheng
This applies the existing drive-backup cases to blockdev-backup as well,
except for the cases with target format and mode.

Also add a case to check source == target.

Signed-off-by: Fam Zheng 
---
 tests/qemu-iotests/055 | 275 ++---
 tests/qemu-iotests/055.out |   4 +-
 2 files changed, 235 insertions(+), 44 deletions(-)

diff --git a/tests/qemu-iotests/055 b/tests/qemu-iotests/055
index 451b67d..1fab088 100755
--- a/tests/qemu-iotests/055
+++ b/tests/qemu-iotests/055
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 #
-# Tests for drive-backup
+# Tests for drive-backup and blockdev-backup
 #
 # Copyright (C) 2013 Red Hat, Inc.
 #
@@ -27,6 +27,7 @@ from iotests import qemu_img, qemu_io
 
 test_img = os.path.join(iotests.test_dir, 'test.img')
 target_img = os.path.join(iotests.test_dir, 'target.img')
+blockdev_target_img = os.path.join(iotests.test_dir, 'blockdev-target.img')
 
 class TestSingleDrive(iotests.QMPTestCase):
 image_len = 64 * 1024 * 1024 # MB
@@ -38,34 +39,48 @@ class TestSingleDrive(iotests.QMPTestCase):
 qemu_io('-c', 'write -P0xd5 1M 32k', test_img)
 qemu_io('-c', 'write -P0xdc 32M 124k', test_img)
 qemu_io('-c', 'write -P0xdc 67043328 64k', test_img)
+qemu_img('create', '-f', iotests.imgfmt, blockdev_target_img, 
str(TestSingleDrive.image_len))
 
-self.vm = iotests.VM().add_drive(test_img)
+self.vm = 
iotests.VM().add_drive(test_img).add_drive(blockdev_target_img)
 self.vm.launch()
 
 def tearDown(self):
 self.vm.shutdown()
 os.remove(test_img)
+os.remove(blockdev_target_img)
 try:
 os.remove(target_img)
 except OSError:
 pass
 
-def test_cancel(self):
+def do_test_cancel(self, test_drive_backup):
 self.assert_no_active_block_jobs()
 
-result = self.vm.qmp('drive-backup', device='drive0',
- target=target_img, sync='full')
+if test_drive_backup:
+result = self.vm.qmp('drive-backup', device='drive0',
+ target=target_img, sync='full')
+else:
+result = self.vm.qmp('blockdev-backup', device='drive0',
+ target='drive1', sync='full')
 self.assert_qmp(result, 'return', {})
 
 event = self.cancel_and_wait()
 self.assert_qmp(event, 'data/type', 'backup')
 
-def test_pause(self):
+def test_cancel(self):
+self.do_test_cancel(True)
+self.do_test_cancel(False)
+
+def do_test_pause(self, test_drive_backup):
 self.assert_no_active_block_jobs()
 
 self.vm.pause_drive('drive0')
-result = self.vm.qmp('drive-backup', device='drive0',
- target=target_img, sync='full')
+if test_drive_backup:
+result = self.vm.qmp('drive-backup', device='drive0',
+ target=target_img, sync='full')
+else:
+result = self.vm.qmp('blockdev-backup', device='drive0',
+ target='drive1', sync='full')
 self.assert_qmp(result, 'return', {})
 
 result = self.vm.qmp('block-job-pause', device='drive0')
@@ -86,14 +101,28 @@ class TestSingleDrive(iotests.QMPTestCase):
 self.wait_until_completed()
 
 self.vm.shutdown()
-self.assertTrue(iotests.compare_images(test_img, target_img),
-'target image does not match source after backup')
+if test_drive_backup:
+self.assertTrue(iotests.compare_images(test_img, target_img),
+'target image does not match source after backup')
+else:
+self.assertTrue(iotests.compare_images(test_img, 
blockdev_target_img),
+'target image does not match source after backup')
+
+def test_pause_drive_backup(self):
+self.do_test_pause(True)
+
+def test_pause_blockdev_backup(self):
+self.do_test_pause(False)
 
 def test_medium_not_found(self):
 result = self.vm.qmp('drive-backup', device='ide1-cd0',
  target=target_img, sync='full')
 self.assert_qmp(result, 'error/class', 'GenericError')
 
+result = self.vm.qmp('blockdev-backup', device='ide1-cd0',
+ target='drive1', sync='full')
+self.assert_qmp(result, 'error/class', 'GenericError')
+
 def test_ima

[Qemu-devel] [PATCH v18 04/15] block: Move op_blocker check from block_job_create to its caller

2014-04-09 Thread Fam Zheng
It makes no sense to check for "any" blocker on bs; the check is here only
because of the mechanical conversion from in_use to op_blockers. Remove
it now, and let the callers check specific operation types. Backup and
mirror already have such checks; add them to stream and commit.

Signed-off-by: Fam Zheng 
Reviewed-by: Benoit Canet 
Reviewed-by: Jeff Cody 
---
 blockdev.c | 8 
 blockjob.c | 2 +-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/blockdev.c b/blockdev.c
index 593925e..d68fd2b 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1846,6 +1846,10 @@ void qmp_block_stream(const char *device, bool has_base,
 return;
 }
 
+if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_STREAM, errp)) {
+return;
+}
+
 if (base) {
 base_bs = bdrv_find_backing_image(bs, base);
 if (base_bs == NULL) {
@@ -1886,6 +1890,10 @@ void qmp_block_commit(const char *device,
 return;
 }
 
+if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT, errp)) {
+return;
+}
+
 /* default top_bs is the active layer */
 top_bs = bs;
 
diff --git a/blockjob.c b/blockjob.c
index f643a78..3e33051 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -41,7 +41,7 @@ void *block_job_create(const BlockJobDriver *driver, 
BlockDriverState *bs,
 {
 BlockJob *job;
 
-if (bs->job || !bdrv_op_blocker_is_empty(bs)) {
+if (bs->job) {
 error_set(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs));
 return NULL;
 }
-- 
1.9.1




[Qemu-devel] [PATCH v18 10/15] commit: Use bdrv_drop_intermediate

2014-04-09 Thread Fam Zheng
Signed-off-by: Fam Zheng 
---
 block/mirror.c | 10 +++---
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/block/mirror.c b/block/mirror.c
index 22ec5be..c3d819e 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -490,14 +490,10 @@ immediate_exit:
 if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) {
 bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL);
 }
-bdrv_swap(s->target, s->common.bs);
 if (s->common.driver->job_type == BLOCK_JOB_TYPE_COMMIT) {
-/* drop the bs loop chain formed by the swap: break the loop then
- * trigger the unref from the top one */
-BlockDriverState *p = s->base->backing_hd;
-s->base->backing_hd = NULL;
-bdrv_op_unblock_all(p, s->base->backing_blocker);
-bdrv_unref(p);
+ret = bdrv_drop_intermediate(s->common.bs, s->common.bs, s->base);
+} else {
+bdrv_swap(s->target, s->common.bs);
 }
 }
 bdrv_unref(s->target);
-- 
1.9.1




[Qemu-devel] [PATCH v18 12/15] block: Allow backup on referenced named BlockDriverState

2014-04-09 Thread Fam Zheng
Drive backup is a read-only operation on the source bs. We want to allow
this specific case to enable image fleecing. Note that when an
image-fleecing job starts, the job still adds its blocker to the source bs,
and any other operation on it will be blocked by that.

Signed-off-by: Fam Zheng 
---
 block.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/block.c b/block.c
index b8e229f..9595e17 100644
--- a/block.c
+++ b/block.c
@@ -1080,6 +1080,8 @@ void bdrv_set_backing_hd(BlockDriverState *bs, 
BlockDriverState *backing_hd)
 /* Otherwise we won't be able to commit due to check in bdrv_commit */
 bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT,
 bs->backing_blocker);
+bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_BACKUP_SOURCE,
+bs->backing_blocker);
 out:
 bdrv_refresh_limits(bs);
 }
-- 
1.9.1




[Qemu-devel] [PATCH v18 13/15] block: Add blockdev-backup to transaction

2014-04-09 Thread Fam Zheng
Signed-off-by: Fam Zheng 
---
 blockdev.c   | 48 
 qapi-schema.json |  3 +++
 2 files changed, 51 insertions(+)

diff --git a/blockdev.c b/blockdev.c
index f241455..8a6ae0a 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1409,6 +1409,49 @@ static void drive_backup_abort(BlkTransactionState 
*common)
 }
 }
 
+typedef struct BlockdevBackupState {
+BlkTransactionState common;
+BlockDriverState *bs;
+BlockJob *job;
+} BlockdevBackupState;
+
+static void blockdev_backup_prepare(BlkTransactionState *common, Error **errp)
+{
+BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, 
common);
+BlockdevBackup *backup;
+Error *local_err = NULL;
+
+assert(common->action->kind == TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP);
+backup = common->action->blockdev_backup;
+
+qmp_blockdev_backup(backup->device, backup->target,
+backup->sync,
+backup->has_speed, backup->speed,
+backup->has_on_source_error, backup->on_source_error,
+backup->has_on_target_error, backup->on_target_error,
+&local_err);
+if (error_is_set(&local_err)) {
+error_propagate(errp, local_err);
+state->bs = NULL;
+state->job = NULL;
+return;
+}
+
+state->bs = bdrv_find(backup->device);
+state->job = state->bs->job;
+}
+
+static void blockdev_backup_abort(BlkTransactionState *common)
+{
+BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, 
common);
+BlockDriverState *bs = state->bs;
+
+/* Only cancel if it's the job we started */
+if (bs && bs->job && bs->job == state->job) {
+block_job_cancel_sync(bs->job);
+}
+}
+
 static void abort_prepare(BlkTransactionState *common, Error **errp)
 {
 error_setg(errp, "Transaction aborted using Abort action");
@@ -1431,6 +1474,11 @@ static const BdrvActionOps actions[] = {
 .prepare = drive_backup_prepare,
 .abort = drive_backup_abort,
 },
+[TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP] = {
+.instance_size = sizeof(BlockdevBackupState),
+.prepare = blockdev_backup_prepare,
+.abort = blockdev_backup_abort,
+},
 [TRANSACTION_ACTION_KIND_ABORT] = {
 .instance_size = sizeof(BlkTransactionState),
 .prepare = abort_prepare,
diff --git a/qapi-schema.json b/qapi-schema.json
index fe54b90..5426c45 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -1996,11 +1996,14 @@
 #
 # A discriminated record of operations that can be performed with
 # @transaction.
+#
+# Since 1.1, blockdev-backup since 2.1
 ##
 { 'union': 'TransactionAction',
   'data': {
'blockdev-snapshot-sync': 'BlockdevSnapshot',
'drive-backup': 'DriveBackup',
+   'blockdev-backup': 'BlockdevBackup',
'abort': 'Abort',
'blockdev-snapshot-internal-sync': 'BlockdevSnapshotInternal'
} }
-- 
1.9.1




[Qemu-devel] [PATCH v18 02/15] block: Introduce op_blockers to BlockDriverState

2014-04-09 Thread Fam Zheng
BlockDriverState.op_blockers is an array of lists with BLOCK_OP_TYPE_MAX
elements. Each list is a list of blockers of an operation type
(BlockOpType), that marks this BDS as currently blocked for a certain
type of operation with reason errors stored in the list. The rule of
usage is:

 * BDS user who wants to take an operation should check if there's any
   blocker of the type with bdrv_op_is_blocked().

 * BDS user who wants to block certain types of operation, should call
   bdrv_op_block (or bdrv_op_block_all to block all types of operations,
   which is similar to the existing bdrv_set_in_use()).

 * A blocker is only referenced by op_blockers, so the lifecycle is
   managed by caller, and shouldn't be lost until unblock, so typically
   a caller does these:

   - Allocate a blocker with error_setg or similar, call bdrv_op_block()
 to block some operations.
   - Hold the blocker, do his job.
   - Unblock operations that it blocked, with the same reason pointer
 passed to bdrv_op_unblock().
   - Release the blocker with error_free().

Signed-off-by: Fam Zheng 
Reviewed-by: Benoit Canet 
Reviewed-by: Jeff Cody 
---
 block.c   | 75 +++
 include/block/block.h |  7 +
 include/block/block_int.h |  5 
 3 files changed, 87 insertions(+)

diff --git a/block.c b/block.c
index 990a754..abfca3a 100644
--- a/block.c
+++ b/block.c
@@ -335,6 +335,7 @@ void bdrv_register(BlockDriver *bdrv)
 BlockDriverState *bdrv_new(const char *device_name)
 {
 BlockDriverState *bs;
+int i;
 
 bs = g_malloc0(sizeof(BlockDriverState));
 QLIST_INIT(&bs->dirty_bitmaps);
@@ -342,6 +343,9 @@ BlockDriverState *bdrv_new(const char *device_name)
 if (device_name[0] != '\0') {
 QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
 }
+for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
+QLIST_INIT(&bs->op_blockers[i]);
+}
 bdrv_iostatus_disable(bs);
 notifier_list_init(&bs->close_notifiers);
 notifier_with_return_list_init(&bs->before_write_notifiers);
@@ -1873,6 +1877,8 @@ static void bdrv_move_feature_fields(BlockDriverState 
*bs_dest,
 pstrcpy(bs_dest->device_name, sizeof(bs_dest->device_name),
 bs_src->device_name);
 bs_dest->device_list = bs_src->device_list;
+memcpy(bs_dest->op_blockers, bs_src->op_blockers,
+   sizeof(bs_dest->op_blockers));
 }
 
 /*
@@ -5199,6 +5205,75 @@ void bdrv_unref(BlockDriverState *bs)
 }
 }
 
+struct BdrvOpBlocker {
+Error *reason;
+QLIST_ENTRY(BdrvOpBlocker) list;
+};
+
+bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
+{
+BdrvOpBlocker *blocker;
+assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
+if (!QLIST_EMPTY(&bs->op_blockers[op])) {
+blocker = QLIST_FIRST(&bs->op_blockers[op]);
+if (errp) {
+*errp = error_copy(blocker->reason);
+}
+return true;
+}
+return false;
+}
+
+void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
+{
+BdrvOpBlocker *blocker;
+assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
+
+blocker = g_malloc0(sizeof(BdrvOpBlocker));
+blocker->reason = reason;
+QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
+}
+
+void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
+{
+BdrvOpBlocker *blocker, *next;
+assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
+QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
+if (blocker->reason == reason) {
+QLIST_REMOVE(blocker, list);
+g_free(blocker);
+}
+}
+}
+
+void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
+{
+int i;
+for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
+bdrv_op_block(bs, i, reason);
+}
+}
+
+void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
+{
+int i;
+for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
+bdrv_op_unblock(bs, i, reason);
+}
+}
+
+bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
+{
+int i;
+
+for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
+if (!QLIST_EMPTY(&bs->op_blockers[i])) {
+return false;
+}
+}
+return true;
+}
+
 void bdrv_set_in_use(BlockDriverState *bs, int in_use)
 {
 assert(bs->in_use != in_use);
diff --git a/include/block/block.h b/include/block/block.h
index e6814aa..cf30046 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -466,6 +466,13 @@ void bdrv_unref(BlockDriverState *bs);
 void bdrv_set_in_use(BlockDriverState *bs, int in_use);
 int bdrv_in_use(BlockDriverState *bs);
 
+bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp);
+void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason);
+void bdrv_op_unblock(

[Qemu-devel] [PATCH v18 08/15] block: Support dropping active in bdrv_drop_intermediate

2014-04-09 Thread Fam Zheng
Dropping intermediate could be useful both for commit and stream, and
BDS refcnt plus bdrv_swap could do most of the job nicely. It also needs
to work with op blockers.

Signed-off-by: Fam Zheng 
---
 block.c| 139 -
 block/commit.c |   2 +-
 2 files changed, 70 insertions(+), 71 deletions(-)

diff --git a/block.c b/block.c
index beacf09..b8e229f 100644
--- a/block.c
+++ b/block.c
@@ -2521,115 +2521,114 @@ BlockDriverState *bdrv_find_overlay(BlockDriverState 
*active,
 return overlay;
 }
 
-typedef struct BlkIntermediateStates {
-BlockDriverState *bs;
-QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
-} BlkIntermediateStates;
-
-
 /*
- * Drops images above 'base' up to and including 'top', and sets the image
- * above 'top' to have base as its backing file.
+ * Drops images above 'base' up to and including 'top', and sets new 'base' as
+ * backing_hd of top's overlay (the image originally has 'top' as backing file).
+ * top's overlay may be NULL if 'top' is active, no such update needed.
+ * Requires that the top's overlay to 'top' is opened r/w.
+ *
+ * 1) This will convert the following chain:
+ *
+ * ... <- base <- ... <- top <- overlay <-... <- active
  *
- * Requires that the overlay to 'top' is opened r/w, so that the backing file
- * information in 'bs' can be properly updated.
+ * to
+ *
+ * ... <- base <- overlay <- active
+ *
+ * 2) It is allowed for bottom==base, in which case it converts:
  *
- * E.g., this will convert the following chain:
- * bottom <- base <- intermediate <- top <- active
+ * base <- ... <- top <- overlay <- ... <- active
  *
  * to
  *
- * bottom <- base <- active
+ * base <- overlay <- active
  *
- * It is allowed for bottom==base, in which case it converts:
+ * 2) It also allows active==top, in which case it converts:
  *
- * base <- intermediate <- top <- active
+ * ... <- base <- ... <- top (active)
  *
  * to
  *
- * base <- active
+ * ... <- base == active == top
+ *
+ * i.e. only base and lower remains: *top == *base when return.
+ *
+ * 3) If base==NULL, it will drop all the BDS below overlay and set its
+ * backing_hd to NULL. I.e.:
  *
- * Error conditions:
- *  if active == top, that is considered an error
+ * base(NULL) <- ... <- overlay <- ... <- active
+ *
+ * to
+ *
+ * overlay <- ... <- active
  *
  */
 int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
BlockDriverState *base)
 {
-BlockDriverState *intermediate;
-BlockDriverState *base_bs = NULL;
-BlockDriverState *new_top_bs = NULL;
-BlkIntermediateStates *intermediate_state, *next;
-int ret = -EIO;
-
-QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
-QSIMPLEQ_INIT(&states_to_delete);
+BlockDriverState *drop_start, *overlay, *bs;
+int ret = -EINVAL;
 
-if (!top->drv || !base->drv) {
+assert(active);
+assert(top);
+/* Verify that top is in backing chain of active */
+bs = active;
+while (bs && bs != top) {
+bs = bs->backing_hd;
+}
+if (!bs) {
 goto exit;
 }
+/* Verify that base is in backing chain of top */
+if (base) {
+while (bs && bs != base) {
+bs = bs->backing_hd;
+}
+if (bs != base) {
+goto exit;
+}
+}
 
-new_top_bs = bdrv_find_overlay(active, top);
-
-if (new_top_bs == NULL) {
-/* we could not find the image above 'top', this is an error */
+if (!top->drv || (base && !base->drv)) {
 goto exit;
 }
-
-/* special case of new_top_bs->backing_hd already pointing to base - 
nothing
- * to do, no intermediate images */
-if (new_top_bs->backing_hd == base) {
+if (top == base) {
+ret = 0;
+goto exit;
+} else if (top == active) {
+assert(base);
+drop_start = active->backing_hd;
+bdrv_swap(base, active);
+bdrv_set_backing_hd(base, NULL);
+bdrv_unref(drop_start);
 ret = 0;
 goto exit;
 }
 
-intermediate = top;
-
-/* now we will go down through the list, and add each BDS we find
- * into our deletion queue, until we hit the 'base'
- */
-while (intermediate) {
-intermediate_state = g_malloc0(sizeof(BlkIntermediateStates));
-intermediate_state->bs = intermediate;
-QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
-
-if (intermediate->backing_hd == base) {
-base_bs = intermediate->backing_hd;
-break;
-}
-intermediate = intermediate->

[Qemu-devel] [PATCH v18 03/15] block: Replace in_use with operation blocker

2014-04-09 Thread Fam Zheng
This drops BlockDriverState.in_use with op_blockers:

  - Call bdrv_op_block_all in place of bdrv_set_in_use(bs, 1).
  - Call bdrv_op_unblock_all in place of bdrv_set_in_use(bs, 0).
  - Check bdrv_op_is_blocked() in place of bdrv_in_use(bs).
The specific types are used, e.g. in place of starting block backup,
bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP, ...).
  - Check bdrv_op_blocker_is_empty() in place of assert(!bs->in_use).

Note: there are only bdrv_op_block_all and bdrv_op_unblock_all callers at
this moment. So although the checks are specific to op types, these
changes can still be seen as logically identical to the previous in_use
behavior. The difference is that error messages are improved because of
the blocker error info.

Signed-off-by: Fam Zheng 
Reviewed-by: Jeff Cody 
---
 block-migration.c   |  7 +--
 block.c | 24 +++-
 blockdev.c  | 19 +--
 blockjob.c  | 14 +-
 hw/block/dataplane/virtio-blk.c | 18 --
 include/block/block.h   |  2 --
 include/block/block_int.h   |  1 -
 include/block/blockjob.h|  3 +++
 8 files changed, 45 insertions(+), 43 deletions(-)

diff --git a/block-migration.c b/block-migration.c
index 897fdba..bf9a25f 100644
--- a/block-migration.c
+++ b/block-migration.c
@@ -59,6 +59,7 @@ typedef struct BlkMigDevState {
 unsigned long *aio_bitmap;
 int64_t completed_sectors;
 BdrvDirtyBitmap *dirty_bitmap;
+Error *blocker;
 } BlkMigDevState;
 
 typedef struct BlkMigBlock {
@@ -346,7 +347,8 @@ static void init_blk_migration_it(void *opaque, 
BlockDriverState *bs)
 bmds->completed_sectors = 0;
 bmds->shared_base = block_mig_state.shared_base;
 alloc_aio_bitmap(bmds);
-bdrv_set_in_use(bs, 1);
+error_setg(&bmds->blocker, "block device is in use by migration");
+bdrv_op_block_all(bs, bmds->blocker);
 bdrv_ref(bs);
 
 block_mig_state.total_sector_sum += sectors;
@@ -584,7 +586,8 @@ static void blk_mig_cleanup(void)
 blk_mig_lock();
 while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) {
 QSIMPLEQ_REMOVE_HEAD(&block_mig_state.bmds_list, entry);
-bdrv_set_in_use(bmds->bs, 0);
+bdrv_op_unblock_all(bmds->bs, bmds->blocker);
+error_free(bmds->blocker);
 bdrv_unref(bmds->bs);
 g_free(bmds->aio_bitmap);
 g_free(bmds);
diff --git a/block.c b/block.c
index abfca3a..c9cae4e 100644
--- a/block.c
+++ b/block.c
@@ -1870,7 +1870,6 @@ static void bdrv_move_feature_fields(BlockDriverState 
*bs_dest,
 bs_dest->refcnt = bs_src->refcnt;
 
 /* job */
-bs_dest->in_use = bs_src->in_use;
 bs_dest->job= bs_src->job;
 
 /* keep the same entry in bdrv_states */
@@ -1913,7 +1912,7 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState 
*bs_old)
 assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
 assert(bs_new->job == NULL);
 assert(bs_new->dev == NULL);
-assert(bs_new->in_use == 0);
+assert(bdrv_op_blocker_is_empty(bs_new));
 assert(bs_new->io_limits_enabled == false);
 assert(!throttle_have_timer(&bs_new->throttle_state));
 
@@ -1932,7 +1931,7 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState 
*bs_old)
 /* Check a few fields that should remain attached to the device */
 assert(bs_new->dev == NULL);
 assert(bs_new->job == NULL);
-assert(bs_new->in_use == 0);
+assert(bdrv_op_blocker_is_empty(bs_new));
 assert(bs_new->io_limits_enabled == false);
 assert(!throttle_have_timer(&bs_new->throttle_state));
 
@@ -1977,7 +1976,7 @@ static void bdrv_delete(BlockDriverState *bs)
 {
 assert(!bs->dev);
 assert(!bs->job);
-assert(!bs->in_use);
+assert(bdrv_op_blocker_is_empty(bs));
 assert(!bs->refcnt);
 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
 
@@ -2159,7 +2158,8 @@ int bdrv_commit(BlockDriverState *bs)
 return -ENOTSUP;
 }
 
-if (bdrv_in_use(bs) || bdrv_in_use(bs->backing_hd)) {
+if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT, NULL) ||
+bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT, NULL)) {
 return -EBUSY;
 }
 
@@ -3398,8 +3398,9 @@ int bdrv_truncate(BlockDriverState *bs, int64_t offset)
 return -ENOTSUP;
 if (bs->read_only)
 return -EACCES;
-if (bdrv_in_use(bs))
+if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_RESIZE, NULL)) {
 return -EBUSY;
+}
 ret = drv->bdrv_truncate(bs, offset);
 if (ret == 0) {
 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
@@ -5274,17 +5275,6 @@ bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
 return true;
 }
 
-void bdrv_set_in_use(BlockDriverState *bs, in

Re: [Qemu-devel] [PATCH v2 1/5] block: qemu-iotests - add common.qemu, for bash-controlled qemu tests

2014-04-09 Thread Fam Zheng
On Wed, 04/09 22:41, Jeff Cody wrote:
> This creates some common functions for bash language qemu-iotests
> to control, and communicate with, a running QEMU process.
> 
> 4 functions are introduced:
> 
> 1. _launch_qemu()
> This launches the QEMU process(es), and sets up the file
> descriptors and fifos for communication.  You can choose to
> launch each QEMU process listening for either QMP or HMP
> monitor.  You can call this function multiple times, and
> save the handle returned from each.  The returned handle is
> in $QEMU_HANDLE.  You must copy this value.
> 
> Commands 2 and 3 use the handle received from _launch_qemu(), to talk
> to the appropriate process.
> 
> 2. _send_qemu_cmd()
> Sends a command string, specified by $2, to QEMU.  If $2 is
> non-NULL, _send_qemu_cmd() will wait to receive $2 as a

Do you mean $3 in this sentence?

> required result string from QEMU.  Failure to receive $3 will
> cause the test to fail.  The command can optionally be retried
> $qemu_cmd_repeat number of times.
> 
> 3. _timed_wait_for()
> Waits for a response, for up to a default of 10 seconds.  If
> $2 is not seen in that time (anywhere in the response), then
> the test fails.  Primarily used by _send_qemu_cmd, but could
> be useful standalone, as well.  To prevent automatic exit
> (and therefore test failure), set $qemu_wait_no_error to a
> non-NULL value.  If $silent is a non-NULL value, then output
> to stdout will be suppressed.
> 
> 4. _cleanup_qemu()
> Kills the running QEMU processes, and removes the fifos.
> 
> Signed-off-by: Jeff Cody 
> ---
>  tests/qemu-iotests/common.qemu | 195 
> +
>  1 file changed, 195 insertions(+)
>  create mode 100644 tests/qemu-iotests/common.qemu
> 
> diff --git a/tests/qemu-iotests/common.qemu b/tests/qemu-iotests/common.qemu
> new file mode 100644
> index 000..12c42f1
> --- /dev/null
> +++ b/tests/qemu-iotests/common.qemu
> @@ -0,0 +1,195 @@
> +#!/bin/bash
> +#
> +# This allows for launching of multiple QEMU instances, with independent
> +# communication possible to each instance.
> +#
> +# Each instance can choose, at launch, to use either the QMP or the
> +# HMP (monitor) interface.
> +#
> +# All instances are cleaned up via _cleanup_qemu, including killing the
> +# running qemu instance.
> +#
> +# Copyright (C) 2014 Red Hat, Inc.
> +#
> +# This program is free software; you can redistribute it and/or modify
> +# it under the terms of the GNU General Public License as published by
> +# the Free Software Foundation; either version 2 of the License, or
> +# (at your option) any later version.
> +#
> +# This program is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +# GNU General Public License for more details.
> +#
> +# You should have received a copy of the GNU General Public License
> +# along with this program.  If not, see .
> +#
> +
> +QEMU_COMM_TIMEOUT=10
> +
> +QEMU_FIFO_IN="${TEST_DIR}/qmp-in-$$"
> +QEMU_FIFO_OUT="${TEST_DIR}/qmp-out-$$"
> +
> +QEMU_PID=
> +_QEMU_HANDLE=0
> +QEMU_HANDLE=0
> +
> +# If bash version is >= 4.1, these will be overwritten and dynamic
> +# file descriptor values assigned.
> +_out_fd=3
> +_in_fd=4
> +
> +# Wait for expected QMP response from QEMU.  Will time out
> +# after 10 seconds, which counts as failure.
> +#
> +# Override QEMU_COMM_TIMEOUT for a timeout different than the
> +# default 10 seconds
> +#
> +# $1: The handle to use
> +# $2+ All remaining arguments comprise the string to search for
> +#in the response.
> +#
> +# If $silent is set to anything but an empty string, then
> +# response is not echoed out.
> +function _timed_wait_for()
> +{
> +local h=${1}
> +shift
> +
> +QEMU_STATUS[$h]=0
> +while read -t ${QEMU_COMM_TIMEOUT} resp <&${QEMU_OUT[$h]}
> +do
> +if [ -z "${silent}" ]; then
> +echo "${resp}" | _filter_testdir | _filter_qemu \
> +   | _filter_qemu_io | _filter_qmp
> +fi
> +grep -q "${*}" < <(echo ${resp})
> +if [ $? -eq 0 ]; then
> +return
> +fi
> +done
> +QEMU_STATUS[$h]=-1
> +if [ -z "${qemu_wait_no_error}" ]; then
> +echo "Timeout waiting for ${*} on handle ${h}"
> +exit 1  # Timeout means the test failed
> +fi
> +}
> +
> +
> +# Sends QMP or HMP command to QEMU, and waits for the expected response
> +#
> +# $1:   QEMU handle to use
> +# $2:   String of the QMP command to send
> +# ${@: -1}  (Last string passed)
> +# String that the QEMU response should contain. If it is a null
> +# string, do not wait for a response
> +#
> +# Set qemu_cmd_repeat to the number of times to 

Re: [Qemu-devel] [PATCH v2 3/5] block: qemu-iotests - test for live migration

2014-04-09 Thread Fam Zheng
On Wed, 04/09 22:41, Jeff Cody wrote:
> This is an initial, simple live migration test from one
> running VM to another, using monitor commands.
> 
> This is also an example on using the new common.qemu functions
> for controlling multiple running qemu instances, for tests that
> need a live qemu vm.
> 
> Signed-off-by: Jeff Cody 
> ---
>  tests/qemu-iotests/089 | 97 
> ++
>  tests/qemu-iotests/089.out | 20 ++

I used 089 in my last image fleecing series (originally 083, but that was
already used). So one of us needs to shift the case number.

Thanks,
Fam

>  tests/qemu-iotests/group   |  1 +
>  3 files changed, 118 insertions(+)
>  create mode 100755 tests/qemu-iotests/089
>  create mode 100644 tests/qemu-iotests/089.out
> 
> diff --git a/tests/qemu-iotests/089 b/tests/qemu-iotests/089
> new file mode 100755
> index 000..22a7cf1
> --- /dev/null
> +++ b/tests/qemu-iotests/089
> @@ -0,0 +1,97 @@
> +#!/bin/bash
> +#
> +# Live migration test
> +#
> +# Performs a migration from one VM to another via monitor commands
> +#
> +# Copyright (C) 2014 Red Hat, Inc.
> +#
> +# This program is free software; you can redistribute it and/or modify
> +# it under the terms of the GNU General Public License as published by
> +# the Free Software Foundation; either version 2 of the License, or
> +# (at your option) any later version.
> +#
> +# This program is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +# GNU General Public License for more details.
> +#
> +# You should have received a copy of the GNU General Public License
> +# along with this program.  If not, see .
> +#
> +
> +# creator
> +owner=jc...@redhat.com
> +
> +seq=`basename $0`
> +echo "QA output created by $seq"
> +
> +here=`pwd`
> +status=1 # failure is the default!
> +
> +MIG_FIFO="${TEST_DIR}/migrate"
> +
> +_cleanup()
> +{
> +rm -f "${MIG_FIFO}"
> +_cleanup_qemu
> + _cleanup_test_img
> +
> +}
> +trap "_cleanup; exit \$status" 0 1 2 3 15
> +
> +# get standard environment, filters and checks
> +. ./common.rc
> +. ./common.filter
> +. ./common.qemu
> +
> +_supported_fmt qcow2
> +_supported_proto file
> +_supported_os Linux
> +
> +size=1G
> +
> +IMGOPTS="cluster_size=512" _make_test_img $size
> +
> +mkfifo "${MIG_FIFO}"
> +
> +echo
> +echo === Starting QEMU VM1 ===
> +echo
> +
> +qemu_comm_method="monitor"
> +_launch_qemu -drive file="${TEST_IMG}",cache=none,id=disk
> +h1=$QEMU_HANDLE
> +
> +echo
> +echo === Starting QEMU VM2 ===
> +echo
> +_launch_qemu -drive file="${TEST_IMG}",cache=none,id=disk \
> + -incoming "exec: cat '${MIG_FIFO}'"
> +h2=$QEMU_HANDLE
> +
> +echo
> +echo === VM 1: Migrate from VM1 to VM2  ===
> +echo
> +
> +silent=yes
> +_send_qemu_cmd $h1 'qemu-io disk "write 0 4M"' "(qemu)"
> +echo "vm1: qemu-io disk write complete"
> +_send_qemu_cmd $h1 "migrate \"exec: cat > '${MIG_FIFO}'\"" "(qemu)"
> +echo "vm1: live migration started"
> +qemu_cmd_repeat=20 _send_qemu_cmd $h1 "info migrate" "completed"
> +echo "vm1: live migration completed"
> +
> +echo
> +echo === VM 2: Post-migration, write to disk, verify running ===
> +echo
> +
> +_send_qemu_cmd $h2 'qemu-io disk "write 4M 1M"' "(qemu)"
> +echo "vm2: qemu-io disk write complete"
> +qemu_cmd_repeat=20 _send_qemu_cmd $h2 "info status" "running"
> +echo "vm2: qemu process running successfully"
> +
> +
> +echo "*** done"
> +rm -f $seq.full
> +status=0
> diff --git a/tests/qemu-iotests/089.out b/tests/qemu-iotests/089.out
> new file mode 100644
> index 000..4e9e6c9
> --- /dev/null
> +++ b/tests/qemu-iotests/089.out
> @@ -0,0 +1,20 @@
> +QA output created by 089
> +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
> +
> +=== Starting QEMU VM1 ===
> +
> +
> +=== Starting QEMU VM2 ===
> +
> +
> +=== VM 1: Migrate from VM1 to VM2 ===
> +
> +vm1: qemu-io disk write complete
> +vm1: live migration started
> +vm1: live migration completed
> +
> +=== VM 2: Post-migration, write to disk, verify running ===
> +
> +vm2: qemu-io disk write complete
> +vm2: qemu process running successfully
> +*** done
> diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
> index 864643d..73e6b5d 100644
> --- a/tests/qemu-iotests/group
> +++ b/tests/qemu-iotests/group
> @@ -95,3 +95,4 @@
>  086 rw auto quick
>  087 rw auto
>  088 rw auto
> +089 rw auto
> -- 
> 1.8.3.1
> 
> 



Re: [Qemu-devel] [PATCH v2 2/5] block: qemu-iotests - update 085 to use common.qemu

2014-04-09 Thread Fam Zheng
On Wed, 04/09 22:41, Jeff Cody wrote:
> The new functionality of common.qemu implements the QEMU control
> and communication functionality that was originally in test 085.
> 
> This removes that now-duplicate functionality, and uses the
> common.qemu functions.
> 

Just a note.

A quick grep shows 067, 071, 081 and 087 are also bash cases with a QEMU process.
Not necessarily in this series, but they are also candidates to convert.

Thanks,
Fam



Re: [Qemu-devel] [PATCH v2 5/5] block: qemu-iotests: make test 019 and 086 work with spaced pathnames

2014-04-10 Thread Fam Zheng
On Wed, 04/09 22:41, Jeff Cody wrote:
> Both tests 019 and 086 need proper quotations to work with pathnames
> that contain spaces.
> 
> Signed-off-by: Jeff Cody 
> ---
>  tests/qemu-iotests/019 | 2 +-
>  tests/qemu-iotests/086 | 8 
>  2 files changed, 5 insertions(+), 5 deletions(-)
> 
> diff --git a/tests/qemu-iotests/019 b/tests/qemu-iotests/019
> index e67445c..f5ecbf5 100755
> --- a/tests/qemu-iotests/019
> +++ b/tests/qemu-iotests/019
> @@ -96,7 +96,7 @@ mv "$TEST_IMG" "$TEST_IMG.orig"
>  for backing_option in "-B " "-o backing_file="; do
>  
>  echo
> -echo Testing conversion with $backing_option$TEST_IMG.base | 
> _filter_testdir | _filter_imgfmt
> +echo Testing conversion with $backing_option"$TEST_IMG.base" | 
> _filter_testdir | _filter_imgfmt
>  echo
>  $QEMU_IMG convert -O $IMGFMT $backing_option"$TEST_IMG.base" 
> "$TEST_IMG.orig" "$TEST_IMG"
>  
> diff --git a/tests/qemu-iotests/086 b/tests/qemu-iotests/086
> index 48fe85b..d9a80cf 100755
> --- a/tests/qemu-iotests/086
> +++ b/tests/qemu-iotests/086
> @@ -51,10 +51,10 @@ function run_qemu_img()
>  size=128M
>  
>  _make_test_img $size
> -$QEMU_IO -c 'write 0 1M' $TEST_IMG | _filter_qemu_io
> -$QEMU_IO -c 'write 2M 1M' $TEST_IMG | _filter_qemu_io
> -$QEMU_IO -c 'write 4M 1M' $TEST_IMG | _filter_qemu_io
> -$QEMU_IO -c 'write 32M 1M' $TEST_IMG | _filter_qemu_io
> +$QEMU_IO -c 'write 0 1M' "$TEST_IMG" | _filter_qemu_io
> +$QEMU_IO -c 'write 2M 1M' "$TEST_IMG" | _filter_qemu_io
> +$QEMU_IO -c 'write 4M 1M' "$TEST_IMG" | _filter_qemu_io
> +$QEMU_IO -c 'write 32M 1M' "$TEST_IMG" | _filter_qemu_io
>  
>  $QEMU_IMG convert -p -O $IMGFMT -f $IMGFMT "$TEST_IMG" "$TEST_IMG".base  
> 2>&1 |\
>  _filter_testdir | sed -e 's/\r/\n/g'
> -- 
> 1.8.3.1
> 
> 

Reviewed-by: Fam Zheng 



Re: [Qemu-devel] [PATCH v2 4/5] block: qemu-iotests - fix image cleanup when using spaced pathnames

2014-04-10 Thread Fam Zheng
On Wed, 04/09 22:41, Jeff Cody wrote:
> The _rm_test_img() function in common.rc did not quote the image
> file, which left droppings in the scratch directory (and performed
> a potentially unsafe rm -f).
> 
> This adds the necessary quotes.
> 
> Signed-off-by: Jeff Cody 
> ---
>  tests/qemu-iotests/common.rc | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc
> index 7f00883..195c564 100644
> --- a/tests/qemu-iotests/common.rc
> +++ b/tests/qemu-iotests/common.rc
> @@ -178,10 +178,10 @@ _rm_test_img()
>  local img=$1

Since we are quoting $img, should we quote $1 as well?

Fam

>  if [ "$IMGFMT" = "vmdk" ]; then
>  # Remove all the extents for vmdk
> -$QEMU_IMG info $img 2>/dev/null | grep 'filename:' | cut -f 2 -d: \
> +"$QEMU_IMG" info "$img" 2>/dev/null | grep 'filename:' | cut -f 2 
> -d: \
>  | xargs -I {} rm -f "{}"
>  fi
> -rm -f $img
> +rm -f "$img"
>  }
>  
>  _cleanup_test_img()
> -- 
> 1.8.3.1
> 
> 



Re: [Qemu-devel] [PATCH v2 5/6] qemu-img: Specify backing file for commit

2014-04-10 Thread Fam Zheng
On Tue, 04/08 14:50, Max Reitz wrote:
> Introduce a new parameter for qemu-img commit which may be used to
> explicitly specify the backing file unto which an image should be
> committed if the backing chain has more than a single layer.
> 
> Signed-off-by: Max Reitz 
> ---
>  qemu-img-cmds.hx |  4 ++--
>  qemu-img.c   | 22 +++---
>  qemu-img.texi|  8 +++-
>  3 files changed, 24 insertions(+), 10 deletions(-)
> 
> diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
> index 8bc55cd..7f62f6d 100644
> --- a/qemu-img-cmds.hx
> +++ b/qemu-img-cmds.hx
> @@ -22,9 +22,9 @@ STEXI
>  ETEXI
>  
>  DEF("commit", img_commit,
> -"commit [-q] [-f fmt] [-t cache] [-p] filename")
> +"commit [-q] [-f fmt] [-t cache] [-b backing_file] [-p] filename")
>  STEXI
> -@item commit [-q] [-f @var{fmt}] [-t @var{cache}] [-p] @var{filename}
> +@item commit [-q] [-f @var{fmt}] [-t @var{cache}] [-b @var{backing_file}] 
> [-p] @var{filename}
>  ETEXI
>  
>  DEF("compare", img_compare,
> diff --git a/qemu-img.c b/qemu-img.c
> index 0a9eff7..9d4bdbc 100644
> --- a/qemu-img.c
> +++ b/qemu-img.c
> @@ -725,15 +725,16 @@ static void run_block_job(BlockJob *job, Error **errp)
>  static int img_commit(int argc, char **argv)
>  {
>  int c, ret, flags;
> -const char *filename, *fmt, *cache;
> +const char *filename, *fmt, *cache, *base;
>  BlockDriverState *bs, *base_bs;
>  bool progress = false, quiet = false;
>  Error *local_err = NULL;
>  
>  fmt = NULL;
>  cache = BDRV_DEFAULT_CACHE;
> +base = NULL;
>  for(;;) {
> -c = getopt(argc, argv, "f:ht:qp");
> +c = getopt(argc, argv, "f:ht:b:qp");
>  if (c == -1) {
>  break;
>  }
> @@ -748,6 +749,9 @@ static int img_commit(int argc, char **argv)
>  case 't':
>  cache = optarg;
>  break;
> +case 'b':
> +base = optarg;
> +break;
>  case 'p':
>  progress = true;
>  break;
> @@ -782,12 +786,16 @@ static int img_commit(int argc, char **argv)
>  qemu_progress_init(progress, 1.f);
>  qemu_progress_print(0.f, 100);
>  
> -/* This is different from QMP, which by default uses the deepest file in 
> the
> - * backing chain (i.e., the very base); however, the traditional 
> behavior of
> - * qemu-img commit is using the immediate backing file. */
> -base_bs = bs->backing_hd;
> +if (base) {
> +base_bs = bdrv_find_backing_image(bs, base);
> +} else {
> +/* This is different from QMP, which by default uses the deepest 
> file in
> + * the backing chain (i.e., the very base); however, the traditional
> + * behavior of qemu-img commit is using the immediate backing file. 
> */
> +base_bs = bs->backing_hd;
> +}
>  if (!base_bs) {
> -error_set(&local_err, QERR_BASE_NOT_FOUND, "NULL");
> +error_set(&local_err, QERR_BASE_NOT_FOUND, base ?: "NULL");
>  goto done;
>  }
>  
> diff --git a/qemu-img.texi b/qemu-img.texi
> index 1a9c08f..4a9f493 100644
> --- a/qemu-img.texi
> +++ b/qemu-img.texi
> @@ -140,7 +140,7 @@ this case. @var{backing_file} will never be modified 
> unless you use the
>  The size can also be specified using the @var{size} option with @code{-o},
>  it doesn't need to be specified separately in this case.
>  
> -@item commit [-q] [-f @var{fmt}] [-t @var{cache}] [-p] @var{filename}
> +@item commit [-q] [-f @var{fmt}] [-t @var{cache}] [-b @var{backing_file}] 
> [-p] @var{filename}
>  
>  Commit the changes recorded in @var{filename} in its base image or backing 
> file.
>  If the backing file is smaller than the snapshot, then the backing file will 
> be
> @@ -149,6 +149,12 @@ the backing file, the backing file will not be 
> truncated.  If you want the
>  backing file to match the size of the smaller snapshot, you can safely 
> truncate
>  it yourself once the commit operation successfully completes.
>  
> +If the backing chain of the given image file @var{filename} has more than one
> +layer, the backing file unto which the changes shall be committed may be
> +specified as @var{backing_file} (which has to be part of @var{filename}'s
> +backing chain). If @var{filename} is not specified, the immediate backing 
> file

s/@var{filename}/@var{backing_file}/ ?

BTW how about just calling it 'base' as in qmp commands, because backing_file
has usages in (slightly) different context of create.

Other than the two questions,

Reviewed-by: Fam Zheng 

> +of the top image (which is @var{filename}) will be used.
> +
>  @item compare [-f @var{fmt}] [-F @var{fmt}] [-p] [-s] [-q] @var{filename1} 
> @var{filename2}
>  
>  Check if two images have the same content. You can compare images with
> -- 
> 1.9.1
> 
> 



Re: [Qemu-devel] [PATCH v2 4/5] block: qemu-iotests - fix image cleanup when using spaced pathnames

2014-04-10 Thread Fam Zheng
On Thu, 04/10 14:09, Jeff Cody wrote:
> On Thu, Apr 10, 2014 at 08:48:10AM -0600, Eric Blake wrote:
> > On 04/10/2014 08:43 AM, Eric Blake wrote:
> > > On 04/10/2014 06:53 AM, Jeff Cody wrote:
> > > 
> >  +++ b/tests/qemu-iotests/common.rc
> >  @@ -178,10 +178,10 @@ _rm_test_img()
> >   local img=$1
> > >>>
> > >>> Since we are quoting $img, should we quote $1 as well?
> > >>>
> > 
> > > 
> > > http://austingroupbugs.net/view.php?id=351
> > > 
> > > But even with the notion of an assignment-context argument added to a
> > > future version of POSIX, the reality is that given the present standard,
> > > it's safer to either use "" to ensure no word splitting:
> > 
> > Well, if you were trying to be portable to multiple shells, then it
> > would matter.  But as this script is explicitly being run under
> > /bin/bash, and as bash already has support for declaration utilities
> > where local is one such utility, your script as written is safe without
> > "" in the arguments to local.  So I'm fine whether you choose to change
> > it in a respin or to leave it as written in this version.

Thanks for the thorough explanation, Eric!

> 
> Hi Eric,
> 
> Thanks - I consulted specifically with just the bash documentation, so
> you are right, this script (and likely most of qemu-iotests) is
> bash-only.
> 
> That particular line is context as well, and not an actual change - so
> while it may be a good idea to quote it to make the scripts closer to
> posix-only, my guess is there are quite a few similar lines throughout
> all the qemu-iotests scripts.
> 
> Given that, if we address that it would probably make sense to do that
> in a bash->posix conversion series for all the scripts (likely a low
> priority, however).
> 

OK :)

Thanks,
Fam



Re: [Qemu-devel] [Qemu-stable] [PATCH] block-commit: speed is an optional parameter

2014-04-10 Thread Fam Zheng
On Thu, 04/10 19:36, Max Reitz wrote:
> As speed is an optional parameter for the QMP block-commit command, it
> should be set to 0 if not given (as it is undefined if has_speed is
> false), that is, the speed should not be limited.
> 
> Signed-off-by: Max Reitz 
> Reviewed-by: Eric Blake 
> ---
> This patch was previously part of the "qemu-img: Implement commit like
> QMP" series, but as this is a general bugfix, it has been separated.
> ---

Reviewed-by: Fam Zheng 

The (generated) caller qmp_marshal_input_block_commit() actually leaves speed
uninitialized, so we're not safe without this patch:

int qmp_marshal_input_block_commit(Monitor *mon, const QDict *qdict, QObject 
**ret)
{
Error *local_err = NULL;
Error **errp = &local_err;
QDict *args = (QDict *)qdict;
QmpInputVisitor *mi;
QapiDeallocVisitor *md;
Visitor *v;
char * device = NULL;
bool has_base = false;
char * base = NULL;
char * top = NULL;
bool has_speed = false;
int64_t speed;

I'm wondering what the reason is to initialize pointers (like base and device)
while leaving int64_t values uninitialized in the code generator?

Fam

>  blockdev.c | 4 
>  1 file changed, 4 insertions(+)
> 
> diff --git a/blockdev.c b/blockdev.c
> index c3422a1..5dd01ea 100644
> --- a/blockdev.c
> +++ b/blockdev.c
> @@ -1876,6 +1876,10 @@ void qmp_block_commit(const char *device,
>   */
>  BlockdevOnError on_error = BLOCKDEV_ON_ERROR_REPORT;
>  
> +if (!has_speed) {
> +speed = 0;
> +}
> +
>  /* drain all i/o before commits */
>  bdrv_drain_all();
>  
> -- 
> 1.9.1
> 
> 



Re: [Qemu-devel] [PATCH] iscsi: Remember to set ret for iscsi_open in error case

2014-04-10 Thread Fam Zheng
On Thu, 04/10 13:11, Kevin Wolf wrote:
> Am 10.04.2014 um 03:33 hat Fam Zheng geschrieben:
> > Signed-off-by: Fam Zheng 
> 
> Thanks, applied to the block branch.
> 

Is this going into 2.0?

Thanks,
Fam



Re: [Qemu-devel] qapi-commands.py generates code that uses uninitialized variables

2014-04-11 Thread Fam Zheng
On Fri, 04/11 08:27, Peter Maydell wrote:
> On 11 April 2014 02:40, Eric Blake  wrote:
> > We uncovered a real bug that would be fixed by this patch:
> > https://lists.gnu.org/archive/html/qemu-devel/2014-04/msg01745.html
> 
> No, that's a bug in the called code. The API here defines
> that for optional parameters, if the have_foo bool is false
> then the foo argument isn't set. The generated code
> can't know the correct default value (it just happens
> to be 0 in the case you point out, but what if the default
> speed were 100?) so this must be handled by the called
> code.
> 

The default value of a variable isn't the default value for the API logic, so
apparently the called code must always handle both have_foo and foo. But there
is still a point in taking this patch from the language perspective: it avoids
passing an unset variable as a parameter from generated code.

Thanks,
Fam




[Qemu-devel] [PATCH v2] block: Handle error of bdrv_getlength in bdrv_create_dirty_bitmap

2014-04-14 Thread Fam Zheng
bdrv_getlength could fail, check the return value before using it.

Signed-off-by: Fam Zheng 

---
v2: Make use of error_setg_errno and -errno. (Kevin)

Signed-off-by: Fam Zheng 
---
 block-migration.c | 28 
 block.c   | 10 --
 block/mirror.c|  5 -
 include/block/block.h |  3 ++-
 4 files changed, 38 insertions(+), 8 deletions(-)

diff --git a/block-migration.c b/block-migration.c
index 897fdba..2a6df66 100644
--- a/block-migration.c
+++ b/block-migration.c
@@ -310,13 +310,26 @@ static int mig_save_device_bulk(QEMUFile *f, 
BlkMigDevState *bmds)
 
 /* Called with iothread lock taken.  */
 
-static void set_dirty_tracking(void)
+static int set_dirty_tracking(void)
 {
 BlkMigDevState *bmds;
 
 QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
-bmds->dirty_bitmap = bdrv_create_dirty_bitmap(bmds->bs, BLOCK_SIZE);
+bmds->dirty_bitmap = bdrv_create_dirty_bitmap(bmds->bs, BLOCK_SIZE,
+  NULL);
+if (!bmds->dirty_bitmap) {
+goto fail;
+}
+}
+return 0;
+
+fail:
+QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
+if (bmds->dirty_bitmap) {
+bdrv_release_dirty_bitmap(bmds->bs, bmds->dirty_bitmap);
+}
 }
+return -errno;
 }
 
 static void unset_dirty_tracking(void)
@@ -611,10 +624,17 @@ static int block_save_setup(QEMUFile *f, void *opaque)
 block_mig_state.submitted, block_mig_state.transferred);
 
 qemu_mutex_lock_iothread();
-init_blk_migration(f);
 
 /* start track dirty blocks */
-set_dirty_tracking();
+ret = set_dirty_tracking();
+
+if (ret) {
+qemu_mutex_unlock_iothread();
+return ret;
+}
+
+init_blk_migration(f);
+
 qemu_mutex_unlock_iothread();
 
 ret = flush_blks(f);
diff --git a/block.c b/block.c
index 990a754..e24b955 100644
--- a/block.c
+++ b/block.c
@@ -5096,7 +5096,8 @@ bool bdrv_qiov_is_aligned(BlockDriverState *bs, 
QEMUIOVector *qiov)
 return true;
 }
 
-BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int 
granularity)
+BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int 
granularity,
+  Error **errp)
 {
 int64_t bitmap_size;
 BdrvDirtyBitmap *bitmap;
@@ -5105,7 +5106,12 @@ BdrvDirtyBitmap 
*bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity)
 
 granularity >>= BDRV_SECTOR_BITS;
 assert(granularity);
-bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS);
+bitmap_size = bdrv_getlength(bs);
+if (bitmap_size < 0) {
+error_setg_errno(errp, bitmap_size, "could not get length of device");
+return NULL;
+}
+bitmap_size >>= BDRV_SECTOR_BITS;
 bitmap = g_malloc0(sizeof(BdrvDirtyBitmap));
 bitmap->bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1);
 QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
diff --git a/block/mirror.c b/block/mirror.c
index 0ef41f9..2618c37 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -605,7 +605,10 @@ static void mirror_start_job(BlockDriverState *bs, 
BlockDriverState *target,
 s->granularity = granularity;
 s->buf_size = MAX(buf_size, granularity);
 
-s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity);
+s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity, errp);
+if (!s->dirty_bitmap) {
+return;
+}
 bdrv_set_enable_write_cache(s->target, true);
 bdrv_set_on_error(s->target, on_target_error, on_target_error);
 bdrv_iostatus_enable(s->target);
diff --git a/include/block/block.h b/include/block/block.h
index b3230a2..2b51eec 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -429,7 +429,8 @@ bool bdrv_qiov_is_aligned(BlockDriverState *bs, 
QEMUIOVector *qiov);
 
 struct HBitmapIter;
 typedef struct BdrvDirtyBitmap BdrvDirtyBitmap;
-BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int 
granularity);
+BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int 
granularity,
+  Error **errp);
 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap);
 BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs);
 int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t 
sector);
-- 
1.9.2




Re: [Qemu-devel] [PATCH] vmdk: Allow vmdk_create to work with protocol

2013-12-19 Thread Fam Zheng

On 2013年12月19日 21:12, Stefan Hajnoczi wrote:

On Tue, Dec 17, 2013 at 08:00:00PM +0800, Fam Zheng wrote:

@@ -1511,48 +1521,55 @@ static int vmdk_create_extent(const char *filename, 
int64_t filesize,
  header.check_bytes[3] = 0xa;

  /* write all the data */
-ret = qemu_write_full(fd, &magic, sizeof(magic));
-if (ret != sizeof(magic)) {
-ret = -errno;
+ret = bdrv_pwrite(bs, 0, &magic, sizeof(magic));
+if (ret < 0) {
+error_set(errp, QERR_IO_ERROR);
  goto exit;
  }
-ret = qemu_write_full(fd, &header, sizeof(header));
+ret = bdrv_pwrite(bs, sizeof(magic), &header, sizeof(header));
  if (ret != sizeof(header)) {
+error_set(errp, QERR_IO_ERROR);
  ret = -errno;


This line should be deleted.

Also, I noticed you changed ret != sizeof(magic) to ret < 0 for the
magic number bdrv_pwrite() but did not change the condition for the
header write.  Please keep the error handling condition consistent.



OK.


  goto exit;
  }

-ret = ftruncate(fd, le64_to_cpu(header.grain_offset) << 9);
+ret = bdrv_truncate(bs, (le64_to_cpu(header.grain_offset)) << 9);


Why add parentheses around le64_to_cpu()?



Unintended. Will remove.


  if (ret < 0) {
-ret = -errno;
-goto exit;
+error_setg(errp, "Could not truncate file");


goto exit?



Yes, thanks.


  }

  /* write grain directory */
-lseek(fd, le64_to_cpu(header.rgd_offset) << 9, SEEK_SET);
-for (i = 0, tmp = le64_to_cpu(header.rgd_offset) + gd_size;
+gd_buf_size = gd_sectors * BDRV_SECTOR_SIZE * sizeof(*gd_buf);
+gd_buf = g_malloc0(gd_buf_size);
+for (i = 0, tmp = le64_to_cpu(header.rgd_offset) + gd_sectors;
   i < gt_count; i++, tmp += gt_size) {
-ret = qemu_write_full(fd, &tmp, sizeof(tmp));
-if (ret != sizeof(tmp)) {
-ret = -errno;
-goto exit;
-}


Was this old code not endian-safe?  It appears to be writing native
endian values.  The new code is different.



Yes, a bonus bug fix. I'll add note in the commit message.


@@ -1771,33 +1791,34 @@ static int vmdk_create(const char *filename, 
QEMUOptionParameter *options,
 total_size / (int64_t)(63 * number_heads * 512),
 number_heads,
 adapter_type);
-if (split || flat) {
-fd = qemu_open(filename,
-   O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
-   0644);
+desc_len = strlen(desc);
+/* the descriptor offset = 0x200 */
+if (!split && !flat) {
+desc_offset = 0x200;
  } else {
-fd = qemu_open(filename,
-   O_WRONLY | O_BINARY | O_LARGEFILE,
-   0644);
+ret = bdrv_create_file(filename, options, &local_err);


Missing error handling if bdrv_create_file() fails.



OK.


  }
-if (fd < 0) {
-ret = -errno;
+ret = bdrv_file_open(&new_bs, filename, NULL, BDRV_O_RDWR, &local_err);
+if (ret < 0) {
+error_setg_errno(errp, -ret, "Could not write description");
  goto exit;
  }
-/* the descriptor offset = 0x200 */
-if (!split && !flat && 0x200 != lseek(fd, 0x200, SEEK_SET)) {
-ret = -errno;
-goto close_exit;
+ret = bdrv_pwrite(new_bs, desc_offset, desc, desc_len);
+if (ret < 0) {
+error_setg_errno(errp, -ret, "Could not write description");


goto close_exit?



OK.


  }
-ret = qemu_write_full(fd, desc, strlen(desc));
-if (ret != strlen(desc)) {
-ret = -errno;
-goto close_exit;
+/* bdrv_pwrite write padding zeros to align to sector, we don't need that
+ * for description file */
+if (desc_offset == 0) {
+ret = bdrv_truncate(new_bs, desc_offset + desc_len);


We know desc_offset == 0, so desc_offset (0) + desc_len is really just
desc_len.



OK.

Thanks for the review!

Fam



[Qemu-devel] [PATCH v2] vmdk: Allow vmdk_create to work with protocol

2013-12-19 Thread Fam Zheng
This improves vmdk_create to use bdrv_* functions to replace qemu_open
and other fd functions. The error handling is improved as well. One
difference is that bdrv_pwrite will round up the buffer to sectors, so for
the description file, an extra bdrv_truncate is used in the end to drop
the padding zeros.

Notes:

 - As a bonus bug fix, the correct endianness is now used when initializing
   GD entries.

 - ROUND_UP and DIV_ROUND_UP are used where possible.

I tested that new code produces exactly the same file as previously.

Signed-off-by: Fam Zheng 

---
v2: Address Stefan's comments:

Signed-off-by: Fam Zheng 
---
 block/vmdk.c | 164 ++-
 1 file changed, 95 insertions(+), 69 deletions(-)

diff --git a/block/vmdk.c b/block/vmdk.c
index 0734bc2..ef078f6 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -1447,23 +1447,33 @@ static int coroutine_fn 
vmdk_co_write_zeroes(BlockDriverState *bs,
 }
 
 static int vmdk_create_extent(const char *filename, int64_t filesize,
-  bool flat, bool compress, bool zeroed_grain)
+  bool flat, bool compress, bool zeroed_grain,
+  Error **errp)
 {
 int ret, i;
-int fd = 0;
+BlockDriverState *bs = NULL;
 VMDK4Header header;
-uint32_t tmp, magic, grains, gd_size, gt_size, gt_count;
+Error *local_err;
+uint32_t tmp, magic, grains, gd_sectors, gt_size, gt_count;
+uint32_t *gd_buf = NULL;
+int gd_buf_size;
 
-fd = qemu_open(filename,
-   O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
-   0644);
-if (fd < 0) {
-return -errno;
+ret = bdrv_create_file(filename, NULL, &local_err);
+if (ret < 0) {
+error_propagate(errp, local_err);
+goto exit;
 }
+
+ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR, &local_err);
+if (ret < 0) {
+error_propagate(errp, local_err);
+goto exit;
+}
+
 if (flat) {
-ret = ftruncate(fd, filesize);
+ret = bdrv_truncate(bs, filesize);
 if (ret < 0) {
-ret = -errno;
+error_setg(errp, "Could not truncate file");
 }
 goto exit;
 }
@@ -1474,24 +1484,23 @@ static int vmdk_create_extent(const char *filename, 
int64_t filesize,
| (compress ? VMDK4_FLAG_COMPRESS | VMDK4_FLAG_MARKER : 0)
| (zeroed_grain ? VMDK4_FLAG_ZERO_GRAIN : 0);
 header.compressAlgorithm = compress ? VMDK4_COMPRESSION_DEFLATE : 0;
-header.capacity = filesize / 512;
+header.capacity = filesize / BDRV_SECTOR_SIZE;
 header.granularity = 128;
-header.num_gtes_per_gt = 512;
+header.num_gtes_per_gt = BDRV_SECTOR_SIZE;
 
-grains = (filesize / 512 + header.granularity - 1) / header.granularity;
-gt_size = ((header.num_gtes_per_gt * sizeof(uint32_t)) + 511) >> 9;
-gt_count =
-(grains + header.num_gtes_per_gt - 1) / header.num_gtes_per_gt;
-gd_size = (gt_count * sizeof(uint32_t) + 511) >> 9;
+grains = DIV_ROUND_UP(filesize / BDRV_SECTOR_SIZE, header.granularity);
+gt_size = DIV_ROUND_UP(header.num_gtes_per_gt * sizeof(uint32_t),
+   BDRV_SECTOR_SIZE);
+gt_count = DIV_ROUND_UP(grains, header.num_gtes_per_gt);
+gd_sectors = DIV_ROUND_UP(gt_count * sizeof(uint32_t), BDRV_SECTOR_SIZE);
 
 header.desc_offset = 1;
 header.desc_size = 20;
 header.rgd_offset = header.desc_offset + header.desc_size;
-header.gd_offset = header.rgd_offset + gd_size + (gt_size * gt_count);
+header.gd_offset = header.rgd_offset + gd_sectors + (gt_size * gt_count);
 header.grain_offset =
-   ((header.gd_offset + gd_size + (gt_size * gt_count) +
- header.granularity - 1) / header.granularity) *
-header.granularity;
+ROUND_UP(header.gd_offset + gd_sectors + (gt_size * gt_count),
+ header.granularity);
 /* swap endianness for all header fields */
 header.version = cpu_to_le32(header.version);
 header.flags = cpu_to_le32(header.flags);
@@ -1511,48 +1520,55 @@ static int vmdk_create_extent(const char *filename, 
int64_t filesize,
 header.check_bytes[3] = 0xa;
 
 /* write all the data */
-ret = qemu_write_full(fd, &magic, sizeof(magic));
-if (ret != sizeof(magic)) {
-ret = -errno;
+ret = bdrv_pwrite(bs, 0, &magic, sizeof(magic));
+if (ret < 0) {
+error_set(errp, QERR_IO_ERROR);
 goto exit;
 }
-ret = qemu_write_full(fd, &header, sizeof(header));
-if (ret != sizeof(header)) {
-ret = -errno;
+ret = bdrv_pwrite(bs, sizeof(magic), &header, sizeof(header));
+if (ret < 0) {
+error_set(errp, QERR_IO_ERROR);
 goto exit;
 }
 
-ret = ftruncate(fd, le64_to_cpu(header.grain_offset) << 9);
+ret = bdrv_truncate(bs, le64_to_cpu(header.grain_offset) <

[Qemu-devel] [PATCH] acpi unit-test: Create temporary disk file under /tmp

2013-12-19 Thread Fam Zheng
As in other tests, the image file is created in /tmp rather than the current
dir. Thus there will not be an unignored file under tests for an in-tree
build.

Signed-off-by: Fam Zheng 
---
 tests/acpi-test.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/acpi-test.c b/tests/acpi-test.c
index ca83b1d6..e4f31b7 100644
--- a/tests/acpi-test.c
+++ b/tests/acpi-test.c
@@ -116,7 +116,7 @@ static uint8_t boot_sector[0x200] = {
 [0x1FF] = 0xAA,
 };
 
-static const char *disk = "tests/acpi-test-disk.raw";
+static const char *disk = "/tmp/qtest-acpi-test-disk.raw";
 
 static void free_test_data(test_data *data)
 {
-- 
1.8.5.1




Re: [Qemu-devel] [PATCH] acpi unit-test: Create temporary disk file under /tmp

2013-12-20 Thread Fam Zheng

On 12/20/13 16:36, Michael Tokarev wrote:

20.12.2013 11:05, Fam Zheng wrote:

As in other tests, the image file is created in /tmp rather than the current
dir. Thus there will not be an unignored file under tests for an in-tree
build.

Signed-off-by: Fam Zheng 
---
  tests/acpi-test.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/acpi-test.c b/tests/acpi-test.c
index ca83b1d6..e4f31b7 100644
--- a/tests/acpi-test.c
+++ b/tests/acpi-test.c
@@ -116,7 +116,7 @@ static uint8_t boot_sector[0x200] = {
  [0x1FF] = 0xAA,
  };

-static const char *disk = "tests/acpi-test-disk.raw";
+static const char *disk = "/tmp/qtest-acpi-test-disk.raw";


Please, PLEASE do NOT do this.

If you want a temp dir, create one in qemu source directory
(there's no urge to be able to specify one at build/run time,
a symlink created by user will do), -- so that all this junk
will be in a single dir easy to remove.

But never, ever, use /tmp like this, not here and not anywhere
else.  When you use /tmp, first, there's $TMPDIR, and second,
much more important, there are apis like mkstemp(3) or
tmpfile(3), or tempnam(3), -- THIS is the only proper way to
use common temporary directories.

Never, ever, create fixed (or even predictable, with getpid()
for example) files in public temporary dirs.  Never.

For this issue, I think a better solution is to actually remove
this file on make clean.  Now you're just moving an unignored
file elsewhere, which isn't good by its own.



OK, will take your suggestion. Thanks for explanation.

Fam



[Qemu-devel] [PATCH] acpi unit-test: Remove temporary disk after test

2013-12-25 Thread Fam Zheng
Signed-off-by: Fam Zheng 
---
 tests/acpi-test.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/acpi-test.c b/tests/acpi-test.c
index ca83b1d6..df1af83 100644
--- a/tests/acpi-test.c
+++ b/tests/acpi-test.c
@@ -382,6 +382,7 @@ int main(int argc, char *argv[])
 {
 const char *arch = qtest_get_arch();
 FILE *f = fopen(disk, "w");
+int ret;
 fwrite(boot_sector, 1, sizeof boot_sector, f);
 fclose(f);
 
@@ -390,5 +391,7 @@ int main(int argc, char *argv[])
 if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) {
 qtest_add_func("acpi/tcg", test_acpi_tcg);
 }
-return g_test_run();
+ret = g_test_run();
+unlink(disk);
+return ret;
 }
-- 
1.8.5.1




Re: [Qemu-devel] [PATCHv4] block: add native support for NFS

2013-12-25 Thread Fam Zheng
+.help = "URL to the NFS file",
+},
+{ /* end of list */ }
+},
+};
+
+static void nfs_client_close(NFSClient *client)
+{
+if (client->context) {
+if (client->fh) {
+nfs_close(client->context, client->fh);
+}
+qemu_aio_set_fd_handler(nfs_get_fd(client->context), NULL, NULL, NULL);
+nfs_destroy_context(client->context);
+}
+memset(client, 0, sizeof(NFSClient));
+}
+
+static void nfs_file_close(BlockDriverState *bs)
+{
+NFSClient *client = bs->opaque;
+nfs_client_close(client);
+}
+
+static int64_t nfs_client_open(NFSClient *client, const char *filename,
+   int flags, Error **errp)
+{
+int ret = -EINVAL;
+URI *uri;
+char *file = NULL, *strp = NULL;
+struct stat st;
+
+uri = uri_parse(filename);
+if (!uri) {
+error_setg(errp, "Invalid URL specified.");


Could drop the period in the end of error message, but I don't mind 
merging it as is:


Reviewed-by: Fam Zheng 

Thanks


+goto fail;
+}
+strp = strrchr(uri->path, '/');
+if (strp == NULL) {
+error_setg(errp, "Invalid URL specified.");
+goto fail;
+}
+file = g_strdup(strp);
+*strp = 0;
+
+client->context = nfs_init_context();
+if (client->context == NULL) {
+error_setg(errp, "Failed to init NFS context");
+goto fail;
+}
+
+ret = nfs_mount(client->context, uri->server, uri->path);
+if (ret < 0) {
+error_setg(errp, "Failed to mount nfs share: %s",
+nfs_get_error(client->context));
+goto fail;
+}
+
+if (flags & O_CREAT) {
+ret = nfs_creat(client->context, file, 0600, &client->fh);
+if (ret < 0) {
+error_setg(errp, "Failed to create file: %s",
+ nfs_get_error(client->context));
+goto fail;
+}
+} else {
+ret = nfs_open(client->context, file, flags, &client->fh);
+if (ret < 0) {
+error_setg(errp, "Failed to open file : %s",
+   nfs_get_error(client->context));
+goto fail;
+}
+}
+
+ret = nfs_fstat(client->context, client->fh, &st);
+if (ret < 0) {
+error_setg(errp, "Failed to fstat file: %s",
+   nfs_get_error(client->context));
+goto fail;
+}
+
+ret = DIV_ROUND_UP(st.st_size, BDRV_SECTOR_SIZE);
+client->has_zero_init = S_ISREG(st.st_mode);
+goto out;
+fail:
+nfs_client_close(client);
+out:
+uri_free(uri);
+g_free(file);
+return ret;
+}
+
+static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags,
+ Error **errp) {
+NFSClient *client = bs->opaque;
+int64_t ret;
+QemuOpts *opts;
+Error *local_err = NULL;
+
+opts = qemu_opts_create_nofail(&runtime_opts);
+qemu_opts_absorb_qdict(opts, options, &local_err);
+if (error_is_set(&local_err)) {
+qerror_report_err(local_err);
+error_free(local_err);
+return -EINVAL;
+}
+ret = nfs_client_open(client, qemu_opt_get(opts, "filename"),
+  (flags & BDRV_O_RDWR) ? O_RDWR : O_RDONLY,
+  errp);
+if (ret < 0) {
+return ret;
+}
+bs->total_sectors = ret;
+return 0;
+}
+
+static int nfs_file_create(const char *filename, QEMUOptionParameter *options,
+   Error **errp)
+{
+int ret = 0;
+int64_t total_size = 0;
+NFSClient *client = g_malloc0(sizeof(NFSClient));
+
+/* Read out options */
+while (options && options->name) {
+if (!strcmp(options->name, "size")) {
+total_size = options->value.n;
+}
+options++;
+}
+
+ret = nfs_client_open(client, filename, O_CREAT, errp);
+if (ret < 0) {
+goto out;
+}
+ret = nfs_ftruncate(client->context, client->fh, total_size);
+out:
+nfs_client_close(client);
+g_free(client);
+return ret;
+}
+
+static int nfs_has_zero_init(BlockDriverState *bs)
+{
+NFSClient *client = bs->opaque;
+return client->has_zero_init;
+}
+
+static int64_t nfs_get_allocated_file_size(BlockDriverState *bs)
+{
+NFSClient *client = bs->opaque;
+NFSRPC task = {0};
+struct stat st;
+
+task.st = &st;
+if (nfs_fstat_async(client->context, client->fh, nfs_co_generic_cb,
+&task) != 0) {
+return -ENOMEM;
+}
+
+while (!task.complete) {
+nfs_set_events(client);
+qemu_aio_wait();
+}
+
+return (task.status < 0 ? task.status : st.st_blocks * st.st_blksize);
+}
+
+static int nfs_file_truncat

Re: [Qemu-devel] [PATCHv4] block: add native support for NFS

2013-12-26 Thread Fam Zheng

On 2013年12月26日 14:10, ronnie sahlberg wrote:

On Wed, Dec 25, 2013 at 9:42 PM, Fam Zheng  wrote:

On 2013年12月21日 00:04, Peter Lieven wrote:


This patch adds native support for accessing images on NFS shares without
the requirement to actually mount the entire NFS share on the host.

NFS Images can simply be specified by an url of the form:
nfs://<host>/<export>/<filename>

For example:
qemu-img create -f qcow2 nfs://10.0.0.1/qemu-images/test.qcow2

You need LibNFS from Ronnie Sahlberg available at:
 git://github.com/sahlberg/libnfs.git
for this to work.

During configure it is automatically probed for libnfs and support
is enabled on-the-fly. You can forbid or enforce libnfs support
with --disable-libnfs or --enable-libnfs respectively.

Due to NFS restrictions you might need to execute your binaries
as root, allow them to open privileged ports (<1024) or specify
the insecure option on the NFS server.



What are the error messages like if the privilege is missing? Is root always
required for this to work?


NFS servers often default to only allowing client connections that
originate from a system port.
I know three different ways to solve this:

1, Run QEMU as root, which allows libnfs to bind to a system port.
This is probably suboptimal since I guess most people would want to
avoid running qemu as root if they can avoid it.

2, Change the NFS server to allow connections from nonsystem ports. On
linux NFS servers this is done by adding
"insecure" as the export option in /etc/exports.
This may be preferable to option 1 (since secure/insecure does not
really add much security in the first place).

3, Assign the capability to qemu to bind to system ports when running
as non-root user.
This is probably the most attractive option of the three.
You can still run qemu as non-root and you don't have to change the
security mode on the NFS server.
It is highly non-portable though and only works on platforms that
provide capabilities.
On linux you add this capability using :
sudo setcap 'cap_net_bind_service=+ep' /path/to/executable




Thank you very much for the elaboration, Ronnie. It's clear to me now and 
hopefully this can help users with their setups too.


Fam




Re: [Qemu-devel] [RFC PATCH] qcow2: add a readahead cache for qcow2_decompress_cluster

2013-12-26 Thread Fam Zheng

On 2013年12月27日 00:19, Peter Lieven wrote:

While evaluating compressed qcow2 images as a good basis for
virtual machine templates, I found out that there are a lot
of partly redundant (compressed clusters have common physical
sectors) and relatively short reads.

This doesn't hurt if the image resides on a local
filesystem where we can benefit from the local page cache,
but it adds a lot of penalty when accessing remote images
on NFS or similar exports.

This patch effectively implements a readahead of 2 * cluster_size,
which is 2 * 64kB by default, resulting in 128kB readahead. This
is the common setting for Linux, for instance.

For example this leads to the following times when converting
a compressed qcow2 image to a local tmpfs partition.

Old:
time ./qemu-img convert nfs://10.0.0.1/export/VC-Ubuntu-LTS-12.04.2-64bit.qcow2 
/tmp/test.raw
real 0m24.681s
user 0m8.597s
sys 0m4.084s

New:
time ./qemu-img convert nfs://10.0.0.1/export/VC-Ubuntu-LTS-12.04.2-64bit.qcow2 
/tmp/test.raw
real 0m16.121s
user 0m7.932s
sys 0m2.244s

Signed-off-by: Peter Lieven 
---
  block/qcow2-cluster.c |   27 +--
  block/qcow2.h |1 +
  2 files changed, 26 insertions(+), 2 deletions(-)


I like this idea, but here's a question. Actually, this penalty is 
common to all protocol drivers: curl, gluster, whatever. Readahead is 
not only good for compression processing, but also quite helpful for 
boot: BIOS and GRUB may send sequential 1 sector IO, synchronously, thus 
suffer from high latency of network communication. So I think if we want 
to do this, we will want to share it with other format and protocol 
combinations.


Fam




Re: [Qemu-devel] qemu-iotests: test 051 fails

2014-01-05 Thread Fam Zheng

On 2014年01月05日 02:39, Peter Lieven wrote:

~/git/qemu/tests/qemu-iotests$ ./check -qcow2 51
QEMU  -- ./qemu
QEMU_IMG  -- ./qemu-img
QEMU_IO   -- ./qemu-io
QEMU_NBD  -- /usr/local/bin/qemu-nbd
IMGFMT-- qcow2 (compat=1.1)
IMGPROTO  -- file
PLATFORM  -- Linux/x86_64 lieven-pc 3.2.0-57-generic
SOCKET_SCM_HELPER --

051 - output mismatch (see 051.out.bad)
--- 051.out2014-01-04 19:11:31.557812472 +0100
+++ 051.out.bad2014-01-04 19:38:03.185810819 +0100
@@ -91,7 +91,6 @@
  QEMU X.Y.Z monitor - type 'help' for more information
  (qemu) QEMU_PROG: -drive if=virtio: Device needs media, but drive is empty
  QEMU_PROG: -drive if=virtio: Device initialization failed.
-QEMU_PROG: -drive if=virtio: Device initialization failed.
  QEMU_PROG: -drive if=virtio: Device 'virtio-blk-pci' could not be initialized

  Testing: -drive if=scsi
Failures: 051
Failed 1 of 1 tests

Peter



Yes, this is the bad commit:

commit 75884afd5c6c42e523b08565e289dbe319e17ad9
Author: Andreas Färber 
Date:   Tue Jul 30 01:35:08 2013 +0200

virtio-blk: Convert to QOM realize

Signed-off-by: Andreas Färber 
Signed-off-by: Paolo Bonzini 

I guess simply removing the duplicated line in the output file is 
enough. I'll post a patch for it, thanks for the report!


Fam




[Qemu-devel] [PATCH] qemu-iotests: Remove duplicated error message line in 051.out

2014-01-05 Thread Fam Zheng
Since 75884af (virtio-blk: Convert to QOM realize), the duplication of
error message is gone. Fix the test reference output to match this.

Reported-by: Peter Lieven 
Signed-off-by: Fam Zheng 
---
 tests/qemu-iotests/051.out | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/qemu-iotests/051.out b/tests/qemu-iotests/051.out
index 49e95a2..c2cadba 100644
--- a/tests/qemu-iotests/051.out
+++ b/tests/qemu-iotests/051.out
@@ -91,7 +91,6 @@ Testing: -drive if=virtio
 QEMU X.Y.Z monitor - type 'help' for more information
 (qemu) QEMU_PROG: -drive if=virtio: Device needs media, but drive is empty
 QEMU_PROG: -drive if=virtio: Device initialization failed.
-QEMU_PROG: -drive if=virtio: Device initialization failed.
 QEMU_PROG: -drive if=virtio: Device 'virtio-blk-pci' could not be initialized
 
 Testing: -drive if=scsi
-- 
1.8.5.1




Re: [Qemu-devel] [PATCHv2 04/18] qemu-iotests: fix test 013 to work with any protocol

2014-01-05 Thread Fam Zheng

On 2014年01月06日 01:21, Peter Lieven wrote:

Signed-off-by: Peter Lieven 
---
  tests/qemu-iotests/013 |9 -
  tests/qemu-iotests/013.out |2 +-
  2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/tests/qemu-iotests/013 b/tests/qemu-iotests/013
index ea3cab9..0dbc934 100755
--- a/tests/qemu-iotests/013
+++ b/tests/qemu-iotests/013
@@ -41,14 +41,14 @@ trap "_cleanup; exit \$status" 0 1 2 3 15

  # much of this could be generic for any format supporting compression.
  _supported_fmt qcow qcow2
-_supported_proto file
+_supported_proto generic
  _supported_os Linux

  TEST_OFFSETS="0 4294967296"
  TEST_OPS="writev read write readv"
  CLUSTER_SIZE=4096



I think dropping these three TEST_IMG overriding changes...


-_make_test_img 6G
+TEST_IMG=$TEST_IMG.orig _make_test_img 6G


#1



  echo "Testing empty image"
  echo
@@ -56,16 +56,15 @@ echo
  for offset in $TEST_OFFSETS; do
  echo "At offset $offset:"
  for op in $TEST_OPS; do
-io_test $op $offset $CLUSTER_SIZE 8
+TEST_IMG=$TEST_IMG.orig io_test $op $offset $CLUSTER_SIZE 8


#2


  done
-_check_test_img
+TEST_IMG=$TEST_IMG.orig _check_test_img


#3


  done


  echo "Compressing image"
  echo

-mv "$TEST_IMG" "$TEST_IMG.orig"


and changing this to

TEST_IMG=$TEST_IMG.orig _make_test_img 6G

Should work.


  $QEMU_IMG convert -f $IMGFMT -O $IMGFMT -c "$TEST_IMG.orig" "$TEST_IMG"

  echo "Testing compressed image"
diff --git a/tests/qemu-iotests/013.out b/tests/qemu-iotests/013.out
index 43a414c..763cb0c 100644
--- a/tests/qemu-iotests/013.out
+++ b/tests/qemu-iotests/013.out
@@ -1,5 +1,5 @@
  QA output created by 013
-Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=6442450944
+Formatting 'TEST_DIR/t.IMGFMT.orig', fmt=IMGFMT size=6442450944


So this is not necessary.

Fam



Re: [Qemu-devel] [PATCHv2 05/18] qemu-iotests: fix tests 014 and 023 to work with any protocol

2014-01-05 Thread Fam Zheng

On 2014年01月06日 01:21, Peter Lieven wrote:

Signed-off-by: Peter Lieven 
---
  tests/qemu-iotests/014|4 ++--
  tests/qemu-iotests/014.out|2 +-
  tests/qemu-iotests/023|   11 +--
  tests/qemu-iotests/023.out|   16 
  tests/qemu-iotests/common.pattern |7 +++
  5 files changed, 19 insertions(+), 21 deletions(-)

diff --git a/tests/qemu-iotests/014 b/tests/qemu-iotests/014
index b23c2db..01fb614 100755
--- a/tests/qemu-iotests/014
+++ b/tests/qemu-iotests/014
@@ -43,14 +43,14 @@ trap "_cleanup; exit \$status" 0 1 2 3 15

  # much of this could be generic for any format supporting snapshots
  _supported_fmt qcow2
-_supported_proto file
+_supported_proto generic
  _supported_os Linux

  TEST_OFFSETS="0 4294967296"
  TEST_OPS="writev read write readv"
  CLUSTER_SIZE=4096

-_make_test_img 6G
+TEST_IMG=$TEST_IMG.orig _make_test_img 6G

  echo "Testing empty image:"
  for offset in $TEST_OFFSETS; do
diff --git a/tests/qemu-iotests/014.out b/tests/qemu-iotests/014.out
index 4744b4b..6459af0 100644
--- a/tests/qemu-iotests/014.out
+++ b/tests/qemu-iotests/014.out
@@ -1,5 +1,5 @@
  QA output created by 014
-Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=6442450944
+Formatting 'TEST_DIR/t.IMGFMT.orig', fmt=IMGFMT size=6442450944
  Testing empty image:
  test2: With offset 0
  === Clusters to be compressed [1]
diff --git a/tests/qemu-iotests/023 b/tests/qemu-iotests/023
index 9ad06b9..2357696 100755
--- a/tests/qemu-iotests/023
+++ b/tests/qemu-iotests/023
@@ -41,7 +41,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15

  # much of this could be generic for any format supporting compression.
  _supported_fmt qcow qcow2
-_supported_proto file
+_supported_proto generic
  _supported_os Linux

  TEST_OFFSETS="0 4294967296"
@@ -55,7 +55,7 @@ for CLUSTER_SIZE in $CLUSTER_SIZES; do
  echo "Creating new image; cluster size: $CLUSTER_SIZE"
  echo

-_make_test_img 8G
+TEST_IMG=$TEST_IMG.orig _make_test_img 8G

  echo "Testing empty image"
  echo
@@ -63,15 +63,14 @@ for CLUSTER_SIZE in $CLUSTER_SIZES; do
  for offset in $TEST_OFFSETS; do
  echo "At offset $offset:"
  for op in $TEST_OPS; do
-io_test $op $offset $CLUSTER_SIZE 3
+TEST_IMG=$TEST_IMG.orig io_test $op $offset $CLUSTER_SIZE 3
  done
-_check_test_img
+TEST_IMG=$TEST_IMG.orig _check_test_img
  done

  echo "Compressing image"
  echo

-mv "$TEST_IMG" "$TEST_IMG.orig"
  $QEMU_IMG convert -f $IMGFMT -O $IMGFMT -c "$TEST_IMG.orig" "$TEST_IMG"

  echo "Testing compressed image"
@@ -101,7 +100,7 @@ for CLUSTER_SIZE in $CLUSTER_SIZES; do
  echo "Creating another new image"
  echo

-_make_test_img 8G
+TEST_IMG=$TEST_IMG.orig _make_test_img 8G

  echo "More complex patterns"
  echo
diff --git a/tests/qemu-iotests/023.out b/tests/qemu-iotests/023.out
index ec32341..b80836d 100644
--- a/tests/qemu-iotests/023.out
+++ b/tests/qemu-iotests/023.out
@@ -1,7 +1,7 @@
  QA output created by 023
  Creating new image; cluster size: 1024

-Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=8589934592
+Formatting 'TEST_DIR/t.IMGFMT.orig', fmt=IMGFMT size=8589934592
  Testing empty image

  At offset 0:
@@ -5664,7 +5664,7 @@ read 3072/3072 bytes at offset 4295491072
  No errors were found on the image.
  Creating another new image

-Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=8589934592
+Formatting 'TEST_DIR/t.IMGFMT.orig', fmt=IMGFMT size=8589934592
  More complex patterns

  test2: With offset 0
@@ -5887,7 +5887,7 @@ read 2048/2048 bytes at offset 4295001088
  No errors were found on the image.
  Creating new image; cluster size: 4096

-Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=8589934592
+Formatting 'TEST_DIR/t.IMGFMT.orig', fmt=IMGFMT size=8589934592
  Testing empty image

  At offset 0:
@@ -12270,7 +12270,7 @@ read 12288/12288 bytes at offset 4301256704
  No errors were found on the image.
  Creating another new image

-Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=8589934592
+Formatting 'TEST_DIR/t.IMGFMT.orig', fmt=IMGFMT size=8589934592
  More complex patterns

  test2: With offset 0
@@ -12493,7 +12493,7 @@ read 8192/8192 bytes at offset 4295102464
  No errors were found on the image.
  Creating new image; cluster size: 16384

-Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=8589934592
+Formatting 'TEST_DIR/t.IMGFMT.orig', fmt=IMGFMT size=8589934592
  Testing empty image

  At offset 0:
@@ -18876,7 +18876,7 @@ read 49152/49152 bytes at offset 4395622400
  No errors were found on the image.
  Creating another new image

-Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=8589934592
+Formatting 'TEST_DIR/t.IMGFMT.orig', fmt=IMGFMT size=8589934592
  More complex patterns

  test2: With offset 0
@@ -19099,7 +19099,7 @@ read 32768/32768 bytes at offset 4295507968
  No errors were found on the image.
  Creating new image; cluster size: 65536

-Formatting 'TEST_DIR/t.IMGFMT',

Re: [Qemu-devel] [PATCHv2 06/18] qemu-iotests: fix test 018 to work with any protocol

2014-01-05 Thread Fam Zheng

On 2014年01月06日 01:21, Peter Lieven wrote:

Signed-off-by: Peter Lieven 
---
  tests/qemu-iotests/018 |   22 +-
  tests/qemu-iotests/018.out |4 ++--
  2 files changed, 11 insertions(+), 15 deletions(-)

diff --git a/tests/qemu-iotests/018 b/tests/qemu-iotests/018
index aa9d3cb..fecf281 100755
--- a/tests/qemu-iotests/018
+++ b/tests/qemu-iotests/018
@@ -41,46 +41,42 @@ trap "_cleanup; exit \$status" 0 1 2 3 15

  # Any format supporting backing files
  _supported_fmt qcow qcow2 vmdk qed
-_supported_proto file
+_supported_proto generic
  _supported_os Linux

  TEST_OFFSETS="0 4294967296"

-TEST_IMG_SAVE=$TEST_IMG
-TEST_IMG=$TEST_IMG.base
-_make_test_img 6G
+TEST_IMG=$TEST_IMG.base _make_test_img 6G

  echo "Filling base image"
  echo

  for offset in $TEST_OFFSETS; do
  # Some clusters with alternating backing file/image file reads
-io writev $(( offset )) 512 1024 64
+TEST_IMG=$TEST_IMG.base io writev $(( offset )) 512 1024 64

  # Complete backing clusters
-io writev $(( offset  + 64 * 1024))  65536 65536 1
+TEST_IMG=$TEST_IMG.base io writev $(( offset  + 64 * 1024))  65536 65536 1
  done
-_check_test_img
+TEST_IMG=$TEST_IMG.base _check_test_img

  echo "Creating test image with backing file"
  echo

-TEST_IMG=$TEST_IMG_SAVE
-_make_test_img -b "$TEST_IMG.base" 6G
+TEST_IMG=$TEST_IMG.orig _make_test_img -b "$TEST_IMG.base" 6G

  echo "Filling test image"
  echo

  for offset in $TEST_OFFSETS; do
  # Some clusters with alternating backing file/image file reads
-io writev $(( offset + 512 )) 512 1024 64
+TEST_IMG=$TEST_IMG.orig io writev $(( offset + 512 )) 512 1024 64

  # Complete test image clusters
-io writev $(( offset + 64 * 1024 + 65536))  65536 65536 1
+TEST_IMG=$TEST_IMG.orig io writev $(( offset + 64 * 1024 + 65536))  65536 
65536 1
  done
-_check_test_img
+TEST_IMG=$TEST_IMG.orig _check_test_img

-mv "$TEST_IMG" "$TEST_IMG.orig"


The same pattern applies here, but I'll stop repeating the same comment.

Fam




Re: [Qemu-devel] [PATCHv2 17/18] qemu-iotests: blacklist test 020 for NFS protocol

2014-01-05 Thread Fam Zheng

On 2014年01月06日 01:22, Peter Lieven wrote:

reopening is currently not supported.

Signed-off-by: Peter Lieven 


Out of curiosity, is there anything specific to NFS that prevents support for
reopen? I think commit will work with host NFS mounted images.


Fam




Re: [Qemu-devel] qemu-iotests: test 051 fails

2014-01-05 Thread Fam Zheng

On 2014年01月06日 14:14, Stefan Hajnoczi wrote:

On Mon, Jan 06, 2014 at 09:44:30AM +0800, Fam Zheng wrote:

On 2014年01月05日 02:39, Peter Lieven wrote:

~/git/qemu/tests/qemu-iotests$ ./check -qcow2 51
QEMU  -- ./qemu
QEMU_IMG  -- ./qemu-img
QEMU_IO   -- ./qemu-io
QEMU_NBD  -- /usr/local/bin/qemu-nbd
IMGFMT-- qcow2 (compat=1.1)
IMGPROTO  -- file
PLATFORM  -- Linux/x86_64 lieven-pc 3.2.0-57-generic
SOCKET_SCM_HELPER --

051 - output mismatch (see 051.out.bad)
--- 051.out2014-01-04 19:11:31.557812472 +0100
+++ 051.out.bad2014-01-04 19:38:03.185810819 +0100
@@ -91,7 +91,6 @@
  QEMU X.Y.Z monitor - type 'help' for more information
  (qemu) QEMU_PROG: -drive if=virtio: Device needs media, but drive is empty
  QEMU_PROG: -drive if=virtio: Device initialization failed.
-QEMU_PROG: -drive if=virtio: Device initialization failed.
  QEMU_PROG: -drive if=virtio: Device 'virtio-blk-pci' could not be initialized

  Testing: -drive if=scsi
Failures: 051
Failed 1 of 1 tests

Peter



Yes, this is the bad commit:

commit 75884afd5c6c42e523b08565e289dbe319e17ad9
Author: Andreas Färber 
Date:   Tue Jul 30 01:35:08 2013 +0200

 virtio-blk: Convert to QOM realize

 Signed-off-by: Andreas Färber 
 Signed-off-by: Paolo Bonzini 

I guess simply removing the duplicated line in the output file is
enough. I'll post a patch for it, thanks for the report!


See my earlier email to this thread.  I already posted a patch.



That's good, thanks!

Fam




Re: [Qemu-devel] [PATCH v3 2/3] qapi: change qapi to convert schema json

2014-01-05 Thread Fam Zheng

On 2014年01月05日 20:02, Amos Kong wrote:

The QMP schema is defined in a JSON file; it is parsed by the
qapi scripts to generate C files.

We want to return the schema information to management, so
this patch converts the JSON file to a string table in a
C header file; then we can use the JSON content in QEMU code.

eg: (qmp-schema.h)
   const char *const qmp_schema_table[] = {
 "{ 'type': 'NameInfo', 'data': {'*name': 'str'} }",
 "{ 'command': 'query-name', 'returns': 'NameInfo' }",
 ...
   }

Signed-off-by: Amos Kong 
---
  Makefile |  5 -
  scripts/qapi-commands.py |  2 +-
  scripts/qapi-types.py| 48 +---
  scripts/qapi-visit.py|  2 +-
  scripts/qapi.py  | 20 +++-
  5 files changed, 66 insertions(+), 11 deletions(-)

diff --git a/Makefile b/Makefile
index bdff4e4..2c29755 100644
--- a/Makefile
+++ b/Makefile
@@ -45,7 +45,7 @@ endif
  endif

  GENERATED_HEADERS = config-host.h qemu-options.def
-GENERATED_HEADERS += qmp-commands.h qapi-types.h qapi-visit.h
+GENERATED_HEADERS += qmp-commands.h qapi-types.h qapi-visit.h qmp-schema.h
  GENERATED_SOURCES += qmp-marshal.c qapi-types.c qapi-visit.c

  GENERATED_HEADERS += trace/generated-events.h
@@ -229,6 +229,9 @@ $(SRC_PATH)/qapi-schema.json 
$(SRC_PATH)/scripts/qapi-visit.py $(qapi-py)
  qmp-commands.h qmp-marshal.c :\
  $(SRC_PATH)/qapi-schema.json $(SRC_PATH)/scripts/qapi-commands.py $(qapi-py)
$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-commands.py $(gen-out-type) -m -o 
"." < $<, "  GEN   $@")
+qmp-schema.h:\
+$(SRC_PATH)/qapi-schema.json $(SRC_PATH)/scripts/qapi-types.py $(qapi-py)
+   $(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-types.py $(gen-out-type) -o "." -s 
"$@" < $<, "  GEN   $@")

  QGALIB_GEN=$(addprefix qga/qapi-generated/, qga-qapi-types.h qga-qapi-visit.h 
qga-qmp-commands.h)
  $(qga-obj-y) qemu-ga.o: $(QGALIB_GEN)
diff --git a/scripts/qapi-commands.py b/scripts/qapi-commands.py
index b12b696..5f4fb94 100644
--- a/scripts/qapi-commands.py
+++ b/scripts/qapi-commands.py
@@ -440,7 +440,7 @@ except os.error, e:
  if e.errno != errno.EEXIST:
  raise

-exprs = parse_schema(sys.stdin)
+exprs = parse_schema(sys.stdin)[0]
  commands = filter(lambda expr: expr.has_key('command'), exprs)
  commands = filter(lambda expr: not expr.has_key('gen'), commands)

diff --git a/scripts/qapi-types.py b/scripts/qapi-types.py
index 4a1652b..0f86b95 100644
--- a/scripts/qapi-types.py
+++ b/scripts/qapi-types.py
@@ -15,6 +15,7 @@ import sys
  import os
  import getopt
  import errno
+import re

  def generate_fwd_struct(name, members, builtin_type=False):
  if builtin_type:
@@ -282,9 +283,10 @@ void qapi_free_%(type)s(%(c_type)s obj)


  try:
-opts, args = getopt.gnu_getopt(sys.argv[1:], "chbp:o:",
+opts, args = getopt.gnu_getopt(sys.argv[1:], "chbs:p:o:",
 ["source", "header", "builtins",
-"prefix=", "output-dir="])
+"schema-dump-file=", "prefix=",
+"output-dir="])
  except getopt.GetoptError, err:
  print str(err)
  sys.exit(1)
@@ -293,6 +295,7 @@ output_dir = ""
  prefix = ""
  c_file = 'qapi-types.c'
  h_file = 'qapi-types.h'
+schema_dump_file = ""

  do_c = False
  do_h = False
@@ -309,11 +312,17 @@ for o, a in opts:
  do_h = True
  elif o in ("-b", "--builtins"):
  do_builtins = True
+elif o in ("-s", "--schema-dump-file"):
+schema_dump_file = a

  if not do_c and not do_h:
  do_c = True
  do_h = True

+if schema_dump_file:
+do_c = False
+do_h = False
+
  c_file = output_dir + prefix + c_file
  h_file = output_dir + prefix + h_file

@@ -381,7 +390,40 @@ fdecl.write(mcgen('''
  ''',
guard=guardname(h_file)))

-exprs = parse_schema(sys.stdin)
+exprs_all = parse_schema(sys.stdin)
+
+schema_table = """/* AUTOMATICALLY GENERATED, DO NOT MODIFY */
+
+/*
+ * Schema json string table converted from qapi-schema.json
+ *
+ * Copyright (c) 2013 Red Hat, Inc.
+ *
+ * Authors:
+ *  Amos Kong 
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+const char *const qmp_schema_table[] = {
+"""
+
+if schema_dump_file:
+for line in exprs_all[1]:
+line = re.sub(r'#.*\n', ' ', line.strip())
+line = re.sub(r'\n', ' ', line.strip())
+line = re.sub(r' +', ' ', line)
+schema_table += '  "%s",\n' % (line)
+
+schema_table += '  NULL };\n'
+f = open(schema_dump_file, "w")
+f.write(schema_table)
+f.flush()
+f.close()
+
+exprs = exprs_all[0]
  exprs = filter(lambda expr: not expr.has_key('gen'), exprs)

  fdecl.write(guardstart("QAPI_TYPES_BUILTIN_STRUCT_DECL"))
diff --git a/scripts/qapi-visit.py b/scripts/qapi-visit.py
index 65f1a54..db68084 100644
--- a/scrip

Re: [Qemu-devel] [PATCH v3 3/3] qmp: full introspection support for QMP

2014-01-06 Thread Fam Zheng

On 2014年01月05日 20:02, Amos Kong wrote:

This patch introduces a new monitor command to query QMP schema
information, the return data is a range of schema structs, which
contains the useful metadata to help management to check supported
features, QMP commands detail, etc.

It parses all json definition in qapi-schema.json, and generate a
dynamic struct tree, QMP infrastructure will convert the tree to
json string and return to QMP client.

I defined a 'DataObject' union in qapi-schema.json, it's used to
describe the dynamic data struct.

I also added a document about QMP full introspection support
(docs/qmp-full-introspection.txt), it helps to use the new interface
and understand the abstract method in describing the dynamic struct.

TODO:
Wenchao Xia is working to convert QMP events to qapi-schema.json,
then event can also be queried by this interface.

I will introduce another command 'query-qga-schema' to query QGA
schema information, it's easy to add this support based on this
patch.

Signed-off-by: Amos Kong 
---


I have a few comments on the current implementation below; there are a
few things to improve. However, I agree with what Eric suggested in reply
to V2: it may be better to generate most of the response data in Python
code at compile time and simplify the logic in C, because this
implementation is slow and performs unnecessary runtime computation. It
also duplicates much of the existing qapi.py logic (parsing of data types
and other semantics).



  docs/qmp-full-introspection.txt |  97 ++
  qapi-schema.json| 150 
  qmp-commands.hx |  43 -
  qmp.c   | 382 
  4 files changed, 671 insertions(+), 1 deletion(-)
  create mode 100644 docs/qmp-full-introspection.txt

diff --git a/docs/qmp-full-introspection.txt b/docs/qmp-full-introspection.txt
new file mode 100644
index 000..1617df7
--- /dev/null
+++ b/docs/qmp-full-introspection.txt
@@ -0,0 +1,97 @@
+= Full introspection support for QMP =
+
+
+== Purpose ==
+
+Add a new monitor command for management to  query QMP schema
+information, it returns a range of schema structs, which contain the
+useful metadata to help management to check supported features, QMP
+commands detail, etc.
+
+== Usage ==
+
+Json schema:
+  { 'type': 'NameInfo', 'data': {'*name': 'str'} }
+  { 'command': 'query-name', 'returns': 'NameInfo' }
+
+Execute QMP command:
+
+  { "execute": "query-qmp-schema" }
+
+Returns:
+
+  { "return": [
+  {
+  "name": "query-name",
+  "type": "command",
+  "returns": {
+  "name": "NameInfo",
+  "type": "type",
+  "data": [
+  {
+  "name": "name",
+  "optional": true,
+  "recursive": false,
+  "type": "str"
+  }
+  ]
+  }
+  },
+  ...
+  ] }
+
+The whole schema information will be returned in one go, it contains
+all the schema entries. It doesn't support to be filtered by type
+or name. Currently it takes about 5 seconds to return about 1.5M string.
+
+== 'DataObject' union ==
+
+{ 'union': 'DataObject',
+  'base': 'DataObjectBase',
+  'discriminator': 'type',
+  'data': {
+'anonymous-struct': 'DataObjectAnonymousStruct',
+'command': 'DataObjectCommand',
+'enumeration': 'DataObjectEnumeration',
+'reference-type': 'String',
+'type': 'DataObjectType',
+'unionobj': 'DataObjectUnion' } }
+
+Currently we have schema definitions for type, command, enumeration,
+union. Some arbitrary structs (dictionary, list or string) and native
+types are also used in the body of definitions.
+
+Here we use "DataObject" union to abstract all above schema. We want
+to provide more useful metadata, and used some enum/unions to indicate
+the dynamic type. In the output, some simple data is processed too
+unwieldy. In another side, some complex data is described clearly.
+It's also caused by some limitation of QAPI infrastructure.
+
+So we define 'DataObject' to be an union, it always has an object name
+except anonymous struct.
+
+'command', 'enumeration', 'type', 'unionobj' are common schema types;
+'union' is a built-in type, so I used unionobj here.
+
+'reference-type' will be used to describe native types and unextended
+types.
+
+'anonymous-struct' will be used to describe arbitrary structs
+(dictionary, list or string).
+
+== Avoid dead loop in recursive extending ==
+
+We have four types (ImageInfo, BlockStats, PciDeviceInfo, ObjectData)
+that use themselves in their own defined data, directly or indirectly;
+we will not repeatedly extend them, to avoid an infinite loop.
+
+We use a string to record the visit path, type index of each node
+will be saved to the string, indexes are split by ':'.
+
+Push index to visit_path_str before extending, and pop index from
+visit_path_str after extending.
+
+If the type was already extended 

Re: [Qemu-devel] [PATCHv2 04/18] qemu-iotests: fix test 013 to work with any protocol

2014-01-06 Thread Fam Zheng

On 2014年01月06日 14:48, Peter Lieven wrote:

On 06.01.2014 06:31, Fam Zheng wrote:

On 2014年01月06日 01:21, Peter Lieven wrote:

Signed-off-by: Peter Lieven 
---
  tests/qemu-iotests/013 |9 -
  tests/qemu-iotests/013.out |2 +-
  2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/tests/qemu-iotests/013 b/tests/qemu-iotests/013
index ea3cab9..0dbc934 100755
--- a/tests/qemu-iotests/013
+++ b/tests/qemu-iotests/013
@@ -41,14 +41,14 @@ trap "_cleanup; exit \$status" 0 1 2 3 15

  # much of this could be generic for any format supporting compression.
  _supported_fmt qcow qcow2
-_supported_proto file
+_supported_proto generic
  _supported_os Linux

  TEST_OFFSETS="0 4294967296"
  TEST_OPS="writev read write readv"
  CLUSTER_SIZE=4096



I think dropping these three TEST_IMG overriding change...


-_make_test_img 6G
+TEST_IMG=$TEST_IMG.orig _make_test_img 6G


#1



  echo "Testing empty image"
  echo
@@ -56,16 +56,15 @@ echo
  for offset in $TEST_OFFSETS; do
  echo "At offset $offset:"
  for op in $TEST_OPS; do
-io_test $op $offset $CLUSTER_SIZE 8
+TEST_IMG=$TEST_IMG.orig io_test $op $offset $CLUSTER_SIZE 8


#2


  done
-_check_test_img
+TEST_IMG=$TEST_IMG.orig _check_test_img


#3


  done


  echo "Compressing image"
  echo

-mv "$TEST_IMG" "$TEST_IMG.orig"


and changing this to

TEST_IMG=$TEST_IMG.orig _make_test_img 6G

Should work.

Unfortunately it doesn't. All subsequent commands will then work
on $TEST_IMG.orig although they shouldn't. In the case of
013 this is io_test, _check_test_img and the cleanup at the end.



Why? The overriding is temporary and subsequent commands are not affected.

My proposal above doesn't work, though, because an empty new image
doesn't contain the right data; what is needed here is a copy. So maybe
changing the "mv" line to:


$QEMU_IMG convert -f $IMGFMT -O $IMGFMT "$TEST_IMG" "$TEST_IMG.orig"

could do the work, but I'm not sure if this fits every case.


There are 3 options:
  - override it in every line that should use an alternate $TEST_IMG
  - save the original $TEST_IMG and restore it.
  - rework all commands to take the file as parameter and not use
a global variable for it.

I chose the first one because it makes clear which $TEST_IMG is actually
used. You see from the output and the code that you are dealing with the
file that is later used as $TEST_IMG.orig. If you see $TEST_IMG there you
can't distinguish if it's the backing or original file or the actual image.

But I thought that this would be controversial. This is why I split
the patch
into individual ones. So it's possible to drop all these patches and
still be able
to proceed with the integration of the NFS protocol driver.


I'll leave maintainers to decide.

Fam




Re: [Qemu-devel] [PATCH v3 2/3] qapi: change qapi to convert schema json

2014-01-06 Thread Fam Zheng

On 2014年01月05日 20:02, Amos Kong wrote:

The QMP schema is defined in a JSON file; it is parsed by the
qapi scripts to generate C files.

We want to return the schema information to management, so
this patch converts the JSON file to a string table in a
C header file; then we can use the JSON content in QEMU code.

eg: (qmp-schema.h)
   const char *const qmp_schema_table[] = {
 "{ 'type': 'NameInfo', 'data': {'*name': 'str'} }",
 "{ 'command': 'query-name', 'returns': 'NameInfo' }",
 ...
   }

Signed-off-by: Amos Kong 
---
  Makefile |  5 -
  scripts/qapi-commands.py |  2 +-
  scripts/qapi-types.py| 48 +---
  scripts/qapi-visit.py|  2 +-
  scripts/qapi.py  | 20 +++-
  5 files changed, 66 insertions(+), 11 deletions(-)

diff --git a/Makefile b/Makefile
index bdff4e4..2c29755 100644
--- a/Makefile
+++ b/Makefile
@@ -45,7 +45,7 @@ endif
  endif

  GENERATED_HEADERS = config-host.h qemu-options.def
-GENERATED_HEADERS += qmp-commands.h qapi-types.h qapi-visit.h
+GENERATED_HEADERS += qmp-commands.h qapi-types.h qapi-visit.h qmp-schema.h
  GENERATED_SOURCES += qmp-marshal.c qapi-types.c qapi-visit.c

  GENERATED_HEADERS += trace/generated-events.h
@@ -229,6 +229,9 @@ $(SRC_PATH)/qapi-schema.json 
$(SRC_PATH)/scripts/qapi-visit.py $(qapi-py)
  qmp-commands.h qmp-marshal.c :\
  $(SRC_PATH)/qapi-schema.json $(SRC_PATH)/scripts/qapi-commands.py $(qapi-py)
$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-commands.py $(gen-out-type) -m -o 
"." < $<, "  GEN   $@")
+qmp-schema.h:\
+$(SRC_PATH)/qapi-schema.json $(SRC_PATH)/scripts/qapi-types.py $(qapi-py)
+   $(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-types.py $(gen-out-type) -o "." -s 
"$@" < $<, "  GEN   $@")



It would be nice to also add this file to .gitignore together with this 
patch.


Fam



Re: [Qemu-devel] [PATCHv2 04/18] qemu-iotests: fix test 013 to work with any protocol

2014-01-06 Thread Fam Zheng

On 2014年01月06日 20:21, Peter Lieven wrote:

On 06.01.2014 11:09, Fam Zheng wrote:

On 2014年01月06日 14:48, Peter Lieven wrote:

On 06.01.2014 06:31, Fam Zheng wrote:

On 2014年01月06日 01:21, Peter Lieven wrote:

Signed-off-by: Peter Lieven 
---
  tests/qemu-iotests/013 |9 -
  tests/qemu-iotests/013.out |2 +-
  2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/tests/qemu-iotests/013 b/tests/qemu-iotests/013
index ea3cab9..0dbc934 100755
--- a/tests/qemu-iotests/013
+++ b/tests/qemu-iotests/013
@@ -41,14 +41,14 @@ trap "_cleanup; exit \$status" 0 1 2 3 15

  # much of this could be generic for any format supporting
compression.
  _supported_fmt qcow qcow2
-_supported_proto file
+_supported_proto generic
  _supported_os Linux

  TEST_OFFSETS="0 4294967296"
  TEST_OPS="writev read write readv"
  CLUSTER_SIZE=4096



I think dropping these three TEST_IMG overriding change...


-_make_test_img 6G
+TEST_IMG=$TEST_IMG.orig _make_test_img 6G


#1



  echo "Testing empty image"
  echo
@@ -56,16 +56,15 @@ echo
  for offset in $TEST_OFFSETS; do
  echo "At offset $offset:"
  for op in $TEST_OPS; do
-io_test $op $offset $CLUSTER_SIZE 8
+TEST_IMG=$TEST_IMG.orig io_test $op $offset $CLUSTER_SIZE 8


#2


  done
-_check_test_img
+TEST_IMG=$TEST_IMG.orig _check_test_img


#3


  done


  echo "Compressing image"
  echo

-mv "$TEST_IMG" "$TEST_IMG.orig"


and changing this to

TEST_IMG=$TEST_IMG.orig _make_test_img 6G

Should work.

Unfortunately it doesn't. All subsequent commands will then work
on $TEST_IMG.orig although they shouldn't. In the case of
013 this is io_test, _check_test_img and the cleanup at the end.



Why? The overriding is temporary and subsequent commands are not
affected.

If you put in a single

TEST_IMG=$TEST_IMG.orig

line, this affects all further commands in the same test script.

If you put the TEST_IMG=$TEST_IMG.orig before a command it affects only
this single command.



My proposal above doesn't work, though, because an empty new image
doesn't contain the right data; what is needed here is a copy. So maybe
changing the "mv" line to:

$QEMU_IMG convert -f $IMGFMT -O $IMGFMT "$TEST_IMG" "$TEST_IMG.orig"

could do the work, but I'm not sure if this fits every case.

This is unnecessary (copy) overhead and in some cases it could falsify
the test. The convert process
is not guaranteed to create identical copies. You could use raw format,
but in this case the image size
can only be a multiple of 512 bytes.


OK, thanks for clarification.

Fam




[Qemu-devel] [PATCH] vmdk: Fix big flat extent IO

2014-01-07 Thread Fam Zheng
Declaring the local variable "n" as int64_t avoids overflow in calculations
with large sector numbers. See the test case change for a failing case.

Signed-off-by: Fam Zheng 
---
 block/vmdk.c   |  4 +--
 tests/qemu-iotests/059 |  7 +
 tests/qemu-iotests/059.out | 74 ++
 3 files changed, 83 insertions(+), 2 deletions(-)

diff --git a/block/vmdk.c b/block/vmdk.c
index 0734bc2..3404cd0 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -1320,8 +1320,8 @@ static int vmdk_write(BlockDriverState *bs, int64_t 
sector_num,
 {
 BDRVVmdkState *s = bs->opaque;
 VmdkExtent *extent = NULL;
-int n, ret;
-int64_t index_in_cluster;
+int ret;
+int64_t index_in_cluster, n;
 uint64_t extent_begin_sector, extent_relative_sector_num;
 uint64_t cluster_offset;
 VmdkMetaData m_data;
diff --git a/tests/qemu-iotests/059 b/tests/qemu-iotests/059
index 73941c3..9376596 100755
--- a/tests/qemu-iotests/059
+++ b/tests/qemu-iotests/059
@@ -85,6 +85,13 @@ echo "=== Testing version 3 ==="
 _use_sample_img iotest-version3.vmdk.bz2
 _img_info
 
+echo
+echo "=== Testing 4TB monolithicFlat creation and IO ==="
+IMGOPTS="subformat=monolithicFlat" _make_test_img 4T
+_img_info
+$QEMU_IO -c "write -P 0xa 900G 512" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -v 900G 1024" "$TEST_IMG" | _filter_qemu_io
+
 # success, all done
 echo "*** done"
 rm -f $seq.full
diff --git a/tests/qemu-iotests/059.out b/tests/qemu-iotests/059.out
index 4ff935c..4d9f6af 100644
--- a/tests/qemu-iotests/059.out
+++ b/tests/qemu-iotests/059.out
@@ -2042,4 +2042,78 @@ Format specific information:
 image: TEST_DIR/iotest-version3.IMGFMT
 file format: IMGFMT
 virtual size: 1.0G (1073741824 bytes)
+
+=== Testing 4TB monolithicFlat creation and IO ===
+Formatting 'TEST_DIR/iotest-version3.IMGFMT', fmt=IMGFMT size=4398046511104
+image: TEST_DIR/iotest-version3.IMGFMT
+file format: IMGFMT
+virtual size: 4.0T (4398046511104 bytes)
+wrote 512/512 bytes at offset 966367641600
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+e1:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  
+e10010:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  
+e10020:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  
+e10030:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  
+e10040:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  
+e10050:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  
+e10060:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  
+e10070:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  
+e10080:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  
+e10090:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  
+e100a0:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  
+e100b0:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  
+e100c0:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  
+e100d0:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  
+e100e0:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  
+e100f0:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  
+e10100:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  
+e10110:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  
+e10120:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  
+e10130:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  
+e10140:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  
+e10150:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  
+e10160:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  
+e10170:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  
+e10180:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  
+e10190:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  
+e101a0:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  
+e101b0:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  
+e101c0:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  
+e101d0:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  
+e101e0:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  
+e101f0:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  
+e10200:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  
+e10210:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  
+e10220:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  
+e10230:  00 00 00 

Re: [Qemu-devel] [PATCH v8 02/12] qapi: Add BlockOperationType enum

2014-01-07 Thread Fam Zheng

On 2014年01月03日 18:09, Stefan Hajnoczi wrote:

On Fri, Dec 13, 2013 at 03:35:10PM +0800, Fam Zheng wrote:

This adds the enum of all the operations that can be taken on a block
device.

Signed-off-by: Fam Zheng 
---
  qapi-schema.json | 50 ++
  1 file changed, 50 insertions(+)

diff --git a/qapi-schema.json b/qapi-schema.json
index d6f8615..8e982a2 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -1440,6 +1440,56 @@
'data': ['commit', 'stream', 'mirror', 'backup'] }

  ##
+# @BlockOperationType
+#
+# Type of a block operation. (since 2.0)


Why is this exposed in qapi-schema.json?  The blockers concept is
internal to QEMU and not exposed via QMP.



I plan to add it to the block information (query-block, for example), but
in follow-up patches. It could be useful for the user to check which
commands/operations are possible without trial and error, as we put
more and more complicated state and configuration into the BDS graph.


Fam



[Qemu-devel] [PATCH v9 02/11] block: Introduce op_blockers to BlockDriverState

2014-01-07 Thread Fam Zheng
BlockDriverState.op_blockers is an array of lists with BLOCK_OP_TYPE_MAX
elements. Each list is a list of blockers of an operation type
(BlockOpType), that marks this BDS as currently blocked for a certain
type of operation with reason errors stored in the list. The rule of
usage is:

 * BDS user who wants to take an operation should check if there's any
   blocker of the type with bdrv_op_is_blocked().

 * BDS user who wants to block certain types of operation, should call
   bdrv_op_block (or bdrv_op_block_all to block all types of operations,
   which is similar to the existing bdrv_set_in_use()).

 * A blocker is only referenced by op_blockers, so the lifecycle is
   managed by caller, and shouldn't be lost until unblock, so typically
   a caller does these:

   - Allocate a blocker with error_setg or similar, call bdrv_op_block()
 to block some operations.
   - Hold the blocker, do his job.
   - Unblock operations that it blocked, with the same reason pointer
 passed to bdrv_op_unblock().
   - Release the blocker with error_free().

Signed-off-by: Fam Zheng 
---
 block.c   | 71 +++
 include/block/block.h |  7 +
 include/block/block_int.h |  5 
 3 files changed, 83 insertions(+)

diff --git a/block.c b/block.c
index 64e7d22..91cda9c 100644
--- a/block.c
+++ b/block.c
@@ -1627,6 +1627,8 @@ static void bdrv_move_feature_fields(BlockDriverState 
*bs_dest,
 /* keep the same entry in bdrv_states */
 pstrcpy(bs_dest->device_name, sizeof(bs_dest->device_name),
 bs_src->device_name);
+memcpy(bs_dest->op_blockers, bs_src->op_blockers,
+   sizeof(bs_dest->op_blockers));
 bs_dest->list = bs_src->list;
 }
 
@@ -4634,6 +4636,75 @@ void bdrv_unref(BlockDriverState *bs)
 }
 }
 
+struct BdrvOpBlocker {
+Error *reason;
+QLIST_ENTRY(BdrvOpBlocker) list;
+};
+
+bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
+{
+BdrvOpBlocker *blocker;
+assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
+if (!QLIST_EMPTY(&bs->op_blockers[op])) {
+blocker = QLIST_FIRST(&bs->op_blockers[op]);
+if (errp) {
+*errp = error_copy(blocker->reason);
+}
+return true;
+}
+return false;
+}
+
+void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
+{
+BdrvOpBlocker *blocker;
+assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
+
+blocker = g_malloc0(sizeof(BdrvOpBlocker));
+blocker->reason = reason;
+QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
+}
+
+void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
+{
+BdrvOpBlocker *blocker, *next;
+assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
+QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
+if (blocker->reason == reason) {
+QLIST_REMOVE(blocker, list);
+g_free(blocker);
+}
+}
+}
+
+void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
+{
+int i;
+for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
+bdrv_op_block(bs, i, reason);
+}
+}
+
+void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
+{
+int i;
+for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
+bdrv_op_unblock(bs, i, reason);
+}
+}
+
+bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
+{
+int i;
+
+for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
+if (!QLIST_EMPTY(&bs->op_blockers[i])) {
+return false;
+}
+}
+return true;
+}
+
 void bdrv_set_in_use(BlockDriverState *bs, int in_use)
 {
 assert(bs->in_use != in_use);
diff --git a/include/block/block.h b/include/block/block.h
index 36efaea..890af1a 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -442,6 +442,13 @@ void bdrv_unref(BlockDriverState *bs);
 void bdrv_set_in_use(BlockDriverState *bs, int in_use);
 int bdrv_in_use(BlockDriverState *bs);
 
+bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp);
+void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason);
+void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason);
+void bdrv_op_block_all(BlockDriverState *bs, Error *reason);
+void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason);
+bool bdrv_op_blocker_is_empty(BlockDriverState *bs);
+
 #ifdef CONFIG_LINUX_AIO
 int raw_get_aio_fd(BlockDriverState *bs);
 #else
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 8b132d7..458acd6 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -252,6 +252,8 @@ typedef struct BlockLimits {
 int opt_transfer_length;
 } BlockLimits;
 
+typedef struct BdrvOpBlocker BdrvOpBlocker;
+
 /*
  * Note: the function bdrv_append() copies and swaps contents of
  * BlockDriverStates, 

[Qemu-devel] [PATCH v9 01/11] qapi: Add BlockOperationType enum

2014-01-07 Thread Fam Zheng
This adds the enum of all the operations that can be taken on a block
device.

Signed-off-by: Fam Zheng 
---
 qapi-schema.json | 50 ++
 1 file changed, 50 insertions(+)

diff --git a/qapi-schema.json b/qapi-schema.json
index c3c939c..288d024 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -1440,6 +1440,56 @@
   'data': ['commit', 'stream', 'mirror', 'backup'] }
 
 ##
+# @BlockOpType
+#
+# Type of a block operation. (since 2.0)
+#
+# @backup-source: As a backup source. See the 'drive-backup' command.
+#
+# @backup-target: As a backup target. See the 'drive-backup' command.
+#
+# @change: See the 'change' command.
+#
+# @commit: See the 'block-commit' command.
+#
+# @dataplane: The virtio-blk dataplane feature.
+#
+# @drive-del: See the 'drive_del' HMP command.
+#
+# @eject: See the 'eject' command.
+#
+# @external-snapshot: See the 'blockdev-snapshot-sync' command.
+#
+# @internal-snapshot: See the 'blockdev-snapshot-internal-sync' command.
+#
+# @internal-snapshot-delete: See the 'blockdev-snapshot-delete-internal-sync' command.
+#
+# @mirror: See the 'drive-mirror' command.
+#
+# @resize: See the 'block-resize' command.
+#
+# @stream: See the 'block-stream' command.
+#
+# Since: 2.0
+##
+{ 'enum': 'BlockOpType',
+  'data': [
+'backup-source',
+'backup-target',
+'change',
+'commit',
+'dataplane',
+'drive-del',
+'eject',
+'external-snapshot',
+'internal-snapshot',
+'internal-snapshot-delete',
+'mirror',
+'resize',
+'stream'
+] }
+
+##
 # @BlockJobInfo:
 #
 # Information about a long-running block device operation.
-- 
1.8.5.1




[Qemu-devel] [PATCH v9 00/11] Drop in_use from BlockDriverState and enable point-in-time snapshot exporting over NBD

2014-01-07 Thread Fam Zheng
This series adds support for point-in-time snapshot NBD exporting based on
blockdev-backup (a variant of drive-backup with an existing device as target).

We get a thin point-in-time snapshot through the COW mechanism of drive-backup,
and export it through the built-in NBD server. The steps are as below:

 1. (SHELL) qemu-img create -f qcow2 BACKUP.qcow2 <SIZE>

(Alternatively we can use -o backing_file=RUNNING-VM.img to avoid explicitly
providing the size ourselves, but that is risky because RUNNING-VM.img is
used r/w by the guest. Whether or not the backing file is set in the image
file doesn't matter, as we are going to override the backing hd in the next
step)

 2. (QMP) blockdev-add backing=source-drive file.driver=file 
file.filename=BACKUP.qcow2 id=target0 if=none driver=qcow2

(where source-drive is the running BlockDriverState name for
RUNNING-VM.img. This patch implements "backing=" option to override
backing_hd for added drive)

 3. (QMP) blockdev-backup device=source-drive sync=none target=target0

(this is the QMP command introduced by this series, which uses a named
device as the target of drive-backup)

 4. (QMP) nbd-server-add device=target0

When image fleecing is done:

 1. (QMP) block-job-cancel device=source-drive

 2. (HMP) drive_del target0

 3. (SHELL) rm BACKUP.qcow2

v9: Rebased to qemu.git. Address Stefan's comments:

[05/11] block: Add bdrv_set_backing_hd()
Set bs->backing_file and bs->backing_format.

[06/11] block: Add backing_blocker in BlockDriverState
Reuse bdrv_set_backing_hd().

[07/11] block: Parse "backing" option to reference existing BDS
Fix use-after-free.
Check for "backing=" and "backing.file=" conflict.
Remove unintended bdrv_swap hunks.

[08/11] block: Support dropping active in bdrv_drop_intermediate
Fix function comment.

[09/11] stream: Use bdrv_drop_intermediate and drop close_unused_images



Fam Zheng (11):
  qapi: Add BlockOperationType enum
  block: Introduce op_blockers to BlockDriverState
  block: Replace in_use with operation blocker
  block: Move op_blocker check from block_job_create to its caller
  block: Add bdrv_set_backing_hd()
  block: Add backing_blocker in BlockDriverState
  block: Parse "backing" option to reference existing BDS
  block: Support dropping active in bdrv_drop_intermediate
  stream: Use bdrv_drop_intermediate and drop close_unused_images
  qmp: Add command 'blockdev-backup'
  block: Allow backup on referenced named BlockDriverState

 block-migration.c   |   7 +-
 block.c | 306 +++-
 block/backup.c  |  21 +++
 block/commit.c  |   1 +
 block/stream.c  |  28 +---
 blockdev.c  |  70 +++--
 blockjob.c  |  14 +-
 hw/block/dataplane/virtio-blk.c |  19 ++-
 include/block/block.h   |  10 +-
 include/block/block_int.h   |   9 +-
 include/block/blockjob.h|   3 +
 qapi-schema.json|  99 +
 qmp-commands.hx |  44 ++
 13 files changed, 477 insertions(+), 154 deletions(-)

-- 
1.8.5.1




<    6   7   8   9   10   11   12   13   14   15   >