Re: [Qemu-devel] [PATCH V18 5/6] add-cow file format core code.

Dong Xu Wang Mon, 22 Apr 2013 18:55:10 -0700

On 2013/4/18 18:03, Stefan Hajnoczi wrote:

On Wed, Apr 10, 2013 at 04:11:52PM +0800, Dong Xu Wang wrote:

+    header.cluster_bits = ffs(cluster_size) - 1;
+    if (header.cluster_bits < MIN_CLUSTER_BITS ||
+        header.cluster_bits > MAX_CLUSTER_BITS ||
+        (1 << header.cluster_bits) != cluster_size) {
+        error_report(
+            "Cluster size must be a power of two between %d and %dk",
+            1 << MIN_CLUSTER_BITS, 1 << (MAX_CLUSTER_BITS - 10));
+        return -EINVAL;
+    }
+
+   header.header_size = MAX(cluster_size, DEFAULT_HEADER_SIZE);


Indentation.

okay.

+    if (backing_filename) {
+        header.backing_offset = sizeof(header);
+        header.backing_size = strlen(backing_filename);
+
+        if (!backing_fmt) {
+            backing_bs = bdrv_new("image");
+            ret = bdrv_open(backing_bs, backing_filename, NULL,
+                            BDRV_O_RDWR | BDRV_O_CACHE_WB, NULL);
+            if (ret < 0) {
+                return ret;


backing_bs is leaked.

Okay. will fix.

+    ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR);
+    if (ret < 0) {
+        return ret;
+    }
+    snprintf(header.backing_fmt, sizeof(header.backing_fmt), "%s",
+             backing_fmt ? backing_fmt : "");
+    snprintf(header.image_fmt, sizeof(header.image_fmt), "%s",
+             image_format ? image_format : "raw");


snprintf() doesn't have the semantics in the add-cow specification:

" 44 - 59:    backing file format
               Format of backing file. It will be filled with
               0 if backing file name offset is 0. If backing
               file name offset is non-empty, it must be
               non-empty. It is coded in free-form ASCII, and
               is not NUL-terminated. Zero padded on the right.

   60 - 75:    image file format
               Format of image file. It must be non-empty. It
               is coded in free-form ASCII, and is not
               NUL-terminated. Zero padded on the right."

strncpy() does the zero padding and doesn't NUL-terminate if the max buffer
size is used.

Okay.

+    if ((s->header.compat_features & ACOW_F_ALL_ALLOCATED) == 0) {
+        snprintf(bs->backing_format, sizeof(bs->backing_format),
+                 "%s", s->header.backing_fmt);


s->header.backing_fmt is not NUL-terminated so using snprintf() is
inappropriate (could it read beyond the end of .backing_fmt?).

Okay.

+    }
+
+    if (s->header.cluster_bits < MIN_CLUSTER_BITS ||
+        s->header.cluster_bits > MAX_CLUSTER_BITS) {
+        ret = -EINVAL;
+        goto fail;
+    }
+
+    s->cluster_size = 1 << s->header.cluster_bits;
+    if (s->header.header_size != MAX(s->cluster_size, DEFAULT_HEADER_SIZE)) {
+        char buf[64];
+        snprintf(buf, sizeof(buf), "Header size: %d",


%u or PRIu32 since header_size is uint32_t.  This avoids compiler or
code scanner warnings.

Okay.

+    s->image_hd = bdrv_new("");
+    ret = bdrv_open(s->image_hd, image_filename, NULL, flags,
+                    bdrv_find_format(s->header.image_fmt));


Cannot use image_fmt as a string since it is not NUL-terminated.

Okay.

+static coroutine_fn int add_cow_co_writev(BlockDriverState *bs,
+                                          int64_t sector_num,
+                                          int remaining_sectors,
+                                          QEMUIOVector *qiov)
+{
+    BDRVAddCowState *s = bs->opaque;
+    int ret = 0, i;
+    QEMUIOVector hd_qiov;
+    uint8_t *table;
+    uint64_t offset;
+    int mask = s->cluster_sectors - 1;
+    int cluster_mask = s->cluster_size - 1;
+
+    qemu_co_mutex_lock(&s->lock);
+    qemu_iovec_init(&hd_qiov, qiov->niov);
+    ret = bdrv_co_writev(s->image_hd, sector_num,
+                         remaining_sectors, qiov);


All writes are serialized.  This means write performance will be very
poor for multi-threaded workloads.

qcow2 tracks allocating writes and allows them to execute at the same
time if they do not overlap clusters.

Okay, will refer qcow2 related code.

+
+    if (ret < 0) {
+        goto fail;
+    }
+    if ((s->header.compat_features & ACOW_F_ALL_ALLOCATED) == 0) {
+        /* Copy content of unmodified sectors */
+        if (!is_cluster_head(sector_num, s->cluster_sectors)
+            && !is_allocated(bs, sector_num)) {
+            ret = copy_sectors(bs, sector_num & ~mask, sector_num);
+            if (ret < 0) {
+                goto fail;
+            }
+        }
+
+        if (!is_cluster_tail(sector_num + remaining_sectors - 1,
+                             s->cluster_sectors)
+            && !is_allocated(bs, sector_num + remaining_sectors - 1)) {
+            ret = copy_sectors(bs, sector_num + remaining_sectors,
+                               ((sector_num + remaining_sectors) | mask) + 1);
+            if (ret < 0) {
+                goto fail;
+            }
+        }


This trashes the cluster when remaining_sectors = 0, sector_num =
cluster_sectors, and sector cluster_sectors - 1 is unallocated.

Probably best to return early when remaining_sectors == 0.

Okay.

+
+        for (i = sector_num / s->cluster_sectors;
+            i <= (sector_num + remaining_sectors - 1) / s->cluster_sectors;
+            i++) {
+            offset = s->header.header_size
+                + (offset_in_bitmap(i * s->cluster_sectors,
+                s->cluster_sectors) & (~cluster_mask));
+            ret = block_cache_get(bs, s->bitmap_cache, offset, (void 
**)&table);
+            if (ret < 0) {
+                goto fail;
+            }
+            if ((table[i / 8] & (1 << (i % 8))) == 0) {
+                table[i / 8] |= (1 << (i % 8));


i is based on sector_num while table[] starts at offset, not sector 0.
The index expression i / 8 leads to out-of-bounds accesses.

I think you forgot to & (s->cluster_sectors - 1).

Okay.

+static coroutine_fn int add_cow_co_flush(BlockDriverState *bs)
+{
+    BDRVAddCowState *s = bs->opaque;
+    int ret;
+
+    qemu_co_mutex_lock(&s->lock);
+    if (s->bitmap_cache) {
+        ret = block_cache_flush(bs, s->bitmap_cache);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+    ret = bdrv_flush(s->image_hd);


This is the wrong way around.  We must flush image_hd first so that
valid data is on disk.  Then we can flush bitmap_cache to mark the
clusters allocated.

Beyond explicit flushes you also need to make sure that image_hd is
flushed *before* bitmap_cache tables are written out (e.g. cache
eviction when the cache becomes full).  It seems this code is missing.

Also please use bdrv_co_flush() instead of bdrv_flush() in
add_cow_co_flush() since this is a coroutine function.

Okay.

diff --git a/block/block-cache.c b/block/block-cache.c
index 3544691..4824632 100644
--- a/block/block-cache.c
+++ b/block/block-cache.c
@@ -130,7 +130,7 @@ static int block_cache_entry_flush(BlockDriverState *bs, 
BlockCache *c, int i)
      } else if (c->table_type == BLOCK_TABLE_L2) {
          BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
      } else if (c->table_type == BLOCK_TABLE_BITMAP) {
-        BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
+        BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
      }

      ret = bdrv_pwrite(bs->file, c->entries[i].offset,
@@ -265,7 +265,7 @@ static int block_cache_do_get(BlockDriverState *bs, 
BlockCache *c,
          if (c->table_type == BLOCK_TABLE_L2) {
              BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
          } else if (c->table_type == BLOCK_TABLE_BITMAP) {
-            BLKDBG_EVENT(bs->file, BLKDBG_COW_READ);
+            BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
          }

          ret = bdrv_pread(bs->file, offset, c->entries[i].table,


I commented on this in the previous patch.  Please squash this fix into
the previous patch.

Okay.

diff --git a/block/qcow2.h b/block/qcow2.h
index e7f6aec..a4e514b 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -98,6 +98,9 @@ typedef struct QCowSnapshot {
      uint64_t vm_clock_nsec;
  } QCowSnapshot;

+struct BlockCache;
+typedef struct BlockCache BlockCache;
+
  typedef struct Qcow2UnknownHeaderExtension {
      uint32_t magic;
      uint32_t len;
@@ -389,7 +392,4 @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs, const 
char *snapshot_name);
  void qcow2_free_snapshots(BlockDriverState *bs);
  int qcow2_read_snapshots(BlockDriverState *bs);

-/* qcow2-cache.c functions */
-
-


More qcow2-cache.c move cleanups?  Please squash into the previous
patch.

Okay.

Re: [Qemu-devel] [PATCH V18 5/6] add-cow file format core code.

Reply via email to