Qcow2 disks are currently buggy: We can't write more than 4 gigabytes
of data without wrapping around the write offset and clobbering data
that we shouldn't touch.  This is because of an integer promotion issue:

        disk->end = (disk->end + disk->clustersz - 1) & ~(disk->clustersz - 1);

Here, disk->clustersz is a uint32_t containing (1<<16), so it gets
converted to 0xffff00000. Since this is an unsigned value, it is not
sign extended, and therefore truncates disk->end + disk->clustersz.

This change converts the clustersz to an off_t in order to remove the
whole class of errors by avoiding type conversions entirely.

While I'm at it, this patch fixes up a few nits around logging, where
warn was used instead of warnx, leading to some bogus error strings
being printed, and adds a few checks that we should have been doing.
This patch also removes a lie in a comment.

OK?

diff --git usr.sbin/vmd/vioqcow2.c usr.sbin/vmd/vioqcow2.c
index 3a215599d49..5643f7ee166 100644
--- usr.sbin/vmd/vioqcow2.c
+++ usr.sbin/vmd/vioqcow2.c
@@ -79,11 +79,11 @@ struct qcdisk {
        int       fd;
        uint64_t *l1;
        off_t     end;
-       uint32_t  clustersz;
+       off_t     clustersz;
        off_t     disksz; /* In bytes */
-       uint32_t cryptmethod;
+       uint32_t  cryptmethod;
 
-       uint32_t l1sz;
+       off_t    l1sz;
        off_t    l1off;
 
        off_t    refoff;
@@ -151,7 +151,7 @@ virtio_qcow2_get_base(int fd, char *path, size_t npath, 
const char *dpath)
                return -1;
        }
        if (strncmp(header.magic, VM_MAGIC_QCOW, strlen(VM_MAGIC_QCOW)) != 0) {
-               log_warn("%s: invalid magic numbers", __func__);
+               log_warnx("%s: invalid magic numbers", __func__);
                return -1;
        }
        backingoff = be64toh(header.backingoff);
@@ -160,7 +160,7 @@ virtio_qcow2_get_base(int fd, char *path, size_t npath, 
const char *dpath)
                return 0;
 
        if (backingsz >= npath - 1) {
-               log_warn("%s: snapshot path too long", __func__);
+               log_warnx("%s: snapshot path too long", __func__);
                return -1;
        }
        if (pread(fd, path, backingsz, backingoff) != backingsz) {
@@ -207,7 +207,7 @@ qc2_open(struct qcdisk *disk, int *fds, size_t nfd)
        struct qcheader header;
        uint64_t backingoff;
        uint32_t backingsz;
-       size_t i;
+       off_t i;
        int version, fd;
 
        pthread_rwlock_init(&disk->lock, NULL);
@@ -254,6 +254,10 @@ qc2_open(struct qcdisk *disk, int *fds, size_t nfd)
                    disk->incompatfeatures & ~(QCOW2_DIRTY|QCOW2_CORRUPT));
                goto error;
        }
+       if (be32toh(header.reforder) != 4) {
+               log_warnx("%s: unsupported refcount size\n", __func__);
+               goto error;
+       }
 
        disk->l1 = calloc(disk->l1sz, sizeof(*disk->l1));
        if (!disk->l1)
@@ -418,6 +422,10 @@ qc2_pwrite(void *p, char *buf, size_t len, off_t off)
                        phys_off = mkcluster(disk, d, off, phys_off);
                if (phys_off == -1)
                        return -1;
+               if (phys_off < disk->clustersz) {
+                       log_warnx("writing reserved cluster");
+                       return -1;
+               }
                if (pwrite(disk->fd, buf, sz, phys_off) != sz)
                        return -1;
                off += sz;
@@ -488,7 +496,7 @@ xlate(struct qcdisk *disk, off_t off, int *inplace)
        if (inplace)
                *inplace = !!(cluster & QCOW2_INPLACE);
        if (cluster & QCOW2_COMPRESSED) {
-               log_warn("%s: compressed clusters unsupported", __func__);
+               log_warnx("%s: compressed clusters unsupported", __func__);
                goto err;
        }
        pthread_rwlock_unlock(&disk->lock);
@@ -527,11 +535,6 @@ mkcluster(struct qcdisk *disk, struct qcdisk *base, off_t 
off, off_t src_phys)
        if (l1off >= disk->l1sz)
                goto fail;
 
-       /*
-        * Align disk to cluster size, for ftruncate: Not strictly
-        * required, but it easier to eyeball buggy write offsets,
-        * and helps performance a bit.
-        */
        disk->end = (disk->end + disk->clustersz - 1) & ~(disk->clustersz - 1);
 
        l2tab = disk->l1[l1off];
@@ -584,6 +587,8 @@ mkcluster(struct qcdisk *disk, struct qcdisk *base, off_t 
off, off_t src_phys)
 
        pthread_rwlock_unlock(&disk->lock);
        clusteroff = off % disk->clustersz;
+       if (cluster + clusteroff < disk->clustersz)
+               fatalx("write would clobber header");
        return cluster + clusteroff;
 
 fail:

-- 
    Ori Bernstein

Reply via email to