Qcow2 disks are currently buggy: We can't write more than 4 gigabytes
of data without wrapping around the write offset and clobbering data
that we shouldn't touch. This is because of an integer promotion issue:
disk->end = (disk->end + disk->clustersz - 1) & ~(disk->clustersz - 1);
Here, disk->clustersz is a uint32_t containing (1<<16), so it gets
converted to 0xffff00000. Since this is an unsigned value, it is not
sign extended, and therefore truncates disk->end + disk->clustersz.
This change converts the clustersz to an off_t in order to remove the
whole class of errors by avoiding type conversions entirely.
While I'm at it, this patch fixes up a few nits around logging, where
warn was used instead of warnx, leading to some bogus error strings
being printed, and adds a few checks that we should have been doing.
This patch also removes a lie in a comment.
OK?
diff --git usr.sbin/vmd/vioqcow2.c usr.sbin/vmd/vioqcow2.c
index 3a215599d49..5643f7ee166 100644
--- usr.sbin/vmd/vioqcow2.c
+++ usr.sbin/vmd/vioqcow2.c
@@ -79,11 +79,11 @@ struct qcdisk {
int fd;
uint64_t *l1;
off_t end;
- uint32_t clustersz;
+ off_t clustersz;
off_t disksz; /* In bytes */
- uint32_t cryptmethod;
+ uint32_t cryptmethod;
- uint32_t l1sz;
+ off_t l1sz;
off_t l1off;
off_t refoff;
@@ -151,7 +151,7 @@ virtio_qcow2_get_base(int fd, char *path, size_t npath,
const char *dpath)
return -1;
}
if (strncmp(header.magic, VM_MAGIC_QCOW, strlen(VM_MAGIC_QCOW)) != 0) {
- log_warn("%s: invalid magic numbers", __func__);
+ log_warnx("%s: invalid magic numbers", __func__);
return -1;
}
backingoff = be64toh(header.backingoff);
@@ -160,7 +160,7 @@ virtio_qcow2_get_base(int fd, char *path, size_t npath,
const char *dpath)
return 0;
if (backingsz >= npath - 1) {
- log_warn("%s: snapshot path too long", __func__);
+ log_warnx("%s: snapshot path too long", __func__);
return -1;
}
if (pread(fd, path, backingsz, backingoff) != backingsz) {
@@ -207,7 +207,7 @@ qc2_open(struct qcdisk *disk, int *fds, size_t nfd)
struct qcheader header;
uint64_t backingoff;
uint32_t backingsz;
- size_t i;
+ off_t i;
int version, fd;
pthread_rwlock_init(&disk->lock, NULL);
@@ -254,6 +254,10 @@ qc2_open(struct qcdisk *disk, int *fds, size_t nfd)
disk->incompatfeatures & ~(QCOW2_DIRTY|QCOW2_CORRUPT));
goto error;
}
+ if (be32toh(header.reforder) != 4) {
+ log_warnx("%s: unsupported refcount size\n", __func__);
+ goto error;
+ }
disk->l1 = calloc(disk->l1sz, sizeof(*disk->l1));
if (!disk->l1)
@@ -418,6 +422,10 @@ qc2_pwrite(void *p, char *buf, size_t len, off_t off)
phys_off = mkcluster(disk, d, off, phys_off);
if (phys_off == -1)
return -1;
+ if (phys_off < disk->clustersz) {
+ log_warnx("writing reserved cluster");
+ return -1;
+ }
if (pwrite(disk->fd, buf, sz, phys_off) != sz)
return -1;
off += sz;
@@ -488,7 +496,7 @@ xlate(struct qcdisk *disk, off_t off, int *inplace)
if (inplace)
*inplace = !!(cluster & QCOW2_INPLACE);
if (cluster & QCOW2_COMPRESSED) {
- log_warn("%s: compressed clusters unsupported", __func__);
+ log_warnx("%s: compressed clusters unsupported", __func__);
goto err;
}
pthread_rwlock_unlock(&disk->lock);
@@ -527,11 +535,6 @@ mkcluster(struct qcdisk *disk, struct qcdisk *base, off_t
off, off_t src_phys)
if (l1off >= disk->l1sz)
goto fail;
- /*
- * Align disk to cluster size, for ftruncate: Not strictly
- * required, but it easier to eyeball buggy write offsets,
- * and helps performance a bit.
- */
disk->end = (disk->end + disk->clustersz - 1) & ~(disk->clustersz - 1);
l2tab = disk->l1[l1off];
@@ -584,6 +587,8 @@ mkcluster(struct qcdisk *disk, struct qcdisk *base, off_t
off, off_t src_phys)
pthread_rwlock_unlock(&disk->lock);
clusteroff = off % disk->clustersz;
+ if (cluster + clusteroff < disk->clustersz)
+ fatalx("write would clobber header");
return cluster + clusteroff;
fail:
--
Ori Bernstein