I think I have discovered a vulnerability in qemu. It is related to the block device drivers: that is, the backends which implement the functionality offered to a guest via emulated block devices such as the emulated IDE controller.
When a block device read or write request is made by the guest, nothing checks that the request is within the range supported by the backend, but the code in the backend typically assumes that the request is sensible. Depending on the backend, this can allow the guest to read and write arbitrary memory locations in qemu, and possibly gain control over the qemu process, escaping from the emulation/virtualisation. I have demonstrated to my own satisfaction that there is problem, using a modified Linux kernel as a guest with an instrumented CVS head qemu. I'm not much good as an exploit developer so I haven't done better than cause qemu to crash. I have also prepared a patch which I have checked prevents my test case but which I haven't subjected to anything resembling proper functional testing. I contacted privately five of the main qemu developers but the only response was from Andrzej Zaborowski who didn't consider the problem serious, saying Qemu never claimed to be secure. Of course it's better to be secure than not if it doesn't add a bad overhead. and suggesting that I should just post to the qemu-devel mailing list. As well as pure emulation, qemu is also used by various virtualisation systems as a device model or emulation harness. For example, at least Xen (with HVM guests) is affected and and kvm probably too. So I didn't agree with Andrzej. I'm going to go and try to do some more testing of my patch, which is attached, in the context of Xen. I expect that unless we discover that I'm wrong, we'll be publishing a fix for this bug in Xen's xen-unstable and backporting it to xen-3.2 and xen-3.1, in perhaps a week or two. I'll post to the qemu list at that time too. There is not yet a CVE reference for this vulnerability. Ian.
diff --git a/block.c b/block.c index 0f8ad7b..d7f1114 100644 --- a/block.c +++ b/block.c @@ -123,6 +123,24 @@ void path_combine(char *dest, int dest_size, } } +static int bdrv_rw_badreq_sectors(BlockDriverState *bs, + int64_t sector_num, int nb_sectors) +{ + return + nb_sectors < 0 || + nb_sectors > bs->total_sectors || + sector_num > bs->total_sectors - nb_sectors; +} + +static int bdrv_rw_badreq_bytes(BlockDriverState *bs, + int64_t offset, int count) +{ + int64_t size = bs->total_sectors << SECTOR_BITS; + return + count < 0 || + count > size || + offset > size - count; +} static void bdrv_register(BlockDriver *bdrv) { @@ -375,6 +393,7 @@ int bdrv_open2(BlockDriverState *bs, const char *filename, int flags, } bs->drv = drv; bs->opaque = qemu_mallocz(drv->instance_size); + bs->total_sectors = 0; /* driver will set if it does not do getlength */ if (bs->opaque == NULL && drv->instance_size > 0) return -1; /* Note: for compatibility, we open disk image files as RDWR, and @@ -440,6 +459,7 @@ void bdrv_close(BlockDriverState *bs) bs->drv = NULL; /* call the change callback */ + bs->total_sectors = 0; bs->media_changed = 1; if (bs->change_cb) bs->change_cb(bs->change_opaque); @@ -505,6 +525,8 @@ int bdrv_read(BlockDriverState *bs, int64_t sector_num, if (!drv) return -ENOMEDIUM; + if (bdrv_rw_badreq_sectors(bs, sector_num, nb_sectors)) + return -EDOM; if (sector_num == 0 && bs->boot_sector_enabled && nb_sectors > 0) { memcpy(buf, bs->boot_sector_data, 512); sector_num++; @@ -545,6 +567,8 @@ int bdrv_write(BlockDriverState *bs, int64_t sector_num, return -ENOMEDIUM; if (bs->read_only) return -EACCES; + if (bdrv_rw_badreq_sectors(bs, sector_num, nb_sectors)) + return -EDOM; if (sector_num == 0 && bs->boot_sector_enabled && nb_sectors > 0) { memcpy(bs->boot_sector_data, buf, 512); } @@ -670,6 +694,8 @@ int bdrv_pread(BlockDriverState *bs, int64_t offset, return -ENOMEDIUM; if (!drv->bdrv_pread) return bdrv_pread_em(bs, offset, buf1, count1); + if (bdrv_rw_badreq_bytes(bs, offset, count1)) + return -EDOM; return drv->bdrv_pread(bs, offset, buf1, count1); } @@ -685,6 +711,8 @@ int bdrv_pwrite(BlockDriverState *bs, int64_t offset, return -ENOMEDIUM; if (!drv->bdrv_pwrite) return bdrv_pwrite_em(bs, offset, buf1, count1); + if (bdrv_rw_badreq_bytes(bs, offset, count1)) + return -EDOM; return drv->bdrv_pwrite(bs, offset, buf1, count1); } @@ -951,6 +979,8 @@ int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num, return -ENOMEDIUM; if (!drv->bdrv_write_compressed) return -ENOTSUP; + if (bdrv_rw_badreq_sectors(bs, sector_num, nb_sectors)) + return -EDOM; return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors); } @@ -1097,6 +1127,8 @@ BlockDriverAIOCB *bdrv_aio_read(BlockDriverState *bs, int64_t sector_num, if (!drv) return NULL; + if (bdrv_rw_badreq_sectors(bs, sector_num, nb_sectors)) + return NULL; /* XXX: we assume that nb_sectors == 0 is suppored by the async read */ if (sector_num == 0 && bs->boot_sector_enabled && nb_sectors > 0) { @@ -1128,6 +1160,8 @@ BlockDriverAIOCB *bdrv_aio_write(BlockDriverState *bs, int64_t sector_num, return NULL; if (bs->read_only) return NULL; + if (bdrv_rw_badreq_sectors(bs, sector_num, nb_sectors)) + return NULL; if (sector_num == 0 && bs->boot_sector_enabled && nb_sectors > 0) { memcpy(bs->boot_sector_data, buf, 512); }