On Tue, Jun 14, 2011 at 4:31 PM, Corey Bryant <brynt...@us.ibm.com> wrote: > sVirt provides SELinux MAC isolation for Qemu guest processes and their > corresponding resources (image files). sVirt provides this support > by labeling guests and resources with security labels that are stored > in file system extended attributes. Some file systems, such as NFS, do > not support the extended attribute security namespace, which is needed > for image file isolation when using the sVirt SELinux security driver > in libvirt. > > The proposed solution entails a combination of Qemu, libvirt, and > SELinux patches that work together to isolate multiple guests' images > when they're stored in the same NFS mount. This results in an > environment where sVirt isolation and NFS image file isolation can both > be provided. > > This patch contains the Qemu code to support this solution. I would > like to solicit input from the libvirt community prior to starting > the libvirt patch. > > Currently, Qemu opens an image file in addition to performing the > necessary read and write operations. The proposed solution will move > the open out of Qemu and into libvirt. Once libvirt opens an image > file for the guest, it will pass the file descriptor to Qemu via a > new fd: protocol. > > If the image file resides in an NFS mount, the following SELinux policy > changes will provide image isolation: > > - A new SELinux boolean is created (e.g. virt_read_write_nfs) to > allow Qemu (svirt_t) to only have SELinux read and write > permissions on nfs_t files > > - Qemu (svirt_t) also gets SELinux use permissions on libvirt > (virtd_t) file descriptors > > Following is a sample invocation of Qemu using the fd: protocol on > the command line: > > qemu -drive file=fd:4,format=qcow2 > > The fd: protocol is also supported by the drive_add monitor command. > This requires that the specified file descriptor is passed to the > monitor alongside a prior getfd monitor command. > > There are some additional features provided by certain image types > where Qemu reopens the image file. All of these scenarios will be > unsupported for the fd: protocol, at least for this patch: > > - The -snapshot command line option > - The savevm monitor command > - The snapshot_blkdev monitor command > - Starting Qemu with a backing file
There's also native CDROM device. Did you consider adding an explicit reopen method to block layer? > The thought is that this support can be added in the future, but is > not required for the initial fd: support. > > This patch was tested with the following formats: raw, cow, qcow, > qcow2, qed, and vmdk, using the fd: protocol from the command line > and the monitor. Tests were also run to verify existing file name > support and qemu-img were not regressed. Non-valid file descriptors, > fd: without format, snapshot and backing files were also tested. > > Signed-off-by: Corey Bryant <cor...@linux.vnet.ibm.com> > --- > block.c | 16 ++++++++++ > block.h | 1 + > block/cow.c | 5 +++ > block/qcow.c | 5 +++ > block/qcow2.c | 5 +++ > block/qed.c | 4 ++ > block/raw-posix.c | 81 > +++++++++++++++++++++++++++++++++++++++++++++++------ > block/vmdk.c | 5 +++ > block_int.h | 1 + > blockdev.c | 10 ++++++ > monitor.c | 5 +++ > monitor.h | 1 + > qemu-options.hx | 8 +++-- > qemu-tool.c | 5 +++ > 14 files changed, 140 insertions(+), 12 deletions(-) > > diff --git a/block.c b/block.c > index 24a25d5..500db84 100644 > --- a/block.c > +++ b/block.c > @@ -536,6 +536,10 @@ int bdrv_open(BlockDriverState *bs, const char > *filename, int flags, > char tmp_filename[PATH_MAX]; > char backing_filename[PATH_MAX]; > > + if (bdrv_is_fd_protocol(bs)) { > + return -ENOTSUP; > + } > + > /* if snapshot, we create a temporary backing file and open it > instead of opening 'filename' directly */ > > @@ -585,6 +589,10 @@ int bdrv_open(BlockDriverState *bs, const char > *filename, int flags, > > /* Find the right image format driver */ > if (!drv) { > + /* format must be specified for fd: protocol */ > + if (bdrv_is_fd_protocol(bs)) { > + return -ENOTSUP; > + } > ret = find_image_format(filename, &drv); > } > > @@ -1460,6 +1468,11 @@ int bdrv_enable_write_cache(BlockDriverState *bs) > return bs->enable_write_cache; > } > > +int bdrv_is_fd_protocol(BlockDriverState *bs) > +{ > + return bs->fd_protocol; > +} > + > /* XXX: no longer used */ > void bdrv_set_change_cb(BlockDriverState *bs, > void (*change_cb)(void *opaque, int reason), > @@ -1964,6 +1977,9 @@ int bdrv_snapshot_create(BlockDriverState *bs, > BlockDriver *drv = bs->drv; > if (!drv) > return -ENOMEDIUM; > + if (bdrv_is_fd_protocol(bs)) { > + return -ENOTSUP; > + } > if (drv->bdrv_snapshot_create) > return drv->bdrv_snapshot_create(bs, sn_info); > if (bs->file) > diff --git a/block.h b/block.h > index da7d39c..dd46d52 100644 > --- a/block.h > +++ b/block.h > @@ -182,6 +182,7 @@ int bdrv_is_removable(BlockDriverState *bs); > int bdrv_is_read_only(BlockDriverState *bs); > int bdrv_is_sg(BlockDriverState *bs); > int bdrv_enable_write_cache(BlockDriverState *bs); > +int bdrv_is_fd_protocol(BlockDriverState *bs); > int bdrv_is_inserted(BlockDriverState *bs); > int bdrv_media_changed(BlockDriverState *bs); > int bdrv_is_locked(BlockDriverState *bs); > diff --git a/block/cow.c b/block/cow.c > index 4cf543c..e17f8e7 100644 > --- a/block/cow.c > +++ b/block/cow.c > @@ -82,6 +82,11 @@ static int cow_open(BlockDriverState *bs, int flags) > pstrcpy(bs->backing_file, sizeof(bs->backing_file), > cow_header.backing_file); > > + if (bs->backing_file[0] != '\0' && bdrv_is_fd_protocol(bs)) { > + /* backing file currently not supported by fd: protocol */ > + goto fail; > + } > + > bitmap_size = ((bs->total_sectors + 7) >> 3) + sizeof(cow_header); > s->cow_sectors_offset = (bitmap_size + 511) & ~511; > return 0; > diff --git a/block/qcow.c b/block/qcow.c > index a26c886..1e46bdb 100644 > --- a/block/qcow.c > +++ b/block/qcow.c > @@ -157,6 +157,11 @@ static int qcow_open(BlockDriverState *bs, int flags) > if (bdrv_pread(bs->file, header.backing_file_offset, > bs->backing_file, len) != len) > goto fail; > bs->backing_file[len] = '\0'; > + > + if (bs->backing_file[0] != '\0' && bdrv_is_fd_protocol(bs)) { > + /* backing file currently not supported by fd: protocol */ > + goto fail; > + } > } > return 0; > > diff --git a/block/qcow2.c b/block/qcow2.c > index 8451ded..8b9c160 100644 > --- a/block/qcow2.c > +++ b/block/qcow2.c > @@ -270,6 +270,11 @@ static int qcow2_open(BlockDriverState *bs, int flags) > goto fail; > } > bs->backing_file[len] = '\0'; > + > + if (bs->backing_file[0] != '\0' && bdrv_is_fd_protocol(bs)) { > + ret = -ENOTSUP; > + goto fail; > + } > } > if (qcow2_read_snapshots(bs) < 0) { > ret = -EINVAL; > diff --git a/block/qed.c b/block/qed.c > index 3970379..5028897 100644 > --- a/block/qed.c > +++ b/block/qed.c > @@ -446,6 +446,10 @@ static int bdrv_qed_open(BlockDriverState *bs, int flags) > return ret; > } > > + if (bs->backing_file[0] != '\0' && bdrv_is_fd_protocol(bs)) { > + return -ENOTSUP; > + } > + > if (s->header.features & QED_F_BACKING_FORMAT_NO_PROBE) { > pstrcpy(bs->backing_format, sizeof(bs->backing_format), "raw"); > } > diff --git a/block/raw-posix.c b/block/raw-posix.c > index 4cd7d7a..c72de3d 100644 > --- a/block/raw-posix.c > +++ b/block/raw-posix.c > @@ -28,6 +28,7 @@ > #include "block_int.h" > #include "module.h" > #include "block/raw-posix-aio.h" > +#include "monitor.h" > > #ifdef CONFIG_COCOA > #include <paths.h> > @@ -183,7 +184,8 @@ static int raw_open_common(BlockDriverState *bs, const > char *filename, > int bdrv_flags, int open_flags) > { > BDRVRawState *s = bs->opaque; > - int fd, ret; > + int fd = -1; > + int ret; > > ret = raw_normalize_devicepath(&filename); > if (ret != 0) { > @@ -205,15 +207,17 @@ static int raw_open_common(BlockDriverState *bs, const > char *filename, > if (!(bdrv_flags & BDRV_O_CACHE_WB)) > s->open_flags |= O_DSYNC; > > - s->fd = -1; > - fd = qemu_open(filename, s->open_flags, 0644); > - if (fd < 0) { > - ret = -errno; > - if (ret == -EROFS) > - ret = -EACCES; > - return ret; > + if (s->fd == -1) { > + fd = qemu_open(filename, s->open_flags, 0644); > + if (fd < 0) { > + ret = -errno; > + if (ret == -EROFS) { > + ret = -EACCES; > + } > + return ret; > + } > + s->fd = fd; > } > - s->fd = fd; > s->aligned_buf = NULL; > > if ((bdrv_flags & BDRV_O_NOCACHE)) { > @@ -270,6 +274,7 @@ static int raw_open(BlockDriverState *bs, const char > *filename, int flags) > { > BDRVRawState *s = bs->opaque; > > + s->fd = -1; > s->type = FTYPE_FILE; > return raw_open_common(bs, filename, flags, 0); > } > @@ -890,6 +895,60 @@ static BlockDriver bdrv_file = { > .create_options = raw_create_options, > }; > > +static int raw_open_fd(BlockDriverState *bs, const char *filename, int flags) > +{ > + BDRVRawState *s = bs->opaque; > + const char *fd_str; > + int fd; > + > + /* extract the file descriptor - fail if it's not fd: */ > + if (!strstart(filename, "fd:", &fd_str)) { > + return -EINVAL; > + } > + > + if (!qemu_isdigit(fd_str[0])) { > + /* get fd from monitor */ > + fd = qemu_get_fd(fd_str); > + if (fd == -1) { > + return -EBADF; > + } > + } else { > + char *endptr = NULL; > + > + fd = strtol(fd_str, &endptr, 10); > + if (*endptr || (fd == 0 && fd_str == endptr)) { > + return -EBADF; > + } > + } > + > + s->fd = fd; > + s->type = FTYPE_FILE; > + > + return raw_open_common(bs, filename, flags, 0); > +} > + > +static BlockDriver bdrv_file_fd = { > + .format_name = "file", > + .protocol_name = "fd", > + .instance_size = sizeof(BDRVRawState), > + .bdrv_probe = NULL, /* no probe for protocols */ > + .bdrv_file_open = raw_open_fd, > + .bdrv_read = raw_read, > + .bdrv_write = raw_write, > + .bdrv_close = raw_close, > + .bdrv_flush = raw_flush, > + .bdrv_discard = raw_discard, > + > + .bdrv_aio_readv = raw_aio_readv, > + .bdrv_aio_writev = raw_aio_writev, > + .bdrv_aio_flush = raw_aio_flush, > + > + .bdrv_truncate = raw_truncate, > + .bdrv_getlength = raw_getlength, > + > + .create_options = raw_create_options, > +}; > + > /***********************************************/ > /* host device */ > > @@ -998,6 +1057,7 @@ static int hdev_open(BlockDriverState *bs, const char > *filename, int flags) > } > #endif > > + s->fd = -1; > s->type = FTYPE_FILE; > #if defined(__linux__) > { > @@ -1168,6 +1228,7 @@ static int floppy_open(BlockDriverState *bs, const char > *filename, int flags) > BDRVRawState *s = bs->opaque; > int ret; > > + s->fd = -1; > s->type = FTYPE_FD; > > /* open will not fail even if no floppy is inserted, so add O_NONBLOCK */ > @@ -1280,6 +1341,7 @@ static int cdrom_open(BlockDriverState *bs, const char > *filename, int flags) > { > BDRVRawState *s = bs->opaque; > > + s->fd = -1; > s->type = FTYPE_CD; > > /* open will not fail even if no CD is inserted, so add O_NONBLOCK */ > @@ -1503,6 +1565,7 @@ static void bdrv_file_init(void) > * Register all the drivers. Note that order is important, the driver > * registered last will get probed first. > */ > + bdrv_register(&bdrv_file_fd); > bdrv_register(&bdrv_file); > bdrv_register(&bdrv_host_device); > #ifdef __linux__ > diff --git a/block/vmdk.c b/block/vmdk.c > index 922b23d..2ea808e 100644 > --- a/block/vmdk.c > +++ b/block/vmdk.c > @@ -353,6 +353,11 @@ static int vmdk_parent_open(BlockDriverState *bs) > return -1; > > pstrcpy(bs->backing_file, end_name - p_name + 1, p_name); > + > + if (bs->backing_file[0] != '\0' && bdrv_is_fd_protocol(bs)) { > + /* backing file currently not supported by fd: protocol */ > + return -1; > + } > } > > return 0; > diff --git a/block_int.h b/block_int.h > index fa91337..a305ee2 100644 > --- a/block_int.h > +++ b/block_int.h > @@ -152,6 +152,7 @@ struct BlockDriverState { > int encrypted; /* if true, the media is encrypted */ > int valid_key; /* if true, a valid encryption key has been set */ > int sg; /* if true, the device is a /dev/sg* */ > + int fd_protocol; /* if true, the fd: protocol was specified */ bool? > /* event callback when inserting/removing */ > void (*change_cb)(void *opaque, int reason); > void *change_opaque; > diff --git a/blockdev.c b/blockdev.c > index e81e0ab..a536c20 100644 > --- a/blockdev.c > +++ b/blockdev.c > @@ -546,6 +546,10 @@ DriveInfo *drive_init(QemuOpts *opts, int > default_to_scsi) > > bdrv_flags |= ro ? 0 : BDRV_O_RDWR; > > + if (strncmp(file, "fd:", 3) == 0) { > + dinfo->bdrv->fd_protocol = 1; > + } > + > ret = bdrv_open(dinfo->bdrv, file, bdrv_flags, drv); > if (ret < 0) { > error_report("could not open disk image %s: %s", > @@ -606,6 +610,12 @@ int do_snapshot_blkdev(Monitor *mon, const QDict *qdict, > QObject **ret_data) > goto out; > } > > + if (bdrv_is_fd_protocol(bs)) { > + qerror_report(QERR_UNSUPPORTED); > + ret = -1; > + goto out; > + } > + > pstrcpy(old_filename, sizeof(old_filename), bs->filename); > > old_drv = bs->drv; > diff --git a/monitor.c b/monitor.c > index 59a3e76..ea60be2 100644 > --- a/monitor.c > +++ b/monitor.c > @@ -2832,6 +2832,11 @@ int monitor_get_fd(Monitor *mon, const char *fdname) > return -1; > } > > +int qemu_get_fd(const char *fdname) > +{ > + return cur_mon ? monitor_get_fd(cur_mon, fdname) : -1; > +} > + > static const mon_cmd_t mon_cmds[] = { > #include "hmp-commands.h" > { NULL, NULL, }, > diff --git a/monitor.h b/monitor.h > index 4f2d328..de5b987 100644 > --- a/monitor.h > +++ b/monitor.h > @@ -51,6 +51,7 @@ int monitor_read_bdrv_key_start(Monitor *mon, > BlockDriverState *bs, > void *opaque); > > int monitor_get_fd(Monitor *mon, const char *fdname); > +int qemu_get_fd(const char *fdname); > > void monitor_vprintf(Monitor *mon, const char *fmt, va_list ap) > GCC_FMT_ATTR(2, 0); > diff --git a/qemu-options.hx b/qemu-options.hx > index 1d5ad8b..f9b66f4 100644 > --- a/qemu-options.hx > +++ b/qemu-options.hx > @@ -116,7 +116,7 @@ using @file{/dev/cdrom} as filename (@pxref{host_drives}). > ETEXI > > DEF("drive", HAS_ARG, QEMU_OPTION_drive, > - "-drive [file=file][,if=type][,bus=n][,unit=m][,media=d][,index=i]\n" > + "-drive > [file=[fd:]file][,if=type][,bus=n][,unit=m][,media=d][,index=i]\n" > " [,cyls=c,heads=h,secs=s[,trans=t]][,snapshot=on|off]\n" > " [,cache=writethrough|writeback|none|unsafe][,format=f]\n" > " [,serial=s][,addr=A][,id=name][,aio=threads|native]\n" > @@ -129,10 +129,12 @@ STEXI > Define a new drive. Valid options are: > > @table @option > -@item file=@var{file} > +@item file=[fd:]@var{file} > This option defines which disk image (@pxref{disk_images}) to use with > this drive. If the filename contains comma, you must double it > -(for instance, "file=my,,file" to use file "my,file"). > +(for instance, "file=my,,file" to use file "my,file"). @option{fd:}@var{file} > +specifies the file descriptor of an already open disk image. > +@option{format=}@var{format} is required by @option{fd:}@var{file}. > @item if=@var{interface} > This option defines on which type on interface the drive is connected. > Available types are: ide, scsi, sd, mtd, floppy, pflash, virtio. > diff --git a/qemu-tool.c b/qemu-tool.c > index 41e5c41..8fe6b8c 100644 > --- a/qemu-tool.c > +++ b/qemu-tool.c > @@ -96,3 +96,8 @@ int64_t qemu_get_clock_ns(QEMUClock *clock) > { > return 0; > } > + > +int qemu_get_fd(const char *fdname) > +{ > + return -1; > +} > -- > 1.7.1 > > >