On Wed, Oct 03, 2018 at 11:41:41PM -0700, Ori Bernstein wrote: > Thanks, another update based on Reyk's feeback and fixes. >
You missed one thing: jmc@'s manpage comments. For everything else: Looks good! Tests work fine. OK reyk@ Reyk > diff --git regress/usr.sbin/vmd/diskfmt/Makefile > regress/usr.sbin/vmd/diskfmt/Makefile > index c2a5f42d5f6..1f8673e0e26 100644 > --- regress/usr.sbin/vmd/diskfmt/Makefile > +++ regress/usr.sbin/vmd/diskfmt/Makefile > @@ -11,7 +11,7 @@ > VMD_DIR=$(BSDSRCDIR)/usr.sbin/vmd/ > > PROG=vioscribble > -SRCS=vioscribble.c $(VMD_DIR)/vioqcow2.c $(VMD_DIR)/vioraw.c > +SRCS=vioscribble.c vioqcow2.c vioraw.c > CFLAGS+=-I$(VMD_DIR) -pthread > LDFLAGS+=-pthread > > @@ -26,3 +26,6 @@ scribble-images: > .PHONY: ${REGRESS_TARGETS} scribble-images > > .include <bsd.regress.mk> > + > +vioqcow2.c vioraw.c: $(VMD_DIR)/vioqcow2.c $(VMD_DIR)/vioraw.c > + cp $(VMD_DIR)/vioqcow2.c $(VMD_DIR)/vioraw.c . > diff --git regress/usr.sbin/vmd/diskfmt/vioscribble.c > regress/usr.sbin/vmd/diskfmt/vioscribble.c > index 14d720db652..1da8efedac7 100644 > --- regress/usr.sbin/vmd/diskfmt/vioscribble.c > +++ regress/usr.sbin/vmd/diskfmt/vioscribble.c > @@ -122,16 +122,18 @@ main(int argc, char **argv) > verbose = !!getenv("VERBOSE"); > qcfd = open("scribble.qc2", O_RDWR); > rawfd = open("scribble.raw", O_RDWR); > - if (qcfd == -1 || virtio_init_qcow2(&qcowfile, &qcsz, qcfd) == -1) > + if (qcfd == -1) > err(1, "unable to open qcow"); > - if (rawfd == -1 || virtio_init_raw(&rawfile, &rawsz, rawfd) == -1) > + if (virtio_init_qcow2(&qcowfile, &qcsz, &qcfd, 1) == -1) > + err(1, "unable to init qcow"); > + if (rawfd == -1 || virtio_init_raw(&rawfile, &rawsz, &rawfd, 1) == -1) > err(1, "unable to open raw"); > > srandom_deterministic(123); > > /* scribble to both disks */ > printf("scribbling...\n"); > - for (i = 0; i < 16; i++) { > + for (i = 0; i < 1024*16; i++) { > off = (random() % DISKSZ); > len = random() % sizeof buf + 1; > fill(off, buf, sizeof buf); > diff --git usr.sbin/vmctl/main.c usr.sbin/vmctl/main.c > index 8748ecfdedc..a3ab4672370 100644 > --- usr.sbin/vmctl/main.c > +++ usr.sbin/vmctl/main.c > @@ -67,7 +67,8 @@ int ctl_receive(struct parse_result *, int, char > *[]); > > struct ctl_command ctl_commands[] = { > { "console", CMD_CONSOLE, ctl_console, "id" }, > - { "create", CMD_CREATE, ctl_create, "\"path\" -s size", 1 }, > + { "create", CMD_CREATE, ctl_create, > + "\"path\" [-s size] [-b base]", 1 }, > { "load", CMD_LOAD, ctl_load, "\"path\"" }, > { "log", CMD_LOG, ctl_log, "[verbose|brief]" }, > { "reload", CMD_RELOAD, ctl_reload, "" }, > @@ -538,47 +539,55 @@ int > ctl_create(struct parse_result *res, int argc, char *argv[]) > { > int ch, ret, type; > - const char *paths[2], *disk, *format; > + const char *disk, *format, *base; > > if (argc < 2) > ctl_usage(res->ctl); > > + base = NULL; > type = parse_disktype(argv[1], &disk); > > - paths[0] = disk; > - paths[1] = NULL; > - > - if (unveil(paths[0], "rwc") == -1) > + if (pledge("stdio rpath wpath cpath unveil", NULL) == -1) > + err(1, "pledge"); > + if (unveil(disk, "rwc") == -1) > err(1, "unveil"); > > - if (pledge("stdio rpath wpath cpath", NULL) == -1) > - err(1, "pledge"); > argc--; > argv++; > > - while ((ch = getopt(argc, argv, "s:")) != -1) { > + while ((ch = getopt(argc, argv, "s:b:")) != -1) { > switch (ch) { > case 's': > if (parse_size(res, optarg, 0) != 0) > errx(1, "invalid size: %s", optarg); > break; > + case 'b': > + base = optarg; > + if (unveil(base, "r") == -1) > + err(1, "unveil"); > + break; > default: > ctl_usage(res->ctl); > /* NOTREACHED */ > } > } > + if (unveil(NULL, NULL)) > + err(1, "unveil"); > > - if (res->size == 0) { > - fprintf(stderr, "missing size argument\n"); > + if (base && type != VMDF_QCOW2) > + errx(1, "base images require qcow2 disk format"); > + if (res->size == 0 && !base) { > + fprintf(stderr, "could not create %s: missing size argument\n", > + disk); > ctl_usage(res->ctl); > } > > if (type == VMDF_QCOW2) { > format = "qcow2"; > - ret = create_qc2_imagefile(paths[0], res->size); > + ret = create_qc2_imagefile(disk, base, res->size); > } else { > format = "raw"; > - ret = create_raw_imagefile(paths[0], res->size); > + ret = create_raw_imagefile(disk, res->size); > } > > if (ret != 0) { > diff --git usr.sbin/vmctl/vmctl.8 usr.sbin/vmctl/vmctl.8 > index f7890ac99f8..7a02452789c 100644 > --- usr.sbin/vmctl/vmctl.8 > +++ usr.sbin/vmctl/vmctl.8 > @@ -50,7 +50,7 @@ Using > .Xr cu 1 > connect to the console of the VM with the specified > .Ar id . > -.It Cm create Ar path Fl s Ar size > +.It Cm create Ar path Fl s Op Ar size Op Fl b Ar base > Creates a VM disk image file with the specified > .Ar path > and > @@ -65,7 +65,14 @@ or > in order to specify the disk format. > If left unspecified, the format defaults to > .Pa raw > -if it cannot be derived automatically. > +if it cannot be derived automatically. For qcow2, a > +.Ar base > +image may be specified. The base image is not modified. The derived image > +contains only the changes written by the VM. When creating a derived image, > +the > +.Ar size > +may be omitted, and probed from the base image. If it is provided, it must > +match the base image size. > .It Cm load Ar filename > Load additional configuration from the specified file. > .It Cm log brief > diff --git usr.sbin/vmctl/vmctl.c usr.sbin/vmctl/vmctl.c > index b09e1115ff7..12db3f69525 100644 > --- usr.sbin/vmctl/vmctl.c > +++ usr.sbin/vmctl/vmctl.c > @@ -847,7 +847,8 @@ create_raw_imagefile(const char *imgfile_path, long > imgsize) > #define ALIGN(sz, align) \ > ((sz + align - 1) & ~(align - 1)) > int > -create_qc2_imagefile(const char *imgfile_path, long imgsize) > +create_qc2_imagefile(const char *imgfile_path, > + const char *base_path, long imgsize) > { > struct qcheader { > char magic[4]; > @@ -869,15 +870,33 @@ create_qc2_imagefile(const char *imgfile_path, long > imgsize) > uint64_t autoclearfeatures; > uint32_t reforder; > uint32_t headersz; > - } __packed hdr; > + } __packed hdr, basehdr; > int fd, ret; > + ssize_t base_len; > uint64_t l1sz, refsz, disksz, initsz, clustersz; > uint64_t l1off, refoff, v, i, l1entrysz, refentrysz; > uint16_t refs; > > - disksz = 1024*1024*imgsize; > + disksz = 1024 * 1024 * imgsize; > + > + if (base_path) { > + fd = open(base_path, O_RDONLY); > + if (read(fd, &basehdr, sizeof(basehdr)) != sizeof(basehdr)) > + err(1, "failure to read base image header"); > + close(fd); > + if (strncmp(basehdr.magic, > + VM_MAGIC_QCOW, strlen(VM_MAGIC_QCOW)) != 0) > + errx(1, "base image is not a qcow2 file"); > + if (!disksz) > + disksz = betoh64(basehdr.disksz); > + else if (disksz != betoh64(basehdr.disksz)) > + errx(1, "base size does not match requested size"); > + } > + if (!base_path && !disksz) > + errx(1, "missing disk size"); > + > clustersz = (1<<16); > - l1off = ALIGN(sizeof hdr, clustersz); > + l1off = ALIGN(sizeof(hdr), clustersz); > > l1entrysz = clustersz * clustersz / 8; > l1sz = (disksz + l1entrysz - 1) / l1entrysz; > @@ -887,11 +906,12 @@ create_qc2_imagefile(const char *imgfile_path, long > imgsize) > refsz = (disksz + refentrysz - 1) / refentrysz; > > initsz = ALIGN(refoff + refsz*clustersz, clustersz); > + base_len = base_path ? strlen(base_path) : 0; > > - memcpy(hdr.magic, "QFI\xfb", 4); > + memcpy(hdr.magic, VM_MAGIC_QCOW, strlen(VM_MAGIC_QCOW)); > hdr.version = htobe32(3); > - hdr.backingoff = htobe64(0); > - hdr.backingsz = htobe32(0); > + hdr.backingoff = htobe64(base_path ? sizeof(hdr) : 0); > + hdr.backingsz = htobe32(base_len); > hdr.clustershift = htobe32(16); > hdr.disksz = htobe64(disksz); > hdr.cryptmethod = htobe32(0); > @@ -905,7 +925,7 @@ create_qc2_imagefile(const char *imgfile_path, long > imgsize) > hdr.compatfeatures = htobe64(0); > hdr.autoclearfeatures = htobe64(0); > hdr.reforder = htobe32(4); > - hdr.headersz = htobe32(sizeof hdr); > + hdr.headersz = htobe32(sizeof(hdr)); > > /* Refuse to overwrite an existing image */ > fd = open(imgfile_path, O_RDWR | O_CREAT | O_TRUNC | O_EXCL, > @@ -914,7 +934,11 @@ create_qc2_imagefile(const char *imgfile_path, long > imgsize) > return (errno); > > /* Write out the header */ > - if (write(fd, &hdr, sizeof hdr) != sizeof hdr) > + if (write(fd, &hdr, sizeof(hdr)) != sizeof(hdr)) > + goto error; > + > + /* Add the base image */ > + if (base_path && write(fd, base_path, base_len) != base_len) > goto error; > > /* Extend to desired size, and add one refcount cluster */ > diff --git usr.sbin/vmctl/vmctl.h usr.sbin/vmctl/vmctl.h > index 006411d9785..2d0355450ee 100644 > --- usr.sbin/vmctl/vmctl.h > +++ usr.sbin/vmctl/vmctl.h > @@ -87,7 +87,7 @@ __dead void > > /* vmctl.c */ > int create_raw_imagefile(const char *, long); > -int create_qc2_imagefile(const char *, long); > +int create_qc2_imagefile(const char *, const char *, long); > int vm_start(uint32_t, const char *, int, int, char **, int, > char **, int *, char *, char *, char *); > int vm_start_complete(struct imsg *, int *, int); > diff --git usr.sbin/vmd/config.c usr.sbin/vmd/config.c > index 550b73c1a39..68be738d304 100644 > --- usr.sbin/vmd/config.c > +++ usr.sbin/vmd/config.c > @@ -35,6 +35,7 @@ > #include <util.h> > #include <errno.h> > #include <imsg.h> > +#include <libgen.h> > > #include "proc.h" > #include "vmd.h" > @@ -176,16 +177,21 @@ config_getreset(struct vmd *env, struct imsg *imsg) > int > config_setvm(struct privsep *ps, struct vmd_vm *vm, uint32_t peerid, uid_t > uid) > { > + int diskfds[VMM_MAX_DISKS_PER_VM][VM_MAX_BASE_PER_DISK]; > struct vmd_if *vif; > struct vmop_create_params *vmc = &vm->vm_params; > struct vm_create_params *vcp = &vmc->vmc_params; > - unsigned int i; > + unsigned int i, j; > int fd = -1, vmboot = 0; > - int kernfd = -1, *diskfds = NULL, *tapfds = NULL; > + int kernfd = -1; > + int *tapfds; > int cdromfd = -1; > int saved_errno = 0; > + int n = 0, aflags, oflags; > char ifname[IF_NAMESIZE], *s; > char path[PATH_MAX]; > + char base[PATH_MAX]; > + char expanded[PATH_MAX]; > unsigned int unit; > > errno = 0; > @@ -205,13 +211,9 @@ config_setvm(struct privsep *ps, struct vmd_vm *vm, > uint32_t peerid, uid_t uid) > } > } > > - diskfds = reallocarray(NULL, vcp->vcp_ndisks, sizeof(*diskfds)); > - if (diskfds == NULL) { > - log_warn("%s: can't allocate disk fds", __func__); > - goto fail; > - } > - for (i = 0; i < vcp->vcp_ndisks; i++) > - diskfds[i] = -1; > + for (i = 0; i < VMM_MAX_DISKS_PER_VM; i++) > + for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) > + diskfds[i][j] = -1; > > tapfds = reallocarray(NULL, vcp->vcp_nnics, sizeof(*tapfds)); > if (tapfds == NULL) { > @@ -289,22 +291,71 @@ config_setvm(struct privsep *ps, struct vmd_vm *vm, > uint32_t peerid, uid_t uid) > > /* Open disk images for child */ > for (i = 0 ; i < vcp->vcp_ndisks; i++) { > - /* Stat disk[i] to ensure it is a regular file */ > - if ((diskfds[i] = open(vcp->vcp_disks[i], > - O_RDWR|O_EXLOCK|O_NONBLOCK)) == -1) { > - log_warn("%s: can't open disk %s", __func__, > - vcp->vcp_disks[i]); > - errno = VMD_DISK_MISSING; > - goto fail; > - } > + if (strlcpy(path, vcp->vcp_disks[i], sizeof(path)) > + >= sizeof(path)) > + log_warnx("%s, disk path too long", __func__); > + memset(vmc->vmc_diskbases, 0, sizeof(vmc->vmc_diskbases)); > + oflags = O_RDWR|O_EXLOCK|O_NONBLOCK; > + aflags = R_OK|W_OK; > + for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) { > + /* Stat disk[i] to ensure it is a regular file */ > + if ((diskfds[i][j] = open(path, oflags)) == -1) { > + log_warn("%s: can't open disk %s", __func__, > + vcp->vcp_disks[i]); > + errno = VMD_DISK_MISSING; > + goto fail; > + } > > - if (vm_checkaccess(diskfds[i], > - vmc->vmc_checkaccess & VMOP_CREATE_DISK, > - uid, R_OK|W_OK) == -1) { > - log_warnx("vm \"%s\" no read/write access to disk %s", > - vcp->vcp_name, vcp->vcp_disks[i]); > - errno = EPERM; > - goto fail; > + if (vm_checkaccess(diskfds[i][j], > + vmc->vmc_checkaccess & VMOP_CREATE_DISK, > + uid, aflags) == -1) { > + log_warnx("vm \"%s\" unable to access " > + "disk %s", vcp->vcp_name, path); > + errno = EPERM; > + goto fail; > + } > + > + /* > + * Clear the write and exclusive flags for base images. > + * All writes should go to the top image, allowing them > + * to be shared. > + */ > + oflags = O_RDONLY|O_NONBLOCK; > + aflags = R_OK; > + n = virtio_get_base(diskfds[i][j], base, sizeof base, > + vmc->vmc_disktypes[i]); > + if (n == 0) > + break; > + if (n == -1) { > + log_warnx("vm \"%s\" unable to read " > + "base %s for disk %s", vcp->vcp_name, > + base, vcp->vcp_disks[i]); > + goto fail; > + } > + /* > + * Relative paths should be interpreted relative > + * to the disk image, rather than relative to the > + * directory vmd happens to be running in, since > + * this is the only userful interpretation. > + */ > + if (base[0] == '/') { > + if (realpath(base, path) == NULL) { > + log_warn("unable to resolve %s", base); > + goto fail; > + } > + } else { > + s = dirname(path); > + if (snprintf(expanded, sizeof(expanded), > + "%s/%s", s, base) >= (int)sizeof(expanded)) > { > + log_warn("path too long: %s/%s", > + s, base); > + goto fail; > + } > + if (realpath(expanded, path) == NULL) { > + log_warn("unable to resolve %s", base); > + goto fail; > + } > + } > } > } > > @@ -402,9 +453,13 @@ config_setvm(struct privsep *ps, struct vmd_vm *vm, > uint32_t peerid, uid_t uid) > NULL, 0); > > for (i = 0; i < vcp->vcp_ndisks; i++) { > - proc_compose_imsg(ps, PROC_VMM, -1, > - IMSG_VMDOP_START_VM_DISK, vm->vm_vmid, diskfds[i], > - &i, sizeof(i)); > + for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) { > + if (diskfds[i][j] == -1) > + break; > + proc_compose_imsg(ps, PROC_VMM, -1, > + IMSG_VMDOP_START_VM_DISK, vm->vm_vmid, > + diskfds[i][j], &i, sizeof(i)); > + } > } > for (i = 0; i < vcp->vcp_nnics; i++) { > proc_compose_imsg(ps, PROC_VMM, -1, > @@ -416,7 +471,6 @@ config_setvm(struct privsep *ps, struct vmd_vm *vm, > uint32_t peerid, uid_t uid) > proc_compose_imsg(ps, PROC_VMM, -1, > IMSG_VMDOP_START_VM_END, vm->vm_vmid, fd, NULL, 0); > > - free(diskfds); > free(tapfds); > > vm->vm_running = 1; > @@ -430,11 +484,10 @@ config_setvm(struct privsep *ps, struct vmd_vm *vm, > uint32_t peerid, uid_t uid) > close(kernfd); > if (cdromfd != -1) > close(cdromfd); > - if (diskfds != NULL) { > - for (i = 0; i < vcp->vcp_ndisks; i++) > - close(diskfds[i]); > - free(diskfds); > - } > + for (i = 0; i < vcp->vcp_ndisks; i++) > + for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) > + if (diskfds[i][j] != -1) > + close(diskfds[i][j]); > if (tapfds != NULL) { > for (i = 0; i < vcp->vcp_nnics; i++) > close(tapfds[i]); > @@ -489,7 +542,7 @@ int > config_getdisk(struct privsep *ps, struct imsg *imsg) > { > struct vmd_vm *vm; > - unsigned int n; > + unsigned int n, idx; > > errno = 0; > if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL) { > @@ -500,14 +553,18 @@ config_getdisk(struct privsep *ps, struct imsg *imsg) > IMSG_SIZE_CHECK(imsg, &n); > memcpy(&n, imsg->data, sizeof(n)); > > - if (n >= vm->vm_params.vmc_params.vcp_ndisks || > - vm->vm_disks[n] != -1 || imsg->fd == -1) { > + if (n >= vm->vm_params.vmc_params.vcp_ndisks || imsg->fd == -1) { > log_warnx("invalid disk id"); > errno = EINVAL; > return (-1); > } > - vm->vm_disks[n] = imsg->fd; > - > + idx = vm->vm_params.vmc_diskbases[n]++; > + if (idx >= VM_MAX_BASE_PER_DISK) { > + log_warnx("too many bases for disk"); > + errno = EINVAL; > + return (-1); > + } > + vm->vm_disks[n][idx] = imsg->fd; > return (0); > } > > diff --git usr.sbin/vmd/vioqcow2.c usr.sbin/vmd/vioqcow2.c > index c3211d186fa..25799cc5a3d 100644 > --- usr.sbin/vmd/vioqcow2.c > +++ usr.sbin/vmd/vioqcow2.c > @@ -104,8 +104,7 @@ static off_t xlate(struct qcdisk *, off_t, int *); > static int copy_cluster(struct qcdisk *, struct qcdisk *, off_t, off_t); > static off_t mkcluster(struct qcdisk *, struct qcdisk *, off_t, off_t); > static int inc_refs(struct qcdisk *, off_t, int); > -static int qc2_openpath(struct qcdisk *, char *, int); > -static int qc2_open(struct qcdisk *, int); > +static int qc2_open(struct qcdisk *, int *, size_t); > static ssize_t qc2_pread(void *, char *, size_t, off_t); > static ssize_t qc2_pwrite(void *, char *, size_t, off_t); > static void qc2_close(void *, int); > @@ -118,14 +117,14 @@ static void qc2_close(void *, int); > * May open snapshot base images. > */ > int > -virtio_init_qcow2(struct virtio_backing *file, off_t *szp, int fd) > +virtio_init_qcow2(struct virtio_backing *file, off_t *szp, int *fd, size_t > nfd) > { > struct qcdisk *diskp; > > diskp = malloc(sizeof(struct qcdisk)); > if (diskp == NULL) > return -1; > - if (qc2_open(diskp, fd) == -1) { > + if (qc2_open(diskp, fd, nfd) == -1) { > log_warnx("%s: could not open qcow2 disk", __func__); > return -1; > } > @@ -137,19 +136,40 @@ virtio_init_qcow2(struct virtio_backing *file, off_t > *szp, int fd) > return 0; > } > > -static int > -qc2_openpath(struct qcdisk *disk, char *path, int flags) > +ssize_t > +virtio_qcow2_get_base(int fd, char *path, size_t npath) > { > - int fd; > + struct qcheader header; > + uint64_t backingoff; > + uint32_t backingsz; > > - fd = open(path, flags); > - if (fd < 0) > + if (pread(fd, &header, sizeof(header), 0) != sizeof(header)) { > + log_warnx("%s: short read on header", __func__); > + return -1; > + } > + if (strncmp(header.magic, VM_MAGIC_QCOW, strlen(VM_MAGIC_QCOW)) != 0) { > + log_warn("%s: invalid magic numbers", __func__); > return -1; > - return qc2_open(disk, fd); > + } > + backingoff = be64toh(header.backingoff); > + backingsz = be32toh(header.backingsz); > + if (backingsz != 0) { > + if (backingsz >= npath - 1) { > + log_warn("%s: snapshot path too long", __func__); > + return -1; > + } > + if (pread(fd, path, backingsz, backingoff) != backingsz) { > + log_warnx("%s: could not read snapshot base name", > + __func__); > + return -1; > + } > + path[backingsz] = '\0'; > + } > + return backingsz; > } > > static int > -qc2_open(struct qcdisk *disk, int fd) > +qc2_open(struct qcdisk *disk, int *fds, size_t nfd) > { > char basepath[PATH_MAX]; > struct stat st; > @@ -157,14 +177,15 @@ qc2_open(struct qcdisk *disk, int fd) > uint64_t backingoff; > uint32_t backingsz; > size_t i; > - int version; > + int version, fd; > > pthread_rwlock_init(&disk->lock, NULL); > + fd = fds[0]; > disk->fd = fd; > disk->base = NULL; > disk->l1 = NULL; > > - if (pread(fd, &header, sizeof header, 0) != sizeof header) { > + if (pread(fd, &header, sizeof(header), 0) != sizeof(header)) { > log_warn("%s: short read on header", __func__); > goto error; > } > @@ -203,11 +224,11 @@ qc2_open(struct qcdisk *disk, int fd) > goto error; > } > > - disk->l1 = calloc(disk->l1sz, sizeof *disk->l1); > + disk->l1 = calloc(disk->l1sz, sizeof(*disk->l1)); > if (!disk->l1) > goto error; > - if (pread(disk->fd, disk->l1, 8*disk->l1sz, disk->l1off) > - != 8*disk->l1sz) { > + if (pread(disk->fd, disk->l1, 8 * disk->l1sz, disk->l1off) > + != 8 * disk->l1sz) { > log_warn("%s: unable to read qcow2 L1 table", __func__); > goto error; > } > @@ -222,14 +243,7 @@ qc2_open(struct qcdisk *disk, int fd) > backingoff = be64toh(header.backingoff); > backingsz = be32toh(header.backingsz); > if (backingsz != 0) { > - /* > - * FIXME: we need to figure out a way of opening these things, > - * otherwise we just crash with a pledge violation. > - */ > - log_warn("%s: unsupported external snapshot images", __func__); > - goto error; > - > - if (backingsz >= sizeof basepath - 1) { > + if (backingsz >= sizeof(basepath) - 1) { > log_warn("%s: snapshot path too long", __func__); > goto error; > } > @@ -239,11 +253,17 @@ qc2_open(struct qcdisk *disk, int fd) > goto error; > } > basepath[backingsz] = 0; > + if (nfd <= 1) { > + log_warnx("%s: missing base image %s", __func__, > + basepath); > + goto error; > + } > + > > disk->base = calloc(1, sizeof(struct qcdisk)); > if (!disk->base) > goto error; > - if (qc2_openpath(disk->base, basepath, O_RDONLY) == -1) { > + if (qc2_open(disk->base, fds + 1, nfd - 1) == -1) { > log_warn("%s: could not open %s", basepath, __func__); > goto error; > } > @@ -428,7 +448,7 @@ xlate(struct qcdisk *disk, off_t off, int *inplace) > return 0; > } > l2off = (off / disk->clustersz) % l2sz; > - pread(disk->fd, &buf, sizeof(buf), l2tab + l2off*8); > + pread(disk->fd, &buf, sizeof(buf), l2tab + l2off * 8); > cluster = be64toh(buf); > /* > * cluster may be 0, but all future operations don't affect > @@ -521,12 +541,12 @@ mkcluster(struct qcdisk *disk, struct qcdisk *base, > off_t off, off_t src_phys) > cluster = disk->end; > disk->end += disk->clustersz; > buf = htobe64(cluster | QCOW2_INPLACE); > - if (pwrite(disk->fd, &buf, sizeof buf, l2tab + l2off*8) != sizeof(buf)) > + if (pwrite(disk->fd, &buf, sizeof(buf), l2tab + l2off * 8) != 8) > goto fail; > > /* TODO: lazily sync: currently VMD doesn't close things */ > buf = htobe64(disk->l1[l1off]); > - if (pwrite(disk->fd, &buf, sizeof buf, disk->l1off + 8*l1off) != 8) > + if (pwrite(disk->fd, &buf, sizeof(buf), disk->l1off + 8 * l1off) != 8) > goto fail; > if (inc_refs(disk, cluster, 1) == -1) > goto fail; > @@ -570,8 +590,8 @@ inc_refs(struct qcdisk *disk, off_t off, int newcluster) > nper = disk->clustersz / 2; > l1idx = (off / disk->clustersz) / nper; > l2idx = (off / disk->clustersz) % nper; > - l1off = disk->refoff + 8*l1idx; > - if (pread(disk->fd, &buf, sizeof buf, l1off) != 8) > + l1off = disk->refoff + 8 * l1idx; > + if (pread(disk->fd, &buf, sizeof(buf), l1off) != 8) > return -1; > > l2cluster = be64toh(buf); > @@ -583,19 +603,20 @@ inc_refs(struct qcdisk *disk, off_t off, int newcluster) > return -1; > } > buf = htobe64(l2cluster); > - if (pwrite(disk->fd, &buf, sizeof buf, l1off) != 8) { > + if (pwrite(disk->fd, &buf, sizeof(buf), l1off) != 8) { > return -1; > } > } > > refs = 1; > if (!newcluster) { > - if (pread(disk->fd, &refs, sizeof refs, l2cluster+2*l2idx) != 2) > + if (pread(disk->fd, &refs, sizeof(refs), > + l2cluster + 2 * l2idx) != 2) > return -1; > refs = be16toh(refs) + 1; > } > refs = htobe16(refs); > - if (pwrite(disk->fd, &refs, sizeof refs, l2cluster + 2*l2idx) != 2) { > + if (pwrite(disk->fd, &refs, sizeof(refs), l2cluster + 2 * l2idx) != 2) { > log_warn("%s: could not write ref block", __func__); > return -1; > } > diff --git usr.sbin/vmd/vioraw.c usr.sbin/vmd/vioraw.c > index e02ab67c5dc..ff4bbb3095e 100644 > --- usr.sbin/vmd/vioraw.c > +++ usr.sbin/vmd/vioraw.c > @@ -53,19 +53,21 @@ raw_close(void *file, int stayopen) > * returning -1 for error, 0 for success. > */ > int > -virtio_init_raw(struct virtio_backing *file, off_t *szp, int fd) > +virtio_init_raw(struct virtio_backing *file, off_t *szp, int *fd, size_t nfd) > { > off_t sz; > int *fdp; > > - sz = lseek(fd, 0, SEEK_END); > + if (nfd != 1) > + return -1; > + sz = lseek(fd[0], 0, SEEK_END); > if (sz == -1) > return -1; > > fdp = malloc(sizeof(int)); > if (!fdp) > return -1; > - *fdp = fd; > + *fdp = fd[0]; > file->p = fdp; > file->pread = raw_pread; > file->pwrite = raw_pwrite; > diff --git usr.sbin/vmd/virtio.c usr.sbin/vmd/virtio.c > index 93490344560..94818b24d9e 100644 > --- usr.sbin/vmd/virtio.c > +++ usr.sbin/vmd/virtio.c > @@ -1745,24 +1745,41 @@ vmmci_io(int dir, uint16_t reg, uint32_t *data, > uint8_t *intr, > return (0); > } > > +int > +virtio_get_base(int fd, char *path, size_t npath ,int type) > +{ > + switch (type) { > + case VMDF_RAW: > + return 0; > + case VMDF_QCOW2: > + return virtio_qcow2_get_base(fd, path, npath); > + } > + log_warnx("%s: invalid disk format", __func__); > + return -1; > +} > + > +/* > + * Initializes a struct virtio_backing using the list of fds. > + */ > static int > -virtio_init_disk(struct virtio_backing *file, off_t *sz, int fd, int type) > +virtio_init_disk(struct virtio_backing *file, off_t *sz, > + int *fd, size_t nfd, int type) > { > /* > * probe disk types in order of preference, first one to work wins. > * TODO: provide a way of specifying the type and options. > */ > switch (type) { > - case VMDF_RAW: return virtio_init_raw(file, sz, fd); > - case VMDF_QCOW2: return virtio_init_qcow2(file, sz, fd); > + case VMDF_RAW: return virtio_init_raw(file, sz, fd, nfd); > + case VMDF_QCOW2: return virtio_init_qcow2(file, sz, fd, nfd); > } > log_warnx("%s: invalid disk format", __func__); > return -1; > } > > void > -virtio_init(struct vmd_vm *vm, int child_cdrom, int *child_disks, > - int *child_taps) > +virtio_init(struct vmd_vm *vm, int child_cdrom, > + int child_disks[][VM_MAX_BASE_PER_DISK], int *child_taps) > { > struct vmop_create_params *vmc = &vm->vm_params; > struct vm_create_params *vcp = &vmc->vmc_params; > @@ -1838,7 +1855,8 @@ virtio_init(struct vmd_vm *vm, int child_cdrom, int > *child_disks, > vioblk[i].vm_id = vcp->vcp_id; > vioblk[i].irq = pci_get_dev_irq(id); > if (virtio_init_disk(&vioblk[i].file, &vioblk[i].sz, > - child_disks[i], vmc->vmc_disktypes[i]) == -1) { > + child_disks[i], vmc->vmc_diskbases[i], > + vmc->vmc_disktypes[i]) == -1) { > log_warnx("%s: unable to determine disk format", > __func__); > return; > @@ -1967,7 +1985,7 @@ virtio_init(struct vmd_vm *vm, int child_cdrom, int > *child_disks, > vioscsi->vq[i].last_avail = 0; > } > if (virtio_init_disk(&vioscsi->file, &vioscsi->sz, > - child_cdrom, VMDF_RAW) == -1) { > + &child_cdrom, 1, VMDF_RAW) == -1) { > log_warnx("%s: unable to determine iso format", > __func__); > return; > @@ -2123,7 +2141,8 @@ vionet_restore(int fd, struct vmd_vm *vm, int > *child_taps) > } > > int > -vioblk_restore(int fd, struct vmop_create_params *vmc, int *child_disks) > +vioblk_restore(int fd, struct vmop_create_params *vmc, > + int child_disks[][VM_MAX_BASE_PER_DISK]) > { > struct vm_create_params *vcp = &vmc->vmc_params; > uint8_t i; > @@ -2149,7 +2168,8 @@ vioblk_restore(int fd, struct vmop_create_params *vmc, > int *child_disks) > return (-1); > } > if (virtio_init_disk(&vioblk[i].file, &vioblk[i].sz, > - child_disks[i], vmc->vmc_disktypes[i]) == -1) { > + child_disks[i], vmc->vmc_diskbases[i], > + vmc->vmc_disktypes[i]) == -1) { > log_warnx("%s: unable to determine disk format", > __func__); > return (-1); > @@ -2186,7 +2206,7 @@ vioscsi_restore(int fd, struct vm_create_params *vcp, > int child_cdrom) > return (-1); > } > > - if (virtio_init_disk(&vioscsi->file, &vioscsi->sz, child_cdrom, > + if (virtio_init_disk(&vioscsi->file, &vioscsi->sz, &child_cdrom, 1, > VMDF_RAW) == -1) { > log_warnx("%s: unable to determine iso format", __func__); > return (-1); > @@ -2198,8 +2218,8 @@ vioscsi_restore(int fd, struct vm_create_params *vcp, > int child_cdrom) > } > > int > -virtio_restore(int fd, struct vmd_vm *vm, int child_cdrom, int *child_disks, > - int *child_taps) > +virtio_restore(int fd, struct vmd_vm *vm, int child_cdrom, > + int child_disks[][VM_MAX_BASE_PER_DISK], int *child_taps) > { > struct vmop_create_params *vmc = &vm->vm_params; > struct vm_create_params *vcp = &vmc->vmc_params; > diff --git usr.sbin/vmd/virtio.h usr.sbin/vmd/virtio.h > index 46006916b6a..bb632bb5502 100644 > --- usr.sbin/vmd/virtio.h > +++ usr.sbin/vmd/virtio.h > @@ -257,10 +257,11 @@ struct ioinfo { > }; > > /* virtio.c */ > -void virtio_init(struct vmd_vm *, int, int *, int *); > +void virtio_init(struct vmd_vm *, int, int[][VM_MAX_BASE_PER_DISK], int *); > void virtio_shutdown(struct vmd_vm *); > int virtio_dump(int); > -int virtio_restore(int, struct vmd_vm *, int, int *, int *); > +int virtio_restore(int, struct vmd_vm *, int, > + int[][VM_MAX_BASE_PER_DISK], int *); > uint32_t vring_size(uint32_t); > > int virtio_rnd_io(int, uint16_t, uint32_t *, uint8_t *, void *, uint8_t); > @@ -270,12 +271,14 @@ void viornd_update_qs(void); > void viornd_update_qa(void); > int viornd_notifyq(void); > > -int virtio_init_raw(struct virtio_backing *dev, off_t *sz, int fd); > -int virtio_init_qcow2(struct virtio_backing *dev, off_t *sz, int fd); > +ssize_t virtio_qcow2_get_base(int, char *, size_t); > +int virtio_init_raw(struct virtio_backing *, off_t *, int*, size_t); > +int virtio_init_qcow2(struct virtio_backing *, off_t *, int*, size_t); > > int virtio_blk_io(int, uint16_t, uint32_t *, uint8_t *, void *, uint8_t); > int vioblk_dump(int); > -int vioblk_restore(int, struct vmop_create_params *, int *); > +int vioblk_restore(int, struct vmop_create_params *, > + int[][VM_MAX_BASE_PER_DISK]); > void vioblk_update_qs(struct vioblk_dev *); > void vioblk_update_qa(struct vioblk_dev *); > int vioblk_notifyq(struct vioblk_dev *); > diff --git usr.sbin/vmd/vm.c usr.sbin/vmd/vm.c > index ef4494d918b..37dfb95bb0d 100644 > --- usr.sbin/vmd/vm.c > +++ usr.sbin/vmd/vm.c > @@ -65,8 +65,8 @@ > > io_fn_t ioports_map[MAX_PORTS]; > > -int run_vm(int, int *, int *, struct vmop_create_params *, > - struct vcpu_reg_state *); > +int run_vm(int, int[][VM_MAX_BASE_PER_DISK], int *, > + struct vmop_create_params *, struct vcpu_reg_state *); > void vm_dispatch_vmm(int, short, void *); > void *event_thread(void *); > void *vcpu_run_loop(void *); > @@ -75,8 +75,10 @@ int vcpu_reset(uint32_t, uint32_t, struct vcpu_reg_state > *); > void create_memory_map(struct vm_create_params *); > int alloc_guest_mem(struct vm_create_params *); > int vmm_create_vm(struct vm_create_params *); > -void init_emulated_hw(struct vmop_create_params *, int, int *, int *); > -void restore_emulated_hw(struct vm_create_params *, int, int *, int *,int); > +void init_emulated_hw(struct vmop_create_params *, int, > + int[][VM_MAX_BASE_PER_DISK], int *); > +void restore_emulated_hw(struct vm_create_params *, int, int *, > + int[][VM_MAX_BASE_PER_DISK],int); > void vcpu_exit_inout(struct vm_run_params *); > uint8_t vcpu_exit_pci(struct vm_run_params *); > int vcpu_pic_intr(uint32_t, uint32_t, uint8_t); > @@ -327,7 +329,8 @@ start_vm(struct vmd_vm *vm, int fd) > > /* Find and open kernel image */ > if ((fp = vmboot_open(vm->vm_kernel, > - vm->vm_disks[0], vmc->vmc_disktypes[0], &vmboot)) == NULL) > + vm->vm_disks[0], vmc->vmc_diskbases[0], > + vmc->vmc_disktypes[0], &vmboot)) == NULL) > fatalx("failed to open kernel - exiting"); > > /* Load kernel image */ > @@ -903,7 +906,7 @@ vmm_create_vm(struct vm_create_params *vcp) > */ > void > init_emulated_hw(struct vmop_create_params *vmc, int child_cdrom, > - int *child_disks, int *child_taps) > + int child_disks[][VM_MAX_BASE_PER_DISK], int *child_taps) > { > struct vm_create_params *vcp = &vmc->vmc_params; > int i; > @@ -968,7 +971,7 @@ init_emulated_hw(struct vmop_create_params *vmc, int > child_cdrom, > */ > void > restore_emulated_hw(struct vm_create_params *vcp, int fd, > - int *child_taps, int *child_disks, int child_cdrom) > + int *child_taps, int child_disks[][VM_MAX_BASE_PER_DISK], int > child_cdrom) > { > /* struct vm_create_params *vcp = &vmc->vmc_params; */ > int i; > @@ -1029,8 +1032,9 @@ restore_emulated_hw(struct vm_create_params *vcp, int > fd, > * !0 : the VM exited abnormally or failed to start > */ > int > -run_vm(int child_cdrom, int *child_disks, int *child_taps, > - struct vmop_create_params *vmc, struct vcpu_reg_state *vrs) > +run_vm(int child_cdrom, int child_disks[][VM_MAX_BASE_PER_DISK], > + int *child_taps, struct vmop_create_params *vmc, > + struct vcpu_reg_state *vrs) > { > struct vm_create_params *vcp = &vmc->vmc_params; > struct vm_rwregs_params vregsp; > diff --git usr.sbin/vmd/vmboot.c usr.sbin/vmd/vmboot.c > index 44ceeb64a7e..718c9739fa5 100644 > --- usr.sbin/vmd/vmboot.c > +++ usr.sbin/vmd/vmboot.c > @@ -385,7 +385,7 @@ vmboot_loadfile(struct open_file *f, char *file, size_t > *size) > } > > FILE * > -vmboot_open(int kernel_fd, int disk_fd, unsigned int disk_type, > +vmboot_open(int kernel_fd, int *disk_fd, int nfd, unsigned int disk_type, > struct vmboot_params *vmboot) > { > char file[PATH_MAX]; > @@ -404,7 +404,7 @@ vmboot_open(int kernel_fd, int disk_fd, unsigned int > disk_type, > if (kernel_fd != -1) > return (fdopen(kernel_fd, "r")); > > - if (disk_fd == -1) > + if (disk_fd == NULL || nfd < 1) > return (NULL); > > if ((vfp = calloc(1, sizeof(*vfp))) == NULL) > @@ -414,20 +414,19 @@ vmboot_open(int kernel_fd, int disk_fd, unsigned int > disk_type, > > switch (vmboot->vbp_type) { > case VMDF_RAW: > - if (virtio_init_raw(vfp, &sz, disk_fd) == -1) { > + if (virtio_init_raw(vfp, &sz, disk_fd, nfd) == -1) { > log_debug("%s: could not open raw disk", __func__); > goto fail; > } > break; > case VMDF_QCOW2: > - if (virtio_init_qcow2(vfp, &sz, disk_fd) == -1) { > + if (virtio_init_qcow2(vfp, &sz, disk_fd, nfd) == -1) { > log_debug("%s: could not open qcow2 disk", __func__); > goto fail; > } > break; > } > > - vmboot->vbp_fd = disk_fd; > vmboot_file.f_devdata = vmboot; > > if ((vmboot->vbp_partoff = > diff --git usr.sbin/vmd/vmd.c usr.sbin/vmd/vmd.c > index 1571be21bc5..18a5e0d3d5d 100644 > --- usr.sbin/vmd/vmd.c > +++ usr.sbin/vmd/vmd.c > @@ -1097,7 +1097,7 @@ void > vm_stop(struct vmd_vm *vm, int keeptty, const char *caller) > { > struct privsep *ps = &env->vmd_ps; > - unsigned int i; > + unsigned int i, j; > > if (vm == NULL) > return; > @@ -1117,9 +1117,11 @@ vm_stop(struct vmd_vm *vm, int keeptty, const char > *caller) > close(vm->vm_iev.ibuf.fd); > } > for (i = 0; i < VMM_MAX_DISKS_PER_VM; i++) { > - if (vm->vm_disks[i] != -1) { > - close(vm->vm_disks[i]); > - vm->vm_disks[i] = -1; > + for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) { > + if (vm->vm_disks[i][j] != -1) { > + close(vm->vm_disks[i][j]); > + vm->vm_disks[i][j] = -1; > + } > } > } > for (i = 0; i < VMM_MAX_NICS_PER_VM; i++) { > @@ -1176,7 +1178,7 @@ vm_register(struct privsep *ps, struct > vmop_create_params *vmc, > struct vmop_owner *vmo = NULL; > struct vmd_user *usr = NULL; > uint32_t rng; > - unsigned int i; > + unsigned int i, j; > struct vmd_switch *sw; > char *s; > > @@ -1267,7 +1269,8 @@ vm_register(struct privsep *ps, struct > vmop_create_params *vmc, > vm->vm_user = usr; > > for (i = 0; i < VMM_MAX_DISKS_PER_VM; i++) > - vm->vm_disks[i] = -1; > + for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) > + vm->vm_disks[i][j] = -1; > for (i = 0; i < VMM_MAX_NICS_PER_VM; i++) > vm->vm_ifs[i].vif_fd = -1; > for (i = 0; i < vcp->vcp_nnics; i++) { > diff --git usr.sbin/vmd/vmd.h usr.sbin/vmd/vmd.h > index b348d12c757..b7c012854e8 100644 > --- usr.sbin/vmd/vmd.h > +++ usr.sbin/vmd/vmd.h > @@ -48,6 +48,7 @@ > #define VM_DEFAULT_DEVICE "hd0a" > #define VM_BOOT_CONF "/etc/boot.conf" > #define VM_NAME_MAX 64 > +#define VM_MAX_BASE_PER_DISK 4 > #define VM_TTYNAME_MAX 16 > #define MAX_TAP 256 > #define NR_BACKLOG 5 > @@ -169,6 +170,7 @@ struct vmop_create_params { > #define VMIFF_OPTMASK (VMIFF_LOCKED|VMIFF_LOCAL|VMIFF_RDOMAIN) > > unsigned int vmc_disktypes[VMM_MAX_DISKS_PER_VM]; > + unsigned int vmc_diskbases[VMM_MAX_DISKS_PER_VM]; > #define VMDF_RAW 0x01 > #define VMDF_QCOW2 0x02 > > @@ -202,7 +204,6 @@ struct vm_dump_header { > } __packed; > > struct vmboot_params { > - int vbp_fd; > off_t vbp_partoff; > char vbp_device[PATH_MAX]; > char vbp_image[PATH_MAX]; > @@ -241,7 +242,7 @@ struct vmd_vm { > uint32_t vm_vmid; > int vm_kernel; > int vm_cdrom; > - int vm_disks[VMM_MAX_DISKS_PER_VM]; > + int > vm_disks[VMM_MAX_DISKS_PER_VM][VM_MAX_BASE_PER_DISK]; > struct vmd_if vm_ifs[VMM_MAX_NICS_PER_VM]; > char *vm_ttyname; > int vm_tty; > @@ -407,7 +408,7 @@ int config_getif(struct privsep *, struct imsg *); > int config_getcdrom(struct privsep *, struct imsg *); > > /* vmboot.c */ > -FILE *vmboot_open(int, int, unsigned int, struct vmboot_params *); > +FILE *vmboot_open(int, int *, int, unsigned int, struct vmboot_params *); > void vmboot_close(FILE *, struct vmboot_params *); > > /* parse.y */ > @@ -415,4 +416,7 @@ int parse_config(const char *); > int cmdline_symset(char *); > int host(const char *, struct address *); > > +/* virtio.c */ > +int virtio_get_base(int, char *, size_t, int); > + > #endif /* VMD_H */ > diff --git usr.sbin/vmd/vmm.c usr.sbin/vmd/vmm.c > index 7757856323f..47e2b2324be 100644 > --- usr.sbin/vmd/vmm.c > +++ usr.sbin/vmd/vmm.c > @@ -608,7 +608,7 @@ vmm_start_vm(struct imsg *imsg, uint32_t *id, pid_t *pid) > struct vmd_vm *vm; > int ret = EINVAL; > int fds[2]; > - size_t i; > + size_t i, j; > > if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL) { > log_warnx("%s: can't find vm", __func__); > @@ -643,8 +643,11 @@ vmm_start_vm(struct imsg *imsg, uint32_t *id, pid_t *pid) > close(fds[1]); > > for (i = 0 ; i < vcp->vcp_ndisks; i++) { > - close(vm->vm_disks[i]); > - vm->vm_disks[i] = -1; > + for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) { > + if (vm->vm_disks[i][j] != -1) > + close(vm->vm_disks[i][j]); > + vm->vm_disks[i][j] = -1; > + } > } > for (i = 0 ; i < vcp->vcp_nnics; i++) { > close(vm->vm_ifs[i].vif_fd); > > -- > Ori Bernstein --