On Tue, Oct 02, 2018 at 11:13:35PM -0700, Ori Bernstein wrote:
> On Mon, 1 Oct 2018 11:24:01 -0700, Ori Bernstein <o...@eigenstate.org> wrote:
> 
> > On Mon, 1 Oct 2018 12:55:12 +0200
> > Reyk Floeter <r...@openbsd.org> wrote:
> > 
> > > Hi Ori,
> > > 
> > > On Sun, Sep 30, 2018 at 12:27:00PM -0700, Ori Bernstein wrote:
> > > > I've added support to vmd for external snapshots. That is,
> > > > snapshots that are derived from a base image. Data lookups
> > > > start in the derived image, and if the derived image does not
> > > > contain some data, the search proceeds ot the base image.
> > > > Multiple derived images may exist off of a single base image.
> > > > 
> > > 
> > > Nice work!  This will be quite useful, thanks.
> > > 
> > > I think I broke your diff as my last commit to derive the raw/qcow2
> > > format introduced some conflicts.  I had posted it on hackers@ and
> > > forgot that your aren't on the internal list yet - sorry for that.
> 
> Updated version. Changes from the last diff:
> 
> - Merge in syntax changes. 
> - Don't over-read when getting the base images.
> - Fix relative paths in base images.
> - Allow multiple derived images to use a single base image, and allow a user
>   with only read permisssions to base their images on top of it.
> - Probe the base image size, use/validate it when craeting disk images.
> - Fix style a bit (long lines, changing from sizeof foo to sizeof(foo).
> - Move a define out of vmmvar.h

You're still using VMM_MAX_BASE_PER_DISK instead of
VM_MAX_BASE_PER_DISK in the code.  I patched the diff to make it
compile without the vmmvar.h change here.

> - And update the manpage with these changes.
> - Improve error checking around creating/resolving base disk paths.
> 

Light testing works except of an issue with read-only base images; the
required fix is in the comments below.

Other than that, it is really cool to run many VMs from a single base
image.  In my tests, I installed OpenBSD once and started a few VMs
using the installed disk as a base.

More comments below.

Reyk

> 
> diff --git regress/usr.sbin/vmd/diskfmt/Makefile 
> regress/usr.sbin/vmd/diskfmt/Makefile
> index c2a5f42d5f6..1f8673e0e26 100644
> --- regress/usr.sbin/vmd/diskfmt/Makefile
> +++ regress/usr.sbin/vmd/diskfmt/Makefile
> @@ -11,7 +11,7 @@
>  VMD_DIR=$(BSDSRCDIR)/usr.sbin/vmd/
>  
>  PROG=vioscribble
> -SRCS=vioscribble.c $(VMD_DIR)/vioqcow2.c $(VMD_DIR)/vioraw.c
> +SRCS=vioscribble.c vioqcow2.c vioraw.c
>  CFLAGS+=-I$(VMD_DIR) -pthread
>  LDFLAGS+=-pthread
>  
> @@ -26,3 +26,6 @@ scribble-images:
>  .PHONY: ${REGRESS_TARGETS} scribble-images
>  
>  .include <bsd.regress.mk>
> +
> +vioqcow2.c vioraw.c: $(VMD_DIR)/vioqcow2.c $(VMD_DIR)/vioraw.c
> +     cp $(VMD_DIR)/vioqcow2.c $(VMD_DIR)/vioraw.c .
> diff --git regress/usr.sbin/vmd/diskfmt/vioscribble.c 
> regress/usr.sbin/vmd/diskfmt/vioscribble.c
> index 14d720db652..1da8efedac7 100644
> --- regress/usr.sbin/vmd/diskfmt/vioscribble.c
> +++ regress/usr.sbin/vmd/diskfmt/vioscribble.c
> @@ -122,16 +122,18 @@ main(int argc, char **argv)
>       verbose = !!getenv("VERBOSE");
>       qcfd = open("scribble.qc2", O_RDWR);
>       rawfd = open("scribble.raw", O_RDWR);
> -     if (qcfd == -1 || virtio_init_qcow2(&qcowfile, &qcsz, qcfd) == -1)
> +     if (qcfd == -1)
>               err(1, "unable to open qcow");
> -     if (rawfd == -1 || virtio_init_raw(&rawfile, &rawsz, rawfd) == -1)
> +     if (virtio_init_qcow2(&qcowfile, &qcsz, &qcfd, 1) == -1)
> +             err(1, "unable to init qcow");
> +     if (rawfd == -1 || virtio_init_raw(&rawfile, &rawsz, &rawfd, 1) == -1)
>               err(1, "unable to open raw");
>  
>       srandom_deterministic(123);
>  
>       /* scribble to both disks */
>       printf("scribbling...\n");
> -     for (i = 0; i < 16; i++) {
> +     for (i = 0; i < 1024*16; i++) {
>               off = (random() % DISKSZ);
>               len = random() % sizeof buf + 1;
>               fill(off, buf, sizeof buf);
> diff --git usr.sbin/vmctl/main.c usr.sbin/vmctl/main.c
> index 8748ecfdedc..4637256452b 100644
> --- usr.sbin/vmctl/main.c
> +++ usr.sbin/vmctl/main.c
> @@ -67,7 +67,8 @@ int          ctl_receive(struct parse_result *, int, char 
> *[]);
>  
>  struct ctl_command ctl_commands[] = {
>       { "console",    CMD_CONSOLE,    ctl_console,    "id" },
> -     { "create",     CMD_CREATE,     ctl_create,     "\"path\" -s size", 1 },
> +     { "create",     CMD_CREATE,     ctl_create,     
> +             "\"path\" [-s size] [-b base]", 1 },
>       { "load",       CMD_LOAD,       ctl_load,       "\"path\"" },
>       { "log",        CMD_LOG,        ctl_log,        "[verbose|brief]" },
>       { "reload",     CMD_RELOAD,     ctl_reload,     "" },
> @@ -538,47 +539,54 @@ int
>  ctl_create(struct parse_result *res, int argc, char *argv[])
>  {
>       int              ch, ret, type;
> -     const char      *paths[2], *disk, *format;
> +     const char      *disk, *format, *base;
>  
>       if (argc < 2)
>               ctl_usage(res->ctl);
>  
> +     base = NULL;
>       type = parse_disktype(argv[1], &disk);
>  
> -     paths[0] = disk;
> -     paths[1] = NULL;
> -
> -     if (unveil(paths[0], "rwc") == -1)
> +     if (unveil(disk, "rwc") == -1)
>               err(1, "unveil");
>  
> -     if (pledge("stdio rpath wpath cpath", NULL) == -1)
> -             err(1, "pledge");

Instead of doing the initial disk parsing without pledge, it is also
possible to pledge early with the "unveil" promise and to lock unveil
manually later with unveil(NULL, NULL).  It wouldn't need any other
pledge promises so it is not making it worse but better.  You can also
add another pledge without the *path and unveil promises after opening
the files.

>       argc--;
>       argv++;
>  
> -     while ((ch = getopt(argc, argv, "s:")) != -1) {
> +     while ((ch = getopt(argc, argv, "s:b:")) != -1) {
>               switch (ch) {
>               case 's':
>                       if (parse_size(res, optarg, 0) != 0)
>                               errx(1, "invalid size: %s", optarg);
>                       break;
> +             case 'b':
> +                     base = optarg;
> +                     if (unveil(base, "r") == -1)
> +                             err(1, "unveil");
> +                     break;
>               default:
>                       ctl_usage(res->ctl);
>                       /* NOTREACHED */
>               }
>       }
>  
> -     if (res->size == 0) {
> -             fprintf(stderr, "missing size argument\n");
> +     if (pledge("stdio rpath wpath cpath", NULL) == -1)
> +             err(1, "pledge");
> +
> +     if (base && type != VMDF_QCOW2)
> +             errx(1, "base images require qcow2 disk format");
> +     if (res->size == 0 && !base) {
> +             fprintf(stderr, "could not create %s: missing size argument\n",
> +                 disk);
>               ctl_usage(res->ctl);
>       }
>  
>       if (type == VMDF_QCOW2) {
>               format = "qcow2";
> -             ret = create_qc2_imagefile(paths[0], res->size);
> +             ret = create_qc2_imagefile(disk, base, res->size);
>       } else {
>               format = "raw";
> -             ret = create_raw_imagefile(paths[0], res->size);
> +             ret = create_raw_imagefile(disk, res->size);
>       }
>  
>       if (ret != 0) {
> diff --git usr.sbin/vmctl/vmctl.8 usr.sbin/vmctl/vmctl.8
> index f7890ac99f8..7a02452789c 100644
> --- usr.sbin/vmctl/vmctl.8
> +++ usr.sbin/vmctl/vmctl.8
> @@ -50,7 +50,7 @@ Using
>  .Xr cu 1
>  connect to the console of the VM with the specified
>  .Ar id .
> -.It Cm create Ar path Fl s Ar size
> +.It Cm create Ar path Fl s Op Ar size Op  Fl b Ar base
>  Creates a VM disk image file with the specified
>  .Ar path
>  and
> @@ -65,7 +65,14 @@ or
>  in order to specify the disk format.
>  If left unspecified, the format defaults to
>  .Pa raw
> -if it cannot be derived automatically.
> +if it cannot be derived automatically.  For qcow2, a
> +.Ar base
> +image may be specified. The base image is not modified. The derived image
> +contains only the changes written by the VM. When creating a derived image,
> +the
> +.Ar size
> +may be omitted, and probed from the base image. If it is provided, it must
> +match the base image size.
>  .It Cm load Ar filename
>  Load additional configuration from the specified file.
>  .It Cm log brief

... I'm with jmc@ on the manpage here.

> diff --git usr.sbin/vmctl/vmctl.c usr.sbin/vmctl/vmctl.c
> index b09e1115ff7..81fbdba9a28 100644
> --- usr.sbin/vmctl/vmctl.c
> +++ usr.sbin/vmctl/vmctl.c
> @@ -847,7 +847,8 @@ create_raw_imagefile(const char *imgfile_path, long 
> imgsize)
>  #define ALIGN(sz, align) \
>       ((sz + align - 1) & ~(align - 1))
>  int
> -create_qc2_imagefile(const char *imgfile_path, long imgsize)
> +create_qc2_imagefile(const char *imgfile_path,
> +    const char *base_path, long imgsize)
>  {
>       struct qcheader {
>               char magic[4];
> @@ -869,15 +870,30 @@ create_qc2_imagefile(const char *imgfile_path, long 
> imgsize)
>               uint64_t autoclearfeatures;
>               uint32_t reforder;
>               uint32_t headersz;
> -     } __packed hdr;
> +     } __packed hdr, basehdr;
>       int fd, ret;
> +     ssize_t base_len;
>       uint64_t l1sz, refsz, disksz, initsz, clustersz;
>       uint64_t l1off, refoff, v, i, l1entrysz, refentrysz;
>       uint16_t refs;
>  
>       disksz = 1024*1024*imgsize;

This is not part of the diff but sticks into my eye.

According to style(9):
"Unary operators don't require spaces; binary operators do."

So it would be slightly more readable and better knf:

        disksz = 1024 * 1024 * imgsize;

> +
> +     if (base_path) {
> +             fd = open(base_path, O_RDONLY);
> +             if (read(fd, &basehdr, sizeof(basehdr)) != sizeof(basehdr))
> +                     err(1, "failure to read base image header");

I think you should compare the VM_MAGIC_QCOW here as well, at least
the magic bytes without the version.  vmd will do it later but it is
good to prevent users from trying to use a non-qcow2 base early in the
process.

> +             close(fd);
> +             if (!disksz)
> +                     disksz = betoh64(basehdr.disksz);
> +             else if (disksz != betoh64(basehdr.disksz))
> +                     errx(1, "base size does not match requested size");
> +     }
> +     if (!base_path && !disksz)
> +             errx(1, "missing disk size");
> +
>       clustersz = (1<<16);
> -     l1off = ALIGN(sizeof hdr, clustersz);
> +     l1off = ALIGN(sizeof(hdr), clustersz);
>  
>       l1entrysz = clustersz * clustersz / 8;
>       l1sz = (disksz + l1entrysz - 1) / l1entrysz;
> @@ -887,11 +903,12 @@ create_qc2_imagefile(const char *imgfile_path, long 
> imgsize)
>       refsz = (disksz + refentrysz - 1) / refentrysz;
>  
>       initsz = ALIGN(refoff + refsz*clustersz, clustersz);
> +     base_len = base_path ? strlen(base_path) : 0;
>  
> -     memcpy(hdr.magic, "QFI\xfb", 4);
> +     memcpy(hdr.magic, VM_MAGIC_QCOW, strlen(VM_MAGIC_QCOW));
>       hdr.version             = htobe32(3);
> -     hdr.backingoff          = htobe64(0);
> -     hdr.backingsz           = htobe32(0);
> +     hdr.backingoff          = htobe64(base_path ? sizeof(hdr) : 0);
> +     hdr.backingsz           = htobe32(base_len);
>       hdr.clustershift        = htobe32(16);
>       hdr.disksz              = htobe64(disksz);
>       hdr.cryptmethod         = htobe32(0);
> @@ -905,7 +922,7 @@ create_qc2_imagefile(const char *imgfile_path, long 
> imgsize)
>       hdr.compatfeatures      = htobe64(0);
>       hdr.autoclearfeatures   = htobe64(0);
>       hdr.reforder            = htobe32(4);
> -     hdr.headersz            = htobe32(sizeof hdr);
> +     hdr.headersz            = htobe32(sizeof(hdr));
>  
>       /* Refuse to overwrite an existing image */
>       fd = open(imgfile_path, O_RDWR | O_CREAT | O_TRUNC | O_EXCL,
> @@ -914,7 +931,11 @@ create_qc2_imagefile(const char *imgfile_path, long 
> imgsize)
>               return (errno);
>  
>       /* Write out the header */
> -     if (write(fd, &hdr, sizeof hdr) != sizeof hdr)
> +     if (write(fd, &hdr, sizeof(hdr)) != sizeof(hdr))
> +             goto error;
> +
> +     /* Add the base image */
> +     if (base_path && write(fd, base_path, base_len) != base_len)
>               goto error;
>  
>       /* Extend to desired size, and add one refcount cluster */
> diff --git usr.sbin/vmctl/vmctl.h usr.sbin/vmctl/vmctl.h
> index 006411d9785..cdf50cad755 100644
> --- usr.sbin/vmctl/vmctl.h
> +++ usr.sbin/vmctl/vmctl.h
> @@ -87,7 +87,7 @@ __dead void
>  
>  /* vmctl.c */
>  int   create_raw_imagefile(const char *, long);
> -int   create_qc2_imagefile(const char *, long);
> +int   create_qc2_imagefile(const char *, const char*, long);

Add a space: const char *

>  int   vm_start(uint32_t, const char *, int, int, char **, int,
>           char **, int *, char *, char *, char *);
>  int   vm_start_complete(struct imsg *, int *, int);
> diff --git usr.sbin/vmd/config.c usr.sbin/vmd/config.c
> index 550b73c1a39..8350d0fc4fc 100644
> --- usr.sbin/vmd/config.c
> +++ usr.sbin/vmd/config.c
> @@ -35,6 +35,7 @@
>  #include <util.h>
>  #include <errno.h>
>  #include <imsg.h>
> +#include <libgen.h>
>  
>  #include "proc.h"
>  #include "vmd.h"
> @@ -176,16 +177,21 @@ config_getreset(struct vmd *env, struct imsg *imsg)
>  int
>  config_setvm(struct privsep *ps, struct vmd_vm *vm, uint32_t peerid, uid_t 
> uid)
>  {
> +     int diskfds[VMM_MAX_DISKS_PER_VM][VMM_MAX_BASE_PER_DISK];
>       struct vmd_if           *vif;
>       struct vmop_create_params *vmc = &vm->vm_params;
>       struct vm_create_params *vcp = &vmc->vmc_params;
> -     unsigned int             i;
> +     unsigned int             i, j;
>       int                      fd = -1, vmboot = 0;
> -     int                      kernfd = -1, *diskfds = NULL, *tapfds = NULL;
> +     int                      kernfd = -1;
> +     int                     *tapfds;
>       int                      cdromfd = -1;
>       int                      saved_errno = 0;
> +     int                      n = 0, aflags, oflags;
>       char                     ifname[IF_NAMESIZE], *s;
>       char                     path[PATH_MAX];
> +     char                     base[PATH_MAX];
> +     char                     expanded[PATH_MAX];
>       unsigned int             unit;
>  
>       errno = 0;
> @@ -205,13 +211,9 @@ config_setvm(struct privsep *ps, struct vmd_vm *vm, 
> uint32_t peerid, uid_t uid)
>               }
>       }
>  
> -     diskfds = reallocarray(NULL, vcp->vcp_ndisks, sizeof(*diskfds));
> -     if (diskfds == NULL) {
> -             log_warn("%s: can't allocate disk fds", __func__);
> -             goto fail;
> -     }
> -     for (i = 0; i < vcp->vcp_ndisks; i++)
> -             diskfds[i] = -1;
> +     for (i = 0; i < VMM_MAX_DISKS_PER_VM; i++)
> +             for (j = 0; j < VMM_MAX_BASE_PER_DISK; j++)
> +                     diskfds[i][j] = -1;
>  
>       tapfds = reallocarray(NULL, vcp->vcp_nnics, sizeof(*tapfds));
>       if (tapfds == NULL) {
> @@ -289,22 +291,71 @@ config_setvm(struct privsep *ps, struct vmd_vm *vm, 
> uint32_t peerid, uid_t uid)
>  
>       /* Open disk images for child */
>       for (i = 0 ; i < vcp->vcp_ndisks; i++) {
> -                /* Stat disk[i] to ensure it is a regular file */
> -             if ((diskfds[i] = open(vcp->vcp_disks[i],
> -                 O_RDWR|O_EXLOCK|O_NONBLOCK)) == -1) {
> -                     log_warn("%s: can't open disk %s", __func__,
> -                         vcp->vcp_disks[i]);
> -                     errno = VMD_DISK_MISSING;
> -                     goto fail;
> -             }
> +             if (strlcpy(path, vcp->vcp_disks[i], sizeof path) > PATH_MAX)

The correct idiom is:

                if (strlcpy(path, vcp->vcp_disks[i],
                    sizeof(path)) >= sizeof(path))

- >=
- sizeof(path) instead PATH_MAX to be error-prone

> +                     log_warnx("%s, disk path too long", __func__);
> +             memset(vmc->vmc_diskbases, 0, sizeof vmc->vmc_diskbases);

sizeof with braces ;)

> +             oflags = O_RDWR|O_EXLOCK|O_NONBLOCK;
> +             aflags = R_OK|W_OK;
> +             for (j = 0; j < VMM_MAX_BASE_PER_DISK; j++) {
> +                     /* Stat disk[i] to ensure it is a regular file */
> +                     if ((diskfds[i][j] = open(path, oflags)) == -1) {
> +                             log_warn("%s: can't open disk %s", __func__,
> +                                 vcp->vcp_disks[i]);
> +                             errno = VMD_DISK_MISSING;
> +                             goto fail;
> +                     }
>  
> -             if (vm_checkaccess(diskfds[i],
> -                 vmc->vmc_checkaccess & VMOP_CREATE_DISK,
> -                 uid, R_OK|W_OK) == -1) {
> -                     log_warnx("vm \"%s\" no read/write access to disk %s",
> -                         vcp->vcp_name, vcp->vcp_disks[i]);
> -                     errno = EPERM;
> -                     goto fail;
> +                     if (vm_checkaccess(diskfds[i][j],
> +                         vmc->vmc_checkaccess & VMOP_CREATE_DISK,
> +                         uid, R_OK|W_OK) == -1) {

This doesn't use aflags and access to read-only bases still fails, it must be:

                            uid, aflags) == -1) {

> +                             log_warnx("vm \"%s\" unable to access "
> +                                 "disk %s", vcp->vcp_name,
> +                                 vcp->vcp_disks[i]);

And here you should print path instead of vcp->vcp_disks[i]: If access
to the base image fails, it would print the disk name instead of the
base image.

> +                             errno = EPERM;
> +                             goto fail;
> +                     }
> +
> +                     /*
> +                      * Clear the write and exclusive flags for base images.
> +                      * All writes should go to the top image, allowing them
> +                      * to be shared.
> +                      */
> +                     oflags = O_RDONLY|O_NONBLOCK;
> +                     aflags = R_OK;
> +                     n = virtio_get_base(diskfds[i][j], base, sizeof base,
> +                         vmc->vmc_disktypes[i]);
> +                     if (n == 0)
> +                             break;
> +                     if (n == -1) {
> +                             log_warnx("vm \"%s\" unable to read "
> +                                 "base for disk %s", vcp->vcp_name,
> +                                 vcp->vcp_disks[i]);
> +                             goto fail;
> +                     }
> +                     /*
> +                      * Relative paths should be interpreted relative
> +                      * to the disk image, rather than relative to the
> +                      * directory vmd happens to be running in, since
> +                      * this is the only userful interpretation.
> +                      */
> +                     if (base[0] == '/') {
> +                             if (realpath(base, path) == NULL) {
> +                                     log_warn("unable to resolve %s", base);
> +                                     goto fail;
> +                             }
> +                     } else {
> +                             s = dirname(path);
> +                             if (snprintf(expanded, sizeof(expanded),
> +                                 "%s/%s", s, base) >= (int)sizeof(expanded)) 
> {
> +                                     log_warn("path too long: %s/%s",
> +                                         s, base);
> +                                     goto fail;
> +                             }
> +                             if (realpath(expanded, path) == NULL) {
> +                                     log_warn("unable to resolve %s", base);
> +                                     goto fail;
> +                             }
> +                     }
>               }
>       }
>  
> @@ -402,9 +453,13 @@ config_setvm(struct privsep *ps, struct vmd_vm *vm, 
> uint32_t peerid, uid_t uid)
>                   NULL, 0);
>  
>       for (i = 0; i < vcp->vcp_ndisks; i++) {
> -             proc_compose_imsg(ps, PROC_VMM, -1,
> -                 IMSG_VMDOP_START_VM_DISK, vm->vm_vmid, diskfds[i],
> -                 &i, sizeof(i));
> +             for (j = 0; j < VMM_MAX_BASE_PER_DISK; j++) {
> +                     if (diskfds[i][j] == -1)
> +                             break;
> +                     proc_compose_imsg(ps, PROC_VMM, -1,
> +                         IMSG_VMDOP_START_VM_DISK, vm->vm_vmid,
> +                         diskfds[i][j], &i, sizeof(i));
> +             }
>       }
>       for (i = 0; i < vcp->vcp_nnics; i++) {
>               proc_compose_imsg(ps, PROC_VMM, -1,
> @@ -416,7 +471,6 @@ config_setvm(struct privsep *ps, struct vmd_vm *vm, 
> uint32_t peerid, uid_t uid)
>               proc_compose_imsg(ps, PROC_VMM, -1,
>                   IMSG_VMDOP_START_VM_END, vm->vm_vmid, fd,  NULL, 0);
>  
> -     free(diskfds);
>       free(tapfds);
>  
>       vm->vm_running = 1;
> @@ -430,11 +484,10 @@ config_setvm(struct privsep *ps, struct vmd_vm *vm, 
> uint32_t peerid, uid_t uid)
>               close(kernfd);
>       if (cdromfd != -1)
>               close(cdromfd);
> -     if (diskfds != NULL) {
> -             for (i = 0; i < vcp->vcp_ndisks; i++)
> -                     close(diskfds[i]);
> -             free(diskfds);
> -     }
> +     for (i = 0; i < vcp->vcp_ndisks; i++)
> +             for (j = 0; j < VMM_MAX_BASE_PER_DISK; j++)
> +                     if (diskfds[i][j] != -1)
> +                             close(diskfds[i][j]);
>       if (tapfds != NULL) {
>               for (i = 0; i < vcp->vcp_nnics; i++)
>                       close(tapfds[i]);
> @@ -489,7 +542,7 @@ int
>  config_getdisk(struct privsep *ps, struct imsg *imsg)
>  {
>       struct vmd_vm   *vm;
> -     unsigned int     n;
> +     unsigned int     n, idx;
>  
>       errno = 0;
>       if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL) {
> @@ -500,14 +553,18 @@ config_getdisk(struct privsep *ps, struct imsg *imsg)
>       IMSG_SIZE_CHECK(imsg, &n);
>       memcpy(&n, imsg->data, sizeof(n));
>  
> -     if (n >= vm->vm_params.vmc_params.vcp_ndisks ||
> -         vm->vm_disks[n] != -1 || imsg->fd == -1) {
> +     if (n >= vm->vm_params.vmc_params.vcp_ndisks || imsg->fd == -1) {
>               log_warnx("invalid disk id");
>               errno = EINVAL;
>               return (-1);
>       }
> -     vm->vm_disks[n] = imsg->fd;
> -
> +     idx = vm->vm_params.vmc_diskbases[n]++;
> +     if (idx >= VMM_MAX_BASE_PER_DISK) {
> +             log_warnx("too many bases for disk");
> +             errno = EINVAL;
> +             return (-1);
> +     }
> +     vm->vm_disks[n][idx] = imsg->fd;
>       return (0);
>  }
>  
> diff --git usr.sbin/vmd/vioqcow2.c usr.sbin/vmd/vioqcow2.c
> index c3211d186fa..81dca129896 100644
> --- usr.sbin/vmd/vioqcow2.c
> +++ usr.sbin/vmd/vioqcow2.c
> @@ -104,8 +104,7 @@ static off_t xlate(struct qcdisk *, off_t, int *);
>  static int copy_cluster(struct qcdisk *, struct qcdisk *, off_t, off_t);
>  static off_t mkcluster(struct qcdisk *, struct qcdisk *, off_t, off_t);
>  static int inc_refs(struct qcdisk *, off_t, int);
> -static int qc2_openpath(struct qcdisk *, char *, int);
> -static int qc2_open(struct qcdisk *, int);
> +static int qc2_open(struct qcdisk *, int *, size_t);
>  static ssize_t qc2_pread(void *, char *, size_t, off_t);
>  static ssize_t qc2_pwrite(void *, char *, size_t, off_t);
>  static void qc2_close(void *, int);
> @@ -118,14 +117,14 @@ static void qc2_close(void *, int);
>   * May open snapshot base images.
>   */
>  int
> -virtio_init_qcow2(struct virtio_backing *file, off_t *szp, int fd)
> +virtio_init_qcow2(struct virtio_backing *file, off_t *szp, int *fd, size_t 
> nfd)
>  {
>       struct qcdisk *diskp;
>  
>       diskp = malloc(sizeof(struct qcdisk));
>       if (diskp == NULL)
>               return -1;
> -     if (qc2_open(diskp, fd) == -1) {
> +     if (qc2_open(diskp, fd, nfd) == -1) {
>               log_warnx("%s: could not open qcow2 disk", __func__);
>               return -1;
>       }
> @@ -137,19 +136,41 @@ virtio_init_qcow2(struct virtio_backing *file, off_t 
> *szp, int fd)
>       return 0;
>  }
>  
> -static int
> -qc2_openpath(struct qcdisk *disk, char *path, int flags)
> +ssize_t
> +virtio_qcow2_get_base(int fd, char *path, size_t npath)
>  {
> -     int fd;
> +     struct qcheader header;
> +     uint64_t backingoff;
> +     uint32_t backingsz;
>  
> -     fd = open(path, flags);
> -     if (fd < 0)
> +     if (pread(fd, &header, sizeof(header), 0) != sizeof(header)) {
> +             log_warnx("%s: short read on header", __func__);
> +             return -1;
> +     }
> +     if (strncmp(header.magic, "QFI\xfb", 4) != 0) {

It is more for consistency, but we have a #define:

        strncmp(header.magic, VM_MAGIC_QCOW, strlen(VM_MAGIC_QCOW))

> +             log_warn("%s: invalid magic numbers", __func__);
>               return -1;
> -     return qc2_open(disk, fd);
> +     }
> +     backingoff = be64toh(header.backingoff);
> +     backingsz = be32toh(header.backingsz);
> +     if (backingsz != 0) {
> +             if (backingsz >= npath - 1) {
> +                     log_warn("%s: snapshot path too long", __func__);
> +                     return -1;
> +             }
> +             if (pread(fd, path, backingsz, backingoff) != backingsz) {
> +                     log_warnx("%s: could not read snapshot base name",
> +                         __func__);
> +                     return -1;
> +             }
> +             path[backingsz] = '\0';
> +             log_warnx("path: %s\n", path);

That looks like a left-over debug message.  And it shouldn't be warn.
And shouldn't have a \n.

> +     }
> +     return backingsz;
>  }
>  
>  static int
> -qc2_open(struct qcdisk *disk, int fd)
> +qc2_open(struct qcdisk *disk, int *fds, size_t nfd)
>  {
>       char basepath[PATH_MAX];
>       struct stat st;
> @@ -157,14 +178,15 @@ qc2_open(struct qcdisk *disk, int fd)
>       uint64_t backingoff;
>       uint32_t backingsz;
>       size_t i;
> -     int version;
> +     int version, fd;
>  
>       pthread_rwlock_init(&disk->lock, NULL);
> +     fd = fds[0];
>       disk->fd = fd;
>       disk->base = NULL;
>       disk->l1 = NULL;
>  
> -     if (pread(fd, &header, sizeof header, 0) != sizeof header) {
> +     if (pread(fd, &header, sizeof(header), 0) != sizeof(header)) {
>               log_warn("%s: short read on header", __func__);
>               goto error;
>       }
> @@ -203,7 +225,7 @@ qc2_open(struct qcdisk *disk, int fd)
>               goto error;
>       }
>  
> -     disk->l1 = calloc(disk->l1sz, sizeof *disk->l1);
> +     disk->l1 = calloc(disk->l1sz, sizeof(*disk->l1));
>       if (!disk->l1)
>               goto error;
>       if (pread(disk->fd, disk->l1, 8*disk->l1sz, disk->l1off)
> @@ -222,14 +244,7 @@ qc2_open(struct qcdisk *disk, int fd)
>       backingoff = be64toh(header.backingoff);
>       backingsz = be32toh(header.backingsz);
>       if (backingsz != 0) {
> -             /*
> -              * FIXME: we need to figure out a way of opening these things,
> -              * otherwise we just crash with a pledge violation.
> -              */
> -             log_warn("%s: unsupported external snapshot images", __func__);
> -             goto error;
> -
> -             if (backingsz >= sizeof basepath - 1) {
> +             if (backingsz >= sizeof(basepath) - 1) {
>                       log_warn("%s: snapshot path too long", __func__);
>                       goto error;
>               }
> @@ -239,11 +254,17 @@ qc2_open(struct qcdisk *disk, int fd)
>                       goto error;
>               }
>               basepath[backingsz] = 0;
> +             if (nfd <= 1) {
> +                     log_warnx("%s: missing base image %s", __func__, 
> +                         basepath);
> +                     goto error;
> +             }
> +
>  
>               disk->base = calloc(1, sizeof(struct qcdisk));
>               if (!disk->base)
>                       goto error;
> -             if (qc2_openpath(disk->base, basepath, O_RDONLY) == -1) {
> +             if (qc2_open(disk->base, fds + 1, nfd - 1) == -1) {
>                       log_warn("%s: could not open %s", basepath, __func__);
>                       goto error;
>               }
> @@ -521,12 +542,12 @@ mkcluster(struct qcdisk *disk, struct qcdisk *base, 
> off_t off, off_t src_phys)
>       cluster = disk->end;
>       disk->end += disk->clustersz;
>       buf = htobe64(cluster | QCOW2_INPLACE);
> -     if (pwrite(disk->fd, &buf, sizeof buf, l2tab + l2off*8) != sizeof(buf))
> +     if (pwrite(disk->fd, &buf, sizeof(buf), l2tab + l2off*8) != sizeof(buf))

While here, you can add spaces but than you have to wrap the line

        if (pwrite(disk->fd, &buf, sizeof(buf), l2tab + l2off * 8) !=
            sizeof(buf))
>               goto fail;
>  
>       /* TODO: lazily sync: currently VMD doesn't close things */
>       buf = htobe64(disk->l1[l1off]);
> -     if (pwrite(disk->fd, &buf, sizeof buf, disk->l1off + 8*l1off) != 8)
> +     if (pwrite(disk->fd, &buf, sizeof(buf), disk->l1off + 8*l1off) != 8)

And this one can remain on the same line :)

        if (pwrite(disk->fd, &buf, sizeof(buf), disk->l1off + 8 * l1off) != 8)

>               goto fail;
>       if (inc_refs(disk, cluster, 1) == -1)
>               goto fail;
> @@ -571,7 +592,7 @@ inc_refs(struct qcdisk *disk, off_t off, int newcluster)
>       l1idx = (off / disk->clustersz) / nper;
>       l2idx = (off / disk->clustersz) % nper;
>       l1off = disk->refoff + 8*l1idx;
> -     if (pread(disk->fd, &buf, sizeof buf, l1off) != 8)
> +     if (pread(disk->fd, &buf, sizeof(buf), l1off) != 8)
>               return -1;
>  
>       l2cluster = be64toh(buf);
> @@ -583,19 +604,20 @@ inc_refs(struct qcdisk *disk, off_t off, int newcluster)
>                       return -1;
>               }
>               buf = htobe64(l2cluster);
> -             if (pwrite(disk->fd, &buf, sizeof buf, l1off) != 8) {
> +             if (pwrite(disk->fd, &buf, sizeof(buf), l1off) != 8) {
>                       return -1;
>               }
>       }
>  
>       refs = 1;
>       if (!newcluster) {
> -             if (pread(disk->fd, &refs, sizeof refs, l2cluster+2*l2idx) != 2)
> +             if (pread(disk->fd, &refs, sizeof(refs), 
> +                 l2cluster+2*l2idx) != 2)

That is a good example why spaces around binary operators are more readable:

                    l2cluster + 2 * l2idx

>                       return -1;
>               refs = be16toh(refs) + 1;
>       }
>       refs = htobe16(refs);
> -     if (pwrite(disk->fd, &refs, sizeof refs, l2cluster + 2*l2idx) != 2) {
> +     if (pwrite(disk->fd, &refs, sizeof(refs), l2cluster + 2*l2idx) != 2) {
>               log_warn("%s: could not write ref block", __func__);
>               return -1;
>       }
> diff --git usr.sbin/vmd/vioraw.c usr.sbin/vmd/vioraw.c
> index e02ab67c5dc..ff4bbb3095e 100644
> --- usr.sbin/vmd/vioraw.c
> +++ usr.sbin/vmd/vioraw.c
> @@ -53,19 +53,21 @@ raw_close(void *file, int stayopen)
>   * returning -1 for error, 0 for success.
>   */
>  int
> -virtio_init_raw(struct virtio_backing *file, off_t *szp, int fd)
> +virtio_init_raw(struct virtio_backing *file, off_t *szp, int *fd, size_t nfd)
>  {
>       off_t sz;
>       int *fdp;
>  
> -     sz = lseek(fd, 0, SEEK_END);
> +     if (nfd != 1)
> +             return -1;
> +     sz = lseek(fd[0], 0, SEEK_END);
>       if (sz == -1)
>               return -1;
>  
>       fdp = malloc(sizeof(int));
>       if (!fdp)
>               return -1;
> -     *fdp = fd;
> +     *fdp = fd[0];
>       file->p = fdp;
>       file->pread = raw_pread;
>       file->pwrite = raw_pwrite;
> diff --git usr.sbin/vmd/virtio.c usr.sbin/vmd/virtio.c
> index 93490344560..1c66f33216a 100644
> --- usr.sbin/vmd/virtio.c
> +++ usr.sbin/vmd/virtio.c
> @@ -1745,24 +1745,39 @@ vmmci_io(int dir, uint16_t reg, uint32_t *data, 
> uint8_t *intr,
>       return (0);
>  }
>  
> +int
> +virtio_get_base(int fd, char *path, size_t npath ,int type)
> +{
> +     switch (type) {
> +     case VMDF_RAW:          return 0;

As mentioned before, unlike shell code or Rust match statements, we
usually put a newline between case and the block, even for single-line
statements.

> +     case VMDF_QCOW2:        return virtio_qcow2_get_base(fd, path, npath);
> +     }
> +     log_warnx("%s: invalid disk format", __func__);
> +     return -1;
> +}
> +
> +/*
> + * Initializes a struct virtio_backing using the list of fds.
> + */
>  static int
> -virtio_init_disk(struct virtio_backing *file, off_t *sz, int fd, int type)
> +virtio_init_disk(struct virtio_backing *file, off_t *sz,
> +    int *fd, size_t nfd, int type)
>  {
>       /* 
>        * probe disk types in order of preference, first one to work wins.
>        * TODO: provide a way of specifying the type and options.
>        */
>       switch (type) {
> -     case VMDF_RAW:          return virtio_init_raw(file, sz, fd);
> -     case VMDF_QCOW2:        return virtio_init_qcow2(file, sz, fd);
> +     case VMDF_RAW:          return virtio_init_raw(file, sz, fd, nfd);
> +     case VMDF_QCOW2:        return virtio_init_qcow2(file, sz, fd, nfd);
>       }
>       log_warnx("%s: invalid disk format", __func__);
>       return -1;
>  }
>  
>  void
> -virtio_init(struct vmd_vm *vm, int child_cdrom, int *child_disks,
> -    int *child_taps)
> +virtio_init(struct vmd_vm *vm, int child_cdrom,
> +    int child_disks[][VMM_MAX_BASE_PER_DISK], int *child_taps)
>  {
>       struct vmop_create_params *vmc = &vm->vm_params;
>       struct vm_create_params *vcp = &vmc->vmc_params;
> @@ -1838,7 +1853,8 @@ virtio_init(struct vmd_vm *vm, int child_cdrom, int 
> *child_disks,
>                       vioblk[i].vm_id = vcp->vcp_id;
>                       vioblk[i].irq = pci_get_dev_irq(id);
>                       if (virtio_init_disk(&vioblk[i].file, &vioblk[i].sz,
> -                         child_disks[i], vmc->vmc_disktypes[i]) == -1) {
> +                         child_disks[i], vmc->vmc_diskbases[i], 
> +                         vmc->vmc_disktypes[i]) == -1) {
>                               log_warnx("%s: unable to determine disk format",
>                                   __func__);
>                               return;
> @@ -1967,7 +1983,7 @@ virtio_init(struct vmd_vm *vm, int child_cdrom, int 
> *child_disks,
>                       vioscsi->vq[i].last_avail = 0;
>               }
>               if (virtio_init_disk(&vioscsi->file, &vioscsi->sz,
> -                 child_cdrom, VMDF_RAW) == -1) {
> +                 &child_cdrom, 1, VMDF_RAW) == -1) {
>                       log_warnx("%s: unable to determine iso format",
>                           __func__);
>                       return;
> @@ -2123,7 +2139,8 @@ vionet_restore(int fd, struct vmd_vm *vm, int 
> *child_taps)
>  }
>  
>  int
> -vioblk_restore(int fd, struct vmop_create_params *vmc, int *child_disks)
> +vioblk_restore(int fd, struct vmop_create_params *vmc,
> +    int child_disks[][VMM_MAX_BASE_PER_DISK])
>  {
>       struct vm_create_params *vcp = &vmc->vmc_params;
>       uint8_t i;
> @@ -2149,7 +2166,8 @@ vioblk_restore(int fd, struct vmop_create_params *vmc, 
> int *child_disks)
>                       return (-1);
>               }
>               if (virtio_init_disk(&vioblk[i].file, &vioblk[i].sz,
> -                 child_disks[i], vmc->vmc_disktypes[i]) == -1)  {
> +                 child_disks[i], vmc->vmc_diskbases[i],
> +                 vmc->vmc_disktypes[i]) == -1)  {
>                       log_warnx("%s: unable to determine disk format",
>                           __func__);
>                       return (-1);
> @@ -2186,7 +2204,7 @@ vioscsi_restore(int fd, struct vm_create_params *vcp, 
> int child_cdrom)
>               return (-1);
>       }
>  
> -     if (virtio_init_disk(&vioscsi->file, &vioscsi->sz, child_cdrom,
> +     if (virtio_init_disk(&vioscsi->file, &vioscsi->sz, &child_cdrom, 1,
>           VMDF_RAW) == -1) {
>               log_warnx("%s: unable to determine iso format", __func__);
>               return (-1);
> @@ -2198,8 +2216,8 @@ vioscsi_restore(int fd, struct vm_create_params *vcp, 
> int child_cdrom)
>  }
>  
>  int
> -virtio_restore(int fd, struct vmd_vm *vm, int child_cdrom, int *child_disks,
> -    int *child_taps)
> +virtio_restore(int fd, struct vmd_vm *vm, int child_cdrom,
> +    int child_disks[][VMM_MAX_BASE_PER_DISK], int *child_taps)
>  {
>       struct vmop_create_params *vmc = &vm->vm_params;
>       struct vm_create_params *vcp = &vmc->vmc_params;
> diff --git usr.sbin/vmd/virtio.h usr.sbin/vmd/virtio.h
> index 46006916b6a..91f0e323204 100644
> --- usr.sbin/vmd/virtio.h
> +++ usr.sbin/vmd/virtio.h
> @@ -257,10 +257,11 @@ struct ioinfo {
>  };
>  
>  /* virtio.c */
> -void virtio_init(struct vmd_vm *, int, int *, int *);
> +void virtio_init(struct vmd_vm *, int, int[][VMM_MAX_BASE_PER_DISK], int *);
>  void virtio_shutdown(struct vmd_vm *);
>  int virtio_dump(int);
> -int virtio_restore(int, struct vmd_vm *, int, int *, int *);
> +int virtio_restore(int, struct vmd_vm *, int,
> +    int[][VMM_MAX_BASE_PER_DISK], int *);
>  uint32_t vring_size(uint32_t);
>  
>  int virtio_rnd_io(int, uint16_t, uint32_t *, uint8_t *, void *, uint8_t);
> @@ -270,12 +271,14 @@ void viornd_update_qs(void);
>  void viornd_update_qa(void);
>  int viornd_notifyq(void);
>  
> -int virtio_init_raw(struct virtio_backing *dev, off_t *sz, int fd);
> -int virtio_init_qcow2(struct virtio_backing *dev, off_t *sz, int fd);
> +ssize_t virtio_qcow2_get_base(int, char *, size_t);
> +int virtio_init_raw(struct virtio_backing *, off_t *, int*, size_t);
> +int virtio_init_qcow2(struct virtio_backing *, off_t *, int*, size_t);
>  
>  int virtio_blk_io(int, uint16_t, uint32_t *, uint8_t *, void *, uint8_t);
>  int vioblk_dump(int);
> -int vioblk_restore(int, struct vmop_create_params *, int *);
> +int vioblk_restore(int, struct vmop_create_params *,
> +    int[][VMM_MAX_BASE_PER_DISK]);
>  void vioblk_update_qs(struct vioblk_dev *);
>  void vioblk_update_qa(struct vioblk_dev *);
>  int vioblk_notifyq(struct vioblk_dev *);
> diff --git usr.sbin/vmd/vm.c usr.sbin/vmd/vm.c
> index ef4494d918b..7c9aaf27012 100644
> --- usr.sbin/vmd/vm.c
> +++ usr.sbin/vmd/vm.c
> @@ -65,8 +65,8 @@
>  
>  io_fn_t ioports_map[MAX_PORTS];
>  
> -int run_vm(int, int *, int *, struct vmop_create_params *,
> -    struct vcpu_reg_state *);
> +int run_vm(int, int[][VMM_MAX_BASE_PER_DISK], int *,
> +    struct vmop_create_params *, struct vcpu_reg_state *);
>  void vm_dispatch_vmm(int, short, void *);
>  void *event_thread(void *);
>  void *vcpu_run_loop(void *);
> @@ -75,8 +75,10 @@ int vcpu_reset(uint32_t, uint32_t, struct vcpu_reg_state 
> *);
>  void create_memory_map(struct vm_create_params *);
>  int alloc_guest_mem(struct vm_create_params *);
>  int vmm_create_vm(struct vm_create_params *);
> -void init_emulated_hw(struct vmop_create_params *, int, int *, int *);
> -void restore_emulated_hw(struct vm_create_params *, int, int *, int *,int);
> +void init_emulated_hw(struct vmop_create_params *, int,
> +    int[][VMM_MAX_BASE_PER_DISK], int *);
> +void restore_emulated_hw(struct vm_create_params *, int, int *,
> +    int[][VMM_MAX_BASE_PER_DISK],int);
>  void vcpu_exit_inout(struct vm_run_params *);
>  uint8_t vcpu_exit_pci(struct vm_run_params *);
>  int vcpu_pic_intr(uint32_t, uint32_t, uint8_t);
> @@ -327,7 +329,7 @@ start_vm(struct vmd_vm *vm, int fd)
>  
>               /* Find and open kernel image */
>               if ((fp = vmboot_open(vm->vm_kernel,
> -                 vm->vm_disks[0], vmc->vmc_disktypes[0], &vmboot)) == NULL)
> +                 vm->vm_disks[0][0], vmc->vmc_disktypes[0], &vmboot)) == 
> NULL)

OK, now I get that the first fd is the actual "working" disk image,
but I still think that vmboot_open needs the other fds for the base
images as well.  But we can probably fix that in another diff after
this one got in.

>                       fatalx("failed to open kernel - exiting");
>  
>               /* Load kernel image */
> @@ -903,7 +905,7 @@ vmm_create_vm(struct vm_create_params *vcp)
>   */
>  void
>  init_emulated_hw(struct vmop_create_params *vmc, int child_cdrom,
> -    int *child_disks, int *child_taps)
> +    int child_disks[][VMM_MAX_BASE_PER_DISK], int *child_taps)
>  {
>       struct vm_create_params *vcp = &vmc->vmc_params;
>       int i;
> @@ -968,7 +970,7 @@ init_emulated_hw(struct vmop_create_params *vmc, int 
> child_cdrom,
>   */
>  void
>  restore_emulated_hw(struct vm_create_params *vcp, int fd,
> -    int *child_taps, int *child_disks, int child_cdrom)
> +    int *child_taps, int child_disks[][VMM_MAX_BASE_PER_DISK], int 
> child_cdrom)
>  {
>       /* struct vm_create_params *vcp = &vmc->vmc_params; */
>       int i;
> @@ -1029,8 +1031,9 @@ restore_emulated_hw(struct vm_create_params *vcp, int 
> fd,
>   *  !0 : the VM exited abnormally or failed to start
>   */
>  int
> -run_vm(int child_cdrom, int *child_disks, int *child_taps,
> -    struct vmop_create_params *vmc, struct vcpu_reg_state *vrs)
> +run_vm(int child_cdrom, int child_disks[][VMM_MAX_BASE_PER_DISK],
> +    int *child_taps, struct vmop_create_params *vmc,
> +    struct vcpu_reg_state *vrs)
>  {
>       struct vm_create_params *vcp = &vmc->vmc_params;
>       struct vm_rwregs_params vregsp;
> diff --git usr.sbin/vmd/vmboot.c usr.sbin/vmd/vmboot.c
> index 44ceeb64a7e..f23d8d6b25a 100644
> --- usr.sbin/vmd/vmboot.c
> +++ usr.sbin/vmd/vmboot.c
> @@ -414,13 +414,13 @@ vmboot_open(int kernel_fd, int disk_fd, unsigned int 
> disk_type,
>  
>       switch (vmboot->vbp_type) {
>       case VMDF_RAW:
> -             if (virtio_init_raw(vfp, &sz, disk_fd) == -1) {
> +             if (virtio_init_raw(vfp, &sz, &disk_fd, 1) == -1) {
>                       log_debug("%s: could not open raw disk", __func__);
>                       goto fail;
>               }
>               break;
>       case VMDF_QCOW2:
> -             if (virtio_init_qcow2(vfp, &sz, disk_fd) == -1) {
> +             if (virtio_init_qcow2(vfp, &sz, &disk_fd, 1) == -1) {

jup, a qcow2 disk with base image would fail here.

>                       log_debug("%s: could not open qcow2 disk", __func__);
>                       goto fail;
>               }
> diff --git usr.sbin/vmd/vmd.c usr.sbin/vmd/vmd.c
> index 1571be21bc5..8377812c316 100644
> --- usr.sbin/vmd/vmd.c
> +++ usr.sbin/vmd/vmd.c
> @@ -1097,7 +1097,7 @@ void
>  vm_stop(struct vmd_vm *vm, int keeptty, const char *caller)
>  {
>       struct privsep  *ps = &env->vmd_ps;
> -     unsigned int     i;
> +     unsigned int     i, j;
>  
>       if (vm == NULL)
>               return;
> @@ -1117,9 +1117,11 @@ vm_stop(struct vmd_vm *vm, int keeptty, const char 
> *caller)
>               close(vm->vm_iev.ibuf.fd);
>       }
>       for (i = 0; i < VMM_MAX_DISKS_PER_VM; i++) {
> -             if (vm->vm_disks[i] != -1) {
> -                     close(vm->vm_disks[i]);
> -                     vm->vm_disks[i] = -1;
> +             for (j = 0; j < VMM_MAX_BASE_PER_DISK; j++) {

As mentioned on top, there are some VMM_MAX_BASE_PER_DISK instead of
VM_MAX_BASE_PER_DISK in here.

> +                     if (vm->vm_disks[i][j] != -1) {
> +                             close(vm->vm_disks[i][j]);
> +                             vm->vm_disks[i][j] = -1;
> +                     }
>               }
>       }
>       for (i = 0; i < VMM_MAX_NICS_PER_VM; i++) {
> @@ -1176,7 +1178,7 @@ vm_register(struct privsep *ps, struct 
> vmop_create_params *vmc,
>       struct vmop_owner       *vmo = NULL;
>       struct vmd_user         *usr = NULL;
>       uint32_t                 rng;
> -     unsigned int             i;
> +     unsigned int             i, j;
>       struct vmd_switch       *sw;
>       char                    *s;
>  
> @@ -1267,7 +1269,8 @@ vm_register(struct privsep *ps, struct 
> vmop_create_params *vmc,
>       vm->vm_user = usr;
>  
>       for (i = 0; i < VMM_MAX_DISKS_PER_VM; i++)
> -             vm->vm_disks[i] = -1;
> +             for (j = 0; j < VMM_MAX_BASE_PER_DISK; j++)
> +                     vm->vm_disks[i][j] = -1;
>       for (i = 0; i < VMM_MAX_NICS_PER_VM; i++)
>               vm->vm_ifs[i].vif_fd = -1;
>       for (i = 0; i < vcp->vcp_nnics; i++) {
> diff --git usr.sbin/vmd/vmd.h usr.sbin/vmd/vmd.h
> index b348d12c757..0683629b3b1 100644
> --- usr.sbin/vmd/vmd.h
> +++ usr.sbin/vmd/vmd.h
> @@ -48,6 +48,7 @@
>  #define VM_DEFAULT_DEVICE    "hd0a"
>  #define VM_BOOT_CONF         "/etc/boot.conf"
>  #define VM_NAME_MAX          64
> +#define VM_MAX_BASE_PER_DISK 4
>  #define VM_TTYNAME_MAX               16
>  #define MAX_TAP                      256
>  #define NR_BACKLOG           5
> @@ -169,6 +170,7 @@ struct vmop_create_params {
>  #define VMIFF_OPTMASK                (VMIFF_LOCKED|VMIFF_LOCAL|VMIFF_RDOMAIN)
>  
>       unsigned int             vmc_disktypes[VMM_MAX_DISKS_PER_VM];
> +     unsigned int             vmc_diskbases[VMM_MAX_DISKS_PER_VM];
>  #define VMDF_RAW             0x01
>  #define VMDF_QCOW2           0x02
>  
> @@ -241,7 +243,7 @@ struct vmd_vm {
>       uint32_t                 vm_vmid;
>       int                      vm_kernel;
>       int                      vm_cdrom;
> -     int                      vm_disks[VMM_MAX_DISKS_PER_VM];
> +     int                      
> vm_disks[VMM_MAX_DISKS_PER_VM][VMM_MAX_BASE_PER_DISK];
>       struct vmd_if            vm_ifs[VMM_MAX_NICS_PER_VM];
>       char                    *vm_ttyname;
>       int                      vm_tty;
> @@ -415,4 +417,7 @@ int        parse_config(const char *);
>  int   cmdline_symset(char *);
>  int   host(const char *, struct address *);
>  
> +/* virtio.c */
> +int   virtio_get_base(int, char *, size_t, int);
> +
>  #endif /* VMD_H */
> diff --git usr.sbin/vmd/vmm.c usr.sbin/vmd/vmm.c
> index 7757856323f..3176fd85713 100644
> --- usr.sbin/vmd/vmm.c
> +++ usr.sbin/vmd/vmm.c
> @@ -608,7 +608,7 @@ vmm_start_vm(struct imsg *imsg, uint32_t *id, pid_t *pid)
>       struct vmd_vm           *vm;
>       int                      ret = EINVAL;
>       int                      fds[2];
> -     size_t                   i;
> +     size_t                   i, j;
>  
>       if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL) {
>               log_warnx("%s: can't find vm", __func__);
> @@ -643,8 +643,11 @@ vmm_start_vm(struct imsg *imsg, uint32_t *id, pid_t *pid)
>               close(fds[1]);
>  
>               for (i = 0 ; i < vcp->vcp_ndisks; i++) {
> -                     close(vm->vm_disks[i]);
> -                     vm->vm_disks[i] = -1;
> +                     for (j = 0; j < VMM_MAX_BASE_PER_DISK; j++) {
> +                             if (vm->vm_disks[i][j] != -1)
> +                                     close(vm->vm_disks[i][j]);
> +                             vm->vm_disks[i][j] = -1;
> +                     }
>               }
>               for (i = 0 ; i < vcp->vcp_nnics; i++) {
>                       close(vm->vm_ifs[i].vif_fd);
> 
> -- 
>     Ori Bernstein

-- 

Reply via email to