I've added support to vmd for external snapshots. That is,
snapshots that are derived from a base image. Data lookups
start in the derived image, and if the derived image does not
contain some data, the search proceeds ot the base image.
Multiple derived images may exist off of a single base image.
A limitation of this format is that modifying the base image
will corrupt the derived image.
This change also adds support for creating disk derived disk
images to vmctl. To use it:
vmctl create derived.img -s 16G -b base.img -f qcow2
The main implementation change is that we now probe base
images before sending the disk FDs to the VM, which means that
we can actually open the images.
The base image paths may be relative. If they are relative,
they are interpreted relative to the location of the derived
image, and not relative to the directory where vmd happens to
be running.
For review, a bit of scrutiny could be directed to the
messaging. It relies on imsg being in-order, which seems to
be the case, but isn't documented in the manpage -- If I can't
rely on that, the protocol needs to be tweaked.
After this change, we send imsgs to the same disk index
repeatedly, and each message adds another base to the stack of
images. So, for example, if I have 2images image that look
like this:
disk0 -> base0 -> base1
disk1
Then we send the following messages:
VMDOP_START_VM_DISK (i=0, fd=open(disk0))
VMDOP_START_VM_DISK (i=0, fd=open(base0))
VMDOP_START_VM_DISK (i=0, fd=open(base1))
VMDOP_START_VM_DISK (i=1, fd=open(disk1))
This also opens the door to ephemeral snapshots, which vmd can
implicitly create when it starts a vm, and removes
automatically on exit.
Testing has been the usual -- OpenBSD installs, a bit of catting,
and some random 'dd'. Heavier use and testing would be appreciated.
Diff below.
diff --git regress/usr.sbin/vmd/diskfmt/Makefile
regress/usr.sbin/vmd/diskfmt/Makefile
index c2a5f42d5f6..1f8673e0e26 100644
--- regress/usr.sbin/vmd/diskfmt/Makefile
+++ regress/usr.sbin/vmd/diskfmt/Makefile
@@ -11,7 +11,7 @@
VMD_DIR=$(BSDSRCDIR)/usr.sbin/vmd/
PROG=vioscribble
-SRCS=vioscribble.c $(VMD_DIR)/vioqcow2.c $(VMD_DIR)/vioraw.c
+SRCS=vioscribble.c vioqcow2.c vioraw.c
CFLAGS+=-I$(VMD_DIR) -pthread
LDFLAGS+=-pthread
@@ -26,3 +26,6 @@ scribble-images:
.PHONY: ${REGRESS_TARGETS} scribble-images
.include <bsd.regress.mk>
+
+vioqcow2.c vioraw.c: $(VMD_DIR)/vioqcow2.c $(VMD_DIR)/vioraw.c
+ cp $(VMD_DIR)/vioqcow2.c $(VMD_DIR)/vioraw.c .
diff --git regress/usr.sbin/vmd/diskfmt/vioscribble.c
regress/usr.sbin/vmd/diskfmt/vioscribble.c
index 14d720db652..1da8efedac7 100644
--- regress/usr.sbin/vmd/diskfmt/vioscribble.c
+++ regress/usr.sbin/vmd/diskfmt/vioscribble.c
@@ -122,16 +122,18 @@ main(int argc, char **argv)
verbose = !!getenv("VERBOSE");
qcfd = open("scribble.qc2", O_RDWR);
rawfd = open("scribble.raw", O_RDWR);
- if (qcfd == -1 || virtio_init_qcow2(&qcowfile, &qcsz, qcfd) == -1)
+ if (qcfd == -1)
err(1, "unable to open qcow");
- if (rawfd == -1 || virtio_init_raw(&rawfile, &rawsz, rawfd) == -1)
+ if (virtio_init_qcow2(&qcowfile, &qcsz, &qcfd, 1) == -1)
+ err(1, "unable to init qcow");
+ if (rawfd == -1 || virtio_init_raw(&rawfile, &rawsz, &rawfd, 1) == -1)
err(1, "unable to open raw");
srandom_deterministic(123);
/* scribble to both disks */
printf("scribbling...\n");
- for (i = 0; i < 16; i++) {
+ for (i = 0; i < 1024*16; i++) {
off = (random() % DISKSZ);
len = random() % sizeof buf + 1;
fill(off, buf, sizeof buf);
diff --git sys/arch/amd64/include/vmmvar.h sys/arch/amd64/include/vmmvar.h
index 812e660f5f2..98cc498bf84 100644
--- sys/arch/amd64/include/vmmvar.h
+++ sys/arch/amd64/include/vmmvar.h
@@ -24,6 +24,7 @@
#define VMM_HV_SIGNATURE "OpenBSDVMM58"
#define VMM_MAX_MEM_RANGES 16
+#define VMM_MAX_BASE_PER_DISK 4
#define VMM_MAX_DISKS_PER_VM 4
#define VMM_MAX_PATH_DISK 128
#define VMM_MAX_PATH_CDROM 128
diff --git usr.sbin/vmctl/main.c usr.sbin/vmctl/main.c
index 0f06a9ed1d2..d75a51b83c5 100644
--- usr.sbin/vmctl/main.c
+++ usr.sbin/vmctl/main.c
@@ -64,7 +64,7 @@ int ctl_receive(struct parse_result *, int, char
*[]);
struct ctl_command ctl_commands[] = {
{ "console", CMD_CONSOLE, ctl_console, "id" },
{ "create", CMD_CREATE, ctl_create,
- "\"path\" -s size [-f fmt]", 1 },
+ "\"path\" -s size [-f fmt] [-b base]", 1 },
{ "load", CMD_LOAD, ctl_load, "\"path\"" },
{ "log", CMD_LOG, ctl_log, "[verbose|brief]" },
{ "reload", CMD_RELOAD, ctl_reload, "" },
@@ -504,11 +504,12 @@ int
ctl_create(struct parse_result *res, int argc, char *argv[])
{
int ch, ret;
- const char *paths[2], *format;
+ const char *paths[2], *format, *base;
if (argc < 2)
ctl_usage(res->ctl);
+ base = NULL;
paths[0] = argv[1];
paths[1] = NULL;
format = "raw";
@@ -521,7 +522,7 @@ ctl_create(struct parse_result *res, int argc, char *argv[])
argc--;
argv++;
- while ((ch = getopt(argc, argv, "s:f:")) != -1) {
+ while ((ch = getopt(argc, argv, "s:f:b:")) != -1) {
switch (ch) {
case 's':
if (parse_size(res, optarg, 0) != 0)
@@ -530,6 +531,9 @@ ctl_create(struct parse_result *res, int argc, char *argv[])
case 'f':
format = optarg;
break;
+ case 'b':
+ base = optarg;
+ break;
default:
ctl_usage(res->ctl);
/* NOTREACHED */
@@ -537,13 +541,16 @@ ctl_create(struct parse_result *res, int argc, char
*argv[])
}
if (res->size == 0) {
- fprintf(stderr, "missing size argument\n");
+ fprintf(stderr, "could not create %s: missing size argument\n",
+ paths[0]);
ctl_usage(res->ctl);
}
- if (strcmp(format, "raw") == 0)
+ if (strcmp(format, "raw") == 0) {
+ if (base)
+ errx(1, "raw images do not accept base argument");
ret = create_raw_imagefile(paths[0], res->size);
- else if (strcmp(format, "qcow2") == 0)
- ret = create_qc2_imagefile(paths[0], res->size);
+ } else if (strcmp(format, "qcow2") == 0)
+ ret = create_qc2_imagefile(paths[0], base, res->size);
else
errx(1, "unknown image format %s", format);
if (ret != 0) {
diff --git usr.sbin/vmctl/vmctl.8 usr.sbin/vmctl/vmctl.8
index 4941cde3b07..c9d297d2e90 100644
--- usr.sbin/vmctl/vmctl.8
+++ usr.sbin/vmctl/vmctl.8
@@ -50,7 +50,7 @@ Using
.Xr cu 1
connect to the console of the VM with the specified
.Ar id .
-.It Cm create Ar path Fl s Ar size Op Fl f Ar format
+.It Cm create Ar path Fl s Ar size Op Fl f Ar format Fl b Ar base
Creates a VM disk image file with the specified
.Ar path
and
@@ -65,6 +65,10 @@ or
defaulting to
.Ar raw
if left unspecified.
+For qcow2, a
+.Ar base
+image may be specified. The base image is not modified. The
+derived image contains only the changes written by the VM.
.It Cm load Ar filename
Load additional configuration from the specified file.
.It Cm log brief
diff --git usr.sbin/vmctl/vmctl.c usr.sbin/vmctl/vmctl.c
index 355bd4d0900..6101bb310dd 100644
--- usr.sbin/vmctl/vmctl.c
+++ usr.sbin/vmctl/vmctl.c
@@ -847,7 +847,8 @@ create_raw_imagefile(const char *imgfile_path, long imgsize)
#define ALIGN(sz, align) \
((sz + align - 1) & ~(align - 1))
int
-create_qc2_imagefile(const char *imgfile_path, long imgsize)
+create_qc2_imagefile(const char *imgfile_path,
+ const char *base_path, long imgsize)
{
struct qcheader {
char magic[4];
@@ -871,6 +872,7 @@ create_qc2_imagefile(const char *imgfile_path, long imgsize)
uint32_t headersz;
} __packed hdr;
int fd, ret;
+ ssize_t base_len;
uint64_t l1sz, refsz, disksz, initsz, clustersz;
uint64_t l1off, refoff, v, i;
uint16_t refs;
@@ -888,11 +890,12 @@ create_qc2_imagefile(const char *imgfile_path, long
imgsize)
refsz = 1;
initsz = ALIGN(refoff + refsz*clustersz, clustersz);
+ base_len = base_path ? strlen(base_path) : 0;
memcpy(hdr.magic, "QFI\xfb", 4);
hdr.version = htobe32(3);
- hdr.backingoff = htobe64(0);
- hdr.backingsz = htobe32(0);
+ hdr.backingoff = htobe64(base_path ? sizeof hdr : 0);
+ hdr.backingsz = htobe32(base_len);
hdr.clustershift = htobe32(16);
hdr.disksz = htobe64(disksz);
hdr.cryptmethod = htobe32(0);
@@ -918,6 +921,10 @@ create_qc2_imagefile(const char *imgfile_path, long
imgsize)
if (write(fd, &hdr, sizeof hdr) != sizeof hdr)
goto error;
+ /* Add the base image */
+ if (base_path && write(fd, base_path, base_len) != base_len)
+ goto error;
+
/* Extend to desired size, and add one refcount cluster */
if (ftruncate(fd, (off_t)initsz + clustersz) == -1)
goto error;
diff --git usr.sbin/vmctl/vmctl.h usr.sbin/vmctl/vmctl.h
index 27e094c26de..da96e926e36 100644
--- usr.sbin/vmctl/vmctl.h
+++ usr.sbin/vmctl/vmctl.h
@@ -87,7 +87,7 @@ __dead void
/* vmctl.c */
int create_raw_imagefile(const char *, long);
-int create_qc2_imagefile(const char *, long);
+int create_qc2_imagefile(const char *, const char*, long);
int vm_start(uint32_t, const char *, int, int, char **, int,
char **, int *, char *, char *, char *);
int vm_start_complete(struct imsg *, int *, int);
diff --git usr.sbin/vmd/config.c usr.sbin/vmd/config.c
index 550b73c1a39..ba70e41f930 100644
--- usr.sbin/vmd/config.c
+++ usr.sbin/vmd/config.c
@@ -35,6 +35,7 @@
#include <util.h>
#include <errno.h>
#include <imsg.h>
+#include <libgen.h>
#include "proc.h"
#include "vmd.h"
@@ -176,16 +177,21 @@ config_getreset(struct vmd *env, struct imsg *imsg)
int
config_setvm(struct privsep *ps, struct vmd_vm *vm, uint32_t peerid, uid_t uid)
{
+ int
diskfds[VMM_MAX_DISKS_PER_VM][VMM_MAX_BASE_PER_DISK];
struct vmd_if *vif;
struct vmop_create_params *vmc = &vm->vm_params;
struct vm_create_params *vcp = &vmc->vmc_params;
- unsigned int i;
+ unsigned int i, j;
int fd = -1, vmboot = 0;
- int kernfd = -1, *diskfds = NULL, *tapfds = NULL;
+ int kernfd = -1;
+ int *tapfds;
int cdromfd = -1;
int saved_errno = 0;
+ int n = 0, flags;
char ifname[IF_NAMESIZE], *s;
char path[PATH_MAX];
+ char base[PATH_MAX];
+ char expanded[PATH_MAX];
unsigned int unit;
errno = 0;
@@ -205,13 +211,9 @@ config_setvm(struct privsep *ps, struct vmd_vm *vm,
uint32_t peerid, uid_t uid)
}
}
- diskfds = reallocarray(NULL, vcp->vcp_ndisks, sizeof(*diskfds));
- if (diskfds == NULL) {
- log_warn("%s: can't allocate disk fds", __func__);
- goto fail;
- }
- for (i = 0; i < vcp->vcp_ndisks; i++)
- diskfds[i] = -1;
+ for (i = 0; i < VMM_MAX_DISKS_PER_VM; i++)
+ for (j = 0; j < VMM_MAX_BASE_PER_DISK; j++)
+ diskfds[i][j] = -1;
tapfds = reallocarray(NULL, vcp->vcp_nnics, sizeof(*tapfds));
if (tapfds == NULL) {
@@ -289,22 +291,55 @@ config_setvm(struct privsep *ps, struct vmd_vm *vm,
uint32_t peerid, uid_t uid)
/* Open disk images for child */
for (i = 0 ; i < vcp->vcp_ndisks; i++) {
- /* Stat disk[i] to ensure it is a regular file */
- if ((diskfds[i] = open(vcp->vcp_disks[i],
- O_RDWR|O_EXLOCK|O_NONBLOCK)) == -1) {
- log_warn("%s: can't open disk %s", __func__,
- vcp->vcp_disks[i]);
- errno = VMD_DISK_MISSING;
- goto fail;
- }
+ if (strlcpy(path, vcp->vcp_disks[i], sizeof path) > PATH_MAX)
+ log_warnx("%s, disk path too long", __func__);
+ memset(vmc->vmc_diskbases, 0, sizeof vmc->vmc_diskbases);
+ flags = O_RDWR|O_EXLOCK|O_NONBLOCK;
+ for (j = 0; j < VMM_MAX_BASE_PER_DISK; j++) {
+ /* Stat disk[i] to ensure it is a regular file */
+ if ((diskfds[i][j] = open(path, flags)) == -1) {
+ log_warn("%s: can't open disk %s", __func__,
+ vcp->vcp_disks[i]);
+ errno = VMD_DISK_MISSING;
+ goto fail;
+ }
+
+ if (vm_checkaccess(diskfds[i][j],
+ vmc->vmc_checkaccess & VMOP_CREATE_DISK,
+ uid, R_OK|W_OK) == -1) {
+ log_warnx("vm \"%s\" no read/write "
+ "access to disk %s", vcp->vcp_name,
+ vcp->vcp_disks[i]);
+ errno = EPERM;
+ goto fail;
+ }
+
+ /*
+ * Clear the read-write flag for base images.
+ * All writes should go to the top image.
+ */
+ flags = O_RDONLY|O_EXLOCK|O_NONBLOCK;
+ n = virtio_get_base(diskfds[i][j], base, sizeof base,
+ vmc->vmc_disktypes[i]);
+ if (n == -1)
+ log_warnx("vm \"%s\" unable to read"
+ "base for disk %s", vcp->vcp_name,
+ vcp->vcp_disks[i]);
+ if (n == 0)
+ break;
+ /*
+ * Relative paths should be interpreted relative
+ * to the disk image, rather than relative to the
+ * directory vmd happens to be running in, since
+ * this is the only userful interpretation.
+ */
+ if (base[0] != '/') {
+ s = dirname(path);
+ snprintf(expanded, sizeof expanded,
+ "%s/%s", s, base);
+ realpath(expanded, path);
+ }
- if (vm_checkaccess(diskfds[i],
- vmc->vmc_checkaccess & VMOP_CREATE_DISK,
- uid, R_OK|W_OK) == -1) {
- log_warnx("vm \"%s\" no read/write access to disk %s",
- vcp->vcp_name, vcp->vcp_disks[i]);
- errno = EPERM;
- goto fail;
}
}
@@ -402,9 +437,13 @@ config_setvm(struct privsep *ps, struct vmd_vm *vm,
uint32_t peerid, uid_t uid)
NULL, 0);
for (i = 0; i < vcp->vcp_ndisks; i++) {
- proc_compose_imsg(ps, PROC_VMM, -1,
- IMSG_VMDOP_START_VM_DISK, vm->vm_vmid, diskfds[i],
- &i, sizeof(i));
+ for (j = 0; j < VMM_MAX_BASE_PER_DISK; j++) {
+ if (diskfds[i][j] == -1)
+ break;
+ proc_compose_imsg(ps, PROC_VMM, -1,
+ IMSG_VMDOP_START_VM_DISK, vm->vm_vmid,
+ diskfds[i][j], &i, sizeof(i));
+ }
}
for (i = 0; i < vcp->vcp_nnics; i++) {
proc_compose_imsg(ps, PROC_VMM, -1,
@@ -416,7 +455,6 @@ config_setvm(struct privsep *ps, struct vmd_vm *vm,
uint32_t peerid, uid_t uid)
proc_compose_imsg(ps, PROC_VMM, -1,
IMSG_VMDOP_START_VM_END, vm->vm_vmid, fd, NULL, 0);
- free(diskfds);
free(tapfds);
vm->vm_running = 1;
@@ -430,11 +468,10 @@ config_setvm(struct privsep *ps, struct vmd_vm *vm,
uint32_t peerid, uid_t uid)
close(kernfd);
if (cdromfd != -1)
close(cdromfd);
- if (diskfds != NULL) {
- for (i = 0; i < vcp->vcp_ndisks; i++)
- close(diskfds[i]);
- free(diskfds);
- }
+ for (i = 0; i < vcp->vcp_ndisks; i++)
+ for (j = 0; j < VMM_MAX_BASE_PER_DISK; j++)
+ if (diskfds[i][j] != -1)
+ close(diskfds[i][j]);
if (tapfds != NULL) {
for (i = 0; i < vcp->vcp_nnics; i++)
close(tapfds[i]);
@@ -489,7 +526,7 @@ int
config_getdisk(struct privsep *ps, struct imsg *imsg)
{
struct vmd_vm *vm;
- unsigned int n;
+ unsigned int n, idx;
errno = 0;
if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL) {
@@ -500,14 +537,18 @@ config_getdisk(struct privsep *ps, struct imsg *imsg)
IMSG_SIZE_CHECK(imsg, &n);
memcpy(&n, imsg->data, sizeof(n));
- if (n >= vm->vm_params.vmc_params.vcp_ndisks ||
- vm->vm_disks[n] != -1 || imsg->fd == -1) {
+ if (n >= vm->vm_params.vmc_params.vcp_ndisks || imsg->fd == -1) {
log_warnx("invalid disk id");
errno = EINVAL;
return (-1);
}
- vm->vm_disks[n] = imsg->fd;
-
+ idx = vm->vm_params.vmc_diskbases[n]++;
+ if (idx >= VMM_MAX_BASE_PER_DISK) {
+ log_warnx("too many bases for disk");
+ errno = EINVAL;
+ return (-1);
+ }
+ vm->vm_disks[n][idx] = imsg->fd;
return (0);
}
diff --git usr.sbin/vmd/vioqcow2.c usr.sbin/vmd/vioqcow2.c
index ada0bafc72d..e4ebe020813 100644
--- usr.sbin/vmd/vioqcow2.c
+++ usr.sbin/vmd/vioqcow2.c
@@ -104,8 +104,7 @@ static off_t xlate(struct qcdisk *, off_t, int *);
static int copy_cluster(struct qcdisk *, struct qcdisk *, off_t, off_t);
static off_t mkcluster(struct qcdisk *, struct qcdisk *, off_t, off_t);
static int inc_refs(struct qcdisk *, off_t, int);
-static int qc2_openpath(struct qcdisk *, char *, int);
-static int qc2_open(struct qcdisk *, int);
+static int qc2_open(struct qcdisk *, int *, size_t);
static ssize_t qc2_pread(void *, char *, size_t, off_t);
static ssize_t qc2_pwrite(void *, char *, size_t, off_t);
static void qc2_close(void *, int);
@@ -118,14 +117,14 @@ static void qc2_close(void *, int);
* May open snapshot base images.
*/
int
-virtio_init_qcow2(struct virtio_backing *file, off_t *szp, int fd)
+virtio_init_qcow2(struct virtio_backing *file, off_t *szp, int *fd, size_t nfd)
{
struct qcdisk *diskp;
diskp = malloc(sizeof(struct qcdisk));
if (diskp == NULL)
return -1;
- if (qc2_open(diskp, fd) == -1) {
+ if (qc2_open(diskp, fd, nfd) == -1) {
log_warnx("%s: could not open qcow2 disk", __func__);
free(diskp);
return -1;
@@ -138,19 +137,41 @@ virtio_init_qcow2(struct virtio_backing *file, off_t
*szp, int fd)
return 0;
}
-static int
-qc2_openpath(struct qcdisk *disk, char *path, int flags)
+ssize_t
+virtio_qcow2_get_base(int fd, char *path, size_t npath)
{
- int fd;
+ struct qcheader header;
+ uint64_t backingoff;
+ uint32_t backingsz;
- fd = open(path, flags);
- if (fd < 0)
+ if (pread(fd, &header, sizeof header, 0) != sizeof header) {
+ log_warn("%s: short read on header", __func__);
return -1;
- return qc2_open(disk, fd);
+ }
+ if (strncmp(header.magic, "QFI\xfb", 4) != 0) {
+ log_warn("%s: invalid magic numbers", __func__);
+ return -1;
+ }
+ backingoff = be64toh(header.backingoff);
+ backingsz = be32toh(header.backingsz);
+ if (backingsz != 0) {
+ if (backingsz >= npath - 1) {
+ log_warn("%s: snapshot path too long", __func__);
+ return -1;
+ }
+ if (pread(fd, path, npath, backingoff) != backingsz) {
+ log_warn("%s: could not read snapshot base name",
+ __func__);
+ return -1;
+ }
+ path[backingsz] = 0;
+ log_warn("path: %s\n", path);
+ }
+ return backingsz;
}
static int
-qc2_open(struct qcdisk *disk, int fd)
+qc2_open(struct qcdisk *disk, int *fds, size_t nfd)
{
char basepath[PATH_MAX];
struct stat st;
@@ -158,9 +179,10 @@ qc2_open(struct qcdisk *disk, int fd)
uint64_t backingoff;
uint32_t backingsz;
size_t i;
- int version;
+ int version, fd;
pthread_rwlock_init(&disk->lock, NULL);
+ fd = fds[0];
disk->fd = fd;
disk->base = NULL;
disk->l1 = NULL;
@@ -222,13 +244,6 @@ qc2_open(struct qcdisk *disk, int fd)
backingoff = be64toh(header.backingoff);
backingsz = be32toh(header.backingsz);
if (backingsz != 0) {
- /*
- * FIXME: we need to figure out a way of opening these things,
- * otherwise we just crash with a pledge violation.
- */
- log_warn("%s: unsupported external snapshot images", __func__);
- goto error;
-
if (backingsz >= sizeof basepath - 1) {
log_warn("%s: snapshot path too long", __func__);
goto error;
@@ -239,11 +254,16 @@ qc2_open(struct qcdisk *disk, int fd)
goto error;
}
basepath[backingsz] = 0;
+ if (nfd <= 1) {
+ log_warnx("%s: missing base image %s", __func__,
basepath);
+ goto error;
+ }
+
disk->base = calloc(1, sizeof(struct qcdisk));
if (!disk->base)
goto error;
- if (qc2_openpath(disk->base, basepath, O_RDONLY) == -1) {
+ if (qc2_open(disk->base, fds + 1, nfd - 1) == -1) {
log_warn("%s: could not open %s", basepath, __func__);
goto error;
}
diff --git usr.sbin/vmd/vioraw.c usr.sbin/vmd/vioraw.c
index e02ab67c5dc..ff4bbb3095e 100644
--- usr.sbin/vmd/vioraw.c
+++ usr.sbin/vmd/vioraw.c
@@ -53,19 +53,21 @@ raw_close(void *file, int stayopen)
* returning -1 for error, 0 for success.
*/
int
-virtio_init_raw(struct virtio_backing *file, off_t *szp, int fd)
+virtio_init_raw(struct virtio_backing *file, off_t *szp, int *fd, size_t nfd)
{
off_t sz;
int *fdp;
- sz = lseek(fd, 0, SEEK_END);
+ if (nfd != 1)
+ return -1;
+ sz = lseek(fd[0], 0, SEEK_END);
if (sz == -1)
return -1;
fdp = malloc(sizeof(int));
if (!fdp)
return -1;
- *fdp = fd;
+ *fdp = fd[0];
file->p = fdp;
file->pread = raw_pread;
file->pwrite = raw_pwrite;
diff --git usr.sbin/vmd/virtio.c usr.sbin/vmd/virtio.c
index 93490344560..1c66f33216a 100644
--- usr.sbin/vmd/virtio.c
+++ usr.sbin/vmd/virtio.c
@@ -1745,24 +1745,39 @@ vmmci_io(int dir, uint16_t reg, uint32_t *data, uint8_t
*intr,
return (0);
}
+int
+virtio_get_base(int fd, char *path, size_t npath ,int type)
+{
+ switch (type) {
+ case VMDF_RAW: return 0;
+ case VMDF_QCOW2: return virtio_qcow2_get_base(fd, path, npath);
+ }
+ log_warnx("%s: invalid disk format", __func__);
+ return -1;
+}
+
+/*
+ * Initializes a struct virtio_backing using the list of fds.
+ */
static int
-virtio_init_disk(struct virtio_backing *file, off_t *sz, int fd, int type)
+virtio_init_disk(struct virtio_backing *file, off_t *sz,
+ int *fd, size_t nfd, int type)
{
/*
* probe disk types in order of preference, first one to work wins.
* TODO: provide a way of specifying the type and options.
*/
switch (type) {
- case VMDF_RAW: return virtio_init_raw(file, sz, fd);
- case VMDF_QCOW2: return virtio_init_qcow2(file, sz, fd);
+ case VMDF_RAW: return virtio_init_raw(file, sz, fd, nfd);
+ case VMDF_QCOW2: return virtio_init_qcow2(file, sz, fd, nfd);
}
log_warnx("%s: invalid disk format", __func__);
return -1;
}
void
-virtio_init(struct vmd_vm *vm, int child_cdrom, int *child_disks,
- int *child_taps)
+virtio_init(struct vmd_vm *vm, int child_cdrom,
+ int child_disks[][VMM_MAX_BASE_PER_DISK], int *child_taps)
{
struct vmop_create_params *vmc = &vm->vm_params;
struct vm_create_params *vcp = &vmc->vmc_params;
@@ -1838,7 +1853,8 @@ virtio_init(struct vmd_vm *vm, int child_cdrom, int
*child_disks,
vioblk[i].vm_id = vcp->vcp_id;
vioblk[i].irq = pci_get_dev_irq(id);
if (virtio_init_disk(&vioblk[i].file, &vioblk[i].sz,
- child_disks[i], vmc->vmc_disktypes[i]) == -1) {
+ child_disks[i], vmc->vmc_diskbases[i],
+ vmc->vmc_disktypes[i]) == -1) {
log_warnx("%s: unable to determine disk format",
__func__);
return;
@@ -1967,7 +1983,7 @@ virtio_init(struct vmd_vm *vm, int child_cdrom, int
*child_disks,
vioscsi->vq[i].last_avail = 0;
}
if (virtio_init_disk(&vioscsi->file, &vioscsi->sz,
- child_cdrom, VMDF_RAW) == -1) {
+ &child_cdrom, 1, VMDF_RAW) == -1) {
log_warnx("%s: unable to determine iso format",
__func__);
return;
@@ -2123,7 +2139,8 @@ vionet_restore(int fd, struct vmd_vm *vm, int *child_taps)
}
int
-vioblk_restore(int fd, struct vmop_create_params *vmc, int *child_disks)
+vioblk_restore(int fd, struct vmop_create_params *vmc,
+ int child_disks[][VMM_MAX_BASE_PER_DISK])
{
struct vm_create_params *vcp = &vmc->vmc_params;
uint8_t i;
@@ -2149,7 +2166,8 @@ vioblk_restore(int fd, struct vmop_create_params *vmc,
int *child_disks)
return (-1);
}
if (virtio_init_disk(&vioblk[i].file, &vioblk[i].sz,
- child_disks[i], vmc->vmc_disktypes[i]) == -1) {
+ child_disks[i], vmc->vmc_diskbases[i],
+ vmc->vmc_disktypes[i]) == -1) {
log_warnx("%s: unable to determine disk format",
__func__);
return (-1);
@@ -2186,7 +2204,7 @@ vioscsi_restore(int fd, struct vm_create_params *vcp, int
child_cdrom)
return (-1);
}
- if (virtio_init_disk(&vioscsi->file, &vioscsi->sz, child_cdrom,
+ if (virtio_init_disk(&vioscsi->file, &vioscsi->sz, &child_cdrom, 1,
VMDF_RAW) == -1) {
log_warnx("%s: unable to determine iso format", __func__);
return (-1);
@@ -2198,8 +2216,8 @@ vioscsi_restore(int fd, struct vm_create_params *vcp, int
child_cdrom)
}
int
-virtio_restore(int fd, struct vmd_vm *vm, int child_cdrom, int *child_disks,
- int *child_taps)
+virtio_restore(int fd, struct vmd_vm *vm, int child_cdrom,
+ int child_disks[][VMM_MAX_BASE_PER_DISK], int *child_taps)
{
struct vmop_create_params *vmc = &vm->vm_params;
struct vm_create_params *vcp = &vmc->vmc_params;
diff --git usr.sbin/vmd/virtio.h usr.sbin/vmd/virtio.h
index 46006916b6a..91f0e323204 100644
--- usr.sbin/vmd/virtio.h
+++ usr.sbin/vmd/virtio.h
@@ -257,10 +257,11 @@ struct ioinfo {
};
/* virtio.c */
-void virtio_init(struct vmd_vm *, int, int *, int *);
+void virtio_init(struct vmd_vm *, int, int[][VMM_MAX_BASE_PER_DISK], int *);
void virtio_shutdown(struct vmd_vm *);
int virtio_dump(int);
-int virtio_restore(int, struct vmd_vm *, int, int *, int *);
+int virtio_restore(int, struct vmd_vm *, int,
+ int[][VMM_MAX_BASE_PER_DISK], int *);
uint32_t vring_size(uint32_t);
int virtio_rnd_io(int, uint16_t, uint32_t *, uint8_t *, void *, uint8_t);
@@ -270,12 +271,14 @@ void viornd_update_qs(void);
void viornd_update_qa(void);
int viornd_notifyq(void);
-int virtio_init_raw(struct virtio_backing *dev, off_t *sz, int fd);
-int virtio_init_qcow2(struct virtio_backing *dev, off_t *sz, int fd);
+ssize_t virtio_qcow2_get_base(int, char *, size_t);
+int virtio_init_raw(struct virtio_backing *, off_t *, int*, size_t);
+int virtio_init_qcow2(struct virtio_backing *, off_t *, int*, size_t);
int virtio_blk_io(int, uint16_t, uint32_t *, uint8_t *, void *, uint8_t);
int vioblk_dump(int);
-int vioblk_restore(int, struct vmop_create_params *, int *);
+int vioblk_restore(int, struct vmop_create_params *,
+ int[][VMM_MAX_BASE_PER_DISK]);
void vioblk_update_qs(struct vioblk_dev *);
void vioblk_update_qa(struct vioblk_dev *);
int vioblk_notifyq(struct vioblk_dev *);
diff --git usr.sbin/vmd/vm.c usr.sbin/vmd/vm.c
index ef4494d918b..7c9aaf27012 100644
--- usr.sbin/vmd/vm.c
+++ usr.sbin/vmd/vm.c
@@ -65,8 +65,8 @@
io_fn_t ioports_map[MAX_PORTS];
-int run_vm(int, int *, int *, struct vmop_create_params *,
- struct vcpu_reg_state *);
+int run_vm(int, int[][VMM_MAX_BASE_PER_DISK], int *,
+ struct vmop_create_params *, struct vcpu_reg_state *);
void vm_dispatch_vmm(int, short, void *);
void *event_thread(void *);
void *vcpu_run_loop(void *);
@@ -75,8 +75,10 @@ int vcpu_reset(uint32_t, uint32_t, struct vcpu_reg_state *);
void create_memory_map(struct vm_create_params *);
int alloc_guest_mem(struct vm_create_params *);
int vmm_create_vm(struct vm_create_params *);
-void init_emulated_hw(struct vmop_create_params *, int, int *, int *);
-void restore_emulated_hw(struct vm_create_params *, int, int *, int *,int);
+void init_emulated_hw(struct vmop_create_params *, int,
+ int[][VMM_MAX_BASE_PER_DISK], int *);
+void restore_emulated_hw(struct vm_create_params *, int, int *,
+ int[][VMM_MAX_BASE_PER_DISK],int);
void vcpu_exit_inout(struct vm_run_params *);
uint8_t vcpu_exit_pci(struct vm_run_params *);
int vcpu_pic_intr(uint32_t, uint32_t, uint8_t);
@@ -327,7 +329,7 @@ start_vm(struct vmd_vm *vm, int fd)
/* Find and open kernel image */
if ((fp = vmboot_open(vm->vm_kernel,
- vm->vm_disks[0], vmc->vmc_disktypes[0], &vmboot)) == NULL)
+ vm->vm_disks[0][0], vmc->vmc_disktypes[0], &vmboot)) ==
NULL)
fatalx("failed to open kernel - exiting");
/* Load kernel image */
@@ -903,7 +905,7 @@ vmm_create_vm(struct vm_create_params *vcp)
*/
void
init_emulated_hw(struct vmop_create_params *vmc, int child_cdrom,
- int *child_disks, int *child_taps)
+ int child_disks[][VMM_MAX_BASE_PER_DISK], int *child_taps)
{
struct vm_create_params *vcp = &vmc->vmc_params;
int i;
@@ -968,7 +970,7 @@ init_emulated_hw(struct vmop_create_params *vmc, int
child_cdrom,
*/
void
restore_emulated_hw(struct vm_create_params *vcp, int fd,
- int *child_taps, int *child_disks, int child_cdrom)
+ int *child_taps, int child_disks[][VMM_MAX_BASE_PER_DISK], int child_cdrom)
{
/* struct vm_create_params *vcp = &vmc->vmc_params; */
int i;
@@ -1029,8 +1031,9 @@ restore_emulated_hw(struct vm_create_params *vcp, int fd,
* !0 : the VM exited abnormally or failed to start
*/
int
-run_vm(int child_cdrom, int *child_disks, int *child_taps,
- struct vmop_create_params *vmc, struct vcpu_reg_state *vrs)
+run_vm(int child_cdrom, int child_disks[][VMM_MAX_BASE_PER_DISK],
+ int *child_taps, struct vmop_create_params *vmc,
+ struct vcpu_reg_state *vrs)
{
struct vm_create_params *vcp = &vmc->vmc_params;
struct vm_rwregs_params vregsp;
diff --git usr.sbin/vmd/vmboot.c usr.sbin/vmd/vmboot.c
index 44ceeb64a7e..f23d8d6b25a 100644
--- usr.sbin/vmd/vmboot.c
+++ usr.sbin/vmd/vmboot.c
@@ -414,13 +414,13 @@ vmboot_open(int kernel_fd, int disk_fd, unsigned int
disk_type,
switch (vmboot->vbp_type) {
case VMDF_RAW:
- if (virtio_init_raw(vfp, &sz, disk_fd) == -1) {
+ if (virtio_init_raw(vfp, &sz, &disk_fd, 1) == -1) {
log_debug("%s: could not open raw disk", __func__);
goto fail;
}
break;
case VMDF_QCOW2:
- if (virtio_init_qcow2(vfp, &sz, disk_fd) == -1) {
+ if (virtio_init_qcow2(vfp, &sz, &disk_fd, 1) == -1) {
log_debug("%s: could not open qcow2 disk", __func__);
goto fail;
}
diff --git usr.sbin/vmd/vmd.c usr.sbin/vmd/vmd.c
index 890e9288dd1..133abe12722 100644
--- usr.sbin/vmd/vmd.c
+++ usr.sbin/vmd/vmd.c
@@ -1080,7 +1080,7 @@ void
vm_stop(struct vmd_vm *vm, int keeptty, const char *caller)
{
struct privsep *ps = &env->vmd_ps;
- unsigned int i;
+ unsigned int i, j;
if (vm == NULL)
return;
@@ -1100,9 +1100,11 @@ vm_stop(struct vmd_vm *vm, int keeptty, const char
*caller)
close(vm->vm_iev.ibuf.fd);
}
for (i = 0; i < VMM_MAX_DISKS_PER_VM; i++) {
- if (vm->vm_disks[i] != -1) {
- close(vm->vm_disks[i]);
- vm->vm_disks[i] = -1;
+ for (j = 0; j < VMM_MAX_BASE_PER_DISK; j++) {
+ if (vm->vm_disks[i][j] != -1) {
+ close(vm->vm_disks[i][j]);
+ vm->vm_disks[i][j] = -1;
+ }
}
}
for (i = 0; i < VMM_MAX_NICS_PER_VM; i++) {
@@ -1159,7 +1161,7 @@ vm_register(struct privsep *ps, struct vmop_create_params
*vmc,
struct vmop_owner *vmo = NULL;
struct vmd_user *usr = NULL;
uint32_t rng;
- unsigned int i;
+ unsigned int i, j;
struct vmd_switch *sw;
char *s;
@@ -1250,7 +1252,8 @@ vm_register(struct privsep *ps, struct vmop_create_params
*vmc,
vm->vm_user = usr;
for (i = 0; i < VMM_MAX_DISKS_PER_VM; i++)
- vm->vm_disks[i] = -1;
+ for (j = 0; j < VMM_MAX_BASE_PER_DISK; j++)
+ vm->vm_disks[i][j] = -1;
for (i = 0; i < VMM_MAX_NICS_PER_VM; i++)
vm->vm_ifs[i].vif_fd = -1;
for (i = 0; i < vcp->vcp_nnics; i++) {
diff --git usr.sbin/vmd/vmd.h usr.sbin/vmd/vmd.h
index 4cf0295d946..b803d44340c 100644
--- usr.sbin/vmd/vmd.h
+++ usr.sbin/vmd/vmd.h
@@ -166,6 +166,7 @@ struct vmop_create_params {
#define VMIFF_OPTMASK (VMIFF_LOCKED|VMIFF_LOCAL|VMIFF_RDOMAIN)
unsigned int vmc_disktypes[VMM_MAX_DISKS_PER_VM];
+ unsigned int vmc_diskbases[VMM_MAX_DISKS_PER_VM];
#define VMDF_RAW 0x01
#define VMDF_QCOW2 0x02
@@ -238,7 +239,7 @@ struct vmd_vm {
uint32_t vm_vmid;
int vm_kernel;
int vm_cdrom;
- int vm_disks[VMM_MAX_DISKS_PER_VM];
+ int
vm_disks[VMM_MAX_DISKS_PER_VM][VMM_MAX_BASE_PER_DISK];
struct vmd_if vm_ifs[VMM_MAX_NICS_PER_VM];
char *vm_ttyname;
int vm_tty;
@@ -412,4 +413,7 @@ int parse_config(const char *);
int cmdline_symset(char *);
int host(const char *, struct address *);
+/* virtio.c */
+int virtio_get_base(int, char *, size_t, int);
+
#endif /* VMD_H */
diff --git usr.sbin/vmd/vmm.c usr.sbin/vmd/vmm.c
index 7757856323f..3176fd85713 100644
--- usr.sbin/vmd/vmm.c
+++ usr.sbin/vmd/vmm.c
@@ -608,7 +608,7 @@ vmm_start_vm(struct imsg *imsg, uint32_t *id, pid_t *pid)
struct vmd_vm *vm;
int ret = EINVAL;
int fds[2];
- size_t i;
+ size_t i, j;
if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL) {
log_warnx("%s: can't find vm", __func__);
@@ -643,8 +643,11 @@ vmm_start_vm(struct imsg *imsg, uint32_t *id, pid_t *pid)
close(fds[1]);
for (i = 0 ; i < vcp->vcp_ndisks; i++) {
- close(vm->vm_disks[i]);
- vm->vm_disks[i] = -1;
+ for (j = 0; j < VMM_MAX_BASE_PER_DISK; j++) {
+ if (vm->vm_disks[i][j] != -1)
+ close(vm->vm_disks[i][j]);
+ vm->vm_disks[i][j] = -1;
+ }
}
for (i = 0 ; i < vcp->vcp_nnics; i++) {
close(vm->vm_ifs[i].vif_fd);
--
Ori Bernstein