On Wed, 3 Oct 2018 07:27:17 +0100, Jason McIntyre <[email protected]> wrote:
> On Tue, Oct 02, 2018 at 11:13:35PM -0700, Ori Bernstein wrote:
> >
> > Updated version. Changes from the last diff:
> >
> > - Merge in syntax changes.
> > - Don't over-read when getting the base images.
> > - Fix relative paths in base images.
> > - Allow multiple derived images to use a single base image, and allow a user
> > with only read permissions to base their images on top of it.
> > - Probe the base image size, use/validate it when creating disk images.
> > - Fix style a bit (long lines, changing from sizeof foo to sizeof(foo)).
> > - Move a define out of vmmvar.h
> > - And update the manpage with these changes.
> > - Improve error checking around creating/resolving base disk paths.
> >
>
> morning.
>
> you should start new sentences on new lines - it forces a double spacing
> between sentences that all man pages have.
>
> if you run your proposed changes to man pages through "mandoc -Tlint",
> it will pick up on silly things like that.
>
> note there is also a double space in "Op Fl b"
>
> jmc
>
Forgot to do this in my last update -- done now, and ran it through mandoc
-Tlint.
Also fixed the bug that Reyk caught.
diff --git regress/usr.sbin/vmd/diskfmt/Makefile
regress/usr.sbin/vmd/diskfmt/Makefile
index c2a5f42d5f6..1f8673e0e26 100644
--- regress/usr.sbin/vmd/diskfmt/Makefile
+++ regress/usr.sbin/vmd/diskfmt/Makefile
@@ -11,7 +11,7 @@
VMD_DIR=$(BSDSRCDIR)/usr.sbin/vmd/
PROG=vioscribble
-SRCS=vioscribble.c $(VMD_DIR)/vioqcow2.c $(VMD_DIR)/vioraw.c
+SRCS=vioscribble.c vioqcow2.c vioraw.c
CFLAGS+=-I$(VMD_DIR) -pthread
LDFLAGS+=-pthread
@@ -26,3 +26,6 @@ scribble-images:
.PHONY: ${REGRESS_TARGETS} scribble-images
.include <bsd.regress.mk>
+
+vioqcow2.c vioraw.c: $(VMD_DIR)/vioqcow2.c $(VMD_DIR)/vioraw.c
+ cp $(VMD_DIR)/vioqcow2.c $(VMD_DIR)/vioraw.c .
diff --git regress/usr.sbin/vmd/diskfmt/vioscribble.c
regress/usr.sbin/vmd/diskfmt/vioscribble.c
index 14d720db652..1da8efedac7 100644
--- regress/usr.sbin/vmd/diskfmt/vioscribble.c
+++ regress/usr.sbin/vmd/diskfmt/vioscribble.c
@@ -122,16 +122,18 @@ main(int argc, char **argv)
verbose = !!getenv("VERBOSE");
qcfd = open("scribble.qc2", O_RDWR);
rawfd = open("scribble.raw", O_RDWR);
- if (qcfd == -1 || virtio_init_qcow2(&qcowfile, &qcsz, qcfd) == -1)
+ if (qcfd == -1)
err(1, "unable to open qcow");
- if (rawfd == -1 || virtio_init_raw(&rawfile, &rawsz, rawfd) == -1)
+ if (virtio_init_qcow2(&qcowfile, &qcsz, &qcfd, 1) == -1)
+ err(1, "unable to init qcow");
+ if (rawfd == -1 || virtio_init_raw(&rawfile, &rawsz, &rawfd, 1) == -1)
err(1, "unable to open raw");
srandom_deterministic(123);
/* scribble to both disks */
printf("scribbling...\n");
- for (i = 0; i < 16; i++) {
+ for (i = 0; i < 1024*16; i++) {
off = (random() % DISKSZ);
len = random() % sizeof buf + 1;
fill(off, buf, sizeof buf);
diff --git usr.sbin/vmctl/main.c usr.sbin/vmctl/main.c
index 69c5e013f4c..2cfb6848899 100644
--- usr.sbin/vmctl/main.c
+++ usr.sbin/vmctl/main.c
@@ -67,7 +67,8 @@ int ctl_receive(struct parse_result *, int, char
*[]);
struct ctl_command ctl_commands[] = {
{ "console", CMD_CONSOLE, ctl_console, "id" },
- { "create", CMD_CREATE, ctl_create, "\"path\" -s size", 1 },
+ { "create", CMD_CREATE, ctl_create,
+ "\"path\" [-s size] [-b base]", 1 },
{ "load", CMD_LOAD, ctl_load, "\"path\"" },
{ "log", CMD_LOG, ctl_log, "[verbose|brief]" },
{ "reload", CMD_RELOAD, ctl_reload, "" },
@@ -539,47 +540,55 @@ int
ctl_create(struct parse_result *res, int argc, char *argv[])
{
int ch, ret, type;
- const char *paths[2], *disk, *format;
+ const char *disk, *format, *base;
if (argc < 2)
ctl_usage(res->ctl);
+ base = NULL;
type = parse_disktype(argv[1], &disk);
- paths[0] = disk;
- paths[1] = NULL;
-
- if (unveil(paths[0], "rwc") == -1)
+ if (pledge("stdio rpath wpath cpath unveil", NULL) == -1)
+ err(1, "pledge");
+ if (unveil(disk, "rwc") == -1)
err(1, "unveil");
- if (pledge("stdio rpath wpath cpath", NULL) == -1)
- err(1, "pledge");
argc--;
argv++;
- while ((ch = getopt(argc, argv, "s:")) != -1) {
+ while ((ch = getopt(argc, argv, "s:b:")) != -1) {
switch (ch) {
case 's':
if (parse_size(res, optarg, 0) != 0)
errx(1, "invalid size: %s", optarg);
break;
+ case 'b':
+ base = optarg;
+ if (unveil(base, "r") == -1)
+ err(1, "unveil");
+ break;
default:
ctl_usage(res->ctl);
/* NOTREACHED */
}
}
+ if (unveil(NULL, NULL))
+ err(1, "unveil");
- if (res->size == 0) {
- fprintf(stderr, "missing size argument\n");
+ if (base && type != VMDF_QCOW2)
+ errx(1, "base images require qcow2 disk format");
+ if (res->size == 0 && !base) {
+ fprintf(stderr, "could not create %s: missing size argument\n",
+ disk);
ctl_usage(res->ctl);
}
if (type == VMDF_QCOW2) {
format = "qcow2";
- ret = create_qc2_imagefile(paths[0], res->size);
+ ret = create_qc2_imagefile(disk, base, res->size);
} else {
format = "raw";
- ret = create_raw_imagefile(paths[0], res->size);
+ ret = create_raw_imagefile(disk, res->size);
}
if (ret != 0) {
diff --git usr.sbin/vmctl/vmctl.8 usr.sbin/vmctl/vmctl.8
index f7890ac99f8..72738d473b7 100644
--- usr.sbin/vmctl/vmctl.8
+++ usr.sbin/vmctl/vmctl.8
@@ -50,7 +50,7 @@ Using
.Xr cu 1
connect to the console of the VM with the specified
.Ar id .
-.It Cm create Ar path Fl s Ar size
+.It Cm create Ar path Op Fl s Ar size Op Fl b Ar base
Creates a VM disk image file with the specified
.Ar path
and
@@ -59,13 +59,22 @@ rounded to megabytes.
The disk
.Ar path
may be prefixed with a format prefix
-.Pf ( Pa raw:
+.Pf ( Pa raw :
or
-.Pa qcow2: )
+.Pa qcow2 : )
in order to specify the disk format.
If left unspecified, the format defaults to
.Pa raw
if it cannot be derived automatically.
+For qcow2, a
+.Ar base
+image may be specified.
+The base image is not modified.
+The derived image contains only the changes written by the VM.
+When creating a derived image, the
+.Ar size
+may be omitted, in which case it is probed from the base image.
+If it is provided, it must match the base image size.
.It Cm load Ar filename
Load additional configuration from the specified file.
.It Cm log brief
@@ -127,9 +136,9 @@ Disk image file (may be specified multiple times to add
multiple disk images).
The disk
.Ar path
may be prefixed with a format prefix
-.Pf ( Pa raw:
+.Pf ( Pa raw :
or
-.Pa qcow2: )
+.Pa qcow2 : )
in order to specify the disk format.
If left unspecified, the format defaults to
.Pa raw
diff --git usr.sbin/vmctl/vmctl.c usr.sbin/vmctl/vmctl.c
index b09e1115ff7..12db3f69525 100644
--- usr.sbin/vmctl/vmctl.c
+++ usr.sbin/vmctl/vmctl.c
@@ -847,7 +847,8 @@ create_raw_imagefile(const char *imgfile_path, long imgsize)
#define ALIGN(sz, align) \
((sz + align - 1) & ~(align - 1))
int
-create_qc2_imagefile(const char *imgfile_path, long imgsize)
+create_qc2_imagefile(const char *imgfile_path,
+ const char *base_path, long imgsize)
{
struct qcheader {
char magic[4];
@@ -869,15 +870,33 @@ create_qc2_imagefile(const char *imgfile_path, long
imgsize)
uint64_t autoclearfeatures;
uint32_t reforder;
uint32_t headersz;
- } __packed hdr;
+ } __packed hdr, basehdr;
int fd, ret;
+ ssize_t base_len;
uint64_t l1sz, refsz, disksz, initsz, clustersz;
uint64_t l1off, refoff, v, i, l1entrysz, refentrysz;
uint16_t refs;
- disksz = 1024*1024*imgsize;
+ disksz = 1024 * 1024 * imgsize;
+
+ if (base_path) {
+ fd = open(base_path, O_RDONLY);
+ if (read(fd, &basehdr, sizeof(basehdr)) != sizeof(basehdr))
+ err(1, "failure to read base image header");
+ close(fd);
+ if (strncmp(basehdr.magic,
+ VM_MAGIC_QCOW, strlen(VM_MAGIC_QCOW)) != 0)
+ errx(1, "base image is not a qcow2 file");
+ if (!disksz)
+ disksz = betoh64(basehdr.disksz);
+ else if (disksz != betoh64(basehdr.disksz))
+ errx(1, "base size does not match requested size");
+ }
+ if (!base_path && !disksz)
+ errx(1, "missing disk size");
+
clustersz = (1<<16);
- l1off = ALIGN(sizeof hdr, clustersz);
+ l1off = ALIGN(sizeof(hdr), clustersz);
l1entrysz = clustersz * clustersz / 8;
l1sz = (disksz + l1entrysz - 1) / l1entrysz;
@@ -887,11 +906,12 @@ create_qc2_imagefile(const char *imgfile_path, long
imgsize)
refsz = (disksz + refentrysz - 1) / refentrysz;
initsz = ALIGN(refoff + refsz*clustersz, clustersz);
+ base_len = base_path ? strlen(base_path) : 0;
- memcpy(hdr.magic, "QFI\xfb", 4);
+ memcpy(hdr.magic, VM_MAGIC_QCOW, strlen(VM_MAGIC_QCOW));
hdr.version = htobe32(3);
- hdr.backingoff = htobe64(0);
- hdr.backingsz = htobe32(0);
+ hdr.backingoff = htobe64(base_path ? sizeof(hdr) : 0);
+ hdr.backingsz = htobe32(base_len);
hdr.clustershift = htobe32(16);
hdr.disksz = htobe64(disksz);
hdr.cryptmethod = htobe32(0);
@@ -905,7 +925,7 @@ create_qc2_imagefile(const char *imgfile_path, long imgsize)
hdr.compatfeatures = htobe64(0);
hdr.autoclearfeatures = htobe64(0);
hdr.reforder = htobe32(4);
- hdr.headersz = htobe32(sizeof hdr);
+ hdr.headersz = htobe32(sizeof(hdr));
/* Refuse to overwrite an existing image */
fd = open(imgfile_path, O_RDWR | O_CREAT | O_TRUNC | O_EXCL,
@@ -914,7 +934,11 @@ create_qc2_imagefile(const char *imgfile_path, long
imgsize)
return (errno);
/* Write out the header */
- if (write(fd, &hdr, sizeof hdr) != sizeof hdr)
+ if (write(fd, &hdr, sizeof(hdr)) != sizeof(hdr))
+ goto error;
+
+ /* Add the base image */
+ if (base_path && write(fd, base_path, base_len) != base_len)
goto error;
/* Extend to desired size, and add one refcount cluster */
diff --git usr.sbin/vmctl/vmctl.h usr.sbin/vmctl/vmctl.h
index 006411d9785..2d0355450ee 100644
--- usr.sbin/vmctl/vmctl.h
+++ usr.sbin/vmctl/vmctl.h
@@ -87,7 +87,7 @@ __dead void
/* vmctl.c */
int create_raw_imagefile(const char *, long);
-int create_qc2_imagefile(const char *, long);
+int create_qc2_imagefile(const char *, const char *, long);
int vm_start(uint32_t, const char *, int, int, char **, int,
char **, int *, char *, char *, char *);
int vm_start_complete(struct imsg *, int *, int);
diff --git usr.sbin/vmd/config.c usr.sbin/vmd/config.c
index 550b73c1a39..af12b790002 100644
--- usr.sbin/vmd/config.c
+++ usr.sbin/vmd/config.c
@@ -35,6 +35,7 @@
#include <util.h>
#include <errno.h>
#include <imsg.h>
+#include <libgen.h>
#include "proc.h"
#include "vmd.h"
@@ -176,16 +177,21 @@ config_getreset(struct vmd *env, struct imsg *imsg)
int
config_setvm(struct privsep *ps, struct vmd_vm *vm, uint32_t peerid, uid_t uid)
{
+ int diskfds[VMM_MAX_DISKS_PER_VM][VM_MAX_BASE_PER_DISK];
struct vmd_if *vif;
struct vmop_create_params *vmc = &vm->vm_params;
struct vm_create_params *vcp = &vmc->vmc_params;
- unsigned int i;
+ unsigned int i, j;
int fd = -1, vmboot = 0;
- int kernfd = -1, *diskfds = NULL, *tapfds = NULL;
+ int kernfd = -1;
+ int *tapfds = NULL;
int cdromfd = -1;
int saved_errno = 0;
+ int n = 0, aflags, oflags;
char ifname[IF_NAMESIZE], *s;
char path[PATH_MAX];
+ char base[PATH_MAX];
+ char expanded[PATH_MAX];
unsigned int unit;
errno = 0;
@@ -205,13 +211,9 @@ config_setvm(struct privsep *ps, struct vmd_vm *vm,
uint32_t peerid, uid_t uid)
}
}
- diskfds = reallocarray(NULL, vcp->vcp_ndisks, sizeof(*diskfds));
- if (diskfds == NULL) {
- log_warn("%s: can't allocate disk fds", __func__);
- goto fail;
- }
- for (i = 0; i < vcp->vcp_ndisks; i++)
- diskfds[i] = -1;
+ for (i = 0; i < VMM_MAX_DISKS_PER_VM; i++)
+ for (j = 0; j < VM_MAX_BASE_PER_DISK; j++)
+ diskfds[i][j] = -1;
tapfds = reallocarray(NULL, vcp->vcp_nnics, sizeof(*tapfds));
if (tapfds == NULL) {
@@ -289,22 +291,71 @@ config_setvm(struct privsep *ps, struct vmd_vm *vm,
uint32_t peerid, uid_t uid)
/* Open disk images for child */
for (i = 0 ; i < vcp->vcp_ndisks; i++) {
- /* Stat disk[i] to ensure it is a regular file */
- if ((diskfds[i] = open(vcp->vcp_disks[i],
- O_RDWR|O_EXLOCK|O_NONBLOCK)) == -1) {
- log_warn("%s: can't open disk %s", __func__,
- vcp->vcp_disks[i]);
- errno = VMD_DISK_MISSING;
- goto fail;
- }
+ if (strlcpy(path, vcp->vcp_disks[i], sizeof(path))
+ >= sizeof(path))
+ log_warnx("%s, disk path too long", __func__);
+ memset(vmc->vmc_diskbases, 0, sizeof(vmc->vmc_diskbases));
+ oflags = O_RDWR|O_EXLOCK|O_NONBLOCK;
+ aflags = R_OK|W_OK;
+ for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) {
+ /* Stat disk[i] to ensure it is a regular file */
+ if ((diskfds[i][j] = open(path, oflags)) == -1) {
+ log_warn("%s: can't open disk %s", __func__,
+ vcp->vcp_disks[i]);
+ errno = VMD_DISK_MISSING;
+ goto fail;
+ }
- if (vm_checkaccess(diskfds[i],
- vmc->vmc_checkaccess & VMOP_CREATE_DISK,
- uid, R_OK|W_OK) == -1) {
- log_warnx("vm \"%s\" no read/write access to disk %s",
- vcp->vcp_name, vcp->vcp_disks[i]);
- errno = EPERM;
- goto fail;
+ if (vm_checkaccess(diskfds[i][j],
+ vmc->vmc_checkaccess & VMOP_CREATE_DISK,
+ uid, aflags) == -1) {
+ log_warnx("vm \"%s\" unable to access "
+ "disk %s", vcp->vcp_name, path);
+ errno = EPERM;
+ goto fail;
+ }
+
+ /*
+ * Clear the write and exclusive flags for base images.
+ * All writes should go to the top image, allowing them
+ * to be shared.
+ */
+ oflags = O_RDONLY|O_NONBLOCK;
+ aflags = R_OK;
+ n = virtio_get_base(diskfds[i][j], base, sizeof base,
+ vmc->vmc_disktypes[i]);
+ if (n == 0)
+ break;
+ if (n == -1) {
+ log_warnx("vm \"%s\" unable to read "
+ "base %s for disk %s", vcp->vcp_name,
+ base, vcp->vcp_disks[i]);
+ goto fail;
+ }
+ /*
+ * Relative paths should be interpreted relative
+ * to the disk image, rather than relative to the
+ * directory vmd happens to be running in, since
this is the only useful interpretation.
+ */
+ if (base[0] == '/') {
+ if (realpath(base, path) == NULL) {
+ log_warn("unable to resolve %s", base);
+ goto fail;
+ }
+ } else {
+ s = dirname(path);
+ if (snprintf(expanded, sizeof(expanded),
+ "%s/%s", s, base) >= (int)sizeof(expanded))
{
+ log_warn("path too long: %s/%s",
+ s, base);
+ goto fail;
+ }
+ if (realpath(expanded, path) == NULL) {
+ log_warn("unable to resolve %s", base);
+ goto fail;
+ }
+ }
}
}
@@ -402,9 +453,13 @@ config_setvm(struct privsep *ps, struct vmd_vm *vm,
uint32_t peerid, uid_t uid)
NULL, 0);
for (i = 0; i < vcp->vcp_ndisks; i++) {
- proc_compose_imsg(ps, PROC_VMM, -1,
- IMSG_VMDOP_START_VM_DISK, vm->vm_vmid, diskfds[i],
- &i, sizeof(i));
+ for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) {
+ if (diskfds[i][j] == -1)
+ break;
+ proc_compose_imsg(ps, PROC_VMM, -1,
+ IMSG_VMDOP_START_VM_DISK, vm->vm_vmid,
+ diskfds[i][j], &i, sizeof(i));
+ }
}
for (i = 0; i < vcp->vcp_nnics; i++) {
proc_compose_imsg(ps, PROC_VMM, -1,
@@ -416,7 +471,6 @@ config_setvm(struct privsep *ps, struct vmd_vm *vm,
uint32_t peerid, uid_t uid)
proc_compose_imsg(ps, PROC_VMM, -1,
IMSG_VMDOP_START_VM_END, vm->vm_vmid, fd, NULL, 0);
- free(diskfds);
free(tapfds);
vm->vm_running = 1;
@@ -430,11 +484,10 @@ config_setvm(struct privsep *ps, struct vmd_vm *vm,
uint32_t peerid, uid_t uid)
close(kernfd);
if (cdromfd != -1)
close(cdromfd);
- if (diskfds != NULL) {
- for (i = 0; i < vcp->vcp_ndisks; i++)
- close(diskfds[i]);
- free(diskfds);
- }
+ for (i = 0; i < vcp->vcp_ndisks; i++)
+ for (j = 0; j < VM_MAX_BASE_PER_DISK; j++)
+ if (diskfds[i][j] != -1)
+ close(diskfds[i][j]);
if (tapfds != NULL) {
for (i = 0; i < vcp->vcp_nnics; i++)
close(tapfds[i]);
@@ -489,7 +542,7 @@ int
config_getdisk(struct privsep *ps, struct imsg *imsg)
{
struct vmd_vm *vm;
- unsigned int n;
+ unsigned int n, idx;
errno = 0;
if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL) {
@@ -500,14 +553,18 @@ config_getdisk(struct privsep *ps, struct imsg *imsg)
IMSG_SIZE_CHECK(imsg, &n);
memcpy(&n, imsg->data, sizeof(n));
- if (n >= vm->vm_params.vmc_params.vcp_ndisks ||
- vm->vm_disks[n] != -1 || imsg->fd == -1) {
+ if (n >= vm->vm_params.vmc_params.vcp_ndisks || imsg->fd == -1) {
log_warnx("invalid disk id");
errno = EINVAL;
return (-1);
}
- vm->vm_disks[n] = imsg->fd;
-
+ idx = vm->vm_params.vmc_diskbases[n]++;
+ if (idx >= VM_MAX_BASE_PER_DISK) {
+ log_warnx("too many bases for disk");
+ errno = EINVAL;
+ return (-1);
+ }
+ vm->vm_disks[n][idx] = imsg->fd;
return (0);
}
diff --git usr.sbin/vmd/vioqcow2.c usr.sbin/vmd/vioqcow2.c
index c3211d186fa..25799cc5a3d 100644
--- usr.sbin/vmd/vioqcow2.c
+++ usr.sbin/vmd/vioqcow2.c
@@ -104,8 +104,7 @@ static off_t xlate(struct qcdisk *, off_t, int *);
static int copy_cluster(struct qcdisk *, struct qcdisk *, off_t, off_t);
static off_t mkcluster(struct qcdisk *, struct qcdisk *, off_t, off_t);
static int inc_refs(struct qcdisk *, off_t, int);
-static int qc2_openpath(struct qcdisk *, char *, int);
-static int qc2_open(struct qcdisk *, int);
+static int qc2_open(struct qcdisk *, int *, size_t);
static ssize_t qc2_pread(void *, char *, size_t, off_t);
static ssize_t qc2_pwrite(void *, char *, size_t, off_t);
static void qc2_close(void *, int);
@@ -118,14 +117,14 @@ static void qc2_close(void *, int);
* May open snapshot base images.
*/
int
-virtio_init_qcow2(struct virtio_backing *file, off_t *szp, int fd)
+virtio_init_qcow2(struct virtio_backing *file, off_t *szp, int *fd, size_t nfd)
{
struct qcdisk *diskp;
diskp = malloc(sizeof(struct qcdisk));
if (diskp == NULL)
return -1;
- if (qc2_open(diskp, fd) == -1) {
+ if (qc2_open(diskp, fd, nfd) == -1) {
log_warnx("%s: could not open qcow2 disk", __func__);
return -1;
}
@@ -137,19 +136,40 @@ virtio_init_qcow2(struct virtio_backing *file, off_t
*szp, int fd)
return 0;
}
-static int
-qc2_openpath(struct qcdisk *disk, char *path, int flags)
+ssize_t
+virtio_qcow2_get_base(int fd, char *path, size_t npath)
{
- int fd;
+ struct qcheader header;
+ uint64_t backingoff;
+ uint32_t backingsz;
- fd = open(path, flags);
- if (fd < 0)
+ if (pread(fd, &header, sizeof(header), 0) != sizeof(header)) {
+ log_warnx("%s: short read on header", __func__);
+ return -1;
+ }
+ if (strncmp(header.magic, VM_MAGIC_QCOW, strlen(VM_MAGIC_QCOW)) != 0) {
+ log_warn("%s: invalid magic numbers", __func__);
return -1;
- return qc2_open(disk, fd);
+ }
+ backingoff = be64toh(header.backingoff);
+ backingsz = be32toh(header.backingsz);
+ if (backingsz != 0) {
+ if (backingsz >= npath - 1) {
+ log_warn("%s: snapshot path too long", __func__);
+ return -1;
+ }
+ if (pread(fd, path, backingsz, backingoff) != backingsz) {
+ log_warnx("%s: could not read snapshot base name",
+ __func__);
+ return -1;
+ }
+ path[backingsz] = '\0';
+ }
+ return backingsz;
}
static int
-qc2_open(struct qcdisk *disk, int fd)
+qc2_open(struct qcdisk *disk, int *fds, size_t nfd)
{
char basepath[PATH_MAX];
struct stat st;
@@ -157,14 +177,15 @@ qc2_open(struct qcdisk *disk, int fd)
uint64_t backingoff;
uint32_t backingsz;
size_t i;
- int version;
+ int version, fd;
pthread_rwlock_init(&disk->lock, NULL);
+ fd = fds[0];
disk->fd = fd;
disk->base = NULL;
disk->l1 = NULL;
- if (pread(fd, &header, sizeof header, 0) != sizeof header) {
+ if (pread(fd, &header, sizeof(header), 0) != sizeof(header)) {
log_warn("%s: short read on header", __func__);
goto error;
}
@@ -203,11 +224,11 @@ qc2_open(struct qcdisk *disk, int fd)
goto error;
}
- disk->l1 = calloc(disk->l1sz, sizeof *disk->l1);
+ disk->l1 = calloc(disk->l1sz, sizeof(*disk->l1));
if (!disk->l1)
goto error;
- if (pread(disk->fd, disk->l1, 8*disk->l1sz, disk->l1off)
- != 8*disk->l1sz) {
+ if (pread(disk->fd, disk->l1, 8 * disk->l1sz, disk->l1off)
+ != 8 * disk->l1sz) {
log_warn("%s: unable to read qcow2 L1 table", __func__);
goto error;
}
@@ -222,14 +243,7 @@ qc2_open(struct qcdisk *disk, int fd)
backingoff = be64toh(header.backingoff);
backingsz = be32toh(header.backingsz);
if (backingsz != 0) {
- /*
- * FIXME: we need to figure out a way of opening these things,
- * otherwise we just crash with a pledge violation.
- */
- log_warn("%s: unsupported external snapshot images", __func__);
- goto error;
-
- if (backingsz >= sizeof basepath - 1) {
+ if (backingsz >= sizeof(basepath) - 1) {
log_warn("%s: snapshot path too long", __func__);
goto error;
}
@@ -239,11 +253,17 @@ qc2_open(struct qcdisk *disk, int fd)
goto error;
}
basepath[backingsz] = 0;
+ if (nfd <= 1) {
+ log_warnx("%s: missing base image %s", __func__,
+ basepath);
+ goto error;
+ }
+
disk->base = calloc(1, sizeof(struct qcdisk));
if (!disk->base)
goto error;
- if (qc2_openpath(disk->base, basepath, O_RDONLY) == -1) {
+ if (qc2_open(disk->base, fds + 1, nfd - 1) == -1) {
log_warn("%s: could not open %s", basepath, __func__);
goto error;
}
@@ -428,7 +448,7 @@ xlate(struct qcdisk *disk, off_t off, int *inplace)
return 0;
}
l2off = (off / disk->clustersz) % l2sz;
- pread(disk->fd, &buf, sizeof(buf), l2tab + l2off*8);
+ pread(disk->fd, &buf, sizeof(buf), l2tab + l2off * 8);
cluster = be64toh(buf);
/*
* cluster may be 0, but all future operations don't affect
@@ -521,12 +541,12 @@ mkcluster(struct qcdisk *disk, struct qcdisk *base, off_t
off, off_t src_phys)
cluster = disk->end;
disk->end += disk->clustersz;
buf = htobe64(cluster | QCOW2_INPLACE);
- if (pwrite(disk->fd, &buf, sizeof buf, l2tab + l2off*8) != sizeof(buf))
+ if (pwrite(disk->fd, &buf, sizeof(buf), l2tab + l2off * 8) != 8)
goto fail;
/* TODO: lazily sync: currently VMD doesn't close things */
buf = htobe64(disk->l1[l1off]);
- if (pwrite(disk->fd, &buf, sizeof buf, disk->l1off + 8*l1off) != 8)
+ if (pwrite(disk->fd, &buf, sizeof(buf), disk->l1off + 8 * l1off) != 8)
goto fail;
if (inc_refs(disk, cluster, 1) == -1)
goto fail;
@@ -570,8 +590,8 @@ inc_refs(struct qcdisk *disk, off_t off, int newcluster)
nper = disk->clustersz / 2;
l1idx = (off / disk->clustersz) / nper;
l2idx = (off / disk->clustersz) % nper;
- l1off = disk->refoff + 8*l1idx;
- if (pread(disk->fd, &buf, sizeof buf, l1off) != 8)
+ l1off = disk->refoff + 8 * l1idx;
+ if (pread(disk->fd, &buf, sizeof(buf), l1off) != 8)
return -1;
l2cluster = be64toh(buf);
@@ -583,19 +603,20 @@ inc_refs(struct qcdisk *disk, off_t off, int newcluster)
return -1;
}
buf = htobe64(l2cluster);
- if (pwrite(disk->fd, &buf, sizeof buf, l1off) != 8) {
+ if (pwrite(disk->fd, &buf, sizeof(buf), l1off) != 8) {
return -1;
}
}
refs = 1;
if (!newcluster) {
- if (pread(disk->fd, &refs, sizeof refs, l2cluster+2*l2idx) != 2)
+ if (pread(disk->fd, &refs, sizeof(refs),
+ l2cluster + 2 * l2idx) != 2)
return -1;
refs = be16toh(refs) + 1;
}
refs = htobe16(refs);
- if (pwrite(disk->fd, &refs, sizeof refs, l2cluster + 2*l2idx) != 2) {
+ if (pwrite(disk->fd, &refs, sizeof(refs), l2cluster + 2 * l2idx) != 2) {
log_warn("%s: could not write ref block", __func__);
return -1;
}
diff --git usr.sbin/vmd/vioraw.c usr.sbin/vmd/vioraw.c
index e02ab67c5dc..ff4bbb3095e 100644
--- usr.sbin/vmd/vioraw.c
+++ usr.sbin/vmd/vioraw.c
@@ -53,19 +53,21 @@ raw_close(void *file, int stayopen)
* returning -1 for error, 0 for success.
*/
int
-virtio_init_raw(struct virtio_backing *file, off_t *szp, int fd)
+virtio_init_raw(struct virtio_backing *file, off_t *szp, int *fd, size_t nfd)
{
off_t sz;
int *fdp;
- sz = lseek(fd, 0, SEEK_END);
+ if (nfd != 1)
+ return -1;
+ sz = lseek(fd[0], 0, SEEK_END);
if (sz == -1)
return -1;
fdp = malloc(sizeof(int));
if (!fdp)
return -1;
- *fdp = fd;
+ *fdp = fd[0];
file->p = fdp;
file->pread = raw_pread;
file->pwrite = raw_pwrite;
diff --git usr.sbin/vmd/virtio.c usr.sbin/vmd/virtio.c
index ac4f2a82b7d..d5fd8f91b1f 100644
--- usr.sbin/vmd/virtio.c
+++ usr.sbin/vmd/virtio.c
@@ -1745,24 +1745,41 @@ vmmci_io(int dir, uint16_t reg, uint32_t *data, uint8_t
*intr,
return (0);
}
+int
+virtio_get_base(int fd, char *path, size_t npath ,int type)
+{
+ switch (type) {
+ case VMDF_RAW:
+ return 0;
+ case VMDF_QCOW2:
+ return virtio_qcow2_get_base(fd, path, npath);
+ }
+ log_warnx("%s: invalid disk format", __func__);
+ return -1;
+}
+
+/*
+ * Initializes a struct virtio_backing using the list of fds.
+ */
static int
-virtio_init_disk(struct virtio_backing *file, off_t *sz, int fd, int type)
+virtio_init_disk(struct virtio_backing *file, off_t *sz,
+ int *fd, size_t nfd, int type)
{
/*
* probe disk types in order of preference, first one to work wins.
* TODO: provide a way of specifying the type and options.
*/
switch (type) {
- case VMDF_RAW: return virtio_init_raw(file, sz, fd);
- case VMDF_QCOW2: return virtio_init_qcow2(file, sz, fd);
+ case VMDF_RAW: return virtio_init_raw(file, sz, fd, nfd);
+ case VMDF_QCOW2: return virtio_init_qcow2(file, sz, fd, nfd);
}
log_warnx("%s: invalid disk format", __func__);
return -1;
}
void
-virtio_init(struct vmd_vm *vm, int child_cdrom, int *child_disks,
- int *child_taps)
+virtio_init(struct vmd_vm *vm, int child_cdrom,
+ int child_disks[][VM_MAX_BASE_PER_DISK], int *child_taps)
{
struct vmop_create_params *vmc = &vm->vm_params;
struct vm_create_params *vcp = &vmc->vmc_params;
@@ -1838,7 +1855,8 @@ virtio_init(struct vmd_vm *vm, int child_cdrom, int
*child_disks,
vioblk[i].vm_id = vcp->vcp_id;
vioblk[i].irq = pci_get_dev_irq(id);
if (virtio_init_disk(&vioblk[i].file, &vioblk[i].sz,
- child_disks[i], vmc->vmc_disktypes[i]) == -1) {
+ child_disks[i], vmc->vmc_diskbases[i],
+ vmc->vmc_disktypes[i]) == -1) {
log_warnx("%s: unable to determine disk format",
__func__);
return;
@@ -1967,7 +1985,7 @@ virtio_init(struct vmd_vm *vm, int child_cdrom, int
*child_disks,
vioscsi->vq[i].last_avail = 0;
}
if (virtio_init_disk(&vioscsi->file, &vioscsi->sz,
- child_cdrom, VMDF_RAW) == -1) {
+ &child_cdrom, 1, VMDF_RAW) == -1) {
log_warnx("%s: unable to determine iso format",
__func__);
return;
@@ -2125,7 +2143,8 @@ vionet_restore(int fd, struct vmd_vm *vm, int *child_taps)
}
int
-vioblk_restore(int fd, struct vmop_create_params *vmc, int *child_disks)
+vioblk_restore(int fd, struct vmop_create_params *vmc,
+ int child_disks[][VM_MAX_BASE_PER_DISK])
{
struct vm_create_params *vcp = &vmc->vmc_params;
uint8_t i;
@@ -2151,7 +2170,8 @@ vioblk_restore(int fd, struct vmop_create_params *vmc,
int *child_disks)
return (-1);
}
if (virtio_init_disk(&vioblk[i].file, &vioblk[i].sz,
- child_disks[i], vmc->vmc_disktypes[i]) == -1) {
+ child_disks[i], vmc->vmc_diskbases[i],
+ vmc->vmc_disktypes[i]) == -1) {
log_warnx("%s: unable to determine disk format",
__func__);
return (-1);
@@ -2188,7 +2208,7 @@ vioscsi_restore(int fd, struct vm_create_params *vcp, int
child_cdrom)
return (-1);
}
- if (virtio_init_disk(&vioscsi->file, &vioscsi->sz, child_cdrom,
+ if (virtio_init_disk(&vioscsi->file, &vioscsi->sz, &child_cdrom, 1,
VMDF_RAW) == -1) {
log_warnx("%s: unable to determine iso format", __func__);
return (-1);
@@ -2200,8 +2220,8 @@ vioscsi_restore(int fd, struct vm_create_params *vcp, int
child_cdrom)
}
int
-virtio_restore(int fd, struct vmd_vm *vm, int child_cdrom, int *child_disks,
- int *child_taps)
+virtio_restore(int fd, struct vmd_vm *vm, int child_cdrom,
+ int child_disks[][VM_MAX_BASE_PER_DISK], int *child_taps)
{
struct vmop_create_params *vmc = &vm->vm_params;
struct vm_create_params *vcp = &vmc->vmc_params;
diff --git usr.sbin/vmd/virtio.h usr.sbin/vmd/virtio.h
index 46006916b6a..bb632bb5502 100644
--- usr.sbin/vmd/virtio.h
+++ usr.sbin/vmd/virtio.h
@@ -257,10 +257,11 @@ struct ioinfo {
};
/* virtio.c */
-void virtio_init(struct vmd_vm *, int, int *, int *);
+void virtio_init(struct vmd_vm *, int, int[][VM_MAX_BASE_PER_DISK], int *);
void virtio_shutdown(struct vmd_vm *);
int virtio_dump(int);
-int virtio_restore(int, struct vmd_vm *, int, int *, int *);
+int virtio_restore(int, struct vmd_vm *, int,
+ int[][VM_MAX_BASE_PER_DISK], int *);
uint32_t vring_size(uint32_t);
int virtio_rnd_io(int, uint16_t, uint32_t *, uint8_t *, void *, uint8_t);
@@ -270,12 +271,14 @@ void viornd_update_qs(void);
void viornd_update_qa(void);
int viornd_notifyq(void);
-int virtio_init_raw(struct virtio_backing *dev, off_t *sz, int fd);
-int virtio_init_qcow2(struct virtio_backing *dev, off_t *sz, int fd);
+ssize_t virtio_qcow2_get_base(int, char *, size_t);
+int virtio_init_raw(struct virtio_backing *, off_t *, int*, size_t);
+int virtio_init_qcow2(struct virtio_backing *, off_t *, int*, size_t);
int virtio_blk_io(int, uint16_t, uint32_t *, uint8_t *, void *, uint8_t);
int vioblk_dump(int);
-int vioblk_restore(int, struct vmop_create_params *, int *);
+int vioblk_restore(int, struct vmop_create_params *,
+ int[][VM_MAX_BASE_PER_DISK]);
void vioblk_update_qs(struct vioblk_dev *);
void vioblk_update_qa(struct vioblk_dev *);
int vioblk_notifyq(struct vioblk_dev *);
diff --git usr.sbin/vmd/vm.c usr.sbin/vmd/vm.c
index ef4494d918b..37dfb95bb0d 100644
--- usr.sbin/vmd/vm.c
+++ usr.sbin/vmd/vm.c
@@ -65,8 +65,8 @@
io_fn_t ioports_map[MAX_PORTS];
-int run_vm(int, int *, int *, struct vmop_create_params *,
- struct vcpu_reg_state *);
+int run_vm(int, int[][VM_MAX_BASE_PER_DISK], int *,
+ struct vmop_create_params *, struct vcpu_reg_state *);
void vm_dispatch_vmm(int, short, void *);
void *event_thread(void *);
void *vcpu_run_loop(void *);
@@ -75,8 +75,10 @@ int vcpu_reset(uint32_t, uint32_t, struct vcpu_reg_state *);
void create_memory_map(struct vm_create_params *);
int alloc_guest_mem(struct vm_create_params *);
int vmm_create_vm(struct vm_create_params *);
-void init_emulated_hw(struct vmop_create_params *, int, int *, int *);
-void restore_emulated_hw(struct vm_create_params *, int, int *, int *,int);
+void init_emulated_hw(struct vmop_create_params *, int,
+ int[][VM_MAX_BASE_PER_DISK], int *);
+void restore_emulated_hw(struct vm_create_params *, int, int *,
+ int[][VM_MAX_BASE_PER_DISK],int);
void vcpu_exit_inout(struct vm_run_params *);
uint8_t vcpu_exit_pci(struct vm_run_params *);
int vcpu_pic_intr(uint32_t, uint32_t, uint8_t);
@@ -327,7 +329,8 @@ start_vm(struct vmd_vm *vm, int fd)
/* Find and open kernel image */
if ((fp = vmboot_open(vm->vm_kernel,
- vm->vm_disks[0], vmc->vmc_disktypes[0], &vmboot)) == NULL)
+ vm->vm_disks[0], vmc->vmc_diskbases[0],
+ vmc->vmc_disktypes[0], &vmboot)) == NULL)
fatalx("failed to open kernel - exiting");
/* Load kernel image */
@@ -903,7 +906,7 @@ vmm_create_vm(struct vm_create_params *vcp)
*/
void
init_emulated_hw(struct vmop_create_params *vmc, int child_cdrom,
- int *child_disks, int *child_taps)
+ int child_disks[][VM_MAX_BASE_PER_DISK], int *child_taps)
{
struct vm_create_params *vcp = &vmc->vmc_params;
int i;
@@ -968,7 +971,7 @@ init_emulated_hw(struct vmop_create_params *vmc, int
child_cdrom,
*/
void
restore_emulated_hw(struct vm_create_params *vcp, int fd,
- int *child_taps, int *child_disks, int child_cdrom)
+ int *child_taps, int child_disks[][VM_MAX_BASE_PER_DISK], int child_cdrom)
{
/* struct vm_create_params *vcp = &vmc->vmc_params; */
int i;
@@ -1029,8 +1032,9 @@ restore_emulated_hw(struct vm_create_params *vcp, int fd,
* !0 : the VM exited abnormally or failed to start
*/
int
-run_vm(int child_cdrom, int *child_disks, int *child_taps,
- struct vmop_create_params *vmc, struct vcpu_reg_state *vrs)
+run_vm(int child_cdrom, int child_disks[][VM_MAX_BASE_PER_DISK],
+ int *child_taps, struct vmop_create_params *vmc,
+ struct vcpu_reg_state *vrs)
{
struct vm_create_params *vcp = &vmc->vmc_params;
struct vm_rwregs_params vregsp;
diff --git usr.sbin/vmd/vmboot.c usr.sbin/vmd/vmboot.c
index 44ceeb64a7e..718c9739fa5 100644
--- usr.sbin/vmd/vmboot.c
+++ usr.sbin/vmd/vmboot.c
@@ -385,7 +385,7 @@ vmboot_loadfile(struct open_file *f, char *file, size_t
*size)
}
FILE *
-vmboot_open(int kernel_fd, int disk_fd, unsigned int disk_type,
+vmboot_open(int kernel_fd, int *disk_fd, int nfd, unsigned int disk_type,
struct vmboot_params *vmboot)
{
char file[PATH_MAX];
@@ -404,7 +404,7 @@ vmboot_open(int kernel_fd, int disk_fd, unsigned int
disk_type,
if (kernel_fd != -1)
return (fdopen(kernel_fd, "r"));
- if (disk_fd == -1)
+ if (disk_fd == NULL || nfd < 1)
return (NULL);
if ((vfp = calloc(1, sizeof(*vfp))) == NULL)
@@ -414,20 +414,19 @@ vmboot_open(int kernel_fd, int disk_fd, unsigned int
disk_type,
switch (vmboot->vbp_type) {
case VMDF_RAW:
- if (virtio_init_raw(vfp, &sz, disk_fd) == -1) {
+ if (virtio_init_raw(vfp, &sz, disk_fd, nfd) == -1) {
log_debug("%s: could not open raw disk", __func__);
goto fail;
}
break;
case VMDF_QCOW2:
- if (virtio_init_qcow2(vfp, &sz, disk_fd) == -1) {
+ if (virtio_init_qcow2(vfp, &sz, disk_fd, nfd) == -1) {
log_debug("%s: could not open qcow2 disk", __func__);
goto fail;
}
break;
}
- vmboot->vbp_fd = disk_fd;
vmboot_file.f_devdata = vmboot;
if ((vmboot->vbp_partoff =
diff --git usr.sbin/vmd/vmd.c usr.sbin/vmd/vmd.c
index 1571be21bc5..18a5e0d3d5d 100644
--- usr.sbin/vmd/vmd.c
+++ usr.sbin/vmd/vmd.c
@@ -1097,7 +1097,7 @@ void
vm_stop(struct vmd_vm *vm, int keeptty, const char *caller)
{
struct privsep *ps = &env->vmd_ps;
- unsigned int i;
+ unsigned int i, j;
if (vm == NULL)
return;
@@ -1117,9 +1117,11 @@ vm_stop(struct vmd_vm *vm, int keeptty, const char
*caller)
close(vm->vm_iev.ibuf.fd);
}
for (i = 0; i < VMM_MAX_DISKS_PER_VM; i++) {
- if (vm->vm_disks[i] != -1) {
- close(vm->vm_disks[i]);
- vm->vm_disks[i] = -1;
+ for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) {
+ if (vm->vm_disks[i][j] != -1) {
+ close(vm->vm_disks[i][j]);
+ vm->vm_disks[i][j] = -1;
+ }
}
}
for (i = 0; i < VMM_MAX_NICS_PER_VM; i++) {
@@ -1176,7 +1178,7 @@ vm_register(struct privsep *ps, struct vmop_create_params
*vmc,
struct vmop_owner *vmo = NULL;
struct vmd_user *usr = NULL;
uint32_t rng;
- unsigned int i;
+ unsigned int i, j;
struct vmd_switch *sw;
char *s;
@@ -1267,7 +1269,8 @@ vm_register(struct privsep *ps, struct vmop_create_params
*vmc,
vm->vm_user = usr;
for (i = 0; i < VMM_MAX_DISKS_PER_VM; i++)
- vm->vm_disks[i] = -1;
+ for (j = 0; j < VM_MAX_BASE_PER_DISK; j++)
+ vm->vm_disks[i][j] = -1;
for (i = 0; i < VMM_MAX_NICS_PER_VM; i++)
vm->vm_ifs[i].vif_fd = -1;
for (i = 0; i < vcp->vcp_nnics; i++) {
diff --git usr.sbin/vmd/vmd.h usr.sbin/vmd/vmd.h
index b348d12c757..b7c012854e8 100644
--- usr.sbin/vmd/vmd.h
+++ usr.sbin/vmd/vmd.h
@@ -48,6 +48,7 @@
#define VM_DEFAULT_DEVICE "hd0a"
#define VM_BOOT_CONF "/etc/boot.conf"
#define VM_NAME_MAX 64
+#define VM_MAX_BASE_PER_DISK 4
#define VM_TTYNAME_MAX 16
#define MAX_TAP 256
#define NR_BACKLOG 5
@@ -169,6 +170,7 @@ struct vmop_create_params {
#define VMIFF_OPTMASK (VMIFF_LOCKED|VMIFF_LOCAL|VMIFF_RDOMAIN)
unsigned int vmc_disktypes[VMM_MAX_DISKS_PER_VM];
+ unsigned int vmc_diskbases[VMM_MAX_DISKS_PER_VM];
#define VMDF_RAW 0x01
#define VMDF_QCOW2 0x02
@@ -202,7 +204,6 @@ struct vm_dump_header {
} __packed;
struct vmboot_params {
- int vbp_fd;
off_t vbp_partoff;
char vbp_device[PATH_MAX];
char vbp_image[PATH_MAX];
@@ -241,7 +242,7 @@ struct vmd_vm {
uint32_t vm_vmid;
int vm_kernel;
int vm_cdrom;
- int vm_disks[VMM_MAX_DISKS_PER_VM];
+ int
vm_disks[VMM_MAX_DISKS_PER_VM][VM_MAX_BASE_PER_DISK];
struct vmd_if vm_ifs[VMM_MAX_NICS_PER_VM];
char *vm_ttyname;
int vm_tty;
@@ -407,7 +408,7 @@ int config_getif(struct privsep *, struct imsg *);
int config_getcdrom(struct privsep *, struct imsg *);
/* vmboot.c */
-FILE *vmboot_open(int, int, unsigned int, struct vmboot_params *);
+FILE *vmboot_open(int, int *, int, unsigned int, struct vmboot_params *);
void vmboot_close(FILE *, struct vmboot_params *);
/* parse.y */
@@ -415,4 +416,7 @@ int parse_config(const char *);
int cmdline_symset(char *);
int host(const char *, struct address *);
+/* virtio.c */
+int virtio_get_base(int, char *, size_t, int);
+
#endif /* VMD_H */
diff --git usr.sbin/vmd/vmm.c usr.sbin/vmd/vmm.c
index 7757856323f..47e2b2324be 100644
--- usr.sbin/vmd/vmm.c
+++ usr.sbin/vmd/vmm.c
@@ -608,7 +608,7 @@ vmm_start_vm(struct imsg *imsg, uint32_t *id, pid_t *pid)
struct vmd_vm *vm;
int ret = EINVAL;
int fds[2];
- size_t i;
+ size_t i, j;
if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL) {
log_warnx("%s: can't find vm", __func__);
@@ -643,8 +643,11 @@ vmm_start_vm(struct imsg *imsg, uint32_t *id, pid_t *pid)
close(fds[1]);
for (i = 0 ; i < vcp->vcp_ndisks; i++) {
- close(vm->vm_disks[i]);
- vm->vm_disks[i] = -1;
+ for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) {
+ if (vm->vm_disks[i][j] != -1)
+ close(vm->vm_disks[i][j]);
+ vm->vm_disks[i][j] = -1;
+ }
}
for (i = 0 ; i < vcp->vcp_nnics; i++) {
close(vm->vm_ifs[i].vif_fd);
--
Ori Bernstein