On Fri, 31 Aug 2018 14:32:24 +0800, Michael Mikonos <[email protected]> wrote:

> Hi Ori,
> 
> I had one question about mkcluster() in src/usr.sbin/vmd/vioqcow2.c...
> 
> +     if (src_phys > 0 && copy_cluster(disk, base, disk->end, src_phys) == -1)
> +                     return -1;
> 
> The other error cases around it do "goto fail" which calls 
> pthread_rwlock_unlock().
> Should this do the same?

Yes. The patterns in that code changed as I wrote it, and I missed updating that
piece, thanks.

I also added a few formatting fixes that I forgot to make in the last version,
and removed the noisier bits of logging, so that verbose logs are not flooded
with individual disk writes/translations.

---
 regress/usr.sbin/vmd/config/Makefile               |   6 +-
 .../usr.sbin/vmd/config/vmd-fail-bad-format.conf   |   8 +
 regress/usr.sbin/vmd/config/vmd-fail-bad-format.ok |   1 +
 .../vmd/config/vmd-fail-missing-format.conf        |   8 +
 .../usr.sbin/vmd/config/vmd-fail-missing-format.ok |   1 +
 .../vmd/config/vmd-pass-format-keyword.conf        |   8 +
 .../usr.sbin/vmd/config/vmd-pass-format-keyword.ok |   1 +
 regress/usr.sbin/vmd/diskfmt/Makefile              |  28 +
 regress/usr.sbin/vmd/diskfmt/vioscribble.c         | 165 ++++++
 usr.sbin/vmctl/main.c                              |  42 +-
 usr.sbin/vmctl/vmctl.8                             |  18 +-
 usr.sbin/vmctl/vmctl.c                             |   8 +-
 usr.sbin/vmctl/vmctl.h                             |   6 +-
 usr.sbin/vmd/Makefile                              |   2 +-
 usr.sbin/vmd/parse.y                               |  33 +-
 usr.sbin/vmd/vioqcow2.c                            | 566 +++++++++++++++++++++
 usr.sbin/vmd/virtio.c                              |  26 +-
 usr.sbin/vmd/virtio.h                              |   3 +-
 usr.sbin/vmd/vm.conf.5                             |   9 +-
 usr.sbin/vmd/vmd.h                                 |   5 +
 20 files changed, 909 insertions(+), 35 deletions(-)
 create mode 100644 regress/usr.sbin/vmd/config/vmd-fail-bad-format.conf
 create mode 100644 regress/usr.sbin/vmd/config/vmd-fail-bad-format.ok
 create mode 100644 regress/usr.sbin/vmd/config/vmd-fail-missing-format.conf
 create mode 100644 regress/usr.sbin/vmd/config/vmd-fail-missing-format.ok
 create mode 100644 regress/usr.sbin/vmd/config/vmd-pass-format-keyword.conf
 create mode 100644 regress/usr.sbin/vmd/config/vmd-pass-format-keyword.ok
 create mode 100644 regress/usr.sbin/vmd/diskfmt/Makefile
 create mode 100644 regress/usr.sbin/vmd/diskfmt/vioscribble.c
 create mode 100644 usr.sbin/vmd/vioqcow2.c

diff --git regress/usr.sbin/vmd/config/Makefile 
regress/usr.sbin/vmd/config/Makefile
index 2b41e49ac83..a98012da09c 100644
--- regress/usr.sbin/vmd/config/Makefile
+++ regress/usr.sbin/vmd/config/Makefile
@@ -2,10 +2,12 @@
 
 VMD ?= /usr/sbin/vmd
 
-VMD_PASS=boot-keyword memory-round memory-just-enough cdrom-keyword
+VMD_PASS=boot-keyword memory-round memory-just-enough cdrom-keyword \
+        format-keyword
 VMD_FAIL=kernel-keyword too-few-ram vm-name-too-long too-many-ifs \
         boot-name-too-long disk-path-too-long too-many-disks \
-        switch-no-interface switch-no-add cdrom-name-too-long
+        switch-no-interface switch-no-add cdrom-name-too-long \
+        bad-format missing-format
 
 REGRESS_TARGETS=
 
diff --git regress/usr.sbin/vmd/config/vmd-fail-bad-format.conf 
regress/usr.sbin/vmd/config/vmd-fail-bad-format.conf
new file mode 100644
index 00000000000..bd92b765587
--- /dev/null
+++ regress/usr.sbin/vmd/config/vmd-fail-bad-format.conf
@@ -0,0 +1,8 @@
+#      $OpenBSD: vmd-pass-cdrom-keyword.conf,v 1.1 2018/01/07 22:59:57 
ccardenas Exp $
+# Fail on unrecognized disk format
+
+vm "x" {
+    memory 1G
+    disk "foo.img" format "rotten"
+    disable
+}
diff --git regress/usr.sbin/vmd/config/vmd-fail-bad-format.ok 
regress/usr.sbin/vmd/config/vmd-fail-bad-format.ok
new file mode 100644
index 00000000000..1f79afafa1e
--- /dev/null
+++ regress/usr.sbin/vmd/config/vmd-fail-bad-format.ok
@@ -0,0 +1 @@
+6: unrecognized disk format rotten
diff --git regress/usr.sbin/vmd/config/vmd-fail-missing-format.conf 
regress/usr.sbin/vmd/config/vmd-fail-missing-format.conf
new file mode 100644
index 00000000000..b4363fc5440
--- /dev/null
+++ regress/usr.sbin/vmd/config/vmd-fail-missing-format.conf
@@ -0,0 +1,8 @@
+#      $OpenBSD: vmd-pass-cdrom-keyword.conf,v 1.1 2018/01/07 22:59:57 
ccardenas Exp $
+# Fail on format keyword with no format argument
+
+vm "x" {
+    memory 1G
+    disk "foo.img" format
+    disable
+}
diff --git regress/usr.sbin/vmd/config/vmd-fail-missing-format.ok 
regress/usr.sbin/vmd/config/vmd-fail-missing-format.ok
new file mode 100644
index 00000000000..c88cb26bf39
--- /dev/null
+++ regress/usr.sbin/vmd/config/vmd-fail-missing-format.ok
@@ -0,0 +1 @@
+6: syntax error
diff --git regress/usr.sbin/vmd/config/vmd-pass-format-keyword.conf 
regress/usr.sbin/vmd/config/vmd-pass-format-keyword.conf
new file mode 100644
index 00000000000..24ef3d8c771
--- /dev/null
+++ regress/usr.sbin/vmd/config/vmd-pass-format-keyword.conf
@@ -0,0 +1,8 @@
+#      $OpenBSD: vmd-pass-cdrom-keyword.conf,v 1.1 2018/01/07 22:59:57 
ccardenas Exp $
+# Pass on format keyword
+
+vm "x" {
+    memory 1G
+    disk "foo.img" format "raw"
+    disable
+}
diff --git regress/usr.sbin/vmd/config/vmd-pass-format-keyword.ok 
regress/usr.sbin/vmd/config/vmd-pass-format-keyword.ok
new file mode 100644
index 00000000000..403d828b763
--- /dev/null
+++ regress/usr.sbin/vmd/config/vmd-pass-format-keyword.ok
@@ -0,0 +1 @@
+configuration OK
diff --git regress/usr.sbin/vmd/diskfmt/Makefile 
regress/usr.sbin/vmd/diskfmt/Makefile
new file mode 100644
index 00000000000..71bb2b8ce52
--- /dev/null
+++ regress/usr.sbin/vmd/diskfmt/Makefile
@@ -0,0 +1,28 @@
+#      $OpenBSD: Makefile,v 1.5 2018/07/20 22:18:49 bluhm Exp $
+
+# This regression test creates a raw disk image and a
+# qcow disk image, and scribbles the same data to both
+# of them. It verifies that they both have the same
+# result.
+#
+# For this test to work, qemu must be installed, since
+# qemu-img is used to create the qcow2 disk image.
+
+VMD_DIR=$(BSDSRCDIR)/usr.sbin/vmd/
+
+PROG=vioscribble
+SRCS=vioscribble.c $(VMD_DIR)/vioqcow2.c $(VMD_DIR)/vioraw.c
+CFLAGS+=-I$(VMD_DIR) -pthread
+LDFLAGS+=-pthread
+
+run-regress-vioscribble: scribble-images
+
+scribble-images:
+       rm -f scribble.raw scribble.qc2
+       vmctl create scribble.raw -s 4G
+       qemu-img create -f qcow2 scribble.qc2 4G
+
+
+.PHONY: ${REGRESS_TARGETS} scribble-images
+
+.include <bsd.regress.mk>
diff --git regress/usr.sbin/vmd/diskfmt/vioscribble.c 
regress/usr.sbin/vmd/diskfmt/vioscribble.c
new file mode 100644
index 00000000000..3821c3b277b
--- /dev/null
+++ regress/usr.sbin/vmd/diskfmt/vioscribble.c
@@ -0,0 +1,165 @@
+/*     $OpenBSD: $     */
+
+/*
+ * Copyright (c) 2018 Ori Bernstein <[email protected]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* 
+ * Quick hack of a program to try to test vioqcow2.c against
+ * vioraw.c.
+ *
+ * Compile with:
+ *
+ *     cc -pthread -o scribble vioscribble.c vioqcow2.c vioraw.c
+ */
+#include <sys/param.h> /* PAGE_SIZE */
+#include <sys/socket.h>
+#include <sys/stat.h>
+
+#include <machine/vmmvar.h>
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pcidevs.h>
+#include <dev/pv/virtioreg.h>
+#include <dev/pv/vioblkreg.h>
+#include <dev/pv/vioscsireg.h>
+
+#include <net/if.h>
+#include <netinet/in.h>
+#include <netinet/if_ether.h>
+
+#include <errno.h>
+#include <event.h>
+#include <poll.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <pthread.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <assert.h>
+#include <err.h>
+
+#include "pci.h"
+#include "vmd.h"
+#include "vmm.h"
+#include "virtio.h"
+
+#define CLUSTERSZ 65536
+
+int verbose;
+struct virtio_backing qcowfile;
+struct virtio_backing rawfile;
+
+/* We expect the scribble disks to be 4g in size */
+#define DISKSZ (4ull*1024ull*1024ull*1024ull)
+
+/* functions that io code depends on */
+
+void
+log_debug(const char *emsg, ...)
+{
+       if (verbose) {
+               va_list ap;
+
+               va_start(ap, emsg);
+               vfprintf(stdout, emsg, ap);
+               fprintf(stdout, "\n");
+               va_end(ap);
+       }
+}
+
+void
+log_warnx(const char *emsg, ...)
+{
+       va_list ap;
+
+       va_start(ap, emsg);
+       vfprintf(stdout, emsg, ap);
+       fprintf(stdout, "\n");
+       va_end(ap);
+}
+
+void
+log_warn(const char *emsg, ...)
+{
+       va_list ap;
+
+       va_start(ap, emsg);
+       vfprintf(stdout, emsg, ap);
+       fprintf(stdout, "\n");
+       va_end(ap);
+}
+
+static void
+fill(size_t off, char *buf, size_t len)
+{
+       size_t i;
+
+       /* use the top bits of off, since we can guess at where we went wrong. 
*/
+       for (i = 0; i < len; i++)
+               buf[i] = (off >> 8);
+}
+
+int
+main(int argc, char **argv)
+{
+       int qcfd, rawfd, i;
+       char buf[64*1024], cmp[64*1024];
+       off_t len, off, qcsz, rawsz;
+
+       verbose = !!getenv("VERBOSE");
+       qcfd = open("scribble.qc2", O_RDWR);
+       rawfd = open("scribble.raw", O_RDWR);
+       if (qcfd == -1 || virtio_init_qcow2(&qcowfile, &qcsz, qcfd) == -1)
+               err(1, "unable to open qcow");
+       if (rawfd == -1 || virtio_init_raw(&rawfile, &rawsz, rawfd) == -1)
+               err(1, "unable to open raw");
+
+       srandom_deterministic(123);
+
+       /* scribble to both disks */
+       printf("scribbling...\n");
+       for (i = 0; i < 16; i++) {
+               off = (random() % DISKSZ);
+               len = random() % sizeof buf + 1;
+               fill(off, buf, sizeof buf);
+               if (qcowfile.pwrite(qcowfile.p, buf, len, off) == -1)
+                       printf("iter %d: unable to write at %llx\n", i, off);
+               rawfile.pwrite(rawfile.p, buf, len, off);
+
+               if (qcowfile.pread(qcowfile.p, buf, len, off) == -1)
+                       printf("unable to read at %llx\n", off);
+               rawfile.pread(rawfile.p, cmp, len, off);
+               if (memcmp(buf, cmp, len) != 0) {
+                       printf("iter %d: mismatch at 0x%llx (espected val: 
%d)\n",
+                           i, off, (char)(off  >> 8));
+                       break;
+               }
+       }
+
+       /* validate that both disks match */
+       printf("validating...\n");
+       for (off = 0; off < DISKSZ; off += sizeof buf) {
+               if (qcowfile.pread(qcowfile.p, buf, sizeof buf, off) == -1)
+                       printf("unable to read at %llx\n", off);
+               rawfile.pread(rawfile.p, cmp, sizeof buf, off);
+               if (memcmp(buf, cmp, sizeof buf) != 0) {
+                       printf("mismatch at 0x%llx (espected val: %d)\n",
+                           off, (char)(off  >> 8));
+                       break;
+               }
+       }
+       return 0;
+}
diff --git usr.sbin/vmctl/main.c usr.sbin/vmctl/main.c
index b7674d0c980..fb209defbad 100644
--- usr.sbin/vmctl/main.c
+++ usr.sbin/vmctl/main.c
@@ -205,8 +205,8 @@ vmmaction(struct parse_result *res)
        switch (res->action) {
        case CMD_START:
                ret = vm_start(res->id, res->name, res->size, res->nifs,
-                   res->nets, res->ndisks, res->disks, res->path,
-                   res->isopath, res->instance);
+                   res->nets, res->ndisks, res->disks, res->disktypes,
+                   res->path, res->isopath, res->instance);
                if (ret) {
                        errno = ret;
                        err(1, "start VM operation failed");
@@ -334,6 +334,7 @@ parse_free(struct parse_result *res)
        for (i = 0; i < res->ndisks; i++)
                free(res->disks[i]);
        free(res->disks);
+       free(res->disktypes);
        memset(res, 0, sizeof(*res));
 }
 
@@ -398,10 +399,29 @@ parse_size(struct parse_result *res, char *word, long 
long val)
        return (0);
 }
 
+#define RAW_FMT_PREFIX         "raw:"
+#define QCOW2_FMT_PREFIX       "qcow2:"
+
+int
+parse_disktype(char *s, char **ret)
+{
+       *ret = s;
+       if (strstr(s, RAW_FMT_PREFIX) == s) {
+               *ret = s + strlen(RAW_FMT_PREFIX);
+               return VMDF_RAW;
+       }
+       if (strstr(s, QCOW2_FMT_PREFIX) == s) {
+               *ret = s + strlen(QCOW2_FMT_PREFIX);
+               return VMDF_QCOW2;
+       }
+       return VMDF_RAW;
+}
+
 int
-parse_disk(struct parse_result *res, char *word)
+parse_disk(struct parse_result *res, char *word, int type)
 {
        char            **disks;
+       int             *disktypes;
        char            *s;
 
        if ((disks = reallocarray(res->disks, res->ndisks + 1,
@@ -409,12 +429,19 @@ parse_disk(struct parse_result *res, char *word)
                warn("reallocarray");
                return (-1);
        }
+       if ((disktypes = reallocarray(res->disktypes, res->ndisks + 1,
+           sizeof(int))) == NULL) {
+               warn("reallocarray");
+               return -1;
+       }
        if ((s = strdup(word)) == NULL) {
                warn("strdup");
                return (-1);
        }
        disks[res->ndisks] = s;
+       disktypes[res->ndisks] = type;
        res->disks = disks;
+       res->disktypes = disktypes;
        res->ndisks++;
 
        return (0);
@@ -580,8 +607,8 @@ ctl_reset(struct parse_result *res, int argc, char *argv[])
 int
 ctl_start(struct parse_result *res, int argc, char *argv[])
 {
-       int              ch, i;
-       char             path[PATH_MAX];
+       int              ch, i, type;
+       char             path[PATH_MAX], *s;
 
        if (argc < 2)
                ctl_usage(res->ctl);
@@ -628,9 +655,10 @@ ctl_start(struct parse_result *res, int argc, char *argv[])
                                errx(1, "invalid network: %s", optarg);
                        break;
                case 'd':
-                       if (realpath(optarg, path) == NULL)
+                       type = parse_disktype(optarg, &s);
+                       if (realpath(s, path) == NULL)
                                err(1, "invalid disk path");
-                       if (parse_disk(res, path) != 0)
+                       if (parse_disk(res, path, type) != 0)
                                errx(1, "invalid disk: %s", optarg);
                        break;
                case 'i':
diff --git usr.sbin/vmctl/vmctl.8 usr.sbin/vmctl/vmctl.8
index 81ecbeb6c1d..734a651d3bf 100644
--- usr.sbin/vmctl/vmctl.8
+++ usr.sbin/vmctl/vmctl.8
@@ -55,7 +55,15 @@ Creates a VM disk image file with the specified
 .Ar path
 and
 .Ar size ,
-rounded to megabytes.
+rounded to megabytes. The disk
+.Ar format
+may be specified as either
+.Ar raw
+or 
+.Ar qcow2 ,
+defaulting to
+.Ar raw
+if left unspecified.
 .It Cm load Ar filename
 Load additional configuration from the specified file.
 .It Cm log brief
@@ -107,6 +115,14 @@ If not specified, the default is to boot using the BIOS 
image in
 Automatically connect to the VM console.
 .It Fl d Ar path
 Disk image file (may be specified multiple times to add multiple disk images).
+The disk
+.Ar path
+may be prefixed with a format prefix (
+.Pa raw:
+or
+.Pa qcow2:
+) in order to specify the disk format. If left unspecified, the default format 
is
+.Pa raw .
 .It Fl i Ar count
 Number of network interfaces to add to the VM.
 .It Fl L
diff --git usr.sbin/vmctl/vmctl.c usr.sbin/vmctl/vmctl.c
index 867a0e703e0..f9452e532bc 100644
--- usr.sbin/vmctl/vmctl.c
+++ usr.sbin/vmctl/vmctl.c
@@ -70,8 +70,8 @@ int info_console;
  */
 int
 vm_start(uint32_t start_id, const char *name, int memsize, int nnics,
-    char **nics, int ndisks, char **disks, char *kernel, char *iso,
-    char *instance)
+    char **nics, int ndisks, char **disks, int *disktypes, char *kernel,
+    char *iso, char *instance)
 {
        struct vmop_create_params *vmc;
        struct vm_create_params *vcp;
@@ -128,11 +128,13 @@ vm_start(uint32_t start_id, const char *name, int 
memsize, int nnics,
        vcp->vcp_nnics = nnics;
        vcp->vcp_id = start_id;
 
-       for (i = 0 ; i < ndisks; i++)
+       for (i = 0 ; i < ndisks; i++) {
                if (strlcpy(vcp->vcp_disks[i], disks[i],
                    sizeof(vcp->vcp_disks[i])) >=
                    sizeof(vcp->vcp_disks[i]))
                        errx(1, "disk path too long");
+               vmc->vmc_disktypes[i] = disktypes[i];
+       }
        for (i = 0 ; i < nnics; i++) {
                vmc->vmc_ifflags[i] = VMIFF_UP;
 
diff --git usr.sbin/vmctl/vmctl.h usr.sbin/vmctl/vmctl.h
index 91ade10b7d8..92af5c4f720 100644
--- usr.sbin/vmctl/vmctl.h
+++ usr.sbin/vmctl/vmctl.h
@@ -52,6 +52,7 @@ struct parse_result {
        int                      nnets;
        size_t                   ndisks;
        char                    **disks;
+       int                     *disktypes;
        int                      verbose;
        char                    *instance;
        unsigned int             flags;
@@ -74,7 +75,8 @@ int    vmmaction(struct parse_result *);
 int     parse_ifs(struct parse_result *, char *, int);
 int     parse_network(struct parse_result *, char *);
 int     parse_size(struct parse_result *, char *, long long);
-int     parse_disk(struct parse_result *, char *);
+int     parse_disktype(char *, char **);
+int     parse_disk(struct parse_result *, char *, int);
 int     parse_vmid(struct parse_result *, char *, int);
 int     parse_instance(struct parse_result *, char *);
 void    parse_free(struct parse_result *);
@@ -85,7 +87,7 @@ __dead void
 /* vmctl.c */
 int     create_imagefile(const char *, long);
 int     vm_start(uint32_t, const char *, int, int, char **, int,
-           char **, char *, char *, char *);
+           char **, int *, char *, char *, char *);
 int     vm_start_complete(struct imsg *, int *, int);
 void    terminate_vm(uint32_t, const char *, unsigned int);
 int     terminate_vm_complete(struct imsg *, int *, unsigned int);
diff --git usr.sbin/vmd/Makefile usr.sbin/vmd/Makefile
index 60616d39167..603e3fc13ef 100644
--- usr.sbin/vmd/Makefile
+++ usr.sbin/vmd/Makefile
@@ -6,7 +6,7 @@ PROG=           vmd
 SRCS=          vmd.c control.c log.c priv.c proc.c config.c vmm.c
 SRCS+=         vm.c loadfile_elf.c pci.c virtio.c i8259.c mc146818.c
 SRCS+=         ns8250.c i8253.c vmboot.c ufs.c disklabel.c dhcp.c packet.c
-SRCS+=         parse.y atomicio.c vioscsi.c vioraw.c
+SRCS+=         parse.y atomicio.c vioscsi.c vioraw.c vioqcow2.c
 
 CFLAGS+=       -Wall -I${.CURDIR}
 CFLAGS+=       -Wstrict-prototypes -Wmissing-prototypes
diff --git usr.sbin/vmd/parse.y usr.sbin/vmd/parse.y
index 2b159d9b866..1cb64caad2d 100644
--- usr.sbin/vmd/parse.y
+++ usr.sbin/vmd/parse.y
@@ -88,7 +88,7 @@ int            symset(const char *, const char *, int);
 char           *symget(const char *);
 
 ssize_t                 parse_size(char *, int64_t);
-int             parse_disk(char *);
+int             parse_disk(char *, int);
 
 static struct vmop_create_params vmc;
 static struct vm_create_params *vcp;
@@ -117,13 +117,14 @@ typedef struct {
 
 
 %token INCLUDE ERROR
-%token ADD ALLOW BOOT CDROM DISABLE DISK DOWN ENABLE GROUP INSTANCE INTERFACE
-%token LLADDR LOCAL LOCKED MEMORY NIFS OWNER PATH PREFIX RDOMAIN SIZE SOCKET
-%token SWITCH UP VM VMID
+%token ADD ALLOW BOOT CDROM DISABLE DISK DOWN ENABLE FORMAT GROUP INSTANCE
+%token INTERFACE LLADDR LOCAL LOCKED MEMORY NIFS OWNER PATH PREFIX RDOMAIN
+%token SIZE SOCKET SWITCH UP VM VMID
 %token <v.number>      NUMBER
 %token <v.string>      STRING
 %type  <v.lladdr>      lladdr
 %type  <v.number>      disable
+%type  <v.number>      image_format
 %type  <v.number>      local
 %type  <v.number>      locked
 %type  <v.number>      updown
@@ -368,8 +369,8 @@ vm_opts_l   : vm_opts_l vm_opts nl
 vm_opts                : disable                       {
                        vcp_disable = $1;
                }
-               | DISK string                   {
-                       if (parse_disk($2) != 0) {
+               | DISK string image_format      {
+                       if (parse_disk($2, $3) != 0) {
                                yyerror("failed to parse disks: %s", $2);
                                free($2);
                                YYERROR;
@@ -559,6 +560,22 @@ owner_id   : /* none */            {
                }
                ;
 
+image_format   : /* none       */      {
+                       $$ = VMDF_RAW;
+               }
+               | FORMAT string         {
+                       if (strcmp($2, "raw") == 0)
+                               $$ = VMDF_RAW;
+                       else if (strcmp($2, "qcow2") == 0)
+                               $$ = VMDF_QCOW2;
+                       else {
+                               yyerror("unrecognized disk format %s", $2);
+                               free($2);
+                               YYERROR;
+                       }
+               }
+               ;
+
 iface_opts_o   : '{' optnl iface_opts_l '}'
                | iface_opts_c
                | /* empty */
@@ -720,6 +737,7 @@ lookup(char *s)
                { "disk",               DISK },
                { "down",               DOWN },
                { "enable",             ENABLE },
+               { "format",             FORMAT },
                { "group",              GROUP },
                { "id",                 VMID },
                { "include",            INCLUDE },
@@ -1212,7 +1230,7 @@ parse_size(char *word, int64_t val)
 }
 
 int
-parse_disk(char *word)
+parse_disk(char *word, int type)
 {
        char    path[PATH_MAX];
 
@@ -1231,6 +1249,7 @@ parse_disk(char *word)
                log_warnx("disk path too long");
                return (-1);
        }
+       vmc.vmc_disktypes[vcp->vcp_ndisks] = type;
 
        vcp->vcp_ndisks++;
 
diff --git usr.sbin/vmd/vioqcow2.c usr.sbin/vmd/vioqcow2.c
new file mode 100644
index 00000000000..e675c24d136
--- /dev/null
+++ usr.sbin/vmd/vioqcow2.c
@@ -0,0 +1,566 @@
+/*     $OpenBSD: $     */
+
+/*
+ * Copyright (c) 2018 Ori Bernstein <[email protected]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <machine/vmmvar.h>
+#include <dev/pci/pcireg.h>
+
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <assert.h>
+#include <err.h>
+
+#include "vmd.h"
+#include "vmm.h"
+#include "virtio.h"
+
+#define QCOW2_COMPRESSED       0x4000000000000000ull
+#define QCOW2_INPLACE          0x8000000000000000ull
+
+#define QCOW2_DIRTY            (1 << 0)
+#define QCOW2_CORRUPT          (1 << 1)
+
+enum {
+       ICFEATURE_DIRTY         = 1 << 0,
+       ICFEATURE_CORRUPT       = 1 << 1,
+};
+
+enum {
+       ACFEATURE_BITEXT        = 1 << 0,
+};
+
+struct qcheader {
+       char magic[4];
+       uint32_t version;
+       uint64_t backingoff;
+       uint32_t backingsz;
+       uint32_t clustershift;
+       uint64_t disksz;
+       uint32_t cryptmethod;
+       uint32_t l1sz;
+       uint64_t l1off;
+       uint64_t refoff;
+       uint32_t refsz;
+       uint32_t snapcount;
+       uint64_t snapsz;
+       /* v3 additions */
+       uint64_t incompatfeatures;
+       uint64_t compatfeatures;
+       uint64_t autoclearfeatures;
+       uint32_t reforder;      /* Bits = 1 << reforder */
+       uint32_t headersz;
+} __packed;
+
+struct qcdisk {
+       pthread_rwlock_t lock;
+       struct qcdisk *base;
+       struct qcheader header;
+
+       int       fd;
+       uint64_t *l1;
+       char     *scratch;
+       off_t     end;
+       uint32_t  clustersz;
+       off_t     disksz; /* In bytes */
+       uint32_t cryptmethod;
+
+       uint32_t l1sz;
+       off_t    l1off;
+
+       off_t    refoff;
+       uint32_t refsz;
+
+       uint32_t nsnap;
+       off_t    snapoff;
+
+       /* v3 features */
+       uint64_t incompatfeatures;
+       uint64_t autoclearfeatures;
+       uint32_t refssz;
+       uint32_t headersz;
+};
+
+extern char *__progname;
+
+static off_t xlate(struct qcdisk *, off_t, int *);
+static int copy_cluster(struct qcdisk *, struct qcdisk *, off_t, off_t);
+static off_t mkcluster(struct qcdisk *, struct qcdisk *, off_t, off_t);
+static int inc_refs(struct qcdisk *, off_t, int);
+static int qc2_openpath(struct qcdisk *, char *, int);
+static int qc2_open(struct qcdisk *, int);
+static ssize_t qc2_pread(void *, char *, size_t, off_t);
+static ssize_t qc2_pwrite(void *, char *, size_t, off_t);
+static void qc2_close(void *);
+
+/*
+ * Initializes a qcow2 disk image backing file from an fd.
+ * Stores the size of the disk in bytes in *szp,
+ * returning -1 for error, 0 for success.
+ *
+ * May open snapshot base images.
+ */
+int
+virtio_init_qcow2(struct virtio_backing *file, off_t *szp, int fd)
+{
+       struct qcdisk *diskp;
+
+       diskp = malloc(sizeof(struct qcdisk));
+       if (diskp == NULL)
+               return -1;
+       if (qc2_open(diskp, fd) == -1) {
+               free(diskp);
+               return -1;
+       }
+       file->p = diskp;
+       file->pread = qc2_pread;
+       file->pwrite = qc2_pwrite;
+       file->close = qc2_close;
+       *szp = diskp->disksz;
+       return 0;
+}
+
+static int
+qc2_openpath(struct qcdisk *disk, char *path, int flags)
+{
+       int fd;
+
+       fd = open(path, flags);
+       if (fd < 0)
+               return -1;
+       return qc2_open(disk, fd);
+}
+
+static int
+qc2_open(struct qcdisk *disk, int fd)
+{
+       char basepath[PATH_MAX];
+       struct stat st;
+       struct qcheader header;
+       uint64_t backingoff;
+       uint32_t backingsz;
+       size_t i;
+       int version;
+
+       if (pread(fd, &header, sizeof header, 0) != sizeof header)
+               return -1;
+       if (strncmp(header.magic, "QFI\xfb", 4) != 0)
+               return -1;
+       pthread_rwlock_init(&disk->lock, NULL);
+       disk->fd = fd;
+       disk->base = NULL;
+
+       disk->clustersz         = (1ull << be32toh(header.clustershift));
+       disk->disksz            = be64toh(header.disksz);
+       disk->cryptmethod       = be32toh(header.cryptmethod);
+       disk->l1sz              = be32toh(header.l1sz);
+       disk->l1off             = be64toh(header.l1off);
+       disk->refsz             = be32toh(header.refsz);
+       disk->refoff            = be64toh(header.refoff);
+       disk->nsnap             = be32toh(header.snapcount);
+       disk->snapoff           = be64toh(header.snapsz);
+       /* 
+        * The additional features here are defined as 0 in the v2 format,
+        * so as long as we clear the buffer before parsing, we don't need
+        * to check versions here.
+        */
+       disk->incompatfeatures = be64toh(header.incompatfeatures);
+       disk->autoclearfeatures = be64toh(header.autoclearfeatures);
+       disk->refssz = be32toh(header.refsz);
+       disk->headersz = be32toh(header.headersz);
+
+       /*
+        * We only know about the dirty or corrupt bits here.
+        */
+       if (disk->incompatfeatures & ~(QCOW2_DIRTY|QCOW2_CORRUPT)) {
+               log_warn("%s: unsupported features %llx", __progname,
+                   disk->incompatfeatures & ~(QCOW2_DIRTY|QCOW2_CORRUPT));
+               return -1;
+       }
+
+       disk->l1 = calloc(disk->l1sz, sizeof *disk->l1);
+       if (pread(disk->fd, (char*)disk->l1, 8*disk->l1sz, disk->l1off)
+           != 8*disk->l1sz) {
+               free(disk->l1);
+               return -1;
+       }
+       for (i = 0; i < disk->l1sz; i++)
+               disk->l1[i] = be64toh(disk->l1[i]);
+       version = be32toh(header.version);
+       if (version != 2 && version != 3) {
+               log_warn("%s: unknown qcow2 version %d", __progname, version);
+               return -1;
+       }
+
+       backingoff = be64toh(header.backingoff);
+       backingsz = be32toh(header.backingsz);
+       if (backingsz != 0) {
+               /*
+                * FIXME: we need to figure out a way of opening these things,
+                * otherwise we just crash with a pledge violation.
+                */
+               log_warn("unsupported external snapshot images");
+               return -1;
+
+               if (backingsz >= sizeof basepath - 1) {
+                       log_warn("%s: snapshot path too long", __progname);
+                       return -1;
+               }
+               if (pread(fd, basepath, backingsz, backingoff) != backingsz) {
+                       log_warn("%s: could not read snapshot base name", 
+                           __progname);
+                       return -1;
+               }
+               basepath[backingsz] = 0;
+
+               disk->base = calloc(1, sizeof(struct qcdisk));
+               if (qc2_openpath(disk->base, basepath, O_RDONLY) == -1) {
+                       free(disk->base);
+                       return -1;
+               }
+               if (disk->base->clustersz != disk->clustersz) {
+                       log_warn("%s: all disks must share clustersize",
+                           __progname);
+                       free(disk->base);
+                       return -1;
+               }
+       }
+       fstat(fd, &st);
+       disk->end = st.st_size;
+       return 0;
+}
+
+static ssize_t
+qc2_pread(void *p, char *buf, size_t len, off_t off)
+{
+       struct qcdisk *disk, *d;
+       off_t phys_off, end, cluster_off;
+       ssize_t sz, rem;
+
+       disk = p;
+       end = off + len;
+       if (off < 0 || end > disk->disksz)
+               return -1;
+
+       /* handle head chunk separately */
+       rem = len;
+       while (off != end) {
+               for (d = disk; d; d = d->base)
+                       if ((phys_off = xlate(d, off, NULL)) > 0)
+                               break;
+               /* Break out into chunks. This handles
+                * three cases:
+                *
+                *    |----+====|========|====+    |
+                * 
+                * Either we are at the start of the read,
+                * and the cluster has some leading bytes.
+                * This means that we are reading the tail
+                * of the cluster, and our size is:
+                *
+                *      clustersz - (off % clustersz).
+                *
+                * Otherwise, we're reading the middle section.
+                * We're already aligned here, so we can just
+                * read the whole cluster size. Or we're at the
+                * tail, at which point we just want to read the
+                * remaining bytes.
+                */
+               cluster_off = off % disk->clustersz;
+               sz = disk->clustersz - cluster_off;
+               if (sz > rem)
+                       sz = rem;
+               /* 
+               * If we're within the disk, but don't have backing bytes,
+               * just read back zeros.
+               */
+               if (!d)
+                       bzero(buf, sz);
+               else if (pread(d->fd, buf, sz, phys_off) != sz)
+                       return -1;
+               off += sz;
+               buf += sz;
+               rem -= sz;
+       }
+       return len;
+}
+
+ssize_t
+qc2_pwrite(void *p, char *buf, size_t len, off_t off)
+{
+       struct qcdisk *disk, *d;
+       off_t phys_off, cluster_off, end;
+       ssize_t sz, rem;
+       int inplace;
+
+       d = p;
+       disk = p;
+       inplace = 1;
+       end = off + len;
+       if (off < 0 || end > disk->disksz)
+               return -1;
+       rem = len;
+       while (off != end) {
+               /* See the read code for a summary of the computation */
+               cluster_off = off % disk->clustersz;
+               sz = disk->clustersz - cluster_off;
+               if (sz > rem)
+                       sz = rem;
+
+               phys_off = xlate(disk, off, &inplace);
+               if (phys_off == -1)
+                       return -1;
+               /* 
+                * If we couldn't find the cluster in the writable disk,
+                * see if it exists in the base image. If it does, we
+                * need to copy it before the write. The copy happens
+                * in the '!inplace' if clause below the search.
+                */
+               if (phys_off == 0)
+                       for (d = disk->base; d; d = d->base)
+                               if ((phys_off = xlate(d, off, NULL)) > 0)
+                                       break;
+               if (!inplace)
+                       phys_off = mkcluster(disk, d, off, phys_off);
+               if (phys_off == -1)
+                       return -1;
+               if (pwrite(disk->fd, buf, sz, phys_off) != sz)
+                       return -1;
+               off += sz;
+               buf += sz;
+               rem -= sz;
+       }
+       return len;
+}
+
+static void
+qc2_close(void *p)
+{
+       struct qcdisk *disk;
+
+       disk = p;
+       pwrite(disk->fd, disk->l1, disk->l1sz, disk->l1off);
+       close(disk->fd);
+       free(disk);
+}
+
+/* 
+ * Translates a virtual offset into an on-disk offset.
+ * Returns:
+ *     -1 on error
+ *      0 on 'not found'
+ *     >0 on found
+ */
+static off_t
+xlate(struct qcdisk *disk, off_t off, int *inplace)
+{
+       off_t l2sz, l1off, l2tab, l2off, cluster, clusteroff;
+       uint64_t buf;
+
+
+       pthread_rwlock_rdlock(&disk->lock);
+       if (off < 0)
+               goto err;
+
+       l2sz = disk->clustersz / 8;
+       l1off = (off / disk->clustersz) / l2sz;
+       if (l1off >= disk->l1sz)
+               goto err;
+
+       l2tab = disk->l1[l1off];
+       l2tab &= ~QCOW2_INPLACE;
+       if (l2tab == 0) {
+               pthread_rwlock_unlock(&disk->lock);
+               return 0;
+       }
+       l2off = (off / disk->clustersz) % l2sz;
+       pread(disk->fd, &buf, sizeof(buf), l2tab + l2off*8);
+       cluster = be64toh(buf);
+       /* 
+        * cluster may be 0 (no mapping); in that case the steps
+        * below still leave the return value at 0.
+        */
+       if (inplace)
+               *inplace = !!(cluster & QCOW2_INPLACE);
+       if (cluster & QCOW2_COMPRESSED) {
+               log_warn("%s: compressed clusters unsupported", __progname);
+               goto err;
+       }
+       pthread_rwlock_unlock(&disk->lock);
+       clusteroff = 0;
+       cluster &= ~QCOW2_INPLACE;
+       if (cluster)
+               clusteroff = off % disk->clustersz;
+       return cluster + clusteroff;
+err:
+       pthread_rwlock_unlock(&disk->lock);
+       return -1;
+}
+
+/*
+ * Allocates a new cluster on disk, creating a new L2 table
+ * if needed. The cluster starts off with a refcount of one,
+ * and the writable bit set.
+ *
+ * Returns -1 on error; otherwise the physical address of the
+ * write offset within the newly allocated cluster.
+ */
+static off_t
+mkcluster(struct qcdisk *disk, struct qcdisk *base, off_t off, off_t src_phys)
+{
+       off_t l2sz, l1off, l2tab, l2off, cluster, clusteroff, orig;
+       uint64_t buf;
+       int fd;
+
+       pthread_rwlock_wrlock(&disk->lock);
+
+       cluster = -1;
+       fd = disk->fd;
+       /* L1 entries always exist */
+       l2sz = disk->clustersz / 8;
+       l1off = off / (disk->clustersz * l2sz);
+       if (l1off >= disk->l1sz)
+               goto fail;
+
+       /*
+        * Align disk to cluster size, for ftruncate: Not strictly
+        * required, but it is easier to eyeball buggy write offsets,
+        * and helps performance a bit.
+        */
+       disk->end = (disk->end + disk->clustersz - 1) & ~(disk->clustersz - 1);
+
+       l2tab = disk->l1[l1off];
+       l2off = (off / disk->clustersz) % l2sz;
+       /* We may need to create or clone an L2 table to map the block */
+       if (l2tab == 0 || (l2tab & QCOW2_INPLACE) == 0) {
+               orig = l2tab & ~QCOW2_INPLACE;
+               l2tab = disk->end;
+               disk->end += disk->clustersz;
+               if (ftruncate(disk->fd, disk->end) == -1) {
+                       perror("ftruncate");
+                       goto fail;
+               }
+
+               /*
+                * If an L2 table already existed but is not marked
+                * writable in place, clone it into the new cluster.
+                */
+               if (orig != 0 && copy_cluster(disk, disk, l2tab, orig) == -1) {
+                       perror("move cluster");
+                       goto fail;
+               }
+               /* Update l1 -- we flush it later */
+               disk->l1[l1off] = l2tab | QCOW2_INPLACE;
+               if (inc_refs(disk, l2tab, 1) == -1) {
+                       perror("refs");
+                       goto fail;
+               }
+       }
+       l2tab &= ~QCOW2_INPLACE;
+
+       /* Grow the disk */
+       if (ftruncate(disk->fd, disk->end + disk->clustersz) < 0)
+               goto fail;
+       if (src_phys > 0)
+               if (copy_cluster(disk, base, disk->end, src_phys) == -1)
+                       goto fail;
+       cluster = disk->end;
+       disk->end += disk->clustersz;
+       buf = htobe64(cluster | QCOW2_INPLACE);
+       if (pwrite(disk->fd, &buf, sizeof buf, l2tab + l2off*8) != sizeof(buf))
+               goto fail;
+
+       /* TODO: lazily sync: currently VMD doesn't close things */
+       buf = htobe64(disk->l1[l1off]);
+       if (pwrite(disk->fd, &buf, sizeof buf, disk->l1off + 8*l1off) != 8)
+               goto fail;
+       if (inc_refs(disk, cluster, 1) == -1)
+               goto fail;
+
+       pthread_rwlock_unlock(&disk->lock);
+       clusteroff = off % disk->clustersz;
+       return cluster + clusteroff;
+
+fail:
+       pthread_rwlock_unlock(&disk->lock);
+       return -1;
+}
+
+static int
+copy_cluster(struct qcdisk *disk, struct qcdisk *base, off_t dst, off_t src)
+{
+       char *scratch;
+
+       scratch = alloca(disk->clustersz);
+       if (!scratch)
+               err(1, "out of memory");
+       src &= ~(disk->clustersz - 1);
+       dst &= ~(disk->clustersz - 1);
+       if (pread(base->fd, scratch, disk->clustersz, src) == -1)
+               return -1;
+       if (pwrite(disk->fd, scratch, disk->clustersz, dst) == -1)
+               return -1;
+       return 0;
+}
+
+static int
+inc_refs(struct qcdisk *disk, off_t off, int newcluster)
+{
+       off_t l1off, l1idx, l2idx, l2cluster;
+       size_t nper;
+       uint16_t refs;
+       uint64_t buf;
+
+       off &= ~QCOW2_INPLACE;
+       nper = disk->clustersz / 2;
+       l1idx = (off / disk->clustersz) / nper;
+       l2idx = (off / disk->clustersz) % nper;
+       l1off = disk->refoff + 8*l1idx;
+       if (pread(disk->fd, &buf, sizeof buf, l1off) != 8)
+               return -1;
+
+       l2cluster = be64toh(buf);
+       if (l2cluster == 0) {
+               l2cluster = disk->end;
+               disk->end += disk->clustersz;
+               if (ftruncate(disk->fd, disk->end) < 0) {
+                       log_warn("refs block grow fail ");
+                       return -1;
+               }
+               buf = htobe64(l2cluster);
+               if (pwrite(disk->fd, &buf, sizeof buf, l1off) != 8) {
+                       return -1;
+               }
+       }
+
+       refs = 1;
+       if (!newcluster) {
+               if (pread(disk->fd, &refs, sizeof refs, l2cluster+2*l2idx) != 2)
+                       return -1;
+               refs = be16toh(refs) + 1;
+       }
+       refs = htobe16(refs);
+       if (pwrite(disk->fd, &refs, sizeof refs, l2cluster + 2*l2idx) != 2) {
+               log_warn("could not write ref block");
+       }
+       return 0;
+}
+
diff --git usr.sbin/vmd/virtio.c usr.sbin/vmd/virtio.c
index df500a385ce..5f94dabce91 100644
--- usr.sbin/vmd/virtio.c
+++ usr.sbin/vmd/virtio.c
@@ -1746,13 +1746,18 @@ vmmci_io(int dir, uint16_t reg, uint32_t *data, uint8_t 
*intr,
 }
 
 static int
-virtio_init_disk(struct virtio_backing *file, off_t *sz, int fd)
+virtio_init_disk(struct virtio_backing *file, off_t *sz, int fd, int type)
 {
        /* 
-        * This is where we slot in disk type selection.
-        *  Right now, there's only raw.
+        * Dispatch on the caller-supplied disk format;
+        * unrecognized formats are rejected with a warning.
         */
-       return virtio_init_raw(file, sz, fd);
+       switch (type) {
+       case VMDF_RAW:          return virtio_init_raw(file, sz, fd);
+       case VMDF_QCOW2:        return virtio_init_qcow2(file, sz, fd);
+       }
+       log_warnx("%s: invalid disk format", __progname);
+       return -1;
 }
 
 void
@@ -1833,7 +1838,7 @@ virtio_init(struct vmd_vm *vm, int child_cdrom, int 
*child_disks,
                        vioblk[i].vm_id = vcp->vcp_id;
                        vioblk[i].irq = pci_get_dev_irq(id);
                        if (virtio_init_disk(&vioblk[i].file, &vioblk[i].sz,
-                           child_disks[i]) == -1)
+                           child_disks[i], vmc->vmc_disktypes[i]) == -1)
                                continue;
                        vioblk[i].sz /= 512;
                }
@@ -1959,7 +1964,7 @@ virtio_init(struct vmd_vm *vm, int child_cdrom, int 
*child_disks,
                        vioscsi->vq[i].last_avail = 0;
                }
                if (virtio_init_disk(&vioscsi->file, &vioscsi->sz,
-                   child_cdrom) == -1)
+                   child_cdrom, VMDF_RAW) == -1)
                        return;
                vioscsi->locked = 0;
                vioscsi->lba = 0;
@@ -2098,8 +2103,9 @@ vionet_restore(int fd, struct vmd_vm *vm, int *child_taps)
 }
 
 int
-vioblk_restore(int fd, struct vm_create_params *vcp, int *child_disks)
+vioblk_restore(int fd, struct vmop_create_params *vmc, int *child_disks)
 {
+       struct vm_create_params *vcp = &vmc->vmc_params;
        uint8_t i;
 
        nr_vioblk = vcp->vcp_ndisks;
@@ -2123,7 +2129,7 @@ vioblk_restore(int fd, struct vm_create_params *vcp, int 
*child_disks)
                        return (-1);
                }
                if (virtio_init_disk(&vioblk[i].file, &vioblk[i].sz,
-                    child_disks[i]) == -1)
+                    child_disks[i], vmc->vmc_disktypes[i]) == -1)
                        continue;
        }
        return (0);
@@ -2155,7 +2161,7 @@ vioscsi_restore(int fd, struct vm_create_params *vcp, int 
child_cdrom)
                return (-1);
        }
 
-       virtio_init_disk(&vioscsi->file, &vioscsi->sz, child_cdrom);
+       virtio_init_disk(&vioscsi->file, &vioscsi->sz, child_cdrom, VMDF_RAW);
 
        return (0);
 }
@@ -2171,7 +2177,7 @@ virtio_restore(int fd, struct vmd_vm *vm, int 
child_cdrom, int *child_disks,
        if ((ret = viornd_restore(fd, vcp)) == -1)
                return ret;
 
-       if ((ret = vioblk_restore(fd, vcp, child_disks)) == -1)
+       if ((ret = vioblk_restore(fd, vmc, child_disks)) == -1)
                return ret;
 
        if ((ret = vioscsi_restore(fd, vcp, child_cdrom)) == -1)
diff --git usr.sbin/vmd/virtio.h usr.sbin/vmd/virtio.h
index 84a7e2af6a5..38f1529124d 100644
--- usr.sbin/vmd/virtio.h
+++ usr.sbin/vmd/virtio.h
@@ -270,10 +270,11 @@ void viornd_update_qa(void);
 int viornd_notifyq(void);
 
 int virtio_init_raw(struct virtio_backing *dev, off_t *sz, int fd);
+int virtio_init_qcow2(struct virtio_backing *dev, off_t *sz, int fd);
 
 int virtio_blk_io(int, uint16_t, uint32_t *, uint8_t *, void *, uint8_t);
 int vioblk_dump(int);
-int vioblk_restore(int, struct vm_create_params *, int *);
+int vioblk_restore(int, struct vmop_create_params *, int *);
 void vioblk_update_qs(struct vioblk_dev *);
 void vioblk_update_qa(struct vioblk_dev *);
 int vioblk_notifyq(struct vioblk_dev *);
diff --git usr.sbin/vmd/vm.conf.5 usr.sbin/vmd/vm.conf.5
index 413395e4f3a..57f25f2f8e3 100644
--- usr.sbin/vmd/vm.conf.5
+++ usr.sbin/vmd/vm.conf.5
@@ -144,8 +144,15 @@ nor
 is specified.
 .It Cm disable
 Do not start this VM.
-.It Cm disk Ar path
+.It Cm disk Ar path Op Cm format Ar fmt
 Disk image file (may be specified multiple times to add multiple disk images).
+The format may be specified as either
+.Ar qcow2
+or
+.Ar raw ,
+defaulting to
+.Ar raw
+if left unspecified.
 .It Oo Cm local Oc Cm interface Oo name Oc Op Brq ...
 Network interface to add to the VM.
 The optional
diff --git usr.sbin/vmd/vmd.h usr.sbin/vmd/vmd.h
index 2b83bb42c71..6f0f93bd574 100644
--- usr.sbin/vmd/vmd.h
+++ usr.sbin/vmd/vmd.h
@@ -164,6 +164,11 @@ struct vmop_create_params {
 #define VMIFF_LOCAL            0x04
 #define VMIFF_RDOMAIN          0x08
 #define VMIFF_OPTMASK          (VMIFF_LOCKED|VMIFF_LOCAL|VMIFF_RDOMAIN)
+
+       unsigned int             vmc_disktypes[VMM_MAX_DISKS_PER_VM];
+#define VMDF_RAW               0x01
+#define VMDF_QCOW2             0x02
+
        char                     vmc_ifnames[VMM_MAX_NICS_PER_VM][IF_NAMESIZE];
        char                     vmc_ifswitch[VMM_MAX_NICS_PER_VM][VM_NAME_MAX];
        char                     vmc_ifgroup[VMM_MAX_NICS_PER_VM][IF_NAMESIZE];
-- 
2.16.4


-- 
    Ori Bernstein

Reply via email to