virtio 1.0 for virtio_mmio is not yet implemented, but 0.9 devices
continue to work.
---
 share/man/man4/virtio.4     |  11 +-
 sys/dev/pci/virtio_pci.c    | 518 +++++++++++++++++++++++++++++++-----
 sys/dev/pci/virtio_pcireg.h |  57 ++++
 sys/dev/pv/if_vio.c         |   9 +-
 sys/dev/pv/virtiovar.h      |   5 +
 5 files changed, 531 insertions(+), 69 deletions(-)

diff --git a/share/man/man4/virtio.4 b/share/man/man4/virtio.4
index 5e60e2e0c32..dd9d4551715 100644
--- a/share/man/man4/virtio.4
+++ b/share/man/man4/virtio.4
@@ -22,7 +22,7 @@
 .Nd VirtIO support driver
 .Sh SYNOPSIS
 .Cd "virtio* at fdt?"
-.Cd "virtio* at pci?"
+.Cd "virtio* at pci? flags 0x00"
 .Sh DESCRIPTION
 The
 .Nm
@@ -56,7 +56,14 @@ control interface
 The
 .Nm
 driver conforms to the virtio 0.9.5 specification.
-The virtio 1.0 standard is not supported, yet.
+The virtio 1.0 standard is only supported for PCI devices.
+.Pp
+By default 0.9 is preferred over 1.0.
+This can be changed by setting the bit 0x4 in the flags.
+.Pp
+Setting the bit 0x8 in the flags disables the 1.0 support completely.
+.Sh BUGS
+Big-endian architectures are not yet supported.
 .Sh SEE ALSO
 .Xr intro 4
 .Sh HISTORY
diff --git a/sys/dev/pci/virtio_pci.c b/sys/dev/pci/virtio_pci.c
index f370f513e85..591da61290a 100644
--- a/sys/dev/pci/virtio_pci.c
+++ b/sys/dev/pci/virtio_pci.c
@@ -35,11 +35,16 @@
 #include <dev/pci/pcidevs.h>
 #include <dev/pci/pcireg.h>
 #include <dev/pci/pcivar.h>
+#include <dev/pci/virtio_pcireg.h>
 
 #include <dev/pv/virtioreg.h>
 #include <dev/pv/virtiovar.h>
 #include <dev/pci/virtio_pcireg.h>
 
+#define DNPRINTF(n,x...)       /* printf(x) when VIRTIO_DEBUG >= n */  \
+    do { if (VIRTIO_DEBUG >= (n)) printf(x); } while(0)
+
+
 /*
  * XXX: Before being used on big endian arches, the access to config registers
  * XXX: needs to be reviewed/fixed. The non-device specific registers are
@@ -52,6 +57,8 @@ struct virtio_pci_softc;
 
 int            virtio_pci_match(struct device *, void *, void *);
 void           virtio_pci_attach(struct device *, struct device *, void *);
+int            virtio_pci_attach_09(struct virtio_pci_softc *sc, struct 
pci_attach_args *pa);
+int            virtio_pci_attach_10(struct virtio_pci_softc *sc, struct 
pci_attach_args *pa);
 int            virtio_pci_detach(struct device *, int);
 
 void           virtio_pci_kick(struct virtio_softc *, uint16_t);
@@ -67,8 +74,8 @@ void          virtio_pci_write_device_config_8(struct 
virtio_softc *, int, uint64_t);
 uint16_t       virtio_pci_read_queue_size(struct virtio_softc *, uint16_t);
 void           virtio_pci_setup_queue(struct virtio_softc *, struct virtqueue 
*, uint64_t);
 void           virtio_pci_set_status(struct virtio_softc *, int);
-int            virtio_pci_negotiate_features(struct virtio_softc *,
-    const struct virtio_feature_name *);
+int            virtio_pci_negotiate_features(struct virtio_softc *, const 
struct virtio_feature_name *);
+int            virtio_pci_negotiate_features_10(struct virtio_softc *, const 
struct virtio_feature_name *);
 void           virtio_pci_set_msix_queue_vector(struct virtio_pci_softc *, 
uint32_t, uint16_t);
 void           virtio_pci_set_msix_config_vector(struct virtio_pci_softc *, 
uint16_t);
 int            virtio_pci_msix_establish(struct virtio_pci_softc *, struct 
pci_attach_args *, int, int (*)(void *), void *);
@@ -80,6 +87,10 @@ int          virtio_pci_legacy_intr_mpsafe(void *);
 int            virtio_pci_config_intr(void *);
 int            virtio_pci_queue_intr(void *);
 int            virtio_pci_shared_queue_intr(void *);
+int            virtio_pci_find_cap(struct virtio_pci_softc *sc, int cfg_type, 
void *buf, int buflen);
+#if VIRTIO_DEBUG
+void virtio_pci_dump_caps(struct virtio_pci_softc *sc);
+#endif
 
 enum irq_type {
        IRQ_NO_MSIX,
@@ -96,9 +107,14 @@ struct virtio_pci_softc {
        bus_space_handle_t      sc_ioh;
        bus_size_t              sc_iosize;
 
+       bus_space_tag_t         sc_bars_iot[4];
+       bus_space_handle_t      sc_bars_ioh[4];
+       bus_size_t              sc_bars_iosize[4];
+
        bus_space_tag_t         sc_notify_iot;
        bus_space_handle_t      sc_notify_ioh;
        bus_size_t              sc_notify_iosize;
+       unsigned int            sc_notify_off_multiplier;
 
        bus_space_tag_t         sc_devcfg_iot;
        bus_space_handle_t      sc_devcfg_ioh;
@@ -145,14 +161,69 @@ struct virtio_ops virtio_pci_ops = {
        virtio_pci_poll_intr,
 };
 
+static inline  /* read one register of the given byte width via sc_iot/sc_ioh */
+uint64_t _cread(struct virtio_pci_softc *sc, unsigned off, unsigned size)
+{
+       uint64_t val = 0;       /* sizes other than 1/2/4/8 read as 0 */
+       switch (size) {         /* access width in bytes */
+       case 1:
+               val = bus_space_read_1(sc->sc_iot, sc->sc_ioh, off);
+               break;
+       case 2:
+               val = bus_space_read_2(sc->sc_iot, sc->sc_ioh, off);
+               break;
+       case 4:
+               val = bus_space_read_4(sc->sc_iot, sc->sc_ioh, off);
+               break;
+       case 8:
+               val = bus_space_read_8(sc->sc_iot, sc->sc_ioh, off);
+               break;
+       }
+       return val;
+}
+
+#define CREAD(sc, memb)  _cread((sc), offsetof(struct virtio_pci_common_cfg, memb), \
+    sizeof(((struct virtio_pci_common_cfg *)0)->memb)) /* width = field size */
+
+#define CWRITE(sc, memb, val)                                                  \
+       do {                                                                    \
+               struct virtio_pci_common_cfg c; /* only used for sizeof */      \
+               size_t off = offsetof(struct virtio_pci_common_cfg, memb);      \
+               size_t size = sizeof(c.memb);                                   \
+               /* write val to field memb; access width is the field size */   \
+               DNPRINTF(2, "%s: %d: off %#zx size %#zx write %#llx\n",         \
+                   __func__, __LINE__, off, size, (unsigned long long)(val));  \
+               switch (size) {                                                 \
+               case 1:                                                         \
+                       bus_space_write_1((sc)->sc_iot, (sc)->sc_ioh, off, (val)); \
+                       break;                                                  \
+               case 2:                                                         \
+                       bus_space_write_2((sc)->sc_iot, (sc)->sc_ioh, off, (val)); \
+                       break;                                                  \
+               case 4:                                                         \
+                       bus_space_write_4((sc)->sc_iot, (sc)->sc_ioh, off, (val)); \
+                       break;                                                  \
+               case 8:                                                         \
+                       bus_space_write_8((sc)->sc_iot, (sc)->sc_ioh, off, (val)); \
+                       break;                                                  \
+               }                                                               \
+       } while (0)
+
 uint16_t
 virtio_pci_read_queue_size(struct virtio_softc *vsc, uint16_t idx)
 {
        struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
-       bus_space_write_2(sc->sc_iot, sc->sc_ioh, VIRTIO_CONFIG_QUEUE_SELECT,
-           idx);
-       return bus_space_read_2(sc->sc_iot, sc->sc_ioh,
-           VIRTIO_CONFIG_QUEUE_SIZE);
+       uint16_t ret;
+       if (sc->sc_sc.sc_version_1) {
+               CWRITE(sc, queue_select, idx);
+               ret = CREAD(sc, queue_size);
+       } else {
+               bus_space_write_2(sc->sc_iot, sc->sc_ioh,
+                   VIRTIO_CONFIG_QUEUE_SELECT, idx);
+               ret = bus_space_read_2(sc->sc_iot, sc->sc_ioh,
+                   VIRTIO_CONFIG_QUEUE_SIZE);
+       }
+       return ret;
 }
 
 void
@@ -160,10 +231,26 @@ virtio_pci_setup_queue(struct virtio_softc *vsc, struct 
virtqueue *vq,
     uint64_t addr)
 {
        struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
-       bus_space_write_2(sc->sc_iot, sc->sc_ioh, VIRTIO_CONFIG_QUEUE_SELECT,
-           vq->vq_index);
-       bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_CONFIG_QUEUE_ADDRESS,
-           addr / VIRTIO_PAGE_SIZE);
+       if (sc->sc_sc.sc_version_1) {
+               CWRITE(sc, queue_select, vq->vq_index);
+               if (addr == 0) {
+                       CWRITE(sc, queue_enable, 0);
+                       CWRITE(sc, queue_desc, 0);
+                       CWRITE(sc, queue_avail, 0);
+                       CWRITE(sc, queue_used, 0);
+               } else {
+                       CWRITE(sc, queue_desc, addr);
+                       CWRITE(sc, queue_avail, addr + vq->vq_availoffset);
+                       CWRITE(sc, queue_used, addr + vq->vq_usedoffset);
+                       CWRITE(sc, queue_enable, 1);
+                       vq->vq_notify_off = CREAD(sc, queue_notify_off);
+               }
+       } else {
+               bus_space_write_2(sc->sc_iot, sc->sc_ioh,
+                   VIRTIO_CONFIG_QUEUE_SELECT, vq->vq_index);
+               bus_space_write_4(sc->sc_iot, sc->sc_ioh,
+                   VIRTIO_CONFIG_QUEUE_ADDRESS, addr / VIRTIO_PAGE_SIZE);
+       }
 
        /*
         * This path is only executed if this function is called after
@@ -174,8 +261,12 @@ virtio_pci_setup_queue(struct virtio_softc *vsc, struct 
virtqueue *vq,
                int vec = 1;
                if (sc->sc_irq_type == IRQ_MSIX_PER_VQ)
                       vec += vq->vq_index;
-               bus_space_write_2(sc->sc_iot, sc->sc_ioh,
-                   VIRTIO_MSI_QUEUE_VECTOR, vec);
+               if (sc->sc_sc.sc_version_1) {
+                       CWRITE(sc, queue_msix_vector, vec);
+               } else {
+                       bus_space_write_2(sc->sc_iot, sc->sc_ioh,
+                           VIRTIO_MSI_QUEUE_VECTOR, vec);
+               }
        }
 }
 
@@ -185,11 +276,17 @@ virtio_pci_set_status(struct virtio_softc *vsc, int 
status)
        struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
        int old = 0;
 
-       if (status != 0)
-               old = bus_space_read_1(sc->sc_iot, sc->sc_ioh,
-                                      VIRTIO_CONFIG_DEVICE_STATUS);
-       bus_space_write_1(sc->sc_iot, sc->sc_ioh, VIRTIO_CONFIG_DEVICE_STATUS,
-                         status|old);
+       if (sc->sc_sc.sc_version_1) {
+               if (status != 0)
+                       old = CREAD(sc, device_status);
+               CWRITE(sc, device_status, status|old);
+       } else {
+               if (status != 0)
+                       old = bus_space_read_1(sc->sc_iot, sc->sc_ioh,
+                           VIRTIO_CONFIG_DEVICE_STATUS);
+               bus_space_write_1(sc->sc_iot, sc->sc_ioh,
+                   VIRTIO_CONFIG_DEVICE_STATUS, status|old);
+       }
 }
 
 int
@@ -198,63 +295,224 @@ virtio_pci_match(struct device *parent, void *match, 
void *aux)
        struct pci_attach_args *pa;
 
        pa = (struct pci_attach_args *)aux;
-       if (PCI_VENDOR(pa->pa_id) == PCI_VENDOR_QUMRANET &&
-           PCI_PRODUCT(pa->pa_id) >= 0x1000 &&
+       if (PCI_VENDOR(pa->pa_id) == PCI_VENDOR_OPENBSD &&
+           PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_OPENBSD_CONTROL)
+               return 1;
+       if (PCI_VENDOR(pa->pa_id) != PCI_VENDOR_QUMRANET)
+               return 0;
+       /* virtio 0.9 */
+       if (PCI_PRODUCT(pa->pa_id) >= 0x1000 &&
            PCI_PRODUCT(pa->pa_id) <= 0x103f &&
            PCI_REVISION(pa->pa_class) == 0)
                return 1;
-       if (PCI_VENDOR(pa->pa_id) == PCI_VENDOR_OPENBSD &&
-           PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_OPENBSD_CONTROL)
+       /* virtio 1.0 */
+       if (PCI_PRODUCT(pa->pa_id) >= 0x1040 &&
+           PCI_PRODUCT(pa->pa_id) <= 0x107f &&
+           PCI_REVISION(pa->pa_class) == 1)
                return 1;
        return 0;
 }
 
+#if VIRTIO_DEBUG
 void
-virtio_pci_attach(struct device *parent, struct device *self, void *aux)
+virtio_pci_dump_caps(struct virtio_pci_softc *sc)
 {
-       struct virtio_pci_softc *sc = (struct virtio_pci_softc *)self;
-       struct virtio_softc *vsc = &sc->sc_sc;
-       struct pci_attach_args *pa = (struct pci_attach_args *)aux;
-       pci_chipset_tag_t pc = pa->pa_pc;
-       pcitag_t tag = pa->pa_tag;
-       int revision;
-       pcireg_t id;
-       char const *intrstr;
-       pci_intr_handle_t ih;
-
-       revision = PCI_REVISION(pa->pa_class);
-       if (revision != 0) {
-               printf("unknown revision 0x%02x; giving up\n", revision);
+       pci_chipset_tag_t pc = sc->sc_pc;
+       pcitag_t tag = sc->sc_ptag;
+       int offset;
+       union {
+               pcireg_t reg[4];
+               struct virtio_pci_cap vcap;
+       } v;
+
+       if (!pci_get_capability(pc, tag, PCI_CAP_VENDSPEC, &offset, &v.reg[0]))
                return;
+
+       printf("\n");
+       do {
+               for (int i = 0; i < 4; i++)
+                       v.reg[i] = pci_conf_read(pc, tag, offset + i * 4);
+               printf("%s: cfgoff %#x len %#x type %#x bar %#x: off %#x len 
%#x\n",
+                       __func__, offset, v.vcap.cap_len, v.vcap.cfg_type, 
v.vcap.bar,
+                       v.vcap.offset, v.vcap.length);
+               offset = v.vcap.cap_next;
+       } while (offset != 0);
+}
+#endif
+
+int
+virtio_pci_find_cap(struct virtio_pci_softc *sc, int cfg_type, void *buf, int 
buflen)
+{
+       pci_chipset_tag_t pc = sc->sc_pc;
+       pcitag_t tag = sc->sc_ptag;
+       unsigned int offset, i, len;
+       union {
+               pcireg_t reg[8];
+               struct virtio_pci_cap vcap;
+       } *v = buf;
+
+       if (buflen < sizeof(struct virtio_pci_cap))
+               return ERANGE;
+
+       if (!pci_get_capability(pc, tag, PCI_CAP_VENDSPEC, &offset, &v->reg[0]))
+               return ENOENT;
+
+       do {
+               for (i = 0; i < 4; i++)
+                       v->reg[i] = pci_conf_read(pc, tag, offset + i * 4);
+               if (v->vcap.cfg_type == cfg_type)
+                       break;
+               offset = v->vcap.cap_next;
+       } while (offset != 0);
+
+       if (offset == 0)
+               return ENOENT;
+
+       if (v->vcap.cap_len > sizeof(struct virtio_pci_cap)) {
+               len = roundup(v->vcap.cap_len, sizeof(pcireg_t));
+               if (len > buflen) {
+                       printf("%s: cap too large\n", __func__);
+                       return ERANGE;
+               }
+               for (i = 4; i < len / sizeof(pcireg_t);  i++)
+                       v->reg[i] = pci_conf_read(pc, tag, offset + i * 4);
        }
 
-       /* subsystem ID shows what I am */
-       id = PCI_PRODUCT(pci_conf_read(pc, tag, PCI_SUBSYS_ID_REG));
+       return 0;
+}
 
-       printf("\n");
 
-       vsc->sc_ops = &virtio_pci_ops;
-       sc->sc_pc = pc;
-       sc->sc_ptag = pa->pa_tag;
-       vsc->sc_dmat = pa->pa_dmat;
+#define NMAPREG                ((PCI_MAPREG_END - PCI_MAPREG_START) / \
+                               sizeof(pcireg_t))
+
+int
+virtio_pci_attach_10(struct virtio_pci_softc *sc, struct pci_attach_args *pa)
+{
+       struct virtio_pci_cap common, isr, device;
+       struct virtio_pci_notify_cap notify;
+       int have_device_cfg = 0;
+       bus_size_t bars[NMAPREG] = { 0 };
+       int bars_idx[NMAPREG] = { 0 };
+       struct virtio_pci_cap *caps[] = { &common, &isr, &device, &notify.cap };
+       int i, j = 0, ret = 0;
+
+       if (virtio_pci_find_cap(sc, VIRTIO_PCI_CAP_COMMON_CFG, &common, 
sizeof(common)) != 0)
+               return ENODEV;
+
+       if (virtio_pci_find_cap(sc, VIRTIO_PCI_CAP_NOTIFY_CFG, &notify, 
sizeof(notify)) != 0)
+               return ENODEV;
+       if (virtio_pci_find_cap(sc, VIRTIO_PCI_CAP_ISR_CFG, &isr, sizeof(isr)) 
!= 0)
+               return ENODEV;
+       if (virtio_pci_find_cap(sc, VIRTIO_PCI_CAP_DEVICE_CFG, &device, 
sizeof(device)) != 0)
+               memset(&device, 0, sizeof(device));
+       else
+               have_device_cfg = 1;
 
        /*
-        * For virtio, ignore normal MSI black/white-listing depending on the
-        * PCI bridge but enable it unconditionally.
+        * XXX Maybe there are devices that offer the pci caps but not the
+        * XXX VERSION_1 feature bit? Then we should check the feature bit
+        * XXX here and fall back to 0.9 if not present.
         */
-       pa->pa_flags |= PCI_FLAGS_MSI_ENABLED;
 
+       /* Figure out which bars we need to map */
+       for (i = 0; i < nitems(caps); i++) {
+               int bar = caps[i]->bar;
+               bus_size_t len = caps[i]->offset + caps[i]->length;
+               if (caps[i]->length == 0)
+                       continue;
+               if (bars[bar] < len)
+                       bars[bar] = len;
+       }
+
+       for (i = 0; i < nitems(bars); i++) {
+               int reg;
+               pcireg_t type;
+               if (bars[i] == 0)
+                       continue;
+               reg = PCI_MAPREG_START + i * 4;
+               type = pci_mapreg_type(sc->sc_pc, sc->sc_ptag, reg);
+               if (pci_mapreg_map(pa, reg, type, 0, &sc->sc_bars_iot[j],
+                   &sc->sc_bars_ioh[j], NULL, &sc->sc_bars_iosize[j],
+                   bars[i])) {
+                       printf("%s: can't map bar %u \n",
+                           sc->sc_sc.sc_dev.dv_xname, i);
+                       ret = EIO;
+                       goto err;
+               }
+               bars_idx[i] = j;
+               j++;
+       }
+
+       i = bars_idx[notify.cap.bar];
+       if (bus_space_subregion(sc->sc_bars_iot[i], sc->sc_bars_ioh[i],
+           notify.cap.offset, notify.cap.length, &sc->sc_notify_ioh) != 0) {
+               printf("%s: can't map notify i/o space\n",
+                   sc->sc_sc.sc_dev.dv_xname);
+               ret = EIO;
+               goto err;
+       }
+       sc->sc_notify_iosize = notify.cap.length;
+       sc->sc_notify_iot = sc->sc_bars_iot[i];
+       sc->sc_notify_off_multiplier = notify.notify_off_multiplier;
+
+       if (have_device_cfg) {
+               i = bars_idx[device.bar];
+               if (bus_space_subregion(sc->sc_bars_iot[i], sc->sc_bars_ioh[i],
+                   device.offset, device.length, &sc->sc_devcfg_ioh) != 0) {
+                       printf("%s: can't map devcfg i/o space\n",
+                           sc->sc_sc.sc_dev.dv_xname);
+                       ret = EIO;
+                       goto err;
+               }
+               sc->sc_devcfg_iosize = device.length;
+               sc->sc_devcfg_iot = sc->sc_bars_iot[i];
+       }
+
+       i = bars_idx[isr.bar];
+       if (bus_space_subregion(sc->sc_bars_iot[i], sc->sc_bars_ioh[i],
+           isr.offset, isr.length, &sc->sc_isr_ioh) != 0) {
+               printf("%s: can't map isr i/o space\n",
+                   sc->sc_sc.sc_dev.dv_xname);
+               ret = EIO;
+               goto err;
+       }
+       sc->sc_isr_iosize = isr.length;
+       sc->sc_isr_iot = sc->sc_bars_iot[i];
+
+       i = bars_idx[common.bar];
+       if (bus_space_subregion(sc->sc_bars_iot[i], sc->sc_bars_ioh[i],
+           common.offset, common.length, &sc->sc_ioh) != 0) {
+               printf("%s: can't map common i/o space\n",
+                   sc->sc_sc.sc_dev.dv_xname);
+               ret = EIO;
+               goto err;
+       }
+       sc->sc_iosize = common.length;
+       sc->sc_iot = sc->sc_bars_iot[i];
+
+       sc->sc_sc.sc_version_1 = 1;
+       return 0;
+
+err:
+       /* there is no pci_mapreg_unmap() */
+       return ret;
+}
+
+int
+virtio_pci_attach_09(struct virtio_pci_softc *sc, struct pci_attach_args *pa)
+{
+       struct virtio_softc *vsc = &sc->sc_sc;
        if (pci_mapreg_map(pa, PCI_MAPREG_START, PCI_MAPREG_TYPE_IO, 0,
            &sc->sc_iot, &sc->sc_ioh, NULL, &sc->sc_iosize, 0)) {
                printf("%s: can't map i/o space\n", vsc->sc_dev.dv_xname);
-               return;
+               return EIO;
        }
 
        if (bus_space_subregion(sc->sc_iot, sc->sc_ioh,
            VIRTIO_CONFIG_QUEUE_NOTIFY, 2, &sc->sc_notify_ioh) != 0) {
                printf("%s: can't map notify i/o space\n",
                    vsc->sc_dev.dv_xname);
-               return;
+               return EIO;
        }
        sc->sc_notify_iosize = 2;
        sc->sc_notify_iot = sc->sc_iot;
@@ -263,20 +521,80 @@ virtio_pci_attach(struct device *parent, struct device 
*self, void *aux)
            VIRTIO_CONFIG_ISR_STATUS, 1, &sc->sc_isr_ioh) != 0) {
                printf("%s: can't map isr i/o space\n",
                    vsc->sc_dev.dv_xname);
-               return;
+               return EIO;
        }
        sc->sc_isr_iosize = 1;
        sc->sc_isr_iot = sc->sc_iot;
 
+       return 0;
+}
+
+void
+virtio_pci_attach(struct device *parent, struct device *self, void *aux)
+{
+       struct virtio_pci_softc *sc = (struct virtio_pci_softc *)self;
+       struct virtio_softc *vsc = &sc->sc_sc;
+       struct pci_attach_args *pa = (struct pci_attach_args *)aux;
+       pci_chipset_tag_t pc = pa->pa_pc;
+       pcitag_t tag = pa->pa_tag;
+       int revision, ret = ENODEV;
+       pcireg_t id;
+       char const *intrstr;
+       pci_intr_handle_t ih;
+
+       revision = PCI_REVISION(pa->pa_class);
+       switch (revision) {
+       case 0:
+               /* subsystem ID shows what I am */
+               id = PCI_PRODUCT(pci_conf_read(pc, tag, PCI_SUBSYS_ID_REG));
+               break;
+       case 1:
+               id = PCI_PRODUCT(pa->pa_id) - 0x1040;
+               break;
+       default:
+               printf("unknown revision 0x%02x; giving up\n", revision);
+               return;
+       }
+
+       sc->sc_pc = pc;
+       sc->sc_ptag = pa->pa_tag;
+       vsc->sc_dmat = pa->pa_dmat;
+
+       /*
+        * For virtio, ignore normal MSI black/white-listing depending on the
+        * PCI bridge but enable it unconditionally.
+        */
+       pa->pa_flags |= PCI_FLAGS_MSI_ENABLED;
+
+#if VIRTIO_DEBUG
+       virtio_pci_dump_caps(sc);
+#endif
+
+       vsc->sc_ops = &virtio_pci_ops;
+       if ((vsc->sc_dev.dv_cfdata->cf_flags & VIRTIO_CF_NO_VERSION_1) == 0 &&
+           (revision == 1 ||
+            (vsc->sc_dev.dv_cfdata->cf_flags & VIRTIO_CF_PREFER_VERSION_1))) {
+               ret = virtio_pci_attach_10(sc, pa);
+       }
+       if (ret != 0 && revision == 0) {
+               /* revision 0 means 0.9 only or both 0.9 and 1.0 */
+               ret = virtio_pci_attach_09(sc, pa);
+       }
+       if (ret != 0) {
+               printf(": Cannot attach (%d)\n", ret);
+               return;
+       }
+
        sc->sc_devcfg_offset = VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI;
        sc->sc_irq_type = IRQ_NO_MSIX;
        if (virtio_pci_adjust_config_region(sc) != 0)
                return;
 
        virtio_device_reset(vsc);
-       virtio_pci_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_ACK);
-       virtio_pci_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER);
+       virtio_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_ACK);
+       virtio_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER);
 
+       printf("\n");
        vsc->sc_childdevid = id;
        vsc->sc_child = NULL;
        config_found(self, sc, NULL);
@@ -358,6 +676,8 @@ virtio_pci_detach(struct device *self, int flags)
 int
 virtio_pci_adjust_config_region(struct virtio_pci_softc *sc)
 {
+       if (sc->sc_sc.sc_version_1)
+               return 0;
        sc->sc_devcfg_iosize = sc->sc_iosize - sc->sc_devcfg_offset;
        sc->sc_devcfg_iot = sc->sc_iot;
        if (bus_space_subregion(sc->sc_iot, sc->sc_ioh, sc->sc_devcfg_offset,
@@ -379,7 +699,7 @@ virtio_pci_negotiate_features(struct virtio_softc *vsc,
     const struct virtio_feature_name *guest_feature_names)
 {
        struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
-       uint64_t host, neg;
+       uint64_t host, negotiated;
 
        vsc->sc_active_features = 0;
 
@@ -403,24 +723,79 @@ virtio_pci_negotiate_features(struct virtio_softc *vsc,
            (vsc->sc_child->dv_cfdata->cf_flags & VIRTIO_CF_NO_EVENT_IDX))) {
                if (guest_feature_names != NULL)
                        printf(" RingEventIdx disabled by UKC");
-               vsc->sc_driver_features &= ~(VIRTIO_F_RING_EVENT_IDX);
+               vsc->sc_driver_features &= ~VIRTIO_F_RING_EVENT_IDX;
+       }
+
+       if (vsc->sc_version_1) {
+               return virtio_pci_negotiate_features_10(vsc,
+                   guest_feature_names);
        }
 
+       /* virtio 0.9 only */
        host = bus_space_read_4(sc->sc_iot, sc->sc_ioh,
                                VIRTIO_CONFIG_DEVICE_FEATURES);
-       neg = host & vsc->sc_driver_features;
+       negotiated = host & vsc->sc_driver_features;
 #if VIRTIO_DEBUG
        if (guest_feature_names)
-               virtio_log_features(host, neg, guest_feature_names);
+               virtio_log_features(host, negotiated, guest_feature_names);
 #endif
        bus_space_write_4(sc->sc_iot, sc->sc_ioh,
-                         VIRTIO_CONFIG_GUEST_FEATURES, neg);
-       vsc->sc_active_features = neg;
-       if (neg & VIRTIO_F_RING_INDIRECT_DESC)
+                         VIRTIO_CONFIG_GUEST_FEATURES, negotiated);
+       vsc->sc_active_features = negotiated;
+       if (negotiated & VIRTIO_F_RING_INDIRECT_DESC)
                vsc->sc_indirect = 1;
        else
                vsc->sc_indirect = 0;
+       return 0;
+}
+
+int
+virtio_pci_negotiate_features_10(struct virtio_softc *vsc,
+    const struct virtio_feature_name *guest_feature_names)
+{
+       struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
+       uint64_t host, negotiated;
+
+       vsc->sc_driver_features |= VIRTIO_F_VERSION_1;
+       /* notify on empty is 0.9 only */
+       vsc->sc_driver_features &= ~VIRTIO_F_NOTIFY_ON_EMPTY;
+       CWRITE(sc, device_feature_select, 0);
+       host = CREAD(sc, device_feature);
+       CWRITE(sc, device_feature_select, 1);
+       host |= (uint64_t)CREAD(sc, device_feature) << 32;
 
+       negotiated = host & vsc->sc_driver_features;
+#if VIRTIO_DEBUG
+       if (guest_feature_names)
+               virtio_log_features(host, negotiated, guest_feature_names);
+#endif
+       CWRITE(sc, driver_feature_select, 0);
+       CWRITE(sc, driver_feature, negotiated & 0xffffffff);
+       CWRITE(sc, driver_feature_select, 1);
+       CWRITE(sc, driver_feature, negotiated >> 32);
+       virtio_pci_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_FEATURES_OK);
+
+       if ((CREAD(sc, device_status) &
+           VIRTIO_CONFIG_DEVICE_STATUS_FEATURES_OK) == 0) {
+               printf("%s: Feature negotiation failed\n",
+                   vsc->sc_dev.dv_xname);
+               CWRITE(sc, device_status, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
+               return ENXIO;
+       }
+       vsc->sc_active_features = negotiated;
+
+       if (negotiated & VIRTIO_F_RING_INDIRECT_DESC)
+               vsc->sc_indirect = 1;
+       else
+               vsc->sc_indirect = 0;
+
+       if ((negotiated & VIRTIO_F_VERSION_1) == 0) {
+#if VIRTIO_DEBUG
+               printf("%s: Host rejected Version_1\n", __func__);
+#endif
+               CWRITE(sc, device_status, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
+               return EINVAL;
+       }
        return 0;
 }
 
@@ -523,19 +898,29 @@ virtio_pci_msix_establish(struct virtio_pci_softc *sc,
 void
 virtio_pci_set_msix_queue_vector(struct virtio_pci_softc *sc, uint32_t idx, 
uint16_t vector)
 {
-       bus_space_write_2(sc->sc_iot, sc->sc_ioh,
-           VIRTIO_CONFIG_QUEUE_SELECT, idx);
-       bus_space_write_2(sc->sc_iot, sc->sc_ioh,
-           VIRTIO_MSI_QUEUE_VECTOR, vector);
+       if (sc->sc_sc.sc_version_1) {
+               CWRITE(sc, queue_select, idx);
+               CWRITE(sc, queue_msix_vector, vector);
+       } else {
+               bus_space_write_2(sc->sc_iot, sc->sc_ioh,
+                   VIRTIO_CONFIG_QUEUE_SELECT, idx);
+               bus_space_write_2(sc->sc_iot, sc->sc_ioh,
+                   VIRTIO_MSI_QUEUE_VECTOR, vector);
+       }
 }
 
 void
 virtio_pci_set_msix_config_vector(struct virtio_pci_softc *sc, uint16_t vector)
 {
-       bus_space_write_2(sc->sc_iot, sc->sc_ioh,
-           VIRTIO_MSI_CONFIG_VECTOR, vector);
+       if (sc->sc_sc.sc_version_1) {
+               CWRITE(sc, config_msix_vector, vector);
+       } else {
+               bus_space_write_2(sc->sc_iot, sc->sc_ioh,
+                   VIRTIO_MSI_CONFIG_VECTOR, vector);
+       }
 }
 
+
 void
 virtio_pci_free_irqs(struct virtio_pci_softc *sc)
 {
@@ -702,5 +1087,10 @@ void
 virtio_pci_kick(struct virtio_softc *vsc, uint16_t idx)
 {
        struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
-       bus_space_write_2(sc->sc_notify_iot, sc->sc_notify_ioh, 0, idx);
+       unsigned offset = 0;
+       if (vsc->sc_version_1) {
+               offset = vsc->sc_vqs[idx].vq_notify_off *
+                   sc->sc_notify_off_multiplier;
+       }
+       bus_space_write_2(sc->sc_notify_iot, sc->sc_notify_ioh, offset, idx);
 }
diff --git a/sys/dev/pci/virtio_pcireg.h b/sys/dev/pci/virtio_pcireg.h
index dc838f38d59..f47ec9c6e7a 100644
--- a/sys/dev/pci/virtio_pcireg.h
+++ b/sys/dev/pci/virtio_pcireg.h
@@ -35,5 +35,62 @@
 
 #define VIRTIO_MSI_NO_VECTOR                   0xffff
 
+/*
+ * Virtio 1.0 specific
+ */
+
+struct virtio_pci_cap {
+       uint8_t cap_vndr;       /* Generic PCI field: PCI_CAP_ID_VNDR */
+       uint8_t cap_next;       /* Generic PCI field: next ptr. */
+       uint8_t cap_len;        /* Generic PCI field: capability length */
+       uint8_t cfg_type;       /* Identifies the structure. */
+       uint8_t bar;            /* Where to find it. */
+       uint8_t padding[3];     /* Pad to full dword. */
+       uint32_t offset;        /* Offset within bar. */
+       uint32_t length;        /* Length of the structure, in bytes. */
+} __packed;
+
+/* Common configuration */
+#define VIRTIO_PCI_CAP_COMMON_CFG      1
+/* Notifications */
+#define VIRTIO_PCI_CAP_NOTIFY_CFG      2
+/* ISR Status */
+#define VIRTIO_PCI_CAP_ISR_CFG         3
+/* Device specific configuration */
+#define VIRTIO_PCI_CAP_DEVICE_CFG      4
+/* PCI configuration access */
+#define VIRTIO_PCI_CAP_PCI_CFG         5
+
+struct virtio_pci_notify_cap {
+       struct virtio_pci_cap cap;
+       uint32_t notify_off_multiplier; /* Multiplier for queue_notify_off. */
+} __packed;
+
+struct virtio_pci_cfg_cap {
+       struct virtio_pci_cap cap;
+       uint8_t pci_cfg_data[4];        /* Data for BAR access. */
+} __packed;
+
+struct virtio_pci_common_cfg {
+       /* About the whole device. */
+       uint32_t device_feature_select; /* read-write */
+       uint32_t device_feature;        /* read-only for driver */
+       uint32_t driver_feature_select; /* read-write */
+       uint32_t driver_feature;        /* read-write */
+       uint16_t config_msix_vector;    /* read-write */
+       uint16_t num_queues;            /* read-only for driver */
+       uint8_t device_status;          /* read-write */
+       uint8_t config_generation;      /* read-only for driver */
+
+       /* About a specific virtqueue. */
+       uint16_t queue_select;          /* read-write */
+       uint16_t queue_size;            /* read-write, power of 2, or 0. */
+       uint16_t queue_msix_vector;     /* read-write */
+       uint16_t queue_enable;          /* read-write */
+       uint16_t queue_notify_off;      /* read-only for driver */
+       uint64_t queue_desc;            /* read-write */
+       uint64_t queue_avail;           /* read-write */
+       uint64_t queue_used;            /* read-write */
+} __packed;
 
 #endif /* _DEV_PCI_VIRTIO_PCIREG_H_ */
diff --git a/sys/dev/pv/if_vio.c b/sys/dev/pv/if_vio.c
index 252ad2fdf9b..caf4229fd19 100644
--- a/sys/dev/pv/if_vio.c
+++ b/sys/dev/pv/if_vio.c
@@ -544,13 +544,16 @@ vio_attach(struct device *parent, struct device *self, 
void *aux)
        }
        printf(": address %s\n", ether_sprintf(sc->sc_ac.ac_enaddr));
 
-       if (virtio_has_feature(vsc, VIRTIO_NET_F_MRG_RXBUF)) {
+       if (virtio_has_feature(vsc, VIRTIO_NET_F_MRG_RXBUF) ||
+           vsc->sc_version_1) {
                sc->sc_hdr_size = sizeof(struct virtio_net_hdr);
-               ifp->if_hardmtu = 16000; /* arbitrary limit */
        } else {
                sc->sc_hdr_size = offsetof(struct virtio_net_hdr, num_buffers);
-               ifp->if_hardmtu = MCLBYTES - sc->sc_hdr_size - ETHER_HDR_LEN;
        }
+       if (virtio_has_feature(vsc, VIRTIO_NET_F_MRG_RXBUF))
+               ifp->if_hardmtu = 16000; /* arbitrary limit */
+       else
+               ifp->if_hardmtu = MCLBYTES - sc->sc_hdr_size - ETHER_HDR_LEN;
 
        if (virtio_alloc_vq(vsc, &sc->sc_vq[VQRX], 0, MCLBYTES, 2, "rx") != 0)
                goto err;
diff --git a/sys/dev/pv/virtiovar.h b/sys/dev/pv/virtiovar.h
index 2b43086a4e9..8ec4a2dd6dd 100644
--- a/sys/dev/pv/virtiovar.h
+++ b/sys/dev/pv/virtiovar.h
@@ -82,6 +82,8 @@
 /* flags for config(8) */
 #define VIRTIO_CF_NO_INDIRECT          1
 #define VIRTIO_CF_NO_EVENT_IDX         2
+#define VIRTIO_CF_PREFER_VERSION_1     4
+#define VIRTIO_CF_NO_VERSION_1         8
 
 struct vq_entry {
        SLIST_ENTRY(vq_entry)    qe_list;       /* free list */
@@ -131,6 +133,8 @@ struct virtqueue {
 
        /* interrupt handler */
        int                     (*vq_done)(struct virtqueue*);
+       /* 1.x only: offset for notify address calculation */
+       uint32_t                vq_notify_off;
 };
 
 struct virtio_feature_name {
@@ -167,6 +171,7 @@ struct virtio_softc {
        uint64_t                 sc_driver_features;
        uint64_t                 sc_active_features;
        int                      sc_indirect;
+       int                      sc_version_1;
 
        int                      sc_nvqs;       /* set by child */
        struct virtqueue        *sc_vqs;        /* set by child */
-- 
2.19.0

Reply via email to