This patch provides all necessary changes to OSv
to make it boot on AWS firecracker.

The curl command exmaple to create and start instance with
single block device:

./scripts/setup-block.sh && ./scripts/create-instance.sh && 
./scripts/start-instance.sh

where:
curl --unix-socket /tmp/firecracker.socket -i \
    -X PUT 'http://localhost/drives/rootfs' \
    -H 'Accept: application/json'           \
    -H 'Content-Type: application/json'     \
    -d '{
        "drive_id": "rootfs",
        "path_on_host": "/home/wkozaczuk/projects/osv/build/release/usr.rofs",
        "is_root_device": false,
        "is_read_only": false
    }'

curl --unix-socket /tmp/firecracker.socket -i \
    -X PUT 'http://localhost/boot-source'   \
    -H 'Accept: application/json'           \
    -H 'Content-Type: application/json'     \
    -d '{
        "kernel_image_path": 
"/home/wkozaczuk/projects/osv/build/release/loader-stripped.elf",
        "boot_args": "--bootchart /hello"
    }'

curl --unix-socket /tmp/firecracker.socket -i \
    -X PUT 'http://localhost/actions'       \
    -H  'Accept: application/json'          \
    -H  'Content-Type: application/json'    \
    -d '{
        "action_type": "InstanceStart"
     }'

Signed-off-by: Waldemar Kozaczuk <[email protected]>
---
 Makefile                     |   2 +
 arch/x64/arch-setup.cc       | 180 +++++++++++++++++++---
 arch/x64/boot.S              |  37 ++++-
 arch/x64/loader.ld           |   2 +-
 arch/x64/power.cc            |  14 +-
 arch/x64/smp.cc              |  16 +-
 drivers/acpi.cc              |  10 +-
 drivers/virtio-assign.cc     |   3 +-
 drivers/virtio-blk.cc        |  78 ++++++----
 drivers/virtio-blk.hh        |  10 +-
 drivers/virtio-mmio.cc       | 128 ++++++++++++++++
 drivers/virtio-mmio.hh       | 132 ++++++++++++++++
 drivers/virtio-net.cc        |  86 +++++------
 drivers/virtio-net.hh        |  10 +-
 drivers/virtio-vring.cc      |  25 ++-
 drivers/virtio-vring.hh      |  10 +-
 drivers/virtio.hh            |  18 ++-
 drivers/virtio2.cc           | 289 +++++++++++++++++++++++++++++++++++
 drivers/virtio2.hh           | 115 ++++++++++++++
 include/osv/virtio-assign.hh |   2 +-
 loader.cc                    |   3 +-
 21 files changed, 1039 insertions(+), 131 deletions(-)
 create mode 100644 drivers/virtio-mmio.cc
 create mode 100644 drivers/virtio-mmio.hh
 create mode 100644 drivers/virtio2.cc
 create mode 100644 drivers/virtio2.hh

diff --git a/Makefile b/Makefile
index 68043485..cf571632 100644
--- a/Makefile
+++ b/Makefile
@@ -819,9 +819,11 @@ drivers += $(libtsm)
 drivers += drivers/vga.o drivers/kbd.o drivers/isa-serial.o
 drivers += arch/$(arch)/pvclock-abi.o
 drivers += drivers/virtio.o
+drivers += drivers/virtio2.o
 drivers += drivers/virtio-vring.o
 drivers += drivers/virtio-net.o
 drivers += drivers/virtio-assign.o
+drivers += drivers/virtio-mmio.o
 drivers += drivers/vmxnet3.o
 drivers += drivers/vmxnet3-queues.o
 drivers += drivers/virtio-blk.o
diff --git a/arch/x64/arch-setup.cc b/arch/x64/arch-setup.cc
index 6e4833cf..58da00c5 100644
--- a/arch/x64/arch-setup.cc
+++ b/arch/x64/arch-setup.cc
@@ -22,6 +22,19 @@
 #include <osv/commands.hh>
 #include "dmi.hh"
 
+// Not sure if Linux zero page is always located at this place
+// in memory or its address is passed in one of the registers
+// -> double check
+#define ZERO_PAGE_START      0x7000
+#define SETUP_HEADER_OFFSET  0x1f1   // look at bootparam.h in linux
+#define BOOT_FLAG_OFFSET     sizeof(u8) + 4 * sizeof(u16) + sizeof(u32)
+
+#define E820_ENTRIES_OFFSET  0x1e8   // look at bootparam.h in linux
+#define E820_TABLE_OFFSET    0x2d0   // look at bootparam.h in linux
+
+#define CMD_LINE_PTR_OFFSET  sizeof(u8) * 5 + sizeof(u16) * 11 + sizeof(u32) * 
7
+#define CMD_LINE_SIZE_OFFSET CMD_LINE_PTR_OFFSET + sizeof(u8) * 2 + 
sizeof(u16) + sizeof(u32) * 3
+
 struct multiboot_info_type {
     u32 flags;
     u32 mem_lower;
@@ -61,12 +74,81 @@ struct e820ent {
     u32 type;
 } __attribute__((packed));
 
+struct _e820ent {
+    u64 addr;
+    u64 size;
+    u32 type;
+} __attribute__((packed));
+
 osv_multiboot_info_type* osv_multiboot_info;
 
-void parse_cmdline(multiboot_info_type& mb)
+struct mmio_device_info {
+    u64 address;
+    u64 size;
+    unsigned int irq;
+};
+
+//TODO: For now we are limiting number of mmio devices to two
+// Ideally we should be using somewhat more dynamic structure
+struct mmio_device_info mmio_device_info_entries[2];
+int mmio_device_info_count = 0;
+
+#define VIRTIO_MMIO_DEVICE_CMDLINE_PREFIX "virtio_mmio.device="
+char* parse_mmio_device_info(char *cmdline, mmio_device_info *info) {
+    // [virtio_mmio.]device=<size>@<baseaddr>:<irq>[:<id>]
+    char *prefix_pos = strstr(cmdline,VIRTIO_MMIO_DEVICE_CMDLINE_PREFIX);
+    if (!prefix_pos)
+        return nullptr;
+
+    char *size_pos = prefix_pos + strlen(VIRTIO_MMIO_DEVICE_CMDLINE_PREFIX);
+    if (sscanf(size_pos,"%ld", &info->size) != 1)
+        return nullptr;
+
+    char *at_pos = strstr(size_pos,"@");
+    if (!at_pos)
+        return nullptr;
+
+    switch(*(at_pos - 1)) {
+        case 'k':
+        case 'K':
+            info->size = info->size * 1024;
+            break;
+        case 'm':
+        case 'M':
+            info->size = info->size * 1024 * 1024;
+            break;
+        default:
+            break;
+    }
+
+    if (sscanf(at_pos, "@%lli:%u", &info->address, &info->irq) == 2)
+        return prefix_pos;
+    else
+        return nullptr;
+}
+
+//void parse_cmdline(multiboot_info_type& mb)
+void parse_cmdline(char *cmdline)
 {
-    auto p = reinterpret_cast<char*>(mb.cmdline);
-    osv::parse_cmdline(p);
+    //auto p = reinterpret_cast<char*>(mb.cmdline);
+    // We are assuming the mmio devices information is appended to the
+    // command line (at least it is the case with the firecracker) so
+    // once we parse those we strip it away so only plain OSv command line
+    // is left
+    //TODO: There may be a smarter, better way to parse this information
+    char *virtio_device_info_pos = 
parse_mmio_device_info(cmdline,mmio_device_info_entries);
+    if (virtio_device_info_pos) {
+        mmio_device_info_count++;
+        *virtio_device_info_pos = 0;
+
+        virtio_device_info_pos =
+            parse_mmio_device_info(virtio_device_info_pos + 
1,mmio_device_info_entries + 1);
+        if (virtio_device_info_pos) {
+            mmio_device_info_count++;
+        }
+    }
+
+    osv::parse_cmdline(cmdline);
 }
 
 void setup_temporary_phys_map()
@@ -121,28 +203,64 @@ void arch_setup_free_memory()
 {
     static ulong edata;
     asm ("movl $.edata, %0" : "=rm"(edata));
-    // copy to stack so we don't free it now
-    auto omb = *osv_multiboot_info;
-    auto mb = omb.mb;
-    auto e820_buffer = alloca(mb.mmap_length);
-    auto e820_size = mb.mmap_length;
-    memcpy(e820_buffer, reinterpret_cast<void*>(mb.mmap_addr), e820_size);
+
+    void *zero_page = reinterpret_cast<void*>(ZERO_PAGE_START);
+    void *setup_header = zero_page + SETUP_HEADER_OFFSET;
+
+    // Grab command line from zero page
+    u32 cmdline_ptr = *static_cast<u32*>(setup_header + CMD_LINE_PTR_OFFSET);
+    u32 cmdline_size = *static_cast<u32*>(setup_header + CMD_LINE_SIZE_OFFSET);
+
+    // Copy cmdline from zero page
+    void* cmdline = reinterpret_cast<void*>((u64)cmdline_ptr);
+    void *cmdline_copy = alloca(cmdline_size + 1);
+    memcpy(cmdline_copy,cmdline,cmdline_size);
+    ((char*)cmdline_copy)[cmdline_size] = 0;
+
+    debug_early("Cmdline: ");
+    debug_early((char*)cmdline_copy);
+    debug_early("\n");
+
+    // Copy e820 information from zero page
+    struct _e820ent *e820_table = static_cast<struct _e820ent *>(zero_page + 
E820_TABLE_OFFSET);
+
+    //TODO: We are assuming two entries but in reality
+    //there could be more so this logic below needs to be a little smarter
+    auto e820_size = 48;
+    auto e820_buffer = alloca(e820_size);
+    {
+       struct e820ent *lower = reinterpret_cast<struct e820ent*>(e820_buffer);
+       lower->ent_size = 20;
+       lower->type = 1;
+       lower->addr = e820_table[0].addr;
+       lower->size = e820_table[0].size;
+
+       struct e820ent *upper = lower + 1;
+       upper->ent_size = 20;
+       upper->type = 1;
+       upper->addr = e820_table[1].addr;
+       upper->size = e820_table[1].size;
+    }
+
     for_each_e820_entry(e820_buffer, e820_size, [] (e820ent ent) {
         memory::phys_mem_size += ent.size;
     });
     constexpr u64 initial_map = 1 << 30; // 1GB mapped by startup code
 
-    u64 time;
-    time = omb.tsc_init_hi;
-    time = (time << 32) | omb.tsc_init;
+    //TODO: We are assuming that bootchart-wise we start here but
+    // in reality it all starts at boot.S:start64_. However what happens
+    // before this points should take negligible amount of time, no?
+    u64 time = 0;
+    //time = omb.tsc_init_hi;
+    //time = (time << 32) | omb.tsc_init;
     boot_time.event(0, "", time );
 
-    time = omb.tsc_disk_done_hi;
-    time = (time << 32) | omb.tsc_disk_done;
+    //time = omb.tsc_disk_done_hi;
+    //time = (time << 32) | omb.tsc_disk_done;
     boot_time.event(1, "disk read (real mode)", time );
 
-    time = omb.tsc_uncompress_done_hi;
-    time = (time << 32) | omb.tsc_uncompress_done;
+    //time = omb.tsc_uncompress_done_hi;
+    //time = (time << 32) | omb.tsc_uncompress_done;
     boot_time.event(2, "uncompress lzloader.elf", time );
 
     auto c = processor::cpuid(0x80000000);
@@ -185,7 +303,10 @@ void arch_setup_free_memory()
     elf_size = edata - elf_phys;
     mmu::linear_map(elf_start, elf_phys, elf_size, OSV_KERNEL_BASE);
     // get rid of the command line, before low memory is unmapped
-    parse_cmdline(mb);
+    //parse_cmdline(mb);
+
+    parse_cmdline((char*)cmdline_copy);
+
     // now that we have some free memory, we can start mapping the rest
     mmu::switch_to_runtime_page_tables();
     for_each_e820_entry(e820_buffer, e820_size, [] (e820ent ent) {
@@ -260,6 +381,7 @@ void arch_init_premain()
 #include "drivers/virtio-net.hh"
 #include "drivers/virtio-assign.hh"
 #include "drivers/virtio-rng.hh"
+#include "drivers/virtio-mmio.hh"
 #include "drivers/xenplatform-pci.hh"
 #include "drivers/ahci.hh"
 #include "drivers/vmw-pvscsi.hh"
@@ -271,13 +393,33 @@ extern bool opt_assign_net;
 void arch_init_drivers()
 {
     // initialize panic drivers
-    panic::pvpanic::probe_and_setup();
+    // pvpanic depends on ACPI which firecracker
+    // does not suppot so we disable probing it altogether
+    //TODO: Is there a way to detect if ACPI is available and
+    //only then probe pvpanic?
+    //panic::pvpanic::probe_and_setup();
     boot_time.event("pvpanic done");
 
     // Enumerate PCI devices
-    pci::pci_device_enumeration();
+    // PCI is not supported by firecracker
+    //TODO: Is there a way to detect if PCI is present and only enumerate
+    //PCI devices then? Somehow even firecracker presents a bus with
+    //some dummy devices.
+    //pci::pci_device_enumeration();
     boot_time.event("pci enumerated");
 
+    // Register any parsed virtio-mmio devices
+    for (int d = 0; d < mmio_device_info_count; d++) {
+        auto info = mmio_device_info_entries[d];
+        auto mmio_device = new virtio::mmio_device(info.address, info.size, 
info.irq);
+        if (mmio_device->parse_config()) {
+            device_manager::instance()->register_device(mmio_device);
+        }
+        else {
+            delete mmio_device;
+        }
+    }
+
     // Initialize all drivers
     hw::driver_manager* drvman = hw::driver_manager::instance();
     drvman->register_driver(virtio::blk::probe);
diff --git a/arch/x64/boot.S b/arch/x64/boot.S
index c4b97e2d..3bdc57ea 100644
--- a/arch/x64/boot.S
+++ b/arch/x64/boot.S
@@ -97,9 +97,10 @@ start64:
     sub %rdi, %rcx
     xor %eax, %eax
     rep stosb
+    mov $0x200000, %rbp
     mov %rbp, elf_header
     # %ebx is set by boot16.S before running the loader
-    mov %rbx, osv_multiboot_info
+    //mov %rbx, osv_multiboot_info
     lea init_stack_top, %rsp
     call premain
     mov __loader_argc, %edi
@@ -107,8 +108,42 @@ start64:
     call main
     .cfi_endproc
 
+.code64
+.global _start64
+_start64:
+//TODO: Is there a way to switch to protected mode and then jump to start32
+//which would be even better than what we do below?
+    //For whatever reason at this point (long mode?) we cannot
+    //set gdt the way it is done in start32 but linux expects
+    //similar one so whatever firecracker sets seems to be OK.
+    //Maybe because OSv gdt has 32-bit entry
+    //lgdt gdt_desc
+    //mov $0x10, %eax
+    //mov %eax, %ds
+    //mov %eax, %es
+    //mov %eax, %fs
+    //mov %eax, %gs
+    //mov %eax, %ss
+    // Disable paging and enable PAE
+    mov $BOOT_CR4, %eax
+    mov %eax, %cr4
+    // Setup page tables
+    lea ident_pt_l4, %eax
+    mov %eax, %cr3
+    // Write contents of EDX:EAX to Model Specific Register specified by ECX 
register
+    mov $0xc0000080, %ecx
+    mov $0x00000900, %eax
+    xor %edx, %edx
+    wrmsr
+    // Enable paging
+    mov $BOOT_CR0, %eax
+    mov %eax, %cr0
+    jmp start64
+
 # The smp trampoline must be in the lower 1MB, so we manually relocate
 # it to address 0 by subtracting smpboot from any offset
+//TODO: I am pretty sure we have some logic missing for SMP
+//given firecracker jumps us into long mode. Not sure how to handle it.
 .data
 .global smpboot
 smpboot:
diff --git a/arch/x64/loader.ld b/arch/x64/loader.ld
index efe78d52..e08b310c 100644
--- a/arch/x64/loader.ld
+++ b/arch/x64/loader.ld
@@ -108,4 +108,4 @@ PHDRS {
        eh_frame PT_GNU_EH_FRAME;
        note PT_NOTE;
 }
-ENTRY(start32);
+ENTRY(_start64);
diff --git a/arch/x64/power.cc b/arch/x64/power.cc
index 81849335..534ffdf0 100644
--- a/arch/x64/power.cc
+++ b/arch/x64/power.cc
@@ -27,17 +27,9 @@ void halt(void)
 
 void poweroff(void)
 {
-    ACPI_STATUS status = AcpiEnterSleepStatePrep(ACPI_STATE_S5);
-    if (ACPI_FAILURE(status)) {
-        debug("AcpiEnterSleepStatePrep failed: %s\n", 
AcpiFormatException(status));
-        halt();
-    }
-    status = AcpiEnterSleepState(ACPI_STATE_S5);
-    if (ACPI_FAILURE(status)) {
-        debug("AcpiEnterSleepState failed: %s\n", AcpiFormatException(status));
-        halt();
-    }
-
+    // Firecracker only supports this as away to shutdown the VM
+    // Reset using the 8042 PS/2 Controller ("keyboard controller")
+    processor::outb(0xfe, 0x64);
     // We shouldn't get here on x86.
     halt();
 }
diff --git a/arch/x64/smp.cc b/arch/x64/smp.cc
index 073ef206..4bdae0c6 100644
--- a/arch/x64/smp.cc
+++ b/arch/x64/smp.cc
@@ -74,7 +74,21 @@ void parse_madt()
 
 void smp_init()
 {
-    parse_madt();
+    //Firecracker does not support ACPI so
+    //there is no MADT table
+    //parse_madt();
+
+    //TODO: This a nasty hack as we support
+    // single vCPU. Eventually we should parse out equivalent information
+    // about all vCPUs from MP table (seems like more ancient way).
+    // See 
https://github.com/firecracker-microvm/firecracker/blob/7f29bca9ca197283275eab62fddc1c10ab580794/x86_64/src/mptable.rs
+    auto c = new sched::cpu(0);
+    c->arch.apic_id = 0;//lapic->Id;
+    c->arch.acpi_id = 0;//lapic->ProcessorId;
+    c->arch.initstack.next = smp_stack_free;
+    smp_stack_free = &c->arch.initstack;
+    sched::cpus.push_back(c);
+
     sched::current_cpu = sched::cpus[0];
     for (auto c : sched::cpus) {
         c->incoming_wakeups = 
aligned_array_new<sched::cpu::incoming_wakeup_queue>(sched::cpus.size());
diff --git a/drivers/acpi.cc b/drivers/acpi.cc
index 506ca68f..006231f1 100644
--- a/drivers/acpi.cc
+++ b/drivers/acpi.cc
@@ -605,7 +605,9 @@ void init()
 
 }
 
-void __attribute__((constructor(init_prio::acpi))) acpi_init_early()
-{
-     XENPV_ALTERNATIVE({ acpi::early_init(); }, {});
-}
+//TODO: Is there a way to detect if ACPI is available and do not even
+//try to load relevant information? Right now OSv depends on ACPI available
+//void __attribute__((constructor(init_prio::acpi))) acpi_init_early()
+//{
+//     XENPV_ALTERNATIVE({ acpi::early_init(); }, {});
+//}
diff --git a/drivers/virtio-assign.cc b/drivers/virtio-assign.cc
index 8a128d5e..5a49966f 100644
--- a/drivers/virtio-assign.cc
+++ b/drivers/virtio-assign.cc
@@ -42,8 +42,9 @@ public:
 
     // osv::assigned_virtio API implementation:
 
-    virtual void kick(int queue) override {
+    virtual bool kick(int queue) override {
         virtio_driver::kick(queue);
+        return true;
     }
 
     virtual u32 queue_size(int queue) override
diff --git a/drivers/virtio-blk.cc b/drivers/virtio-blk.cc
index a3f7898d..3784c453 100644
--- a/drivers/virtio-blk.cc
+++ b/drivers/virtio-blk.cc
@@ -10,7 +10,6 @@
 
 #include "drivers/virtio.hh"
 #include "drivers/virtio-blk.hh"
-#include "drivers/pci-device.hh"
 #include <osv/interrupt.hh>
 
 #include <osv/mempool.hh>
@@ -100,20 +99,20 @@ struct driver blk_driver = {
 
 bool blk::ack_irq()
 {
-    auto isr = virtio_conf_readb(VIRTIO_PCI_ISR);
-    auto queue = get_virt_queue(0);
-
-    if (isr) {
-        queue->disable_interrupts();
+    if(_dev.ack_irq()) {
+        get_virt_queue(0)->disable_interrupts();
         return true;
-    } else {
-        return false;
     }
-
+    else
+        return false;
 }
 
-blk::blk(pci::device& pci_dev)
-    : virtio_driver(pci_dev), _ro(false)
+//TODO: For now this driver is hardcoded to expect mmio_device
+// but eventually we could introduce some sort of virtio_device
+// interface class that pci_device and mmio_device would implement/extend
+// from.
+blk::blk(mmio_device& _dev)
+    : virtio_mmio_driver(_dev), _ro(false)
 {
 
     _driver_name = "virtio-blk";
@@ -128,13 +127,12 @@ blk::blk(pci::device& pci_dev)
             sched::thread::attr().name("virtio-blk"));
     t->start();
     auto queue = get_virt_queue(0);
-    if (pci_dev.is_msix()) {
-        _msi.easy_register({ { 0, [=] { queue->disable_interrupts(); }, t } });
-    } else {
-        _irq.reset(new pci_interrupt(pci_dev,
-                                     [=] { return ack_irq(); },
-                                     [=] { t->wake(); }));
-    }
+
+    //TODO: This logic should really be moved to a device class
+    // so that it would do different thing depending if it is MMIO or PCI
+    // device
+    _irq.reset(new gsi_edge_interrupt(_dev.get_irq(),
+                                     [=] { if(this->ack_irq()) t->wake(); }));
 
     // Enable indirect descriptor
     queue->set_use_indirect(true);
@@ -164,25 +162,32 @@ blk::~blk()
 void blk::read_config()
 {
     //read all of the block config (including size, mce, topology,..) in one 
shot
-    virtio_conf_read(virtio_pci_config_offset(), &_config, sizeof(_config));
+    //TODO: It may to do with legacy vs non-legacy device
+    //but at least with latest spec we should check if individual
+    //config fields are available vs reading whole config struct. For example
+    //firecracker reports memory read violation warnings
+    virtio_conf_read(0, &_config, sizeof(_config.capacity));
 
     trace_virtio_blk_read_config_capacity(_config.capacity);
 
-    if (get_guest_feature_bit(VIRTIO_BLK_F_SIZE_MAX))
+    //TODO: Legacy vs non-legacy device concept. In pre-finalized spec
+    //there was a concept of "guest"/"host". Right now they use equivalent
+    //concepts - "driver"/"device"
+    if (get_drv_feature_bit(VIRTIO_BLK_F_SIZE_MAX))
         trace_virtio_blk_read_config_size_max(_config.size_max);
-    if (get_guest_feature_bit(VIRTIO_BLK_F_SEG_MAX))
+    if (get_drv_feature_bit(VIRTIO_BLK_F_SEG_MAX))
         trace_virtio_blk_read_config_seg_max(_config.seg_max);
-    if (get_guest_feature_bit(VIRTIO_BLK_F_GEOMETRY)) {
+    if (get_drv_feature_bit(VIRTIO_BLK_F_GEOMETRY)) {
         trace_virtio_blk_read_config_geometry((u32)_config.geometry.cylinders, 
(u32)_config.geometry.heads, (u32)_config.geometry.sectors);
     }
-    if (get_guest_feature_bit(VIRTIO_BLK_F_BLK_SIZE))
+    if (get_drv_feature_bit(VIRTIO_BLK_F_BLK_SIZE))
         trace_virtio_blk_read_config_blk_size(_config.blk_size);
-    if (get_guest_feature_bit(VIRTIO_BLK_F_TOPOLOGY)) {
+    if (get_drv_feature_bit(VIRTIO_BLK_F_TOPOLOGY)) {
         trace_virtio_blk_read_config_topology((u32)_config.physical_block_exp, 
(u32)_config.alignment_offset, (u32)_config.min_io_size, 
(u32)_config.opt_io_size);
     }
-    if (get_guest_feature_bit(VIRTIO_BLK_F_CONFIG_WCE))
+    if (get_drv_feature_bit(VIRTIO_BLK_F_CONFIG_WCE))
         trace_virtio_blk_read_config_wce((u32)_config.wce);
-    if (get_guest_feature_bit(VIRTIO_BLK_F_RO)) {
+    if (get_drv_feature_bit(VIRTIO_BLK_F_RO)) {
         set_readonly();
         trace_virtio_blk_read_config_ro();
     }
@@ -195,7 +200,7 @@ void blk::req_done()
 
     while (1) {
 
-        virtio_driver::wait_for_queue(queue, &vring::used_ring_not_empty);
+        virtio_mmio_driver::wait_for_queue(queue, &vring::used_ring_not_empty);
         trace_virtio_blk_wake();
 
         u32 len;
@@ -239,11 +244,12 @@ int blk::make_request(struct bio* bio)
     WITH_LOCK(_lock) {
 
         if (!bio) return EIO;
-
+        /* TODO: Is this really correct to simply disable this logic?
+         * Temporarily comment out -> seg_max is unavailable ...
         if (bio->bio_bcount/mmu::page_size + 1 > _config.seg_max) {
             trace_virtio_blk_make_request_seg_max(bio->bio_bcount, 
_config.seg_max);
             return EIO;
-        }
+        }*/
 
         auto* queue = get_virt_queue(0);
         blk_request_type type;
@@ -296,7 +302,7 @@ int blk::make_request(struct bio* bio)
 
 u32 blk::get_driver_features()
 {
-    auto base = virtio_driver::get_driver_features();
+    auto base = virtio_mmio_driver::get_driver_features();
     return (base | ( 1 << VIRTIO_BLK_F_SIZE_MAX)
                  | ( 1 << VIRTIO_BLK_F_SEG_MAX)
                  | ( 1 << VIRTIO_BLK_F_GEOMETRY)
@@ -308,7 +314,17 @@ u32 blk::get_driver_features()
 
 hw_driver* blk::probe(hw_device* dev)
 {
-    return virtio::probe<blk, VIRTIO_BLK_DEVICE_ID>(dev);
+    //TODO: Eventually we should account for both PCI and MMIO devices
+    //once we have a virtio_device class
+    if (auto mmio_dev = dynamic_cast<mmio_device*>(dev)) {
+        if (mmio_dev->get_id() == hw_device_id(0x0, VIRTIO_ID_BLOCK)) {
+            debug_early("virtio-blk::probe() -> found virtio-mmio device 
...\n");
+            return new blk(*mmio_dev);
+        }
+    }
+    return nullptr;
+
+    //return virtio::probe<blk, VIRTIO_BLK_DEVICE_ID>(dev);
 }
 
 }
diff --git a/drivers/virtio-blk.hh b/drivers/virtio-blk.hh
index 17cf4d18..79e42710 100644
--- a/drivers/virtio-blk.hh
+++ b/drivers/virtio-blk.hh
@@ -8,12 +8,13 @@
 #ifndef VIRTIO_BLK_DRIVER_H
 #define VIRTIO_BLK_DRIVER_H
 #include "drivers/virtio.hh"
-#include "drivers/pci-device.hh"
+#include "drivers/virtio2.hh"
+#include "drivers/virtio-mmio.hh"
 #include <osv/bio.h>
 
 namespace virtio {
 
-class blk : public virtio_driver {
+class blk : public virtio_mmio_driver {
 public:
 
     // The feature bitmap for virtio blk
@@ -118,7 +119,7 @@ public:
         u8 status;
     };
 
-    explicit blk(pci::device& dev);
+    explicit blk(mmio_device& dev);
     virtual ~blk();
 
     virtual std::string get_name() const { return _driver_name; }
@@ -157,7 +158,8 @@ private:
     bool _ro;
     // This mutex protects parallel make_request invocations
     mutex _lock;
-    std::unique_ptr<pci_interrupt> _irq;
+    //TODO: There is no PCI so is it OK to use GSI edge interrupt?
+    std::unique_ptr<gsi_edge_interrupt> _irq;
 };
 
 }
diff --git a/drivers/virtio-mmio.cc b/drivers/virtio-mmio.cc
new file mode 100644
index 00000000..1485ea00
--- /dev/null
+++ b/drivers/virtio-mmio.cc
@@ -0,0 +1,128 @@
+//
+// Created by wkozaczuk on 12/25/18.
+//
+
+#include <osv/debug.hh>
+#include "virtio-mmio.hh"
+
+namespace virtio {
+
+// This implements virtio-io mmio device (transport layer, modeled after PSI).
+// Read here - 
https://www.kraxel.org/virtio/virtio-v1.0-cs03-virtio-gpu.html#x1-1080002
+hw_device_id mmio_device::get_id()
+{
+    return hw_device_id(_vendor_id, _device_id);
+}
+
+void mmio_device::print() {}
+void mmio_device::reset() {}
+
+u8 mmio_device::get_status() {
+    return mmio_getl(_addr_mmio + VIRTIO_MMIO_STATUS) & 0xff;
+}
+
+void mmio_device::set_status(u8 status) {
+    mmio_setl(_addr_mmio + VIRTIO_MMIO_STATUS, status);
+}
+
+void mmio_device::add_status(u8 status) {
+    mmio_setl(_addr_mmio + VIRTIO_MMIO_STATUS, status | get_status());
+}
+
+u64 mmio_device::get_features() {
+    u64 features;
+
+    mmio_setl(_addr_mmio + VIRTIO_MMIO_DEVICE_FEATURES_SEL, 1);
+    features = mmio_getl(_addr_mmio + VIRTIO_MMIO_DEVICE_FEATURES);
+    features <<= 32;
+
+    mmio_setl(_addr_mmio + VIRTIO_MMIO_DEVICE_FEATURES_SEL, 0);
+    features |= mmio_getl(_addr_mmio + VIRTIO_MMIO_DEVICE_FEATURES);
+
+    return features;
+}
+
+void mmio_device::set_features(u64 features) {
+    mmio_setl(_addr_mmio + VIRTIO_MMIO_DRIVER_FEATURES_SEL, 1);
+    mmio_setl(_addr_mmio + VIRTIO_MMIO_DRIVER_FEATURES, (u32)(features >> 32));
+
+    mmio_setl(_addr_mmio + VIRTIO_MMIO_DRIVER_FEATURES_SEL, 0);
+    mmio_setl(_addr_mmio + VIRTIO_MMIO_DRIVER_FEATURES, (u32)features);
+}
+
+void mmio_device::kick(int queue_num) {
+    mmio_setl(_addr_mmio + VIRTIO_MMIO_QUEUE_NOTIFY, queue_num);
+}
+
+void mmio_device::select_queue(int queue_num) {
+    mmio_setl(_addr_mmio + VIRTIO_MMIO_QUEUE_SEL, queue_num);
+    assert(!mmio_getl(_addr_mmio + VIRTIO_MMIO_QUEUE_READY));
+}
+
+u16 mmio_device::get_queue_size() {
+    return mmio_getl(_addr_mmio + VIRTIO_MMIO_QUEUE_NUM_MAX) & 0xffff;
+}
+
+void mmio_device::activate_queue(vring* queue) {
+    // Set size
+    mmio_setl(_addr_mmio + VIRTIO_MMIO_QUEUE_NUM, queue->size());
+    //
+    // Pass addresses
+    mmio_setl(_addr_mmio + VIRTIO_MMIO_QUEUE_DESC_LOW, 
(u32)queue->get_desc_addr());
+    mmio_setl(_addr_mmio + VIRTIO_MMIO_QUEUE_DESC_HIGH, 
(u32)(queue->get_desc_addr() >> 32));
+
+    mmio_setl(_addr_mmio + VIRTIO_MMIO_QUEUE_AVAIL_LOW, 
(u32)queue->get_avail_addr());
+    mmio_setl(_addr_mmio + VIRTIO_MMIO_QUEUE_AVAIL_HIGH, 
(u32)(queue->get_avail_addr() >> 32));
+
+    mmio_setl(_addr_mmio + VIRTIO_MMIO_QUEUE_USED_LOW, 
(u32)queue->get_used_addr());
+    mmio_setl(_addr_mmio + VIRTIO_MMIO_QUEUE_USED_HIGH, 
(u32)(queue->get_used_addr() >> 32));
+    //
+    // Make it ready
+    mmio_setl(_addr_mmio + VIRTIO_MMIO_QUEUE_READY, 1 );
+}
+
+bool mmio_device::ack_irq() {
+    //TODO: we might need to guard this read and write with a mutex
+    //to prevent two concurrent interrupts raised against same device step
+    //on each other. Is it possible? Spec does not seem to say anything about 
it.
+    unsigned long status = mmio_getl(_addr_mmio + 
VIRTIO_MMIO_INTERRUPT_STATUS);
+    //Sometimes this assert would be false (maybe of what I am talking above).
+    //assert(status & VIRTIO_MMIO_INT_VRING);
+    mmio_setl(_addr_mmio + VIRTIO_MMIO_INTERRUPT_ACK, status);
+    //return true;
+    return (status & VIRTIO_MMIO_INT_VRING) != 0;
+}
+
+u8 mmio_device::read_config(u64 offset) {
+    return mmio_getb(_addr_mmio + VIRTIO_MMIO_CONFIG + offset);
+}
+
+bool mmio_device::parse_config() {
+    _addr_mmio = mmio_map(_address, _size);
+
+    u32 magic = mmio_getl(_addr_mmio + VIRTIO_MMIO_MAGIC_VALUE);
+    if (magic != ('v' | 'i' << 8 | 'r' << 16 | 't' << 24)) {
+        return false;
+    }
+
+    // Check device version
+    u32 version = mmio_getl(_addr_mmio + VIRTIO_MMIO_VERSION);
+    if (version < 1 || version > 2) {
+        debugf( "Version %ld not supported!\n", version);
+        return false;
+    }
+
+    _device_id = mmio_getl(_addr_mmio + VIRTIO_MMIO_DEVICE_ID);
+    if (_device_id == 0) {
+        //
+        // virtio-mmio device with an ID 0 is a (dummy) placeholder
+        // with no function. End probing now with no error reported.
+        debug( "Dummy virtio-mmio device detected!\n");
+        return false;
+    }
+    _vendor_id = mmio_getl(_addr_mmio + VIRTIO_MMIO_VENDOR_ID);
+
+    debugf("Detected virtio-mmio device: (%ld,%ld)\n", _device_id, _vendor_id);
+    return true;
+}
+}
diff --git a/drivers/virtio-mmio.hh b/drivers/virtio-mmio.hh
new file mode 100644
index 00000000..78f8150b
--- /dev/null
+++ b/drivers/virtio-mmio.hh
@@ -0,0 +1,132 @@
+//
+// Created by wkozaczuk on 12/25/18.
+//
+
+#ifndef VIRTIO_MMIO_DEVICE_HH
+#define VIRTIO_MMIO_DEVICE_HH
+
+#include <osv/types.h>
+#include <osv/mmio.hh>
+#include "device.hh"
+#include "virtio-vring.hh"
+
+using namespace hw;
+
+/* Magic value ("virt" string) - Read Only */
+#define VIRTIO_MMIO_MAGIC_VALUE                0x000
+
+/* Virtio device version - Read Only */
+#define VIRTIO_MMIO_VERSION            0x004
+
+/* Virtio device ID - Read Only */
+#define VIRTIO_MMIO_DEVICE_ID          0x008
+
+/* Virtio vendor ID - Read Only */
+#define VIRTIO_MMIO_VENDOR_ID          0x00c
+
+/* Bitmask of the features supported by the device (host)
+ * (32 bits per set) - Read Only */
+#define VIRTIO_MMIO_DEVICE_FEATURES    0x010
+
+/* Device (host) features set selector - Write Only */
+#define VIRTIO_MMIO_DEVICE_FEATURES_SEL        0x014
+
+/* Bitmask of features activated by the driver (guest)
+ * (32 bits per set) - Write Only */
+#define VIRTIO_MMIO_DRIVER_FEATURES    0x020
+
+/* Activated features set selector - Write Only */
+#define VIRTIO_MMIO_DRIVER_FEATURES_SEL        0x024
+
+/* Queue selector - Write Only */
+#define VIRTIO_MMIO_QUEUE_SEL          0x030
+
+/* Maximum size of the currently selected queue - Read Only */
+#define VIRTIO_MMIO_QUEUE_NUM_MAX      0x034
+
+/* Queue size for the currently selected queue - Write Only */
+#define VIRTIO_MMIO_QUEUE_NUM          0x038
+
+/* Ready bit for the currently selected queue - Read Write */
+#define VIRTIO_MMIO_QUEUE_READY                0x044
+
+/* Queue notifier - Write Only */
+#define VIRTIO_MMIO_QUEUE_NOTIFY       0x050
+
+/* Interrupt status - Read Only */
+#define VIRTIO_MMIO_INTERRUPT_STATUS   0x060
+
+/* Interrupt acknowledge - Write Only */
+#define VIRTIO_MMIO_INTERRUPT_ACK      0x064
+
+/* Device status register - Read Write */
+#define VIRTIO_MMIO_STATUS             0x070
+
+/* Selected queue's Descriptor Table address, 64 bits in two halves */
+#define VIRTIO_MMIO_QUEUE_DESC_LOW     0x080
+#define VIRTIO_MMIO_QUEUE_DESC_HIGH    0x084
+
+/* Selected queue's Available Ring address, 64 bits in two halves */
+#define VIRTIO_MMIO_QUEUE_AVAIL_LOW    0x090
+#define VIRTIO_MMIO_QUEUE_AVAIL_HIGH   0x094
+
+/* Selected queue's Used Ring address, 64 bits in two halves */
+#define VIRTIO_MMIO_QUEUE_USED_LOW     0x0a0
+#define VIRTIO_MMIO_QUEUE_USED_HIGH    0x0a4
+
+/* Configuration atomicity value */
+#define VIRTIO_MMIO_CONFIG_GENERATION  0x0fc
+
+/* The config space is defined by each driver as
+ * the per-driver configuration space - Read Write */
+#define VIRTIO_MMIO_CONFIG             0x100
+
+#define VIRTIO_MMIO_INT_VRING          (1 << 0)
+#define VIRTIO_MMIO_INT_CONFIG         (1 << 1)
+
+namespace virtio {
+
+class mmio_device : public hw_device {
+public:
+    mmio_device(u64 address, u64 size, unsigned int irq) :
+        _address(address), _size(size), _irq(irq),
+        _vendor_id(0), _device_id(0), _addr_mmio(0) {}
+
+    virtual ~mmio_device() {}
+
+    virtual hw_device_id get_id();
+    virtual void print();
+    virtual void reset();
+
+    unsigned int get_irq() { return _irq; }
+    bool ack_irq();
+
+    u8 get_status();
+    void set_status(u8 status);
+    void add_status(u8 status);
+
+    u64 get_features();
+    void set_features(u64 features);
+
+    void kick(int queue);
+    void select_queue(int queue);
+    u16 get_queue_size();
+    void activate_queue(vring* queue);
+
+    u8 read_config(u64 offset);
+    bool parse_config();
+
+private:
+    u64 _address;
+    u64 _size;
+    unsigned int _irq;
+    //u64 _id;
+    u16 _vendor_id;
+    u16 _device_id;
+
+    mmioaddr_t _addr_mmio;
+};
+
+}
+
+#endif //VIRTIO_MMIO_DEVICE_HH
diff --git a/drivers/virtio-net.cc b/drivers/virtio-net.cc
index b820c36c..9ad5d48a 100644
--- a/drivers/virtio-net.cc
+++ b/drivers/virtio-net.cc
@@ -10,7 +10,6 @@
 
 #include "drivers/virtio.hh"
 #include "drivers/virtio-net.hh"
-#include "drivers/pci-device.hh"
 #include <osv/interrupt.hh>
 
 #include <osv/mempool.hh>
@@ -217,19 +216,20 @@ void net::fill_qstats(const struct txq& txq, struct 
if_data* out_data) const
 
 bool net::ack_irq()
 {
-    auto isr = virtio_conf_readb(VIRTIO_PCI_ISR);
-
-    if (isr) {
+    if( _dev.ack_irq()) {
         _rxq.vqueue->disable_interrupts();
         return true;
-    } else {
-        return false;
     }
-
+    else
+        return false;
 }
 
-net::net(pci::device& dev)
-    : virtio_driver(dev),
+//TODO: For now this driver is hardcoded to expect mmio_device
+// but eventually we could introduce some sort of virtio_device
+// interface class that pci_device and mmio_device would implement/extend
+// from.
+net::net(mmio_device& dev)
+    : virtio_mmio_driver(dev),
       _rxq(get_virt_queue(0), [this] { this->receiver(); }),
       _txq(this, get_virt_queue(1))
 {
@@ -244,7 +244,8 @@ net::net(pci::device& dev)
     setup_features();
     read_config();
 
-    _hdr_size = _mergeable_bufs ? sizeof(net_hdr_mrg_rxbuf) : sizeof(net_hdr);
+    //TODO: Legacy vs non-legacy -> the non-legacy header includes one more 
field
+    _hdr_size = sizeof(net_hdr_mrg_rxbuf);
 
     //initialize the BSD interface _if
     _ifn = if_alloc(IFT_ETHER);
@@ -290,16 +291,9 @@ net::net(pci::device& dev)
 
     ether_ifattach(_ifn, _config.mac);
 
-    if (dev.is_msix()) {
-        _msi.easy_register({
-            { 0, [&] { _rxq.vqueue->disable_interrupts(); }, poll_task },
-            { 1, [&] { _txq.vqueue->disable_interrupts(); }, nullptr }
-        });
-    } else {
-        _irq.reset(new pci_interrupt(dev,
-                                     [=] { return this->ack_irq(); },
-                                     [=] { poll_task->wake(); }));
-    }
+    //TODO: Move to device class
+    _irq.reset(new gsi_edge_interrupt(_dev.get_irq(),
+                                 [=] { if(this->ack_irq()) poll_task->wake(); 
}));
 
     fill_rx_ring();
 
@@ -324,10 +318,14 @@ net::~net()
 void net::read_config()
 {
     //read all of the net config  in one shot
-    virtio_conf_read(virtio_pci_config_offset(), &_config, sizeof(_config));
-
-    if (get_guest_feature_bit(VIRTIO_NET_F_MAC))
-        net_i("The mac addr of the device is %x:%x:%x:%x:%x:%x",
+    //TODO: It may to do with legacy vs non-legacy device
+    //but at least with latest spec we should check if individual
+    //config fields are available vs reading whole config struct. For example
+    //firecracker reports memory read violation warnings
+    virtio_conf_read(0, &(_config.mac[0]), sizeof(_config.mac));
+
+    if (get_drv_feature_bit(VIRTIO_NET_F_MAC))
+        debugf("The mac addr of the device is %x:%x:%x:%x:%x:%x\n",
                 (u32)_config.mac[0],
                 (u32)_config.mac[1],
                 (u32)_config.mac[2],
@@ -335,20 +333,20 @@ void net::read_config()
                 (u32)_config.mac[4],
                 (u32)_config.mac[5]);
 
-    _mergeable_bufs = get_guest_feature_bit(VIRTIO_NET_F_MRG_RXBUF);
-    _status = get_guest_feature_bit(VIRTIO_NET_F_STATUS);
-    _tso_ecn = get_guest_feature_bit(VIRTIO_NET_F_GUEST_ECN);
-    _host_tso_ecn = get_guest_feature_bit(VIRTIO_NET_F_HOST_ECN);
-    _csum = get_guest_feature_bit(VIRTIO_NET_F_CSUM);
-    _guest_csum = get_guest_feature_bit(VIRTIO_NET_F_GUEST_CSUM);
-    _guest_tso4 = get_guest_feature_bit(VIRTIO_NET_F_GUEST_TSO4);
-    _host_tso4 = get_guest_feature_bit(VIRTIO_NET_F_HOST_TSO4);
-    _guest_ufo = get_guest_feature_bit(VIRTIO_NET_F_GUEST_UFO);
-
-    net_i("Features: %s=%d,%s=%d", "Status", _status, "TSO_ECN", _tso_ecn);
-    net_i("Features: %s=%d,%s=%d", "Host TSO ECN", _host_tso_ecn, "CSUM", 
_csum);
-    net_i("Features: %s=%d,%s=%d", "Guest_csum", _guest_csum, "guest tso4", 
_guest_tso4);
-    net_i("Features: %s=%d", "host tso4", _host_tso4);
+    _mergeable_bufs = get_drv_feature_bit(VIRTIO_NET_F_MRG_RXBUF);
+    _status = get_drv_feature_bit(VIRTIO_NET_F_STATUS);
+    _tso_ecn = get_drv_feature_bit(VIRTIO_NET_F_GUEST_ECN);
+    _host_tso_ecn = get_drv_feature_bit(VIRTIO_NET_F_HOST_ECN);
+    _csum = get_drv_feature_bit(VIRTIO_NET_F_CSUM);
+    _guest_csum = get_drv_feature_bit(VIRTIO_NET_F_GUEST_CSUM);
+    _guest_tso4 = get_drv_feature_bit(VIRTIO_NET_F_GUEST_TSO4);
+    _host_tso4 = get_drv_feature_bit(VIRTIO_NET_F_HOST_TSO4);
+    _guest_ufo = get_drv_feature_bit(VIRTIO_NET_F_GUEST_UFO);
+
+    debugf("Features: %s=%d,%s=%d\n", "Status", _status, "TSO_ECN", _tso_ecn);
+    debugf("Features: %s=%d,%s=%d\n", "Host TSO ECN", _host_tso_ecn, "CSUM", 
_csum);
+    debugf("Features: %s=%d,%s=%d\n", "Guest_csum", _guest_csum, "guest tso4", 
_guest_tso4);
+    debugf("Features: %s=%d,%s=%d\n", "host tso4", _host_tso4, 
"mergeable_bufs", _mergeable_bufs);
 }
 
 /**
@@ -422,7 +420,7 @@ void net::receiver()
     while (1) {
 
         // Wait for rx queue (used elements)
-        virtio_driver::wait_for_queue(vq, &vring::used_ring_not_empty);
+        virtio_mmio_driver::wait_for_queue(vq, &vring::used_ring_not_empty);
         trace_virtio_net_rx_wake();
 
         _rxq.stats.rx_bh_wakeups++;
@@ -840,7 +838,7 @@ void net::txq::gc()
 
 u32 net::get_driver_features()
 {
-    u32 base = virtio_driver::get_driver_features();
+    u32 base = virtio_mmio_driver::get_driver_features();
     return (base | (1 << VIRTIO_NET_F_MAC)        \
                  | (1 << VIRTIO_NET_F_MRG_RXBUF)  \
                  | (1 << VIRTIO_NET_F_STATUS)     \
@@ -856,12 +854,14 @@ u32 net::get_driver_features()
 
 hw_driver* net::probe(hw_device* dev)
 {
-    if (auto pci_dev = dynamic_cast<pci::device*>(dev)) {
-        if (pci_dev->get_id() == hw_device_id(VIRTIO_VENDOR_ID, 
VIRTIO_NET_DEVICE_ID)) {
+    //TODO: Handle both PCI and MMIO devices
+    if (auto mmio_dev = dynamic_cast<mmio_device*>(dev)) {
+        if (mmio_dev->get_id() == hw_device_id(0x0, VIRTIO_ID_NET)) {
+            debug_early("virtio-net::probe() -> found virtio-mmio device 
...\n");
             if (opt_maxnic && maxnic-- <= 0) {
                 return nullptr;
             } else {
-                return aligned_new<net>(*pci_dev);
+                return aligned_new<net>(*mmio_dev);
             }
         }
     }
diff --git a/drivers/virtio-net.hh b/drivers/virtio-net.hh
index ad323e69..6edca97a 100644
--- a/drivers/virtio-net.hh
+++ b/drivers/virtio-net.hh
@@ -14,9 +14,11 @@
 #include <bsd/sys/sys/mbuf.h>
 
 #include <osv/percpu_xmit.hh>
+#include <osv/interrupt.hh>
 
 #include "drivers/virtio.hh"
-#include "drivers/pci-device.hh"
+#include "drivers/virtio2.hh"
+#include "drivers/virtio-mmio.hh"
 
 namespace virtio {
 
@@ -24,7 +26,7 @@ namespace virtio {
  * @class net
  * virtio net device class
  */
-class net : public virtio_driver {
+class net : public virtio_mmio_driver {
 public:
 
     // The feature bitmap for virtio net
@@ -204,7 +206,7 @@ public:
             u16 virtqueue_pairs;
     };
 
-    explicit net(pci::device& dev);
+    explicit net(mmio_device& dev);
     virtual ~net();
 
     virtual std::string get_name() const { return _driver_name; }
@@ -269,7 +271,7 @@ private:
 
     u32 _hdr_size;
 
-    std::unique_ptr<pci_interrupt> _irq;
+    std::unique_ptr<gsi_edge_interrupt> _irq;
 
     struct rxq_stats {
         u64 rx_packets; /* if_ipackets */
diff --git a/drivers/virtio-vring.cc b/drivers/virtio-vring.cc
index 5f89fb5d..58681ead 100644
--- a/drivers/virtio-vring.cc
+++ b/drivers/virtio-vring.cc
@@ -38,9 +38,9 @@ TRACEPOINT(trace_vring_get_buf_ret, "vring=%p _avail_count 
%d", void*, int);
 
 namespace virtio {
 
-    vring::vring(virtio_driver* const dev, u16 num, u16 q_index)
+    vring::vring(vdriver* const driver, u16 num, u16 q_index)
     {
-        _dev = dev;
+        _driver = driver;
         _q_index = q_index;
         // Alloc enough pages for the vring...
         unsigned sz = VIRTIO_ALIGN(vring::get_size(num, 
VIRTIO_PCI_VRING_ALIGN));
@@ -86,6 +86,21 @@ namespace virtio {
         return mmu::virt_to_phys(_vring_ptr);
     }
 
+    u64 vring::get_desc_addr()
+    {
+        return mmu::virt_to_phys(_desc);
+    }
+
+    u64 vring::get_avail_addr()
+    {
+        return mmu::virt_to_phys(_avail);
+    }
+
+    u64 vring::get_used_addr()
+    {
+        return mmu::virt_to_phys(_used);
+    }
+
     unsigned vring::get_size(unsigned int num, unsigned long align)
     {
         return (((sizeof(vring_desc) * num + sizeof(u16) * (3 + num)
@@ -102,7 +117,7 @@ namespace virtio {
     inline bool vring::use_indirect(int desc_needed)
     {
         return _use_indirect &&
-               _dev->get_indirect_buf_cap() &&
+               _driver->get_indirect_buf_cap() &&
                // don't let the posting fail due to low available buffers 
number
                (desc_needed > _avail_count ||
                // no need to use indirect for a single descriptor
@@ -280,7 +295,7 @@ namespace virtio {
     vring::kick() {
         bool kicked = true;
 
-        if (_dev->get_event_idx_cap()) {
+        if (_driver->get_event_idx_cap()) {
 
             std::atomic_thread_fence(std::memory_order_seq_cst);
 
@@ -310,7 +325,7 @@ namespace virtio {
         // and _avail_added_since_kick might wrap around due to this bulking.
         //
         if (kicked || (_avail_added_since_kick >= (u16)(~0) / 2)) {
-            _dev->kick(_q_index);
+            _driver->kick(_q_index);
             _avail_added_since_kick = 0;
             return true;
         }
diff --git a/drivers/virtio-vring.hh b/drivers/virtio-vring.hh
index af8691ca..0bf1c581 100644
--- a/drivers/virtio-vring.hh
+++ b/drivers/virtio-vring.hh
@@ -29,7 +29,7 @@ TRACEPOINT(trace_vring_update_used_event, "vring=%p: 
_used_ring_host_head %d",
 namespace virtio {
 
 class virtio_vring;
-class virtio_driver;
+class vdriver;
 
     // Buffer descriptors in the ring
     class vring_desc {
@@ -122,12 +122,16 @@ class virtio_driver;
     class vring {
     public:
 
-        vring(virtio_driver* const dev, u16 num, u16 q_index);
+        vring(vdriver* const driver, u16 num, u16 q_index);
         virtual ~vring();
 
         u64 get_paddr();
         static unsigned get_size(unsigned int num, unsigned long align);
 
+        u64 get_desc_addr();
+        u64 get_avail_addr();
+        u64 get_used_addr();
+
         // Ring operations
         bool add_buf(void* cookie);
         // Get the top item from the used ring
@@ -240,7 +244,7 @@ class virtio_driver;
     private:
 
         // Up pointer
-        virtio_driver* _dev;
+        vdriver* _driver;
         u16 _q_index;
         // The physical of the physical address handed to the virtio device
         void* _vring_ptr;
diff --git a/drivers/virtio.hh b/drivers/virtio.hh
index b918de28..2faadec7 100644
--- a/drivers/virtio.hh
+++ b/drivers/virtio.hh
@@ -28,6 +28,9 @@ enum VIRTIO_CONFIG {
     VIRTIO_CONFIG_S_DRIVER = 2,
     /* Driver has used its parts of the config, and is happy */
     VIRTIO_CONFIG_S_DRIVER_OK = 4,
+    /* Indicates that the driver has acknowledged all the features it 
understands,
+     * and feature negotiation is complete */
+    VIRTIO_CONFIG_S_FEATURES_OK = 8,
     /* We've given up on this device. */
     VIRTIO_CONFIG_S_FAILED = 0x80,
     /* Some virtio feature bits (currently bits 28 through 31) are reserved 
for the
@@ -104,7 +107,20 @@ enum {
 
 const unsigned max_virtqueues_nr = 64;
 
-class virtio_driver : public hw_driver {
+//TODO: This helps us make vring class not be
+// tightly coupled to virtio_driver but rather this simple
+// interface. Might not be necessary once we introduce
+// virtio_device class.
+class vdriver {
+public:
+    virtual ~vdriver() {};
+
+    virtual bool get_indirect_buf_cap() = 0;
+    virtual bool get_event_idx_cap() = 0;
+    virtual bool kick(int queue) = 0;
+};
+
+class virtio_driver : public hw_driver, public vdriver {
 public:
     explicit virtio_driver(pci::device& dev);
     virtual ~virtio_driver();
diff --git a/drivers/virtio2.cc b/drivers/virtio2.cc
new file mode 100644
index 00000000..09410888
--- /dev/null
+++ b/drivers/virtio2.cc
@@ -0,0 +1,289 @@
+/*
+ * Copyright (C) 2013 Cloudius Systems, Ltd.
+ *
+ * This work is open source software, licensed under the terms of the
+ * BSD license as described in the LICENSE file in the top-level directory.
+ */
+
+#include <string.h>
+
+#include "drivers/virtio2.hh"
+#include "virtio-vring.hh"
+#include <osv/debug.h>
+#include "osv/trace.hh"
+
+//using namespace pci;
+
+//TRACEPOINT(trace_virtio_wait_for_queue, "queue(%p) have_elements=%d", void*, 
int);
+
+namespace virtio {
+
+int virtio_mmio_driver::_disk_idx = 0;
+
+//TODO: This is a copy of virtio.cc with commented out PCI-related code and
+// adjusted for mmio. Just a temporary artefact. Eventually we should have 
single
+// virtio_driver class that supports both PCI and MMIO devices through common
+// virtio_device interface.
+virtio_mmio_driver::virtio_mmio_driver(mmio_device& dev)
+    : hw_driver()
+    , _dev(dev)
+    , _num_queues(0)
+    , _cap_indirect_buf(false)
+{
+    for (unsigned i = 0; i < max_virtqueues_nr; i++) {
+        _queues[i] = nullptr;
+    }
+    //bool status = parse_pci_config();
+    //assert(status == true);
+
+    //PCI: _dev.set_bus_master(true);
+
+    //PCI: _dev.msix_enable();
+
+    //make sure the queue is reset
+    reset_host_side();
+
+    // Acknowledge device
+    add_dev_status(VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER);
+
+    // Generic init of virtqueues
+    probe_virt_queues();
+}
+
+virtio_mmio_driver::~virtio_mmio_driver()
+{
+    reset_host_side();
+    free_queues();
+}
+
+void virtio_mmio_driver::setup_features()
+{
+    u64 dev_features = get_device_features();
+    u64 drv_features = this->get_driver_features();
+
+    u64 subset = dev_features & drv_features;
+
+    //notify the host about the features in used according
+    //to the virtio spec
+    for (int i = 0; i < 64; i++)
+        if (subset & (1 << i))
+            virtio_d("%s: found feature intersec of bit %d", __FUNCTION__,  i);
+
+    if (subset & (1 << VIRTIO_RING_F_INDIRECT_DESC))
+        set_indirect_buf_cap(true);
+
+    if (subset & (1 << VIRTIO_RING_F_EVENT_IDX))
+            set_event_idx_cap(true);
+
+    set_drv_features(subset);
+
+    // Confirm features (new non-legacy)
+    add_dev_status(VIRTIO_CONFIG_S_FEATURES_OK);
+    get_dev_status(); //TODO -> Verify if correct
+}
+
+void virtio_mmio_driver::dump_config()
+{
+    /*
+    u8 B, D, F;
+    _dev.get_bdf(B, D, F);
+
+    _dev.dump_config();
+    virtio_d("%s [%x:%x.%x] vid:id=%x:%x", get_name().c_str(),
+        (u16)B, (u16)D, (u16)F,
+        _dev.get_vendor_id(),
+        _dev.get_device_id());
+
+    virtio_d("    virtio features: ");
+    for (int i = 0; i < 32; i++)
+        virtio_d(" %d ", get_device_feature_bit(i));
+    */
+}
+
+/*
+bool virtio_mmio_driver::parse_pci_config()
+{
+    // Test whether bar1 is present
+    _bar1 = _dev.get_bar(1);
+    if (_bar1 == nullptr) {
+        return false;
+    }
+
+    // Check ABI version
+    u8 rev = _dev.get_revision_id();
+    if (rev != VIRTIO_PCI_ABI_VERSION) {
+        virtio_e("Wrong virtio revision=%x", rev);
+        return false;
+    }
+
+    // Check device ID
+    u16 dev_id = _dev.get_device_id();
+    if ((dev_id < VIRTIO_PCI_ID_MIN) || (dev_id > VIRTIO_PCI_ID_MAX)) {
+        virtio_e("Wrong virtio dev id %x", dev_id);
+        return false;
+    }
+
+    return true;
+}*/
+
+void virtio_mmio_driver::reset_host_side()
+{
+    set_dev_status(0);
+}
+
+void virtio_mmio_driver::free_queues()
+{
+    for (unsigned i = 0; i < max_virtqueues_nr; i++) {
+        if (nullptr != _queues[i]) {
+            delete (_queues[i]);
+            _queues[i] = nullptr;
+        }
+    }
+}
+
+bool virtio_mmio_driver::kick(int queue)
+{
+    _dev.kick(queue);
+    //virtio_conf_writew(VIRTIO_PCI_QUEUE_NOTIFY, queue);
+    return true;
+}
+
+void virtio_mmio_driver::probe_virt_queues()
+{
+    u16 qsize = 0;
+
+    do {
+
+        if (_num_queues >= max_virtqueues_nr) {
+            return;
+        }
+
+        // Read queue size
+        _dev.select_queue(_num_queues);
+        //PCI: virtio_conf_writew(VIRTIO_PCI_QUEUE_SEL, _num_queues);
+        qsize = _dev.get_queue_size();
+        //PCI: qsize = virtio_conf_readw(VIRTIO_PCI_QUEUE_NUM);
+        if (0 == qsize) {
+            break;
+        }
+
+        // Init a new queue
+        vring* queue = new vring(this, qsize, _num_queues);
+        _queues[_num_queues] = queue;
+        debugf("virtio_mmio_driver: created vring - id:(%d), size:(%d)\n", 
_num_queues, qsize);
+
+        //PCI:
+        /*if (_dev.is_msix()) {
+            // Setup queue_id:entry_id 1:1 correlation...
+            virtio_conf_writew(VIRTIO_MSI_QUEUE_VECTOR, _num_queues);
+            if (virtio_conf_readw(VIRTIO_MSI_QUEUE_VECTOR) != _num_queues) {
+                virtio_e("Setting MSIx entry for queue %d failed.", 
_num_queues);
+                return;
+            }
+        }*/
+
+        _num_queues++;
+
+        // Tell host about pfn
+        // TODO: Yak, this is a bug in the design, on large memory we'll have 
PFNs > 32 bit
+        // Dor to notify Rusty
+        //PCI: virtio_conf_writel(VIRTIO_PCI_QUEUE_PFN, 
(u32)(queue->get_paddr() >> VIRTIO_PCI_QUEUE_ADDR_SHIFT));
+        _dev.activate_queue(queue);
+
+        // Debug print
+        virtio_d("Queue[%d] -> size %d, paddr %x", (_num_queues-1), qsize, 
queue->get_paddr());
+
+    } while (true);
+}
+
+vring* virtio_mmio_driver::get_virt_queue(unsigned idx)
+{
+    if (idx >= _num_queues) {
+        return nullptr;
+    }
+
+    return _queues[idx];
+}
+
+void virtio_mmio_driver::wait_for_queue(vring* queue, bool (vring::*pred)() 
const)
+{
+    sched::thread::wait_until([queue,pred] {
+        bool have_elements = (queue->*pred)();
+        if (!have_elements) {
+            queue->enable_interrupts();
+
+            // we must check that the ring is not empty *after*
+            // we enable interrupts to avoid a race where a packet
+            // may have been delivered between queue->used_ring_not_empty()
+            // and queue->enable_interrupts() above
+            have_elements = (queue->*pred)();
+            if (have_elements) {
+                queue->disable_interrupts();
+            }
+        }
+
+        //trace_virtio_wait_for_queue(queue, have_elements);
+        return have_elements;
+    });
+}
+
+u64 virtio_mmio_driver::get_device_features()
+{
+    return _dev.get_features();
+    //PCI: return virtio_conf_readl(VIRTIO_PCI_HOST_FEATURES);
+}
+
+bool virtio_mmio_driver::get_device_feature_bit(int bit)
+{
+    return (_dev.get_features() & (1 << bit)) != 0;
+    //PCI: return get_virtio_config_bit(VIRTIO_PCI_HOST_FEATURES, bit);
+}
+
+void virtio_mmio_driver::set_drv_features(u64 features)
+{
+    _dev.set_features(features);
+    _enabled_features = features;
+    //PCI: virtio_conf_writel(VIRTIO_PCI_GUEST_FEATURES, features);
+}
+
+u32 virtio_mmio_driver::get_drv_features()
+{
+    return _enabled_features;
+    //PCI: return virtio_conf_readl(VIRTIO_PCI_GUEST_FEATURES);
+}
+
+bool virtio_mmio_driver::get_drv_feature_bit(int bit)
+{
+    return (_enabled_features & (1 << bit)) != 0;
+    //PCI: return get_virtio_config_bit(VIRTIO_PCI_GUEST_FEATURES, bit);
+}
+
+u8 virtio_mmio_driver::get_dev_status()
+{
+    return _dev.get_status();
+    //PCI: return virtio_conf_readb(VIRTIO_PCI_STATUS);
+}
+
+void virtio_mmio_driver::set_dev_status(u8 status)
+{
+    _dev.set_status(status);
+    //PCI: virtio_conf_writeb(VIRTIO_PCI_STATUS, status);
+}
+
+void virtio_mmio_driver::add_dev_status(u8 status)
+{
+    set_dev_status(get_dev_status() | status);
+}
+
+void virtio_mmio_driver::del_dev_status(u8 status)
+{
+    set_dev_status(get_dev_status() & ~status);
+}
+
+void virtio_mmio_driver::virtio_conf_read(u32 offset, void* buf, int length)
+{
+    unsigned char* ptr = reinterpret_cast<unsigned char*>(buf);
+    for (int i = 0; i < length; i++)
+        ptr[i] = _dev.read_config(offset + i);
+}
+}
diff --git a/drivers/virtio2.hh b/drivers/virtio2.hh
new file mode 100644
index 00000000..afcc6459
--- /dev/null
+++ b/drivers/virtio2.hh
@@ -0,0 +1,115 @@
+/*
+ * Copyright (C) 2013 Cloudius Systems, Ltd.
+ *
+ * This work is open source software, licensed under the terms of the
+ * BSD license as described in the LICENSE file in the top-level directory.
+ */
+
+#ifndef VIRTIO_DRIVER2_H
+#define VIRTIO_DRIVER2_H
+
+#include "driver.hh"
+#include "drivers/driver.hh"
+#include "drivers/virtio.hh"
+#include "drivers/virtio-mmio.hh"
+#include "drivers/virtio-vring.hh"
+#include <osv/interrupt.hh>
+
+namespace virtio {
+
+class virtio_mmio_driver : public hw_driver, public vdriver {
+public:
+    explicit virtio_mmio_driver(mmio_device& dev);
+    virtual ~virtio_mmio_driver();
+
+    virtual std::string get_name() const = 0;
+
+    virtual void dump_config();
+
+    // The remaining space is defined by each driver as the per-driver
+    // configuration space
+    //int virtio_pci_config_offset() {return (_dev.is_msix_enabled())? 24 : 
20;}
+
+    //bool parse_pci_config();
+
+    void probe_virt_queues();
+    vring* get_virt_queue(unsigned idx);
+
+    // block the calling thread until the queue has some used elements in it.
+    void wait_for_queue(vring* queue, bool (vring::*pred)() const);
+
+    // guest/host features physical access
+    // LEGACY NOTE:
+    //      guest equivalent to driver
+    //      host equivalent to device
+    u64 get_device_features();
+    bool get_device_feature_bit(int bit);
+
+    void set_drv_features(u64 features);
+    u32 get_drv_features();
+    bool get_drv_feature_bit(int bit);
+
+    // device status
+    u8 get_dev_status();
+    void set_dev_status(u8 status);
+    void add_dev_status(u8 status);
+    void del_dev_status(u8 status);
+
+    // Access the virtio conf address space set by pci bar 1
+    //bool get_virtio_config_bit(u32 offset, int bit);
+    //void set_virtio_config_bit(u32 offset, int bit, bool on);
+
+    // Access virtio config space
+    //void virtio_conf_read(u32 offset, void* buf, int length);
+    //void virtio_conf_write(u32 offset, void* buf, int length);
+    //u8 virtio_conf_readb(u32 offset) { return _bar1->readb(offset);};
+    //u16 virtio_conf_readw(u32 offset) { return _bar1->readw(offset);};
+    //u32 virtio_conf_readl(u32 offset) { return _bar1->readl(offset);};
+    //void virtio_conf_writeb(u32 offset, u8 val) { _bar1->writeb(offset, 
val);};
+    //void virtio_conf_writew(u32 offset, u16 val) { _bar1->writew(offset, 
val);};
+    //void virtio_conf_writel(u32 offset, u32 val) { _bar1->writel(offset, 
val);};
+
+    void virtio_conf_read(u32 offset, void* buf, int length);
+
+    bool kick(int queue);
+    void reset_host_side();
+    void free_queues();
+
+    bool get_indirect_buf_cap() {return _cap_indirect_buf;}
+    void set_indirect_buf_cap(bool on) {_cap_indirect_buf = on;}
+    bool get_event_idx_cap() {return _cap_event_idx;}
+    void set_event_idx_cap(bool on) {_cap_event_idx = on;}
+
+    mmio_device& device() { return _dev; }
+protected:
+    // Actual drivers should implement this on top of the basic ring features
+    virtual u32 get_driver_features() { return 1 << 
VIRTIO_RING_F_INDIRECT_DESC | 1 << VIRTIO_RING_F_EVENT_IDX; }
+    void setup_features();
+protected:
+    mmio_device& _dev;
+    //pci::device& _dev;
+    //interrupt_manager _msi;
+    vring* _queues[max_virtqueues_nr];
+    u32 _num_queues;
+    u64 _enabled_features;
+    //pci::bar* _bar1;
+    bool _cap_indirect_buf;
+    bool _cap_event_idx = false;
+    static int _disk_idx;
+};
+
+template <typename T, u16 ID>
+hw_driver* probe_mmio(hw_device* dev)
+{
+    if (auto mmio_dev = dynamic_cast<mmio_device*>(dev)) {
+        if (mmio_dev->get_id() == hw_device_id(0x0, ID)) {
+            return new T(*mmio_dev);
+        }
+    }
+    return nullptr;
+}
+
+}
+
+#endif
+
diff --git a/include/osv/virtio-assign.hh b/include/osv/virtio-assign.hh
index b81f41ae..82b374fe 100644
--- a/include/osv/virtio-assign.hh
+++ b/include/osv/virtio-assign.hh
@@ -21,7 +21,7 @@ public:
     // TODO: provide a way to get one of multiple assigned virtio devices.
     static assigned_virtio *get() __attribute__((weak));
 
-    virtual void kick(int queue) = 0;
+    virtual bool kick(int queue) = 0;
     virtual uint32_t queue_size(int queue) = 0;
     virtual void enable_interrupt(unsigned int queue,
             std::function<void(void)> handler) = 0;
diff --git a/loader.cc b/loader.cc
index 3f88ebda..2cef739b 100644
--- a/loader.cc
+++ b/loader.cc
@@ -55,6 +55,7 @@
 #include "drivers/null.hh"
 
 #include "libc/network/__dns.hh"
+#include "early-console.hh"
 
 using namespace osv;
 using namespace osv::clock::literals;
@@ -550,7 +551,7 @@ void main_cont(int loader_argc, char** loader_argv)
     memory::enable_debug_allocator();
 
 #ifndef AARCH64_PORT_STUB
-    acpi::init();
+    //acpi::init();
 #endif /* !AARCH64_PORT_STUB */
 
     if (sched::cpus.size() > sched::max_cpus) {
-- 
2.19.1

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/d/optout.

Reply via email to