This patch is not meant to be applied but it serves to illustrate what is 
left to make OSv boot on firecracker. As you can see it is not a daunting 
amount of work anymore given modern virtio is supported now.

In am not sure in what order we should implement the remaining work. 
Ideally every patch should be testable which means that virtio-mmio support 
should be added last given it is not possible to test it on QEMU (this 
https://github.com/qemu/qemu/blob/595123df1d54ed8fbab9e1a73d5a58c5bb71058f/hw/virtio/virtio-mmio.c
 
seems to suggest that QEMU is able to emulate virtio-mmio devices but I 
have not found any examples of using it on x86-64 platform, anyone has 
ideas?). Please see my extra comments/questions below.

On Sunday, February 10, 2019 at 3:22:44 PM UTC-5, Waldek Kozaczuk wrote:
>
> - clean up modern/legacy virtio support (configurability) 
>
This mostly is about fragments of code in virtio layer similar to this 
- 
https://github.com/cloudius-systems/osv/blob/master/drivers/virtio-blk.cc#L177-L187
 
- where code needs to be tweaked to properly identify from feature flags 
what configuration data is available and only read this.  

> - add virtio-mmio support including parsing virtio-mmio devices from 
> command line 
> - enhance boot.S and OSv boot in general to support vmlinux format 
> (decompressed ELF 64-bit Linux kernel) 
>    - pass e820 and command line parameters through zero-page 
>
 This is most non-trivial amount of work that requires changing OSv boot 
protocol so it better conforms to a Linux (for reference 
https://www.kernel.org/doc/Documentation/x86/boot.txt). It would be nice to 
support both 32-bit and 64-bit ELF direct boot LInux protocol. It is not 
clear to me what is the best way to support it especially if we still want 
to be able to boot OSv in real mode, protected and long mode at the same 
time and ideally minimize number of versions of boot artifacts (usr.img, 
lzloader.elf, loader.elf, etc). Maybe this deserves its own email thread.

> - make APCI optional and/or disable-able which includes: 
>    - make pvpanic probe only if ACPI on 
>    - make poweroff work when ACPI off 
>    - parse SMP information from MP table instead of MADT if ACPI off 
>

I think that all the 3 ACPI related items above can be part of a single 
patch. 

> - add option to turn PCI off (make OSv not even try to enumerate PCI) 
> - mark boot time end 


> Signed-off-by: Waldemar Kozaczuk <[email protected]> 
> --- 
>  Makefile               |   1 + 
>  arch/x64/arch-setup.cc | 180 ++++++++++++++++++++++++++++++++++++----- 
>  arch/x64/boot.S        |  37 ++++++++- 
>  arch/x64/loader.ld     |   2 +- 
>  arch/x64/power.cc      |  14 +--- 
>  arch/x64/smp.cc        |  16 +++- 
>  drivers/acpi.cc        |  10 ++- 
>  drivers/virtio-blk.cc  |   8 +- 
>  drivers/virtio-mmio.cc | 131 ++++++++++++++++++++++++++++++ 
>  drivers/virtio-mmio.hh | 148 +++++++++++++++++++++++++++++++++ 
>  drivers/virtio-net.cc  |   7 ++ 
>  drivers/virtio.cc      |   7 +- 
>  drivers/virtio.hh      |   1 + 
>  loader.cc              |   5 +- 
>  14 files changed, 527 insertions(+), 40 deletions(-) 
>  create mode 100644 drivers/virtio-mmio.cc 
>  create mode 100644 drivers/virtio-mmio.hh 
>
> diff --git a/Makefile b/Makefile 
> index ff9f2ed3..b905063c 100644 
> --- a/Makefile 
> +++ b/Makefile 
> @@ -821,6 +821,7 @@ drivers += arch/$(arch)/pvclock-abi.o 
>  drivers += drivers/virtio.o 
>  drivers += drivers/virtio-pci-device.o 
>  drivers += drivers/virtio-vring.o 
> +drivers += drivers/virtio-mmio.o 
>  drivers += drivers/virtio-net.o 
>  drivers += drivers/vmxnet3.o 
>  drivers += drivers/vmxnet3-queues.o 
> diff --git a/arch/x64/arch-setup.cc b/arch/x64/arch-setup.cc 
> index 50500662..cf5adb15 100644 
> --- a/arch/x64/arch-setup.cc 
> +++ b/arch/x64/arch-setup.cc 
> @@ -22,6 +22,19 @@ 
>  #include <osv/commands.hh> 
>  #include "dmi.hh" 
>   
> +// Not sure if Linux zero page is always located at this place 
> +// in memory or its address is passed in one of the registers 
> +// -> double check 
> +#define ZERO_PAGE_START      0x7000 
> +#define SETUP_HEADER_OFFSET  0x1f1   // look at bootparam.h in linux 
> +#define BOOT_FLAG_OFFSET     sizeof(u8) + 4 * sizeof(u16) + sizeof(u32) 
> + 
> +#define E820_ENTRIES_OFFSET  0x1e8   // look at bootparam.h in linux 
> +#define E820_TABLE_OFFSET    0x2d0   // look at bootparam.h in linux 
> + 
> +#define CMD_LINE_PTR_OFFSET  sizeof(u8) * 5 + sizeof(u16) * 11 + 
> sizeof(u32) * 7 
> +#define CMD_LINE_SIZE_OFFSET CMD_LINE_PTR_OFFSET + sizeof(u8) * 2 + 
> sizeof(u16) + sizeof(u32) * 3 
> + 
>  struct multiboot_info_type { 
>      u32 flags; 
>      u32 mem_lower; 
> @@ -61,12 +74,81 @@ struct e820ent { 
>      u32 type; 
>  } __attribute__((packed)); 
>   
> +struct _e820ent { 
> +    u64 addr; 
> +    u64 size; 
> +    u32 type; 
> +} __attribute__((packed)); 
> + 
>  osv_multiboot_info_type* osv_multiboot_info; 
>   
> -void parse_cmdline(multiboot_info_type& mb) 
> +struct mmio_device_info { 
> +    u64 address; 
> +    u64 size; 
> +    unsigned int irq; 
> +}; 
> + 
> +//TODO: For now we are limiting number of mmio devices to two 
> +// Ideally we should be using somewhat more dynamic structure 
> +struct mmio_device_info mmio_device_info_entries[2]; 
> +int mmio_device_info_count = 0; 
> + 
> +#define VIRTIO_MMIO_DEVICE_CMDLINE_PREFIX "virtio_mmio.device=" 
> +char* parse_mmio_device_info(char *cmdline, mmio_device_info *info) { 
> +    // [virtio_mmio.]device=<size>@<baseaddr>:<irq>[:<id>] 
> +    char *prefix_pos = strstr(cmdline,VIRTIO_MMIO_DEVICE_CMDLINE_PREFIX); 
> +    if (!prefix_pos) 
> +        return nullptr; 
> + 
> +    char *size_pos = prefix_pos + 
> strlen(VIRTIO_MMIO_DEVICE_CMDLINE_PREFIX); 
> +    if (sscanf(size_pos,"%ld", &info->size) != 1) 
> +        return nullptr; 
> + 
> +    char *at_pos = strstr(size_pos,"@"); 
> +    if (!at_pos) 
> +        return nullptr; 
> + 
> +    switch(*(at_pos - 1)) { 
> +        case 'k': 
> +        case 'K': 
> +            info->size = info->size * 1024; 
> +            break; 
> +        case 'm': 
> +        case 'M': 
> +            info->size = info->size * 1024 * 1024; 
> +            break; 
> +        default: 
> +            break; 
> +    } 
> + 
> +    if (sscanf(at_pos, "@%lli:%u", &info->address, &info->irq) == 2) 
> +        return prefix_pos; 
> +    else 
> +        return nullptr; 
> +} 
> + 
> +//void parse_cmdline(multiboot_info_type& mb) 
> +void parse_cmdline(char *cmdline) 
>  { 
> -    auto p = reinterpret_cast<char*>(mb.cmdline); 
> -    osv::parse_cmdline(p); 
> +    //auto p = reinterpret_cast<char*>(mb.cmdline); 
> +    // We are assuming the mmio devices information is appended to the 
> +    // command line (at least it is the case with the firecracker) so 
> +    // once we parse those we strip it away so only plain OSv command 
> line 
> +    // is left 
> +    //TODO: There may be a smarter, better way to parse this information 
> +    char *virtio_device_info_pos = 
> parse_mmio_device_info(cmdline,mmio_device_info_entries); 
> +    if (virtio_device_info_pos) { 
> +        mmio_device_info_count++; 
> +        *virtio_device_info_pos = 0; 
> + 
> +        virtio_device_info_pos = 
> +            parse_mmio_device_info(virtio_device_info_pos + 
> 1,mmio_device_info_entries + 1); 
> +        if (virtio_device_info_pos) { 
> +            mmio_device_info_count++; 
> +        } 
> +    } 
> + 
> +    osv::parse_cmdline(cmdline); 
>  } 
>   
>  void setup_temporary_phys_map() 
> @@ -121,28 +203,64 @@ void arch_setup_free_memory() 
>  { 
>      static ulong edata; 
>      asm ("movl $.edata, %0" : "=rm"(edata)); 
> -    // copy to stack so we don't free it now 
> -    auto omb = *osv_multiboot_info; 
> -    auto mb = omb.mb; 
> -    auto e820_buffer = alloca(mb.mmap_length); 
> -    auto e820_size = mb.mmap_length; 
> -    memcpy(e820_buffer, reinterpret_cast<void*>(mb.mmap_addr), 
> e820_size); 
> + 
> +    void *zero_page = reinterpret_cast<void*>(ZERO_PAGE_START); 
> +    void *setup_header = zero_page + SETUP_HEADER_OFFSET; 
> + 
> +    // Grab command line from zero page 
> +    u32 cmdline_ptr = *static_cast<u32*>(setup_header + 
> CMD_LINE_PTR_OFFSET); 
> +    u32 cmdline_size = *static_cast<u32*>(setup_header + 
> CMD_LINE_SIZE_OFFSET); 
> + 
> +    // Copy cmdline from zero page 
> +    void* cmdline = reinterpret_cast<void*>((u64)cmdline_ptr); 
> +    void *cmdline_copy = alloca(cmdline_size + 1); 
> +    memcpy(cmdline_copy,cmdline,cmdline_size); 
> +    ((char*)cmdline_copy)[cmdline_size] = 0; 
> + 
> +    debug_early("Cmdline: "); 
> +    debug_early((char*)cmdline_copy); 
> +    debug_early("\n"); 
> + 
> +    // Copy e820 information from zero page 
> +    struct _e820ent *e820_table = static_cast<struct _e820ent 
> *>(zero_page + E820_TABLE_OFFSET); 
> + 
> +    //TODO: We are assuming two entries but in reality 
> +    //there could be more so this logic below needs to be a little 
> smarter 
> +    auto e820_size = 48; 
> +    auto e820_buffer = alloca(e820_size); 
> +    { 
> +       struct e820ent *lower = reinterpret_cast<struct 
> e820ent*>(e820_buffer); 
> +       lower->ent_size = 20; 
> +       lower->type = 1; 
> +       lower->addr = e820_table[0].addr; 
> +       lower->size = e820_table[0].size; 
> + 
> +       struct e820ent *upper = lower + 1; 
> +       upper->ent_size = 20; 
> +       upper->type = 1; 
> +       upper->addr = e820_table[1].addr; 
> +       upper->size = e820_table[1].size; 
> +    } 
> + 
>      for_each_e820_entry(e820_buffer, e820_size, [] (e820ent ent) { 
>          memory::phys_mem_size += ent.size; 
>      }); 
>      constexpr u64 initial_map = 1 << 30; // 1GB mapped by startup code 
>   
> -    u64 time; 
> -    time = omb.tsc_init_hi; 
> -    time = (time << 32) | omb.tsc_init; 
> +    //TODO: We are assuming that bootchart-wise we start here but 
> +    // in reality it all starts at boot.S:start64_. However what happens 
> +    // before this points should take negligible amount of time, no? 
> +    u64 time = 0; 
> +    //time = omb.tsc_init_hi; 
> +    //time = (time << 32) | omb.tsc_init; 
>      boot_time.event(0, "", time ); 
>   
> -    time = omb.tsc_disk_done_hi; 
> -    time = (time << 32) | omb.tsc_disk_done; 
> +    //time = omb.tsc_disk_done_hi; 
> +    //time = (time << 32) | omb.tsc_disk_done; 
>      boot_time.event(1, "disk read (real mode)", time ); 
>   
> -    time = omb.tsc_uncompress_done_hi; 
> -    time = (time << 32) | omb.tsc_uncompress_done; 
> +    //time = omb.tsc_uncompress_done_hi; 
> +    //time = (time << 32) | omb.tsc_uncompress_done; 
>      boot_time.event(2, "uncompress lzloader.elf", time ); 
>   
>      auto c = processor::cpuid(0x80000000); 
> @@ -185,7 +303,10 @@ void arch_setup_free_memory() 
>      elf_size = edata - elf_phys; 
>      mmu::linear_map(elf_start, elf_phys, elf_size, OSV_KERNEL_BASE); 
>      // get rid of the command line, before low memory is unmapped 
> -    parse_cmdline(mb); 
> +    //parse_cmdline(mb); 
> + 
> +    parse_cmdline((char*)cmdline_copy); 
> + 
>      // now that we have some free memory, we can start mapping the rest 
>      mmu::switch_to_runtime_page_tables(); 
>      for_each_e820_entry(e820_buffer, e820_size, [] (e820ent ent) { 
> @@ -259,6 +380,7 @@ void arch_init_premain() 
>  #include "drivers/virtio-scsi.hh" 
>  #include "drivers/virtio-net.hh" 
>  #include "drivers/virtio-rng.hh" 
> +#include "drivers/virtio-mmio.hh" 
>  #include "drivers/xenplatform-pci.hh" 
>  #include "drivers/ahci.hh" 
>  #include "drivers/vmw-pvscsi.hh" 
> @@ -268,13 +390,33 @@ void arch_init_premain() 
>  void arch_init_drivers() 
>  { 
>      // initialize panic drivers 
> -    panic::pvpanic::probe_and_setup(); 
> +    // pvpanic depends on ACPI which firecracker 
> +    // does not suppot so we disable probing it altogether 
> +    //TODO: Is there a way to detect if ACPI is available and 
> +    //only then probe pvpanic? 
> +    //panic::pvpanic::probe_and_setup(); 
>      boot_time.event("pvpanic done"); 
>   
>      // Enumerate PCI devices 
> -    pci::pci_device_enumeration(); 
> +    // PCI is not supported by firecracker 
> +    //TODO: Is there a way to detect if PCI is present and only enumerate 
> +    //PCI devices then? Somehow even firecracker presents a bus with 
> +    //some dummy devices. 
> +    //pci::pci_device_enumeration(); 
>      boot_time.event("pci enumerated"); 
>   
> +    // Register any parsed virtio-mmio devices 
> +    for (int d = 0; d < mmio_device_info_count; d++) { 
> +        auto info = mmio_device_info_entries[d]; 
> +        auto mmio_device = new virtio::mmio_device(info.address, 
> info.size, info.irq); 
> +        if (mmio_device->parse_config()) { 
> +            device_manager::instance()->register_device(mmio_device); 
> +        } 
> +        else { 
> +            delete mmio_device; 
> +        } 
> +    } 
> + 
>      // Initialize all drivers 
>      hw::driver_manager* drvman = hw::driver_manager::instance(); 
>      drvman->register_driver(virtio::blk::probe); 
> diff --git a/arch/x64/boot.S b/arch/x64/boot.S 
> index c4b97e2d..3bdc57ea 100644 
> --- a/arch/x64/boot.S 
> +++ b/arch/x64/boot.S 
> @@ -97,9 +97,10 @@ start64: 
>      sub %rdi, %rcx 
>      xor %eax, %eax 
>      rep stosb 
> +    mov $0x200000, %rbp 
>      mov %rbp, elf_header 
>      # %ebx is set by boot16.S before running the loader 
> -    mov %rbx, osv_multiboot_info 
> +    //mov %rbx, osv_multiboot_info 
>      lea init_stack_top, %rsp 
>      call premain 
>      mov __loader_argc, %edi 
> @@ -107,8 +108,42 @@ start64: 
>      call main 
>      .cfi_endproc 
>   
> +.code64 
> +.global _start64 
> +_start64: 
> +//TODO: Is there a way to switch to protected mode and then jump to 
> start32 
> +//which would be even better than what we do below? 
> +    //For whatever reason at this point (long mode?) we cannot 
> +    //set gdt the way it is done in start32 but linux expects 
> +    //similar one so whatever firecracker sets seems to be OK. 
> +    //Maybe because OSv gdt has 32-bit entry 
> +    //lgdt gdt_desc 
> +    //mov $0x10, %eax 
> +    //mov %eax, %ds 
> +    //mov %eax, %es 
> +    //mov %eax, %fs 
> +    //mov %eax, %gs 
> +    //mov %eax, %ss 
> +    // Disable paging and enable PAE 
> +    mov $BOOT_CR4, %eax 
> +    mov %eax, %cr4 
> +    // Setup page tables 
> +    lea ident_pt_l4, %eax 
> +    mov %eax, %cr3 
> +    // Write contents of EDX:EAX to Model Specific Register specified by 
> ECX register 
> +    mov $0xc0000080, %ecx 
> +    mov $0x00000900, %eax 
> +    xor %edx, %edx 
> +    wrmsr 
> +    // Enable paging 
> +    mov $BOOT_CR0, %eax 
> +    mov %eax, %cr0 
> +    jmp start64 
> + 
>  # The smp trampoline must be in the lower 1MB, so we manually relocate 
>  # it to address 0 by subtracting smpboot from any offset 
> +//TODO: I am pretty sure we have some logic missing for SMP 
> +//given firecracker jumps us into long mode. Not sure how to handle it. 
>  .data 
>  .global smpboot 
>  smpboot: 
> diff --git a/arch/x64/loader.ld b/arch/x64/loader.ld 
> index efe78d52..e08b310c 100644 
> --- a/arch/x64/loader.ld 
> +++ b/arch/x64/loader.ld 
> @@ -108,4 +108,4 @@ PHDRS { 
>          eh_frame PT_GNU_EH_FRAME; 
>          note PT_NOTE; 
>  } 
> -ENTRY(start32); 
> +ENTRY(_start64); 
> diff --git a/arch/x64/power.cc b/arch/x64/power.cc 
> index 81849335..534ffdf0 100644 
> --- a/arch/x64/power.cc 
> +++ b/arch/x64/power.cc 
> @@ -27,17 +27,9 @@ void halt(void) 
>   
>  void poweroff(void) 
>  { 
> -    ACPI_STATUS status = AcpiEnterSleepStatePrep(ACPI_STATE_S5); 
> -    if (ACPI_FAILURE(status)) { 
> -        debug("AcpiEnterSleepStatePrep failed: %s\n", 
> AcpiFormatException(status)); 
> -        halt(); 
> -    } 
> -    status = AcpiEnterSleepState(ACPI_STATE_S5); 
> -    if (ACPI_FAILURE(status)) { 
> -        debug("AcpiEnterSleepState failed: %s\n", 
> AcpiFormatException(status)); 
> -        halt(); 
> -    } 
> - 
> +    // Firecracker only supports this as away to shutdown the VM 
> +    // Reset using the 8042 PS/2 Controller ("keyboard controller") 
> +    processor::outb(0xfe, 0x64); 
>      // We shouldn't get here on x86. 
>      halt(); 
>  } 
> diff --git a/arch/x64/smp.cc b/arch/x64/smp.cc 
> index 073ef206..4bdae0c6 100644 
> --- a/arch/x64/smp.cc 
> +++ b/arch/x64/smp.cc 
> @@ -74,7 +74,21 @@ void parse_madt() 
>   
>  void smp_init() 
>  { 
> -    parse_madt(); 
> +    //Firecracker does not support ACPI so 
> +    //there is no MADT table 
> +    //parse_madt(); 
> + 
> +    //TODO: This a nasty hack as we support 
> +    // single vCPU. Eventually we should parse out equivalent information 
> +    // about all vCPUs from MP table (seems like more ancient way). 
> +    // See 
> https://github.com/firecracker-microvm/firecracker/blob/7f29bca9ca197283275eab62fddc1c10ab580794/x86_64/src/mptable.rs
>  
> +    auto c = new sched::cpu(0); 
> +    c->arch.apic_id = 0;//lapic->Id; 
> +    c->arch.acpi_id = 0;//lapic->ProcessorId; 
> +    c->arch.initstack.next = smp_stack_free; 
> +    smp_stack_free = &c->arch.initstack; 
> +    sched::cpus.push_back(c); 
> + 
>      sched::current_cpu = sched::cpus[0]; 
>      for (auto c : sched::cpus) { 
>          c->incoming_wakeups = 
> aligned_array_new<sched::cpu::incoming_wakeup_queue>(sched::cpus.size()); 
> diff --git a/drivers/acpi.cc b/drivers/acpi.cc 
> index 506ca68f..006231f1 100644 
> --- a/drivers/acpi.cc 
> +++ b/drivers/acpi.cc 
> @@ -605,7 +605,9 @@ void init() 
>   
>  } 
>   
> -void __attribute__((constructor(init_prio::acpi))) acpi_init_early() 
> -{ 
> -     XENPV_ALTERNATIVE({ acpi::early_init(); }, {}); 
> -} 
> +//TODO: Is there a way to detect if ACPI is available and do not even 
> +//try to load relevant information? Right now OSv depends on ACPI 
> available 
> +//void __attribute__((constructor(init_prio::acpi))) acpi_init_early() 
> +//{ 
> +//     XENPV_ALTERNATIVE({ acpi::early_init(); }, {}); 
> +//} 
> diff --git a/drivers/virtio-blk.cc b/drivers/virtio-blk.cc 
> index 0bc6fb0f..fa4cba25 100644 
> --- a/drivers/virtio-blk.cc 
> +++ b/drivers/virtio-blk.cc 
> @@ -115,7 +115,6 @@ bool blk::ack_irq() 
>  blk::blk(virtio_device& virtio_dev) 
>      : virtio_driver(virtio_dev), _ro(false) 
>  { 
> - 
>      _driver_name = "virtio-blk"; 
>      _id = _instance++; 
>      virtio_i("VIRTIO BLK INSTANCE %d", _id); 
> @@ -144,6 +143,13 @@ blk::blk(virtio_device& virtio_dev) 
>              [=] { return this->ack_irq(); }, 
>              [=] { t->wake(); }); 
>      }; 
> + 
> +    int_factory.create_gsi_edge_interrupt = [this,t]() { 
> +        return new gsi_edge_interrupt( 
> +                _dev.get_irq(), 
> +                [=] { if (this->ack_irq()) t->wake(); }); 
> +    }; 
> + 
>      _dev.register_interrupt(int_factory); 
>   
>      // Enable indirect descriptor 
> diff --git a/drivers/virtio-mmio.cc b/drivers/virtio-mmio.cc 
> new file mode 100644 
> index 00000000..27d71848 
> --- /dev/null 
> +++ b/drivers/virtio-mmio.cc 
> @@ -0,0 +1,131 @@ 
> +// 
> +// Created by wkozaczuk on 12/25/18. 
> +// 
> + 
> +#include <osv/debug.hh> 
> +#include "virtio-mmio.hh" 
> + 
> +namespace virtio { 
> + 
> +// This implements virtio-io mmio device (transport layer, modeled after 
> PSI). 
> +// Read here - 
> https://www.kraxel.org/virtio/virtio-v1.0-cs03-virtio-gpu.html#x1-1080002 
> +hw_device_id mmio_device::get_id() 
> +{ 
> +    return hw_device_id(0x1af4, _device_id); 
> +} 
> + 
> +u8 mmio_device::get_status() { 
> +    return mmio_getl(_addr_mmio + VIRTIO_MMIO_STATUS) & 0xff; 
> +} 
> + 
> +void mmio_device::set_status(u8 status) { 
> +    mmio_setl(_addr_mmio + VIRTIO_MMIO_STATUS, status); 
> +} 
> + 
> +u64 mmio_device::get_available_features() { 
> +    u64 features; 
> + 
> +    mmio_setl(_addr_mmio + VIRTIO_MMIO_DEVICE_FEATURES_SEL, 1); 
> +    features = mmio_getl(_addr_mmio + VIRTIO_MMIO_DEVICE_FEATURES); 
> +    features <<= 32; 
> + 
> +    mmio_setl(_addr_mmio + VIRTIO_MMIO_DEVICE_FEATURES_SEL, 0); 
> +    features |= mmio_getl(_addr_mmio + VIRTIO_MMIO_DEVICE_FEATURES); 
> + 
> +    return features; 
> +} 
> + 
> +void mmio_device::set_enabled_features(u64 features) { 
> +    mmio_setl(_addr_mmio + VIRTIO_MMIO_DRIVER_FEATURES_SEL, 1); 
> +    mmio_setl(_addr_mmio + VIRTIO_MMIO_DRIVER_FEATURES, (u32)(features >> 
> 32)); 
> + 
> +    mmio_setl(_addr_mmio + VIRTIO_MMIO_DRIVER_FEATURES_SEL, 0); 
> +    mmio_setl(_addr_mmio + VIRTIO_MMIO_DRIVER_FEATURES, (u32)features); 
> +} 
> + 
> +void mmio_device::kick_queue(int queue_num) { 
> +    mmio_setl(_addr_mmio + VIRTIO_MMIO_QUEUE_NOTIFY, queue_num); 
> +} 
> + 
> +void mmio_device::select_queue(int queue_num) { 
> +    mmio_setl(_addr_mmio + VIRTIO_MMIO_QUEUE_SEL, queue_num); 
> +    assert(!mmio_getl(_addr_mmio + VIRTIO_MMIO_QUEUE_READY)); 
> +} 
> + 
> +u16 mmio_device::get_queue_size() { 
> +    return mmio_getl(_addr_mmio + VIRTIO_MMIO_QUEUE_NUM_MAX) & 0xffff; 
> +} 
> + 
> +void mmio_device::setup_queue(vring* queue) { 
> +    // Set size 
> +    mmio_setl(_addr_mmio + VIRTIO_MMIO_QUEUE_NUM, queue->size()); 
> +    // 
> +    // Pass addresses 
> +    mmio_setl(_addr_mmio + VIRTIO_MMIO_QUEUE_DESC_LOW, 
> (u32)queue->get_desc_addr()); 
> +    mmio_setl(_addr_mmio + VIRTIO_MMIO_QUEUE_DESC_HIGH, 
> (u32)(queue->get_desc_addr() >> 32)); 
> + 
> +    mmio_setl(_addr_mmio + VIRTIO_MMIO_QUEUE_AVAIL_LOW, 
> (u32)queue->get_avail_addr()); 
> +    mmio_setl(_addr_mmio + VIRTIO_MMIO_QUEUE_AVAIL_HIGH, 
> (u32)(queue->get_avail_addr() >> 32)); 
> + 
> +    mmio_setl(_addr_mmio + VIRTIO_MMIO_QUEUE_USED_LOW, 
> (u32)queue->get_used_addr()); 
> +    mmio_setl(_addr_mmio + VIRTIO_MMIO_QUEUE_USED_HIGH, 
> (u32)(queue->get_used_addr() >> 32)); 
> +} 
> + 
> +void mmio_device::activate_queue(int queue) 
> +{ 
> +    // 
> +    // Make it ready 
> +    select_queue(queue); 
> +    mmio_setl(_addr_mmio + VIRTIO_MMIO_QUEUE_READY, 1 ); 
> +} 
> + 
> +u8 mmio_device::read_and_ack_isr() { 
> +    //TODO: we might need to guard this read and write with a mutex 
> +    //to prevent two concurrent interrupts raised against same device 
> step 
> +    //on each other. Is it possible? Spec does not seem to say anything 
> about it. 
> +    unsigned long status = mmio_getl(_addr_mmio + 
> VIRTIO_MMIO_INTERRUPT_STATUS); 
> +    //Sometimes this assert would be false (maybe of what I am talking 
> above). 
> +    //assert(status & VIRTIO_MMIO_INT_VRING); 
> +    mmio_setl(_addr_mmio + VIRTIO_MMIO_INTERRUPT_ACK, status); 
> +    //return true; 
> +    return (status & VIRTIO_MMIO_INT_VRING); 
> +} 
> + 
> +u8 mmio_device::read_config(u32 offset) { 
> +    return mmio_getb(_addr_mmio + VIRTIO_MMIO_CONFIG + offset); 
> +} 
> + 
> +void mmio_device::register_interrupt(interrupt_factory irq_factory) 
> +{ 
> +    _irq.reset(irq_factory.create_gsi_edge_interrupt()); 
> +} 
> + 
> +bool mmio_device::parse_config() { 
> +    _addr_mmio = mmio_map(_address, _size); 
> + 
> +    u32 magic = mmio_getl(_addr_mmio + VIRTIO_MMIO_MAGIC_VALUE); 
> +    if (magic != ('v' | 'i' << 8 | 'r' << 16 | 't' << 24)) { 
> +        return false; 
> +    } 
> + 
> +    // Check device version 
> +    u32 version = mmio_getl(_addr_mmio + VIRTIO_MMIO_VERSION); 
> +    if (version < 1 || version > 2) { 
> +        debugf( "Version %ld not supported!\n", version); 
> +        return false; 
> +    } 
> + 
> +    _device_id = mmio_getl(_addr_mmio + VIRTIO_MMIO_DEVICE_ID); 
> +    if (_device_id == 0) { 
> +        // 
> +        // virtio-mmio device with an ID 0 is a (dummy) placeholder 
> +        // with no function. End probing now with no error reported. 
> +        debug( "Dummy virtio-mmio device detected!\n"); 
> +        return false; 
> +    } 
> +    _vendor_id = mmio_getl(_addr_mmio + VIRTIO_MMIO_VENDOR_ID); 
> + 
> +    debugf("Detected virtio-mmio device: (%ld,%ld)\n", _device_id, 
> _vendor_id); 
> +    return true; 
> +} 
> +} 
> diff --git a/drivers/virtio-mmio.hh b/drivers/virtio-mmio.hh 
> new file mode 100644 
> index 00000000..25c88f6a 
> --- /dev/null 
> +++ b/drivers/virtio-mmio.hh 
> @@ -0,0 +1,148 @@ 
> +// 
> +// Created by wkozaczuk on 12/25/18. 
> +// 
> + 
> +#ifndef VIRTIO_MMIO_DEVICE_HH 
> +#define VIRTIO_MMIO_DEVICE_HH 
> + 
> +#include <osv/types.h> 
> +#include <osv/mmio.hh> 
> +#include "virtio-device.hh" 
> +#include "virtio-vring.hh" 
> + 
> +using namespace hw; 
> + 
> +/* Magic value ("virt" string) - Read Only */ 
> +#define VIRTIO_MMIO_MAGIC_VALUE                0x000 
> + 
> +/* Virtio device version - Read Only */ 
> +#define VIRTIO_MMIO_VERSION                0x004 
> + 
> +/* Virtio device ID - Read Only */ 
> +#define VIRTIO_MMIO_DEVICE_ID                0x008 
> + 
> +/* Virtio vendor ID - Read Only */ 
> +#define VIRTIO_MMIO_VENDOR_ID                0x00c 
> + 
> +/* Bitmask of the features supported by the device (host) 
> + * (32 bits per set) - Read Only */ 
> +#define VIRTIO_MMIO_DEVICE_FEATURES        0x010 
> + 
> +/* Device (host) features set selector - Write Only */ 
> +#define VIRTIO_MMIO_DEVICE_FEATURES_SEL        0x014 
> + 
> +/* Bitmask of features activated by the driver (guest) 
> + * (32 bits per set) - Write Only */ 
> +#define VIRTIO_MMIO_DRIVER_FEATURES        0x020 
> + 
> +/* Activated features set selector - Write Only */ 
> +#define VIRTIO_MMIO_DRIVER_FEATURES_SEL        0x024 
> + 
> +/* Queue selector - Write Only */ 
> +#define VIRTIO_MMIO_QUEUE_SEL                0x030 
> + 
> +/* Maximum size of the currently selected queue - Read Only */ 
> +#define VIRTIO_MMIO_QUEUE_NUM_MAX        0x034 
> + 
> +/* Queue size for the currently selected queue - Write Only */ 
> +#define VIRTIO_MMIO_QUEUE_NUM                0x038 
> + 
> +/* Ready bit for the currently selected queue - Read Write */ 
> +#define VIRTIO_MMIO_QUEUE_READY                0x044 
> + 
> +/* Queue notifier - Write Only */ 
> +#define VIRTIO_MMIO_QUEUE_NOTIFY        0x050 
> + 
> +/* Interrupt status - Read Only */ 
> +#define VIRTIO_MMIO_INTERRUPT_STATUS        0x060 
> + 
> +/* Interrupt acknowledge - Write Only */ 
> +#define VIRTIO_MMIO_INTERRUPT_ACK        0x064 
> + 
> +/* Device status register - Read Write */ 
> +#define VIRTIO_MMIO_STATUS                0x070 
> + 
> +/* Selected queue's Descriptor Table address, 64 bits in two halves */ 
> +#define VIRTIO_MMIO_QUEUE_DESC_LOW        0x080 
> +#define VIRTIO_MMIO_QUEUE_DESC_HIGH        0x084 
> + 
> +/* Selected queue's Available Ring address, 64 bits in two halves */ 
> +#define VIRTIO_MMIO_QUEUE_AVAIL_LOW        0x090 
> +#define VIRTIO_MMIO_QUEUE_AVAIL_HIGH        0x094 
> + 
> +/* Selected queue's Used Ring address, 64 bits in two halves */ 
> +#define VIRTIO_MMIO_QUEUE_USED_LOW        0x0a0 
> +#define VIRTIO_MMIO_QUEUE_USED_HIGH        0x0a4 
> + 
> +/* Configuration atomicity value */ 
> +#define VIRTIO_MMIO_CONFIG_GENERATION        0x0fc 
> + 
> +/* The config space is defined by each driver as 
> + * the per-driver configuration space - Read Write */ 
> +#define VIRTIO_MMIO_CONFIG                0x100 
> + 
> +#define VIRTIO_MMIO_INT_VRING                (1 << 0) 
> +#define VIRTIO_MMIO_INT_CONFIG                (1 << 1) 
> + 
> +namespace virtio { 
> + 
> +class mmio_device : public virtio_device { 
> +public: 
> +    mmio_device(u64 address, u64 size, unsigned int irq) : 
> +        _address(address), _size(size), _irq_no(irq), 
> +        _vendor_id(0), _device_id(0), _addr_mmio(0) {} 
> + 
> +    virtual ~mmio_device() {} 
> + 
> +    virtual hw_device_id get_id(); 
> + 
> +    virtual void init() {} 
> +    virtual void print() {} 
> +    virtual void reset() {} 
> + 
> +    virtual unsigned get_irq() { return _irq_no; } 
> +    virtual u8 read_and_ack_isr(); 
> +    virtual void register_interrupt(interrupt_factory irq_factory); 
> + 
> +    virtual void select_queue(int queue); 
> +    virtual u16 get_queue_size(); 
> +    virtual void setup_queue(vring *queue); 
> +    virtual void activate_queue(int queue); 
> +    virtual void kick_queue(int queue); 
> + 
> +    virtual u64 get_available_features(); 
> +    virtual bool get_available_feature_bit(int bit) { return false; } 
> //TODO: Should be implemented ? 
> + 
> +    virtual void set_enabled_features(u64 features); 
> +    virtual u64 get_enabled_features() { return 0; } //TODO: Should be 
> implemented - function calling it in virtio.cc is not used? 
> +    virtual bool get_enabled_feature_bit(int bit) { return false; } 
> //TODO: Should be implemented - function calling it in virtio.cc is not 
> used? 
> + 
> +    virtual u8 get_status(); 
> +    virtual void set_status(u8 status); 
> + 
> +    virtual u8 read_config(u32 offset); 
> +    virtual void write_config(u32 offset, u8 byte) {} //TODO: Implement - 
> function 
> +    // calling it (virtio_driver::virtio_conf_write) is not used by 
> anything? 
> + 
> +    virtual void dump_config() {} //TODO: Implement it 
> + 
> +    virtual bool is_modern() { return true; }; 
> +    virtual size_t get_vring_alignment() { return 4096; 
> }//VIRTIO_PCI_VRING_ALIGN; }; 
> + 
> +    bool parse_config(); 
> + 
> +private: 
> +    u64 _address; 
> +    u64 _size; 
> +    unsigned int _irq_no; 
> +    //u64 _id; 
> +    u16 _vendor_id; 
> +    u16 _device_id; 
> + 
> +    mmioaddr_t _addr_mmio; 
> +    std::unique_ptr<gsi_edge_interrupt> _irq; 
> +}; 
> + 
> +} 
> + 
> +#endif //VIRTIO_MMIO_DEVICE_HH 
> diff --git a/drivers/virtio-net.cc b/drivers/virtio-net.cc 
> index ef2dff66..3522a80d 100644 
> --- a/drivers/virtio-net.cc 
> +++ b/drivers/virtio-net.cc 
> @@ -316,6 +316,13 @@ net::net(virtio_device& dev) 
>              [=] { return this->ack_irq(); }, 
>              [=] { poll_task->wake(); }); 
>      }; 
> + 
> +    int_factory.create_gsi_edge_interrupt = [this,poll_task]() { 
> +        return new gsi_edge_interrupt( 
> +            _dev.get_irq(), 
> +            [=] { if (this->ack_irq()) poll_task->wake(); }); 
> +    }; 
> + 
>      _dev.register_interrupt(int_factory); 
>   
>      fill_rx_ring(); 
> diff --git a/drivers/virtio.cc b/drivers/virtio.cc 
> index 5ef0f94d..9b778522 100644 
> --- a/drivers/virtio.cc 
> +++ b/drivers/virtio.cc 
> @@ -180,11 +180,16 @@ bool virtio_driver::get_device_feature_bit(int bit) 
>  void virtio_driver::set_guest_features(u64 features) 
>  { 
>      _dev.set_enabled_features(features); 
> +    _enabled_features = features; 
>  } 
>   
>  bool virtio_driver::get_guest_feature_bit(int bit) 
>  { 
> -    return _dev.get_enabled_feature_bit(bit); 
> +    if (_dev.is_modern()) //TODO: SO far only mmio - will work with 
> modern pci? 
> +        return (_enabled_features & (1 << bit)) != 0; 
> +    else 
> +        return _dev.get_enabled_feature_bit(bit); 
> +    //return _dev.get_enabled_feature_bit(bit); 
>  } 
>   
>  u8 virtio_driver::get_dev_status() 
> diff --git a/drivers/virtio.hh b/drivers/virtio.hh 
> index ef915e1c..4b59a91a 100644 
> --- a/drivers/virtio.hh 
> +++ b/drivers/virtio.hh 
> @@ -114,6 +114,7 @@ protected: 
>      bool _cap_indirect_buf; 
>      bool _cap_event_idx = false; 
>      static int _disk_idx; 
> +    u64 _enabled_features; 
>  }; 
>   
>  template <typename T, u16 ID> 
> diff --git a/loader.cc b/loader.cc 
> index c29bf4c4..530e3816 100644 
> --- a/loader.cc 
> +++ b/loader.cc 
> @@ -55,6 +55,8 @@ 
>  #include "drivers/null.hh" 
>   
>  #include "libc/network/__dns.hh" 
> +#include "early-console.hh" 
> +#include <processor.hh> 
>   
>  using namespace osv; 
>  using namespace osv::clock::literals; 
> @@ -426,6 +428,7 @@ void* do_main_thread(void *_main_args) 
>      } 
>   
>      boot_time.event("Total time"); 
> +    processor::outb(123, 0x3f0); 
>   
>      if (opt_bootchart) { 
>          boot_time.print_chart(); 
> @@ -545,7 +548,7 @@ void main_cont(int loader_argc, char** loader_argv) 
>      memory::enable_debug_allocator(); 
>   
>  #ifndef AARCH64_PORT_STUB 
> -    acpi::init(); 
> +    //acpi::init(); 
>  #endif /* !AARCH64_PORT_STUB */ 
>   
>      if (sched::cpus.size() > sched::max_cpus) { 
> -- 
> 2.19.1 
>
>

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/d/optout.

Reply via email to