Please bear in mind it is a RFC polluted with hacks and many TODO comments that raise many questions.
Feel free to review it especially if you have ideas on how to better restructure the code (I left some comments about it) and if you see any obvious bugs. Regards, Waldek On Saturday, January 5, 2019 at 1:44:47 AM UTC-5, Waldek Kozaczuk wrote: > > This patch provides all necessary changes to OSv > to make it boot on AWS firecracker. > > The curl command exmaple to create and start instance with > single block device: > > ./scripts/setup-block.sh && ./scripts/create-instance.sh && > ./scripts/start-instance.sh > > where: > curl --unix-socket /tmp/firecracker.socket -i \ > -X PUT 'http://localhost/drives/rootfs' \ > -H 'Accept: application/json' \ > -H 'Content-Type: application/json' \ > -d '{ > "drive_id": "rootfs", > "path_on_host": > "/home/wkozaczuk/projects/osv/build/release/usr.rofs", > "is_root_device": false, > "is_read_only": false > }' > > curl --unix-socket /tmp/firecracker.socket -i \ > -X PUT 'http://localhost/boot-source' \ > -H 'Accept: application/json' \ > -H 'Content-Type: application/json' \ > -d '{ > "kernel_image_path": > "/home/wkozaczuk/projects/osv/build/release/loader-stripped.elf", > "boot_args": "--bootchart /hello" > }' > > curl --unix-socket /tmp/firecracker.socket -i \ > -X PUT 'http://localhost/actions' \ > -H 'Accept: application/json' \ > -H 'Content-Type: application/json' \ > -d '{ > "action_type": "InstanceStart" > }' > > Signed-off-by: Waldemar Kozaczuk <[email protected]> > --- > Makefile | 2 + > arch/x64/arch-setup.cc | 180 +++++++++++++++++++--- > arch/x64/boot.S | 37 ++++- > arch/x64/loader.ld | 2 +- > arch/x64/power.cc | 14 +- > arch/x64/smp.cc | 16 +- > drivers/acpi.cc | 10 +- > drivers/virtio-assign.cc | 3 +- > drivers/virtio-blk.cc | 78 ++++++---- > drivers/virtio-blk.hh | 10 +- > drivers/virtio-mmio.cc | 128 ++++++++++++++++ > drivers/virtio-mmio.hh | 132 ++++++++++++++++ > drivers/virtio-net.cc | 86 +++++------ > drivers/virtio-net.hh | 10 +- > drivers/virtio-vring.cc | 25 ++- > drivers/virtio-vring.hh | 10 +- > drivers/virtio.hh | 18 ++- > drivers/virtio2.cc | 289 +++++++++++++++++++++++++++++++++++ > drivers/virtio2.hh | 115 ++++++++++++++ > include/osv/virtio-assign.hh | 2 +- > loader.cc | 3 +- > 21 files changed, 1039 insertions(+), 131 deletions(-) > create mode 100644 drivers/virtio-mmio.cc > create mode 100644 drivers/virtio-mmio.hh > create mode 100644 drivers/virtio2.cc > create mode 100644 drivers/virtio2.hh > > diff --git a/Makefile b/Makefile > index 68043485..cf571632 100644 > --- a/Makefile > +++ b/Makefile > @@ -819,9 +819,11 @@ drivers += $(libtsm) > drivers += drivers/vga.o drivers/kbd.o drivers/isa-serial.o > drivers += arch/$(arch)/pvclock-abi.o > drivers += drivers/virtio.o > +drivers += drivers/virtio2.o > drivers += drivers/virtio-vring.o > drivers += drivers/virtio-net.o > drivers += drivers/virtio-assign.o > +drivers += drivers/virtio-mmio.o > drivers += drivers/vmxnet3.o > drivers += drivers/vmxnet3-queues.o > drivers += drivers/virtio-blk.o > diff --git a/arch/x64/arch-setup.cc b/arch/x64/arch-setup.cc > index 6e4833cf..58da00c5 100644 > --- a/arch/x64/arch-setup.cc > +++ b/arch/x64/arch-setup.cc > @@ -22,6 +22,19 @@ > #include <osv/commands.hh> > #include "dmi.hh" > > +// Not sure if Linux zero page is always located at this place > +// in memory or its address is passed in one of the registers > +// -> double check > +#define ZERO_PAGE_START 0x7000 > +#define SETUP_HEADER_OFFSET 0x1f1 // look at bootparam.h in linux > +#define BOOT_FLAG_OFFSET sizeof(u8) + 4 * sizeof(u16) + sizeof(u32) > + > +#define E820_ENTRIES_OFFSET 0x1e8 // look at bootparam.h in linux > +#define E820_TABLE_OFFSET 0x2d0 // look at bootparam.h in linux > + > +#define CMD_LINE_PTR_OFFSET sizeof(u8) * 5 + sizeof(u16) * 11 + > sizeof(u32) * 7 > +#define CMD_LINE_SIZE_OFFSET CMD_LINE_PTR_OFFSET + sizeof(u8) * 2 + > sizeof(u16) + sizeof(u32) * 3 > + > struct multiboot_info_type { > u32 flags; > u32 mem_lower; > @@ -61,12 +74,81 @@ struct e820ent { > u32 type; > } __attribute__((packed)); > > +struct _e820ent { > + u64 addr; > + u64 size; > + u32 type; > +} __attribute__((packed)); > + > osv_multiboot_info_type* osv_multiboot_info; > > -void parse_cmdline(multiboot_info_type& mb) > +struct mmio_device_info { > + u64 address; > + u64 size; > + unsigned int irq; > +}; > + > +//TODO: For now we are limiting number of mmio devices to two > +// Ideally we should be using somewhat more dynamic structure > +struct mmio_device_info mmio_device_info_entries[2]; > +int mmio_device_info_count = 0; > + > +#define VIRTIO_MMIO_DEVICE_CMDLINE_PREFIX "virtio_mmio.device=" > +char* parse_mmio_device_info(char *cmdline, mmio_device_info *info) { > + // [virtio_mmio.]device=<size>@<baseaddr>:<irq>[:<id>] > + char *prefix_pos = strstr(cmdline,VIRTIO_MMIO_DEVICE_CMDLINE_PREFIX); > + if (!prefix_pos) > + return nullptr; > + > + char *size_pos = prefix_pos + > strlen(VIRTIO_MMIO_DEVICE_CMDLINE_PREFIX); > + if (sscanf(size_pos,"%ld", &info->size) != 1) > + return nullptr; > + > + char *at_pos = strstr(size_pos,"@"); > + if (!at_pos) > + return nullptr; > + > + switch(*(at_pos - 1)) { > + case 'k': > + case 'K': > + info->size = info->size * 1024; > + break; > + case 'm': > + case 'M': > + info->size = info->size * 1024 * 1024; > + break; > + default: > + break; > + } > + > + if (sscanf(at_pos, "@%lli:%u", &info->address, &info->irq) == 2) > + return prefix_pos; > + else > + return nullptr; > +} > + > +//void parse_cmdline(multiboot_info_type& mb) > +void parse_cmdline(char *cmdline) > { > - auto p = reinterpret_cast<char*>(mb.cmdline); > - osv::parse_cmdline(p); > + //auto p = reinterpret_cast<char*>(mb.cmdline); > + // We are assuming the mmio devices information is appended to the > + // command line (at least it is the case with the firecracker) so > + // once we parse those we strip it away so only plain OSv command > line > + // is left > + //TODO: There may be a smarter, better way to parse this information > + char *virtio_device_info_pos = > parse_mmio_device_info(cmdline,mmio_device_info_entries); > + if (virtio_device_info_pos) { > + mmio_device_info_count++; > + *virtio_device_info_pos = 0; > + > + virtio_device_info_pos = > + parse_mmio_device_info(virtio_device_info_pos + > 1,mmio_device_info_entries + 1); > + if (virtio_device_info_pos) { > + mmio_device_info_count++; > + } > + } > + > + osv::parse_cmdline(cmdline); > } > > void setup_temporary_phys_map() > @@ -121,28 +203,64 @@ void arch_setup_free_memory() > { > static ulong edata; > asm ("movl $.edata, %0" : "=rm"(edata)); > - // copy to stack so we don't free it now > - auto omb = *osv_multiboot_info; > - auto mb = omb.mb; > - auto e820_buffer = alloca(mb.mmap_length); > - auto e820_size = mb.mmap_length; > - memcpy(e820_buffer, reinterpret_cast<void*>(mb.mmap_addr), > e820_size); > + > + void *zero_page = reinterpret_cast<void*>(ZERO_PAGE_START); > + void *setup_header = zero_page + SETUP_HEADER_OFFSET; > + > + // Grab command line from zero page > + u32 cmdline_ptr = *static_cast<u32*>(setup_header + > CMD_LINE_PTR_OFFSET); > + u32 cmdline_size = *static_cast<u32*>(setup_header + > CMD_LINE_SIZE_OFFSET); > + > + // Copy cmdline from zero page > + void* cmdline = reinterpret_cast<void*>((u64)cmdline_ptr); > + void *cmdline_copy = alloca(cmdline_size + 1); > + memcpy(cmdline_copy,cmdline,cmdline_size); > + ((char*)cmdline_copy)[cmdline_size] = 0; > + > + debug_early("Cmdline: "); > + debug_early((char*)cmdline_copy); > + debug_early("\n"); > + > + // Copy e820 information from zero page > + struct _e820ent *e820_table = static_cast<struct _e820ent > *>(zero_page + E820_TABLE_OFFSET); > + > + //TODO: We are assuming two entries but in reality > + //there could be more so this logic below needs to be a little > smarter > + auto e820_size = 48; > + auto e820_buffer = alloca(e820_size); > + { > + struct e820ent *lower = reinterpret_cast<struct > e820ent*>(e820_buffer); > + lower->ent_size = 20; > + lower->type = 1; > + lower->addr = e820_table[0].addr; > + lower->size = e820_table[0].size; > + > + struct e820ent *upper = lower + 1; > + upper->ent_size = 20; > + upper->type = 1; > + upper->addr = e820_table[1].addr; > + upper->size = e820_table[1].size; > + } > + > for_each_e820_entry(e820_buffer, e820_size, [] (e820ent ent) { > memory::phys_mem_size += ent.size; > }); > constexpr u64 initial_map = 1 << 30; // 1GB mapped by startup code > > - u64 time; > - time = omb.tsc_init_hi; > - time = (time << 32) | omb.tsc_init; > + //TODO: We are assuming that bootchart-wise we start here but > + // in reality it all starts at boot.S:start64_. However what happens > + // before this points should take negligible amount of time, no? > + u64 time = 0; > + //time = omb.tsc_init_hi; > + //time = (time << 32) | omb.tsc_init; > boot_time.event(0, "", time ); > > - time = omb.tsc_disk_done_hi; > - time = (time << 32) | omb.tsc_disk_done; > + //time = omb.tsc_disk_done_hi; > + //time = (time << 32) | omb.tsc_disk_done; > boot_time.event(1, "disk read (real mode)", time ); > > - time = omb.tsc_uncompress_done_hi; > - time = (time << 32) | omb.tsc_uncompress_done; > + //time = omb.tsc_uncompress_done_hi; > + //time = (time << 32) | omb.tsc_uncompress_done; > boot_time.event(2, "uncompress lzloader.elf", time ); > > auto c = processor::cpuid(0x80000000); > @@ -185,7 +303,10 @@ void arch_setup_free_memory() > elf_size = edata - elf_phys; > mmu::linear_map(elf_start, elf_phys, elf_size, OSV_KERNEL_BASE); > // get rid of the command line, before low memory is unmapped > - parse_cmdline(mb); > + //parse_cmdline(mb); > + > + parse_cmdline((char*)cmdline_copy); > + > // now that we have some free memory, we can start mapping the rest > mmu::switch_to_runtime_page_tables(); > for_each_e820_entry(e820_buffer, e820_size, [] (e820ent ent) { > @@ -260,6 +381,7 @@ void arch_init_premain() > #include "drivers/virtio-net.hh" > #include "drivers/virtio-assign.hh" > #include "drivers/virtio-rng.hh" > +#include "drivers/virtio-mmio.hh" > #include "drivers/xenplatform-pci.hh" > #include "drivers/ahci.hh" > #include "drivers/vmw-pvscsi.hh" > @@ -271,13 +393,33 @@ extern bool opt_assign_net; > void arch_init_drivers() > { > // initialize panic drivers > - panic::pvpanic::probe_and_setup(); > + // pvpanic depends on ACPI which firecracker > + // does not suppot so we disable probing it altogether > + //TODO: Is there a way to detect if ACPI is available and > + //only then probe pvpanic? > + //panic::pvpanic::probe_and_setup(); > boot_time.event("pvpanic done"); > > // Enumerate PCI devices > - pci::pci_device_enumeration(); > + // PCI is not supported by firecracker > + //TODO: Is there a way to detect if PCI is present and only enumerate > + //PCI devices then? Somehow even firecracker presents a bus with > + //some dummy devices. > + //pci::pci_device_enumeration(); > boot_time.event("pci enumerated"); > > + // Register any parsed virtio-mmio devices > + for (int d = 0; d < mmio_device_info_count; d++) { > + auto info = mmio_device_info_entries[d]; > + auto mmio_device = new virtio::mmio_device(info.address, > info.size, info.irq); > + if (mmio_device->parse_config()) { > + device_manager::instance()->register_device(mmio_device); > + } > + else { > + delete mmio_device; > + } > + } > + > // Initialize all drivers > hw::driver_manager* drvman = hw::driver_manager::instance(); > drvman->register_driver(virtio::blk::probe); > diff --git a/arch/x64/boot.S b/arch/x64/boot.S > index c4b97e2d..3bdc57ea 100644 > --- a/arch/x64/boot.S > +++ b/arch/x64/boot.S > @@ -97,9 +97,10 @@ start64: > sub %rdi, %rcx > xor %eax, %eax > rep stosb > + mov $0x200000, %rbp > mov %rbp, elf_header > # %ebx is set by boot16.S before running the loader > - mov %rbx, osv_multiboot_info > + //mov %rbx, osv_multiboot_info > lea init_stack_top, %rsp > call premain > mov __loader_argc, %edi > @@ -107,8 +108,42 @@ start64: > call main > .cfi_endproc > > +.code64 > +.global _start64 > +_start64: > +//TODO: Is there a way to switch to protected mode and then jump to > start32 > +//which would be even better than what we do below? > + //For whatever reason at this point (long mode?) we cannot > + //set gdt the way it is done in start32 but linux expects > + //similar one so whatever firecracker sets seems to be OK. > + //Maybe because OSv gdt has 32-bit entry > + //lgdt gdt_desc > + //mov $0x10, %eax > + //mov %eax, %ds > + //mov %eax, %es > + //mov %eax, %fs > + //mov %eax, %gs > + //mov %eax, %ss > + // Disable paging and enable PAE > + mov $BOOT_CR4, %eax > + mov %eax, %cr4 > + // Setup page tables > + lea ident_pt_l4, %eax > + mov %eax, %cr3 > + // Write contents of EDX:EAX to Model Specific Register specified by > ECX register > + mov $0xc0000080, %ecx > + mov $0x00000900, %eax > + xor %edx, %edx > + wrmsr > + // Enable paging > + mov $BOOT_CR0, %eax > + mov %eax, %cr0 > + jmp start64 > + > # The smp trampoline must be in the lower 1MB, so we manually relocate > # it to address 0 by subtracting smpboot from any offset > +//TODO: I am pretty sure we have some logic missing for SMP > +//given firecracker jumps us into long mode. Not sure how to handle it. > .data > .global smpboot > smpboot: > diff --git a/arch/x64/loader.ld b/arch/x64/loader.ld > index efe78d52..e08b310c 100644 > --- a/arch/x64/loader.ld > +++ b/arch/x64/loader.ld > @@ -108,4 +108,4 @@ PHDRS { > eh_frame PT_GNU_EH_FRAME; > note PT_NOTE; > } > -ENTRY(start32); > +ENTRY(_start64); > diff --git a/arch/x64/power.cc b/arch/x64/power.cc > index 81849335..534ffdf0 100644 > --- a/arch/x64/power.cc > +++ b/arch/x64/power.cc > @@ -27,17 +27,9 @@ void halt(void) > > void poweroff(void) > { > - ACPI_STATUS status = AcpiEnterSleepStatePrep(ACPI_STATE_S5); > - if (ACPI_FAILURE(status)) { > - debug("AcpiEnterSleepStatePrep failed: %s\n", > AcpiFormatException(status)); > - halt(); > - } > - status = AcpiEnterSleepState(ACPI_STATE_S5); > - if (ACPI_FAILURE(status)) { > - debug("AcpiEnterSleepState failed: %s\n", > AcpiFormatException(status)); > - halt(); > - } > - > + // Firecracker only supports this as away to shutdown the VM > + // Reset using the 8042 PS/2 Controller ("keyboard controller") > + processor::outb(0xfe, 0x64); > // We shouldn't get here on x86. > halt(); > } > diff --git a/arch/x64/smp.cc b/arch/x64/smp.cc > index 073ef206..4bdae0c6 100644 > --- a/arch/x64/smp.cc > +++ b/arch/x64/smp.cc > @@ -74,7 +74,21 @@ void parse_madt() > > void smp_init() > { > - parse_madt(); > + //Firecracker does not support ACPI so > + //there is no MADT table > + //parse_madt(); > + > + //TODO: This a nasty hack as we support > + // single vCPU. Eventually we should parse out equivalent information > + // about all vCPUs from MP table (seems like more ancient way). > + // See > https://github.com/firecracker-microvm/firecracker/blob/7f29bca9ca197283275eab62fddc1c10ab580794/x86_64/src/mptable.rs > > + auto c = new sched::cpu(0); > + c->arch.apic_id = 0;//lapic->Id; > + c->arch.acpi_id = 0;//lapic->ProcessorId; > + c->arch.initstack.next = smp_stack_free; > + smp_stack_free = &c->arch.initstack; > + sched::cpus.push_back(c); > + > sched::current_cpu = sched::cpus[0]; > for (auto c : sched::cpus) { > c->incoming_wakeups = > aligned_array_new<sched::cpu::incoming_wakeup_queue>(sched::cpus.size()); > diff --git a/drivers/acpi.cc b/drivers/acpi.cc > index 506ca68f..006231f1 100644 > --- a/drivers/acpi.cc > +++ b/drivers/acpi.cc > @@ -605,7 +605,9 @@ void init() > > } > > -void __attribute__((constructor(init_prio::acpi))) acpi_init_early() > -{ > - XENPV_ALTERNATIVE({ acpi::early_init(); }, {}); > -} > +//TODO: Is there a way to detect if ACPI is available and do not even > +//try to load relevant information? Right now OSv depends on ACPI > available > +//void __attribute__((constructor(init_prio::acpi))) acpi_init_early() > +//{ > +// XENPV_ALTERNATIVE({ acpi::early_init(); }, {}); > +//} > diff --git a/drivers/virtio-assign.cc b/drivers/virtio-assign.cc > index 8a128d5e..5a49966f 100644 > --- a/drivers/virtio-assign.cc > +++ b/drivers/virtio-assign.cc > @@ -42,8 +42,9 @@ public: > > // osv::assigned_virtio API implementation: > > - virtual void kick(int queue) override { > + virtual bool kick(int queue) override { > virtio_driver::kick(queue); > + return true; > } > > virtual u32 queue_size(int queue) override > diff --git a/drivers/virtio-blk.cc b/drivers/virtio-blk.cc > index a3f7898d..3784c453 100644 > --- a/drivers/virtio-blk.cc > +++ b/drivers/virtio-blk.cc > @@ -10,7 +10,6 @@ > > #include "drivers/virtio.hh" > #include "drivers/virtio-blk.hh" > -#include "drivers/pci-device.hh" > #include <osv/interrupt.hh> > > #include <osv/mempool.hh> > @@ -100,20 +99,20 @@ struct driver blk_driver = { > > bool blk::ack_irq() > { > - auto isr = virtio_conf_readb(VIRTIO_PCI_ISR); > - auto queue = get_virt_queue(0); > - > - if (isr) { > - queue->disable_interrupts(); > + if(_dev.ack_irq()) { > + get_virt_queue(0)->disable_interrupts(); > return true; > - } else { > - return false; > } > - > + else > + return false; > } > > -blk::blk(pci::device& pci_dev) > - : virtio_driver(pci_dev), _ro(false) > +//TODO: For now this driver is hardcoded to expect mmio_device > +// but eventually we could introduce some sort of virtio_device > +// interface class that pci_device and mmio_device would implement/extend > +// from. > +blk::blk(mmio_device& _dev) > + : virtio_mmio_driver(_dev), _ro(false) > { > > _driver_name = "virtio-blk"; > @@ -128,13 +127,12 @@ blk::blk(pci::device& pci_dev) > sched::thread::attr().name("virtio-blk")); > t->start(); > auto queue = get_virt_queue(0); > - if (pci_dev.is_msix()) { > - _msi.easy_register({ { 0, [=] { queue->disable_interrupts(); }, t > } }); > - } else { > - _irq.reset(new pci_interrupt(pci_dev, > - [=] { return ack_irq(); }, > - [=] { t->wake(); })); > - } > + > + //TODO: This logic should really be moved to a device class > + // so that it would do different thing depending if it is MMIO or PCI > + // device > + _irq.reset(new gsi_edge_interrupt(_dev.get_irq(), > + [=] { if(this->ack_irq()) t->wake(); > })); > > // Enable indirect descriptor > queue->set_use_indirect(true); > @@ -164,25 +162,32 @@ blk::~blk() > void blk::read_config() > { > //read all of the block config (including size, mce, topology,..) in > one shot > - virtio_conf_read(virtio_pci_config_offset(), &_config, > sizeof(_config)); > + //TODO: It may to do with legacy vs non-legacy device > + //but at least with latest spec we should check if individual > + //config fields are available vs reading whole config struct. For > example > + //firecracker reports memory read violation warnings > + virtio_conf_read(0, &_config, sizeof(_config.capacity)); > > trace_virtio_blk_read_config_capacity(_config.capacity); > > - if (get_guest_feature_bit(VIRTIO_BLK_F_SIZE_MAX)) > + //TODO: Legacy vs non-legacy device concept. In pre-finalized spec > + //there was a concept of "guest"/"host". Right now they use > equivalent > + //concepts - "driver"/"device" > + if (get_drv_feature_bit(VIRTIO_BLK_F_SIZE_MAX)) > trace_virtio_blk_read_config_size_max(_config.size_max); > - if (get_guest_feature_bit(VIRTIO_BLK_F_SEG_MAX)) > + if (get_drv_feature_bit(VIRTIO_BLK_F_SEG_MAX)) > trace_virtio_blk_read_config_seg_max(_config.seg_max); > - if (get_guest_feature_bit(VIRTIO_BLK_F_GEOMETRY)) { > + if (get_drv_feature_bit(VIRTIO_BLK_F_GEOMETRY)) { > > trace_virtio_blk_read_config_geometry((u32)_config.geometry.cylinders, > (u32)_config.geometry.heads, (u32)_config.geometry.sectors); > } > - if (get_guest_feature_bit(VIRTIO_BLK_F_BLK_SIZE)) > + if (get_drv_feature_bit(VIRTIO_BLK_F_BLK_SIZE)) > trace_virtio_blk_read_config_blk_size(_config.blk_size); > - if (get_guest_feature_bit(VIRTIO_BLK_F_TOPOLOGY)) { > + if (get_drv_feature_bit(VIRTIO_BLK_F_TOPOLOGY)) { > > trace_virtio_blk_read_config_topology((u32)_config.physical_block_exp, > (u32)_config.alignment_offset, (u32)_config.min_io_size, > (u32)_config.opt_io_size); > } > - if (get_guest_feature_bit(VIRTIO_BLK_F_CONFIG_WCE)) > + if (get_drv_feature_bit(VIRTIO_BLK_F_CONFIG_WCE)) > trace_virtio_blk_read_config_wce((u32)_config.wce); > - if (get_guest_feature_bit(VIRTIO_BLK_F_RO)) { > + if (get_drv_feature_bit(VIRTIO_BLK_F_RO)) { > set_readonly(); > trace_virtio_blk_read_config_ro(); > } > @@ -195,7 +200,7 @@ void blk::req_done() > > while (1) { > > - virtio_driver::wait_for_queue(queue, > &vring::used_ring_not_empty); > + virtio_mmio_driver::wait_for_queue(queue, > &vring::used_ring_not_empty); > trace_virtio_blk_wake(); > > u32 len; > @@ -239,11 +244,12 @@ int blk::make_request(struct bio* bio) > WITH_LOCK(_lock) { > > if (!bio) return EIO; > - > + /* TODO: Is this really correct to simply disable this logic? > + * Temporarily comment out -> seg_max is unavailable ... > if (bio->bio_bcount/mmu::page_size + 1 > _config.seg_max) { > trace_virtio_blk_make_request_seg_max(bio->bio_bcount, > _config.seg_max); > return EIO; > - } > + }*/ > > auto* queue = get_virt_queue(0); > blk_request_type type; > @@ -296,7 +302,7 @@ int blk::make_request(struct bio* bio) > > u32 blk::get_driver_features() > { > - auto base = virtio_driver::get_driver_features(); > + auto base = virtio_mmio_driver::get_driver_features(); > return (base | ( 1 << VIRTIO_BLK_F_SIZE_MAX) > | ( 1 << VIRTIO_BLK_F_SEG_MAX) > | ( 1 << VIRTIO_BLK_F_GEOMETRY) > @@ -308,7 +314,17 @@ u32 blk::get_driver_features() > > hw_driver* blk::probe(hw_device* dev) > { > - return virtio::probe<blk, VIRTIO_BLK_DEVICE_ID>(dev); > + //TODO: Eventually we should account for both PCI and MMIO devices > + //once we have a virtio_device class > + if (auto mmio_dev = dynamic_cast<mmio_device*>(dev)) { > + if (mmio_dev->get_id() == hw_device_id(0x0, VIRTIO_ID_BLOCK)) { > + debug_early("virtio-blk::probe() -> found virtio-mmio device > ...\n"); > + return new blk(*mmio_dev); > + } > + } > + return nullptr; > + > + //return virtio::probe<blk, VIRTIO_BLK_DEVICE_ID>(dev); > } > > } > diff --git a/drivers/virtio-blk.hh b/drivers/virtio-blk.hh > index 17cf4d18..79e42710 100644 > --- a/drivers/virtio-blk.hh > +++ b/drivers/virtio-blk.hh > @@ -8,12 +8,13 @@ > #ifndef VIRTIO_BLK_DRIVER_H > #define VIRTIO_BLK_DRIVER_H > #include "drivers/virtio.hh" > -#include "drivers/pci-device.hh" > +#include "drivers/virtio2.hh" > +#include "drivers/virtio-mmio.hh" > #include <osv/bio.h> > > namespace virtio { > > -class blk : public virtio_driver { > +class blk : public virtio_mmio_driver { > public: > > // The feature bitmap for virtio blk > @@ -118,7 +119,7 @@ public: > u8 status; > }; > > - explicit blk(pci::device& dev); > + explicit blk(mmio_device& dev); > virtual ~blk(); > > virtual std::string get_name() const { return _driver_name; } > @@ -157,7 +158,8 @@ private: > bool _ro; > // This mutex protects parallel make_request invocations > mutex _lock; > - std::unique_ptr<pci_interrupt> _irq; > + //TODO: There is no PCI so is it OK to use GSI edge interrupt? > + std::unique_ptr<gsi_edge_interrupt> _irq; > }; > > } > diff --git a/drivers/virtio-mmio.cc b/drivers/virtio-mmio.cc > new file mode 100644 > index 00000000..1485ea00 > --- /dev/null > +++ b/drivers/virtio-mmio.cc > @@ -0,0 +1,128 @@ > +// > +// Created by wkozaczuk on 12/25/18. > +// > + > +#include <osv/debug.hh> > +#include "virtio-mmio.hh" > + > +namespace virtio { > + > +// This implements virtio-io mmio device (transport layer, modeled after > PSI). > +// Read here - > https://www.kraxel.org/virtio/virtio-v1.0-cs03-virtio-gpu.html#x1-1080002 > +hw_device_id mmio_device::get_id() > +{ > + return hw_device_id(_vendor_id, _device_id); > +} > + > +void mmio_device::print() {} > +void mmio_device::reset() {} > + > +u8 mmio_device::get_status() { > + return mmio_getl(_addr_mmio + VIRTIO_MMIO_STATUS) & 0xff; > +} > + > +void mmio_device::set_status(u8 status) { > + mmio_setl(_addr_mmio + VIRTIO_MMIO_STATUS, status); > +} > + > +void mmio_device::add_status(u8 status) { > + mmio_setl(_addr_mmio + VIRTIO_MMIO_STATUS, status | get_status()); > +} > + > +u64 mmio_device::get_features() { > + u64 features; > + > + mmio_setl(_addr_mmio + VIRTIO_MMIO_DEVICE_FEATURES_SEL, 1); > + features = mmio_getl(_addr_mmio + VIRTIO_MMIO_DEVICE_FEATURES); > + features <<= 32; > + > + mmio_setl(_addr_mmio + VIRTIO_MMIO_DEVICE_FEATURES_SEL, 0); > + features |= mmio_getl(_addr_mmio + VIRTIO_MMIO_DEVICE_FEATURES); > + > + return features; > +} > + > +void mmio_device::set_features(u64 features) { > + mmio_setl(_addr_mmio + VIRTIO_MMIO_DRIVER_FEATURES_SEL, 1); > + mmio_setl(_addr_mmio + VIRTIO_MMIO_DRIVER_FEATURES, (u32)(features >> > 32)); > + > + mmio_setl(_addr_mmio + VIRTIO_MMIO_DRIVER_FEATURES_SEL, 0); > + mmio_setl(_addr_mmio + VIRTIO_MMIO_DRIVER_FEATURES, (u32)features); > +} > + > +void mmio_device::kick(int queue_num) { > + mmio_setl(_addr_mmio + VIRTIO_MMIO_QUEUE_NOTIFY, queue_num); > +} > + > +void mmio_device::select_queue(int queue_num) { > + mmio_setl(_addr_mmio + VIRTIO_MMIO_QUEUE_SEL, queue_num); > + assert(!mmio_getl(_addr_mmio + VIRTIO_MMIO_QUEUE_READY)); > +} > + > +u16 mmio_device::get_queue_size() { > + return mmio_getl(_addr_mmio + VIRTIO_MMIO_QUEUE_NUM_MAX) & 0xffff; > +} > + > +void mmio_device::activate_queue(vring* queue) { > + // Set size > + mmio_setl(_addr_mmio + VIRTIO_MMIO_QUEUE_NUM, queue->size()); > + // > + // Pass addresses > + mmio_setl(_addr_mmio + VIRTIO_MMIO_QUEUE_DESC_LOW, > (u32)queue->get_desc_addr()); > + mmio_setl(_addr_mmio + VIRTIO_MMIO_QUEUE_DESC_HIGH, > (u32)(queue->get_desc_addr() >> 32)); > + > + mmio_setl(_addr_mmio + VIRTIO_MMIO_QUEUE_AVAIL_LOW, > (u32)queue->get_avail_addr()); > + mmio_setl(_addr_mmio + VIRTIO_MMIO_QUEUE_AVAIL_HIGH, > (u32)(queue->get_avail_addr() >> 32)); > + > + mmio_setl(_addr_mmio + VIRTIO_MMIO_QUEUE_USED_LOW, > (u32)queue->get_used_addr()); > + mmio_setl(_addr_mmio + VIRTIO_MMIO_QUEUE_USED_HIGH, > (u32)(queue->get_used_addr() >> 32)); > + // > + // Make it ready > + mmio_setl(_addr_mmio + VIRTIO_MMIO_QUEUE_READY, 1 ); > +} > + > +bool mmio_device::ack_irq() { > + //TODO: we might need to guard this read and write with a mutex > + //to prevent two concurrent interrupts raised against same device > step > + //on each other. Is it possible? Spec does not seem to say anything > about it. > + unsigned long status = mmio_getl(_addr_mmio + > VIRTIO_MMIO_INTERRUPT_STATUS); > + //Sometimes this assert would be false (maybe of what I am talking > above). > + //assert(status & VIRTIO_MMIO_INT_VRING); > + mmio_setl(_addr_mmio + VIRTIO_MMIO_INTERRUPT_ACK, status); > + //return true; > + return (status & VIRTIO_MMIO_INT_VRING) != 0; > +} > + > +u8 mmio_device::read_config(u64 offset) { > + return mmio_getb(_addr_mmio + VIRTIO_MMIO_CONFIG + offset); > +} > + > +bool mmio_device::parse_config() { > + _addr_mmio = mmio_map(_address, _size); > + > + u32 magic = mmio_getl(_addr_mmio + VIRTIO_MMIO_MAGIC_VALUE); > + if (magic != ('v' | 'i' << 8 | 'r' << 16 | 't' << 24)) { > + return false; > + } > + > + // Check device version > + u32 version = mmio_getl(_addr_mmio + VIRTIO_MMIO_VERSION); > + if (version < 1 || version > 2) { > + debugf( "Version %ld not supported!\n", version); > + return false; > + } > + > + _device_id = mmio_getl(_addr_mmio + VIRTIO_MMIO_DEVICE_ID); > + if (_device_id == 0) { > + // > + // virtio-mmio device with an ID 0 is a (dummy) placeholder > + // with no function. End probing now with no error reported. > + debug( "Dummy virtio-mmio device detected!\n"); > + return false; > + } > + _vendor_id = mmio_getl(_addr_mmio + VIRTIO_MMIO_VENDOR_ID); > + > + debugf("Detected virtio-mmio device: (%ld,%ld)\n", _device_id, > _vendor_id); > + return true; > +} > +} > diff --git a/drivers/virtio-mmio.hh b/drivers/virtio-mmio.hh > new file mode 100644 > index 00000000..78f8150b > --- /dev/null > +++ b/drivers/virtio-mmio.hh > @@ -0,0 +1,132 @@ > +// > +// Created by wkozaczuk on 12/25/18. > +// > + > +#ifndef VIRTIO_MMIO_DEVICE_HH > +#define VIRTIO_MMIO_DEVICE_HH > + > +#include <osv/types.h> > +#include <osv/mmio.hh> > +#include "device.hh" > +#include "virtio-vring.hh" > + > +using namespace hw; > + > +/* Magic value ("virt" string) - Read Only */ > +#define VIRTIO_MMIO_MAGIC_VALUE 0x000 > + > +/* Virtio device version - Read Only */ > +#define VIRTIO_MMIO_VERSION 0x004 > + > +/* Virtio device ID - Read Only */ > +#define VIRTIO_MMIO_DEVICE_ID 0x008 > + > +/* Virtio vendor ID - Read Only */ > +#define VIRTIO_MMIO_VENDOR_ID 0x00c > + > +/* Bitmask of the features supported by the device (host) > + * (32 bits per set) - Read Only */ > +#define VIRTIO_MMIO_DEVICE_FEATURES 0x010 > + > +/* Device (host) features set selector - Write Only */ > +#define VIRTIO_MMIO_DEVICE_FEATURES_SEL 0x014 > + > +/* Bitmask of features activated by the driver (guest) > + * (32 bits per set) - Write Only */ > +#define VIRTIO_MMIO_DRIVER_FEATURES 0x020 > + > +/* Activated features set selector - Write Only */ > +#define VIRTIO_MMIO_DRIVER_FEATURES_SEL 0x024 > + > +/* Queue selector - Write Only */ > +#define VIRTIO_MMIO_QUEUE_SEL 0x030 > + > +/* Maximum size of the currently selected queue - Read Only */ > +#define VIRTIO_MMIO_QUEUE_NUM_MAX 0x034 > + > +/* Queue size for the currently selected queue - Write Only */ > +#define VIRTIO_MMIO_QUEUE_NUM 0x038 > + > +/* Ready bit for the currently selected queue - Read Write */ > +#define VIRTIO_MMIO_QUEUE_READY 0x044 > + > +/* Queue notifier - Write Only */ > +#define VIRTIO_MMIO_QUEUE_NOTIFY 0x050 > + > +/* Interrupt status - Read Only */ > +#define VIRTIO_MMIO_INTERRUPT_STATUS 0x060 > + > +/* Interrupt acknowledge - Write Only */ > +#define VIRTIO_MMIO_INTERRUPT_ACK 0x064 > + > +/* Device status register - Read Write */ > +#define VIRTIO_MMIO_STATUS 0x070 > + > +/* Selected queue's Descriptor Table address, 64 bits in two halves */ > +#define VIRTIO_MMIO_QUEUE_DESC_LOW 0x080 > +#define VIRTIO_MMIO_QUEUE_DESC_HIGH 0x084 > + > +/* Selected queue's Available Ring address, 64 bits in two halves */ > +#define VIRTIO_MMIO_QUEUE_AVAIL_LOW 0x090 > +#define VIRTIO_MMIO_QUEUE_AVAIL_HIGH 0x094 > + > +/* Selected queue's Used Ring address, 64 bits in two halves */ > +#define VIRTIO_MMIO_QUEUE_USED_LOW 0x0a0 > +#define VIRTIO_MMIO_QUEUE_USED_HIGH 0x0a4 > + > +/* Configuration atomicity value */ > +#define VIRTIO_MMIO_CONFIG_GENERATION 0x0fc > + > +/* The config space is defined by each driver as > + * the per-driver configuration space - Read Write */ > +#define VIRTIO_MMIO_CONFIG 0x100 > + > +#define VIRTIO_MMIO_INT_VRING (1 << 0) > +#define VIRTIO_MMIO_INT_CONFIG (1 << 1) > + > +namespace virtio { > + > +class mmio_device : public hw_device { > +public: > + mmio_device(u64 address, u64 size, unsigned int irq) : > + _address(address), _size(size), _irq(irq), > + _vendor_id(0), _device_id(0), _addr_mmio(0) {} > + > + virtual ~mmio_device() {} > + > + virtual hw_device_id get_id(); > + virtual void print(); > + virtual void reset(); > + > + unsigned int get_irq() { return _irq; } > + bool ack_irq(); > + > + u8 get_status(); > + void set_status(u8 status); > + void add_status(u8 status); > + > + u64 get_features(); > + void set_features(u64 features); > + > + void kick(int queue); > + void select_queue(int queue); > + u16 get_queue_size(); > + void activate_queue(vring* queue); > + > + u8 read_config(u64 offset); > + bool parse_config(); > + > +private: > + u64 _address; > + u64 _size; > + unsigned int _irq; > + //u64 _id; > + u16 _vendor_id; > + u16 _device_id; > + > + mmioaddr_t _addr_mmio; > +}; > + > +} > + > +#endif //VIRTIO_MMIO_DEVICE_HH > diff --git a/drivers/virtio-net.cc b/drivers/virtio-net.cc > index b820c36c..9ad5d48a 100644 > --- a/drivers/virtio-net.cc > +++ b/drivers/virtio-net.cc > @@ -10,7 +10,6 @@ > > #include "drivers/virtio.hh" > #include "drivers/virtio-net.hh" > -#include "drivers/pci-device.hh" > #include <osv/interrupt.hh> > > #include <osv/mempool.hh> > @@ -217,19 +216,20 @@ void net::fill_qstats(const struct txq& txq, struct > if_data* out_data) const > > bool net::ack_irq() > { > - auto isr = virtio_conf_readb(VIRTIO_PCI_ISR); > - > - if (isr) { > + if( _dev.ack_irq()) { > _rxq.vqueue->disable_interrupts(); > return true; > - } else { > - return false; > } > - > + else > + return false; > } > > -net::net(pci::device& dev) > - : virtio_driver(dev), > +//TODO: For now this driver is hardcoded to expect mmio_device > +// but eventually we could introduce some sort of virtio_device > +// interface class that pci_device and mmio_device would implement/extend > +// from. > +net::net(mmio_device& dev) > + : virtio_mmio_driver(dev), > _rxq(get_virt_queue(0), [this] { this->receiver(); }), > _txq(this, get_virt_queue(1)) > { > @@ -244,7 +244,8 @@ net::net(pci::device& dev) > setup_features(); > read_config(); > > - _hdr_size = _mergeable_bufs ? sizeof(net_hdr_mrg_rxbuf) : > sizeof(net_hdr); > + //TODO: Legacy vs non-legacy -> the non-legacy header includes one > more field > + _hdr_size = sizeof(net_hdr_mrg_rxbuf); > > //initialize the BSD interface _if > _ifn = if_alloc(IFT_ETHER); > @@ -290,16 +291,9 @@ net::net(pci::device& dev) > > ether_ifattach(_ifn, _config.mac); > > - if (dev.is_msix()) { > - _msi.easy_register({ > - { 0, [&] { _rxq.vqueue->disable_interrupts(); }, poll_task }, > - { 1, [&] { _txq.vqueue->disable_interrupts(); }, nullptr } > - }); > - } else { > - _irq.reset(new pci_interrupt(dev, > - [=] { return this->ack_irq(); }, > - [=] { poll_task->wake(); })); > - } > + //TODO: Move to device class > + _irq.reset(new gsi_edge_interrupt(_dev.get_irq(), > + [=] { if(this->ack_irq()) > poll_task->wake(); })); > > fill_rx_ring(); > > @@ -324,10 +318,14 @@ net::~net() > void net::read_config() > { > //read all of the net config in one shot > - virtio_conf_read(virtio_pci_config_offset(), &_config, > sizeof(_config)); > - > - if (get_guest_feature_bit(VIRTIO_NET_F_MAC)) > - net_i("The mac addr of the device is %x:%x:%x:%x:%x:%x", > + //TODO: It may to do with legacy vs non-legacy device > + //but at least with latest spec we should check if individual > + //config fields are available vs reading whole config struct. For > example > + //firecracker reports memory read violation warnings > + virtio_conf_read(0, &(_config.mac[0]), sizeof(_config.mac)); > + > + if (get_drv_feature_bit(VIRTIO_NET_F_MAC)) > + debugf("The mac addr of the device is %x:%x:%x:%x:%x:%x\n", > (u32)_config.mac[0], > (u32)_config.mac[1], > (u32)_config.mac[2], > @@ -335,20 +333,20 @@ void net::read_config() > (u32)_config.mac[4], > (u32)_config.mac[5]); > > - _mergeable_bufs = get_guest_feature_bit(VIRTIO_NET_F_MRG_RXBUF); > - _status = get_guest_feature_bit(VIRTIO_NET_F_STATUS); > - _tso_ecn = get_guest_feature_bit(VIRTIO_NET_F_GUEST_ECN); > - _host_tso_ecn = get_guest_feature_bit(VIRTIO_NET_F_HOST_ECN); > - _csum = get_guest_feature_bit(VIRTIO_NET_F_CSUM); > - _guest_csum = get_guest_feature_bit(VIRTIO_NET_F_GUEST_CSUM); > - _guest_tso4 = get_guest_feature_bit(VIRTIO_NET_F_GUEST_TSO4); > - _host_tso4 = get_guest_feature_bit(VIRTIO_NET_F_HOST_TSO4); > - _guest_ufo = get_guest_feature_bit(VIRTIO_NET_F_GUEST_UFO); > - > - net_i("Features: %s=%d,%s=%d", "Status", _status, "TSO_ECN", > _tso_ecn); > - net_i("Features: %s=%d,%s=%d", "Host TSO ECN", _host_tso_ecn, "CSUM", > _csum); > - net_i("Features: %s=%d,%s=%d", "Guest_csum", _guest_csum, "guest > tso4", _guest_tso4); > - net_i("Features: %s=%d", "host tso4", _host_tso4); > + _mergeable_bufs = get_drv_feature_bit(VIRTIO_NET_F_MRG_RXBUF); > + _status = get_drv_feature_bit(VIRTIO_NET_F_STATUS); > + _tso_ecn = get_drv_feature_bit(VIRTIO_NET_F_GUEST_ECN); > + _host_tso_ecn = get_drv_feature_bit(VIRTIO_NET_F_HOST_ECN); > + _csum = get_drv_feature_bit(VIRTIO_NET_F_CSUM); > + _guest_csum = get_drv_feature_bit(VIRTIO_NET_F_GUEST_CSUM); > + _guest_tso4 = get_drv_feature_bit(VIRTIO_NET_F_GUEST_TSO4); > + _host_tso4 = get_drv_feature_bit(VIRTIO_NET_F_HOST_TSO4); > + _guest_ufo = get_drv_feature_bit(VIRTIO_NET_F_GUEST_UFO); > + > + debugf("Features: %s=%d,%s=%d\n", "Status", _status, "TSO_ECN", > _tso_ecn); > + debugf("Features: %s=%d,%s=%d\n", "Host TSO ECN", _host_tso_ecn, > "CSUM", _csum); > + debugf("Features: %s=%d,%s=%d\n", "Guest_csum", _guest_csum, "guest > tso4", _guest_tso4); > + debugf("Features: %s=%d,%s=%d\n", "host tso4", _host_tso4, > "mergeable_bufs", _mergeable_bufs); > } > > /** > @@ -422,7 +420,7 @@ void net::receiver() > while (1) { > > // Wait for rx queue (used elements) > - virtio_driver::wait_for_queue(vq, &vring::used_ring_not_empty); > + virtio_mmio_driver::wait_for_queue(vq, > &vring::used_ring_not_empty); > trace_virtio_net_rx_wake(); > > _rxq.stats.rx_bh_wakeups++; > @@ -840,7 +838,7 @@ void net::txq::gc() > > u32 net::get_driver_features() > { > - u32 base = virtio_driver::get_driver_features(); > + u32 base = virtio_mmio_driver::get_driver_features(); > return (base | (1 << VIRTIO_NET_F_MAC) \ > | (1 << VIRTIO_NET_F_MRG_RXBUF) \ > | (1 << VIRTIO_NET_F_STATUS) \ > @@ -856,12 +854,14 @@ u32 net::get_driver_features() > > hw_driver* net::probe(hw_device* dev) > { > - if (auto pci_dev = dynamic_cast<pci::device*>(dev)) { > - if (pci_dev->get_id() == hw_device_id(VIRTIO_VENDOR_ID, > VIRTIO_NET_DEVICE_ID)) { > + //TODO: Handle both PCI and MMIO devices > + if (auto mmio_dev = dynamic_cast<mmio_device*>(dev)) { > + if (mmio_dev->get_id() == hw_device_id(0x0, VIRTIO_ID_NET)) { > + debug_early("virtio-net::probe() -> found virtio-mmio device > ...\n"); > if (opt_maxnic && maxnic-- <= 0) { > return nullptr; > } else { > - return aligned_new<net>(*pci_dev); > + return aligned_new<net>(*mmio_dev); > } > } > } > diff --git a/drivers/virtio-net.hh b/drivers/virtio-net.hh > index ad323e69..6edca97a 100644 > --- a/drivers/virtio-net.hh > +++ b/drivers/virtio-net.hh > @@ -14,9 +14,11 @@ > #include <bsd/sys/sys/mbuf.h> > > #include <osv/percpu_xmit.hh> > +#include <osv/interrupt.hh> > > #include "drivers/virtio.hh" > -#include "drivers/pci-device.hh" > +#include "drivers/virtio2.hh" > +#include "drivers/virtio-mmio.hh" > > namespace virtio { > > @@ -24,7 +26,7 @@ namespace virtio { > * @class net > * virtio net device class > */ > -class net : public virtio_driver { > +class net : public virtio_mmio_driver { > public: > > // The feature bitmap for virtio net > @@ -204,7 +206,7 @@ public: > u16 virtqueue_pairs; > }; > > - explicit net(pci::device& dev); > + explicit net(mmio_device& dev); > virtual ~net(); > > virtual std::string get_name() const { return _driver_name; } > @@ -269,7 +271,7 @@ private: > > u32 _hdr_size; > > - std::unique_ptr<pci_interrupt> _irq; > + std::unique_ptr<gsi_edge_interrupt> _irq; > > struct rxq_stats { > u64 rx_packets; /* if_ipackets */ > diff --git a/drivers/virtio-vring.cc b/drivers/virtio-vring.cc > index 5f89fb5d..58681ead 100644 > --- a/drivers/virtio-vring.cc > +++ b/drivers/virtio-vring.cc > @@ -38,9 +38,9 @@ TRACEPOINT(trace_vring_get_buf_ret, "vring=%p > _avail_count %d", void*, int); > > namespace virtio { > > - vring::vring(virtio_driver* const dev, u16 num, u16 q_index) > + vring::vring(vdriver* const driver, u16 num, u16 q_index) > { > - _dev = dev; > + _driver = driver; > _q_index = q_index; > // Alloc enough pages for the vring... > unsigned sz = VIRTIO_ALIGN(vring::get_size(num, > VIRTIO_PCI_VRING_ALIGN)); > @@ -86,6 +86,21 @@ namespace virtio { > return mmu::virt_to_phys(_vring_ptr); > } > > + u64 vring::get_desc_addr() > + { > + return mmu::virt_to_phys(_desc); > + } > + > + u64 vring::get_avail_addr() > + { > + return mmu::virt_to_phys(_avail); > + } > + > + u64 vring::get_used_addr() > + { > + return mmu::virt_to_phys(_used); > + } > + > unsigned vring::get_size(unsigned int num, unsigned long align) > { > return (((sizeof(vring_desc) * num + sizeof(u16) * (3 + num) > @@ -102,7 +117,7 @@ namespace virtio { > inline bool vring::use_indirect(int desc_needed) > { > return _use_indirect && > - _dev->get_indirect_buf_cap() && > + _driver->get_indirect_buf_cap() && > // don't let the posting fail due to low available buffers > number > (desc_needed > _avail_count || > // no need to use indirect for a single descriptor > @@ -280,7 +295,7 @@ namespace virtio { > vring::kick() { > bool kicked = true; > > - if (_dev->get_event_idx_cap()) { > + if (_driver->get_event_idx_cap()) { > > std::atomic_thread_fence(std::memory_order_seq_cst); > > @@ -310,7 +325,7 @@ namespace virtio { > // and _avail_added_since_kick might wrap around due to this > bulking. > // > if (kicked || (_avail_added_since_kick >= (u16)(~0) / 2)) { > - _dev->kick(_q_index); > + _driver->kick(_q_index); > _avail_added_since_kick = 0; > return true; > } > diff --git a/drivers/virtio-vring.hh b/drivers/virtio-vring.hh > index af8691ca..0bf1c581 100644 > --- a/drivers/virtio-vring.hh > +++ b/drivers/virtio-vring.hh > @@ -29,7 +29,7 @@ TRACEPOINT(trace_vring_update_used_event, "vring=%p: > _used_ring_host_head %d", > namespace virtio { > > class virtio_vring; > -class virtio_driver; > +class vdriver; > > // Buffer descriptors in the ring > class vring_desc { > @@ -122,12 +122,16 @@ class virtio_driver; > class vring { > public: > > - vring(virtio_driver* const dev, u16 num, u16 q_index); > + vring(vdriver* const driver, u16 num, u16 q_index); > virtual ~vring(); > > u64 get_paddr(); > static unsigned get_size(unsigned int num, unsigned long align); > > + u64 get_desc_addr(); > + u64 get_avail_addr(); > + u64 get_used_addr(); > + > // Ring operations > bool add_buf(void* cookie); > // Get the top item from the used ring > @@ -240,7 +244,7 @@ class virtio_driver; > private: > > // Up pointer > - virtio_driver* _dev; > + vdriver* _driver; > u16 _q_index; > // The physical of the physical address handed to the virtio > device > void* _vring_ptr; > diff --git a/drivers/virtio.hh b/drivers/virtio.hh > index b918de28..2faadec7 100644 > --- a/drivers/virtio.hh > +++ b/drivers/virtio.hh > @@ -28,6 +28,9 @@ enum VIRTIO_CONFIG { > VIRTIO_CONFIG_S_DRIVER = 2, > /* Driver has used its parts of the config, and is happy */ > VIRTIO_CONFIG_S_DRIVER_OK = 4, > + /* Indicates that the driver has acknowledged all the features it > understands, > + * and feature negotiation is complete */ > + VIRTIO_CONFIG_S_FEATURES_OK = 8, > /* We've given up on this device. */ > VIRTIO_CONFIG_S_FAILED = 0x80, > /* Some virtio feature bits (currently bits 28 through 31) are > reserved for the > @@ -104,7 +107,20 @@ enum { > > const unsigned max_virtqueues_nr = 64; > > -class virtio_driver : public hw_driver { > +//TODO: This helps us make vring class not be > +// tightly coupled to virtio_driver but rather this simple > +// interface. Might not be necessary once we introduce > +// virtio_device class. > +class vdriver { > +public: > + virtual ~vdriver() {}; > + > + virtual bool get_indirect_buf_cap() = 0; > + virtual bool get_event_idx_cap() = 0; > + virtual bool kick(int queue) = 0; > +}; > + > +class virtio_driver : public hw_driver, public vdriver { > public: > explicit virtio_driver(pci::device& dev); > virtual ~virtio_driver(); > diff --git a/drive... -- You received this message because you are subscribed to the Google Groups "OSv Development" group. To unsubscribe from this group and stop receiving emails from it, send an email to [email protected]. For more options, visit https://groups.google.com/d/optout.
