This patch provides the changes necessary to make OSv boot on AWS
Firecracker: a 64-bit entry point (_start64) matching the state in which
Firecracker starts the guest kernel, reading of the command line and e820
memory map from the Linux "zero page", a virtio-mmio transport layer, and
mmio-based versions of the virtio-blk and virtio-net drivers.
An example of creating and starting an instance with a single block
device:
./scripts/setup-block.sh && ./scripts/create-instance.sh &&
./scripts/start-instance.sh
where the scripts issue the following curl requests against the
Firecracker API socket:
curl --unix-socket /tmp/firecracker.socket -i \
-X PUT 'http://localhost/drives/rootfs' \
-H 'Accept: application/json' \
-H 'Content-Type: application/json' \
-d '{
"drive_id": "rootfs",
"path_on_host": "/home/wkozaczuk/projects/osv/build/release/usr.rofs",
"is_root_device": false,
"is_read_only": false
}'
curl --unix-socket /tmp/firecracker.socket -i \
-X PUT 'http://localhost/boot-source' \
-H 'Accept: application/json' \
-H 'Content-Type: application/json' \
-d '{
"kernel_image_path":
"/home/wkozaczuk/projects/osv/build/release/loader-stripped.elf",
"boot_args": "--bootchart /hello"
}'
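Optionally, a network interface can be configured in the same way before
starting the instance. The request below is only an illustrative sketch
based on the Firecracker network-interfaces API; the tap device name and
guest MAC are hypothetical and are not created by the scripts above:
curl --unix-socket /tmp/firecracker.socket -i \
-X PUT 'http://localhost/network-interfaces/eth0' \
-H 'Accept: application/json' \
-H 'Content-Type: application/json' \
-d '{
"iface_id": "eth0",
"host_dev_name": "tap0",
"guest_mac": "AA:FC:00:00:00:01"
}'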
curl --unix-socket /tmp/firecracker.socket -i \
-X PUT 'http://localhost/actions' \
-H 'Accept: application/json' \
-H 'Content-Type: application/json' \
-d '{
"action_type": "InstanceStart"
}'
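Firecracker informs the guest about the configured MMIO devices by
appending entries of the form virtio_mmio.device=<size>@<baseaddr>:<irq>
to the kernel command line; the new parse_mmio_device_info() strips them
out before the remaining OSv command line is parsed. For illustration
only (the sizes, addresses and IRQs below are hypothetical), the command
line received by the guest could look like:
--bootchart /hello virtio_mmio.device=4K@0xd0000000:5 virtio_mmio.device=4K@0xd0001000:6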
Signed-off-by: Waldemar Kozaczuk <[email protected]>
---
Makefile | 2 +
arch/x64/arch-setup.cc | 180 +++++++++++++++++++---
arch/x64/boot.S | 37 ++++-
arch/x64/loader.ld | 2 +-
arch/x64/power.cc | 14 +-
arch/x64/smp.cc | 16 +-
drivers/acpi.cc | 10 +-
drivers/virtio-assign.cc | 3 +-
drivers/virtio-blk.cc | 78 ++++++----
drivers/virtio-blk.hh | 10 +-
drivers/virtio-mmio.cc | 128 ++++++++++++++++
drivers/virtio-mmio.hh | 132 ++++++++++++++++
drivers/virtio-net.cc | 86 +++++------
drivers/virtio-net.hh | 10 +-
drivers/virtio-vring.cc | 25 ++-
drivers/virtio-vring.hh | 10 +-
drivers/virtio.hh | 18 ++-
drivers/virtio2.cc | 289 +++++++++++++++++++++++++++++++++++
drivers/virtio2.hh | 115 ++++++++++++++
include/osv/virtio-assign.hh | 2 +-
loader.cc | 3 +-
21 files changed, 1039 insertions(+), 131 deletions(-)
create mode 100644 drivers/virtio-mmio.cc
create mode 100644 drivers/virtio-mmio.hh
create mode 100644 drivers/virtio2.cc
create mode 100644 drivers/virtio2.hh
diff --git a/Makefile b/Makefile
index 68043485..cf571632 100644
--- a/Makefile
+++ b/Makefile
@@ -819,9 +819,11 @@ drivers += $(libtsm)
drivers += drivers/vga.o drivers/kbd.o drivers/isa-serial.o
drivers += arch/$(arch)/pvclock-abi.o
drivers += drivers/virtio.o
+drivers += drivers/virtio2.o
drivers += drivers/virtio-vring.o
drivers += drivers/virtio-net.o
drivers += drivers/virtio-assign.o
+drivers += drivers/virtio-mmio.o
drivers += drivers/vmxnet3.o
drivers += drivers/vmxnet3-queues.o
drivers += drivers/virtio-blk.o
diff --git a/arch/x64/arch-setup.cc b/arch/x64/arch-setup.cc
index 6e4833cf..58da00c5 100644
--- a/arch/x64/arch-setup.cc
+++ b/arch/x64/arch-setup.cc
@@ -22,6 +22,19 @@
#include <osv/commands.hh>
#include "dmi.hh"
+// Not sure if Linux zero page is always located at this place
+// in memory or its address is passed in one of the registers
+// -> double check
+#define ZERO_PAGE_START 0x7000
+#define SETUP_HEADER_OFFSET 0x1f1 // look at bootparam.h in linux
+#define BOOT_FLAG_OFFSET sizeof(u8) + 4 * sizeof(u16) + sizeof(u32)
+
+#define E820_ENTRIES_OFFSET 0x1e8 // look at bootparam.h in linux
+#define E820_TABLE_OFFSET 0x2d0 // look at bootparam.h in linux
+
+#define CMD_LINE_PTR_OFFSET sizeof(u8) * 5 + sizeof(u16) * 11 + sizeof(u32) * 7
+#define CMD_LINE_SIZE_OFFSET CMD_LINE_PTR_OFFSET + sizeof(u8) * 2 + sizeof(u16) + sizeof(u32) * 3
+
struct multiboot_info_type {
u32 flags;
u32 mem_lower;
@@ -61,12 +74,81 @@ struct e820ent {
u32 type;
} __attribute__((packed));
+struct _e820ent {
+ u64 addr;
+ u64 size;
+ u32 type;
+} __attribute__((packed));
+
osv_multiboot_info_type* osv_multiboot_info;
-void parse_cmdline(multiboot_info_type& mb)
+struct mmio_device_info {
+ u64 address;
+ u64 size;
+ unsigned int irq;
+};
+
+//TODO: For now we are limiting the number of mmio devices to two.
+// Ideally we should be using a somewhat more dynamic structure
+struct mmio_device_info mmio_device_info_entries[2];
+int mmio_device_info_count = 0;
+
+#define VIRTIO_MMIO_DEVICE_CMDLINE_PREFIX "virtio_mmio.device="
+char* parse_mmio_device_info(char *cmdline, mmio_device_info *info) {
+ // [virtio_mmio.]device=<size>@<baseaddr>:<irq>[:<id>]
+ char *prefix_pos = strstr(cmdline,VIRTIO_MMIO_DEVICE_CMDLINE_PREFIX);
+ if (!prefix_pos)
+ return nullptr;
+
+ char *size_pos = prefix_pos + strlen(VIRTIO_MMIO_DEVICE_CMDLINE_PREFIX);
+ if (sscanf(size_pos,"%ld", &info->size) != 1)
+ return nullptr;
+
+ char *at_pos = strstr(size_pos,"@");
+ if (!at_pos)
+ return nullptr;
+
+ switch(*(at_pos - 1)) {
+ case 'k':
+ case 'K':
+ info->size = info->size * 1024;
+ break;
+ case 'm':
+ case 'M':
+ info->size = info->size * 1024 * 1024;
+ break;
+ default:
+ break;
+ }
+
+ if (sscanf(at_pos, "@%lli:%u", &info->address, &info->irq) == 2)
+ return prefix_pos;
+ else
+ return nullptr;
+}
+
+//void parse_cmdline(multiboot_info_type& mb)
+void parse_cmdline(char *cmdline)
{
- auto p = reinterpret_cast<char*>(mb.cmdline);
- osv::parse_cmdline(p);
+ //auto p = reinterpret_cast<char*>(mb.cmdline);
+    // We are assuming the mmio device information is appended to the
+    // command line (at least that is the case with firecracker), so
+    // once we parse those entries we strip them away so that only the
+    // plain OSv command line is left
+ //TODO: There may be a smarter, better way to parse this information
+    char *virtio_device_info_pos = parse_mmio_device_info(cmdline,mmio_device_info_entries);
+ if (virtio_device_info_pos) {
+ mmio_device_info_count++;
+ *virtio_device_info_pos = 0;
+
+        virtio_device_info_pos =
+            parse_mmio_device_info(virtio_device_info_pos + 1,mmio_device_info_entries + 1);
+ if (virtio_device_info_pos) {
+ mmio_device_info_count++;
+ }
+ }
+
+ osv::parse_cmdline(cmdline);
}
void setup_temporary_phys_map()
@@ -121,28 +203,64 @@ void arch_setup_free_memory()
{
static ulong edata;
asm ("movl $.edata, %0" : "=rm"(edata));
- // copy to stack so we don't free it now
- auto omb = *osv_multiboot_info;
- auto mb = omb.mb;
- auto e820_buffer = alloca(mb.mmap_length);
- auto e820_size = mb.mmap_length;
- memcpy(e820_buffer, reinterpret_cast<void*>(mb.mmap_addr), e820_size);
+
+ void *zero_page = reinterpret_cast<void*>(ZERO_PAGE_START);
+ void *setup_header = zero_page + SETUP_HEADER_OFFSET;
+
+ // Grab command line from zero page
+ u32 cmdline_ptr = *static_cast<u32*>(setup_header + CMD_LINE_PTR_OFFSET);
+ u32 cmdline_size = *static_cast<u32*>(setup_header + CMD_LINE_SIZE_OFFSET);
+
+ // Copy cmdline from zero page
+ void* cmdline = reinterpret_cast<void*>((u64)cmdline_ptr);
+ void *cmdline_copy = alloca(cmdline_size + 1);
+ memcpy(cmdline_copy,cmdline,cmdline_size);
+ ((char*)cmdline_copy)[cmdline_size] = 0;
+
+ debug_early("Cmdline: ");
+ debug_early((char*)cmdline_copy);
+ debug_early("\n");
+
+ // Copy e820 information from zero page
+    struct _e820ent *e820_table = static_cast<struct _e820ent *>(zero_page + E820_TABLE_OFFSET);
+
+ //TODO: We are assuming two entries but in reality
+ //there could be more so this logic below needs to be a little smarter
+ auto e820_size = 48;
+ auto e820_buffer = alloca(e820_size);
+ {
+ struct e820ent *lower = reinterpret_cast<struct e820ent*>(e820_buffer);
+ lower->ent_size = 20;
+ lower->type = 1;
+ lower->addr = e820_table[0].addr;
+ lower->size = e820_table[0].size;
+
+ struct e820ent *upper = lower + 1;
+ upper->ent_size = 20;
+ upper->type = 1;
+ upper->addr = e820_table[1].addr;
+ upper->size = e820_table[1].size;
+ }
+
for_each_e820_entry(e820_buffer, e820_size, [] (e820ent ent) {
memory::phys_mem_size += ent.size;
});
constexpr u64 initial_map = 1 << 30; // 1GB mapped by startup code
- u64 time;
- time = omb.tsc_init_hi;
- time = (time << 32) | omb.tsc_init;
+    //TODO: We are assuming that bootchart-wise we start here but
+    // in reality it all starts at boot.S:_start64. However what happens
+    // before this point should take a negligible amount of time, no?
+ u64 time = 0;
+ //time = omb.tsc_init_hi;
+ //time = (time << 32) | omb.tsc_init;
boot_time.event(0, "", time );
- time = omb.tsc_disk_done_hi;
- time = (time << 32) | omb.tsc_disk_done;
+ //time = omb.tsc_disk_done_hi;
+ //time = (time << 32) | omb.tsc_disk_done;
boot_time.event(1, "disk read (real mode)", time );
- time = omb.tsc_uncompress_done_hi;
- time = (time << 32) | omb.tsc_uncompress_done;
+ //time = omb.tsc_uncompress_done_hi;
+ //time = (time << 32) | omb.tsc_uncompress_done;
boot_time.event(2, "uncompress lzloader.elf", time );
auto c = processor::cpuid(0x80000000);
@@ -185,7 +303,10 @@ void arch_setup_free_memory()
elf_size = edata - elf_phys;
mmu::linear_map(elf_start, elf_phys, elf_size, OSV_KERNEL_BASE);
// get rid of the command line, before low memory is unmapped
- parse_cmdline(mb);
+ //parse_cmdline(mb);
+
+ parse_cmdline((char*)cmdline_copy);
+
// now that we have some free memory, we can start mapping the rest
mmu::switch_to_runtime_page_tables();
for_each_e820_entry(e820_buffer, e820_size, [] (e820ent ent) {
@@ -260,6 +381,7 @@ void arch_init_premain()
#include "drivers/virtio-net.hh"
#include "drivers/virtio-assign.hh"
#include "drivers/virtio-rng.hh"
+#include "drivers/virtio-mmio.hh"
#include "drivers/xenplatform-pci.hh"
#include "drivers/ahci.hh"
#include "drivers/vmw-pvscsi.hh"
@@ -271,13 +393,33 @@ extern bool opt_assign_net;
void arch_init_drivers()
{
// initialize panic drivers
- panic::pvpanic::probe_and_setup();
+    // pvpanic depends on ACPI which firecracker
+    // does not support so we disable probing it altogether
+ //TODO: Is there a way to detect if ACPI is available and
+ //only then probe pvpanic?
+ //panic::pvpanic::probe_and_setup();
boot_time.event("pvpanic done");
// Enumerate PCI devices
- pci::pci_device_enumeration();
+ // PCI is not supported by firecracker
+ //TODO: Is there a way to detect if PCI is present and only enumerate
+ //PCI devices then? Somehow even firecracker presents a bus with
+ //some dummy devices.
+ //pci::pci_device_enumeration();
boot_time.event("pci enumerated");
+ // Register any parsed virtio-mmio devices
+ for (int d = 0; d < mmio_device_info_count; d++) {
+ auto info = mmio_device_info_entries[d];
+        auto mmio_device = new virtio::mmio_device(info.address, info.size, info.irq);
+ if (mmio_device->parse_config()) {
+ device_manager::instance()->register_device(mmio_device);
+ }
+ else {
+ delete mmio_device;
+ }
+ }
+
// Initialize all drivers
hw::driver_manager* drvman = hw::driver_manager::instance();
drvman->register_driver(virtio::blk::probe);
diff --git a/arch/x64/boot.S b/arch/x64/boot.S
index c4b97e2d..3bdc57ea 100644
--- a/arch/x64/boot.S
+++ b/arch/x64/boot.S
@@ -97,9 +97,10 @@ start64:
sub %rdi, %rcx
xor %eax, %eax
rep stosb
+ mov $0x200000, %rbp
mov %rbp, elf_header
# %ebx is set by boot16.S before running the loader
- mov %rbx, osv_multiboot_info
+ //mov %rbx, osv_multiboot_info
lea init_stack_top, %rsp
call premain
mov __loader_argc, %edi
@@ -107,8 +108,42 @@ start64:
call main
.cfi_endproc
+.code64
+.global _start64
+_start64:
+//TODO: Is there a way to switch to protected mode and then jump to start32
+//which would be even better than what we do below?
+    //For whatever reason at this point (long mode?) we cannot
+    //set the gdt the way it is done in start32, but Linux expects
+    //a similar one so whatever firecracker sets up seems to be OK.
+    //Maybe because the OSv gdt has a 32-bit entry
+ //lgdt gdt_desc
+ //mov $0x10, %eax
+ //mov %eax, %ds
+ //mov %eax, %es
+ //mov %eax, %fs
+ //mov %eax, %gs
+ //mov %eax, %ss
+ // Disable paging and enable PAE
+ mov $BOOT_CR4, %eax
+ mov %eax, %cr4
+ // Setup page tables
+ lea ident_pt_l4, %eax
+ mov %eax, %cr3
+    // Write EDX:EAX to the Model Specific Register selected by ECX (here EFER, 0xc0000080) to set LME and NXE
+ mov $0xc0000080, %ecx
+ mov $0x00000900, %eax
+ xor %edx, %edx
+ wrmsr
+ // Enable paging
+ mov $BOOT_CR0, %eax
+ mov %eax, %cr0
+ jmp start64
+
# The smp trampoline must be in the lower 1MB, so we manually relocate
# it to address 0 by subtracting smpboot from any offset
+//TODO: I am pretty sure we have some logic missing for SMP
+//given firecracker jumps us into long mode. Not sure how to handle it.
.data
.global smpboot
smpboot:
diff --git a/arch/x64/loader.ld b/arch/x64/loader.ld
index efe78d52..e08b310c 100644
--- a/arch/x64/loader.ld
+++ b/arch/x64/loader.ld
@@ -108,4 +108,4 @@ PHDRS {
eh_frame PT_GNU_EH_FRAME;
note PT_NOTE;
}
-ENTRY(start32);
+ENTRY(_start64);
diff --git a/arch/x64/power.cc b/arch/x64/power.cc
index 81849335..534ffdf0 100644
--- a/arch/x64/power.cc
+++ b/arch/x64/power.cc
@@ -27,17 +27,9 @@ void halt(void)
void poweroff(void)
{
- ACPI_STATUS status = AcpiEnterSleepStatePrep(ACPI_STATE_S5);
- if (ACPI_FAILURE(status)) {
- debug("AcpiEnterSleepStatePrep failed: %s\n",
AcpiFormatException(status));
- halt();
- }
- status = AcpiEnterSleepState(ACPI_STATE_S5);
- if (ACPI_FAILURE(status)) {
- debug("AcpiEnterSleepState failed: %s\n", AcpiFormatException(status));
- halt();
- }
-
+    // Firecracker only supports this as a way to shut down the VM:
+    // reset using the 8042 PS/2 Controller ("keyboard controller")
+ processor::outb(0xfe, 0x64);
// We shouldn't get here on x86.
halt();
}
diff --git a/arch/x64/smp.cc b/arch/x64/smp.cc
index 073ef206..4bdae0c6 100644
--- a/arch/x64/smp.cc
+++ b/arch/x64/smp.cc
@@ -74,7 +74,21 @@ void parse_madt()
void smp_init()
{
- parse_madt();
+ //Firecracker does not support ACPI so
+ //there is no MADT table
+ //parse_madt();
+
+    //TODO: This is a nasty hack as we only support a
+    // single vCPU. Eventually we should parse out the equivalent information
+    // about all vCPUs from the MP table (seems like the more ancient way).
+    //      See https://github.com/firecracker-microvm/firecracker/blob/7f29bca9ca197283275eab62fddc1c10ab580794/x86_64/src/mptable.rs
+ auto c = new sched::cpu(0);
+ c->arch.apic_id = 0;//lapic->Id;
+ c->arch.acpi_id = 0;//lapic->ProcessorId;
+ c->arch.initstack.next = smp_stack_free;
+ smp_stack_free = &c->arch.initstack;
+ sched::cpus.push_back(c);
+
sched::current_cpu = sched::cpus[0];
for (auto c : sched::cpus) {
c->incoming_wakeups = aligned_array_new<sched::cpu::incoming_wakeup_queue>(sched::cpus.size());
diff --git a/drivers/acpi.cc b/drivers/acpi.cc
index 506ca68f..006231f1 100644
--- a/drivers/acpi.cc
+++ b/drivers/acpi.cc
@@ -605,7 +605,9 @@ void init()
}
-void __attribute__((constructor(init_prio::acpi))) acpi_init_early()
-{
- XENPV_ALTERNATIVE({ acpi::early_init(); }, {});
-}
+//TODO: Is there a way to detect if ACPI is available and not even
+//try to load the relevant information? Right now OSv depends on ACPI being available
+//void __attribute__((constructor(init_prio::acpi))) acpi_init_early()
+//{
+// XENPV_ALTERNATIVE({ acpi::early_init(); }, {});
+//}
diff --git a/drivers/virtio-assign.cc b/drivers/virtio-assign.cc
index 8a128d5e..5a49966f 100644
--- a/drivers/virtio-assign.cc
+++ b/drivers/virtio-assign.cc
@@ -42,8 +42,9 @@ public:
// osv::assigned_virtio API implementation:
- virtual void kick(int queue) override {
+ virtual bool kick(int queue) override {
virtio_driver::kick(queue);
+ return true;
}
virtual u32 queue_size(int queue) override
diff --git a/drivers/virtio-blk.cc b/drivers/virtio-blk.cc
index a3f7898d..3784c453 100644
--- a/drivers/virtio-blk.cc
+++ b/drivers/virtio-blk.cc
@@ -10,7 +10,6 @@
#include "drivers/virtio.hh"
#include "drivers/virtio-blk.hh"
-#include "drivers/pci-device.hh"
#include <osv/interrupt.hh>
#include <osv/mempool.hh>
@@ -100,20 +99,20 @@ struct driver blk_driver = {
bool blk::ack_irq()
{
- auto isr = virtio_conf_readb(VIRTIO_PCI_ISR);
- auto queue = get_virt_queue(0);
-
- if (isr) {
- queue->disable_interrupts();
+ if(_dev.ack_irq()) {
+ get_virt_queue(0)->disable_interrupts();
return true;
- } else {
- return false;
}
-
+ else
+ return false;
}
-blk::blk(pci::device& pci_dev)
- : virtio_driver(pci_dev), _ro(false)
+//TODO: For now this driver is hardcoded to expect mmio_device
+// but eventually we could introduce some sort of virtio_device
+// interface class that pci_device and mmio_device would implement/extend
+// from.
+blk::blk(mmio_device& _dev)
+ : virtio_mmio_driver(_dev), _ro(false)
{
_driver_name = "virtio-blk";
@@ -128,13 +127,12 @@ blk::blk(pci::device& pci_dev)
sched::thread::attr().name("virtio-blk"));
t->start();
auto queue = get_virt_queue(0);
- if (pci_dev.is_msix()) {
- _msi.easy_register({ { 0, [=] { queue->disable_interrupts(); }, t } });
- } else {
- _irq.reset(new pci_interrupt(pci_dev,
- [=] { return ack_irq(); },
- [=] { t->wake(); }));
- }
+
+ //TODO: This logic should really be moved to a device class
+    // so that it would do a different thing depending on whether it is
+    // an MMIO or PCI device
+ _irq.reset(new gsi_edge_interrupt(_dev.get_irq(),
+ [=] { if(this->ack_irq()) t->wake(); }));
// Enable indirect descriptor
queue->set_use_indirect(true);
@@ -164,25 +162,32 @@ blk::~blk()
void blk::read_config()
{
//read all of the block config (including size, mce, topology,..) in one shot
- virtio_conf_read(virtio_pci_config_offset(), &_config, sizeof(_config));
+    //TODO: It may have to do with legacy vs non-legacy device
+ //but at least with latest spec we should check if individual
+ //config fields are available vs reading whole config struct. For example
+ //firecracker reports memory read violation warnings
+ virtio_conf_read(0, &_config, sizeof(_config.capacity));
trace_virtio_blk_read_config_capacity(_config.capacity);
- if (get_guest_feature_bit(VIRTIO_BLK_F_SIZE_MAX))
+ //TODO: Legacy vs non-legacy device concept. In pre-finalized spec
+ //there was a concept of "guest"/"host". Right now they use equivalent
+ //concepts - "driver"/"device"
+ if (get_drv_feature_bit(VIRTIO_BLK_F_SIZE_MAX))
trace_virtio_blk_read_config_size_max(_config.size_max);
- if (get_guest_feature_bit(VIRTIO_BLK_F_SEG_MAX))
+ if (get_drv_feature_bit(VIRTIO_BLK_F_SEG_MAX))
trace_virtio_blk_read_config_seg_max(_config.seg_max);
- if (get_guest_feature_bit(VIRTIO_BLK_F_GEOMETRY)) {
+ if (get_drv_feature_bit(VIRTIO_BLK_F_GEOMETRY)) {
trace_virtio_blk_read_config_geometry((u32)_config.geometry.cylinders,
(u32)_config.geometry.heads, (u32)_config.geometry.sectors);
}
- if (get_guest_feature_bit(VIRTIO_BLK_F_BLK_SIZE))
+ if (get_drv_feature_bit(VIRTIO_BLK_F_BLK_SIZE))
trace_virtio_blk_read_config_blk_size(_config.blk_size);
- if (get_guest_feature_bit(VIRTIO_BLK_F_TOPOLOGY)) {
+ if (get_drv_feature_bit(VIRTIO_BLK_F_TOPOLOGY)) {
trace_virtio_blk_read_config_topology((u32)_config.physical_block_exp,
(u32)_config.alignment_offset, (u32)_config.min_io_size,
(u32)_config.opt_io_size);
}
- if (get_guest_feature_bit(VIRTIO_BLK_F_CONFIG_WCE))
+ if (get_drv_feature_bit(VIRTIO_BLK_F_CONFIG_WCE))
trace_virtio_blk_read_config_wce((u32)_config.wce);
- if (get_guest_feature_bit(VIRTIO_BLK_F_RO)) {
+ if (get_drv_feature_bit(VIRTIO_BLK_F_RO)) {
set_readonly();
trace_virtio_blk_read_config_ro();
}
@@ -195,7 +200,7 @@ void blk::req_done()
while (1) {
- virtio_driver::wait_for_queue(queue, &vring::used_ring_not_empty);
+ virtio_mmio_driver::wait_for_queue(queue, &vring::used_ring_not_empty);
trace_virtio_blk_wake();
u32 len;
@@ -239,11 +244,12 @@ int blk::make_request(struct bio* bio)
WITH_LOCK(_lock) {
if (!bio) return EIO;
-
+ /* TODO: Is this really correct to simply disable this logic?
+ * Temporarily comment out -> seg_max is unavailable ...
if (bio->bio_bcount/mmu::page_size + 1 > _config.seg_max) {
trace_virtio_blk_make_request_seg_max(bio->bio_bcount,
_config.seg_max);
return EIO;
- }
+ }*/
auto* queue = get_virt_queue(0);
blk_request_type type;
@@ -296,7 +302,7 @@ int blk::make_request(struct bio* bio)
u32 blk::get_driver_features()
{
- auto base = virtio_driver::get_driver_features();
+ auto base = virtio_mmio_driver::get_driver_features();
return (base | ( 1 << VIRTIO_BLK_F_SIZE_MAX)
| ( 1 << VIRTIO_BLK_F_SEG_MAX)
| ( 1 << VIRTIO_BLK_F_GEOMETRY)
@@ -308,7 +314,17 @@ u32 blk::get_driver_features()
hw_driver* blk::probe(hw_device* dev)
{
- return virtio::probe<blk, VIRTIO_BLK_DEVICE_ID>(dev);
+ //TODO: Eventually we should account for both PCI and MMIO devices
+ //once we have a virtio_device class
+ if (auto mmio_dev = dynamic_cast<mmio_device*>(dev)) {
+ if (mmio_dev->get_id() == hw_device_id(0x0, VIRTIO_ID_BLOCK)) {
+ debug_early("virtio-blk::probe() -> found virtio-mmio device
...\n");
+ return new blk(*mmio_dev);
+ }
+ }
+ return nullptr;
+
+ //return virtio::probe<blk, VIRTIO_BLK_DEVICE_ID>(dev);
}
}
diff --git a/drivers/virtio-blk.hh b/drivers/virtio-blk.hh
index 17cf4d18..79e42710 100644
--- a/drivers/virtio-blk.hh
+++ b/drivers/virtio-blk.hh
@@ -8,12 +8,13 @@
#ifndef VIRTIO_BLK_DRIVER_H
#define VIRTIO_BLK_DRIVER_H
#include "drivers/virtio.hh"
-#include "drivers/pci-device.hh"
+#include "drivers/virtio2.hh"
+#include "drivers/virtio-mmio.hh"
#include <osv/bio.h>
namespace virtio {
-class blk : public virtio_driver {
+class blk : public virtio_mmio_driver {
public:
// The feature bitmap for virtio blk
@@ -118,7 +119,7 @@ public:
u8 status;
};
- explicit blk(pci::device& dev);
+ explicit blk(mmio_device& dev);
virtual ~blk();
virtual std::string get_name() const { return _driver_name; }
@@ -157,7 +158,8 @@ private:
bool _ro;
// This mutex protects parallel make_request invocations
mutex _lock;
- std::unique_ptr<pci_interrupt> _irq;
+ //TODO: There is no PCI so is it OK to use GSI edge interrupt?
+ std::unique_ptr<gsi_edge_interrupt> _irq;
};
}
diff --git a/drivers/virtio-mmio.cc b/drivers/virtio-mmio.cc
new file mode 100644
index 00000000..1485ea00
--- /dev/null
+++ b/drivers/virtio-mmio.cc
@@ -0,0 +1,128 @@
+//
+// Created by wkozaczuk on 12/25/18.
+//
+
+#include <osv/debug.hh>
+#include "virtio-mmio.hh"
+
+namespace virtio {
+
+// This implements the virtio mmio device (transport layer, modeled after the PCI one).
+// Read here - https://www.kraxel.org/virtio/virtio-v1.0-cs03-virtio-gpu.html#x1-1080002
+hw_device_id mmio_device::get_id()
+{
+ return hw_device_id(_vendor_id, _device_id);
+}
+
+void mmio_device::print() {}
+void mmio_device::reset() {}
+
+u8 mmio_device::get_status() {
+ return mmio_getl(_addr_mmio + VIRTIO_MMIO_STATUS) & 0xff;
+}
+
+void mmio_device::set_status(u8 status) {
+ mmio_setl(_addr_mmio + VIRTIO_MMIO_STATUS, status);
+}
+
+void mmio_device::add_status(u8 status) {
+ mmio_setl(_addr_mmio + VIRTIO_MMIO_STATUS, status | get_status());
+}
+
+u64 mmio_device::get_features() {
+ u64 features;
+
+ mmio_setl(_addr_mmio + VIRTIO_MMIO_DEVICE_FEATURES_SEL, 1);
+ features = mmio_getl(_addr_mmio + VIRTIO_MMIO_DEVICE_FEATURES);
+ features <<= 32;
+
+ mmio_setl(_addr_mmio + VIRTIO_MMIO_DEVICE_FEATURES_SEL, 0);
+ features |= mmio_getl(_addr_mmio + VIRTIO_MMIO_DEVICE_FEATURES);
+
+ return features;
+}
+
+void mmio_device::set_features(u64 features) {
+ mmio_setl(_addr_mmio + VIRTIO_MMIO_DRIVER_FEATURES_SEL, 1);
+ mmio_setl(_addr_mmio + VIRTIO_MMIO_DRIVER_FEATURES, (u32)(features >> 32));
+
+ mmio_setl(_addr_mmio + VIRTIO_MMIO_DRIVER_FEATURES_SEL, 0);
+ mmio_setl(_addr_mmio + VIRTIO_MMIO_DRIVER_FEATURES, (u32)features);
+}
+
+void mmio_device::kick(int queue_num) {
+ mmio_setl(_addr_mmio + VIRTIO_MMIO_QUEUE_NOTIFY, queue_num);
+}
+
+void mmio_device::select_queue(int queue_num) {
+ mmio_setl(_addr_mmio + VIRTIO_MMIO_QUEUE_SEL, queue_num);
+ assert(!mmio_getl(_addr_mmio + VIRTIO_MMIO_QUEUE_READY));
+}
+
+u16 mmio_device::get_queue_size() {
+ return mmio_getl(_addr_mmio + VIRTIO_MMIO_QUEUE_NUM_MAX) & 0xffff;
+}
+
+void mmio_device::activate_queue(vring* queue) {
+ // Set size
+ mmio_setl(_addr_mmio + VIRTIO_MMIO_QUEUE_NUM, queue->size());
+ //
+ // Pass addresses
+    mmio_setl(_addr_mmio + VIRTIO_MMIO_QUEUE_DESC_LOW, (u32)queue->get_desc_addr());
+    mmio_setl(_addr_mmio + VIRTIO_MMIO_QUEUE_DESC_HIGH, (u32)(queue->get_desc_addr() >> 32));
+
+    mmio_setl(_addr_mmio + VIRTIO_MMIO_QUEUE_AVAIL_LOW, (u32)queue->get_avail_addr());
+    mmio_setl(_addr_mmio + VIRTIO_MMIO_QUEUE_AVAIL_HIGH, (u32)(queue->get_avail_addr() >> 32));
+
+    mmio_setl(_addr_mmio + VIRTIO_MMIO_QUEUE_USED_LOW, (u32)queue->get_used_addr());
+    mmio_setl(_addr_mmio + VIRTIO_MMIO_QUEUE_USED_HIGH, (u32)(queue->get_used_addr() >> 32));
+ //
+ // Make it ready
+ mmio_setl(_addr_mmio + VIRTIO_MMIO_QUEUE_READY, 1 );
+}
+
+bool mmio_device::ack_irq() {
+    //TODO: we might need to guard this read and write with a mutex
+    //to prevent two concurrent interrupts raised against the same device
+    //from stepping on each other. Is that possible? The spec does not seem to say anything about it.
+    unsigned long status = mmio_getl(_addr_mmio + VIRTIO_MMIO_INTERRUPT_STATUS);
+    //Sometimes this assert would be false (maybe because of what is described above).
+ //assert(status & VIRTIO_MMIO_INT_VRING);
+ mmio_setl(_addr_mmio + VIRTIO_MMIO_INTERRUPT_ACK, status);
+ //return true;
+ return (status & VIRTIO_MMIO_INT_VRING) != 0;
+}
+
+u8 mmio_device::read_config(u64 offset) {
+ return mmio_getb(_addr_mmio + VIRTIO_MMIO_CONFIG + offset);
+}
+
+bool mmio_device::parse_config() {
+ _addr_mmio = mmio_map(_address, _size);
+
+ u32 magic = mmio_getl(_addr_mmio + VIRTIO_MMIO_MAGIC_VALUE);
+ if (magic != ('v' | 'i' << 8 | 'r' << 16 | 't' << 24)) {
+ return false;
+ }
+
+ // Check device version
+ u32 version = mmio_getl(_addr_mmio + VIRTIO_MMIO_VERSION);
+ if (version < 1 || version > 2) {
+ debugf( "Version %ld not supported!\n", version);
+ return false;
+ }
+
+ _device_id = mmio_getl(_addr_mmio + VIRTIO_MMIO_DEVICE_ID);
+ if (_device_id == 0) {
+ //
+ // virtio-mmio device with an ID 0 is a (dummy) placeholder
+ // with no function. End probing now with no error reported.
+ debug( "Dummy virtio-mmio device detected!\n");
+ return false;
+ }
+ _vendor_id = mmio_getl(_addr_mmio + VIRTIO_MMIO_VENDOR_ID);
+
+ debugf("Detected virtio-mmio device: (%ld,%ld)\n", _device_id, _vendor_id);
+ return true;
+}
+}
diff --git a/drivers/virtio-mmio.hh b/drivers/virtio-mmio.hh
new file mode 100644
index 00000000..78f8150b
--- /dev/null
+++ b/drivers/virtio-mmio.hh
@@ -0,0 +1,132 @@
+//
+// Created by wkozaczuk on 12/25/18.
+//
+
+#ifndef VIRTIO_MMIO_DEVICE_HH
+#define VIRTIO_MMIO_DEVICE_HH
+
+#include <osv/types.h>
+#include <osv/mmio.hh>
+#include "device.hh"
+#include "virtio-vring.hh"
+
+using namespace hw;
+
+/* Magic value ("virt" string) - Read Only */
+#define VIRTIO_MMIO_MAGIC_VALUE 0x000
+
+/* Virtio device version - Read Only */
+#define VIRTIO_MMIO_VERSION 0x004
+
+/* Virtio device ID - Read Only */
+#define VIRTIO_MMIO_DEVICE_ID 0x008
+
+/* Virtio vendor ID - Read Only */
+#define VIRTIO_MMIO_VENDOR_ID 0x00c
+
+/* Bitmask of the features supported by the device (host)
+ * (32 bits per set) - Read Only */
+#define VIRTIO_MMIO_DEVICE_FEATURES 0x010
+
+/* Device (host) features set selector - Write Only */
+#define VIRTIO_MMIO_DEVICE_FEATURES_SEL 0x014
+
+/* Bitmask of features activated by the driver (guest)
+ * (32 bits per set) - Write Only */
+#define VIRTIO_MMIO_DRIVER_FEATURES 0x020
+
+/* Activated features set selector - Write Only */
+#define VIRTIO_MMIO_DRIVER_FEATURES_SEL 0x024
+
+/* Queue selector - Write Only */
+#define VIRTIO_MMIO_QUEUE_SEL 0x030
+
+/* Maximum size of the currently selected queue - Read Only */
+#define VIRTIO_MMIO_QUEUE_NUM_MAX 0x034
+
+/* Queue size for the currently selected queue - Write Only */
+#define VIRTIO_MMIO_QUEUE_NUM 0x038
+
+/* Ready bit for the currently selected queue - Read Write */
+#define VIRTIO_MMIO_QUEUE_READY 0x044
+
+/* Queue notifier - Write Only */
+#define VIRTIO_MMIO_QUEUE_NOTIFY 0x050
+
+/* Interrupt status - Read Only */
+#define VIRTIO_MMIO_INTERRUPT_STATUS 0x060
+
+/* Interrupt acknowledge - Write Only */
+#define VIRTIO_MMIO_INTERRUPT_ACK 0x064
+
+/* Device status register - Read Write */
+#define VIRTIO_MMIO_STATUS 0x070
+
+/* Selected queue's Descriptor Table address, 64 bits in two halves */
+#define VIRTIO_MMIO_QUEUE_DESC_LOW 0x080
+#define VIRTIO_MMIO_QUEUE_DESC_HIGH 0x084
+
+/* Selected queue's Available Ring address, 64 bits in two halves */
+#define VIRTIO_MMIO_QUEUE_AVAIL_LOW 0x090
+#define VIRTIO_MMIO_QUEUE_AVAIL_HIGH 0x094
+
+/* Selected queue's Used Ring address, 64 bits in two halves */
+#define VIRTIO_MMIO_QUEUE_USED_LOW 0x0a0
+#define VIRTIO_MMIO_QUEUE_USED_HIGH 0x0a4
+
+/* Configuration atomicity value */
+#define VIRTIO_MMIO_CONFIG_GENERATION 0x0fc
+
+/* The config space is defined by each driver as
+ * the per-driver configuration space - Read Write */
+#define VIRTIO_MMIO_CONFIG 0x100
+
+#define VIRTIO_MMIO_INT_VRING (1 << 0)
+#define VIRTIO_MMIO_INT_CONFIG (1 << 1)
+
+namespace virtio {
+
+class mmio_device : public hw_device {
+public:
+ mmio_device(u64 address, u64 size, unsigned int irq) :
+ _address(address), _size(size), _irq(irq),
+ _vendor_id(0), _device_id(0), _addr_mmio(0) {}
+
+ virtual ~mmio_device() {}
+
+ virtual hw_device_id get_id();
+ virtual void print();
+ virtual void reset();
+
+ unsigned int get_irq() { return _irq; }
+ bool ack_irq();
+
+ u8 get_status();
+ void set_status(u8 status);
+ void add_status(u8 status);
+
+ u64 get_features();
+ void set_features(u64 features);
+
+ void kick(int queue);
+ void select_queue(int queue);
+ u16 get_queue_size();
+ void activate_queue(vring* queue);
+
+ u8 read_config(u64 offset);
+ bool parse_config();
+
+private:
+ u64 _address;
+ u64 _size;
+ unsigned int _irq;
+ //u64 _id;
+ u16 _vendor_id;
+ u16 _device_id;
+
+ mmioaddr_t _addr_mmio;
+};
+
+}
+
+#endif //VIRTIO_MMIO_DEVICE_HH
diff --git a/drivers/virtio-net.cc b/drivers/virtio-net.cc
index b820c36c..9ad5d48a 100644
--- a/drivers/virtio-net.cc
+++ b/drivers/virtio-net.cc
@@ -10,7 +10,6 @@
#include "drivers/virtio.hh"
#include "drivers/virtio-net.hh"
-#include "drivers/pci-device.hh"
#include <osv/interrupt.hh>
#include <osv/mempool.hh>
@@ -217,19 +216,20 @@ void net::fill_qstats(const struct txq& txq, struct if_data* out_data) const
bool net::ack_irq()
{
- auto isr = virtio_conf_readb(VIRTIO_PCI_ISR);
-
- if (isr) {
+ if( _dev.ack_irq()) {
_rxq.vqueue->disable_interrupts();
return true;
- } else {
- return false;
}
-
+ else
+ return false;
}
-net::net(pci::device& dev)
- : virtio_driver(dev),
+//TODO: For now this driver is hardcoded to expect mmio_device
+// but eventually we could introduce some sort of virtio_device
+// interface class that pci_device and mmio_device would implement/extend
+// from.
+net::net(mmio_device& dev)
+ : virtio_mmio_driver(dev),
_rxq(get_virt_queue(0), [this] { this->receiver(); }),
_txq(this, get_virt_queue(1))
{
@@ -244,7 +244,8 @@ net::net(pci::device& dev)
setup_features();
read_config();
- _hdr_size = _mergeable_bufs ? sizeof(net_hdr_mrg_rxbuf) : sizeof(net_hdr);
+    //TODO: Legacy vs non-legacy -> the non-legacy header includes one more field
+ _hdr_size = sizeof(net_hdr_mrg_rxbuf);
//initialize the BSD interface _if
_ifn = if_alloc(IFT_ETHER);
@@ -290,16 +291,9 @@ net::net(pci::device& dev)
ether_ifattach(_ifn, _config.mac);
- if (dev.is_msix()) {
- _msi.easy_register({
- { 0, [&] { _rxq.vqueue->disable_interrupts(); }, poll_task },
- { 1, [&] { _txq.vqueue->disable_interrupts(); }, nullptr }
- });
- } else {
- _irq.reset(new pci_interrupt(dev,
- [=] { return this->ack_irq(); },
- [=] { poll_task->wake(); }));
- }
+ //TODO: Move to device class
+    _irq.reset(new gsi_edge_interrupt(_dev.get_irq(),
+                                      [=] { if(this->ack_irq()) poll_task->wake(); }));
fill_rx_ring();
@@ -324,10 +318,14 @@ net::~net()
void net::read_config()
{
//read all of the net config in one shot
- virtio_conf_read(virtio_pci_config_offset(), &_config, sizeof(_config));
-
- if (get_guest_feature_bit(VIRTIO_NET_F_MAC))
- net_i("The mac addr of the device is %x:%x:%x:%x:%x:%x",
+    //TODO: It may have to do with legacy vs non-legacy device
+ //but at least with latest spec we should check if individual
+ //config fields are available vs reading whole config struct. For example
+ //firecracker reports memory read violation warnings
+ virtio_conf_read(0, &(_config.mac[0]), sizeof(_config.mac));
+
+ if (get_drv_feature_bit(VIRTIO_NET_F_MAC))
+ debugf("The mac addr of the device is %x:%x:%x:%x:%x:%x\n",
(u32)_config.mac[0],
(u32)_config.mac[1],
(u32)_config.mac[2],
@@ -335,20 +333,20 @@ void net::read_config()
(u32)_config.mac[4],
(u32)_config.mac[5]);
- _mergeable_bufs = get_guest_feature_bit(VIRTIO_NET_F_MRG_RXBUF);
- _status = get_guest_feature_bit(VIRTIO_NET_F_STATUS);
- _tso_ecn = get_guest_feature_bit(VIRTIO_NET_F_GUEST_ECN);
- _host_tso_ecn = get_guest_feature_bit(VIRTIO_NET_F_HOST_ECN);
- _csum = get_guest_feature_bit(VIRTIO_NET_F_CSUM);
- _guest_csum = get_guest_feature_bit(VIRTIO_NET_F_GUEST_CSUM);
- _guest_tso4 = get_guest_feature_bit(VIRTIO_NET_F_GUEST_TSO4);
- _host_tso4 = get_guest_feature_bit(VIRTIO_NET_F_HOST_TSO4);
- _guest_ufo = get_guest_feature_bit(VIRTIO_NET_F_GUEST_UFO);
-
- net_i("Features: %s=%d,%s=%d", "Status", _status, "TSO_ECN", _tso_ecn);
- net_i("Features: %s=%d,%s=%d", "Host TSO ECN", _host_tso_ecn, "CSUM",
_csum);
- net_i("Features: %s=%d,%s=%d", "Guest_csum", _guest_csum, "guest tso4",
_guest_tso4);
- net_i("Features: %s=%d", "host tso4", _host_tso4);
+ _mergeable_bufs = get_drv_feature_bit(VIRTIO_NET_F_MRG_RXBUF);
+ _status = get_drv_feature_bit(VIRTIO_NET_F_STATUS);
+ _tso_ecn = get_drv_feature_bit(VIRTIO_NET_F_GUEST_ECN);
+ _host_tso_ecn = get_drv_feature_bit(VIRTIO_NET_F_HOST_ECN);
+ _csum = get_drv_feature_bit(VIRTIO_NET_F_CSUM);
+ _guest_csum = get_drv_feature_bit(VIRTIO_NET_F_GUEST_CSUM);
+ _guest_tso4 = get_drv_feature_bit(VIRTIO_NET_F_GUEST_TSO4);
+ _host_tso4 = get_drv_feature_bit(VIRTIO_NET_F_HOST_TSO4);
+ _guest_ufo = get_drv_feature_bit(VIRTIO_NET_F_GUEST_UFO);
+
+ debugf("Features: %s=%d,%s=%d\n", "Status", _status, "TSO_ECN", _tso_ecn);
+ debugf("Features: %s=%d,%s=%d\n", "Host TSO ECN", _host_tso_ecn, "CSUM",
_csum);
+ debugf("Features: %s=%d,%s=%d\n", "Guest_csum", _guest_csum, "guest tso4",
_guest_tso4);
+ debugf("Features: %s=%d,%s=%d\n", "host tso4", _host_tso4,
"mergeable_bufs", _mergeable_bufs);
}
/**
@@ -422,7 +420,7 @@ void net::receiver()
while (1) {
// Wait for rx queue (used elements)
- virtio_driver::wait_for_queue(vq, &vring::used_ring_not_empty);
+ virtio_mmio_driver::wait_for_queue(vq, &vring::used_ring_not_empty);
trace_virtio_net_rx_wake();
_rxq.stats.rx_bh_wakeups++;
@@ -840,7 +838,7 @@ void net::txq::gc()
u32 net::get_driver_features()
{
- u32 base = virtio_driver::get_driver_features();
+ u32 base = virtio_mmio_driver::get_driver_features();
return (base | (1 << VIRTIO_NET_F_MAC) \
| (1 << VIRTIO_NET_F_MRG_RXBUF) \
| (1 << VIRTIO_NET_F_STATUS) \
@@ -856,12 +854,14 @@ u32 net::get_driver_features()
hw_driver* net::probe(hw_device* dev)
{
- if (auto pci_dev = dynamic_cast<pci::device*>(dev)) {
-        if (pci_dev->get_id() == hw_device_id(VIRTIO_VENDOR_ID, VIRTIO_NET_DEVICE_ID)) {
+ //TODO: Handle both PCI and MMIO devices
+ if (auto mmio_dev = dynamic_cast<mmio_device*>(dev)) {
+ if (mmio_dev->get_id() == hw_device_id(0x0, VIRTIO_ID_NET)) {
+ debug_early("virtio-net::probe() -> found virtio-mmio device
...\n");
if (opt_maxnic && maxnic-- <= 0) {
return nullptr;
} else {
- return aligned_new<net>(*pci_dev);
+ return aligned_new<net>(*mmio_dev);
}
}
}
diff --git a/drivers/virtio-net.hh b/drivers/virtio-net.hh
index ad323e69..6edca97a 100644
--- a/drivers/virtio-net.hh
+++ b/drivers/virtio-net.hh
@@ -14,9 +14,11 @@
#include <bsd/sys/sys/mbuf.h>
#include <osv/percpu_xmit.hh>
+#include <osv/interrupt.hh>
#include "drivers/virtio.hh"
-#include "drivers/pci-device.hh"
+#include "drivers/virtio2.hh"
+#include "drivers/virtio-mmio.hh"
namespace virtio {
@@ -24,7 +26,7 @@ namespace virtio {
* @class net
* virtio net device class
*/
-class net : public virtio_driver {
+class net : public virtio_mmio_driver {
public:
// The feature bitmap for virtio net
@@ -204,7 +206,7 @@ public:
u16 virtqueue_pairs;
};
- explicit net(pci::device& dev);
+ explicit net(mmio_device& dev);
virtual ~net();
virtual std::string get_name() const { return _driver_name; }
@@ -269,7 +271,7 @@ private:
u32 _hdr_size;
- std::unique_ptr<pci_interrupt> _irq;
+ std::unique_ptr<gsi_edge_interrupt> _irq;
struct rxq_stats {
u64 rx_packets; /* if_ipackets */
diff --git a/drivers/virtio-vring.cc b/drivers/virtio-vring.cc
index 5f89fb5d..58681ead 100644
--- a/drivers/virtio-vring.cc
+++ b/drivers/virtio-vring.cc
@@ -38,9 +38,9 @@ TRACEPOINT(trace_vring_get_buf_ret, "vring=%p _avail_count %d", void*, int);
namespace virtio {
- vring::vring(virtio_driver* const dev, u16 num, u16 q_index)
+ vring::vring(vdriver* const driver, u16 num, u16 q_index)
{
- _dev = dev;
+ _driver = driver;
_q_index = q_index;
// Alloc enough pages for the vring...
unsigned sz = VIRTIO_ALIGN(vring::get_size(num, VIRTIO_PCI_VRING_ALIGN));
@@ -86,6 +86,21 @@ namespace virtio {
return mmu::virt_to_phys(_vring_ptr);
}
+ u64 vring::get_desc_addr()
+ {
+ return mmu::virt_to_phys(_desc);
+ }
+
+ u64 vring::get_avail_addr()
+ {
+ return mmu::virt_to_phys(_avail);
+ }
+
+ u64 vring::get_used_addr()
+ {
+ return mmu::virt_to_phys(_used);
+ }
+
unsigned vring::get_size(unsigned int num, unsigned long align)
{
return (((sizeof(vring_desc) * num + sizeof(u16) * (3 + num)
@@ -102,7 +117,7 @@ namespace virtio {
inline bool vring::use_indirect(int desc_needed)
{
return _use_indirect &&
- _dev->get_indirect_buf_cap() &&
+ _driver->get_indirect_buf_cap() &&
// don't let the posting fail due to low available buffers number
(desc_needed > _avail_count ||
// no need to use indirect for a single descriptor
@@ -280,7 +295,7 @@ namespace virtio {
vring::kick() {
bool kicked = true;
- if (_dev->get_event_idx_cap()) {
+ if (_driver->get_event_idx_cap()) {
std::atomic_thread_fence(std::memory_order_seq_cst);
@@ -310,7 +325,7 @@ namespace virtio {
// and _avail_added_since_kick might wrap around due to this bulking.
//
if (kicked || (_avail_added_since_kick >= (u16)(~0) / 2)) {
- _dev->kick(_q_index);
+ _driver->kick(_q_index);
_avail_added_since_kick = 0;
return true;
}
diff --git a/drivers/virtio-vring.hh b/drivers/virtio-vring.hh
index af8691ca..0bf1c581 100644
--- a/drivers/virtio-vring.hh
+++ b/drivers/virtio-vring.hh
@@ -29,7 +29,7 @@ TRACEPOINT(trace_vring_update_used_event, "vring=%p: _used_ring_host_head %d",
namespace virtio {
class virtio_vring;
-class virtio_driver;
+class vdriver;
// Buffer descriptors in the ring
class vring_desc {
@@ -122,12 +122,16 @@ class virtio_driver;
class vring {
public:
- vring(virtio_driver* const dev, u16 num, u16 q_index);
+ vring(vdriver* const driver, u16 num, u16 q_index);
virtual ~vring();
u64 get_paddr();
static unsigned get_size(unsigned int num, unsigned long align);
+ u64 get_desc_addr();
+ u64 get_avail_addr();
+ u64 get_used_addr();
+
// Ring operations
bool add_buf(void* cookie);
// Get the top item from the used ring
@@ -240,7 +244,7 @@ class virtio_driver;
private:
// Up pointer
- virtio_driver* _dev;
+ vdriver* _driver;
u16 _q_index;
// The physical of the physical address handed to the virtio device
void* _vring_ptr;
diff --git a/drivers/virtio.hh b/drivers/virtio.hh
index b918de28..2faadec7 100644
--- a/drivers/virtio.hh
+++ b/drivers/virtio.hh
@@ -28,6 +28,9 @@ enum VIRTIO_CONFIG {
VIRTIO_CONFIG_S_DRIVER = 2,
/* Driver has used its parts of the config, and is happy */
VIRTIO_CONFIG_S_DRIVER_OK = 4,
+    /* Indicates that the driver has acknowledged all the features it understands,
+ * and feature negotiation is complete */
+ VIRTIO_CONFIG_S_FEATURES_OK = 8,
/* We've given up on this device. */
VIRTIO_CONFIG_S_FAILED = 0x80,
/* Some virtio feature bits (currently bits 28 through 31) are reserved for the
@@ -104,7 +107,20 @@ enum {
const unsigned max_virtqueues_nr = 64;
-class virtio_driver : public hw_driver {
+//TODO: This helps us make vring class not be
+// tightly coupled to virtio_driver but rather this simple
+// interface. Might not be necessary once we introduce
+// virtio_device class.
+class vdriver {
+public:
+ virtual ~vdriver() {};
+
+ virtual bool get_indirect_buf_cap() = 0;
+ virtual bool get_event_idx_cap() = 0;
+ virtual bool kick(int queue) = 0;
+};
+
+class virtio_driver : public hw_driver, public vdriver {
public:
explicit virtio_driver(pci::device& dev);
virtual ~virtio_driver();
diff --git a/drivers/virtio2.cc b/drivers/virtio2.cc
new file mode 100644
index 00000000..09410888
--- /dev/null
+++ b/drivers/virtio2.cc
@@ -0,0 +1,289 @@
+/*
+ * Copyright (C) 2013 Cloudius Systems, Ltd.
+ *
+ * This work is open source software, licensed under the terms of the
+ * BSD license as described in the LICENSE file in the top-level directory.
+ */
+
+#include <string.h>
+
+#include "drivers/virtio2.hh"
+#include "virtio-vring.hh"
+#include <osv/debug.h>
+#include "osv/trace.hh"
+
+//using namespace pci;
+
+//TRACEPOINT(trace_virtio_wait_for_queue, "queue(%p) have_elements=%d", void*, int);
+
+namespace virtio {
+
+int virtio_mmio_driver::_disk_idx = 0;
+
+//TODO: This is a copy of virtio.cc with commented out PCI-related code and
+// adjusted for mmio. Just a temporary artefact. Eventually we should have a single
+// virtio_driver class that supports both PCI and MMIO devices through common
+// virtio_device interface.
+virtio_mmio_driver::virtio_mmio_driver(mmio_device& dev)
+ : hw_driver()
+ , _dev(dev)
+ , _num_queues(0)
+ , _cap_indirect_buf(false)
+{
+ for (unsigned i = 0; i < max_virtqueues_nr; i++) {
+ _queues[i] = nullptr;
+ }
+ //bool status = parse_pci_config();
+ //assert(status == true);
+
+ //PCI: _dev.set_bus_master(true);
+
+ //PCI: _dev.msix_enable();
+
+ //make sure the queue is reset
+ reset_host_side();
+
+ // Acknowledge device
+ add_dev_status(VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER);
+
+ // Generic init of virtqueues
+ probe_virt_queues();
+}
+
+virtio_mmio_driver::~virtio_mmio_driver()
+{
+ reset_host_side();
+ free_queues();
+}
+
+void virtio_mmio_driver::setup_features()
+{
+ u64 dev_features = get_device_features();
+ u64 drv_features = this->get_driver_features();
+
+ u64 subset = dev_features & drv_features;
+
+ //notify the host about the features in used according
+ //to the virtio spec
+    for (int i = 0; i < 64; i++)
+        if (subset & (1ULL << i))
+            virtio_d("%s: found feature intersection of bit %d", __FUNCTION__, i);
+
+ if (subset & (1 << VIRTIO_RING_F_INDIRECT_DESC))
+ set_indirect_buf_cap(true);
+
+ if (subset & (1 << VIRTIO_RING_F_EVENT_IDX))
+ set_event_idx_cap(true);
+
+ set_drv_features(subset);
+
+ // Confirm features (new non-legacy)
+ add_dev_status(VIRTIO_CONFIG_S_FEATURES_OK);
+ get_dev_status(); //TODO -> Verify if correct
+}
+
+void virtio_mmio_driver::dump_config()
+{
+ /*
+ u8 B, D, F;
+ _dev.get_bdf(B, D, F);
+
+ _dev.dump_config();
+ virtio_d("%s [%x:%x.%x] vid:id=%x:%x", get_name().c_str(),
+ (u16)B, (u16)D, (u16)F,
+ _dev.get_vendor_id(),
+ _dev.get_device_id());
+
+ virtio_d(" virtio features: ");
+ for (int i = 0; i < 32; i++)
+ virtio_d(" %d ", get_device_feature_bit(i));
+ */
+}
+
+/*
+bool virtio_mmio_driver::parse_pci_config()
+{
+ // Test whether bar1 is present
+ _bar1 = _dev.get_bar(1);
+ if (_bar1 == nullptr) {
+ return false;
+ }
+
+ // Check ABI version
+ u8 rev = _dev.get_revision_id();
+ if (rev != VIRTIO_PCI_ABI_VERSION) {
+ virtio_e("Wrong virtio revision=%x", rev);
+ return false;
+ }
+
+ // Check device ID
+ u16 dev_id = _dev.get_device_id();
+ if ((dev_id < VIRTIO_PCI_ID_MIN) || (dev_id > VIRTIO_PCI_ID_MAX)) {
+ virtio_e("Wrong virtio dev id %x", dev_id);
+ return false;
+ }
+
+ return true;
+}*/
+
+void virtio_mmio_driver::reset_host_side()
+{
+ set_dev_status(0);
+}
+
+void virtio_mmio_driver::free_queues()
+{
+ for (unsigned i = 0; i < max_virtqueues_nr; i++) {
+ if (nullptr != _queues[i]) {
+ delete (_queues[i]);
+ _queues[i] = nullptr;
+ }
+ }
+}
+
+bool virtio_mmio_driver::kick(int queue)
+{
+ _dev.kick(queue);
+ //virtio_conf_writew(VIRTIO_PCI_QUEUE_NOTIFY, queue);
+ return true;
+}
+
+void virtio_mmio_driver::probe_virt_queues()
+{
+ u16 qsize = 0;
+
+ do {
+
+ if (_num_queues >= max_virtqueues_nr) {
+ return;
+ }
+
+ // Read queue size
+ _dev.select_queue(_num_queues);
+ //PCI: virtio_conf_writew(VIRTIO_PCI_QUEUE_SEL, _num_queues);
+ qsize = _dev.get_queue_size();
+ //PCI: qsize = virtio_conf_readw(VIRTIO_PCI_QUEUE_NUM);
+ if (0 == qsize) {
+ break;
+ }
+
+ // Init a new queue
+ vring* queue = new vring(this, qsize, _num_queues);
+ _queues[_num_queues] = queue;
+ debugf("virtio_mmio_driver: created vring - id:(%d), size:(%d)\n",
_num_queues, qsize);
+
+ //PCI:
+ /*if (_dev.is_msix()) {
+ // Setup queue_id:entry_id 1:1 correlation...
+ virtio_conf_writew(VIRTIO_MSI_QUEUE_VECTOR, _num_queues);
+ if (virtio_conf_readw(VIRTIO_MSI_QUEUE_VECTOR) != _num_queues) {
+ virtio_e("Setting MSIx entry for queue %d failed.",
_num_queues);
+ return;
+ }
+ }*/
+
+ _num_queues++;
+
+ // Tell host about pfn
+        // TODO: Yak, this is a bug in the design, on large memory we'll have PFNs > 32 bit
+        // Dor to notify Rusty
+        //PCI: virtio_conf_writel(VIRTIO_PCI_QUEUE_PFN, (u32)(queue->get_paddr() >> VIRTIO_PCI_QUEUE_ADDR_SHIFT));
+ _dev.activate_queue(queue);
+
+ // Debug print
+ virtio_d("Queue[%d] -> size %d, paddr %x", (_num_queues-1), qsize,
queue->get_paddr());
+
+ } while (true);
+}
+
+vring* virtio_mmio_driver::get_virt_queue(unsigned idx)
+{
+ if (idx >= _num_queues) {
+ return nullptr;
+ }
+
+ return _queues[idx];
+}
+
+void virtio_mmio_driver::wait_for_queue(vring* queue, bool (vring::*pred)() const)
+{
+ sched::thread::wait_until([queue,pred] {
+ bool have_elements = (queue->*pred)();
+ if (!have_elements) {
+ queue->enable_interrupts();
+
+ // we must check that the ring is not empty *after*
+ // we enable interrupts to avoid a race where a packet
+ // may have been delivered between queue->used_ring_not_empty()
+ // and queue->enable_interrupts() above
+ have_elements = (queue->*pred)();
+ if (have_elements) {
+ queue->disable_interrupts();
+ }
+ }
+
+ //trace_virtio_wait_for_queue(queue, have_elements);
+ return have_elements;
+ });
+}
+
+u64 virtio_mmio_driver::get_device_features()
+{
+ return _dev.get_features();
+ //PCI: return virtio_conf_readl(VIRTIO_PCI_HOST_FEATURES);
+}
+
+bool virtio_mmio_driver::get_device_feature_bit(int bit)
+{
+ return (_dev.get_features() & (1 << bit)) != 0;
+ //PCI: return get_virtio_config_bit(VIRTIO_PCI_HOST_FEATURES, bit);
+}
+
+void virtio_mmio_driver::set_drv_features(u64 features)
+{
+ _dev.set_features(features);
+ _enabled_features = features;
+ //PCI: virtio_conf_writel(VIRTIO_PCI_GUEST_FEATURES, features);
+}
+
+u32 virtio_mmio_driver::get_drv_features()
+{
+ return _enabled_features;
+ //PCI: return virtio_conf_readl(VIRTIO_PCI_GUEST_FEATURES);
+}
+
+bool virtio_mmio_driver::get_drv_feature_bit(int bit)
+{
+ return (_enabled_features & (1 << bit)) != 0;
+ //PCI: return get_virtio_config_bit(VIRTIO_PCI_GUEST_FEATURES, bit);
+}
+
+u8 virtio_mmio_driver::get_dev_status()
+{
+ return _dev.get_status();
+ //PCI: return virtio_conf_readb(VIRTIO_PCI_STATUS);
+}
+
+void virtio_mmio_driver::set_dev_status(u8 status)
+{
+ _dev.set_status(status);
+ //PCI: virtio_conf_writeb(VIRTIO_PCI_STATUS, status);
+}
+
+void virtio_mmio_driver::add_dev_status(u8 status)
+{
+ set_dev_status(get_dev_status() | status);
+}
+
+void virtio_mmio_driver::del_dev_status(u8 status)
+{
+ set_dev_status(get_dev_status() & ~status);
+}
+
+void virtio_mmio_driver::virtio_conf_read(u32 offset, void* buf, int length)
+{
+ unsigned char* ptr = reinterpret_cast<unsigned char*>(buf);
+ for (int i = 0; i < length; i++)
+ ptr[i] = _dev.read_config(offset + i);
+}
+}
diff --git a/drivers/virtio2.hh b/drivers/virtio2.hh
new file mode 100644
index 00000000..afcc6459
--- /dev/null
+++ b/drivers/virtio2.hh
@@ -0,0 +1,115 @@
+/*
+ * Copyright (C) 2013 Cloudius Systems, Ltd.
+ *
+ * This work is open source software, licensed under the terms of the
+ * BSD license as described in the LICENSE file in the top-level directory.
+ */
+
+#ifndef VIRTIO_DRIVER2_H
+#define VIRTIO_DRIVER2_H
+
+#include "driver.hh"
+#include "drivers/driver.hh"
+#include "drivers/virtio.hh"
+#include "drivers/virtio-mmio.hh"
+#include "drivers/virtio-vring.hh"
+#include <osv/interrupt.hh>
+
+namespace virtio {
+
+class virtio_mmio_driver : public hw_driver, public vdriver {
+public:
+ explicit virtio_mmio_driver(mmio_device& dev);
+ virtual ~virtio_mmio_driver();
+
+ virtual std::string get_name() const = 0;
+
+ virtual void dump_config();
+
+ // The remaining space is defined by each driver as the per-driver
+ // configuration space
+    //int virtio_pci_config_offset() {return (_dev.is_msix_enabled())? 24 : 20;}
+
+ //bool parse_pci_config();
+
+ void probe_virt_queues();
+ vring* get_virt_queue(unsigned idx);
+
+ // block the calling thread until the queue has some used elements in it.
+ void wait_for_queue(vring* queue, bool (vring::*pred)() const);
+
+ // guest/host features physical access
+ // LEGACY NOTE:
+ // guest equivalent to driver
+ // host equivalent to device
+ u64 get_device_features();
+ bool get_device_feature_bit(int bit);
+
+ void set_drv_features(u64 features);
+ u32 get_drv_features();
+ bool get_drv_feature_bit(int bit);
+
+ // device status
+ u8 get_dev_status();
+ void set_dev_status(u8 status);
+ void add_dev_status(u8 status);
+ void del_dev_status(u8 status);
+
+ // Access the virtio conf address space set by pci bar 1
+ //bool get_virtio_config_bit(u32 offset, int bit);
+ //void set_virtio_config_bit(u32 offset, int bit, bool on);
+
+ // Access virtio config space
+ //void virtio_conf_read(u32 offset, void* buf, int length);
+ //void virtio_conf_write(u32 offset, void* buf, int length);
+ //u8 virtio_conf_readb(u32 offset) { return _bar1->readb(offset);};
+ //u16 virtio_conf_readw(u32 offset) { return _bar1->readw(offset);};
+ //u32 virtio_conf_readl(u32 offset) { return _bar1->readl(offset);};
+    //void virtio_conf_writeb(u32 offset, u8 val) { _bar1->writeb(offset, val);};
+    //void virtio_conf_writew(u32 offset, u16 val) { _bar1->writew(offset, val);};
+    //void virtio_conf_writel(u32 offset, u32 val) { _bar1->writel(offset, val);};
+
+ void virtio_conf_read(u32 offset, void* buf, int length);
+
+ bool kick(int queue);
+ void reset_host_side();
+ void free_queues();
+
+ bool get_indirect_buf_cap() {return _cap_indirect_buf;}
+ void set_indirect_buf_cap(bool on) {_cap_indirect_buf = on;}
+ bool get_event_idx_cap() {return _cap_event_idx;}
+ void set_event_idx_cap(bool on) {_cap_event_idx = on;}
+
+ mmio_device& device() { return _dev; }
+protected:
+ // Actual drivers should implement this on top of the basic ring features
+    virtual u32 get_driver_features() { return 1 << VIRTIO_RING_F_INDIRECT_DESC | 1 << VIRTIO_RING_F_EVENT_IDX; }
+ void setup_features();
+protected:
+ mmio_device& _dev;
+ //pci::device& _dev;
+ //interrupt_manager _msi;
+ vring* _queues[max_virtqueues_nr];
+ u32 _num_queues;
+ u64 _enabled_features;
+ //pci::bar* _bar1;
+ bool _cap_indirect_buf;
+ bool _cap_event_idx = false;
+ static int _disk_idx;
+};
+
+template <typename T, u16 ID>
+hw_driver* probe_mmio(hw_device* dev)
+{
+ if (auto mmio_dev = dynamic_cast<mmio_device*>(dev)) {
+ if (mmio_dev->get_id() == hw_device_id(0x0, ID)) {
+ return new T(*mmio_dev);
+ }
+ }
+ return nullptr;
+}
+
+}
+
+#endif
+
diff --git a/include/osv/virtio-assign.hh b/include/osv/virtio-assign.hh
index b81f41ae..82b374fe 100644
--- a/include/osv/virtio-assign.hh
+++ b/include/osv/virtio-assign.hh
@@ -21,7 +21,7 @@ public:
// TODO: provide a way to get one of multiple assigned virtio devices.
static assigned_virtio *get() __attribute__((weak));
- virtual void kick(int queue) = 0;
+ virtual bool kick(int queue) = 0;
virtual uint32_t queue_size(int queue) = 0;
virtual void enable_interrupt(unsigned int queue,
std::function<void(void)> handler) = 0;
diff --git a/loader.cc b/loader.cc
index 3f88ebda..2cef739b 100644
--- a/loader.cc
+++ b/loader.cc
@@ -55,6 +55,7 @@
#include "drivers/null.hh"
#include "libc/network/__dns.hh"
+#include "early-console.hh"
using namespace osv;
using namespace osv::clock::literals;
@@ -550,7 +551,7 @@ void main_cont(int loader_argc, char** loader_argv)
memory::enable_debug_allocator();
#ifndef AARCH64_PORT_STUB
- acpi::init();
+ //acpi::init();
#endif /* !AARCH64_PORT_STUB */
if (sched::cpus.size() > sched::max_cpus) {
--
2.19.1