Hey Fotis, Thank you for the patches. I have applied the 1st one from the series. I have some questions and suggestions regarding this one though. Please see them below.
On Sunday, March 8, 2020 at 10:44:52 AM UTC-4, Fotis Xenakis wrote: > > The latest virtio spec adds shared memory regions: > "Shared memory regions are an additional facility available to devices > that need a region of memory that’s continuously shared between the > device and the driver, rather than passed between them in the way > virtqueue elements are." > > In virtio over PCI, these are enumerated as a sequence of > VIRTIO_PCI_CAP_SHARED_MEMORY_CFG capabilities, one per region. > > This patch extends the virtio over PCI implementation to discover all > such regions provided by a device. > > Signed-off-by: Fotis Xenakis <[email protected] <javascript:>> > --- > drivers/virtio-pci-device.cc | 78 +++++++++++++++++++++++++++++------- > drivers/virtio-pci-device.hh | 44 +++++++++++++------- > 2 files changed, 93 insertions(+), 29 deletions(-) > > diff --git a/drivers/virtio-pci-device.cc b/drivers/virtio-pci-device.cc > index c107ddd7..8e02c1c1 100644 > --- a/drivers/virtio-pci-device.cc > +++ b/drivers/virtio-pci-device.cc > @@ -269,37 +269,87 @@ bool virtio_modern_pci_device::parse_pci_config() > return false; > } > > - parse_virtio_capability(_common_cfg, VIRTIO_PCI_CAP_COMMON_CFG); > - parse_virtio_capability(_isr_cfg, VIRTIO_PCI_CAP_ISR_CFG); > - parse_virtio_capability(_notify_cfg, VIRTIO_PCI_CAP_NOTIFY_CFG); > - parse_virtio_capability(_device_cfg, VIRTIO_PCI_CAP_DEVICE_CFG); > + parse_virtio_capability(_common_cfg, VIRTIO_PCI_CAP_COMMON_CFG, > nullptr); > + parse_virtio_capability(_isr_cfg, VIRTIO_PCI_CAP_ISR_CFG, nullptr); > + parse_virtio_capability(_notify_cfg, VIRTIO_PCI_CAP_NOTIFY_CFG, > nullptr); > + parse_virtio_capability(_device_cfg, VIRTIO_PCI_CAP_DEVICE_CFG, > nullptr); > + parse_virtio_capabilities(_shm_cfgs, > VIRTIO_PCI_CAP_SHARED_MEMORY_CFG); > > if (_notify_cfg) { > - _notify_offset_multiplier > =_dev->pci_readl(_notify_cfg->get_cfg_offset() + > + _notify_offset_multiplier = > _dev->pci_readl(_notify_cfg->get_cfg_offset() + > offsetof(virtio_pci_notify_cap, > notify_offset_multiplier)); > } > > return _common_cfg && _isr_cfg && _notify_cfg && _device_cfg; > } > > -void > virtio_modern_pci_device::parse_virtio_capability(std::unique_ptr<virtio_capability> > > &ptr, u8 type) > -{ > - u8 cfg_offset = _dev->find_capability(pci::function::PCI_CAP_VENDOR, > [type] (pci::function *fun, u8 offset) { > - u8 cfg_type = fun->pci_readb(offset + offsetof(struct > virtio_pci_cap, cfg_type)); > - return type == cfg_type; > +// Parse all virtio PCI capabilities whose types match @type and append > them to > +// @caps. > +// From the spec: "The device MAY offer more than one structure of any > type - > +// this makes it possible for the device to expose multiple interfaces to > +// drivers. The order of the capabilities in the capability list > specifies the > +// order of preference suggested by the device. A device may specify that > this > +// ordering mechanism be overridden by the use of the id field." > +void virtio_modern_pci_device::parse_virtio_capabilities( > + std::vector<std::unique_ptr<virtio_capability>>& caps, u8 type) > +{ > + std::vector<u8> past_ids; > + auto pred = [&past_ids] (u8 id) { > + return std::none_of(past_ids.cbegin(), past_ids.cend(), [id] (u8 > past_id) { > + return past_id == id; > + }); > + }; > + > + while (true) { > + std::unique_ptr<virtio_capability> ptr; > + // Search for a matching capability with an id not already in > past_ids > + parse_virtio_capability(ptr, type, pred); > + if (!ptr) { > + return; > + } > + u8 id = _dev->pci_readb(ptr->get_cfg_offset() + > offsetof(virtio_pci_cap, id)); > + past_ids.push_back(id); > + caps.emplace_back(ptr.release()); > + } > +} > So the idea with the implementation of parse_virtio_capabilities() above is really to collect all capabilities of the given type (in our case so far VIRTIO_PCI_CAP_SHARED_MEMORY_CFG)? I wonder if the code can be simplified if we add a method function::find_capabilities(u8 cap_id) in pci-function.cc that will simply return a collection of matching offsets? The general idea - we do not have to do it in this patch. I noticed we re-read same data in u8 function::find_capability() over and over (and each read is an exit I think) and that is what cause the pci initialization slower especially for modern devices (I see 10ms difference in boot time between running the same program with '--virtio modern' and '--virtio legacy' (default). Maybe some easy caching - again let us not worry now. > + > +// Parse a single virtio PCI capability, whose type must match @type and > id must > +// satisfy @id_pred. > +void > virtio_modern_pci_device::parse_virtio_capability(std::unique_ptr<virtio_capability>& > > ptr, u8 type, > + std::function<bool (u8)> id_pred) > +{ > + u8 cfg_offset = _dev->find_capability(pci::function::PCI_CAP_VENDOR, > + [type, id_pred] (pci::function *fun, u8 offset) { > + u8 cfg_type = fun->pci_readb(offset + > offsetof(virtio_pci_cap, cfg_type)); > + if (cfg_type != type) { > + return false; > + } > + u8 id = fun->pci_readb(offset + offsetof(virtio_pci_cap, > id)); > + if (id_pred && !id_pred(id)) { > + return false; > + } > + return true; > Easy optimization suggestion (given that I think each pci_readb is an exit to hypervisor) - maybe first check if id_pred not null and only then do pci_readb() and call id_pred. Most parse_virtio_capability() calls pass id_pred as null, right? > }); > > if (cfg_offset != 0xFF) { > - u8 bar_index = _dev->pci_readb(cfg_offset + offsetof(struct > virtio_pci_cap, bar)); > - u32 offset = _dev->pci_readl(cfg_offset + offsetof(struct > virtio_pci_cap, offset)); > - u32 length = _dev->pci_readl(cfg_offset + offsetof(struct > virtio_pci_cap, length)); > - > + u8 bar_index = _dev->pci_readb(cfg_offset + > offsetof(virtio_pci_cap, bar)); > auto bar_no = bar_index + 1; > auto bar = _dev->get_bar(bar_no); > if (bar && bar->is_mmio() && !bar->is_mapped()) { > bar->map(); > } > > + u64 offset = _dev->pci_readl(cfg_offset + > offsetof(virtio_pci_cap, offset)); > + u64 length = _dev->pci_readl(cfg_offset + > offsetof(virtio_pci_cap, length)); > + if (type == VIRTIO_PCI_CAP_SHARED_MEMORY_CFG) { > + // The shared memory region capability is defined by a struct > + // virtio_pci_cap64 > + u32 offset_hi = _dev->pci_readl(cfg_offset + > offsetof(virtio_pci_cap64, offset_hi)); > + u32 length_hi = _dev->pci_readl(cfg_offset + > offsetof(virtio_pci_cap64, length_hi)); > + offset |= ((u64)offset_hi << 32); > + length |= ((u64)length_hi << 32); > + } > + > ptr.reset(new > virtio_modern_pci_device::virtio_capability(cfg_offset, bar, bar_no, > offset, length)); > } > } > diff --git a/drivers/virtio-pci-device.hh b/drivers/virtio-pci-device.hh > index 5a891d93..9a32aa18 100644 > --- a/drivers/virtio-pci-device.hh > +++ b/drivers/virtio-pci-device.hh > @@ -98,7 +98,7 @@ public: > ~virtio_legacy_pci_device() {} > > virtual const char *get_version() { return "legacy"; } > - virtual u16 get_type_id() { return _dev->get_subsystem_id(); }; > + virtual u16 get_type_id() { return _dev->get_subsystem_id(); } > > virtual void select_queue(int queue); > virtual u16 get_queue_size(); > @@ -115,7 +115,7 @@ public: > virtual u8 read_config(u32 offset); > virtual u8 read_and_ack_isr(); > > - virtual bool is_modern() { return false; }; > + virtual bool is_modern() { return false; } > protected: > virtual bool parse_pci_config(); > > @@ -145,6 +145,8 @@ enum VIRTIO_MODERN_PCI_CONFIG { > VIRTIO_PCI_CAP_DEVICE_CFG = 4, > /* PCI configuration access */ > VIRTIO_PCI_CAP_PCI_CFG = 5, > + /* Shared memory region */ > + VIRTIO_PCI_CAP_SHARED_MEMORY_CFG = 8, > }; > > /* This is the PCI capability header: */ > @@ -154,11 +156,20 @@ struct virtio_pci_cap { > u8 cap_len; /* Generic PCI field: capability length */ > u8 cfg_type; /* Identifies the structure. */ > u8 bar; /* Where to find it. */ > - u8 padding[3]; /* Pad to full dword. */ > + u8 id; /* Multiple capabilities of the same type */ > + u8 padding[2]; /* Pad to full dword. */ > u32 offset; /* Offset within bar. */ > u32 length; /* Length of the structure, in bytes. */ > }; > > +/* A variant of virtio_pci_cap, for capabilities that require offsets or > lengths > + * larger than 4GiB */ > +struct virtio_pci_cap64 { > + struct virtio_pci_cap cap; > + u32 offset_hi; > + u32 length_hi; > +}; > + > /* The notification location is found using the VIRTIO_PCI_CAP_NOTIFY_CFG > capability. > * This capability is immediately followed by an additional field, like > so:*/ > struct virtio_pci_notify_cap { > @@ -198,7 +209,7 @@ struct virtio_pci_common_cfg { > class virtio_modern_pci_device : public virtio_pci_device { > public: > struct virtio_capability { > - virtio_capability(u32 cfg_offset, pci::bar* bar, u32 bar_no, u32 > bar_offset, u32 length) : > + virtio_capability(u32 cfg_offset, pci::bar* bar, u32 bar_no, u64 > bar_offset, u64 length) : > _cfg_offset(cfg_offset), > _bar(bar), > _bar_no(bar_no), > @@ -207,27 +218,27 @@ public: > assert(_length > 0 && _bar_offset >= 0 && _bar_offset + > _length <= _bar->get_size()); > } > > - u8 virtio_conf_readb(u32 offset) { > + u8 virtio_conf_readb(u64 offset) { > verify_offset(offset, sizeof(u8)); > return _bar->readb(_bar_offset + offset); > }; > - u16 virtio_conf_readw(u32 offset) { > + u16 virtio_conf_readw(u64 offset) { > verify_offset(offset, sizeof(u16)); > return _bar->readw(_bar_offset + offset); > }; > - u32 virtio_conf_readl(u32 offset) { > + u32 virtio_conf_readl(u64 offset) { > verify_offset(offset, sizeof(u32)); > return _bar->readl(_bar_offset + offset); > }; > - void virtio_conf_writeb(u32 offset, u8 val) { > + void virtio_conf_writeb(u64 offset, u8 val) { > verify_offset(offset, sizeof(u8)); > _bar->writeb(_bar_offset + offset, val); > }; > - void virtio_conf_writew(u32 offset, u16 val) { > + void virtio_conf_writew(u64 offset, u16 val) { > verify_offset(offset, sizeof(u16)); > _bar->writew(_bar_offset + offset, val); > }; > - void virtio_conf_writel(u32 offset, u32 val) { > + void virtio_conf_writel(u64 offset, u32 val) { > verify_offset(offset, sizeof(u32)); > _bar->writel(_bar_offset + offset, val); > }; > @@ -237,15 +248,15 @@ public: > virtio_d("%s bar=%d, offset=%x, size=%x", prefix, _bar_no, > _bar_offset, _length); > } > private: > - inline void verify_offset(u32 offset, u32 size) { > + inline void verify_offset(u64 offset, u32 size) { > assert(offset >= 0 && offset + size <= _length); > } > > u32 _cfg_offset; > pci::bar* _bar; > u32 _bar_no; > - u32 _bar_offset; > - u32 _length; > + u64 _bar_offset; > + u64 _length; > }; > > explicit virtio_modern_pci_device(pci::device *dev); > @@ -276,12 +287,15 @@ public: > protected: > virtual bool parse_pci_config(); > private: > - void parse_virtio_capability(std::unique_ptr<virtio_capability> &ptr, > u8 type); > + void > parse_virtio_capabilities(std::vector<std::unique_ptr<virtio_capability>>& > caps, u8 type); > + void parse_virtio_capability(std::unique_ptr<virtio_capability> &ptr, > u8 type, > + std::function<bool (u8)> id_pred); > > std::unique_ptr<virtio_capability> _common_cfg; > std::unique_ptr<virtio_capability> _isr_cfg; > std::unique_ptr<virtio_capability> _notify_cfg; > std::unique_ptr<virtio_capability> _device_cfg; > + std::vector<std::unique_ptr<virtio_capability>> _shm_cfgs; > > u32 _notify_offset_multiplier; > u32 _queues_notify_offsets[64]; > @@ -293,4 +307,4 @@ virtio_device* create_virtio_pci_device(pci::device > *dev); > > } > > -#endif //VIRTIO_PCI_DEVICE_HH > \ No newline at end of file > +#endif //VIRTIO_PCI_DEVICE_HH > -- > 2.25.1 > > -- You received this message because you are subscribed to the Google Groups "OSv Development" group. To unsubscribe from this group and stop receiving emails from it, send an email to [email protected]. To view this discussion on the web visit https://groups.google.com/d/msgid/osv-dev/76837ad1-7e3b-433f-827f-e8c2ad81cafb%40googlegroups.com.
