Hey Fotis,

Thank you for the patches. I have applied the 1st one from the series. I 
have some questions and suggestions regarding this one though. Please see 
them below.


On Sunday, March 8, 2020 at 10:44:52 AM UTC-4, Fotis Xenakis wrote:
>
> The latest virtio spec adds shared memory regions: 
> "Shared memory regions are an additional facility available to devices 
> that need a region of memory that’s continuously shared between the 
> device and the driver, rather than passed between them in the way 
> virtqueue elements are." 
>
> In virtio over PCI, these are enumerated as a sequence of 
> VIRTIO_PCI_CAP_SHARED_MEMORY_CFG capabilities, one per region. 
>
> This patch extends the virtio over PCI implementation to discover all 
> such regions provided by a device. 
>
> Signed-off-by: Fotis Xenakis <[email protected] <javascript:>> 
> --- 
>  drivers/virtio-pci-device.cc | 78 +++++++++++++++++++++++++++++------- 
>  drivers/virtio-pci-device.hh | 44 +++++++++++++------- 
>  2 files changed, 93 insertions(+), 29 deletions(-) 
>
> diff --git a/drivers/virtio-pci-device.cc b/drivers/virtio-pci-device.cc 
> index c107ddd7..8e02c1c1 100644 
> --- a/drivers/virtio-pci-device.cc 
> +++ b/drivers/virtio-pci-device.cc 
> @@ -269,37 +269,87 @@ bool virtio_modern_pci_device::parse_pci_config() 
>          return false; 
>      } 
>   
> -    parse_virtio_capability(_common_cfg, VIRTIO_PCI_CAP_COMMON_CFG); 
> -    parse_virtio_capability(_isr_cfg, VIRTIO_PCI_CAP_ISR_CFG); 
> -    parse_virtio_capability(_notify_cfg, VIRTIO_PCI_CAP_NOTIFY_CFG); 
> -    parse_virtio_capability(_device_cfg, VIRTIO_PCI_CAP_DEVICE_CFG); 
> +    parse_virtio_capability(_common_cfg, VIRTIO_PCI_CAP_COMMON_CFG, 
> nullptr); 
> +    parse_virtio_capability(_isr_cfg, VIRTIO_PCI_CAP_ISR_CFG, nullptr); 
> +    parse_virtio_capability(_notify_cfg, VIRTIO_PCI_CAP_NOTIFY_CFG, 
> nullptr); 
> +    parse_virtio_capability(_device_cfg, VIRTIO_PCI_CAP_DEVICE_CFG, 
> nullptr); 
> +    parse_virtio_capabilities(_shm_cfgs, 
> VIRTIO_PCI_CAP_SHARED_MEMORY_CFG); 
>   
>      if (_notify_cfg) { 
> -        _notify_offset_multiplier 
> =_dev->pci_readl(_notify_cfg->get_cfg_offset() + 
> +        _notify_offset_multiplier = 
> _dev->pci_readl(_notify_cfg->get_cfg_offset() + 
>                  offsetof(virtio_pci_notify_cap, 
> notify_offset_multiplier)); 
>      } 
>   
>      return _common_cfg && _isr_cfg && _notify_cfg && _device_cfg; 
>  } 
>   
> -void 
> virtio_modern_pci_device::parse_virtio_capability(std::unique_ptr<virtio_capability>
>  
> &ptr, u8 type) 
> -{ 
> -    u8 cfg_offset = _dev->find_capability(pci::function::PCI_CAP_VENDOR, 
> [type] (pci::function *fun, u8 offset) { 
> -        u8 cfg_type = fun->pci_readb(offset + offsetof(struct 
> virtio_pci_cap, cfg_type)); 
> -        return type == cfg_type; 
> +// Parse all virtio PCI capabilities whose types match @type and append 
> them to 
> +// @caps. 
> +// From the spec: "The device MAY offer more than one structure of any 
> type - 
> +// this makes it possible for the device to expose multiple interfaces to 
> +// drivers. The order of the capabilities in the capability list 
> specifies the 
> +// order of preference suggested by the device. A device may specify that 
> this 
> +// ordering mechanism be overridden by the use of the id field." 
> +void virtio_modern_pci_device::parse_virtio_capabilities( 
> +    std::vector<std::unique_ptr<virtio_capability>>& caps, u8 type) 
> +{ 
> +    std::vector<u8> past_ids; 
> +    auto pred = [&past_ids] (u8 id) { 
> +        return std::none_of(past_ids.cbegin(), past_ids.cend(), [id] (u8 
> past_id) { 
> +            return past_id == id; 
> +        }); 
> +    }; 
> + 
> +    while (true) { 
> +        std::unique_ptr<virtio_capability> ptr; 
> +        // Search for a matching capability with an id not already in 
> past_ids 
> +        parse_virtio_capability(ptr, type, pred); 
> +        if (!ptr) { 
> +            return; 
> +        } 
> +        u8 id = _dev->pci_readb(ptr->get_cfg_offset() + 
> offsetof(virtio_pci_cap, id)); 
> +        past_ids.push_back(id); 
> +        caps.emplace_back(ptr.release()); 
> +    } 
> +} 
>
So the idea with the implementation of parse_virtio_capabilities() above is 
really to collect all capabilities of the given type (in our case so far 
VIRTIO_PCI_CAP_SHARED_MEMORY_CFG)? I wonder if the code can be simplified 
if we add a method function::find_capabilities(u8 cap_id) in 
pci-function.cc that will simply return a collection of matching offsets?

The general idea - we do not have to do it in this patch. I noticed we 
re-read same data in u8 function::find_capability() over and over (and each 
read is an exit I think) and that is what cause the pci initialization 
slower especially for modern devices (I see 10ms difference in boot time 
between running the same program with '--virtio modern' and '--virtio 
legacy' (default). Maybe some easy caching - again let us not worry now.

> + 
> +// Parse a single virtio PCI capability, whose type must match @type and 
> id must 
> +// satisfy @id_pred. 
> +void 
> virtio_modern_pci_device::parse_virtio_capability(std::unique_ptr<virtio_capability>&
>  
> ptr, u8 type, 
> +    std::function<bool (u8)> id_pred) 
> +{ 
> +    u8 cfg_offset = _dev->find_capability(pci::function::PCI_CAP_VENDOR, 
> +        [type, id_pred] (pci::function *fun, u8 offset) { 
> +            u8 cfg_type = fun->pci_readb(offset + 
> offsetof(virtio_pci_cap, cfg_type)); 
> +            if (cfg_type != type) { 
> +                return false; 
> +            } 
> +            u8 id = fun->pci_readb(offset + offsetof(virtio_pci_cap, 
> id)); 
> +            if (id_pred && !id_pred(id)) { 
> +                return false; 
> +            } 
> +            return true; 
>
Easy optimization suggestion (given that I think each pci_readb is an exit 
to hypervisor) - maybe first check if id_pred not null and only then do 
pci_readb() and call id_pred. Most parse_virtio_capability() calls pass 
id_pred as null, right?

>      }); 
>   
>      if (cfg_offset != 0xFF) { 
> -        u8 bar_index = _dev->pci_readb(cfg_offset + offsetof(struct 
> virtio_pci_cap, bar)); 
> -        u32 offset = _dev->pci_readl(cfg_offset + offsetof(struct 
> virtio_pci_cap, offset)); 
> -        u32 length = _dev->pci_readl(cfg_offset + offsetof(struct 
> virtio_pci_cap, length)); 
> - 
> +        u8 bar_index = _dev->pci_readb(cfg_offset + 
> offsetof(virtio_pci_cap, bar)); 
>          auto bar_no = bar_index + 1; 
>          auto bar = _dev->get_bar(bar_no); 
>          if (bar && bar->is_mmio() && !bar->is_mapped()) { 
>              bar->map(); 
>          } 
>   
> +        u64 offset = _dev->pci_readl(cfg_offset + 
> offsetof(virtio_pci_cap, offset)); 
> +        u64 length = _dev->pci_readl(cfg_offset + 
> offsetof(virtio_pci_cap, length)); 
> +        if (type == VIRTIO_PCI_CAP_SHARED_MEMORY_CFG) { 
> +            // The shared memory region capability is defined by a struct 
> +            // virtio_pci_cap64 
> +            u32 offset_hi = _dev->pci_readl(cfg_offset + 
> offsetof(virtio_pci_cap64, offset_hi)); 
> +            u32 length_hi = _dev->pci_readl(cfg_offset + 
> offsetof(virtio_pci_cap64, length_hi)); 
> +            offset |= ((u64)offset_hi << 32); 
> +            length |= ((u64)length_hi << 32); 
> +        } 
> + 
>          ptr.reset(new 
> virtio_modern_pci_device::virtio_capability(cfg_offset, bar, bar_no, 
> offset, length)); 
>      } 
>  } 
> diff --git a/drivers/virtio-pci-device.hh b/drivers/virtio-pci-device.hh 
> index 5a891d93..9a32aa18 100644 
> --- a/drivers/virtio-pci-device.hh 
> +++ b/drivers/virtio-pci-device.hh 
> @@ -98,7 +98,7 @@ public: 
>      ~virtio_legacy_pci_device() {} 
>   
>      virtual const char *get_version() { return "legacy"; } 
> -    virtual u16 get_type_id() { return _dev->get_subsystem_id(); }; 
> +    virtual u16 get_type_id() { return _dev->get_subsystem_id(); } 
>   
>      virtual void select_queue(int queue); 
>      virtual u16 get_queue_size(); 
> @@ -115,7 +115,7 @@ public: 
>      virtual u8 read_config(u32 offset); 
>      virtual u8 read_and_ack_isr(); 
>   
> -    virtual bool is_modern() { return false; }; 
> +    virtual bool is_modern() { return false; } 
>  protected: 
>      virtual bool parse_pci_config(); 
>   
> @@ -145,6 +145,8 @@ enum VIRTIO_MODERN_PCI_CONFIG { 
>      VIRTIO_PCI_CAP_DEVICE_CFG = 4, 
>      /* PCI configuration access */ 
>      VIRTIO_PCI_CAP_PCI_CFG = 5, 
> +    /* Shared memory region */ 
> +    VIRTIO_PCI_CAP_SHARED_MEMORY_CFG = 8, 
>  }; 
>   
>  /* This is the PCI capability header: */ 
> @@ -154,11 +156,20 @@ struct virtio_pci_cap { 
>      u8 cap_len;     /* Generic PCI field: capability length */ 
>      u8 cfg_type;    /* Identifies the structure. */ 
>      u8 bar;         /* Where to find it. */ 
> -    u8 padding[3];  /* Pad to full dword. */ 
> +    u8 id;          /* Multiple capabilities of the same type */ 
> +    u8 padding[2];  /* Pad to full dword. */ 
>      u32 offset;     /* Offset within bar. */ 
>      u32 length;     /* Length of the structure, in bytes. */ 
>  }; 
>   
> +/* A variant of virtio_pci_cap, for capabilities that require offsets or 
> lengths 
> + * larger than 4GiB */ 
> +struct virtio_pci_cap64 { 
> +    struct virtio_pci_cap cap; 
> +    u32 offset_hi; 
> +    u32 length_hi; 
> +}; 
> + 
>  /* The notification location is found using the VIRTIO_PCI_CAP_NOTIFY_CFG 
> capability. 
>   * This capability is immediately followed by an additional field, like 
> so:*/ 
>  struct virtio_pci_notify_cap { 
> @@ -198,7 +209,7 @@ struct virtio_pci_common_cfg { 
>  class virtio_modern_pci_device : public virtio_pci_device { 
>  public: 
>      struct virtio_capability { 
> -        virtio_capability(u32 cfg_offset, pci::bar* bar, u32 bar_no, u32 
> bar_offset, u32 length) : 
> +        virtio_capability(u32 cfg_offset, pci::bar* bar, u32 bar_no, u64 
> bar_offset, u64 length) : 
>              _cfg_offset(cfg_offset), 
>              _bar(bar), 
>              _bar_no(bar_no), 
> @@ -207,27 +218,27 @@ public: 
>              assert(_length > 0 && _bar_offset >= 0 && _bar_offset + 
> _length <= _bar->get_size()); 
>          } 
>   
> -        u8 virtio_conf_readb(u32 offset) { 
> +        u8 virtio_conf_readb(u64 offset) { 
>              verify_offset(offset, sizeof(u8)); 
>              return _bar->readb(_bar_offset + offset); 
>          }; 
> -        u16 virtio_conf_readw(u32 offset) { 
> +        u16 virtio_conf_readw(u64 offset) { 
>              verify_offset(offset, sizeof(u16)); 
>              return _bar->readw(_bar_offset + offset); 
>          }; 
> -        u32 virtio_conf_readl(u32 offset) { 
> +        u32 virtio_conf_readl(u64 offset) { 
>              verify_offset(offset, sizeof(u32)); 
>              return _bar->readl(_bar_offset + offset); 
>          }; 
> -        void virtio_conf_writeb(u32 offset, u8 val) { 
> +        void virtio_conf_writeb(u64 offset, u8 val) { 
>              verify_offset(offset, sizeof(u8)); 
>              _bar->writeb(_bar_offset + offset, val); 
>          }; 
> -        void virtio_conf_writew(u32 offset, u16 val) { 
> +        void virtio_conf_writew(u64 offset, u16 val) { 
>              verify_offset(offset, sizeof(u16)); 
>              _bar->writew(_bar_offset + offset, val); 
>          }; 
> -        void virtio_conf_writel(u32 offset, u32 val) { 
> +        void virtio_conf_writel(u64 offset, u32 val) { 
>              verify_offset(offset, sizeof(u32)); 
>              _bar->writel(_bar_offset + offset, val); 
>          }; 
> @@ -237,15 +248,15 @@ public: 
>              virtio_d("%s bar=%d, offset=%x, size=%x", prefix, _bar_no, 
> _bar_offset, _length); 
>          } 
>      private: 
> -        inline void verify_offset(u32 offset, u32 size) { 
> +        inline void verify_offset(u64 offset, u32 size) { 
>              assert(offset >= 0 && offset + size <= _length); 
>          } 
>   
>          u32 _cfg_offset; 
>          pci::bar* _bar; 
>          u32 _bar_no; 
> -        u32 _bar_offset; 
> -        u32 _length; 
> +        u64 _bar_offset; 
> +        u64 _length; 
>      }; 
>   
>      explicit virtio_modern_pci_device(pci::device *dev); 
> @@ -276,12 +287,15 @@ public: 
>  protected: 
>      virtual bool parse_pci_config(); 
>  private: 
> -    void parse_virtio_capability(std::unique_ptr<virtio_capability> &ptr, 
> u8 type); 
> +    void 
> parse_virtio_capabilities(std::vector<std::unique_ptr<virtio_capability>>& 
> caps, u8 type); 
> +    void parse_virtio_capability(std::unique_ptr<virtio_capability> &ptr, 
> u8 type, 
> +        std::function<bool (u8)> id_pred); 
>   
>      std::unique_ptr<virtio_capability> _common_cfg; 
>      std::unique_ptr<virtio_capability> _isr_cfg; 
>      std::unique_ptr<virtio_capability> _notify_cfg; 
>      std::unique_ptr<virtio_capability> _device_cfg; 
> +    std::vector<std::unique_ptr<virtio_capability>> _shm_cfgs; 
>   
>      u32 _notify_offset_multiplier; 
>      u32 _queues_notify_offsets[64]; 
> @@ -293,4 +307,4 @@ virtio_device* create_virtio_pci_device(pci::device 
> *dev); 
>   
>  } 
>   
> -#endif //VIRTIO_PCI_DEVICE_HH 
> \ No newline at end of file 
> +#endif //VIRTIO_PCI_DEVICE_HH 
> -- 
> 2.25.1 
>
>

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/76837ad1-7e3b-433f-827f-e8c2ad81cafb%40googlegroups.com.

Reply via email to