Re: [PATCH V3] vdpa: introduce virtio pci driver

2020-06-10 Thread Jason Wang



On 2020/6/10 4:51 PM, Michael S. Tsirkin wrote:

On Wed, Jun 10, 2020 at 04:25:06PM +0800, Jason Wang wrote:

+
+#define VP_VDPA_FEATURES \
+   ((1ULL << VIRTIO_F_ANY_LAYOUT)    | \

This is presumably for transitional devices only.  In fact looking at
code it seems that only net in legacy mode accepts VIRTIO_F_ANY_LAYOUT.
Spec violation I guess ... but what should we do? Relax the spec
or fix drivers?


I don't get how it violates the spec.

Spec says transitional drivers must ack VIRTIO_F_ANY_LAYOUT



I don't see this in the spec, maybe you can point it out to me?




But grep for VIRTIO_F_ANY_LAYOUT and you will see they don't.
Only legacy virtio net does.

Maybe it should have been
transitional drivers when using the legacy interface, but there
you are.




+    (1ULL << VIRTIO_F_VERSION_1)      | \
+    (1ULL << VIRTIO_F_ORDER_PLATFORM) | \
+    (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+
+struct vp_vring {
+   void __iomem *notify;
+   char msix_name[256];
+   resource_size_t notify_pa;
+   struct vdpa_callback cb;
+   int irq;
+};
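
/*
 * Not part of the patch: a sketch of how the per-vring MSI-X fields
 * above presumably fit together at probe time. The handler name
 * vp_vdpa_vq_handler and the qid/ret locals are hypothetical.
 */
	snprintf(vring->msix_name, sizeof(vring->msix_name),
		 "vp-vdpa[%s]-%u", pci_name(pdev), qid);
	vring->irq = pci_irq_vector(pdev, qid);
	ret = devm_request_irq(&pdev->dev, vring->irq, vp_vdpa_vq_handler,
			       0, vring->msix_name, vring);
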
+
+struct vp_vdpa {
+   struct vdpa_device vdpa;
+   struct pci_dev *pdev;
+
+   struct virtio_device_id id;
+
+   struct vp_vring vring[VP_VDPA_MAX_QUEUE];
+
+   /* The IO mapping for the PCI config space */
+   void __iomem * const *base;
+   struct virtio_pci_common_cfg __iomem *common;
+   void __iomem *device;
+   /* Base of vq notifications */
+   void __iomem *notify;
+
+   /* Multiplier for queue_notify_off. */
+   u32 notify_off_multiplier;
+
+   int modern_bars;
+   int vectors;
+};
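
/*
 * Not part of the patch: "base" looks like the managed iomap table and
 * modern_bars the mask filled by find_capability() below; a plausible
 * probe-time pairing (my assumption, mirroring virtio_pci_modern.c):
 */
	ret = pcim_iomap_regions(pdev, vp_vdpa->modern_bars, "vp_vdpa");
	if (ret)
		return ret;
	vp_vdpa->base = pcim_iomap_table(pdev);
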
+
+static struct vp_vdpa *vdpa_to_vp(struct vdpa_device *vdpa)
+{
+   return container_of(vdpa, struct vp_vdpa, vdpa);
+}
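
/*
 * Not part of the patch: container_of() here is plain pointer
 * arithmetic. Given the embedded member, it is equivalent to
 *
 *	(struct vp_vdpa *)((char *)vdpa - offsetof(struct vp_vdpa, vdpa));
 *
 * so the wrapper is recovered from the vdpa_device the core hands back.
 */
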
+
+/*
+ * Type-safe wrappers for io accesses.
+ * Use these to enforce at compile time the following spec requirement:
+ *
+ * The driver MUST access each field using the “natural” access
+ * method, i.e. 32-bit accesses for 32-bit fields, 16-bit accesses
+ * for 16-bit fields and 8-bit accesses for 8-bit fields.
+ */
+static inline u8 vp_ioread8(u8 __iomem *addr)
+{
+   return ioread8(addr);
+}
+static inline u16 vp_ioread16(__le16 __iomem *addr)
+{
+   return ioread16(addr);
+}
+
+static inline u32 vp_ioread32(__le32 __iomem *addr)
+{
+   return ioread32(addr);
+}
+
+static inline void vp_iowrite8(u8 value, u8 __iomem *addr)
+{
+   iowrite8(value, addr);
+}
+
+static inline void vp_iowrite16(u16 value, __le16 __iomem *addr)
+{
+   iowrite16(value, addr);
+}
+
+static inline void vp_iowrite32(u32 value, __le32 __iomem *addr)
+{
+   iowrite32(value, addr);
+}
+
+static void vp_iowrite64_twopart(u64 val,
+__le32 __iomem *lo, __le32 __iomem *hi)
+{
+   vp_iowrite32((u32)val, lo);
+   vp_iowrite32(val >> 32, hi);
+}
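
/*
 * Not part of the patch: the typed prototypes above turn width
 * mismatches into compile-time diagnostics. For example, with the
 * __le16 queue_select field of struct virtio_pci_common_cfg:
 *
 *	vp_iowrite16(0, &cfg->queue_select);	// correct width
 *	vp_iowrite32(0, &cfg->queue_select);	// incompatible pointer type
 *
 * whereas a raw iowrite32() on the same address would compile silently.
 */
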
+
+static int find_capability(struct pci_dev *dev, u8 cfg_type,
+  u32 ioresource_types, int *bars)
+{
+   int pos;
+
+   for (pos = pci_find_capability(dev, PCI_CAP_ID_VNDR);
+pos > 0;
+pos = pci_find_next_capability(dev, pos, PCI_CAP_ID_VNDR)) {
+   u8 type, bar;
+
+   pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap,
+cfg_type),
+&type);
+   pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap,
+bar),
+&bar);
+
+   /* Ignore structures with reserved BAR values */
+   if (bar > 0x5)
+   continue;
+
+   if (type == cfg_type) {
+   if (pci_resource_len(dev, bar) &&
+   pci_resource_flags(dev, bar) & ioresource_types) {
+   *bars |= (1 << bar);
+   return pos;
+   }
+   }
+   }
+   return 0;
+}
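
/*
 * Not part of the patch: presumed usage, mirroring
 * virtio_pci_modern_probe() - scan once per capability type and
 * accumulate the BAR mask:
 */
	common = find_capability(pdev, VIRTIO_PCI_CAP_COMMON_CFG,
				 IORESOURCE_MEM, &vp_vdpa->modern_bars);
	notify = find_capability(pdev, VIRTIO_PCI_CAP_NOTIFY_CFG,
				 IORESOURCE_MEM, &vp_vdpa->modern_bars);
	device = find_capability(pdev, VIRTIO_PCI_CAP_DEVICE_CFG,
				 IORESOURCE_MEM, &vp_vdpa->modern_bars);
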
+
+static void __iomem *map_capability(struct vp_vdpa *vp_vdpa, int off,
+   resource_size_t *pa)
+{
+   struct pci_dev *pdev = vp_vdpa->pdev;
+   u32 offset;
+   u8 bar;
+
+   pci_read_config_byte(pdev,
+off + offsetof(struct virtio_pci_cap, bar),
+&bar);
+   pci_read_config_dword(pdev,
+ off + offsetof(struct virtio_pci_cap, offset),
+ &offset);
+
+   if (pa)
+   *pa = pci_resource_start(pdev, bar) + offset;
+
+   return vp_vdpa->base[bar] + offset;
+}
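
/*
 * Not part of the patch: with the capability positions from above, the
 * three windows would then presumably be mapped like so:
 */
	vp_vdpa->common = map_capability(vp_vdpa, common, NULL);
	vp_vdpa->device = map_capability(vp_vdpa, device, NULL);
	vp_vdpa->notify = map_capability(vp_vdpa, notify, NULL);
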
+
+static u64 vp_vdpa_get_features(struct vdpa_device *vdpa)
+{
+   struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa);
+   u64 features;
+
+   vp_iowrite32(0, &vp_vdpa->common->device_feature_select);
+   features = vp_ioread32(&vp_vdpa->common->device_feature);
+   vp_iowrite32(1, &vp_vdpa->common->device_feature_select);
+   features |= ((u64)vp_ioread32(&vp_vdpa->common->device_feature) << 32);
+
+   return features;
+}

Re: [PATCH V3] vdpa: introduce virtio pci driver

2020-06-10 Thread Jason Wang



On 2020/6/10 3:08 PM, Michael S. Tsirkin wrote:

On Wed, Jun 10, 2020 at 02:52:17PM +0800, Jason Wang wrote:

This patch introduces a vDPA driver for virtio-pci devices. It bridges
the virtio-pci control commands to the vDPA bus. This will be used for
developing new features for both the software vDPA framework and hardware
vDPA features.

Compared to vdpa_sim, it has several advantages:

- it's a real device driver which allows us to play with real hardware
   features
- type independent instead of networking specific

Note that since the virtio specification does not support getting/restoring
virtqueue state, we can not use this driver for a VM. This can be
addressed by extending the virtio specification.

Considering the driver is mainly for testing and developing vDPA
features, it can only be bound via dynamic ids to make sure it does not
conflict with drivers like virtio-pci or IFCVF.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
Changes from V2:
- rebase on vhost.git vhost branch
---
  drivers/vdpa/Kconfig   |   8 +
  drivers/vdpa/Makefile  |   1 +
  drivers/vdpa/vp_vdpa/Makefile  |   2 +
  drivers/vdpa/vp_vdpa/vp_vdpa.c | 601 +
  4 files changed, 612 insertions(+)
  create mode 100644 drivers/vdpa/vp_vdpa/Makefile
  create mode 100644 drivers/vdpa/vp_vdpa/vp_vdpa.c

diff --git a/drivers/vdpa/Kconfig b/drivers/vdpa/Kconfig
index 3e1ceb8e9f2b..deb85e43a4c2 100644
--- a/drivers/vdpa/Kconfig
+++ b/drivers/vdpa/Kconfig
@@ -28,4 +28,12 @@ config IFCVF
  To compile this driver as a module, choose M here: the module will
  be called ifcvf.
  
+config VP_VDPA
+   tristate "Virtio PCI bridge vDPA driver"
+   depends on PCI_MSI
+   help
+ This kernel module bridges a virtio PCI device to the vDPA
+ bus. It allows us to test and develop the vDPA subsystem inside
+ a VM with an emulated virtio-pci device.
+
  endif # VDPA
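
For anyone trying the patch, a minimal config fragment would be
(assuming the entry above is applied; VDPA and PCI_MSI are its stated
prerequisites):

   CONFIG_VDPA=y
   CONFIG_PCI_MSI=y
   CONFIG_VP_VDPA=m
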
diff --git a/drivers/vdpa/Makefile b/drivers/vdpa/Makefile
index 8bbb686ca7a2..37d00f49b3bf 100644
--- a/drivers/vdpa/Makefile
+++ b/drivers/vdpa/Makefile
@@ -2,3 +2,4 @@
  obj-$(CONFIG_VDPA) += vdpa.o
  obj-$(CONFIG_VDPA_SIM) += vdpa_sim/
  obj-$(CONFIG_IFCVF)    += ifcvf/
+obj-$(CONFIG_VP_VDPA)  += vp_vdpa/
diff --git a/drivers/vdpa/vp_vdpa/Makefile b/drivers/vdpa/vp_vdpa/Makefile
new file mode 100644
index 000000000000..231088d3af7d
--- /dev/null
+++ b/drivers/vdpa/vp_vdpa/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_VP_VDPA) += vp_vdpa.o
diff --git a/drivers/vdpa/vp_vdpa/vp_vdpa.c b/drivers/vdpa/vp_vdpa/vp_vdpa.c
new file mode 100644
index 000000000000..2070298ab9fc
--- /dev/null
+++ b/drivers/vdpa/vp_vdpa/vp_vdpa.c
@@ -0,0 +1,601 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vDPA bridge driver for modern virtio-pci device

And judging by the code, transitional too?
Or maybe we should drop transitional device support here.



Yes, I will simply drop the transitional device support.





+ *
+ * Copyright (c) 2020, Red Hat Inc. All rights reserved.
+ * Author: Jason Wang <jasowang@redhat.com>
+ *
+ * Based on virtio_pci_modern.c.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/vdpa.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_ring.h>
+#include <linux/virtio_pci.h>
+
+/* TBD: read from config space */
+#define VP_VDPA_MAX_QUEUE 2

We need to fix that right? Otherwise lots of devices break ...



Yes, will fix.
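
A sketch of what the fix might look like (my assumption, not from this
patch): struct virtio_pci_common_cfg already carries the queue count,
so the driver could size things at probe time instead of hardcoding
VP_VDPA_MAX_QUEUE:

   /* read the device's queue count from the common config */
   u16 num_queues = vp_ioread16(&vp_vdpa->common->num_queues);
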





+#define VP_VDPA_DRIVER_NAME "vp_vdpa"

not sure why you need this macro ...



Used only once, so I will remove this.





+
+#define VP_VDPA_FEATURES \
+   ((1ULL << VIRTIO_F_ANY_LAYOUT)    | \


This is presumably for transitional devices only.  In fact looking at
code it seems that only net in legacy mode accepts VIRTIO_F_ANY_LAYOUT.
Spec violation I guess ... but what should we do? Relax the spec
or fix drivers?



I don't get how it violates the spec.






+    (1ULL << VIRTIO_F_VERSION_1)      | \
+    (1ULL << VIRTIO_F_ORDER_PLATFORM) | \
+    (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+
+struct vp_vring {
+   void __iomem *notify;
+   char msix_name[256];
+   resource_size_t notify_pa;
+   struct vdpa_callback cb;
+   int irq;
+};
+
+struct vp_vdpa {
+   struct vdpa_device vdpa;
+   struct pci_dev *pdev;
+
+   struct virtio_device_id id;
+
+   struct vp_vring vring[VP_VDPA_MAX_QUEUE];
+
+   /* The IO mapping for the PCI config space */
+   void __iomem * const *base;
+   struct virtio_pci_common_cfg __iomem *common;
+   void __iomem *device;
+   /* Base of vq notifications */
+   void __iomem *notify;
+
+   /* Multiplier for queue_notify_off. */
+   u32 notify_off_multiplier;
+
+   int modern_bars;
+   int vectors;
+};
+
+static struct vp_vdpa *vdpa_to_vp(struct vdpa_device *vdpa)
+{
+   return container_of(vdpa, struct vp_vdpa, vdpa);
+}
+
+/*
+ * Type-safe wrappers for io accesses.
+ * Use these to enforce at compile time the following spec requirement:
+ *
+ * The driver MUST access each field using the “natural” access
+ * method, i.e. 32-bit accesses for 32-bit fields, 16-bit accesses
+ * for 16-bit fields and 8-bit accesses for 8-bit fields.
+ */
