Re: [PATCH v2] drm/qxl: do not run release if qxl failed to init

2021-02-04 Thread Gerd Hoffmann
On Thu, Feb 04, 2021 at 11:30:50AM -0500, Tong Zhang wrote:
> if qxl_device_init() fail, drm device will not be registered,
> in this case, do not run qxl_drm_release()

How do you trigger this?

take care,
  Gerd

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH v1] vdpa/mlx5: Restore the hardware used index after change map

2021-02-04 Thread Jason Wang


On 2021/2/4 下午3:36, Eli Cohen wrote:

When a change of memory map occurs, the hardware resources are destroyed
and then re-created again with the new memory map. In such case, we need
to restore the hardware available and used indices. The driver failed to
restore the used index which is added here.

Also, since the driver also fails to reset the available and used
indices upon device reset, fix this here to avoid regression caused by
the fact that used index may not be zero upon device reset.

Fixes: 1a86b377aa21 ("vdpa/mlx5: Add VDPA driver for supported mlx5 devices")
Signed-off-by: Eli Cohen 
---
v0 -> v1:
Clear indices upon device reset



Acked-by: Jason Wang 




  drivers/vdpa/mlx5/net/mlx5_vnet.c | 18 ++
  1 file changed, 18 insertions(+)

diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c 
b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index 88dde3455bfd..b5fe6d2ad22f 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -87,6 +87,7 @@ struct mlx5_vq_restore_info {
u64 device_addr;
u64 driver_addr;
u16 avail_index;
+   u16 used_index;
bool ready;
struct vdpa_callback cb;
bool restore;
@@ -121,6 +122,7 @@ struct mlx5_vdpa_virtqueue {
u32 virtq_id;
struct mlx5_vdpa_net *ndev;
u16 avail_idx;
+   u16 used_idx;
int fw_state;
  
  	/* keep last in the struct */

@@ -804,6 +806,7 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, 
struct mlx5_vdpa_virtque
  
  	obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context);

MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, 
mvq->avail_idx);
+   MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, 
mvq->used_idx);
MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
 get_features_12_3(ndev->mvdev.actual_features));
vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, 
virtio_q_context);
@@ -1022,6 +1025,7 @@ static int connect_qps(struct mlx5_vdpa_net *ndev, struct 
mlx5_vdpa_virtqueue *m
  struct mlx5_virtq_attr {
u8 state;
u16 available_index;
+   u16 used_index;
  };
  
  static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,

@@ -1052,6 +1056,7 @@ static int query_virtqueue(struct mlx5_vdpa_net *ndev, 
struct mlx5_vdpa_virtqueu
memset(attr, 0, sizeof(*attr));
attr->state = MLX5_GET(virtio_net_q_object, obj_context, state);
attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, 
hw_available_index);
+   attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, 
hw_used_index);
kfree(out);
return 0;
  
@@ -1535,6 +1540,16 @@ static void teardown_virtqueues(struct mlx5_vdpa_net *ndev)

}
  }
  
+static void clear_virtqueues(struct mlx5_vdpa_net *ndev)

+{
+   int i;
+
+   for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--) {
+   ndev->vqs[i].avail_idx = 0;
+   ndev->vqs[i].used_idx = 0;
+   }
+}
+
  /* TODO: cross-endian support */
  static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
  {
@@ -1610,6 +1625,7 @@ static int save_channel_info(struct mlx5_vdpa_net *ndev, 
struct mlx5_vdpa_virtqu
return err;
  
  	ri->avail_index = attr.available_index;

+   ri->used_index = attr.used_index;
ri->ready = mvq->ready;
ri->num_ent = mvq->num_ent;
ri->desc_addr = mvq->desc_addr;
@@ -1654,6 +1670,7 @@ static void restore_channels_info(struct mlx5_vdpa_net 
*ndev)
continue;
  
  		mvq->avail_idx = ri->avail_index;

+   mvq->used_idx = ri->used_index;
mvq->ready = ri->ready;
mvq->num_ent = ri->num_ent;
mvq->desc_addr = ri->desc_addr;
@@ -1768,6 +1785,7 @@ static void mlx5_vdpa_set_status(struct vdpa_device 
*vdev, u8 status)
if (!status) {
mlx5_vdpa_info(mvdev, "performing device reset\n");
teardown_driver(ndev);
+   clear_virtqueues(ndev);
   mlx5_vdpa_destroy_mr(&ndev->mvdev);
ndev->mvdev.status = 0;
ndev->mvdev.mlx_features = 0;


___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [RFC 05/10] vhost: Add vhost_dev_from_virtio

2021-02-04 Thread Jason Wang


On 2021/2/4 下午5:25, Eugenio Perez Martin wrote:

On Thu, Feb 4, 2021 at 4:14 AM Jason Wang  wrote:


On 2021/2/2 下午6:17, Eugenio Perez Martin wrote:

On Tue, Feb 2, 2021 at 4:31 AM Jason Wang  wrote:

On 2021/2/1 下午4:28, Eugenio Perez Martin wrote:

On Mon, Feb 1, 2021 at 7:13 AM Jason Wang  wrote:

On 2021/1/30 上午4:54, Eugenio Pérez wrote:

Signed-off-by: Eugenio Pérez 
---
 include/hw/virtio/vhost.h |  1 +
 hw/virtio/vhost.c | 17 +
 2 files changed, 18 insertions(+)

diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
index 4a8bc75415..fca076e3f0 100644
--- a/include/hw/virtio/vhost.h
+++ b/include/hw/virtio/vhost.h
@@ -123,6 +123,7 @@ uint64_t vhost_get_features(struct vhost_dev *hdev, const 
int *feature_bits,
 void vhost_ack_features(struct vhost_dev *hdev, const int *feature_bits,
 uint64_t features);
 bool vhost_has_free_slot(void);
+struct vhost_dev *vhost_dev_from_virtio(const VirtIODevice *vdev);

 int vhost_net_set_backend(struct vhost_dev *hdev,
   struct vhost_vring_file *file);
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 28c7d78172..8683d507f5 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -61,6 +61,23 @@ bool vhost_has_free_slot(void)
 return slots_limit > used_memslots;
 }

+/*
+ * Get the vhost device associated to a VirtIO device.
+ */
+struct vhost_dev *vhost_dev_from_virtio(const VirtIODevice *vdev)
+{
+struct vhost_dev *hdev;
+
+QLIST_FOREACH(hdev, &vhost_devices, entry) {
+if (hdev->vdev == vdev) {
+return hdev;
+}
+}
+
+assert(hdev);
+return NULL;
+}

I'm not sure this can work in the case of multiqueue. E.g vhost-net
multiqueue is a N:1 mapping between vhost devics and virtio devices.

Thanks


Right. We could add an "vdev vq index" parameter to the function in
this case, but I guess the most reliable way to do this is to add a
vhost_opaque value to VirtQueue, as Stefan proposed in previous RFC.

So the question still, it looks like it's easier to hide the shadow
virtqueue stuffs at vhost layer instead of expose them to virtio layer:

1) vhost protocol is stable ABI
2) no need to deal with virtio stuffs which is more complex than vhost

Or are there any advantages if we do it at virtio layer?


As far as I can tell, we will need the virtio layer the moment we
start copying/translating buffers.

In this series, the virtio dependency can be reduced if qemu does not
check the used ring _F_NO_NOTIFY flag before writing to irqfd. It
would enable packed queues and IOMMU immediately, and I think the cost
should not be so high. In the previous RFC this check was deleted
later anyway, so I think it was a bad idea to include it from the start.


I am not sure I understand here. For vhost, we can still do anything we
want, e.g accessing guest memory etc. Any blocker that prevent us from
copying/translating buffers? (Note that qemu will propagate memory
mappings to vhost).


There is nothing that forbids us to access directly, but if we don't
reuse the virtio layer functionality we would have to duplicate every
access function. "Need" was a too strong word maybe :).

In other words: for the shadow vq vring exposed for the device, qemu
treats it as a driver, and this functionality needs to be added to
qemu. But for accessing the guest's one do not reuse virtio.c would be
a bad idea in my opinion.



The problem is, virtio.c is not a library, and it has so many dependencies 
on other qemu modules that it is basically impossible to reuse it at the 
vhost level.


We can solve this by:

1) split the core functions out as a library or
2) switch to use contrib/lib-vhostuser but needs to decouple UNIX socket 
transport


None of the above looks trivial and they are only device codes. For 
shadow virtqueue, we need driver codes as well where no code can be reused.


As we discussed, we probably need IOVA allocated when forwarding 
descriptors between the two virtqueues. So my feeling is we can have our 
own codes to start then we can consider whether we can reuse some from 
the existing virtio.c or lib-vhostuser.


Thanks





Thanks








Thanks



I need to take this into account in qmp_x_vhost_enable_shadow_vq too.


+
 static void vhost_dev_sync_region(struct vhost_dev *dev,
   MemoryRegionSection *section,
   uint64_t mfirst, uint64_t mlast,




___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH iproute2-next v3 0/5] Add vdpa device management tool

2021-02-04 Thread Jason Wang


On 2021/2/4 下午7:15, Adrian Moreno wrote:

Sorry I have not followed this work as close as I would have wanted.
Some questions below.

On 2/4/21 4:16 AM, Jason Wang wrote:

On 2021/2/2 下午6:35, Parav Pandit wrote:

Linux vdpa interface allows vdpa device management functionality.
This includes adding, removing, querying vdpa devices.

vdpa interface also includes showing supported management devices
which support such operations.

This patchset includes kernel uapi headers and a vdpa tool.

examples:

$ vdpa mgmtdev show
vdpasim:
    supported_classes net

$ vdpa mgmtdev show -jp
{
  "show": {
  "vdpasim": {
  "supported_classes": [ "net" ]
  }
  }
}


How can a user establish the relationship between a mgmtdev and it's parent
device (pci vf, sf, etc)?



Parav should know more but I try to answer.

I think there should be BDF information in the mgmtdev show command if 
the parent is a PCI device, or we can simply show the parent here?






Create a vdpa device of type networking named as "foo2" from
the management device vdpasim_net:

$ vdpa dev add mgmtdev vdpasim_net name foo2


I guess this command will accept a 'type' parameter once more supported_classes
are added?



This could be extended in the future.




Also, will this tool also handle the vdpa driver binding or will the user handle
that through the vdpa bus' sysfs interface?



I think not, it's the configuration below the vdpa bus. The sysfs should 
be the only interface for managing driver binding.


Thanks





Show the newly created vdpa device by its name:
$ vdpa dev show foo2
foo2: type network mgmtdev vdpasim_net vendor_id 0 max_vqs 2 max_vq_size 256

$ vdpa dev show foo2 -jp
{
  "dev": {
  "foo2": {
  "type": "network",
  "mgmtdev": "vdpasim_net",
  "vendor_id": 0,
  "max_vqs": 2,
  "max_vq_size": 256
  }
  }
}

Delete the vdpa device after its use:
$ vdpa dev del foo2

Patch summary:
Patch-1 adds kernel headers for vdpa subsystem
Patch-2 adds library routines for indent handling
Patch-3 adds library routines for generic socket communication
PAtch-4 adds library routine for number to string mapping
Patch-5 adds vdpa tool

Kernel headers are from the vhost kernel tree [1] from branch linux-next.

[1] https://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git

---


Adding Adrian to see if this looks good for k8s integration.

Thanks


Thanks


___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH v3 09/13] vhost/vdpa: remove vhost_vdpa_config_validate()

2021-02-04 Thread Jason Wang


On 2021/2/5 上午1:22, Stefano Garzarella wrote:

get_config() and set_config() callbacks in the 'struct vdpa_config_ops'
usually already validated the inputs. Also now they can return an error,
so we don't need to validate them here anymore.

Let's use the return value of these callbacks and return it in case of
error in vhost_vdpa_get_config() and vhost_vdpa_set_config().

Originally-by: Xie Yongji 
Signed-off-by: Stefano Garzarella 
---
  drivers/vhost/vdpa.c | 41 +
  1 file changed, 13 insertions(+), 28 deletions(-)

diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index ef688c8c0e0e..d61e779000a8 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -185,51 +185,35 @@ static long vhost_vdpa_set_status(struct vhost_vdpa *v, 
u8 __user *statusp)
return 0;
  }
  
-static int vhost_vdpa_config_validate(struct vhost_vdpa *v,

- struct vhost_vdpa_config *c)
-{
-   long size = 0;
-
-   switch (v->virtio_id) {
-   case VIRTIO_ID_NET:
-   size = sizeof(struct virtio_net_config);
-   break;
-   }
-
-   if (c->len == 0)
-   return -EINVAL;
-
-   if (c->len > size - c->off)
-   return -E2BIG;
-
-   return 0;
-}
-
  static long vhost_vdpa_get_config(struct vhost_vdpa *v,
  struct vhost_vdpa_config __user *c)
  {
struct vdpa_device *vdpa = v->vdpa;
struct vhost_vdpa_config config;
unsigned long size = offsetof(struct vhost_vdpa_config, buf);
+   long ret;
u8 *buf;
  
	if (copy_from_user(&config, c, size))

return -EFAULT;
-   if (vhost_vdpa_config_validate(v, &config))
+   if (config.len == 0)
return -EINVAL;
buf = kvzalloc(config.len, GFP_KERNEL);



Then it means userspace can allocate a very large amount of memory.

Rethink about this, we should limit the size here (e.g PAGE_SIZE) or 
fetch the config size first (either through a config ops as you 
suggested or a variable in the vdpa device that is initialized during 
device creation).


Thanks


if (!buf)
return -ENOMEM;
  
-	vdpa_get_config(vdpa, config.off, buf, config.len);

+   ret = vdpa_get_config(vdpa, config.off, buf, config.len);
+   if (ret)
+   goto out;
  
  	if (copy_to_user(c->buf, buf, config.len)) {

-   kvfree(buf);
-   return -EFAULT;
+   ret = -EFAULT;
+   goto out;
}
  
+out:

kvfree(buf);
-   return 0;
+   return ret;
  }
  
  static long vhost_vdpa_set_config(struct vhost_vdpa *v,

@@ -239,21 +223,22 @@ static long vhost_vdpa_set_config(struct vhost_vdpa *v,
const struct vdpa_config_ops *ops = vdpa->config;
struct vhost_vdpa_config config;
unsigned long size = offsetof(struct vhost_vdpa_config, buf);
+   long ret;
u8 *buf;
  
	if (copy_from_user(&config, c, size))

return -EFAULT;
-   if (vhost_vdpa_config_validate(v, &config))
+   if (config.len == 0)
return -EINVAL;
  
  	buf = vmemdup_user(c->buf, config.len);

if (IS_ERR(buf))
return PTR_ERR(buf);
  
-	ops->set_config(vdpa, config.off, buf, config.len);

+   ret = ops->set_config(vdpa, config.off, buf, config.len);
  
  	kvfree(buf);

-   return 0;
+   return ret;
  }
  
  static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep)


___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH v3 08/13] vdpa: add return value to get_config/set_config callbacks

2021-02-04 Thread Jason Wang


On 2021/2/5 上午1:22, Stefano Garzarella wrote:

All implementations of these callbacks already validate inputs.

Let's return an error from these callbacks, so the caller doesn't
need to validate the input anymore.

We update all implementations to return -EINVAL in case of invalid
input.

Signed-off-by: Stefano Garzarella 
---
  include/linux/vdpa.h  | 18 ++
  drivers/vdpa/ifcvf/ifcvf_main.c   | 24 
  drivers/vdpa/mlx5/net/mlx5_vnet.c | 17 +++--
  drivers/vdpa/vdpa_sim/vdpa_sim.c  | 16 ++--
  4 files changed, 47 insertions(+), 28 deletions(-)

diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
index 4ab5494503a8..0e0cbd5fb41b 100644
--- a/include/linux/vdpa.h
+++ b/include/linux/vdpa.h
@@ -157,6 +157,7 @@ struct vdpa_iova_range {
   *@buf: buffer used to read to
   *@len: the length to read from
   *configuration space
+ * Returns integer: success (0) or error (< 0)
   * @set_config:   Write to device specific configuration 
space
   *@vdev: vdpa device
   *@offset: offset from the beginning of
@@ -164,6 +165,7 @@ struct vdpa_iova_range {
   *@buf: buffer used to write from
   *@len: the length to write to
   *configuration space
+ * Returns integer: success (0) or error (< 0)
   * @get_generation:   Get device config generation (optional)
   *@vdev: vdpa device
   *Returns u32: device generation
@@ -231,10 +233,10 @@ struct vdpa_config_ops {
u32 (*get_vendor_id)(struct vdpa_device *vdev);
u8 (*get_status)(struct vdpa_device *vdev);
void (*set_status)(struct vdpa_device *vdev, u8 status);
-   void (*get_config)(struct vdpa_device *vdev, unsigned int offset,
-  void *buf, unsigned int len);
-   void (*set_config)(struct vdpa_device *vdev, unsigned int offset,
-  const void *buf, unsigned int len);
+   int (*get_config)(struct vdpa_device *vdev, unsigned int offset,
+ void *buf, unsigned int len);
+   int (*set_config)(struct vdpa_device *vdev, unsigned int offset,
+ const void *buf, unsigned int len);
u32 (*get_generation)(struct vdpa_device *vdev);
struct vdpa_iova_range (*get_iova_range)(struct vdpa_device *vdev);
  
@@ -329,8 +331,8 @@ static inline int vdpa_set_features(struct vdpa_device *vdev, u64 features)

  }
  
  
-static inline void vdpa_get_config(struct vdpa_device *vdev, unsigned offset,

-  void *buf, unsigned int len)
+static inline int vdpa_get_config(struct vdpa_device *vdev, unsigned offset,
+ void *buf, unsigned int len)
  {
  const struct vdpa_config_ops *ops = vdev->config;
  
@@ -339,8 +341,8 @@ static inline void vdpa_get_config(struct vdpa_device *vdev, unsigned offset,

 * If it does happen we assume a legacy guest.
 */
if (!vdev->features_valid)
-   vdpa_set_features(vdev, 0);
-   ops->get_config(vdev, offset, buf, len);
+   return vdpa_set_features(vdev, 0);
+   return ops->get_config(vdev, offset, buf, len);
  }
  
  /**

diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c
index 7c8bbfcf6c3e..f5e6a90d8114 100644
--- a/drivers/vdpa/ifcvf/ifcvf_main.c
+++ b/drivers/vdpa/ifcvf/ifcvf_main.c
@@ -332,24 +332,32 @@ static u32 ifcvf_vdpa_get_vq_align(struct vdpa_device 
*vdpa_dev)
return IFCVF_QUEUE_ALIGNMENT;
  }
  
-static void ifcvf_vdpa_get_config(struct vdpa_device *vdpa_dev,

- unsigned int offset,
- void *buf, unsigned int len)
+static int ifcvf_vdpa_get_config(struct vdpa_device *vdpa_dev,
+unsigned int offset,
+void *buf, unsigned int len)
  {
struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
  
-	WARN_ON(offset + len > sizeof(struct virtio_net_config));

+   if (offset + len > sizeof(struct virtio_net_config))
+   return -EINVAL;
+
ifcvf_read_net_config(vf, offset, buf, len);
+
+   return 0;
  }
  
-static void ifcvf_vdpa_set_config(struct vdpa_device *vdpa_dev,

- unsigned int offset, const void *buf,
- unsigned int len)
+static int ifcvf_vdpa_set_config(struct vdpa_device *vdpa_dev,
+unsigned int offset, const void *buf,
+unsigned int len)
  {
struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
  
-	WARN_ON(offset + len > sizeof(struct 

Re: [PATCH v3 04/13] vringh: explain more about cleaning riov and wiov

2021-02-04 Thread Jason Wang


On 2021/2/5 上午1:22, Stefano Garzarella wrote:

riov and wiov can be reused with subsequent calls of vringh_getdesc_*().

Let's add a paragraph in the documentation of these functions to better
explain when riov and wiov need to be cleaned up.

Signed-off-by: Stefano Garzarella 
---
  drivers/vhost/vringh.c | 15 ---
  1 file changed, 12 insertions(+), 3 deletions(-)



Acked-by: Jason Wang 




diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
index bee63d68201a..2a88e087afd8 100644
--- a/drivers/vhost/vringh.c
+++ b/drivers/vhost/vringh.c
@@ -662,7 +662,10 @@ EXPORT_SYMBOL(vringh_init_user);
   * *head will be vrh->vring.num.  You may be able to ignore an invalid
   * descriptor, but there's not much you can do with an invalid ring.
   *
- * Note that you may need to clean up riov and wiov, even on error!
+ * Note that you can reuse riov and wiov with subsequent calls. Content is
+ * overwritten and memory reallocated if more space is needed.
+ * When you don't have to use riov and wiov anymore, you should clean up them
+ * calling vringh_iov_cleanup() to release the memory, even on error!
   */
  int vringh_getdesc_user(struct vringh *vrh,
struct vringh_iov *riov,
@@ -932,7 +935,10 @@ EXPORT_SYMBOL(vringh_init_kern);
   * *head will be vrh->vring.num.  You may be able to ignore an invalid
   * descriptor, but there's not much you can do with an invalid ring.
   *
- * Note that you may need to clean up riov and wiov, even on error!
+ * Note that you can reuse riov and wiov with subsequent calls. Content is
+ * overwritten and memory reallocated if more space is needed.
+ * When you don't have to use riov and wiov anymore, you should clean up them
+ * calling vringh_kiov_cleanup() to release the memory, even on error!
   */
  int vringh_getdesc_kern(struct vringh *vrh,
struct vringh_kiov *riov,
@@ -1292,7 +1298,10 @@ EXPORT_SYMBOL(vringh_set_iotlb);
   * *head will be vrh->vring.num.  You may be able to ignore an invalid
   * descriptor, but there's not much you can do with an invalid ring.
   *
- * Note that you may need to clean up riov and wiov, even on error!
+ * Note that you can reuse riov and wiov with subsequent calls. Content is
+ * overwritten and memory reallocated if more space is needed.
+ * When you don't have to use riov and wiov anymore, you should clean up them
+ * calling vringh_kiov_cleanup() to release the memory, even on error!
   */
  int vringh_getdesc_iotlb(struct vringh *vrh,
 struct vringh_kiov *riov,


___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH v3 03/13] vringh: reset kiov 'consumed' field in __vringh_iov()

2021-02-04 Thread Jason Wang


On 2021/2/5 上午1:22, Stefano Garzarella wrote:

__vringh_iov() overwrites the contents of riov and wiov, in fact it
resets the 'i' and 'used' fields, but also the 'consumed' field should
be reset to avoid an inconsistent state.

Signed-off-by: Stefano Garzarella 
---
  drivers/vhost/vringh.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)



Acked-by: Jason Wang 




diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
index f68122705719..bee63d68201a 100644
--- a/drivers/vhost/vringh.c
+++ b/drivers/vhost/vringh.c
@@ -290,9 +290,9 @@ __vringh_iov(struct vringh *vrh, u16 i,
return -EINVAL;
  
  	if (riov)

-   riov->i = riov->used = 0;
+   riov->i = riov->used = riov->consumed = 0;
if (wiov)
-   wiov->i = wiov->used = 0;
+   wiov->i = wiov->used = wiov->consumed = 0;
  
  	for (;;) {

void *addr;


___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH v3 08/13] vdpa: add return value to get_config/set_config callbacks

2021-02-04 Thread Stefano Garzarella

On Fri, Feb 05, 2021 at 06:31:20AM +0800, kernel test robot wrote:

Hi Stefano,

I love your patch! Yet something to improve:

[auto build test ERROR on vhost/linux-next]
[also build test ERROR on linus/master v5.11-rc6 next-20210125]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:
https://github.com/0day-ci/linux/commits/Stefano-Garzarella/vdpa-add-vdpa-simulator-for-block-device/20210205-020448
base:   https://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git linux-next
config: parisc-randconfig-r005-20210204 (attached as .config)
compiler: hppa-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
   wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
   chmod +x ~/bin/make.cross
   # 
https://github.com/0day-ci/linux/commit/17cf2b1e6be083a27f43414cc0f2524cf81fff60
   git remote add linux-review https://github.com/0day-ci/linux
   git fetch --no-tags linux-review 
Stefano-Garzarella/vdpa-add-vdpa-simulator-for-block-device/20210205-020448
   git checkout 17cf2b1e6be083a27f43414cc0f2524cf81fff60
   # save the attached .config to linux build tree
   COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross 
ARCH=parisc

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All errors (new ones prefixed by >>):

  drivers/vdpa/mlx5/net/mlx5_vnet.c: In function 'mlx5_vdpa_get_config':

drivers/vdpa/mlx5/net/mlx5_vnet.c:1810:10: error: expected ';' before '}' token

   1810 |  return 0
|  ^
|  ;
   1811 | }
| ~



Ooops, I forgot to add mlx5_vnet.c on my .config.

Sorry for that, I'll fix in the next release and I'll build all vDPA 
related stuff.


Stefano

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH v3 08/13] vdpa: add return value to get_config/set_config callbacks

2021-02-04 Thread kernel test robot
Hi Stefano,

I love your patch! Yet something to improve:

[auto build test ERROR on vhost/linux-next]
[also build test ERROR on linus/master v5.11-rc6 next-20210125]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:
https://github.com/0day-ci/linux/commits/Stefano-Garzarella/vdpa-add-vdpa-simulator-for-block-device/20210205-020448
base:   https://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git linux-next
config: parisc-randconfig-r005-20210204 (attached as .config)
compiler: hppa-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# 
https://github.com/0day-ci/linux/commit/17cf2b1e6be083a27f43414cc0f2524cf81fff60
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review 
Stefano-Garzarella/vdpa-add-vdpa-simulator-for-block-device/20210205-020448
git checkout 17cf2b1e6be083a27f43414cc0f2524cf81fff60
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross 
ARCH=parisc 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All errors (new ones prefixed by >>):

   drivers/vdpa/mlx5/net/mlx5_vnet.c: In function 'mlx5_vdpa_get_config':
>> drivers/vdpa/mlx5/net/mlx5_vnet.c:1810:10: error: expected ';' before '}' 
>> token
1810 |  return 0
 |  ^
 |  ;
1811 | }
 | ~ 


vim +1810 drivers/vdpa/mlx5/net/mlx5_vnet.c

  1798  
  1799  static int mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int 
offset, void *buf,
  1800  unsigned int len)
  1801  {
  1802  struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
  1803  struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
  1804  
  1805  if (offset + len > sizeof(struct virtio_net_config))
  1806  return -EINVAL;
  1807  
  1808  memcpy(buf, (u8 *)&ndev->config + offset, len);
  1809  
> 1810  return 0
  1811  }
  1812  

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


.config.gz
Description: application/gzip
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH net] virtio-net: suppress bad irq warning for tx napi

2021-02-04 Thread Willem de Bruijn
On Wed, Feb 3, 2021 at 10:06 PM Jason Wang  wrote:
>
>
> On 2021/2/4 上午2:28, Willem de Bruijn wrote:
> > On Wed, Feb 3, 2021 at 12:33 AM Jason Wang  wrote:
> >>
> >> On 2021/2/2 下午10:37, Willem de Bruijn wrote:
> >>> On Mon, Feb 1, 2021 at 10:09 PM Jason Wang  wrote:
>  On 2021/1/29 上午8:21, Wei Wang wrote:
> > With the implementation of napi-tx in virtio driver, we clean tx
> > descriptors from rx napi handler, for the purpose of reducing tx
> > complete interrupts. But this could introduce a race where tx complete
> > interrupt has been raised, but the handler found there is no work to do
> > because we have done the work in the previous rx interrupt handler.
> > This could lead to the following warning msg:
> > [ 3588.010778] irq 38: nobody cared (try booting with the
> > "irqpoll" option)
> > [ 3588.017938] CPU: 4 PID: 0 Comm: swapper/4 Not tainted
> > 5.3.0-19-generic #20~18.04.2-Ubuntu
> > [ 3588.017940] Call Trace:
> > [ 3588.017942]  
> > [ 3588.017951]  dump_stack+0x63/0x85
> > [ 3588.017953]  __report_bad_irq+0x35/0xc0
> > [ 3588.017955]  note_interrupt+0x24b/0x2a0
> > [ 3588.017956]  handle_irq_event_percpu+0x54/0x80
> > [ 3588.017957]  handle_irq_event+0x3b/0x60
> > [ 3588.017958]  handle_edge_irq+0x83/0x1a0
> > [ 3588.017961]  handle_irq+0x20/0x30
> > [ 3588.017964]  do_IRQ+0x50/0xe0
> > [ 3588.017966]  common_interrupt+0xf/0xf
> > [ 3588.017966]  
> > [ 3588.017989] handlers:
> > [ 3588.020374] [<1b9f1da8>] vring_interrupt
> > [ 3588.025099] Disabling IRQ #38
> >
> > This patch adds a new param to struct vring_virtqueue, and we set it for
> > tx virtqueues if napi-tx is enabled, to suppress the warning in such
> > case.
> >
> > Fixes: 7b0411ef4aa6 ("virtio-net: clean tx descriptors from rx napi")
> > Reported-by: Rick Jones 
> > Signed-off-by: Wei Wang 
> > Signed-off-by: Willem de Bruijn 
>  Please use get_maintainer.pl to make sure Michael and me were cced.
> >>> Will do. Sorry about that. I suggested just the virtualization list, my 
> >>> bad.
> >>>
> > ---
> > drivers/net/virtio_net.c | 19 ++-
> > drivers/virtio/virtio_ring.c | 16 
> > include/linux/virtio.h   |  2 ++
> > 3 files changed, 32 insertions(+), 5 deletions(-)
> >
> > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > index 508408fbe78f..e9a3f30864e8 100644
> > --- a/drivers/net/virtio_net.c
> > +++ b/drivers/net/virtio_net.c
> > @@ -1303,13 +1303,22 @@ static void virtnet_napi_tx_enable(struct 
> > virtnet_info *vi,
> > return;
> > }
> >
> > + /* With napi_tx enabled, free_old_xmit_skbs() could be called from
> > +  * rx napi handler. Set work_steal to suppress bad irq warning for
> > +  * IRQ_NONE case from tx complete interrupt handler.
> > +  */
> > + virtqueue_set_work_steal(vq, true);
> > +
> > return virtnet_napi_enable(vq, napi);
>  Do we need to force the ordering between steal set and napi enable?
> >>> The warning only occurs after one hundred spurious interrupts, so not
> >>> really.
> >>
> >> Ok, so it looks like a hint. Then I wonder how much value do we need to
> >> introduce helper like virtqueue_set_work_steal() that allows the caller
> >> to toggle. How about disable the check forever during virtqueue
> >> initialization?
> > Yes, that is even simpler.
> >
> > We still need the helper, as the internal variables of vring_virtqueue
> > are not accessible from virtio-net. An earlier patch added the
> > variable to virtqueue itself, but I think it belongs in
> > vring_virtqueue. And the helper is not a lot of code.
>
>
> It's better to do this before the allocating the irq. But it looks not
> easy unless we extend find_vqs().

Can you elaborate why that is better? At virtnet_open the interrupts
are not firing either.

I have no preference. Just curious, especially if it complicates the patch.
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH net] virtio-net: suppress bad irq warning for tx napi

2021-02-04 Thread Willem de Bruijn
On Wed, Feb 3, 2021 at 6:53 PM Wei Wang  wrote:
>
> On Wed, Feb 3, 2021 at 3:10 PM Michael S. Tsirkin  wrote:
> >
> > On Wed, Feb 03, 2021 at 01:24:08PM -0500, Willem de Bruijn wrote:
> > > On Wed, Feb 3, 2021 at 5:42 AM Michael S. Tsirkin  wrote:
> > > >
> > > > On Tue, Feb 02, 2021 at 07:06:53PM -0500, Willem de Bruijn wrote:
> > > > > On Tue, Feb 2, 2021 at 6:53 PM Willem de Bruijn  
> > > > > wrote:
> > > > > >
> > > > > > On Tue, Feb 2, 2021 at 6:47 PM Wei Wang  wrote:
> > > > > > >
> > > > > > > On Tue, Feb 2, 2021 at 3:12 PM Michael S. Tsirkin 
> > > > > > >  wrote:
> > > > > > > >
> > > > > > > > On Thu, Jan 28, 2021 at 04:21:36PM -0800, Wei Wang wrote:
> > > > > > > > > With the implementation of napi-tx in virtio driver, we clean 
> > > > > > > > > tx
> > > > > > > > > descriptors from rx napi handler, for the purpose of reducing 
> > > > > > > > > tx
> > > > > > > > > complete interrupts. But this could introduce a race where tx 
> > > > > > > > > complete
> > > > > > > > > interrupt has been raised, but the handler found there is no 
> > > > > > > > > work to do
> > > > > > > > > because we have done the work in the previous rx interrupt 
> > > > > > > > > handler.
> > > > > > > > > This could lead to the following warning msg:
> > > > > > > > > [ 3588.010778] irq 38: nobody cared (try booting with the
> > > > > > > > > "irqpoll" option)
> > > > > > > > > [ 3588.017938] CPU: 4 PID: 0 Comm: swapper/4 Not tainted
> > > > > > > > > 5.3.0-19-generic #20~18.04.2-Ubuntu
> > > > > > > > > [ 3588.017940] Call Trace:
> > > > > > > > > [ 3588.017942]  
> > > > > > > > > [ 3588.017951]  dump_stack+0x63/0x85
> > > > > > > > > [ 3588.017953]  __report_bad_irq+0x35/0xc0
> > > > > > > > > [ 3588.017955]  note_interrupt+0x24b/0x2a0
> > > > > > > > > [ 3588.017956]  handle_irq_event_percpu+0x54/0x80
> > > > > > > > > [ 3588.017957]  handle_irq_event+0x3b/0x60
> > > > > > > > > [ 3588.017958]  handle_edge_irq+0x83/0x1a0
> > > > > > > > > [ 3588.017961]  handle_irq+0x20/0x30
> > > > > > > > > [ 3588.017964]  do_IRQ+0x50/0xe0
> > > > > > > > > [ 3588.017966]  common_interrupt+0xf/0xf
> > > > > > > > > [ 3588.017966]  
> > > > > > > > > [ 3588.017989] handlers:
> > > > > > > > > [ 3588.020374] [<1b9f1da8>] vring_interrupt
> > > > > > > > > [ 3588.025099] Disabling IRQ #38
> > > > > > > > >
> > > > > > > > > This patch adds a new param to struct vring_virtqueue, and we 
> > > > > > > > > set it for
> > > > > > > > > tx virtqueues if napi-tx is enabled, to suppress the warning 
> > > > > > > > > in such
> > > > > > > > > case.
> > > > > > > > >
> > > > > > > > > Fixes: 7b0411ef4aa6 ("virtio-net: clean tx descriptors from 
> > > > > > > > > rx napi")
> > > > > > > > > Reported-by: Rick Jones 
> > > > > > > > > Signed-off-by: Wei Wang 
> > > > > > > > > Signed-off-by: Willem de Bruijn 
> > > > > > > >
> > > > > > > >
> > > > > > > > This description does not make sense to me.
> > > > > > > >
> > > > > > > > irq X: nobody cared
> > > > > > > > only triggers after an interrupt is unhandled repeatedly.
> > > > > > > >
> > > > > > > > So something causes a storm of useless tx interrupts here.
> > > > > > > >
> > > > > > > > Let's find out what it was please. What you are doing is
> > > > > > > > just preventing linux from complaining.
> > > > > > >
> > > > > > > The traffic that causes this warning is a netperf tcp_stream with 
> > > > > > > at
> > > > > > > least 128 flows between 2 hosts. And the warning gets triggered 
> > > > > > > on the
> > > > > > > receiving host, which has a lot of rx interrupts firing on all 
> > > > > > > queues,
> > > > > > > and a few tx interrupts.
> > > > > > > And I think the scenario is: when the tx interrupt gets fired, it 
> > > > > > > gets
> > > > > > > coalesced with the rx interrupt. Basically, the rx and tx 
> > > > > > > interrupts
> > > > > > > get triggered very close to each other, and gets handled in one 
> > > > > > > round
> > > > > > > of do_IRQ(). And the rx irq handler gets called first, which calls
> > > > > > > virtnet_poll(). However, virtnet_poll() calls 
> > > > > > > virtnet_poll_cleantx()
> > > > > > > to try to do the work on the corresponding tx queue as well. 
> > > > > > > That's
> > > > > > > why when tx interrupt handler gets called, it sees no work to do.
> > > > > > > And the reason for the rx handler to handle the tx work is here:
> > > > > > > https://lists.linuxfoundation.org/pipermail/virtualization/2017-April/034740.html
> > > > > >
> > > > > > Indeed. It's not a storm necessarily. The warning occurs after one
> > > > > > hundred such events, since boot, which is a small number compared 
> > > > > > real
> > > > > > interrupt load.
> > > > >
> > > > > Sorry, this is wrong. It is the other call to __report_bad_irq from
> > > > > note_interrupt that applies here.
> > > > >
> > > > > > Occasionally seeing an interrupt with no work is expected after
> > > > > > 7b0411ef4aa6 ("virtio-net: clean tx descriptors from rx napi"). As
> > > > > > long as 

[PATCH 4/6] drm/cirrus: Move vmap out of commit tail

2021-02-04 Thread Thomas Zimmermann
Vmap operations may acquire the dmabuf reservation lock, which is not
allowed within atomic commit-tail functions. Therefore move vmap and
vunmap from the damage handler into prepare_fb and cleanup_fb callbacks.

The mapping is provided as GEM shadow-buffered plane. The functions in
the commit tail use the pre-established mapping for damage handling.

Signed-off-by: Thomas Zimmermann 
---
 drivers/gpu/drm/tiny/cirrus.c | 43 ++-
 1 file changed, 17 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/tiny/cirrus.c b/drivers/gpu/drm/tiny/cirrus.c
index a043e602199e..ad922c3ec681 100644
--- a/drivers/gpu/drm/tiny/cirrus.c
+++ b/drivers/gpu/drm/tiny/cirrus.c
@@ -33,8 +33,9 @@
 #include 
 #include 
 #include 
-#include 
+#include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -311,22 +312,15 @@ static int cirrus_mode_set(struct cirrus_device *cirrus,
return 0;
 }
 
-static int cirrus_fb_blit_rect(struct drm_framebuffer *fb,
+static int cirrus_fb_blit_rect(struct drm_framebuffer *fb, const struct 
dma_buf_map *map,
   struct drm_rect *rect)
 {
struct cirrus_device *cirrus = to_cirrus(fb->dev);
-   struct dma_buf_map map;
-   void *vmap;
-   int idx, ret;
+   void *vmap = map->vaddr; /* TODO: Use mapping abstraction properly */
+   int idx;
 
-   ret = -ENODEV;
if (!drm_dev_enter(>dev, ))
-   goto out;
-
-   ret = drm_gem_shmem_vmap(fb->obj[0], );
-   if (ret)
-   goto out_dev_exit;
-   vmap = map.vaddr; /* TODO: Use mapping abstraction properly */
+   return -ENODEV;
 
if (cirrus->cpp == fb->format->cpp[0])
drm_fb_memcpy_dstclip(cirrus->vram,
@@ -345,16 +339,12 @@ static int cirrus_fb_blit_rect(struct drm_framebuffer *fb,
else
WARN_ON_ONCE("cpp mismatch");
 
-   drm_gem_shmem_vunmap(fb->obj[0], );
-   ret = 0;
-
-out_dev_exit:
drm_dev_exit(idx);
-out:
-   return ret;
+
+   return 0;
 }
 
-static int cirrus_fb_blit_fullscreen(struct drm_framebuffer *fb)
+static int cirrus_fb_blit_fullscreen(struct drm_framebuffer *fb, const struct 
dma_buf_map *map)
 {
struct drm_rect fullscreen = {
.x1 = 0,
@@ -362,7 +352,7 @@ static int cirrus_fb_blit_fullscreen(struct drm_framebuffer 
*fb)
.y1 = 0,
.y2 = fb->height,
};
-   return cirrus_fb_blit_rect(fb, );
+   return cirrus_fb_blit_rect(fb, map, );
 }
 
 static int cirrus_check_size(int width, int height,
@@ -441,9 +431,10 @@ static void cirrus_pipe_enable(struct 
drm_simple_display_pipe *pipe,
   struct drm_plane_state *plane_state)
 {
struct cirrus_device *cirrus = to_cirrus(pipe->crtc.dev);
+   struct drm_shadow_plane_state *shadow_plane_state = 
to_drm_shadow_plane_state(plane_state);
 
cirrus_mode_set(cirrus, _state->mode, plane_state->fb);
-   cirrus_fb_blit_fullscreen(plane_state->fb);
+   cirrus_fb_blit_fullscreen(plane_state->fb, _plane_state->map[0]);
 }
 
 static void cirrus_pipe_update(struct drm_simple_display_pipe *pipe,
@@ -451,16 +442,15 @@ static void cirrus_pipe_update(struct 
drm_simple_display_pipe *pipe,
 {
struct cirrus_device *cirrus = to_cirrus(pipe->crtc.dev);
struct drm_plane_state *state = pipe->plane.state;
+   struct drm_shadow_plane_state *shadow_plane_state = 
to_drm_shadow_plane_state(state);
struct drm_crtc *crtc = >crtc;
struct drm_rect rect;
 
-   if (pipe->plane.state->fb &&
-   cirrus->cpp != cirrus_cpp(pipe->plane.state->fb))
-   cirrus_mode_set(cirrus, >mode,
-   pipe->plane.state->fb);
+   if (state->fb && cirrus->cpp != cirrus_cpp(state->fb))
+   cirrus_mode_set(cirrus, >mode, state->fb);
 
if (drm_atomic_helper_damage_merged(old_state, state, ))
-   cirrus_fb_blit_rect(pipe->plane.state->fb, );
+   cirrus_fb_blit_rect(state->fb, _plane_state->map[0], 
);
 }
 
 static const struct drm_simple_display_pipe_funcs cirrus_pipe_funcs = {
@@ -468,6 +458,7 @@ static const struct drm_simple_display_pipe_funcs 
cirrus_pipe_funcs = {
.check  = cirrus_pipe_check,
.enable = cirrus_pipe_enable,
.update = cirrus_pipe_update,
+   DRM_GEM_SIMPLE_DISPLAY_PIPE_SHADOW_PLANE_FUNCS,
 };
 
 static const uint32_t cirrus_formats[] = {
-- 
2.30.0

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH 0/6] drm: Move vmap out of commit tail for SHMEM-based drivers

2021-02-04 Thread Thomas Zimmermann
Several SHMEM-based drivers use the BO as shadow buffer for the real
framebuffer memory. Damage handling requires a vmap of the BO memory.
But vmap/vunmap can acquire the dma-buf reservation lock, which is not
allowed in commit tails.

This patchset introduces a set of helpers that implement vmap/vunmap
in the plane's prepare_fb and cleanup_fb; and provide the mapping's
address to commit-tail functions. Wrappers for simple KMS are provided,
as all of the involved drivers are built upon simple-KMS helpers.

Patch 1 adds the support for private plane state to the simple-KMS
helper library.

Patch 2 provides plane state for shadow-buffered planes. It's a
DRM plane with BO mappings into kernel address space. The involved
helpers take care of the details. The code is independent from GEM
SHMEM and can be used with other shadow-buffered planes. I put all
this in a new file. In a later patch, drm_gem_fb_prepare_fb() et al
could be moved there as well.

Patches 3 to 6 convert each involved driver to the new helpers. The
vmap operations are being removed from commit-tail functions. An
additional benefit is that vmap errors are now detected before the
commit starts. The original commit-tail functions could not
handle failed vmap operations.

I smoke-tested the code by running fbdev, Xorg and weston with the
converted mgag200 driver.

Thomas Zimmermann (6):
  drm/simple-kms: Add plane-state helpers
  drm: Add additional atomic helpers for shadow-buffered planes
  drm/mgag200: Move vmap out of commit tail
  drm/cirrus: Move vmap out of commit tail
  drm/gm12u320: Move vmap out of commit tail
  drm/udl: Move vmap out of commit tail

 Documentation/gpu/drm-kms-helpers.rst   |  12 ++
 drivers/gpu/drm/Makefile|   3 +-
 drivers/gpu/drm/drm_gem_atomic_helper.c | 210 
 drivers/gpu/drm/drm_simple_kms_helper.c |  40 -
 drivers/gpu/drm/mgag200/mgag200_mode.c  |  23 +--
 drivers/gpu/drm/tiny/cirrus.c   |  43 ++---
 drivers/gpu/drm/tiny/gm12u320.c |  28 ++--
 drivers/gpu/drm/udl/udl_modeset.c   |  34 ++--
 include/drm/drm_gem_atomic_helper.h |  75 +
 include/drm/drm_simple_kms_helper.h |  28 
 10 files changed, 415 insertions(+), 81 deletions(-)
 create mode 100644 drivers/gpu/drm/drm_gem_atomic_helper.c
 create mode 100644 include/drm/drm_gem_atomic_helper.h


base-commit: f9173d6435c6a9bb0b0076ebf9122d876da208ae
prerequisite-patch-id: c2b2f08f0eccc9f5df0c0da49fa1d36267deb11d
prerequisite-patch-id: f8140f08b77c49beb6243d9a2a88ebcc7097e391
prerequisite-patch-id: 6bffaf03d01daeb29a0b0ffbc78b415e72907a17
prerequisite-patch-id: 6386ca949b8b677a7eada2672990dab2f84f734f
prerequisite-patch-id: 706e35d0b2185d2332f725e1c22d8ffed910ea7b
--
2.30.0

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH 1/6] drm/simple-kms: Add plane-state helpers

2021-02-04 Thread Thomas Zimmermann
Just like regular plane-state helpers, drivers can use these new
callbacks to create and destroy private plane state.

Signed-off-by: Thomas Zimmermann 
---
 drivers/gpu/drm/drm_simple_kms_helper.c | 40 +++--
 include/drm/drm_simple_kms_helper.h | 28 +
 2 files changed, 65 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/drm_simple_kms_helper.c 
b/drivers/gpu/drm/drm_simple_kms_helper.c
index 6ce8f5cd1eb5..0c5bb0f98fa0 100644
--- a/drivers/gpu/drm/drm_simple_kms_helper.c
+++ b/drivers/gpu/drm/drm_simple_kms_helper.c
@@ -253,13 +253,47 @@ static const struct drm_plane_helper_funcs 
drm_simple_kms_plane_helper_funcs = {
.atomic_update = drm_simple_kms_plane_atomic_update,
 };
 
+static void drm_simple_kms_plane_reset(struct drm_plane *plane)
+{
+   struct drm_simple_display_pipe *pipe;
+
+   pipe = container_of(plane, struct drm_simple_display_pipe, plane);
+   if (!pipe->funcs || !pipe->funcs->reset_plane)
+   return drm_atomic_helper_plane_reset(plane);
+
+   return pipe->funcs->reset_plane(pipe);
+}
+
+static struct drm_plane_state *drm_simple_kms_plane_duplicate_state(struct 
drm_plane *plane)
+{
+   struct drm_simple_display_pipe *pipe;
+
+   pipe = container_of(plane, struct drm_simple_display_pipe, plane);
+   if (!pipe->funcs || !pipe->funcs->duplicate_plane_state)
+   return drm_atomic_helper_plane_duplicate_state(plane);
+
+   return pipe->funcs->duplicate_plane_state(pipe, plane->state);
+}
+
+static void drm_simple_kms_plane_destroy_state(struct drm_plane *plane,
+  struct drm_plane_state *state)
+{
+   struct drm_simple_display_pipe *pipe;
+
+   pipe = container_of(plane, struct drm_simple_display_pipe, plane);
+   if (!pipe->funcs || !pipe->funcs->destroy_plane_state)
+   drm_atomic_helper_plane_destroy_state(plane, state);
+   else
+   pipe->funcs->destroy_plane_state(pipe, state);
+}
+
 static const struct drm_plane_funcs drm_simple_kms_plane_funcs = {
.update_plane   = drm_atomic_helper_update_plane,
.disable_plane  = drm_atomic_helper_disable_plane,
.destroy= drm_plane_cleanup,
-   .reset  = drm_atomic_helper_plane_reset,
-   .atomic_duplicate_state = drm_atomic_helper_plane_duplicate_state,
-   .atomic_destroy_state   = drm_atomic_helper_plane_destroy_state,
+   .reset  = drm_simple_kms_plane_reset,
+   .atomic_duplicate_state = drm_simple_kms_plane_duplicate_state,
+   .atomic_destroy_state   = drm_simple_kms_plane_destroy_state,
.format_mod_supported   = drm_simple_kms_format_mod_supported,
 };
 
diff --git a/include/drm/drm_simple_kms_helper.h 
b/include/drm/drm_simple_kms_helper.h
index e6dbf3161c2f..0c1a2e07caf2 100644
--- a/include/drm/drm_simple_kms_helper.h
+++ b/include/drm/drm_simple_kms_helper.h
@@ -149,6 +149,34 @@ struct drm_simple_display_pipe_funcs {
 * more details.
 */
void (*disable_vblank)(struct drm_simple_display_pipe *pipe);
+
+   /**
+* @reset_plane:
+*
+* Optional, called by _plane_funcs.reset. Please read the
+* documentation for the _plane_funcs.reset hook for more details.
+*/
+   void (*reset_plane)(struct drm_simple_display_pipe *pipe);
+
+   /**
+* @duplicate_plane_state:
+*
+* Optional, called by _plane_funcs.atomic_duplicate_state.  Please
+* read the documentation for the 
_plane_funcs.atomic_duplicate_state
+* hook for more details.
+*/
+   struct drm_plane_state * (*duplicate_plane_state)(struct 
drm_simple_display_pipe *pipe,
+ struct 
drm_plane_state *plane_state);
+
+   /**
+* @destroy_plane_state:
+*
+* Optional, called by _plane_funcs.atomic_destroy_state.  Please
+* read the documentation for the _plane_funcs.atomic_destroy_state
+* hook for more details.
+*/
+   void (*destroy_plane_state)(struct drm_simple_display_pipe *pipe,
+   struct drm_plane_state *plane_state);
 };
 
 /**
-- 
2.30.0

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH 2/6] drm: Add additional atomic helpers for shadow-buffered planes

2021-02-04 Thread Thomas Zimmermann
Several drivers use GEM buffer objects as shadow buffers for the actual
framebuffer memory. Right now, drivers do these vmap operations in their
commit tail, which is actually not allowed by the locking rules for
the dma-buf reservation lock. The involved BO has to be vmapped in the
plane's prepare_fb callback and vunmapped in cleanup_fb.

This patch introduces atomic helpers for such shadow planes. Plane
functions manage the plane state for shadow planes. The provided
implementations for prepare_fb and cleanup_fb vmap and vunmap all BOs of
struct drm_plane_state.fb. The mappings are afterwards available in the
plane's commit-tail functions.

For now, all rsp drivers use the simple KMS helpers, so we add the plane
callbacks and wrappers for simple KMS. The internal plane functions can
later be exported as needed.

Signed-off-by: Thomas Zimmermann 
---
 Documentation/gpu/drm-kms-helpers.rst   |  12 ++
 drivers/gpu/drm/Makefile|   3 +-
 drivers/gpu/drm/drm_gem_atomic_helper.c | 210 
 include/drm/drm_gem_atomic_helper.h |  75 +
 4 files changed, 299 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/drm_gem_atomic_helper.c
 create mode 100644 include/drm/drm_gem_atomic_helper.h

diff --git a/Documentation/gpu/drm-kms-helpers.rst 
b/Documentation/gpu/drm-kms-helpers.rst
index b89ddd06dabb..389892f36185 100644
--- a/Documentation/gpu/drm-kms-helpers.rst
+++ b/Documentation/gpu/drm-kms-helpers.rst
@@ -80,6 +80,18 @@ Atomic State Helper Reference
 .. kernel-doc:: drivers/gpu/drm/drm_atomic_state_helper.c
:export:
 
+GEM Atomic Helper Reference
+---
+
+.. kernel-doc:: drivers/gpu/drm/drm_gem_atomic_helper.c
+   :doc: overview
+
+.. kernel-doc:: include/drm/drm_gem_atomic_helper.h
+   :internal:
+
+.. kernel-doc:: drivers/gpu/drm/drm_gem_atomic_helper.c
+   :export:
+
 Simple KMS Helper Reference
 ===
 
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 926adef289db..02c229392345 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -44,7 +44,8 @@ drm_kms_helper-y := drm_bridge_connector.o drm_crtc_helper.o 
drm_dp_helper.o \
drm_plane_helper.o drm_dp_mst_topology.o drm_atomic_helper.o \
drm_kms_helper_common.o drm_dp_dual_mode_helper.o \
drm_simple_kms_helper.o drm_modeset_helper.o \
-   drm_scdc_helper.o drm_gem_framebuffer_helper.o \
+   drm_scdc_helper.o drm_gem_atomic_helper.o \
+   drm_gem_framebuffer_helper.o \
drm_atomic_state_helper.o drm_damage_helper.o \
drm_format_helper.o drm_self_refresh_helper.o
 
diff --git a/drivers/gpu/drm/drm_gem_atomic_helper.c 
b/drivers/gpu/drm/drm_gem_atomic_helper.c
new file mode 100644
index ..b6ad2b91a011
--- /dev/null
+++ b/drivers/gpu/drm/drm_gem_atomic_helper.c
@@ -0,0 +1,210 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include 
+#include 
+#include 
+#include 
+
+#include "drm_internal.h"
+
+/**
+ * DOC: overview
+ *
+ * The GEM atomic helpers library implements generic atomic-commit
+ * functions for drivers that use GEM objects. Currently, it provides
+ * plane state and framebuffer BO mappings for planes with shadow
+ * buffers.
+ */
+
+/*
+ * Shadow-buffered Planes
+ */
+
+static struct drm_plane_state *
+drm_gem_duplicate_shadow_plane_state(struct drm_plane *plane,
+struct drm_plane_state *plane_state)
+{
+   struct drm_shadow_plane_state *new_shadow_plane_state;
+
+   if (!plane_state)
+   return NULL;
+
+   new_shadow_plane_state = kzalloc(sizeof(*new_shadow_plane_state), 
GFP_KERNEL);
+   if (!new_shadow_plane_state)
+   return NULL;
+   __drm_atomic_helper_plane_duplicate_state(plane, 
_shadow_plane_state->base);
+
+   return _shadow_plane_state->base;
+}
+
+static void drm_gem_destroy_shadow_plane_state(struct drm_plane *plane,
+  struct drm_plane_state 
*plane_state)
+{
+   struct drm_shadow_plane_state *shadow_plane_state =
+   to_drm_shadow_plane_state(plane_state);
+
+   __drm_atomic_helper_plane_destroy_state(_plane_state->base);
+   kfree(shadow_plane_state);
+}
+
+static void drm_gem_reset_shadow_plane(struct drm_plane *plane)
+{
+   struct drm_shadow_plane_state *shadow_plane_state;
+
+   if (plane->state) {
+   drm_gem_destroy_shadow_plane_state(plane, plane->state);
+   plane->state = NULL; /* must be set to NULL here */
+   }
+
+   shadow_plane_state = kzalloc(sizeof(*shadow_plane_state), GFP_KERNEL);
+   if (!shadow_plane_state)
+   return;
+   __drm_atomic_helper_plane_reset(plane, _plane_state->base);
+}
+
+static int drm_gem_prepare_shadow_fb(struct drm_plane *plane, struct 
drm_plane_state *plane_state)
+{
+   struct drm_shadow_plane_state 

[PATCH 6/6] drm/udl: Move vmap out of commit tail

2021-02-04 Thread Thomas Zimmermann
Vmap operations may acquire the dmabuf reservation lock, which is not
allowed within atomic commit-tail functions. Therefore move vmap and
vunmap from the damage handler into prepare_fb and cleanup_fb callbacks.

The mapping is provided as GEM shadow-buffered plane. The functions in
the commit tail use the pre-established mapping for damage handling.

Signed-off-by: Thomas Zimmermann 
---
 drivers/gpu/drm/udl/udl_modeset.c | 34 ---
 1 file changed, 13 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/udl/udl_modeset.c 
b/drivers/gpu/drm/udl/udl_modeset.c
index 9d34ec9d03f6..8d98bf69d075 100644
--- a/drivers/gpu/drm/udl/udl_modeset.c
+++ b/drivers/gpu/drm/udl/udl_modeset.c
@@ -15,6 +15,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -266,18 +267,17 @@ static int udl_aligned_damage_clip(struct drm_rect *clip, 
int x, int y,
return 0;
 }
 
-static int udl_handle_damage(struct drm_framebuffer *fb, int x, int y,
-int width, int height)
+static int udl_handle_damage(struct drm_framebuffer *fb, const struct 
dma_buf_map *map,
+int x, int y, int width, int height)
 {
struct drm_device *dev = fb->dev;
struct dma_buf_attachment *import_attach = fb->obj[0]->import_attach;
+   void *vaddr = map->vaddr; /* TODO: Use mapping abstraction properly */
int i, ret, tmp_ret;
char *cmd;
struct urb *urb;
struct drm_rect clip;
int log_bpp;
-   struct dma_buf_map map;
-   void *vaddr;
 
ret = udl_log_cpp(fb->format->cpp[0]);
if (ret < 0)
@@ -297,17 +297,10 @@ static int udl_handle_damage(struct drm_framebuffer *fb, 
int x, int y,
return ret;
}
 
-   ret = drm_gem_shmem_vmap(fb->obj[0], );
-   if (ret) {
-   DRM_ERROR("failed to vmap fb\n");
-   goto out_dma_buf_end_cpu_access;
-   }
-   vaddr = map.vaddr; /* TODO: Use mapping abstraction properly */
-
urb = udl_get_urb(dev);
if (!urb) {
ret = -ENOMEM;
-   goto out_drm_gem_shmem_vunmap;
+   goto out_dma_buf_end_cpu_access;
}
cmd = urb->transfer_buffer;
 
@@ -320,7 +313,7 @@ static int udl_handle_damage(struct drm_framebuffer *fb, 
int x, int y,
   , byte_offset, dev_byte_offset,
   byte_width);
if (ret)
-   goto out_drm_gem_shmem_vunmap;
+   goto out_dma_buf_end_cpu_access;
}
 
if (cmd > (char *)urb->transfer_buffer) {
@@ -336,8 +329,6 @@ static int udl_handle_damage(struct drm_framebuffer *fb, 
int x, int y,
 
ret = 0;
 
-out_drm_gem_shmem_vunmap:
-   drm_gem_shmem_vunmap(fb->obj[0], );
 out_dma_buf_end_cpu_access:
if (import_attach) {
tmp_ret = dma_buf_end_cpu_access(import_attach->dmabuf,
@@ -375,6 +366,7 @@ udl_simple_display_pipe_enable(struct 
drm_simple_display_pipe *pipe,
struct drm_framebuffer *fb = plane_state->fb;
struct udl_device *udl = to_udl(dev);
struct drm_display_mode *mode = _state->mode;
+   struct drm_shadow_plane_state *shadow_plane_state = 
to_drm_shadow_plane_state(plane_state);
char *buf;
char *wrptr;
int color_depth = UDL_COLOR_DEPTH_16BPP;
@@ -400,7 +392,7 @@ udl_simple_display_pipe_enable(struct 
drm_simple_display_pipe *pipe,
 
udl->mode_buf_len = wrptr - buf;
 
-   udl_handle_damage(fb, 0, 0, fb->width, fb->height);
+   udl_handle_damage(fb, _plane_state->map[0], 0, 0, fb->width, 
fb->height);
 
if (!crtc_state->mode_changed)
return;
@@ -435,6 +427,7 @@ udl_simple_display_pipe_update(struct 
drm_simple_display_pipe *pipe,
   struct drm_plane_state *old_plane_state)
 {
struct drm_plane_state *state = pipe->plane.state;
+   struct drm_shadow_plane_state *shadow_plane_state = 
to_drm_shadow_plane_state(state);
struct drm_framebuffer *fb = state->fb;
struct drm_rect rect;
 
@@ -442,17 +435,16 @@ udl_simple_display_pipe_update(struct 
drm_simple_display_pipe *pipe,
return;
 
if (drm_atomic_helper_damage_merged(old_plane_state, state, ))
-   udl_handle_damage(fb, rect.x1, rect.y1, rect.x2 - rect.x1,
- rect.y2 - rect.y1);
+   udl_handle_damage(fb, _plane_state->map[0], rect.x1, 
rect.y1,
+ rect.x2 - rect.x1, rect.y2 - rect.y1);
 }
 
-static const
-struct drm_simple_display_pipe_funcs udl_simple_display_pipe_funcs = {
+static const struct drm_simple_display_pipe_funcs 
udl_simple_display_pipe_funcs = {
.mode_valid = udl_simple_display_pipe_mode_valid,
.enable = udl_simple_display_pipe_enable,
.disable = udl_simple_display_pipe_disable,
   

[PATCH 5/6] drm/gm12u320: Move vmap out of commit tail

2021-02-04 Thread Thomas Zimmermann
Vmap operations may acquire the dmabuf reservation lock, which is not
allowed within atomic commit-tail functions. Therefore move vmap and
vunmap from the damage handler into prepare_fb and cleanup_fb callbacks.

The mapping is provided as GEM shadow-buffered plane. The functions in
the commit tail use the pre-established mapping for damage handling.

Signed-off-by: Thomas Zimmermann 
---
 drivers/gpu/drm/tiny/gm12u320.c | 28 +---
 1 file changed, 13 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/tiny/gm12u320.c b/drivers/gpu/drm/tiny/gm12u320.c
index 33f65f4626e5..0b4f4f2af1ef 100644
--- a/drivers/gpu/drm/tiny/gm12u320.c
+++ b/drivers/gpu/drm/tiny/gm12u320.c
@@ -16,8 +16,9 @@
 #include 
 #include 
 #include 
-#include 
+#include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -94,6 +95,7 @@ struct gm12u320_device {
struct drm_rect  rect;
int frame;
int draw_status_timeout;
+   struct dma_buf_map src_map;
} fb_update;
 };
 
@@ -250,7 +252,6 @@ static void gm12u320_copy_fb_to_blocks(struct 
gm12u320_device *gm12u320)
 {
int block, dst_offset, len, remain, ret, x1, x2, y1, y2;
struct drm_framebuffer *fb;
-   struct dma_buf_map map;
void *vaddr;
u8 *src;
 
@@ -264,20 +265,14 @@ static void gm12u320_copy_fb_to_blocks(struct 
gm12u320_device *gm12u320)
x2 = gm12u320->fb_update.rect.x2;
y1 = gm12u320->fb_update.rect.y1;
y2 = gm12u320->fb_update.rect.y2;
-
-   ret = drm_gem_shmem_vmap(fb->obj[0], );
-   if (ret) {
-   GM12U320_ERR("failed to vmap fb: %d\n", ret);
-   goto put_fb;
-   }
-   vaddr = map.vaddr; /* TODO: Use mapping abstraction properly */
+   vaddr = gm12u320->fb_update.src_map.vaddr; /* TODO: Use mapping 
abstraction properly */
 
if (fb->obj[0]->import_attach) {
ret = dma_buf_begin_cpu_access(
fb->obj[0]->import_attach->dmabuf, DMA_FROM_DEVICE);
if (ret) {
GM12U320_ERR("dma_buf_begin_cpu_access err: %d\n", ret);
-   goto vunmap;
+   goto put_fb;
}
}
 
@@ -321,8 +316,6 @@ static void gm12u320_copy_fb_to_blocks(struct 
gm12u320_device *gm12u320)
if (ret)
GM12U320_ERR("dma_buf_end_cpu_access err: %d\n", ret);
}
-vunmap:
-   drm_gem_shmem_vunmap(fb->obj[0], );
 put_fb:
drm_framebuffer_put(fb);
gm12u320->fb_update.fb = NULL;
@@ -410,7 +403,7 @@ static void gm12u320_fb_update_work(struct work_struct 
*work)
GM12U320_ERR("Frame update error: %d\n", ret);
 }
 
-static void gm12u320_fb_mark_dirty(struct drm_framebuffer *fb,
+static void gm12u320_fb_mark_dirty(struct drm_framebuffer *fb, const struct 
dma_buf_map *map,
   struct drm_rect *dirty)
 {
struct gm12u320_device *gm12u320 = to_gm12u320(fb->dev);
@@ -424,6 +417,7 @@ static void gm12u320_fb_mark_dirty(struct drm_framebuffer 
*fb,
drm_framebuffer_get(fb);
gm12u320->fb_update.fb = fb;
gm12u320->fb_update.rect = *dirty;
+   gm12u320->fb_update.src_map = *map;
wakeup = true;
} else {
struct drm_rect *rect = >fb_update.rect;
@@ -452,6 +446,7 @@ static void gm12u320_stop_fb_update(struct gm12u320_device 
*gm12u320)
mutex_lock(>fb_update.lock);
old_fb = gm12u320->fb_update.fb;
gm12u320->fb_update.fb = NULL;
+   dma_buf_map_clear(>fb_update.src_map);
mutex_unlock(>fb_update.lock);
 
drm_framebuffer_put(old_fb);
@@ -564,9 +559,10 @@ static void gm12u320_pipe_enable(struct 
drm_simple_display_pipe *pipe,
 {
struct drm_rect rect = { 0, 0, GM12U320_USER_WIDTH, GM12U320_HEIGHT };
struct gm12u320_device *gm12u320 = to_gm12u320(pipe->crtc.dev);
+   struct drm_shadow_plane_state *shadow_plane_state = 
to_drm_shadow_plane_state(plane_state);
 
gm12u320->fb_update.draw_status_timeout = FIRST_FRAME_TIMEOUT;
-   gm12u320_fb_mark_dirty(plane_state->fb, );
+   gm12u320_fb_mark_dirty(plane_state->fb, _plane_state->map[0], 
);
 }
 
 static void gm12u320_pipe_disable(struct drm_simple_display_pipe *pipe)
@@ -580,16 +576,18 @@ static void gm12u320_pipe_update(struct 
drm_simple_display_pipe *pipe,
 struct drm_plane_state *old_state)
 {
struct drm_plane_state *state = pipe->plane.state;
+   struct drm_shadow_plane_state *shadow_plane_state = 
to_drm_shadow_plane_state(state);
struct drm_rect rect;
 
if (drm_atomic_helper_damage_merged(old_state, state, ))
-   gm12u320_fb_mark_dirty(pipe->plane.state->fb, );
+   gm12u320_fb_mark_dirty(state->fb, _plane_state->map[0], 
);
 }
 
 static const struct 

[PATCH 3/6] drm/mgag200: Move vmap out of commit tail

2021-02-04 Thread Thomas Zimmermann
Vmap operations may acquire the dmabuf reservation lock, which is not
allowed within atomic commit-tail functions. Therefore move vmap and
vunmap from the damage handler into prepare_fb and cleanup_fb callbacks.

The mapping is provided as GEM shadow-buffered plane. The functions in
the commit tail use the pre-established mapping for damage handling.

Signed-off-by: Thomas Zimmermann 
---
 drivers/gpu/drm/mgag200/mgag200_mode.c | 23 ---
 1 file changed, 8 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c 
b/drivers/gpu/drm/mgag200/mgag200_mode.c
index 1dfc42170059..f871753e898e 100644
--- a/drivers/gpu/drm/mgag200/mgag200_mode.c
+++ b/drivers/gpu/drm/mgag200/mgag200_mode.c
@@ -17,6 +17,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -1549,22 +1550,12 @@ mgag200_simple_display_pipe_mode_valid(struct 
drm_simple_display_pipe *pipe,
 
 static void
 mgag200_handle_damage(struct mga_device *mdev, struct drm_framebuffer *fb,
- struct drm_rect *clip)
+ struct drm_rect *clip, const struct dma_buf_map *map)
 {
-   struct drm_device *dev = >base;
-   struct dma_buf_map map;
-   void *vmap;
-   int ret;
-
-   ret = drm_gem_shmem_vmap(fb->obj[0], );
-   if (drm_WARN_ON(dev, ret))
-   return; /* BUG: SHMEM BO should always be vmapped */
-   vmap = map.vaddr; /* TODO: Use mapping abstraction properly */
+   void *vmap = map->vaddr; /* TODO: Use mapping abstraction properly */
 
drm_fb_memcpy_dstclip(mdev->vram, vmap, fb, clip);
 
-   drm_gem_shmem_vunmap(fb->obj[0], );
-
/* Always scanout image at VRAM offset 0 */
mgag200_set_startadd(mdev, (u32)0);
mgag200_set_offset(mdev, fb);
@@ -1580,6 +1571,7 @@ mgag200_simple_display_pipe_enable(struct 
drm_simple_display_pipe *pipe,
struct mga_device *mdev = to_mga_device(dev);
struct drm_display_mode *adjusted_mode = _state->adjusted_mode;
struct drm_framebuffer *fb = plane_state->fb;
+   struct drm_shadow_plane_state *shadow_plane_state = 
to_drm_shadow_plane_state(plane_state);
struct drm_rect fullscreen = {
.x1 = 0,
.x2 = fb->width,
@@ -1608,7 +1600,7 @@ mgag200_simple_display_pipe_enable(struct 
drm_simple_display_pipe *pipe,
mga_crtc_load_lut(crtc);
mgag200_enable_display(mdev);
 
-   mgag200_handle_damage(mdev, fb, );
+   mgag200_handle_damage(mdev, fb, , 
_plane_state->map[0]);
 }
 
 static void
@@ -1649,6 +1641,7 @@ mgag200_simple_display_pipe_update(struct 
drm_simple_display_pipe *pipe,
struct drm_device *dev = plane->dev;
struct mga_device *mdev = to_mga_device(dev);
struct drm_plane_state *state = plane->state;
+   struct drm_shadow_plane_state *shadow_plane_state = 
to_drm_shadow_plane_state(state);
struct drm_framebuffer *fb = state->fb;
struct drm_rect damage;
 
@@ -1656,7 +1649,7 @@ mgag200_simple_display_pipe_update(struct 
drm_simple_display_pipe *pipe,
return;
 
if (drm_atomic_helper_damage_merged(old_state, state, ))
-   mgag200_handle_damage(mdev, fb, );
+   mgag200_handle_damage(mdev, fb, , 
_plane_state->map[0]);
 }
 
 static const struct drm_simple_display_pipe_funcs
@@ -1666,7 +1659,7 @@ mgag200_simple_display_pipe_funcs = {
.disable= mgag200_simple_display_pipe_disable,
.check  = mgag200_simple_display_pipe_check,
.update = mgag200_simple_display_pipe_update,
-   .prepare_fb = drm_gem_fb_simple_display_pipe_prepare_fb,
+   DRM_GEM_SIMPLE_DISPLAY_PIPE_SHADOW_PLANE_FUNCS,
 };
 
 static const uint32_t mgag200_simple_display_pipe_formats[] = {
-- 
2.30.0

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH v6 02/10] Revert "drm/qxl: do not run release if qxl failed to init"

2021-02-04 Thread Thomas Zimmermann

Hi Tong

Am 04.02.21 um 19:52 schrieb Tong Zhang:

Hi Thomas,

The original problem was qxl_device_init() can fail,
when it fails there is no need to call
qxl_modeset_fini(qdev);
qxl_device_fini(qdev);
But those two functions are otherwise called in the qxl_drm_release() -


OK, makes sense. Thanks for the explanation.



I have posted an updated patch.
The new patch use the following logic

+   if (!qdev->ddev.mode_config.funcs)
+ return;


This is again just papering over the issue. Better don't call 
qxl_drm_release() in the error path if qxl_device_init() fails.


I see two solutions: either roll-back manually, or use our new managed 
DRM interfaces. This is what the other drivers do.


Best regards
Thomas


qxl_modeset_fini(qdev);
qxl_device_fini(qdev);

Thanks,
- Tong



On Feb 4, 2021, at 1:34 PM, Thomas Zimmermann  wrote:

Hi

Am 04.02.21 um 15:57 schrieb Gerd Hoffmann:

This reverts commit b91907a6241193465ca92e357adf16822242296d.


This should be in the correct format, as given by 'dim cite'.

dim cite b91907a6241193465ca92e357adf16822242296d
b91907a62411 ("drm/qxl: do not run release if qxl failed to init")


Patch is broken, it effectively makes qxl_drm_release() a nop
because on normal driver shutdown qxl_drm_release() is called
*after* drm_dev_unregister().
Cc: Tong Zhang 
Signed-off-by: Gerd Hoffmann 
---
  drivers/gpu/drm/qxl/qxl_drv.c | 2 --
  1 file changed, 2 deletions(-)
diff --git a/drivers/gpu/drm/qxl/qxl_drv.c b/drivers/gpu/drm/qxl/qxl_drv.c
index 34c8b25b5780..fb5f6a5e81d7 100644
--- a/drivers/gpu/drm/qxl/qxl_drv.c
+++ b/drivers/gpu/drm/qxl/qxl_drv.c
@@ -144,8 +144,6 @@ static void qxl_drm_release(struct drm_device *dev)
 * reodering qxl_modeset_fini() + qxl_device_fini() calls is
 * non-trivial though.
 */
-   if (!dev->registered)
-   return;


I'm not sure what the original problem was, but I'm sure that this isn't the 
fix for it. If there's a problem with shutdown, the operations rather have to 
be reordered correctly.

With the citation style address:

Acked-by: Thomas Zimmermann 


qxl_modeset_fini(qdev);
qxl_device_fini(qdev);
  }


--
Thomas Zimmermann
Graphics Driver Developer
SUSE Software Solutions Germany GmbH
Maxfeldstr. 5, 90409 Nürnberg, Germany
(HRB 36809, AG Nürnberg)
Geschäftsführer: Felix Imendörffer





--
Thomas Zimmermann
Graphics Driver Developer
SUSE Software Solutions Germany GmbH
Maxfeldstr. 5, 90409 Nürnberg, Germany
(HRB 36809, AG Nürnberg)
Geschäftsführer: Felix Imendörffer



OpenPGP_signature
Description: OpenPGP digital signature
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH v6 08/16] ACPI / NUMA: add a stub function for node_to_pxm()

2021-02-04 Thread Rafael J. Wysocki
On Thu, Feb 4, 2021 at 7:41 PM Wei Liu  wrote:
>
> On Wed, Feb 03, 2021 at 03:04:27PM +, Wei Liu wrote:
> > There is already a stub function for pxm_to_node but conversion to the
> > other direction is missing.
> >
> > It will be used by Microsoft Hypervisor code later.
> >
> > Signed-off-by: Wei Liu 
>
> Hi ACPI maintainers, if you're happy with this patch I can take it via
> the hyperv-next tree, given the issue is discovered when pxm_to_node is
> called in our code.

Yes, you can.

Thanks!

>
> > ---
> > v6: new
> > ---
> >  include/acpi/acpi_numa.h | 4 
> >  1 file changed, 4 insertions(+)
> >
> > diff --git a/include/acpi/acpi_numa.h b/include/acpi/acpi_numa.h
> > index a4c6ef809e27..40a91ce87e04 100644
> > --- a/include/acpi/acpi_numa.h
> > +++ b/include/acpi/acpi_numa.h
> > @@ -30,6 +30,10 @@ static inline int pxm_to_node(int pxm)
> >  {
> >   return 0;
> >  }
> > +static inline int node_to_pxm(int node)
> > +{
> > + return 0;
> > +}
> >  #endif   /* CONFIG_ACPI_NUMA */
> >
> >  #ifdef CONFIG_ACPI_HMAT
> > --
> > 2.20.1
> >
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


RE: [PATCH v6 15/16] x86/hyperv: implement an MSI domain for root partition

2021-02-04 Thread Michael Kelley via Virtualization
From: Wei Liu  Sent: Thursday, February 4, 2021 9:57 AM
> 
> On Thu, Feb 04, 2021 at 05:43:16PM +, Michael Kelley wrote:
> [...]
> > >  remove_cpuhp_state:
> > > diff --git a/arch/x86/hyperv/irqdomain.c b/arch/x86/hyperv/irqdomain.c
> > > new file mode 100644
> > > index ..117f17e8c88a
> > > --- /dev/null
> > > +++ b/arch/x86/hyperv/irqdomain.c
> > > @@ -0,0 +1,362 @@
> > > +// SPDX-License-Identifier: GPL-2.0
> > > +
> > > +/*
> > > + * for Linux to run as the root partition on Microsoft Hypervisor.
> >
> > Nit:  Looks like the initial word "Irqdomain" got dropped from the above
> > comment line.  But don't respin just for this.
> >
> 
> I've added it back. Thanks.
> 
> > > +static int hv_map_interrupt(union hv_device_id device_id, bool level,
> > > + int cpu, int vector, struct hv_interrupt_entry *entry)
> > > +{
> > > + struct hv_input_map_device_interrupt *input;
> > > + struct hv_output_map_device_interrupt *output;
> > > + struct hv_device_interrupt_descriptor *intr_desc;
> > > + unsigned long flags;
> > > + u64 status;
> > > + cpumask_t mask = CPU_MASK_NONE;
> > > + int nr_bank, var_size;
> > > +
> > > + local_irq_save(flags);
> > > +
> > > + input = *this_cpu_ptr(hyperv_pcpu_input_arg);
> > > + output = *this_cpu_ptr(hyperv_pcpu_output_arg);
> > > +
> > > + intr_desc = &input->interrupt_descriptor;
> > > + memset(input, 0, sizeof(*input));
> > > + input->partition_id = hv_current_partition_id;
> > > + input->device_id = device_id.as_uint64;
> > > + intr_desc->interrupt_type = HV_X64_INTERRUPT_TYPE_FIXED;
> > > + intr_desc->vector_count = 1;
> > > + intr_desc->target.vector = vector;
> > > +
> > > + if (level)
> > > + intr_desc->trigger_mode = HV_INTERRUPT_TRIGGER_MODE_LEVEL;
> > > + else
> > > + intr_desc->trigger_mode = HV_INTERRUPT_TRIGGER_MODE_EDGE;
> > > +
> > > + cpumask_set_cpu(cpu, &mask);
> > > + intr_desc->target.vp_set.valid_bank_mask = 0;
> > > + intr_desc->target.vp_set.format = HV_GENERIC_SET_SPARSE_4K;
> > > + nr_bank = cpumask_to_vpset(&(intr_desc->target.vp_set), &mask);
> >
> > There's a function get_cpu_mask() that returns a pointer to a cpumask with 
> > only
> > the specified cpu set in the mask.  It returns a const pointer to the 
> > correct entry
> > in a pre-allocated array of all such cpumasks, so it's a lot more efficient 
> > than
> > allocating and initializing a local cpumask instance on the stack.
> >
> 
> That's nice.
> 
> I've got the following diff to fix both issues. If you're happy with the
> changes, can you give your Reviewed-by? That saves a round of posting.
> 
> diff --git a/arch/x86/hyperv/irqdomain.c b/arch/x86/hyperv/irqdomain.c
> index 0cabc9aece38..fa71db798465 100644
> --- a/arch/x86/hyperv/irqdomain.c
> +++ b/arch/x86/hyperv/irqdomain.c
> @@ -1,7 +1,7 @@
>  // SPDX-License-Identifier: GPL-2.0
> 
>  /*
> - * for Linux to run as the root partition on Microsoft Hypervisor.
> + * Irqdomain for Linux to run as the root partition on Microsoft Hypervisor.
>   *
>   * Authors:
>   *  Sunil Muthuswamy 
> @@ -20,7 +20,7 @@ static int hv_map_interrupt(union hv_device_id device_id, 
> bool level,
> struct hv_device_interrupt_descriptor *intr_desc;
> unsigned long flags;
> u64 status;
> -   cpumask_t mask = CPU_MASK_NONE;
> +   const cpumask_t *mask;
> int nr_bank, var_size;
> 
> local_irq_save(flags);
> @@ -41,10 +41,10 @@ static int hv_map_interrupt(union hv_device_id device_id, 
> bool
> level,
> else
> intr_desc->trigger_mode = HV_INTERRUPT_TRIGGER_MODE_EDGE;
> 
> -   cpumask_set_cpu(cpu, &mask);
> +   mask = cpumask_of(cpu);
> intr_desc->target.vp_set.valid_bank_mask = 0;
> intr_desc->target.vp_set.format = HV_GENERIC_SET_SPARSE_4K;
> -   nr_bank = cpumask_to_vpset(&(intr_desc->target.vp_set), &mask);
> +   nr_bank = cpumask_to_vpset(&(intr_desc->target.vp_set), mask);

Can you just do the following and get rid of the 'mask' local entirely?

nr_bank = cpumask_to_vpset(&(intr_desc->target.vp_set), cpumask_of(cpu));

Either way,

Reviewed-by: Michael Kelley 

> if (nr_bank < 0) {
> local_irq_restore(flags);
> pr_err("%s: unable to generate VP set\n", __func__);
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH v6 02/10] Revert "drm/qxl: do not run release if qxl failed to init"

2021-02-04 Thread Thomas Zimmermann

Hi

Am 04.02.21 um 15:57 schrieb Gerd Hoffmann:

This reverts commit b91907a6241193465ca92e357adf16822242296d.


This should be in the correct format, as given by 'dim cite'.

 dim cite b91907a6241193465ca92e357adf16822242296d
b91907a62411 ("drm/qxl: do not run release if qxl failed to init")



Patch is broken, it effectively makes qxl_drm_release() a nop
because on normal driver shutdown qxl_drm_release() is called
*after* drm_dev_unregister().

Cc: Tong Zhang 
Signed-off-by: Gerd Hoffmann 
---
  drivers/gpu/drm/qxl/qxl_drv.c | 2 --
  1 file changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/qxl/qxl_drv.c b/drivers/gpu/drm/qxl/qxl_drv.c
index 34c8b25b5780..fb5f6a5e81d7 100644
--- a/drivers/gpu/drm/qxl/qxl_drv.c
+++ b/drivers/gpu/drm/qxl/qxl_drv.c
@@ -144,8 +144,6 @@ static void qxl_drm_release(struct drm_device *dev)
 * reodering qxl_modeset_fini() + qxl_device_fini() calls is
 * non-trivial though.
 */
-   if (!dev->registered)
-   return;


I'm not sure what the original problem was, but I'm sure that this isn't 
the fix for it. If there's a problem with shutdown, the operations 
rather have to be reordered correctly.


With the citation style address:

Acked-by: Thomas Zimmermann 


qxl_modeset_fini(qdev);
qxl_device_fini(qdev);
  }



--
Thomas Zimmermann
Graphics Driver Developer
SUSE Software Solutions Germany GmbH
Maxfeldstr. 5, 90409 Nürnberg, Germany
(HRB 36809, AG Nürnberg)
Geschäftsführer: Felix Imendörffer



OpenPGP_signature
Description: OpenPGP digital signature
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH v6 10/10] drm/qxl: allocate dumb buffers in ram

2021-02-04 Thread Thomas Zimmermann



Am 04.02.21 um 15:57 schrieb Gerd Hoffmann:

dumb buffers are shadowed anyway, so there is no need to store them
in device memory.  Use QXL_GEM_DOMAIN_CPU (TTM_PL_SYSTEM) instead.


Makes sense. I had similar issues in other drivers about the placement 
of buffers. For them, all new buffers now go into system ram by default, 
and only move into device memory when they have to.




Signed-off-by: Gerd Hoffmann 


Acked-by: Thomas Zimmermann 


---
  drivers/gpu/drm/qxl/qxl_dumb.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/qxl/qxl_dumb.c b/drivers/gpu/drm/qxl/qxl_dumb.c
index c04cd5a2553c..48a58ba1db96 100644
--- a/drivers/gpu/drm/qxl/qxl_dumb.c
+++ b/drivers/gpu/drm/qxl/qxl_dumb.c
@@ -59,7 +59,7 @@ int qxl_mode_dumb_create(struct drm_file *file_priv,
surf.stride = pitch;
surf.format = format;
r = qxl_gem_object_create_with_handle(qdev, file_priv,
- QXL_GEM_DOMAIN_SURFACE,
+ QXL_GEM_DOMAIN_CPU,
  args->size, , ,
  );
if (r)



--
Thomas Zimmermann
Graphics Driver Developer
SUSE Software Solutions Germany GmbH
Maxfeldstr. 5, 90409 Nürnberg, Germany
(HRB 36809, AG Nürnberg)
Geschäftsführer: Felix Imendörffer



OpenPGP_signature
Description: OpenPGP digital signature
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH v6 06/10] drm/qxl: properly pin/unpin shadow

2021-02-04 Thread Thomas Zimmermann



Am 04.02.21 um 15:57 schrieb Gerd Hoffmann:

Suggested-by: Thomas Zimmermann 
Signed-off-by: Gerd Hoffmann 


Thanks for this.

Acked-by: Thomas Zimmermann 



---
  drivers/gpu/drm/qxl/qxl_display.c | 4 
  1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/qxl/qxl_display.c 
b/drivers/gpu/drm/qxl/qxl_display.c
index 60331e31861a..d25fd3acc891 100644
--- a/drivers/gpu/drm/qxl/qxl_display.c
+++ b/drivers/gpu/drm/qxl/qxl_display.c
@@ -802,12 +802,14 @@ static int qxl_plane_prepare_fb(struct drm_plane *plane,
}
if (user_bo->shadow != qdev->dumb_shadow_bo) {
if (user_bo->shadow) {
+   qxl_bo_unpin(user_bo->shadow);
drm_gem_object_put
(&user_bo->shadow->tbo.base);
user_bo->shadow = NULL;
}
drm_gem_object_get(&qdev->dumb_shadow_bo->tbo.base);
user_bo->shadow = qdev->dumb_shadow_bo;
+   qxl_bo_pin(user_bo->shadow);
}
}
  
@@ -833,6 +835,7 @@ static void qxl_plane_cleanup_fb(struct drm_plane *plane,

qxl_bo_unpin(user_bo);
  
  	if (old_state->fb != plane->state->fb && user_bo->shadow) {

+   qxl_bo_unpin(user_bo->shadow);
drm_gem_object_put(&user_bo->shadow->tbo.base);
user_bo->shadow = NULL;
}
@@ -1230,6 +1233,7 @@ int qxl_modeset_init(struct qxl_device *qdev)
  void qxl_modeset_fini(struct qxl_device *qdev)
  {
if (qdev->dumb_shadow_bo) {
+   qxl_bo_unpin(qdev->dumb_shadow_bo);
drm_gem_object_put(&qdev->dumb_shadow_bo->tbo.base);
qdev->dumb_shadow_bo = NULL;
}



--
Thomas Zimmermann
Graphics Driver Developer
SUSE Software Solutions Germany GmbH
Maxfeldstr. 5, 90409 Nürnberg, Germany
(HRB 36809, AG Nürnberg)
Geschäftsführer: Felix Imendörffer



OpenPGP_signature
Description: OpenPGP digital signature
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH v6 05/10] drm/qxl: release shadow on shutdown

2021-02-04 Thread Thomas Zimmermann



Am 04.02.21 um 15:57 schrieb Gerd Hoffmann:

In case we have a shadow surface on shutdown release
it so it doesn't leak.

Signed-off-by: Gerd Hoffmann 


Acked-by: Thomas Zimmermann 


---
  drivers/gpu/drm/qxl/qxl_display.c | 4 
  1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/qxl/qxl_display.c 
b/drivers/gpu/drm/qxl/qxl_display.c
index 38d6b596094d..60331e31861a 100644
--- a/drivers/gpu/drm/qxl/qxl_display.c
+++ b/drivers/gpu/drm/qxl/qxl_display.c
@@ -1229,5 +1229,9 @@ int qxl_modeset_init(struct qxl_device *qdev)
  
  void qxl_modeset_fini(struct qxl_device *qdev)

  {
+   if (qdev->dumb_shadow_bo) {
+   drm_gem_object_put(&qdev->dumb_shadow_bo->tbo.base);
+   qdev->dumb_shadow_bo = NULL;
+   }
qxl_destroy_monitors_object(qdev);
  }



--
Thomas Zimmermann
Graphics Driver Developer
SUSE Software Solutions Germany GmbH
Maxfeldstr. 5, 90409 Nürnberg, Germany
(HRB 36809, AG Nürnberg)
Geschäftsführer: Felix Imendörffer



OpenPGP_signature
Description: OpenPGP digital signature
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

RE: [PATCH v6 15/16] x86/hyperv: implement an MSI domain for root partition

2021-02-04 Thread Michael Kelley via Virtualization
From: Wei Liu  Sent: Wednesday, February 3, 2021 7:05 AM
> 
> When Linux runs as the root partition on Microsoft Hypervisor, its
> interrupts are remapped.  Linux will need to explicitly map and unmap
> interrupts for hardware.
> 
> Implement an MSI domain to issue the correct hypercalls. And initialize
> this irqdomain as the default MSI irq domain.
> 
> Signed-off-by: Sunil Muthuswamy 
> Co-Developed-by: Sunil Muthuswamy 
> Signed-off-by: Wei Liu 
> ---
> v6:
> 1. Use u64 status.
> 2. Use vpset instead of bitmap.
> 3. Factor out hv_map_interrupt
> 4. Address other misc comments.
> 
> v4: Fix compilation issue when CONFIG_PCI_MSI is not set.
> v3: build irqdomain.o for 32bit as well.
> v2: This patch is simplified due to upstream changes.
> ---
>  arch/x86/hyperv/Makefile|   2 +-
>  arch/x86/hyperv/hv_init.c   |   9 +
>  arch/x86/hyperv/irqdomain.c | 362 
>  arch/x86/include/asm/mshyperv.h |   2 +
>  4 files changed, 374 insertions(+), 1 deletion(-)
>  create mode 100644 arch/x86/hyperv/irqdomain.c
> 
> diff --git a/arch/x86/hyperv/Makefile b/arch/x86/hyperv/Makefile
> index 565358020921..48e2c51464e8 100644
> --- a/arch/x86/hyperv/Makefile
> +++ b/arch/x86/hyperv/Makefile
> @@ -1,5 +1,5 @@
>  # SPDX-License-Identifier: GPL-2.0-only
> -obj-y:= hv_init.o mmu.o nested.o
> +obj-y:= hv_init.o mmu.o nested.o irqdomain.o
>  obj-$(CONFIG_X86_64) += hv_apic.o hv_proc.o
> 
>  ifdef CONFIG_X86_64
> diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
> index 11c5997691f4..894ce899f0cb 100644
> --- a/arch/x86/hyperv/hv_init.c
> +++ b/arch/x86/hyperv/hv_init.c
> @@ -483,6 +483,15 @@ void __init hyperv_init(void)
> 
>   BUG_ON(hv_root_partition && hv_current_partition_id == ~0ull);
> 
> +#ifdef CONFIG_PCI_MSI
> + /*
> +  * If we're running as root, we want to create our own PCI MSI domain.
> +  * We can't set this in hv_pci_init because that would be too late.
> +  */
> + if (hv_root_partition)
> + x86_init.irqs.create_pci_msi_domain = hv_create_pci_msi_domain;
> +#endif
> +
>   return;
> 
>  remove_cpuhp_state:
> diff --git a/arch/x86/hyperv/irqdomain.c b/arch/x86/hyperv/irqdomain.c
> new file mode 100644
> index ..117f17e8c88a
> --- /dev/null
> +++ b/arch/x86/hyperv/irqdomain.c
> @@ -0,0 +1,362 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +/*
> + * for Linux to run as the root partition on Microsoft Hypervisor.

Nit:  Looks like the initial word "Irqdomain" got dropped from the above
comment line.  But don't respin just for this.

> + *
> + * Authors:
> + *  Sunil Muthuswamy 
> + *  Wei Liu 
> + */
> +
> +#include 
> +#include 
> +#include 
> +
> +static int hv_map_interrupt(union hv_device_id device_id, bool level,
> + int cpu, int vector, struct hv_interrupt_entry *entry)
> +{
> + struct hv_input_map_device_interrupt *input;
> + struct hv_output_map_device_interrupt *output;
> + struct hv_device_interrupt_descriptor *intr_desc;
> + unsigned long flags;
> + u64 status;
> + cpumask_t mask = CPU_MASK_NONE;
> + int nr_bank, var_size;
> +
> + local_irq_save(flags);
> +
> + input = *this_cpu_ptr(hyperv_pcpu_input_arg);
> + output = *this_cpu_ptr(hyperv_pcpu_output_arg);
> +
> + intr_desc = &input->interrupt_descriptor;
> + memset(input, 0, sizeof(*input));
> + input->partition_id = hv_current_partition_id;
> + input->device_id = device_id.as_uint64;
> + intr_desc->interrupt_type = HV_X64_INTERRUPT_TYPE_FIXED;
> + intr_desc->vector_count = 1;
> + intr_desc->target.vector = vector;
> +
> + if (level)
> + intr_desc->trigger_mode = HV_INTERRUPT_TRIGGER_MODE_LEVEL;
> + else
> + intr_desc->trigger_mode = HV_INTERRUPT_TRIGGER_MODE_EDGE;
> +
> + cpumask_set_cpu(cpu, &mask);
> + intr_desc->target.vp_set.valid_bank_mask = 0;
> + intr_desc->target.vp_set.format = HV_GENERIC_SET_SPARSE_4K;
> + nr_bank = cpumask_to_vpset(&(intr_desc->target.vp_set), &mask);

There's a function get_cpu_mask() that returns a pointer to a cpumask with only
the specified cpu set in the mask.  It returns a const pointer to the correct 
entry
in a pre-allocated array of all such cpumasks, so it's a lot more efficient than
allocating and initializing a local cpumask instance on the stack.

> + if (nr_bank < 0) {
> + local_irq_restore(flags);
> + pr_err("%s: unable to generate VP set\n", __func__);
> + return EINVAL;
> + }
> + intr_desc->target.flags = HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET;
> +
> + /*
> +  * var-sized hypercall, var-size starts after vp_mask (thus
> +  * vp_set.format does not count, but vp_set.valid_bank_mask
> +  * does).
> +  */
> + var_size = nr_bank + 1;
> +
> + status = hv_do_rep_hypercall(HVCALL_MAP_DEVICE_INTERRUPT, 0, var_size,
> + input, output);
> + *entry 

RE: [PATCH v6 16/16] iommu/hyperv: setup an IO-APIC IRQ remapping domain for root partition

2021-02-04 Thread Michael Kelley via Virtualization
From: Wei Liu  Sent: Wednesday, February 3, 2021 7:05 AM
> 
> Just like MSI/MSI-X, IO-APIC interrupts are remapped by Microsoft
> Hypervisor when Linux runs as the root partition. Implement an IRQ
> domain to handle mapping and unmapping of IO-APIC interrupts.
> 
> Signed-off-by: Wei Liu 
> ---
> v6:
> 1. Simplify code due to changes in a previous patch.
> ---
>  arch/x86/hyperv/irqdomain.c |  25 +
>  arch/x86/include/asm/mshyperv.h |   4 +
>  drivers/iommu/hyperv-iommu.c| 177 +++-
>  3 files changed, 203 insertions(+), 3 deletions(-)
> 

Reviewed-by: Michael Kelley 
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH v3 13/13] vdpa_sim_blk: handle VIRTIO_BLK_T_GET_ID

2021-02-04 Thread Stefano Garzarella
Handle VIRTIO_BLK_T_GET_ID request, always answering the
"vdpa_blk_sim" string.

Acked-by: Jason Wang 
Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Stefano Garzarella 
---
v2:
- made 'vdpasim_blk_id' static [Jason]
---
 drivers/vdpa/vdpa_sim/vdpa_sim_blk.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c 
b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
index 2652a499fb34..4e4112dda616 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
@@ -37,6 +37,7 @@
 #define VDPASIM_BLK_VQ_NUM 1
 
 static struct vdpasim *vdpasim_blk_dev;
+static char vdpasim_blk_id[VIRTIO_BLK_ID_BYTES] = "vdpa_blk_sim";
 
 static bool vdpasim_blk_check_range(u64 start_sector, size_t range_size)
 {
@@ -152,6 +153,20 @@ static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
}
break;
 
+   case VIRTIO_BLK_T_GET_ID:
+   bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov,
+ vdpasim_blk_id,
+ VIRTIO_BLK_ID_BYTES);
+   if (bytes < 0) {
+   dev_err(&vdpasim->vdpa.dev,
+   "vringh_iov_push_iotlb() error: %zd\n", bytes);
+   status = VIRTIO_BLK_S_IOERR;
+   break;
+   }
+
+   pushed += bytes;
+   break;
+
default:
dev_warn(&vdpasim->vdpa.dev,
 "Unsupported request type %d\n", type);
-- 
2.29.2

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH v3 12/13] vdpa_sim_blk: implement ramdisk behaviour

2021-02-04 Thread Stefano Garzarella
The previous implementation wrote only the status of each request.
This patch implements a more accurate block device simulator,
providing a ramdisk-like behavior and adding input validation.

Acked-by: Jason Wang 
Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Stefano Garzarella 
---
v2:
- used %zd %zx to print size_t and ssize_t variables in dev_err()
- removed unnecessary new line [Jason]
- moved VIRTIO_BLK_T_GET_ID in another patch [Jason]
- used push/pull instead of write/read terminology
- added vdpasim_blk_check_range() to avoid overflows [Stefan]
- use vdpasim*_to_cpu instead of le*_to_cpu
- used vringh_kiov_length() helper [Jason]
---
 drivers/vdpa/vdpa_sim/vdpa_sim_blk.c | 164 ---
 1 file changed, 146 insertions(+), 18 deletions(-)

diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c 
b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
index 9822f9edc511..2652a499fb34 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
@@ -3,6 +3,7 @@
  * VDPA simulator for block device.
  *
  * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2021, Red Hat Inc. All rights reserved.
  *
  */
 
@@ -13,6 +14,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include "vdpa_sim.h"
@@ -36,10 +38,151 @@
 
 static struct vdpasim *vdpasim_blk_dev;
 
+static bool vdpasim_blk_check_range(u64 start_sector, size_t range_size)
+{
+   u64 range_sectors = range_size >> SECTOR_SHIFT;
+
+   if (range_size > VDPASIM_BLK_SIZE_MAX * VDPASIM_BLK_SEG_MAX)
+   return false;
+
+   if (start_sector > VDPASIM_BLK_CAPACITY)
+   return false;
+
+   if (range_sectors > VDPASIM_BLK_CAPACITY - start_sector)
+   return false;
+
+   return true;
+}
+
+/* Returns 'true' if the request is handled (with or without an I/O error)
+ * and the status is correctly written in the last byte of the 'in iov',
+ * 'false' otherwise.
+ */
+static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
+  struct vdpasim_virtqueue *vq)
+{
+   size_t pushed = 0, to_pull, to_push;
+   struct virtio_blk_outhdr hdr;
+   ssize_t bytes;
+   loff_t offset;
+   u64 sector;
+   u8 status;
+   u32 type;
+   int ret;
+
+   ret = vringh_getdesc_iotlb(&vq->vring, &vq->out_iov, &vq->in_iov,
+  &vq->head, GFP_ATOMIC);
+   if (ret != 1)
+   return false;
+
+   if (vq->out_iov.used < 1 || vq->in_iov.used < 1) {
+   dev_err(&vdpasim->vdpa.dev, "missing headers - out_iov: %u 
in_iov %u\n",
+   vq->out_iov.used, vq->in_iov.used);
+   return false;
+   }
+
+   if (vq->in_iov.iov[vq->in_iov.used - 1].iov_len < 1) {
+   dev_err(&vdpasim->vdpa.dev, "request in header too short\n");
+   return false;
+   }
+
+   /* The last byte is the status and we checked if the last iov has
+* enough room for it.
+*/
+   to_push = vringh_kiov_length(&vq->in_iov) - 1;
+
+   to_pull = vringh_kiov_length(&vq->out_iov);
+
+   bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov, &hdr,
+ sizeof(hdr));
+   if (bytes != sizeof(hdr)) {
+   dev_err(&vdpasim->vdpa.dev, "request out header too short\n");
+   return false;
+   }
+
+   to_pull -= bytes;
+
+   type = vdpasim32_to_cpu(vdpasim, hdr.type);
+   sector = vdpasim64_to_cpu(vdpasim, hdr.sector);
+   offset = sector << SECTOR_SHIFT;
+   status = VIRTIO_BLK_S_OK;
+
+   switch (type) {
+   case VIRTIO_BLK_T_IN:
+   if (!vdpasim_blk_check_range(sector, to_push)) {
+   dev_err(&vdpasim->vdpa.dev,
+   "reading over the capacity - offset: 0x%llx 
len: 0x%zx\n",
+   offset, to_push);
+   status = VIRTIO_BLK_S_IOERR;
+   break;
+   }
+
+   bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov,
+ vdpasim->buffer + offset,
+ to_push);
+   if (bytes < 0) {
+   dev_err(&vdpasim->vdpa.dev,
+   "vringh_iov_push_iotlb() error: %zd offset: 
0x%llx len: 0x%zx\n",
+   bytes, offset, to_push);
+   status = VIRTIO_BLK_S_IOERR;
+   break;
+   }
+
+   pushed += bytes;
+   break;
+
+   case VIRTIO_BLK_T_OUT:
+   if (!vdpasim_blk_check_range(sector, to_pull)) {
+   dev_err(&vdpasim->vdpa.dev,
+   "writing over the capacity - offset: 0x%llx 
len: 0x%zx\n",
+   offset, to_pull);
+   status = VIRTIO_BLK_S_IOERR;
+   break;
+   }
+
+   bytes = 

[PATCH v3 11/13] vdpa: add vdpa simulator for block device

2021-02-04 Thread Stefano Garzarella
From: Max Gurtovoy 

This will allow running vDPA for virtio block protocol.

It's a preliminary implementation with a simple request handling:
for each request, only the status (last byte) is set.
It's always set to VIRTIO_BLK_S_OK.

Also input validation is missing and will be added in the next commits.

Signed-off-by: Max Gurtovoy 
[sgarzare: various cleanups/fixes]
Acked-by: Jason Wang 
Signed-off-by: Stefano Garzarella 
---
v3:
- updated Mellanox copyright to NVIDIA [Max]
- explained in the commit message that inputs are validated in subsequent
  patches [Stefan]

v2:
- rebased on top of other changes (dev_attr, get_config(), notify(), etc.)
- memset to 0 the config structure in vdpasim_blk_get_config()
- used vdpasim pointer in vdpasim_blk_get_config()

v1:
- Removed unused headers
- Used cpu_to_vdpasim*() to store config fields
- Replaced 'select VDPA_SIM' with 'depends on VDPA_SIM' since selected
  option can not depend on other [Jason]
- Start with a single queue for now [Jason]
- Add comments to memory barriers
---
 drivers/vdpa/vdpa_sim/vdpa_sim_blk.c | 145 +++
 drivers/vdpa/Kconfig |   7 ++
 drivers/vdpa/vdpa_sim/Makefile   |   1 +
 3 files changed, 153 insertions(+)
 create mode 100644 drivers/vdpa/vdpa_sim/vdpa_sim_blk.c

diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c 
b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
new file mode 100644
index ..9822f9edc511
--- /dev/null
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * VDPA simulator for block device.
+ *
+ * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+ *
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "vdpa_sim.h"
+
+#define DRV_VERSION  "0.1"
+#define DRV_AUTHOR   "Max Gurtovoy "
+#define DRV_DESC "vDPA Device Simulator for block device"
+#define DRV_LICENSE  "GPL v2"
+
+#define VDPASIM_BLK_FEATURES   (VDPASIM_FEATURES | \
+(1ULL << VIRTIO_BLK_F_SIZE_MAX) | \
+(1ULL << VIRTIO_BLK_F_SEG_MAX)  | \
+(1ULL << VIRTIO_BLK_F_BLK_SIZE) | \
+(1ULL << VIRTIO_BLK_F_TOPOLOGY) | \
+(1ULL << VIRTIO_BLK_F_MQ))
+
+#define VDPASIM_BLK_CAPACITY   0x4
+#define VDPASIM_BLK_SIZE_MAX   0x1000
+#define VDPASIM_BLK_SEG_MAX32
+#define VDPASIM_BLK_VQ_NUM 1
+
+static struct vdpasim *vdpasim_blk_dev;
+
+static void vdpasim_blk_work(struct work_struct *work)
+{
+   struct vdpasim *vdpasim = container_of(work, struct vdpasim, work);
+   u8 status = VIRTIO_BLK_S_OK;
+   int i;
+
+   spin_lock(&vdpasim->lock);
+
+   if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK))
+   goto out;
+
+   for (i = 0; i < VDPASIM_BLK_VQ_NUM; i++) {
+   struct vdpasim_virtqueue *vq = >vqs[i];
+
+   if (!vq->ready)
+   continue;
+
+   while (vringh_getdesc_iotlb(&vq->vring, &vq->out_iov,
+   &vq->in_iov, &vq->head,
+   GFP_ATOMIC) > 0) {
+   int write;
+
+   vq->in_iov.i = vq->in_iov.used - 1;
+   write = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov,
+ &status, 1);
+   if (write <= 0)
+   break;
+
+   /* Make sure data is wrote before advancing index */
+   smp_wmb();
+
+   vringh_complete_iotlb(&vq->vring, vq->head, write);
+
+   /* Make sure used is visible before rasing the 
interrupt. */
+   smp_wmb();
+
+   local_bh_disable();
+   if (vringh_need_notify_iotlb(&vq->vring) > 0)
+   vringh_notify(&vq->vring);
+   local_bh_enable();
+   }
+   }
+out:
+   spin_unlock(&vdpasim->lock);
+}
+
+static void vdpasim_blk_get_config(struct vdpasim *vdpasim, void *config)
+{
+   struct virtio_blk_config *blk_config =
+   (struct virtio_blk_config *)config;
+
+   memset(config, 0, sizeof(struct virtio_blk_config));
+
+   blk_config->capacity = cpu_to_vdpasim64(vdpasim, VDPASIM_BLK_CAPACITY);
+   blk_config->size_max = cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_SIZE_MAX);
+   blk_config->seg_max = cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_SEG_MAX);
+   blk_config->num_queues = cpu_to_vdpasim16(vdpasim, VDPASIM_BLK_VQ_NUM);
+   blk_config->min_io_size = cpu_to_vdpasim16(vdpasim, 1);
+   blk_config->opt_io_size = cpu_to_vdpasim32(vdpasim, 1);
+   blk_config->blk_size = cpu_to_vdpasim32(vdpasim, SECTOR_SIZE);
+}
+
+static int __init vdpasim_blk_init(void)
+{
+   struct vdpasim_dev_attr dev_attr = {};
+   int ret;
+
+   

[PATCH v3 08/13] vdpa: add return value to get_config/set_config callbacks

2021-02-04 Thread Stefano Garzarella
All implementations of these callbacks already validate inputs.

Let's return an error from these callbacks, so the caller doesn't
need to validate the input anymore.

We update all implementations to return -EINVAL in case of invalid
input.

Signed-off-by: Stefano Garzarella 
---
 include/linux/vdpa.h  | 18 ++
 drivers/vdpa/ifcvf/ifcvf_main.c   | 24 
 drivers/vdpa/mlx5/net/mlx5_vnet.c | 17 +++--
 drivers/vdpa/vdpa_sim/vdpa_sim.c  | 16 ++--
 4 files changed, 47 insertions(+), 28 deletions(-)

diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
index 4ab5494503a8..0e0cbd5fb41b 100644
--- a/include/linux/vdpa.h
+++ b/include/linux/vdpa.h
@@ -157,6 +157,7 @@ struct vdpa_iova_range {
  * @buf: buffer used to read to
  * @len: the length to read from
  * configuration space
+ * Returns integer: success (0) or error (< 0)
  * @set_config:Write to device specific configuration 
space
  * @vdev: vdpa device
  * @offset: offset from the beginning of
@@ -164,6 +165,7 @@ struct vdpa_iova_range {
  * @buf: buffer used to write from
  * @len: the length to write to
  * configuration space
+ * Returns integer: success (0) or error (< 0)
  * @get_generation:Get device config generation (optional)
  * @vdev: vdpa device
  * Returns u32: device generation
@@ -231,10 +233,10 @@ struct vdpa_config_ops {
u32 (*get_vendor_id)(struct vdpa_device *vdev);
u8 (*get_status)(struct vdpa_device *vdev);
void (*set_status)(struct vdpa_device *vdev, u8 status);
-   void (*get_config)(struct vdpa_device *vdev, unsigned int offset,
-  void *buf, unsigned int len);
-   void (*set_config)(struct vdpa_device *vdev, unsigned int offset,
-  const void *buf, unsigned int len);
+   int (*get_config)(struct vdpa_device *vdev, unsigned int offset,
+ void *buf, unsigned int len);
+   int (*set_config)(struct vdpa_device *vdev, unsigned int offset,
+ const void *buf, unsigned int len);
u32 (*get_generation)(struct vdpa_device *vdev);
struct vdpa_iova_range (*get_iova_range)(struct vdpa_device *vdev);
 
@@ -329,8 +331,8 @@ static inline int vdpa_set_features(struct vdpa_device 
*vdev, u64 features)
 }
 
 
-static inline void vdpa_get_config(struct vdpa_device *vdev, unsigned offset,
-  void *buf, unsigned int len)
+static inline int vdpa_get_config(struct vdpa_device *vdev, unsigned offset,
+ void *buf, unsigned int len)
 {
 const struct vdpa_config_ops *ops = vdev->config;
 
@@ -339,8 +341,8 @@ static inline void vdpa_get_config(struct vdpa_device 
*vdev, unsigned offset,
 * If it does happen we assume a legacy guest.
 */
if (!vdev->features_valid)
-   vdpa_set_features(vdev, 0);
-   ops->get_config(vdev, offset, buf, len);
+   return vdpa_set_features(vdev, 0);
+   return ops->get_config(vdev, offset, buf, len);
 }
 
 /**
diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c
index 7c8bbfcf6c3e..f5e6a90d8114 100644
--- a/drivers/vdpa/ifcvf/ifcvf_main.c
+++ b/drivers/vdpa/ifcvf/ifcvf_main.c
@@ -332,24 +332,32 @@ static u32 ifcvf_vdpa_get_vq_align(struct vdpa_device 
*vdpa_dev)
return IFCVF_QUEUE_ALIGNMENT;
 }
 
-static void ifcvf_vdpa_get_config(struct vdpa_device *vdpa_dev,
- unsigned int offset,
- void *buf, unsigned int len)
+static int ifcvf_vdpa_get_config(struct vdpa_device *vdpa_dev,
+unsigned int offset,
+void *buf, unsigned int len)
 {
struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
 
-   WARN_ON(offset + len > sizeof(struct virtio_net_config));
+   if (offset + len > sizeof(struct virtio_net_config))
+   return -EINVAL;
+
ifcvf_read_net_config(vf, offset, buf, len);
+
+   return 0;
 }
 
-static void ifcvf_vdpa_set_config(struct vdpa_device *vdpa_dev,
- unsigned int offset, const void *buf,
- unsigned int len)
+static int ifcvf_vdpa_set_config(struct vdpa_device *vdpa_dev,
+unsigned int offset, const void *buf,
+unsigned int len)
 {
struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
 
-   WARN_ON(offset + len > sizeof(struct virtio_net_config));
+   if (offset + len > sizeof(struct 

[PATCH v3 10/13] vhost/vdpa: Remove the restriction that only supports virtio-net devices

2021-02-04 Thread Stefano Garzarella
From: Xie Yongji 

Since the config checks are done by the vDPA drivers, we can remove the
virtio-net restriction and we should be able to support all kinds of
virtio devices.

<linux/virtio_net.h> is not needed anymore, but we need to include
<linux/slab.h> to avoid compilation failures.

Signed-off-by: Xie Yongji 
Signed-off-by: Stefano Garzarella 
---
 drivers/vhost/vdpa.c | 6 +-
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index d61e779000a8..3879b1ad12dd 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -16,12 +16,12 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
 #include 
 #include 
-#include 
 
 #include "vhost.h"
 
@@ -1006,10 +1006,6 @@ static int vhost_vdpa_probe(struct vdpa_device *vdpa)
int minor;
int r;
 
-   /* Currently, we only accept the network devices. */
-   if (ops->get_device_id(vdpa) != VIRTIO_ID_NET)
-   return -ENOTSUPP;
-
v = kzalloc(sizeof(*v), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
if (!v)
return -ENOMEM;
-- 
2.29.2

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH v3 09/13] vhost/vdpa: remove vhost_vdpa_config_validate()

2021-02-04 Thread Stefano Garzarella
get_config() and set_config() callbacks in the 'struct vdpa_config_ops'
usually already validate the inputs. Also, now they can return an error,
so we don't need to validate them here anymore.

Let's use the return value of these callbacks and return it in case of
error in vhost_vdpa_get_config() and vhost_vdpa_set_config().

Originally-by: Xie Yongji 
Signed-off-by: Stefano Garzarella 
---
 drivers/vhost/vdpa.c | 41 +
 1 file changed, 13 insertions(+), 28 deletions(-)

diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index ef688c8c0e0e..d61e779000a8 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -185,51 +185,35 @@ static long vhost_vdpa_set_status(struct vhost_vdpa *v, 
u8 __user *statusp)
return 0;
 }
 
-static int vhost_vdpa_config_validate(struct vhost_vdpa *v,
- struct vhost_vdpa_config *c)
-{
-   long size = 0;
-
-   switch (v->virtio_id) {
-   case VIRTIO_ID_NET:
-   size = sizeof(struct virtio_net_config);
-   break;
-   }
-
-   if (c->len == 0)
-   return -EINVAL;
-
-   if (c->len > size - c->off)
-   return -E2BIG;
-
-   return 0;
-}
-
 static long vhost_vdpa_get_config(struct vhost_vdpa *v,
  struct vhost_vdpa_config __user *c)
 {
struct vdpa_device *vdpa = v->vdpa;
struct vhost_vdpa_config config;
unsigned long size = offsetof(struct vhost_vdpa_config, buf);
+   long ret;
u8 *buf;
 
if (copy_from_user(, c, size))
return -EFAULT;
-   if (vhost_vdpa_config_validate(v, ))
+   if (config.len == 0)
return -EINVAL;
buf = kvzalloc(config.len, GFP_KERNEL);
if (!buf)
return -ENOMEM;
 
-   vdpa_get_config(vdpa, config.off, buf, config.len);
+   ret = vdpa_get_config(vdpa, config.off, buf, config.len);
+   if (ret)
+   goto out;
 
if (copy_to_user(c->buf, buf, config.len)) {
-   kvfree(buf);
-   return -EFAULT;
+   ret = -EFAULT;
+   goto out;
}
 
+out:
kvfree(buf);
-   return 0;
+   return ret;
 }
 
 static long vhost_vdpa_set_config(struct vhost_vdpa *v,
@@ -239,21 +223,22 @@ static long vhost_vdpa_set_config(struct vhost_vdpa *v,
const struct vdpa_config_ops *ops = vdpa->config;
struct vhost_vdpa_config config;
unsigned long size = offsetof(struct vhost_vdpa_config, buf);
+   long ret;
u8 *buf;
 
if (copy_from_user(, c, size))
return -EFAULT;
-   if (vhost_vdpa_config_validate(v, ))
+   if (config.len == 0)
return -EINVAL;
 
buf = vmemdup_user(c->buf, config.len);
if (IS_ERR(buf))
return PTR_ERR(buf);
 
-   ops->set_config(vdpa, config.off, buf, config.len);
+   ret = ops->set_config(vdpa, config.off, buf, config.len);
 
kvfree(buf);
-   return 0;
+   return ret;
 }
 
 static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep)
-- 
2.29.2

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH v3 06/13] vringh: add vringh_kiov_length() helper

2021-02-04 Thread Stefano Garzarella
This new helper returns the total number of bytes covered by
a vringh_kiov.

Suggested-by: Jason Wang 
Acked-by: Jason Wang 
Signed-off-by: Stefano Garzarella 
---
 include/linux/vringh.h | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/include/linux/vringh.h b/include/linux/vringh.h
index 755211ebd195..84db7b8f912f 100644
--- a/include/linux/vringh.h
+++ b/include/linux/vringh.h
@@ -199,6 +199,17 @@ static inline void vringh_kiov_cleanup(struct vringh_kiov 
*kiov)
kiov->iov = NULL;
 }
 
+static inline size_t vringh_kiov_length(struct vringh_kiov *kiov)
+{
+   size_t len = 0;
+   int i;
+
+   for (i = kiov->i; i < kiov->used; i++)
+   len += kiov->iov[i].iov_len;
+
+   return len;
+}
+
 void vringh_kiov_advance(struct vringh_kiov *kiov, size_t len);
 
 int vringh_getdesc_kern(struct vringh *vrh,
-- 
2.29.2

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH v3 04/13] vringh: explain more about cleaning riov and wiov

2021-02-04 Thread Stefano Garzarella
riov and wiov can be reused with subsequent calls of vringh_getdesc_*().

Let's add a paragraph in the documentation of these functions to better
explain when riov and wiov need to be cleaned up.

Signed-off-by: Stefano Garzarella 
---
 drivers/vhost/vringh.c | 15 ---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
index bee63d68201a..2a88e087afd8 100644
--- a/drivers/vhost/vringh.c
+++ b/drivers/vhost/vringh.c
@@ -662,7 +662,10 @@ EXPORT_SYMBOL(vringh_init_user);
  * *head will be vrh->vring.num.  You may be able to ignore an invalid
  * descriptor, but there's not much you can do with an invalid ring.
  *
- * Note that you may need to clean up riov and wiov, even on error!
+ * Note that you can reuse riov and wiov with subsequent calls. Content is
+ * overwritten and memory reallocated if more space is needed.
+ * When you don't have to use riov and wiov anymore, you should clean up them
+ * calling vringh_iov_cleanup() to release the memory, even on error!
  */
 int vringh_getdesc_user(struct vringh *vrh,
struct vringh_iov *riov,
@@ -932,7 +935,10 @@ EXPORT_SYMBOL(vringh_init_kern);
  * *head will be vrh->vring.num.  You may be able to ignore an invalid
  * descriptor, but there's not much you can do with an invalid ring.
  *
- * Note that you may need to clean up riov and wiov, even on error!
+ * Note that you can reuse riov and wiov with subsequent calls. Content is
+ * overwritten and memory reallocated if more space is needed.
+ * When you don't have to use riov and wiov anymore, you should clean up them
+ * calling vringh_kiov_cleanup() to release the memory, even on error!
  */
 int vringh_getdesc_kern(struct vringh *vrh,
struct vringh_kiov *riov,
@@ -1292,7 +1298,10 @@ EXPORT_SYMBOL(vringh_set_iotlb);
  * *head will be vrh->vring.num.  You may be able to ignore an invalid
  * descriptor, but there's not much you can do with an invalid ring.
  *
- * Note that you may need to clean up riov and wiov, even on error!
+ * Note that you can reuse riov and wiov with subsequent calls. Content is
+ * overwritten and memory reallocated if more space is needed.
+ * When you don't have to use riov and wiov anymore, you should clean up them
+ * calling vringh_kiov_cleanup() to release the memory, even on error!
  */
 int vringh_getdesc_iotlb(struct vringh *vrh,
 struct vringh_kiov *riov,
-- 
2.29.2

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH v3 05/13] vringh: implement vringh_kiov_advance()

2021-02-04 Thread Stefano Garzarella
In some cases, it may be useful to provide a way to skip a number
of bytes in a vringh_kiov.

Let's implement vringh_kiov_advance() for this purpose, reusing the
code from vringh_iov_xfer().
We replace that code calling the new vringh_kiov_advance().

Acked-by: Jason Wang 
Signed-off-by: Stefano Garzarella 
---
 include/linux/vringh.h |  2 ++
 drivers/vhost/vringh.c | 41 +
 2 files changed, 31 insertions(+), 12 deletions(-)

diff --git a/include/linux/vringh.h b/include/linux/vringh.h
index 9c077863c8f6..755211ebd195 100644
--- a/include/linux/vringh.h
+++ b/include/linux/vringh.h
@@ -199,6 +199,8 @@ static inline void vringh_kiov_cleanup(struct vringh_kiov 
*kiov)
kiov->iov = NULL;
 }
 
+void vringh_kiov_advance(struct vringh_kiov *kiov, size_t len);
+
 int vringh_getdesc_kern(struct vringh *vrh,
struct vringh_kiov *riov,
struct vringh_kiov *wiov,
diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
index 2a88e087afd8..4af8fa259d65 100644
--- a/drivers/vhost/vringh.c
+++ b/drivers/vhost/vringh.c
@@ -75,6 +75,34 @@ static inline int __vringh_get_head(const struct vringh *vrh,
return head;
 }
 
+/**
+ * vringh_kiov_advance - skip bytes from vring_kiov
+ * @iov: an iov passed to vringh_getdesc_*() (updated as we consume)
+ * @len: the maximum length to advance
+ */
+void vringh_kiov_advance(struct vringh_kiov *iov, size_t len)
+{
+   while (len && iov->i < iov->used) {
+   size_t partlen = min(iov->iov[iov->i].iov_len, len);
+
+   iov->consumed += partlen;
+   iov->iov[iov->i].iov_len -= partlen;
+   iov->iov[iov->i].iov_base += partlen;
+
+   if (!iov->iov[iov->i].iov_len) {
+   /* Fix up old iov element then increment. */
+   iov->iov[iov->i].iov_len = iov->consumed;
+   iov->iov[iov->i].iov_base -= iov->consumed;
+
+   iov->consumed = 0;
+   iov->i++;
+   }
+
+   len -= partlen;
+   }
+}
+EXPORT_SYMBOL(vringh_kiov_advance);
+
 /* Copy some bytes to/from the iovec.  Returns num copied. */
 static inline ssize_t vringh_iov_xfer(struct vringh *vrh,
  struct vringh_kiov *iov,
@@ -95,19 +123,8 @@ static inline ssize_t vringh_iov_xfer(struct vringh *vrh,
done += partlen;
len -= partlen;
ptr += partlen;
-   iov->consumed += partlen;
-   iov->iov[iov->i].iov_len -= partlen;
-   iov->iov[iov->i].iov_base += partlen;
 
-   if (!iov->iov[iov->i].iov_len) {
-   /* Fix up old iov element then increment. */
-   iov->iov[iov->i].iov_len = iov->consumed;
-   iov->iov[iov->i].iov_base -= iov->consumed;
-
-   
-   iov->consumed = 0;
-   iov->i++;
-   }
+   vringh_kiov_advance(iov, partlen);
}
return done;
 }
-- 
2.29.2

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH v3 07/13] vdpa_sim: cleanup kiovs in vdpasim_free()

2021-02-04 Thread Stefano Garzarella
vringh_getdesc_iotlb() allocates memory to store the kvec, that
is freed with vringh_kiov_cleanup().

vringh_getdesc_iotlb() is able to reuse a previously allocated kvec,
so in order to avoid allocating the kvec for each request, we do not
call vringh_kiov_cleanup() when we have finished handling a request;
instead, we should call it when we free the entire device.

Acked-by: Jason Wang 
Signed-off-by: Stefano Garzarella 
---
 drivers/vdpa/vdpa_sim/vdpa_sim.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
index 53238989713d..a7aeb5d01c3e 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
@@ -562,8 +562,15 @@ static int vdpasim_dma_unmap(struct vdpa_device *vdpa, u64 
iova, u64 size)
 static void vdpasim_free(struct vdpa_device *vdpa)
 {
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+   int i;
 
cancel_work_sync(>work);
+
+   for (i = 0; i < vdpasim->dev_attr.nvqs; i++) {
+   vringh_kiov_cleanup(>vqs[i].out_iov);
+   vringh_kiov_cleanup(>vqs[i].in_iov);
+   }
+
put_iova_domain(>iova);
iova_cache_put();
kvfree(vdpasim->buffer);
-- 
2.29.2

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH v3 03/13] vringh: reset kiov 'consumed' field in __vringh_iov()

2021-02-04 Thread Stefano Garzarella
__vringh_iov() overwrites the contents of riov and wiov: it resets the
'i' and 'used' fields, but the 'consumed' field should also be reset
to avoid an inconsistent state.

Signed-off-by: Stefano Garzarella 
---
 drivers/vhost/vringh.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
index f68122705719..bee63d68201a 100644
--- a/drivers/vhost/vringh.c
+++ b/drivers/vhost/vringh.c
@@ -290,9 +290,9 @@ __vringh_iov(struct vringh *vrh, u16 i,
return -EINVAL;
 
if (riov)
-   riov->i = riov->used = 0;
+   riov->i = riov->used = riov->consumed = 0;
if (wiov)
-   wiov->i = wiov->used = 0;
+   wiov->i = wiov->used = wiov->consumed = 0;
 
for (;;) {
void *addr;
-- 
2.29.2

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH v3 01/13] vdpa_sim: use iova module to allocate IOVA addresses

2021-02-04 Thread Stefano Garzarella
The identical mapping used until now created issues when mapping
different virtual pages with the same physical address.
To solve this issue, we can use the iova module to handle the IOVA
allocation.
For simplicity we use an IOVA allocator with byte granularity.

We add two new functions, vdpasim_map_range() and vdpasim_unmap_range(),
to handle the IOVA allocation and the registration into the IOMMU/IOTLB.
These functions are used by dma_map_ops callbacks.

Acked-by: Jason Wang 
Signed-off-by: Stefano Garzarella 
---
v2:
- used ULONG_MAX instead of ~0UL [Jason]
- fixed typos in comment and patch description [Jason]
---
 drivers/vdpa/vdpa_sim/vdpa_sim.h |   2 +
 drivers/vdpa/vdpa_sim/vdpa_sim.c | 108 +++
 drivers/vdpa/Kconfig |   1 +
 3 files changed, 69 insertions(+), 42 deletions(-)

diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.h b/drivers/vdpa/vdpa_sim/vdpa_sim.h
index 6d75444f9948..cd58e888bcf3 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim.h
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim.h
@@ -6,6 +6,7 @@
 #ifndef _VDPA_SIM_H
 #define _VDPA_SIM_H
 
+#include 
 #include 
 #include 
 #include 
@@ -57,6 +58,7 @@ struct vdpasim {
/* virtio config according to device type */
void *config;
struct vhost_iotlb *iommu;
+   struct iova_domain iova;
void *buffer;
u32 status;
u32 generation;
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
index d5942842432d..2183a833fcf4 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
@@ -17,6 +17,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "vdpa_sim.h"
 
@@ -128,30 +129,57 @@ static int dir_to_perm(enum dma_data_direction dir)
return perm;
 }
 
+static dma_addr_t vdpasim_map_range(struct vdpasim *vdpasim, phys_addr_t paddr,
+   size_t size, unsigned int perm)
+{
+   struct iova *iova;
+   dma_addr_t dma_addr;
+   int ret;
+
+   /* We set the limit_pfn to the maximum (ULONG_MAX - 1) */
+   iova = alloc_iova(>iova, size, ULONG_MAX - 1, true);
+   if (!iova)
+   return DMA_MAPPING_ERROR;
+
+   dma_addr = iova_dma_addr(>iova, iova);
+
+   spin_lock(>iommu_lock);
+   ret = vhost_iotlb_add_range(vdpasim->iommu, (u64)dma_addr,
+   (u64)dma_addr + size - 1, (u64)paddr, perm);
+   spin_unlock(>iommu_lock);
+
+   if (ret) {
+   __free_iova(>iova, iova);
+   return DMA_MAPPING_ERROR;
+   }
+
+   return dma_addr;
+}
+
+static void vdpasim_unmap_range(struct vdpasim *vdpasim, dma_addr_t dma_addr,
+   size_t size)
+{
+   spin_lock(>iommu_lock);
+   vhost_iotlb_del_range(vdpasim->iommu, (u64)dma_addr,
+ (u64)dma_addr + size - 1);
+   spin_unlock(>iommu_lock);
+
+   free_iova(>iova, iova_pfn(>iova, dma_addr));
+}
+
 static dma_addr_t vdpasim_map_page(struct device *dev, struct page *page,
   unsigned long offset, size_t size,
   enum dma_data_direction dir,
   unsigned long attrs)
 {
struct vdpasim *vdpasim = dev_to_sim(dev);
-   struct vhost_iotlb *iommu = vdpasim->iommu;
-   u64 pa = (page_to_pfn(page) << PAGE_SHIFT) + offset;
-   int ret, perm = dir_to_perm(dir);
+   phys_addr_t paddr = page_to_phys(page) + offset;
+   int perm = dir_to_perm(dir);
 
if (perm < 0)
return DMA_MAPPING_ERROR;
 
-   /* For simplicity, use identical mapping to avoid e.g iova
-* allocator.
-*/
-   spin_lock(>iommu_lock);
-   ret = vhost_iotlb_add_range(iommu, pa, pa + size - 1,
-   pa, dir_to_perm(dir));
-   spin_unlock(>iommu_lock);
-   if (ret)
-   return DMA_MAPPING_ERROR;
-
-   return (dma_addr_t)(pa);
+   return vdpasim_map_range(vdpasim, paddr, size, perm);
 }
 
 static void vdpasim_unmap_page(struct device *dev, dma_addr_t dma_addr,
@@ -159,12 +187,8 @@ static void vdpasim_unmap_page(struct device *dev, 
dma_addr_t dma_addr,
   unsigned long attrs)
 {
struct vdpasim *vdpasim = dev_to_sim(dev);
-   struct vhost_iotlb *iommu = vdpasim->iommu;
 
-   spin_lock(>iommu_lock);
-   vhost_iotlb_del_range(iommu, (u64)dma_addr,
- (u64)dma_addr + size - 1);
-   spin_unlock(>iommu_lock);
+   vdpasim_unmap_range(vdpasim, dma_addr, size);
 }
 
 static void *vdpasim_alloc_coherent(struct device *dev, size_t size,
@@ -172,27 +196,22 @@ static void *vdpasim_alloc_coherent(struct device *dev, 
size_t size,
unsigned long attrs)
 {
struct vdpasim *vdpasim = dev_to_sim(dev);
-   struct vhost_iotlb *iommu = vdpasim->iommu;
-   void *addr = kmalloc(size, flag);
-   int 

[PATCH v3 02/13] vringh: add 'iotlb_lock' to synchronize iotlb accesses

2021-02-04 Thread Stefano Garzarella
Usually iotlb accesses are synchronized with a spinlock.
Let's request it as a new parameter in vringh_set_iotlb() and
hold it when we navigate the iotlb in iotlb_translate() to avoid
race conditions with any new additions/deletions of ranges from
the iotlb.

Acked-by: Jason Wang 
Signed-off-by: Stefano Garzarella 
---
 include/linux/vringh.h   | 6 +-
 drivers/vdpa/vdpa_sim/vdpa_sim.c | 3 ++-
 drivers/vhost/vringh.c   | 9 -
 3 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/include/linux/vringh.h b/include/linux/vringh.h
index 59bd50f99291..9c077863c8f6 100644
--- a/include/linux/vringh.h
+++ b/include/linux/vringh.h
@@ -46,6 +46,9 @@ struct vringh {
/* IOTLB for this vring */
struct vhost_iotlb *iotlb;
 
+   /* spinlock to synchronize IOTLB accesses */
+   spinlock_t *iotlb_lock;
+
/* The function to call to notify the guest about added buffers */
void (*notify)(struct vringh *);
 };
@@ -258,7 +261,8 @@ static inline __virtio64 cpu_to_vringh64(const struct 
vringh *vrh, u64 val)
 
 #if IS_REACHABLE(CONFIG_VHOST_IOTLB)
 
-void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb);
+void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb,
+ spinlock_t *iotlb_lock);
 
 int vringh_init_iotlb(struct vringh *vrh, u64 features,
  unsigned int num, bool weak_barriers,
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
index 2183a833fcf4..53238989713d 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
@@ -284,7 +284,8 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr 
*dev_attr)
goto err_iommu;
 
for (i = 0; i < dev_attr->nvqs; i++)
-   vringh_set_iotlb(>vqs[i].vring, vdpasim->iommu);
+   vringh_set_iotlb(>vqs[i].vring, vdpasim->iommu,
+>iommu_lock);
 
ret = iova_cache_get();
if (ret)
diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
index 85d85faba058..f68122705719 100644
--- a/drivers/vhost/vringh.c
+++ b/drivers/vhost/vringh.c
@@ -1074,6 +1074,8 @@ static int iotlb_translate(const struct vringh *vrh,
int ret = 0;
u64 s = 0;
 
+   spin_lock(vrh->iotlb_lock);
+
while (len > s) {
u64 size, pa, pfn;
 
@@ -1103,6 +1105,8 @@ static int iotlb_translate(const struct vringh *vrh,
++ret;
}
 
+   spin_unlock(vrh->iotlb_lock);
+
return ret;
 }
 
@@ -1262,10 +1266,13 @@ EXPORT_SYMBOL(vringh_init_iotlb);
  * vringh_set_iotlb - initialize a vringh for a ring with IOTLB.
  * @vrh: the vring
  * @iotlb: iotlb associated with this vring
+ * @iotlb_lock: spinlock to synchronize the iotlb accesses
  */
-void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb)
+void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb,
+ spinlock_t *iotlb_lock)
 {
vrh->iotlb = iotlb;
+   vrh->iotlb_lock = iotlb_lock;
 }
 EXPORT_SYMBOL(vringh_set_iotlb);
 
-- 
2.29.2

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH v3 00/13] vdpa: add vdpa simulator for block device

2021-02-04 Thread Stefano Garzarella
v3:
- added new patches
  - 'vringh: explain more about cleaning riov and wiov'
  - 'vdpa: add return value to get_config/set_config callbacks'
  - 'vhost/vdpa: remove vhost_vdpa_config_validate()'
- split Xie's patch 'vhost/vdpa: Remove the restriction that only supports
  virtio-net devices'
- updated Mellanox copyright to NVIDIA [Max]
- explained in the 'vdpa: add vdpa simulator for block device' commit
  message that inputs are validated in subsequent patches [Stefan]

v2: https://lore.kernel.org/lkml/20210128144127.113245-1-sgarz...@redhat.com/
v1: 
https://lore.kernel.org/lkml/93f207c0-61e6-3696-f218-e7d7ea9a7...@redhat.com/

This series is the second part of the v1 linked above. The first part with
refactoring of vdpa_sim has already been merged.

The patches are based on Max Gurtovoy's work and extend the block simulator to
have a ramdisk behaviour.

As mentioned in the v1 there were 2 issues and I fixed them in this series:
1. The identical mapping in the IOMMU used until now in vdpa_sim created issues
   when mapping different virtual pages with the same physical address.
   Fixed by patch "vdpa_sim: use iova module to allocate IOVA addresses"

2. There was a race accessing the IOMMU between the vdpasim_blk_work() and the
   device driver that map/unmap DMA regions. Fixed by patch "vringh: add
   'iotlb_lock' to synchronize iotlb accesses"

I used Xie's patch coming from the VDUSE series to allow vhost-vdpa to use
block devices. As Jason suggested I split it into two patches and I added
a return value to get_config()/set_config() callbacks.

The series also includes small fixes for vringh, vdpa, and vdpa_sim that I
discovered while implementing and testing the block simulator.

Thanks for your feedback,
Stefano

Max Gurtovoy (1):
  vdpa: add vdpa simulator for block device

Stefano Garzarella (11):
  vdpa_sim: use iova module to allocate IOVA addresses
  vringh: add 'iotlb_lock' to synchronize iotlb accesses
  vringh: reset kiov 'consumed' field in __vringh_iov()
  vringh: explain more about cleaning riov and wiov
  vringh: implement vringh_kiov_advance()
  vringh: add vringh_kiov_length() helper
  vdpa_sim: cleanup kiovs in vdpasim_free()
  vdpa: add return value to get_config/set_config callbacks
  vhost/vdpa: remove vhost_vdpa_config_validate()
  vdpa_sim_blk: implement ramdisk behaviour
  vdpa_sim_blk: handle VIRTIO_BLK_T_GET_ID

Xie Yongji (1):
  vhost/vdpa: Remove the restriction that only supports virtio-net
devices

 drivers/vdpa/vdpa_sim/vdpa_sim.h |   2 +
 include/linux/vdpa.h |  18 +-
 include/linux/vringh.h   |  19 +-
 drivers/vdpa/ifcvf/ifcvf_main.c  |  24 ++-
 drivers/vdpa/mlx5/net/mlx5_vnet.c|  17 +-
 drivers/vdpa/vdpa_sim/vdpa_sim.c | 134 -
 drivers/vdpa/vdpa_sim/vdpa_sim_blk.c | 288 +++
 drivers/vhost/vdpa.c |  47 ++---
 drivers/vhost/vringh.c   |  69 +--
 drivers/vdpa/Kconfig |   8 +
 drivers/vdpa/vdpa_sim/Makefile   |   1 +
 11 files changed, 504 insertions(+), 123 deletions(-)
 create mode 100644 drivers/vdpa/vdpa_sim/vdpa_sim_blk.c

-- 
2.29.2

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


RE: [PATCH v6 13/16] asm-generic/hyperv: introduce hv_device_id and auxiliary structures

2021-02-04 Thread Michael Kelley via Virtualization
From: Wei Liu  Sent: Wednesday, February 3, 2021 7:05 AM
> 
> We will need to identify the device we want Microsoft Hypervisor to
> manipulate.  Introduce the data structures for that purpose.
> 
> They will be used in a later patch.
> 
> Signed-off-by: Sunil Muthuswamy 
> Co-Developed-by: Sunil Muthuswamy 
> Signed-off-by: Wei Liu 
> ---
> v6:
> 1. Add reserved0 as field name.
> ---
>  include/asm-generic/hyperv-tlfs.h | 79 +++
>  1 file changed, 79 insertions(+)
> 
> diff --git a/include/asm-generic/hyperv-tlfs.h 
> b/include/asm-generic/hyperv-tlfs.h
> index 94c7d77bbf68..ce53c0db28ae 100644
> --- a/include/asm-generic/hyperv-tlfs.h
> +++ b/include/asm-generic/hyperv-tlfs.h
> @@ -623,4 +623,83 @@ struct hv_set_vp_registers_input {
>   } element[];
>  } __packed;
> 
> +enum hv_device_type {
> + HV_DEVICE_TYPE_LOGICAL = 0,
> + HV_DEVICE_TYPE_PCI = 1,
> + HV_DEVICE_TYPE_IOAPIC = 2,
> + HV_DEVICE_TYPE_ACPI = 3,
> +};
> +
> +typedef u16 hv_pci_rid;
> +typedef u16 hv_pci_segment;
> +typedef u64 hv_logical_device_id;
> +union hv_pci_bdf {
> + u16 as_uint16;
> +
> + struct {
> + u8 function:3;
> + u8 device:5;
> + u8 bus;
> + };
> +} __packed;
> +
> +union hv_pci_bus_range {
> + u16 as_uint16;
> +
> + struct {
> + u8 subordinate_bus;
> + u8 secondary_bus;
> + };
> +} __packed;
> +
> +union hv_device_id {
> + u64 as_uint64;
> +
> + struct {
> + u64 reserved0:62;
> + u64 device_type:2;
> + };
> +
> + /* HV_DEVICE_TYPE_LOGICAL */
> + struct {
> + u64 id:62;
> + u64 device_type:2;
> + } logical;
> +
> + /* HV_DEVICE_TYPE_PCI */
> + struct {
> + union {
> + hv_pci_rid rid;
> + union hv_pci_bdf bdf;
> + };
> +
> + hv_pci_segment segment;
> + union hv_pci_bus_range shadow_bus_range;
> +
> + u16 phantom_function_bits:2;
> + u16 source_shadow:1;
> +
> + u16 rsvdz0:11;
> + u16 device_type:2;
> + } pci;
> +
> + /* HV_DEVICE_TYPE_IOAPIC */
> + struct {
> + u8 ioapic_id;
> + u8 rsvdz0;
> + u16 rsvdz1;
> + u16 rsvdz2;
> +
> + u16 rsvdz3:14;
> + u16 device_type:2;
> + } ioapic;
> +
> + /* HV_DEVICE_TYPE_ACPI */
> + struct {
> + u32 input_mapping_base;
> + u32 input_mapping_count:30;
> + u32 device_type:2;
> + } acpi;
> +} __packed;
> +
>  #endif
> --
> 2.20.1

Reviewed-by: Michael Kelley 

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


RE: [PATCH v6 09/16] x86/hyperv: provide a bunch of helper functions

2021-02-04 Thread Michael Kelley via Virtualization
From: Wei Liu  Sent: Wednesday, February 3, 2021 7:04 AM
> 
> They are used to deposit pages into Microsoft Hypervisor and bring up
> logical and virtual processors.
> 
> Signed-off-by: Lillian Grassin-Drake 
> Signed-off-by: Sunil Muthuswamy 
> Signed-off-by: Nuno Das Neves 
> Co-Developed-by: Lillian Grassin-Drake 
> Co-Developed-by: Sunil Muthuswamy 
> Co-Developed-by: Nuno Das Neves 
> Signed-off-by: Wei Liu 
> ---
> v6:
> 1. Address Michael's comments.
> 
> v4: Fix compilation issue when CONFIG_ACPI_NUMA is not set.
> 
> v3:
> 1. Add __packed to structures.
> 2. Drop unnecessary exports.
> 
> v2:
> 1. Adapt to hypervisor side changes
> 2. Address Vitaly's comments
> 
> use u64 status
> 
> pages
> 
> major comments
> 
> minor comments
> 
> rely on acpi code
> ---
>  arch/x86/hyperv/Makefile  |   2 +-
>  arch/x86/hyperv/hv_proc.c | 219 ++
>  arch/x86/include/asm/mshyperv.h   |   4 +
>  include/asm-generic/hyperv-tlfs.h |  67 +
>  4 files changed, 291 insertions(+), 1 deletion(-)
>  create mode 100644 arch/x86/hyperv/hv_proc.c
> 

Reviewed-by: Michael Kelley 

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


RE: [PATCH v6 08/16] ACPI / NUMA: add a stub function for node_to_pxm()

2021-02-04 Thread Michael Kelley via Virtualization
From: Wei Liu  Sent: Wednesday, February 3, 2021 7:04 AM
> 
> There is already a stub function for pxm_to_node but a conversion in the
> other direction is missing.
> 
> It will be used by Microsoft Hypervisor code later.
> 
> Signed-off-by: Wei Liu 
> ---
> v6: new
> ---
>  include/acpi/acpi_numa.h | 4 
>  1 file changed, 4 insertions(+)
> 
> diff --git a/include/acpi/acpi_numa.h b/include/acpi/acpi_numa.h
> index a4c6ef809e27..40a91ce87e04 100644
> --- a/include/acpi/acpi_numa.h
> +++ b/include/acpi/acpi_numa.h
> @@ -30,6 +30,10 @@ static inline int pxm_to_node(int pxm)
>  {
>   return 0;
>  }
> +static inline int node_to_pxm(int node)
> +{
> + return 0;
> +}
>  #endif   /* CONFIG_ACPI_NUMA */
> 
>  #ifdef CONFIG_ACPI_HMAT
> --
> 2.20.1

Reviewed-by: Michael Kelley 

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


RE: [PATCH v6 06/16] x86/hyperv: extract partition ID from Microsoft Hypervisor if necessary

2021-02-04 Thread Michael Kelley via Virtualization
From: Wei Liu  Sent: Wednesday, February 3, 2021 7:04 AM
> 
> We will need the partition ID for executing some hypercalls later.
> 
> Signed-off-by: Lillian Grassin-Drake 
> Co-Developed-by: Sunil Muthuswamy 
> Signed-off-by: Wei Liu 
> ---
> v6:
> 1. Use u64 status.
> 
> v3:
> 1. Make hv_get_partition_id static.
> 2. Change code structure a bit.
> ---
>  arch/x86/hyperv/hv_init.c | 26 ++
>  arch/x86/include/asm/mshyperv.h   |  2 ++
>  include/asm-generic/hyperv-tlfs.h |  6 ++
>  3 files changed, 34 insertions(+)
> 
> diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
> index 6f4cb40e53fe..5b90a7290177 100644
> --- a/arch/x86/hyperv/hv_init.c
> +++ b/arch/x86/hyperv/hv_init.c
> @@ -26,6 +26,9 @@
>  #include 
>  #include 
> 
> +u64 hv_current_partition_id = ~0ull;
> +EXPORT_SYMBOL_GPL(hv_current_partition_id);
> +
>  void *hv_hypercall_pg;
>  EXPORT_SYMBOL_GPL(hv_hypercall_pg);
> 
> @@ -331,6 +334,24 @@ static struct syscore_ops hv_syscore_ops = {
>   .resume = hv_resume,
>  };
> 
> +static void __init hv_get_partition_id(void)
> +{
> + struct hv_get_partition_id *output_page;
> + u64 status;
> + unsigned long flags;
> +
> + local_irq_save(flags);
> + output_page = *this_cpu_ptr(hyperv_pcpu_output_arg);
> + status = hv_do_hypercall(HVCALL_GET_PARTITION_ID, NULL, output_page);
> + if ((status & HV_HYPERCALL_RESULT_MASK) != HV_STATUS_SUCCESS) {
> + /* No point in proceeding if this failed */
> + pr_err("Failed to get partition ID: %lld\n", status);
> + BUG();
> + }
> + hv_current_partition_id = output_page->partition_id;
> + local_irq_restore(flags);
> +}
> +
>  /*
>   * This function is to be invoked early in the boot sequence after the
>   * hypervisor has been detected.
> @@ -426,6 +447,11 @@ void __init hyperv_init(void)
> 
>   register_syscore_ops(&hv_syscore_ops);
> 
> + if (cpuid_ebx(HYPERV_CPUID_FEATURES) & HV_ACCESS_PARTITION_ID)
> + hv_get_partition_id();
> +
> + BUG_ON(hv_root_partition && hv_current_partition_id == ~0ull);
> +
>   return;
> 
>  remove_cpuhp_state:
> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> index 62d9390f1ddf..67f5d35a73d3 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -78,6 +78,8 @@ extern void *hv_hypercall_pg;
>  extern void  __percpu  **hyperv_pcpu_input_arg;
>  extern void  __percpu  **hyperv_pcpu_output_arg;
> 
> +extern u64 hv_current_partition_id;
> +
>  static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
>  {
>   u64 input_address = input ? virt_to_phys(input) : 0;
> diff --git a/include/asm-generic/hyperv-tlfs.h 
> b/include/asm-generic/hyperv-tlfs.h
> index e6903589a82a..87b1a79b19eb 100644
> --- a/include/asm-generic/hyperv-tlfs.h
> +++ b/include/asm-generic/hyperv-tlfs.h
> @@ -141,6 +141,7 @@ struct ms_hyperv_tsc_page {
>  #define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX0x0013
>  #define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX 0x0014
>  #define HVCALL_SEND_IPI_EX   0x0015
> +#define HVCALL_GET_PARTITION_ID  0x0046
>  #define HVCALL_GET_VP_REGISTERS  0x0050
>  #define HVCALL_SET_VP_REGISTERS  0x0051
>  #define HVCALL_POST_MESSAGE  0x005c
> @@ -407,6 +408,11 @@ struct hv_tlb_flush_ex {
>   u64 gva_list[];
>  } __packed;
> 
> +/* HvGetPartitionId hypercall (output only) */
> +struct hv_get_partition_id {
> + u64 partition_id;
> +} __packed;
> +
>  /* HvRetargetDeviceInterrupt hypercall */
>  union hv_msi_entry {
>   u64 as_uint64;
> --
> 2.20.1

Reviewed-by: Michael Kelley 

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


RE: [PATCH v6 05/16] x86/hyperv: allocate output arg pages if required

2021-02-04 Thread Michael Kelley via Virtualization
From: Wei Liu  Sent: Wednesday, February 3, 2021 7:04 AM
> 
> When Linux runs as the root partition, it will need to make hypercalls
> which return data from the hypervisor.
> 
> Allocate pages for storing results when Linux runs as the root
> partition.
> 
> Signed-off-by: Lillian Grassin-Drake 
> Co-Developed-by: Lillian Grassin-Drake 
> Signed-off-by: Wei Liu 
> ---
> v3: Fix hv_cpu_die to use free_pages.
> v2: Address Vitaly's comments
> ---
>  arch/x86/hyperv/hv_init.c   | 35 -
>  arch/x86/include/asm/mshyperv.h |  1 +
>  2 files changed, 31 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
> index e04d90af4c27..6f4cb40e53fe 100644
> --- a/arch/x86/hyperv/hv_init.c
> +++ b/arch/x86/hyperv/hv_init.c
> @@ -41,6 +41,9 @@ EXPORT_SYMBOL_GPL(hv_vp_assist_page);
>  void  __percpu **hyperv_pcpu_input_arg;
>  EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg);
> 
> +void  __percpu **hyperv_pcpu_output_arg;
> +EXPORT_SYMBOL_GPL(hyperv_pcpu_output_arg);
> +
>  u32 hv_max_vp_index;
>  EXPORT_SYMBOL_GPL(hv_max_vp_index);
> 
> @@ -73,12 +76,19 @@ static int hv_cpu_init(unsigned int cpu)
>   void **input_arg;
>   struct page *pg;
> 
> - input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
>   /* hv_cpu_init() can be called with IRQs disabled from hv_resume() */
> - pg = alloc_page(irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL);
> + pg = alloc_pages(irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL, 
> hv_root_partition ? 1 : 0);
>   if (unlikely(!pg))
>   return -ENOMEM;
> +
> + input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
>   *input_arg = page_address(pg);
> + if (hv_root_partition) {
> + void **output_arg;
> +
> + output_arg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg);
> + *output_arg = page_address(pg + 1);
> + }
> 
>   hv_get_vp_index(msr_vp_index);
> 
> @@ -205,14 +215,23 @@ static int hv_cpu_die(unsigned int cpu)
>   unsigned int new_cpu;
>   unsigned long flags;
>   void **input_arg;
> - void *input_pg = NULL;
> + void *pg;
> 
>   local_irq_save(flags);
>   input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
> - input_pg = *input_arg;
> + pg = *input_arg;
>   *input_arg = NULL;
> +
> + if (hv_root_partition) {
> + void **output_arg;
> +
> + output_arg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg);
> + *output_arg = NULL;
> + }
> +
>   local_irq_restore(flags);
> - free_page((unsigned long)input_pg);
> +
> + free_pages((unsigned long)pg, hv_root_partition ? 1 : 0);
> 
>   if (hv_vp_assist_page && hv_vp_assist_page[cpu])
>   wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, 0);
> @@ -346,6 +365,12 @@ void __init hyperv_init(void)
> 
>   BUG_ON(hyperv_pcpu_input_arg == NULL);
> 
> + /* Allocate the per-CPU state for output arg for root */
> + if (hv_root_partition) {
> + hyperv_pcpu_output_arg = alloc_percpu(void *);
> + BUG_ON(hyperv_pcpu_output_arg == NULL);
> + }
> +
>   /* Allocate percpu VP index */
>   hv_vp_index = kmalloc_array(num_possible_cpus(), sizeof(*hv_vp_index),
>   GFP_KERNEL);
> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> index ac2b0d110f03..62d9390f1ddf 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -76,6 +76,7 @@ static inline void hv_disable_stimer0_percpu_irq(int irq) {}
>  #if IS_ENABLED(CONFIG_HYPERV)
>  extern void *hv_hypercall_pg;
>  extern void  __percpu  **hyperv_pcpu_input_arg;
> +extern void  __percpu  **hyperv_pcpu_output_arg;
> 
>  static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
>  {
> --
> 2.20.1

Reviewed-by: Michael Kelley 

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


RE: [PATCH v6 02/16] x86/hyperv: detect if Linux is the root partition

2021-02-04 Thread Michael Kelley via Virtualization
From: Wei Liu  Sent: Wednesday, February 3, 2021 7:04 AM
> 
> For now we can use the privilege flag to check. Stash the value to be
> used later.
> 
> Put in a bunch of defines for future use when we want to have more
> fine-grained detection.
> 
> Signed-off-by: Wei Liu 
> Reviewed-by: Pavel Tatashin 
> ---
> v3: move hv_root_partition to mshyperv.c
> ---
>  arch/x86/include/asm/hyperv-tlfs.h | 10 ++
>  arch/x86/include/asm/mshyperv.h|  2 ++
>  arch/x86/kernel/cpu/mshyperv.c | 20 
>  3 files changed, 32 insertions(+)
> 
> diff --git a/arch/x86/include/asm/hyperv-tlfs.h 
> b/arch/x86/include/asm/hyperv-tlfs.h
> index 6bf42aed387e..204010350604 100644
> --- a/arch/x86/include/asm/hyperv-tlfs.h
> +++ b/arch/x86/include/asm/hyperv-tlfs.h
> @@ -21,6 +21,7 @@
>  #define HYPERV_CPUID_FEATURES0x4003
>  #define HYPERV_CPUID_ENLIGHTMENT_INFO0x4004
>  #define HYPERV_CPUID_IMPLEMENT_LIMITS0x4005
> +#define HYPERV_CPUID_CPU_MANAGEMENT_FEATURES 0x4007
>  #define HYPERV_CPUID_NESTED_FEATURES 0x400A
> 
>  #define HYPERV_CPUID_VIRT_STACK_INTERFACE0x4081
> @@ -110,6 +111,15 @@
>  /* Recommend using enlightened VMCS */
>  #define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED  BIT(14)
> 
> +/*
> + * CPU management features identification.
> + * These are HYPERV_CPUID_CPU_MANAGEMENT_FEATURES.EAX bits.
> + */
> +#define HV_X64_START_LOGICAL_PROCESSOR   BIT(0)
> +#define HV_X64_CREATE_ROOT_VIRTUAL_PROCESSOR BIT(1)
> +#define HV_X64_PERFORMANCE_COUNTER_SYNC  BIT(2)
> +#define HV_X64_RESERVED_IDENTITY_BIT BIT(31)
> +
>  /*
>   * Virtual processor will never share a physical core with another virtual
>   * processor, except for virtual processors that are reported as sibling SMT
> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> index ffc289992d1b..ac2b0d110f03 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -237,6 +237,8 @@ int hyperv_fill_flush_guest_mapping_list(
>   struct hv_guest_mapping_flush_list *flush,
>   u64 start_gfn, u64 end_gfn);
> 
> +extern bool hv_root_partition;
> +
>  #ifdef CONFIG_X86_64
>  void hv_apic_init(void);
>  void __init hv_init_spinlocks(void);
> diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
> index f628e3dc150f..c376d191a260 100644
> --- a/arch/x86/kernel/cpu/mshyperv.c
> +++ b/arch/x86/kernel/cpu/mshyperv.c
> @@ -32,6 +32,10 @@
>  #include 
>  #include 
> 
> +/* Is Linux running as the root partition? */
> +bool hv_root_partition;
> +EXPORT_SYMBOL_GPL(hv_root_partition);
> +
>  struct ms_hyperv_info ms_hyperv;
>  EXPORT_SYMBOL_GPL(ms_hyperv);
> 
> @@ -237,6 +241,22 @@ static void __init ms_hyperv_init_platform(void)
>   pr_debug("Hyper-V: max %u virtual processors, %u logical processors\n",
>ms_hyperv.max_vp_index, ms_hyperv.max_lp_index);
> 
> + /*
> +  * Check CPU management privilege.
> +  *
> +  * To mirror what Windows does we should extract CPU management
> +  * features and use the ReservedIdentityBit to detect if Linux is the
> +  * root partition. But that requires negotiating CPU management
> +  * interface (a process to be finalized).
> +  *
> +  * For now, use the privilege flag as the indicator for running as
> +  * root.
> +  */
> + if (cpuid_ebx(HYPERV_CPUID_FEATURES) & HV_CPU_MANAGEMENT) {
> + hv_root_partition = true;
> + pr_info("Hyper-V: running as root partition\n");
> + }
> +
>   /*
>* Extract host information.
>*/
> --
> 2.20.1

Reviewed-by: Michael Kelley 

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


RE: [PATCH v5 13/16] asm-generic/hyperv: introduce hv_device_id and auxiliary structures

2021-02-04 Thread Michael Kelley via Virtualization
From: Wei Liu  Sent: Wednesday, February 3, 2021 6:09 AM
> 
> On Wed, Feb 03, 2021 at 02:49:53PM +0100, Arnd Bergmann wrote:
> > On Wed, Feb 3, 2021 at 2:26 PM Wei Liu  wrote:
> > > On Tue, Feb 02, 2021 at 05:02:48PM +, Wei Liu wrote:
> > > > On Tue, Jan 26, 2021 at 01:26:52AM +, Michael Kelley wrote:
> > > > > From: Wei Liu  Sent: Wednesday, January 20, 2021 
> > > > > 4:01 AM
> > > > > > +union hv_device_id {
> > > > > > + u64 as_uint64;
> > > > > > +
> > > > > > + struct {
> > > > > > + u64 :62;
> > > > > > + u64 device_type:2;
> > > > > > + };
> > > > >
> > > > > Are the above 4 lines extraneous junk?
> > > > > If not, a comment would be helpful.  And we
> > > > > would normally label the 62 bit field as
> > > > > "reserved0" or something similar.
> > > > >
> > > >
> > > > No. It is not junk. I got this from a header in tree.
> > > >
> > > > I am inclined to just drop this hunk. If that breaks things, I will use
> > > > "reserved0".
> > > >
> > >
> > > It turns out adding reserved0 is required. Dropping this hunk does not
> > > work.
> >
> > Generally speaking, bitfields are not great for specifying binary 
> > interfaces,
> > as the actual bit order can differ by architecture. The normal way we get
> > around it in the kernel is to use basic integer types and define macros
> > for bit masks. Ideally, each such field should also be marked with a
> > particular endianess as __le64 or __be64, in case this is ever used with
> > an Arm guest running a big-endian kernel.
> 
> Thanks for the information.
> 
> I think we will need to wait until Microsoft Hypervisor clearly defines
> the endianess in its header(s) before we can make changes to the copy in
> Linux.
> 
> >
> > That said, if you do not care about the specific order of the bits, having
> > anonymous bitfields for the reserved members is fine, I don't see a
> > reason to name it as reserved.
> 
> Michael, let me know what you think. I'm not too fussed either way.
> 
> Wei.

I'm OK either way.  In the Hyper-V code we've typically given such
fields a name rather than leave them anonymous, which is why it stuck
out.

Michael

> 
> >
> >   Arnd
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


RE: [PATCH v5 16/16] iommu/hyperv: setup an IO-APIC IRQ remapping domain for root partition

2021-02-04 Thread Michael Kelley via Virtualization
From: Wei Liu  Sent: Wednesday, February 3, 2021 4:47 AM
> 
> On Wed, Jan 27, 2021 at 05:47:08AM +, Michael Kelley wrote:
> > From: Wei Liu  Sent: Wednesday, January 20, 2021 4:01 AM
> > >
> > > Just like MSI/MSI-X, IO-APIC interrupts are remapped by Microsoft
> > > Hypervisor when Linux runs as the root partition. Implement an IRQ
> > > domain to handle mapping and unmapping of IO-APIC interrupts.
> > >
> > > Signed-off-by: Wei Liu 
> > > ---
> > >  arch/x86/hyperv/irqdomain.c |  54 ++
> > >  arch/x86/include/asm/mshyperv.h |   4 +
> > >  drivers/iommu/hyperv-iommu.c| 179 +++-
> > >  3 files changed, 233 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/arch/x86/hyperv/irqdomain.c b/arch/x86/hyperv/irqdomain.c
> > > index 19637cd60231..8e2b4e478b70 100644
> > > --- a/arch/x86/hyperv/irqdomain.c
> > > +++ b/arch/x86/hyperv/irqdomain.c
> > > @@ -330,3 +330,57 @@ struct irq_domain * __init 
> > > hv_create_pci_msi_domain(void)
> > >  }
> > >
> > >  #endif /* CONFIG_PCI_MSI */
> > > +
> > > +int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry 
> > > *entry)
> > > +{
> > > + union hv_device_id device_id;
> > > +
> > > + device_id.as_uint64 = 0;
> > > + device_id.device_type = HV_DEVICE_TYPE_IOAPIC;
> > > + device_id.ioapic.ioapic_id = (u8)ioapic_id;
> > > +
> > > + return hv_unmap_interrupt(device_id.as_uint64, entry) &
> HV_HYPERCALL_RESULT_MASK;
> >
> > The masking is already done in hv_unmap_interrupt.
> 
> Fixed.
> 
> >
> > > +}
> > > +EXPORT_SYMBOL_GPL(hv_unmap_ioapic_interrupt);
> > > +
> > > +int hv_map_ioapic_interrupt(int ioapic_id, bool level, int vcpu, int 
> > > vector,
> > > + struct hv_interrupt_entry *entry)
> > > +{
> > > + unsigned long flags;
> > > + struct hv_input_map_device_interrupt *input;
> > > + struct hv_output_map_device_interrupt *output;
> > > + union hv_device_id device_id;
> > > + struct hv_device_interrupt_descriptor *intr_desc;
> > > + u16 status;
> > > +
> > > + device_id.as_uint64 = 0;
> > > + device_id.device_type = HV_DEVICE_TYPE_IOAPIC;
> > > + device_id.ioapic.ioapic_id = (u8)ioapic_id;
> > > +
> > > + local_irq_save(flags);
> > > + input = *this_cpu_ptr(hyperv_pcpu_input_arg);
> > > + output = *this_cpu_ptr(hyperv_pcpu_output_arg);
> > > + memset(input, 0, sizeof(*input));
> > > + intr_desc = &input->interrupt_descriptor;
> > > + input->partition_id = hv_current_partition_id;
> > > + input->device_id = device_id.as_uint64;
> > > + intr_desc->interrupt_type = HV_X64_INTERRUPT_TYPE_FIXED;
> > > + intr_desc->target.vector = vector;
> > > + intr_desc->vector_count = 1;
> > > +
> > > + if (level)
> > > + intr_desc->trigger_mode = HV_INTERRUPT_TRIGGER_MODE_LEVEL;
> > > + else
> > > + intr_desc->trigger_mode = HV_INTERRUPT_TRIGGER_MODE_EDGE;
> > > +
> > > + __set_bit(vcpu, (unsigned long *)&intr_desc->target.vp_mask);
> > > +
> > > + status = hv_do_rep_hypercall(HVCALL_MAP_DEVICE_INTERRUPT, 0, 0, input,
> output) &
> > > +  HV_HYPERCALL_RESULT_MASK;
> > > + local_irq_restore(flags);
> > > +
> > > + *entry = output->interrupt_entry;
> > > +
> > > + return status;
> >
> > As a cross-check, I was comparing this code against hv_map_msi_interrupt(). 
> >  They are
> > mostly parallel, though some of the assignments are done in a different 
> > order.  It's a nit,
> > but making them as parallel as possible would be nice. :-)
> >
> 
> Indeed. I will see about factoring out a function.

If factoring out a separate helper function is clumsy, just having the parallel 
code
in the two functions be as similar as possible makes it easier to see what's the
same and what's different.

> 
> > Same 64 vCPU comment applies here as well.
> >
> 
> This is changed to use vpset instead. Took me a bit of time to get it
> working because document is a bit lacking.
> 
> >
> > > +}
> > > +EXPORT_SYMBOL_GPL(hv_map_ioapic_interrupt);
> > > diff --git a/arch/x86/include/asm/mshyperv.h 
> > > b/arch/x86/include/asm/mshyperv.h
> > > index ccc849e25d5e..345d7c6f8c37 100644
> > > --- a/arch/x86/include/asm/mshyperv.h
> > > +++ b/arch/x86/include/asm/mshyperv.h
> > > @@ -263,6 +263,10 @@ static inline void hv_set_msi_entry_from_desc(union
> > > hv_msi_entry *msi_entry,
> > >
> > >  struct irq_domain *hv_create_pci_msi_domain(void);
> > >
> > > +int hv_map_ioapic_interrupt(int ioapic_id, bool level, int vcpu, int 
> > > vector,
> > > + struct hv_interrupt_entry *entry);
> > > +int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry 
> > > *entry);
> > > +
> > >  #else /* CONFIG_HYPERV */
> > >  static inline void hyperv_init(void) {}
> > >  static inline void hyperv_setup_mmu_ops(void) {}
> > > diff --git a/drivers/iommu/hyperv-iommu.c b/drivers/iommu/hyperv-iommu.c
> > > index b7db6024e65c..6d35e4c303c6 100644
> > > --- a/drivers/iommu/hyperv-iommu.c
> > > +++ b/drivers/iommu/hyperv-iommu.c
> > > @@ -116,30 +116,43 @@ static const struct irq_domain_ops 
> > > hyperv_ir_domain_ops = {
> > 

RE: [PATCH v5 07/16] x86/hyperv: extract partition ID from Microsoft Hypervisor if necessary

2021-02-04 Thread Michael Kelley via Virtualization
From: Wei Liu  Sent: Tuesday, February 2, 2021 7:04 AM
> 
> On Tue, Jan 26, 2021 at 12:48:37AM +, Michael Kelley wrote:
> > From: Wei Liu  Sent: Wednesday, January 20, 2021 4:01 AM
> > >
> > > We will need the partition ID for executing some hypercalls later.
> > >
> > > Signed-off-by: Lillian Grassin-Drake 
> > > Co-Developed-by: Sunil Muthuswamy 
> > > Signed-off-by: Wei Liu 
> > > ---
> > > v3:
> > > 1. Make hv_get_partition_id static.
> > > 2. Change code structure a bit.
> > > ---
> > >  arch/x86/hyperv/hv_init.c | 27 +++
> > >  arch/x86/include/asm/mshyperv.h   |  2 ++
> > >  include/asm-generic/hyperv-tlfs.h |  6 ++
> > >  3 files changed, 35 insertions(+)
> > >
> > > diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
> > > index 6f4cb40e53fe..fc9941bd8653 100644
> > > --- a/arch/x86/hyperv/hv_init.c
> > > +++ b/arch/x86/hyperv/hv_init.c
> > > @@ -26,6 +26,9 @@
> > >  #include 
> > >  #include 
> > >
> > > +u64 hv_current_partition_id = ~0ull;
> > > +EXPORT_SYMBOL_GPL(hv_current_partition_id);
> > > +
> > >  void *hv_hypercall_pg;
> > >  EXPORT_SYMBOL_GPL(hv_hypercall_pg);
> > >
> > > @@ -331,6 +334,25 @@ static struct syscore_ops hv_syscore_ops = {
> > >   .resume = hv_resume,
> > >  };
> > >
> > > +static void __init hv_get_partition_id(void)
> > > +{
> > > + struct hv_get_partition_id *output_page;
> > > + u16 status;
> > > + unsigned long flags;
> > > +
> > > + local_irq_save(flags);
> > > + output_page = *this_cpu_ptr(hyperv_pcpu_output_arg);
> > > + status = hv_do_hypercall(HVCALL_GET_PARTITION_ID, NULL, output_page) &
> > > + HV_HYPERCALL_RESULT_MASK;
> > > + if (status != HV_STATUS_SUCCESS) {
> >
> > Across the Hyper-V code in Linux, the way we check the hypercall result
> > is very inconsistent.  IMHO, the and'ing of hv_do_hypercall() with
> > HV_HYPERCALL_RESULT_MASK so that status can be a u16 is stylistically
> > a bit unusual.
> >
> > I'd like to see the hypercall result being stored into a u64 local variable.
> > Then the subsequent test for the status should 'and' the u64 with
> > HV_HYPERCALL_RESULT_MASK to determine the result code.
> > I've made a note to go fix the places that aren't doing it that way.
> >
> 
> I will fold in the following diff in the next version. I will also check
> if there are other instances in this patch series that need fixing.
> Pretty sure there are a few.
> 
> diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
> index fc9941bd8653..6064f64a1295 100644
> --- a/arch/x86/hyperv/hv_init.c
> +++ b/arch/x86/hyperv/hv_init.c
> @@ -337,14 +337,13 @@ static struct syscore_ops hv_syscore_ops = {
>  static void __init hv_get_partition_id(void)
>  {
> struct hv_get_partition_id *output_page;
> -   u16 status;
> +   u64 status;
> unsigned long flags;
> 
> local_irq_save(flags);
> output_page = *this_cpu_ptr(hyperv_pcpu_output_arg);
> -   status = hv_do_hypercall(HVCALL_GET_PARTITION_ID, NULL, output_page) &
> -   HV_HYPERCALL_RESULT_MASK;
> -   if (status != HV_STATUS_SUCCESS) {
> +   status = hv_do_hypercall(HVCALL_GET_PARTITION_ID, NULL, output_page);
> +   if ((status & HV_HYPERCALL_RESULT_MASK) != HV_STATUS_SUCCESS) {
> /* No point in proceeding if this failed */
> pr_err("Failed to get partition ID: %d\n", status);
> BUG();
> > > + /* No point in proceeding if this failed */
> > > + pr_err("Failed to get partition ID: %d\n", status);
> > > + BUG();
> > > + }
> > > + hv_current_partition_id = output_page->partition_id;
> > > + local_irq_restore(flags);
> > > +}
> > > +
> > >  /*
> > >   * This function is to be invoked early in the boot sequence after the
> > >   * hypervisor has been detected.
> > > @@ -426,6 +448,11 @@ void __init hyperv_init(void)
> > >
> > >   register_syscore_ops(&hv_syscore_ops);
> > >
> > > + if (cpuid_ebx(HYPERV_CPUID_FEATURES) & HV_ACCESS_PARTITION_ID)
> > > + hv_get_partition_id();
> >
> > Another place where the EBX value saved into the ms_hyperv structure
> > could be used.
> 
> If you're okay with my response earlier, this will be handled later in
> another patch (series).
> 

Yes, that's OK.  Andrea Parri's patch series for Isolated VMs is capturing the
EBX value as well.

Michael
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH v6 06/10] drm/qxl: properly pin/unpin shadow

2021-02-04 Thread Gerd Hoffmann
Suggested-by: Thomas Zimmermann 
Signed-off-by: Gerd Hoffmann 
---
 drivers/gpu/drm/qxl/qxl_display.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/qxl/qxl_display.c 
b/drivers/gpu/drm/qxl/qxl_display.c
index 60331e31861a..d25fd3acc891 100644
--- a/drivers/gpu/drm/qxl/qxl_display.c
+++ b/drivers/gpu/drm/qxl/qxl_display.c
@@ -802,12 +802,14 @@ static int qxl_plane_prepare_fb(struct drm_plane *plane,
}
if (user_bo->shadow != qdev->dumb_shadow_bo) {
if (user_bo->shadow) {
+   qxl_bo_unpin(user_bo->shadow);
drm_gem_object_put
(&user_bo->shadow->tbo.base);
user_bo->shadow = NULL;
}
drm_gem_object_get(&qdev->dumb_shadow_bo->tbo.base);
user_bo->shadow = qdev->dumb_shadow_bo;
+   qxl_bo_pin(user_bo->shadow);
}
}
 
@@ -833,6 +835,7 @@ static void qxl_plane_cleanup_fb(struct drm_plane *plane,
qxl_bo_unpin(user_bo);
 
if (old_state->fb != plane->state->fb && user_bo->shadow) {
+   qxl_bo_unpin(user_bo->shadow);
drm_gem_object_put(&user_bo->shadow->tbo.base);
user_bo->shadow = NULL;
}
@@ -1230,6 +1233,7 @@ int qxl_modeset_init(struct qxl_device *qdev)
 void qxl_modeset_fini(struct qxl_device *qdev)
 {
if (qdev->dumb_shadow_bo) {
+   qxl_bo_unpin(qdev->dumb_shadow_bo);
drm_gem_object_put(&qdev->dumb_shadow_bo->tbo.base);
qdev->dumb_shadow_bo = NULL;
}
-- 
2.29.2

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH v6 05/10] drm/qxl: release shadow on shutdown

2021-02-04 Thread Gerd Hoffmann
In case we have a shadow surface on shutdown release
it so it doesn't leak.

Signed-off-by: Gerd Hoffmann 
---
 drivers/gpu/drm/qxl/qxl_display.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/qxl/qxl_display.c 
b/drivers/gpu/drm/qxl/qxl_display.c
index 38d6b596094d..60331e31861a 100644
--- a/drivers/gpu/drm/qxl/qxl_display.c
+++ b/drivers/gpu/drm/qxl/qxl_display.c
@@ -1229,5 +1229,9 @@ int qxl_modeset_init(struct qxl_device *qdev)
 
 void qxl_modeset_fini(struct qxl_device *qdev)
 {
+   if (qdev->dumb_shadow_bo) {
+   drm_gem_object_put(&qdev->dumb_shadow_bo->tbo.base);
+   qdev->dumb_shadow_bo = NULL;
+   }
qxl_destroy_monitors_object(qdev);
 }
-- 
2.29.2

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH v6 09/10] drm/qxl: simplify qxl_fence_wait

2021-02-04 Thread Gerd Hoffmann
Now that we have the new release_event wait queue we can just
use that in qxl_fence_wait() and simplify the code alot.

Signed-off-by: Gerd Hoffmann 
Acked-by: Thomas Zimmermann 
---
 drivers/gpu/drm/qxl/qxl_release.c | 44 +++
 1 file changed, 4 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/qxl/qxl_release.c 
b/drivers/gpu/drm/qxl/qxl_release.c
index 43a5436853b7..6ed673d75f9f 100644
--- a/drivers/gpu/drm/qxl/qxl_release.c
+++ b/drivers/gpu/drm/qxl/qxl_release.c
@@ -58,54 +58,18 @@ static long qxl_fence_wait(struct dma_fence *fence, bool 
intr,
   signed long timeout)
 {
struct qxl_device *qdev;
-   struct qxl_release *release;
-   int count = 0, sc = 0;
-   bool have_drawable_releases;
unsigned long cur, end = jiffies + timeout;
 
qdev = container_of(fence->lock, struct qxl_device, release_lock);
-   release = container_of(fence, struct qxl_release, base);
-   have_drawable_releases = release->type == QXL_RELEASE_DRAWABLE;
-
-retry:
-   sc++;
 
if (dma_fence_is_signaled(fence))
goto signaled;
 
qxl_io_notify_oom(qdev);
-
-   for (count = 0; count < 11; count++) {
-   if (!qxl_queue_garbage_collect(qdev, true))
-   break;
-
-   if (dma_fence_is_signaled(fence))
-   goto signaled;
-   }
-
-   if (dma_fence_is_signaled(fence))
-   goto signaled;
-
-   if (have_drawable_releases || sc < 4) {
-   if (sc > 2)
-   /* back off */
-   usleep_range(500, 1000);
-
-   if (time_after(jiffies, end))
-   return 0;
-
-   if (have_drawable_releases && sc > 300) {
-   DMA_FENCE_WARN(fence, "failed to wait on release %llu "
-  "after spincount %d\n",
-  fence->context & ~0xf000, sc);
-   goto signaled;
-   }
-   goto retry;
-   }
-   /*
-* yeah, original sync_obj_wait gave up after 3 spins when
-* have_drawable_releases is not set.
-*/
+   if (!wait_event_timeout(qdev->release_event,
+   dma_fence_is_signaled(fence),
+   timeout))
+   return 0;
 
 signaled:
cur = jiffies;
-- 
2.29.2

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH v6 08/10] drm/qxl: properly free qxl releases

2021-02-04 Thread Gerd Hoffmann
Reorganize qxl_device_fini() a bit.
Add missing unpin() calls.

Count releases.  Add wait queue for releases.  That way
qxl_device_fini() can easily wait until everything is
ready for proper shutdown.

Signed-off-by: Gerd Hoffmann 
Acked-by: Thomas Zimmermann 
---
 drivers/gpu/drm/qxl/qxl_drv.h |  2 ++
 drivers/gpu/drm/qxl/qxl_cmd.c |  1 +
 drivers/gpu/drm/qxl/qxl_irq.c |  1 +
 drivers/gpu/drm/qxl/qxl_kms.c | 28 
 drivers/gpu/drm/qxl/qxl_release.c |  2 ++
 5 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/qxl/qxl_drv.h b/drivers/gpu/drm/qxl/qxl_drv.h
index 01354b43c413..6dd57cfb2e7c 100644
--- a/drivers/gpu/drm/qxl/qxl_drv.h
+++ b/drivers/gpu/drm/qxl/qxl_drv.h
@@ -214,6 +214,8 @@ struct qxl_device {
spinlock_t  release_lock;
struct idr  release_idr;
uint32_trelease_seqno;
+   atomic_trelease_count;
+   wait_queue_head_t release_event;
spinlock_t release_idr_lock;
struct mutexasync_io_mutex;
unsigned int last_sent_io_cmd;
diff --git a/drivers/gpu/drm/qxl/qxl_cmd.c b/drivers/gpu/drm/qxl/qxl_cmd.c
index 54e3c3a97440..7e22a81bfb36 100644
--- a/drivers/gpu/drm/qxl/qxl_cmd.c
+++ b/drivers/gpu/drm/qxl/qxl_cmd.c
@@ -254,6 +254,7 @@ int qxl_garbage_collect(struct qxl_device *qdev)
}
}
 
+   wake_up_all(&qdev->release_event);
DRM_DEBUG_DRIVER("%d\n", i);
 
return i;
diff --git a/drivers/gpu/drm/qxl/qxl_irq.c b/drivers/gpu/drm/qxl/qxl_irq.c
index ddf6588a2a38..d312322cacd1 100644
--- a/drivers/gpu/drm/qxl/qxl_irq.c
+++ b/drivers/gpu/drm/qxl/qxl_irq.c
@@ -87,6 +87,7 @@ int qxl_irq_init(struct qxl_device *qdev)
init_waitqueue_head(&qdev->display_event);
init_waitqueue_head(&qdev->cursor_event);
init_waitqueue_head(&qdev->io_cmd_event);
+   init_waitqueue_head(&qdev->release_event);
INIT_WORK(>client_monitors_config_work,
  qxl_client_monitors_config_work_func);
atomic_set(>irq_received, 0);
diff --git a/drivers/gpu/drm/qxl/qxl_kms.c b/drivers/gpu/drm/qxl/qxl_kms.c
index 4a60a52ab62e..66d74aaaee06 100644
--- a/drivers/gpu/drm/qxl/qxl_kms.c
+++ b/drivers/gpu/drm/qxl/qxl_kms.c
@@ -286,11 +286,31 @@ int qxl_device_init(struct qxl_device *qdev,
 
 void qxl_device_fini(struct qxl_device *qdev)
 {
-   qxl_bo_unref(&qdev->current_release_bo[0]);
-   qxl_bo_unref(&qdev->current_release_bo[1]);
-   qxl_gem_fini(qdev);
-   qxl_bo_fini(qdev);
+   int cur_idx;
+
+   for (cur_idx = 0; cur_idx < 3; cur_idx++) {
+   if (!qdev->current_release_bo[cur_idx])
+   continue;
+   qxl_bo_unpin(qdev->current_release_bo[cur_idx]);
+   qxl_bo_unref(&qdev->current_release_bo[cur_idx]);
+   qdev->current_release_bo_offset[cur_idx] = 0;
+   qdev->current_release_bo[cur_idx] = NULL;
+   }
+
+   /*
+* Ask host to release resources (+fill release ring),
+* then wait for the release actually happening.
+*/
+   qxl_io_notify_oom(qdev);
+   wait_event_timeout(qdev->release_event,
+  atomic_read(&qdev->release_count) == 0,
+  HZ);
flush_work(&qdev->gc_work);
+   qxl_surf_evict(qdev);
+   qxl_vram_evict(qdev);
+
+   qxl_gem_fini(qdev);
+   qxl_bo_fini(qdev);
qxl_ring_free(qdev->command_ring);
qxl_ring_free(qdev->cursor_ring);
qxl_ring_free(qdev->release_ring);
diff --git a/drivers/gpu/drm/qxl/qxl_release.c 
b/drivers/gpu/drm/qxl/qxl_release.c
index 28013fd1f8ea..43a5436853b7 100644
--- a/drivers/gpu/drm/qxl/qxl_release.c
+++ b/drivers/gpu/drm/qxl/qxl_release.c
@@ -196,6 +196,7 @@ qxl_release_free(struct qxl_device *qdev,
qxl_release_free_list(release);
kfree(release);
}
+   atomic_dec(&qdev->release_count);
 }
 
 static int qxl_release_bo_alloc(struct qxl_device *qdev,
@@ -344,6 +345,7 @@ int qxl_alloc_release_reserved(struct qxl_device *qdev, 
unsigned long size,
*rbo = NULL;
return idr_ret;
}
+   atomic_inc(&qdev->release_count);
 
mutex_lock(&qdev->release_mutex);
if (qdev->current_release_bo_offset[cur_idx] + 1 >= 
releases_per_bo[cur_idx]) {
-- 
2.29.2

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH v6 03/10] drm/qxl: use drmm_mode_config_init

2021-02-04 Thread Gerd Hoffmann
Signed-off-by: Gerd Hoffmann 
Reviewed-by: Daniel Vetter 
Acked-by: Thomas Zimmermann 
---
 drivers/gpu/drm/qxl/qxl_display.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/qxl/qxl_display.c 
b/drivers/gpu/drm/qxl/qxl_display.c
index 012bce0cdb65..38d6b596094d 100644
--- a/drivers/gpu/drm/qxl/qxl_display.c
+++ b/drivers/gpu/drm/qxl/qxl_display.c
@@ -1195,7 +1195,9 @@ int qxl_modeset_init(struct qxl_device *qdev)
int i;
int ret;
 
-   drm_mode_config_init(&qdev->ddev);
+   ret = drmm_mode_config_init(&qdev->ddev);
+   if (ret)
+   return ret;
 
ret = qxl_create_monitors_object(qdev);
if (ret)
@@ -1228,5 +1230,4 @@ int qxl_modeset_init(struct qxl_device *qdev)
 void qxl_modeset_fini(struct qxl_device *qdev)
 {
qxl_destroy_monitors_object(qdev);
-   drm_mode_config_cleanup(>ddev);
 }
-- 
2.29.2

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH v6 10/10] drm/qxl: allocate dumb buffers in ram

2021-02-04 Thread Gerd Hoffmann
dumb buffers are shadowed anyway, so there is no need to store them
in device memory.  Use QXL_GEM_DOMAIN_CPU (TTM_PL_SYSTEM) instead.

Signed-off-by: Gerd Hoffmann 
---
 drivers/gpu/drm/qxl/qxl_dumb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/qxl/qxl_dumb.c b/drivers/gpu/drm/qxl/qxl_dumb.c
index c04cd5a2553c..48a58ba1db96 100644
--- a/drivers/gpu/drm/qxl/qxl_dumb.c
+++ b/drivers/gpu/drm/qxl/qxl_dumb.c
@@ -59,7 +59,7 @@ int qxl_mode_dumb_create(struct drm_file *file_priv,
surf.stride = pitch;
surf.format = format;
r = qxl_gem_object_create_with_handle(qdev, file_priv,
- QXL_GEM_DOMAIN_SURFACE,
+ QXL_GEM_DOMAIN_CPU,
  args->size, &surf, &qobj,
  &handle);
if (r)
-- 
2.29.2

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH v6 07/10] drm/qxl: handle shadow in primary destroy

2021-02-04 Thread Gerd Hoffmann
qxl_primary_atomic_disable must check whenever the framebuffer bo has a
shadow surface and in case it has check the shadow primary status.

Signed-off-by: Gerd Hoffmann 
Acked-by: Thomas Zimmermann 
---
 drivers/gpu/drm/qxl/qxl_display.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/qxl/qxl_display.c 
b/drivers/gpu/drm/qxl/qxl_display.c
index d25fd3acc891..c326412136c5 100644
--- a/drivers/gpu/drm/qxl/qxl_display.c
+++ b/drivers/gpu/drm/qxl/qxl_display.c
@@ -562,6 +562,8 @@ static void qxl_primary_atomic_disable(struct drm_plane 
*plane,
if (old_state->fb) {
struct qxl_bo *bo = gem_to_qxl_bo(old_state->fb->obj[0]);
 
+   if (bo->shadow)
+   bo = bo->shadow;
if (bo->is_primary) {
qxl_io_destroy_primary(qdev);
bo->is_primary = false;
-- 
2.29.2

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH v6 04/10] drm/qxl: unpin release objects

2021-02-04 Thread Gerd Hoffmann
Balances the qxl_create_bo(..., pinned=true, ...);
call in qxl_release_bo_alloc().

Signed-off-by: Gerd Hoffmann 
Acked-by: Thomas Zimmermann 
---
 drivers/gpu/drm/qxl/qxl_release.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/qxl/qxl_release.c 
b/drivers/gpu/drm/qxl/qxl_release.c
index c52412724c26..28013fd1f8ea 100644
--- a/drivers/gpu/drm/qxl/qxl_release.c
+++ b/drivers/gpu/drm/qxl/qxl_release.c
@@ -347,6 +347,7 @@ int qxl_alloc_release_reserved(struct qxl_device *qdev, 
unsigned long size,
 
	mutex_lock(&qdev->release_mutex);
	if (qdev->current_release_bo_offset[cur_idx] + 1 >= 
releases_per_bo[cur_idx]) {
+   qxl_bo_unpin(qdev->current_release_bo[cur_idx]);
	qxl_bo_unref(&qdev->current_release_bo[cur_idx]);
qdev->current_release_bo_offset[cur_idx] = 0;
qdev->current_release_bo[cur_idx] = NULL;
-- 
2.29.2

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH v6 02/10] Revert "drm/qxl: do not run release if qxl failed to init"

2021-02-04 Thread Gerd Hoffmann
This reverts commit b91907a6241193465ca92e357adf16822242296d.

Patch is broken, it effectively makes qxl_drm_release() a nop
because on normal driver shutdown qxl_drm_release() is called
*after* drm_dev_unregister().

Cc: Tong Zhang 
Signed-off-by: Gerd Hoffmann 
---
 drivers/gpu/drm/qxl/qxl_drv.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/qxl/qxl_drv.c b/drivers/gpu/drm/qxl/qxl_drv.c
index 34c8b25b5780..fb5f6a5e81d7 100644
--- a/drivers/gpu/drm/qxl/qxl_drv.c
+++ b/drivers/gpu/drm/qxl/qxl_drv.c
@@ -144,8 +144,6 @@ static void qxl_drm_release(struct drm_device *dev)
 * reodering qxl_modeset_fini() + qxl_device_fini() calls is
 * non-trivial though.
 */
-   if (!dev->registered)
-   return;
qxl_modeset_fini(qdev);
qxl_device_fini(qdev);
 }
-- 
2.29.2

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH v6 16/16] iommu/hyperv: setup an IO-APIC IRQ remapping domain for root partition

2021-02-04 Thread Joerg Roedel
On Wed, Feb 03, 2021 at 03:04:35PM +, Wei Liu wrote:
> Just like MSI/MSI-X, IO-APIC interrupts are remapped by Microsoft
> Hypervisor when Linux runs as the root partition. Implement an IRQ
> domain to handle mapping and unmapping of IO-APIC interrupts.
> 
> Signed-off-by: Wei Liu 

Acked-by: Joerg Roedel 

> ---
> v6:
> 1. Simplify code due to changes in a previous patch.
> ---
>  arch/x86/hyperv/irqdomain.c |  25 +
>  arch/x86/include/asm/mshyperv.h |   4 +
>  drivers/iommu/hyperv-iommu.c| 177 +++-
>  3 files changed, 203 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/x86/hyperv/irqdomain.c b/arch/x86/hyperv/irqdomain.c
> index 117f17e8c88a..0cabc9aece38 100644
> --- a/arch/x86/hyperv/irqdomain.c
> +++ b/arch/x86/hyperv/irqdomain.c
> @@ -360,3 +360,28 @@ struct irq_domain * __init hv_create_pci_msi_domain(void)
>  }
>  
>  #endif /* CONFIG_PCI_MSI */
> +
> +int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry 
> *entry)
> +{
> + union hv_device_id device_id;
> +
> + device_id.as_uint64 = 0;
> + device_id.device_type = HV_DEVICE_TYPE_IOAPIC;
> + device_id.ioapic.ioapic_id = (u8)ioapic_id;
> +
> + return hv_unmap_interrupt(device_id.as_uint64, entry);
> +}
> +EXPORT_SYMBOL_GPL(hv_unmap_ioapic_interrupt);
> +
> +int hv_map_ioapic_interrupt(int ioapic_id, bool level, int cpu, int vector,
> + struct hv_interrupt_entry *entry)
> +{
> + union hv_device_id device_id;
> +
> + device_id.as_uint64 = 0;
> + device_id.device_type = HV_DEVICE_TYPE_IOAPIC;
> + device_id.ioapic.ioapic_id = (u8)ioapic_id;
> +
> + return hv_map_interrupt(device_id, level, cpu, vector, entry);
> +}
> +EXPORT_SYMBOL_GPL(hv_map_ioapic_interrupt);
> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> index ccc849e25d5e..345d7c6f8c37 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -263,6 +263,10 @@ static inline void hv_set_msi_entry_from_desc(union 
> hv_msi_entry *msi_entry,
>  
>  struct irq_domain *hv_create_pci_msi_domain(void);
>  
> +int hv_map_ioapic_interrupt(int ioapic_id, bool level, int vcpu, int vector,
> + struct hv_interrupt_entry *entry);
> +int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry 
> *entry);
> +
>  #else /* CONFIG_HYPERV */
>  static inline void hyperv_init(void) {}
>  static inline void hyperv_setup_mmu_ops(void) {}
> diff --git a/drivers/iommu/hyperv-iommu.c b/drivers/iommu/hyperv-iommu.c
> index 1d21a0b5f724..e285a220c913 100644
> --- a/drivers/iommu/hyperv-iommu.c
> +++ b/drivers/iommu/hyperv-iommu.c
> @@ -20,6 +20,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #include "irq_remapping.h"
>  
> @@ -115,30 +116,43 @@ static const struct irq_domain_ops hyperv_ir_domain_ops 
> = {
>   .free = hyperv_irq_remapping_free,
>  };
>  
> +static const struct irq_domain_ops hyperv_root_ir_domain_ops;
>  static int __init hyperv_prepare_irq_remapping(void)
>  {
>   struct fwnode_handle *fn;
>   int i;
> + const char *name;
> + const struct irq_domain_ops *ops;
>  
>   if (!hypervisor_is_type(X86_HYPER_MS_HYPERV) ||
>   x86_init.hyper.msi_ext_dest_id() ||
>   !x2apic_supported())
>   return -ENODEV;
>  
> - fn = irq_domain_alloc_named_id_fwnode("HYPERV-IR", 0);
> + if (hv_root_partition) {
> + name = "HYPERV-ROOT-IR";
> + ops = &hyperv_root_ir_domain_ops;
> + } else {
> + name = "HYPERV-IR";
> + ops = &hyperv_ir_domain_ops;
> + }
> +
> + fn = irq_domain_alloc_named_id_fwnode(name, 0);
>   if (!fn)
>   return -ENOMEM;
>  
>   ioapic_ir_domain =
>   irq_domain_create_hierarchy(arch_get_ir_parent_domain(),
> - 0, IOAPIC_REMAPPING_ENTRY, fn,
> - &hyperv_ir_domain_ops, NULL);
> + 0, IOAPIC_REMAPPING_ENTRY, fn, ops, NULL);
>  
>   if (!ioapic_ir_domain) {
>   irq_domain_free_fwnode(fn);
>   return -ENOMEM;
>   }
>  
> + if (hv_root_partition)
> + return 0; /* The rest is only relevant to guests */
> +
>   /*
>* Hyper-V doesn't provide irq remapping function for
>* IO-APIC and so IO-APIC only accepts 8-bit APIC ID.
> @@ -166,4 +180,161 @@ struct irq_remap_ops hyperv_irq_remap_ops = {
>   .enable = hyperv_enable_irq_remapping,
>  };
>  
> +/* IRQ remapping domain when Linux runs as the root partition */
> +struct hyperv_root_ir_data {
> + u8 ioapic_id;
> + bool is_level;
> + struct hv_interrupt_entry entry;
> +};
> +
> +static void
> +hyperv_root_ir_compose_msi_msg(struct irq_data *irq_data, struct msi_msg 
> *msg)
> +{
> + u64 status;
> + u32 vector;
> + struct irq_cfg *cfg;
> + int ioapic_id;
> + struct cpumask *affinity;
> + int cpu;
> + struct 

Re: [RFC 08/10] vhost: Add x-vhost-enable-shadow-vq qmp

2021-02-04 Thread Markus Armbruster
Eugenio Perez Martin  writes:

> On Tue, Feb 2, 2021 at 4:38 PM Eric Blake  wrote:
>>
>> On 1/29/21 2:54 PM, Eugenio Pérez wrote:
[...]
>> > diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
>> > index 040f68ff2e..42836e45f3 100644
>> > --- a/hw/virtio/vhost.c
>> > +++ b/hw/virtio/vhost.c
>> > @@ -15,6 +15,7 @@
>> >
>> >  #include "qemu/osdep.h"
>> >  #include "qapi/error.h"
>> > +#include "qapi/qapi-commands-net.h"
>> >  #include "hw/virtio/vhost.h"
>> >  #include "qemu/atomic.h"
>> >  #include "qemu/range.h"
>> > @@ -1841,3 +1842,8 @@ int vhost_net_set_backend(struct vhost_dev *hdev,
>> >
>> >  return -1;
>> >  }
>> > +
>> > +void qmp_x_vhost_enable_shadow_vq(const char *name, bool enable, Error 
>> > **errp)
>> > +{
>> > +error_setg(errp, "Shadow virtqueue still not implemented.");
>>
>> error_setg() should not be passed a trailing '.'.
>>
>
> Oh, sorry I missed the comment in the error_setg doc.
>
> I copy from the call to error_setg "Migration disabled: vhost
> lacks VHOST_F_LOG_ALL feature.". I'm wondering if it's a good moment
> to delete the dot there too, since other tools could depend on parsing
> it.

It's pretty much always a good moment for patches improving error
messages :)

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH iproute2-next v3 0/5] Add vdpa device management tool

2021-02-04 Thread Adrian Moreno
Sorry I have not followed this work as close as I would have wanted.
Some questions below.

On 2/4/21 4:16 AM, Jason Wang wrote:
> 
> On 2021/2/2 下午6:35, Parav Pandit wrote:
>> Linux vdpa interface allows vdpa device management functionality.
>> This includes adding, removing, querying vdpa devices.
>>
>> vdpa interface also includes showing supported management devices
>> which support such operations.
>>
>> This patchset includes kernel uapi headers and a vdpa tool.
>>
>> examples:
>>
>> $ vdpa mgmtdev show
>> vdpasim:
>>    supported_classes net
>>
>> $ vdpa mgmtdev show -jp
>> {
>>  "show": {
>>  "vdpasim": {
>>  "supported_classes": [ "net" ]
>>  }
>>  }
>> }
>>

How can a user establish the relationship between a mgmtdev and its parent
device (pci vf, sf, etc)?

>> Create a vdpa device of type networking named as "foo2" from
>> the management device vdpasim_net:
>>
>> $ vdpa dev add mgmtdev vdpasim_net name foo2
>>

I guess this command will accept a 'type' parameter once more supported_classes
are added?

Also, will this tool also handle the vdpa driver binding or will the user handle
that through the vdpa bus' sysfs interface?

>> Show the newly created vdpa device by its name:
>> $ vdpa dev show foo2
>> foo2: type network mgmtdev vdpasim_net vendor_id 0 max_vqs 2 max_vq_size 256
>>
>> $ vdpa dev show foo2 -jp
>> {
>>  "dev": {
>>  "foo2": {
>>  "type": "network",
>>  "mgmtdev": "vdpasim_net",
>>  "vendor_id": 0,
>>  "max_vqs": 2,
>>  "max_vq_size": 256
>>  }
>>  }
>> }
>>
>> Delete the vdpa device after its use:
>> $ vdpa dev del foo2
>>
>> Patch summary:
>> Patch-1 adds kernel headers for vdpa subsystem
>> Patch-2 adds library routines for indent handling
>> Patch-3 adds library routines for generic socket communication
>> PAtch-4 adds library routine for number to string mapping
>> Patch-5 adds vdpa tool
>>
>> Kernel headers are from the vhost kernel tree [1] from branch linux-next.
>>
>> [1] https://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git
>>
>> ---
> 
> 
> Adding Adrian to see if this looks good for k8s integration.
> 
> Thanks
> 

Thanks
-- 
Adrián Moreno

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH RFC v2 08/10] vdpa: add vdpa simulator for block device

2021-02-04 Thread Stefano Garzarella

On Wed, Feb 03, 2021 at 04:45:51PM +, Stefan Hajnoczi wrote:

On Tue, Feb 02, 2021 at 04:49:50PM +0100, Stefano Garzarella wrote:

On Tue, Feb 02, 2021 at 09:34:12AM +, Stefan Hajnoczi wrote:
> On Thu, Jan 28, 2021 at 03:41:25PM +0100, Stefano Garzarella wrote:
> > +static void vdpasim_blk_work(struct work_struct *work)
> > +{
> > + struct vdpasim *vdpasim = container_of(work, struct vdpasim, work);
> > + u8 status = VIRTIO_BLK_S_OK;
> > + int i;
> > +
> > + spin_lock(&vdpasim->lock);
> > +
> > + if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK))
> > + goto out;
> > +
> > + for (i = 0; i < VDPASIM_BLK_VQ_NUM; i++) {
> > + struct vdpasim_virtqueue *vq = &vdpasim->vqs[i];
> > +
> > + if (!vq->ready)
> > + continue;
> > +
> > + while (vringh_getdesc_iotlb(&vq->vring, &vq->out_iov,
> > + &vq->in_iov, &vq->head,
> > + GFP_ATOMIC) > 0) {
> > + int write;
> > +
> > + vq->in_iov.i = vq->in_iov.used - 1;
> > + write = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov,
> > +   &status, 1);
> > + if (write <= 0)
> > + break;
>
> This code looks fragile:
>
> 1. Relying on unsigned underflow and the while loop in
>   vringh_iov_push_iotlb() to handle the case where in_iov.used == 0 is
>   risky and could break.
>
> 2. Does this assume that the last in_iov element has size 1? For
>   example, the guest driver may send a single "in" iovec with size 513
>   when reading 512 bytes (with an extra byte for the request status).
>
> Please validate inputs fully, even in test/development code, because
> it's likely to be copied by others when writing production code (or
> deployed in production by unsuspecting users) :).

Perfectly agree on that, so I addressed these things, also following your
review on the previous version, on the next patch of this series:
"vdpa_sim_blk: implement ramdisk behaviour".

Do you think should I move these checks in this patch?

I did this to leave Max credit for this patch and add more code to emulate a
ramdisk in later patches.


You could update the commit description so it's clear that input
validation is missing and will be added in the next commit.


I'll do it.

Thanks,
Stefano

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization