[PATCH V2] vhost-scsi: unbreak any layout for response

2023-01-18 Thread Jason Wang
Al Viro said:

"""
Since "vhost/scsi: fix reuse of >iov[out] in response"
we have this:
cmd->tvc_resp_iov = vq->iov[vc.out];
cmd->tvc_in_iovs = vc.in;
combined with
iov_iter_init(_iter, ITER_DEST, >tvc_resp_iov,
  cmd->tvc_in_iovs, sizeof(v_rsp));
in vhost_scsi_complete_cmd_work().  We used to have ->tvc_resp_iov
_pointing_ to vq->iov[vc.out]; back then iov_iter_init() asked to
set an iovec-backed iov_iter over the tail of vq->iov[], with
length being the amount of iovecs in the tail.

Now we have a copy of one element of that array.  Fortunately, the members
following it in the containing structure are two non-NULL kernel pointers,
so copy_to_iter() will not copy anything beyond the first iovec - kernel
pointer is not (on the majority of architectures) going to be accepted by
access_ok() in copyout() and it won't be skipped since the "length" (in
reality - another non-NULL kernel pointer) won't be zero.

So it's not going to give a guest-to-qemu escalation, but it's definitely
a bug.  Frankly, my preference would be to verify that the very first iovec
is long enough to hold rsp_size.  Due to the above, any users that try to
give us vq->iov[vc.out].iov_len < sizeof(struct virtio_scsi_cmd_resp)
would currently get a failure in vhost_scsi_complete_cmd_work()
anyway.
"""

However, the spec doesn't say anything about the legacy descriptor
layout for the respone. So this patch tries to not assume the response
to reside in a single separate descriptor which is what commit
79c14141a487 ("vhost/scsi: Convert completion path to use") tries to
achieve towards to ANY_LAYOUT.

This is done by allocating and using dedicate resp iov in the
command. To be safety, start with UIO_MAXIOV to be consistent with the
limitation that we advertise to the vhost_get_vq_desc().

Testing with the hacked virtio-scsi driver that use 1 descriptor for 1
byte in the response.

Reported-by: Al Viro 
Cc: Benjamin Coddington 
Cc: Nicholas Bellinger 
Fixes: a77ec83a5789 ("vhost/scsi: fix reuse of >iov[out] in response")
Signed-off-by: Jason Wang 
---
Changes since V1:
- tweak the changelog
- fix the allocation size for tvc_resp_iov (should be sizeof(struct iovec))
---
 drivers/vhost/scsi.c | 21 +
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index dca6346d75b3..d5ecb8876fc9 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -80,7 +80,7 @@ struct vhost_scsi_cmd {
struct scatterlist *tvc_prot_sgl;
struct page **tvc_upages;
/* Pointer to response header iovec */
-   struct iovec tvc_resp_iov;
+   struct iovec *tvc_resp_iov;
/* Pointer to vhost_scsi for our device */
struct vhost_scsi *tvc_vhost;
/* Pointer to vhost_virtqueue for the cmd */
@@ -563,7 +563,7 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work 
*work)
memcpy(v_rsp.sense, cmd->tvc_sense_buf,
   se_cmd->scsi_sense_length);
 
-   iov_iter_init(_iter, ITER_DEST, >tvc_resp_iov,
+   iov_iter_init(_iter, ITER_DEST, cmd->tvc_resp_iov,
  cmd->tvc_in_iovs, sizeof(v_rsp));
ret = copy_to_iter(_rsp, sizeof(v_rsp), _iter);
if (likely(ret == sizeof(v_rsp))) {
@@ -594,6 +594,7 @@ vhost_scsi_get_cmd(struct vhost_virtqueue *vq, struct 
vhost_scsi_tpg *tpg,
struct vhost_scsi_cmd *cmd;
struct vhost_scsi_nexus *tv_nexus;
struct scatterlist *sg, *prot_sg;
+   struct iovec *tvc_resp_iov;
struct page **pages;
int tag;
 
@@ -613,6 +614,7 @@ vhost_scsi_get_cmd(struct vhost_virtqueue *vq, struct 
vhost_scsi_tpg *tpg,
sg = cmd->tvc_sgl;
prot_sg = cmd->tvc_prot_sgl;
pages = cmd->tvc_upages;
+   tvc_resp_iov = cmd->tvc_resp_iov;
memset(cmd, 0, sizeof(*cmd));
cmd->tvc_sgl = sg;
cmd->tvc_prot_sgl = prot_sg;
@@ -625,6 +627,7 @@ vhost_scsi_get_cmd(struct vhost_virtqueue *vq, struct 
vhost_scsi_tpg *tpg,
cmd->tvc_data_direction = data_direction;
cmd->tvc_nexus = tv_nexus;
cmd->inflight = vhost_scsi_get_inflight(vq);
+   cmd->tvc_resp_iov = tvc_resp_iov;
 
memcpy(cmd->tvc_cdb, cdb, VHOST_SCSI_MAX_CDB_SIZE);
 
@@ -935,7 +938,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct 
vhost_virtqueue *vq)
struct iov_iter in_iter, prot_iter, data_iter;
u64 tag;
u32 exp_data_len, data_direction;
-   int ret, prot_bytes, c = 0;
+   int ret, prot_bytes, i, c = 0;
u16 lun;
u8 task_attr;
bool t10_pi = vhost_has_feature(vq, VIRTIO_SCSI_F_T10_PI);
@@ -1092,7 +1095,8 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct 
vhost_virtqueue *vq)
}
cmd->tvc_vhost = vs;
cmd->tvc_vq = vq;
-   cmd->tvc_resp_iov = vq->iov[vc.out];
+  

[PATCH V2 5/5] vdpa: mlx5: support per virtqueue dma device

2023-01-18 Thread Jason Wang
This patch implements per virtqueue dma device for mlx5_vdpa. This is
needed for virtio_vdpa to work for CVQ which is backed by vringh but
not DMA. We simply advertise the vDPA device itself as the DMA device
for CVQ then DMA API can simply use PA so the identical mapping for
CVQ can still be used. Otherwise the identical (1:1) mapping won't
work when platform IOMMU is enabled since the IOVA is allocated on
demand which is not necessarily the PA.

This fixes the following crash when mlx5 vDPA device is bound to
virtio-vdpa with platform IOMMU enabled but not in passthrough mode:

BUG: unable to handle page fault for address: ff2fb3063deb1002
#PF: supervisor read access in kernel mode
#PF: error_code(0x) - not-present page
PGD 1393001067 P4D 1393002067 PUD 0
Oops:  [#1] PREEMPT SMP NOPTI
CPU: 55 PID: 8923 Comm: kworker/u112:3 Kdump: loaded Not tainted 6.1.0+ #7
Hardware name: Dell Inc. PowerEdge R750/0PJ80M, BIOS 1.5.4 12/17/2021
Workqueue: mlx5_vdpa_wq mlx5_cvq_kick_handler [mlx5_vdpa]
RIP: 0010:vringh_getdesc_iotlb+0x93/0x1d0 [vringh]
Code: 14 25 40 ef 01 00 83 82 c0 0a 00 00 01 48 2b 05 93 5a 1b ea 8b 4c 24 14 
48 c1 f8 06 48 c1 e0 0c 48 03 05 90 5a 1b ea 48 01 c8 <0f> b7 00 83 aa c0 0a 00 
00 01 65 ff 0d bc e4 41 3f 0f 84 05 01 00
RSP: 0018:ff46821ba664fdf8 EFLAGS: 00010282
RAX: ff2fb3063deb1002 RBX: 0a20 RCX: 0002
RDX: ff2fb318d2f94380 RSI: 0002 RDI: 0001
RBP: ff2fb3065e832410 R08: ff46821ba664fe00 R09: 0001
R10:  R11: 000d R12: ff2fb3065e832488
R13: ff2fb3065e8324a8 R14: ff2fb3065e8324c8 R15: ff2fb3065e8324a8
FS:  () GS:ff2fb3257fac() knlGS:
CS:  0010 DS:  ES:  CR0: 80050033
CR2: ff2fb3063deb1002 CR3: 001392010006 CR4: 00771ee0
DR0:  DR1:  DR2: 
DR3:  DR6: fffe0ff0 DR7: 0400
PKRU: 5554
Call Trace:

  mlx5_cvq_kick_handler+0x89/0x2b0 [mlx5_vdpa]
  process_one_work+0x1e2/0x3b0
  ? rescuer_thread+0x390/0x390
  worker_thread+0x50/0x3a0
  ? rescuer_thread+0x390/0x390
  kthread+0xd6/0x100
  ? kthread_complete_and_exit+0x20/0x20
  ret_from_fork+0x1f/0x30
  

Reviewed-by: Eli Cohen 
Tested-by: Eli Cohen 
Signed-off-by: Jason Wang 
---
Changes since V1:
- make mlx5_get_vq_dma_dev() static
---
 drivers/vdpa/mlx5/net/mlx5_vnet.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c 
b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index 6632651b1e54..97d1ada7f4db 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -2682,6 +2682,16 @@ static int mlx5_vdpa_set_map(struct vdpa_device *vdev, 
unsigned int asid,
return err;
 }
 
+static struct device *mlx5_get_vq_dma_dev(struct vdpa_device *vdev, u16 idx)
+{
+   struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
+
+   if (is_ctrl_vq_idx(mvdev, idx))
+   return >dev;
+
+   return mvdev->vdev.dma_dev;
+}
+
 static void mlx5_vdpa_free(struct vdpa_device *vdev)
 {
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
@@ -2897,6 +2907,7 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = {
.get_generation = mlx5_vdpa_get_generation,
.set_map = mlx5_vdpa_set_map,
.set_group_asid = mlx5_set_group_asid,
+   .get_vq_dma_dev = mlx5_get_vq_dma_dev,
.free = mlx5_vdpa_free,
.suspend = mlx5_vdpa_suspend,
 };
-- 
2.25.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH V2 4/5] vdpa: set dma mask for vDPA device

2023-01-18 Thread Jason Wang
Setting DMA mask for vDPA device in case that there are virtqueue that
is not backed by DMA so the vDPA device could be advertised as the DMA
device that is used by DMA API for software emulated virtqueues.

Reviewed-by: Eli Cohen 
Tested-by: Eli Cohen 
Signed-off-by: Jason Wang 
---
 drivers/vdpa/vdpa.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c
index 8ef7aa1365cc..6821b2850bbb 100644
--- a/drivers/vdpa/vdpa.c
+++ b/drivers/vdpa/vdpa.c
@@ -39,6 +39,11 @@ static int vdpa_dev_probe(struct device *d)
u32 max_num, min_num = 1;
int ret = 0;
 
+   d->dma_mask = >coherent_dma_mask;
+   ret = dma_set_mask_and_coherent(d, DMA_BIT_MASK(64));
+   if (ret)
+   return ret;
+
max_num = ops->get_vq_num_max(vdev);
if (ops->get_vq_num_min)
min_num = ops->get_vq_num_min(vdev);
-- 
2.25.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH V2 0/5] virtio_ring: per virtqueue DMA device

2023-01-18 Thread Jason Wang
Hi All:

In some cases, the virtqueue could be backed by different devices. One
example is that in the case of vDPA some parent may emualte virtqueue
via vringh. In this case, it would be wrong if we stick with the
physical DMA device for software emulated device, since there's no
easy way for vringh to know about the hardware IOMMU mappings.

So this series tries to introduce per virtqueue DMA device, then
software virtqueues can utilize the transport specific method to
assign appropirate DMA device.

This fixes the crash of mlx5_vdpa + virtio_vdpa when platform IOMMU is
enabled but not in the passthrough mode. The reason for the crash is
that the virito_ring tries to map the control virtqueue into platform
IOMMU but the vringh assumes a direct mapping (PA as IOVA). This is
fixed by advetise the vDPA device that doesnt do DMA (without a DMA
ops). So DMA API can go with the direct mapping then the vringh will
be happy since mlx5_vdpa assuems a direct/identical mapping by
default.

Please review.

Changes since V1:
- make mlx5_get_vq_dma_dev() static

Jason Wang (5):
  virtio_ring: per virtqueue dma device
  vdpa: introduce get_vq_dma_device()
  virtio-vdpa: support per vq dma device
  vdpa: set dma mask for vDPA device
  vdpa: mlx5: support per virtqueue dma device

 drivers/vdpa/mlx5/net/mlx5_vnet.c |  11 +++
 drivers/vdpa/vdpa.c   |   5 ++
 drivers/virtio/virtio_ring.c  | 133 +-
 drivers/virtio/virtio_vdpa.c  |  13 ++-
 include/linux/vdpa.h  |   6 ++
 include/linux/virtio_ring.h   |  16 
 6 files changed, 141 insertions(+), 43 deletions(-)

-- 
2.25.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH V2 3/5] virtio-vdpa: support per vq dma device

2023-01-18 Thread Jason Wang
This patch adds the support of per vq dma device for virito-vDPA. vDPA
parents then are allowed to use different DMA devices. This is useful
for the parents that have software or emulated virtqueues.

Reviewed-by: Eli Cohen 
Tested-by: Eli Cohen 
Signed-off-by: Jason Wang 
---
 drivers/virtio/virtio_vdpa.c | 13 ++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c
index 9670cc79371d..d7f5af62ddaa 100644
--- a/drivers/virtio/virtio_vdpa.c
+++ b/drivers/virtio/virtio_vdpa.c
@@ -135,6 +135,7 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned 
int index,
 {
struct virtio_vdpa_device *vd_dev = to_virtio_vdpa_device(vdev);
struct vdpa_device *vdpa = vd_get_vdpa(vdev);
+   struct device *dma_dev;
const struct vdpa_config_ops *ops = vdpa->config;
struct virtio_vdpa_vq_info *info;
struct vdpa_callback cb;
@@ -175,9 +176,15 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned 
int index,
 
/* Create the vring */
align = ops->get_vq_align(vdpa);
-   vq = vring_create_virtqueue(index, max_num, align, vdev,
-   true, may_reduce_num, ctx,
-   virtio_vdpa_notify, callback, name);
+
+   if (ops->get_vq_dma_dev)
+   dma_dev = ops->get_vq_dma_dev(vdpa, index);
+   else
+   dma_dev = vdpa_get_dma_dev(vdpa);
+   vq = vring_create_virtqueue_dma(index, max_num, align, vdev,
+   true, may_reduce_num, ctx,
+   virtio_vdpa_notify, callback,
+   name, dma_dev);
if (!vq) {
err = -ENOMEM;
goto error_new_virtqueue;
-- 
2.25.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH V2 2/5] vdpa: introduce get_vq_dma_device()

2023-01-18 Thread Jason Wang
This patch introduces a new method to query the dma device that is use
for a specific virtqueue.

Reviewed-by: Eli Cohen 
Tested-by: Eli Cohen 
Signed-off-by: Jason Wang 
---
 include/linux/vdpa.h | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
index 6d0f5e4e82c2..3ec13aee35f5 100644
--- a/include/linux/vdpa.h
+++ b/include/linux/vdpa.h
@@ -282,6 +282,11 @@ struct vdpa_map_file {
  * @iova: iova to be unmapped
  * @size: size of the area
  * Returns integer: success (0) or error (< 0)
+ * @get_vq_dma_dev:Get the dma device for a specific
+ * virtqueue (optional)
+ * @vdev: vdpa device
+ * @idx: virtqueue index
+ * Returns pointer to structure device or error 
(NULL)
  * @free:  Free resources that belongs to vDPA (optional)
  * @vdev: vdpa device
  */
@@ -341,6 +346,7 @@ struct vdpa_config_ops {
 u64 iova, u64 size);
int (*set_group_asid)(struct vdpa_device *vdev, unsigned int group,
  unsigned int asid);
+   struct device *(*get_vq_dma_dev)(struct vdpa_device *vdev, u16 idx);
 
/* Free device resources */
void (*free)(struct vdpa_device *vdev);
-- 
2.25.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH V2 1/5] virtio_ring: per virtqueue dma device

2023-01-18 Thread Jason Wang
This patch introduces a per virtqueue dma device. This will be used
for virtio devices whose virtqueue are backed by different underlayer
devices.

One example is the vDPA that where the control virtqueue could be
implemented through software mediation.

Some of the work are actually done before since the helper like
vring_dma_device(). This work left are:

- Let vring_dma_device() return the per virtqueue dma device instead
  of the vdev's parent.
- Allow passing a dma_device when creating the virtqueue through a new
  helper, old vring creation helper will keep using vdev's parent.

Reviewed-by: Eli Cohen 
Tested-by: Eli Cohen 
Signed-off-by: Jason Wang 
---
 drivers/virtio/virtio_ring.c | 133 ---
 include/linux/virtio_ring.h  |  16 +
 2 files changed, 109 insertions(+), 40 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 723c4e29e1d3..41144b5246a8 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -202,6 +202,9 @@ struct vring_virtqueue {
/* DMA, allocation, and size information */
bool we_own_ring;
 
+   /* Device used for doing DMA */
+   struct device *dma_dev;
+
 #ifdef DEBUG
/* They're supposed to lock for us. */
unsigned int in_use;
@@ -219,7 +222,8 @@ static struct virtqueue *__vring_new_virtqueue(unsigned int 
index,
   bool context,
   bool (*notify)(struct virtqueue 
*),
   void (*callback)(struct 
virtqueue *),
-  const char *name);
+  const char *name,
+  struct device *dma_dev);
 static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num);
 static void vring_free(struct virtqueue *_vq);
 
@@ -297,10 +301,11 @@ size_t virtio_max_dma_size(struct virtio_device *vdev)
 EXPORT_SYMBOL_GPL(virtio_max_dma_size);
 
 static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
- dma_addr_t *dma_handle, gfp_t flag)
+  dma_addr_t *dma_handle, gfp_t flag,
+  struct device *dma_dev)
 {
if (vring_use_dma_api(vdev)) {
-   return dma_alloc_coherent(vdev->dev.parent, size,
+   return dma_alloc_coherent(dma_dev, size,
  dma_handle, flag);
} else {
void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag);
@@ -330,10 +335,11 @@ static void *vring_alloc_queue(struct virtio_device 
*vdev, size_t size,
 }
 
 static void vring_free_queue(struct virtio_device *vdev, size_t size,
-void *queue, dma_addr_t dma_handle)
+void *queue, dma_addr_t dma_handle,
+struct device *dma_dev)
 {
if (vring_use_dma_api(vdev))
-   dma_free_coherent(vdev->dev.parent, size, queue, dma_handle);
+   dma_free_coherent(dma_dev, size, queue, dma_handle);
else
free_pages_exact(queue, PAGE_ALIGN(size));
 }
@@ -341,11 +347,11 @@ static void vring_free_queue(struct virtio_device *vdev, 
size_t size,
 /*
  * The DMA ops on various arches are rather gnarly right now, and
  * making all of the arch DMA ops work on the vring device itself
- * is a mess.  For now, we use the parent device for DMA ops.
+ * is a mess.
  */
 static inline struct device *vring_dma_dev(const struct vring_virtqueue *vq)
 {
-   return vq->vq.vdev->dev.parent;
+   return vq->dma_dev;
 }
 
 /* Map one sg entry. */
@@ -1032,11 +1038,12 @@ static int vring_alloc_state_extra_split(struct 
vring_virtqueue_split *vring_spl
 }
 
 static void vring_free_split(struct vring_virtqueue_split *vring_split,
-struct virtio_device *vdev)
+struct virtio_device *vdev, struct device *dma_dev)
 {
vring_free_queue(vdev, vring_split->queue_size_in_bytes,
 vring_split->vring.desc,
-vring_split->queue_dma_addr);
+vring_split->queue_dma_addr,
+dma_dev);
 
kfree(vring_split->desc_state);
kfree(vring_split->desc_extra);
@@ -1046,7 +1053,8 @@ static int vring_alloc_queue_split(struct 
vring_virtqueue_split *vring_split,
   struct virtio_device *vdev,
   u32 num,
   unsigned int vring_align,
-  bool may_reduce_num)
+  bool may_reduce_num,
+  struct device *dma_dev)
 {
void *queue = NULL;
dma_addr_t dma_addr;
@@ -1061,7 +1069,8 @@ static int vring_alloc_queue_split(struct 

RE: [PATCH v2 08/10] iommu/intel: Use GFP_KERNEL in sleepable contexts

2023-01-18 Thread Tian, Kevin
> From: Jason Gunthorpe 
> Sent: Thursday, January 19, 2023 2:01 AM
> 
> These contexts are sleepable, so use the proper annotation. The
> GFP_ATOMIC
> was added mechanically in the prior patches.
> 
> Signed-off-by: Jason Gunthorpe 

Reviewed-by: Kevin Tian 
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


RE: [PATCH v2 07/10] iommu/intel: Support the gfp argument to the map_pages op

2023-01-18 Thread Tian, Kevin
> From: Jason Gunthorpe 
> Sent: Thursday, January 19, 2023 2:01 AM
> 
> Flow it down to alloc_pgtable_page() via pfn_to_dma_pte() and
> __domain_mapping().
> 
> Signed-off-by: Jason Gunthorpe 

Reviewed-by: Kevin Tian 
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


RE: [PATCH v2 06/10] iommu/intel: Add a gfp parameter to alloc_pgtable_page()

2023-01-18 Thread Tian, Kevin
> From: Jason Gunthorpe 
> Sent: Thursday, January 19, 2023 2:01 AM
> 
> This is eventually called by iommufd through intel_iommu_map_pages() and
> it should not be forced to atomic. Push the GFP_ATOMIC to all callers.
> 
> Signed-off-by: Jason Gunthorpe 

Reviewed-by: Kevin Tian 
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


RE: [PATCH v2 05/10] iommufd: Use GFP_KERNEL_ACCOUNT for iommu_map()

2023-01-18 Thread Tian, Kevin
> From: Jason Gunthorpe 
> Sent: Thursday, January 19, 2023 2:01 AM
> 
> iommufd follows the same design as KVM and uses memory cgroups to limit
> the amount of kernel memory a iommufd file descriptor can pin down. The
> various internal data structures already use GFP_KERNEL_ACCOUNT.
> 
> However, one of the biggest consumers of kernel memory is the IOPTEs
> stored under the iommu_domain. Many drivers will allocate these at
> iommu_map() time and will trivially do the right thing if we pass in
> GFP_KERNEL_ACCOUNT.
> 
> Signed-off-by: Jason Gunthorpe 

Reviewed-by: Kevin Tian 
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


RE: [PATCH v2 04/10] iommu/dma: Use the gfp parameter in __iommu_dma_alloc_noncontiguous()

2023-01-18 Thread Tian, Kevin
> From: Jason Gunthorpe 
> Sent: Thursday, January 19, 2023 2:01 AM
> 
> Change the sg_alloc_table_from_pages() allocation that was hardwired to
> GFP_KERNEL to use the gfp parameter like the other allocations in this
> function.
> 
> Auditing says this is never called from an atomic context, so it is safe
> as is, but reads wrong.
> 
> Signed-off-by: Jason Gunthorpe 

Reviewed-by: Kevin Tian 
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


RE: [PATCH v2 03/10] iommu: Add a gfp parameter to iommu_map_sg()

2023-01-18 Thread Tian, Kevin
> From: Jason Gunthorpe 
> Sent: Thursday, January 19, 2023 2:01 AM
> 
> Follow the pattern for iommu_map() and remove iommu_map_sg_atomic().
> 
> This allows __iommu_dma_alloc_noncontiguous() to use a GFP_KERNEL
> allocation here, based on the provided gfp flags.
> 
> Signed-off-by: Jason Gunthorpe 

Reviewed-by: Kevin Tian 
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


RE: [PATCH v2 02/10] iommu: Remove iommu_map_atomic()

2023-01-18 Thread Tian, Kevin
> From: Jason Gunthorpe 
> Sent: Thursday, January 19, 2023 2:01 AM
> 
> There is only one call site and it can now just pass the GFP_ATOMIC to the
> normal iommu_map().
> 
> Signed-off-by: Jason Gunthorpe 

Reviewed-by: Kevin Tian 
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


RE: [PATCH v2 01/10] iommu: Add a gfp parameter to iommu_map()

2023-01-18 Thread Tian, Kevin
> From: Jason Gunthorpe 
> Sent: Thursday, January 19, 2023 2:01 AM
> 
> The internal mechanisms support this, but instead of exposting the gfp to
> the caller it wrappers it into iommu_map() and iommu_map_atomic()
> 
> Fix this instead of adding more variants for GFP_KERNEL_ACCOUNT.
> 
> Signed-off-by: Jason Gunthorpe 

Reviewed-by: Kevin Tian 
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH 2/2] vringh: fetch used_idx from vring at vringh_init_iotlb

2023-01-18 Thread Jason Wang
On Thu, Jan 19, 2023 at 12:44 AM Eugenio Pérez  wrote:
>
> Starting from an used_idx different than 0 is needed in use cases like
> virtual machine migration.  Not doing so and letting the caller set an
> avail idx different than 0 causes destination device to try to use old
> buffers that source driver already recover and are not available
> anymore.
>
> While callers like vdpa_sim set avail_idx directly it does not set
> used_idx.  Instead of let the caller do the assignment, fetch it from
> the guest at initialization like vhost-kernel do.
>
> To perform the same at vring_kernel_init and vring_user_init is left for
> the future.
>
> Signed-off-by: Eugenio Pérez 
> ---
>  drivers/vhost/vringh.c | 25 +++--
>  1 file changed, 23 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
> index 33eb941fcf15..0eed825197f2 100644
> --- a/drivers/vhost/vringh.c
> +++ b/drivers/vhost/vringh.c
> @@ -1301,6 +1301,17 @@ static inline int putused_iotlb(const struct vringh 
> *vrh,
> return 0;
>  }
>
> +/**
> + * vringh_update_used_idx - fetch used idx from driver's used split vring
> + * @vrh: The vring.
> + *
> + * Returns -errno or 0.
> + */
> +static inline int vringh_update_used_idx(struct vringh *vrh)
> +{
> +   return getu16_iotlb(vrh, >last_used_idx, >vring.used->idx);
> +}
> +
>  /**
>   * vringh_init_iotlb - initialize a vringh for a ring with IOTLB.
>   * @vrh: the vringh to initialize.
> @@ -1319,8 +1330,18 @@ int vringh_init_iotlb(struct vringh *vrh, u64 features,
>   struct vring_avail *avail,
>   struct vring_used *used)
>  {

While at this, I wonder if it's better to have a dedicated parameter
for last_avail_idx?

> -   return vringh_init_kern(vrh, features, num, weak_barriers,
> -   desc, avail, used);
> +   int r = vringh_init_kern(vrh, features, num, weak_barriers, desc,
> +avail, used);
> +
> +   if (r != 0)
> +   return r;
> +
> +   /* Consider the ring not initialized */
> +   if ((void *)desc == used)
> +   return 0;

I don't understand when we can get this (actually it should be a bug
of the caller).

Thanks

> +
> +   return vringh_update_used_idx(vrh);
> +
>  }
>  EXPORT_SYMBOL(vringh_init_iotlb);
>
> --
> 2.31.1
>

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH 1/2] vdpa_sim: not reset state in vdpasim_queue_ready

2023-01-18 Thread Jason Wang
On Thu, Jan 19, 2023 at 12:44 AM Eugenio Pérez  wrote:
>
> vdpasim_queue_ready calls vringh_init_iotlb, which resets split indexes.
> But it can be called after setting a ring base with
> vdpasim_set_vq_state.
>
> Fix it by stashing them. They're still resetted in vdpasim_vq_reset.
>
> This was discovered and tested live migrating the vdpa_sim_net device.
>
> Fixes: 2c53d0f64c06 ("vdpasim: vDPA device simulator")
> Signed-off-by: Eugenio Pérez 
> ---
>  drivers/vdpa/vdpa_sim/vdpa_sim.c | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c 
> b/drivers/vdpa/vdpa_sim/vdpa_sim.c
> index cb88891b44a8..8839232a3fcb 100644
> --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
> +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
> @@ -66,6 +66,7 @@ static void vdpasim_vq_notify(struct vringh *vring)
>  static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx)
>  {
> struct vdpasim_virtqueue *vq = >vqs[idx];
> +   uint16_t last_avail_idx = vq->vring.last_avail_idx;
>
> vringh_init_iotlb(>vring, vdpasim->features, vq->num, false,
>   (struct vring_desc *)(uintptr_t)vq->desc_addr,
> @@ -74,6 +75,7 @@ static void vdpasim_queue_ready(struct vdpasim *vdpasim, 
> unsigned int idx)
>   (struct vring_used *)
>   (uintptr_t)vq->device_addr);
>
> +   vq->vring.last_avail_idx = last_avail_idx;

Does this need to be serialized with the datapath?

E.g in set_vq_state() we do:

spin_lock(>lock);
vrh->last_avail_idx = state->split.avail_index;
spin_unlock(>lock);

Thanks

> vq->vring.notify = vdpasim_vq_notify;
>  }
>
> --
> 2.31.1
>

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[linux-next:master] BUILD REGRESSION f3381a7baf5ccbd091eb2c4fd2afd84266fcef24

2023-01-18 Thread kernel test robot
tree/branch: 
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git master
branch HEAD: f3381a7baf5ccbd091eb2c4fd2afd84266fcef24  Add linux-next specific 
files for 20230118

Error/Warning reports:

https://lore.kernel.org/oe-kbuild-all/202301171511.4zszviyp-...@intel.com

Error/Warning: (recently discovered and may have been fixed)

Documentation/mm/unevictable-lru.rst:186: WARNING: Title underline too short.
Error: failed to load BTF from vmlinux: No data available
drivers/scsi/qla2xxx/qla_mid.c:1094:51: warning: format '%ld' expects argument 
of type 'long int', but argument 5 has type 'unsigned int' [-Wformat=]
drivers/scsi/qla2xxx/qla_mid.c:1189:6: warning: no previous prototype for 
'qla_trim_buf' [-Wmissing-prototypes]
drivers/scsi/qla2xxx/qla_mid.c:1221:6: warning: no previous prototype for 
'__qla_adjust_buf' [-Wmissing-prototypes]
libbpf: failed to find '.BTF' ELF section in vmlinux

Unverified Error/Warning (likely false positive, please contact us if 
interested):

drivers/block/virtio_blk.c:721:9: sparse:bad type *
drivers/block/virtio_blk.c:721:9: sparse:unsigned int *
drivers/block/virtio_blk.c:721:9: sparse: sparse: incompatible types in 
comparison expression (different base types):
drivers/block/virtio_blk.c:721:9: sparse: sparse: no generic selection for 
'restricted __le32 [addressable] virtio_cread_v'
drivers/block/virtio_blk.c:721:9: sparse: sparse: no generic selection for 
'restricted __le32 virtio_cread_v'
drivers/nvmem/imx-ocotp.c:599:21: sparse: sparse: symbol 'imx_ocotp_layout' was 
not declared. Should it be static?
fs/verity/enable.c:29:2: warning: Null pointer passed as 1st argument to memory 
set function [clang-analyzer-unix.cstring.NullArg]

Error/Warning ids grouped by kconfigs:

gcc_recent_errors
|-- alpha-allyesconfig
|   |-- 
drivers-scsi-qla2xxx-qla_mid.c:warning:no-previous-prototype-for-__qla_adjust_buf
|   `-- 
drivers-scsi-qla2xxx-qla_mid.c:warning:no-previous-prototype-for-qla_trim_buf
|-- arc-allyesconfig
|   |-- 
drivers-scsi-qla2xxx-qla_mid.c:warning:format-ld-expects-argument-of-type-long-int-but-argument-has-type-unsigned-int
|   |-- 
drivers-scsi-qla2xxx-qla_mid.c:warning:no-previous-prototype-for-__qla_adjust_buf
|   `-- 
drivers-scsi-qla2xxx-qla_mid.c:warning:no-previous-prototype-for-qla_trim_buf
|-- arm-allyesconfig
|   |-- 
drivers-scsi-qla2xxx-qla_mid.c:warning:format-ld-expects-argument-of-type-long-int-but-argument-has-type-unsigned-int
|   |-- 
drivers-scsi-qla2xxx-qla_mid.c:warning:no-previous-prototype-for-__qla_adjust_buf
|   `-- 
drivers-scsi-qla2xxx-qla_mid.c:warning:no-previous-prototype-for-qla_trim_buf
|-- arm64-allyesconfig
|   |-- 
drivers-scsi-qla2xxx-qla_mid.c:warning:no-previous-prototype-for-__qla_adjust_buf
|   `-- 
drivers-scsi-qla2xxx-qla_mid.c:warning:no-previous-prototype-for-qla_trim_buf
|-- csky-randconfig-s051-20230115
|   `-- 
drivers-nvmem-imx-ocotp.c:sparse:sparse:symbol-imx_ocotp_layout-was-not-declared.-Should-it-be-static
|-- i386-allyesconfig
|   |-- 
drivers-scsi-qla2xxx-qla_mid.c:warning:format-ld-expects-argument-of-type-long-int-but-argument-has-type-unsigned-int
|   |-- 
drivers-scsi-qla2xxx-qla_mid.c:warning:no-previous-prototype-for-__qla_adjust_buf
|   `-- 
drivers-scsi-qla2xxx-qla_mid.c:warning:no-previous-prototype-for-qla_trim_buf
|-- ia64-allmodconfig
|   |-- 
drivers-scsi-qla2xxx-qla_mid.c:warning:no-previous-prototype-for-__qla_adjust_buf
|   `-- 
drivers-scsi-qla2xxx-qla_mid.c:warning:no-previous-prototype-for-qla_trim_buf
|-- ia64-buildonly-randconfig-r002-20230117
|   |-- 
drivers-scsi-qla2xxx-qla_mid.c:warning:no-previous-prototype-for-__qla_adjust_buf
|   `-- 
drivers-scsi-qla2xxx-qla_mid.c:warning:no-previous-prototype-for-qla_trim_buf
|-- loongarch-randconfig-r035-20230116
|   |-- 
drivers-scsi-qla2xxx-qla_mid.c:warning:no-previous-prototype-for-__qla_adjust_buf
|   `-- 
drivers-scsi-qla2xxx-qla_mid.c:warning:no-previous-prototype-for-qla_trim_buf
|-- mips-allyesconfig
|   |-- 
drivers-scsi-qla2xxx-qla_mid.c:warning:format-ld-expects-argument-of-type-long-int-but-argument-has-type-unsigned-int
|   |-- 
drivers-scsi-qla2xxx-qla_mid.c:warning:no-previous-prototype-for-__qla_adjust_buf
|   `-- 
drivers-scsi-qla2xxx-qla_mid.c:warning:no-previous-prototype-for-qla_trim_buf
|-- powerpc-allmodconfig
|   |-- 
drivers-scsi-qla2xxx-qla_mid.c:warning:format-ld-expects-argument-of-type-long-int-but-argument-has-type-unsigned-int
|   |-- 
drivers-scsi-qla2xxx-qla_mid.c:warning:no-previous-prototype-for-__qla_adjust_buf
|   `-- 
drivers-scsi-qla2xxx-qla_mid.c:warning:no-previous-prototype-for-qla_trim_buf
|-- s390-allyesconfig
|   |-- 
drivers-scsi-qla2xxx-qla_mid.c:warning:no-previous-prototype-for-__qla_adjust_buf
|   `-- 
drivers-scsi-qla2xxx-qla_mid.c:warning:no-previous-prototype-for-qla_trim_buf
|-- s390-randconfig-s052-20230115
|   |-- drivers-block-virtio_blk.c:sparse:bad-type
|   |-- 
drivers-block-virtio_blk.c:sparse:sparse:incompatible-types-in-comparison-expression-(different