[PATCH AUTOSEL 5.17 28/43] virtio-mmio: fix missing put_device() when vm_cmdline_parent registration failed

2022-06-13 Thread Sasha Levin
From: chengkaitao 

[ Upstream commit a58a7f97ba11391d2d0d408e0b24f38d86ae748e ]

The reference must be released when device_register(_cmdline_parent)
failed. Add the corresponding 'put_device()' in the error handling path.

Signed-off-by: chengkaitao 
Message-Id: <20220602005542.16489-1-chengkai...@didiglobal.com>
Signed-off-by: Michael S. Tsirkin 
Acked-by: Jason Wang 
Signed-off-by: Sasha Levin 
---
 drivers/virtio/virtio_mmio.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index 56128b9c46eb..1dd396d4bebb 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -688,6 +688,7 @@ static int vm_cmdline_set(const char *device,
if (!vm_cmdline_parent_registered) {
err = device_register(_cmdline_parent);
if (err) {
+   put_device(_cmdline_parent);
pr_err("Failed to register parent device!\n");
return err;
}
-- 
2.35.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


RE: [PATCH V2 5/6] vDPA: answer num of queue pairs = 1 to userspace when VIRTIO_NET_F_MQ == 0

2022-06-13 Thread Parav Pandit via Virtualization



> From: Zhu Lingshan 
> Sent: Monday, June 13, 2022 6:17 AM
> To: jasow...@redhat.com; m...@redhat.com
> Cc: virtualization@lists.linux-foundation.org; net...@vger.kernel.org; Parav
> Pandit ; xieyon...@bytedance.com;
> gautam.da...@amd.com; Zhu Lingshan 
> Subject: [PATCH V2 5/6] vDPA: answer num of queue pairs = 1 to userspace
> when VIRTIO_NET_F_MQ == 0
> 
> If VIRTIO_NET_F_MQ == 0, the virtio device should have one queue pair, so
> when userspace querying queue pair numbers, it should return mq=1 than
> zero.
> 
> Function vdpa_dev_net_config_fill() fills the attributions of the vDPA
> devices, so that it should call vdpa_dev_net_mq_config_fill() so the
> parameter in vdpa_dev_net_mq_config_fill() should be feature_device than
> feature_driver for the vDPA devices themselves
> 
> Before this change, when MQ = 0, iproute2 output:
> $vdpa dev config show vdpa0
> vdpa0: mac 00:e8:ca:11:be:05 link up link_announce false max_vq_pairs 0
> mtu 1500
> 
Max_vq_pairs should not be printed when _MQ feature is not negotiated.
Existing code that returns 0 is correct following this line of the spec.

" The following driver-read-only field, max_virtqueue_pairs only exists if 
VIRTIO_NET_F_MQ or VIRTIO_-
NET_F_RSS is set."
The field doesn't exist when _MQ is not there. Hence, it should not be printed.
Is _RSS offered and is that why you see it?

If not a fix in the iproute2/vdpa should be done.


> After applying this commit, when MQ = 0, iproute2 output:
> $vdpa dev config show vdpa0
> vdpa0: mac 00:e8:ca:11:be:05 link up link_announce false max_vq_pairs 1
> mtu 1500
> 
> Signed-off-by: Zhu Lingshan 
> ---
>  drivers/vdpa/vdpa.c | 7 ---
>  1 file changed, 4 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index
> d76b22b2f7ae..846dd37f3549 100644
> --- a/drivers/vdpa/vdpa.c
> +++ b/drivers/vdpa/vdpa.c
> @@ -806,9 +806,10 @@ static int vdpa_dev_net_mq_config_fill(struct
> vdpa_device *vdev,
>   u16 val_u16;
> 
>   if ((features & BIT_ULL(VIRTIO_NET_F_MQ)) == 0)
> - return 0;
> + val_u16 = 1;
> + else
> + val_u16 = __virtio16_to_cpu(true, config-
> >max_virtqueue_pairs);
> 
> - val_u16 = le16_to_cpu(config->max_virtqueue_pairs);
>   return nla_put_u16(msg, VDPA_ATTR_DEV_NET_CFG_MAX_VQP,
> val_u16);  }
> 
> @@ -842,7 +843,7 @@ static int vdpa_dev_net_config_fill(struct
> vdpa_device *vdev, struct sk_buff *ms
> VDPA_ATTR_PAD))
>   return -EMSGSIZE;
> 
> - return vdpa_dev_net_mq_config_fill(vdev, msg, features_driver,
> );
> + return vdpa_dev_net_mq_config_fill(vdev, msg, features_device,
> +);
>  }
> 
>  static int
> --
> 2.31.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


RE: [PATCH V2 3/6] vDPA: allow userspace to query features of a vDPA device

2022-06-13 Thread Parav Pandit via Virtualization



> From: Zhu Lingshan 
> Sent: Monday, June 13, 2022 6:17 AM
> device
> 
> This commit adds a new vDPA netlink attribution
> VDPA_ATTR_VDPA_DEV_SUPPORTED_FEATURES. Userspace can query
> features of vDPA devices through this new attr.
> 
> Signed-off-by: Zhu Lingshan 
> ---
>  drivers/vdpa/vdpa.c   | 13 +
>  include/uapi/linux/vdpa.h |  1 +
>  2 files changed, 10 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index
> ebf2f363fbe7..9b0e39b2f022 100644
> --- a/drivers/vdpa/vdpa.c
> +++ b/drivers/vdpa/vdpa.c
> @@ -815,7 +815,7 @@ static int vdpa_dev_net_mq_config_fill(struct
> vdpa_device *vdev,  static int vdpa_dev_net_config_fill(struct vdpa_device
> *vdev, struct sk_buff *msg)  {
>   struct virtio_net_config config = {};
> - u64 features;
> + u64 features_device, features_driver;
>   u16 val_u16;
> 
>   vdpa_get_config_unlocked(vdev, 0, , sizeof(config)); @@ -
> 832,12 +832,17 @@ static int vdpa_dev_net_config_fill(struct vdpa_device
> *vdev, struct sk_buff *ms
>   if (nla_put_u16(msg, VDPA_ATTR_DEV_NET_CFG_MTU, val_u16))
>   return -EMSGSIZE;
> 
> - features = vdev->config->get_driver_features(vdev);
> - if (nla_put_u64_64bit(msg,
> VDPA_ATTR_DEV_NEGOTIATED_FEATURES, features,
> + features_driver = vdev->config->get_driver_features(vdev);
> + if (nla_put_u64_64bit(msg,
> VDPA_ATTR_DEV_NEGOTIATED_FEATURES, features_driver,
> +   VDPA_ATTR_PAD))
> + return -EMSGSIZE;
> +
> + features_device = vdev->config->get_device_features(vdev);
> + if (nla_put_u64_64bit(msg,
> VDPA_ATTR_VDPA_DEV_SUPPORTED_FEATURES,
> +features_device,
> VDPA_ATTR_PAD))
>   return -EMSGSIZE;
> 
> - return vdpa_dev_net_mq_config_fill(vdev, msg, features, );
> + return vdpa_dev_net_mq_config_fill(vdev, msg, features_driver,
> +);
>  }
> 
>  static int
> diff --git a/include/uapi/linux/vdpa.h b/include/uapi/linux/vdpa.h index
> 25c55cab3d7c..39f1c3d7c112 100644
> --- a/include/uapi/linux/vdpa.h
> +++ b/include/uapi/linux/vdpa.h
> @@ -47,6 +47,7 @@ enum vdpa_attr {
>   VDPA_ATTR_DEV_NEGOTIATED_FEATURES,  /* u64 */
>   VDPA_ATTR_DEV_MGMTDEV_MAX_VQS,  /* u32 */
>   VDPA_ATTR_DEV_SUPPORTED_FEATURES,   /* u64 */

I see now what was done incorrectly with commit cd2629f6df1ca.

Above was done with wrong name prefix that missed MGMTDEV_. :(
Please don't add VDPA_ prefix due to one mistake.
Please reuse this VDPA_ATTR_DEV_SUPPORTED_FEATURES for device attribute as well.

> + VDPA_ATTR_VDPA_DEV_SUPPORTED_FEATURES,  /* u64 */
> 

>   VDPA_ATTR_DEV_QUEUE_INDEX,  /* u32 */
>   VDPA_ATTR_DEV_VENDOR_ATTR_NAME, /* string */
> --
> 2.31.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH AUTOSEL 5.15 28/41] virtio-mmio: fix missing put_device() when vm_cmdline_parent registration failed

2022-06-13 Thread Sasha Levin
From: chengkaitao 

[ Upstream commit a58a7f97ba11391d2d0d408e0b24f38d86ae748e ]

The reference must be released when device_register(_cmdline_parent)
failed. Add the corresponding 'put_device()' in the error handling path.

Signed-off-by: chengkaitao 
Message-Id: <20220602005542.16489-1-chengkai...@didiglobal.com>
Signed-off-by: Michael S. Tsirkin 
Acked-by: Jason Wang 
Signed-off-by: Sasha Levin 
---
 drivers/virtio/virtio_mmio.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index 56128b9c46eb..1dd396d4bebb 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -688,6 +688,7 @@ static int vm_cmdline_set(const char *device,
if (!vm_cmdline_parent_registered) {
err = device_register(_cmdline_parent);
if (err) {
+   put_device(_cmdline_parent);
pr_err("Failed to register parent device!\n");
return err;
}
-- 
2.35.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH AUTOSEL 5.10 22/29] virtio-mmio: fix missing put_device() when vm_cmdline_parent registration failed

2022-06-13 Thread Sasha Levin
From: chengkaitao 

[ Upstream commit a58a7f97ba11391d2d0d408e0b24f38d86ae748e ]

The reference must be released when device_register(_cmdline_parent)
failed. Add the corresponding 'put_device()' in the error handling path.

Signed-off-by: chengkaitao 
Message-Id: <20220602005542.16489-1-chengkai...@didiglobal.com>
Signed-off-by: Michael S. Tsirkin 
Acked-by: Jason Wang 
Signed-off-by: Sasha Levin 
---
 drivers/virtio/virtio_mmio.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index 238383ff1064..5c970e6f664c 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -689,6 +689,7 @@ static int vm_cmdline_set(const char *device,
if (!vm_cmdline_parent_registered) {
err = device_register(_cmdline_parent);
if (err) {
+   put_device(_cmdline_parent);
pr_err("Failed to register parent device!\n");
return err;
}
-- 
2.35.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


RE: [PATCH V2 4/6] vDPA: !FEATURES_OK should not block querying device config space

2022-06-13 Thread Parav Pandit via Virtualization



> From: Zhu Lingshan 
> Sent: Monday, June 13, 2022 6:17 AM
> 
> Users may want to query the config space of a vDPA device, to choose a
> appropriate one for a certain guest. This means the users need to read the
> config space before FEATURES_OK, and the existence of config space
> contents does not depend on FEATURES_OK.
> 
> The spec says:
> The device MUST allow reading of any device-specific configuration field
> before FEATURES_OK is set by the driver. This includes fields which are
> conditional on feature bits, as long as those feature bits are offered by the
> device.
> 
> Signed-off-by: Zhu Lingshan 
> ---
>  drivers/vdpa/vdpa.c | 8 
>  1 file changed, 8 deletions(-)
> 
> diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index
> 9b0e39b2f022..d76b22b2f7ae 100644
> --- a/drivers/vdpa/vdpa.c
> +++ b/drivers/vdpa/vdpa.c
> @@ -851,17 +851,9 @@ vdpa_dev_config_fill(struct vdpa_device *vdev,
> struct sk_buff *msg, u32 portid,  {
>   u32 device_id;
>   void *hdr;
> - u8 status;
>   int err;
> 
>   down_read(>cf_lock);
> - status = vdev->config->get_status(vdev);
> - if (!(status & VIRTIO_CONFIG_S_FEATURES_OK)) {
> - NL_SET_ERR_MSG_MOD(extack, "Features negotiation not
> completed");
> - err = -EAGAIN;
> - goto out;
> - }
> -
>   hdr = genlmsg_put(msg, portid, seq, _nl_family, flags,
> VDPA_CMD_DEV_CONFIG_GET);
>   if (!hdr) {
> --
> 2.31.1

Management interface should not mix up the device state machine checks like 
above.
Hence above code removal is better choice.
Please add fixes tag to this patch.

Reviewed-by: Parav Pandit 
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH AUTOSEL 4.9 09/12] virtio-mmio: fix missing put_device() when vm_cmdline_parent registration failed

2022-06-13 Thread Sasha Levin
From: chengkaitao 

[ Upstream commit a58a7f97ba11391d2d0d408e0b24f38d86ae748e ]

The reference must be released when device_register(_cmdline_parent)
failed. Add the corresponding 'put_device()' in the error handling path.

Signed-off-by: chengkaitao 
Message-Id: <20220602005542.16489-1-chengkai...@didiglobal.com>
Signed-off-by: Michael S. Tsirkin 
Acked-by: Jason Wang 
Signed-off-by: Sasha Levin 
---
 drivers/virtio/virtio_mmio.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index 50840984fbfa..f62da3b7c27b 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -630,6 +630,7 @@ static int vm_cmdline_set(const char *device,
if (!vm_cmdline_parent_registered) {
err = device_register(_cmdline_parent);
if (err) {
+   put_device(_cmdline_parent);
pr_err("Failed to register parent device!\n");
return err;
}
-- 
2.35.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


RE: [PATCH V2 3/6] vDPA: allow userspace to query features of a vDPA device

2022-06-13 Thread Parav Pandit via Virtualization



> From: Zhu Lingshan 
> Sent: Monday, June 13, 2022 6:17 AM
> 
> This commit adds a new vDPA netlink attribution
> VDPA_ATTR_VDPA_DEV_SUPPORTED_FEATURES. Userspace can query
> features of vDPA devices through this new attr.
> 
> Signed-off-by: Zhu Lingshan 
> ---
>  drivers/vdpa/vdpa.c   | 13 +
>  include/uapi/linux/vdpa.h |  1 +
>  2 files changed, 10 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index
> ebf2f363fbe7..9b0e39b2f022 100644
> --- a/drivers/vdpa/vdpa.c
> +++ b/drivers/vdpa/vdpa.c
> @@ -815,7 +815,7 @@ static int vdpa_dev_net_mq_config_fill(struct
> vdpa_device *vdev,  static int vdpa_dev_net_config_fill(struct vdpa_device
> *vdev, struct sk_buff *msg)  {
>   struct virtio_net_config config = {};
> - u64 features;
> + u64 features_device, features_driver;
>   u16 val_u16;
> 
>   vdpa_get_config_unlocked(vdev, 0, , sizeof(config)); @@ -
> 832,12 +832,17 @@ static int vdpa_dev_net_config_fill(struct vdpa_device
> *vdev, struct sk_buff *ms
>   if (nla_put_u16(msg, VDPA_ATTR_DEV_NET_CFG_MTU, val_u16))
>   return -EMSGSIZE;
> 
> - features = vdev->config->get_driver_features(vdev);
> - if (nla_put_u64_64bit(msg,
> VDPA_ATTR_DEV_NEGOTIATED_FEATURES, features,
> + features_driver = vdev->config->get_driver_features(vdev);
> + if (nla_put_u64_64bit(msg,
> VDPA_ATTR_DEV_NEGOTIATED_FEATURES, features_driver,
> +   VDPA_ATTR_PAD))
> + return -EMSGSIZE;
> +
> + features_device = vdev->config->get_device_features(vdev);
> + if (nla_put_u64_64bit(msg,
> VDPA_ATTR_VDPA_DEV_SUPPORTED_FEATURES,
> +features_device,
> VDPA_ATTR_PAD))
>   return -EMSGSIZE;
> 
> - return vdpa_dev_net_mq_config_fill(vdev, msg, features, );
> + return vdpa_dev_net_mq_config_fill(vdev, msg, features_driver,
> +);
>  }
> 
>  static int
> diff --git a/include/uapi/linux/vdpa.h b/include/uapi/linux/vdpa.h index
> 25c55cab3d7c..39f1c3d7c112 100644
> --- a/include/uapi/linux/vdpa.h
> +++ b/include/uapi/linux/vdpa.h
> @@ -47,6 +47,7 @@ enum vdpa_attr {
>   VDPA_ATTR_DEV_NEGOTIATED_FEATURES,  /* u64 */
>   VDPA_ATTR_DEV_MGMTDEV_MAX_VQS,  /* u32 */
>   VDPA_ATTR_DEV_SUPPORTED_FEATURES,   /* u64 */
> + VDPA_ATTR_VDPA_DEV_SUPPORTED_FEATURES,  /* u64 */
> 
How is this different than VDPA_ATTR_DEV_SUPPORTED_FEATURES?
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH AUTOSEL 5.18 31/47] virtio-mmio: fix missing put_device() when vm_cmdline_parent registration failed

2022-06-13 Thread Sasha Levin
From: chengkaitao 

[ Upstream commit a58a7f97ba11391d2d0d408e0b24f38d86ae748e ]

The reference must be released when device_register(_cmdline_parent)
failed. Add the corresponding 'put_device()' in the error handling path.

Signed-off-by: chengkaitao 
Message-Id: <20220602005542.16489-1-chengkai...@didiglobal.com>
Signed-off-by: Michael S. Tsirkin 
Acked-by: Jason Wang 
Signed-off-by: Sasha Levin 
---
 drivers/virtio/virtio_mmio.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index 56128b9c46eb..1dd396d4bebb 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -688,6 +688,7 @@ static int vm_cmdline_set(const char *device,
if (!vm_cmdline_parent_registered) {
err = device_register(_cmdline_parent);
if (err) {
+   put_device(_cmdline_parent);
pr_err("Failed to register parent device!\n");
return err;
}
-- 
2.35.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH AUTOSEL 4.19 14/18] virtio-mmio: fix missing put_device() when vm_cmdline_parent registration failed

2022-06-13 Thread Sasha Levin
From: chengkaitao 

[ Upstream commit a58a7f97ba11391d2d0d408e0b24f38d86ae748e ]

The reference must be released when device_register(_cmdline_parent)
failed. Add the corresponding 'put_device()' in the error handling path.

Signed-off-by: chengkaitao 
Message-Id: <20220602005542.16489-1-chengkai...@didiglobal.com>
Signed-off-by: Michael S. Tsirkin 
Acked-by: Jason Wang 
Signed-off-by: Sasha Levin 
---
 drivers/virtio/virtio_mmio.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index 4cd9ea5c75be..c69c755bf553 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -663,6 +663,7 @@ static int vm_cmdline_set(const char *device,
if (!vm_cmdline_parent_registered) {
err = device_register(_cmdline_parent);
if (err) {
+   put_device(_cmdline_parent);
pr_err("Failed to register parent device!\n");
return err;
}
-- 
2.35.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH AUTOSEL 4.14 11/14] virtio-mmio: fix missing put_device() when vm_cmdline_parent registration failed

2022-06-13 Thread Sasha Levin
From: chengkaitao 

[ Upstream commit a58a7f97ba11391d2d0d408e0b24f38d86ae748e ]

The reference must be released when device_register(_cmdline_parent)
failed. Add the corresponding 'put_device()' in the error handling path.

Signed-off-by: chengkaitao 
Message-Id: <20220602005542.16489-1-chengkai...@didiglobal.com>
Signed-off-by: Michael S. Tsirkin 
Acked-by: Jason Wang 
Signed-off-by: Sasha Levin 
---
 drivers/virtio/virtio_mmio.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index 74dc7170fd35..181386e06cb7 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -635,6 +635,7 @@ static int vm_cmdline_set(const char *device,
if (!vm_cmdline_parent_registered) {
err = device_register(_cmdline_parent);
if (err) {
+   put_device(_cmdline_parent);
pr_err("Failed to register parent device!\n");
return err;
}
-- 
2.35.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH AUTOSEL 5.4 19/23] virtio-mmio: fix missing put_device() when vm_cmdline_parent registration failed

2022-06-13 Thread Sasha Levin
From: chengkaitao 

[ Upstream commit a58a7f97ba11391d2d0d408e0b24f38d86ae748e ]

The reference must be released when device_register(_cmdline_parent)
failed. Add the corresponding 'put_device()' in the error handling path.

Signed-off-by: chengkaitao 
Message-Id: <20220602005542.16489-1-chengkai...@didiglobal.com>
Signed-off-by: Michael S. Tsirkin 
Acked-by: Jason Wang 
Signed-off-by: Sasha Levin 
---
 drivers/virtio/virtio_mmio.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index e09edb5c5e06..74547323aa83 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -669,6 +669,7 @@ static int vm_cmdline_set(const char *device,
if (!vm_cmdline_parent_registered) {
err = device_register(_cmdline_parent);
if (err) {
+   put_device(_cmdline_parent);
pr_err("Failed to register parent device!\n");
return err;
}
-- 
2.35.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [Pv-drivers] [PATCH 29/36] cpuidle, xenpv: Make more PARAVIRT_XXL noinstr clean

2022-06-13 Thread Nadav Amit via Virtualization
On Jun 13, 2022, at 11:48 AM, Srivatsa S. Bhat  wrote:

> ⚠ External Email
> 
> On 6/8/22 4:27 PM, Peter Zijlstra wrote:
>> vmlinux.o: warning: objtool: acpi_idle_enter_s2idle+0xde: call to wbinvd() 
>> leaves .noinstr.text section
>> vmlinux.o: warning: objtool: default_idle+0x4: call to arch_safe_halt() 
>> leaves .noinstr.text section
>> vmlinux.o: warning: objtool: xen_safe_halt+0xa: call to 
>> HYPERVISOR_sched_op.constprop.0() leaves .noinstr.text section
>> 
>> Signed-off-by: Peter Zijlstra (Intel) 
> 
> Reviewed-by: Srivatsa S. Bhat (VMware) 
> 
>> 
>> -static inline void wbinvd(void)
>> +extern noinstr void pv_native_wbinvd(void);
>> +
>> +static __always_inline void wbinvd(void)
>> {
>>  PVOP_ALT_VCALL0(cpu.wbinvd, "wbinvd", ALT_NOT(X86_FEATURE_XENPV));
>> }

I guess it is yet another instance of wrong accounting of GCC for
the assembly blocks’ weight. I guess it is not a solution for older
GCCs, but presumably PVOP_ALT_CALL() and friends should have
used asm_inline or some new “asm_volatile_inline” variant.

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v1] vduse: Tie vduse mgmtdev and its device

2022-06-13 Thread Parav Pandit via Virtualization
vduse devices are not backed by any real devices such as PCI. Hence it
doesn't have any parent device linked to it.

Kernel driver model in [1] suggests to avoid an empty device
release callback.

Hence tie the mgmtdevice object's life cycle to an allocate dummy struct
device instead of static one.

[1] 
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/core-api/kobject.rst?h=v5.18-rc7#n284

Signed-off-by: Parav Pandit 
---
changelog:
v0->v1:
- removed typo in device name of "-la"
---
 drivers/vdpa/vdpa_user/vduse_dev.c | 60 ++
 1 file changed, 37 insertions(+), 23 deletions(-)

diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c 
b/drivers/vdpa/vdpa_user/vduse_dev.c
index 776ad7496f53..3bc27de58f46 100644
--- a/drivers/vdpa/vdpa_user/vduse_dev.c
+++ b/drivers/vdpa/vdpa_user/vduse_dev.c
@@ -1476,16 +1476,12 @@ static char *vduse_devnode(struct device *dev, umode_t 
*mode)
return kasprintf(GFP_KERNEL, "vduse/%s", dev_name(dev));
 }
 
-static void vduse_mgmtdev_release(struct device *dev)
-{
-}
-
-static struct device vduse_mgmtdev = {
-   .init_name = "vduse",
-   .release = vduse_mgmtdev_release,
+struct vduse_mgmt_dev {
+   struct vdpa_mgmt_dev mgmt_dev;
+   struct device dev;
 };
 
-static struct vdpa_mgmt_dev mgmt_dev;
+static struct vduse_mgmt_dev *vduse_mgmt;
 
 static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name)
 {
@@ -1510,7 +1506,7 @@ static int vduse_dev_init_vdpa(struct vduse_dev *dev, 
const char *name)
}
set_dma_ops(>vdpa.dev, _dev_dma_ops);
vdev->vdpa.dma_dev = >vdpa.dev;
-   vdev->vdpa.mdev = _dev;
+   vdev->vdpa.mdev = _mgmt->mgmt_dev;
 
return 0;
 }
@@ -1556,34 +1552,52 @@ static struct virtio_device_id id_table[] = {
{ 0 },
 };
 
-static struct vdpa_mgmt_dev mgmt_dev = {
-   .device = _mgmtdev,
-   .id_table = id_table,
-   .ops = _dev_mgmtdev_ops,
-};
+static void vduse_mgmtdev_release(struct device *dev)
+{
+   struct vduse_mgmt_dev *mgmt_dev;
+
+   mgmt_dev = container_of(dev, struct vduse_mgmt_dev, dev);
+   kfree(mgmt_dev);
+}
 
 static int vduse_mgmtdev_init(void)
 {
int ret;
 
-   ret = device_register(_mgmtdev);
-   if (ret)
+   vduse_mgmt = kzalloc(sizeof(*vduse_mgmt), GFP_KERNEL);
+   if (!vduse_mgmt)
+   return -ENOMEM;
+
+   ret = dev_set_name(_mgmt->dev, "vduse");
+   if (ret) {
+   kfree(vduse_mgmt);
return ret;
+   }
 
-   ret = vdpa_mgmtdev_register(_dev);
+   vduse_mgmt->dev.release = vduse_mgmtdev_release;
+
+   ret = device_register(_mgmt->dev);
if (ret)
-   goto err;
+   goto dev_reg_err;
 
-   return 0;
-err:
-   device_unregister(_mgmtdev);
+   vduse_mgmt->mgmt_dev.id_table = id_table;
+   vduse_mgmt->mgmt_dev.ops = _dev_mgmtdev_ops;
+   vduse_mgmt->mgmt_dev.device = _mgmt->dev;
+   ret = vdpa_mgmtdev_register(_mgmt->mgmt_dev);
+   if (ret)
+   device_unregister(_mgmt->dev);
+
+   return ret;
+
+dev_reg_err:
+   put_device(_mgmt->dev);
return ret;
 }
 
 static void vduse_mgmtdev_exit(void)
 {
-   vdpa_mgmtdev_unregister(_dev);
-   device_unregister(_mgmtdev);
+   vdpa_mgmtdev_unregister(_mgmt->mgmt_dev);
+   device_unregister(_mgmt->dev);
 }
 
 static int vduse_init(void)
-- 
2.26.2

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


RE: [PATCH] vduse: Tie vduse mgmtdev and its device

2022-06-13 Thread Parav Pandit via Virtualization



> From: Yongji Xie 
> Sent: Monday, June 13, 2022 5:54 AM
> 
> On Mon, Jun 13, 2022 at 2:02 AM Parav Pandit  wrote:
> >
> > vduse devices are not backed by any real devices such as PCI. Hence it
> > doesn't have any parent device linked to it.
> >
> > Kernel driver model in [1] suggests to avoid an empty device release
> > callback.
> >
> > Hence tie the mgmtdevice object's life cycle to an allocate dummy
> > struct device instead of static one.
> >
> > [1]
> >
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgit.
> >
> kernel.org%2Fpub%2Fscm%2Flinux%2Fkernel%2Fgit%2Ftorvalds%2Flinux.git
> %2
> > Ftree%2FDocumentation%2Fcore-api%2Fkobject.rst%3Fh%3Dv5.18-
> rc7%23n284&
> >
> amp;data=05%7C01%7Cparav%40nvidia.com%7C6b16045f2b7a4e168bc008da
> 4d2277
> >
> f7%7C43083d15727340c1b7db39efd9ccc17a%7C0%7C0%7C6379071076788198
> 90%7CU
> >
> nknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI
> 6Ik1ha
> >
> WwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7Csdata=G8R1%2BsfBPTe4P
> sOYqrv0pAhM
> > 3qFg%2F%2BWw7GmTli8%2BNNw%3Dreserved=0
> >
> > Signed-off-by: Parav Pandit 
> > ---
> >  drivers/vdpa/vdpa_user/vduse_dev.c | 60
> > ++
> >  1 file changed, 37 insertions(+), 23 deletions(-)
> >
> > diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c
> > b/drivers/vdpa/vdpa_user/vduse_dev.c
> > index f85d1a08ed87..ebe272575fb8 100644
> > --- a/drivers/vdpa/vdpa_user/vduse_dev.c
> > +++ b/drivers/vdpa/vdpa_user/vduse_dev.c
> > @@ -1475,16 +1475,12 @@ static char *vduse_devnode(struct device
> *dev, umode_t *mode)
> > return kasprintf(GFP_KERNEL, "vduse/%s", dev_name(dev));  }
> >
> > -static void vduse_mgmtdev_release(struct device *dev) -{ -}
> > -
> > -static struct device vduse_mgmtdev = {
> > -   .init_name = "vduse",
> > -   .release = vduse_mgmtdev_release,
> > +struct vduse_mgmt_dev {
> > +   struct vdpa_mgmt_dev mgmt_dev;
> > +   struct device dev;
> >  };
> >
> > -static struct vdpa_mgmt_dev mgmt_dev;
> > +static struct vduse_mgmt_dev *vduse_mgmt;
> >
> >  static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char
> > *name)  { @@ -1509,7 +1505,7 @@ static int vduse_dev_init_vdpa(struct
> > vduse_dev *dev, const char *name)
> > }
> > set_dma_ops(>vdpa.dev, _dev_dma_ops);
> > vdev->vdpa.dma_dev = >vdpa.dev;
> > -   vdev->vdpa.mdev = _dev;
> > +   vdev->vdpa.mdev = _mgmt->mgmt_dev;
> >
> > return 0;
> >  }
> > @@ -1555,34 +1551,52 @@ static struct virtio_device_id id_table[] = {
> > { 0 },
> >  };
> >
> > -static struct vdpa_mgmt_dev mgmt_dev = {
> > -   .device = _mgmtdev,
> > -   .id_table = id_table,
> > -   .ops = _dev_mgmtdev_ops,
> > -};
> > +static void vduse_mgmtdev_release(struct device *dev) {
> > +   struct vduse_mgmt_dev *mgmt_dev;
> > +
> > +   mgmt_dev = container_of(dev, struct vduse_mgmt_dev, dev);
> > +   kfree(mgmt_dev);
> > +}
> >
> >  static int vduse_mgmtdev_init(void)
> >  {
> > int ret;
> >
> > -   ret = device_register(_mgmtdev);
> > -   if (ret)
> > +   vduse_mgmt = kzalloc(sizeof(*vduse_mgmt), GFP_KERNEL);
> > +   if (!vduse_mgmt)
> > +   return -ENOMEM;
> > +
> > +   ret = dev_set_name(_mgmt->dev, "vduse-la");
> 
> Do we need to keep using "vduse" as the device name. We have
> documented it in qemu docs.
Yes. I added suffix of -la while testing to ensure a new code in action.
I missed to remove it.
Will send v2 with the correction.
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH 1/2] vdpa/mlx5: Update Control VQ callback information

2022-06-13 Thread Jason Wang
On Mon, Jun 13, 2022 at 4:00 PM Eli Cohen  wrote:
>
> The control VQ specific information is stored in the dedicated struct
> mlx5_control_vq. When the callback is updated through
> mlx5_vdpa_set_vq_cb(), make sure to update the control VQ struct.
>
> Fixes: 5262912ef3cf ("vdpa/mlx5: Add support for control VQ and MAC setting")
> Signed-off-by: Eli Cohen 
> ---
>  drivers/vdpa/mlx5/net/mlx5_vnet.c | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c 
> b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> index 1b6d46b86f81..789c078ff1af 100644
> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> @@ -1962,6 +1962,8 @@ static void mlx5_vdpa_set_vq_cb(struct vdpa_device 
> *vdev, u16 idx, struct vdpa_c
> struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
>
> ndev->event_cbs[idx] = *cb;
> +   if (is_ctrl_vq_idx(mvdev, idx))
> +   mvdev->cvq.event_cb = *cb;
>  }
>

Acked-by: Jason Wang   static void mlx5_cvq_notify(struct vringh *vring)
> --
> 2.35.1
>

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH V6 8/9] virtio: harden vring IRQ

2022-06-13 Thread Michael S. Tsirkin
On Mon, Jun 13, 2022 at 04:07:09PM +0800, Jason Wang wrote:
> On Mon, Jun 13, 2022 at 3:23 PM Michael S. Tsirkin  wrote:
> >
> > On Mon, Jun 13, 2022 at 01:26:59PM +0800, Jason Wang wrote:
> > > On Sat, Jun 11, 2022 at 1:12 PM Michael S. Tsirkin  
> > > wrote:
> > > >
> > > > On Fri, May 27, 2022 at 02:01:19PM +0800, Jason Wang wrote:
> > > > > This is a rework on the previous IRQ hardening that is done for
> > > > > virtio-pci where several drawbacks were found and were reverted:
> > > > >
> > > > > 1) try to use IRQF_NO_AUTOEN which is not friendly to affinity 
> > > > > managed IRQ
> > > > >that is used by some device such as virtio-blk
> > > > > 2) done only for PCI transport
> > > > >
> > > > > The vq->broken is re-used in this patch for implementing the IRQ
> > > > > hardening. The vq->broken is set to true during both initialization
> > > > > and reset. And the vq->broken is set to false in
> > > > > virtio_device_ready(). Then vring_interrupt() can check and return
> > > > > when vq->broken is true. And in this case, switch to return IRQ_NONE
> > > > > to let the interrupt core aware of such invalid interrupt to prevent
> > > > > IRQ storm.
> > > > >
> > > > > The reason of using a per queue variable instead of a per device one
> > > > > is that we may need it for per queue reset hardening in the future.
> > > > >
> > > > > Note that the hardening is only done for vring interrupt since the
> > > > > config interrupt hardening is already done in commit 22b7050a024d7
> > > > > ("virtio: defer config changed notifications"). But the method that is
> > > > > used by config interrupt can't be reused by the vring interrupt
> > > > > handler because it uses spinlock to do the synchronization which is
> > > > > expensive.
> > > > >
> > > > > Cc: Thomas Gleixner 
> > > > > Cc: Peter Zijlstra 
> > > > > Cc: "Paul E. McKenney" 
> > > > > Cc: Marc Zyngier 
> > > > > Cc: Halil Pasic 
> > > > > Cc: Cornelia Huck 
> > > > > Cc: Vineeth Vijayan 
> > > > > Cc: Peter Oberparleiter 
> > > > > Cc: linux-s...@vger.kernel.org
> > > > > Signed-off-by: Jason Wang 
> > > >
> > > >
> > > > Jason, I am really concerned by all the fallout.
> > > > I propose adding a flag to suppress the hardening -
> > > > this will be a debugging aid and a work around for
> > > > users if we find more buggy drivers.
> > > >
> > > > suppress_interrupt_hardening ?
> > >
> > > I can post a patch but I'm afraid if we disable it by default, it
> > > won't be used by the users so there's no way for us to receive the bug
> > > report. Or we need a plan to enable it by default.
> > >
> > > It's rc2, how about waiting for 1 and 2 rc? Or it looks better if we
> > > simply warn instead of disable it by default.
> > >
> > > Thanks
> >
> > I meant more like a flag in struct virtio_driver.
> > For now, could you audit all drivers which don't call _ready?
> > I found 5 of these:
> >
> > drivers/bluetooth/virtio_bt.c
> 
> This driver seems to be fine, it doesn't use the device/vq in its probe().


But it calls hci_register_dev and that in turn queues all kind of
work. Also, can linux start using the device immediately after
it's registered?


> > drivers/gpu/drm/virtio/virtgpu_drv.c
> 
> It calles virtio_device_ready() in virtio_gpu_init(), and it looks to
> me the code is correct.

OK.

> > drivers/i2c/busses/i2c-virtio.c
> > drivers/net/caif/caif_virtio.c
> > drivers/nvdimm/virtio_pmem.c
> 
> The above looks fine and we have three more:
> 
> arm_scmi: probe() doesn't use vq
> mac80211_hwsim.c: doesn't use vq (only fill rx), but it kicks the rx,
> it looks to me we need a device_ready before the kick.
> virtio_rpmsg_bus.c: doesn't use vq
> 
> I will post a patch for mac80211_hwsim.c.
> Thanks

Same comments for all of the above. Might linux not start using the
device once it's registered?

> >
> >
> >
> >
> > > >
> > > >
> > > > > ---
> > > > >  drivers/s390/virtio/virtio_ccw.c   |  4 
> > > > >  drivers/virtio/virtio.c| 15 ---
> > > > >  drivers/virtio/virtio_mmio.c   |  5 +
> > > > >  drivers/virtio/virtio_pci_modern_dev.c |  5 +
> > > > >  drivers/virtio/virtio_ring.c   | 11 +++
> > > > >  include/linux/virtio_config.h  | 20 
> > > > >  6 files changed, 53 insertions(+), 7 deletions(-)
> > > > >
> > > > > diff --git a/drivers/s390/virtio/virtio_ccw.c 
> > > > > b/drivers/s390/virtio/virtio_ccw.c
> > > > > index c188e4f20ca3..97e51c34e6cf 100644
> > > > > --- a/drivers/s390/virtio/virtio_ccw.c
> > > > > +++ b/drivers/s390/virtio/virtio_ccw.c
> > > > > @@ -971,6 +971,10 @@ static void virtio_ccw_set_status(struct 
> > > > > virtio_device *vdev, u8 status)
> > > > >   ccw->flags = 0;
> > > > >   ccw->count = sizeof(status);
> > > > >   ccw->cda = (__u32)(unsigned long)>dma_area->status;
> > > > > + /* We use ssch for setting the status which is a serializing
> > > > > +  * instruction that guarantees the memory writes have
> > > > > +

Re: [PATCH 21/36] x86/tdx: Remove TDX_HCALL_ISSUE_STI

2022-06-13 Thread Peter Zijlstra
On Mon, Jun 13, 2022 at 04:26:01PM +0800, Lai Jiangshan wrote:
> On Wed, Jun 8, 2022 at 10:48 PM Peter Zijlstra  wrote:
> >
> > Now that arch_cpu_idle() is expected to return with IRQs disabled,
> > avoid the useless STI/CLI dance.
> >
> > Per the specs this is supposed to work, but nobody has yet relied up
> > this behaviour so broken implementations are possible.
> 
> I'm totally newbie here.
> 
> The point of safe_halt() is that STI must be used and be used
> directly before HLT to enable IRQ during the halting and stop
> the halting if there is any IRQ.

Correct; on real hardware. But this is virt...

> In TDX case, STI must be used directly before the hypercall.
> Otherwise, no IRQ can come and the vcpu would be stalled forever.
> 
> Although the hypercall has an "irq_disabled" argument.
> But the hypervisor doesn't (and can't) touch the IRQ flags no matter
> what the "irq_disabled" argument is.  The IRQ is not enabled during
> the halting if the IRQ is disabled before the hypercall even if
> irq_disabled=false.

All we need the VMM to do is wake the vCPU, and it can do that,
irrespective of the guest's IF.

So the VMM can (and does) know if there's an interrupt pending, and
that's all that's needed to wake from this hypercall. Once the vCPU is
back up and running again, we'll eventually set IF again and the pending
interrupt will get delivered and all's well.

Think of this like MWAIT with ECX[0] set if you will.
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [RFC PATCH v2 0/8] virtio/vsock: experimental zerocopy receive

2022-06-13 Thread Stefano Garzarella

On Thu, Jun 09, 2022 at 12:33:32PM +, Arseniy Krasnov wrote:

On 09.06.2022 11:54, Stefano Garzarella wrote:

Hi Arseniy,
I left some comments in the patches, and I'm adding something also here:

Thanks for comments


On Fri, Jun 03, 2022 at 05:27:56AM +, Arseniy Krasnov wrote:

 INTRODUCTION

Hello, this is experimental implementation of virtio vsock zerocopy
receive. It was inspired by TCP zerocopy receive by Eric Dumazet. This API uses
same idea: call 'mmap()' on socket's descriptor, then every 'getsockopt()' will
fill provided vma area with pages of virtio RX buffers. After received data was
processed by user, pages must be freed by 'madvise()'  call with MADV_DONTNEED
flag set(if user won't call 'madvise()', next 'getsockopt()' will fail).


If it is not too time-consuming, can we have a table/list to compare this and 
the TCP zerocopy?

You mean compare API with more details?


Yes, maybe a comparison from the user's point of view to do zero-copy 
with TCP and VSOCK.






    DETAILS

Here is how mapping with mapped pages looks exactly: first page mapping
contains array of trimmed virtio vsock packet headers (in contains only length
of data on the corresponding page and 'flags' field):

struct virtio_vsock_usr_hdr {
    uint32_t length;
    uint32_t flags;
    uint32_t copy_len;
};

Field  'length' allows user to know exact size of payload within each sequence
of pages and 'flags' allows user to handle SOCK_SEQPACKET flags(such as message
bounds or record bounds). Field 'copy_len' is described below in 'v1->v2' part.
All other pages are data pages from RX queue.

    Page 0  Page 1  Page N

[ hdr1 .. hdrN ][ data ] .. [ data ]
  |    |   ^   ^
  |    |   |   |
  |    *---*
  |    |
  |    |
  **

Of course, single header could represent array of pages (when packet's
buffer is bigger than one page).So here is example of detailed mapping layout
for some set of packages. Lets consider that we have the following sequence  of
packages: 56 bytes, 4096 bytes and 8200 bytes. All pages: 0,1,2,3,4 and 5 will
be inserted to user's vma(vma is large enough).


In order to have a "userspace polling-friendly approach" and reduce number of 
syscall, can we allow for example the userspace to mmap at least the first header before 
packets arrive.
Then the userspace can poll a flag or other fields in the header to understand 
that there are new packets.

You mean to avoid 'poll()' syscall, user will spin on some flag, provided by 
kernel on some mapped page? I think yes. This is ok. Also i think, that i can 
avoid 'madvise' call
to clear memory mapping before each 'getsockopt()' - let 'getsockopt()' do 
'madvise()' job by removing pages from previous data. In this case only one 
system call is needed - 'getsockopt()'.


Yes, that's right. I mean to support both, poll() for interrupt-based 
applications and the ability to actively poll a variable in the shared 
memory for applications that want to minimize latency.


Thanks,
Stefano

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH] virtio_ring : fix vring_packed_desc memory out of bounds bug

2022-06-13 Thread Michael S. Tsirkin
On Mon, Jun 13, 2022 at 02:56:19PM +0800, Jason Wang wrote:
> On Fri, Jun 10, 2022 at 10:51 PM Michael S. Tsirkin  wrote:
> >
> > On Fri, Jun 10, 2022 at 06:33:14PM +0800, huangjie.albert wrote:
> > > ksoftirqd may consume the packet and it will call:
> > > virtnet_poll
> > >   -->virtnet_receive
> > >   -->virtqueue_get_buf_ctx
> > >   -->virtqueue_get_buf_ctx_packed
> > > and in virtqueue_get_buf_ctx_packed:
> > >
> > > vq->last_used_idx += vq->packed.desc_state[id].num;
> > > if (unlikely(vq->last_used_idx >= vq->packed.vring.num)) {
> > >  vq->last_used_idx -= vq->packed.vring.num;
> > >  vq->packed.used_wrap_counter ^= 1;
> > > }
> > >
> > > if at the same time, there comes a vring interrupt,in vring_interrupt:
> > > we will call:
> > > vring_interrupt
> > >   -->more_used
> > >   -->more_used_packed
> > >   -->is_used_desc_packed
> > > in is_used_desc_packed, the last_used_idx maybe >= vq->packed.vring.num.
> > > so this could case a memory out of bounds bug.
> > >
> > > this patch is to fix this.
> > >
> > > Signed-off-by: huangjie.albert 
> >
> >
> > This pattern was always iffy, but I don't think the patch
> > improves the situation much. last_used_idx and vq->packed.used_wrap_counter
> > can still get out of sync.
> >
> > Maybe refactor code to keep everything in vq->last_used_idx?
> >
> > Jason what is your take?
> 
> I think we can do this since 16bit/32bit operations are guaranteed to be 
> atomic.
> 
> Thanks


OK. Who's working on this?

> >
> >
> >
> >
> >
> > > ---
> > >  drivers/virtio/virtio_ring.c | 3 +++
> > >  1 file changed, 3 insertions(+)
> > >
> > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > index 13a7348cedff..d2abbb3a8187 100644
> > > --- a/drivers/virtio/virtio_ring.c
> > > +++ b/drivers/virtio/virtio_ring.c
> > > @@ -1397,6 +1397,9 @@ static inline bool is_used_desc_packed(const struct 
> > > vring_virtqueue *vq,
> > >   bool avail, used;
> > >   u16 flags;
> > >
> > > + if (idx >= vq->packed.vring.num)
> > > + return false;
> > > +
> > >   flags = le16_to_cpu(vq->packed.vring.desc[idx].flags);
> > >   avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
> > >   used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
> > > --
> > > 2.27.0
> >

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH] virtio_ring : fix vring_packed_desc memory out of bounds bug

2022-06-13 Thread Michael S. Tsirkin
On Mon, Jun 13, 2022 at 10:17:58PM +0800, 黄杰 wrote:
> OK, thank you for the explanation, I get it. By the way, if we could
> fix the  the OOB access first, it seems that even if we do this, the
> OOB access also exists.
> > I think we can do this since 16bit/32bit operations are guaranteed to be 
> > atomic.
> 
> BR

Presumably as we are reworking the code we'll make sure any value
written into last_used_idx is within bounds.

-- 
MST

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH] virtio_ring : fix vring_packed_desc memory out of bounds bug

2022-06-13 Thread Michael S. Tsirkin
On Mon, Jun 13, 2022 at 09:47:55PM +0800, 黄杰 wrote:
> Michael S. Tsirkin  于2022年6月13日周一 16:55写道:
> >
> > On Mon, Jun 13, 2022 at 04:44:03PM +0800, 黄杰 wrote:
> > > Michael S. Tsirkin  于2022年6月12日周日 22:13写道:
> > > >
> > > > On Sun, Jun 12, 2022 at 07:02:25PM +0800, 黄杰 wrote:
> > > > > Michael S. Tsirkin  于2022年6月11日周六 08:35写道:
> > > > > >
> > > > > > On Sat, Jun 11, 2022 at 12:38:10AM +0800, 黄杰 wrote:
> > > > > > > > This pattern was always iffy, but I don't think the patch
> > > > > > > > improves the situation much. last_used_idx and 
> > > > > > > > vq->packed.used_wrap_counter
> > > > > > > > can still get out of sync.
> > > > > > >
> > > > > > > Yes, You are absolutely correct, thanks for pointing out this 
> > > > > > > issue, I
> > > > > > > didn't take that into consideration,
> > > > > > > how about disabling interrupts before this code below:
> > > > > > >
> > > > > > > > vq->last_used_idx += vq->packed.desc_state[id].num;
> > > > > > > > if (unlikely(vq->last_used_idx >= vq->packed.vring.num)) {
> > > > > > > >  vq->last_used_idx -= vq->packed.vring.num;
> > > > > > > >  vq->packed.used_wrap_counter ^= 1;
> > > > > > > > }
> > > > > > >
> > > > > > > it seems to be fine to just turn off the interrupts of the 
> > > > > > > current vring.
> > > > > > >
> > > > > > > BR
> > > > > >
> > > > > > That would make datapath significantly slower.
> > > > > >
> > > > > > >
> > > > > > > Michael S. Tsirkin  于2022年6月10日周五 22:50写道:
> > > > > > > >
> > > > > > > > On Fri, Jun 10, 2022 at 06:33:14PM +0800, huangjie.albert wrote:
> > > > > > > > > ksoftirqd may consume the packet and it will call:
> > > > > > > > > virtnet_poll
> > > > > > > > >   -->virtnet_receive
> > > > > > > > >   -->virtqueue_get_buf_ctx
> > > > > > > > >   -->virtqueue_get_buf_ctx_packed
> > > > > > > > > and in virtqueue_get_buf_ctx_packed:
> > > > > > > > >
> > > > > > > > > vq->last_used_idx += vq->packed.desc_state[id].num;
> > > > > > > > > if (unlikely(vq->last_used_idx >= vq->packed.vring.num)) {
> > > > > > > > >  vq->last_used_idx -= vq->packed.vring.num;
> > > > > > > > >  vq->packed.used_wrap_counter ^= 1;
> > > > > > > > > }
> > > > > > > > >
> > > > > > > > > if at the same time, there comes a vring interrupt,in 
> > > > > > > > > vring_interrupt:
> > > > > > > > > we will call:
> > > > > > > > > vring_interrupt
> > > > > > > > >   -->more_used
> > > > > > > > >   -->more_used_packed
> > > > > > > > >   -->is_used_desc_packed
> > > > > > > > > in is_used_desc_packed, the last_used_idx maybe >= 
> > > > > > > > > vq->packed.vring.num.
> > > > > > > > > so this could case a memory out of bounds bug.
> > > > > > > > >
> > > > > > > > > this patch is to fix this.
> > > > > > > > >
> > > > > > > > > Signed-off-by: huangjie.albert 
> > > > > > > >
> > > > > > > >
> > > > > > > > This pattern was always iffy, but I don't think the patch
> > > > > > > > improves the situation much. last_used_idx and 
> > > > > > > > vq->packed.used_wrap_counter
> > > > > > > > can still get out of sync.
> > > > > > > >
> > > > > > > > Maybe refactor code to keep everything in vq->last_used_idx?
> > > > > > > >
> > > > > > > > Jason what is your take?
> > > > > > > >
> > > > > > > >
> > > > > > > > > ---
> > > > > > > > >  drivers/virtio/virtio_ring.c | 3 +++
> > > > > > > > >  1 file changed, 3 insertions(+)
> > > > > > > > >
> > > > > > > > > diff --git a/drivers/virtio/virtio_ring.c 
> > > > > > > > > b/drivers/virtio/virtio_ring.c
> > > > > > > > > index 13a7348cedff..d2abbb3a8187 100644
> > > > > > > > > --- a/drivers/virtio/virtio_ring.c
> > > > > > > > > +++ b/drivers/virtio/virtio_ring.c
> > > > > > > > > @@ -1397,6 +1397,9 @@ static inline bool 
> > > > > > > > > is_used_desc_packed(const struct vring_virtqueue *vq,
> > > > > > > > >   bool avail, used;
> > > > > > > > >   u16 flags;
> > > > > > > > >
> > > > > > > > > + if (idx >= vq->packed.vring.num)
> > > > > > > > > + return false;
> > > > > > > > > +
> > > > > > > > >   flags = le16_to_cpu(vq->packed.vring.desc[idx].flags);
> > > > > > > > >   avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
> > > > > > > > >   used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
> > > > > > > > > --
> > > > > > > > > 2.27.0
> > > > > > > >
> > > > > >
> > > > >
> > > > > Michael S , thanks for your correction, there may be another simple
> > > > > solution here:
> > > > >
> > > > > diff --git a/drivers/virtio/virtio_ring.c 
> > > > > b/drivers/virtio/virtio_ring.c
> > > > > index 13a7348cedff..4db4db19f94a 100644
> > > > > --- a/drivers/virtio/virtio_ring.c
> > > > > +++ b/drivers/virtio/virtio_ring.c
> > > > > @@ -1397,6 +1397,9 @@ static inline bool is_used_desc_packed(const
> > > > > struct vring_virtqueue *vq,
> > > > > bool avail, used;
> > > > > u16 flags;
> > > > >
> > > > > +   if (idx >= vq->packed.vring.num)
> > > > > + 

Re: [PATCH] virtio_ring : fix vring_packed_desc memory out of bounds bug

2022-06-13 Thread Jason Wang
On Fri, Jun 10, 2022 at 10:51 PM Michael S. Tsirkin  wrote:
>
> On Fri, Jun 10, 2022 at 06:33:14PM +0800, huangjie.albert wrote:
> > ksoftirqd may consume the packet and it will call:
> > virtnet_poll
> >   -->virtnet_receive
> >   -->virtqueue_get_buf_ctx
> >   -->virtqueue_get_buf_ctx_packed
> > and in virtqueue_get_buf_ctx_packed:
> >
> > vq->last_used_idx += vq->packed.desc_state[id].num;
> > if (unlikely(vq->last_used_idx >= vq->packed.vring.num)) {
> >  vq->last_used_idx -= vq->packed.vring.num;
> >  vq->packed.used_wrap_counter ^= 1;
> > }
> >
> > if at the same time, there comes a vring interrupt,in vring_interrupt:
> > we will call:
> > vring_interrupt
> >   -->more_used
> >   -->more_used_packed
> >   -->is_used_desc_packed
> > in is_used_desc_packed, the last_used_idx maybe >= vq->packed.vring.num.
> > so this could case a memory out of bounds bug.
> >
> > this patch is to fix this.
> >
> > Signed-off-by: huangjie.albert 
>
>
> This pattern was always iffy, but I don't think the patch
> improves the situation much. last_used_idx and vq->packed.used_wrap_counter
> can still get out of sync.
>
> Maybe refactor code to keep everything in vq->last_used_idx?
>
> Jason what is your take?

I think we can do this since 16bit/32bit operations are guaranteed to be atomic.

Thanks

>
>
>
>
>
> > ---
> >  drivers/virtio/virtio_ring.c | 3 +++
> >  1 file changed, 3 insertions(+)
> >
> > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > index 13a7348cedff..d2abbb3a8187 100644
> > --- a/drivers/virtio/virtio_ring.c
> > +++ b/drivers/virtio/virtio_ring.c
> > @@ -1397,6 +1397,9 @@ static inline bool is_used_desc_packed(const struct 
> > vring_virtqueue *vq,
> >   bool avail, used;
> >   u16 flags;
> >
> > + if (idx >= vq->packed.vring.num)
> > + return false;
> > +
> >   flags = le16_to_cpu(vq->packed.vring.desc[idx].flags);
> >   avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
> >   used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
> > --
> > 2.27.0
>

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH V6 8/9] virtio: harden vring IRQ

2022-06-13 Thread Michael S. Tsirkin
On Mon, Jun 13, 2022 at 05:14:59PM +0800, Jason Wang wrote:
> On Mon, Jun 13, 2022 at 5:08 PM Jason Wang  wrote:
> >
> > On Mon, Jun 13, 2022 at 4:59 PM Michael S. Tsirkin  wrote:
> > >
> > > On Mon, Jun 13, 2022 at 04:51:08PM +0800, Jason Wang wrote:
> > > > On Mon, Jun 13, 2022 at 4:19 PM Michael S. Tsirkin  
> > > > wrote:
> > > > >
> > > > > On Mon, Jun 13, 2022 at 04:07:09PM +0800, Jason Wang wrote:
> > > > > > On Mon, Jun 13, 2022 at 3:23 PM Michael S. Tsirkin 
> > > > > >  wrote:
> > > > > > >
> > > > > > > On Mon, Jun 13, 2022 at 01:26:59PM +0800, Jason Wang wrote:
> > > > > > > > On Sat, Jun 11, 2022 at 1:12 PM Michael S. Tsirkin 
> > > > > > > >  wrote:
> > > > > > > > >
> > > > > > > > > On Fri, May 27, 2022 at 02:01:19PM +0800, Jason Wang wrote:
> > > > > > > > > > This is a rework on the previous IRQ hardening that is done 
> > > > > > > > > > for
> > > > > > > > > > virtio-pci where several drawbacks were found and were 
> > > > > > > > > > reverted:
> > > > > > > > > >
> > > > > > > > > > 1) try to use IRQF_NO_AUTOEN which is not friendly to 
> > > > > > > > > > affinity managed IRQ
> > > > > > > > > >that is used by some device such as virtio-blk
> > > > > > > > > > 2) done only for PCI transport
> > > > > > > > > >
> > > > > > > > > > The vq->broken is re-used in this patch for implementing 
> > > > > > > > > > the IRQ
> > > > > > > > > > hardening. The vq->broken is set to true during both 
> > > > > > > > > > initialization
> > > > > > > > > > and reset. And the vq->broken is set to false in
> > > > > > > > > > virtio_device_ready(). Then vring_interrupt() can check and 
> > > > > > > > > > return
> > > > > > > > > > when vq->broken is true. And in this case, switch to return 
> > > > > > > > > > IRQ_NONE
> > > > > > > > > > to let the interrupt core aware of such invalid interrupt 
> > > > > > > > > > to prevent
> > > > > > > > > > IRQ storm.
> > > > > > > > > >
> > > > > > > > > > The reason of using a per queue variable instead of a per 
> > > > > > > > > > device one
> > > > > > > > > > is that we may need it for per queue reset hardening in the 
> > > > > > > > > > future.
> > > > > > > > > >
> > > > > > > > > > Note that the hardening is only done for vring interrupt 
> > > > > > > > > > since the
> > > > > > > > > > config interrupt hardening is already done in commit 
> > > > > > > > > > 22b7050a024d7
> > > > > > > > > > ("virtio: defer config changed notifications"). But the 
> > > > > > > > > > method that is
> > > > > > > > > > used by config interrupt can't be reused by the vring 
> > > > > > > > > > interrupt
> > > > > > > > > > handler because it uses spinlock to do the synchronization 
> > > > > > > > > > which is
> > > > > > > > > > expensive.
> > > > > > > > > >
> > > > > > > > > > Cc: Thomas Gleixner 
> > > > > > > > > > Cc: Peter Zijlstra 
> > > > > > > > > > Cc: "Paul E. McKenney" 
> > > > > > > > > > Cc: Marc Zyngier 
> > > > > > > > > > Cc: Halil Pasic 
> > > > > > > > > > Cc: Cornelia Huck 
> > > > > > > > > > Cc: Vineeth Vijayan 
> > > > > > > > > > Cc: Peter Oberparleiter 
> > > > > > > > > > Cc: linux-s...@vger.kernel.org
> > > > > > > > > > Signed-off-by: Jason Wang 
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > Jason, I am really concerned by all the fallout.
> > > > > > > > > I propose adding a flag to suppress the hardening -
> > > > > > > > > this will be a debugging aid and a work around for
> > > > > > > > > users if we find more buggy drivers.
> > > > > > > > >
> > > > > > > > > suppress_interrupt_hardening ?
> > > > > > > >
> > > > > > > > I can post a patch but I'm afraid if we disable it by default, 
> > > > > > > > it
> > > > > > > > won't be used by the users so there's no way for us to receive 
> > > > > > > > the bug
> > > > > > > > report. Or we need a plan to enable it by default.
> > > > > > > >
> > > > > > > > It's rc2, how about waiting for 1 and 2 rc? Or it looks better 
> > > > > > > > if we
> > > > > > > > simply warn instead of disable it by default.
> > > > > > > >
> > > > > > > > Thanks
> > > > > > >
> > > > > > > I meant more like a flag in struct virtio_driver.
> > > > > > > For now, could you audit all drivers which don't call _ready?
> > > > > > > I found 5 of these:
> > > > > > >
> > > > > > > drivers/bluetooth/virtio_bt.c
> > > > > >
> > > > > > This driver seems to be fine, it doesn't use the device/vq in its 
> > > > > > probe().
> > > > >
> > > > >
> > > > > But it calls hci_register_dev and that in turn queues all kind of
> > > > > work. Also, can linux start using the device immediately after
> > > > > it's registered?
> > > >
> > > > So I think the driver is allowed to queue before DRIVER_OK.
> > >
> > > it's not allowed to kick
> >
> > Yes.
> >
> > >
> > > > If yes,
> > > > the only side effect is the delay of the tx interrupt after DRIVER_OK
> > > > for a well behaved device.
> > >
> > > your patches drop the interrupt though, it won't be just delayed.
> >
> > For a well behaved device, it can only trigger the 

Re: [PATCH RFC v1 4/7] swiotlb: to implement io_tlb_high_mem

2022-06-13 Thread Christoph Hellwig
On Fri, Jun 10, 2022 at 02:56:08PM -0700, Dongli Zhang wrote:
> Since this patch file has 200+ lines, would you please help clarify what does
> 'this' indicate?

This indicates that any choice of a different swiotlb pools needs to
be hidden inside of ѕwiotlb.  The dma mapping API already provides
swiotlb the addressability requirement for the device.  Similarly we
already have a SWIOTLB_ANY flag that switches to a 64-bit buffer
by default, which we can change to, or replace with a flag that
allocates an additional buffer that is not addressing limited.
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH] Bluetooth: virtio_bt: fix device removal

2022-06-13 Thread Michael S. Tsirkin
On Fri, Jan 14, 2022 at 03:12:47PM -0500, Michael S. Tsirkin wrote:
> On Thu, Dec 16, 2021 at 08:58:31PM +0100, Marcel Holtmann wrote:
> > Hi Michael,
> > 
> > > Device removal is clearly out of virtio spec: it attempts to remove
> > > unused buffers from a VQ before invoking device reset. To fix, make
> > > open/close NOPs and do all cleanup/setup in probe/remove.
> >  
> >  so the virtbt_{open,close} as NOP is not really what a driver is 
> >  suppose
> >  to be doing. These are transport enable/disable callbacks from the BT
> >  Core towards the driver. It maps to a device being enabled/disabled by
> >  something like bluetoothd for example. So if disabled, I expect that no
> >  resources/queues are in use.
> >  
> >  Maybe I misunderstand the virtio spec in that regard, but I would like
> >  to keep this fundamental concept of a Bluetooth driver. It does work
> >  with all other transports like USB, SDIO, UART etc.
> >  
> > > The cost here is a single skb wasted on an unused bt device - which
> > > seems modest.
> >  
> >  There should be no buffer used if the device is powered off. We also 
> >  don’t
> >  have any USB URBs in-flight if the transport is not active.
> >  
> > > NB: with this fix in place driver still suffers from a race condition 
> > > if
> > > an interrupt triggers while device is being reset. Work on a fix for
> > > that issue is in progress.
> >  
> >  In the virtbt_close() callback we should deactivate all interrupts.
> >  
> >  Regards
> >  
> >  Marcel
> > >>> 
> > >>> So Marcel, do I read it right that you are working on a fix
> > >>> and I can drop this patch for now?
> > >> 
> > >> ping
> > > 
> > > 
> > > If I don't hear otherwise I'll queue my version - it might not
> > > be ideal but it at least does not violate the spec.
> > > We can work on not allocating/freeing buffers later
> > > as appropriate.
> > 
> > I have a patch, but it is not fully tested yet.
> > 
> > Regards
> > 
> > Marcel
> 
> ping
> 
> it's been a month ...
> 
> I'm working on cleaning up module/device removal in virtio and bt
> is kind of sticking out.

I am inclined to make this driver depend on BROKEN for now.
Any objections?


> -- 
> MST

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH V6 8/9] virtio: harden vring IRQ

2022-06-13 Thread Michael S. Tsirkin
On Mon, Jun 13, 2022 at 01:26:59PM +0800, Jason Wang wrote:
> On Sat, Jun 11, 2022 at 1:12 PM Michael S. Tsirkin  wrote:
> >
> > On Fri, May 27, 2022 at 02:01:19PM +0800, Jason Wang wrote:
> > > This is a rework on the previous IRQ hardening that is done for
> > > virtio-pci where several drawbacks were found and were reverted:
> > >
> > > 1) try to use IRQF_NO_AUTOEN which is not friendly to affinity managed IRQ
> > >that is used by some device such as virtio-blk
> > > 2) done only for PCI transport
> > >
> > > The vq->broken is re-used in this patch for implementing the IRQ
> > > hardening. The vq->broken is set to true during both initialization
> > > and reset. And the vq->broken is set to false in
> > > virtio_device_ready(). Then vring_interrupt() can check and return
> > > when vq->broken is true. And in this case, switch to return IRQ_NONE
> > > to let the interrupt core aware of such invalid interrupt to prevent
> > > IRQ storm.
> > >
> > > The reason of using a per queue variable instead of a per device one
> > > is that we may need it for per queue reset hardening in the future.
> > >
> > > Note that the hardening is only done for vring interrupt since the
> > > config interrupt hardening is already done in commit 22b7050a024d7
> > > ("virtio: defer config changed notifications"). But the method that is
> > > used by config interrupt can't be reused by the vring interrupt
> > > handler because it uses spinlock to do the synchronization which is
> > > expensive.
> > >
> > > Cc: Thomas Gleixner 
> > > Cc: Peter Zijlstra 
> > > Cc: "Paul E. McKenney" 
> > > Cc: Marc Zyngier 
> > > Cc: Halil Pasic 
> > > Cc: Cornelia Huck 
> > > Cc: Vineeth Vijayan 
> > > Cc: Peter Oberparleiter 
> > > Cc: linux-s...@vger.kernel.org
> > > Signed-off-by: Jason Wang 
> >
> >
> > Jason, I am really concerned by all the fallout.
> > I propose adding a flag to suppress the hardening -
> > this will be a debugging aid and a work around for
> > users if we find more buggy drivers.
> >
> > suppress_interrupt_hardening ?
> 
> I can post a patch but I'm afraid if we disable it by default, it
> won't be used by the users so there's no way for us to receive the bug
> report. Or we need a plan to enable it by default.
> 
> It's rc2, how about waiting for 1 and 2 rc? Or it looks better if we
> simply warn instead of disable it by default.
> 
> Thanks

I meant more like a flag in struct virtio_driver.
For now, could you audit all drivers which don't call _ready?
I found 5 of these:

drivers/bluetooth/virtio_bt.c
drivers/gpu/drm/virtio/virtgpu_drv.c
drivers/i2c/busses/i2c-virtio.c
drivers/net/caif/caif_virtio.c
drivers/nvdimm/virtio_pmem.c




> >
> >
> > > ---
> > >  drivers/s390/virtio/virtio_ccw.c   |  4 
> > >  drivers/virtio/virtio.c| 15 ---
> > >  drivers/virtio/virtio_mmio.c   |  5 +
> > >  drivers/virtio/virtio_pci_modern_dev.c |  5 +
> > >  drivers/virtio/virtio_ring.c   | 11 +++
> > >  include/linux/virtio_config.h  | 20 
> > >  6 files changed, 53 insertions(+), 7 deletions(-)
> > >
> > > diff --git a/drivers/s390/virtio/virtio_ccw.c 
> > > b/drivers/s390/virtio/virtio_ccw.c
> > > index c188e4f20ca3..97e51c34e6cf 100644
> > > --- a/drivers/s390/virtio/virtio_ccw.c
> > > +++ b/drivers/s390/virtio/virtio_ccw.c
> > > @@ -971,6 +971,10 @@ static void virtio_ccw_set_status(struct 
> > > virtio_device *vdev, u8 status)
> > >   ccw->flags = 0;
> > >   ccw->count = sizeof(status);
> > >   ccw->cda = (__u32)(unsigned long)>dma_area->status;
> > > + /* We use ssch for setting the status which is a serializing
> > > +  * instruction that guarantees the memory writes have
> > > +  * completed before ssch.
> > > +  */
> > >   ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_WRITE_STATUS);
> > >   /* Write failed? We assume status is unchanged. */
> > >   if (ret)
> > > diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
> > > index aa1eb5132767..95fac4c97c8b 100644
> > > --- a/drivers/virtio/virtio.c
> > > +++ b/drivers/virtio/virtio.c
> > > @@ -220,6 +220,15 @@ static int virtio_features_ok(struct virtio_device 
> > > *dev)
> > >   * */
> > >  void virtio_reset_device(struct virtio_device *dev)
> > >  {
> > > + /*
> > > +  * The below virtio_synchronize_cbs() guarantees that any
> > > +  * interrupt for this line arriving after
> > > +  * virtio_synchronize_vqs() has completed is guaranteed to see
> > > +  * vq->broken as true.
> > > +  */
> > > + virtio_break_device(dev);
> >
> > So make this conditional
> >
> > > + virtio_synchronize_cbs(dev);
> > > +
> > >   dev->config->reset(dev);
> > >  }
> > >  EXPORT_SYMBOL_GPL(virtio_reset_device);
> > > @@ -428,6 +437,9 @@ int register_virtio_device(struct virtio_device *dev)
> > >   dev->config_enabled = false;
> > >   dev->config_change_pending = false;
> > >
> > > + INIT_LIST_HEAD(>vqs);
> 

Re: [PATCH V6 8/9] virtio: harden vring IRQ

2022-06-13 Thread Jason Wang
On Mon, Jun 13, 2022 at 3:23 PM Michael S. Tsirkin  wrote:
>
> On Mon, Jun 13, 2022 at 01:26:59PM +0800, Jason Wang wrote:
> > On Sat, Jun 11, 2022 at 1:12 PM Michael S. Tsirkin  wrote:
> > >
> > > On Fri, May 27, 2022 at 02:01:19PM +0800, Jason Wang wrote:
> > > > This is a rework on the previous IRQ hardening that is done for
> > > > virtio-pci where several drawbacks were found and were reverted:
> > > >
> > > > 1) try to use IRQF_NO_AUTOEN which is not friendly to affinity managed 
> > > > IRQ
> > > >that is used by some device such as virtio-blk
> > > > 2) done only for PCI transport
> > > >
> > > > The vq->broken is re-used in this patch for implementing the IRQ
> > > > hardening. The vq->broken is set to true during both initialization
> > > > and reset. And the vq->broken is set to false in
> > > > virtio_device_ready(). Then vring_interrupt() can check and return
> > > > when vq->broken is true. And in this case, switch to return IRQ_NONE
> > > > to let the interrupt core aware of such invalid interrupt to prevent
> > > > IRQ storm.
> > > >
> > > > The reason of using a per queue variable instead of a per device one
> > > > is that we may need it for per queue reset hardening in the future.
> > > >
> > > > Note that the hardening is only done for vring interrupt since the
> > > > config interrupt hardening is already done in commit 22b7050a024d7
> > > > ("virtio: defer config changed notifications"). But the method that is
> > > > used by config interrupt can't be reused by the vring interrupt
> > > > handler because it uses spinlock to do the synchronization which is
> > > > expensive.
> > > >
> > > > Cc: Thomas Gleixner 
> > > > Cc: Peter Zijlstra 
> > > > Cc: "Paul E. McKenney" 
> > > > Cc: Marc Zyngier 
> > > > Cc: Halil Pasic 
> > > > Cc: Cornelia Huck 
> > > > Cc: Vineeth Vijayan 
> > > > Cc: Peter Oberparleiter 
> > > > Cc: linux-s...@vger.kernel.org
> > > > Signed-off-by: Jason Wang 
> > >
> > >
> > > Jason, I am really concerned by all the fallout.
> > > I propose adding a flag to suppress the hardening -
> > > this will be a debugging aid and a work around for
> > > users if we find more buggy drivers.
> > >
> > > suppress_interrupt_hardening ?
> >
> > I can post a patch but I'm afraid if we disable it by default, it
> > won't be used by the users so there's no way for us to receive the bug
> > report. Or we need a plan to enable it by default.
> >
> > It's rc2, how about waiting for 1 and 2 rc? Or it looks better if we
> > simply warn instead of disable it by default.
> >
> > Thanks
>
> I meant more like a flag in struct virtio_driver.
> For now, could you audit all drivers which don't call _ready?
> I found 5 of these:
>
> drivers/bluetooth/virtio_bt.c

This driver seems to be fine, it doesn't use the device/vq in its probe().

> drivers/gpu/drm/virtio/virtgpu_drv.c

It calles virtio_device_ready() in virtio_gpu_init(), and it looks to
me the code is correct.

> drivers/i2c/busses/i2c-virtio.c
> drivers/net/caif/caif_virtio.c
> drivers/nvdimm/virtio_pmem.c

The above looks fine and we have three more:

arm_scmi: probe() doesn't use vq
mac80211_hwsim.c: doesn't use vq (only fill rx), but it kicks the rx,
it looks to me we need a device_ready before the kick.
virtio_rpmsg_bus.c: doesn't use vq

I will post a patch for mac80211_hwsim.c.

Thanks

>
>
>
>
> > >
> > >
> > > > ---
> > > >  drivers/s390/virtio/virtio_ccw.c   |  4 
> > > >  drivers/virtio/virtio.c| 15 ---
> > > >  drivers/virtio/virtio_mmio.c   |  5 +
> > > >  drivers/virtio/virtio_pci_modern_dev.c |  5 +
> > > >  drivers/virtio/virtio_ring.c   | 11 +++
> > > >  include/linux/virtio_config.h  | 20 
> > > >  6 files changed, 53 insertions(+), 7 deletions(-)
> > > >
> > > > diff --git a/drivers/s390/virtio/virtio_ccw.c 
> > > > b/drivers/s390/virtio/virtio_ccw.c
> > > > index c188e4f20ca3..97e51c34e6cf 100644
> > > > --- a/drivers/s390/virtio/virtio_ccw.c
> > > > +++ b/drivers/s390/virtio/virtio_ccw.c
> > > > @@ -971,6 +971,10 @@ static void virtio_ccw_set_status(struct 
> > > > virtio_device *vdev, u8 status)
> > > >   ccw->flags = 0;
> > > >   ccw->count = sizeof(status);
> > > >   ccw->cda = (__u32)(unsigned long)>dma_area->status;
> > > > + /* We use ssch for setting the status which is a serializing
> > > > +  * instruction that guarantees the memory writes have
> > > > +  * completed before ssch.
> > > > +  */
> > > >   ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_WRITE_STATUS);
> > > >   /* Write failed? We assume status is unchanged. */
> > > >   if (ret)
> > > > diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
> > > > index aa1eb5132767..95fac4c97c8b 100644
> > > > --- a/drivers/virtio/virtio.c
> > > > +++ b/drivers/virtio/virtio.c
> > > > @@ -220,6 +220,15 @@ static int virtio_features_ok(struct virtio_device 
> > > > *dev)
> > > >   * */
> > > >  void 

Re: [PATCH V6 8/9] virtio: harden vring IRQ

2022-06-13 Thread Jason Wang
On Mon, Jun 13, 2022 at 4:19 PM Michael S. Tsirkin  wrote:
>
> On Mon, Jun 13, 2022 at 04:07:09PM +0800, Jason Wang wrote:
> > On Mon, Jun 13, 2022 at 3:23 PM Michael S. Tsirkin  wrote:
> > >
> > > On Mon, Jun 13, 2022 at 01:26:59PM +0800, Jason Wang wrote:
> > > > On Sat, Jun 11, 2022 at 1:12 PM Michael S. Tsirkin  
> > > > wrote:
> > > > >
> > > > > On Fri, May 27, 2022 at 02:01:19PM +0800, Jason Wang wrote:
> > > > > > This is a rework on the previous IRQ hardening that is done for
> > > > > > virtio-pci where several drawbacks were found and were reverted:
> > > > > >
> > > > > > 1) try to use IRQF_NO_AUTOEN which is not friendly to affinity 
> > > > > > managed IRQ
> > > > > >that is used by some device such as virtio-blk
> > > > > > 2) done only for PCI transport
> > > > > >
> > > > > > The vq->broken is re-used in this patch for implementing the IRQ
> > > > > > hardening. The vq->broken is set to true during both initialization
> > > > > > and reset. And the vq->broken is set to false in
> > > > > > virtio_device_ready(). Then vring_interrupt() can check and return
> > > > > > when vq->broken is true. And in this case, switch to return IRQ_NONE
> > > > > > to let the interrupt core aware of such invalid interrupt to prevent
> > > > > > IRQ storm.
> > > > > >
> > > > > > The reason of using a per queue variable instead of a per device one
> > > > > > is that we may need it for per queue reset hardening in the future.
> > > > > >
> > > > > > Note that the hardening is only done for vring interrupt since the
> > > > > > config interrupt hardening is already done in commit 22b7050a024d7
> > > > > > ("virtio: defer config changed notifications"). But the method that 
> > > > > > is
> > > > > > used by config interrupt can't be reused by the vring interrupt
> > > > > > handler because it uses spinlock to do the synchronization which is
> > > > > > expensive.
> > > > > >
> > > > > > Cc: Thomas Gleixner 
> > > > > > Cc: Peter Zijlstra 
> > > > > > Cc: "Paul E. McKenney" 
> > > > > > Cc: Marc Zyngier 
> > > > > > Cc: Halil Pasic 
> > > > > > Cc: Cornelia Huck 
> > > > > > Cc: Vineeth Vijayan 
> > > > > > Cc: Peter Oberparleiter 
> > > > > > Cc: linux-s...@vger.kernel.org
> > > > > > Signed-off-by: Jason Wang 
> > > > >
> > > > >
> > > > > Jason, I am really concerned by all the fallout.
> > > > > I propose adding a flag to suppress the hardening -
> > > > > this will be a debugging aid and a work around for
> > > > > users if we find more buggy drivers.
> > > > >
> > > > > suppress_interrupt_hardening ?
> > > >
> > > > I can post a patch but I'm afraid if we disable it by default, it
> > > > won't be used by the users so there's no way for us to receive the bug
> > > > report. Or we need a plan to enable it by default.
> > > >
> > > > It's rc2, how about waiting for 1 and 2 rc? Or it looks better if we
> > > > simply warn instead of disable it by default.
> > > >
> > > > Thanks
> > >
> > > I meant more like a flag in struct virtio_driver.
> > > For now, could you audit all drivers which don't call _ready?
> > > I found 5 of these:
> > >
> > > drivers/bluetooth/virtio_bt.c
> >
> > This driver seems to be fine, it doesn't use the device/vq in its probe().
>
>
> But it calls hci_register_dev and that in turn queues all kind of
> work. Also, can linux start using the device immediately after
> it's registered?

So I think the driver is allowed to queue before DRIVER_OK. If yes,
the only side effect is the delay of the tx interrupt after DRIVER_OK
for a well behaved device. If not, we need to clarify it in the spec
and call virtio_device_ready() before subsystem registration.

>
>
> > > drivers/gpu/drm/virtio/virtgpu_drv.c
> >
> > It calles virtio_device_ready() in virtio_gpu_init(), and it looks to
> > me the code is correct.
>
> OK.
>
> > > drivers/i2c/busses/i2c-virtio.c
> > > drivers/net/caif/caif_virtio.c
> > > drivers/nvdimm/virtio_pmem.c
> >
> > The above looks fine and we have three more:
> >
> > arm_scmi: probe() doesn't use vq
> > mac80211_hwsim.c: doesn't use vq (only fill rx), but it kicks the rx,
> > it looks to me we need a device_ready before the kick.
> > virtio_rpmsg_bus.c: doesn't use vq
> >
> > I will post a patch for mac80211_hwsim.c.
> > Thanks
>
> Same comments for all of the above. Might linux not start using the
> device once it's registered?

It depends on the specific subsystem.

For the subsystem that can't use the device immediately, calling
virtio_device_ready() after the subsystem's registration should be
fine. E.g for the networking subsystem, the TX won't happen if
ndo_open() is not called, calling virtio_device_ready() after
netdev_register() seems to be fine.

For the subsystem that can use the device immediately, if the
subsystem does not depend on the result of a request in the probe to
proceed, we are still fine. Since those requests will be proceed after
DRIVER_OK.

For the rest we need to do virtio_device_ready() before registration.

Thanks

>
> > >
> > >
> > >
> > >
> 

Re: [PATCH V6 8/9] virtio: harden vring IRQ

2022-06-13 Thread Michael S. Tsirkin
On Mon, Jun 13, 2022 at 04:51:08PM +0800, Jason Wang wrote:
> On Mon, Jun 13, 2022 at 4:19 PM Michael S. Tsirkin  wrote:
> >
> > On Mon, Jun 13, 2022 at 04:07:09PM +0800, Jason Wang wrote:
> > > On Mon, Jun 13, 2022 at 3:23 PM Michael S. Tsirkin  
> > > wrote:
> > > >
> > > > On Mon, Jun 13, 2022 at 01:26:59PM +0800, Jason Wang wrote:
> > > > > On Sat, Jun 11, 2022 at 1:12 PM Michael S. Tsirkin  
> > > > > wrote:
> > > > > >
> > > > > > On Fri, May 27, 2022 at 02:01:19PM +0800, Jason Wang wrote:
> > > > > > > This is a rework on the previous IRQ hardening that is done for
> > > > > > > virtio-pci where several drawbacks were found and were reverted:
> > > > > > >
> > > > > > > 1) try to use IRQF_NO_AUTOEN which is not friendly to affinity 
> > > > > > > managed IRQ
> > > > > > >that is used by some device such as virtio-blk
> > > > > > > 2) done only for PCI transport
> > > > > > >
> > > > > > > The vq->broken is re-used in this patch for implementing the IRQ
> > > > > > > hardening. The vq->broken is set to true during both 
> > > > > > > initialization
> > > > > > > and reset. And the vq->broken is set to false in
> > > > > > > virtio_device_ready(). Then vring_interrupt() can check and return
> > > > > > > when vq->broken is true. And in this case, switch to return 
> > > > > > > IRQ_NONE
> > > > > > > to let the interrupt core aware of such invalid interrupt to 
> > > > > > > prevent
> > > > > > > IRQ storm.
> > > > > > >
> > > > > > > The reason of using a per queue variable instead of a per device 
> > > > > > > one
> > > > > > > is that we may need it for per queue reset hardening in the 
> > > > > > > future.
> > > > > > >
> > > > > > > Note that the hardening is only done for vring interrupt since the
> > > > > > > config interrupt hardening is already done in commit 22b7050a024d7
> > > > > > > ("virtio: defer config changed notifications"). But the method 
> > > > > > > that is
> > > > > > > used by config interrupt can't be reused by the vring interrupt
> > > > > > > handler because it uses spinlock to do the synchronization which 
> > > > > > > is
> > > > > > > expensive.
> > > > > > >
> > > > > > > Cc: Thomas Gleixner 
> > > > > > > Cc: Peter Zijlstra 
> > > > > > > Cc: "Paul E. McKenney" 
> > > > > > > Cc: Marc Zyngier 
> > > > > > > Cc: Halil Pasic 
> > > > > > > Cc: Cornelia Huck 
> > > > > > > Cc: Vineeth Vijayan 
> > > > > > > Cc: Peter Oberparleiter 
> > > > > > > Cc: linux-s...@vger.kernel.org
> > > > > > > Signed-off-by: Jason Wang 
> > > > > >
> > > > > >
> > > > > > Jason, I am really concerned by all the fallout.
> > > > > > I propose adding a flag to suppress the hardening -
> > > > > > this will be a debugging aid and a work around for
> > > > > > users if we find more buggy drivers.
> > > > > >
> > > > > > suppress_interrupt_hardening ?
> > > > >
> > > > > I can post a patch but I'm afraid if we disable it by default, it
> > > > > won't be used by the users so there's no way for us to receive the bug
> > > > > report. Or we need a plan to enable it by default.
> > > > >
> > > > > It's rc2, how about waiting for 1 and 2 rc? Or it looks better if we
> > > > > simply warn instead of disable it by default.
> > > > >
> > > > > Thanks
> > > >
> > > > I meant more like a flag in struct virtio_driver.
> > > > For now, could you audit all drivers which don't call _ready?
> > > > I found 5 of these:
> > > >
> > > > drivers/bluetooth/virtio_bt.c
> > >
> > > This driver seems to be fine, it doesn't use the device/vq in its probe().
> >
> >
> > But it calls hci_register_dev and that in turn queues all kind of
> > work. Also, can linux start using the device immediately after
> > it's registered?
> 
> So I think the driver is allowed to queue before DRIVER_OK.

it's not allowed to kick

> If yes,
> the only side effect is the delay of the tx interrupt after DRIVER_OK
> for a well behaved device.

your patches drop the interrupt though, it won't be just delayed.

> If not, we need to clarify it in the spec
> and call virtio_device_ready() before subsystem registration.

hmm, i don't get what we need to clarify

> >
> >
> > > > drivers/gpu/drm/virtio/virtgpu_drv.c
> > >
> > > It calles virtio_device_ready() in virtio_gpu_init(), and it looks to
> > > me the code is correct.
> >
> > OK.
> >
> > > > drivers/i2c/busses/i2c-virtio.c
> > > > drivers/net/caif/caif_virtio.c
> > > > drivers/nvdimm/virtio_pmem.c
> > >
> > > The above looks fine and we have three more:
> > >
> > > arm_scmi: probe() doesn't use vq
> > > mac80211_hwsim.c: doesn't use vq (only fill rx), but it kicks the rx,
> > > it looks to me we need a device_ready before the kick.
> > > virtio_rpmsg_bus.c: doesn't use vq
> > >
> > > I will post a patch for mac80211_hwsim.c.
> > > Thanks
> >
> > Same comments for all of the above. Might linux not start using the
> > device once it's registered?
> 
> It depends on the specific subsystem.
> 
> For the subsystem that can't use the device immediately, calling
> virtio_device_ready() after 

Re: [PATCH] virtio_ring : fix vring_packed_desc memory out of bounds bug

2022-06-13 Thread Michael S. Tsirkin
On Mon, Jun 13, 2022 at 04:44:03PM +0800, 黄杰 wrote:
> Michael S. Tsirkin  于2022年6月12日周日 22:13写道:
> >
> > On Sun, Jun 12, 2022 at 07:02:25PM +0800, 黄杰 wrote:
> > > Michael S. Tsirkin  于2022年6月11日周六 08:35写道:
> > > >
> > > > On Sat, Jun 11, 2022 at 12:38:10AM +0800, 黄杰 wrote:
> > > > > > This pattern was always iffy, but I don't think the patch
> > > > > > improves the situation much. last_used_idx and 
> > > > > > vq->packed.used_wrap_counter
> > > > > > can still get out of sync.
> > > > >
> > > > > Yes, You are absolutely correct, thanks for pointing out this issue, I
> > > > > didn't take that into consideration,
> > > > > how about disabling interrupts before this code below:
> > > > >
> > > > > > vq->last_used_idx += vq->packed.desc_state[id].num;
> > > > > > if (unlikely(vq->last_used_idx >= vq->packed.vring.num)) {
> > > > > >  vq->last_used_idx -= vq->packed.vring.num;
> > > > > >  vq->packed.used_wrap_counter ^= 1;
> > > > > > }
> > > > >
> > > > > it seems to be fine to just turn off the interrupts of the current 
> > > > > vring.
> > > > >
> > > > > BR
> > > >
> > > > That would make datapath significantly slower.
> > > >
> > > > >
> > > > > Michael S. Tsirkin  于2022年6月10日周五 22:50写道:
> > > > > >
> > > > > > On Fri, Jun 10, 2022 at 06:33:14PM +0800, huangjie.albert wrote:
> > > > > > > ksoftirqd may consume the packet and it will call:
> > > > > > > virtnet_poll
> > > > > > >   -->virtnet_receive
> > > > > > >   -->virtqueue_get_buf_ctx
> > > > > > >   -->virtqueue_get_buf_ctx_packed
> > > > > > > and in virtqueue_get_buf_ctx_packed:
> > > > > > >
> > > > > > > vq->last_used_idx += vq->packed.desc_state[id].num;
> > > > > > > if (unlikely(vq->last_used_idx >= vq->packed.vring.num)) {
> > > > > > >  vq->last_used_idx -= vq->packed.vring.num;
> > > > > > >  vq->packed.used_wrap_counter ^= 1;
> > > > > > > }
> > > > > > >
> > > > > > > if at the same time, there comes a vring interrupt,in 
> > > > > > > vring_interrupt:
> > > > > > > we will call:
> > > > > > > vring_interrupt
> > > > > > >   -->more_used
> > > > > > >   -->more_used_packed
> > > > > > >   -->is_used_desc_packed
> > > > > > > in is_used_desc_packed, the last_used_idx maybe >= 
> > > > > > > vq->packed.vring.num.
> > > > > > > so this could case a memory out of bounds bug.
> > > > > > >
> > > > > > > this patch is to fix this.
> > > > > > >
> > > > > > > Signed-off-by: huangjie.albert 
> > > > > >
> > > > > >
> > > > > > This pattern was always iffy, but I don't think the patch
> > > > > > improves the situation much. last_used_idx and 
> > > > > > vq->packed.used_wrap_counter
> > > > > > can still get out of sync.
> > > > > >
> > > > > > Maybe refactor code to keep everything in vq->last_used_idx?
> > > > > >
> > > > > > Jason what is your take?
> > > > > >
> > > > > >
> > > > > > > ---
> > > > > > >  drivers/virtio/virtio_ring.c | 3 +++
> > > > > > >  1 file changed, 3 insertions(+)
> > > > > > >
> > > > > > > diff --git a/drivers/virtio/virtio_ring.c 
> > > > > > > b/drivers/virtio/virtio_ring.c
> > > > > > > index 13a7348cedff..d2abbb3a8187 100644
> > > > > > > --- a/drivers/virtio/virtio_ring.c
> > > > > > > +++ b/drivers/virtio/virtio_ring.c
> > > > > > > @@ -1397,6 +1397,9 @@ static inline bool 
> > > > > > > is_used_desc_packed(const struct vring_virtqueue *vq,
> > > > > > >   bool avail, used;
> > > > > > >   u16 flags;
> > > > > > >
> > > > > > > + if (idx >= vq->packed.vring.num)
> > > > > > > + return false;
> > > > > > > +
> > > > > > >   flags = le16_to_cpu(vq->packed.vring.desc[idx].flags);
> > > > > > >   avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
> > > > > > >   used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
> > > > > > > --
> > > > > > > 2.27.0
> > > > > >
> > > >
> > >
> > > Michael S , thanks for your correction, there may be another simple
> > > solution here:
> > >
> > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > index 13a7348cedff..4db4db19f94a 100644
> > > --- a/drivers/virtio/virtio_ring.c
> > > +++ b/drivers/virtio/virtio_ring.c
> > > @@ -1397,6 +1397,9 @@ static inline bool is_used_desc_packed(const
> > > struct vring_virtqueue *vq,
> > > bool avail, used;
> > > u16 flags;
> > >
> > > +   if (idx >= vq->packed.vring.num)
> > > +   return false;
> > > +
> > > flags = le16_to_cpu(vq->packed.vring.desc[idx].flags);
> > > avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
> > > used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
> > > @@ -1453,8 +1456,9 @@ static void *virtqueue_get_buf_ctx_packed(struct
> > > virtqueue *_vq,
> > >
> > > vq->last_used_idx += vq->packed.desc_state[id].num;
> > > if (unlikely(vq->last_used_idx >= vq->packed.vring.num)) {
> > > -   vq->last_used_idx -= vq->packed.vring.num;
> > > 

Re: [PATCH V6 8/9] virtio: harden vring IRQ

2022-06-13 Thread Jason Wang
On Mon, Jun 13, 2022 at 4:59 PM Michael S. Tsirkin  wrote:
>
> On Mon, Jun 13, 2022 at 04:51:08PM +0800, Jason Wang wrote:
> > On Mon, Jun 13, 2022 at 4:19 PM Michael S. Tsirkin  wrote:
> > >
> > > On Mon, Jun 13, 2022 at 04:07:09PM +0800, Jason Wang wrote:
> > > > On Mon, Jun 13, 2022 at 3:23 PM Michael S. Tsirkin  
> > > > wrote:
> > > > >
> > > > > On Mon, Jun 13, 2022 at 01:26:59PM +0800, Jason Wang wrote:
> > > > > > On Sat, Jun 11, 2022 at 1:12 PM Michael S. Tsirkin 
> > > > > >  wrote:
> > > > > > >
> > > > > > > On Fri, May 27, 2022 at 02:01:19PM +0800, Jason Wang wrote:
> > > > > > > > This is a rework on the previous IRQ hardening that is done for
> > > > > > > > virtio-pci where several drawbacks were found and were reverted:
> > > > > > > >
> > > > > > > > 1) try to use IRQF_NO_AUTOEN which is not friendly to affinity 
> > > > > > > > managed IRQ
> > > > > > > >that is used by some device such as virtio-blk
> > > > > > > > 2) done only for PCI transport
> > > > > > > >
> > > > > > > > The vq->broken is re-used in this patch for implementing the IRQ
> > > > > > > > hardening. The vq->broken is set to true during both 
> > > > > > > > initialization
> > > > > > > > and reset. And the vq->broken is set to false in
> > > > > > > > virtio_device_ready(). Then vring_interrupt() can check and 
> > > > > > > > return
> > > > > > > > when vq->broken is true. And in this case, switch to return 
> > > > > > > > IRQ_NONE
> > > > > > > > to let the interrupt core aware of such invalid interrupt to 
> > > > > > > > prevent
> > > > > > > > IRQ storm.
> > > > > > > >
> > > > > > > > The reason of using a per queue variable instead of a per 
> > > > > > > > device one
> > > > > > > > is that we may need it for per queue reset hardening in the 
> > > > > > > > future.
> > > > > > > >
> > > > > > > > Note that the hardening is only done for vring interrupt since 
> > > > > > > > the
> > > > > > > > config interrupt hardening is already done in commit 
> > > > > > > > 22b7050a024d7
> > > > > > > > ("virtio: defer config changed notifications"). But the method 
> > > > > > > > that is
> > > > > > > > used by config interrupt can't be reused by the vring interrupt
> > > > > > > > handler because it uses spinlock to do the synchronization 
> > > > > > > > which is
> > > > > > > > expensive.
> > > > > > > >
> > > > > > > > Cc: Thomas Gleixner 
> > > > > > > > Cc: Peter Zijlstra 
> > > > > > > > Cc: "Paul E. McKenney" 
> > > > > > > > Cc: Marc Zyngier 
> > > > > > > > Cc: Halil Pasic 
> > > > > > > > Cc: Cornelia Huck 
> > > > > > > > Cc: Vineeth Vijayan 
> > > > > > > > Cc: Peter Oberparleiter 
> > > > > > > > Cc: linux-s...@vger.kernel.org
> > > > > > > > Signed-off-by: Jason Wang 
> > > > > > >
> > > > > > >
> > > > > > > Jason, I am really concerned by all the fallout.
> > > > > > > I propose adding a flag to suppress the hardening -
> > > > > > > this will be a debugging aid and a work around for
> > > > > > > users if we find more buggy drivers.
> > > > > > >
> > > > > > > suppress_interrupt_hardening ?
> > > > > >
> > > > > > I can post a patch but I'm afraid if we disable it by default, it
> > > > > > won't be used by the users so there's no way for us to receive the 
> > > > > > bug
> > > > > > report. Or we need a plan to enable it by default.
> > > > > >
> > > > > > It's rc2, how about waiting for 1 and 2 rc? Or it looks better if we
> > > > > > simply warn instead of disable it by default.
> > > > > >
> > > > > > Thanks
> > > > >
> > > > > I meant more like a flag in struct virtio_driver.
> > > > > For now, could you audit all drivers which don't call _ready?
> > > > > I found 5 of these:
> > > > >
> > > > > drivers/bluetooth/virtio_bt.c
> > > >
> > > > This driver seems to be fine, it doesn't use the device/vq in its 
> > > > probe().
> > >
> > >
> > > But it calls hci_register_dev and that in turn queues all kind of
> > > work. Also, can linux start using the device immediately after
> > > it's registered?
> >
> > So I think the driver is allowed to queue before DRIVER_OK.
>
> it's not allowed to kick

Yes.

>
> > If yes,
> > the only side effect is the delay of the tx interrupt after DRIVER_OK
> > for a well behaved device.
>
> your patches drop the interrupt though, it won't be just delayed.

For a well behaved device, it can only trigger the interrupt after DRIVER_OK.

So for virtio bt, it works like:

1) driver queue buffer and kick
2) driver set DRIVER_OK
3) device start to process the buffer
4) device send an notification

The only risk is that the virtqueue could be filled before DRIVER_OK,
or anything I missed?

>
> > If not, we need to clarify it in the spec
> > and call virtio_device_ready() before subsystem registration.
>
> hmm, i don't get what we need to clarify

E.g the driver is not allowed to kick or after DRIVER_OK should the
device only process the buffer after a kick after DRIVER_OK (I think
no)?

>
> > >
> > >
> > > > > drivers/gpu/drm/virtio/virtgpu_drv.c
> > > >
> > > > It 

Re: [PATCH V6 8/9] virtio: harden vring IRQ

2022-06-13 Thread Jason Wang
On Mon, Jun 13, 2022 at 5:08 PM Jason Wang  wrote:
>
> On Mon, Jun 13, 2022 at 4:59 PM Michael S. Tsirkin  wrote:
> >
> > On Mon, Jun 13, 2022 at 04:51:08PM +0800, Jason Wang wrote:
> > > On Mon, Jun 13, 2022 at 4:19 PM Michael S. Tsirkin  
> > > wrote:
> > > >
> > > > On Mon, Jun 13, 2022 at 04:07:09PM +0800, Jason Wang wrote:
> > > > > On Mon, Jun 13, 2022 at 3:23 PM Michael S. Tsirkin  
> > > > > wrote:
> > > > > >
> > > > > > On Mon, Jun 13, 2022 at 01:26:59PM +0800, Jason Wang wrote:
> > > > > > > On Sat, Jun 11, 2022 at 1:12 PM Michael S. Tsirkin 
> > > > > > >  wrote:
> > > > > > > >
> > > > > > > > On Fri, May 27, 2022 at 02:01:19PM +0800, Jason Wang wrote:
> > > > > > > > > This is a rework on the previous IRQ hardening that is done 
> > > > > > > > > for
> > > > > > > > > virtio-pci where several drawbacks were found and were 
> > > > > > > > > reverted:
> > > > > > > > >
> > > > > > > > > 1) try to use IRQF_NO_AUTOEN which is not friendly to 
> > > > > > > > > affinity managed IRQ
> > > > > > > > >that is used by some device such as virtio-blk
> > > > > > > > > 2) done only for PCI transport
> > > > > > > > >
> > > > > > > > > The vq->broken is re-used in this patch for implementing the 
> > > > > > > > > IRQ
> > > > > > > > > hardening. The vq->broken is set to true during both 
> > > > > > > > > initialization
> > > > > > > > > and reset. And the vq->broken is set to false in
> > > > > > > > > virtio_device_ready(). Then vring_interrupt() can check and 
> > > > > > > > > return
> > > > > > > > > when vq->broken is true. And in this case, switch to return 
> > > > > > > > > IRQ_NONE
> > > > > > > > > to let the interrupt core aware of such invalid interrupt to 
> > > > > > > > > prevent
> > > > > > > > > IRQ storm.
> > > > > > > > >
> > > > > > > > > The reason of using a per queue variable instead of a per 
> > > > > > > > > device one
> > > > > > > > > is that we may need it for per queue reset hardening in the 
> > > > > > > > > future.
> > > > > > > > >
> > > > > > > > > Note that the hardening is only done for vring interrupt 
> > > > > > > > > since the
> > > > > > > > > config interrupt hardening is already done in commit 
> > > > > > > > > 22b7050a024d7
> > > > > > > > > ("virtio: defer config changed notifications"). But the 
> > > > > > > > > method that is
> > > > > > > > > used by config interrupt can't be reused by the vring 
> > > > > > > > > interrupt
> > > > > > > > > handler because it uses spinlock to do the synchronization 
> > > > > > > > > which is
> > > > > > > > > expensive.
> > > > > > > > >
> > > > > > > > > Cc: Thomas Gleixner 
> > > > > > > > > Cc: Peter Zijlstra 
> > > > > > > > > Cc: "Paul E. McKenney" 
> > > > > > > > > Cc: Marc Zyngier 
> > > > > > > > > Cc: Halil Pasic 
> > > > > > > > > Cc: Cornelia Huck 
> > > > > > > > > Cc: Vineeth Vijayan 
> > > > > > > > > Cc: Peter Oberparleiter 
> > > > > > > > > Cc: linux-s...@vger.kernel.org
> > > > > > > > > Signed-off-by: Jason Wang 
> > > > > > > >
> > > > > > > >
> > > > > > > > Jason, I am really concerned by all the fallout.
> > > > > > > > I propose adding a flag to suppress the hardening -
> > > > > > > > this will be a debugging aid and a work around for
> > > > > > > > users if we find more buggy drivers.
> > > > > > > >
> > > > > > > > suppress_interrupt_hardening ?
> > > > > > >
> > > > > > > I can post a patch but I'm afraid if we disable it by default, it
> > > > > > > won't be used by the users so there's no way for us to receive 
> > > > > > > the bug
> > > > > > > report. Or we need a plan to enable it by default.
> > > > > > >
> > > > > > > It's rc2, how about waiting for 1 and 2 rc? Or it looks better if 
> > > > > > > we
> > > > > > > simply warn instead of disable it by default.
> > > > > > >
> > > > > > > Thanks
> > > > > >
> > > > > > I meant more like a flag in struct virtio_driver.
> > > > > > For now, could you audit all drivers which don't call _ready?
> > > > > > I found 5 of these:
> > > > > >
> > > > > > drivers/bluetooth/virtio_bt.c
> > > > >
> > > > > This driver seems to be fine, it doesn't use the device/vq in its 
> > > > > probe().
> > > >
> > > >
> > > > But it calls hci_register_dev and that in turn queues all kind of
> > > > work. Also, can linux start using the device immediately after
> > > > it's registered?
> > >
> > > So I think the driver is allowed to queue before DRIVER_OK.
> >
> > it's not allowed to kick
>
> Yes.
>
> >
> > > If yes,
> > > the only side effect is the delay of the tx interrupt after DRIVER_OK
> > > for a well behaved device.
> >
> > your patches drop the interrupt though, it won't be just delayed.
>
> For a well behaved device, it can only trigger the interrupt after DRIVER_OK.
>
> So for virtio bt, it works like:
>
> 1) driver queue buffer and kick
> 2) driver set DRIVER_OK
> 3) device start to process the buffer
> 4) device send an notification
>
> The only risk is that the virtqueue could be filled before DRIVER_OK,
> or anything I missed?

btw, hci 

Re: [PATCH 2/2] vdpa/mlx5: Initializde CVQ vringh only once

2022-06-13 Thread Jason Wang
On Mon, Jun 13, 2022 at 4:00 PM Eli Cohen  wrote:
>
> Currently, CVQ vringh is initialized inside setup_virtqueues() which is
> called every time a memory update is done. This is undesirable since it
> resets all the context of the vring, including the available and used
> indices.
>
> Move the initialization to mlx5_vdpa_set_status() when
> VIRTIO_CONFIG_S_DRIVER_OK is set.
>
> Signed-off-by: Eli Cohen 

Acked-by: Jason Wang 

> ---
>  drivers/vdpa/mlx5/net/mlx5_vnet.c | 31 ---
>  1 file changed, 20 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c 
> b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> index 789c078ff1af..e85c1d71f4ed 100644
> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> @@ -2176,7 +2176,6 @@ static int verify_driver_features(struct mlx5_vdpa_dev 
> *mvdev, u64 features)
>  static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev)
>  {
> struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> -   struct mlx5_control_vq *cvq = >cvq;
> int err;
> int i;
>
> @@ -2186,16 +2185,6 @@ static int setup_virtqueues(struct mlx5_vdpa_dev 
> *mvdev)
> goto err_vq;
> }
>
> -   if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) {
> -   err = vringh_init_iotlb(>vring, mvdev->actual_features,
> -   MLX5_CVQ_MAX_ENT, false,
> -   (struct vring_desc 
> *)(uintptr_t)cvq->desc_addr,
> -   (struct vring_avail 
> *)(uintptr_t)cvq->driver_addr,
> -   (struct vring_used 
> *)(uintptr_t)cvq->device_addr);
> -   if (err)
> -   goto err_vq;
> -   }
> -
> return 0;
>
>  err_vq:
> @@ -2468,6 +2457,21 @@ static void clear_vqs_ready(struct mlx5_vdpa_net *ndev)
> ndev->mvdev.cvq.ready = false;
>  }
>
> +static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)
> +{
> +   struct mlx5_control_vq *cvq = >cvq;
> +   int err = 0;
> +
> +   if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))
> +   err = vringh_init_iotlb(>vring, mvdev->actual_features,
> +   MLX5_CVQ_MAX_ENT, false,
> +   (struct vring_desc 
> *)(uintptr_t)cvq->desc_addr,
> +   (struct vring_avail 
> *)(uintptr_t)cvq->driver_addr,
> +   (struct vring_used 
> *)(uintptr_t)cvq->device_addr);
> +
> +   return err;
> +}
> +
>  static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
>  {
> struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> @@ -2480,6 +2484,11 @@ static void mlx5_vdpa_set_status(struct vdpa_device 
> *vdev, u8 status)
>
> if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) {
> if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
> +   err = setup_cvq_vring(mvdev);
> +   if (err) {
> +   mlx5_vdpa_warn(mvdev, "failed to setup 
> control VQ vring\n");
> +   goto err_setup;
> +   }
> err = setup_driver(mvdev);
> if (err) {
> mlx5_vdpa_warn(mvdev, "failed to setup 
> driver\n");
> --
> 2.35.1
>

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH 04/36] cpuidle,intel_idle: Fix CPUIDLE_FLAG_IRQ_ENABLE

2022-06-13 Thread Peter Zijlstra
On Thu, Jun 09, 2022 at 04:49:21PM -0700, Jacob Pan wrote:
> Hi Peter,
> 
> On Wed, 08 Jun 2022 16:27:27 +0200, Peter Zijlstra 
> wrote:
> 
> > Commit c227233ad64c ("intel_idle: enable interrupts before C1 on
> > Xeons") wrecked intel_idle in two ways:
> > 
> >  - must not have tracing in idle functions
> >  - must return with IRQs disabled
> > 
> > Additionally, it added a branch for no good reason.
> > 
> > Fixes: c227233ad64c ("intel_idle: enable interrupts before C1 on Xeons")
> > Signed-off-by: Peter Zijlstra (Intel) 
> > ---
> >  drivers/idle/intel_idle.c |   48
> > +++--- 1 file changed, 37
> > insertions(+), 11 deletions(-)
> > 
> > --- a/drivers/idle/intel_idle.c
> > +++ b/drivers/idle/intel_idle.c
> > @@ -129,21 +137,37 @@ static unsigned int mwait_substates __in
> >   *
> >   * Must be called under local_irq_disable().
> >   */
> nit: this comment is no long true, right?

It still is, all the idle routines are called with interrupts disabled,
but must also exit with interrupts disabled.

If the idle method requires interrupts to be enabled, it must be sure to
disable them again before returning. Given all the RCU/tracing concerns
it must use raw_local_irq_*() for this though.
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [RFC PATCH v2 3/8] af_vsock: add zerocopy receive logic

2022-06-13 Thread Stefano Garzarella

On Thu, Jun 09, 2022 at 12:20:22PM +, Arseniy Krasnov wrote:

On 09.06.2022 11:39, Stefano Garzarella wrote:

On Fri, Jun 03, 2022 at 05:35:48AM +, Arseniy Krasnov wrote:

This:
1) Adds callback for 'mmap()' call on socket. It checks vm
  area flags and sets vm area ops.
2) Adds special 'getsockopt()' case which calls transport
  zerocopy callback. Input argument is vm area address.
3) Adds 'getsockopt()/setsockopt()' for switching on/off rx
  zerocopy mode.

Signed-off-by: Arseniy Krasnov 
---
include/net/af_vsock.h  |   7 +++
include/uapi/linux/vm_sockets.h |   3 +
net/vmw_vsock/af_vsock.c    | 100 
3 files changed, 110 insertions(+)

diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index f742e50207fb..f15f84c648ff 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -135,6 +135,13 @@ struct vsock_transport {
bool (*stream_is_active)(struct vsock_sock *);
bool (*stream_allow)(u32 cid, u32 port);

+    int (*rx_zerocopy_set)(struct vsock_sock *vsk,
+   bool enable);
+    int (*rx_zerocopy_get)(struct vsock_sock *vsk);
+    int (*zerocopy_dequeue)(struct vsock_sock *vsk,
+    struct vm_area_struct *vma,
+    unsigned long addr);
+
/* SEQ_PACKET. */
ssize_t (*seqpacket_dequeue)(struct vsock_sock *vsk, struct msghdr *msg,
 int flags);
diff --git a/include/uapi/linux/vm_sockets.h b/include/uapi/linux/vm_sockets.h
index c60ca33eac59..d1f792bed1a7 100644
--- a/include/uapi/linux/vm_sockets.h
+++ b/include/uapi/linux/vm_sockets.h
@@ -83,6 +83,9 @@

#define SO_VM_SOCKETS_CONNECT_TIMEOUT_NEW 8

+#define SO_VM_SOCKETS_MAP_RX 9
+#define SO_VM_SOCKETS_ZEROCOPY 10
+
#if !defined(__KERNEL__)
#if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
#define SO_VM_SOCKETS_CONNECT_TIMEOUT SO_VM_SOCKETS_CONNECT_TIMEOUT_OLD
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index f04abf662ec6..10061ef21730 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1644,6 +1644,17 @@ static int vsock_connectible_setsockopt(struct socket 
*sock,
    }
    break;
}
+    case SO_VM_SOCKETS_ZEROCOPY: {
+    if (!transport || !transport->rx_zerocopy_set) {
+    err = -EOPNOTSUPP;
+    } else {
+    COPY_IN(val);
+
+    if (transport->rx_zerocopy_set(vsk, val))
+    err = -EINVAL;
+    }
+    break;
+    }

default:
    err = -ENOPROTOOPT;
@@ -1657,6 +1668,48 @@ static int vsock_connectible_setsockopt(struct socket 
*sock,
return err;
}

+static const struct vm_operations_struct afvsock_vm_ops = {
+};
+
+static int vsock_recv_zerocopy(struct socket *sock,
+   unsigned long address)
+{
+    struct sock *sk = sock->sk;
+    struct vsock_sock *vsk = vsock_sk(sk);
+    struct vm_area_struct *vma;
+    const struct vsock_transport *transport;
+    int res;
+
+    transport = vsk->transport;
+
+    if (!transport->rx_zerocopy_get)
+    return -EOPNOTSUPP;
+
+    if (!transport->rx_zerocopy_get(vsk))
+    return -EOPNOTSUPP;


Maybe we can merge in
    if (!transport->rx_zerocopy_get ||
    !transport->rx_zerocopy_get(vsk)}
    return -EOPNOTSUPP;


+
+    if (!transport->zerocopy_dequeue)
+    return -EOPNOTSUPP;
+
+    lock_sock(sk);
+    mmap_write_lock(current->mm);


So, multiple threads using different sockets are serialized if they use 
zero-copy?

IIUC this is necessary because the callback calls vm_insert_page().

At this point I think it's better not to do this in every transport, but have 
the callback return an array of pages to map and we map them here trying to 
limit as much as possible the critical section to protect with 
mmap_write_lock().


Yes, it will be easy to return array of pages by transport callback,




+
+    vma = vma_lookup(current->mm, address);
+
+    if (!vma || vma->vm_ops != _vm_ops) {
+    mmap_write_unlock(current->mm);
+    release_sock(sk);
+    return -EINVAL;
+    }
+
+    res = transport->zerocopy_dequeue(vsk, vma, address);
+
+    mmap_write_unlock(current->mm);
+    release_sock(sk);
+
+    return res;
+}
+
static int vsock_connectible_getsockopt(struct socket *sock,
    int level, int optname,
    char __user *optval,
@@ -1701,6 +1754,39 @@ static int vsock_connectible_getsockopt(struct socket 
*sock,
    lv = sock_get_timeout(vsk->connect_timeout, ,
  optname == SO_VM_SOCKETS_CONNECT_TIMEOUT_OLD);
    break;
+    case SO_VM_SOCKETS_ZEROCOPY: {
+    const struct vsock_transport *transport;
+    int res;
+
+    transport = vsk->transport;
+
+    if (!transport->rx_zerocopy_get)
+    return -EOPNOTSUPP;
+
+    lock_sock(sk);


I think we should call lock_sock() before reading the transport to avoid races 
and we should check if it is assigned.

At that point I think is 

Re: [PATCH V6 8/9] virtio: harden vring IRQ

2022-06-13 Thread Michael S. Tsirkin
On Mon, Jun 13, 2022 at 05:08:20PM +0800, Jason Wang wrote:
> On Mon, Jun 13, 2022 at 4:59 PM Michael S. Tsirkin  wrote:
> >
> > On Mon, Jun 13, 2022 at 04:51:08PM +0800, Jason Wang wrote:
> > > On Mon, Jun 13, 2022 at 4:19 PM Michael S. Tsirkin  
> > > wrote:
> > > >
> > > > On Mon, Jun 13, 2022 at 04:07:09PM +0800, Jason Wang wrote:
> > > > > On Mon, Jun 13, 2022 at 3:23 PM Michael S. Tsirkin  
> > > > > wrote:
> > > > > >
> > > > > > On Mon, Jun 13, 2022 at 01:26:59PM +0800, Jason Wang wrote:
> > > > > > > On Sat, Jun 11, 2022 at 1:12 PM Michael S. Tsirkin 
> > > > > > >  wrote:
> > > > > > > >
> > > > > > > > On Fri, May 27, 2022 at 02:01:19PM +0800, Jason Wang wrote:
> > > > > > > > > This is a rework on the previous IRQ hardening that is done 
> > > > > > > > > for
> > > > > > > > > virtio-pci where several drawbacks were found and were 
> > > > > > > > > reverted:
> > > > > > > > >
> > > > > > > > > 1) try to use IRQF_NO_AUTOEN which is not friendly to 
> > > > > > > > > affinity managed IRQ
> > > > > > > > >that is used by some device such as virtio-blk
> > > > > > > > > 2) done only for PCI transport
> > > > > > > > >
> > > > > > > > > The vq->broken is re-used in this patch for implementing the 
> > > > > > > > > IRQ
> > > > > > > > > hardening. The vq->broken is set to true during both 
> > > > > > > > > initialization
> > > > > > > > > and reset. And the vq->broken is set to false in
> > > > > > > > > virtio_device_ready(). Then vring_interrupt() can check and 
> > > > > > > > > return
> > > > > > > > > when vq->broken is true. And in this case, switch to return 
> > > > > > > > > IRQ_NONE
> > > > > > > > > to let the interrupt core aware of such invalid interrupt to 
> > > > > > > > > prevent
> > > > > > > > > IRQ storm.
> > > > > > > > >
> > > > > > > > > The reason of using a per queue variable instead of a per 
> > > > > > > > > device one
> > > > > > > > > is that we may need it for per queue reset hardening in the 
> > > > > > > > > future.
> > > > > > > > >
> > > > > > > > > Note that the hardening is only done for vring interrupt 
> > > > > > > > > since the
> > > > > > > > > config interrupt hardening is already done in commit 
> > > > > > > > > 22b7050a024d7
> > > > > > > > > ("virtio: defer config changed notifications"). But the 
> > > > > > > > > method that is
> > > > > > > > > used by config interrupt can't be reused by the vring 
> > > > > > > > > interrupt
> > > > > > > > > handler because it uses spinlock to do the synchronization 
> > > > > > > > > which is
> > > > > > > > > expensive.
> > > > > > > > >
> > > > > > > > > Cc: Thomas Gleixner 
> > > > > > > > > Cc: Peter Zijlstra 
> > > > > > > > > Cc: "Paul E. McKenney" 
> > > > > > > > > Cc: Marc Zyngier 
> > > > > > > > > Cc: Halil Pasic 
> > > > > > > > > Cc: Cornelia Huck 
> > > > > > > > > Cc: Vineeth Vijayan 
> > > > > > > > > Cc: Peter Oberparleiter 
> > > > > > > > > Cc: linux-s...@vger.kernel.org
> > > > > > > > > Signed-off-by: Jason Wang 
> > > > > > > >
> > > > > > > >
> > > > > > > > Jason, I am really concerned by all the fallout.
> > > > > > > > I propose adding a flag to suppress the hardening -
> > > > > > > > this will be a debugging aid and a work around for
> > > > > > > > users if we find more buggy drivers.
> > > > > > > >
> > > > > > > > suppress_interrupt_hardening ?
> > > > > > >
> > > > > > > I can post a patch but I'm afraid if we disable it by default, it
> > > > > > > won't be used by the users so there's no way for us to receive 
> > > > > > > the bug
> > > > > > > report. Or we need a plan to enable it by default.
> > > > > > >
> > > > > > > It's rc2, how about waiting for 1 and 2 rc? Or it looks better if 
> > > > > > > we
> > > > > > > simply warn instead of disable it by default.
> > > > > > >
> > > > > > > Thanks
> > > > > >
> > > > > > I meant more like a flag in struct virtio_driver.
> > > > > > For now, could you audit all drivers which don't call _ready?
> > > > > > I found 5 of these:
> > > > > >
> > > > > > drivers/bluetooth/virtio_bt.c
> > > > >
> > > > > This driver seems to be fine, it doesn't use the device/vq in its 
> > > > > probe().
> > > >
> > > >
> > > > But it calls hci_register_dev and that in turn queues all kind of
> > > > work. Also, can linux start using the device immediately after
> > > > it's registered?
> > >
> > > So I think the driver is allowed to queue before DRIVER_OK.
> >
> > it's not allowed to kick
> 
> Yes.
> 
> >
> > > If yes,
> > > the only side effect is the delay of the tx interrupt after DRIVER_OK
> > > for a well behaved device.
> >
> > your patches drop the interrupt though, it won't be just delayed.
> 
> For a well behaved device, it can only trigger the interrupt after DRIVER_OK.
> 
> So for virtio bt, it works like:
> 
> 1) driver queue buffer and kick
> 2) driver set DRIVER_OK
> 3) device start to process the buffer
> 4) device send an notification
> 
> The only risk is that the virtqueue could be filled before DRIVER_OK,
> or anything I 

Re: [PATCH 29/36] cpuidle, xenpv: Make more PARAVIRT_XXL noinstr clean

2022-06-13 Thread Srivatsa S. Bhat
On 6/8/22 4:27 PM, Peter Zijlstra wrote:
> vmlinux.o: warning: objtool: acpi_idle_enter_s2idle+0xde: call to wbinvd() 
> leaves .noinstr.text section
> vmlinux.o: warning: objtool: default_idle+0x4: call to arch_safe_halt() 
> leaves .noinstr.text section
> vmlinux.o: warning: objtool: xen_safe_halt+0xa: call to 
> HYPERVISOR_sched_op.constprop.0() leaves .noinstr.text section
> 
> Signed-off-by: Peter Zijlstra (Intel) 

Reviewed-by: Srivatsa S. Bhat (VMware) 


Regards,
Srivatsa
VMware Photon OS

> ---
>  arch/x86/include/asm/paravirt.h  |6 --
>  arch/x86/include/asm/special_insns.h |4 ++--
>  arch/x86/include/asm/xen/hypercall.h |2 +-
>  arch/x86/kernel/paravirt.c   |   14 --
>  arch/x86/xen/enlighten_pv.c  |2 +-
>  arch/x86/xen/irq.c   |2 +-
>  6 files changed, 21 insertions(+), 9 deletions(-)
> 
> --- a/arch/x86/include/asm/paravirt.h
> +++ b/arch/x86/include/asm/paravirt.h
> @@ -168,7 +168,7 @@ static inline void __write_cr4(unsigned
>   PVOP_VCALL1(cpu.write_cr4, x);
>  }
>  
> -static inline void arch_safe_halt(void)
> +static __always_inline void arch_safe_halt(void)
>  {
>   PVOP_VCALL0(irq.safe_halt);
>  }
> @@ -178,7 +178,9 @@ static inline void halt(void)
>   PVOP_VCALL0(irq.halt);
>  }
>  
> -static inline void wbinvd(void)
> +extern noinstr void pv_native_wbinvd(void);
> +
> +static __always_inline void wbinvd(void)
>  {
>   PVOP_ALT_VCALL0(cpu.wbinvd, "wbinvd", ALT_NOT(X86_FEATURE_XENPV));
>  }
> --- a/arch/x86/include/asm/special_insns.h
> +++ b/arch/x86/include/asm/special_insns.h
> @@ -115,7 +115,7 @@ static inline void wrpkru(u32 pkru)
>  }
>  #endif
>  
> -static inline void native_wbinvd(void)
> +static __always_inline void native_wbinvd(void)
>  {
>   asm volatile("wbinvd": : :"memory");
>  }
> @@ -179,7 +179,7 @@ static inline void __write_cr4(unsigned
>   native_write_cr4(x);
>  }
>  
> -static inline void wbinvd(void)
> +static __always_inline void wbinvd(void)
>  {
>   native_wbinvd();
>  }
> --- a/arch/x86/include/asm/xen/hypercall.h
> +++ b/arch/x86/include/asm/xen/hypercall.h
> @@ -382,7 +382,7 @@ MULTI_stack_switch(struct multicall_entr
>  }
>  #endif
>  
> -static inline int
> +static __always_inline int
>  HYPERVISOR_sched_op(int cmd, void *arg)
>  {
>   return _hypercall2(int, sched_op, cmd, arg);
> --- a/arch/x86/kernel/paravirt.c
> +++ b/arch/x86/kernel/paravirt.c
> @@ -233,6 +233,11 @@ static noinstr void pv_native_set_debugr
>   native_set_debugreg(regno, val);
>  }
>  
> +noinstr void pv_native_wbinvd(void)
> +{
> + native_wbinvd();
> +}
> +
>  static noinstr void pv_native_irq_enable(void)
>  {
>   native_irq_enable();
> @@ -242,6 +247,11 @@ static noinstr void pv_native_irq_disabl
>  {
>   native_irq_disable();
>  }
> +
> +static noinstr void pv_native_safe_halt(void)
> +{
> + native_safe_halt();
> +}
>  #endif
>  
>  enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
> @@ -273,7 +283,7 @@ struct paravirt_patch_template pv_ops =
>   .cpu.read_cr0   = native_read_cr0,
>   .cpu.write_cr0  = native_write_cr0,
>   .cpu.write_cr4  = native_write_cr4,
> - .cpu.wbinvd = native_wbinvd,
> + .cpu.wbinvd = pv_native_wbinvd,
>   .cpu.read_msr   = native_read_msr,
>   .cpu.write_msr  = native_write_msr,
>   .cpu.read_msr_safe  = native_read_msr_safe,
> @@ -307,7 +317,7 @@ struct paravirt_patch_template pv_ops =
>   .irq.save_fl= __PV_IS_CALLEE_SAVE(native_save_fl),
>   .irq.irq_disable= __PV_IS_CALLEE_SAVE(pv_native_irq_disable),
>   .irq.irq_enable = __PV_IS_CALLEE_SAVE(pv_native_irq_enable),
> - .irq.safe_halt  = native_safe_halt,
> + .irq.safe_halt  = pv_native_safe_halt,
>   .irq.halt   = native_halt,
>  #endif /* CONFIG_PARAVIRT_XXL */
>  
> --- a/arch/x86/xen/enlighten_pv.c
> +++ b/arch/x86/xen/enlighten_pv.c
> @@ -1019,7 +1019,7 @@ static const typeof(pv_ops) xen_cpu_ops
>  
>   .write_cr4 = xen_write_cr4,
>  
> - .wbinvd = native_wbinvd,
> + .wbinvd = pv_native_wbinvd,
>  
>   .read_msr = xen_read_msr,
>   .write_msr = xen_write_msr,
> --- a/arch/x86/xen/irq.c
> +++ b/arch/x86/xen/irq.c
> @@ -24,7 +24,7 @@ noinstr void xen_force_evtchn_callback(v
>   (void)HYPERVISOR_xen_version(0, NULL);
>  }
>  
> -static void xen_safe_halt(void)
> +static noinstr void xen_safe_halt(void)
>  {
>   /* Blocking includes an implicit local_irq_enable(). */
>   if (HYPERVISOR_sched_op(SCHEDOP_block, NULL) != 0)
> 
> 
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization