Re: [PATCH v1 2/2] vdpa/mlx5: Add support for reading descriptor statistics
On 2022/2/16 4:00 PM, Eli Cohen wrote:

Implement the get_vq_stats callback of vdpa_config_ops to return the
statistics for a virtqueue. The statistics are provided as vendor
specific statistics where the driver provides a pair of attribute name
and attribute value. Currently supported are received descriptors and
completed descriptors.

Signed-off-by: Eli Cohen
Acked-by: Jason Wang
---
 drivers/vdpa/mlx5/core/mlx5_vdpa.h |   2 +
 drivers/vdpa/mlx5/net/mlx5_vnet.c  | 156 +
 include/linux/mlx5/mlx5_ifc.h      |   1 +
 include/linux/mlx5/mlx5_ifc_vdpa.h |  39
 4 files changed, 198 insertions(+)

diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
index daaf7b503677..44104093163b 100644
--- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
+++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
@@ -61,6 +61,8 @@ struct mlx5_control_vq {
 	struct vringh_kiov riov;
 	struct vringh_kiov wiov;
 	unsigned short head;
+	unsigned int received_desc;
+	unsigned int completed_desc;
 };

 struct mlx5_vdpa_wq_ent {
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index b53603d94082..6156cf6e9377 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -119,6 +119,7 @@ struct mlx5_vdpa_virtqueue {
 	struct mlx5_vdpa_umem umem2;
 	struct mlx5_vdpa_umem umem3;

+	u32 counter_set_id;
 	bool initialized;
 	int index;
 	u32 virtq_id;
@@ -163,6 +164,8 @@ struct mlx5_vdpa_net {
 	u32 cur_num_vqs;
 	struct notifier_block nb;
 	struct vdpa_callback config_cb;
+	/* sync access to virtqueues statistics */
+	struct mutex numq_lock;
 };

 static void free_resources(struct mlx5_vdpa_net *ndev);
@@ -821,6 +824,12 @@ static u16 get_features_12_3(u64 features)
 	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_CSUM)) << 6);
 }

+static bool counters_supported(const struct mlx5_vdpa_dev *mvdev)
+{
+	return MLX5_CAP_GEN_64(mvdev->mdev, general_obj_types) &
+	       BIT_ULL(MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
+}
+
 static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
 {
 	int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
@@ -875,6 +884,8 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
 	MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id);
 	MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size);
 	MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn);
+	if (counters_supported(&ndev->mvdev))
+		MLX5_SET(virtio_q, vq_ctx, counter_set_id, mvq->counter_set_id);

 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
 	if (err)
@@ -1138,6 +1149,47 @@ static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
 	return err;
 }

+static int counter_set_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
+{
+	u32 in[MLX5_ST_SZ_DW(create_virtio_q_counters_in)] = {};
+	u32 out[MLX5_ST_SZ_DW(create_virtio_q_counters_out)] = {};
+	void *cmd_hdr;
+	int err;
+
+	if (!counters_supported(&ndev->mvdev))
+		return 0;
+
+	cmd_hdr = MLX5_ADDR_OF(create_virtio_q_counters_in, in, hdr);
+
+	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
+	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
+
+	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
+	if (err)
+		return err;
+
+	mvq->counter_set_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+
+	return 0;
+}
+
+static void counter_set_dealloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
+{
+	u32 in[MLX5_ST_SZ_DW(destroy_virtio_q_counters_in)] = {};
+	u32 out[MLX5_ST_SZ_DW(destroy_virtio_q_counters_out)] = {};
+
+	if (!counters_supported(&ndev->mvdev))
+		return;
+
+	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
+	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_id, mvq->counter_set_id);
+	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.uid, ndev->mvdev.res.uid);
+	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
+	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
+		mlx5_vdpa_warn(&ndev->mvdev, "dealloc counter set 0x%x\n", mvq->counter_set_id);
+}
+
 static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
 {
 	u16 idx = mvq->index;
@@ -1165,6 +1217,10 @@ static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
 	if (err)
 		goto err_connect;

+	err =
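The quoted hunks stop before the read side of the counters. For orientation, here is a sketch of how querying the counter object back might look, mirroring the create/destroy commands above; the query_virtio_q_counters_* and virtio_q_counters ifc names are assumptions inferred from the naming pattern in this patch, not taken from the quoted hunks:

    static int counter_set_query(struct mlx5_vdpa_net *ndev,
                                 struct mlx5_vdpa_virtqueue *mvq,
                                 u64 *received_desc, u64 *completed_desc)
    {
        u32 in[MLX5_ST_SZ_DW(query_virtio_q_counters_in)] = {};   /* assumed ifc name */
        u32 out[MLX5_ST_SZ_DW(query_virtio_q_counters_out)] = {}; /* assumed ifc name */
        void *cmd_hdr;
        void *ctx;
        int err;

        if (!counters_supported(&ndev->mvdev))
            return -EOPNOTSUPP;

        cmd_hdr = MLX5_ADDR_OF(query_virtio_q_counters_in, in, hdr);
        MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
        MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
        MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->counter_set_id);
        MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);

        err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
        if (err)
            return err;

        /* Read back the two counters this series exposes. */
        ctx = MLX5_ADDR_OF(query_virtio_q_counters_out, out, counters);
        *received_desc = MLX5_GET64(virtio_q_counters, ctx, received_desc);
        *completed_desc = MLX5_GET64(virtio_q_counters, ctx, completed_desc);
        return 0;
    }

The get_vq_stats callback would then report these two values as the (attribute name, attribute value) pairs described in patch 1/2.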
Re: [PATCH v1 1/2] vdpa: Add support for querying vendor statistics
On 2022/2/16 4:00 PM, Eli Cohen wrote:

Allows to read vendor statistics of a vdpa device. The specific
statistics data is received by the upstream driver in the form of
(attribute name, attribute value) pairs.

An example of statistics for a mlx5_vdpa device:

received_desc - number of descriptors received by the virtqueue
completed_desc - number of descriptors completed by the virtqueue

A descriptor using indirect buffers is still counted as 1. In addition,
N chained descriptors are counted correctly N times as one would expect.

A new callback was added to vdpa_config_ops which provides the means
for the vdpa driver to return statistics results.

The interface allows for reading all the supported virtqueues,
including the control virtqueue if it exists.

Below are some examples taken from mlx5_vdpa which are introduced in
the following patch:

1. Read statistics for the virtqueue at index 1

$ vdpa dev vstats show vdpa-a qidx 1
vdpa-a:
queue_type tx queue_index 1 received_desc 3844836 completed_desc 3844836

2. Read statistics for the virtqueue at index 32

$ vdpa dev vstats show vdpa-a qidx 32
vdpa-a:
queue_type control_vq queue_index 32 received_desc 62 completed_desc 62

3. Read statistics for the virtqueue at index 0 with json output

$ vdpa -j dev vstats show vdpa-a qidx 0
{"vstats":{"vdpa-a":{
"queue_type":"rx","queue_index":0,"name":"received_desc","value":417776,\
"name":"completed_desc","value":417548}}}

4. Read statistics for the virtqueue at index 0 with pretty json output

$ vdpa -jp dev vstats show vdpa-a qidx 0
{
    "vstats": {
        "vdpa-a": {
            "queue_type": "rx",
            "queue_index": 0,
            "name": "received_desc",
            "value": 417776,
            "name": "completed_desc",
            "value": 417548
        }
    }
}

Signed-off-by: Eli Cohen
---
 drivers/vdpa/vdpa.c       | 129 ++
 include/linux/vdpa.h      |   5 ++
 include/uapi/linux/vdpa.h |   7 +++
 3 files changed, 141 insertions(+)

diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c
index 9846c9de4bfa..d0ff671baf88 100644
--- a/drivers/vdpa/vdpa.c
+++ b/drivers/vdpa/vdpa.c
@@ -909,6 +909,74 @@ vdpa_dev_config_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid,
 	return err;
 }

+static int vdpa_fill_stats_rec(struct vdpa_device *vdev, struct sk_buff *msg,
+			       struct genl_info *info, u32 index)
+{
+	int err;
+
+	if (nla_put_u32(msg, VDPA_ATTR_DEV_QUEUE_INDEX, index))
+		return -EMSGSIZE;
+
+	err = vdev->config->get_vendor_vq_stats(vdev, index, msg, info->extack);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static int vendor_stats_fill(struct vdpa_device *vdev, struct sk_buff *msg,
+			     struct genl_info *info, u32 index)
+{
+	int err;
+
+	if (!vdev->config->get_vendor_vq_stats)
+		return -EOPNOTSUPP;
+
+	err = vdpa_fill_stats_rec(vdev, msg, info, index);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static int vdpa_dev_vendor_stats_fill(struct vdpa_device *vdev,
+				      struct sk_buff *msg,
+				      struct genl_info *info, u32 index)
+{
+	u32 device_id;
+	void *hdr;
+	int err;
+	u32 portid = info->snd_portid;
+	u32 seq = info->snd_seq;
+	u32 flags = 0;
+
+	hdr = genlmsg_put(msg, portid, seq, &vdpa_nl_family, flags,
+			  VDPA_CMD_DEV_VSTATS_GET);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (nla_put_string(msg, VDPA_ATTR_DEV_NAME, dev_name(&vdev->dev))) {
+		err = -EMSGSIZE;
+		goto undo_msg;
+	}
+
+	device_id = vdev->config->get_device_id(vdev);
+	if (nla_put_u32(msg, VDPA_ATTR_DEV_ID, device_id)) {
+		err = -EMSGSIZE;
+		goto undo_msg;
+	}
+
+	err = vendor_stats_fill(vdev, msg, info, index);
+
+	genlmsg_end(msg, hdr);
+
+	return err;
+
+undo_msg:
+	genlmsg_cancel(msg, hdr);
+	return err;
+}
+
 static int
 vdpa_nl_cmd_dev_config_get_doit(struct sk_buff *skb, struct genl_info *info)
 {
 	struct vdpa_device *vdev;
@@ -990,6 +1058,60 @@ vdpa_nl_cmd_dev_config_get_dumpit(struct sk_buff *msg, struct netlink_callback *
 	return msg->len;
 }

+static int vdpa_nl_cmd_dev_stats_get_doit(struct sk_buff *skb,
+					  struct genl_info *info)
+{
+	struct vdpa_device *vdev;
+	struct sk_buff *msg;
+	const char *devname;
+	struct device *dev;
+	u32 index;
+	int err;
+
+	if (!info->attrs[VDPA_ATTR_DEV_NAME])
+		return -EINVAL;
+
+	if (!info->attrs[VDPA_ATTR_DEV_QUEUE_INDEX])
+		return -EINVAL;
+
+	devname = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]);
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
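The new callback hands the driver the netlink message to fill directly. A minimal sketch of a driver-side implementation could look like this; the VDPA_ATTR_DEV_VENDOR_ATTR_NAME/VALUE attribute names and the pad attribute are assumptions, since the uapi additions are not visible in the quoted hunks, and read_hw_received_desc() is a hypothetical helper:

    static int example_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx,
                                           struct sk_buff *msg,
                                           struct netlink_ext_ack *extack)
    {
        /* hypothetical helper fetching the device counter for queue idx */
        u64 received_desc = read_hw_received_desc(vdev, idx);

        /* one (name, value) pair per vendor counter */
        if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "received_desc"))
            return -EMSGSIZE;
        if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
                              received_desc, VDPA_ATTR_PAD))
            return -EMSGSIZE;
        return 0;
    }

Because the attribute names are free-form strings, userspace (the vdpa tool) can print vendor counters it has never heard of, which is what the qidx examples above rely on.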
Re: [PATCH v2 07/14] vhost: Shadow virtqueue buffers forwarding
On 2022/3/3 2:23 AM, Eugenio Perez Martin wrote:

> > > +static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
> > > +                                VirtQueueElement *elem,
> > > +                                unsigned *head)
> > > +{
> > > +    unsigned avail_idx;
> > > +    vring_avail_t *avail = svq->vring.avail;
> > > +
> > > +    *head = svq->free_head;
> > > +
> > > +    /* We need some descriptors here */
> > > +    if (unlikely(!elem->out_num && !elem->in_num)) {
> > > +        qemu_log_mask(LOG_GUEST_ERROR,
> > > +                      "Guest provided element with no descriptors");
> > > +        return false;
> > > +    }
> > > +
> > > +    vhost_vring_write_descs(svq, elem->out_sg, elem->out_num,
> > > +                            elem->in_num > 0, false);
> > > +    vhost_vring_write_descs(svq, elem->in_sg, elem->in_num, false, true);

> > I wonder instead of passing in/out separately and using the hint like
> > more_descs, is it better to simply pass the elem to
> > vhost_vring_write_descs(); then we know which one is the last that
> > doesn't depend on more_descs.

> I'm not sure I follow this. The purpose of vhost_vring_write_descs is to
> abstract the writing of a batch of descriptors, its chaining, etc. It
> accepts the write parameter just for the write flag. If we make elem a
> parameter, we would need to duplicate that for loop for read and for
> write descriptors, isn't it?
>
> To duplicate the for loop is the way it is done in the kernel, but I
> actually think the kernel could benefit from abstracting both in the
> same function too.
>
> Please let me know if you think otherwise or I've missed your point.

Ok, so it's just a suggestion and we can do optimization on top for sure.

Thanks
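For readers following the exchange above, the helper being discussed has roughly this shape; this is a sketch reconstructed from the conversation, not copied from the patch, so details may differ. The more_descs hint is what keeps VRING_DESC_F_NEXT set across the boundary between the out_sg and in_sg batches:

    static void vhost_vring_write_descs(VhostShadowVirtqueue *svq,
                                        const struct iovec *iovec, size_t num,
                                        bool more_descs, bool write)
    {
        uint16_t i = svq->free_head, last = svq->free_head;
        uint16_t flags = write ? cpu_to_le16(VRING_DESC_F_WRITE) : 0;
        vring_desc_t *descs = svq->vring.desc;

        if (num == 0) {
            return;
        }

        for (size_t n = 0; n < num; n++) {
            /* Chain to the next descriptor unless this is the very last one:
             * more_descs means the in_sg batch still follows this batch. */
            if (more_descs || (n + 1 < num)) {
                descs[i].flags = flags | cpu_to_le16(VRING_DESC_F_NEXT);
            } else {
                descs[i].flags = flags;
            }
            descs[i].addr = cpu_to_le64((hwaddr)(intptr_t)iovec[n].iov_base);
            descs[i].len = cpu_to_le32(iovec[n].iov_len);

            last = i;
            i = cpu_to_le16(descs[i].next);
        }

        svq->free_head = le16_to_cpu(descs[last].next);
    }

Passing the whole elem instead, as suggested, would let the helper see the true last descriptor directly, at the cost of iterating out_sg and in_sg in two loops inside it.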
Re: [PATCH v2 10/14] vdpa: Add custom IOTLB translations to SVQ
On 2022/3/1 4:50 PM, Eugenio Perez Martin wrote:

On Mon, Feb 28, 2022 at 8:37 AM Jason Wang wrote:

On 2022/2/27 9:41 PM, Eugenio Pérez wrote:

Use translations added in VhostIOVATree in SVQ.

Only introduce usage here, not allocation and deallocation. As with
previous patches, we use the dead code paths of shadow_vqs_enabled to
avoid committing too many changes at once. These are impossible to take
at the moment.

Signed-off-by: Eugenio Pérez
---
 hw/virtio/vhost-shadow-virtqueue.h |   6 +-
 include/hw/virtio/vhost-vdpa.h     |   3 +
 hw/virtio/vhost-shadow-virtqueue.c |  76 -
 hw/virtio/vhost-vdpa.c             | 128 -
 4 files changed, 187 insertions(+), 26 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
index 04c67685fd..b2f722d101 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -13,6 +13,7 @@
 #include "qemu/event_notifier.h"
 #include "hw/virtio/virtio.h"
 #include "standard-headers/linux/vhost_types.h"
+#include "hw/virtio/vhost-iova-tree.h"

 /* Shadow virtqueue to relay notifications */
 typedef struct VhostShadowVirtqueue {
@@ -43,6 +44,9 @@ typedef struct VhostShadowVirtqueue {
     /* Virtio device */
     VirtIODevice *vdev;

+    /* IOVA mapping */
+    VhostIOVATree *iova_tree;
+
     /* Map for use the guest's descriptors */
     VirtQueueElement **ring_id_maps;
@@ -78,7 +82,7 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
                      VirtQueue *vq);
 void vhost_svq_stop(VhostShadowVirtqueue *svq);

-VhostShadowVirtqueue *vhost_svq_new(void);
+VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree);

 void vhost_svq_free(gpointer vq);
 G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostShadowVirtqueue, vhost_svq_free);
diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
index 009a9f3b6b..ee8e939ad0 100644
--- a/include/hw/virtio/vhost-vdpa.h
+++ b/include/hw/virtio/vhost-vdpa.h
@@ -14,6 +14,7 @@

 #include

+#include "hw/virtio/vhost-iova-tree.h"
 #include "hw/virtio/virtio.h"
 #include "standard-headers/linux/vhost_types.h"

@@ -30,6 +31,8 @@ typedef struct vhost_vdpa {
     MemoryListener listener;
     struct vhost_vdpa_iova_range iova_range;
     bool shadow_vqs_enabled;
+    /* IOVA mapping used by the Shadow Virtqueue */
+    VhostIOVATree *iova_tree;
     GPtrArray *shadow_vqs;
     struct vhost_dev *dev;
     VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
index a38d313755..7e073773d1 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -11,6 +11,7 @@

 #include "hw/virtio/vhost-shadow-virtqueue.h"
 #include "qemu/error-report.h"
+#include "qemu/log.h"
 #include "qemu/main-loop.h"
 #include "qemu/log.h"
 #include "linux-headers/linux/vhost.h"
@@ -84,7 +85,58 @@ static void vhost_svq_set_notification(VhostShadowVirtqueue *svq, bool enable)
     }
 }

+/**
+ * Translate addresses between the qemu's virtual address and the SVQ IOVA
+ *
+ * @svq    Shadow VirtQueue
+ * @vaddr  Translated IOVA addresses
+ * @iovec  Source qemu's VA addresses
+ * @num    Length of iovec and minimum length of vaddr
+ */
+static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq,
+                                     void **addrs, const struct iovec *iovec,
+                                     size_t num)
+{
+    if (num == 0) {
+        return true;
+    }
+
+    for (size_t i = 0; i < num; ++i) {
+        DMAMap needle = {
+            .translated_addr = (hwaddr)iovec[i].iov_base,
+            .size = iovec[i].iov_len,
+        };
+        size_t off;
+
+        const DMAMap *map = vhost_iova_tree_find_iova(svq->iova_tree, &needle);
+        /*
+         * Map cannot be NULL since iova map contains all guest space and
+         * qemu already has a physical address mapped
+         */
+        if (unlikely(!map)) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "Invalid address 0x%"HWADDR_PRIx" given by guest",
+                          needle.translated_addr);
+            return false;
+        }
+
+        off = needle.translated_addr - map->translated_addr;
+        addrs[i] = (void *)(map->iova + off);
+
+        if (unlikely(int128_gt(int128_add(needle.translated_addr,
+                                          iovec[i].iov_len),
+                               map->translated_addr + map->size))) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "Guest buffer expands over iova range");
+            return false;
+        }
+    }
+
+    return true;
+}
+
 static void vhost_vring_write_descs(VhostShadowVirtqueue *svq,
+                                    void * const *vaddr_sg,

Nit: it looks to me we are not passing vaddr but iova here, so it might
be better to use
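To make the lookup above concrete, a worked example with made-up numbers (the inclusive-size semantics of DMAMap follow QEMU's util/iova-tree.h, where size is documented as inclusive; treat that as an assumption here):

    /* Worked example of vhost_svq_translate_addr(), made-up values. */
    DMAMap map = {
        .translated_addr = 0x7f0000000000,  /* qemu VA where the region starts */
        .iova            = 0x1000,          /* device-visible base */
        .size            = 0xfffff,         /* inclusive: region covers 0x100000 bytes */
    };
    struct iovec sg = {
        .iov_base = (void *)0x7f0000000100,
        .iov_len  = 0x200,
    };
    size_t off = (hwaddr)sg.iov_base - map.translated_addr;  /* 0x100 */
    void *dev_addr = (void *)(map.iova + off);               /* 0x1100 */
    /* The int128 check rejects the buffer if its end, 0x7f0000000300,
     * would run past map.translated_addr + map.size. */

So the device only ever sees SVQ IOVA, never qemu VAs, which is also why the reviewer's closing nit about the vaddr naming applies.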
Re: [PATCH v2] virtio-blk: Remove BUG_ON() in virtio_queue_rq()
On Thu, Mar 03, 2022 at 11:31:35AM +0800, Yongji Xie wrote:
> On Wed, Mar 2, 2022 at 11:05 PM Max Gurtovoy wrote:
> >
> > On 3/2/2022 3:15 PM, Michael S. Tsirkin wrote:
> > > On Wed, Mar 02, 2022 at 06:46:03PM +0800, Yongji Xie wrote:
> > >> On Tue, Mar 1, 2022 at 11:43 PM Michael S. Tsirkin
> > >> wrote:
> > >>> On Mon, Feb 28, 2022 at 02:57:20PM +0800, Xie Yongji wrote:
> > >>>> Currently we have a BUG_ON() to make sure the number of sg
> > >>>> list does not exceed queue_max_segments() in virtio_queue_rq().
> > >>>> However, the block layer uses queue_max_discard_segments()
> > >>>> instead of queue_max_segments() to limit the sg list for
> > >>>> discard requests. So the BUG_ON() might be triggered if
> > >>>> virtio-blk device reports a larger value for max discard
> > >>>> segment than queue_max_segments().
> > >>> Hmm the spec does not say what should happen if max_discard_seg
> > >>> exceeds seg_max. Is this the config you have in mind? how do you
> > >>> create it?
> > >>>
> > >> One example: the device doesn't specify the value of max_discard_seg
> > >> in the config space, then the virtio-blk driver will use
> > >> MAX_DISCARD_SEGMENTS (256) by default. Then we're able to trigger the
> > >> BUG_ON() if the seg_max is less than 256.
> > >>
> > >> While the spec didn't say what should happen if max_discard_seg
> > >> exceeds seg_max, it also doesn't explicitly prohibit this
> > >> configuration. So I think we should at least not panic the kernel in
> > >> this case.
> > >>
> > >> Thanks,
> > >> Yongji
> > > Oh that last one sounds like a bug, I think it should be
> > > min(MAX_DISCARD_SEGMENTS, seg_max)
> > >
> > > When max_discard_seg and seg_max both exist, that's a different question.
> > > We can
> > > - do min(max_discard_seg, seg_max)
> > > - fail probe
> > > - clear the relevant feature flag
> > >
> > > I feel we need a better plan than submitting an invalid request to device.
> >
> > We should cover only for buggy devices.
> >
> > The situation that max_discard_seg > seg_max should be fine.
> >
> > Thus the below can be added to this patch:
> >
> > diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
> > index c443cd64fc9b..3e372b97fe10 100644
> > --- a/drivers/block/virtio_blk.c
> > +++ b/drivers/block/virtio_blk.c
> > @@ -926,8 +926,8 @@ static int virtblk_probe(struct virtio_device *vdev)
> >                 virtio_cread(vdev, struct virtio_blk_config,
> >                              max_discard_seg, &v);
> >                 blk_queue_max_discard_segments(q,
> > -                                              min_not_zero(v,
> > -                                                           MAX_DISCARD_SEGMENTS));
> > +                                              min_t(u32, (v ? v : sg_elems),
> > +                                                    MAX_DISCARD_SEGMENTS));
> >
> >                 blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
> >         }
> >
>
> LGTM, I can add this in v3.
>
> Thanks,
> Yongji

Except the logic is convoluted then. I would instead add

	/* max_seg == 0 is out of spec but we always handled it */
	if (!v)
		v = sg_elems;

-- 
MST
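Putting Michael's suggestion into the quoted hunk, the probe path would read roughly as follows; this is a sketch assuming the surrounding virtblk_probe() context shown in the diff:

    virtio_cread(vdev, struct virtio_blk_config,
                 max_discard_seg, &v);
    /* max_discard_seg == 0 is out of spec but we always handled it */
    if (!v)
        v = sg_elems;
    blk_queue_max_discard_segments(q,
                                   min_t(u32, v, MAX_DISCARD_SEGMENTS));

    blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);

Separating the zero special case from the clamping keeps each rule readable: zero means "field not provided, fall back to seg_max", and the min_t() then bounds whatever value results.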
Re: [PATCH v2 08/14] util: Add iova_tree_alloc
On 2022/3/1 6:06 PM, Eugenio Perez Martin wrote:

> > > +
> > > +/*
> > > + * Find a valid hole for the mapping
> > > + *
> > > + * Assuming low iova_begin, so no need to do a binary search to
> > > + * locate the first node.
> > > + *
> > > + * TODO: Replace all this with g_tree_node_first/next/last when available
> > > + * (from glib since 2.68). To do it with g_tree_foreach complicates the
> > > + * code a lot.
> > > + */

> > One more question: the current code looks to work but is still a little
> > bit complicated to review. Looking at the missing helpers above, if the
> > add and remove are seldom, I wonder if we can simply do g_tree_foreach()
> > during each add/del to build a sorted list, then we can emulate
> > g_tree_node_first/next/last easily?

> This sounds a lot like the method in v1 [1] :).

Oh, right. I missed that and it takes time to recover the memory.

> But it didn't use the O(N) foreach, since we can locate the new node's
> previous element looking for the upper bound of iova-1, maintaining the
> insertion's complexity O(log(N)). The function g_tree_upper_bound is
> added in GLib version 2.68, so the proposed version will be deleted
> sooner or later.
>
> Also the deletion keeps being O(log(N)) since deleting a node in QLIST
> is O(1).

Yes, so I think we can leave the log as is and do optimization on top.

Thanks
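To illustrate the GLib >= 2.68 approach mentioned above, here is a minimal, self-contained sketch of locating the node that precedes a new key via g_tree_upper_bound(), keeping insertion at O(log N); the integer key type and comparator are assumptions for the example, not taken from the patch:

    #include <glib.h>

    /* Comparator for a tree created with g_tree_new_full(cmp_uint64, ...). */
    static gint cmp_uint64(gconstpointer a, gconstpointer b, gpointer data)
    {
        guint64 x = *(const guint64 *)a, y = *(const guint64 *)b;
        return (x > y) - (x < y);
    }

    /* Return the tree node holding the largest key strictly below 'key'
     * (NULL if none), assuming key > 0 so key - 1 does not wrap. */
    static GTreeNode *node_before(GTree *tree, guint64 key)
    {
        guint64 probe = key - 1;
        /* g_tree_upper_bound(): first node whose key is > probe, i.e. >= key */
        GTreeNode *next = g_tree_upper_bound(tree, &probe);

        /* If no node exceeds probe, the predecessor is the last node. */
        return next ? g_tree_node_previous(next) : g_tree_node_last(tree);
    }

Once the predecessor is known, splicing the new element into a QLIST after it is O(1), which is the deletion/insertion cost split described in the reply.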
Re: [virtio-comment] [PATCH v5 1/2] virtio-vsock: add description for datagram type
On Thu, Mar 03, 2022 at 03:29:31AM +0000, Bobby Eshleman wrote:
> On Wed, Mar 02, 2022 at 05:09:58PM +0100, Stefano Garzarella wrote:
> > Hi Bobby,
> > Sorry for the delay, but I saw these patches today.
> > Please, can you keep me in CC?
> >
> Hey Stefano, sorry about that. I'm not sure how I lost your CC on this
> one. I'll make sure you are there moving forward.
>
> I want to mention that I'm taking a look at
> https://gitlab.com/vsock/vsock/-/issues/1 in parallel with my dgram work
> here. After sending out this series we noticed potential overlap between
> the two issues. The additional dgram queues may become redundant if a
> fairness mechanism that solves issue #1 above also applies to
> connection-less protocols (similar to how the TC subsystem works). I've
> just begun sorting out potential solutions so no hard results yet. Just
> putting on your radar that the proposal here in v5 may be impacted if my
> investigation into issue #1 yields something adequate.

Well, not mergeable, but datagram is upstream in Linux, is it not?
So we do want it in the spec IMHO, even if in the future there will be
a better protocol.

> > On Thu, Feb 24, 2022 at 10:15:46PM +0000, beshleman.dev...@gmail.com wrote:
> > > From: Jiang Wang
> > >
> > ... snip ...
> > >
> > >  virtio-vsock.tex | 63 +++-
> > >  1 file changed, 53 insertions(+), 10 deletions(-)
> > >
> > > diff --git a/virtio-vsock.tex b/virtio-vsock.tex
> > > index d79984d..1a66a1b 100644
> > > --- a/virtio-vsock.tex
> > > +++ b/virtio-vsock.tex
> > > @@ -9,11 +9,26 @@ \subsection{Device ID}\label{sec:Device Types / Socket Device / Device ID}
> > >
> > > \subsection{Virtqueues}\label{sec:Device Types / Socket Device / Virtqueues}
> > > \begin{description}
> > > -\item[0] rx
> > > -\item[1] tx
> > > +\item[0] stream rx
> > > +\item[1] stream tx
> > > +\item[2] datagram rx
> > > +\item[3] datagram tx
> > > +\item[4] event
> > > +\end{description}
> > > +The virtio socket device uses 5 queues if feature bit VIRTIO_VSOCK_F_DGRAM is set. Otherwise, it
> > > +only uses 3 queues, as the following.
>
> > We are also adding a new flag (VIRTIO_VSOCK_F_NO_IMPLIED_STREAM) to
> > provide the possibility to support for example only dgrams.
> >
> > So I think we should consider the case where we have only DGRAM queues
> > (and it will be similar to the stream only case so 3 queues).
> >
> > Maybe we could describe this part better and say that if we have both
> > STREAM (or SEQPACKET) and DGRAM set we have 5 queues, otherwise
> > only 3 queues.
>
> Roger that.

> > > \subsubsection{Buffer Space Management}\label{sec:Device Types / Socket Device / Device Operation / Buffer Space Management}
> > > \field{buf_alloc} and \field{fwd_cnt} are used for buffer space management of
> > > stream sockets. The guest and the device publish how much buffer space is
> > > @@ -170,7 +193,7 @@ \subsubsection{Buffer Space Management}\label{sec:Device Types / Socket Device /
> > >  u32 peer_free = peer_buf_alloc - (tx_cnt - peer_fwd_cnt);
> > > \end{lstlisting}
> > >
> > > -If there is insufficient buffer space, the sender waits until virtqueue buffers
> > > +For stream sockets, if there is insufficient buffer space, the sender waits until virtqueue buffers
>
> > stream and seqpacket
>
> > > are returned and checks \field{buf_alloc} and \field{fwd_cnt} again. Sending
> > > the VIRTIO_VSOCK_OP_CREDIT_REQUEST packet queries how much buffer space is
> > > available. The reply to this query is a VIRTIO_VSOCK_OP_CREDIT_UPDATE packet.
> > > @@ -178,22 +201,32 @@ \subsubsection{Buffer Space Management}\label{sec:Device Types / Socket Device /
> > > previously receiving a VIRTIO_VSOCK_OP_CREDIT_REQUEST packet. This allows
> > > communicating updates any time a change in buffer space occurs.
> > >
> > > +Unlike stream sockets, dgram sockets do not use VIRTIO_VSOCK_OP_CREDIT_UPDATE
> > > +or VIRTIO_VSOCK_OP_CREDIT_REQUEST packets. The dgram buffer management is split
> > > +into two parts: senders and receivers. For senders, the packet is dropped if the
> > > +virtqueue is full. For receivers, the packet is dropped if there is no space
> > > +in the receive buffer.
> > > +
> > > \drivernormative{\paragraph}{Device Operation: Buffer Space Management}{Device Types / Socket Device / Device Operation / Buffer Space Management}
> > > -VIRTIO_VSOCK_OP_RW data packets MUST only be transmitted when the peer has
> > > -sufficient free buffer space for the payload.
> > > +For stream sockets, VIRTIO_VSOCK_OP_RW data packets MUST only be transmitted when the peer has
>
> > stream and seqpacket
>
> > > +sufficient free buffer space for the payload. For dgram sockets, VIRTIO_VSOCK_OP_RW data packets
> > > +MAY be transmitted when the peer rx buffer is full. Then the packet will
Re: [PATCH v2 02/14] vhost: Add Shadow VirtQueue kick forwarding capabilities
On 2022/3/2 2:49 AM, Eugenio Perez Martin wrote:

On Mon, Feb 28, 2022 at 3:57 AM Jason Wang wrote:

On 2022/2/27 9:40 PM, Eugenio Pérez wrote:

In this mode no buffer forwarding will be performed in SVQ mode: Qemu
will just forward the guest's kicks to the device.

Host memory notifiers regions are left out for simplicity, and they
will not be addressed in this series.

Signed-off-by: Eugenio Pérez
---
 hw/virtio/vhost-shadow-virtqueue.h |  14 +++
 include/hw/virtio/vhost-vdpa.h     |   4 +
 hw/virtio/vhost-shadow-virtqueue.c |  52 +++
 hw/virtio/vhost-vdpa.c             | 145 -
 4 files changed, 213 insertions(+), 2 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
index f1519e3c7b..1cbc87d5d8 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -18,8 +18,22 @@ typedef struct VhostShadowVirtqueue {
     EventNotifier hdev_kick;
     /* Shadow call notifier, sent to vhost */
     EventNotifier hdev_call;
+
+    /*
+     * Borrowed virtqueue's guest to host notifier. To borrow it in this event
+     * notifier allows to recover the VhostShadowVirtqueue from the event loop
+     * easily. If we use the VirtQueue's one, we don't have an easy way to
+     * retrieve VhostShadowVirtqueue.
+     *
+     * So shadow virtqueue must not clean it, or we would lose VirtQueue one.
+     */
+    EventNotifier svq_kick;
 } VhostShadowVirtqueue;

+void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
+
+void vhost_svq_stop(VhostShadowVirtqueue *svq);
+
 VhostShadowVirtqueue *vhost_svq_new(void);

 void vhost_svq_free(gpointer vq);
diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
index 3ce79a646d..009a9f3b6b 100644
--- a/include/hw/virtio/vhost-vdpa.h
+++ b/include/hw/virtio/vhost-vdpa.h
@@ -12,6 +12,8 @@
 #ifndef HW_VIRTIO_VHOST_VDPA_H
 #define HW_VIRTIO_VHOST_VDPA_H

+#include
+
 #include "hw/virtio/virtio.h"
 #include "standard-headers/linux/vhost_types.h"

@@ -27,6 +29,8 @@ typedef struct vhost_vdpa {
     bool iotlb_batch_begin_sent;
     MemoryListener listener;
     struct vhost_vdpa_iova_range iova_range;
+    bool shadow_vqs_enabled;
+    GPtrArray *shadow_vqs;
     struct vhost_dev *dev;
     VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
 } VhostVDPA;
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
index 019cf1950f..a5d0659f86 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -11,6 +11,56 @@

 #include "hw/virtio/vhost-shadow-virtqueue.h"
 #include "qemu/error-report.h"
+#include "qemu/main-loop.h"
+#include "linux-headers/linux/vhost.h"
+
+/** Forward guest notifications */
+static void vhost_handle_guest_kick(EventNotifier *n)
+{
+    VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue,
+                                             svq_kick);
+    event_notifier_test_and_clear(n);
+    event_notifier_set(&svq->hdev_kick);
+}
+
+/**
+ * Set a new file descriptor for the guest to kick the SVQ and notify for avail
+ *
+ * @svq          The svq
+ * @svq_kick_fd  The svq kick fd
+ *
+ * Note that the SVQ will never close the old file descriptor.
+ */
+void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd)
+{
+    EventNotifier *svq_kick = &svq->svq_kick;
+    bool poll_stop = VHOST_FILE_UNBIND != event_notifier_get_fd(svq_kick);

I wonder if this is robust. E.g. is there any chance that we may end up
with both poll_stop and poll_start being false?

I cannot make that happen in qemu, but the function supports that case
well: it will do nothing. It's more or less the same code as used in
the vhost kernel, and is the expected behaviour if you send two
VHOST_FILE_UNBIND one right after another, to me.

I would think it's just stop twice.

If not, can we simply detect poll_stop as below and treat !poll_start
and poll_stop?

I'm not sure what it adds. Is there an unexpected consequence with
the current do-nothing behavior I've missed?

I'm not sure, but it feels odd if poll_start is not the reverse value
of poll_stop.

Thanks
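The quoted hunk is cut off right after poll_stop is computed. For reference, the logic being debated looks roughly like this (a sketch; the exact upstream code may differ):

    void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd)
    {
        EventNotifier *svq_kick = &svq->svq_kick;
        bool poll_stop = VHOST_FILE_UNBIND != event_notifier_get_fd(svq_kick);
        bool poll_start = svq_kick_fd != VHOST_FILE_UNBIND;

        if (poll_stop) {
            /* Stop listening on the old descriptor. */
            event_notifier_set_handler(svq_kick, NULL);
        }

        event_notifier_init_fd(svq_kick, svq_kick_fd);

        if (poll_start) {
            /* Check for kicks that may have arrived during the switch. */
            event_notifier_set(svq_kick);
            event_notifier_set_handler(svq_kick, vhost_handle_guest_kick);
        }
    }

With this shape, poll_stop and poll_start are computed from different file descriptors (the old one and the new one), so they are genuinely independent: both are false only when an already-unbound notifier is set to VHOST_FILE_UNBIND again, which is the do-nothing case discussed above.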
Re: [PATCH net-next] tuntap: add sanity checks about msg_controllen in sendmsg
On 2022/3/3 10:24 AM, Harold Huang wrote:

In patch [1], tun_msg_ctl was added to allow passing batched xdp buffers
to tun_sendmsg. Although we do not use msg_controllen in this path, we
should check msg_controllen to make sure the caller passes a valid
msg_ctl.

[1]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=fe8dd45bb7556246c6b76277b1ba4296c91c2505

Reported-by: Eric Dumazet
Suggested-by: Jason Wang
Signed-off-by: Harold Huang

Acked-by: Jason Wang

---
 drivers/net/tap.c   | 3 ++-
 drivers/net/tun.c   | 3 ++-
 drivers/vhost/net.c | 1 +
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/net/tap.c b/drivers/net/tap.c
index 8e3a28ba6b28..ba2ef5437e16 100644
--- a/drivers/net/tap.c
+++ b/drivers/net/tap.c
@@ -1198,7 +1198,8 @@ static int tap_sendmsg(struct socket *sock, struct msghdr *m,
 	struct xdp_buff *xdp;
 	int i;

-	if (ctl && (ctl->type == TUN_MSG_PTR)) {
+	if (m->msg_controllen == sizeof(struct tun_msg_ctl) &&
+	    ctl && ctl->type == TUN_MSG_PTR) {
 		for (i = 0; i < ctl->num; i++) {
 			xdp = &((struct xdp_buff *)ctl->ptr)[i];
 			tap_get_user_xdp(q, xdp);
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 969ea69fd29d..2a0d8a5d7aec 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -2501,7 +2501,8 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
 	if (!tun)
 		return -EBADFD;

-	if (ctl && (ctl->type == TUN_MSG_PTR)) {
+	if (m->msg_controllen == sizeof(struct tun_msg_ctl) &&
+	    ctl && ctl->type == TUN_MSG_PTR) {
 		struct tun_page tpage;
 		int n = ctl->num;
 		int flush = 0, queued = 0;
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 28ef323882fb..792ab5f23647 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -473,6 +473,7 @@ static void vhost_tx_batch(struct vhost_net *net,
 		goto signal_used;

 	msghdr->msg_control = &ctl;
+	msghdr->msg_controllen = sizeof(ctl);
 	err = sock->ops->sendmsg(sock, msghdr, 0);
 	if (unlikely(err < 0)) {
 		vq_err(&nvq->vq, "Fail to batch sending packets\n");
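The invariant the patch establishes in both drivers could also be captured in one place; the helper below is hypothetical (not part of the patch), just a restatement of the check for illustration:

    /* Hypothetical helper: only trust msg_control as a tun_msg_ctl when
     * msg_controllen says it carries exactly one. Returns NULL otherwise,
     * so callers fall back to treating the message as plain data. */
    static inline struct tun_msg_ctl *tun_msg_ctl_ptr(struct msghdr *m)
    {
        if (m->msg_controllen != sizeof(struct tun_msg_ctl))
            return NULL;
        return (struct tun_msg_ctl *)m->msg_control;
    }

Since vhost-net is the only in-kernel caller that passes TUN_MSG_PTR, the vhost_net.c hunk is what makes the new length check pass on the legitimate path.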
Re: Re: [PATCH v3 0/4] Introduce akcipher service for virtio-crypto
On 3/3/22 4:40 AM, Eric Biggers wrote:

On Wed, Mar 02, 2022 at 11:39:13AM +0800, zhenwei pi wrote:

v2 -> v3:
Rename virtio_crypto_algs.c to virtio_crypto_skcipher_algs.c, and minor
changes of function name.
Minor changes in virtio_crypto_akcipher_algs.c: no need to copy from
buffer if opcode is verify.

v1 -> v2:
Fix 1 compiling warning reported by kernel test robot.
Put "__le32 akcipher_algo;" instead of "__le32 reserve;" field of
struct virtio_crypto_config directly without size change.
Add padding in struct virtio_crypto_ecdsa_session_para to keep 64-bit
alignment.
Remove irrelevant change by code format alignment.
Also CC crypto gurus Herbert and linux-cry...@vger.kernel.org.
Test with QEMU (patched by the v2 version), works fine.

v1: Introduce akcipher service, implement RSA algorithm, and a minor fix.

zhenwei pi (4):
  virtio_crypto: Introduce VIRTIO_CRYPTO_NOSPC
  virtio-crypto: introduce akcipher service
  virtio-crypto: implement RSA algorithm
  virtio-crypto: rename skcipher algs

 drivers/crypto/virtio/Makefile                |   3 +-
 .../virtio/virtio_crypto_akcipher_algs.c      | 585 ++
 drivers/crypto/virtio/virtio_crypto_common.h  |   7 +-
 drivers/crypto/virtio/virtio_crypto_core.c    |   6 +-
 drivers/crypto/virtio/virtio_crypto_mgr.c     |  15 +-
 ...o_algs.c => virtio_crypto_skcipher_algs.c} |   4 +-
 include/uapi/linux/virtio_crypto.h            |  82 ++-
 7 files changed, 693 insertions(+), 9 deletions(-)
 create mode 100644 drivers/crypto/virtio/virtio_crypto_akcipher_algs.c
 rename drivers/crypto/virtio/{virtio_crypto_algs.c => virtio_crypto_skcipher_algs.c} (99%)

Why is this patchset useful? That isn't explained anywhere.

- Eric

Sorry about this missing part. This feature provides akcipher service
offloading capability for the guest side. I also sent a patchset for QEMU:
https://patchwork.kernel.org/project/qemu-devel/cover/20220211084335.1254281-1-pizhen...@bytedance.com/

The two patchsets work together: the guest side sends
encrypt/decrypt/sign/verify requests to the host side, and the host side
handles the requests and returns responses to the guest.

-- 
zhenwei pi
Re: [PATCH 1/1] vhost: Protect the virtqueue from being cleared whilst still in use
On Wed, Mar 02, 2022 at 04:49:17PM +0000, Lee Jones wrote:

On Wed, 02 Mar 2022, Michael S. Tsirkin wrote:

On Wed, Mar 02, 2022 at 05:28:31PM +0100, Stefano Garzarella wrote:

> On Wed, Mar 2, 2022 at 3:57 PM Lee Jones wrote:
> >
> > On Wed, 02 Mar 2022, Michael S. Tsirkin wrote:
> >
> > > On Wed, Mar 02, 2022 at 01:56:35PM +0000, Lee Jones wrote:
> > > > On Wed, 02 Mar 2022, Michael S. Tsirkin wrote:
> > > >
> > > > > On Wed, Mar 02, 2022 at 07:54:21AM +0000, Lee Jones wrote:
> > > > > > vhost_vsock_handle_tx_kick() already holds the mutex during its call
> > > > > > to vhost_get_vq_desc(). All we have to do is take the same lock
> > > > > > during virtqueue clean-up and we mitigate the reported issues.
> > > > > >
> > > > > > Link: https://syzkaller.appspot.com/bug?extid=279432d30d825e63ba00
> > > > > >
> > > > > > Cc:
> > > > > > Reported-by: syzbot+adc3cb32385586bec...@syzkaller.appspotmail.com
> > > > > > Signed-off-by: Lee Jones
> > > > > > ---
> > > > > >  drivers/vhost/vhost.c | 2 ++
> > > > > >  1 file changed, 2 insertions(+)
> > > > > >
> > > > > > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> > > > > > index 59edb5a1ffe28..bbaff6a5e21b8 100644
> > > > > > --- a/drivers/vhost/vhost.c
> > > > > > +++ b/drivers/vhost/vhost.c
> > > > > > @@ -693,6 +693,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
> > > > > >         int i;
> > > > > >
> > > > > >         for (i = 0; i < dev->nvqs; ++i) {
> > > > > > +               mutex_lock(&dev->vqs[i]->mutex);
> > > > > >                 if (dev->vqs[i]->error_ctx)
> > > > > >                         eventfd_ctx_put(dev->vqs[i]->error_ctx);
> > > > > >                 if (dev->vqs[i]->kick)
> > > > > > @@ -700,6 +701,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
> > > > > >                 if (dev->vqs[i]->call_ctx.ctx)
> > > > > >                         eventfd_ctx_put(dev->vqs[i]->call_ctx.ctx);
> > > > > >                 vhost_vq_reset(dev, dev->vqs[i]);
> > > > > > +               mutex_unlock(&dev->vqs[i]->mutex);
> > > > > >         }
> > > > >
> > > > > So this is a mitigation plan but the bug is still there though
> > > > > we don't know exactly what it is. I would prefer adding something like
> > > > > WARN_ON(mutex_is_locked(vqs[i]->mutex) here - does this make sense?
> > > >
> > > > As a rework to this, or as a subsequent patch?
> > >
> > > Can be a separate patch.
> > >
> > > > Just before the first lock I assume?
> > >
> > > I guess so, yes.
> >
> > No problem. Patch to follow.
> >
> > I'm also going to attempt to debug the root cause, but I'm new to this
> > subsystem to it might take a while for me to get my head around.
>
> IIUC the root cause should be the same as the one we solved here:
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=a58da53ffd70294ebea8ecd0eb45fd0d74add9f9
>
> The worker was not stopped before calling vhost_dev_cleanup(). So while
> the worker was still running we were going to free memory or initialize
> fields while it was still using virtqueue.

Right, and I agree but it's not the root though, we do attempt to stop all
workers.

Exactly. This is what happens, but the question I'm going to attempt to
answer is *why* does this happen.

IIUC the worker was still running because the /dev/vhost-vsock file was
not explicitly closed, so vhost_vsock_dev_release() was called in the
do_exit() of the process.

In that case there was the issue, because vhost_dev_check_owner()
returned false in vhost_vsock_stop() since current->mm was NULL.
So it returned earlier, without calling vhost_vq_set_backend(vq, NULL).
This did not stop the worker from continuing to run, causing the
multiple issues we are seeing.

current->mm was NULL, because in do_exit() the address space is cleaned
in exit_mm(), which is called before releasing the files in
exit_task_work(). This can be seen from the logs, where we see first
the warnings printed by vhost_dev_cleanup() and then the panic in the
worker (e.g. here https://syzkaller.appspot.com/text?tag=CrashLog&x=16a61fce70)

Mike also added a few more helpful details in this thread:
https://lore.kernel.org/virtualization/20220221100500.2x3s2sddqahgdfyt@sgarzare-redhat/T/#ree61316eac63245c9ba3050b44330e4034282cc2

Thanks,
Stefano
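To make the failing path concrete, here is a simplified sketch of vhost_vsock_stop() as it looked before the referenced fix (reconstructed from the analysis above; the real code may differ in detail):

    static int vhost_vsock_stop(struct vhost_vsock *vsock)
    {
        size_t i;
        int ret;

        mutex_lock(&vsock->dev.mutex);

        ret = vhost_dev_check_owner(&vsock->dev);
        if (ret)
            goto err;   /* in do_exit(), current->mm is already NULL, so the
                         * owner check fails and the backends below are never
                         * cleared: the worker keeps using the virtqueues */

        for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
            struct vhost_virtqueue *vq = &vsock->vqs[i];

            mutex_lock(&vq->mutex);
            vhost_vq_set_backend(vq, NULL);
            mutex_unlock(&vq->mutex);
        }

    err:
        mutex_unlock(&vsock->dev.mutex);
        return ret;
    }

The referenced fix makes the owner check skippable on the release path, so the backends are always cleared before vhost_dev_cleanup() runs.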
Re: [PATCH 1/1] vhost: Protect the virtqueue from being cleared whilst still in use
On Wed, 02 Mar 2022, Michael S. Tsirkin wrote:

> On Wed, Mar 02, 2022 at 05:28:31PM +0100, Stefano Garzarella wrote:
> > On Wed, Mar 2, 2022 at 3:57 PM Lee Jones wrote:
> > >
> > > On Wed, 02 Mar 2022, Michael S. Tsirkin wrote:
> > >
> > > > On Wed, Mar 02, 2022 at 01:56:35PM +0000, Lee Jones wrote:
> > > > > On Wed, 02 Mar 2022, Michael S. Tsirkin wrote:
> > > > >
> > > > > > On Wed, Mar 02, 2022 at 07:54:21AM +0000, Lee Jones wrote:
> > > > > > > vhost_vsock_handle_tx_kick() already holds the mutex during its call
> > > > > > > to vhost_get_vq_desc(). All we have to do is take the same lock
> > > > > > > during virtqueue clean-up and we mitigate the reported issues.
> > > > > > >
> > > > > > > Link: https://syzkaller.appspot.com/bug?extid=279432d30d825e63ba00
> > > > > > >
> > > > > > > Cc:
> > > > > > > Reported-by: syzbot+adc3cb32385586bec...@syzkaller.appspotmail.com
> > > > > > > Signed-off-by: Lee Jones
> > > > > > > ---
> > > > > > >  drivers/vhost/vhost.c | 2 ++
> > > > > > >  1 file changed, 2 insertions(+)
> > > > > > >
> > > > > > > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> > > > > > > index 59edb5a1ffe28..bbaff6a5e21b8 100644
> > > > > > > --- a/drivers/vhost/vhost.c
> > > > > > > +++ b/drivers/vhost/vhost.c
> > > > > > > @@ -693,6 +693,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
> > > > > > >         int i;
> > > > > > >
> > > > > > >         for (i = 0; i < dev->nvqs; ++i) {
> > > > > > > +               mutex_lock(&dev->vqs[i]->mutex);
> > > > > > >                 if (dev->vqs[i]->error_ctx)
> > > > > > >                         eventfd_ctx_put(dev->vqs[i]->error_ctx);
> > > > > > >                 if (dev->vqs[i]->kick)
> > > > > > > @@ -700,6 +701,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
> > > > > > >                 if (dev->vqs[i]->call_ctx.ctx)
> > > > > > >                         eventfd_ctx_put(dev->vqs[i]->call_ctx.ctx);
> > > > > > >                 vhost_vq_reset(dev, dev->vqs[i]);
> > > > > > > +               mutex_unlock(&dev->vqs[i]->mutex);
> > > > > > >         }
> > > > > >
> > > > > > So this is a mitigation plan but the bug is still there though
> > > > > > we don't know exactly what it is. I would prefer adding something like
> > > > > > WARN_ON(mutex_is_locked(vqs[i]->mutex) here - does this make sense?
> > > > >
> > > > > As a rework to this, or as a subsequent patch?
> > > >
> > > > Can be a separate patch.
> > > >
> > > > > Just before the first lock I assume?
> > > >
> > > > I guess so, yes.
> > >
> > > No problem. Patch to follow.
> > >
> > > I'm also going to attempt to debug the root cause, but I'm new to this
> > > subsystem to it might take a while for me to get my head around.
> >
> > IIUC the root cause should be the same as the one we solved here:
> > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=a58da53ffd70294ebea8ecd0eb45fd0d74add9f9
> >
> > The worker was not stopped before calling vhost_dev_cleanup(). So while
> > the worker was still running we were going to free memory or initialize
> > fields while it was still using virtqueue.
>
> Right, and I agree but it's not the root though, we do attempt to stop all
> workers.

Exactly. This is what happens, but the question I'm going to attempt to
answer is *why* does this happen.

-- 
Lee Jones [李琼斯]
Principal Technical Lead - Developer Services
Linaro.org │ Open source software for Arm SoCs
Follow Linaro: Facebook | Twitter | Blog
Re: [PATCH 1/1] vhost: Protect the virtqueue from being cleared whilst still in use
On Wed, Mar 02, 2022 at 05:28:31PM +0100, Stefano Garzarella wrote:
> On Wed, Mar 2, 2022 at 3:57 PM Lee Jones wrote:
> >
> > On Wed, 02 Mar 2022, Michael S. Tsirkin wrote:
> >
> > > On Wed, Mar 02, 2022 at 01:56:35PM +0000, Lee Jones wrote:
> > > > On Wed, 02 Mar 2022, Michael S. Tsirkin wrote:
> > > >
> > > > > On Wed, Mar 02, 2022 at 07:54:21AM +0000, Lee Jones wrote:
> > > > > > vhost_vsock_handle_tx_kick() already holds the mutex during its call
> > > > > > to vhost_get_vq_desc(). All we have to do is take the same lock
> > > > > > during virtqueue clean-up and we mitigate the reported issues.
> > > > > >
> > > > > > Link: https://syzkaller.appspot.com/bug?extid=279432d30d825e63ba00
> > > > > >
> > > > > > Cc:
> > > > > > Reported-by: syzbot+adc3cb32385586bec...@syzkaller.appspotmail.com
> > > > > > Signed-off-by: Lee Jones
> > > > > > ---
> > > > > >  drivers/vhost/vhost.c | 2 ++
> > > > > >  1 file changed, 2 insertions(+)
> > > > > >
> > > > > > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> > > > > > index 59edb5a1ffe28..bbaff6a5e21b8 100644
> > > > > > --- a/drivers/vhost/vhost.c
> > > > > > +++ b/drivers/vhost/vhost.c
> > > > > > @@ -693,6 +693,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
> > > > > >         int i;
> > > > > >
> > > > > >         for (i = 0; i < dev->nvqs; ++i) {
> > > > > > +               mutex_lock(&dev->vqs[i]->mutex);
> > > > > >                 if (dev->vqs[i]->error_ctx)
> > > > > >                         eventfd_ctx_put(dev->vqs[i]->error_ctx);
> > > > > >                 if (dev->vqs[i]->kick)
> > > > > > @@ -700,6 +701,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
> > > > > >                 if (dev->vqs[i]->call_ctx.ctx)
> > > > > >                         eventfd_ctx_put(dev->vqs[i]->call_ctx.ctx);
> > > > > >                 vhost_vq_reset(dev, dev->vqs[i]);
> > > > > > +               mutex_unlock(&dev->vqs[i]->mutex);
> > > > > >         }
> > > > >
> > > > > So this is a mitigation plan but the bug is still there though
> > > > > we don't know exactly what it is. I would prefer adding something like
> > > > > WARN_ON(mutex_is_locked(vqs[i]->mutex) here - does this make sense?
> > > >
> > > > As a rework to this, or as a subsequent patch?
> > >
> > > Can be a separate patch.
> > >
> > > > Just before the first lock I assume?
> > >
> > > I guess so, yes.
> >
> > No problem. Patch to follow.
> >
> > I'm also going to attempt to debug the root cause, but I'm new to this
> > subsystem to it might take a while for me to get my head around.
>
> IIUC the root cause should be the same as the one we solved here:
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=a58da53ffd70294ebea8ecd0eb45fd0d74add9f9
>
> The worker was not stopped before calling vhost_dev_cleanup(). So while
> the worker was still running we were going to free memory or initialize
> fields while it was still using virtqueue.
>
> Cheers,
> Stefano

Right, and I agree but it's not the root though, we do attempt to stop all
workers.

-- 
MST
Re: [PATCH 1/1] vhost: Protect the virtqueue from being cleared whilst still in use
On Wed, Mar 2, 2022 at 3:57 PM Lee Jones wrote:
>
> On Wed, 02 Mar 2022, Michael S. Tsirkin wrote:
>
> > On Wed, Mar 02, 2022 at 01:56:35PM +0000, Lee Jones wrote:
> > > On Wed, 02 Mar 2022, Michael S. Tsirkin wrote:
> > >
> > > > On Wed, Mar 02, 2022 at 07:54:21AM +0000, Lee Jones wrote:
> > > > > vhost_vsock_handle_tx_kick() already holds the mutex during its call
> > > > > to vhost_get_vq_desc(). All we have to do is take the same lock
> > > > > during virtqueue clean-up and we mitigate the reported issues.
> > > > >
> > > > > Link: https://syzkaller.appspot.com/bug?extid=279432d30d825e63ba00
> > > > >
> > > > > Cc:
> > > > > Reported-by: syzbot+adc3cb32385586bec...@syzkaller.appspotmail.com
> > > > > Signed-off-by: Lee Jones
> > > > > ---
> > > > >  drivers/vhost/vhost.c | 2 ++
> > > > >  1 file changed, 2 insertions(+)
> > > > >
> > > > > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> > > > > index 59edb5a1ffe28..bbaff6a5e21b8 100644
> > > > > --- a/drivers/vhost/vhost.c
> > > > > +++ b/drivers/vhost/vhost.c
> > > > > @@ -693,6 +693,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
> > > > >         int i;
> > > > >
> > > > >         for (i = 0; i < dev->nvqs; ++i) {
> > > > > +               mutex_lock(&dev->vqs[i]->mutex);
> > > > >                 if (dev->vqs[i]->error_ctx)
> > > > >                         eventfd_ctx_put(dev->vqs[i]->error_ctx);
> > > > >                 if (dev->vqs[i]->kick)
> > > > > @@ -700,6 +701,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
> > > > >                 if (dev->vqs[i]->call_ctx.ctx)
> > > > >                         eventfd_ctx_put(dev->vqs[i]->call_ctx.ctx);
> > > > >                 vhost_vq_reset(dev, dev->vqs[i]);
> > > > > +               mutex_unlock(&dev->vqs[i]->mutex);
> > > > >         }
> > > >
> > > > So this is a mitigation plan but the bug is still there though
> > > > we don't know exactly what it is. I would prefer adding something like
> > > > WARN_ON(mutex_is_locked(vqs[i]->mutex) here - does this make sense?
> > >
> > > As a rework to this, or as a subsequent patch?
> >
> > Can be a separate patch.
> >
> > > Just before the first lock I assume?
> >
> > I guess so, yes.
>
> No problem. Patch to follow.
>
> I'm also going to attempt to debug the root cause, but I'm new to this
> subsystem to it might take a while for me to get my head around.

IIUC the root cause should be the same as the one we solved here:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=a58da53ffd70294ebea8ecd0eb45fd0d74add9f9

The worker was not stopped before calling vhost_dev_cleanup(). So while
the worker was still running we were going to free memory or initialize
fields while it was still using virtqueue.

Cheers,
Stefano
Re: [virtio-comment] [PATCH v5 2/2] virtio-vsock: add mergeable buffer feature bit
On Thu, Feb 24, 2022 at 10:15:47PM +0000, beshleman.dev...@gmail.com wrote:

From: Jiang Wang

Add support for mergeable buffers for virtio-vsock.

Mergeable buffers allow individual large packets to be spread across
multiple buffers while still using only a single packet header. This
avoids artificially restraining packet size to the size of a single
buffer and offers a performant fragmentation/defragmentation scheme.

We need this new functionality because we want to fragment a datagram
packet over multiple buffers, right?

This reminded me that we don't have a maximum size for now; in this case
what would it be?

Maybe it would be helpful to define it as Laura is planning to do here:
https://lists.oasis-open.org/archives/virtio-comment/202202/msg00053.html

Signed-off-by: Jiang Wang
Signed-off-by: Bobby Eshleman
---
 virtio-vsock.tex | 76 
 1 file changed, 76 insertions(+)

diff --git a/virtio-vsock.tex b/virtio-vsock.tex
index 1a66a1b..bf44d5d 100644
--- a/virtio-vsock.tex
+++ b/virtio-vsock.tex
@@ -39,6 +39,7 @@ \subsection{Feature bits}\label{sec:Device Types / Socket Device / Feature bits}
 \item[VIRTIO_VSOCK_F_STREAM (0)] stream socket type is supported.
 \item[VIRTIO_VSOCK_F_SEQPACKET (1)] seqpacket socket type is supported.
 \item[VIRTIO_VSOCK_F_DGRAM (2)] datagram socket type is supported.
+\item[VIRTIO_VSOCK_F_MRG_RXBUF (3)] driver can merge receive buffers.
 \end{description}

 \subsection{Device configuration layout}\label{sec:Device Types / Socket Device / Device configuration layout}
@@ -87,6 +88,8 @@ \subsection{Device Operation}\label{sec:Device Types / Socket Device / Device Op

 Packets transmitted or received contain a header before the payload:

+If feature VIRTIO_VSOCK_F_MRG_RXBUF is not negotiated, use the following header.
+
 \begin{lstlisting}
 struct virtio_vsock_hdr {
 	le64 src_cid;
@@ -102,6 +105,15 @@ \subsection{Device Operation}\label{sec:Device Types / Socket Device / Device Op
 };
 \end{lstlisting}

+If feature VIRTIO_VSOCK_F_MRG_RXBUF is negotiated, use the following header.
+\begin{lstlisting}
+struct virtio_vsock_hdr_mrg_rxbuf {
+	struct virtio_vsock_hdr hdr;
+	le16 num_buffers;
+};
+\end{lstlisting}
+
+
 The upper 32 bits of src_cid and dst_cid are reserved and zeroed.

 Most packets simply transfer data but control packets are also used for
@@ -207,6 +219,25 @@ \subsubsection{Buffer Space Management}\label{sec:Device Types / Socket Device /
 virtqueue is full. For receivers, the packet is dropped if there is no space
 in the receive buffer.

+\drivernormative{\paragraph}{Device Operation: Buffer Space Management}{Device Types / Socket Device / Device Operation / Setting Up Receive Buffers}
+\begin{itemize}
+\item If VIRTIO_VSOCK_F_MRG_RXBUF is not negotiated, the driver SHOULD populate the datagram rx queue
+      with buffers of at least 4096 bytes.
+\item If VIRTIO_VSOCK_F_MRG_RXBUF is negotiated, each buffer MUST be at
+      least the size of the struct virtio_vsock_hdr_mrg_rxbuf.
+\end{itemize}
+
+\begin{note}
+Each buffer may be split across multiple descriptor elements.
+\end{note}
+
+\devicenormative{\paragraph}{Device Operation: Buffer Space Management}{Device Types / Socket Device / Device Operation / Setting Up Receive Buffers}
+The device MUST set \field{num_buffers} to the number of descriptors used when
+transmitting the packet.
+
+The device MUST use only a single descriptor if VIRTIO_VSOCK_F_MRG_RXBUF
+is not negotiated.
+
 \drivernormative{\paragraph}{Device Operation: Buffer Space Management}{Device Types / Socket Device / Device Operation / Buffer Space Management}
 For stream sockets, VIRTIO_VSOCK_OP_RW data packets MUST only be transmitted when the peer has
 sufficient free buffer space for the payload. For dgram sockets, VIRTIO_VSOCK_OP_RW data packets
@@ -229,6 +260,7 @@ \subsubsection{Receive and Transmit}\label{sec:Device Types / Socket Device / De
 The driver queues outgoing packets on the tx virtqueue and allocates incoming packet
 receive buffers on the rx virtqueue. Packets are of the following form:

+If VIRTIO_VSOCK_F_MRG_RXBUF is not negotiated, use the following.
 \begin{lstlisting}
 struct virtio_vsock_packet {
     struct virtio_vsock_hdr hdr;
     u8 data[];
 };
 \end{lstlisting}

+If VIRTIO_VSOCK_F_MRG_RXBUF is negotiated, use the following form:
+\begin{lstlisting}
+struct virtio_vsock_packet_mrg_rxbuf {
+    struct virtio_vsock_hdr_mrg_rxbuf hdr;
+    u8 data[];
+};
+\end{lstlisting}
+
+
 Virtqueue buffers for outgoing packets are read-only. Virtqueue buffers for
 incoming packets are write-only.

 When transmitting packets to the device, \field{num_buffers} is not used.

+\begin{enumerate}
+\item \field{num_buffers} indicates how many descriptors
+      this packet is spread over (including this one).
+      This is valid only if VIRTIO_VSOCK_F_MRG_RXBUF is negotiated.
+      This
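For context on how \field{num_buffers} would be consumed, here is a sketch of receiver-side gathering in a Linux-style driver. This mirrors the mergeable-buffer handling virtio-net drivers use and is not prescribed by the quoted spec text beyond the header layout; collect_payload() is a hypothetical helper, while virtqueue_get_buf() is the regular virtio API:

    /* Sketch only: gather one packet spread over hdr.num_buffers rx buffers. */
    struct virtio_vsock_hdr_mrg_rxbuf *hdr;
    unsigned int len;
    u16 nbufs, i;

    hdr = virtqueue_get_buf(vq, &len);
    nbufs = le16_to_cpu(hdr->num_buffers);
    collect_payload(pkt, (void *)(hdr + 1), len - sizeof(*hdr));

    for (i = 1; i < nbufs; i++) {
        void *buf = virtqueue_get_buf(vq, &len);

        /* Additional buffers carry payload only, no header. */
        collect_payload(pkt, buf, len);
    }

The single header up front is what lets the device exceed the size of any one rx buffer without repeating per-packet metadata.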
Re: [virtio-comment] [PATCH v5 1/2] virtio-vsock: add description for datagram type
Hi Bobby,
Sorry for the delay, but I saw these patches today.
Please, can you keep me in CC?

On Thu, Feb 24, 2022 at 10:15:46PM +0000, beshleman.dev...@gmail.com wrote:

From: Jiang Wang

Add support for datagram type for virtio-vsock. Datagram sockets are
connectionless and unreliable. To avoid contention with stream and
other sockets, add two more virtqueues and a new feature bit to
identify if those two new queues exist or not.

Also add descriptions for resource management of datagram, which does
not use the existing credit update mechanism associated with stream
sockets.

Signed-off-by: Jiang Wang
Signed-off-by: Bobby Eshleman
---
V2: Addressed the comments for the previous version.
V3: Add description for the mergeable receive buffer.
V4: Add a feature bit for stream and reserve a bit for seqpacket.
    Fix mrg_rxbuf related sentences.
V5: Rebase onto head (change to more nicely accompany seqpacket changes).
    Remove statement about no feature bits being set (already made by
    seqpacket patches). Clarify \field{type} declaration. Use words
    "sender/receiver" instead of "tx/rx" for clarity, and other prose
    changes. Directly state that full buffers result in dropped packets.
    Change verbs to present tense. Clarify if-else pairs (e.g., "If XYZ
    is negotiated" is followed by "If XYZ is not negotiated" instead of
    "Otherwise"). Mergeable buffer changes are split off into a separate
    patch.

 virtio-vsock.tex | 63 +++-
 1 file changed, 53 insertions(+), 10 deletions(-)

diff --git a/virtio-vsock.tex b/virtio-vsock.tex
index d79984d..1a66a1b 100644
--- a/virtio-vsock.tex
+++ b/virtio-vsock.tex
@@ -9,11 +9,26 @@ \subsection{Device ID}\label{sec:Device Types / Socket Device / Device ID}

 \subsection{Virtqueues}\label{sec:Device Types / Socket Device / Virtqueues}
 \begin{description}
-\item[0] rx
-\item[1] tx
+\item[0] stream rx
+\item[1] stream tx
+\item[2] datagram rx
+\item[3] datagram tx
+\item[4] event
+\end{description}
+The virtio socket device uses 5 queues if feature bit VIRTIO_VSOCK_F_DGRAM is set. Otherwise, it
+only uses 3 queues, as the following.

We are also adding a new flag (VIRTIO_VSOCK_F_NO_IMPLIED_STREAM) to
provide the possibility to support for example only dgrams.

So I think we should consider the case where we have only DGRAM queues
(and it will be similar to the stream only case so 3 queues).

Maybe we could describe this part better and say that if we have both
STREAM (or SEQPACKET) and DGRAM set we have 5 queues, otherwise
only 3 queues.

+
+\begin{description}
+\item[0] stream rx
+\item[1] stream tx
 \item[2] event
 \end{description}

+When behavior differs between stream and datagram rx/tx virtqueues
+their full names are used. Common behavior is simply described in
+terms of rx/tx virtqueues and applies to both stream and datagram
+virtqueues.
+
 \subsection{Feature bits}\label{sec:Device Types / Socket Device / Feature bits}

 If no feature bit is set, only stream socket type is supported.

@@ -23,6 +38,7 @@ \subsection{Feature bits}\label{sec:Device Types / Socket Device / Feature bits}
 \begin{description}
 \item[VIRTIO_VSOCK_F_STREAM (0)] stream socket type is supported.
 \item[VIRTIO_VSOCK_F_SEQPACKET (1)] seqpacket socket type is supported.
+\item[VIRTIO_VSOCK_F_DGRAM (2)] datagram socket type is supported.
 \end{description}

 \subsection{Device configuration layout}\label{sec:Device Types / Socket Device / Device configuration layout}
@@ -109,6 +125,9 @@ \subsection{Device Operation}\label{sec:Device Types / Socket Device / Device Op

 \subsubsection{Virtqueue Flow Control}\label{sec:Device Types / Socket Device / Device Operation / Virtqueue Flow Control}

+Flow control applies to stream sockets; datagram sockets do not have
+flow control.
+
 The tx virtqueue carries packets initiated by applications and replies to
 received packets. The rx virtqueue carries packets initiated by the device and
 replies to previously transmitted packets.

@@ -142,18 +161,22 @@ \subsubsection{Addressing}\label{sec:Device Types / Socket Device / Device Opera
 consists of a (cid, port number) tuple. The header fields used for this are
 \field{src_cid}, \field{src_port}, \field{dst_cid}, and \field{dst_port}.

-Currently stream and seqpacket sockets are supported. \field{type} is 1 (VIRTIO_VSOCK_TYPE_STREAM)
-for stream socket types, and 2 (VIRTIO_VSOCK_TYPE_SEQPACKET) for seqpacket socket types.
+Currently stream, seqpacket, and dgram sockets are supported. \field{type} is 1 (VIRTIO_VSOCK_TYPE_STREAM)
+for stream socket types, 2 (VIRTIO_VSOCK_TYPE_SEQPACKET) for seqpacket socket types, and 3 (VIRTIO_VSOCK_TYPE_DGRAM) for dgram socket types.

 \begin{lstlisting}
 #define VIRTIO_VSOCK_TYPE_STREAM    1
 #define VIRTIO_VSOCK_TYPE_SEQPACKET 2
+#define VIRTIO_VSOCK_TYPE_DGRAM     3
 \end{lstlisting}

 Stream sockets provide in-order, guaranteed, connection-oriented delivery
 without message boundaries. Seqpacket sockets provide
Re: [PATCH 1/1] vhost: Protect the virtqueue from being cleared whilst still in use
On Wed, Mar 02, 2022 at 09:50:38AM -0500, Michael S. Tsirkin wrote:

On Wed, Mar 02, 2022 at 03:11:21PM +0100, Stefano Garzarella wrote:

On Wed, Mar 02, 2022 at 08:35:08AM -0500, Michael S. Tsirkin wrote:
> On Wed, Mar 02, 2022 at 10:34:46AM +0100, Stefano Garzarella wrote:
> > On Wed, Mar 02, 2022 at 07:54:21AM +0000, Lee Jones wrote:
> > > vhost_vsock_handle_tx_kick() already holds the mutex during its call
> > > to vhost_get_vq_desc(). All we have to do is take the same lock
> > > during virtqueue clean-up and we mitigate the reported issues.
> > >
> > > Link: https://syzkaller.appspot.com/bug?extid=279432d30d825e63ba00
> >
> > This issue is similar to [1] that should be already fixed upstream by [2].
> >
> > However I think this patch would have prevented some issues, because
> > vhost_vq_reset() sets vq->private to NULL, preventing the worker from
> > running.
> >
> > Anyway I think that when we enter vhost_dev_cleanup() the worker should
> > be already stopped, so it shouldn't be necessary to take the mutex. But in
> > order to prevent future issues maybe it's better to take them, so:
> >
> > Reviewed-by: Stefano Garzarella
> >
> > [1] https://syzkaller.appspot.com/bug?id=993d8b5e64393ed9e6a70f9ae4de0119c605a822
> > [2] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=a58da53ffd70294ebea8ecd0eb45fd0d74add9f9
>
> Right. I want to queue this but I would like to get a warning
> so we can detect issues like [2] before they cause more issues.

I agree, what about moving the warning that we already have higher up,
right at the beginning of the function?

I mean something like this:

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 59edb5a1ffe2..1721ff3f18c0 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -692,6 +692,8 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
 {
 	int i;

+	WARN_ON(!llist_empty(&dev->work_list));
+
 	for (i = 0; i < dev->nvqs; ++i) {
 		if (dev->vqs[i]->error_ctx)
 			eventfd_ctx_put(dev->vqs[i]->error_ctx);
@@ -712,7 +714,6 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
 	dev->iotlb = NULL;
 	vhost_clear_msg(dev);
 	wake_up_interruptible_poll(&dev->wait, EPOLLIN | EPOLLRDNORM);
-	WARN_ON(!llist_empty(&dev->work_list));
 	if (dev->worker) {
 		kthread_stop(dev->worker);
 		dev->worker = NULL;

Hmm I'm not sure why it matters.

Because after this new patch, which takes the locks in the for loop,
when we finish the loop the workers should be stopped, because
vhost_vq_reset() sets vq->private to NULL.

But the best thing IMHO is to check that there is no backend set for
each vq, so the workers have been stopped correctly at this point.

Thanks,
Stefano
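Stefano's closing suggestion, checking that no backend is still set per vq, could look roughly like the sketch below; vhost_vq_get_backend() already exists in drivers/vhost/vhost.h (its locking comment asks for the vq mutex, so placement relative to the new locks would need care):

    /* Sketch: at the top of vhost_dev_cleanup(), before touching the vqs. */
    for (i = 0; i < dev->nvqs; ++i) {
        /* A non-NULL backend here means some worker may still be
         * processing this virtqueue: cleanup is racing with it. */
        WARN_ON(vhost_vq_get_backend(dev->vqs[i]));
    }

Compared with the work_list emptiness check, this catches the vsock case directly, since it is exactly the backend pointer that vhost_vsock_stop() failed to clear.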
Re: [PATCH 1/1] vhost: Protect the virtqueue from being cleared whilst still in use
On Wed, 02 Mar 2022, Michael S. Tsirkin wrote: > On Wed, Mar 02, 2022 at 01:56:35PM +0000, Lee Jones wrote: > > On Wed, 02 Mar 2022, Michael S. Tsirkin wrote: > > > > > On Wed, Mar 02, 2022 at 07:54:21AM +0000, Lee Jones wrote: > > > > vhost_vsock_handle_tx_kick() already holds the mutex during its call > > > > to vhost_get_vq_desc(). All we have to do is take the same lock > > > > during virtqueue clean-up and we mitigate the reported issues. > > > > > > > > Link: https://syzkaller.appspot.com/bug?extid=279432d30d825e63ba00 > > > > > > > > Cc: > > > > Reported-by: syzbot+adc3cb32385586bec...@syzkaller.appspotmail.com > > > > Signed-off-by: Lee Jones > > > > --- > > > > drivers/vhost/vhost.c | 2 ++ > > > > 1 file changed, 2 insertions(+) > > > > > > > > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c > > > > index 59edb5a1ffe28..bbaff6a5e21b8 100644 > > > > --- a/drivers/vhost/vhost.c > > > > +++ b/drivers/vhost/vhost.c > > > > @@ -693,6 +693,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev) > > > > int i; > > > > > > > > for (i = 0; i < dev->nvqs; ++i) { > > > > + mutex_lock(&dev->vqs[i]->mutex); > > > > if (dev->vqs[i]->error_ctx) > > > > eventfd_ctx_put(dev->vqs[i]->error_ctx); > > > > if (dev->vqs[i]->kick) > > > > @@ -700,6 +701,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev) > > > > if (dev->vqs[i]->call_ctx.ctx) > > > > eventfd_ctx_put(dev->vqs[i]->call_ctx.ctx); > > > > vhost_vq_reset(dev, dev->vqs[i]); > > > > + mutex_unlock(&dev->vqs[i]->mutex); > > > > } > > > > > > So this is a mitigation plan but the bug is still there though > > > we don't know exactly what it is. I would prefer adding something like > > > WARN_ON(mutex_is_locked(vqs[i]->mutex) here - does this make sense? > > > > As a rework to this, or as a subsequent patch? > > Can be a separate patch. > > > Just before the first lock I assume? > > I guess so, yes. No problem. Patch to follow. I'm also going to attempt to debug the root cause, but I'm new to this subsystem so it might take a while for me to get my head around. -- Lee Jones [李琼斯] Principal Technical Lead - Developer Services Linaro.org │ Open source software for Arm SoCs Follow Linaro: Facebook | Twitter | Blog
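A plausible shape for that follow-up, with the warning placed just before the newly added lock as agreed above (a sketch only, not the actual submission):

 	for (i = 0; i < dev->nvqs; ++i) {
+		/* Nothing should still hold the vq mutex once cleanup runs. */
+		WARN_ON(mutex_is_locked(&dev->vqs[i]->mutex));
 		mutex_lock(&dev->vqs[i]->mutex);
 		if (dev->vqs[i]->error_ctx)
 			eventfd_ctx_put(dev->vqs[i]->error_ctx);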
Re: [PATCH 1/1] vhost: Protect the virtqueue from being cleared whilst still in use
On Wed, Mar 02, 2022 at 01:56:35PM +0000, Lee Jones wrote: > On Wed, 02 Mar 2022, Michael S. Tsirkin wrote: > > > On Wed, Mar 02, 2022 at 07:54:21AM +0000, Lee Jones wrote: > > > vhost_vsock_handle_tx_kick() already holds the mutex during its call > > > to vhost_get_vq_desc(). All we have to do is take the same lock > > > during virtqueue clean-up and we mitigate the reported issues. > > > > > > Link: https://syzkaller.appspot.com/bug?extid=279432d30d825e63ba00 > > > > > > Cc: > > > Reported-by: syzbot+adc3cb32385586bec...@syzkaller.appspotmail.com > > > Signed-off-by: Lee Jones > > > --- > > > drivers/vhost/vhost.c | 2 ++ > > > 1 file changed, 2 insertions(+) > > > > > > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c > > > index 59edb5a1ffe28..bbaff6a5e21b8 100644 > > > --- a/drivers/vhost/vhost.c > > > +++ b/drivers/vhost/vhost.c > > > @@ -693,6 +693,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev) > > > int i; > > > > > > for (i = 0; i < dev->nvqs; ++i) { > > > + mutex_lock(&dev->vqs[i]->mutex); > > > if (dev->vqs[i]->error_ctx) > > > eventfd_ctx_put(dev->vqs[i]->error_ctx); > > > if (dev->vqs[i]->kick) > > > @@ -700,6 +701,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev) > > > if (dev->vqs[i]->call_ctx.ctx) > > > eventfd_ctx_put(dev->vqs[i]->call_ctx.ctx); > > > vhost_vq_reset(dev, dev->vqs[i]); > > > + mutex_unlock(&dev->vqs[i]->mutex); > > > } > > > > So this is a mitigation plan but the bug is still there though > > we don't know exactly what it is. I would prefer adding something like > > WARN_ON(mutex_is_locked(vqs[i]->mutex) here - does this make sense? > > As a rework to this, or as a subsequent patch? Can be a separate patch. > Just before the first lock I assume? I guess so, yes. > -- > Lee Jones [李琼斯] > Principal Technical Lead - Developer Services > Linaro.org │ Open source software for Arm SoCs > Follow Linaro: Facebook | Twitter | Blog
Re: [PATCH 1/1] vhost: Protect the virtqueue from being cleared whilst still in use
On Wed, Mar 02, 2022 at 03:11:21PM +0100, Stefano Garzarella wrote: > On Wed, Mar 02, 2022 at 08:35:08AM -0500, Michael S. Tsirkin wrote: > > On Wed, Mar 02, 2022 at 10:34:46AM +0100, Stefano Garzarella wrote: > > > On Wed, Mar 02, 2022 at 07:54:21AM +0000, Lee Jones wrote: > > > > vhost_vsock_handle_tx_kick() already holds the mutex during its call > > > > to vhost_get_vq_desc(). All we have to do is take the same lock > > > > during virtqueue clean-up and we mitigate the reported issues. > > > > > > > > Link: https://syzkaller.appspot.com/bug?extid=279432d30d825e63ba00 > > > > > > This issue is similar to [1] that should be already fixed upstream by [2]. > > > > > > However I think this patch would have prevented some issues, because > > > vhost_vq_reset() sets vq->private to NULL, preventing the worker from > > > running. > > > > > > Anyway I think that when we enter in vhost_dev_cleanup() the worker should > > > be already stopped, so it shouldn't be necessary to take the mutex. But in > > > order to prevent future issues maybe it's better to take them, so: > > > > > > Reviewed-by: Stefano Garzarella > > > > > > [1] > > > https://syzkaller.appspot.com/bug?id=993d8b5e64393ed9e6a70f9ae4de0119c605a822 > > > [2] > > > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=a58da53ffd70294ebea8ecd0eb45fd0d74add9f9 > > > > > > Right. I want to queue this but I would like to get a warning > > so we can detect issues like [2] before they cause more issues. > > I agree, what about moving the warning that we already have higher up, right > at the beginning of the function? > > I mean something like this: > > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c > index 59edb5a1ffe2..1721ff3f18c0 100644 > --- a/drivers/vhost/vhost.c > +++ b/drivers/vhost/vhost.c > @@ -692,6 +692,8 @@ void vhost_dev_cleanup(struct vhost_dev *dev) > { > int i; > + WARN_ON(!llist_empty(&dev->work_list)); > + > for (i = 0; i < dev->nvqs; ++i) { > if (dev->vqs[i]->error_ctx) > eventfd_ctx_put(dev->vqs[i]->error_ctx); > @@ -712,7 +714,6 @@ void vhost_dev_cleanup(struct vhost_dev *dev) > dev->iotlb = NULL; > vhost_clear_msg(dev); > wake_up_interruptible_poll(&dev->wait, EPOLLIN | EPOLLRDNORM); > - WARN_ON(!llist_empty(&dev->work_list)); > if (dev->worker) { > kthread_stop(dev->worker); > dev->worker = NULL; > Hmm I'm not sure why it matters. > And maybe we can also check vq->private and warn in the loop, because the > work_list may be empty if the device is doing nothing. > > Thanks, > Stefano
Re: [PATCH v2] virtio-blk: Remove BUG_ON() in virtio_queue_rq()
On Wed, Mar 02, 2022 at 04:27:15PM +0200, Max Gurtovoy wrote: > > On 3/2/2022 4:15 PM, Michael S. Tsirkin wrote: > > On Wed, Mar 02, 2022 at 03:45:10PM +0200, Max Gurtovoy wrote: > > > On 3/2/2022 3:33 PM, Michael S. Tsirkin wrote: > > > > On Wed, Mar 02, 2022 at 03:24:51PM +0200, Max Gurtovoy wrote: > > > > > On 3/2/2022 3:17 PM, Michael S. Tsirkin wrote: > > > > > > On Wed, Mar 02, 2022 at 11:51:27AM +0200, Max Gurtovoy wrote: > > > > > > > On 3/1/2022 5:43 PM, Michael S. Tsirkin wrote: > > > > > > > > On Mon, Feb 28, 2022 at 02:57:20PM +0800, Xie Yongji wrote: > > > > > > > > > Currently we have a BUG_ON() to make sure the number of sg > > > > > > > > > list does not exceed queue_max_segments() in > > > > > > > > > virtio_queue_rq(). > > > > > > > > > However, the block layer uses queue_max_discard_segments() > > > > > > > > > instead of queue_max_segments() to limit the sg list for > > > > > > > > > discard requests. So the BUG_ON() might be triggered if > > > > > > > > > virtio-blk device reports a larger value for max discard > > > > > > > > > segment than queue_max_segments(). > > > > > > > > Hmm the spec does not say what should happen if max_discard_seg > > > > > > > > exceeds seg_max. Is this the config you have in mind? how do you > > > > > > > > create it? > > > > > > > I don't think it's hard to create it. Just change some registers > > > > > > > in the > > > > > > > device. > > > > > > > > > > > > > > But with the dynamic sgl allocation that I added recently, there > > > > > > > is no > > > > > > > problem with this scenario. > > > > > > Well the problem is device says it can't handle such large > > > > > > descriptors, > > > > > > I guess it works anyway, but it seems scary. > > > > > I don't follow. > > > > > > > > > > The only problem this patch solves is when a virtio blk device reports > > > > > larger value for max_discard_segments than max_segments. > > > > > > > > > No, the problem reported is when virtio blk device reports > > > > max_segments < 256 but not max_discard_segments. > > > You mean the code will work in case device report max_discard_segments > > > > max_segments ? > > > > > > I don't think so. > > I think it's like this: > > > > > > if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) { > > > > > > > > virtio_cread(vdev, struct virtio_blk_config, > > max_discard_seg, > > &v); > > blk_queue_max_discard_segments(q, > > min_not_zero(v, > > > > MAX_DISCARD_SEGMENTS)); > > > > } > > > > so, IIUC the case is of a device that sets max_discard_seg to 0. > > > > Which is kind of broken, but we handled this since 2018 so I guess > > we'll need to keep doing that. > > A device can't state VIRTIO_BLK_F_DISCARD and set max_discard_seg to 0. > > If so, it's a broken device and we can add a quirk for it. Well we already have min_not_zero there, presumably for some reason. > Do you have such device to test ? Xie Yongji mentioned he does. > > > > > This is exactly what Xie Yongji mention in the commit message and what I > > > was > > > seeing. > > > > > > But the code will work if VIRTIO_BLK_F_DISCARD is not supported by the > > > device (even if max_segments < 256) , since blk layer set > > > queue_max_discard_segments = 1 in the initialization. > > > > > > And the virtio-blk driver won't change it unless VIRTIO_BLK_F_DISCARD is > > > supported. > > > > > > > I would expect discard to follow max_segments restrictions then. > > > > > > > > > Probably no such devices, but we need to be prepared. > > > > Right, question is how to handle this. 
> > > > > > > > > > > This commit looks good to me, thanks Xie Yongji. > > > > > > > > > > > > > > Reviewed-by: Max Gurtovoy > > > > > > > > > > > > > > > > To fix it, let's simply > > > > > > > > > remove the BUG_ON() which has become unnecessary after commit > > > > > > > > > 02746e26c39e("virtio-blk: avoid preallocating big SGL for > > > > > > > > > data"). > > > > > > > > > And the unused vblk->sg_elems can also be removed together. > > > > > > > > > > > > > > > > > > Fixes: 1f23816b8eb8 ("virtio_blk: add discard and write > > > > > > > > > zeroes support") > > > > > > > > > Suggested-by: Christoph Hellwig > > > > > > > > > Signed-off-by: Xie Yongji > > > > > > > > > --- > > > > > > > > > drivers/block/virtio_blk.c | 10 +- > > > > > > > > > 1 file changed, 1 insertion(+), 9 deletions(-) > > > > > > > > > > > > > > > > > > diff --git a/drivers/block/virtio_blk.c > > > > > > > > > b/drivers/block/virtio_blk.c > > > > > > > > > index c443cd64fc9b..a43eb1813cec 100644 > > > > > > > > > --- a/drivers/block/virtio_blk.c > > > > > > > > > +++ b/drivers/block/virtio_blk.c > > > > > > > > > @@ -76,9 +76,6 @@ struct virtio_blk { > > > > > > > > >*/ > > > > > > > > > refcount_t refs; > >
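One of the options under discussion, never advertising more discard segments than the device's data-segment limit, could look roughly like this in virtblk_probe() (a sketch only, not the merged fix; v and sg_elems are the locals already used there):

	if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
		virtio_cread(vdev, struct virtio_blk_config,
			     max_discard_seg, &v);
		/* Treat 0 as unset, then clamp to the data segment limit. */
		v = min_not_zero(v, (u32)MAX_DISCARD_SEGMENTS);
		blk_queue_max_discard_segments(q, min(v, (u32)sg_elems));
	}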
Re: [PATCH v2] virtio-blk: Remove BUG_ON() in virtio_queue_rq()
On Wed, Mar 02, 2022 at 09:53:17PM +0800, Yongji Xie wrote: > On Wed, Mar 2, 2022 at 9:33 PM Michael S. Tsirkin wrote: > > > > On Wed, Mar 02, 2022 at 03:24:51PM +0200, Max Gurtovoy wrote: > > > > > > On 3/2/2022 3:17 PM, Michael S. Tsirkin wrote: > > > > On Wed, Mar 02, 2022 at 11:51:27AM +0200, Max Gurtovoy wrote: > > > > > On 3/1/2022 5:43 PM, Michael S. Tsirkin wrote: > > > > > > On Mon, Feb 28, 2022 at 02:57:20PM +0800, Xie Yongji wrote: > > > > > > > Currently we have a BUG_ON() to make sure the number of sg > > > > > > > list does not exceed queue_max_segments() in virtio_queue_rq(). > > > > > > > However, the block layer uses queue_max_discard_segments() > > > > > > > instead of queue_max_segments() to limit the sg list for > > > > > > > discard requests. So the BUG_ON() might be triggered if > > > > > > > virtio-blk device reports a larger value for max discard > > > > > > > segment than queue_max_segments(). > > > > > > Hmm the spec does not say what should happen if max_discard_seg > > > > > > exceeds seg_max. Is this the config you have in mind? how do you > > > > > > create it? > > > > > I don't think it's hard to create it. Just change some registers in > > > > > the > > > > > device. > > > > > > > > > > But with the dynamic sgl allocation that I added recently, there is no > > > > > problem with this scenario. > > > > Well the problem is device says it can't handle such large descriptors, > > > > I guess it works anyway, but it seems scary. > > > > > > I don't follow. > > > > > > The only problem this patch solves is when a virtio blk device reports > > > larger value for max_discard_segments than max_segments. > > > > > > > No, the problem reported is when virtio blk device reports > > max_segments < 256 but not max_discard_segments. > > I would expect discard to follow max_segments restrictions then. > > > > I think one point is whether we want to allow the corner case that the > device reports a larger value for max_discard_segments than > max_segments. For example, queue size is 256, max_segments is 128 - 2, > max_discard_segments is 256 - 2. > > Thanks, > Yongji So if device specifies that, then I guess it's fine and from that POV the patch is correct. But I think the issue is when device specifies 0 which we interpret as 256 with no basis in hardware. -- MST
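For reference, min_not_zero(), the helper behind the 256 default, treats a zero argument as "unset" (simplified from include/linux/minmax.h):

#define min_not_zero(x, y) ({			\
	typeof(x) __x = (x);			\
	typeof(y) __y = (y);			\
	__x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); })

/* Hence a device leaving max_discard_seg at 0 yields:
 *   min_not_zero(0, MAX_DISCARD_SEGMENTS) == 256
 * even when the device's seg_max is far smaller. */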
Re: [PATCH v2] virtio-blk: Remove BUG_ON() in virtio_queue_rq()
On Wed, Mar 02, 2022 at 03:45:10PM +0200, Max Gurtovoy wrote: > > On 3/2/2022 3:33 PM, Michael S. Tsirkin wrote: > > On Wed, Mar 02, 2022 at 03:24:51PM +0200, Max Gurtovoy wrote: > > > On 3/2/2022 3:17 PM, Michael S. Tsirkin wrote: > > > > On Wed, Mar 02, 2022 at 11:51:27AM +0200, Max Gurtovoy wrote: > > > > > On 3/1/2022 5:43 PM, Michael S. Tsirkin wrote: > > > > > > On Mon, Feb 28, 2022 at 02:57:20PM +0800, Xie Yongji wrote: > > > > > > > Currently we have a BUG_ON() to make sure the number of sg > > > > > > > list does not exceed queue_max_segments() in virtio_queue_rq(). > > > > > > > However, the block layer uses queue_max_discard_segments() > > > > > > > instead of queue_max_segments() to limit the sg list for > > > > > > > discard requests. So the BUG_ON() might be triggered if > > > > > > > virtio-blk device reports a larger value for max discard > > > > > > > segment than queue_max_segments(). > > > > > > Hmm the spec does not say what should happen if max_discard_seg > > > > > > exceeds seg_max. Is this the config you have in mind? how do you > > > > > > create it? > > > > > I don't think it's hard to create it. Just change some registers in > > > > > the > > > > > device. > > > > > > > > > > But with the dynamic sgl allocation that I added recently, there is no > > > > > problem with this scenario. > > > > Well the problem is device says it can't handle such large descriptors, > > > > I guess it works anyway, but it seems scary. > > > I don't follow. > > > > > > The only problem this patch solves is when a virtio blk device reports > > > larger value for max_discard_segments than max_segments. > > > > > No, the problem reported is when virtio blk device reports > > max_segments < 256 but not max_discard_segments. > > You mean the code will work in case device report max_discard_segments > > max_segments ? > > I don't think so. I think it's like this: if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) { virtio_cread(vdev, struct virtio_blk_config, max_discard_seg, &v); blk_queue_max_discard_segments(q, min_not_zero(v, MAX_DISCARD_SEGMENTS)); } so, IIUC the case is of a device that sets max_discard_seg to 0. Which is kind of broken, but we handled this since 2018 so I guess we'll need to keep doing that. > This is exactly what Xie Yongji mention in the commit message and what I was > seeing. > > But the code will work if VIRTIO_BLK_F_DISCARD is not supported by the > device (even if max_segments < 256) , since blk layer set > queue_max_discard_segments = 1 in the initialization. > > And the virtio-blk driver won't change it unless VIRTIO_BLK_F_DISCARD is > supported. > > > I would expect discard to follow max_segments restrictions then. > > > > > Probably no such devices, but we need to be prepared. > > Right, question is how to handle this. > > > > > > > This commit looks good to me, thanks Xie Yongji. > > > > > > > > > > Reviewed-by: Max Gurtovoy > > > > > > > > > > > > To fix it, let's simply > > > > > > > remove the BUG_ON() which has become unnecessary after commit > > > > > > > 02746e26c39e("virtio-blk: avoid preallocating big SGL for data"). > > > > > > > And the unused vblk->sg_elems can also be removed together. 
> > > > > > > > > > > > > > Fixes: 1f23816b8eb8 ("virtio_blk: add discard and write zeroes > > > > > > > support") > > > > > > > Suggested-by: Christoph Hellwig > > > > > > > Signed-off-by: Xie Yongji > > > > > > > --- > > > > > > > drivers/block/virtio_blk.c | 10 +- > > > > > > > 1 file changed, 1 insertion(+), 9 deletions(-) > > > > > > > > > > > > > > diff --git a/drivers/block/virtio_blk.c > > > > > > > b/drivers/block/virtio_blk.c > > > > > > > index c443cd64fc9b..a43eb1813cec 100644 > > > > > > > --- a/drivers/block/virtio_blk.c > > > > > > > +++ b/drivers/block/virtio_blk.c > > > > > > > @@ -76,9 +76,6 @@ struct virtio_blk { > > > > > > >*/ > > > > > > > refcount_t refs; > > > > > > > - /* What host tells us, plus 2 for header & tailer. */ > > > > > > > - unsigned int sg_elems; > > > > > > > - > > > > > > > /* Ida index - used to track minor number allocations. > > > > > > > */ > > > > > > > int index; > > > > > > > @@ -322,8 +319,6 @@ static blk_status_t virtio_queue_rq(struct > > > > > > > blk_mq_hw_ctx *hctx, > > > > > > > blk_status_t status; > > > > > > > int err; > > > > > > > - BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems); > > > > > > > - > > > > > > > status = virtblk_setup_cmd(vblk->vdev, req, vbr); > > > > > > > if (unlikely(status)) > > > > > > > return status; > > > > > > > @@ -783,8 +778,6 @@ static int virtblk_probe(struct virtio_device > > > > > > > *vdev) > > > > > > > /* Prevent integer
Re: [PATCH 1/1] vhost: Protect the virtqueue from being cleared whilst still in use
On Wed, Mar 02, 2022 at 08:35:08AM -0500, Michael S. Tsirkin wrote: On Wed, Mar 02, 2022 at 10:34:46AM +0100, Stefano Garzarella wrote: On Wed, Mar 02, 2022 at 07:54:21AM +0000, Lee Jones wrote: > vhost_vsock_handle_tx_kick() already holds the mutex during its call > to vhost_get_vq_desc(). All we have to do is take the same lock > during virtqueue clean-up and we mitigate the reported issues. > > Link: https://syzkaller.appspot.com/bug?extid=279432d30d825e63ba00 This issue is similar to [1] that should be already fixed upstream by [2]. However I think this patch would have prevented some issues, because vhost_vq_reset() sets vq->private to NULL, preventing the worker from running. Anyway I think that when we enter in vhost_dev_cleanup() the worker should be already stopped, so it shouldn't be necessary to take the mutex. But in order to prevent future issues maybe it's better to take them, so: Reviewed-by: Stefano Garzarella [1] https://syzkaller.appspot.com/bug?id=993d8b5e64393ed9e6a70f9ae4de0119c605a822 [2] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=a58da53ffd70294ebea8ecd0eb45fd0d74add9f9 Right. I want to queue this but I would like to get a warning so we can detect issues like [2] before they cause more issues. I agree, what about moving the warning that we already have higher up, right at the beginning of the function? I mean something like this: diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 59edb5a1ffe2..1721ff3f18c0 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -692,6 +692,8 @@ void vhost_dev_cleanup(struct vhost_dev *dev) { int i; + WARN_ON(!llist_empty(&dev->work_list)); + for (i = 0; i < dev->nvqs; ++i) { if (dev->vqs[i]->error_ctx) eventfd_ctx_put(dev->vqs[i]->error_ctx); @@ -712,7 +714,6 @@ void vhost_dev_cleanup(struct vhost_dev *dev) dev->iotlb = NULL; vhost_clear_msg(dev); wake_up_interruptible_poll(&dev->wait, EPOLLIN | EPOLLRDNORM); - WARN_ON(!llist_empty(&dev->work_list)); if (dev->worker) { kthread_stop(dev->worker); dev->worker = NULL; And maybe we can also check vq->private and warn in the loop, because the work_list may be empty if the device is doing nothing. Thanks, Stefano
Re: [PATCH 1/1] vhost: Protect the virtqueue from being cleared whilst still in use
On Wed, 02 Mar 2022, Michael S. Tsirkin wrote: > On Wed, Mar 02, 2022 at 07:54:21AM +0000, Lee Jones wrote: > > vhost_vsock_handle_tx_kick() already holds the mutex during its call > > to vhost_get_vq_desc(). All we have to do is take the same lock > > during virtqueue clean-up and we mitigate the reported issues. > > > > Link: https://syzkaller.appspot.com/bug?extid=279432d30d825e63ba00 > > > > Cc: > > Reported-by: syzbot+adc3cb32385586bec...@syzkaller.appspotmail.com > > Signed-off-by: Lee Jones > > --- > > drivers/vhost/vhost.c | 2 ++ > > 1 file changed, 2 insertions(+) > > > > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c > > index 59edb5a1ffe28..bbaff6a5e21b8 100644 > > --- a/drivers/vhost/vhost.c > > +++ b/drivers/vhost/vhost.c > > @@ -693,6 +693,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev) > > int i; > > > > for (i = 0; i < dev->nvqs; ++i) { > > + mutex_lock(&dev->vqs[i]->mutex); > > if (dev->vqs[i]->error_ctx) > > eventfd_ctx_put(dev->vqs[i]->error_ctx); > > if (dev->vqs[i]->kick) > > @@ -700,6 +701,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev) > > if (dev->vqs[i]->call_ctx.ctx) > > eventfd_ctx_put(dev->vqs[i]->call_ctx.ctx); > > vhost_vq_reset(dev, dev->vqs[i]); > > + mutex_unlock(&dev->vqs[i]->mutex); > > } > > So this is a mitigation plan but the bug is still there though > we don't know exactly what it is. I would prefer adding something like > WARN_ON(mutex_is_locked(vqs[i]->mutex) here - does this make sense? As a rework to this, or as a subsequent patch? Just before the first lock I assume? -- Lee Jones [李琼斯] Principal Technical Lead - Developer Services Linaro.org │ Open source software for Arm SoCs Follow Linaro: Facebook | Twitter | Blog
Re: [PATCH 1/1] vhost: Protect the virtqueue from being cleared whilst still in use
On Wed, Mar 02, 2022 at 10:34:46AM +0100, Stefano Garzarella wrote: > On Wed, Mar 02, 2022 at 07:54:21AM +0000, Lee Jones wrote: > > vhost_vsock_handle_tx_kick() already holds the mutex during its call > > to vhost_get_vq_desc(). All we have to do is take the same lock > > during virtqueue clean-up and we mitigate the reported issues. > > > > Link: https://syzkaller.appspot.com/bug?extid=279432d30d825e63ba00 > > This issue is similar to [1] that should be already fixed upstream by [2]. > > However I think this patch would have prevented some issues, because > vhost_vq_reset() sets vq->private to NULL, preventing the worker from > running. > > Anyway I think that when we enter in vhost_dev_cleanup() the worker should > be already stopped, so it shouldn't be necessary to take the mutex. But in > order to prevent future issues maybe it's better to take them, so: > > Reviewed-by: Stefano Garzarella > > [1] > https://syzkaller.appspot.com/bug?id=993d8b5e64393ed9e6a70f9ae4de0119c605a822 > [2] > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=a58da53ffd70294ebea8ecd0eb45fd0d74add9f9 Right. I want to queue this but I would like to get a warning so we can detect issues like [2] before they cause more issues. > > > > Cc: > > Reported-by: syzbot+adc3cb32385586bec...@syzkaller.appspotmail.com > > Signed-off-by: Lee Jones > > --- > > drivers/vhost/vhost.c | 2 ++ > > 1 file changed, 2 insertions(+) > > > > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c > > index 59edb5a1ffe28..bbaff6a5e21b8 100644 > > --- a/drivers/vhost/vhost.c > > +++ b/drivers/vhost/vhost.c > > @@ -693,6 +693,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev) > > int i; > > > > for (i = 0; i < dev->nvqs; ++i) { > > + mutex_lock(&dev->vqs[i]->mutex); > > if (dev->vqs[i]->error_ctx) > > eventfd_ctx_put(dev->vqs[i]->error_ctx); > > if (dev->vqs[i]->kick) > > @@ -700,6 +701,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev) > > if (dev->vqs[i]->call_ctx.ctx) > > eventfd_ctx_put(dev->vqs[i]->call_ctx.ctx); > > vhost_vq_reset(dev, dev->vqs[i]); > > + mutex_unlock(&dev->vqs[i]->mutex); > > } > > vhost_dev_free_iovecs(dev); > > if (dev->log_ctx) > > -- > > 2.35.1.574.g5d30c73bfb-goog > >
Re: [PATCH v2] virtio-blk: Remove BUG_ON() in virtio_queue_rq()
On Wed, Mar 02, 2022 at 03:24:51PM +0200, Max Gurtovoy wrote: > > On 3/2/2022 3:17 PM, Michael S. Tsirkin wrote: > > On Wed, Mar 02, 2022 at 11:51:27AM +0200, Max Gurtovoy wrote: > > > On 3/1/2022 5:43 PM, Michael S. Tsirkin wrote: > > > > On Mon, Feb 28, 2022 at 02:57:20PM +0800, Xie Yongji wrote: > > > > > Currently we have a BUG_ON() to make sure the number of sg > > > > > list does not exceed queue_max_segments() in virtio_queue_rq(). > > > > > However, the block layer uses queue_max_discard_segments() > > > > > instead of queue_max_segments() to limit the sg list for > > > > > discard requests. So the BUG_ON() might be triggered if > > > > > virtio-blk device reports a larger value for max discard > > > > > segment than queue_max_segments(). > > > > Hmm the spec does not say what should happen if max_discard_seg > > > > exceeds seg_max. Is this the config you have in mind? how do you > > > > create it? > > > I don't think it's hard to create it. Just change some registers in the > > > device. > > > > > > But with the dynamic sgl allocation that I added recently, there is no > > > problem with this scenario. > > Well the problem is device says it can't handle such large descriptors, > > I guess it works anyway, but it seems scary. > > I don't follow. > > The only problem this patch solves is when a virtio blk device reports > larger value for max_discard_segments than max_segments. > No, the problem reported is when virtio blk device reports max_segments < 256 but not max_discard_segments. I would expect discard to follow max_segments restrictions then. > Probably no such devices, but we need to be prepared. Right, question is how to handle this. > > > > > This commit looks good to me, thanks Xie Yongji. > > > > > Reviewed-by: Max Gurtovoy > > > > > > To fix it, let's simply > > > > > remove the BUG_ON() which has become unnecessary after commit > > > > > 02746e26c39e("virtio-blk: avoid preallocating big SGL for data"). > > > > > And the unused vblk->sg_elems can also be removed together. > > > > > > > > > > Fixes: 1f23816b8eb8 ("virtio_blk: add discard and write zeroes > > > > > support") > > > > > Suggested-by: Christoph Hellwig > > > > > Signed-off-by: Xie Yongji > > > > > --- > > > > >    drivers/block/virtio_blk.c | 10 +- > > > > >    1 file changed, 1 insertion(+), 9 deletions(-) > > > > > > > > > > diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c > > > > > index c443cd64fc9b..a43eb1813cec 100644 > > > > > --- a/drivers/block/virtio_blk.c > > > > > +++ b/drivers/block/virtio_blk.c > > > > > @@ -76,9 +76,6 @@ struct virtio_blk { > > > > >    */ > > > > > refcount_t refs; > > > > > - /* What host tells us, plus 2 for header & tailer. */ > > > > > - unsigned int sg_elems; > > > > > - > > > > > /* Ida index - used to track minor number allocations. */ > > > > > int index; > > > > > @@ -322,8 +319,6 @@ static blk_status_t virtio_queue_rq(struct > > > > > blk_mq_hw_ctx *hctx, > > > > > blk_status_t status; > > > > > int err; > > > > > - BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems); > > > > > - > > > > > status = virtblk_setup_cmd(vblk->vdev, req, vbr); > > > > > if (unlikely(status)) > > > > > return status; > > > > > @@ -783,8 +778,6 @@ static int virtblk_probe(struct virtio_device > > > > > *vdev) > > > > > /* Prevent integer overflows and honor max vq size */ > > > > > sg_elems = min_t(u32, sg_elems, VIRTIO_BLK_MAX_SG_ELEMS - 2); > > > > > - /* We need extra sg elements at head and tail. 
*/ > > > > > - sg_elems += 2; > > > > > vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL); > > > > > if (!vblk) { > > > > > err = -ENOMEM; > > > > > @@ -796,7 +789,6 @@ static int virtblk_probe(struct virtio_device > > > > > *vdev) > > > > > mutex_init(&vblk->vdev_mutex); > > > > > vblk->vdev = vdev; > > > > > - vblk->sg_elems = sg_elems; > > > > > INIT_WORK(&vblk->config_work, virtblk_config_changed_work); > > > > > @@ -853,7 +845,7 @@ static int virtblk_probe(struct virtio_device > > > > > *vdev) > > > > > set_disk_ro(vblk->disk, 1); > > > > > /* We can handle whatever the host told us to handle. */ > > > > > - blk_queue_max_segments(q, vblk->sg_elems-2); > > > > > + blk_queue_max_segments(q, sg_elems); > > > > > /* No real sector limit. */ > > > > > blk_queue_max_hw_sectors(q, -1U); > > > > > -- > > > > > 2.20.1
Re: [PATCH 1/1] vhost: Protect the virtqueue from being cleared whilst still in use
On Wed, Mar 02, 2022 at 07:54:21AM +0000, Lee Jones wrote: > vhost_vsock_handle_tx_kick() already holds the mutex during its call > to vhost_get_vq_desc(). All we have to do is take the same lock > during virtqueue clean-up and we mitigate the reported issues. > > Link: https://syzkaller.appspot.com/bug?extid=279432d30d825e63ba00 > > Cc: > Reported-by: syzbot+adc3cb32385586bec...@syzkaller.appspotmail.com > Signed-off-by: Lee Jones > --- > drivers/vhost/vhost.c | 2 ++ > 1 file changed, 2 insertions(+) > > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c > index 59edb5a1ffe28..bbaff6a5e21b8 100644 > --- a/drivers/vhost/vhost.c > +++ b/drivers/vhost/vhost.c > @@ -693,6 +693,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev) > int i; > > for (i = 0; i < dev->nvqs; ++i) { > + mutex_lock(&dev->vqs[i]->mutex); > if (dev->vqs[i]->error_ctx) > eventfd_ctx_put(dev->vqs[i]->error_ctx); > if (dev->vqs[i]->kick) > @@ -700,6 +701,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev) > if (dev->vqs[i]->call_ctx.ctx) > eventfd_ctx_put(dev->vqs[i]->call_ctx.ctx); > vhost_vq_reset(dev, dev->vqs[i]); > + mutex_unlock(&dev->vqs[i]->mutex); > } So this is a mitigation plan but the bug is still there though we don't know exactly what it is. I would prefer adding something like WARN_ON(mutex_is_locked(vqs[i]->mutex) here - does this make sense? > vhost_dev_free_iovecs(dev); > if (dev->log_ctx) > -- > 2.35.1.574.g5d30c73bfb-goog
Re: [PATCH v2] virtio-blk: Remove BUG_ON() in virtio_queue_rq()
On Wed, Mar 02, 2022 at 11:51:27AM +0200, Max Gurtovoy wrote: > > On 3/1/2022 5:43 PM, Michael S. Tsirkin wrote: > > On Mon, Feb 28, 2022 at 02:57:20PM +0800, Xie Yongji wrote: > > > Currently we have a BUG_ON() to make sure the number of sg > > > list does not exceed queue_max_segments() in virtio_queue_rq(). > > > However, the block layer uses queue_max_discard_segments() > > > instead of queue_max_segments() to limit the sg list for > > > discard requests. So the BUG_ON() might be triggered if > > > virtio-blk device reports a larger value for max discard > > > segment than queue_max_segments(). > > Hmm the spec does not say what should happen if max_discard_seg > > exceeds seg_max. Is this the config you have in mind? how do you > > create it? > > I don't think it's hard to create it. Just change some registers in the > device. > > But with the dynamic sgl allocation that I added recently, there is no > problem with this scenario. Well the problem is device says it can't handle such large descriptors, I guess it works anyway, but it seems scary. > This commit looks good to me, thanks Xie Yongji. > > Reviewed-by: Max Gurtovoy > > > > To fix it, let's simply > > > remove the BUG_ON() which has become unnecessary after commit > > > 02746e26c39e("virtio-blk: avoid preallocating big SGL for data"). > > > And the unused vblk->sg_elems can also be removed together. > > > > > > Fixes: 1f23816b8eb8 ("virtio_blk: add discard and write zeroes support") > > > Suggested-by: Christoph Hellwig > > > Signed-off-by: Xie Yongji > > > --- > > > drivers/block/virtio_blk.c | 10 +- > > > 1 file changed, 1 insertion(+), 9 deletions(-) > > > > > > diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c > > > index c443cd64fc9b..a43eb1813cec 100644 > > > --- a/drivers/block/virtio_blk.c > > > +++ b/drivers/block/virtio_blk.c > > > @@ -76,9 +76,6 @@ struct virtio_blk { > > >    */ > > > refcount_t refs; > > > - /* What host tells us, plus 2 for header & tailer. */ > > > - unsigned int sg_elems; > > > - > > > /* Ida index - used to track minor number allocations. */ > > > int index; > > > @@ -322,8 +319,6 @@ static blk_status_t virtio_queue_rq(struct > > > blk_mq_hw_ctx *hctx, > > > blk_status_t status; > > > int err; > > > - BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems); > > > - > > > status = virtblk_setup_cmd(vblk->vdev, req, vbr); > > > if (unlikely(status)) > > > return status; > > > @@ -783,8 +778,6 @@ static int virtblk_probe(struct virtio_device *vdev) > > > /* Prevent integer overflows and honor max vq size */ > > > sg_elems = min_t(u32, sg_elems, VIRTIO_BLK_MAX_SG_ELEMS - 2); > > > - /* We need extra sg elements at head and tail. */ > > > - sg_elems += 2; > > > vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL); > > > if (!vblk) { > > > err = -ENOMEM; > > > @@ -796,7 +789,6 @@ static int virtblk_probe(struct virtio_device *vdev) > > > mutex_init(&vblk->vdev_mutex); > > > vblk->vdev = vdev; > > > - vblk->sg_elems = sg_elems; > > > INIT_WORK(&vblk->config_work, virtblk_config_changed_work); > > > @@ -853,7 +845,7 @@ static int virtblk_probe(struct virtio_device *vdev) > > > set_disk_ro(vblk->disk, 1); > > > /* We can handle whatever the host told us to handle. */ > > > - blk_queue_max_segments(q, vblk->sg_elems-2); > > > + blk_queue_max_segments(q, sg_elems); > > > /* No real sector limit. */ > > > blk_queue_max_hw_sectors(q, -1U); > > > -- > > > 2.20.1
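If a guard were still wanted after removing the BUG_ON(), a non-fatal variant that fails the request instead of crashing might look like this (purely illustrative; the patch as posted simply drops the check):

	/* Hypothetical check in virtio_queue_rq(): compare against the
	 * limit the block layer actually applied to this request type,
	 * and reject rather than panic on a violation. */
	unsigned short limit = (req_op(req) == REQ_OP_DISCARD) ?
			queue_max_discard_segments(req->q) :
			queue_max_segments(req->q);

	if (WARN_ON_ONCE(blk_rq_nr_phys_segments(req) > limit))
		return BLK_STS_IOERR;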
Re: [PATCH v2] virtio-blk: Remove BUG_ON() in virtio_queue_rq()
On Wed, Mar 02, 2022 at 06:46:03PM +0800, Yongji Xie wrote: > On Tue, Mar 1, 2022 at 11:43 PM Michael S. Tsirkin wrote: > > > > On Mon, Feb 28, 2022 at 02:57:20PM +0800, Xie Yongji wrote: > > > Currently we have a BUG_ON() to make sure the number of sg > > > list does not exceed queue_max_segments() in virtio_queue_rq(). > > > However, the block layer uses queue_max_discard_segments() > > > instead of queue_max_segments() to limit the sg list for > > > discard requests. So the BUG_ON() might be triggered if > > > virtio-blk device reports a larger value for max discard > > > segment than queue_max_segments(). > > > > Hmm the spec does not say what should happen if max_discard_seg > > exceeds seg_max. Is this the config you have in mind? how do you > > create it? > > > > One example: the device doesn't specify the value of max_discard_seg > in the config space, then the virtio-blk driver will use > MAX_DISCARD_SEGMENTS (256) by default. Then we're able to trigger the > BUG_ON() if the seg_max is less than 256. > > While the spec didn't say what should happen if max_discard_seg > exceeds seg_max, it also doesn't explicitly prohibit this > configuration. So I think we should at least not panic the kernel in > this case. > > Thanks, > Yongji Oh that last one sounds like a bug, I think it should be min(MAX_DISCARD_SEGMENTS, seg_max) When max_discard_seg and seg_max both exist, that's a different question. We can - do min(max_discard_seg, seg_max) - fail probe - clear the relevant feature flag I feel we need a better plan than submitting an invalid request to device. -- MST
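Of the three options listed, failing probe is the most direct to sketch (hypothetical code; v and sg_elems follow virtblk_probe()'s locals, and the exact unwind label is an assumption):

	if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
		virtio_cread(vdev, struct virtio_blk_config,
			     max_discard_seg, &v);
		if (v > sg_elems) {
			/* Refuse a config whose discard limit exceeds the
			 * device's own seg_max. */
			dev_err(&vdev->dev,
				"max_discard_seg %u exceeds seg_max %u\n",
				v, sg_elems);
			err = -EINVAL;
			goto out_free_vblk;	/* unwind path elided */
		}
	}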
Re: [PATCH 1/1] vhost: Protect the virtqueue from being cleared whilst still in use
On Wed, 02 Mar 2022, Stefano Garzarella wrote: > On Wed, Mar 02, 2022 at 07:54:21AM +0000, Lee Jones wrote: > > vhost_vsock_handle_tx_kick() already holds the mutex during its call > > to vhost_get_vq_desc(). All we have to do is take the same lock > > during virtqueue clean-up and we mitigate the reported issues. > > > > Link: https://syzkaller.appspot.com/bug?extid=279432d30d825e63ba00 > > This issue is similar to [1] that should be already fixed upstream by [2]. > > However I think this patch would have prevented some issues, because > vhost_vq_reset() sets vq->private to NULL, preventing the worker from > running. > > Anyway I think that when we enter in vhost_dev_cleanup() the worker should > be already stopped, so it shouldn't be necessary to take the mutex. But in > order to prevent future issues maybe it's better to take them, so: > Reviewed-by: Stefano Garzarella Thanks for the analysis and the review Stefano. -- Lee Jones [李琼斯] Principal Technical Lead - Developer Services Linaro.org │ Open source software for Arm SoCs Follow Linaro: Facebook | Twitter | Blog
Re: [PATCH 1/1] vhost: Protect the virtqueue from being cleared whilst still in use
On Wed, Mar 02, 2022 at 07:54:21AM +0000, Lee Jones wrote: vhost_vsock_handle_tx_kick() already holds the mutex during its call to vhost_get_vq_desc(). All we have to do is take the same lock during virtqueue clean-up and we mitigate the reported issues. Link: https://syzkaller.appspot.com/bug?extid=279432d30d825e63ba00 This issue is similar to [1] that should be already fixed upstream by [2]. However I think this patch would have prevented some issues, because vhost_vq_reset() sets vq->private to NULL, preventing the worker from running. Anyway I think that when we enter in vhost_dev_cleanup() the worker should be already stopped, so it shouldn't be necessary to take the mutex. But in order to prevent future issues maybe it's better to take them, so: Reviewed-by: Stefano Garzarella [1] https://syzkaller.appspot.com/bug?id=993d8b5e64393ed9e6a70f9ae4de0119c605a822 [2] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=a58da53ffd70294ebea8ecd0eb45fd0d74add9f9 Cc: Reported-by: syzbot+adc3cb32385586bec...@syzkaller.appspotmail.com Signed-off-by: Lee Jones --- drivers/vhost/vhost.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 59edb5a1ffe28..bbaff6a5e21b8 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -693,6 +693,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev) int i; for (i = 0; i < dev->nvqs; ++i) { + mutex_lock(&dev->vqs[i]->mutex); if (dev->vqs[i]->error_ctx) eventfd_ctx_put(dev->vqs[i]->error_ctx); if (dev->vqs[i]->kick) @@ -700,6 +701,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev) if (dev->vqs[i]->call_ctx.ctx) eventfd_ctx_put(dev->vqs[i]->call_ctx.ctx); vhost_vq_reset(dev, dev->vqs[i]); + mutex_unlock(&dev->vqs[i]->mutex); } vhost_dev_free_iovecs(dev); if (dev->log_ctx) -- 2.35.1.574.g5d30c73bfb-goog
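For context, the handler-side locking this patch mirrors (abridged from vhost_vsock_handle_tx_kick() in drivers/vhost/vsock.c; the packet loop body and error handling are trimmed, so this is a shape sketch rather than the verbatim function):

static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
{
	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
						  poll.work);

	mutex_lock(&vq->mutex);		/* held across descriptor access */
	do {
		head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
					 &out, &in, NULL, NULL);
		/* ... process the descriptor ... */
	} while (/* weight not exceeded */);
	mutex_unlock(&vq->mutex);
}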
Re: [syzbot] kernel BUG in vhost_get_vq_desc
On Wed, Mar 02, 2022 at 10:18:07AM +0100, Stefano Garzarella wrote: On Wed, Mar 02, 2022 at 08:29:41AM +0000, Lee Jones wrote: On Fri, 18 Feb 2022, Michael S. Tsirkin wrote: On Thu, Feb 17, 2022 at 05:21:20PM -0800, syzbot wrote: syzbot has found a reproducer for the following issue on: HEAD commit:f71077a4d84b Merge tag 'mmc-v5.17-rc1-2' of git://git.kern.. git tree: upstream console output: https://syzkaller.appspot.com/x/log.txt?x=104c04ca70 kernel config: https://syzkaller.appspot.com/x/.config?x=a78b064590b9f912 dashboard link: https://syzkaller.appspot.com/bug?extid=3140b17cb44a7b174008 compiler: gcc (Debian 10.2.1-6) 10.2.1 20210110, GNU ld (GNU Binutils for Debian) 2.35.2 syz repro: https://syzkaller.appspot.com/x/repro.syz?x=1362e23270 C reproducer: https://syzkaller.appspot.com/x/repro.c?x=11373a6c70 IMPORTANT: if you fix the issue, please add the following tag to the commit: Reported-by: syzbot+3140b17cb44a7b174...@syzkaller.appspotmail.com [ cut here ] kernel BUG at drivers/vhost/vhost.c:2335! invalid opcode: [#1] PREEMPT SMP KASAN CPU: 1 PID: 3597 Comm: vhost-3596 Not tainted 5.17.0-rc4-syzkaller-00054-gf71077a4d84b #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:vhost_get_vq_desc+0x1d43/0x22c0 drivers/vhost/vhost.c:2335 Code: 00 00 00 48 c7 c6 20 2c 9d 8a 48 c7 c7 98 a6 8e 8d 48 89 ca 48 c1 e1 04 48 01 d9 e8 b7 59 28 fd e9 74 ff ff ff e8 5d c8 a1 fa <0f> 0b e8 56 c8 a1 fa 48 8b 54 24 18 48 b8 00 00 00 00 00 fc ff df RSP: 0018:c90001d1fb88 EFLAGS: 00010293 RAX: RBX: 0001 RCX: RDX: 8880234b RSI: 86d715c3 RDI: 0003 RBP: R08: R09: 0001 R10: 86d706bc R11: R12: 888072c24d68 R13: R14: dc00 R15: 888072c24bb0 FS: () GS:8880b9d0() knlGS: CS: 0010 DS: ES: CR0: 80050033 CR2: 0002 CR3: 7902c000 CR4: 003506e0 DR0: DR1: DR2: DR3: DR6: fffe0ff0 DR7: 0400 Call Trace: vhost_vsock_handle_tx_kick+0x277/0xa20 drivers/vhost/vsock.c:522 vhost_worker+0x23d/0x3d0 drivers/vhost/vhost.c:372 kthread+0x2e9/0x3a0 kernel/kthread.c:377 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295 I don't see how this can trigger normally so I'm assuming another case of use after free. Yes, exactly. I think this issue is related to the issue fixed by this patch merged some days ago upstream: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=a58da53ffd70294ebea8ecd0eb45fd0d74add9f9 I patched it. Please see: https://lore.kernel.org/all/20220302075421.2131221-1-lee.jo...@linaro.org/T/#t I'm not sure that patch is avoiding the issue. I'll reply to it. My bad, I think it should be fine, because vhost_vq_reset() sets vq->private_data to NULL and prevents the worker from running. Thanks, Stefano
Re: [syzbot] kernel BUG in vhost_get_vq_desc
On Wed, Mar 02, 2022 at 08:29:41AM +0000, Lee Jones wrote: On Fri, 18 Feb 2022, Michael S. Tsirkin wrote: On Thu, Feb 17, 2022 at 05:21:20PM -0800, syzbot wrote: > syzbot has found a reproducer for the following issue on: > > HEAD commit:f71077a4d84b Merge tag 'mmc-v5.17-rc1-2' of git://git.kern.. > git tree: upstream > console output: https://syzkaller.appspot.com/x/log.txt?x=104c04ca70 > kernel config: https://syzkaller.appspot.com/x/.config?x=a78b064590b9f912 > dashboard link: https://syzkaller.appspot.com/bug?extid=3140b17cb44a7b174008 > compiler: gcc (Debian 10.2.1-6) 10.2.1 20210110, GNU ld (GNU Binutils for Debian) 2.35.2 > syz repro: https://syzkaller.appspot.com/x/repro.syz?x=1362e23270 > C reproducer: https://syzkaller.appspot.com/x/repro.c?x=11373a6c70 > > IMPORTANT: if you fix the issue, please add the following tag to the commit: > Reported-by: syzbot+3140b17cb44a7b174...@syzkaller.appspotmail.com > > [ cut here ] > kernel BUG at drivers/vhost/vhost.c:2335! > invalid opcode: [#1] PREEMPT SMP KASAN > CPU: 1 PID: 3597 Comm: vhost-3596 Not tainted 5.17.0-rc4-syzkaller-00054-gf71077a4d84b #0 > Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 > RIP: 0010:vhost_get_vq_desc+0x1d43/0x22c0 drivers/vhost/vhost.c:2335 > Code: 00 00 00 48 c7 c6 20 2c 9d 8a 48 c7 c7 98 a6 8e 8d 48 89 ca 48 c1 e1 04 48 01 d9 e8 b7 59 28 fd e9 74 ff ff ff e8 5d c8 a1 fa <0f> 0b e8 56 c8 a1 fa 48 8b 54 24 18 48 b8 00 00 00 00 00 fc ff df > RSP: 0018:c90001d1fb88 EFLAGS: 00010293 > RAX: RBX: 0001 RCX: > RDX: 8880234b RSI: 86d715c3 RDI: 0003 > RBP: R08: R09: 0001 > R10: 86d706bc R11: R12: 888072c24d68 > R13: R14: dc00 R15: 888072c24bb0 > FS: () GS:8880b9d0() knlGS: > CS: 0010 DS: ES: CR0: 80050033 > CR2: 0002 CR3: 7902c000 CR4: 003506e0 > DR0: DR1: DR2: > DR3: DR6: fffe0ff0 DR7: 0400 > Call Trace: > > vhost_vsock_handle_tx_kick+0x277/0xa20 drivers/vhost/vsock.c:522 > vhost_worker+0x23d/0x3d0 drivers/vhost/vhost.c:372 > kthread+0x2e9/0x3a0 kernel/kthread.c:377 > ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295 I don't see how this can trigger normally so I'm assuming another case of use after free. Yes, exactly. I think this issue is related to the issue fixed by this patch merged some days ago upstream: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=a58da53ffd70294ebea8ecd0eb45fd0d74add9f9 I patched it. Please see: https://lore.kernel.org/all/20220302075421.2131221-1-lee.jo...@linaro.org/T/#t I'm not sure that patch is avoiding the issue. I'll reply to it. Thanks, Stefano
Re: [syzbot] kernel BUG in vhost_get_vq_desc
On Fri, 18 Feb 2022, Michael S. Tsirkin wrote: > On Thu, Feb 17, 2022 at 05:21:20PM -0800, syzbot wrote: > > syzbot has found a reproducer for the following issue on: > > > > HEAD commit:f71077a4d84b Merge tag 'mmc-v5.17-rc1-2' of git://git.kern.. > > git tree: upstream > > console output: https://syzkaller.appspot.com/x/log.txt?x=104c04ca70 > > kernel config: https://syzkaller.appspot.com/x/.config?x=a78b064590b9f912 > > dashboard link: https://syzkaller.appspot.com/bug?extid=3140b17cb44a7b174008 > > compiler: gcc (Debian 10.2.1-6) 10.2.1 20210110, GNU ld (GNU Binutils > > for Debian) 2.35.2 > > syz repro: https://syzkaller.appspot.com/x/repro.syz?x=1362e23270 > > C reproducer: https://syzkaller.appspot.com/x/repro.c?x=11373a6c70 > > > > IMPORTANT: if you fix the issue, please add the following tag to the commit: > > Reported-by: syzbot+3140b17cb44a7b174...@syzkaller.appspotmail.com > > > > [ cut here ] > > kernel BUG at drivers/vhost/vhost.c:2335! > > invalid opcode: [#1] PREEMPT SMP KASAN > > CPU: 1 PID: 3597 Comm: vhost-3596 Not tainted > > 5.17.0-rc4-syzkaller-00054-gf71077a4d84b #0 > > Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS > > Google 01/01/2011 > > RIP: 0010:vhost_get_vq_desc+0x1d43/0x22c0 drivers/vhost/vhost.c:2335 > > Code: 00 00 00 48 c7 c6 20 2c 9d 8a 48 c7 c7 98 a6 8e 8d 48 89 ca 48 c1 e1 > > 04 48 01 d9 e8 b7 59 28 fd e9 74 ff ff ff e8 5d c8 a1 fa <0f> 0b e8 56 c8 > > a1 fa 48 8b 54 24 18 48 b8 00 00 00 00 00 fc ff df > > RSP: 0018:c90001d1fb88 EFLAGS: 00010293 > > RAX: RBX: 0001 RCX: > > RDX: 8880234b RSI: 86d715c3 RDI: 0003 > > RBP: R08: R09: 0001 > > R10: 86d706bc R11: R12: 888072c24d68 > > R13: R14: dc00 R15: 888072c24bb0 > > FS: () GS:8880b9d0() knlGS: > > CS: 0010 DS: ES: CR0: 80050033 > > CR2: 0002 CR3: 7902c000 CR4: 003506e0 > > DR0: DR1: DR2: > > DR3: DR6: fffe0ff0 DR7: 0400 > > Call Trace: > > > > vhost_vsock_handle_tx_kick+0x277/0xa20 drivers/vhost/vsock.c:522 > > vhost_worker+0x23d/0x3d0 drivers/vhost/vhost.c:372 > > kthread+0x2e9/0x3a0 kernel/kthread.c:377 > > ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295 > > I don't see how this can trigger normally so I'm assuming > another case of use after free. Yes, exactly. I patched it. Please see: https://lore.kernel.org/all/20220302075421.2131221-1-lee.jo...@linaro.org/T/#t -- Lee Jones [李琼斯] Principal Technical Lead - Developer Services Linaro.org │ Open source software for Arm SoCs Follow Linaro: Facebook | Twitter | Blog