Re: [PATCH v2] virtio: vdpa: new SolidNET DPU driver.
Thanks I'll send a new version. ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH v2] vdpasim: support doorbell mapping
On Mon, Dec 5, 2022 at 9:52 PM Longpeng(Mike) wrote: > > From: Longpeng > > Support doorbell mapping for vdpasim devices, then we can test the notify > passthrough feature even if there's no real hardware on hand. > > Allocates a dummy page which is used to emulate the notify page of the device, > all VQs share the same notify register that initiated to 0x. A periodic > work will check whether there're requests need to process ( the value of the > notify register is 0x or not ). > > This cap is disabled as default, users can enable it as follow: > modprobe vdpa_sim notify_passthrough=true > > Signed-off-by: Longpeng > --- > Changes v1->v2: > - support both kick mode and passthrough mode. [Jason] > - poll the notify register first. [Jason, Michael] > --- > drivers/vdpa/vdpa_sim/vdpa_sim.c | 77 > drivers/vdpa/vdpa_sim/vdpa_sim.h | 3 ++ > 2 files changed, 80 insertions(+) > > diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c > b/drivers/vdpa/vdpa_sim/vdpa_sim.c > index 7438a89ce939..07096a04dabb 100644 > --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c > +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c > @@ -14,6 +14,7 @@ > #include > #include > #include > +#include > #include > #include > #include > @@ -36,9 +37,16 @@ module_param(max_iotlb_entries, int, 0444); > MODULE_PARM_DESC(max_iotlb_entries, > "Maximum number of iotlb entries for each address space. 0 > means unlimited. (default: 2048)"); > > +static bool notify_passthrough; > +module_param(notify_passthrough, bool, 0444); > +MODULE_PARM_DESC(notify_passthrough, > +"Enable vq notify(doorbell) area mapping. (default: false)"); I'm not sure if it's worth doing this, I think we can afford the cost of periodic work (especially considering it's a simulator). > + > #define VDPASIM_QUEUE_ALIGN PAGE_SIZE > #define VDPASIM_QUEUE_MAX 256 > #define VDPASIM_VENDOR_ID 0 > +#define VDPASIM_VRING_POLL_PERIOD 100 /* ms */ > +#define VDPASIM_NOTIFY_DEFVAL 0x > > static struct vdpasim *vdpa_to_sim(struct vdpa_device *vdpa) > { > @@ -246,6 +254,28 @@ static const struct dma_map_ops vdpasim_dma_ops = { > static const struct vdpa_config_ops vdpasim_config_ops; > static const struct vdpa_config_ops vdpasim_batch_config_ops; > > +static void vdpasim_notify_work(struct work_struct *work) > +{ > + struct vdpasim *vdpasim; > + u16 *val; > + > + vdpasim = container_of(work, struct vdpasim, notify_work.work); > + > + if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK)) > + goto out; > + > + if (!vdpasim->running) > + goto out; > + > + val = (u16 *)vdpasim->notify; > + if (xchg(val, VDPASIM_NOTIFY_DEFVAL) != VDPASIM_NOTIFY_DEFVAL) > + schedule_work(&vdpasim->work); > + > +out: > + schedule_delayed_work(&vdpasim->notify_work, > + msecs_to_jiffies(VDPASIM_VRING_POLL_PERIOD)); > +} > + > struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr, >const struct vdpa_dev_set_config *config) > { > @@ -287,6 +317,18 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr > *dev_attr, > set_dma_ops(dev, &vdpasim_dma_ops); > vdpasim->vdpa.mdev = dev_attr->mgmt_dev; > > + if (notify_passthrough) { > + INIT_DELAYED_WORK(&vdpasim->notify_work, vdpasim_notify_work); > + > + vdpasim->notify = __get_free_page(GFP_KERNEL | __GFP_ZERO); > + if (!vdpasim->notify) > + goto err_iommu; > +#ifdef CONFIG_X86 > + set_memory_uc(vdpasim->notify, 1); > +#endif I had the same question with version 1, any reason for having this part uncacheable? It's a hint that we have bugs somewhere. Are we missing ACCESS/ORDER_PLATFORM or other features? > + *(u16 *)vdpasim->notify = VDPASIM_NOTIFY_DEFVAL; WRITE_ONCE()? > + } > + > vdpasim->config = kzalloc(dev_attr->config_size, GFP_KERNEL); > if (!vdpasim->config) > goto err_iommu; > @@ -495,6 +537,18 @@ static u8 vdpasim_get_status(struct vdpa_device *vdpa) > return status; > } > > +static void vdpasim_set_notify_work(struct vdpasim *vdpasim, bool start) > +{ > + if (!notify_passthrough) > + return; Only two callers for this function: one is start another is stop. If we decide to get rid of notify_passthrough, I'd rather opencode the schedule/cancel_delayed(). Thanks > + > + if (start) > + schedule_delayed_work(&vdpasim->notify_work, > + > msecs_to_jiffies(VDPASIM_VRING_POLL_PERIOD)); > + else > + cancel_delayed_work_sync(&vdpasim->notify_work); > +} > + > static void vdpasim_set_status(struct vdpa_device *vdpa, u8 status) > { > struct vdpasim *vdpasim = vdpa_to_sim(vdpa); > @@ -502,12 +556,14 @@ static void vdpasim_set_status(struct vdpa_device > *
Re: [PATCH v2] vp_vdpa: harden the logic of set status
On Tue, Dec 6, 2022 at 10:13 AM Longpeng(Mike) wrote: > > From: Longpeng > > 1. We should not set status to 0 when invoking vp_vdpa_set_status(), >trigger a warning in that case. > > 2. The driver MUST wait for a read of device_status to return 0 before >reinitializing the device. But we also don't want to keep us in an >infinite loop forever, so wait for 5s if we try to reset the device. > > Signed-off-by: Longpeng > --- > Changes v1->v2: > - use WARN_ON instead of BUG_ON. [Stefano] > - use "warning + failed" instead of "infinite loop". [Jason, Stefano] > - use usleep_range instead of msleep (checkpatch). [Longpeng] > > --- > drivers/vdpa/virtio_pci/vp_vdpa.c | 22 ++ > 1 file changed, 22 insertions(+) > > diff --git a/drivers/vdpa/virtio_pci/vp_vdpa.c > b/drivers/vdpa/virtio_pci/vp_vdpa.c > index 13701c2a1963..a2d3b13ac646 100644 > --- a/drivers/vdpa/virtio_pci/vp_vdpa.c > +++ b/drivers/vdpa/virtio_pci/vp_vdpa.c > @@ -214,6 +214,9 @@ static void vp_vdpa_set_status(struct vdpa_device *vdpa, > u8 status) > struct virtio_pci_modern_device *mdev = vp_vdpa_to_mdev(vp_vdpa); > u8 s = vp_vdpa_get_status(vdpa); > > + /* We should never be setting status to 0. */ > + WARN_ON(status == 0); > + > if (status & VIRTIO_CONFIG_S_DRIVER_OK && > !(s & VIRTIO_CONFIG_S_DRIVER_OK)) { > vp_vdpa_request_irq(vp_vdpa); > @@ -222,14 +225,33 @@ static void vp_vdpa_set_status(struct vdpa_device > *vdpa, u8 status) > vp_modern_set_status(mdev, status); > } > > +#define VP_VDPA_RESET_TMOUT_MS 5000 /* 5s */ > + > static int vp_vdpa_reset(struct vdpa_device *vdpa, bool clear) > { > struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa); > struct virtio_pci_modern_device *mdev = vp_vdpa_to_mdev(vp_vdpa); > u8 s = vp_vdpa_get_status(vdpa); > + unsigned long timeout; > > vp_modern_set_status(mdev, 0); > > + /* > +* As the virtio v1.1 spec (4.1.4.3.2) says: After writing 0 to > +* device_status, the driver MUST wait for a read of device_status > +* to return 0 before reinitializing the device. > +* To avoid keep us here forever, we only wait for 5 seconds. s/keep/keeping/ > +*/ > + timeout = jiffies + msecs_to_jiffies(VP_VDPA_RESET_TMOUT_MS); > + while (vp_modern_get_status(mdev)) { > + usleep_range(1000, 1500); > + if (time_after(jiffies, timeout)) { > + dev_err(&mdev->pci_dev->dev, > + "vp_vdpa: fail to reset device\n"); > + return -ETIMEDOUT; > + } Any chance to use readx_poll_timeout() here? Thanks > + } > + > if (s & VIRTIO_CONFIG_S_DRIVER_OK) > vp_vdpa_free_irq(vp_vdpa); > > -- > 2.23.0 > ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH V2] vdpa: allow provisioning device features
On Tue, Dec 6, 2022 at 9:43 AM Si-Wei Liu wrote: > > > > On 12/4/2022 10:46 PM, Jason Wang wrote: > > On Thu, Dec 1, 2022 at 8:53 AM Si-Wei Liu wrote: > >> Sorry for getting back late due to the snag of the holidays. > > No worries :) > > > >> On 11/23/2022 11:13 PM, Jason Wang wrote: > >>> On Thu, Nov 24, 2022 at 6:53 AM Si-Wei Liu wrote: > > On 11/22/2022 7:35 PM, Jason Wang wrote: > > On Wed, Nov 23, 2022 at 6:29 AM Si-Wei Liu > > wrote: > >> On 11/16/2022 7:33 PM, Jason Wang wrote: > >>> This patch allows device features to be provisioned via vdpa. This > >>> will be useful for preserving migration compatibility between source > >>> and destination: > >>> > >>> # vdpa dev add name dev1 mgmtdev pci/:02:00.0 device_features > >>> 0x30002 > >> Miss the actual "vdpa dev config show" command below > > Right, let me fix that. > > > >>> # dev1: mac 52:54:00:12:34:56 link up link_announce false mtu 65535 > >>> negotiated_features CTRL_VQ VERSION_1 ACCESS_PLATFORM > >>> > >>> Signed-off-by: Jason Wang > >>> --- > >>> Changes since v1: > >>> - Use uint64_t instead of __u64 for device_features > >>> - Fix typos and tweak the manpage > >>> - Add device_features to the help text > >>> --- > >>> man/man8/vdpa-dev.8| 15 +++ > >>> vdpa/include/uapi/linux/vdpa.h | 1 + > >>> vdpa/vdpa.c| 32 > >>> +--- > >>> 3 files changed, 45 insertions(+), 3 deletions(-) > >>> > >>> diff --git a/man/man8/vdpa-dev.8 b/man/man8/vdpa-dev.8 > >>> index 9faf3838..43e5bf48 100644 > >>> --- a/man/man8/vdpa-dev.8 > >>> +++ b/man/man8/vdpa-dev.8 > >>> @@ -31,6 +31,7 @@ vdpa-dev \- vdpa device configuration > >>> .I NAME > >>> .B mgmtdev > >>> .I MGMTDEV > >>> +.RI "[ device_features " DEVICE_FEATURES " ]" > >>> .RI "[ mac " MACADDR " ]" > >>> .RI "[ mtu " MTU " ]" > >>> .RI "[ max_vqp " MAX_VQ_PAIRS " ]" > >>> @@ -74,6 +75,15 @@ Name of the new vdpa device to add. > >>> Name of the management device to use for device addition. > >>> > >>> .PP > >>> +.BI device_features " DEVICE_FEATURES" > >>> +Specifies the virtio device features bit-mask that is provisioned > >>> for the new vdpa device. > >>> + > >>> +The bits can be found under include/uapi/linux/virtio*h. > >>> + > >>> +see macros such as VIRTIO_F_ and VIRTIO_XXX(e.g NET)_F_ for specific > >>> bit values. > >>> + > >>> +This is optional. > >> Document the behavior when this attribute is missing? For e.g. inherit > >> device features from parent device. > > This is the current behaviour but unless we've found a way to mandate > > it, I'd like to not mention it. Maybe add a description to say the > > user needs to check the features after the add if features are not > > specified. > Well, I think at least for live migration the mgmt software should get > to some consistent result between all vdpa parent drivers regarding > feature inheritance. > >>> It would be hard. Especially for the device: > >>> > >>> 1) ask device_features from the device, in this case, new features > >>> could be advertised after e.g a firmware update > >> The consistency I meant is to always inherit all device features from > >> the parent device for whatever it is capable of, > > This looks fragile. How about the features that are mutually > > exclusive? E.g FEATURE_X and FEATURE_Y that are both supported by the > > mgmt? > Hmmm, in theory, yes, it's a bit cumbersome. Is this for future proof, > since so far as I see the virtio spec doesn't seem to define features > that are mutually exclusive, and the way how driver should respond to > mutually exclusive features in feature negotiation is completely undefined? My understanding is that if a driver accepts two mutually exclusive features it should be a bug. But anyhow it's an example that it is not easy to have forward compatibility if we mandating to inherit all features from the management device. > > > > >> since that was the only > >> reasonable behavior pre-dated the device_features attribute, even though > >> there's no mandatory check by the vdpa core. This way it's > >> self-descriptive and consistent for the mgmt software to infer, as users > >> can check into dev_features at the parent mgmtdev level to know what > >> features will be ended up with after 'vdpa dev add'. I thought even > >> though inheritance is not mandated as part of uAPI, it should at least > >> be mentioned as a recommended guide line (for drivers in particular), > >> especially this is the only reasonable behavior with nowhere to check > >> what features are ended up after add (i.e. for now we can only set but > >> not possible to read the exact device_features at vd
Re: [PATCH v2] virtio: vdpa: new SolidNET DPU driver.
On Tue, Dec 6, 2022 at 12:55 AM Alvaro Karsz wrote: > > > We must send a SNET_MSG_DESTROY message to the DPU before removing the > > device. > > This remove() makes sure that if somehow remove was called > > before/without reset, the DPU will receive this message. > > I meant that the message must be sent before the PCI device is removed. > The message is sent from the vDPA reset cb, and the reset in the PCI > remove cb makes sure that PCI remove wasn't called without calling > vDPA reset. > I can remove it if you think that this is not required. The reset is usually done during probe: E.g register_virtio_device() or vhost_vdpa_open(). If it's not required, let's remove but if it is a must for your hardware, we need keep it. Thanks > > > Alvaro > ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH V2] vdpa: allow provisioning device features
On 12/4/2022 10:46 PM, Jason Wang wrote: On Thu, Dec 1, 2022 at 8:53 AM Si-Wei Liu wrote: Sorry for getting back late due to the snag of the holidays. No worries :) On 11/23/2022 11:13 PM, Jason Wang wrote: On Thu, Nov 24, 2022 at 6:53 AM Si-Wei Liu wrote: On 11/22/2022 7:35 PM, Jason Wang wrote: On Wed, Nov 23, 2022 at 6:29 AM Si-Wei Liu wrote: On 11/16/2022 7:33 PM, Jason Wang wrote: This patch allows device features to be provisioned via vdpa. This will be useful for preserving migration compatibility between source and destination: # vdpa dev add name dev1 mgmtdev pci/:02:00.0 device_features 0x30002 Miss the actual "vdpa dev config show" command below Right, let me fix that. # dev1: mac 52:54:00:12:34:56 link up link_announce false mtu 65535 negotiated_features CTRL_VQ VERSION_1 ACCESS_PLATFORM Signed-off-by: Jason Wang --- Changes since v1: - Use uint64_t instead of __u64 for device_features - Fix typos and tweak the manpage - Add device_features to the help text --- man/man8/vdpa-dev.8| 15 +++ vdpa/include/uapi/linux/vdpa.h | 1 + vdpa/vdpa.c| 32 +--- 3 files changed, 45 insertions(+), 3 deletions(-) diff --git a/man/man8/vdpa-dev.8 b/man/man8/vdpa-dev.8 index 9faf3838..43e5bf48 100644 --- a/man/man8/vdpa-dev.8 +++ b/man/man8/vdpa-dev.8 @@ -31,6 +31,7 @@ vdpa-dev \- vdpa device configuration .I NAME .B mgmtdev .I MGMTDEV +.RI "[ device_features " DEVICE_FEATURES " ]" .RI "[ mac " MACADDR " ]" .RI "[ mtu " MTU " ]" .RI "[ max_vqp " MAX_VQ_PAIRS " ]" @@ -74,6 +75,15 @@ Name of the new vdpa device to add. Name of the management device to use for device addition. .PP +.BI device_features " DEVICE_FEATURES" +Specifies the virtio device features bit-mask that is provisioned for the new vdpa device. + +The bits can be found under include/uapi/linux/virtio*h. + +see macros such as VIRTIO_F_ and VIRTIO_XXX(e.g NET)_F_ for specific bit values. + +This is optional. Document the behavior when this attribute is missing? For e.g. inherit device features from parent device. This is the current behaviour but unless we've found a way to mandate it, I'd like to not mention it. Maybe add a description to say the user needs to check the features after the add if features are not specified. Well, I think at least for live migration the mgmt software should get to some consistent result between all vdpa parent drivers regarding feature inheritance. It would be hard. Especially for the device: 1) ask device_features from the device, in this case, new features could be advertised after e.g a firmware update The consistency I meant is to always inherit all device features from the parent device for whatever it is capable of, This looks fragile. How about the features that are mutually exclusive? E.g FEATURE_X and FEATURE_Y that are both supported by the mgmt? Hmmm, in theory, yes, it's a bit cumbersome. Is this for future proof, since so far as I see the virtio spec doesn't seem to define features that are mutually exclusive, and the way how driver should respond to mutually exclusive features in feature negotiation is completely undefined? since that was the only reasonable behavior pre-dated the device_features attribute, even though there's no mandatory check by the vdpa core. This way it's self-descriptive and consistent for the mgmt software to infer, as users can check into dev_features at the parent mgmtdev level to know what features will be ended up with after 'vdpa dev add'. I thought even though inheritance is not mandated as part of uAPI, it should at least be mentioned as a recommended guide line (for drivers in particular), especially this is the only reasonable behavior with nowhere to check what features are ended up after add (i.e. for now we can only set but not possible to read the exact device_features at vdpa dev level, as yet). I fully agree, but what I want to say is. Consider: 1) We've already had feature provisioning 2) It would be hard or even impossible to mandate the semantic (consistency) of the features inheritance. I'm fine with the doc, but the mgmt layer should not depend on this and they should use feature provisioning instead. OK, if it's for future proof to not mandate feature inheritance I think I see the point. 2) or have hierarchy architecture where several layers were placed between vDPA and the real hardware Not sure what it means but I don't get why extra layers are needed. Do you mean extra layer to validate resulting features during add? Why vdpa core is not the right place to do that? Just want to go wild because we can't expect how many layers are below vDPA. vDPA core is the right place but the validating should be done during feature provisioning since it's much more easier than trying to mandating code defined behaviour like inheritance. OK, thanks for the clarifications.
Re: [PATCH v3] net: vmw_vsock: vmci: Check memcpy_from_msg()
> On Dec 5, 2022, at 3:52 AM, Artem Chernyshev > wrote: > > vmci_transport_dgram_enqueue() does not check the return value > of memcpy_from_msg(). Return with an error if the memcpy fails. I think we can add some more information in the description. Sorry, I should've said this earlier. vmci_transport_dgram_enqueue() does not check the return value of memcpy_from_msg(). If memcpy_from_msg() fails, it is possible that uninitialized memory contents are sent unintentionally instead of user's message in the datagram to the destination. Return with an error if memcpy_from_msg() fails. > > Found by Linux Verification Center (linuxtesting.org) with SVACE. > > Fixes: 0f7db23a07af ("vmci_transport: switch ->enqeue_dgram, ->enqueue_stream > and ->dequeue_stream to msghdr") > Signed-off-by: Artem Chernyshev Thanks, Artem! This version looks good to me modulo my suggestion about the description above. Reviewed-by: Vishnu Dasa Regards, Vishnu > --- > V1->V2 Fix memory leaking and updates for description > V2->V3 Return the value of memcpy_from_msg() > > net/vmw_vsock/vmci_transport.c | 6 +- > 1 file changed, 5 insertions(+), 1 deletion(-) > > diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c > index 842c94286d31..36eb16a40745 100644 > --- a/net/vmw_vsock/vmci_transport.c > +++ b/net/vmw_vsock/vmci_transport.c > @@ -1711,7 +1711,11 @@ static int vmci_transport_dgram_enqueue( > if (!dg) > return -ENOMEM; > > - memcpy_from_msg(VMCI_DG_PAYLOAD(dg), msg, len); > + err = memcpy_from_msg(VMCI_DG_PAYLOAD(dg), msg, len); > + if (err) { > + kfree(dg); > + return err; > + } > > dg->dst = vmci_make_handle(remote_addr->svm_cid, > remote_addr->svm_port); > -- > 2.30.3 > ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH v3] virtio_blk: add VIRTIO_BLK_F_LIFETIME feature support
On 12/5/22 1:29?PM, Michael S. Tsirkin wrote: > On Mon, Dec 05, 2022 at 11:53:51AM -0700, Jens Axboe wrote: >> On 12/5/22 11:36?AM, Alvaro Karsz wrote: >>> Hi, >>> Is this based on some spec? Because it looks pretty odd to me. There can be a pretty wide range of two/three/etc level cells with wildly different ranges of durability. And there's really not a lot of slc for generic devices these days, if any. >>> >>> Yes, this is based on the virtio spec >>> https://docs.oasis-open.org/virtio/virtio/v1.2/csd01/virtio-v1.2-csd01.html >>> section 5.2.6 >> >> And where did this come from? > > > Here's the commit log from the spec: > In many embedded systems, virtio-blk implementations are > backed by eMMC or UFS storage devices, which are subject to > predictable and measurable wear over time due to repeated write > cycles. > > For such systems, it can be important to be able to track > accurately the amount of wear imposed on the storage over > time and surface it to applications. In a native deployments > this is generally handled by the physical block device driver > but no such provision is made in virtio-blk to expose these > metrics for devices where it makes sense to do so. > > This patch adds support to virtio-blk for lifetime and wear > metrics to be exposed to the guest when a deployment of > virtio-blk is done over compatible eMMC or UFS storage. > > Signed-off-by: Enrico Granata > > Cc Enrico Granata as well. Alvaro sent me this one privately too. To be honest, I don't like this patch very much, but that's mostly because the spec for this caters to a narrow use case of embedded flash. It's not a generic storage thing, it's just for mmc/ufs and the embedded space. That's a missed opportunity. And either it'll become mostly unused, or it'll actually be used and then extended at some point. While I'm not a fan at all, if you guys are willing to take it in virtio-blk, then I won't stand in your way. I would rename it though to more specifically detail what it deals with. -- Jens Axboe ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH v3] virtio_blk: add VIRTIO_BLK_F_LIFETIME feature support
On Mon, Dec 05, 2022 at 11:53:51AM -0700, Jens Axboe wrote: > On 12/5/22 11:36 AM, Alvaro Karsz wrote: > > Hi, > > > >> Is this based on some spec? Because it looks pretty odd to me. There > >> can be a pretty wide range of two/three/etc level cells with wildly > >> different ranges of durability. And there's really not a lot of slc > >> for generic devices these days, if any. > > > > Yes, this is based on the virtio spec > > https://docs.oasis-open.org/virtio/virtio/v1.2/csd01/virtio-v1.2-csd01.html > > section 5.2.6 > > And where did this come from? Here's the commit log from the spec: In many embedded systems, virtio-blk implementations are backed by eMMC or UFS storage devices, which are subject to predictable and measurable wear over time due to repeated write cycles. For such systems, it can be important to be able to track accurately the amount of wear imposed on the storage over time and surface it to applications. In a native deployments this is generally handled by the physical block device driver but no such provision is made in virtio-blk to expose these metrics for devices where it makes sense to do so. This patch adds support to virtio-blk for lifetime and wear metrics to be exposed to the guest when a deployment of virtio-blk is done over compatible eMMC or UFS storage. Signed-off-by: Enrico Granata Cc Enrico Granata as well. > -- > Jens Axboe > ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH v3] virtio_blk: add VIRTIO_BLK_F_LIFETIME feature support
On 12/5/22 11:36 AM, Alvaro Karsz wrote: > Hi, > >> Is this based on some spec? Because it looks pretty odd to me. There >> can be a pretty wide range of two/three/etc level cells with wildly >> different ranges of durability. And there's really not a lot of slc >> for generic devices these days, if any. > > Yes, this is based on the virtio spec > https://docs.oasis-open.org/virtio/virtio/v1.2/csd01/virtio-v1.2-csd01.html > section 5.2.6 And where did this come from? -- Jens Axboe ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH v3] virtio_blk: add VIRTIO_BLK_F_LIFETIME feature support
Hi, > Is this based on some spec? Because it looks pretty odd to me. There > can be a pretty wide range of two/three/etc level cells with wildly > different ranges of durability. And there's really not a lot of slc > for generic devices these days, if any. Yes, this is based on the virtio spec https://docs.oasis-open.org/virtio/virtio/v1.2/csd01/virtio-v1.2-csd01.html section 5.2.6 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH v3] virtio_blk: add VIRTIO_BLK_F_LIFETIME feature support
On 12/5/22 9:20 AM, Alvaro Karsz wrote: > Implement the VIRTIO_BLK_F_LIFETIME feature for VirtIO block devices. > > This commit introduces a new ioctl command, VBLK_LIFETIME. > > VBLK_LIFETIME ioctl asks for the block device to provide lifetime > information by sending a VIRTIO_BLK_T_GET_LIFETIME command to the device. s/VBLK_LIFETIME/VBLK_GET_LIFETIME for the above. -- Jens Axboe ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH v3] virtio_blk: add VIRTIO_BLK_F_LIFETIME feature support
On 12/5/22 9:20 AM, Alvaro Karsz wrote: > Implement the VIRTIO_BLK_F_LIFETIME feature for VirtIO block devices. > > This commit introduces a new ioctl command, VBLK_LIFETIME. > > VBLK_LIFETIME ioctl asks for the block device to provide lifetime > information by sending a VIRTIO_BLK_T_GET_LIFETIME command to the device. > > lifetime information fields: > > - pre_eol_info: specifies the percentage of reserved blocks that are > consumed. > optional values following virtio spec: > *) 0 - undefined. > *) 1 - normal, < 80% of reserved blocks are consumed. > *) 2 - warning, 80% of reserved blocks are consumed. > *) 3 - urgent, 90% of reserved blocks are consumed. > > - device_lifetime_est_typ_a: this field refers to wear of SLC cells and >is provided in increments of 10used, and so >on, thru to 11 meaning estimated lifetime >exceeded. All values above 11 are reserved. > > - device_lifetime_est_typ_b: this field refers to wear of MLC cells and is >provided with the same semantics as >device_lifetime_est_typ_a. > > The data received from the device will be sent as is to the user. > No data check/decode is done by virtblk. Is this based on some spec? Because it looks pretty odd to me. There can be a pretty wide range of two/three/etc level cells with wildly different ranges of durability. And there's really not a lot of slc for generic devices these days, if any. -- Jens Axboe ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH v2] virtio: vdpa: new SolidNET DPU driver.
> We must send a SNET_MSG_DESTROY message to the DPU before removing the > device. > This remove() makes sure that if somehow remove was called > before/without reset, the DPU will receive this message. I meant that the message must be sent before the PCI device is removed. The message is sent from the vDPA reset cb, and the reset in the PCI remove cb makes sure that PCI remove wasn't called without calling vDPA reset. I can remove it if you think that this is not required. Alvaro ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH v3] virtio_blk: add VIRTIO_BLK_F_LIFETIME feature support
Implement the VIRTIO_BLK_F_LIFETIME feature for VirtIO block devices. This commit introduces a new ioctl command, VBLK_LIFETIME. VBLK_LIFETIME ioctl asks for the block device to provide lifetime information by sending a VIRTIO_BLK_T_GET_LIFETIME command to the device. lifetime information fields: - pre_eol_info: specifies the percentage of reserved blocks that are consumed. optional values following virtio spec: *) 0 - undefined. *) 1 - normal, < 80% of reserved blocks are consumed. *) 2 - warning, 80% of reserved blocks are consumed. *) 3 - urgent, 90% of reserved blocks are consumed. - device_lifetime_est_typ_a: this field refers to wear of SLC cells and is provided in increments of 10used, and so on, thru to 11 meaning estimated lifetime exceeded. All values above 11 are reserved. - device_lifetime_est_typ_b: this field refers to wear of MLC cells and is provided with the same semantics as device_lifetime_est_typ_a. The data received from the device will be sent as is to the user. No data check/decode is done by virtblk. Signed-off-by: Alvaro Karsz -- v2: - Remove (void *) casting. - Fix comments format in virtio_blk.h. - Set ioprio value for legacy devices for REQ_OP_DRV_IN operations. v3: - Initialize struct virtio_blk_lifetime to 0 instead of memset. - Rename ioctl from VBLK_LIFETIME to VBLK_GET_LIFETIME. - Return EOPNOTSUPP insted of ENOTTY if ioctl is not supported. - Check if process is CAP_SYS_ADMIN capable in lifetime ioctl. - Check if vdev is not NULL before accessing it in lifetime ioctl. -- --- drivers/block/virtio_blk.c | 106 ++-- include/uapi/linux/virtio_blk.h | 32 ++ 2 files changed, 133 insertions(+), 5 deletions(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 19da5defd73..392d57fd6b7 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -101,6 +101,18 @@ static inline blk_status_t virtblk_result(struct virtblk_req *vbr) } } +static inline int virtblk_ioctl_result(struct virtblk_req *vbr) +{ + switch (vbr->status) { + case VIRTIO_BLK_S_OK: + return 0; + case VIRTIO_BLK_S_UNSUPP: + return -EOPNOTSUPP; + default: + return -EIO; + } +} + static inline struct virtio_blk_vq *get_virtio_blk_vq(struct blk_mq_hw_ctx *hctx) { struct virtio_blk *vblk = hctx->queue->queuedata; @@ -218,6 +230,7 @@ static blk_status_t virtblk_setup_cmd(struct virtio_device *vdev, u32 type; vbr->out_hdr.sector = 0; + vbr->out_hdr.ioprio = cpu_to_virtio32(vdev, req_get_ioprio(req)); switch (req_op(req)) { case REQ_OP_READ: @@ -244,15 +257,14 @@ static blk_status_t virtblk_setup_cmd(struct virtio_device *vdev, type = VIRTIO_BLK_T_SECURE_ERASE; break; case REQ_OP_DRV_IN: - type = VIRTIO_BLK_T_GET_ID; - break; + /* type is set in virtblk_get_id/virtblk_ioctl_lifetime */ + return 0; default: WARN_ON_ONCE(1); return BLK_STS_IOERR; } vbr->out_hdr.type = cpu_to_virtio32(vdev, type); - vbr->out_hdr.ioprio = cpu_to_virtio32(vdev, req_get_ioprio(req)); if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES || type == VIRTIO_BLK_T_SECURE_ERASE) { @@ -459,12 +471,16 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str) struct virtio_blk *vblk = disk->private_data; struct request_queue *q = vblk->disk->queue; struct request *req; + struct virtblk_req *vbr; int err; req = blk_mq_alloc_request(q, REQ_OP_DRV_IN, 0); if (IS_ERR(req)) return PTR_ERR(req); + vbr = blk_mq_rq_to_pdu(req); + vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_GET_ID); + err = blk_rq_map_kern(q, req, id_str, VIRTIO_BLK_ID_BYTES, GFP_KERNEL); if (err) goto out; @@ -508,6 +524,85 @@ static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo) return ret; } +/* Get lifetime information from device */ +static int virtblk_ioctl_lifetime(struct virtio_blk *vblk, unsigned long arg) +{ + struct request_queue *q = vblk->disk->queue; + struct request *req = NULL; + struct virtblk_req *vbr; + struct virtio_blk_lifetime lifetime = {}; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + /* The virtio_blk_lifetime struct fields follow virtio spec. +* There is no check/decode on values rece
Re: [RFC PATCH v3 4/4] test/vsock: vsock_perf utility
On Sun, Dec 04, 2022 at 07:24:33PM +, Arseniy Krasnov wrote: This adds utility to check vsock rx/tx performance. Usage as sender: ./vsock_perf --port --mb --port --so_rcvlowat Signed-off-by: Arseniy Krasnov --- tools/testing/vsock/Makefile | 1 + tools/testing/vsock/README | 34 +++ tools/testing/vsock/vsock_perf.c | 449 +++ 3 files changed, 484 insertions(+) create mode 100644 tools/testing/vsock/vsock_perf.c diff --git a/tools/testing/vsock/Makefile b/tools/testing/vsock/Makefile index f8293c6910c9..d36fdd59fe2e 100644 --- a/tools/testing/vsock/Makefile +++ b/tools/testing/vsock/Makefile @@ -3,6 +3,7 @@ all: test test: vsock_test vsock_diag_test vsock_test: vsock_test.o timeout.o control.o util.o vsock_diag_test: vsock_diag_test.o timeout.o control.o util.o +vsock_perf: vsock_perf.o I think we can add "vsock_perf" to "all" target. CFLAGS += -g -O2 -Werror -Wall -I. -I../../include -I../../../usr/include -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE -D_GNU_SOURCE .PHONY: all test clean diff --git a/tools/testing/vsock/README b/tools/testing/vsock/README index 4d5045e7d2c3..64c5757b7ecc 100644 --- a/tools/testing/vsock/README +++ b/tools/testing/vsock/README @@ -35,3 +35,37 @@ Invoke test binaries in both directions as follows: --control-port=$GUEST_IP \ --control-port=1234 \ --peer-cid=3 + +vsock_perf utility +--- +'vsock_perf' is a simple tool to measure vsock performance. It works in +sender/receiver modes: sender waits for connection at specified port, +and after accepting it, starts data transmission to the receiver. After data +processing is done, it prints several metrics(see below). + +Usage: +# run as sender +# listen port 1234, tx buffer size is 1MB, send of 1G data +./vsock_perf --sender --port 1234 --buf-size 1MB --mb 1G + +Output: +tx performance: A Gb/s + +Output explanation: +A is calculated as "number of bytes to send" / "time in tx loop" + +# run as receiver +# connect to CID 2, port 1234, rx buffer size is 1MB, peer buf is 1G, SO_RCVLOWAT is 65536 +./vsock_perf --cid 2 --port 1234 --buf-size 1MB --vsk-size 1G -so_rcvlowat 65536 + +Output: +rx performance: A Gb/s +total in 'read()': B sec +POLLIN wakeups: C +average in 'read()': D ns + +Output explanation: +A is calculated as "number of received bytes" / "time in rx loop". +B is time, spent in 'read()' system call(excluding 'poll()') +C is number of 'poll()' wake ups with POLLIN bit set. +D is B / C, e.g. average amount of time, spent in single 'read()'. diff --git a/tools/testing/vsock/vsock_perf.c b/tools/testing/vsock/vsock_perf.c new file mode 100644 index ..69e3b24868d7 --- /dev/null +++ b/tools/testing/vsock/vsock_perf.c @@ -0,0 +1,449 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * vsock_perf - benchmark utility for vsock. + * + * Copyright (C) 2022 SberDevices. + * + * Author: Arseniy Krasnov + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DEFAULT_BUF_SIZE_BYTES (128 * 1024) +#define DEFAULT_TO_SEND_BYTES (64 * 1024) +#define DEFAULT_VSOCK_BUF_BYTES (256 * 1024) +#define DEFAULT_RCVLOWAT_BYTES 1 +#define DEFAULT_PORT 1234 +#define DEFAULT_CID2 + +#define BYTES_PER_GB (1024 * 1024 * 1024ULL) +#define NSEC_PER_SEC (10ULL) + +static unsigned int port = DEFAULT_PORT; +static unsigned long buf_size_bytes = DEFAULT_BUF_SIZE_BYTES; +static unsigned long vsock_buf_bytes = DEFAULT_VSOCK_BUF_BYTES; + +static inline time_t current_nsec(void) +{ + struct timespec ts; + + if (clock_gettime(CLOCK_REALTIME, &ts)) { + perror("clock_gettime"); + exit(EXIT_FAILURE); + } + + return (ts.tv_sec * NSEC_PER_SEC) + ts.tv_nsec; +} + +/* From lib/cmdline.c. */ +static unsigned long memparse(const char *ptr) +{ + char *endptr; + + unsigned long long ret = strtoull(ptr, &endptr, 0); + + switch (*endptr) { + case 'E': + case 'e': + ret <<= 10; + case 'P': + case 'p': + ret <<= 10; + case 'T': + case 't': + ret <<= 10; + case 'G': + case 'g': + ret <<= 10; + case 'M': + case 'm': + ret <<= 10; + case 'K': + case 'k': + ret <<= 10; + endptr++; + default: + break; + } + + return ret; +} + +static void vsock_increase_buf_size(int fd) +{ + if (setsockopt(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_MAX_SIZE, + &vsock_buf_bytes, sizeof(vsock_buf_bytes))) { + perror("setsockopt"); As for the previous patch, I would add the opt that is failing to make it easier to analyze in case of failure. +
Re: [RFC PATCH v3 2/4] test/vsock: rework message bounds test
On Sun, Dec 04, 2022 at 07:20:52PM +, Arseniy Krasnov wrote: This updates message bound test making it more complex. Instead of sending 1 bytes messages with one MSG_EOR bit, it sends messages of random length(one half of messages are smaller than page size, second half are bigger) with random number of MSG_EOR bits set. Receiver also don't know total number of messages. Signed-off-by: Arseniy Krasnov --- tools/testing/vsock/control.c| 28 +++ tools/testing/vsock/control.h| 2 + tools/testing/vsock/util.c | 13 tools/testing/vsock/util.h | 1 + tools/testing/vsock/vsock_test.c | 124 +++ 5 files changed, 155 insertions(+), 13 deletions(-) diff --git a/tools/testing/vsock/control.c b/tools/testing/vsock/control.c index 4874872fc5a3..d2deb4b15b94 100644 --- a/tools/testing/vsock/control.c +++ b/tools/testing/vsock/control.c @@ -141,6 +141,34 @@ void control_writeln(const char *str) timeout_end(); } +void control_writeulong(unsigned long value) +{ + char str[32]; + + if (snprintf(str, sizeof(str), "%lu", value) >= sizeof(str)) { + perror("snprintf"); + exit(EXIT_FAILURE); + } + + control_writeln(str); +} + +unsigned long control_readulong(void) +{ + unsigned long value; + char *str; + + str = control_readln(); + + if (!str) + exit(EXIT_FAILURE); + + value = strtoul(str, NULL, 10); + free(str); + + return value; +} + /* Return the next line from the control socket (without the trailing newline). * * The program terminates if a timeout occurs. diff --git a/tools/testing/vsock/control.h b/tools/testing/vsock/control.h index 51814b4f9ac1..c1f77fdb2c7a 100644 --- a/tools/testing/vsock/control.h +++ b/tools/testing/vsock/control.h @@ -9,7 +9,9 @@ void control_init(const char *control_host, const char *control_port, void control_cleanup(void); void control_writeln(const char *str); char *control_readln(void); +unsigned long control_readulong(void); void control_expectln(const char *str); bool control_cmpln(char *line, const char *str, bool fail); +void control_writeulong(unsigned long value); #endif /* CONTROL_H */ diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c index 2acbb7703c6a..01b636d3039a 100644 --- a/tools/testing/vsock/util.c +++ b/tools/testing/vsock/util.c @@ -395,3 +395,16 @@ void skip_test(struct test_case *test_cases, size_t test_cases_len, test_cases[test_id].skip = true; } + +unsigned long hash_djb2(const void *data, size_t len) +{ + unsigned long hash = 5381; + int i = 0; + + while (i < len) { + hash = ((hash << 5) + hash) + ((unsigned char *)data)[i]; + i++; + } + + return hash; +} diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h index a3375ad2fb7f..fb99208a95ea 100644 --- a/tools/testing/vsock/util.h +++ b/tools/testing/vsock/util.h @@ -49,4 +49,5 @@ void run_tests(const struct test_case *test_cases, void list_tests(const struct test_case *test_cases); void skip_test(struct test_case *test_cases, size_t test_cases_len, const char *test_id_str); +unsigned long hash_djb2(const void *data, size_t len); #endif /* UTIL_H */ diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index bb6d691cb30d..a5904ee39e91 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -284,10 +284,14 @@ static void test_stream_msg_peek_server(const struct test_opts *opts) close(fd); } -#define MESSAGES_CNT 7 -#define MSG_EOR_IDX (MESSAGES_CNT / 2) +#define SOCK_BUF_SIZE (2 * 1024 * 1024) +#define MAX_MSG_SIZE (32 * 1024) + static void test_seqpacket_msg_bounds_client(const struct test_opts *opts) { + unsigned long curr_hash; + int page_size; + int msg_count; int fd; fd = vsock_seqpacket_connect(opts->peer_cid, 1234); @@ -296,18 +300,79 @@ static void test_seqpacket_msg_bounds_client(const struct test_opts *opts) exit(EXIT_FAILURE); } - /* Send several messages, one with MSG_EOR flag */ - for (int i = 0; i < MESSAGES_CNT; i++) - send_byte(fd, 1, (i == MSG_EOR_IDX) ? MSG_EOR : 0); + /* Wait, until receiver sets buffer size. */ + control_expectln("SRVREADY"); + + curr_hash = 0; + page_size = getpagesize(); + msg_count = SOCK_BUF_SIZE / MAX_MSG_SIZE; + + for (int i = 0; i < msg_count; i++) { + ssize_t send_size; + size_t buf_size; + int flags; + void *buf; + + /* Use "small" buffers and "big" buffers. */ + if (i & 1) + buf_size = page_size + + (rand() % (MAX_MSG_SIZE - page_size)); + else + buf_size = 1 + (rand() % page_size); + + buf = malloc
Re: [RFC PATCH v3 1/4] vsock: return errors other than -ENOMEM to socket
On Sun, Dec 04, 2022 at 07:19:20PM +, Arseniy Krasnov wrote: From: Bobby Eshleman This removes behaviour, where error code returned from any transport was always switched to ENOMEM. I would add here the example you described in the cover letter with EMSGSIZE, so the problem is better explained. Signed-off-by: Bobby Eshleman Signed-off-by: Arseniy Krasnov --- net/vmw_vsock/af_vsock.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 884eca7f6743..61ddab664c33 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1862,8 +1862,9 @@ static int vsock_connectible_sendmsg(struct socket *sock, struct msghdr *msg, written = transport->stream_enqueue(vsk, msg, len - total_written); } + if (written < 0) { - err = -ENOMEM; + err = written; goto out_err; } -- 2.25.1 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH v3] net: vmw_vsock: vmci: Check memcpy_from_msg()
On Mon, Dec 05, 2022 at 02:52:00PM +0300, Artem Chernyshev wrote: vmci_transport_dgram_enqueue() does not check the return value of memcpy_from_msg(). Return with an error if the memcpy fails. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 0f7db23a07af ("vmci_transport: switch ->enqeue_dgram, ->enqueue_stream and ->dequeue_stream to msghdr") Signed-off-by: Artem Chernyshev --- V1->V2 Fix memory leaking and updates for description V2->V3 Return the value of memcpy_from_msg() net/vmw_vsock/vmci_transport.c | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) Reviewed-by: Stefano Garzarella ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH v5] virtio/vsock: replace virtio_vsock_pkt with sk_buff
On Fri, Dec 02, 2022 at 09:35:18AM -0800, Bobby Eshleman wrote: This commit changes virtio/vsock to use sk_buff instead of virtio_vsock_pkt. Beyond better conforming to other net code, using sk_buff allows vsock to use sk_buff-dependent features in the future (such as sockmap) and improves throughput. This patch introduces the following performance changes: Tool/Config: uperf w/ 64 threads, SOCK_STREAM Test Runs: 5, mean of results Before: commit 95ec6bce2a0b ("Merge branch 'net-ipa-more-endpoints'") Test: 64KB, g2h Before: 21.63 Gb/s After: 25.59 Gb/s (+18%) Test: 16B, g2h Before: 11.86 Mb/s After: 17.41 Mb/s (+46%) Test: 64KB, h2g Before: 2.15 Gb/s After: 3.6 Gb/s (+67%) Test: 16B, h2g Before: 14.38 Mb/s After: 18.43 Mb/s (+28%) Signed-off-by: Bobby Eshleman --- Changes in v5: - last_skb instead of skb: last_hdr->len = cpu_to_le32(last_skb->len) With this issue fixed, I confirm that all the tests passed: Reviewed-by: Stefano Garzarella As pointed out in v4, this is net-next material, so you should use the net-next tag and base the patch on the net-next tree: https://www.kernel.org/doc/html/v6.0/process/maintainer-netdev.html#netdev-faq I locally applied the patch on net-next and everything is fine, so maybe the maintainers can apply it, otherwise you should resend it with the right tag. Ah, in that case I suggest you send it before the next merge window opens (I guess next week), because net-next closes and you'll have to wait for the next cycle. Thanks, Stefano ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH v2] net: vmw_vsock: vmci: Check memcpy_from_msg()
On Sat, Dec 03, 2022 at 11:33:12AM +0300, Artem Chernyshev wrote: vmci_transport_dgram_enqueue() does not check the return value of memcpy_from_msg(). Return with an error if the memcpy fails. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 0f7db23a07af ("vmci_transport: switch ->enqeue_dgram, ->enqueue_stream and ->dequeue_stream to msghdr") Signed-off-by: Artem Chernyshev --- V1->V2 Fix memory leaking and updates for description net/vmw_vsock/vmci_transport.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 842c94286d31..c94c3deaa09d 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -1711,7 +1711,10 @@ static int vmci_transport_dgram_enqueue( if (!dg) return -ENOMEM; - memcpy_from_msg(VMCI_DG_PAYLOAD(dg), msg, len); + if (memcpy_from_msg(VMCI_DG_PAYLOAD(dg), msg, len)) { + kfree(dg); + return -EFAULT; Since memcpy_from_msg() is a wrapper of copy_from_iter_full() that simply returns -EFAULT in case of an error, perhaps it would be better here to return the value of memcpy_from_msg() instead of wiring the error. However in the end the behavior is the same, so even if you don't want to change it I'll leave my R-b: Reviewed-by: Stefano Garzarella Thanks, Stefano ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [RFC PATCH v2 5/6] test/vsock: add big message test
On Thu, Dec 01, 2022 at 11:44:39AM +, Arseniy Krasnov wrote: On 01.12.2022 12:45, Stefano Garzarella wrote: On Fri, Nov 25, 2022 at 05:13:06PM +, Arseniy Krasnov wrote: This adds test for sending message, bigger than peer's buffer size. For SOCK_SEQPACKET socket it must fail, as this type of socket has message size limit. Signed-off-by: Arseniy Krasnov --- tools/testing/vsock/vsock_test.c | 69 1 file changed, 69 insertions(+) diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index 12ef0cca6f93..a8e43424fb32 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -569,6 +569,70 @@ static void test_seqpacket_timeout_server(const struct test_opts *opts) close(fd); } +static void test_seqpacket_bigmsg_client(const struct test_opts *opts) +{ + unsigned long sock_buf_size; + ssize_t send_size; + socklen_t len; + void *data; + int fd; + + len = sizeof(sock_buf_size); + + fd = vsock_seqpacket_connect(opts->peer_cid, 1234); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + if (getsockopt(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE, + &sock_buf_size, &len)) { + perror("getsockopt"); + exit(EXIT_FAILURE); + } + + sock_buf_size++; + + data = malloc(sock_buf_size); + if (!data) { + perror("malloc"); + exit(EXIT_FAILURE); + } + + send_size = send(fd, data, sock_buf_size, 0); + if (send_size != -1) { + fprintf(stderr, "expected 'send(2)' failure, got %zi\n", + send_size); + exit(EXIT_FAILURE); + } + + if (errno != EMSGSIZE) { + fprintf(stderr, "expected EMSGSIZE in 'errno', got %i\n", + errno); + exit(EXIT_FAILURE); + } We should make sure that this is true for all transports, but since now only virtio-vsock supports it, we should be okay. Hm, in general: I've tested this test suite for vmci may be several months ago, and found, that some tests didn't work. I'm thinking about reworking this test suite a little bit: each transport must have own set of tests for features that it supports. I had feeling, that all these tests are run only with virtio transport :) Because for example SEQPACKET mode is suported only for virtio. Yep, when we developed it, we added the "--skip" param for that. Ideally there should be no difference, but I remember VMCI had a different behavior and we couldn't change it for backward compatibility, so we added "--skip". Thanks, Steano ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH v2] virtio: vdpa: new SolidNET DPU driver.
Hi Jason, Thanks for your comment. > I wonder if vq_num and max_size_host_cfg is not about to be changed by > the upper layer, any reason not fail the psnet_read_cfg()? You're right, I can catch this error earlier in psnet_read_cfg. > So during reset, the irqs were freed, but I don't see how they are > allocated here. The IRQs are allocated in probe and are freed in reset. This assumes that the device won't be revived after reset. I will change it and request the IRQs in snet_set_status. > Any reason for this reset in the remove()? We must send a SNET_MSG_DESTROY message to the DPU before removing the device. This remove() makes sure that if somehow remove was called before/without reset, the DPU will receive this message. Alvaro ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH v2] virtio: vdpa: new SolidNET DPU driver.
在 2022/11/29 22:33, Alvaro Karsz 写道: This commit includes: 1) The driver to manage the controlplane over vDPA bus. 2) A HW monitor device to read health values from the DPU. Signed-off-by: Alvaro Karsz -- v2: - Auto detect the BAR used for communication. - When reading the DPU config, wait for 5secs before giving up on the device. - Return EOPNOTSUPP error code in vDPA set_vq_state callback if the vq state is not the same as the initial one. - Implement a vDPA reset callback. - Wait for an ACK when sending a message to the DPU. - Add endianness comments on 64bit read/write functions. - Remove the get_iova_range and free vDPA callbacks. - Usage of managed device functions to ioremap a region. - Call pci_set_drvdata and pci_set_master before vdpa_register_device. - Create DMA isolation between the vDPA devices by using the chip SR-IOV feature. Every vDPA device gets a PCIe VF with its own DMA device. Hi Alvaro: Looks good to me overall, just few comments. See below. --- MAINTAINERS|4 + drivers/vdpa/Kconfig | 11 + drivers/vdpa/Makefile |1 + drivers/vdpa/solidrun/Makefile |3 + drivers/vdpa/solidrun/snet_hwmon.c | 191 + drivers/vdpa/solidrun/snet_main.c | 1086 drivers/vdpa/solidrun/snet_vdpa.h | 192 + 7 files changed, 1488 insertions(+) create mode 100644 drivers/vdpa/solidrun/Makefile create mode 100644 drivers/vdpa/solidrun/snet_hwmon.c create mode 100644 drivers/vdpa/solidrun/snet_main.c create mode 100644 drivers/vdpa/solidrun/snet_vdpa.h diff --git a/MAINTAINERS b/MAINTAINERS index 046ff06ff97..ae425b3bed4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -21725,6 +21725,10 @@ IFCVF VIRTIO DATA PATH ACCELERATOR R:Zhu Lingshan F:drivers/vdpa/ifcvf/ +SNET DPU VIRTIO DATA PATH ACCELERATOR +R: Alvaro Karsz +F: drivers/vdpa/solidrun/ + VIRTIO BALLOON M:"Michael S. Tsirkin" M:David Hildenbrand diff --git a/drivers/vdpa/Kconfig b/drivers/vdpa/Kconfig index 50f45d03761..cb1ff0f9e27 100644 --- a/drivers/vdpa/Kconfig +++ b/drivers/vdpa/Kconfig @@ -86,4 +86,15 @@ config ALIBABA_ENI_VDPA VDPA driver for Alibaba ENI (Elastic Network Interface) which is built upon virtio 0.9.5 specification. + config SNET_VDPA + tristate "SolidRun's vDPA driver for SolidNET" + select HWMON + depends on PCI_MSI && PCI_IOV + help + vDPA driver for SolidNET DPU. + With this driver, the VirtIO dataplane can be + offloaded to a SolidNET DPU. + This driver includes a HW monitor device that + reads health values from the DPU. + endif # VDPA diff --git a/drivers/vdpa/Makefile b/drivers/vdpa/Makefile index 15665563a7f..59396ff2a31 100644 --- a/drivers/vdpa/Makefile +++ b/drivers/vdpa/Makefile @@ -6,3 +6,4 @@ obj-$(CONFIG_IFCVF)+= ifcvf/ obj-$(CONFIG_MLX5_VDPA) += mlx5/ obj-$(CONFIG_VP_VDPA)+= virtio_pci/ obj-$(CONFIG_ALIBABA_ENI_VDPA) += alibaba/ +obj-$(CONFIG_SNET_VDPA) += solidrun/ diff --git a/drivers/vdpa/solidrun/Makefile b/drivers/vdpa/solidrun/Makefile new file mode 100644 index 000..0acabd040fd --- /dev/null +++ b/drivers/vdpa/solidrun/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_SNET_VDPA) += snet_vdpa.o +snet_vdpa-$(CONFIG_SNET_VDPA) += snet_main.o snet_hwmon.o diff --git a/drivers/vdpa/solidrun/snet_hwmon.c b/drivers/vdpa/solidrun/snet_hwmon.c new file mode 100644 index 000..ec6d09f09c7 --- /dev/null +++ b/drivers/vdpa/solidrun/snet_hwmon.c @@ -0,0 +1,191 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * SolidRun DPU driver for control plane + * + * Copyright (C) 2022 SolidRun + * + * Author: Alvaro Karsz + * + */ + +#include +#include + +#include "snet_vdpa.h" + +/* Monitor offsets */ +#define SNET_MON_TMP0_IN_OFF 0x00 +#define SNET_MON_TMP0_MAX_OFF 0x08 +#define SNET_MON_TMP0_CRIT_OFF0x10 +#define SNET_MON_TMP1_IN_OFF 0x18 +#define SNET_MON_TMP1_CRIT_OFF0x20 +#define SNET_MON_CURR_IN_OFF 0x28 +#define SNET_MON_CURR_MAX_OFF 0x30 +#define SNET_MON_CURR_CRIT_OFF0x38 +#define SNET_MON_PWR_IN_OFF 0x40 +#define SNET_MON_VOLT_IN_OFF 0x48 +#define SNET_MON_VOLT_CRIT_OFF0x50 +#define SNET_MON_VOLT_LCRIT_OFF 0x58 + +static void snet_hwmon_read_reg(struct psnet *psnet, u32 reg, long *out) +{ + *out = (long)psnet_read64(psnet, psnet->cfg.hwmon_off + reg); +} + +static umode_t snet_howmon_is_visible(const void *data, + enum hwmon_sensor_types type, + u32 attr, int channel) +{ + /* Files are read only */ + return 0444; +} + +static int snet_howmon_read(struct device *dev, enum hwmon_sensor_types type, + u32 attr,