Re: [PATCH] x86/paravirt: convert simple paravirt functions to asm

2023-03-16 Thread Peter Zijlstra
On Wed, Mar 08, 2023 at 04:42:10PM +0100, Juergen Gross wrote:

> +DEFINE_PARAVIRT_ASM(pv_native_irq_disable, "cli", .text);
> +DEFINE_PARAVIRT_ASM(pv_native_irq_enable, "sti", .text);
> +DEFINE_PARAVIRT_ASM(pv_native_read_cr2, "mov %cr2, %rax", .text);

per these v, the above ^ should be in .noinstr.text

> -static noinstr unsigned long pv_native_read_cr2(void)
> -static noinstr void pv_native_irq_enable(void)
> -static noinstr void pv_native_irq_disable(void)


___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH v2 3/3] virtio_ring: Use const to annotate read-only pointer params

2023-03-16 Thread Jason Wang
On Thu, Mar 16, 2023 at 2:55 AM Feng Liu  wrote:
>
> Add const to make the read-only pointer parameters clear, similar to
> many existing functions.
>
> Use `container_of_const` to implement `to_vvq`, which ensures the
> const-ness of read-only parameters and avoids accidental modification
> of their members.
>
> Signed-off-by: Feng Liu 
> Reviewed-by: Jiri Pirko 

Acked-by: Jason Wang 

Thanks

>
> ---
> v0 -> v1
> feedbacks from Michael S. Tsirkin
> - use `container_of_const` to implement `to_vvq`
> ---
>  drivers/virtio/virtio_ring.c | 36 ++--
>  include/linux/virtio.h   | 14 +++---
>  2 files changed, 25 insertions(+), 25 deletions(-)
>
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> index a26fab91c59f..4c3bb0ddeb9b 100644
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -231,9 +231,9 @@ static void vring_free(struct virtqueue *_vq);
>   * Helpers.
>   */
>
> -#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
> +#define to_vvq(_vq) container_of_const(_vq, struct vring_virtqueue, vq)
>
> -static bool virtqueue_use_indirect(struct vring_virtqueue *vq,
> +static bool virtqueue_use_indirect(const struct vring_virtqueue *vq,
>unsigned int total_sg)
>  {
> /*
> @@ -269,7 +269,7 @@ static bool virtqueue_use_indirect(struct vring_virtqueue 
> *vq,
>   * unconditionally on data path.
>   */
>
> -static bool vring_use_dma_api(struct virtio_device *vdev)
> +static bool vring_use_dma_api(const struct virtio_device *vdev)
>  {
> if (!virtio_has_dma_quirk(vdev))
> return true;
> @@ -289,7 +289,7 @@ static bool vring_use_dma_api(struct virtio_device *vdev)
> return false;
>  }
>
> -size_t virtio_max_dma_size(struct virtio_device *vdev)
> +size_t virtio_max_dma_size(const struct virtio_device *vdev)
>  {
> size_t max_segment_size = SIZE_MAX;
>
> @@ -423,7 +423,7 @@ static void virtqueue_init(struct vring_virtqueue *vq, 
> u32 num)
>   */
>
>  static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
> -  struct vring_desc *desc)
> +  const struct vring_desc *desc)
>  {
> u16 flags;
>
> @@ -1183,7 +1183,7 @@ static u16 packed_last_used(u16 last_used_idx)
>  }
>
>  static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
> -struct vring_desc_extra *extra)
> +const struct vring_desc_extra *extra)
>  {
> u16 flags;
>
> @@ -1206,7 +1206,7 @@ static void vring_unmap_extra_packed(const struct 
> vring_virtqueue *vq,
>  }
>
>  static void vring_unmap_desc_packed(const struct vring_virtqueue *vq,
> -  struct vring_packed_desc *desc)
> +   const struct vring_packed_desc *desc)
>  {
> u16 flags;
>
> @@ -2786,10 +2786,10 @@ EXPORT_SYMBOL_GPL(vring_transport_features);
>   * Returns the size of the vring.  This is mainly used for boasting to
>   * userspace.  Unlike other operations, this need not be serialized.
>   */
> -unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
> +unsigned int virtqueue_get_vring_size(const struct virtqueue *_vq)
>  {
>
> -   struct vring_virtqueue *vq = to_vvq(_vq);
> +   const struct vring_virtqueue *vq = to_vvq(_vq);
>
> return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num;
>  }
> @@ -2819,9 +2819,9 @@ void __virtqueue_unbreak(struct virtqueue *_vq)
>  }
>  EXPORT_SYMBOL_GPL(__virtqueue_unbreak);
>
> -bool virtqueue_is_broken(struct virtqueue *_vq)
> +bool virtqueue_is_broken(const struct virtqueue *_vq)
>  {
> -   struct vring_virtqueue *vq = to_vvq(_vq);
> +   const struct vring_virtqueue *vq = to_vvq(_vq);
>
> return READ_ONCE(vq->broken);
>  }
> @@ -2868,9 +2868,9 @@ void __virtio_unbreak_device(struct virtio_device *dev)
>  }
>  EXPORT_SYMBOL_GPL(__virtio_unbreak_device);
>
> -dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq)
> +dma_addr_t virtqueue_get_desc_addr(const struct virtqueue *_vq)
>  {
> -   struct vring_virtqueue *vq = to_vvq(_vq);
> +   const struct vring_virtqueue *vq = to_vvq(_vq);
>
> BUG_ON(!vq->we_own_ring);
>
> @@ -2881,9 +2881,9 @@ dma_addr_t virtqueue_get_desc_addr(struct virtqueue 
> *_vq)
>  }
>  EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);
>
> -dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq)
> +dma_addr_t virtqueue_get_avail_addr(const struct virtqueue *_vq)
>  {
> -   struct vring_virtqueue *vq = to_vvq(_vq);
> +   const struct vring_virtqueue *vq = to_vvq(_vq);
>
> BUG_ON(!vq->we_own_ring);
>
> @@ -2895,9 +2895,9 @@ dma_addr_t virtqueue_get_avail_addr(struct virtqueue 
> *_vq)
>  }
>  EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);
>
> -dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq)
> 

Re: [PATCH RFC v2 virtio 7/7] pds_vdpa: pds_vdps.rst and Kconfig

2023-03-16 Thread Jason Wang
On Thu, Mar 16, 2023 at 11:25 AM Shannon Nelson  wrote:
>
> On 3/15/23 12:05 AM, Jason Wang wrote:
> > On Thu, Mar 9, 2023 at 9:31 AM Shannon Nelson  
> > wrote:
> >>
> >> Add the documentation and Kconfig entry for pds_vdpa driver.
> >>
> >> Signed-off-by: Shannon Nelson 
> >> ---
> >>   .../ethernet/pensando/pds_vdpa.rst| 84 +++
> >>   MAINTAINERS   |  4 +
> >>   drivers/vdpa/Kconfig  |  8 ++
> >>   3 files changed, 96 insertions(+)
> >>   create mode 100644 
> >> Documentation/networking/device_drivers/ethernet/pensando/pds_vdpa.rst
> >>
> >> diff --git 
> >> a/Documentation/networking/device_drivers/ethernet/pensando/pds_vdpa.rst 
> >> b/Documentation/networking/device_drivers/ethernet/pensando/pds_vdpa.rst
> >> new file mode 100644
> >> index 000000000000..d41f6dd66e3e
> >> --- /dev/null
> >> +++ 
> >> b/Documentation/networking/device_drivers/ethernet/pensando/pds_vdpa.rst
> >> @@ -0,0 +1,84 @@
> >> +.. SPDX-License-Identifier: GPL-2.0+
> >> +.. note: can be edited and viewed with /usr/bin/formiko-vim
> >> +
> >> +==
> >> +PCI vDPA driver for the AMD/Pensando(R) DSC adapter family
> >> +==
> >> +
> >> +AMD/Pensando vDPA VF Device Driver
> >> +Copyright(c) 2023 Advanced Micro Devices, Inc
> >> +
> >> +Overview
> >> +
> >> +
> >> +The ``pds_vdpa`` driver is an auxiliary bus driver that supplies
> >> +a vDPA device for use by the virtio network stack.  It is used with
> >> +the Pensando Virtual Function devices that offer vDPA and virtio queue
> >> +services.  It depends on the ``pds_core`` driver and hardware for the PF
> >> +and VF PCI handling as well as for device configuration services.
> >> +
> >> +Using the device
> >> +
> >> +
> >> +The ``pds_vdpa`` device is enabled via multiple configuration steps and
> >> +depends on the ``pds_core`` driver to create and enable SR-IOV Virtual
> >> +Function devices.
> >> +
> >> +Shown below are the steps to bind the driver to a VF and also to the
> >> +associated auxiliary device created by the ``pds_core`` driver.
> >> +
> >> +.. code-block:: bash
> >> +
> >> +  #!/bin/bash
> >> +
> >> +  modprobe pds_core
> >> +  modprobe vdpa
> >> +  modprobe pds_vdpa
> >> +
> >> +  PF_BDF=`grep -H "vDPA.*1" /sys/kernel/debug/pds_core/*/viftypes | head 
> >> -1 | awk -F / '{print $6}'`
> >> +
> >> +  # Enable vDPA VF auxiliary device(s) in the PF
> >> +  devlink dev param set pci/$PF_BDF name enable_vnet value true cmode 
> >> runtime
> >> +
> >
> > Does this mean we can't do per VF configuration for vDPA enablement
> > (e.g VF0 for vdpa VF1 to other type)?
>
> For now, yes, a PF only supports one VF type at a time.  We've thought
> about possibilities for some heterogeneous configurations, and tried to
> do some planning for future flexibility, but our current needs don't go
> that far.  If and when we get there, we might look at how Gautam's group
> did their VF personalities in their EF100 driver, or some other
> possibilities.

That's fine.


>
> Thanks for looking through these, I appreciate your time and comments.

You are welcome.

Thanks

>
> sln
>
>
> >
> > Thanks
> >
> >
> >> +  # Create a VF for vDPA use
> >> +  echo 1 > /sys/bus/pci/drivers/pds_core/$PF_BDF/sriov_numvfs
> >> +
> >> +  # Find the vDPA services/devices available
> >> +  PDS_VDPA_MGMT=`vdpa mgmtdev show | grep vDPA | head -1 | cut -d: -f1`
> >> +
> >> +  # Create a vDPA device for use in virtio network configurations
> >> +  vdpa dev add name vdpa1 mgmtdev $PDS_VDPA_MGMT mac 00:11:22:33:44:55
> >> +
> >> +  # Set up an ethernet interface on the vdpa device
> >> +  modprobe virtio_vdpa
> >> +
> >> +
> >> +
> >> +Enabling the driver
> >> +===
> >> +
> >> +The driver is enabled via the standard kernel configuration system,
> >> +using the make command::
> >> +
> >> +  make oldconfig/menuconfig/etc.
> >> +
> >> +The driver is located in the menu structure at:
> >> +
> >> +  -> Device Drivers
> >> +-> Network device support (NETDEVICES [=y])
> >> +  -> Ethernet driver support
> >> +-> Pensando devices
> >> +  -> Pensando Ethernet PDS_VDPA Support
> >> +
> >> +Support
> >> +===
> >> +
> >> +For general Linux networking support, please use the netdev mailing
> >> +list, which is monitored by Pensando personnel::
> >> +
> >> +  netdev@vger.kernel.org
> >> +
> >> +For more specific support needs, please use the Pensando driver support
> >> +email::
> >> +
> >> +  drivers@pensando.io
> >> diff --git a/MAINTAINERS b/MAINTAINERS
> >> index cb21dcd3a02a..da981c5bc830 100644
> >> --- a/MAINTAINERS
> >> +++ b/MAINTAINERS
> >> @@ -22120,6 +22120,10 @@ SNET DPU VIRTIO DATA PATH ACCELERATOR
> >>   R: Alvaro Karsz 
> >>   F: drivers/vdpa/solidrun/
> >>
> >> +PDS DSC VIRTIO DATA PATH ACCELERATOR
> >> +R: Shannon Nelson 
> >> +F: 

Re: [PATCH v2 1/2] vdpa/mlx5: Extend driver support for new features

2023-03-16 Thread Jason Wang
On Wed, Mar 15, 2023 at 3:28 PM Eli Cohen  wrote:
>
> Extend the possible list for features that can be supported by firmware.
> Note that different versions of firmware may or may not support these
> features. The driver is made aware of them by querying the firmware.
>
> While doing this, improve the code so we use enum names instead of hard
> coded numerical values.
>
> The new features supported by the driver are the following:
>
> VIRTIO_NET_F_MRG_RXBUF
> VIRTIO_NET_F_HOST_UFO
> VIRTIO_NET_F_HOST_ECN
> VIRTIO_NET_F_GUEST_UFO
> VIRTIO_NET_F_GUEST_ECN
> VIRTIO_NET_F_GUEST_TSO6
> VIRTIO_NET_F_GUEST_TSO4
>
> Signed-off-by: Eli Cohen 

Acked-by: Jason Wang 

Thanks

> ---
>  drivers/vdpa/mlx5/net/mlx5_vnet.c | 64 +++
>  1 file changed, 48 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c 
> b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> index 520646ae7fa0..4abc3a4ee515 100644
> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> @@ -778,12 +778,32 @@ static bool vq_is_tx(u16 idx)
> return idx % 2;
>  }
>
> -static u16 get_features_12_3(u64 features)
> +enum {
> +   MLX5_VIRTIO_NET_F_MRG_RXBUF = 2,
> +   MLX5_VIRTIO_NET_F_HOST_UFO = 3,
> +   MLX5_VIRTIO_NET_F_HOST_ECN = 4,
> +   MLX5_VIRTIO_NET_F_GUEST_UFO = 5,
> +   MLX5_VIRTIO_NET_F_GUEST_ECN = 6,
> +   MLX5_VIRTIO_NET_F_GUEST_TSO6 = 7,
> +   MLX5_VIRTIO_NET_F_GUEST_TSO4 = 8,
> +   MLX5_VIRTIO_NET_F_GUEST_CSUM = 9,
> +   MLX5_VIRTIO_NET_F_CSUM = 10,
> +   MLX5_VIRTIO_NET_F_HOST_TSO6 = 11,
> +   MLX5_VIRTIO_NET_F_HOST_TSO4 = 12,
> +};
> +
> +static u16 get_features(u64 features)
>  {
> -   return (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << 9) |
> -  (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << 8) |
> -  (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << 7) |
> -  (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_CSUM)) << 6);
> +   return (!!(features & BIT_ULL(VIRTIO_NET_F_MRG_RXBUF)) << 
> MLX5_VIRTIO_NET_F_MRG_RXBUF) |
> +  (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_UFO)) << 
> MLX5_VIRTIO_NET_F_HOST_UFO) |
> +  (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_ECN)) << 
> MLX5_VIRTIO_NET_F_HOST_ECN) |
> +  (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_UFO)) << 
> MLX5_VIRTIO_NET_F_GUEST_UFO) |
> +  (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_ECN)) << 
> MLX5_VIRTIO_NET_F_GUEST_ECN) |
> +  (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO6)) << 
> MLX5_VIRTIO_NET_F_GUEST_TSO6) |
> +  (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO4)) << 
> MLX5_VIRTIO_NET_F_GUEST_TSO4) |
> +  (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << 
> MLX5_VIRTIO_NET_F_CSUM) |
> +  (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << 
> MLX5_VIRTIO_NET_F_HOST_TSO6) |
> +  (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << 
> MLX5_VIRTIO_NET_F_HOST_TSO4);
>  }
>
>  static bool counters_supported(const struct mlx5_vdpa_dev *mvdev)
> @@ -797,6 +817,7 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, 
> struct mlx5_vdpa_virtque
> int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
> u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {};
> void *obj_context;
> +   u16 mlx_features;
> void *cmd_hdr;
> void *vq_ctx;
> void *in;
> @@ -812,6 +833,7 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, 
> struct mlx5_vdpa_virtque
> goto err_alloc;
> }
>
> +   mlx_features = get_features(ndev->mvdev.actual_features);
> cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, 
> general_obj_in_cmd_hdr);
>
> MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, 
> MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
> @@ -822,7 +844,9 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, 
> struct mlx5_vdpa_virtque
> MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, 
> mvq->avail_idx);
> MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, 
> mvq->used_idx);
> MLX5_SET(virtio_net_q_object, obj_context, 
> queue_feature_bit_mask_12_3,
> -get_features_12_3(ndev->mvdev.actual_features));
> +mlx_features >> 3);
> +   MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_2_0,
> +mlx_features & 7);
> vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, 
> virtio_q_context);
> MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev));
>
> @@ -2171,23 +2195,31 @@ static u32 mlx5_vdpa_get_vq_group(struct vdpa_device 
> *vdev, u16 idx)
> return MLX5_VDPA_DATAVQ_GROUP;
>  }
>
> -enum { MLX5_VIRTIO_NET_F_GUEST_CSUM = 1 << 9,
> -   MLX5_VIRTIO_NET_F_CSUM = 1 << 10,
> -   MLX5_VIRTIO_NET_F_HOST_TSO6 = 1 << 11,
> -   MLX5_VIRTIO_NET_F_HOST_TSO4 = 1 << 12,
> -};
> -
>  static u64 mlx_to_vritio_features(u16 

Re: [PATCH RFC v2 virtio 2/7] pds_vdpa: get vdpa management info

2023-03-16 Thread Jason Wang
On Thu, Mar 16, 2023 at 11:25 AM Shannon Nelson  wrote:
>
> On 3/15/23 12:05 AM, Jason Wang wrote:
> > On Thu, Mar 9, 2023 at 9:31 AM Shannon Nelson  
> > wrote:
> >>
> >> Find the vDPA management information from the DSC in order to
> >> advertise it to the vdpa subsystem.
> >>
> >> Signed-off-by: Shannon Nelson 
> >> ---
> >>   drivers/vdpa/pds/Makefile|   3 +-
> >>   drivers/vdpa/pds/aux_drv.c   |  13 
> >>   drivers/vdpa/pds/aux_drv.h   |   7 +++
> >>   drivers/vdpa/pds/debugfs.c   |   3 +
> >>   drivers/vdpa/pds/vdpa_dev.c  | 113 +++
> >>   drivers/vdpa/pds/vdpa_dev.h  |  15 +
> >>   include/linux/pds/pds_vdpa.h |  92 
> >>   7 files changed, 245 insertions(+), 1 deletion(-)
> >>   create mode 100644 drivers/vdpa/pds/vdpa_dev.c
> >>   create mode 100644 drivers/vdpa/pds/vdpa_dev.h
> >>
> >> diff --git a/drivers/vdpa/pds/Makefile b/drivers/vdpa/pds/Makefile
> >> index a9cd2f450ae1..13b50394ec64 100644
> >> --- a/drivers/vdpa/pds/Makefile
> >> +++ b/drivers/vdpa/pds/Makefile
> >> @@ -3,6 +3,7 @@
> >>
> >>   obj-$(CONFIG_PDS_VDPA) := pds_vdpa.o
> >>
> >> -pds_vdpa-y := aux_drv.o
> >> +pds_vdpa-y := aux_drv.o \
> >> + vdpa_dev.o
> >>
> >>   pds_vdpa-$(CONFIG_DEBUG_FS) += debugfs.o
> >> diff --git a/drivers/vdpa/pds/aux_drv.c b/drivers/vdpa/pds/aux_drv.c
> >> index b3f36170253c..63e40ae68211 100644
> >> --- a/drivers/vdpa/pds/aux_drv.c
> >> +++ b/drivers/vdpa/pds/aux_drv.c
> >> @@ -2,6 +2,8 @@
> >>   /* Copyright(c) 2023 Advanced Micro Devices, Inc */
> >>
> >>   #include 
> >> +#include 
> >> +#include 
> >>
> >>   #include 
> >>   #include 
> >> @@ -9,6 +11,7 @@
> >>
> >>   #include "aux_drv.h"
> >>   #include "debugfs.h"
> >> +#include "vdpa_dev.h"
> >>
> >>   static const struct auxiliary_device_id pds_vdpa_id_table[] = {
> >>  { .name = PDS_VDPA_DEV_NAME, },
> >> @@ -30,6 +33,7 @@ static int pds_vdpa_probe(struct auxiliary_device 
> >> *aux_dev,
> >>  return -ENOMEM;
> >>
> >>  vdpa_aux->padev = padev;
> >> +   vdpa_aux->vf_id = pci_iov_vf_id(padev->vf->pdev);
> >>  auxiliary_set_drvdata(aux_dev, vdpa_aux);
> >>
> >>  /* Register our PDS client with the pds_core */
> >> @@ -40,8 +44,15 @@ static int pds_vdpa_probe(struct auxiliary_device 
> >> *aux_dev,
> >>  goto err_free_mem;
> >>  }
> >>
> >> +   /* Get device ident info and set up the vdpa_mgmt_dev */
> >> +   err = pds_vdpa_get_mgmt_info(vdpa_aux);
> >> +   if (err)
> >> +   goto err_aux_unreg;
> >> +
> >>  return 0;
> >>
> >> +err_aux_unreg:
> >> +   padev->ops->unregister_client(padev);
> >>   err_free_mem:
> >>  kfree(vdpa_aux);
> >>  auxiliary_set_drvdata(aux_dev, NULL);
> >> @@ -54,6 +65,8 @@ static void pds_vdpa_remove(struct auxiliary_device 
> >> *aux_dev)
> >>  struct pds_vdpa_aux *vdpa_aux = auxiliary_get_drvdata(aux_dev);
> >>  struct device *dev = &aux_dev->dev;
> >>
> >> +   pci_free_irq_vectors(vdpa_aux->padev->vf->pdev);
> >> +
> >>  vdpa_aux->padev->ops->unregister_client(vdpa_aux->padev);
> >>
> >>  kfree(vdpa_aux);
> >> diff --git a/drivers/vdpa/pds/aux_drv.h b/drivers/vdpa/pds/aux_drv.h
> >> index 14e465944dfd..94ba7abcaa43 100644
> >> --- a/drivers/vdpa/pds/aux_drv.h
> >> +++ b/drivers/vdpa/pds/aux_drv.h
> >> @@ -10,6 +10,13 @@
> >>   struct pds_vdpa_aux {
> >>  struct pds_auxiliary_dev *padev;
> >>
> >> +   struct vdpa_mgmt_dev vdpa_mdev;
> >> +
> >> +   struct pds_vdpa_ident ident;
> >> +
> >> +   int vf_id;
> >>  struct dentry *dentry;
> >> +
> >> +   int nintrs;
> >>   };
> >>   #endif /* _AUX_DRV_H_ */
> >> diff --git a/drivers/vdpa/pds/debugfs.c b/drivers/vdpa/pds/debugfs.c
> >> index 3c163dc7b66f..7b7e90fd6578 100644
> >> --- a/drivers/vdpa/pds/debugfs.c
> >> +++ b/drivers/vdpa/pds/debugfs.c
> >> @@ -1,7 +1,10 @@
> >>   // SPDX-License-Identifier: GPL-2.0-only
> >>   /* Copyright(c) 2023 Advanced Micro Devices, Inc */
> >>
> >> +#include 
> >> +
> >>   #include 
> >> +#include 
> >>   #include 
> >>
> >>   #include "aux_drv.h"
> >> diff --git a/drivers/vdpa/pds/vdpa_dev.c b/drivers/vdpa/pds/vdpa_dev.c
> >> new file mode 100644
> >> index 000000000000..bd840688503c
> >> --- /dev/null
> >> +++ b/drivers/vdpa/pds/vdpa_dev.c
> >> @@ -0,0 +1,113 @@
> >> +// SPDX-License-Identifier: GPL-2.0-only
> >> +/* Copyright(c) 2023 Advanced Micro Devices, Inc */
> >> +
> >> +#include 
> >> +#include 
> >> +#include 
> >> +
> >> +#include 
> >> +#include 
> >> +#include 
> >> +#include 
> >> +
> >> +#include "vdpa_dev.h"
> >> +#include "aux_drv.h"
> >> +
> >> +static struct virtio_device_id pds_vdpa_id_table[] = {
> >> +   {VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID},
> >> +   {0},
> >> +};
> >> +
> >> +static int pds_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
> >> +   const struct vdpa_dev_set_config *add_config)
> >> +{
> >> 

Re: [PATCH] x86/paravirt: convert simple paravirt functions to asm

2023-03-16 Thread Juergen Gross via Virtualization

On 16.03.23 21:14, Peter Zijlstra wrote:

On Wed, Mar 08, 2023 at 04:42:10PM +0100, Juergen Gross wrote:


+DEFINE_PARAVIRT_ASM(pv_native_irq_disable, "cli", .text);
+DEFINE_PARAVIRT_ASM(pv_native_irq_enable, "sti", .text);
+DEFINE_PARAVIRT_ASM(pv_native_read_cr2, "mov %cr2, %rax", .text);


per these v, the above ^ should be in .noinstr.text


Yes, and I'm inclined to even put pv_native_save_fl into the noinstr
section. After paravirt patching it isn't called anymore anyway.


Juergen


OpenPGP_0xB0DE9DD628BF132F.asc
Description: OpenPGP public key


OpenPGP_signature
Description: OpenPGP digital signature
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH v2 4/8] vringh: support VA with iotlb

2023-03-16 Thread Jason Wang
On Fri, Mar 17, 2023 at 12:07 AM Stefano Garzarella  wrote:
>
> On Fri, Mar 3, 2023 at 3:39 PM Eugenio Perez Martin  
> wrote:
> >
> > On Thu, Mar 2, 2023 at 12:35 PM Stefano Garzarella  
> > wrote:
> > >
> > > vDPA supports the possibility to use user VA in the iotlb messages.
> > > So, let's add support for user VA in vringh to use it in the vDPA
> > > simulators.
> > >
> > > Signed-off-by: Stefano Garzarella 
> > > ---
> > >
> > > Notes:
> > > v2:
> > > - replace kmap_atomic() with kmap_local_page() [see previous patch]
> > > - fix cast warnings when build with W=1 C=1
> > >
> > >  include/linux/vringh.h|   5 +-
> > >  drivers/vdpa/mlx5/net/mlx5_vnet.c |   2 +-
> > >  drivers/vdpa/vdpa_sim/vdpa_sim.c  |   4 +-
> > >  drivers/vhost/vringh.c| 247 --
> > >  4 files changed, 205 insertions(+), 53 deletions(-)
> > >
>
> [...]
>
> >
> > It seems to me iotlb_translate_va and iotlb_translate_pa are very
> > similar, their only difference is that the argument is that iov is
> > iovec instead of bio_vec. And how to fill it, obviously.
> >
> > It would be great to merge both functions, only differing with a
> > conditional on vrh->use_va, or generics, or similar. Or, if following
> > the style of the rest of vringh code, to provide a callback to fill
> > iovec (although I like conditional more).
> >
> > However I cannot think of an easy way to perform that without long
> > macros or type erasure.
>
> Thank you for pushing me :-)
> I finally managed to avoid code duplication (partial patch attached,
> but not yet fully tested).
>
> @Jason: with this refactoring I removed copy_to_va/copy_to_pa, so I
> also avoided getu16_iotlb_va/pa.
>
> I will send the full patch in v3, but I would like to get your opinion
> first ;-)

Fine with me.

Thanks

>
>
>
> diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
> index 0ba3ef809e48..71dd67700e36 100644
> --- a/drivers/vhost/vringh.c
> +++ b/drivers/vhost/vringh.c
> @@ -1096,8 +1096,7 @@ EXPORT_SYMBOL(vringh_need_notify_kern);
>
>  static int iotlb_translate(const struct vringh *vrh,
>u64 addr, u64 len, u64 *translated,
> -  struct bio_vec iov[],
> -  int iov_size, u32 perm)
> +  void *iov, int iov_size, bool iovec, u32 perm)
>  {
> struct vhost_iotlb_map *map;
> struct vhost_iotlb *iotlb = vrh->iotlb;
> @@ -1107,7 +1106,7 @@ static int iotlb_translate(const struct vringh *vrh,
> spin_lock(vrh->iotlb_lock);
>
> while (len > s) {
> -   u64 size, pa, pfn;
> +   u64 size;
>
> if (unlikely(ret >= iov_size)) {
> ret = -ENOBUFS;
> @@ -1124,10 +1123,22 @@ static int iotlb_translate(const struct vringh *vrh,
> }
>
> size = map->size - addr + map->start;
> -   pa = map->addr + addr - map->start;
> -   pfn = pa >> PAGE_SHIFT;
> -   bvec_set_page(&iov[ret], pfn_to_page(pfn), min(len - s, size),
> - pa & (PAGE_SIZE - 1));
> +   if (iovec) {
> +   struct iovec *iovec = iov;
> +
> +   iovec[ret].iov_len = min(len - s, size);
> +   iovec[ret].iov_base = (void __user *)(unsigned long)
> + (map->addr + addr - map->start);
> +   } else {
> +   u64 pa = map->addr + addr - map->start;
> +   u64 pfn = pa >> PAGE_SHIFT;
> +   struct bio_vec *bvec = iov;
> +
> +   bvec_set_page(&bvec[ret], pfn_to_page(pfn),
> + min(len - s, size),
> + pa & (PAGE_SIZE - 1));
> +   }
> +
> s += size;
> addr += size;
> ++ret;
> @@ -1141,26 +1152,38 @@ static int iotlb_translate(const struct vringh *vrh,
> return ret;
>  }
>
> +#define IOTLB_IOV_SIZE 16
> +
>  static inline int copy_from_iotlb(const struct vringh *vrh, void *dst,
>   void *src, size_t len)
>  {
> u64 total_translated = 0;
>
> while (total_translated < len) {
> -   struct bio_vec iov[16];
> +   union {
> +   struct iovec iovec[IOTLB_IOV_SIZE];
> +   struct bio_vec bvec[IOTLB_IOV_SIZE];
> +   } iov;
> struct iov_iter iter;
> u64 translated;
> int ret;
>
> ret = iotlb_translate(vrh, (u64)(uintptr_t)src,
>   len - total_translated, &translated,
> - iov, ARRAY_SIZE(iov), VHOST_MAP_RO);
> + &iov, IOTLB_IOV_SIZE, vrh->use_va,
> + VHOST_MAP_RO);
> 

Re: [PATCH RFC v2 virtio 4/7] pds_vdpa: add vdpa config client commands

2023-03-16 Thread Jason Wang
On Thu, Mar 16, 2023 at 11:25 AM Shannon Nelson  wrote:
>
> On 3/15/23 12:05 AM, Jason Wang wrote:
> > On Thu, Mar 9, 2023 at 9:31 AM Shannon Nelson  
> > wrote:
> >>
> >> These are the adminq commands that will be needed for
> >> setting up and using the vDPA device.
> >
> > It's better to explain under which case the driver should use adminq,
> > I see some functions overlap with common configuration capability.
> > More below.
>
> Yes, I agree this needs to be more clearly stated.  The overlap is
> because the original FW didn't have the virtio device as well modeled
> and we had to go through adminq calls to get things done.

Does this mean the device could be actually probed by a virtio-pci driver?

>  Now that we
> have a reasonable virtio emulation and can use the virtio_net_config, we
> have a lot less need for the adminq calls.

Please add those in the changelog. Btw, adminq should be more flexible
since it's easier to extend for new features. If there's no plan to
model a virtio-pci driver we can even avoid mapping PCI capabilities
which may simplify the codes.

Thanks

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH v2 1/3] virtio_ring: Allow non power of 2 sizes for packed virtqueue

2023-03-16 Thread Jason Wang
On Thu, Mar 16, 2023 at 2:55 AM Feng Liu  wrote:
>
> According to the Virtio Specification, the Queue Size parameter of a
> virtqueue corresponds to the maximum number of descriptors in that
> queue, and it does not have to be a power of 2 for packed virtqueues.
> However, the virtio_pci_modern driver enforced a power of 2 check for
> virtqueue sizes, which is unnecessary and restrictive for packed
> virtqueue.
>
> Split virtqueue still needs to check the virtqueue size is power_of_2
> which has been done in vring_alloc_queue_split of the virtio_ring layer.
>
> To validate this change, we tested various virtqueue sizes for packed
> rings, including 128, 256, 512, 100, 200, 500, and 1000, with
> CONFIG_PAGE_POISONING enabled, and all tests passed successfully.
>
> Signed-off-by: Feng Liu 
> Reviewed-by: Jiri Pirko 

Acked-by: Jason Wang 

Thanks

>
> ---
> v0 -> v1
> feedbacks from Jason Wang and Michael S. Tsirkin
> - remove power_of_2 check of virtqueue size
>
> v1 -> v2
> feedbacks from Parav Pandit and Jiri Pirko
> - keep power_of_2 check of split virtqueue in vring_alloc_queue_split of
>   virtio_ring layer.
> ---
>  drivers/virtio/virtio_pci_modern.c | 5 -
>  1 file changed, 5 deletions(-)
>
> diff --git a/drivers/virtio/virtio_pci_modern.c 
> b/drivers/virtio/virtio_pci_modern.c
> index 9e496e288cfa..6e713904d8e8 100644
> --- a/drivers/virtio/virtio_pci_modern.c
> +++ b/drivers/virtio/virtio_pci_modern.c
> @@ -310,11 +310,6 @@ static struct virtqueue *setup_vq(struct 
> virtio_pci_device *vp_dev,
> if (!num || vp_modern_get_queue_enable(mdev, index))
> return ERR_PTR(-ENOENT);
>
> -   if (!is_power_of_2(num)) {
> -   dev_warn(&vp_dev->pci_dev->dev, "bad queue size %u", num);
> -   return ERR_PTR(-EINVAL);
> -   }
> -
> info->msix_vector = msix_vec;
>
> /* create the vring */
> --
> 2.34.1
>

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH v2 2/3] virtio_ring: Avoid using inline for small functions

2023-03-16 Thread Jason Wang
On Thu, Mar 16, 2023 at 2:55 AM Feng Liu  wrote:
>
> Remove the inline keyword, according to kernel coding style [1], defining
> inline functions is not necessary for small functions.
>
> It is verified with GCC 12.2.0, the generated code with/without inline
> is the same. Additionally tested with kernel pktgen and iperf, and
> verified the result, pps of the results are the same in the cases of
> with/without inline.
>
> [1]
> https://www.kernel.org/doc/html/v6.2-rc3/process/coding-style.html#the-inline-disease
>
> Signed-off-by: Feng Liu 
> Reviewed-by: Jiri Pirko 

Acked-by: Jason Wang 

Thanks

> ---
>  drivers/virtio/virtio_ring.c | 14 +++---
>  1 file changed, 7 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> index 41144b5246a8..a26fab91c59f 100644
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -233,8 +233,8 @@ static void vring_free(struct virtqueue *_vq);
>
>  #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
>
> -static inline bool virtqueue_use_indirect(struct vring_virtqueue *vq,
> - unsigned int total_sg)
> +static bool virtqueue_use_indirect(struct vring_virtqueue *vq,
> +  unsigned int total_sg)
>  {
> /*
>  * If the host supports indirect descriptor tables, and we have 
> multiple
> @@ -349,7 +349,7 @@ static void vring_free_queue(struct virtio_device *vdev, 
> size_t size,
>   * making all of the arch DMA ops work on the vring device itself
>   * is a mess.
>   */
> -static inline struct device *vring_dma_dev(const struct vring_virtqueue *vq)
> +static struct device *vring_dma_dev(const struct vring_virtqueue *vq)
>  {
> return vq->dma_dev;
>  }
> @@ -784,7 +784,7 @@ static void detach_buf_split(struct vring_virtqueue *vq, 
> unsigned int head,
> }
>  }
>
> -static inline bool more_used_split(const struct vring_virtqueue *vq)
> +static bool more_used_split(const struct vring_virtqueue *vq)
>  {
> return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev,
> vq->split.vring.used->idx);
> @@ -1172,12 +1172,12 @@ static int virtqueue_resize_split(struct virtqueue 
> *_vq, u32 num)
>  /*
>   * Packed ring specific functions - *_packed().
>   */
> -static inline bool packed_used_wrap_counter(u16 last_used_idx)
> +static bool packed_used_wrap_counter(u16 last_used_idx)
>  {
> return !!(last_used_idx & (1 << VRING_PACKED_EVENT_F_WRAP_CTR));
>  }
>
> -static inline u16 packed_last_used(u16 last_used_idx)
> +static u16 packed_last_used(u16 last_used_idx)
>  {
> return last_used_idx & ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR));
>  }
> @@ -1612,7 +1612,7 @@ static inline bool is_used_desc_packed(const struct 
> vring_virtqueue *vq,
> return avail == used && used == used_wrap_counter;
>  }
>
> -static inline bool more_used_packed(const struct vring_virtqueue *vq)
> +static bool more_used_packed(const struct vring_virtqueue *vq)
>  {
> u16 last_used;
> u16 last_used_idx;
> --
> 2.34.1
>

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH v2 2/2] vdpa/mlx5: Make VIRTIO_NET_F_MRG_RXBUF off by default

2023-03-16 Thread Jason Wang
On Wed, Mar 15, 2023 at 3:28 PM Eli Cohen  wrote:
>
> One can still enable it when creating the vdpa device using vdpa tool by
> providing features that include it.
>
> For example:
> $ vdpa dev add name vdpa0 mgmtdev pci/0000:86:00.2 device_features 0x300cb982b
>
> Please be aware that this feature was not supported before the previous
> patch in this list was introduced so we don't change user experience.

Patch looks good but we need to document why we disable mrg rxbuf by
default in the changelog.

With that,

Acked-by: Jason Wang 

Thanks

>
> Signed-off-by: Eli Cohen 
> ---
>  drivers/vdpa/mlx5/net/mlx5_vnet.c | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c 
> b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> index 4abc3a4ee515..3858ba1e8975 100644
> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> @@ -3154,6 +3154,8 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev 
> *v_mdev, const char *name,
> return -EINVAL;
> }
> device_features &= add_config->device_features;
> +   } else {
> +   device_features &= ~BIT_ULL(VIRTIO_NET_F_MRG_RXBUF);
> }
> if (!(device_features & BIT_ULL(VIRTIO_F_VERSION_1) &&
>   device_features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM))) {
> --
> 2.38.1
>

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH] vdpa/mlx5: Remove debugfs file after device unregister

2023-03-16 Thread Jason Wang
On Sun, Mar 12, 2023 at 4:41 PM Eli Cohen  wrote:
>
> When deleting the vdpa device, the debugfs files need to be removed so
> need to remove debugfs after the device has been unregistered.
>
> This fixes null pointer dereference when someone deletes the device
> after debugfs has been populated.
>
> Fixes: 294221004322 ("vdpa/mlx5: Add debugfs subtree")
> Signed-off-by: Eli Cohen 
> ---
>  drivers/vdpa/mlx5/net/mlx5_vnet.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c 
> b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> index 3858ba1e8975..3f6149f2ffd4 100644
> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> @@ -3322,8 +3322,6 @@ static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev 
> *v_mdev, struct vdpa_device *
> struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> struct workqueue_struct *wq;
>
> -   mlx5_vdpa_remove_debugfs(ndev->debugfs);
> -   ndev->debugfs = NULL;
> if (ndev->nb_registered) {
> ndev->nb_registered = false;
> mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
> @@ -3332,6 +3330,8 @@ static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev 
> *v_mdev, struct vdpa_device *
> mvdev->wq = NULL;
> destroy_workqueue(wq);
> _vdpa_unregister_device(dev);

What if the user tries to access debugfs after _vdpa_unregister_device()?

Thanks

> +   mlx5_vdpa_remove_debugfs(ndev->debugfs);
> +   ndev->debugfs = NULL;
> mgtdev->ndev = NULL;
>  }
>
> --
> 2.38.1
>

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH RFC v2 virtio 3/7] pds_vdpa: virtio bar setup for vdpa

2023-03-16 Thread Jason Wang
On Thu, Mar 16, 2023 at 11:25 AM Shannon Nelson  wrote:
>
> On 3/15/23 12:05 AM, Jason Wang wrote:
> > On Thu, Mar 9, 2023 at 9:31 AM Shannon Nelson  
> > wrote:
> >>
> >> The PDS vDPA device has a virtio BAR for describing itself, and
> >> the pds_vdpa driver needs to access it.  Here we copy liberally
> >> from the existing drivers/virtio/virtio_pci_modern_dev.c as it
> >> has what we need, but we need to modify it so that it can work
> >> with our device id and so we can use our own DMA mask.
> >
> > By passing a pointer to a customized id probing routine to 
> > vp_modern_probe()?
>
> The only real differences are that we needed to cut out the device id
> checks to use our vDPA VF device id, and remove
> dma_set_mask_and_coherent() because we need a different DMA_BIT_MASK().
>
> Maybe a function pointer to something that can validate the device id,
> and a bitmask for setting DMA mapping; if they are 0/NULL, use the
> default device id check and DMA mask.
>
> Adding them as extra arguments to the function call seems a bit messy,
> maybe add them to the struct virtio_pci_modern_device and the caller can
> set them as overrides if needed?
>
> struct virtio_pci_modern_device {
>
> ...
>
> int (*device_id_check_override)(struct pci_dev *pdev);
> u64 dma_mask_override;
> }

Looks fine.

Thanks

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH v2 2/8] vhost-vdpa: use bind_mm/unbind_mm device callbacks

2023-03-16 Thread Stefano Garzarella

On Tue, Mar 14, 2023 at 11:48:33AM +0800, Jason Wang wrote:

On Thu, Mar 2, 2023 at 7:34 PM Stefano Garzarella  wrote:


When the user calls the VHOST_SET_OWNER ioctl and the vDPA device
has `use_va` set to true, let's call the bind_mm callback.
In this way we can bind the device to the user address space
and directly use the user VA.

The unbind_mm callback is called during the release after
stopping the device.

Signed-off-by: Stefano Garzarella 
---

Notes:
v2:
- call the new unbind_mm callback during the release [Jason]
- avoid to call bind_mm callback after the reset, since the device
  is not detaching it now during the reset

 drivers/vhost/vdpa.c | 30 ++
 1 file changed, 30 insertions(+)

diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index dc12dbd5b43b..1ab89fccd825 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -219,6 +219,28 @@ static int vhost_vdpa_reset(struct vhost_vdpa *v)
return vdpa_reset(vdpa);
 }

+static long vhost_vdpa_bind_mm(struct vhost_vdpa *v)
+{
+   struct vdpa_device *vdpa = v->vdpa;
+   const struct vdpa_config_ops *ops = vdpa->config;
+
+   if (!vdpa->use_va || !ops->bind_mm)
+   return 0;
+
+   return ops->bind_mm(vdpa, v->vdev.mm);
+}
+
+static void vhost_vdpa_unbind_mm(struct vhost_vdpa *v)
+{
+   struct vdpa_device *vdpa = v->vdpa;
+   const struct vdpa_config_ops *ops = vdpa->config;
+
+   if (!vdpa->use_va || !ops->unbind_mm)
+   return;
+
+   ops->unbind_mm(vdpa);
+}
+
 static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp)
 {
struct vdpa_device *vdpa = v->vdpa;
@@ -711,6 +733,13 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
break;
default:
r = vhost_dev_ioctl(&v->vdev, cmd, argp);
+   if (!r && cmd == VHOST_SET_OWNER) {
+   r = vhost_vdpa_bind_mm(v);
+   if (r) {
+   vhost_dev_reset_owner(&v->vdev, NULL);
+   break;
+   }
+   }


Nit: is it better to have a new condition/switch branch instead of
putting them under default? (as what vring_ioctl did).


Yep, I agree!

I'll change it.

Thanks,
Stefano

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH v2 7/8] vdpa_sim: replace the spinlock with a mutex to protect the state

2023-03-16 Thread Stefano Garzarella

On Tue, Mar 14, 2023 at 01:31:25PM +0800, Jason Wang wrote:

On Tue, Mar 14, 2023 at 1:29 PM Jason Wang  wrote:


On Thu, Mar 2, 2023 at 7:35 PM Stefano Garzarella  wrote:
>
> The spinlock we use to protect the state of the simulator is sometimes
> held for a long time (for example, when devices handle requests).
>
> This also prevents us from calling functions that might sleep (such as
> kthread_flush_work() in the next patch), and thus having to release
> and retake the lock.
>
> For these reasons, let's replace the spinlock with a mutex that gives
> us more flexibility.
>
> Suggested-by: Jason Wang 
> Signed-off-by: Stefano Garzarella 

Acked-by: Jason Wang 

Thanks


Btw, though it looks fine, we'd better double-check that virtio_vdpa works well.


I tested it, but I will do it more carefully to make sure everything
is okay.



(I think so since there's transport that might sleep).


I see.

Thanks,
Stefano

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH v2 4/8] vringh: support VA with iotlb

2023-03-16 Thread Stefano Garzarella

On Tue, Mar 14, 2023 at 12:53:57PM +0800, Jason Wang wrote:

On Thu, Mar 2, 2023 at 7:35 PM Stefano Garzarella  wrote:


vDPA supports the possibility to use user VA in the iotlb messages.
So, let's add support for user VA in vringh to use it in the vDPA
simulators.

Signed-off-by: Stefano Garzarella 
---

Notes:
v2:
- replace kmap_atomic() with kmap_local_page() [see previous patch]
- fix cast warnings when build with W=1 C=1

 include/linux/vringh.h|   5 +-
 drivers/vdpa/mlx5/net/mlx5_vnet.c |   2 +-
 drivers/vdpa/vdpa_sim/vdpa_sim.c  |   4 +-
 drivers/vhost/vringh.c| 247 --
 4 files changed, 205 insertions(+), 53 deletions(-)

diff --git a/include/linux/vringh.h b/include/linux/vringh.h
index 1991a02c6431..d39b9f2dcba0 100644
--- a/include/linux/vringh.h
+++ b/include/linux/vringh.h
@@ -32,6 +32,9 @@ struct vringh {
/* Can we get away with weak barriers? */
bool weak_barriers;

+   /* Use user's VA */
+   bool use_va;
+
/* Last available index we saw (ie. where we're up to). */
u16 last_avail_idx;

@@ -279,7 +282,7 @@ void vringh_set_iotlb(struct vringh *vrh, struct 
vhost_iotlb *iotlb,
  spinlock_t *iotlb_lock);

 int vringh_init_iotlb(struct vringh *vrh, u64 features,
- unsigned int num, bool weak_barriers,
+ unsigned int num, bool weak_barriers, bool use_va,
  struct vring_desc *desc,
  struct vring_avail *avail,
  struct vring_used *used);
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c 
b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index 3a0e721aef05..babc8dd171a6 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -2537,7 +2537,7 @@ static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)

if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))
err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
-   MLX5_CVQ_MAX_ENT, false,
+   MLX5_CVQ_MAX_ENT, false, false,
(struct vring_desc 
*)(uintptr_t)cvq->desc_addr,
(struct vring_avail 
*)(uintptr_t)cvq->driver_addr,
(struct vring_used 
*)(uintptr_t)cvq->device_addr);
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
index 6a0a65814626..481eb156658b 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
@@ -60,7 +60,7 @@ static void vdpasim_queue_ready(struct vdpasim *vdpasim, 
unsigned int idx)
struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
uint16_t last_avail_idx = vq->vring.last_avail_idx;

-   vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, true,
+   vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, true, false,
  (struct vring_desc *)(uintptr_t)vq->desc_addr,
  (struct vring_avail *)
  (uintptr_t)vq->driver_addr,
@@ -81,7 +81,7 @@ static void vdpasim_vq_reset(struct vdpasim *vdpasim,
vq->cb = NULL;
vq->private = NULL;
vringh_init_iotlb(&vq->vring, vdpasim->dev_attr.supported_features,
- VDPASIM_QUEUE_MAX, false, NULL, NULL, NULL);
+ VDPASIM_QUEUE_MAX, false, false, NULL, NULL, NULL);

vq->vring.notify = NULL;
 }
diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
index 0ba3ef809e48..61c79cea44ca 100644
--- a/drivers/vhost/vringh.c
+++ b/drivers/vhost/vringh.c
@@ -1094,15 +1094,99 @@ EXPORT_SYMBOL(vringh_need_notify_kern);

 #if IS_REACHABLE(CONFIG_VHOST_IOTLB)

-static int iotlb_translate(const struct vringh *vrh,
-  u64 addr, u64 len, u64 *translated,
-  struct bio_vec iov[],
-  int iov_size, u32 perm)
+static int iotlb_translate_va(const struct vringh *vrh,
+ u64 addr, u64 len, u64 *translated,
+ struct iovec iov[],
+ int iov_size, u32 perm)
 {
struct vhost_iotlb_map *map;
struct vhost_iotlb *iotlb = vrh->iotlb;
+   u64 s = 0, last = addr + len - 1;
int ret = 0;
+
+   spin_lock(vrh->iotlb_lock);
+
+   while (len > s) {
+   u64 size;
+
+   if (unlikely(ret >= iov_size)) {
+   ret = -ENOBUFS;
+   break;
+   }
+
+   map = vhost_iotlb_itree_first(iotlb, addr, last);
+   if (!map || map->start > addr) {
+   ret = -EINVAL;
+   break;
+   } else if (!(map->perm & perm)) {
+   ret = -EPERM;
+   break;
+   }
+
+   size = map->size - addr + 

Re: [PATCH v3 08/11] vdpa: Add eventfd for the vdpa callback

2023-03-16 Thread Jason Wang


在 2023/2/28 17:41, Xie Yongji 写道:

Add eventfd for the vdpa callback so that user
can signal it directly instead of running the
callback. It will be used for vhost-vdpa case.

Signed-off-by: Xie Yongji 
---
  drivers/vhost/vdpa.c | 2 ++
  drivers/virtio/virtio_vdpa.c | 1 +
  include/linux/vdpa.h | 3 +++
  3 files changed, 6 insertions(+)

diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index dc12dbd5b43b..ae89c0ccc2bb 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -599,9 +599,11 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, 
unsigned int cmd,
if (vq->call_ctx.ctx) {
cb.callback = vhost_vdpa_virtqueue_cb;
cb.private = vq;
+   cb.irq_ctx = vq->call_ctx.ctx;
} else {
cb.callback = NULL;
cb.private = NULL;
+   cb.irq_ctx = NULL;
}
ops->set_vq_cb(vdpa, idx, &cb);
vhost_vdpa_setup_vq_irq(v, idx);
diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c
index 9eee8afabda8..a5cecafbc2d1 100644
--- a/drivers/virtio/virtio_vdpa.c
+++ b/drivers/virtio/virtio_vdpa.c
@@ -195,6 +195,7 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned 
int index,
/* Setup virtqueue callback */
cb.callback = callback ? virtio_vdpa_virtqueue_cb : NULL;
cb.private = info;
+   cb.irq_ctx = NULL;
ops->set_vq_cb(vdpa, index, &cb);
ops->set_vq_num(vdpa, index, virtqueue_get_vring_size(vq));
  
diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h

index 10bd22387276..94a7ec49583a 100644
--- a/include/linux/vdpa.h
+++ b/include/linux/vdpa.h
@@ -13,10 +13,13 @@
   * struct vdpa_calllback - vDPA callback definition.
   * @callback: interrupt callback function
   * @private: the data passed to the callback function
+ * @irq_ctx: the eventfd for the callback, user can signal
+ *   it directly instead of running the callback



I'd suggest to do more tweaks to mention:

1) irq_ctx is optional
2) that when the irq_ctx is set, the vDPA driver must guarantee that
signaling it is functionally equivalent to triggering the callback. When
set, vDPA parent can signal it directly instead of triggering the callback.



   */
  struct vdpa_callback {
irqreturn_t (*callback)(void *data);
void *private;
+   struct eventfd_ctx *irq_ctx;



There's no IRQ concept at the virtual vDPA bus level, so it's probably 
better to rename it as "trigger".


Btw, should we select EVENTFD for vDPA?

Thanks



  };
  
  /**


___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH v3 09/11] vduse: Signal interrupt's eventfd directly in vhost-vdpa case

2023-03-16 Thread Jason Wang
On Tue, Feb 28, 2023 at 5:42 PM Xie Yongji  wrote:
>
> Now the vdpa callback will associate an eventfd in
> vhost-vdpa case.

I'd suggest avoiding mentioning drivers since vDPA parents should not
know which vDPA driver is bound.

We could say "signal vq trigger eventfd directly if possible"?

With those tweaked.

Acked-by: Jason Wang 

Thanks

> For performance reasons, VDUSE can
> signal it directly during irq injection.
>
> Signed-off-by: Xie Yongji 
> ---
>  drivers/vdpa/vdpa_user/vduse_dev.c | 27 +++
>  1 file changed, 23 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c 
> b/drivers/vdpa/vdpa_user/vduse_dev.c
> index 869cc7860d82..56f3c2480c2a 100644
> --- a/drivers/vdpa/vdpa_user/vduse_dev.c
> +++ b/drivers/vdpa/vdpa_user/vduse_dev.c
> @@ -461,6 +461,7 @@ static void vduse_dev_reset(struct vduse_dev *dev)
> spin_lock(&vq->irq_lock);
> vq->cb.callback = NULL;
> vq->cb.private = NULL;
> +   vq->cb.irq_ctx = NULL;
> spin_unlock(&vq->irq_lock);
> flush_work(&vq->inject);
> flush_work(&vq->kick);
> @@ -526,6 +527,7 @@ static void vduse_vdpa_set_vq_cb(struct vdpa_device 
> *vdpa, u16 idx,
> spin_lock(&vq->irq_lock);
> vq->cb.callback = cb->callback;
> vq->cb.private = cb->private;
> +   vq->cb.irq_ctx = cb->irq_ctx;
> spin_unlock(&vq->irq_lock);
>  }
>
> @@ -1020,6 +1022,20 @@ static void vduse_vq_irq_inject(struct work_struct 
> *work)
> spin_unlock_irq(&vq->irq_lock);
>  }
>
> +static bool vduse_vq_signal_irqfd(struct vduse_virtqueue *vq)
> +{
> +   bool signal = false;
> +
> +   spin_lock_irq(&vq->irq_lock);
> +   if (vq->ready && vq->cb.irq_ctx) {
> +   eventfd_signal(vq->cb.irq_ctx, 1);
> +   signal = true;
> +   }
> +   spin_unlock_irq(&vq->irq_lock);
> +
> +   return signal;
> +}
> +
>  static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
> struct work_struct *irq_work,
> int irq_effective_cpu)
> @@ -1322,11 +1338,14 @@ static long vduse_dev_ioctl(struct file *file, 
> unsigned int cmd,
> if (index >= dev->vq_num)
> break;
>
> +   ret = 0;
> index = array_index_nospec(index, dev->vq_num);
> -
> -   vduse_vq_update_effective_cpu(dev->vqs[index]);
> -   ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index]->inject,
> -   dev->vqs[index]->irq_effective_cpu);
> +   if (!vduse_vq_signal_irqfd(dev->vqs[index])) {
> +   vduse_vq_update_effective_cpu(dev->vqs[index]);
> +   ret = vduse_dev_queue_irq_work(dev,
> +   &dev->vqs[index]->inject,
> +   
> dev->vqs[index]->irq_effective_cpu);
> +   }
> break;
> }
> case VDUSE_IOTLB_REG_UMEM: {
> --
> 2.20.1
>

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH v3 08/11] vdpa: Add eventfd for the vdpa callback

2023-03-16 Thread Jason Wang
On Thu, Mar 16, 2023 at 5:25 PM Jason Wang  wrote:
>
>
> 在 2023/2/28 17:41, Xie Yongji 写道:
> > Add eventfd for the vdpa callback so that user
> > can signal it directly instead of running the
> > callback. It will be used for vhost-vdpa case.
> >
> > Signed-off-by: Xie Yongji 
> > ---
> >   drivers/vhost/vdpa.c | 2 ++
> >   drivers/virtio/virtio_vdpa.c | 1 +
> >   include/linux/vdpa.h | 3 +++
> >   3 files changed, 6 insertions(+)
> >
> > diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
> > index dc12dbd5b43b..ae89c0ccc2bb 100644
> > --- a/drivers/vhost/vdpa.c
> > +++ b/drivers/vhost/vdpa.c
> > @@ -599,9 +599,11 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa 
> > *v, unsigned int cmd,
> >   if (vq->call_ctx.ctx) {
> >   cb.callback = vhost_vdpa_virtqueue_cb;
> >   cb.private = vq;
> > + cb.irq_ctx = vq->call_ctx.ctx;
> >   } else {
> >   cb.callback = NULL;
> >   cb.private = NULL;
> > + cb.irq_ctx = NULL;
> >   }
> >   ops->set_vq_cb(vdpa, idx, &cb);
> >   vhost_vdpa_setup_vq_irq(v, idx);
> > diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c
> > index 9eee8afabda8..a5cecafbc2d1 100644
> > --- a/drivers/virtio/virtio_vdpa.c
> > +++ b/drivers/virtio/virtio_vdpa.c
> > @@ -195,6 +195,7 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, 
> > unsigned int index,
> >   /* Setup virtqueue callback */
> >   cb.callback = callback ? virtio_vdpa_virtqueue_cb : NULL;
> >   cb.private = info;
> > + cb.irq_ctx = NULL;
> >   ops->set_vq_cb(vdpa, index, &cb);
> >   ops->set_vq_num(vdpa, index, virtqueue_get_vring_size(vq));
> >
> > diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
> > index 10bd22387276..94a7ec49583a 100644
> > --- a/include/linux/vdpa.h
> > +++ b/include/linux/vdpa.h
> > @@ -13,10 +13,13 @@
> >* struct vdpa_calllback - vDPA callback definition.
> >* @callback: interrupt callback function
> >* @private: the data passed to the callback function
> > + * @irq_ctx: the eventfd for the callback, user can signal
> > + *   it directly instead of running the callback
>
>
> I'd suggest to do more tweaks to mention:
>
> 1) irq_ctx is optional
> 2) that when the irq_ctx is set, the vDPA driver must guarantee that
> signaling it is functionally equivalent to triggering the callback. When
> set, vDPA parent can signal it directly instead of triggering the callback.
>
> >*/
> >   struct vdpa_callback {
> >   irqreturn_t (*callback)(void *data);
> >   void *private;
> > + struct eventfd_ctx *irq_ctx;
>
>
> There's no IRQ concept at the virtual vDPA bus level, so it's probably
> better to rename it as "trigger".
>
> Btw, should we select EVENTFD for vDPA?

Looks like we are fine here since we only use the pointer to the eventfd_ctx.

Thanks

>
> Thanks
>
>
> >   };
> >
> >   /**

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH v2 1/8] vdpa: add bind_mm/unbind_mm callbacks

2023-03-16 Thread Stefano Garzarella

On Tue, Mar 14, 2023 at 11:39:42AM +0800, Jason Wang wrote:

On Thu, Mar 2, 2023 at 7:34 PM Stefano Garzarella  wrote:


These new optional callbacks are used to bind/unbind the device to
a specific address space so the vDPA framework can use VA when
these callbacks are implemented.

Suggested-by: Jason Wang 
Signed-off-by: Stefano Garzarella 
---


One thing that came into my mind is that after this commit:

commit 5ce995f313ce56c0c62425c3ddc37c5c50fc33db
Author: Jason Wang 
Date:   Fri May 29 16:02:59 2020 +0800

   vhost: use mmgrab() instead of mmget() for non worker device

   For the device that doesn't use vhost worker and use_mm(), mmget() is
   too heavy weight and it may brings troubles for implementing mmap()
   support for vDPA device.

We don't hold the address space after this commit, so the userspace
mapping could be invalid if the owner exits?


Thanks for mentioning it, I'll take a look at it!

In case maybe I should do a mmget (or get_task_mm) in vhost-vdpa before
calling the callback, or in the parent driver inside the callback, but
it seems duplicating code.

Thanks,
Stefano



Thanks



Notes:
v2:
- removed `struct task_struct *owner` param (unused for now, maybe
  useful to support cgroups) [Jason]
- add unbind_mm callback [Jason]

 include/linux/vdpa.h | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
index 43f59ef10cc9..369c21394284 100644
--- a/include/linux/vdpa.h
+++ b/include/linux/vdpa.h
@@ -290,6 +290,14 @@ struct vdpa_map_file {
  * @vdev: vdpa device
  * @idx: virtqueue index
  * Returns pointer to structure device or error 
(NULL)
+ * @bind_mm:   Bind the device to a specific address space
+ * so the vDPA framework can use VA when this
+ * callback is implemented. (optional)
+ * @vdev: vdpa device
+ * @mm: address space to bind
+ * @unbind_mm: Unbind the device from the address space
+ * bound using the bind_mm callback. (optional)
+ * @vdev: vdpa device
  * @free:  Free resources that belongs to vDPA (optional)
  * @vdev: vdpa device
  */
@@ -351,6 +359,8 @@ struct vdpa_config_ops {
int (*set_group_asid)(struct vdpa_device *vdev, unsigned int group,
  unsigned int asid);
struct device *(*get_vq_dma_dev)(struct vdpa_device *vdev, u16 idx);
+   int (*bind_mm)(struct vdpa_device *vdev, struct mm_struct *mm);
+   void (*unbind_mm)(struct vdpa_device *vdev);

/* Free device resources */
void (*free)(struct vdpa_device *vdev);
--
2.39.2





___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH v3 05/11] vduse: Support automatic irq callback affinity

2023-03-16 Thread Jason Wang


在 2023/2/28 17:41, Xie Yongji 写道:

This brings current interrupt affinity spreading mechanism
to vduse device. We will make use of group_cpus_evenly()
to create an irq callback affinity mask for each virtqueue of
vduse device. Then we will spread IRQs between CPUs in the affinity
mask, in a round-robin manner, to run the irq callback.

Signed-off-by: Xie Yongji 
---
  drivers/vdpa/vdpa_user/vduse_dev.c | 130 +++--
  1 file changed, 123 insertions(+), 7 deletions(-)

diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c 
b/drivers/vdpa/vdpa_user/vduse_dev.c
index 98359d87a06f..bde28a8692d5 100644
--- a/drivers/vdpa/vdpa_user/vduse_dev.c
+++ b/drivers/vdpa/vdpa_user/vduse_dev.c
@@ -23,6 +23,8 @@
  #include 
  #include 
  #include 
+#include 
+#include 
  #include 
  #include 
  #include 
@@ -41,6 +43,8 @@
  #define VDUSE_IOVA_SIZE (128 * 1024 * 1024)
  #define VDUSE_MSG_DEFAULT_TIMEOUT 30
  
+#define IRQ_UNBOUND -1

+
  struct vduse_virtqueue {
u16 index;
u16 num_max;
@@ -57,6 +61,8 @@ struct vduse_virtqueue {
struct vdpa_callback cb;
struct work_struct inject;
struct work_struct kick;
+   int irq_effective_cpu;
+   struct cpumask irq_affinity;
  };
  
  struct vduse_dev;

@@ -128,6 +134,7 @@ static struct class *vduse_class;
  static struct cdev vduse_ctrl_cdev;
  static struct cdev vduse_cdev;
  static struct workqueue_struct *vduse_irq_wq;
+static struct workqueue_struct *vduse_irq_bound_wq;
  
  static u32 allowed_device_id[] = {

VIRTIO_ID_BLOCK,
@@ -708,6 +715,82 @@ static u32 vduse_vdpa_get_generation(struct vdpa_device 
*vdpa)
return dev->generation;
  }
  
+static void default_calc_sets(struct irq_affinity *affd, unsigned int affvecs)

+{
+   affd->nr_sets = 1;
+   affd->set_size[0] = affvecs;
+}
+
+struct cpumask *
+create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd)
+{
+   unsigned int affvecs = 0, curvec, usedvecs, i;
+   struct cpumask *masks = NULL;
+
+   if (nvecs > affd->pre_vectors + affd->post_vectors)
+   affvecs = nvecs - affd->pre_vectors - affd->post_vectors;
+
+   if (!affd->calc_sets)
+   affd->calc_sets = default_calc_sets;
+
+   affd->calc_sets(affd, affvecs);
+
+   if (!affvecs)
+   return NULL;
+
+   masks = kcalloc(nvecs, sizeof(*masks), GFP_KERNEL);
+   if (!masks)
+   return NULL;
+
+   /* Fill out vectors at the beginning that don't need affinity */
+   for (curvec = 0; curvec < affd->pre_vectors; curvec++)
+   cpumask_setall(&masks[curvec]);
+
+   for (i = 0, usedvecs = 0; i < affd->nr_sets; i++) {
+   unsigned int this_vecs = affd->set_size[i];
+   int j;
+   struct cpumask *result = group_cpus_evenly(this_vecs);
+
+   if (!result) {
+   kfree(masks);
+   return NULL;
+   }
+
+   for (j = 0; j < this_vecs; j++)
+   cpumask_copy(&masks[curvec + j], &result[j]);
+   kfree(result);
+
+   curvec += this_vecs;
+   usedvecs += this_vecs;
+   }
+
+   /* Fill out vectors at the end that don't need affinity */
+   if (usedvecs >= affvecs)
+   curvec = affd->pre_vectors + affvecs;
+   else
+   curvec = affd->pre_vectors + usedvecs;
+   for (; curvec < nvecs; curvec++)
+   cpumask_setall(&masks[curvec]);
+
+   return masks;
+}
+
+static void vduse_vdpa_set_irq_affinity(struct vdpa_device *vdpa,
+   struct irq_affinity *desc)
+{
+   struct vduse_dev *dev = vdpa_to_vduse(vdpa);
+   struct cpumask *masks;
+   int i;
+
+   masks = create_affinity_masks(dev->vq_num, desc);
+   if (!masks)
+   return;
+
+   for (i = 0; i < dev->vq_num; i++)
+   cpumask_copy(&dev->vqs[i]->irq_affinity, &masks[i]);
+   kfree(masks);
+}
+
  static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
unsigned int asid,
struct vhost_iotlb *iotlb)
@@ -758,6 +841,7 @@ static const struct vdpa_config_ops vduse_vdpa_config_ops = 
{
.get_config = vduse_vdpa_get_config,
.set_config = vduse_vdpa_set_config,
.get_generation = vduse_vdpa_get_generation,
+   .set_irq_affinity   = vduse_vdpa_set_irq_affinity,
.reset  = vduse_vdpa_reset,
.set_map= vduse_vdpa_set_map,
.free   = vduse_vdpa_free,
@@ -917,7 +1001,8 @@ static void vduse_vq_irq_inject(struct work_struct *work)
  }
  
  static int vduse_dev_queue_irq_work(struct vduse_dev *dev,

-   struct work_struct *irq_work)
+   struct work_struct *irq_work,
+   int irq_effective_cpu)
  {
int ret = 

Re: [PATCH v2 3/8] vringh: replace kmap_atomic() with kmap_local_page()

2023-03-16 Thread Stefano Garzarella

On Thu, Mar 16, 2023 at 10:13:39AM +0100, Fabio M. De Francesco wrote:

On giovedì 2 marzo 2023 12:34:16 CET Stefano Garzarella wrote:

kmap_atomic() is deprecated in favor of kmap_local_page().

With kmap_local_page() the mappings are per thread, CPU local, can take
page-faults, and can be called from any context (including interrupts).
Furthermore, the tasks can be preempted and, when they are scheduled to
run again, the kernel virtual addresses are restored and still valid.

kmap_atomic() is implemented like a kmap_local_page() which also disables
page-faults and preemption (the latter only for !PREEMPT_RT kernels,
otherwise it only disables migration).

The code within the mappings/un-mappings in getu16_iotlb() and
putu16_iotlb() don't depend on the above-mentioned side effects of
kmap_atomic(), so that mere replacements of the old API with the new one
is all that is required (i.e., there is no need to explicitly add calls
to pagefault_disable() and/or preempt_disable()).


It seems that my commit message is quite clear and complete and therefore has
already been reused by others who have somehow given me credit.

I would really appreciate it being mentioned here that you are reusing a
"boiler plate" commit message of my own making and Cc me :-)


Yes of course, sorry for not doing this previously!

Thanks,
Stefano

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH v2 3/8] vringh: replace kmap_atomic() with kmap_local_page()

2023-03-16 Thread Stefano Garzarella
On Wed, Mar 15, 2023 at 10:12 PM Fabio M. De Francesco
 wrote:
>
> On martedì 14 marzo 2023 04:56:08 CET Jason Wang wrote:
> > On Thu, Mar 2, 2023 at 7:34 PM Stefano Garzarella 
> wrote:
> > > kmap_atomic() is deprecated in favor of kmap_local_page().
> >
> > It's better to mention the commit or code that introduces this.
> >
> > > With kmap_local_page() the mappings are per thread, CPU local, can take
> > > page-faults, and can be called from any context (including interrupts).
> > > Furthermore, the tasks can be preempted and, when they are scheduled to
> > > run again, the kernel virtual addresses are restored and still valid.
> > >
> > > kmap_atomic() is implemented like a kmap_local_page() which also disables
> > > page-faults and preemption (the latter only for !PREEMPT_RT kernels,
> > > otherwise it only disables migration).
> > >
> > > The code within the mappings/un-mappings in getu16_iotlb() and
> > > putu16_iotlb() don't depend on the above-mentioned side effects of
> > > kmap_atomic(),
> >
> > Note we used to use spinlock to protect simulators (at least until
> > patch 7, so we probably need to re-order the patches at least) so I
> > think this is only valid when:
> >
> > The vringh IOTLB helpers are not used in atomic context (e.g spinlock,
> > interrupts).
>
> I'm probably missing some context but it looks that you are saying that
> kmap_local_page() is not suited for any use in atomic context (you are
> mentioning spinlocks).
>
> The commit message (that I know pretty well since it's the exact copy, word by
> word, of my boiler plate commits)

I hope it's not a problem for you, should I mention it somehow?

I searched for the last commits that made a similar change and found
yours that explained it perfectly ;-)

Do I need to rephrase?

> explains that kmap_local_page() is perfectly
> usable in atomic context (including interrupts).
>
> I don't know this code, however I am not able to see why these vringh IOTLB
> helpers cannot work if used under spinlocks. Can you please elaborate a little
> more?
>
> > If yes, should we document this? (Or should we introduce a boolean to
> > say whether an IOTLB variant can be used in an atomic context)?
>
> Again, you'll have no problems from the use of kmap_local_page() and so you
> don't need any boolean to tell whether or not the code is running in atomic
> context.
>
> Please take a look at the Highmem documentation which has been recently
> reworked and extended by me: https://docs.kernel.org/mm/highmem.html
>
> Anyway, I have been ATK 12 or 13 hours in a row. So I'm probably missing the
> whole picture.

Thanks for your useful info!
Stefano

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH v2 8/8] vdpa_sim: add support for user VA

2023-03-16 Thread Stefano Garzarella

On Tue, Mar 14, 2023 at 01:36:13PM +0800, Jason Wang wrote:

On Thu, Mar 2, 2023 at 7:35 PM Stefano Garzarella  wrote:


The new "use_va" module parameter (default: true) is used in
vdpa_alloc_device() to inform the vDPA framework that the device
supports VA.

vringh is initialized to use VA only when "use_va" is true and the
user's mm has been bound. So, only when the bus supports user VA
(e.g. vhost-vdpa).

vdpasim_mm_work_fn work is used to attach the kthread to the user
address space when the .bind_mm callback is invoked, and to detach
it when the .unbind_mm callback is invoked.

Signed-off-by: Stefano Garzarella 
---

Notes:
v2:
- `use_va` set to true by default [Eugenio]
- supported the new unbind_mm callback [Jason]
- removed the unbind_mm call in vdpasim_do_reset() [Jason]
- avoided to release the lock while call kthread_flush_work() since we
  are now using a mutex to protect the device state

 drivers/vdpa/vdpa_sim/vdpa_sim.h |  1 +
 drivers/vdpa/vdpa_sim/vdpa_sim.c | 98 +++-
 2 files changed, 97 insertions(+), 2 deletions(-)

diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.h b/drivers/vdpa/vdpa_sim/vdpa_sim.h
index 4774292fba8c..3a42887d05d9 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim.h
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim.h
@@ -59,6 +59,7 @@ struct vdpasim {
struct vdpasim_virtqueue *vqs;
struct kthread_worker *worker;
struct kthread_work work;
+   struct mm_struct *mm_bound;
struct vdpasim_dev_attr dev_attr;
/* mutex to synchronize virtqueue state */
struct mutex mutex;
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
index a28103a67ae7..eda26bc33df5 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
@@ -35,10 +35,77 @@ module_param(max_iotlb_entries, int, 0444);
 MODULE_PARM_DESC(max_iotlb_entries,
 "Maximum number of iotlb entries for each address space. 0 means unlimited. (default: 2048)");

+static bool use_va = true;
+module_param(use_va, bool, 0444);
+MODULE_PARM_DESC(use_va, "Enable/disable the device's ability to use VA");
+
 #define VDPASIM_QUEUE_ALIGN PAGE_SIZE
 #define VDPASIM_QUEUE_MAX 256
 #define VDPASIM_VENDOR_ID 0

+struct vdpasim_mm_work {
+   struct kthread_work work;
+   struct mm_struct *mm;
+   bool bind;
+   int ret;
+};
+
+static void vdpasim_mm_work_fn(struct kthread_work *work)
+{
+   struct vdpasim_mm_work *mm_work =
+   container_of(work, struct vdpasim_mm_work, work);
+
+   mm_work->ret = 0;
+
+   if (mm_work->bind) {
+   kthread_use_mm(mm_work->mm);
+   //TODO: should we attach the cgroup of the mm owner?
+   } else {
+   kthread_unuse_mm(mm_work->mm);
+   }
+}
+
+static void vdpasim_worker_queue_mm(struct vdpasim *vdpasim,
+   struct vdpasim_mm_work *mm_work)
+{


Nit: we need to tweak the name as it does flush besides queuing the work.


Yep, or split in 2 functions.




+   struct kthread_work *work = &mm_work->work;
+
+   kthread_init_work(work, vdpasim_mm_work_fn);
+   kthread_queue_work(vdpasim->worker, work);
+
+   kthread_flush_work(work);
+}
+
+static int vdpasim_worker_bind_mm(struct vdpasim *vdpasim,
+ struct mm_struct *new_mm)
+{
+   struct vdpasim_mm_work mm_work;
+
+   mm_work.mm = new_mm;
+   mm_work.bind = true;
+
+   vdpasim_worker_queue_mm(vdpasim, &mm_work);
+
+   if (!mm_work.ret)
+   vdpasim->mm_bound = new_mm;
+
+   return mm_work.ret;
+}
+
+static void vdpasim_worker_unbind_mm(struct vdpasim *vdpasim)
+{
+   struct vdpasim_mm_work mm_work;
+
+   if (!vdpasim->mm_bound)
+   return;
+
+   mm_work.mm = vdpasim->mm_bound;
+   mm_work.bind = false;


Can we simply use mm_work.mm = NULL for unbinding?


+
+   vdpasim_worker_queue_mm(vdpasim, &mm_work);
+
+   vdpasim->mm_bound = NULL;


And change the mm_bound in the worker?


Yep, I need to put `vdpasim` in struct vdpasim_mm_work.

I'll do in the next version.

Thanks,
Stefano

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH v3 01/11] lib/group_cpus: Export group_cpus_evenly()

2023-03-16 Thread Jason Wang
On Tue, Feb 28, 2023 at 5:42 PM Xie Yongji  wrote:
>
> Export group_cpus_evenly() so that some modules
> can make use of it to group CPUs evenly according
> to NUMA and CPU locality.
>
> Signed-off-by: Xie Yongji 

Acked-by: Jason Wang 

Thanks

> ---
>  lib/group_cpus.c | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/lib/group_cpus.c b/lib/group_cpus.c
> index 9c837a35fef7..aa3f6815bb12 100644
> --- a/lib/group_cpus.c
> +++ b/lib/group_cpus.c
> @@ -426,3 +426,4 @@ struct cpumask *group_cpus_evenly(unsigned int numgrps)
> return masks;
>  }
>  #endif /* CONFIG_SMP */
> +EXPORT_SYMBOL_GPL(group_cpus_evenly);
> --
> 2.20.1
>

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH 2/6] drm/fbdev-generic: Remove unused prefer_shadow_fbdev flag

2023-03-16 Thread Thomas Zimmermann

Hi

Am 16.03.23 um 03:16 schrieb Zack Rusin:

On Wed, 2023-03-15 at 17:14 +0100, Thomas Zimmermann wrote:

Remove the flag prefer_shadow_fbdev from struct drm_mode_config.
Drivers set this flag to enable shadow buffering in the generic
fbdev emulation. Such shadow buffering is now mandatory, so the
flag is unused.

Signed-off-by: Thomas Zimmermann 
---
  drivers/gpu/drm/tiny/bochs.c    | 1 -
  drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 1 -
  include/drm/drm_mode_config.h   | 7 ---
  3 files changed, 9 deletions(-)

diff --git a/drivers/gpu/drm/tiny/bochs.c b/drivers/gpu/drm/tiny/bochs.c
index 024346054c70..d254679a136e 100644
--- a/drivers/gpu/drm/tiny/bochs.c
+++ b/drivers/gpu/drm/tiny/bochs.c
@@ -545,7 +545,6 @@ static int bochs_kms_init(struct bochs_device *bochs)
  
 bochs->dev->mode_config.preferred_depth = 24;

 bochs->dev->mode_config.prefer_shadow = 0;
-   bochs->dev->mode_config.prefer_shadow_fbdev = 1;
 bochs->dev->mode_config.quirk_addfb_prefer_host_byte_order = true;
  
 bochs->dev->mode_config.funcs = &bochs_mode_funcs;

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index 84d6380b9895..5162a7a12792 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -2112,7 +2112,6 @@ int vmw_kms_init(struct vmw_private *dev_priv)
 dev->mode_config.max_width = dev_priv->texture_max_width;
 dev->mode_config.max_height = dev_priv->texture_max_height;
 dev->mode_config.preferred_depth = dev_priv->assume_16bpp ? 16 : 32;
-   dev->mode_config.prefer_shadow_fbdev = !dev_priv->has_mob;
  
 drm_mode_create_suggested_offset_properties(dev);

 vmw_kms_create_hotplug_mode_update_property(dev_priv);
diff --git a/include/drm/drm_mode_config.h b/include/drm/drm_mode_config.h
index e5b053001d22..973119a9176b 100644
--- a/include/drm/drm_mode_config.h
+++ b/include/drm/drm_mode_config.h
@@ -890,13 +890,6 @@ struct drm_mode_config {
 /* dumb ioctl parameters */
 uint32_t preferred_depth, prefer_shadow;
  
-   /**

-    * @prefer_shadow_fbdev:
-    *
-    * Hint to framebuffer emulation to prefer shadow-fb rendering.
-    */
-   bool prefer_shadow_fbdev;
-
 /**
  * @quirk_addfb_prefer_xbgr_30bpp:
  *


For this one:
Reviewed-by: Zack Rusin 

The entire series looks great. I think it's a great cleanup:
Acked-by: Zack Rusin 
For the rest.


Thanks a lot!



z



--
Thomas Zimmermann
Graphics Driver Developer
SUSE Software Solutions Germany GmbH
Maxfeldstr. 5, 90409 Nürnberg, Germany
(HRB 36809, AG Nürnberg)
Geschäftsführer: Ivo Totev


OpenPGP_signature
Description: OpenPGP digital signature
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH v2 2/8] vhost-vdpa: use bind_mm/unbind_mm device callbacks

2023-03-16 Thread Stefano Garzarella
On Thu, Mar 16, 2023 at 9:31 AM Stefano Garzarella  wrote:
>
> On Tue, Mar 14, 2023 at 11:48:33AM +0800, Jason Wang wrote:
> >On Thu, Mar 2, 2023 at 7:34 PM Stefano Garzarella  
> >wrote:
> >>
> >> When the user calls the VHOST_SET_OWNER ioctl and the vDPA device
> >> has `use_va` set to true, let's call the bind_mm callback.
> >> In this way we can bind the device to the user address space
> >> and directly use the user VA.
> >>
> >> The unbind_mm callback is called during the release after
> >> stopping the device.
> >>
> >> Signed-off-by: Stefano Garzarella 
> >> ---
> >>
> >> Notes:
> >> v2:
> >> - call the new unbind_mm callback during the release [Jason]
> >> - avoid to call bind_mm callback after the reset, since the device
> >>   is not detaching it now during the reset
> >>
> >>  drivers/vhost/vdpa.c | 30 ++
> >>  1 file changed, 30 insertions(+)
> >>
> >> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
> >> index dc12dbd5b43b..1ab89fccd825 100644
> >> --- a/drivers/vhost/vdpa.c
> >> +++ b/drivers/vhost/vdpa.c
> >> @@ -219,6 +219,28 @@ static int vhost_vdpa_reset(struct vhost_vdpa *v)
> >> return vdpa_reset(vdpa);
> >>  }
> >>
> >> +static long vhost_vdpa_bind_mm(struct vhost_vdpa *v)
> >> +{
> >> +   struct vdpa_device *vdpa = v->vdpa;
> >> +   const struct vdpa_config_ops *ops = vdpa->config;
> >> +
> >> +   if (!vdpa->use_va || !ops->bind_mm)
> >> +   return 0;
> >> +
> >> +   return ops->bind_mm(vdpa, v->vdev.mm);
> >> +}
> >> +
> >> +static void vhost_vdpa_unbind_mm(struct vhost_vdpa *v)
> >> +{
> >> +   struct vdpa_device *vdpa = v->vdpa;
> >> +   const struct vdpa_config_ops *ops = vdpa->config;
> >> +
> >> +   if (!vdpa->use_va || !ops->unbind_mm)
> >> +   return;
> >> +
> >> +   ops->unbind_mm(vdpa);
> >> +}
> >> +
> >>  static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user 
> >> *argp)
> >>  {
> >> struct vdpa_device *vdpa = v->vdpa;
> >> @@ -711,6 +733,13 @@ static long vhost_vdpa_unlocked_ioctl(struct file 
> >> *filep,
> >> break;
> >> default:
> >> r = vhost_dev_ioctl(&v->vdev, cmd, argp);
> >> +   if (!r && cmd == VHOST_SET_OWNER) {
> >> +   r = vhost_vdpa_bind_mm(v);
> >> +   if (r) {
> >> +   vhost_dev_reset_owner(&v->vdev, NULL);
> >> +   break;
> >> +   }
> >> +   }
> >
> >Nit: is it better to have a new condition/switch branch instead of
> >putting them under default? (as what vring_ioctl did).
>
> Yep, I agree!
>
> I'll change it.

Or maybe I can simply add `case VHOST_SET_OWNER` on this switch and call
vhost_dev_set_owner() and vhost_vdpa_bind_mm(), I mean something like
this:

diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index 331d4a718bf6..20250c3418b2 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -731,15 +731,16 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
case VHOST_VDPA_RESUME:
r = vhost_vdpa_resume(v);
break;
+   case VHOST_SET_OWNER:
+   r = vhost_dev_set_owner(d);
+   if (r)
+   break;
+   r = vhost_vdpa_bind_mm(v);
+   if (r)
+   vhost_dev_reset_owner(d, NULL);
+   break;
default:
r = vhost_dev_ioctl(&v->vdev, cmd, argp);
-   if (!r && cmd == VHOST_SET_OWNER) {
-   r = vhost_vdpa_bind_mm(v);
-   if (r) {
-   vhost_dev_reset_owner(&v->vdev, NULL);
-   break;
-   }
-   }
if (r == -ENOIOCTLCMD)
r = vhost_vdpa_vring_ioctl(v, cmd, argp);
break;

WDYT?

Thanks,
Stefano

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[RFC PATCH 28/28] sock: Remove ->sendpage*() in favour of sendmsg(MSG_SPLICE_PAGES)

2023-03-16 Thread David Howells
[!] Note: This is a work in progress.  At the moment, some things won't
build if this patch is applied.  nvme, kcm, smc, tls.

Remove ->sendpage() and ->sendpage_locked().  sendmsg() with
MSG_SPLICE_PAGES should be used instead.  This allows multiple pages and
multipage folios to be passed through.

Signed-off-by: David Howells 
cc: "David S. Miller" 
cc: Eric Dumazet 
cc: Jakub Kicinski 
cc: Paolo Abeni 
cc: Jens Axboe 
cc: Matthew Wilcox 
cc: b...@vger.kernel.org
cc: d...@vger.kernel.org
cc: linux-...@lists.infradead.org
cc: linux-arm-...@vger.kernel.org
cc: linux-...@vger.kernel.org
cc: linux-cry...@vger.kernel.org
cc: linux-...@vger.kernel.org
cc: linux-h...@vger.kernel.org
cc: linux-ker...@vger.kernel.org
cc: linux-r...@vger.kernel.org
cc: linux-s...@vger.kernel.org
cc: linux-w...@vger.kernel.org
cc: linux-...@vger.kernel.org
cc: mp...@lists.linux.dev
cc: net...@vger.kernel.org
cc: rds-de...@oss.oracle.com
cc: tipc-discuss...@lists.sourceforge.net
cc: virtualization@lists.linux-foundation.org
---
 Documentation/networking/scaling.rst |   4 +-
 crypto/af_alg.c  |  29 --
 crypto/algif_aead.c  |  22 +
 crypto/algif_rng.c   |   2 -
 crypto/algif_skcipher.c  |  14 ---
 include/linux/net.h  |   8 --
 include/net/inet_common.h|   2 -
 include/net/sock.h   |   6 --
 net/appletalk/ddp.c  |   1 -
 net/atm/pvc.c|   1 -
 net/atm/svc.c|   1 -
 net/ax25/af_ax25.c   |   1 -
 net/caif/caif_socket.c   |   2 -
 net/can/bcm.c|   1 -
 net/can/isotp.c  |   1 -
 net/can/j1939/socket.c   |   1 -
 net/can/raw.c|   1 -
 net/core/sock.c  |  35 +--
 net/dccp/ipv4.c  |   1 -
 net/dccp/ipv6.c  |   1 -
 net/ieee802154/socket.c  |   2 -
 net/ipv4/af_inet.c   |  21 
 net/ipv4/tcp.c   |  36 ---
 net/ipv4/tcp_bpf.c   |  21 +---
 net/ipv4/tcp_ipv4.c  |   1 -
 net/ipv4/udp.c   |  22 -
 net/ipv4/udp_impl.h  |   2 -
 net/ipv4/udplite.c   |   1 -
 net/ipv6/af_inet6.c  |   3 -
 net/ipv6/raw.c   |   1 -
 net/ipv6/tcp_ipv6.c  |   1 -
 net/key/af_key.c |   1 -
 net/l2tp/l2tp_ip.c   |   1 -
 net/l2tp/l2tp_ip6.c  |   1 -
 net/llc/af_llc.c |   1 -
 net/mctp/af_mctp.c   |   1 -
 net/mptcp/protocol.c |   2 -
 net/netlink/af_netlink.c |   1 -
 net/netrom/af_netrom.c   |   1 -
 net/packet/af_packet.c   |   2 -
 net/phonet/socket.c  |   2 -
 net/qrtr/af_qrtr.c   |   1 -
 net/rds/af_rds.c |   1 -
 net/rose/af_rose.c   |   1 -
 net/rxrpc/af_rxrpc.c |   1 -
 net/sctp/protocol.c  |   1 -
 net/socket.c |  48 -
 net/tipc/socket.c|   3 -
 net/unix/af_unix.c   | 139 ---
 net/vmw_vsock/af_vsock.c |   3 -
 net/x25/af_x25.c |   1 -
 net/xdp/xsk.c|   1 -
 52 files changed, 9 insertions(+), 449 deletions(-)

diff --git a/Documentation/networking/scaling.rst 
b/Documentation/networking/scaling.rst
index 3d435caa3ef2..92c9fb46d6a2 100644
--- a/Documentation/networking/scaling.rst
+++ b/Documentation/networking/scaling.rst
@@ -269,8 +269,8 @@ a single application thread handles flows with many 
different flow hashes.
 rps_sock_flow_table is a global flow table that contains the *desired* CPU
 for flows: the CPU that is currently processing the flow in userspace.
 Each table value is a CPU index that is updated during calls to recvmsg
-and sendmsg (specifically, inet_recvmsg(), inet_sendmsg(), inet_sendpage()
-and tcp_splice_read()).
+and sendmsg (specifically, inet_recvmsg(), inet_sendmsg() and
+tcp_splice_read()).
 
 When the scheduler moves a thread to a new CPU while it has outstanding
 receive packets on the old CPU, packets may arrive out of order. To
diff --git a/crypto/af_alg.c b/crypto/af_alg.c
index 0e77fce60876..225c90657f58 100644
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -483,7 +483,6 @@ static const struct proto_ops alg_proto_ops = {
.listen =   sock_no_listen,
.shutdown   =   sock_no_shutdown,
.mmap   =   sock_no_mmap,
-   .sendpage   =   sock_no_sendpage,
.sendmsg=   sock_no_sendmsg,
.recvmsg=   sock_no_recvmsg,
 
@@ -1135,34 +1134,6 @@ int af_alg_sendmsg(struct socket *sock, struct msghdr 
*msg, size_t size,
 }
 

Re: [PATCH v2 4/8] vringh: support VA with iotlb

2023-03-16 Thread Stefano Garzarella
On Fri, Mar 3, 2023 at 3:39 PM Eugenio Perez Martin  wrote:
>
> On Thu, Mar 2, 2023 at 12:35 PM Stefano Garzarella  
> wrote:
> >
> > vDPA supports the possibility to use user VA in the iotlb messages.
> > So, let's add support for user VA in vringh to use it in the vDPA
> > simulators.
> >
> > Signed-off-by: Stefano Garzarella 
> > ---
> >
> > Notes:
> > v2:
> > - replace kmap_atomic() with kmap_local_page() [see previous patch]
> > - fix cast warnings when build with W=1 C=1
> >
> >  include/linux/vringh.h|   5 +-
> >  drivers/vdpa/mlx5/net/mlx5_vnet.c |   2 +-
> >  drivers/vdpa/vdpa_sim/vdpa_sim.c  |   4 +-
> >  drivers/vhost/vringh.c| 247 --
> >  4 files changed, 205 insertions(+), 53 deletions(-)
> >

[...]

>
> It seems to me iotlb_translate_va and iotlb_translate_pa are very
> similar; their only difference is that the iov argument is an
> iovec instead of a bio_vec. And how to fill it, obviously.
>
> It would be great to merge both functions, only differing with a
> conditional on vrh->use_va, or generics, or similar. Or, if following
> the style of the rest of vringh code, to provide a callback to fill
> iovec (although I like conditional more).
>
> However I cannot think of an easy way to perform that without long
> macros or type erasure.

Thank you for pushing me :-)
I finally managed to avoid code duplication (partial patch attached,
but not yet fully tested).

@Jason: with this refactoring I removed copy_to_va/copy_to_pa, so I
also avoided getu16_iotlb_va/pa.

I will send the full patch in v3, but I would like to get your opinion
first ;-)



diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
index 0ba3ef809e48..71dd67700e36 100644
--- a/drivers/vhost/vringh.c
+++ b/drivers/vhost/vringh.c
@@ -1096,8 +1096,7 @@ EXPORT_SYMBOL(vringh_need_notify_kern);
 
 static int iotlb_translate(const struct vringh *vrh,
   u64 addr, u64 len, u64 *translated,
-  struct bio_vec iov[],
-  int iov_size, u32 perm)
+  void *iov, int iov_size, bool iovec, u32 perm)
 {
struct vhost_iotlb_map *map;
struct vhost_iotlb *iotlb = vrh->iotlb;
@@ -1107,7 +1106,7 @@ static int iotlb_translate(const struct vringh *vrh,
spin_lock(vrh->iotlb_lock);
 
while (len > s) {
-   u64 size, pa, pfn;
+   u64 size;
 
if (unlikely(ret >= iov_size)) {
ret = -ENOBUFS;
@@ -1124,10 +1123,22 @@ static int iotlb_translate(const struct vringh *vrh,
}
 
size = map->size - addr + map->start;
-   pa = map->addr + addr - map->start;
-   pfn = pa >> PAGE_SHIFT;
-   bvec_set_page(&iov[ret], pfn_to_page(pfn), min(len - s, size),
- pa & (PAGE_SIZE - 1));
+   if (iovec) {
+   struct iovec *iovec = iov;
+
+   iovec[ret].iov_len = min(len - s, size);
+   iovec[ret].iov_base = (void __user *)(unsigned long)
+ (map->addr + addr - map->start);
+   } else {
+   u64 pa = map->addr + addr - map->start;
+   u64 pfn = pa >> PAGE_SHIFT;
+   struct bio_vec *bvec = iov;
+
+   bvec_set_page(&bvec[ret], pfn_to_page(pfn),
+ min(len - s, size),
+ pa & (PAGE_SIZE - 1));
+   }
+
s += size;
addr += size;
++ret;
@@ -1141,26 +1152,38 @@ static int iotlb_translate(const struct vringh *vrh,
return ret;
 }
 
+#define IOTLB_IOV_SIZE 16
+
 static inline int copy_from_iotlb(const struct vringh *vrh, void *dst,
  void *src, size_t len)
 {
u64 total_translated = 0;
 
while (total_translated < len) {
-   struct bio_vec iov[16];
+   union {
+   struct iovec iovec[IOTLB_IOV_SIZE];
+   struct bio_vec bvec[IOTLB_IOV_SIZE];
+   } iov;
struct iov_iter iter;
u64 translated;
int ret;
 
ret = iotlb_translate(vrh, (u64)(uintptr_t)src,
  len - total_translated, &translated,
- iov, ARRAY_SIZE(iov), VHOST_MAP_RO);
+ &iov, IOTLB_IOV_SIZE, vrh->use_va,
+ VHOST_MAP_RO);
if (ret == -ENOBUFS)
-   ret = ARRAY_SIZE(iov);
+   ret = IOTLB_IOV_SIZE;
else if (ret < 0)
return ret;
 
-   iov_iter_bvec(&iter, ITER_SOURCE, iov, ret, translated);
+   if (vrh->use_va) {
+   

Re: [PATCH] x86/paravirt: convert simple paravirt functions to asm

2023-03-16 Thread Borislav Petkov
On Wed, Mar 08, 2023 at 04:42:10PM +0100, Juergen Gross wrote:
> All functions referenced via __PV_IS_CALLEE_SAVE() need to be assembler
> functions, as those functions calls are hidden from gcc. In case the
> kernel is compiled with "-fzero-call-used-regs" the compiler will
> clobber caller-saved registers at the end of C functions, which will
> result in unexpectedly zeroed registers at the call site of the
> related paravirt functions.
> 
> Replace the C functions with DEFINE_PARAVIRT_ASM() constructs using
> the same instructions as the related paravirt calls in the
> PVOP_ALT_[V]CALLEE*() macros.
> 
> Signed-off-by: Juergen Gross 
> ---
>  arch/x86/include/asm/paravirt_types.h |  8 +++-
>  arch/x86/kernel/paravirt.c| 27 ++-
>  2 files changed, 13 insertions(+), 22 deletions(-)

objtool's not happy with this for whatever reason. I'll look later as to
why. .config is allmodconfig with this patch on top of tip:x86/paravirt:

vmlinux.o: warning: objtool: pv_ops[31]: pv_native_irq_disable
vmlinux.o: warning: objtool: default_idle+0x1e: call to {dynamic}() leaves 
.noinstr.text section
vmlinux.o: warning: objtool: pv_ops[31]: pv_native_irq_disable
vmlinux.o: warning: objtool: mwait_idle+0x5d: call to {dynamic}() leaves 
.noinstr.text section
vmlinux.o: warning: objtool: pv_ops[31]: pv_native_irq_disable
vmlinux.o: warning: objtool: cpu_idle_poll.isra.0+0x94: call to {dynamic}() 
leaves .noinstr.text section
vmlinux.o: warning: objtool: pv_ops[31]: pv_native_irq_disable
vmlinux.o: warning: objtool: intel_idle_irq+0xab: call to {dynamic}() leaves 
.noinstr.text section
vmlinux.o: warning: objtool: pv_ops[31]: pv_native_irq_disable
vmlinux.o: warning: objtool: acpi_safe_halt+0x2a: call to {dynamic}() leaves 
.noinstr.text section
vmlinux.o: warning: objtool: pv_ops[31]: pv_native_irq_disable
vmlinux.o: warning: objtool: poll_idle+0x86: call to {dynamic}() leaves 
.noinstr.text section
vmlinux.o: warning: objtool: pv_ops[42]: pv_native_read_cr2
vmlinux.o: warning: objtool: exc_double_fault+0x3b: call to {dynamic}() leaves 
.noinstr.text section
vmlinux.o: warning: objtool: pv_ops[42]: pv_native_read_cr2
vmlinux.o: warning: objtool: exc_nmi+0x188: call to {dynamic}() leaves 
.noinstr.text section
vmlinux.o: warning: objtool: pv_ops[30]: pv_native_save_fl
vmlinux.o: warning: objtool: __sev_put_ghcb+0x11: call to {dynamic}() leaves 
.noinstr.text section
vmlinux.o: warning: objtool: pv_ops[30]: pv_native_save_fl
vmlinux.o: warning: objtool: __sev_get_ghcb+0x13: call to {dynamic}() leaves 
.noinstr.text section
vmlinux.o: warning: objtool: pv_ops[42]: pv_native_read_cr2
vmlinux.o: warning: objtool: exc_page_fault+0x1e: call to {dynamic}() leaves 
.noinstr.text section
vmlinux.o: warning: objtool: pv_ops[30]: pv_native_save_fl
vmlinux.o: warning: objtool: lockdep_hardirqs_on+0xd0: call to {dynamic}() 
leaves .noinstr.text section
vmlinux.o: warning: objtool: pv_ops[30]: pv_native_save_fl
vmlinux.o: warning: objtool: lockdep_hardirqs_off+0xe7: call to {dynamic}() 
leaves .noinstr.text section
vmlinux.o: warning: objtool: pv_ops[30]: pv_native_save_fl
vmlinux.o: warning: objtool: look_up_lock_class+0x52: call to {dynamic}() 
leaves .noinstr.text section
vmlinux.o: warning: objtool: pv_ops[32]: pv_native_irq_enable
vmlinux.o: warning: objtool: lock_is_held_type+0x143: call to {dynamic}() 
leaves .noinstr.text section
vmlinux.o: warning: objtool: pv_ops[30]: pv_native_save_fl
vmlinux.o: warning: objtool: ct_kernel_enter.constprop.0+0x37: call to 
{dynamic}() leaves .noinstr.text section
vmlinux.o: warning: objtool: pv_ops[32]: pv_native_irq_enable
vmlinux.o: warning: objtool: ct_idle_exit+0x51: call to {dynamic}() leaves 
.noinstr.text section
vmlinux.o: warning: objtool: pv_ops[30]: pv_native_save_fl
vmlinux.o: warning: objtool: ct_idle_enter+0xe: call to {dynamic}() leaves 
.noinstr.text section
vmlinux.o: warning: objtool: pv_ops[30]: pv_native_save_fl
vmlinux.o: warning: objtool: check_preemption_disabled+0x4c: call to 
{dynamic}() leaves .noinstr.text section

-- 
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization