Re: [RFC v4 11/11] vduse: Support binding irq to the specified cpu

2021-03-03 Thread Jason Wang


On 2021/2/23 7:50 下午, Xie Yongji wrote:

Add a parameter for the ioctl VDUSE_INJECT_VQ_IRQ to support
injecting virtqueue's interrupt to the specified cpu.



How userspace know which CPU is this irq for? It looks to me we need to 
do it at different level.


E.g introduce some API in sys to allow admin to tune for that.

But I think we can do that in antoher patch on top of this series.

Thanks




Signed-off-by: Xie Yongji 
---
  drivers/vdpa/vdpa_user/vduse_dev.c | 22 +-
  include/uapi/linux/vduse.h |  7 ++-
  2 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c 
b/drivers/vdpa/vdpa_user/vduse_dev.c
index f5adeb9ee027..df3d467fff40 100644
--- a/drivers/vdpa/vdpa_user/vduse_dev.c
+++ b/drivers/vdpa/vdpa_user/vduse_dev.c
@@ -923,14 +923,27 @@ static long vduse_dev_ioctl(struct file *file, unsigned 
int cmd,
break;
}
case VDUSE_INJECT_VQ_IRQ: {
+   struct vduse_vq_irq irq;
struct vduse_virtqueue *vq;
  
+		ret = -EFAULT;

+   if (copy_from_user(, argp, sizeof(irq)))
+   break;
+
ret = -EINVAL;
-   if (arg >= dev->vq_num)
+   if (irq.index >= dev->vq_num)
+   break;
+
+   if (irq.cpu != -1 && (irq.cpu >= nr_cpu_ids ||
+   !cpu_online(irq.cpu)))
break;
  
-		vq = >vqs[arg];

-   queue_work(vduse_irq_wq, >inject);
+   ret = 0;
+   vq = >vqs[irq.index];
+   if (irq.cpu == -1)
+   queue_work(vduse_irq_wq, >inject);
+   else
+   queue_work_on(irq.cpu, vduse_irq_wq, >inject);
break;
}
case VDUSE_INJECT_CONFIG_IRQ:
@@ -1342,8 +1355,7 @@ static int vduse_init(void)
if (ret)
goto err_chardev;
  
-	vduse_irq_wq = alloc_workqueue("vduse-irq",

-   WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0);
+   vduse_irq_wq = alloc_workqueue("vduse-irq", WQ_HIGHPRI, 0);
if (!vduse_irq_wq)
goto err_wq;
  
diff --git a/include/uapi/linux/vduse.h b/include/uapi/linux/vduse.h

index 9070cd512cb4..9c70fd842ce5 100644
--- a/include/uapi/linux/vduse.h
+++ b/include/uapi/linux/vduse.h
@@ -116,6 +116,11 @@ struct vduse_vq_eventfd {
int fd; /* eventfd, -1 means de-assigning the eventfd */
  };
  
+struct vduse_vq_irq {

+   __u32 index; /* virtqueue index */
+   int cpu; /* bind irq to the specified cpu, -1 means running on the 
current cpu */
+};
+
  #define VDUSE_BASE0x81
  
  /* Create a vduse device which is represented by a char device (/dev/vduse/) */

@@ -131,7 +136,7 @@ struct vduse_vq_eventfd {
  #define VDUSE_VQ_SETUP_KICKFD _IOW(VDUSE_BASE, 0x04, struct vduse_vq_eventfd)
  
  /* Inject an interrupt for specific virtqueue */

-#define VDUSE_INJECT_VQ_IRQ_IO(VDUSE_BASE, 0x05)
+#define VDUSE_INJECT_VQ_IRQ_IOW(VDUSE_BASE, 0x05, struct vduse_vq_irq)
  
  /* Inject a config interrupt */

  #define VDUSE_INJECT_CONFIG_IRQ   _IO(VDUSE_BASE, 0x06)


___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [RFC v4 10/11] vduse: Introduce a workqueue for irq injection

2021-03-03 Thread Jason Wang


On 2021/2/23 7:50 下午, Xie Yongji wrote:

This patch introduces a workqueue to support injecting
virtqueue's interrupt asynchronously. This is mainly
for performance considerations which makes sure the push()
and pop() for used vring can be asynchronous.



Do you have pref numbers for this patch?

Thanks




Signed-off-by: Xie Yongji 
---
  drivers/vdpa/vdpa_user/vduse_dev.c | 29 +++--
  1 file changed, 23 insertions(+), 6 deletions(-)

diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c 
b/drivers/vdpa/vdpa_user/vduse_dev.c
index 8042d3fa57f1..f5adeb9ee027 100644
--- a/drivers/vdpa/vdpa_user/vduse_dev.c
+++ b/drivers/vdpa/vdpa_user/vduse_dev.c
@@ -42,6 +42,7 @@ struct vduse_virtqueue {
spinlock_t irq_lock;
struct eventfd_ctx *kickfd;
struct vdpa_callback cb;
+   struct work_struct inject;
  };
  
  struct vduse_dev;

@@ -99,6 +100,7 @@ static DEFINE_IDA(vduse_ida);
  
  static dev_t vduse_major;

  static struct class *vduse_class;
+static struct workqueue_struct *vduse_irq_wq;
  
  static inline struct vduse_dev *vdpa_to_vduse(struct vdpa_device *vdpa)

  {
@@ -852,6 +854,17 @@ static int vduse_kickfd_setup(struct vduse_dev *dev,
return 0;
  }
  
+static void vduse_vq_irq_inject(struct work_struct *work)

+{
+   struct vduse_virtqueue *vq = container_of(work,
+   struct vduse_virtqueue, inject);
+
+   spin_lock_irq(>irq_lock);
+   if (vq->ready && vq->cb.callback)
+   vq->cb.callback(vq->cb.private);
+   spin_unlock_irq(>irq_lock);
+}
+
  static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
  {
@@ -917,12 +930,7 @@ static long vduse_dev_ioctl(struct file *file, unsigned 
int cmd,
break;
  
  		vq = >vqs[arg];

-   spin_lock_irq(>irq_lock);
-   if (vq->ready && vq->cb.callback) {
-   vq->cb.callback(vq->cb.private);
-   ret = 0;
-   }
-   spin_unlock_irq(>irq_lock);
+   queue_work(vduse_irq_wq, >inject);
break;
}
case VDUSE_INJECT_CONFIG_IRQ:
@@ -1109,6 +1117,7 @@ static int vduse_create_dev(struct vduse_dev_config 
*config)
  
  	for (i = 0; i < dev->vq_num; i++) {

dev->vqs[i].index = i;
+   INIT_WORK(>vqs[i].inject, vduse_vq_irq_inject);
spin_lock_init(>vqs[i].kick_lock);
spin_lock_init(>vqs[i].irq_lock);
}
@@ -1333,6 +1342,11 @@ static int vduse_init(void)
if (ret)
goto err_chardev;
  
+	vduse_irq_wq = alloc_workqueue("vduse-irq",

+   WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0);
+   if (!vduse_irq_wq)
+   goto err_wq;
+
ret = vduse_domain_init();
if (ret)
goto err_domain;
@@ -1344,6 +1358,8 @@ static int vduse_init(void)
return 0;
  err_mgmtdev:
vduse_domain_exit();
+err_wq:
+   destroy_workqueue(vduse_irq_wq);
  err_domain:
unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
  err_chardev:
@@ -1359,6 +1375,7 @@ static void vduse_exit(void)
misc_deregister(_misc);
class_destroy(vduse_class);
unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
+   destroy_workqueue(vduse_irq_wq);
vduse_domain_exit();
vduse_mgmtdev_exit();
  }


___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [RFC v4 09/11] Documentation: Add documentation for VDUSE

2021-03-03 Thread Jason Wang


On 2021/2/23 7:50 下午, Xie Yongji wrote:

VDUSE (vDPA Device in Userspace) is a framework to support
implementing software-emulated vDPA devices in userspace. This
document is intended to clarify the VDUSE design and usage.

Signed-off-by: Xie Yongji 
---
  Documentation/userspace-api/index.rst |   1 +
  Documentation/userspace-api/vduse.rst | 112 ++
  2 files changed, 113 insertions(+)
  create mode 100644 Documentation/userspace-api/vduse.rst

diff --git a/Documentation/userspace-api/index.rst 
b/Documentation/userspace-api/index.rst
index acd2cc2a538d..f63119130898 100644
--- a/Documentation/userspace-api/index.rst
+++ b/Documentation/userspace-api/index.rst
@@ -24,6 +24,7 @@ place where this information is gathered.
 ioctl/index
 iommu
 media/index
+   vduse
  
  .. only::  subproject and html
  
diff --git a/Documentation/userspace-api/vduse.rst b/Documentation/userspace-api/vduse.rst

new file mode 100644
index ..2a20e686bb59
--- /dev/null
+++ b/Documentation/userspace-api/vduse.rst
@@ -0,0 +1,112 @@
+==
+VDUSE - "vDPA Device in Userspace"
+==
+
+vDPA (virtio data path acceleration) device is a device that uses a
+datapath which complies with the virtio specifications with vendor
+specific control path. vDPA devices can be both physically located on
+the hardware or emulated by software. VDUSE is a framework that makes it
+possible to implement software-emulated vDPA devices in userspace.
+
+How VDUSE works
+
+Each userspace vDPA device is created by the VDUSE_CREATE_DEV ioctl on
+the character device (/dev/vduse/control). Then a device file with the
+specified name (/dev/vduse/$NAME) will appear, which can be used to
+implement the userspace vDPA device's control path and data path.



It's better to mention that in order to le thte device to be registered 
on the bus, admin need to use the management API(netlink) to create the 
vDPA device.


Some codes to demnonstrate how to create the device will be better.



+
+To implement control path, a message-based communication protocol and some
+types of control messages are introduced in the VDUSE framework:
+
+- VDUSE_SET_VQ_ADDR: Set the vring address of virtqueue.
+
+- VDUSE_SET_VQ_NUM: Set the size of virtqueue
+
+- VDUSE_SET_VQ_READY: Set ready status of virtqueue
+
+- VDUSE_GET_VQ_READY: Get ready status of virtqueue
+
+- VDUSE_SET_VQ_STATE: Set the state for virtqueue
+
+- VDUSE_GET_VQ_STATE: Get the state for virtqueue
+
+- VDUSE_SET_FEATURES: Set virtio features supported by the driver
+
+- VDUSE_GET_FEATURES: Get virtio features supported by the device
+
+- VDUSE_SET_STATUS: Set the device status
+
+- VDUSE_GET_STATUS: Get the device status
+
+- VDUSE_SET_CONFIG: Write to device specific configuration space
+
+- VDUSE_GET_CONFIG: Read from device specific configuration space
+
+- VDUSE_UPDATE_IOTLB: Notify userspace to update the memory mapping in device 
IOTLB
+
+Those control messages are mostly based on the vdpa_config_ops in
+include/linux/vdpa.h which defines a unified interface to control
+different types of vdpa device. Userspace needs to read()/write()
+on the VDUSE device file to receive/reply those control messages
+from/to VDUSE kernel module as follows:
+
+.. code-block:: c
+
+   static int vduse_message_handler(int dev_fd)
+   {
+   int len;
+   struct vduse_dev_request req;
+   struct vduse_dev_response resp;
+
+   len = read(dev_fd, , sizeof(req));
+   if (len != sizeof(req))
+   return -1;
+
+   resp.request_id = req.unique;
+
+   switch (req.type) {
+
+   /* handle different types of message */
+
+   }
+
+   len = write(dev_fd, , sizeof(resp));
+   if (len != sizeof(resp))
+   return -1;
+
+   return 0;
+   }
+
+In the deta path, vDPA device's iova regions will be mapped into userspace
+with the help of VDUSE_IOTLB_GET_FD ioctl on the VDUSE device file:
+
+- VDUSE_IOTLB_GET_FD: get the file descriptor to iova region. Userspace can
+  access this iova region by passing the fd to mmap().



It would be better to have codes to explain how it is expected to work here.



+
+Besides, the following ioctls on the VDUSE device file are provided to support
+interrupt injection and setting up eventfd for virtqueue kicks:
+
+- VDUSE_VQ_SETUP_KICKFD: set the kickfd for virtqueue, this eventfd is used
+  by VDUSE kernel module to notify userspace to consume the vring.
+
+- VDUSE_INJECT_VQ_IRQ: inject an interrupt for specific virtqueue
+
+- VDUSE_INJECT_CONFIG_IRQ: inject a config interrupt
+
+MMU-based IOMMU Driver
+--
+In virtio-vdpa case, VDUSE framework implements an MMU-based on-chip IOMMU
+driver to support mapping the kernel DMA buffer into the userspace iova
+region dynamically.
+
+The 

Re: [RFC v4 07/11] vduse: Introduce VDUSE - vDPA Device in Userspace

2021-03-03 Thread Jason Wang


On 2021/2/23 7:50 下午, Xie Yongji wrote:

This VDUSE driver enables implementing vDPA devices in userspace.
Both control path and data path of vDPA devices will be able to
be handled in userspace.

In the control path, the VDUSE driver will make use of message
mechnism to forward the config operation from vdpa bus driver
to userspace. Userspace can use read()/write() to receive/reply
those control messages.

In the data path, VDUSE_IOTLB_GET_FD ioctl will be used to get
the file descriptors referring to vDPA device's iova regions. Then
userspace can use mmap() to access those iova regions. Besides,
userspace can use ioctl() to inject interrupt and use the eventfd
mechanism to receive virtqueue kicks.

Signed-off-by: Xie Yongji 
---
  Documentation/userspace-api/ioctl/ioctl-number.rst |1 +
  drivers/vdpa/Kconfig   |   10 +
  drivers/vdpa/Makefile  |1 +
  drivers/vdpa/vdpa_user/Makefile|5 +
  drivers/vdpa/vdpa_user/vduse_dev.c | 1348 
  include/uapi/linux/vduse.h |  136 ++
  6 files changed, 1501 insertions(+)
  create mode 100644 drivers/vdpa/vdpa_user/Makefile
  create mode 100644 drivers/vdpa/vdpa_user/vduse_dev.c
  create mode 100644 include/uapi/linux/vduse.h

diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst 
b/Documentation/userspace-api/ioctl/ioctl-number.rst
index a4c75a28c839..71722e6f8f23 100644
--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -300,6 +300,7 @@ Code  Seq#Include File  
 Comments
  'z'   10-4F  drivers/s390/crypto/zcrypt_api.hconflict!
  '|'   00-7F  linux/media.h
  0x80  00-1F  linux/fb.h
+0x81  00-1F  linux/vduse.h
  0x89  00-06  arch/x86/include/asm/sockios.h
  0x89  0B-DF  linux/sockios.h
  0x89  E0-EF  linux/sockios.h 
SIOCPROTOPRIVATE range
diff --git a/drivers/vdpa/Kconfig b/drivers/vdpa/Kconfig
index ffd1e098bfd2..92f07715e3b6 100644
--- a/drivers/vdpa/Kconfig
+++ b/drivers/vdpa/Kconfig
@@ -25,6 +25,16 @@ config VDPA_SIM_NET
help
  vDPA networking device simulator which loops TX traffic back to RX.
  
+config VDPA_USER

+   tristate "VDUSE (vDPA Device in Userspace) support"
+   depends on EVENTFD && MMU && HAS_DMA
+   select DMA_OPS
+   select VHOST_IOTLB
+   select IOMMU_IOVA
+   help
+ With VDUSE it is possible to emulate a vDPA Device
+ in a userspace program.
+
  config IFCVF
tristate "Intel IFC VF vDPA driver"
depends on PCI_MSI
diff --git a/drivers/vdpa/Makefile b/drivers/vdpa/Makefile
index d160e9b63a66..66e97778ad03 100644
--- a/drivers/vdpa/Makefile
+++ b/drivers/vdpa/Makefile
@@ -1,5 +1,6 @@
  # SPDX-License-Identifier: GPL-2.0
  obj-$(CONFIG_VDPA) += vdpa.o
  obj-$(CONFIG_VDPA_SIM) += vdpa_sim/
+obj-$(CONFIG_VDPA_USER) += vdpa_user/
  obj-$(CONFIG_IFCVF)+= ifcvf/
  obj-$(CONFIG_MLX5_VDPA) += mlx5/
diff --git a/drivers/vdpa/vdpa_user/Makefile b/drivers/vdpa/vdpa_user/Makefile
new file mode 100644
index ..260e0b26af99
--- /dev/null
+++ b/drivers/vdpa/vdpa_user/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+
+vduse-y := vduse_dev.o iova_domain.o
+
+obj-$(CONFIG_VDPA_USER) += vduse.o
diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c 
b/drivers/vdpa/vdpa_user/vduse_dev.c
new file mode 100644
index ..393bf99c48be
--- /dev/null
+++ b/drivers/vdpa/vdpa_user/vduse_dev.c
@@ -0,0 +1,1348 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * VDUSE: vDPA Device in Userspace
+ *
+ * Copyright (C) 2020 Bytedance Inc. and/or its affiliates. All rights 
reserved.
+ *
+ * Author: Xie Yongji 
+ *
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "iova_domain.h"
+
+#define DRV_VERSION  "1.0"
+#define DRV_AUTHOR   "Yongji Xie "
+#define DRV_DESC "vDPA Device in Userspace"
+#define DRV_LICENSE  "GPL v2"
+
+#define VDUSE_DEV_MAX (1U << MINORBITS)
+
+struct vduse_virtqueue {
+   u16 index;
+   bool ready;
+   spinlock_t kick_lock;
+   spinlock_t irq_lock;
+   struct eventfd_ctx *kickfd;
+   struct vdpa_callback cb;
+};
+
+struct vduse_dev;
+
+struct vduse_vdpa {
+   struct vdpa_device vdpa;
+   struct vduse_dev *dev;
+};
+
+struct vduse_dev {
+   struct vduse_vdpa *vdev;
+   struct device dev;
+   struct cdev cdev;
+   struct vduse_virtqueue *vqs;
+   struct vduse_iova_domain *domain;
+   struct vhost_iotlb *iommu;
+   spinlock_t iommu_lock;
+   atomic_t bounce_map;
+   struct mutex msg_lock;
+   atomic64_t msg_unique;



"next_request_id" should be better.



+   wait_queue_head_t waitq;
+   struct 

Re: [PATCH v6] i2c: virtio: add a virtio i2c frontend driver

2021-03-03 Thread kernel test robot
Hi Jie,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on wsa/i2c/for-next]
[also build test ERROR on vhost/linux-next linux/master linus/master v5.12-rc1 
next-20210303]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:
https://github.com/0day-ci/linux/commits/Jie-Deng/i2c-virtio-add-a-virtio-i2c-frontend-driver/20210304-100543
base:   https://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux.git 
i2c/for-next
config: x86_64-randconfig-a014-20210304 (attached as .config)
compiler: clang version 13.0.0 (https://github.com/llvm/llvm-project 
eec7f8f7b1226be422a76542cb403d02538f453a)
reproduce (this is a W=1 build):
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# install x86_64 cross compiling tool for clang build
# apt-get install binutils-x86-64-linux-gnu
# 
https://github.com/0day-ci/linux/commit/bb645653b6d7750a026229726f8d9d0371457ac6
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review 
Jie-Deng/i2c-virtio-add-a-virtio-i2c-frontend-driver/20210304-100543
git checkout bb645653b6d7750a026229726f8d9d0371457ac6
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=x86_64 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All errors (new ones prefixed by >>):

   In file included from :1:
>> ./usr/include/linux/virtio_i2c.h:35:2: error: unknown type name 'u8'
   u8 status;
   ^
   1 error generated.

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


.config.gz
Description: application/gzip
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

RE: [PATCH RFC v1 12/15] iommu/virtio: Add support for INVALIDATE request

2021-03-03 Thread Tian, Kevin
> From: Jacob Pan 
> Sent: Thursday, March 4, 2021 2:29 AM
> 
> Hi Vivek,
> 
> On Fri, 15 Jan 2021 17:43:39 +0530, Vivek Gautam 
> wrote:
> 
> > From: Jean-Philippe Brucker 
> >
> > Add support for tlb invalidation ops that can send invalidation
> > requests to back-end virtio-iommu when stage-1 page tables are
> > supported.
> >
> Just curious if it possible to reuse the iommu uapi for invalidation and 
> others.
> When we started out designing the iommu uapi, the intention was to support
> both emulated and virtio iommu.

IIUC this patch is about the protocol between virtio-iommu frontend and backend.
After the virtio-iommu backend receives invalidation ops, it then needs to
forward the request to the host IOMMU driver through the existing iommu
uapi that you referred to, as a emulated VT-d or SMMU would do.

Thanks
Kevin

> 
> > Signed-off-by: Jean-Philippe Brucker 
> > [Vivek: Refactoring the iommu_flush_ops, and adding only one pasid sync
> > op that's needed with current iommu-pasid-table infrastructure.
> > Also updating uapi defines as required by latest changes]
> > Signed-off-by: Vivek Gautam 
> > Cc: Joerg Roedel 
> > Cc: Will Deacon 
> > Cc: Michael S. Tsirkin 
> > Cc: Robin Murphy 
> > Cc: Jean-Philippe Brucker 
> > Cc: Eric Auger 
> > Cc: Alex Williamson 
> > Cc: Kevin Tian 
> > Cc: Jacob Pan 
> > Cc: Liu Yi L 
> > Cc: Lorenzo Pieralisi 
> > Cc: Shameerali Kolothum Thodi 
> > ---
> >  drivers/iommu/virtio-iommu.c | 95
> 
> >  1 file changed, 95 insertions(+)
> >
> > diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
> > index ae5dfd3f8269..004ea94e3731 100644
> > --- a/drivers/iommu/virtio-iommu.c
> > +++ b/drivers/iommu/virtio-iommu.c
> > @@ -13,6 +13,7 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> >  #include 
> >  #include 
> >  #include 
> > @@ -63,6 +64,8 @@ struct viommu_mapping {
> >  };
> >
> >  struct viommu_mm {
> > +   int pasid;
> > +   u64 archid;
> > struct io_pgtable_ops   *ops;
> > struct viommu_domain*domain;
> >  };
> > @@ -692,6 +695,98 @@ static void viommu_event_handler(struct
> virtqueue
> > *vq) virtqueue_kick(vq);
> >  }
> >
> > +/* PASID and pgtable APIs */
> > +
> > +static void __viommu_flush_pasid_tlb_all(struct viommu_domain
> *vdomain,
> > +int pasid, u64 arch_id, int
> > type) +{
> > +   struct virtio_iommu_req_invalidate req = {
> > +   .head.type  = VIRTIO_IOMMU_T_INVALIDATE,
> > +   .inv_gran   =
> > cpu_to_le32(VIRTIO_IOMMU_INVAL_G_PASID),
> > +   .flags  =
> > cpu_to_le32(VIRTIO_IOMMU_INVAL_F_PASID),
> > +   .inv_type   = cpu_to_le32(type),
> > +
> > +   .domain = cpu_to_le32(vdomain->id),
> > +   .pasid  = cpu_to_le32(pasid),
> > +   .archid = cpu_to_le64(arch_id),
> > +   };
> > +
> > +   if (viommu_send_req_sync(vdomain->viommu, , sizeof(req)))
> > +   pr_debug("could not send invalidate request\n");
> > +}
> > +
> > +static void viommu_flush_tlb_add(struct iommu_iotlb_gather *gather,
> > +unsigned long iova, size_t granule,
> > +void *cookie)
> > +{
> > +   struct viommu_mm *viommu_mm = cookie;
> > +   struct viommu_domain *vdomain = viommu_mm->domain;
> > +   struct iommu_domain *domain = >domain;
> > +
> > +   iommu_iotlb_gather_add_page(domain, gather, iova, granule);
> > +}
> > +
> > +static void viommu_flush_tlb_walk(unsigned long iova, size_t size,
> > + size_t granule, void *cookie)
> > +{
> > +   struct viommu_mm *viommu_mm = cookie;
> > +   struct viommu_domain *vdomain = viommu_mm->domain;
> > +   struct virtio_iommu_req_invalidate req = {
> > +   .head.type  = VIRTIO_IOMMU_T_INVALIDATE,
> > +   .inv_gran   = cpu_to_le32(VIRTIO_IOMMU_INVAL_G_VA),
> > +   .inv_type   =
> cpu_to_le32(VIRTIO_IOMMU_INV_T_IOTLB),
> > +   .flags  =
> > cpu_to_le32(VIRTIO_IOMMU_INVAL_F_ARCHID), +
> > +   .domain = cpu_to_le32(vdomain->id),
> > +   .pasid  = cpu_to_le32(viommu_mm->pasid),
> > +   .archid = cpu_to_le64(viommu_mm->archid),
> > +   .virt_start = cpu_to_le64(iova),
> > +   .nr_pages   = cpu_to_le64(size / granule),
> > +   .granule= ilog2(granule),
> > +   };
> > +
> > +   if (viommu_add_req(vdomain->viommu, , sizeof(req)))
> > +   pr_debug("could not add invalidate request\n");
> > +}
> > +
> > +static void viommu_flush_tlb_all(void *cookie)
> > +{
> > +   struct viommu_mm *viommu_mm = cookie;
> > +
> > +   if (!viommu_mm->archid)
> > +   return;
> > +
> > +   __viommu_flush_pasid_tlb_all(viommu_mm->domain,
> viommu_mm->pasid,
> > +viommu_mm->archid,
> > + 

Re: [PATCH v6] i2c: virtio: add a virtio i2c frontend driver

2021-03-03 Thread kernel test robot
Hi Jie,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on wsa/i2c/for-next]
[also build test WARNING on vhost/linux-next linux/master linus/master 
v5.12-rc1 next-20210303]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:
https://github.com/0day-ci/linux/commits/Jie-Deng/i2c-virtio-add-a-virtio-i2c-frontend-driver/20210304-100543
base:   https://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux.git 
i2c/for-next
config: xtensa-randconfig-s031-20210304 (attached as .config)
compiler: xtensa-linux-gcc (GCC) 9.3.0
reproduce:
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# apt-get install sparse
# sparse version: v0.6.3-245-gacc5c298-dirty
# 
https://github.com/0day-ci/linux/commit/bb645653b6d7750a026229726f8d9d0371457ac6
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review 
Jie-Deng/i2c-virtio-add-a-virtio-i2c-frontend-driver/20210304-100543
git checkout bb645653b6d7750a026229726f8d9d0371457ac6
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross C=1 
CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' ARCH=xtensa 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 


"sparse warnings: (new ones prefixed by >>)"
>> drivers/i2c/busses/i2c-virtio.c:78:47: sparse: sparse: invalid assignment: |=
>> drivers/i2c/busses/i2c-virtio.c:78:47: sparse:left side has type 
>> restricted __le32
>> drivers/i2c/busses/i2c-virtio.c:78:47: sparse:right side has type int

vim +78 drivers/i2c/busses/i2c-virtio.c

59  
60  static int virtio_i2c_send_reqs(struct virtqueue *vq,
61  struct virtio_i2c_req *reqs,
62  struct i2c_msg *msgs, int nr)
63  {
64  struct scatterlist *sgs[3], out_hdr, msg_buf, in_hdr;
65  int i, outcnt, incnt, err = 0;
66  u8 *buf;
67  
68  for (i = 0; i < nr; i++) {
69  if (!msgs[i].len)
70  break;
71  
72  /* Only 7-bit mode supported for this moment. For the 
address format,
73   * Please check the Virtio I2C Specification.
74   */
75  reqs[i].out_hdr.addr = cpu_to_le16(msgs[i].addr << 1);
76  
77  if (i != nr - 1)
  > 78  reqs[i].out_hdr.flags |= 
VIRTIO_I2C_FLAGS_FAIL_NEXT;
79  
80  outcnt = incnt = 0;
81  sg_init_one(_hdr, [i].out_hdr, 
sizeof(reqs[i].out_hdr));
82  sgs[outcnt++] = _hdr;
83  
84  buf = kzalloc(msgs[i].len, GFP_KERNEL);
85  if (!buf)
86  break;
87  
88  reqs[i].buf = buf;
89  sg_init_one(_buf, reqs[i].buf, msgs[i].len);
90  
91  if (msgs[i].flags & I2C_M_RD) {
92  sgs[outcnt + incnt++] = _buf;
93  } else {
94  memcpy(reqs[i].buf, msgs[i].buf, msgs[i].len);
95  sgs[outcnt++] = _buf;
96  }
97  
98  sg_init_one(_hdr, [i].in_hdr, 
sizeof(reqs[i].in_hdr));
99  sgs[outcnt + incnt++] = _hdr;
   100  
   101  err = virtqueue_add_sgs(vq, sgs, outcnt, incnt, 
[i], GFP_KERNEL);
   102  if (err < 0) {
   103  pr_err("failed to add msg[%d] to virtqueue.\n", 
i);
   104  kfree(reqs[i].buf);
   105  reqs[i].buf = NULL;
   106  break;
   107  }
   108  }
   109  
   110  return i;
   111  }
   112  

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


.config.gz
Description: application/gzip
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH v6] i2c: virtio: add a virtio i2c frontend driver

2021-03-03 Thread kernel test robot
Hi Jie,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on wsa/i2c/for-next]
[also build test ERROR on vhost/linux-next linux/master linus/master v5.12-rc1 
next-20210303]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:
https://github.com/0day-ci/linux/commits/Jie-Deng/i2c-virtio-add-a-virtio-i2c-frontend-driver/20210304-100543
base:   https://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux.git 
i2c/for-next
config: i386-randconfig-a006-20210304 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce (this is a W=1 build):
# 
https://github.com/0day-ci/linux/commit/bb645653b6d7750a026229726f8d9d0371457ac6
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review 
Jie-Deng/i2c-virtio-add-a-virtio-i2c-frontend-driver/20210304-100543
git checkout bb645653b6d7750a026229726f8d9d0371457ac6
# save the attached .config to linux build tree
make W=1 ARCH=i386 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All errors (new ones prefixed by >>):

   In file included from :32:
>> ./usr/include/linux/virtio_i2c.h:35:2: error: unknown type name 'u8'
  35 |  u8 status;
 |  ^~

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


.config.gz
Description: application/gzip
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [RFC v4 06/11] vduse: Implement an MMU-based IOMMU driver

2021-03-03 Thread Jason Wang


On 2021/2/23 7:50 下午, Xie Yongji wrote:

This implements a MMU-based IOMMU driver to support mapping
kernel dma buffer into userspace. The basic idea behind it is
treating MMU (VA->PA) as IOMMU (IOVA->PA). The driver will set
up MMU mapping instead of IOMMU mapping for the DMA transfer so
that the userspace process is able to use its virtual address to
access the dma buffer in kernel.

And to avoid security issue, a bounce-buffering mechanism is
introduced to prevent userspace accessing the original buffer
directly.

Signed-off-by: Xie Yongji 
---
  drivers/vdpa/vdpa_user/iova_domain.c | 486 +++
  drivers/vdpa/vdpa_user/iova_domain.h |  61 +
  2 files changed, 547 insertions(+)
  create mode 100644 drivers/vdpa/vdpa_user/iova_domain.c
  create mode 100644 drivers/vdpa/vdpa_user/iova_domain.h

diff --git a/drivers/vdpa/vdpa_user/iova_domain.c 
b/drivers/vdpa/vdpa_user/iova_domain.c
new file mode 100644
index ..9285d430d486
--- /dev/null
+++ b/drivers/vdpa/vdpa_user/iova_domain.c
@@ -0,0 +1,486 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * MMU-based IOMMU implementation
+ *
+ * Copyright (C) 2020 Bytedance Inc. and/or its affiliates. All rights 
reserved.
+ *
+ * Author: Xie Yongji 
+ *
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+#include "iova_domain.h"
+
+#define IOVA_START_PFN 1
+#define IOVA_ALLOC_ORDER 12
+#define IOVA_ALLOC_SIZE (1 << IOVA_ALLOC_ORDER)
+
+static inline struct page *
+vduse_domain_get_bounce_page(struct vduse_iova_domain *domain, u64 iova)
+{
+   u64 index = iova >> PAGE_SHIFT;
+
+   return domain->bounce_pages[index];
+}
+
+static inline void
+vduse_domain_set_bounce_page(struct vduse_iova_domain *domain,
+   u64 iova, struct page *page)
+{
+   u64 index = iova >> PAGE_SHIFT;
+
+   domain->bounce_pages[index] = page;
+}
+
+static enum dma_data_direction perm_to_dir(int perm)
+{
+   enum dma_data_direction dir;
+
+   switch (perm) {
+   case VHOST_MAP_WO:
+   dir = DMA_FROM_DEVICE;
+   break;
+   case VHOST_MAP_RO:
+   dir = DMA_TO_DEVICE;
+   break;
+   case VHOST_MAP_RW:
+   dir = DMA_BIDIRECTIONAL;
+   break;
+   default:
+   break;
+   }
+
+   return dir;
+}
+
+static int dir_to_perm(enum dma_data_direction dir)
+{
+   int perm = -EFAULT;
+
+   switch (dir) {
+   case DMA_FROM_DEVICE:
+   perm = VHOST_MAP_WO;
+   break;
+   case DMA_TO_DEVICE:
+   perm = VHOST_MAP_RO;
+   break;
+   case DMA_BIDIRECTIONAL:
+   perm = VHOST_MAP_RW;
+   break;
+   default:
+   break;
+   }
+
+   return perm;
+}



Let's move the above two helpers to vhost_iotlb.h so they could be used 
by other driver e.g (vpda_sim)




+
+static void do_bounce(phys_addr_t orig, void *addr, size_t size,
+   enum dma_data_direction dir)
+{
+   unsigned long pfn = PFN_DOWN(orig);
+
+   if (PageHighMem(pfn_to_page(pfn))) {
+   unsigned int offset = offset_in_page(orig);
+   char *buffer;
+   unsigned int sz = 0;
+   unsigned long flags;
+
+   while (size) {
+   sz = min_t(size_t, PAGE_SIZE - offset, size);
+
+   local_irq_save(flags);
+   buffer = kmap_atomic(pfn_to_page(pfn));
+   if (dir == DMA_TO_DEVICE)
+   memcpy(addr, buffer + offset, sz);
+   else
+   memcpy(buffer + offset, addr, sz);
+   kunmap_atomic(buffer);
+   local_irq_restore(flags);



I wonder why we need to deal with highmem and irq flags explicitly like 
this. Doesn't kmap_atomic() will take care all of those?




+
+   size -= sz;
+   pfn++;
+   addr += sz;
+   offset = 0;
+   }
+   } else if (dir == DMA_TO_DEVICE) {
+   memcpy(addr, phys_to_virt(orig), size);
+   } else {
+   memcpy(phys_to_virt(orig), addr, size);
+   }
+}
+
+static struct page *
+vduse_domain_get_mapping_page(struct vduse_iova_domain *domain, u64 iova)
+{
+   u64 start = iova & PAGE_MASK;
+   u64 last = start + PAGE_SIZE - 1;
+   struct vhost_iotlb_map *map;
+   struct page *page = NULL;
+
+   spin_lock(>iotlb_lock);
+   map = vhost_iotlb_itree_first(domain->iotlb, start, last);
+   if (!map)
+   goto out;
+
+   page = pfn_to_page((map->addr + iova - map->start) >> PAGE_SHIFT);
+   get_page(page);
+out:
+   spin_unlock(>iotlb_lock);
+
+   return page;
+}
+
+static struct page *
+vduse_domain_alloc_bounce_page(struct vduse_iova_domain *domain, u64 iova)
+{
+   u64 start = iova & 

Re: [PATCH RFC v1 15/15] iommu/virtio: Update fault type and reason info for viommu fault

2021-03-03 Thread Jean-Philippe Brucker
On Fri, Jan 15, 2021 at 05:43:42PM +0530, Vivek Gautam wrote:
> Fault type information can tell about a page request fault or
> an unreceoverable fault, and further additions to fault reasons
> and the related PASID information can help in handling faults
> efficiently.
> 
> Signed-off-by: Vivek Gautam 
> Cc: Joerg Roedel 
> Cc: Will Deacon 
> Cc: Michael S. Tsirkin 
> Cc: Robin Murphy 
> Cc: Jean-Philippe Brucker 
> Cc: Eric Auger 
> Cc: Alex Williamson 
> Cc: Kevin Tian 
> Cc: Jacob Pan 
> Cc: Liu Yi L 
> Cc: Lorenzo Pieralisi 
> Cc: Shameerali Kolothum Thodi 
> ---
>  drivers/iommu/virtio-iommu.c  | 27 +--
>  include/uapi/linux/virtio_iommu.h | 13 -
>  2 files changed, 37 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
> index 9cc3d35125e9..10ef9e98214a 100644
> --- a/drivers/iommu/virtio-iommu.c
> +++ b/drivers/iommu/virtio-iommu.c
> @@ -652,9 +652,16 @@ static int viommu_fault_handler(struct viommu_dev 
> *viommu,
>   char *reason_str;
>  
>   u8 reason   = fault->reason;
> + u16 type= fault->flt_type;
>   u32 flags   = le32_to_cpu(fault->flags);
>   u32 endpoint= le32_to_cpu(fault->endpoint);
>   u64 address = le64_to_cpu(fault->address);
> + u32 pasid   = le32_to_cpu(fault->pasid);
> +
> + if (type == VIRTIO_IOMMU_FAULT_F_PAGE_REQ) {
> + dev_info(viommu->dev, "Page request fault - unhandled\n");
> + return 0;
> + }
>  
>   switch (reason) {
>   case VIRTIO_IOMMU_FAULT_R_DOMAIN:
> @@ -663,6 +670,21 @@ static int viommu_fault_handler(struct viommu_dev 
> *viommu,
>   case VIRTIO_IOMMU_FAULT_R_MAPPING:
>   reason_str = "page";
>   break;
> + case VIRTIO_IOMMU_FAULT_R_WALK_EABT:
> + reason_str = "page walk external abort";
> + break;
> + case VIRTIO_IOMMU_FAULT_R_PTE_FETCH:
> + reason_str = "pte fetch";
> + break;
> + case VIRTIO_IOMMU_FAULT_R_PERMISSION:
> + reason_str = "permission";
> + break;
> + case VIRTIO_IOMMU_FAULT_R_ACCESS:
> + reason_str = "access";
> + break;
> + case VIRTIO_IOMMU_FAULT_R_OOR_ADDRESS:
> + reason_str = "output address";
> + break;
>   case VIRTIO_IOMMU_FAULT_R_UNKNOWN:
>   default:
>   reason_str = "unknown";
> @@ -671,8 +693,9 @@ static int viommu_fault_handler(struct viommu_dev *viommu,
>  
>   /* TODO: find EP by ID and report_iommu_fault */
>   if (flags & VIRTIO_IOMMU_FAULT_F_ADDRESS)
> - dev_err_ratelimited(viommu->dev, "%s fault from EP %u at %#llx 
> [%s%s%s]\n",
> - reason_str, endpoint, address,
> + dev_err_ratelimited(viommu->dev,
> + "%s fault from EP %u PASID %u at %#llx 
> [%s%s%s]\n",
> + reason_str, endpoint, pasid, address,
>   flags & VIRTIO_IOMMU_FAULT_F_READ ? "R" : 
> "",
>   flags & VIRTIO_IOMMU_FAULT_F_WRITE ? "W" : 
> "",
>   flags & VIRTIO_IOMMU_FAULT_F_EXEC ? "X" : 
> "");
> diff --git a/include/uapi/linux/virtio_iommu.h 
> b/include/uapi/linux/virtio_iommu.h
> index 608c8d642e1f..a537d82777f7 100644
> --- a/include/uapi/linux/virtio_iommu.h
> +++ b/include/uapi/linux/virtio_iommu.h
> @@ -290,19 +290,30 @@ struct virtio_iommu_req_invalidate {
>  #define VIRTIO_IOMMU_FAULT_R_UNKNOWN 0
>  #define VIRTIO_IOMMU_FAULT_R_DOMAIN  1
>  #define VIRTIO_IOMMU_FAULT_R_MAPPING 2
> +#define VIRTIO_IOMMU_FAULT_R_WALK_EABT   3
> +#define VIRTIO_IOMMU_FAULT_R_PTE_FETCH   4
> +#define VIRTIO_IOMMU_FAULT_R_PERMISSION  5
> +#define VIRTIO_IOMMU_FAULT_R_ACCESS  6
> +#define VIRTIO_IOMMU_FAULT_R_OOR_ADDRESS 7
>  
>  #define VIRTIO_IOMMU_FAULT_F_READ(1 << 0)
>  #define VIRTIO_IOMMU_FAULT_F_WRITE   (1 << 1)
>  #define VIRTIO_IOMMU_FAULT_F_EXEC(1 << 2)
>  #define VIRTIO_IOMMU_FAULT_F_ADDRESS (1 << 8)
>  
> +#define VIRTIO_IOMMU_FAULT_F_DMA_UNRECOV 1
> +#define VIRTIO_IOMMU_FAULT_F_PAGE_REQ2

Currently all reported faults are unrecoverable, so to be consistent
DMA_UNRECOV should be 0. But I'd prefer having just a new "page request"
flag in the flags field, instead of the flt_type field.

For page requests we'll also need a 16-bit fault ID field to store the PRI
"page request group index" or the stall "stag". "last" and "privileged"
flags as well, to match the PRI page request. And a new command to
complete a page fault.

> +
>  struct virtio_iommu_fault {
>   __u8reason;
> - __u8reserved[3];
> + __le16  flt_type;
> + __u8   

Re: [RFC v4 05/11] vdpa: Support transferring virtual addressing during DMA mapping

2021-03-03 Thread Jason Wang


On 2021/2/23 7:50 下午, Xie Yongji wrote:

This patch introduces an attribute for vDPA device to indicate
whether virtual address can be used. If vDPA device driver set
it, vhost-vdpa bus driver will not pin user page and transfer
userspace virtual address instead of physical address during
DMA mapping. And corresponding vma->vm_file and offset will be
also passed as an opaque pointer.

Suggested-by: Jason Wang 
Signed-off-by: Xie Yongji 
---
  drivers/vdpa/ifcvf/ifcvf_main.c   |   2 +-
  drivers/vdpa/mlx5/net/mlx5_vnet.c |   2 +-
  drivers/vdpa/vdpa.c   |   9 +++-
  drivers/vdpa/vdpa_sim/vdpa_sim.c  |   2 +-
  drivers/vhost/vdpa.c  | 104 +++---
  include/linux/vdpa.h  |  20 ++--
  6 files changed, 113 insertions(+), 26 deletions(-)

diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c
index 7c8bbfcf6c3e..228b9f920fea 100644
--- a/drivers/vdpa/ifcvf/ifcvf_main.c
+++ b/drivers/vdpa/ifcvf/ifcvf_main.c
@@ -432,7 +432,7 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct 
pci_device_id *id)
  
  	adapter = vdpa_alloc_device(struct ifcvf_adapter, vdpa,

dev, _vdpa_ops,
-   IFCVF_MAX_QUEUE_PAIRS * 2, NULL);
+   IFCVF_MAX_QUEUE_PAIRS * 2, NULL, false);
if (adapter == NULL) {
IFCVF_ERR(pdev, "Failed to allocate vDPA structure");
return -ENOMEM;
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c 
b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index 029822060017..54290438da28 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -1964,7 +1964,7 @@ static int mlx5v_probe(struct auxiliary_device *adev,
max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS);
  
  	ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, _vdpa_ops,

-2 * mlx5_vdpa_max_qps(max_vqs), NULL);
+2 * mlx5_vdpa_max_qps(max_vqs), NULL, false);
if (IS_ERR(ndev))
return PTR_ERR(ndev);
  
diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c

index 9700a0adcca0..fafc0ee5eb05 100644
--- a/drivers/vdpa/vdpa.c
+++ b/drivers/vdpa/vdpa.c
@@ -72,6 +72,7 @@ static void vdpa_release_dev(struct device *d)
   * @nvqs: number of virtqueues supported by this device
   * @size: size of the parent structure that contains private data
   * @name: name of the vdpa device; optional.
+ * @use_va: indicate whether virtual address can be used by this device



I think "use_va" means va must be used instead of "can be" here.



   *
   * Driver should use vdpa_alloc_device() wrapper macro instead of
   * using this directly.
@@ -81,7 +82,8 @@ static void vdpa_release_dev(struct device *d)
   */
  struct vdpa_device *__vdpa_alloc_device(struct device *parent,
const struct vdpa_config_ops *config,
-   int nvqs, size_t size, const char *name)
+   int nvqs, size_t size, const char *name,
+   bool use_va)
  {
struct vdpa_device *vdev;
int err = -EINVAL;
@@ -92,6 +94,10 @@ struct vdpa_device *__vdpa_alloc_device(struct device 
*parent,
if (!!config->dma_map != !!config->dma_unmap)
goto err;
  
+	/* It should only work for the device that use on-chip IOMMU */

+   if (use_va && !(config->dma_map || config->set_map))
+   goto err;
+
err = -ENOMEM;
vdev = kzalloc(size, GFP_KERNEL);
if (!vdev)
@@ -108,6 +114,7 @@ struct vdpa_device *__vdpa_alloc_device(struct device 
*parent,
vdev->config = config;
vdev->features_valid = false;
vdev->nvqs = nvqs;
+   vdev->use_va = use_va;
  
  	if (name)

err = dev_set_name(>dev, "%s", name);
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
index 5cfc262ce055..3a9a2dd4e987 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
@@ -235,7 +235,7 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr 
*dev_attr)
ops = _config_ops;
  
  	vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops,

-   dev_attr->nvqs, dev_attr->name);
+   dev_attr->nvqs, dev_attr->name, false);
if (!vdpasim)
goto err_alloc;
  
diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c

index 70857fe3263c..93769ace34df 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -480,21 +480,31 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
  static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last)
  {
struct vhost_dev *dev = >vdev;
+   struct vdpa_device *vdpa = v->vdpa;
struct vhost_iotlb *iotlb = 

[PATCH v6] i2c: virtio: add a virtio i2c frontend driver

2021-03-03 Thread Jie Deng
Add an I2C bus driver for virtio para-virtualization.

The controller can be emulated by the backend driver in
any device model software by following the virtio protocol.

The device specification can be found on
https://lists.oasis-open.org/archives/virtio-comment/202101/msg8.html.

By following the specification, people may implement different
backend drivers to emulate different controllers according to
their needs.

Co-developed-by: Conghui Chen 
Signed-off-by: Conghui Chen 
Signed-off-by: Jie Deng 
---
 drivers/i2c/busses/Kconfig  |  11 ++
 drivers/i2c/busses/Makefile |   3 +
 drivers/i2c/busses/i2c-virtio.c | 289 
 include/uapi/linux/virtio_i2c.h |  42 ++
 include/uapi/linux/virtio_ids.h |   1 +
 5 files changed, 346 insertions(+)
 create mode 100644 drivers/i2c/busses/i2c-virtio.c
 create mode 100644 include/uapi/linux/virtio_i2c.h

diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index 05ebf75..0860395 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -21,6 +21,17 @@ config I2C_ALI1535
  This driver can also be built as a module.  If so, the module
  will be called i2c-ali1535.
 
+config I2C_VIRTIO
+   tristate "Virtio I2C Adapter"
+   depends on VIRTIO
+   help
+ If you say yes to this option, support will be included for the virtio
+ I2C adapter driver. The hardware can be emulated by any device model
+ software according to the virtio protocol.
+
+ This driver can also be built as a module. If so, the module
+ will be called i2c-virtio.
+
 config I2C_ALI1563
tristate "ALI 1563"
depends on PCI
diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile
index 615f35e..b88da08 100644
--- a/drivers/i2c/busses/Makefile
+++ b/drivers/i2c/busses/Makefile
@@ -6,6 +6,9 @@
 # ACPI drivers
 obj-$(CONFIG_I2C_SCMI) += i2c-scmi.o
 
+# VIRTIO I2C host controller driver
+obj-$(CONFIG_I2C_VIRTIO)   += i2c-virtio.o
+
 # PC SMBus host controller drivers
 obj-$(CONFIG_I2C_ALI1535)  += i2c-ali1535.o
 obj-$(CONFIG_I2C_ALI1563)  += i2c-ali1563.o
diff --git a/drivers/i2c/busses/i2c-virtio.c b/drivers/i2c/busses/i2c-virtio.c
new file mode 100644
index 000..98c0fcc
--- /dev/null
+++ b/drivers/i2c/busses/i2c-virtio.c
@@ -0,0 +1,289 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Virtio I2C Bus Driver
+ *
+ * The Virtio I2C Specification:
+ * 
https://raw.githubusercontent.com/oasis-tcs/virtio-spec/master/virtio-i2c.tex
+ *
+ * Copyright (c) 2021 Intel Corporation. All rights reserved.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+
+/**
+ * struct virtio_i2c - virtio I2C data
+ * @vdev: virtio device for this controller
+ * @completion: completion of virtio I2C message
+ * @adap: I2C adapter for this controller
+ * @i2c_lock: lock for virtqueue processing
+ * @vq: the virtio virtqueue for communication
+ */
+struct virtio_i2c {
+   struct virtio_device *vdev;
+   struct completion completion;
+   struct i2c_adapter adap;
+   struct mutex i2c_lock;
+   struct virtqueue *vq;
+};
+
+/**
+ * struct virtio_i2c_req - the virtio I2C request structure
+ * @out_hdr: the OUT header of the virtio I2C message
+ * @buf: the buffer into which data is read, or from which it's written
+ * @in_hdr: the IN header of the virtio I2C message
+ */
+struct virtio_i2c_req {
+   struct virtio_i2c_out_hdr out_hdr;
+   u8 *buf;
+   struct virtio_i2c_in_hdr in_hdr;
+};
+
+static void virtio_i2c_msg_done(struct virtqueue *vq)
+{
+   struct virtio_i2c *vi = vq->vdev->priv;
+
+   complete(>completion);
+}
+
+static int virtio_i2c_send_reqs(struct virtqueue *vq,
+   struct virtio_i2c_req *reqs,
+   struct i2c_msg *msgs, int nr)
+{
+   struct scatterlist *sgs[3], out_hdr, msg_buf, in_hdr;
+   int i, outcnt, incnt, err = 0;
+   u8 *buf;
+
+   for (i = 0; i < nr; i++) {
+   if (!msgs[i].len)
+   break;
+
+   /* Only 7-bit mode supported for this moment. For the address 
format,
+* Please check the Virtio I2C Specification.
+*/
+   reqs[i].out_hdr.addr = cpu_to_le16(msgs[i].addr << 1);
+
+   if (i != nr - 1)
+   reqs[i].out_hdr.flags |= VIRTIO_I2C_FLAGS_FAIL_NEXT;
+
+   outcnt = incnt = 0;
+   sg_init_one(_hdr, [i].out_hdr, 
sizeof(reqs[i].out_hdr));
+   sgs[outcnt++] = _hdr;
+
+   buf = kzalloc(msgs[i].len, GFP_KERNEL);
+   if (!buf)
+   break;
+
+   reqs[i].buf = buf;
+   sg_init_one(_buf, reqs[i].buf, msgs[i].len);
+
+   if (msgs[i].flags & I2C_M_RD) {
+   sgs[outcnt + 

Re: [PATCH v5] i2c: virtio: add a virtio i2c frontend driver

2021-03-03 Thread Jie Deng



On 2021/3/3 17:38, Viresh Kumar wrote:

On 03-03-21, 16:46, Jie Deng wrote:

This is not a problem. My original proposal was to mirror the struct
i2c_msg.
The code you looked at was based on that.
However, the virtio TC prefer not to mirror it. They have some concerns.
For example, there is a bit I2C_M_RD in i2c_msg.flag which has the same
meaning with
the R/W in virtio descriptor. This is a repetition which may cause problems.
So the virtio_i2c_out_hdr.flags is used to instead of i2c_msg.flags for
extension.

So by default we don't support any of the existing flags except
I2C_M_RD?

Yes. That's the current status.

#define I2C_M_TEN   0x0010  /* this is a ten bit chip address */
#define I2C_M_RD0x0001  /* read data, from slave to master */
#define I2C_M_STOP  0x8000  /* if I2C_FUNC_PROTOCOL_MANGLING */
#define I2C_M_NOSTART   0x4000  /* if I2C_FUNC_NOSTART */
#define I2C_M_REV_DIR_ADDR  0x2000  /* if I2C_FUNC_PROTOCOL_MANGLING */
#define I2C_M_IGNORE_NAK0x1000  /* if I2C_FUNC_PROTOCOL_MANGLING */
#define I2C_M_NO_RD_ACK 0x0800  /* if I2C_FUNC_PROTOCOL_MANGLING */
#define I2C_M_RECV_LEN  0x0400  /* length will be first received byte */

How do we work with clients who want to use such flags now ?
My plan is to have a minimum driver get merged. Then we have a base and 
we can
update virtio_i2c_out_hdr.flags for the feature extensibility. Then, If 
you want to help to develop
this stuff, you can just follow the same flow. First, you can update the 
Spec by sending
comments to virtio-comm...@lists.oasis-open.org. Once your Spec patch is 
acked by the

virtio TC, you can then send patches to update the corresponding drivers.

Thanks.

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH RFC v1 13/15] iommu/virtio: Attach Arm PASID tables when available

2021-03-03 Thread Jean-Philippe Brucker
On Fri, Jan 15, 2021 at 05:43:40PM +0530, Vivek Gautam wrote:
[...]
> +static int viommu_setup_pgtable(struct viommu_endpoint *vdev,
> + struct viommu_domain *vdomain)
> +{
> + int ret, id;
> + u32 asid;
> + enum io_pgtable_fmt fmt;
> + struct io_pgtable_ops *ops = NULL;
> + struct viommu_dev *viommu = vdev->viommu;
> + struct virtio_iommu_probe_table_format *desc = vdev->pgtf;
> + struct iommu_pasid_table *tbl = vdomain->pasid_tbl;
> + struct iommu_vendor_psdtable_cfg *pst_cfg;
> + struct arm_smmu_cfg_info *cfgi;
> + struct io_pgtable_cfg cfg = {
> + .iommu_dev  = viommu->dev->parent,
> + .tlb= _flush_ops,
> + .pgsize_bitmap  = vdev->pgsize_mask ? vdev->pgsize_mask :
> +   vdomain->domain.pgsize_bitmap,
> + .ias= (vdev->input_end ? ilog2(vdev->input_end) :
> +
> ilog2(vdomain->domain.geometry.aperture_end)) + 1,
> + .oas= vdev->output_bits,
> + };
> +
> + if (!desc)
> + return -EINVAL;
> +
> + if (!vdev->output_bits)
> + return -ENODEV;
> +
> + switch (le16_to_cpu(desc->format)) {
> + case VIRTIO_IOMMU_FOMRAT_PGTF_ARM_LPAE:
> + fmt = ARM_64_LPAE_S1;
> + break;
> + default:
> + dev_err(vdev->dev, "unsupported page table format 0x%x\n",
> + le16_to_cpu(desc->format));
> + return -EINVAL;
> + }
> +
> + if (vdomain->mm.ops) {
> + /*
> +  * TODO: attach additional endpoint to the domain. Check that
> +  * the config is sane.
> +  */
> + return -EEXIST;
> + }
> +
> + vdomain->mm.domain = vdomain;
> + ops = alloc_io_pgtable_ops(fmt, , >mm);
> + if (!ops)
> + return -ENOMEM;
> +
> + pst_cfg = >cfg;
> + cfgi = _cfg->vendor.cfg;
> + id = ida_simple_get(_ida, 1, 1 << desc->asid_bits, GFP_KERNEL);
> + if (id < 0) {
> + ret = id;
> + goto err_free_pgtable;
> + }
> +
> + asid = id;
> + ret = iommu_psdtable_prepare(tbl, pst_cfg, , asid);
> + if (ret)
> + goto err_free_asid;
> +
> + /*
> +  * Strange to setup an op here?
> +  * cd-lib is the actual user of sync op, and therefore the platform
> +  * drivers should assign this sync/maintenance ops as per need.
> +  */
> + tbl->ops->sync = viommu_flush_pasid;

But this function deals with page tables, not pasid tables. As said on an
earlier patch, the TLB flush ops should probably be passed during table
registration - those ops are global so should really be const.

> +
> + /* Right now only PASID 0 supported ?? */
> + ret = iommu_psdtable_write(tbl, pst_cfg, 0, >s1_cfg->cd);
> + if (ret)
> + goto err_free_asid;
> +
> + vdomain->mm.ops = ops;
> + dev_dbg(vdev->dev, "using page table format 0x%x\n", fmt);
> +
> + return 0;
> +
> +err_free_asid:
> + ida_simple_remove(_ida, asid);
> +err_free_pgtable:
> + free_io_pgtable_ops(ops);
> + return ret;
> +}
> +
> +static int viommu_config_arm_pst(struct iommu_vendor_psdtable_cfg *pst_cfg,
> +  struct virtio_iommu_req_attach_pst_arm *req)
> +{
> + struct arm_smmu_s1_cfg *s1_cfg = pst_cfg->vendor.cfg.s1_cfg;
> +
> + if (!s1_cfg)
> + return -ENODEV;
> +
> + req->format = cpu_to_le16(VIRTIO_IOMMU_FORMAT_PSTF_ARM_SMMU_V3);
> + req->s1fmt  = s1_cfg->s1fmt;
> + req->s1dss  = VIRTIO_IOMMU_PSTF_ARM_SMMU_V3_DSS_0;
> + req->s1contextptr = cpu_to_le64(pst_cfg->base);
> + req->s1cdmax= cpu_to_le32(s1_cfg->s1cdmax);
> +
> + return 0;
> +}
> +
> +static int viommu_config_pst(struct iommu_vendor_psdtable_cfg *pst_cfg,
> +  void *req, enum pasid_table_fmt fmt)
> +{
> + int ret;
> +
> + switch (fmt) {
> + case PASID_TABLE_ARM_SMMU_V3:
> + ret = viommu_config_arm_pst(pst_cfg, req);
> + break;
> + default:
> + ret = -EINVAL;
> + WARN_ON(1);
> + }
> +
> + return ret;
> +}
> +
> +static int viommu_prepare_arm_pst(struct viommu_endpoint *vdev,
> +   struct iommu_vendor_psdtable_cfg *pst_cfg)
> +{
> + struct virtio_iommu_probe_table_format *pgtf = vdev->pgtf;
> + struct arm_smmu_cfg_info *cfgi = _cfg->vendor.cfg;
> + struct arm_smmu_s1_cfg *cfg;
> +
> + /* Some sanity checks */
> + if (pgtf->asid_bits != 8 && pgtf->asid_bits != 16)
> + return -EINVAL;

No need for this, next patch cheks asid size in viommu_config_arm_pgt()

> +
> + cfg = devm_kzalloc(pst_cfg->iommu_dev, sizeof(cfg), GFP_KERNEL);
> + if (!cfg)
> + return -ENOMEM;
> +
> + cfgi->s1_cfg = cfg;
> + cfg->s1cdmax = vdev->pasid_bits;
> + cfg->cd.asid = 

Re: [PATCH RFC v1 08/15] iommu: Add asid_bits to arm smmu-v3 stage1 table info

2021-03-03 Thread Jean-Philippe Brucker
On Fri, Jan 15, 2021 at 05:43:35PM +0530, Vivek Gautam wrote:
> aisd_bits data is required to prepare stage-1 tables for arm-smmu-v3.
> 
> Signed-off-by: Vivek Gautam 
> Cc: Joerg Roedel 
> Cc: Will Deacon 
> Cc: Robin Murphy 
> Cc: Jean-Philippe Brucker 
> Cc: Eric Auger 
> Cc: Alex Williamson 
> Cc: Kevin Tian 
> Cc: Jacob Pan 
> Cc: Liu Yi L 
> Cc: Lorenzo Pieralisi 
> Cc: Shameerali Kolothum Thodi 
> ---
>  include/uapi/linux/iommu.h | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h
> index 082d758dd016..96abbfc7c643 100644
> --- a/include/uapi/linux/iommu.h
> +++ b/include/uapi/linux/iommu.h
> @@ -357,7 +357,7 @@ struct iommu_pasid_smmuv3 {
>   __u32   version;
>   __u8s1fmt;
>   __u8s1dss;
> - __u8padding[2];
> + __u16   asid_bits;

Is this used anywhere?  This struct is passed from host userspace to host
kernel to attach the PASID table, so I don't think it needs an asid_bits
field.

Thanks,
Jean

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH RFC v1 05/15] iommu/arm-smmu-v3: Set sync op from consumer driver of cd-lib

2021-03-03 Thread Jean-Philippe Brucker
On Fri, Jan 15, 2021 at 05:43:32PM +0530, Vivek Gautam wrote:
> Te change allows different consumers of arm-smmu-v3-cd-lib to set
> their respective sync op for pasid entries.
> 
> Signed-off-by: Vivek Gautam 
> Cc: Joerg Roedel 
> Cc: Will Deacon 
> Cc: Robin Murphy 
> Cc: Jean-Philippe Brucker 
> Cc: Eric Auger 
> Cc: Alex Williamson 
> Cc: Kevin Tian 
> Cc: Jacob Pan 
> Cc: Liu Yi L 
> Cc: Lorenzo Pieralisi 
> Cc: Shameerali Kolothum Thodi 
> ---
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-cd-lib.c | 1 -
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c| 7 +++
>  2 files changed, 7 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-cd-lib.c 
> b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-cd-lib.c
> index ec37476c8d09..acaa09acecdd 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-cd-lib.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-cd-lib.c
> @@ -265,7 +265,6 @@ struct iommu_vendor_psdtable_ops arm_cd_table_ops = {
>   .free= arm_smmu_free_cd_tables,
>   .prepare = arm_smmu_prepare_cd,
>   .write   = arm_smmu_write_ctx_desc,
> - .sync= arm_smmu_sync_cd,
>  };
>  
>  struct iommu_pasid_table *arm_smmu_register_cd_table(struct device *dev,
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c 
> b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> index 2f86c6ac42b6..0c644be22b4b 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> @@ -1869,6 +1869,13 @@ static int arm_smmu_domain_finalise_s1(struct 
> arm_smmu_domain *smmu_domain,
>   if (ret)
>   goto out_free_cd_tables;
>  
> + /*
> +  * Strange to setup an op here?
> +  * cd-lib is the actual user of sync op, and therefore the platform
> +  * drivers should assign this sync/maintenance ops as per need.
> +  */
> + tbl->ops->sync = arm_smmu_sync_cd;
> +

Modifying a static struct from here doesn't feel right. I think the
interface should be roughly similar to io-pgtable since the principle is
the same. So the sync() op should be separate from arm_cd_table_ops since
it's a callback into the driver. Maybe pass it to
iommu_register_pasid_table().

Thanks,
Jean
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH v5] i2c: virtio: add a virtio i2c frontend driver

2021-03-03 Thread Stefan Hajnoczi
On Tue, Mar 02, 2021 at 11:54:02AM +0100, Arnd Bergmann wrote:
> On Tue, Mar 2, 2021 at 10:51 AM Stefan Hajnoczi  wrote:
> > On Tue, Mar 02, 2021 at 10:42:06AM +0800, Jie Deng wrote:
> > > > > +/*
> > > > > + * Definitions for virtio I2C Adpter
> > > > > + *
> > > > > + * Copyright (c) 2021 Intel Corporation. All rights reserved.
> > > > > + */
> > > > > +
> > > > > +#ifndef _UAPI_LINUX_VIRTIO_I2C_H
> > > > > +#define _UAPI_LINUX_VIRTIO_I2C_H
> > > > Why is this a uapi header? Can't this all be moved into the driver
> > > > itself?
> >
> > Linux VIRTIO drivers provide a uapi header with structs and constants
> > that describe the device interface. This allows other software like
> > QEMU, other operating systems, etc to reuse these headers instead of
> > redefining everything.
> >
> > These files should contain:
> > 1. Device-specific feature bits (VIRTIO__F_)
> > 2. VIRTIO Configuration Space layout (struct virtio__config)
> > 3. Virtqueue request layout (struct virtio__)
> >
> > For examples, see  and .
> 
> Ok, makes sense. So it's not strictly uapi but just helpful for
> virtio applications to reference these. I suppose it is slightly harder
> when building qemu on other operating systems though, how does
> it get these headers on BSD or Windows?

qemu.git imports Linux headers from time to time and can use them
instead of system headers:

  
https://gitlab.com/qemu-project/qemu/-/blob/master/scripts/update-linux-headers.sh

Stefan


signature.asc
Description: PGP signature
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH RFC v1 09/15] iommu/virtio: Update table format probing header

2021-03-03 Thread Jean-Philippe Brucker
On Fri, Jan 15, 2021 at 05:43:36PM +0530, Vivek Gautam wrote:
> Add info about asid_bits and additional flags to table format
> probing header.
> 
> Signed-off-by: Vivek Gautam 
> Cc: Joerg Roedel 
> Cc: Will Deacon 
> Cc: Michael S. Tsirkin 
> Cc: Robin Murphy 
> Cc: Jean-Philippe Brucker 
> Cc: Eric Auger 
> Cc: Alex Williamson 
> Cc: Kevin Tian 
> Cc: Jacob Pan 
> Cc: Liu Yi L 
> Cc: Lorenzo Pieralisi 
> Cc: Shameerali Kolothum Thodi 
> ---
>  include/uapi/linux/virtio_iommu.h | 5 -
>  1 file changed, 4 insertions(+), 1 deletion(-)
> 
> diff --git a/include/uapi/linux/virtio_iommu.h 
> b/include/uapi/linux/virtio_iommu.h
> index 43821e33e7af..8a0624bab4b2 100644
> --- a/include/uapi/linux/virtio_iommu.h
> +++ b/include/uapi/linux/virtio_iommu.h
> @@ -169,7 +169,10 @@ struct virtio_iommu_probe_pasid_size {
>  struct virtio_iommu_probe_table_format {
>   struct virtio_iommu_probe_property  head;
>   __le16  format;
> - __u8reserved[2];
> + __le16  asid_bits;
> +
> + __le32  flags;

This struct should only contain the head and format fields. asid and flags
should go in a specialized structure - virtio_iommu_probe_pgt_arm64 in the
latest spec draft, where I dropped the asid_bits field in favor of an
"ASID16" flag.

Thanks,
Jean

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH RFC v1 02/15] iommu: Add a simple PASID table library

2021-03-03 Thread Jean-Philippe Brucker
Hi Vivek,

Thanks again for working on this. I have a few comments but it looks
sensible overall.

Regarding the overall design, I was initially assigning page directories
instead of whole PASID tables, which would simplify the driver and host
implementation. A major complication, however, is SMMUv3 accesses PASID
tables using a guest-physical address, so there is a messy negotiation
needed between host and guest when the host needs to allocate PASID
tables. Plus vSMMU needs PASID table assignment, so that's what the host
driver will implement.

On Fri, Jan 15, 2021 at 05:43:29PM +0530, Vivek Gautam wrote:
> Add a small API in iommu subsystem to handle PASID table allocation
> requests from different consumer drivers, such as a paravirtualized
> iommu driver. The API provides ops for allocating and freeing PASID
> table, writing to it and managing the table caches.
> 
> This library also provides for registering a vendor API that attaches
> to these ops. The vendor APIs would eventually perform arch level
> implementations for these PASID tables.

Although Arm might be the only vendor to ever use this, I think the
abstraction makes sense and isn't too invasive. Even if we called directly
into the SMMU driver from the virtio one, we'd still need patch 3 and
separate TLB invalidations ops.

> Signed-off-by: Vivek Gautam 
> Cc: Joerg Roedel 
> Cc: Will Deacon 
> Cc: Robin Murphy 
> Cc: Jean-Philippe Brucker 
> Cc: Eric Auger 
> Cc: Alex Williamson 
> Cc: Kevin Tian 
> Cc: Jacob Pan 
> Cc: Liu Yi L 
> Cc: Lorenzo Pieralisi 
> Cc: Shameerali Kolothum Thodi 
> ---
>  drivers/iommu/iommu-pasid-table.h | 134 ++
>  1 file changed, 134 insertions(+)
>  create mode 100644 drivers/iommu/iommu-pasid-table.h
> 
> diff --git a/drivers/iommu/iommu-pasid-table.h 
> b/drivers/iommu/iommu-pasid-table.h
> new file mode 100644
> index ..bd4f57656f67
> --- /dev/null
> +++ b/drivers/iommu/iommu-pasid-table.h
> @@ -0,0 +1,134 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * PASID table management for the IOMMU
> + *
> + * Copyright (C) 2021 Arm Ltd.
> + */
> +#ifndef __IOMMU_PASID_TABLE_H
> +#define __IOMMU_PASID_TABLE_H
> +
> +#include 
> +
> +#include "arm/arm-smmu-v3/arm-smmu-v3.h"
> +
> +enum pasid_table_fmt {
> + PASID_TABLE_ARM_SMMU_V3,
> + PASID_TABLE_NUM_FMTS,
> +};
> +
> +/**
> + * struct arm_smmu_cfg_info - arm-smmu-v3 specific configuration data
> + *
> + * @s1_cfg: arm-smmu-v3 stage1 config data
> + * @feat_flag: features supported by arm-smmu-v3 implementation
> + */
> +struct arm_smmu_cfg_info {
> + struct arm_smmu_s1_cfg  *s1_cfg;
> + u32 feat_flag;
> +};
> +
> +/**
> + * struct iommu_vendor_psdtable_cfg - Configuration data for PASID tables
> + *
> + * @iommu_dev: device performing the DMA table walks
> + * @fmt: The PASID table format
> + * @base: DMA address of the allocated table, set by the vendor driver
> + * @cfg: arm-smmu-v3 specific config data
> + */
> +struct iommu_vendor_psdtable_cfg {
> + struct device   *iommu_dev;
> + enum pasid_table_fmtfmt;
> + dma_addr_t  base;
> + union {
> + struct arm_smmu_cfg_infocfg;

For the union to be extensible, that field should be called "arm" or
something like that.

Thanks,
Jean

> + } vendor;
> +};
> +
> +struct iommu_vendor_psdtable_ops;
> +
> +/**
> + * struct iommu_pasid_table - describes a set of PASID tables
> + *
> + * @cookie: An opaque token provided by the IOMMU driver and passed back to 
> any
> + * callback routine.
> + * @cfg: A copy of the PASID table configuration
> + * @ops: The PASID table operations in use for this set of page tables
> + */
> +struct iommu_pasid_table {
> + void*cookie;
> + struct iommu_vendor_psdtable_cfgcfg;
> + struct iommu_vendor_psdtable_ops*ops;
> +};
> +
> +#define pasid_table_cfg_to_table(pst_cfg) \
> + container_of((pst_cfg), struct iommu_pasid_table, cfg)
> +
> +struct iommu_vendor_psdtable_ops {
> + int (*alloc)(struct iommu_vendor_psdtable_cfg *cfg);
> + void (*free)(struct iommu_vendor_psdtable_cfg *cfg);
> + void (*prepare)(struct iommu_vendor_psdtable_cfg *cfg,
> + struct io_pgtable_cfg *pgtbl_cfg, u32 asid);
> + int (*write)(struct iommu_vendor_psdtable_cfg *cfg, int ssid,
> +  void *cookie);
> + void (*sync)(void *cookie, int ssid, bool leaf);
> +};
> +
> +static inline int iommu_psdtable_alloc(struct iommu_pasid_table *tbl,
> +struct iommu_vendor_psdtable_cfg *cfg)
> +{
> + if (!tbl->ops->alloc)
> + return -ENOSYS;
> +
> + return tbl->ops->alloc(cfg);
> +}
> +
> +static inline void iommu_psdtable_free(struct iommu_pasid_table *tbl,
> +struct iommu_vendor_psdtable_cfg *cfg)
> +{
> + if (!tbl->ops->free)
> + return;
> +
> + 

Re: [PATCH RFC v1 04/15] iommu/arm-smmu-v3: Update CD base address info for user-space

2021-03-03 Thread Jean-Philippe Brucker
On Fri, Jan 15, 2021 at 05:43:31PM +0530, Vivek Gautam wrote:
> Update base address information in vendor pasid table info to pass that
> to user-space for stage1 table management.
> 
> Signed-off-by: Vivek Gautam 
> Cc: Joerg Roedel 
> Cc: Will Deacon 
> Cc: Robin Murphy 
> Cc: Jean-Philippe Brucker 
> Cc: Eric Auger 
> Cc: Alex Williamson 
> Cc: Kevin Tian 
> Cc: Jacob Pan 
> Cc: Liu Yi L 
> Cc: Lorenzo Pieralisi 
> Cc: Shameerali Kolothum Thodi 
> ---
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-cd-lib.c | 6 ++
>  1 file changed, 6 insertions(+)
> 
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-cd-lib.c 
> b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-cd-lib.c
> index 8a7187534706..ec37476c8d09 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-cd-lib.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-cd-lib.c
> @@ -55,6 +55,9 @@ static __le64 *arm_smmu_get_cd_ptr(struct 
> iommu_vendor_psdtable_cfg *pst_cfg,
>   if (arm_smmu_alloc_cd_leaf_table(dev, l1_desc))
>   return NULL;
>  
> + if (s1cfg->s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
> + pst_cfg->base = l1_desc->l2ptr_dma;
> +

This isn't the right place, because this path allocates second-level
tables for two-level tables. I don't think we need pst_cfg->base at all,
because for both linear and two-level tables, the base pointer is in
cdcfg->cdtab_dma, which can be read directly.

Thanks,
Jean

>   l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
>   arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
>   /* An invalid L1CD can be cached */
> @@ -211,6 +214,9 @@ static int arm_smmu_alloc_cd_tables(struct 
> iommu_vendor_psdtable_cfg *pst_cfg)
>   goto err_free_l1;
>   }
>  
> + if (s1cfg->s1fmt == STRTAB_STE_0_S1FMT_64K_L2)
> + pst_cfg->base = cdcfg->cdtab_dma;
> +
>   return 0;
>  
>  err_free_l1:
> -- 
> 2.17.1
> 
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH RFC v1 06/15] iommu/virtio: Add headers for table format probing

2021-03-03 Thread Jean-Philippe Brucker
On Fri, Jan 15, 2021 at 05:43:33PM +0530, Vivek Gautam wrote:
> From: Jean-Philippe Brucker 
> 
> Add required UAPI defines for probing table format for underlying
> iommu hardware. The device may provide information about hardware
> tables and additional capabilities for each device.
> This allows guest to correctly fabricate stage-1 page tables.
> 
> Signed-off-by: Jean-Philippe Brucker 
> [Vivek: Use a single "struct virtio_iommu_probe_table_format" rather
> than separate structures for page table and pasid table format.

Makes sense. I've integrated that into the spec draft, added more precise
documentation and modified some of the definitions.

The current draft is here:
https://jpbrucker.net/virtio-iommu/spec/virtio-iommu-v0.13.pdf
Posted on the list here
https://lists.oasis-open.org/archives/virtio-dev/202102/msg00012.html

>   Also update commit message.]
> Signed-off-by: Vivek Gautam 
> Cc: Joerg Roedel 
> Cc: Will Deacon 
> Cc: Michael S. Tsirkin 
> Cc: Robin Murphy 
> Cc: Jean-Philippe Brucker 
> Cc: Eric Auger 
> Cc: Alex Williamson 
> Cc: Kevin Tian 
> Cc: Jacob Pan 
> Cc: Liu Yi L 
> Cc: Lorenzo Pieralisi 
> Cc: Shameerali Kolothum Thodi 
> ---
>  include/uapi/linux/virtio_iommu.h | 44 ++-
>  1 file changed, 43 insertions(+), 1 deletion(-)
> 
> diff --git a/include/uapi/linux/virtio_iommu.h 
> b/include/uapi/linux/virtio_iommu.h
> index 237e36a280cb..43821e33e7af 100644
> --- a/include/uapi/linux/virtio_iommu.h
> +++ b/include/uapi/linux/virtio_iommu.h
> @@ -2,7 +2,7 @@
>  /*
>   * Virtio-iommu definition v0.12
>   *
> - * Copyright (C) 2019 Arm Ltd.
> + * Copyright (C) 2019-2021 Arm Ltd.

Not strictly necessary. But if you're modifying this comment please also
remove the "v0.12" above

>   */
>  #ifndef _UAPI_LINUX_VIRTIO_IOMMU_H
>  #define _UAPI_LINUX_VIRTIO_IOMMU_H
> @@ -111,6 +111,12 @@ struct virtio_iommu_req_unmap {
>  
>  #define VIRTIO_IOMMU_PROBE_T_NONE0
>  #define VIRTIO_IOMMU_PROBE_T_RESV_MEM1
> +#define VIRTIO_IOMMU_PROBE_T_PAGE_SIZE_MASK  2
> +#define VIRTIO_IOMMU_PROBE_T_INPUT_RANGE 3
> +#define VIRTIO_IOMMU_PROBE_T_OUTPUT_SIZE 4
> +#define VIRTIO_IOMMU_PROBE_T_PASID_SIZE  5
> +#define VIRTIO_IOMMU_PROBE_T_PAGE_TABLE_FMT  6
> +#define VIRTIO_IOMMU_PROBE_T_PASID_TABLE_FMT 7

Since there is a single struct we can have a single
VIRTIO_IOMMU_PROBE_T_TABLE_FORMAT.

>  
>  #define VIRTIO_IOMMU_PROBE_T_MASK0xfff
>  
> @@ -130,6 +136,42 @@ struct virtio_iommu_probe_resv_mem {
>   __le64  end;
>  };
>  
> +struct virtio_iommu_probe_page_size_mask {
> + struct virtio_iommu_probe_property  head;
> + __u8reserved[4];
> + __le64  mask;
> +};
> +
> +struct virtio_iommu_probe_input_range {
> + struct virtio_iommu_probe_property  head;
> + __u8reserved[4];
> + __le64  start;
> + __le64  end;
> +};
> +
> +struct virtio_iommu_probe_output_size {
> + struct virtio_iommu_probe_property  head;
> + __u8bits;
> + __u8reserved[3];
> +};
> +
> +struct virtio_iommu_probe_pasid_size {
> + struct virtio_iommu_probe_property  head;
> + __u8bits;
> + __u8reserved[3];
> +};
> +
> +/* Arm LPAE page table format */
> +#define VIRTIO_IOMMU_FOMRAT_PGTF_ARM_LPAE1

s/FOMRAT/FORMAT

> +/* Arm smmu-v3 type PASID table format */
> +#define VIRTIO_IOMMU_FORMAT_PSTF_ARM_SMMU_V3 2

These should be with the Arm-specific definitions patches 11 and 14

Thanks,
Jean

> +
> +struct virtio_iommu_probe_table_format {
> + struct virtio_iommu_probe_property  head;
> + __le16  format;
> + __u8reserved[2];
> +};
> +
>  struct virtio_iommu_req_probe {
>   struct virtio_iommu_req_headhead;
>   __le32  endpoint;
> -- 
> 2.17.1
> 
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [virtio-comment] [RFC PATCH v1 1/1] virtio-vsock: add SOCK_SEQPACKET description

2021-03-03 Thread Cornelia Huck
On Wed, 3 Mar 2021 11:52:43 -0500
"Michael S. Tsirkin"  wrote:

> On Wed, Mar 03, 2021 at 01:08:43PM +0100, Cornelia Huck wrote:
> > On Wed, 24 Feb 2021 10:32:00 +0100
> > Stefano Garzarella  wrote:
> >   
> > > On Thu, Feb 18, 2021 at 09:08:23AM +0300, Arseny Krasnov wrote:  
> > > >Signed-off-by: Arseny Krasnov 
> > > >---
> > > > virtio-vsock.tex | 40 +---
> > > > 1 file changed, 37 insertions(+), 3 deletions(-)
> > > >
> > > >diff --git a/virtio-vsock.tex b/virtio-vsock.tex
> > > >index da7e641..1ee8f99 100644
> > > >--- a/virtio-vsock.tex
> > > >+++ b/virtio-vsock.tex
> > > >@@ -102,6 +102,11 @@ \subsection{Device Operation}\label{sec:Device 
> > > >Types / Socket Device / Device Op
> > > > VIRTIO_VSOCK_OP_CREDIT_UPDATE = 6,
> > > > /* Request the peer to send the credit info to us */
> > > > VIRTIO_VSOCK_OP_CREDIT_REQUEST = 7,
> > > >+
> > > >+/* Message begin for SOCK_SEQPACKET */
> > > >+VIRTIO_VSOCK_OP_SEQ_BEGIN = 8,
> > > >+/* Message end for SOCK_SEQPACKET */
> > > >+VIRTIO_VSOCK_OP_SEQ_END = 9,
> > > > };
> > > > \end{lstlisting}
> > > >
> > > >@@ -140,11 +145,11 @@ \subsubsection{Addressing}\label{sec:Device Types 
> > > >/ Socket Device / Device Opera
> > > > consists of a (cid, port number) tuple. The header fields used for this 
> > > > are
> > > > \field{src_cid}, \field{src_port}, \field{dst_cid}, and 
> > > > \field{dst_port}.
> > > >
> > > >-Currently only stream sockets are supported. \field{type} is 1 for 
> > > >stream
> > > >-socket types.
> > > >+Currently stream and seqpacket sockets are supported. \field{type} is 1 
> > > >for
> > > >+stream socket types. \field{type} is 2 for seqpacket socket types.
> > > >
> > > > Stream sockets provide in-order, guaranteed, connection-oriented 
> > > > delivery
> > > >-without message boundaries.
> > > >+without message boundaries. Seqpacket sockets also provide message 
> > > >boundaries.
> > > >
> > > > \subsubsection{Buffer Space Management}\label{sec:Device Types / Socket 
> > > > Device / Device Operation / Buffer Space Management}
> > > > \field{buf_alloc} and \field{fwd_cnt} are used for buffer space 
> > > > management of
> > > >@@ -240,6 +245,35 @@ \subsubsection{Stream Sockets}\label{sec:Device 
> > > >Types / Socket Device / Device O
> > > > destination) address tuple for a new connection while the other peer is 
> > > > still
> > > > processing the old connection.
> > > >
> > > >+\subsubsection{Seqpacket Sockets}\label{sec:Device Types / Socket 
> > > >Device / Device Operation / Seqpacket Sockets}
> > > >+
> > > >+Seqpacket sockets differ from stream sockets only in data transmission 
> > > >way: in
> > > >+stream sockets all data is sent using only VIRTIO_VSOCK_OP_RW packets. 
> > > >In
> > > >+seqpacket sockets, to provide message boundaries, every sequence of
> > > >+VIRTIO_VSOCK_OP_RW packets of each message is headed with
> > >   ^
> > > Since this is a spec, I think we should use MUST when something must be 
> > > respected by the peer, for example here we can say "MUST be headed"
> > >   
> > > >+VIRTIO_VSOCK_OP_SEQ_BEGIN and tailed with VIRTIO_VSOCK_OP_SEQ_END 
> > > >packets.
> > > >+Both VIRTIO_VSOCK_OP_SEQ_BEGIN and VIRTIO_VSOCK_OP_SEQ_END packets 
> > > >carry
> > >   ^
> > > Same here "MUST carry" and in the rest of the patch.  
> > 
> > Actually, MUST and friends are really for normative sections; I'd
> > advise to have a description of how this feature works and then some
> > device/driver normative clauses with MUST statements (like "the device
> > MUST reject " or so).  
> 
> I agree we do want normative sections but please don't add MUST etc elsewhere.
> Also vague text saying "malformed" isn't all that helpful if it's a
> MUST. How does driver know for sure it's malformed? easy to miss
> some requirement.

Actually, I intended "" to be a placeholder for a
precise description...

> Therefore easiest thing it just to do some copy-pasting.
> 
> E.g. You start with above and add a normative section saying:
> Driver MUST use XYZ in VIRTIO_VSOCK_OP_SEQ_END packets.
> 
> We typically don't specify behaviour when out of spec,
> if we should here then please make a separate chapter
> for this explaining the how and the why.
> 

I think it makes sense if we want to be concrete on what should happen
for out-of-spec operation (e.g. in cases where ignoring it is
preferable to actively rejecting it.)

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [virtio-comment] [RFC PATCH v1 1/1] virtio-vsock: add SOCK_SEQPACKET description

2021-03-03 Thread Michael S. Tsirkin
On Wed, Mar 03, 2021 at 01:08:43PM +0100, Cornelia Huck wrote:
> On Wed, 24 Feb 2021 10:32:00 +0100
> Stefano Garzarella  wrote:
> 
> > On Thu, Feb 18, 2021 at 09:08:23AM +0300, Arseny Krasnov wrote:
> > >Signed-off-by: Arseny Krasnov 
> > >---
> > > virtio-vsock.tex | 40 +---
> > > 1 file changed, 37 insertions(+), 3 deletions(-)
> > >
> > >diff --git a/virtio-vsock.tex b/virtio-vsock.tex
> > >index da7e641..1ee8f99 100644
> > >--- a/virtio-vsock.tex
> > >+++ b/virtio-vsock.tex
> > >@@ -102,6 +102,11 @@ \subsection{Device Operation}\label{sec:Device Types 
> > >/ Socket Device / Device Op
> > >   VIRTIO_VSOCK_OP_CREDIT_UPDATE = 6,
> > >   /* Request the peer to send the credit info to us */
> > >   VIRTIO_VSOCK_OP_CREDIT_REQUEST = 7,
> > >+
> > >+  /* Message begin for SOCK_SEQPACKET */
> > >+  VIRTIO_VSOCK_OP_SEQ_BEGIN = 8,
> > >+  /* Message end for SOCK_SEQPACKET */
> > >+  VIRTIO_VSOCK_OP_SEQ_END = 9,
> > > };
> > > \end{lstlisting}
> > >
> > >@@ -140,11 +145,11 @@ \subsubsection{Addressing}\label{sec:Device Types / 
> > >Socket Device / Device Opera
> > > consists of a (cid, port number) tuple. The header fields used for this 
> > > are
> > > \field{src_cid}, \field{src_port}, \field{dst_cid}, and \field{dst_port}.
> > >
> > >-Currently only stream sockets are supported. \field{type} is 1 for stream
> > >-socket types.
> > >+Currently stream and seqpacket sockets are supported. \field{type} is 1 
> > >for
> > >+stream socket types. \field{type} is 2 for seqpacket socket types.
> > >
> > > Stream sockets provide in-order, guaranteed, connection-oriented delivery
> > >-without message boundaries.
> > >+without message boundaries. Seqpacket sockets also provide message 
> > >boundaries.
> > >
> > > \subsubsection{Buffer Space Management}\label{sec:Device Types / Socket 
> > > Device / Device Operation / Buffer Space Management}
> > > \field{buf_alloc} and \field{fwd_cnt} are used for buffer space 
> > > management of
> > >@@ -240,6 +245,35 @@ \subsubsection{Stream Sockets}\label{sec:Device Types 
> > >/ Socket Device / Device O
> > > destination) address tuple for a new connection while the other peer is 
> > > still
> > > processing the old connection.
> > >
> > >+\subsubsection{Seqpacket Sockets}\label{sec:Device Types / Socket Device 
> > >/ Device Operation / Seqpacket Sockets}
> > >+
> > >+Seqpacket sockets differ from stream sockets only in data transmission 
> > >way: in
> > >+stream sockets all data is sent using only VIRTIO_VSOCK_OP_RW packets. In
> > >+seqpacket sockets, to provide message boundaries, every sequence of
> > >+VIRTIO_VSOCK_OP_RW packets of each message is headed with  
> >   ^
> > Since this is a spec, I think we should use MUST when something must be 
> > respected by the peer, for example here we can say "MUST be headed"
> > 
> > >+VIRTIO_VSOCK_OP_SEQ_BEGIN and tailed with VIRTIO_VSOCK_OP_SEQ_END packets.
> > >+Both VIRTIO_VSOCK_OP_SEQ_BEGIN and VIRTIO_VSOCK_OP_SEQ_END packets carry  
> >   ^
> > Same here "MUST carry" and in the rest of the patch.
> 
> Actually, MUST and friends are really for normative sections; I'd
> advise to have a description of how this feature works and then some
> device/driver normative clauses with MUST statements (like "the device
> MUST reject " or so).

I agree we do want normative sections but please don't add MUST etc elsewhere.
Also vague text saying "malformed" isn't all that helpful if it's a
MUST. How does driver know for sure it's malformed? easy to miss
some requirement.
Therefore easiest thing it just to do some copy-pasting.

E.g. You start with above and add a normative section saying:
Driver MUST use XYZ in VIRTIO_VSOCK_OP_SEQ_END packets.

We typically don't specify behaviour when out of spec,
if we should here then please make a separate chapter
for this explaining the how and the why.

-- 
MST

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH 1/5] x86/sev-es: Introduce ip_within_syscall_gap() helper

2021-03-03 Thread Joerg Roedel
From: Joerg Roedel 

Introduce a helper to check whether an exception came from the syscall
gap and use it in the SEV-ES code. Extend the check to also cover the
compatibility SYSCALL entry path.

Fixes: 315562c9af3d5 ("x86/sev-es: Adjust #VC IST Stack on entering NMI 
handler")
Cc: sta...@vger.kernel.org # 5.10+
Signed-off-by: Joerg Roedel 
---
 arch/x86/entry/entry_64_compat.S |  2 ++
 arch/x86/include/asm/proto.h |  1 +
 arch/x86/include/asm/ptrace.h| 15 +++
 arch/x86/kernel/traps.c  |  3 +--
 4 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 541fdaf64045..0051cf5c792d 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -210,6 +210,8 @@ SYM_CODE_START(entry_SYSCALL_compat)
/* Switch to the kernel stack */
movqPER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
+SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL)
+
/* Construct struct pt_regs on stack */
pushq   $__USER32_DS/* pt_regs->ss */
pushq   %r8 /* pt_regs->sp */
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index 2c35f1c01a2d..b6a9d51d1d79 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -25,6 +25,7 @@ void __end_SYSENTER_singlestep_region(void);
 void entry_SYSENTER_compat(void);
 void __end_entry_SYSENTER_compat(void);
 void entry_SYSCALL_compat(void);
+void entry_SYSCALL_compat_safe_stack(void);
 void entry_INT80_compat(void);
 #ifdef CONFIG_XEN_PV
 void xen_entry_INT80_compat(void);
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index d8324a236696..409f661481e1 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -94,6 +94,8 @@ struct pt_regs {
 #include 
 #endif
 
+#include 
+
 struct cpuinfo_x86;
 struct task_struct;
 
@@ -175,6 +177,19 @@ static inline bool any_64bit_mode(struct pt_regs *regs)
 #ifdef CONFIG_X86_64
 #define current_user_stack_pointer()   current_pt_regs()->sp
 #define compat_user_stack_pointer()current_pt_regs()->sp
+
+static inline bool ip_within_syscall_gap(struct pt_regs *regs)
+{
+   bool ret = (regs->ip >= (unsigned long)entry_SYSCALL_64 &&
+   regs->ip <  (unsigned long)entry_SYSCALL_64_safe_stack);
+
+#ifdef CONFIG_IA32_EMULATION
+   ret = ret || (regs->ip >= (unsigned long)entry_SYSCALL_compat &&
+ regs->ip <  (unsigned 
long)entry_SYSCALL_compat_safe_stack);
+#endif
+
+   return ret;
+}
 #endif
 
 static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 7f5aec758f0e..ac1874a2a70e 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -694,8 +694,7 @@ asmlinkage __visible noinstr struct pt_regs 
*vc_switch_off_ist(struct pt_regs *r
 * In the SYSCALL entry path the RSP value comes from user-space - don't
 * trust it and switch to the current kernel stack
 */
-   if (regs->ip >= (unsigned long)entry_SYSCALL_64 &&
-   regs->ip <  (unsigned long)entry_SYSCALL_64_safe_stack) {
+   if (ip_within_syscall_gap(regs)) {
sp = this_cpu_read(cpu_current_top_of_stack);
goto sync;
}
-- 
2.30.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH 2/5] x86/sev-es: Check if regs->sp is trusted before adjusting #VC IST stack

2021-03-03 Thread Joerg Roedel
From: Joerg Roedel 

The code in the NMI handler to adjust the #VC handler IST stack is
needed in case an NMI hits when the #VC handler is still using its IST
stack.
But the check for this condition also needs to look if the regs->sp
value is trusted, meaning it was not set by user-space. Extend the
check to not use regs->sp when the NMI interrupted user-space code or
the SYSCALL gap.

Reported-by: Andy Lutomirski 
Fixes: 315562c9af3d5 ("x86/sev-es: Adjust #VC IST Stack on entering NMI 
handler")
Cc: sta...@vger.kernel.org # 5.10+
Signed-off-by: Joerg Roedel 
---
 arch/x86/kernel/sev-es.c | 14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c
index 1e78f4bd7bf2..28b0144daddd 100644
--- a/arch/x86/kernel/sev-es.c
+++ b/arch/x86/kernel/sev-es.c
@@ -121,8 +121,18 @@ static void __init setup_vc_stacks(int cpu)
cea_set_pte((void *)vaddr, pa, PAGE_KERNEL);
 }
 
-static __always_inline bool on_vc_stack(unsigned long sp)
+static __always_inline bool on_vc_stack(struct pt_regs *regs)
 {
+   unsigned long sp = regs->sp;
+
+   /* User-mode RSP is not trusted */
+   if (user_mode(regs))
+   return false;
+
+   /* SYSCALL gap still has user-mode RSP */
+   if (ip_within_syscall_gap(regs))
+   return false;
+
return ((sp >= __this_cpu_ist_bottom_va(VC)) && (sp < 
__this_cpu_ist_top_va(VC)));
 }
 
@@ -144,7 +154,7 @@ void noinstr __sev_es_ist_enter(struct pt_regs *regs)
old_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);
 
/* Make room on the IST stack */
-   if (on_vc_stack(regs->sp))
+   if (on_vc_stack(regs))
new_ist = ALIGN_DOWN(regs->sp, 8) - sizeof(old_ist);
else
new_ist = old_ist - sizeof(old_ist);
-- 
2.30.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH 5/5] x86/sev-es: Use __copy_from_user_inatomic()

2021-03-03 Thread Joerg Roedel
From: Joerg Roedel 

The #VC handler must run atomic and can not be put to sleep. This is a
problem when it tries to fetch instruction bytes from user-space via
copy_from_user.

Introduce a insn_fetch_from_user_inatomic() helper which uses
__copy_from_user_inatomic() to safely copy the instruction bytes to
kernel memory in the #VC handler.

Fixes: 5e3427a7bc432 ("x86/sev-es: Handle instruction fetches from user-space")
Cc: sta...@vger.kernel.org # v5.10+
Signed-off-by: Joerg Roedel 
---
 arch/x86/include/asm/insn-eval.h |  2 +
 arch/x86/kernel/sev-es.c |  2 +-
 arch/x86/lib/insn-eval.c | 66 +---
 3 files changed, 55 insertions(+), 15 deletions(-)

diff --git a/arch/x86/include/asm/insn-eval.h b/arch/x86/include/asm/insn-eval.h
index a0f839aa144d..98b4dae5e8bc 100644
--- a/arch/x86/include/asm/insn-eval.h
+++ b/arch/x86/include/asm/insn-eval.h
@@ -23,6 +23,8 @@ unsigned long insn_get_seg_base(struct pt_regs *regs, int 
seg_reg_idx);
 int insn_get_code_seg_params(struct pt_regs *regs);
 int insn_fetch_from_user(struct pt_regs *regs,
 unsigned char buf[MAX_INSN_SIZE]);
+int insn_fetch_from_user_inatomic(struct pt_regs *regs,
+ unsigned char buf[MAX_INSN_SIZE]);
 bool insn_decode(struct insn *insn, struct pt_regs *regs,
 unsigned char buf[MAX_INSN_SIZE], int buf_size);
 
diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c
index 3d8ec5bf6f79..26f5479a97a8 100644
--- a/arch/x86/kernel/sev-es.c
+++ b/arch/x86/kernel/sev-es.c
@@ -270,7 +270,7 @@ static enum es_result vc_decode_insn(struct es_em_ctxt 
*ctxt)
int res;
 
if (user_mode(ctxt->regs)) {
-   res = insn_fetch_from_user(ctxt->regs, buffer);
+   res = insn_fetch_from_user_inatomic(ctxt->regs, buffer);
if (!res) {
ctxt->fi.vector = X86_TRAP_PF;
ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER;
diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c
index 4229950a5d78..bb0b3fe1e0a0 100644
--- a/arch/x86/lib/insn-eval.c
+++ b/arch/x86/lib/insn-eval.c
@@ -1415,6 +1415,25 @@ void __user *insn_get_addr_ref(struct insn *insn, struct 
pt_regs *regs)
}
 }
 
+static unsigned long insn_get_effective_ip(struct pt_regs *regs)
+{
+   unsigned long seg_base = 0;
+
+   /*
+* If not in user-space long mode, a custom code segment could be in
+* use. This is true in protected mode (if the process defined a local
+* descriptor table), or virtual-8086 mode. In most of the cases
+* seg_base will be zero as in USER_CS.
+*/
+   if (!user_64bit_mode(regs)) {
+   seg_base = insn_get_seg_base(regs, INAT_SEG_REG_CS);
+   if (seg_base == -1L)
+   return 0;
+   }
+
+   return seg_base + regs->ip;
+}
+
 /**
  * insn_fetch_from_user() - Copy instruction bytes from user-space memory
  * @regs:  Structure with register values as seen when entering kernel mode
@@ -1431,24 +1450,43 @@ void __user *insn_get_addr_ref(struct insn *insn, 
struct pt_regs *regs)
  */
 int insn_fetch_from_user(struct pt_regs *regs, unsigned char 
buf[MAX_INSN_SIZE])
 {
-   unsigned long seg_base = 0;
+   unsigned long ip;
int not_copied;
 
-   /*
-* If not in user-space long mode, a custom code segment could be in
-* use. This is true in protected mode (if the process defined a local
-* descriptor table), or virtual-8086 mode. In most of the cases
-* seg_base will be zero as in USER_CS.
-*/
-   if (!user_64bit_mode(regs)) {
-   seg_base = insn_get_seg_base(regs, INAT_SEG_REG_CS);
-   if (seg_base == -1L)
-   return 0;
-   }
+   ip = insn_get_effective_ip(regs);
+   if (!ip)
+   return 0;
+
+   not_copied = copy_from_user(buf, (void __user *)ip, MAX_INSN_SIZE);
 
+   return MAX_INSN_SIZE - not_copied;
+}
+
+/**
+ * insn_fetch_from_user_inatomic() - Copy instruction bytes from user-space 
memory
+ *   while in atomic code
+ * @regs:  Structure with register values as seen when entering kernel mode
+ * @buf:   Array to store the fetched instruction
+ *
+ * Gets the linear address of the instruction and copies the instruction bytes
+ * to the buf. This function must be used in atomic context.
+ *
+ * Returns:
+ *
+ * Number of instruction bytes copied.
+ *
+ * 0 if nothing was copied.
+ */
+int insn_fetch_from_user_inatomic(struct pt_regs *regs, unsigned char 
buf[MAX_INSN_SIZE])
+{
+   unsigned long ip;
+   int not_copied;
+
+   ip = insn_get_effective_ip(regs);
+   if (!ip)
+   return 0;
 
-   not_copied = copy_from_user(buf, (void __user *)(seg_base + regs->ip),
-   MAX_INSN_SIZE);
+   not_copied = 

[PATCH 3/5] x86/sev-es: Optimize __sev_es_ist_enter() for better readability

2021-03-03 Thread Joerg Roedel
From: Joerg Roedel 

Reorganize the code and improve the comments to make the function more
readable and easier to understand.

Signed-off-by: Joerg Roedel 
---
 arch/x86/kernel/sev-es.c | 36 
 1 file changed, 24 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c
index 28b0144daddd..e1eeb3ef58c5 100644
--- a/arch/x86/kernel/sev-es.c
+++ b/arch/x86/kernel/sev-es.c
@@ -137,29 +137,41 @@ static __always_inline bool on_vc_stack(struct pt_regs 
*regs)
 }
 
 /*
- * This function handles the case when an NMI is raised in the #VC exception
- * handler entry code. In this case, the IST entry for #VC must be adjusted, so
- * that any subsequent #VC exception will not overwrite the stack contents of 
the
- * interrupted #VC handler.
+ * This function handles the case when an NMI is raised in the #VC
+ * exception handler entry code, before the #VC handler has switched off
+ * its IST stack. In this case, the IST entry for #VC must be adjusted,
+ * so that any nested #VC exception will not overwrite the stack
+ * contents of the interrupted #VC handler.
  *
  * The IST entry is adjusted unconditionally so that it can be also be
- * unconditionally adjusted back in sev_es_ist_exit(). Otherwise a nested
- * sev_es_ist_exit() call may adjust back the IST entry too early.
+ * unconditionally adjusted back in __sev_es_ist_exit(). Otherwise a
+ * nested sev_es_ist_exit() call may adjust back the IST entry too
+ * early.
+ *
+ * The __sev_es_ist_enter() and __sev_es_ist_exit() functions always run
+ * on the NMI IST stack, as they are only called from NMI handling code
+ * right now.
  */
 void noinstr __sev_es_ist_enter(struct pt_regs *regs)
 {
unsigned long old_ist, new_ist;
 
/* Read old IST entry */
-   old_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);
+   new_ist = old_ist = 
__this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);
 
-   /* Make room on the IST stack */
+   /*
+* If NMI happened while on the #VC IST stack, set the new IST
+* value below regs->sp, so that the interrupted stack frame is
+* not overwritten by subsequent #VC exceptions.
+*/
if (on_vc_stack(regs))
-   new_ist = ALIGN_DOWN(regs->sp, 8) - sizeof(old_ist);
-   else
-   new_ist = old_ist - sizeof(old_ist);
+   new_ist = regs->sp;
 
-   /* Store old IST entry */
+   /*
+* Reserve additional 8 bytes and store old IST value so this
+* adjustment can be unrolled in __sev_es_ist_exit().
+*/
+   new_ist -= sizeof(old_ist);
*(unsigned long *)new_ist = old_ist;
 
/* Set new IST entry */
-- 
2.30.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH 0/5 v2] x86/sev-es: SEV-ES Fixes for v5.12

2021-03-03 Thread Joerg Roedel
From: Joerg Roedel 

Hi,

here are a couple of fixes for 5.12 in the SEV-ES guest support code.
Patches 1-3 have in a similar form already been posted, so this is v2.
The last two patches are new an arose from me running an SEV-ES guest
with more debugging features and instrumentation enabled.  I changed
the first patches according to the review comments I received.

Please review.

Thanks,

Joerg

Joerg Roedel (5):
  x86/sev-es: Introduce ip_within_syscall_gap() helper
  x86/sev-es: Check if regs->sp is trusted before adjusting #VC IST
stack
  x86/sev-es: Optimize __sev_es_ist_enter() for better readability
  x86/sev-es: Correctly track IRQ states in runtime #VC handler
  x86/sev-es: Use __copy_from_user_inatomic()

 arch/x86/entry/entry_64_compat.S |  2 +
 arch/x86/include/asm/insn-eval.h |  2 +
 arch/x86/include/asm/proto.h |  1 +
 arch/x86/include/asm/ptrace.h| 15 
 arch/x86/kernel/sev-es.c | 58 
 arch/x86/kernel/traps.c  |  3 +-
 arch/x86/lib/insn-eval.c | 66 +---
 7 files changed, 114 insertions(+), 33 deletions(-)

-- 
2.30.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH 4/5] x86/sev-es: Correctly track IRQ states in runtime #VC handler

2021-03-03 Thread Joerg Roedel
From: Joerg Roedel 

Call irqentry_nmi_enter()/irqentry_nmi_exit() in the #VC handler to
correctly track the IRQ state during its execution.

Reported-by: Andy Lutomirski 
Fixes: 0786138c78e79 ("x86/sev-es: Add a Runtime #VC Exception Handler")
Cc: sta...@vger.kernel.org # v5.10+
Signed-off-by: Joerg Roedel 
---
 arch/x86/kernel/sev-es.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c
index e1eeb3ef58c5..3d8ec5bf6f79 100644
--- a/arch/x86/kernel/sev-es.c
+++ b/arch/x86/kernel/sev-es.c
@@ -1270,13 +1270,12 @@ static __always_inline bool on_vc_fallback_stack(struct 
pt_regs *regs)
 DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
 {
struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
+   irqentry_state_t irq_state;
struct ghcb_state state;
struct es_em_ctxt ctxt;
enum es_result result;
struct ghcb *ghcb;
 
-   lockdep_assert_irqs_disabled();
-
/*
 * Handle #DB before calling into !noinstr code to avoid recursive #DB.
 */
@@ -1285,6 +1284,8 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
return;
}
 
+   irq_state = irqentry_nmi_enter(regs);
+   lockdep_assert_irqs_disabled();
instrumentation_begin();
 
/*
@@ -1347,6 +1348,7 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
 
 out:
instrumentation_end();
+   irqentry_nmi_exit(regs, irq_state);
 
return;
 
-- 
2.30.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH] scsi: target: vhost-scsi: remove redundant initialization of variable ret

2021-03-03 Thread Colin King
From: Colin Ian King 

The variable ret is being initialized with a value that is never read
and it is being updated later with a new value.  The initialization is
redundant and can be removed.

Addresses-Coverity: ("Unused value")
Signed-off-by: Colin Ian King 
---
 drivers/vhost/scsi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index d16c04dcc144..9129ab8187fd 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -2465,7 +2465,7 @@ static const struct target_core_fabric_ops vhost_scsi_ops 
= {
 
 static int __init vhost_scsi_init(void)
 {
-   int ret = -ENOMEM;
+   int ret;
 
pr_debug("TCM_VHOST fabric module %s on %s/%s"
" on "UTS_RELEASE"\n", VHOST_SCSI_VERSION, utsname()->sysname,
-- 
2.30.0

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [virtio-comment] [RFC PATCH v1 1/1] virtio-vsock: add SOCK_SEQPACKET description

2021-03-03 Thread Stefano Garzarella

On Wed, Mar 03, 2021 at 01:08:43PM +0100, Cornelia Huck wrote:

On Wed, 24 Feb 2021 10:32:00 +0100
Stefano Garzarella  wrote:


On Thu, Feb 18, 2021 at 09:08:23AM +0300, Arseny Krasnov wrote:
>Signed-off-by: Arseny Krasnov 
>---
> virtio-vsock.tex | 40 +---
> 1 file changed, 37 insertions(+), 3 deletions(-)
>
>diff --git a/virtio-vsock.tex b/virtio-vsock.tex
>index da7e641..1ee8f99 100644
>--- a/virtio-vsock.tex
>+++ b/virtio-vsock.tex
>@@ -102,6 +102,11 @@ \subsection{Device Operation}\label{sec:Device Types / 
Socket Device / Device Op
>VIRTIO_VSOCK_OP_CREDIT_UPDATE = 6,
>/* Request the peer to send the credit info to us */
>VIRTIO_VSOCK_OP_CREDIT_REQUEST = 7,
>+
>+   /* Message begin for SOCK_SEQPACKET */
>+   VIRTIO_VSOCK_OP_SEQ_BEGIN = 8,
>+   /* Message end for SOCK_SEQPACKET */
>+   VIRTIO_VSOCK_OP_SEQ_END = 9,
> };
> \end{lstlisting}
>
>@@ -140,11 +145,11 @@ \subsubsection{Addressing}\label{sec:Device Types / 
Socket Device / Device Opera
> consists of a (cid, port number) tuple. The header fields used for this are
> \field{src_cid}, \field{src_port}, \field{dst_cid}, and \field{dst_port}.
>
>-Currently only stream sockets are supported. \field{type} is 1 for stream
>-socket types.
>+Currently stream and seqpacket sockets are supported. \field{type} is 1 for
>+stream socket types. \field{type} is 2 for seqpacket socket types.
>
> Stream sockets provide in-order, guaranteed, connection-oriented delivery
>-without message boundaries.
>+without message boundaries. Seqpacket sockets also provide message boundaries.
>
> \subsubsection{Buffer Space Management}\label{sec:Device Types / Socket 
Device / Device Operation / Buffer Space Management}
> \field{buf_alloc} and \field{fwd_cnt} are used for buffer space management of
>@@ -240,6 +245,35 @@ \subsubsection{Stream Sockets}\label{sec:Device Types / 
Socket Device / Device O
> destination) address tuple for a new connection while the other peer is still
> processing the old connection.
>
>+\subsubsection{Seqpacket Sockets}\label{sec:Device Types / Socket Device / 
Device Operation / Seqpacket Sockets}
>+
>+Seqpacket sockets differ from stream sockets only in data transmission way: in
>+stream sockets all data is sent using only VIRTIO_VSOCK_OP_RW packets. In
>+seqpacket sockets, to provide message boundaries, every sequence of
>+VIRTIO_VSOCK_OP_RW packets of each message is headed with
  ^
Since this is a spec, I think we should use MUST when something must be
respected by the peer, for example here we can say "MUST be headed"

>+VIRTIO_VSOCK_OP_SEQ_BEGIN and tailed with VIRTIO_VSOCK_OP_SEQ_END packets.
>+Both VIRTIO_VSOCK_OP_SEQ_BEGIN and VIRTIO_VSOCK_OP_SEQ_END packets carry
  ^
Same here "MUST carry" and in the rest of the patch.


Actually, MUST and friends are really for normative sections; I'd
advise to have a description of how this feature works and then some
device/driver normative clauses with MUST statements (like "the device
MUST reject " or so).



Okay, got it.

Thanks for the clarification,
Stefano

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [virtio-comment] [RFC PATCH v1 1/1] virtio-vsock: add SOCK_SEQPACKET description

2021-03-03 Thread Cornelia Huck
On Wed, 24 Feb 2021 10:32:00 +0100
Stefano Garzarella  wrote:

> On Thu, Feb 18, 2021 at 09:08:23AM +0300, Arseny Krasnov wrote:
> >Signed-off-by: Arseny Krasnov 
> >---
> > virtio-vsock.tex | 40 +---
> > 1 file changed, 37 insertions(+), 3 deletions(-)
> >
> >diff --git a/virtio-vsock.tex b/virtio-vsock.tex
> >index da7e641..1ee8f99 100644
> >--- a/virtio-vsock.tex
> >+++ b/virtio-vsock.tex
> >@@ -102,6 +102,11 @@ \subsection{Device Operation}\label{sec:Device Types / 
> >Socket Device / Device Op
> > VIRTIO_VSOCK_OP_CREDIT_UPDATE = 6,
> > /* Request the peer to send the credit info to us */
> > VIRTIO_VSOCK_OP_CREDIT_REQUEST = 7,
> >+
> >+/* Message begin for SOCK_SEQPACKET */
> >+VIRTIO_VSOCK_OP_SEQ_BEGIN = 8,
> >+/* Message end for SOCK_SEQPACKET */
> >+VIRTIO_VSOCK_OP_SEQ_END = 9,
> > };
> > \end{lstlisting}
> >
> >@@ -140,11 +145,11 @@ \subsubsection{Addressing}\label{sec:Device Types / 
> >Socket Device / Device Opera
> > consists of a (cid, port number) tuple. The header fields used for this are
> > \field{src_cid}, \field{src_port}, \field{dst_cid}, and \field{dst_port}.
> >
> >-Currently only stream sockets are supported. \field{type} is 1 for stream
> >-socket types.
> >+Currently stream and seqpacket sockets are supported. \field{type} is 1 for
> >+stream socket types. \field{type} is 2 for seqpacket socket types.
> >
> > Stream sockets provide in-order, guaranteed, connection-oriented delivery
> >-without message boundaries.
> >+without message boundaries. Seqpacket sockets also provide message 
> >boundaries.
> >
> > \subsubsection{Buffer Space Management}\label{sec:Device Types / Socket 
> > Device / Device Operation / Buffer Space Management}
> > \field{buf_alloc} and \field{fwd_cnt} are used for buffer space management 
> > of
> >@@ -240,6 +245,35 @@ \subsubsection{Stream Sockets}\label{sec:Device Types / 
> >Socket Device / Device O
> > destination) address tuple for a new connection while the other peer is 
> > still
> > processing the old connection.
> >
> >+\subsubsection{Seqpacket Sockets}\label{sec:Device Types / Socket Device / 
> >Device Operation / Seqpacket Sockets}
> >+
> >+Seqpacket sockets differ from stream sockets only in data transmission way: 
> >in
> >+stream sockets all data is sent using only VIRTIO_VSOCK_OP_RW packets. In
> >+seqpacket sockets, to provide message boundaries, every sequence of
> >+VIRTIO_VSOCK_OP_RW packets of each message is headed with  
>   ^
> Since this is a spec, I think we should use MUST when something must be 
> respected by the peer, for example here we can say "MUST be headed"
> 
> >+VIRTIO_VSOCK_OP_SEQ_BEGIN and tailed with VIRTIO_VSOCK_OP_SEQ_END packets.
> >+Both VIRTIO_VSOCK_OP_SEQ_BEGIN and VIRTIO_VSOCK_OP_SEQ_END packets carry  
>   ^
> Same here "MUST carry" and in the rest of the patch.

Actually, MUST and friends are really for normative sections; I'd
advise to have a description of how this feature works and then some
device/driver normative clauses with MUST statements (like "the device
MUST reject " or so).

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


RE: [PATCH linux-next 7/9] vdpa/mlx5: Provide device generated random MAC address

2021-03-03 Thread Parav Pandit


> From: Michael S. Tsirkin 
> Sent: Wednesday, March 3, 2021 2:59 PM
> 
> On Wed, Mar 03, 2021 at 08:33:50AM +0200, Eli Cohen wrote:
> > On Wed, Mar 03, 2021 at 05:59:50AM +0200, Parav Pandit wrote:
> > > Hi Eli,
> > >
> > > > From: Eli Cohen 
> > > > Sent: Tuesday, March 2, 2021 11:09 AM
> > > >
> > > > On Mon, Mar 01, 2021 at 11:12:33AM -0500, Michael S. Tsirkin wrote:
> > > > > On Mon, Mar 01, 2021 at 03:19:51PM +0200, Eli Cohen wrote:
> > > > > > On Mon, Mar 01, 2021 at 08:09:48AM -0500, Michael S. Tsirkin wrote:
> > > > > > > On Mon, Mar 01, 2021 at 09:08:28AM +0200, Eli Cohen wrote:
> > > > > > > > On Wed, Feb 24, 2021 at 05:11:23PM +0800, Jason Wang wrote:
> > > > > > > > >
> > > > > > > > > On 2021/2/24 2:18 下午, Parav Pandit wrote:
> > > > > > > > > > From: Eli Cohen 
> > > > > > > > > >
> > > > > > > > > > Use a randomly generated MAC address to be applied in
> > > > > > > > > > case it is not configured by management tool.
> > > > > > > > > >
> > > > > > > > > > The value queried through
> > > > > > > > > > mlx5_query_nic_vport_mac_address()
> > > > > > > > > > is not relelavnt to vdpa since it is the mac that
> > > > > > > > > > should be used by the regular NIC driver.
> > > > > > > > > >
> > > > > > > > > > Signed-off-by: Eli Cohen 
> > > > > > > > > > Reviewed-by: Parav Pandit 
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > Acked-by: Jason Wang 
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > > ---
> > > > > > > > > >   drivers/vdpa/mlx5/net/mlx5_vnet.c | 5 +
> > > > > > > > > >   1 file changed, 1 insertion(+), 4 deletions(-)
> > > > > > > > > >
> > > > > > > > > > diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > > > > > > b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > > > > > > index b67bba581dfd..ece2183e7b20 100644
> > > > > > > > > > --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > > > > > > +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > > > > > > @@ -2005,10 +2005,7 @@ static int
> > > > > > > > > > mlx5_vdpa_dev_add(struct
> > > > vdpa_mgmt_dev *v_mdev, const char *name)
> > > > > > > > > > if (err)
> > > > > > > > > > goto err_mtu;
> > > > > > > > > > -   err = mlx5_query_nic_vport_mac_address(mdev, 0,
> 0, config-
> > > > >mac);
> > > > > > > > > > -   if (err)
> > > > > > > > > > -   goto err_mtu;
> > > > > > > > > > -
> > > > > > > > > > +   eth_random_addr(config->mac);
> > > > > > > >
> > > > > > > > I think this patch is missing setting VIRTIO_NET_F_MTU. I
> > > > > > > > will post v2 with the other fixes in this series.
> > > > > > >
> > > > > > > I don't really understand why this is a good idea.
> > > > > > >
> > > > > > > If userspace wants a random mac it can set it, with this
> > > > > > > patch it is impossible to know whether the mac is a hardware
> > > > > > > one (which will be persistent e.g. across reboots) or a random
> one.
> > > > > > >
> > > > > >
> > > > > > You can still get a persistent MAC set by the vdpa tool. e.g
> > > > > >
> > > > > > vdpa dev config set vdpa0 mac 00:11:22:33:44:55
> > > > > >
> > > > > > If you don't use vdpa tool, the device assigns a random MAC.
> > > > > > This MAC is used to filter imcoming unicast traffic (done in a
> > > > > > subsequent
> > > > patch).
> > > > >
> > > > > Well previously device learned the MAC from outgoing traffic and
> > > > > used that for the filter.
> > > >
> > > > No, was is added only in the last series that Parav send. Before
> > > > that the device did not filter and forwarded any packet that was
> > > > forwarded to it buy the eswitch.
> > > >
> > > > > Is changing that to a random value really all that useful as a
> > > > > default?  Why not do the randomization in userspace?
> > > > >
> > > >
> > > > I think we want management entity to set the MAC, that's the VDPA
> tool.
> > > > So as things are right now (with the last series applied), the
> > > > vdpa tool is the tool to assign MAC addresses and if it doesn't, a
> > > > device randomly generated MAC applies. Currently MAC addresses set
> > > > by qemu command line are ignored (set_config is not implementd).
> > > > We can allow this but this will override vdpa tool configuration.
> > >
> > > I believe its incorrect to always do random generated mac as of this
> patch.
> > > This is because, doing so ignores any admin config done by the sysadmin
> on the switch (ovs side) using [1].
> > >
> > Well, when this patch was sent, we still had thoughts to let mlx5e NIC
> > and the VDPA to co-exist on the same function (VF or SF). Now that
> > we're off this idea you can become tempted to use the MAC address
> > configured for the NIC but I don't think it's a good idea. We already
> > have a dedicated tool to configure MAC for VDPA so let's use it.
> 
> Right. And users can decide to reuse the hardware MAC if they want to.
Right.
If user chose to reuse the hw mac set by the eswitch, so let them use it.
When smartnic is used, some users do not trust compute side for network 
attributes configuration.
So 

Re: [PATCH linux-next 7/9] vdpa/mlx5: Provide device generated random MAC address

2021-03-03 Thread Michael S. Tsirkin
On Wed, Mar 03, 2021 at 03:59:50AM +, Parav Pandit wrote:
> Hi Eli,
> 
> > From: Eli Cohen 
> > Sent: Tuesday, March 2, 2021 11:09 AM
> > 
> > On Mon, Mar 01, 2021 at 11:12:33AM -0500, Michael S. Tsirkin wrote:
> > > On Mon, Mar 01, 2021 at 03:19:51PM +0200, Eli Cohen wrote:
> > > > On Mon, Mar 01, 2021 at 08:09:48AM -0500, Michael S. Tsirkin wrote:
> > > > > On Mon, Mar 01, 2021 at 09:08:28AM +0200, Eli Cohen wrote:
> > > > > > On Wed, Feb 24, 2021 at 05:11:23PM +0800, Jason Wang wrote:
> > > > > > >
> > > > > > > On 2021/2/24 2:18 下午, Parav Pandit wrote:
> > > > > > > > From: Eli Cohen 
> > > > > > > >
> > > > > > > > Use a randomly generated MAC address to be applied in case
> > > > > > > > it is not configured by management tool.
> > > > > > > >
> > > > > > > > The value queried through mlx5_query_nic_vport_mac_address()
> > > > > > > > is not relelavnt to vdpa since it is the mac that should be
> > > > > > > > used by the regular NIC driver.
> > > > > > > >
> > > > > > > > Signed-off-by: Eli Cohen 
> > > > > > > > Reviewed-by: Parav Pandit 
> > > > > > >
> > > > > > >
> > > > > > > Acked-by: Jason Wang 
> > > > > > >
> > > > > > >
> > > > > > > > ---
> > > > > > > >   drivers/vdpa/mlx5/net/mlx5_vnet.c | 5 +
> > > > > > > >   1 file changed, 1 insertion(+), 4 deletions(-)
> > > > > > > >
> > > > > > > > diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > > > > b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > > > > index b67bba581dfd..ece2183e7b20 100644
> > > > > > > > --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > > > > +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > > > > @@ -2005,10 +2005,7 @@ static int mlx5_vdpa_dev_add(struct
> > vdpa_mgmt_dev *v_mdev, const char *name)
> > > > > > > > if (err)
> > > > > > > > goto err_mtu;
> > > > > > > > -   err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, 
> > > > > > > > config-
> > >mac);
> > > > > > > > -   if (err)
> > > > > > > > -   goto err_mtu;
> > > > > > > > -
> > > > > > > > +   eth_random_addr(config->mac);
> > > > > >
> > > > > > I think this patch is missing setting VIRTIO_NET_F_MTU. I will
> > > > > > post v2 with the other fixes in this series.
> > > > >
> > > > > I don't really understand why this is a good idea.
> > > > >
> > > > > If userspace wants a random mac it can set it, with this patch it
> > > > > is impossible to know whether the mac is a hardware one (which
> > > > > will be persistent e.g. across reboots) or a random one.
> > > > >
> > > >
> > > > You can still get a persistent MAC set by the vdpa tool. e.g
> > > >
> > > > vdpa dev config set vdpa0 mac 00:11:22:33:44:55
> > > >
> > > > If you don't use vdpa tool, the device assigns a random MAC. This
> > > > MAC is used to filter imcoming unicast traffic (done in a subsequent
> > patch).
> > >
> > > Well previously device learned the MAC from outgoing traffic and used
> > > that for the filter.
> > 
> > No, was is added only in the last series that Parav send. Before that the
> > device did not filter and forwarded any packet that was forwarded to it buy
> > the eswitch.
> > 
> > > Is changing that to a random value really all that useful as a
> > > default?  Why not do the randomization in userspace?
> > >
> > 
> > I think we want management entity to set the MAC, that's the VDPA tool.
> > So as things are right now (with the last series applied), the vdpa tool is 
> > the
> > tool to assign MAC addresses and if it doesn't, a device randomly generated
> > MAC applies. Currently MAC addresses set by qemu command line are
> > ignored (set_config is not implementd). We can allow this but this will
> > override vdpa tool configuration.
> 
> I believe its incorrect to always do random generated mac as of this patch.
> This is because, doing so ignores any admin config done by the sysadmin on 
> the switch (ovs side) using [1].
> 
> So if user choose to configure using eswitch config, mlx5_vnet to honor that.
> And if user prefers to override is using vdpa tool or set_config from QEMU 
> side, so be it.
> Hence, instead of reporting all zeros, mlx5 should query own vport context 
> and publish that mac in the config layout and rx steering side.
> 
> If user choose to override it, config layout and rx rules will have to 
> updated on such config.
> 
> [1] devlink port function set pci/:03:00.0// hw_addr 
> 00:11:22:33:44:55

well it has reported all zeros to mean "learning bridge" for a
so while I suspect changing that is an ABI change, though
a minor one ...

If you do change it please add some other way to find out
whether it still learns from outgoing traffic
(ie whether mac spoofing is enabled).

-- 
MST

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH linux-next 7/9] vdpa/mlx5: Provide device generated random MAC address

2021-03-03 Thread Michael S. Tsirkin
On Wed, Mar 03, 2021 at 08:33:50AM +0200, Eli Cohen wrote:
> On Wed, Mar 03, 2021 at 05:59:50AM +0200, Parav Pandit wrote:
> > Hi Eli,
> > 
> > > From: Eli Cohen 
> > > Sent: Tuesday, March 2, 2021 11:09 AM
> > > 
> > > On Mon, Mar 01, 2021 at 11:12:33AM -0500, Michael S. Tsirkin wrote:
> > > > On Mon, Mar 01, 2021 at 03:19:51PM +0200, Eli Cohen wrote:
> > > > > On Mon, Mar 01, 2021 at 08:09:48AM -0500, Michael S. Tsirkin wrote:
> > > > > > On Mon, Mar 01, 2021 at 09:08:28AM +0200, Eli Cohen wrote:
> > > > > > > On Wed, Feb 24, 2021 at 05:11:23PM +0800, Jason Wang wrote:
> > > > > > > >
> > > > > > > > On 2021/2/24 2:18 下午, Parav Pandit wrote:
> > > > > > > > > From: Eli Cohen 
> > > > > > > > >
> > > > > > > > > Use a randomly generated MAC address to be applied in case
> > > > > > > > > it is not configured by management tool.
> > > > > > > > >
> > > > > > > > > The value queried through mlx5_query_nic_vport_mac_address()
> > > > > > > > > is not relelavnt to vdpa since it is the mac that should be
> > > > > > > > > used by the regular NIC driver.
> > > > > > > > >
> > > > > > > > > Signed-off-by: Eli Cohen 
> > > > > > > > > Reviewed-by: Parav Pandit 
> > > > > > > >
> > > > > > > >
> > > > > > > > Acked-by: Jason Wang 
> > > > > > > >
> > > > > > > >
> > > > > > > > > ---
> > > > > > > > >   drivers/vdpa/mlx5/net/mlx5_vnet.c | 5 +
> > > > > > > > >   1 file changed, 1 insertion(+), 4 deletions(-)
> > > > > > > > >
> > > > > > > > > diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > > > > > b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > > > > > index b67bba581dfd..ece2183e7b20 100644
> > > > > > > > > --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > > > > > +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > > > > > @@ -2005,10 +2005,7 @@ static int mlx5_vdpa_dev_add(struct
> > > vdpa_mgmt_dev *v_mdev, const char *name)
> > > > > > > > >   if (err)
> > > > > > > > >   goto err_mtu;
> > > > > > > > > - err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, 
> > > > > > > > > config-
> > > >mac);
> > > > > > > > > - if (err)
> > > > > > > > > - goto err_mtu;
> > > > > > > > > -
> > > > > > > > > + eth_random_addr(config->mac);
> > > > > > >
> > > > > > > I think this patch is missing setting VIRTIO_NET_F_MTU. I will
> > > > > > > post v2 with the other fixes in this series.
> > > > > >
> > > > > > I don't really understand why this is a good idea.
> > > > > >
> > > > > > If userspace wants a random mac it can set it, with this patch it
> > > > > > is impossible to know whether the mac is a hardware one (which
> > > > > > will be persistent e.g. across reboots) or a random one.
> > > > > >
> > > > >
> > > > > You can still get a persistent MAC set by the vdpa tool. e.g
> > > > >
> > > > > vdpa dev config set vdpa0 mac 00:11:22:33:44:55
> > > > >
> > > > > If you don't use vdpa tool, the device assigns a random MAC. This
> > > > > MAC is used to filter imcoming unicast traffic (done in a subsequent
> > > patch).
> > > >
> > > > Well previously device learned the MAC from outgoing traffic and used
> > > > that for the filter.
> > > 
> > > No, was is added only in the last series that Parav send. Before that the
> > > device did not filter and forwarded any packet that was forwarded to it 
> > > buy
> > > the eswitch.
> > > 
> > > > Is changing that to a random value really all that useful as a
> > > > default?  Why not do the randomization in userspace?
> > > >
> > > 
> > > I think we want management entity to set the MAC, that's the VDPA tool.
> > > So as things are right now (with the last series applied), the vdpa tool 
> > > is the
> > > tool to assign MAC addresses and if it doesn't, a device randomly 
> > > generated
> > > MAC applies. Currently MAC addresses set by qemu command line are
> > > ignored (set_config is not implementd). We can allow this but this will
> > > override vdpa tool configuration.
> > 
> > I believe its incorrect to always do random generated mac as of this patch.
> > This is because, doing so ignores any admin config done by the sysadmin on 
> > the switch (ovs side) using [1].
> > 
> Well, when this patch was sent, we still had thoughts to let mlx5e NIC
> and the VDPA to co-exist on the same function (VF or SF). Now that we're
> off this idea you can become tempted to use the MAC address configured
> for the NIC but I don't think it's a good idea. We already have a
> dedicated tool to configure MAC for VDPA so let's use it.

Right. And users can decide to reuse the hardware MAC if they want to.

> > So if user choose to configure using eswitch config, mlx5_vnet to honor 
> > that.
> > And if user prefers to override is using vdpa tool or set_config from QEMU 
> > side, so be it.
> 
> I agree that we should let the user to configure the MAC through qemu
> argument when booting the VM. So I'll add this for the next spin of this
> series.

OK so
- if MAC is configured it is used
- if not configured 0 is reported to userspace

fair 

Re: [PATCH linux-next 2/9] vdpa: Introduce query of device config layout

2021-03-03 Thread Adrian Moreno


On 3/2/21 5:25 AM, Jason Wang wrote:
> 
> On 2021/3/1 8:06 下午, Sean Mooney wrote:
>> On Mon, 2021-03-01 at 11:28 +0100, Adrian Moreno wrote:
>>> On 3/1/21 8:50 AM, Jason Wang wrote:
 On 2021/3/1 3:29 下午, Parav Pandit wrote:
>> From: Jason Wang 
>> Sent: Monday, March 1, 2021 9:29 AM
>>
>> On 2021/2/26 4:50 下午, Parav Pandit wrote:
 From: Jason Wang 
 Sent: Friday, February 26, 2021 1:56 PM


 On 2021/2/26 1:32 下午, Parav Pandit wrote:
>> From: Jason Wang 
>> Sent: Wednesday, February 24, 2021 2:18 PM
>>
>> On 2021/2/24 2:18 下午, Parav Pandit wrote:
>>> +
>>> +    if (nla_put_u16(msg,
>> VDPA_ATTR_DEV_NET_CFG_MAX_VQP,
>>> +    config->max_virtqueue_pairs))
>>> +    return -EMSGSIZE;
>> We probably need to check VIRTIO_NET_F_RSS here.
> Yes. Will use it in v2.
>
>>> +    if (nla_put_u8(msg,
>> VDPA_ATTR_DEV_NET_CFG_RSS_MAX_KEY_LEN,
>>> +   config->rss_max_key_size))
>>> +    return -EMSGSIZE;
>>> +
>>> +    rss_field = le16_to_cpu(config->rss_max_key_size);
>>> +    if (nla_put_u16(msg,
>> VDPA_ATTR_DEV_NET_CFG_RSS_MAX_IT_LEN,
>> rss_field))
>>> +    return -EMSGSIZE;
>>> +
>>> +    hash_types = le32_to_cpu(config->supported_hash_types);
>>> +    if (nla_put_u32(msg,
>> VDPA_ATTR_DEV_NET_CFG_RSS_HASH_TYPES,
>>> +    config->supported_hash_types))
>>> +    return -EMSGSIZE;
>>> +    return 0;
>>> +}
>>> +
>>> +static int vdpa_dev_net_config_fill(struct vdpa_device *vdev,
>>> +struct sk_buff *msg) {
>>> +    struct virtio_net_config config = {};
>>> +
>>> +    vdev->config->get_config(vdev, 0, , sizeof(config));
>> Do we need to sync with other possible get_config calls here?
> To readers of get_config() is ok. No need to sync.
> However, I think set_config() and get_config() should sync otherwise
 get_config can get partial value.
> Will probably have to add vdpa_device->config_access_lock().
 Probably. And the set_config() should be synchronized with the
 requrest that comes from vDPA bus.
>>> Yes, a rw semaphore is good enough.
>>> Device config fields can be change from the device side anyway, such as
>> link status anyway.
>>> Sync using lock shouldn’t be problem.
>>>
 This makes me think whether we should go back to two phases method,
 create and enable.

 The vDPA device is only registred after enabling, then there's no
 need to sync with vDPA bus, and mgmt is not allowed to change config
>> after enalbing?
>>> In that case we should be able to disable it as well. Disable should 
>>> take
>>> the
>> device of the bus.
>>> I find it weird to have this model to linger around the device without
>> anchoring on the bus.
>>> For example device is not yet enabled, so it is not anchored on the 
>>> bus, but
>> its name is still have to remain unique.
>>
>>
>> Can we do some synchornization between vdpa device allocation and vdpa
>> device registier to solve the naming issue?
> mgmt tool querying the device config space after vdpa device is in use is
> real.
> So I don't see solving it any differently.
>
> Now upper layers of vdpa to not access the device on the placed on the 
> vdpa
> bus, can be taken care by existing driver code using
> echo 0 > /sys/bus/vdpa/drivers_autoprobe
>
> By default vdpa device placed on the bus wont bind to any driver 
> (net/vhost
> etc).
> 1. Decision to bind to which driver is done after config of the vdpa 
> device is
> done.
> 2. orchestration sw does create and config before it binds to the right 
> driver
> 3. which driver to bind to decided based on the use case of that 
> individual
> vdpa device
> For example,
> (a) vdpa0 binds to net driver to have Netdev for container
> (b) vdpa1 binds to vhost driver to map vdpa device to VM

 I think it should work.
> Adding Adrian, Maxime and Sean for more comments.
>>> Is this a strong requirement or just a optional operation?
> 
> 
> So my understanding is that something needs to be done to make sure:
> 

> 1) the vpda device is not bound to virtio-vdpaThis is a problem of api not 
> being unified, right? If vdpa device is bound to
virtio-vdpa I would expect some net-specific options (like mac or tso) would be
modifiable through net_device_ops if the needed capabilities are held.

> 2) config is not changed if it has been set by management
>For virtio-vdpa (virtio-net) devices, that would be controlled via 
>CAP_NET_ADMIN.
For vhost-vdpa case, are you thinking of a way of stopping a user from 

Re: [PATCH v5] i2c: virtio: add a virtio i2c frontend driver

2021-03-03 Thread Jie Deng

On 2021/3/3 15:54, Viresh Kumar wrote:


On 01-03-21, 14:41, Jie Deng wrote:

diff --git a/drivers/i2c/busses/i2c-virtio.c b/drivers/i2c/busses/i2c-virtio.c
+static int virtio_i2c_send_reqs(struct virtqueue *vq,
+   struct virtio_i2c_req *reqs,
+   struct i2c_msg *msgs, int nr)
+{
+   struct scatterlist *sgs[3], out_hdr, msg_buf, in_hdr;
+   int i, outcnt, incnt, err = 0;
+   u8 *buf;
+
+   for (i = 0; i < nr; i++) {
+   if (!msgs[i].len)
+   break;
+
+   reqs[i].out_hdr.addr = cpu_to_le16(msgs[i].addr << 1);
+
+   if (i != nr - 1)
+   reqs[i].out_hdr.flags |= VIRTIO_I2C_FLAGS_FAIL_NEXT;
+
+   outcnt = incnt = 0;
+   sg_init_one(_hdr, [i].out_hdr, 
sizeof(reqs[i].out_hdr));
+   sgs[outcnt++] = _hdr;
+
+   buf = kzalloc(msgs[i].len, GFP_KERNEL);
+   if (!buf)
+   break;
+
+   if (msgs[i].flags & I2C_M_RD) {
+   reqs[i].read_buf = buf;
+   sg_init_one(_buf, reqs[i].read_buf, msgs[i].len);
+   sgs[outcnt + incnt++] = _buf;
+   } else {
+   reqs[i].write_buf = buf;
+   memcpy(reqs[i].write_buf, msgs[i].buf, msgs[i].len);
+   sg_init_one(_buf, reqs[i].write_buf, msgs[i].len);
+   sgs[outcnt++] = _buf;
+   }
+
+   sg_init_one(_hdr, [i].in_hdr, sizeof(reqs[i].in_hdr));
+   sgs[outcnt + incnt++] = _hdr;
+
+   err = virtqueue_add_sgs(vq, sgs, outcnt, incnt, [i], 
GFP_KERNEL);
+   if (err < 0) {
+   pr_err("failed to add msg[%d] to virtqueue.\n", i);
+   if (msgs[i].flags & I2C_M_RD) {
+   kfree(reqs[i].read_buf);
+   reqs[i].read_buf = NULL;
+   } else {
+   kfree(reqs[i].write_buf);
+   reqs[i].write_buf = NULL;
+   }
+   break;
+   }
+   }
+
+   return i;
+}
diff --git a/include/uapi/linux/virtio_i2c.h b/include/uapi/linux/virtio_i2c.h
+/**
+ * struct virtio_i2c_out_hdr - the virtio I2C message OUT header
+ * @addr: the controlled device address
+ * @padding: used to pad to full dword
+ * @flags: used for feature extensibility
+ */
+struct virtio_i2c_out_hdr {
+   __le16 addr;
+   __le16 padding;
+   __le32 flags;
+};

Both this code and the virtio spec (which is already merged) are
missing msgs[i].flags and they are never sent to backend. The only
flags available here are the ones defined by virtio spec and these are
not i2c flags.

I also looked at your i2c backend for acrn and it mistakenly copies
the hdr.flag, which is the virtio flag and not i2c flag.

https://github.com/projectacrn/acrn-hypervisor/blob/master/devicemodel/hw/pci/virtio/virtio_i2c.c#L539

I will send a fix for the specs if you agree that there is a problem
here.

what am I missing here ? This should have been caught in your testing
and so I feel I must be missing something.
This is not a problem. My original proposal was to mirror the struct 
i2c_msg.

The code you looked at was based on that.
However, the virtio TC prefer not to mirror it. They have some concerns.
For example, there is a bit I2C_M_RD in i2c_msg.flag which has the same 
meaning with

the R/W in virtio descriptor. This is a repetition which may cause problems.
So the virtio_i2c_out_hdr.flags is used to instead of i2c_msg.flags for 
extension.


You can check this link 
https://github.com/oasis-tcs/virtio-spec/issues/88 to learn the whole story.

There are discussions about the spec (v1 ~ v7).

I'm updating these drivers step by step to make sure they finally follow 
the spec.


___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [virtio-dev] Re: [PATCH] vdpa/mlx5: set_features should allow reset to zero

2021-03-03 Thread Cornelia Huck
On Wed, 3 Mar 2021 12:01:01 +0800
Jason Wang  wrote:

> On 2021/3/2 8:08 下午, Cornelia Huck wrote:
> > On Mon, 1 Mar 2021 11:51:08 +0800
> > Jason Wang  wrote:
> >  
> >> On 2021/3/1 5:25 上午, Michael S. Tsirkin wrote:  
> >>> On Fri, Feb 26, 2021 at 04:19:16PM +0800, Jason Wang wrote:  
>  On 2021/2/26 2:53 上午, Michael S. Tsirkin wrote:  
> > Confused. What is wrong with the above? It never reads the
> > field unless the feature has been offered by device.  
>  So the spec said:
> 
>  "
> 
>  The following driver-read-only field, max_virtqueue_pairs only exists if
>  VIRTIO_NET_F_MQ is set.
> 
>  "
> 
>  If I read this correctly, there will be no max_virtqueue_pairs field if 
>  the
>  VIRTIO_NET_F_MQ is not offered by device? If yes the offsetof() violates
>  what spec said.
> 
>  Thanks  
> >>> I think that's a misunderstanding. This text was never intended to
> >>> imply that field offsets change beased on feature bits.
> >>> We had this pain with legacy and we never wanted to go back there.
> >>>
> >>> This merely implies that without VIRTIO_NET_F_MQ the field
> >>> should not be accessed. Exists in the sense "is accessible to driver".
> >>>
> >>> Let's just clarify that in the spec, job done.  
> >>
> >> Ok, agree. That will make things more eaiser.  
> > Yes, that makes much more sense.
> >
> > What about adding the following to the "Basic Facilities of a Virtio
> > Device/Device Configuration Space" section of the spec:
> >
> > "If an optional configuration field does not exist, the corresponding
> > space is still present, but reserved."  
> 
> 
> This became interesting after re-reading some of the qemu codes.
> 
> E.g in virtio-net.c we had:
> 
> *static VirtIOFeature feature_sizes[] = {
>      {.flags = 1ULL << VIRTIO_NET_F_MAC,
>   .end = endof(struct virtio_net_config, mac)},
>      {.flags = 1ULL << VIRTIO_NET_F_STATUS,
>   .end = endof(struct virtio_net_config, status)},
>      {.flags = 1ULL << VIRTIO_NET_F_MQ,
>   .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
>      {.flags = 1ULL << VIRTIO_NET_F_MTU,
>   .end = endof(struct virtio_net_config, mtu)},
>      {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
>   .end = endof(struct virtio_net_config, duplex)},
>      {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << 
> VIRTIO_NET_F_HASH_REPORT),
>   .end = endof(struct virtio_net_config, supported_hash_types)},
>      {}
> };*
> 
> *It has a implict dependency chain. E.g MTU doesn't presnet if 
> DUPLEX/RSS is not offered ...
> *

But I think it covers everything up to the relevant field, no? So MTU
is included if we have the feature bit, even if we don't have
DUPLEX/RSS.

Given that a config space may be shorter (but must not collapse
non-existing fields), maybe a better wording would be:

"If an optional configuration field does not exist, the corresponding
space will still be present if it is not at the end of the
configuration space (i.e., further configuration fields exist.) This
implies that a given field, if it exists, is always at the same offset
from the beginning of the configuration space."


> >
> > (Do we need to specify what a device needs to do if the driver tries to
> > access a non-existing field? We cannot really make assumptions about
> > config space accesses; virtio-ccw can just copy a chunk of config space
> > that contains non-existing fields...  
> 
> 
> Right, it looks to me ccw doesn't depends on config len which is kind of 
> interesting. I think the answer depends on the implementation of both 
> transport and device:

(virtio-ccw is a bit odd, because channel I/O does not have the concept
of a config space and I needed to come up with something)

> 
> Let's take virtio-net-pci as an example, it fills status unconditionally 
> in virtio_net_set_config() even if VIRTIO_NET_F_STATUS is not 
> negotiated. So with the above feature_sizes:
> 
> 1) if one of the MQ, MTU, DUPLEX or RSS is implemented, driver can still 
> read status in this case
> 2) if none of the above four is negotied, driver can only read a 0xFF 
> (virtio_config_readb())
> 
> 
> > I guess the device could ignore
> > writes and return zeroes on read?)  
> 
> 
> So consider the above, it might be too late to fix/clarify that in the 
> spec on the expected behaviour of reading/writing non-exist fields.

We could still advise behaviour via SHOULD, though I'm not sure it adds
much at this point in time.

It really feels a bit odd that a driver can still read and write fields
for features that it did not negotiate, but I fear we're stuck with it.

> 
> Thanks
> 
> 
> >
> > I've opened https://github.com/oasis-tcs/virtio-spec/issues/98 for the
> > spec issues.
> >  

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization