Re: [PATCH V2 vhost next 10/10] vdpa/mlx5: Add VDPA driver for supported mlx5 devices

2020-07-20 Thread Jason Wang


On 2020/7/20 3:14 PM, Eli Cohen wrote:

Add a front end VDPA driver that registers in the VDPA bus and provides
networking to a guest. The VDPA driver creates the necessary resources
on the VF it is driving such that data path will be offloaded.

Notifications are being communicated through the driver.

Currently, only VFs are supported. In subsequent patches we will have
devlink support to control which VF is used for VDPA and which function
is used for regular networking.

Reviewed-by: Parav Pandit
Signed-off-by: Eli Cohen
---
Changes from V0:
1. Fix include path usage
2. Fix use after free in qp_create()
3. Consistently use mvq->initialized to check if a vq was initialized.
4. Remove unused local variable.
5. Defer modifying vq to ready until driver ok
6. Suspend hardware vq in set_vq_ready(0)
7. Remove reservation for control VQ since multi queue is not supported in
this version
8. Avoid calling put_device() since this is not a PCI device driver.



Looks good to me.

Acked-by: Jason Wang 



Re: [PATCH V2 vhost next 06/10] vdpa: Modify get_vq_state() to return error code

2020-07-20 Thread Jason Wang


On 2020/7/20 3:14 PM, Eli Cohen wrote:

Modify get_vq_state() so it returns an error code. In case of hardware
acceleration, the available index may be retrieved from the device, an
operation that can possibly fail.

Reviewed-by: Parav Pandit 
Signed-off-by: Eli Cohen 



Acked-by: Jason Wang 



---
  drivers/vdpa/ifcvf/ifcvf_main.c  | 5 +++--
  drivers/vdpa/vdpa_sim/vdpa_sim.c | 5 +++--
  drivers/vhost/vdpa.c | 5 -
  include/linux/vdpa.h | 4 ++--
  4 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c
index 69032ee97824..d9b5f465ac81 100644
--- a/drivers/vdpa/ifcvf/ifcvf_main.c
+++ b/drivers/vdpa/ifcvf/ifcvf_main.c
@@ -235,12 +235,13 @@ static u16 ifcvf_vdpa_get_vq_num_max(struct vdpa_device 
*vdpa_dev)
return IFCVF_QUEUE_MAX;
  }
  
-static void ifcvf_vdpa_get_vq_state(struct vdpa_device *vdpa_dev, u16 qid,

-   struct vdpa_vq_state *state)
+static int ifcvf_vdpa_get_vq_state(struct vdpa_device *vdpa_dev, u16 qid,
+  struct vdpa_vq_state *state)
  {
struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
  
  	state->avail_index = ifcvf_get_vq_state(vf, qid);

+   return 0;
  }
  
  static int ifcvf_vdpa_set_vq_state(struct vdpa_device *vdpa_dev, u16 qid,

diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
index 599519039f8d..ddf6086d43c2 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
@@ -427,14 +427,15 @@ static int vdpasim_set_vq_state(struct vdpa_device *vdpa, 
u16 idx,
return 0;
  }
  
-static void vdpasim_get_vq_state(struct vdpa_device *vdpa, u16 idx,

-struct vdpa_vq_state *state)
+static int vdpasim_get_vq_state(struct vdpa_device *vdpa, u16 idx,
+   struct vdpa_vq_state *state)
  {
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
	struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
	struct vringh *vrh = &vq->vring;
  
  	state->avail_index = vrh->last_avail_idx;

+   return 0;
  }
  
  static u32 vdpasim_get_vq_align(struct vdpa_device *vdpa)

diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index af98c11c9d26..fadad74f882e 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -360,7 +360,10 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, 
unsigned int cmd,
}
  
  	if (cmd == VHOST_GET_VRING_BASE) {

-   ops->get_vq_state(v->vdpa, idx, &vq_state);
+   r = ops->get_vq_state(v->vdpa, idx, &vq_state);
+   if (r)
+   return r;
+
vq->last_avail_idx = vq_state.avail_index;
}
  
diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h

index 7b088bebffe8..000d71a9f988 100644
--- a/include/linux/vdpa.h
+++ b/include/linux/vdpa.h
@@ -185,8 +185,8 @@ struct vdpa_config_ops {
bool (*get_vq_ready)(struct vdpa_device *vdev, u16 idx);
int (*set_vq_state)(struct vdpa_device *vdev, u16 idx,
const struct vdpa_vq_state *state);
-   void (*get_vq_state)(struct vdpa_device *vdev, u16 idx,
-struct vdpa_vq_state *state);
+   int (*get_vq_state)(struct vdpa_device *vdev, u16 idx,
+   struct vdpa_vq_state *state);
struct vdpa_notification_area
(*get_vq_notification)(struct vdpa_device *vdev, u16 idx);
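
As an illustration of the new contract, a driver-side implementation that can fail might look like the sketch below. This is only an illustration: example_vdpa_get_vq_state() and example_hw_read_avail_idx() are hypothetical names, not part of the series.

/* Minimal sketch (hypothetical names): a get_vq_state() implementation
 * that propagates a hardware read failure instead of returning void. */
static int example_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx,
				     struct vdpa_vq_state *state)
{
	u16 avail_idx;
	int err;

	/* Assumed helper that queries the device for the available index. */
	err = example_hw_read_avail_idx(vdev, idx, &avail_idx);
	if (err)
		return err;

	state->avail_index = avail_idx;
	return 0;
}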
  



Re: [PATCH V2 vhost next 05/10] vhost: Fix documentation

2020-07-20 Thread Jason Wang


On 2020/7/20 3:14 PM, Eli Cohen wrote:

Fix documentation to match actual function prototypes.

Reviewed-by: Parav Pandit 
Signed-off-by: Eli Cohen 



Acked-by: Jason Wang 



---
  drivers/vhost/iotlb.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/vhost/iotlb.c b/drivers/vhost/iotlb.c
index 1f0ca6e44410..0d4213a54a88 100644
--- a/drivers/vhost/iotlb.c
+++ b/drivers/vhost/iotlb.c
@@ -149,7 +149,7 @@ EXPORT_SYMBOL_GPL(vhost_iotlb_free);
   * vhost_iotlb_itree_first - return the first overlapped range
   * @iotlb: the IOTLB
   * @start: start of IOVA range
- * @end: end of IOVA range
+ * @last: last byte in IOVA range
   */
  struct vhost_iotlb_map *
  vhost_iotlb_itree_first(struct vhost_iotlb *iotlb, u64 start, u64 last)
@@ -162,7 +162,7 @@ EXPORT_SYMBOL_GPL(vhost_iotlb_itree_first);
   * vhost_iotlb_itree_first - return the next overlapped range
   * @iotlb: the IOTLB
   * @start: start of IOVA range
- * @end: end of IOVA range
+ * @last: last byte IOVA range
   */
  struct vhost_iotlb_map *
  vhost_iotlb_itree_next(struct vhost_iotlb_map *map, u64 start, u64 last)
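
For reference, both helpers take an inclusive [start, last] byte range, which is why the parameter is the last byte rather than an exclusive end. A minimal usage sketch (example_dump_range() is a hypothetical caller, not from this patch):

/* Hypothetical sketch: walk every mapping overlapping the inclusive
 * IOVA range [start, last] using the two helpers documented above. */
static void example_dump_range(struct vhost_iotlb *iotlb, u64 start, u64 last)
{
	struct vhost_iotlb_map *map;

	for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
	     map = vhost_iotlb_itree_next(map, start, last))
		pr_info("overlap: [0x%llx, 0x%llx]\n", map->start, map->last);
}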



Re: [PATCH V2 vhost next 10/10] vdpa/mlx5: Add VDPA driver for supported mlx5 devices

2020-07-20 Thread kernel test robot
Hi Eli,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on next-20200717]

url:
https://github.com/0day-ci/linux/commits/Eli-Cohen/VDPA-support-for-Mellanox-ConnectX-devices/20200720-160220
base:     aab7ee9f8ff0110bfcd594b33dc33748dc1baf46
config: x86_64-allyesconfig (attached as .config)
compiler: clang version 12.0.0 (https://github.com/llvm/llvm-project cf1105069648446d58adfb7a6cc590013d6886ba)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install x86_64 cross compiling tool for clang build
        # apt-get install binutils-x86-64-linux-gnu
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=x86_64

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All errors (new ones prefixed by >>):

   In file included from drivers/vdpa/mlx5/net/mlx5_vnet.c:12:
>> drivers/vdpa/mlx5/net/mlx5_vnet.h:13:10: fatal error: 'mlx5_vdpa.h' file not found
   #include "mlx5_vdpa.h"
            ^
   1 error generated.

vim +13 drivers/vdpa/mlx5/net/mlx5_vnet.h

 6  
 7  #include 
 8  #include 
 9  #include 
10  #include 
11  #include 
12  #include 
  > 13  #include "mlx5_vdpa.h"
14  

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org



Re: [PATCH RFC v8 02/11] vhost: use batched get_vq_desc version

2020-07-20 Thread Jason Wang


On 2020/7/20 7:16 PM, Eugenio Pérez wrote:

On Mon, Jul 20, 2020 at 11:27 AM Michael S. Tsirkin  wrote:

On Thu, Jul 16, 2020 at 07:16:27PM +0200, Eugenio Perez Martin wrote:

On Fri, Jul 10, 2020 at 7:58 AM Michael S. Tsirkin  wrote:

On Fri, Jul 10, 2020 at 07:39:26AM +0200, Eugenio Perez Martin wrote:

How about playing with the batch size? Make it a mod parameter instead
of the hard coded 64, and measure for all values 1 to 64 ...

Right, according to the test result, 64 seems to be too aggressive in
the case of TX.


Got it, thanks both!

In particular I wonder whether with batch size 1
we get same performance as without batching
(would indicate 64 is too aggressive)
or not (would indicate one of the code changes
affects performance in an unexpected way).

--
MST


Hi!

Varying batch_size as drivers/vhost/net.c:VHOST_NET_BATCH,

sorry this is not what I meant.

I mean something like this:


diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 0b509be8d7b1..b94680e5721d 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -1279,6 +1279,10 @@ static void handle_rx_net(struct vhost_work *work)
 handle_rx(net);
  }

+static int batch_num = 0;
+module_param(batch_num, int, 0644);
+MODULE_PARM_DESC(batch_num, "Number of batched descriptors. (offset from 64)");
+
  static int vhost_net_open(struct inode *inode, struct file *f)
  {
 struct vhost_net *n;
@@ -1333,7 +1337,7 @@ static int vhost_net_open(struct inode *inode, struct 
file *f)
 vhost_net_buf_init(&n->vqs[i].rxq);
 }
 vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX,
-  UIO_MAXIOV + VHOST_NET_BATCH,
+  UIO_MAXIOV + VHOST_NET_BATCH + batch_num,
VHOST_NET_PKT_WEIGHT, VHOST_NET_WEIGHT, true,
NULL);


then you can try tweaking batching and playing with mod parameter without
recompiling.


VHOST_NET_BATCH affects lots of other things.


Ok, got it. Since they were aligned from the start, I thought it was a good 
idea to maintain them in-sync.


and testing
the pps as previous mail says. This means that we have either only
vhost_net batching (in base testing, like previously to apply this
patch) or both batching sizes the same.

I've checked that vhost process (and pktgen) goes 100% cpu also.

For tx: Batching decrements always the performance, in all cases. Not
sure why bufapi made things better the last time.

Batching makes improvements until 64 bufs, I see increments of pps but like 1%.

For rx: Batching always improves performance. It seems that if we
batch little, bufapi decreases performance, but beyond 64, bufapi is
much better. The bufapi version keeps improving until I set a batching
of 1024. So I guess it is super good to have a bunch of buffers to
receive.

Since with this test I cannot disable event_idx or things like that,
what would be the next step for testing?

Thanks!

--
Results:
# Buf size: 1,16,32,64,128,256,512

# Tx
# ===
# Base
2293304.308,3396057.769,3540860.615,3636056.077,3332950.846,3694276.154,3689820
# Batch
2286723.857,3307191.643,3400346.571,3452527.786,3460766.857,3431042.5,3440722.286
# Batch + Bufapi
2257970.769,3151268.385,3260150.538,3379383.846,3424028.846,3433384.308,3385635.231,3406554.538

# Rx
# ==
# pktgen results (pps)
1223275,1668868,1728794,1769261,1808574,1837252,1846436
1456924,1797901,1831234,1868746,1877508,1931598,1936402
1368923,1719716,1794373,1865170,1884803,1916021,1975160

# Testpmd pps results
1222698.143,1670604,1731040.6,1769218,1811206,1839308.75,1848478.75
1450140.5,1799985.75,1834089.75,1871290,1880005.5,1934147.25,1939034
1370621,1721858,1796287.75,1866618.5,1885466.5,1918670.75,1976173.5,1988760.75,1978316

pktgen was run again for rx with 1024 and 2048 buf size, giving
1988760.75 and 1978316 pps. Testpmd goes the same way.

Don't really understand what does this data mean.
Which number of descs is batched for each run?


Sorry, I should have explained better. I will expand here, but feel free to 
skip it since we are going to discard the
data anyway. Or to propose a better way to tell them.

Is a CSV with the values I've obtained, in pps, from pktgen and testpmd. This 
way is easy to plot them.

Maybe is easier as tables, if mail readers/gmail does not misalign them.


# Tx
# ===

Base: With the previous code, not integrating any patch. testpmd is txonly 
mode, tap interface is XDP_DROP everything.
We vary VHOST_NET_BATCH (1, 16, 32, ...). As Jason put in a previous mail:

TX: testpmd(txonly) -> virtio-user -> vhost_net -> XDP_DROP on TAP


          1 |          16 |          32 |          64 |         128 |         256 |         512
2293304.308 | 3396057.769 | 3540860.615 | 3636056.077 | 3332950.846 | 3694276.154 |     3689820

If we add the batching part of the series, but not the bufapi:

          1 |          16 |          32 |          64 |         128 |         256 |         512
2286723.857 | 3307191.643 | 3400346.571 | 3452527.786 | 3460766.857 |   3431042.5 | 3440722.286

Re: [PATCH V2 3/6] vDPA: implement IRQ offloading helpers in vDPA core

2020-07-20 Thread Jason Wang


On 2020/7/21 10:02 AM, Zhu, Lingshan wrote:



On 7/20/2020 5:40 PM, Jason Wang wrote:


On 2020/7/20 5:07 PM, Zhu, Lingshan wrote:


+}
+
+static void vdpa_unsetup_irq(struct vdpa_device *vdev, int qid)
+{
+    struct vdpa_driver *drv = drv_to_vdpa(vdev->dev.driver);
+
+    if (drv->unsetup_vq_irq)
+    drv->unsetup_vq_irq(vdev, qid);



Do you need to check the existence of drv before calling 
unset_vq_irq()?

Yes, we should check this when we take the releasing path into account.


And how can this synchronize with driver releasing and binding?

Will add a vdpa_unsetup_irq() call in vhost_vdpa_release().
For binding, I think it is a new dev bound to the driver,
so it should go through the vdpa_setup_irq() routine. Or if it is
a device re-bound to vhost_vdpa, I think we have cleaned up
irq_bypass_producer for it as we would call vdpa_unsetup_irq()
in the release function.



I meant can the following things happen?

1) some vDPA device driver probe the hardware and call 
vdpa_request_irq() in its PCI probe function.

2) vDPA device is probed by vhost-vDPA

Then irq bypass can't work since when vdpa_unsetup_irq() is
called, there's no driver bound. Or is there a requirement that
vdpa_request/free_irq() must be called somewhere (e.g. in the
set_status bus operations)? If yes, we need to document those requirements.

vdpa_unsetup_irq() is only called when we want to unregister the producer,



Typo, I meant vdpa_setup_irq().

Thanks



  now we have two code paths using it: free_irq and release(). I agree we can
document these requirements for the helpers; these functions can only be called
through status changes (DRIVER_OK and !DRIVER_OK).

Thanks,
BR
Zhu Lingshan
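
To make the requirement above concrete, a hypothetical sketch of a device driver that only wires the interrupts up around DRIVER_OK could look as follows. All example_* names and fields, and the exact signatures of the series' vdpa_request_irq()/vdpa_free_irq() helpers, are assumptions for illustration only.

/* Hypothetical sketch: request/free the per-vq IRQs only on DRIVER_OK
 * transitions inside the driver's set_status() callback. */
static void example_vdpa_set_status(struct vdpa_device *vdev, u8 status)
{
	struct example_dev *dev = example_from_vdpa(vdev);	/* assumed helper */
	int qid;

	if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
		for (qid = 0; qid < dev->num_vqs; qid++)
			vdpa_request_irq(vdev, qid, dev->vq_irq[qid]);	/* assumed signature */
	} else {
		for (qid = 0; qid < dev->num_vqs; qid++)
			vdpa_free_irq(vdev, qid);			/* assumed signature */
	}

	example_hw_set_status(dev, status);	/* assumed helper */
}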


Thanks




Re: [PATCH] VIRTIO CORE AND NET DRIVERS: Replace HTTP links with HTTPS ones

2020-07-20 Thread Rob Herring
On Thu, 09 Jul 2020 08:47:55 +0200, Alexander A. Klimov wrote:
> Rationale:
> Reduces attack surface on kernel devs opening the links for MITM
> as HTTPS traffic is much harder to manipulate.
> 
> Deterministic algorithm:
> For each file:
>   If not .svg:
> For each line:
>   If doesn't contain `\bxmlns\b`:
> For each link, `\bhttp://[^# \t\r\n]*(?:\w|/)`:
> If neither `\bgnu\.org/license`, nor `\bmozilla\.org/MPL\b`:
> If both the HTTP and HTTPS versions
> return 200 OK and serve the same content:
>   Replace HTTP with HTTPS.
> 
> Signed-off-by: Alexander A. Klimov 
> ---
>  Continuing my work started at 93431e0607e5.
>  See also: git log --oneline '--author=Alexander A. Klimov 
> ' v5.7..master
>  (Actually letting a shell for loop submit all this stuff for me.)
> 
>  If there are any URLs to be removed completely or at least not HTTPSified:
>  Just clearly say so and I'll *undo my change*.
>  See also: https://lkml.org/lkml/2020/6/27/64
> 
>  If there are any valid, but yet not changed URLs:
>  See: https://lkml.org/lkml/2020/6/26/837
> 
>  If you apply the patch, please let me know.
> 
> 
>  Documentation/devicetree/bindings/virtio/mmio.txt | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 

Applied, thanks!


Re: [PATCH v3 02/19] compiler.h: Split {READ, WRITE}_ONCE definitions out into rwonce.h

2020-07-20 Thread Will Deacon
On Mon, Jul 13, 2020 at 08:23:22PM +0800, boqun.f...@gmail.com wrote:
> On Fri, Jul 10, 2020 at 05:51:46PM +0100, Will Deacon wrote:
> > diff --git a/include/asm-generic/rwonce.h b/include/asm-generic/rwonce.h
> > new file mode 100644
> > index ..92cc2f223cb3
> > --- /dev/null
> > +++ b/include/asm-generic/rwonce.h
> > @@ -0,0 +1,91 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +/*
> > + * Prevent the compiler from merging or refetching reads or writes. The
> > + * compiler is also forbidden from reordering successive instances of
> > + * READ_ONCE and WRITE_ONCE, but only when the compiler is aware of some
> > + * particular ordering. One way to make the compiler aware of ordering is 
> > to
> > + * put the two invocations of READ_ONCE or WRITE_ONCE in different C
> > + * statements.
> > + *
> > + * These two macros will also work on aggregate data types like structs or
> > + * unions.
> > + *
> > + * Their two major use cases are: (1) Mediating communication between
> > + * process-level code and irq/NMI handlers, all running on the same CPU,
> > + * and (2) Ensuring that the compiler does not fold, spindle, or otherwise
> > + * mutilate accesses that either do not require ordering or that interact
> > + * with an explicit memory barrier or atomic instruction that provides the
> > + * required ordering.
> > + */
> > +#ifndef __ASM_GENERIC_RWONCE_H
> > +#define __ASM_GENERIC_RWONCE_H
> > +
> > +#ifndef __ASSEMBLY__
> > +
> > +#include 
> > +#include 
> > +#include 
> > +
> > +#include 
> > +
> > +/*
> > + * Use __READ_ONCE() instead of READ_ONCE() if you do not require any
> > + * atomicity or dependency ordering guarantees. Note that this may result
> > + * in tears!
> > + */
> > +#define __READ_ONCE(x)	(*(const volatile __unqual_scalar_typeof(x) *)&(x))
> > +
> > +#define __READ_ONCE_SCALAR(x)					\
> > +({								\
> > +	__unqual_scalar_typeof(x) __x = __READ_ONCE(x);		\
> > +	smp_read_barrier_depends();				\
> > +	(typeof(x))__x;						\
> > +})
> > +
> > +#define READ_ONCE(x)						\
> > +({								\
> > +	compiletime_assert_rwonce_type(x);			\
> 
> Does it make sense if we also move the definition of this compile time
> assertion into rwonce.h too?

Yes, that looks straightforward enough. Thanks for the suggestion!

I'll also try to get this lot into -next this week.

Will
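
(For context, the assertion being discussed is defined along the following lines in the pre-split compiler.h; shown here as an approximation, not quoted from the patch.)

/* Approximation of the existing definition under discussion. */
#define compiletime_assert_rwonce_type(t)					\
	compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long),	\
		"Unsupported access size for {READ,WRITE}_ONCE().")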


Re: [PATCH v4 70/75] x86/head/64: Don't call verify_cpu() on starting APs

2020-07-20 Thread Joerg Roedel
On Wed, Jul 15, 2020 at 12:49:23PM -0700, Kees Cook wrote:
> Aaah. I see. Thanks for the details there. So ... can you add a bunch
> more comments about why/when the new entry path is being used? I really
> don't want to accidentally discover some unrelated refactoring down
> the road (in months, years, unrelated to SEV, etc) starts to also skip
> verify_cpu() on Intel systems. There had been a lot of BIOSes that set
> this MSR to disable NX, and I don't want to repeat that pain: Linux must
> never start an Intel CPU with that MSR set. :P

Understood :)

I added a comment above the label explaining why it is only used for
SEV-ES guests and pointing out the importance of running verify_cpu() on
all other systems, especially if they are Intel based.

Regards,

Joerg


Re: [PATCH V2 vhost next 10/10] vdpa/mlx5: Add VDPA driver for supported mlx5 devices

2020-07-20 Thread kernel test robot
Hi Eli,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on next-20200717]

url:
https://github.com/0day-ci/linux/commits/Eli-Cohen/VDPA-support-for-Mellanox-ConnectX-devices/20200720-160220
base:     aab7ee9f8ff0110bfcd594b33dc33748dc1baf46
config: arc-allyesconfig (attached as .config)
compiler: arc-elf-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=arc

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All errors (new ones prefixed by >>):

>> drivers/vdpa/mlx5/net/main.c:7:10: fatal error: mlx5_vdpa_ifc.h: No such file or directory
       7 | #include "mlx5_vdpa_ifc.h"
         |          ^
   compilation terminated.
--
   In file included from drivers/vdpa/mlx5/net/mlx5_vnet.c:12:
>> drivers/vdpa/mlx5/net/mlx5_vnet.h:13:10: fatal error: mlx5_vdpa.h: No such file or directory
      13 | #include "mlx5_vdpa.h"
         |          ^
   compilation terminated.

vim +7 drivers/vdpa/mlx5/net/main.c

 3  
 4  #include 
 5  #include 
 6  #include 
   > 7  #include "mlx5_vdpa_ifc.h"
 8  #include "mlx5_vnet.h"
 9  

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org



Re: [PATCH RFC v8 02/11] vhost: use batched get_vq_desc version

2020-07-20 Thread Michael S. Tsirkin
On Mon, Jul 20, 2020 at 01:16:47PM +0200, Eugenio Pérez wrote:
> 
> On Mon, Jul 20, 2020 at 11:27 AM Michael S. Tsirkin  wrote:
> > On Thu, Jul 16, 2020 at 07:16:27PM +0200, Eugenio Perez Martin wrote:
> > > On Fri, Jul 10, 2020 at 7:58 AM Michael S. Tsirkin  
> > > wrote:
> > > > On Fri, Jul 10, 2020 at 07:39:26AM +0200, Eugenio Perez Martin wrote:
> > > > > > > How about playing with the batch size? Make it a mod parameter 
> > > > > > > instead
> > > > > > > of the hard coded 64, and measure for all values 1 to 64 ...
> > > > > > 
> > > > > > Right, according to the test result, 64 seems to be too aggressive 
> > > > > > in
> > > > > > the case of TX.
> > > > > > 
> > > > > 
> > > > > Got it, thanks both!
> > > > 
> > > > In particular I wonder whether with batch size 1
> > > > we get same performance as without batching
> > > > (would indicate 64 is too aggressive)
> > > > or not (would indicate one of the code changes
> > > > affects performance in an unexpected way).
> > > > 
> > > > --
> > > > MST
> > > > 
> > > 
> > > Hi!
> > > 
> > > Varying batch_size as drivers/vhost/net.c:VHOST_NET_BATCH,
> > 
> > sorry this is not what I meant.
> > 
> > I mean something like this:
> > 
> > 
> > diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> > index 0b509be8d7b1..b94680e5721d 100644
> > --- a/drivers/vhost/net.c
> > +++ b/drivers/vhost/net.c
> > @@ -1279,6 +1279,10 @@ static void handle_rx_net(struct vhost_work *work)
> > handle_rx(net);
> >  }
> > 
> > +static int batch_num = 0;
> > +module_param(batch_num, int, 0644);
> > +MODULE_PARM_DESC(batch_num, "Number of batched descriptors. (offset from 64)");
> > +
> >  static int vhost_net_open(struct inode *inode, struct file *f)
> >  {
> > struct vhost_net *n;
> > @@ -1333,7 +1337,7 @@ static int vhost_net_open(struct inode *inode, struct 
> > file *f)
> > vhost_net_buf_init(&n->vqs[i].rxq);
> > }
> > vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX,
> > -  UIO_MAXIOV + VHOST_NET_BATCH,
> > +  UIO_MAXIOV + VHOST_NET_BATCH + batch_num,
> >VHOST_NET_PKT_WEIGHT, VHOST_NET_WEIGHT, true,
> >NULL);
> > 
> > 
> > then you can try tweaking batching and playing with mod parameter without
> > recompiling.
> > 
> > 
> > VHOST_NET_BATCH affects lots of other things.
> > 
> 
> Ok, got it. Since they were aligned from the start, I thought it was a good 
> idea to maintain them in-sync.
> 
> > > and testing
> > > the pps as previous mail says. This means that we have either only
> > > vhost_net batching (in base testing, like previously to apply this
> > > patch) or both batching sizes the same.
> > > 
> > > I've checked that vhost process (and pktgen) goes 100% cpu also.
> > > 
> > > For tx: Batching decrements always the performance, in all cases. Not
> > > sure why bufapi made things better the last time.
> > > 
> > > Batching makes improvements until 64 bufs, I see increments of pps but 
> > > like 1%.
> > > 
> > > For rx: Batching always improves performance. It seems that if we
> > > batch little, bufapi decreases performance, but beyond 64, bufapi is
> > > much better. The bufapi version keeps improving until I set a batching
> > > of 1024. So I guess it is super good to have a bunch of buffers to
> > > receive.
> > > 
> > > Since with this test I cannot disable event_idx or things like that,
> > > what would be the next step for testing?
> > > 
> > > Thanks!
> > > 
> > > --
> > > Results:
> > > # Buf size: 1,16,32,64,128,256,512
> > > 
> > > # Tx
> > > # ===
> > > # Base
> > > 2293304.308,3396057.769,3540860.615,3636056.077,3332950.846,3694276.154,3689820
> > > # Batch
> > > 2286723.857,3307191.643,3400346.571,3452527.786,3460766.857,3431042.5,3440722.286
> > > # Batch + Bufapi
> > > 2257970.769,3151268.385,3260150.538,3379383.846,3424028.846,3433384.308,3385635.231,3406554.538
> > > 
> > > # Rx
> > > # ==
> > > # pktgen results (pps)
> > > 1223275,1668868,1728794,1769261,1808574,1837252,1846436
> > > 1456924,1797901,1831234,1868746,1877508,1931598,1936402
> > > 1368923,1719716,1794373,1865170,1884803,1916021,1975160
> > > 
> > > # Testpmd pps results
> > > 1222698.143,1670604,1731040.6,1769218,1811206,1839308.75,1848478.75
> > > 1450140.5,1799985.75,1834089.75,1871290,1880005.5,1934147.25,1939034
> > > 1370621,1721858,1796287.75,1866618.5,1885466.5,1918670.75,1976173.5,1988760.75,1978316
> > > 
> > > pktgen was run again for rx with 1024 and 2048 buf size, giving
> > > 1988760.75 and 1978316 pps. Testpmd goes the same way.
> > 
> > Don't really understand what does this data mean.
> > Which number of descs is batched for each run?
> > 
> 
> Sorry, I should have explained better. I will expand here, but feel free to 
> skip it since we are going to discard the
> data anyway. Or to propose a better way to tell them.
> 
> Is a CSV with the values I've obtained, in pps, from pktgen and testpmd. This 
> way is easy to plot them.
> 
> 

Re: [PATCH V2 3/6] vDPA: implement IRQ offloading helpers in vDPA core

2020-07-20 Thread Jason Wang


On 2020/7/20 5:07 PM, Zhu, Lingshan wrote:


+}
+
+static void vdpa_unsetup_irq(struct vdpa_device *vdev, int qid)
+{
+    struct vdpa_driver *drv = drv_to_vdpa(vdev->dev.driver);
+
+    if (drv->unsetup_vq_irq)
+    drv->unsetup_vq_irq(vdev, qid);



Do you need to check the existence of drv before calling unset_vq_irq()?

Yes, we should check this when we take the releasing path into account.


And how can this synchronize with driver releasing and binding?

Will add a vdpa_unsetup_irq() call in vhost_vdpa_release().
For binding, I think it is a new dev bound to the driver,
so it should go through the vdpa_setup_irq() routine. Or if it is
a device re-bound to vhost_vdpa, I think we have cleaned up
irq_bypass_producer for it as we would call vdpa_unsetup_irq()
in the release function.



I meant can the following things happen?

1) some vDPA device driver probe the hardware and call 
vdpa_request_irq() in its PCI probe function.

2) vDPA device is probed by vhost-vDPA

Then irq bypass can't work since when vdpa_unsetup_irq() is called,
there's no driver bound. Or is there a requirement that
vdpa_request/free_irq() must be called somewhere (e.g. in the set_status
bus operations)? If yes, we need to document those requirements.


Thanks


Re: [PATCH RFC v8 02/11] vhost: use batched get_vq_desc version

2020-07-20 Thread Michael S. Tsirkin
On Thu, Jul 16, 2020 at 07:16:27PM +0200, Eugenio Perez Martin wrote:
> On Fri, Jul 10, 2020 at 7:58 AM Michael S. Tsirkin  wrote:
> >
> > On Fri, Jul 10, 2020 at 07:39:26AM +0200, Eugenio Perez Martin wrote:
> > > > > How about playing with the batch size? Make it a mod parameter instead
> > > > > of the hard coded 64, and measure for all values 1 to 64 ...
> > > >
> > > >
> > > > Right, according to the test result, 64 seems to be too aggressive in
> > > > the case of TX.
> > > >
> > >
> > > Got it, thanks both!
> >
> > In particular I wonder whether with batch size 1
> > we get same performance as without batching
> > (would indicate 64 is too aggressive)
> > or not (would indicate one of the code changes
> > affects performance in an unexpected way).
> >
> > --
> > MST
> >
> 
> Hi!
> 
> Varying batch_size as drivers/vhost/net.c:VHOST_NET_BATCH,

sorry this is not what I meant.

I mean something like this:


diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 0b509be8d7b1..b94680e5721d 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -1279,6 +1279,10 @@ static void handle_rx_net(struct vhost_work *work)
handle_rx(net);
 }
 
+static int batch_num = 0;
+module_param(batch_num, int, 0644);
+MODULE_PARM_DESC(batch_num, "Number of batched descriptors. (offset from 64)");
+
 static int vhost_net_open(struct inode *inode, struct file *f)
 {
struct vhost_net *n;
@@ -1333,7 +1337,7 @@ static int vhost_net_open(struct inode *inode, struct 
file *f)
vhost_net_buf_init(&n->vqs[i].rxq);
}
vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX,
-  UIO_MAXIOV + VHOST_NET_BATCH,
+  UIO_MAXIOV + VHOST_NET_BATCH + batch_num,
   VHOST_NET_PKT_WEIGHT, VHOST_NET_WEIGHT, true,
   NULL);
 

then you can try tweaking batching and playing with mod parameter without
recompiling.


VHOST_NET_BATCH affects lots of other things.


> and testing
> the pps as previous mail says. This means that we have either only
> vhost_net batching (in base testing, like previously to apply this
> patch) or both batching sizes the same.
> 
> I've checked that vhost process (and pktgen) goes 100% cpu also.
> 
> For tx: Batching decrements always the performance, in all cases. Not
> sure why bufapi made things better the last time.
> 
> Batching makes improvements until 64 bufs, I see increments of pps but like 
> 1%.
> 
> For rx: Batching always improves performance. It seems that if we
> batch little, bufapi decreases performance, but beyond 64, bufapi is
> much better. The bufapi version keeps improving until I set a batching
> of 1024. So I guess it is super good to have a bunch of buffers to
> receive.
> 
> Since with this test I cannot disable event_idx or things like that,
> what would be the next step for testing?
> 
> Thanks!
> 
> --
> Results:
> # Buf size: 1,16,32,64,128,256,512
> 
> # Tx
> # ===
> # Base
> 2293304.308,3396057.769,3540860.615,3636056.077,3332950.846,3694276.154,3689820
> # Batch
> 2286723.857,3307191.643,3400346.571,3452527.786,3460766.857,3431042.5,3440722.286
> # Batch + Bufapi
> 2257970.769,3151268.385,3260150.538,3379383.846,3424028.846,3433384.308,3385635.231,3406554.538
> 
> # Rx
> # ==
> # pktgen results (pps)
> 1223275,1668868,1728794,1769261,1808574,1837252,1846436
> 1456924,1797901,1831234,1868746,1877508,1931598,1936402
> 1368923,1719716,1794373,1865170,1884803,1916021,1975160
> 
> # Testpmd pps results
> 1222698.143,1670604,1731040.6,1769218,1811206,1839308.75,1848478.75
> 1450140.5,1799985.75,1834089.75,1871290,1880005.5,1934147.25,1939034
> 1370621,1721858,1796287.75,1866618.5,1885466.5,1918670.75,1976173.5,1988760.75,1978316
> 
> pktgen was run again for rx with 1024 and 2048 buf size, giving
> 1988760.75 and 1978316 pps. Testpmd goes the same way.

Don't really understand what does this data mean.
Which number of descs is batched for each run?

-- 
MST



Re: [PATCH RFC v8 02/11] vhost: use batched get_vq_desc version

2020-07-20 Thread Jason Wang


On 2020/7/17 1:16 AM, Eugenio Perez Martin wrote:

On Fri, Jul 10, 2020 at 7:58 AM Michael S. Tsirkin  wrote:

On Fri, Jul 10, 2020 at 07:39:26AM +0200, Eugenio Perez Martin wrote:

How about playing with the batch size? Make it a mod parameter instead
of the hard coded 64, and measure for all values 1 to 64 ...


Right, according to the test result, 64 seems to be too aggressive in
the case of TX.


Got it, thanks both!

In particular I wonder whether with batch size 1
we get same performance as without batching
(would indicate 64 is too aggressive)
or not (would indicate one of the code changes
affects performance in an unexpected way).

--
MST


Hi!

Varying batch_size as drivers/vhost/net.c:VHOST_NET_BATCH,



Did you mean varying the value of VHOST_NET_BATCH itself or the number 
of batched descriptors?




and testing
the pps as previous mail says. This means that we have either only
vhost_net batching (in base testing, like previously to apply this
patch) or both batching sizes the same.

I've checked that vhost process (and pktgen) goes 100% cpu also.

For tx: Batching decrements always the performance, in all cases. Not
sure why bufapi made things better the last time.

Batching makes improvements until 64 bufs, I see increments of pps but like 1%.

For rx: Batching always improves performance. It seems that if we
batch little, bufapi decreases performance, but beyond 64, bufapi is
much better. The bufapi version keeps improving until I set a batching
of 1024. So I guess it is super good to have a bunch of buffers to
receive.

Since with this test I cannot disable event_idx or things like that,
what would be the next step for testing?

Thanks!

--
Results:
# Buf size: 1,16,32,64,128,256,512

# Tx
# ===
# Base
2293304.308,3396057.769,3540860.615,3636056.077,3332950.846,3694276.154,3689820



What's the meaning of buf size in the context of "base"?

And I wonder maybe perf diff can help.

Thanks



# Batch
2286723.857,3307191.643,3400346.571,3452527.786,3460766.857,3431042.5,3440722.286
# Batch + Bufapi
2257970.769,3151268.385,3260150.538,3379383.846,3424028.846,3433384.308,3385635.231,3406554.538

# Rx
# ==
# pktgen results (pps)
1223275,1668868,1728794,1769261,1808574,1837252,1846436
1456924,1797901,1831234,1868746,1877508,1931598,1936402
1368923,1719716,1794373,1865170,1884803,1916021,1975160

# Testpmd pps results
1222698.143,1670604,1731040.6,1769218,1811206,1839308.75,1848478.75
1450140.5,1799985.75,1834089.75,1871290,1880005.5,1934147.25,1939034
1370621,1721858,1796287.75,1866618.5,1885466.5,1918670.75,1976173.5,1988760.75,1978316

pktgen was run again for rx with 1024 and 2048 buf size, giving
1988760.75 and 1978316 pps. Testpmd goes the same way.




[PATCH] vhost: vdpa: remove per device feature whitelist

2020-07-20 Thread Jason Wang
We used to have a per device feature whitelist to filter out the
unsupported virtio features. But this seems unnecessary since:

- the main idea behind the feature whitelist is to block the control vq
  feature until we finalize the control virtqueue API. But the current
  vhost-vDPA uAPI is sufficient to support a control virtqueue. For a
  device that has a hardware control virtqueue, the vDPA device driver
  can just set up the hardware virtqueue and let userspace use the
  hardware virtqueue directly. For a device that doesn't have a control
  virtqueue, the vDPA device driver needs to use e.g. vringh to emulate
  a software control virtqueue.
- we don't do it in the virtio-vDPA driver

So remove this limitation.

Signed-off-by: Jason Wang 
---
 drivers/vhost/vdpa.c | 37 -
 1 file changed, 37 deletions(-)

diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index 77a0c9fb6cc3..f7f6ddd681ce 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -26,35 +26,6 @@
 
 #include "vhost.h"
 
-enum {
-   VHOST_VDPA_FEATURES =
-   (1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) |
-   (1ULL << VIRTIO_F_ANY_LAYOUT) |
-   (1ULL << VIRTIO_F_VERSION_1) |
-   (1ULL << VIRTIO_F_IOMMU_PLATFORM) |
-   (1ULL << VIRTIO_F_RING_PACKED) |
-   (1ULL << VIRTIO_F_ORDER_PLATFORM) |
-   (1ULL << VIRTIO_RING_F_INDIRECT_DESC) |
-   (1ULL << VIRTIO_RING_F_EVENT_IDX),
-
-   VHOST_VDPA_NET_FEATURES = VHOST_VDPA_FEATURES |
-   (1ULL << VIRTIO_NET_F_CSUM) |
-   (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
-   (1ULL << VIRTIO_NET_F_MTU) |
-   (1ULL << VIRTIO_NET_F_MAC) |
-   (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
-   (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
-   (1ULL << VIRTIO_NET_F_GUEST_ECN) |
-   (1ULL << VIRTIO_NET_F_GUEST_UFO) |
-   (1ULL << VIRTIO_NET_F_HOST_TSO4) |
-   (1ULL << VIRTIO_NET_F_HOST_TSO6) |
-   (1ULL << VIRTIO_NET_F_HOST_ECN) |
-   (1ULL << VIRTIO_NET_F_HOST_UFO) |
-   (1ULL << VIRTIO_NET_F_MRG_RXBUF) |
-   (1ULL << VIRTIO_NET_F_STATUS) |
-   (1ULL << VIRTIO_NET_F_SPEED_DUPLEX),
-};
-
 /* Currently, only network backend w/o multiqueue is supported. */
 #define VHOST_VDPA_VQ_MAX  2
 
@@ -79,10 +50,6 @@ static DEFINE_IDA(vhost_vdpa_ida);
 
 static dev_t vhost_vdpa_major;
 
-static const u64 vhost_vdpa_features[] = {
-   [VIRTIO_ID_NET] = VHOST_VDPA_NET_FEATURES,
-};
-
 static void handle_vq_kick(struct vhost_work *work)
 {
struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
@@ -255,7 +222,6 @@ static long vhost_vdpa_get_features(struct vhost_vdpa *v, 
u64 __user *featurep)
u64 features;
 
features = ops->get_features(vdpa);
-   features &= vhost_vdpa_features[v->virtio_id];
 
	if (copy_to_user(featurep, &features, sizeof(features)))
return -EFAULT;
@@ -279,9 +245,6 @@ static long vhost_vdpa_set_features(struct vhost_vdpa *v, 
u64 __user *featurep)
	if (copy_from_user(&features, featurep, sizeof(features)))
return -EFAULT;
 
-   if (features & ~vhost_vdpa_features[v->virtio_id])
-   return -EINVAL;
-
if (ops->set_features(vdpa, features))
return -EINVAL;
 
-- 
2.20.1
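
As a rough illustration of the software control virtqueue emulation mentioned in the commit message, a vDPA device driver could service such a queue with the in-kernel vringh helpers roughly as below. This is only a sketch: the example_* names and the cvq fields are assumptions, not code from this patch.

/* Hypothetical sketch: serve a software control virtqueue with vringh. */
static void example_cvq_work(struct example_vdpa_dev *dev)
{
	struct vringh *vrh = &dev->cvq.vring;
	u16 head;
	int ret;

	for (;;) {
		/* Fetch the next available descriptor chain, if any. */
		ret = vringh_getdesc_kern(vrh, &dev->cvq.riov, &dev->cvq.wiov,
					  &head, GFP_ATOMIC);
		if (ret <= 0)	/* 0: ring empty, < 0: error */
			break;

		/* Assumed helper: parse the command from riov, write the
		 * status into wiov, return the number of bytes written. */
		ret = example_handle_ctrl_cmd(dev, &dev->cvq.riov,
					      &dev->cvq.wiov);

		/* Mark the chain as used with the written length. */
		vringh_complete_kern(vrh, head, ret);
	}
}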
