Re: [PATCH -next] applicom: fix some err codes returned by ac_ioctl

2021-03-28 Thread gregkh
On Wed, Mar 24, 2021 at 01:03:50PM +0100, Arnd Bergmann wrote:
> On Wed, Mar 24, 2021 at 8:20 AM Xu Jia  wrote:
> >
> > When cmd > 6 or copy_to_user() fails, the variable 'ret' would not be
> > returned. Fix 'ret' being set but not used.
> >
> > Signed-off-by: Xu Jia 
> 
> Reviewed-by: Arnd Bergmann 
> 
> > diff --git a/drivers/char/applicom.c b/drivers/char/applicom.c
> > index 14b2d8034c51..0ab765143354 100644
> > --- a/drivers/char/applicom.c
> > +++ b/drivers/char/applicom.c
> > @@ -839,7 +839,7 @@ static long ac_ioctl(struct file *file, unsigned int 
> > cmd, unsigned long arg)
> > Dummy = readb(apbs[IndexCard].RamIO + VERS);
> > kfree(adgl);
> > mutex_unlock(_mutex);
> > -   return 0;
> > +   return ret;
> >
> 
> Apparently this has been broken since the driver was first merged in
> linux-2.3.16. I could find no indication of anyone using the driver
> and reporting any problems in the git history and it clearly still has
> the style of drivers written in the 1990s. On the other hand, this is
> (was) used in some very long-lived systems and you can still
> buy old applicom cards from artisan[1].
> 
> Is there any chance this driver is still used anywhere with modern
> kernels? I suspect we could move it to staging to find out.

No objection from me to move it to staging, want to send a patch or I
can.

thanks,

greg k-h


Re: Linux 4.9.262

2021-03-17 Thread gregkh
From: Greg Kroah-Hartman 

diff --git a/Makefile b/Makefile
index 7a233c641906..be5eac0a12d3 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 9
-SUBLEVEL = 261
+SUBLEVEL = 262
 EXTRAVERSION =
 NAME = Roaring Lionus
 
diff --git a/arch/alpha/include/asm/uaccess.h b/arch/alpha/include/asm/uaccess.h
index 94f587535dee..181254a20a2a 100644
--- a/arch/alpha/include/asm/uaccess.h
+++ b/arch/alpha/include/asm/uaccess.h
@@ -341,45 +341,17 @@ __asm__ __volatile__("1: stb %r2,%1\n"
\
  * Complex access routines
  */
 
-/* This little bit of silliness is to get the GP loaded for a function
-   that ordinarily wouldn't.  Otherwise we could have it done by the macro
-   directly, which can be optimized the linker.  */
-#ifdef MODULE
-#define __module_address(sym)  "r"(sym),
-#define __module_call(ra, arg, sym)"jsr $" #ra ",(%" #arg ")," #sym
-#else
-#define __module_address(sym)
-#define __module_call(ra, arg, sym)"bsr $" #ra "," #sym " !samegp"
-#endif
-
-extern void __copy_user(void);
-
-extern inline long
-__copy_tofrom_user_nocheck(void *to, const void *from, long len)
-{
-   register void * __cu_to __asm__("$6") = to;
-   register const void * __cu_from __asm__("$7") = from;
-   register long __cu_len __asm__("$0") = len;
-
-   __asm__ __volatile__(
-   __module_call(28, 3, __copy_user)
-   : "=r" (__cu_len), "=r" (__cu_from), "=r" (__cu_to)
-   : __module_address(__copy_user)
- "0" (__cu_len), "1" (__cu_from), "2" (__cu_to)
-   : "$1", "$2", "$3", "$4", "$5", "$28", "memory");
-
-   return __cu_len;
-}
+extern long __copy_user(void *to, const void *from, long len);
 
-#define __copy_to_user(to, from, n)\
-({ \
-   __chk_user_ptr(to); \
-   __copy_tofrom_user_nocheck((__force void *)(to), (from), (n));  \
+#define __copy_to_user(to, from, n)\
+({ \
+   __chk_user_ptr(to); \
+   __copy_user((__force void *)(to), (from), (n)); \
 })
-#define __copy_from_user(to, from, n)  \
-({ \
-   __chk_user_ptr(from);   \
-   __copy_tofrom_user_nocheck((to), (__force void *)(from), (n));  \
+#define __copy_from_user(to, from, n)  \
+({ \
+   __chk_user_ptr(from);   \
+   __copy_user((to), (__force void *)(from), (n)); \
 })
 
 #define __copy_to_user_inatomic __copy_to_user
@@ -389,7 +361,7 @@ extern inline long
 copy_to_user(void __user *to, const void *from, long n)
 {
if (likely(__access_ok((unsigned long)to, n, get_fs(
-   n = __copy_tofrom_user_nocheck((__force void *)to, from, n);
+   n = __copy_user((__force void *)to, from, n);
return n;
 }
 
@@ -404,21 +376,7 @@ copy_from_user(void *to, const void __user *from, long n)
return res;
 }
 
-extern void __do_clear_user(void);
-
-extern inline long
-__clear_user(void __user *to, long len)
-{
-   register void __user * __cl_to __asm__("$6") = to;
-   register long __cl_len __asm__("$0") = len;
-   __asm__ __volatile__(
-   __module_call(28, 2, __do_clear_user)
-   : "=r"(__cl_len), "=r"(__cl_to)
-   : __module_address(__do_clear_user)
- "0"(__cl_len), "1"(__cl_to)
-   : "$1", "$2", "$3", "$4", "$5", "$28", "memory");
-   return __cl_len;
-}
+extern long __clear_user(void __user *to, long len);
 
 extern inline long
 clear_user(void __user *to, long len)
@@ -428,9 +386,6 @@ clear_user(void __user *to, long len)
return len;
 }
 
-#undef __module_address
-#undef __module_call
-
 #define user_addr_max() \
 (segment_eq(get_fs(), USER_DS) ? TASK_SIZE : ~0UL)
 
diff --git a/arch/alpha/lib/Makefile b/arch/alpha/lib/Makefile
index 59660743237c..a80815960364 100644
--- a/arch/alpha/lib/Makefile
+++ b/arch/alpha/lib/Makefile
@@ -20,12 +20,8 @@ lib-y =  __divqu.o __remqu.o __divlu.o __remlu.o \
checksum.o \
csum_partial_copy.o \
$(ev67-y)strlen.o \
-   $(ev67-y)strcat.o \
-   strcpy.o \
-   $(ev67-y)strncat.o \
-   strncpy.o \
-   $(ev6-y)stxcpy.o \
-   $(ev6-y)stxncpy.o \
+   stycpy.o \
+   styncpy.o \
$(ev67-y)strchr.o \
$(ev67-y)strrchr.o \
$(ev6-y)memchr.o \
@@ -46,11 +42,20 @@ AFLAGS___remqu.o =   -DREM
 AFLAGS___divlu.o = -DDIV   -DINTSIZE
 AFLAGS___remlu.o =   -DREM -DINTSIZE
 
-$(obj)/__divqu.o: $(obj)/$(ev6-y)divide.S
-   $(cmd_as_o_S)
-$(obj)/__remqu.o: $(obj)/$(ev6-y)divide.S

Linux 4.9.262

2021-03-17 Thread gregkh
From: Greg Kroah-Hartman 

I'm announcing the release of the 4.9.262 kernel.

All users of the 4.9 kernel series must upgrade.

The updated 4.9.y git tree can be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git 
linux-4.9.y
and can be browsed at the normal kernel.org git web browser:

https://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary

thanks,

greg k-h



 Makefile  |2 
 arch/alpha/include/asm/uaccess.h  |   67 +--
 arch/alpha/lib/Makefile   |   33 +++--
 arch/alpha/lib/clear_user.S   |   66 ---
 arch/alpha/lib/copy_user.S|   82 +
 arch/alpha/lib/ev6-clear_user.S   |   84 +-
 arch/alpha/lib/ev6-copy_user.S|  104 +++--
 arch/arm/kvm/mmu.c|2 
 arch/powerpc/include/asm/code-patching.h  |2 
 arch/powerpc/perf/core-book3s.c   |   19 ++-
 arch/s390/kernel/smp.c|2 
 drivers/block/rsxx/core.c |1 
 drivers/hwmon/lm90.c  |   42 ++-
 drivers/iio/imu/adis16400_buffer.c|5 
 drivers/iio/imu/adis_buffer.c |5 
 drivers/media/usb/usbtv/usbtv-audio.c |2 
 drivers/mmc/core/mmc.c|   15 +-
 drivers/mmc/host/mtk-sd.c |   18 +--
 drivers/mmc/host/mxs-mmc.c|2 
 drivers/net/can/flexcan.c |   12 +-
 drivers/net/ethernet/atheros/alx/main.c   |8 +
 drivers/net/ethernet/davicom/dm9000.c |   21 ++-
 drivers/net/ethernet/mellanox/mlx4/en_ethtool.c   |2 
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c|2 
 drivers/net/ethernet/mellanox/mlx4/mlx4_en.h  |1 
 drivers/net/ethernet/renesas/sh_eth.c |2 
 drivers/net/wan/lapbether.c   |3 
 drivers/net/wireless/ath/ath9k/ath9k.h|3 
 drivers/net/wireless/ath/ath9k/xmit.c |6 +
 drivers/pci/host/pci-xgene-msi.c  |   10 -
 drivers/s390/block/dasd.c |3 
 drivers/scsi/libiscsi.c   |   11 -
 drivers/staging/comedi/drivers/addi_apci_1032.c   |4 
 drivers/staging/comedi/drivers/addi_apci_1500.c   |   18 +--
 drivers/staging/comedi/drivers/adv_pci1710.c  |   10 -
 drivers/staging/comedi/drivers/das6402.c  |2 
 drivers/staging/comedi/drivers/das800.c   |2 
 drivers/staging/comedi/drivers/dmm32at.c  |2 
 drivers/staging/comedi/drivers/me4000.c   |2 
 drivers/staging/comedi/drivers/pcl711.c   |2 
 drivers/staging/comedi/drivers/pcl818.c   |2 
 drivers/staging/ks7010/ks_wlan_net.c  |6 -
 drivers/staging/rtl8188eu/core/rtw_ap.c   |5 
 drivers/staging/rtl8188eu/os_dep/ioctl_linux.c|6 -
 drivers/staging/rtl8192e/rtl8192e/rtl_wx.c|7 -
 drivers/staging/rtl8192u/r8192U_wx.c  |6 -
 drivers/staging/rtl8712/rtl871x_cmd.c |6 -
 drivers/staging/rtl8712/rtl871x_ioctl_linux.c |2 
 drivers/usb/class/cdc-acm.c   |5 
 drivers/usb/gadget/function/f_uac2.c  |2 
 drivers/usb/host/xhci.c   |   16 ++
 drivers/usb/renesas_usbhs/pipe.c  |2 
 drivers/usb/serial/ch341.c|1 
 drivers/usb/serial/cp210x.c   |3 
 drivers/usb/serial/io_edgeport.c  |   26 ++--
 drivers/usb/usbip/stub_dev.c  |   42 +--
 drivers/usb/usbip/vhci_sysfs.c|   39 +-
 drivers/usb/usbip/vudc_sysfs.c|   10 +
 drivers/xen/events/events_2l.c|   22 ++-
 drivers/xen/events/events_base.c  |  130 --
 drivers/xen/events/events_fifo.c  |7 -
 drivers/xen/events/events_internal.h  |   22 ++-
 fs/binfmt_misc.c  |   29 ++--
 fs/cifs/cifsfs.c  |2 
 fs/configfs/file.c|6 -
 fs/nfs/nfs4proc.c |2 
 fs/udf/inode.c|9 +
 include/linux/can/skb.h   |8 +
 include/uapi/linux/netfilter/nfnetlink_cthelper.h |2 
 mm/slub.c |2 
 net/ipv4/udp_offload.c|2 
 net/mpls/mpls_gso.c   |3 
 net/netfilter/x_tables.c  |6 -
 net/sched/sch_api.c   |8 -
 scripts/recordmcount.c

Re: Linux 4.4.262

2021-03-17 Thread gregkh
From: Greg Kroah-Hartman 

diff --git a/Makefile b/Makefile
index 607f1b19555f..11acd6dd024a 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 4
-SUBLEVEL = 261
+SUBLEVEL = 262
 EXTRAVERSION =
 NAME = Blurry Fish Butt
 
diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild
index ffd9cf5ec8c4..bf8475ce85ee 100644
--- a/arch/alpha/include/asm/Kbuild
+++ b/arch/alpha/include/asm/Kbuild
@@ -3,6 +3,7 @@
 generic-y += clkdev.h
 generic-y += cputime.h
 generic-y += exec.h
+generic-y += export.h
 generic-y += irq_work.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
diff --git a/arch/alpha/include/asm/uaccess.h b/arch/alpha/include/asm/uaccess.h
index c0ddbbf73400..89413a29cb07 100644
--- a/arch/alpha/include/asm/uaccess.h
+++ b/arch/alpha/include/asm/uaccess.h
@@ -341,45 +341,17 @@ __asm__ __volatile__("1: stb %r2,%1\n"
\
  * Complex access routines
  */
 
-/* This little bit of silliness is to get the GP loaded for a function
-   that ordinarily wouldn't.  Otherwise we could have it done by the macro
-   directly, which can be optimized the linker.  */
-#ifdef MODULE
-#define __module_address(sym)  "r"(sym),
-#define __module_call(ra, arg, sym)"jsr $" #ra ",(%" #arg ")," #sym
-#else
-#define __module_address(sym)
-#define __module_call(ra, arg, sym)"bsr $" #ra "," #sym " !samegp"
-#endif
-
-extern void __copy_user(void);
-
-extern inline long
-__copy_tofrom_user_nocheck(void *to, const void *from, long len)
-{
-   register void * __cu_to __asm__("$6") = to;
-   register const void * __cu_from __asm__("$7") = from;
-   register long __cu_len __asm__("$0") = len;
-
-   __asm__ __volatile__(
-   __module_call(28, 3, __copy_user)
-   : "=r" (__cu_len), "=r" (__cu_from), "=r" (__cu_to)
-   : __module_address(__copy_user)
- "0" (__cu_len), "1" (__cu_from), "2" (__cu_to)
-   : "$1", "$2", "$3", "$4", "$5", "$28", "memory");
-
-   return __cu_len;
-}
+extern long __copy_user(void *to, const void *from, long len);
 
-#define __copy_to_user(to, from, n)\
-({ \
-   __chk_user_ptr(to); \
-   __copy_tofrom_user_nocheck((__force void *)(to), (from), (n));  \
+#define __copy_to_user(to, from, n)\
+({ \
+   __chk_user_ptr(to); \
+   __copy_user((__force void *)(to), (from), (n)); \
 })
-#define __copy_from_user(to, from, n)  \
-({ \
-   __chk_user_ptr(from);   \
-   __copy_tofrom_user_nocheck((to), (__force void *)(from), (n));  \
+#define __copy_from_user(to, from, n)  \
+({ \
+   __chk_user_ptr(from);   \
+   __copy_user((to), (__force void *)(from), (n)); \
 })
 
 #define __copy_to_user_inatomic __copy_to_user
@@ -389,35 +361,22 @@ extern inline long
 copy_to_user(void __user *to, const void *from, long n)
 {
if (likely(__access_ok((unsigned long)to, n, get_fs(
-   n = __copy_tofrom_user_nocheck((__force void *)to, from, n);
+   n = __copy_user((__force void *)to, from, n);
return n;
 }
 
 extern inline long
 copy_from_user(void *to, const void __user *from, long n)
 {
+   long res = n;
if (likely(__access_ok((unsigned long)from, n, get_fs(
-   n = __copy_tofrom_user_nocheck(to, (__force void *)from, n);
-   else
-   memset(to, 0, n);
-   return n;
+   res = __copy_from_user_inatomic(to, from, n);
+   if (unlikely(res))
+   memset(to + (n - res), 0, res);
+   return res;
 }
 
-extern void __do_clear_user(void);
-
-extern inline long
-__clear_user(void __user *to, long len)
-{
-   register void __user * __cl_to __asm__("$6") = to;
-   register long __cl_len __asm__("$0") = len;
-   __asm__ __volatile__(
-   __module_call(28, 2, __do_clear_user)
-   : "=r"(__cl_len), "=r"(__cl_to)
-   : __module_address(__do_clear_user)
- "0"(__cl_len), "1"(__cl_to)
-   : "$1", "$2", "$3", "$4", "$5", "$28", "memory");
-   return __cl_len;
-}
+extern long __clear_user(void __user *to, long len);
 
 extern inline long
 clear_user(void __user *to, long len)
@@ -427,9 +386,6 @@ clear_user(void __user *to, long len)
return len;
 }
 
-#undef __module_address
-#undef __module_call
-
 #define user_addr_max() \
 (segment_eq(get_fs(), USER_DS) ? TASK_SIZE : ~0UL)
 
diff --git a/arch/alpha/kernel/Makefile b/arch/alpha/kernel/Makefile
index 

Linux 4.4.262

2021-03-17 Thread gregkh
From: Greg Kroah-Hartman 

I'm announcing the release of the 4.4.262 kernel.

All users of the 4.4 kernel series must upgrade.

The updated 4.4.y git tree can be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git 
linux-4.4.y
and can be browsed at the normal kernel.org git web browser:

https://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary

thanks,

greg k-h



 Makefile  |2 
 arch/alpha/include/asm/Kbuild |1 
 arch/alpha/include/asm/uaccess.h  |   76 +---
 arch/alpha/kernel/Makefile|2 
 arch/alpha/kernel/alpha_ksyms.c   |  102 --
 arch/alpha/kernel/machvec_impl.h  |6 
 arch/alpha/kernel/setup.c |1 
 arch/alpha/lib/Makefile   |   33 ++-
 arch/alpha/lib/callback_srm.S |5 
 arch/alpha/lib/checksum.c |3 
 arch/alpha/lib/clear_page.S   |3 
 arch/alpha/lib/clear_user.S   |   66 ++
 arch/alpha/lib/copy_page.S|3 
 arch/alpha/lib/copy_user.S|  101 --
 arch/alpha/lib/csum_ipv6_magic.S  |2 
 arch/alpha/lib/csum_partial_copy.c|2 
 arch/alpha/lib/dec_and_lock.c |2 
 arch/alpha/lib/divide.S   |3 
 arch/alpha/lib/ev6-clear_page.S   |3 
 arch/alpha/lib/ev6-clear_user.S   |   85 +++-
 arch/alpha/lib/ev6-copy_page.S|3 
 arch/alpha/lib/ev6-copy_user.S|  130 +
 arch/alpha/lib/ev6-csum_ipv6_magic.S  |2 
 arch/alpha/lib/ev6-divide.S   |3 
 arch/alpha/lib/ev6-memchr.S   |3 
 arch/alpha/lib/ev6-memcpy.S   |3 
 arch/alpha/lib/ev6-memset.S   |7 
 arch/alpha/lib/ev67-strcat.S  |3 
 arch/alpha/lib/ev67-strchr.S  |3 
 arch/alpha/lib/ev67-strlen.S  |3 
 arch/alpha/lib/ev67-strncat.S |3 
 arch/alpha/lib/ev67-strrchr.S |3 
 arch/alpha/lib/fpreg.c|7 
 arch/alpha/lib/memchr.S   |3 
 arch/alpha/lib/memcpy.c   |5 
 arch/alpha/lib/memmove.S  |3 
 arch/alpha/lib/memset.S   |7 
 arch/alpha/lib/strcat.S   |2 
 arch/alpha/lib/strchr.S   |3 
 arch/alpha/lib/strcpy.S   |3 
 arch/alpha/lib/strlen.S   |3 
 arch/alpha/lib/strncat.S  |3 
 arch/alpha/lib/strncpy.S  |3 
 arch/alpha/lib/strrchr.S  |3 
 arch/arm/kvm/mmu.c|2 
 arch/powerpc/include/asm/code-patching.h  |2 
 arch/powerpc/perf/core-book3s.c   |   19 +-
 arch/s390/kernel/smp.c|2 
 drivers/block/floppy.c|   35 ++-
 drivers/block/rsxx/core.c |1 
 drivers/iio/imu/adis16400_buffer.c|5 
 drivers/iio/imu/adis_buffer.c |5 
 drivers/media/usb/hdpvr/hdpvr-core.c  |   33 ++-
 drivers/media/usb/usbtv/usbtv-audio.c |2 
 drivers/mmc/core/mmc.c|   15 +
 drivers/mmc/host/mtk-sd.c |   18 +
 drivers/mmc/host/mxs-mmc.c|2 
 drivers/net/can/flexcan.c |   12 -
 drivers/net/ethernet/davicom/dm9000.c |   21 +-
 drivers/net/ethernet/mellanox/mlx4/en_ethtool.c   |2 
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c|2 
 drivers/net/ethernet/mellanox/mlx4/mlx4_en.h  |1 
 drivers/net/wan/lapbether.c   |3 
 drivers/net/wireless/ath/ath9k/ath9k.h|3 
 drivers/net/wireless/ath/ath9k/xmit.c |6 
 drivers/net/wireless/libertas/if_sdio.c   |5 
 drivers/pci/host/pci-xgene-msi.c  |   10 -
 drivers/s390/block/dasd.c |3 
 drivers/scsi/libiscsi.c   |   11 -
 drivers/staging/comedi/drivers/addi_apci_1032.c   |4 
 drivers/staging/comedi/drivers/addi_apci_1500.c   |   18 -
 drivers/staging/comedi/drivers/adv_pci1710.c  |   10 -
 drivers/staging/comedi/drivers/das6402.c  |2 
 drivers/staging/comedi/drivers/das800.c   |2 
 drivers/staging/comedi/drivers/dmm32at.c  |2 
 drivers/staging/comedi/drivers/me4000.c   |   

Re: Re: [PATCH v2] staging: rtl8192u: remove extra lines

2021-03-16 Thread gregkh
On Tue, Mar 16, 2021 at 06:03:17PM +0800, 赵晓 wrote:
> This email message is intended only for the use of the individual or entity 
> who
> /which is the intended recipient and may contain information that is 
> privileged
> or confidential. If you are not the intended recipient, you are hereby 
> notified
> that any use, dissemination, distribution or copying of, or taking any action
> in reliance on, this e-mail is strictly prohibited. If you have received this
> email in error, please notify UnionTech Software Technology  immediately by
> replying to this e-mail and immediately delete and discard all copies of the
> e-mail and the attachment thereto (if any). Thank you.  

This text is not compatible with Linux development sorry, email is now
deleted.


[PATCH] MAINTAINERS: move the staging subsystem to lists.linux.dev

2021-03-16 Thread gregkh
From: Greg Kroah-Hartman 

The drivers/staging/ tree has a new mailing list,
linux-stag...@lists.linux.dev, so move the MAINTAINER entry to point to
it so that we get patches sent to the proper place.

There was no need to specify a list for the hikey9xx driver, the tools
pick up the "base" list for drivers/staging/* so remove that line to
make the file simpler.

Signed-off-by: Greg Kroah-Hartman 
---
 MAINTAINERS | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index d7c25c0fc08a..9e876927c60d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8116,7 +8116,6 @@ F:drivers/crypto/hisilicon/sec2/sec_main.c
 
 HISILICON STAGING DRIVERS FOR HIKEY 960/970
 M: Mauro Carvalho Chehab 
-L: de...@driverdev.osuosl.org
 S: Maintained
 F: drivers/staging/hikey9xx/
 
@@ -17040,7 +17039,7 @@ F:  drivers/staging/vt665?/
 
 STAGING SUBSYSTEM
 M: Greg Kroah-Hartman 
-L: de...@driverdev.osuosl.org
+L: linux-stag...@lists.linux.dev
 S: Supported
 T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging.git
 F: drivers/staging/
-- 
2.30.2



[PATCH 5.11 303/306] mm/memcg: set memcg when splitting page

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Zhou Guanghui 

commit e1baddf8475b06cc56f4bafecf9a32a124343d9f upstream.

As described in the split_page() comment, for the non-compound high order
page, the sub-pages must be freed individually.  If the memcg of the first
page is valid, the tail pages cannot be uncharged when be freed.

For example, when alloc_pages_exact is used to allocate 1MB continuous
physical memory, 2MB is charged(kmemcg is enabled and __GFP_ACCOUNT is
set).  When make_alloc_exact free the unused 1MB and free_pages_exact free
the applied 1MB, actually, only 4KB(one page) is uncharged.

Therefore, the memcg of the tail page needs to be set when splitting a
page.

Michel:

There are at least two explicit users of __GFP_ACCOUNT with
alloc_exact_pages added recently.  See 7efe8ef274024 ("KVM: arm64:
Allocate stage-2 pgd pages with GFP_KERNEL_ACCOUNT") and c419621873713
("KVM: s390: Add memcg accounting to KVM allocations"), so this is not
just a theoretical issue.

Link: https://lkml.kernel.org/r/20210304074053.65527-3-zhouguangh...@huawei.com
Signed-off-by: Zhou Guanghui 
Acked-by: Johannes Weiner 
Reviewed-by: Zi Yan 
Reviewed-by: Shakeel Butt 
Acked-by: Michal Hocko 
Cc: Hanjun Guo 
Cc: Hugh Dickins 
Cc: Kefeng Wang 
Cc: "Kirill A. Shutemov" 
Cc: Nicholas Piggin 
Cc: Rui Xiang 
Cc: Tianhong Ding 
Cc: Weilong Chen 
Cc: 
Signed-off-by: Andrew Morton 
Signed-off-by: Linus Torvalds 
Signed-off-by: Greg Kroah-Hartman 
---
 mm/page_alloc.c |1 +
 1 file changed, 1 insertion(+)

--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3313,6 +3313,7 @@ void split_page(struct page *page, unsig
for (i = 1; i < (1 << order); i++)
set_page_refcounted(page + i);
split_page_owner(page, 1 << order);
+   split_page_memcg(page, 1 << order);
 }
 EXPORT_SYMBOL_GPL(split_page);
 




[PATCH 5.11 304/306] mm/memcg: rename mem_cgroup_split_huge_fixup to split_page_memcg and add nr_pages argument

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Zhou Guanghui 

commit be6c8982e4ab9a41907555f601b711a7e2a17d4c upstream.

Rename mem_cgroup_split_huge_fixup to split_page_memcg and explicitly pass
in page number argument.

In this way, the interface name is more common and can be used by
potential users.  In addition, the complete info(memcg and flag) of the
memcg needs to be set to the tail pages.

Link: https://lkml.kernel.org/r/20210304074053.65527-2-zhouguangh...@huawei.com
Signed-off-by: Zhou Guanghui 
Acked-by: Johannes Weiner 
Reviewed-by: Zi Yan 
Reviewed-by: Shakeel Butt 
Acked-by: Michal Hocko 
Cc: Hugh Dickins 
Cc: "Kirill A. Shutemov" 
Cc: Nicholas Piggin 
Cc: Kefeng Wang 
Cc: Hanjun Guo 
Cc: Tianhong Ding 
Cc: Weilong Chen 
Cc: Rui Xiang 
Cc: 
Signed-off-by: Andrew Morton 
Signed-off-by: Linus Torvalds 
Signed-off-by: Greg Kroah-Hartman 
---
 include/linux/memcontrol.h |6 ++
 mm/huge_memory.c   |2 +-
 mm/memcontrol.c|   15 ++-
 3 files changed, 9 insertions(+), 14 deletions(-)

--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1072,9 +1072,7 @@ static inline void memcg_memory_event_mm
rcu_read_unlock();
 }
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-void mem_cgroup_split_huge_fixup(struct page *head);
-#endif
+void split_page_memcg(struct page *head, unsigned int nr);
 
 #else /* CONFIG_MEMCG */
 
@@ -1416,7 +1414,7 @@ unsigned long mem_cgroup_soft_limit_recl
return 0;
 }
 
-static inline void mem_cgroup_split_huge_fixup(struct page *head)
+static inline void split_page_memcg(struct page *head, unsigned int nr)
 {
 }
 
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2465,7 +2465,7 @@ static void __split_huge_page(struct pag
int i;
 
/* complete memcg works before add pages to LRU */
-   mem_cgroup_split_huge_fixup(head);
+   split_page_memcg(head, nr);
 
if (PageAnon(head) && PageSwapCache(head)) {
swp_entry_t entry = { .val = page_private(head) };
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3296,24 +3296,21 @@ void obj_cgroup_uncharge(struct obj_cgro
 
 #endif /* CONFIG_MEMCG_KMEM */
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 /*
- * Because page_memcg(head) is not set on compound tails, set it now.
+ * Because page_memcg(head) is not set on tails, set it now.
  */
-void mem_cgroup_split_huge_fixup(struct page *head)
+void split_page_memcg(struct page *head, unsigned int nr)
 {
struct mem_cgroup *memcg = page_memcg(head);
int i;
 
-   if (mem_cgroup_disabled())
+   if (mem_cgroup_disabled() || !memcg)
return;
 
-   for (i = 1; i < HPAGE_PMD_NR; i++) {
-   css_get(>css);
-   head[i].memcg_data = (unsigned long)memcg;
-   }
+   for (i = 1; i < nr; i++)
+   head[i].memcg_data = head->memcg_data;
+   css_get_many(>css, nr - 1);
 }
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 #ifdef CONFIG_MEMCG_SWAP
 /**




[PATCH 5.11 305/306] mm/page_alloc.c: refactor initialization of struct page for holes in memory layout

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Mike Rapoport 

commit 0740a50b9baa4472cfb12442df4b39e2712a64a4 upstream.

There could be struct pages that are not backed by actual physical memory.
This can happen when the actual memory bank is not a multiple of
SECTION_SIZE or when an architecture does not register memory holes
reserved by the firmware as memblock.memory.

Such pages are currently initialized using init_unavailable_mem() function
that iterates through PFNs in holes in memblock.memory and if there is a
struct page corresponding to a PFN, the fields of this page are set to
default values and it is marked as Reserved.

init_unavailable_mem() does not take into account zone and node the page
belongs to and sets both zone and node links in struct page to zero.

Before commit 73a6e474cb37 ("mm: memmap_init: iterate over memblock
regions rather that check each PFN") the holes inside a zone were
re-initialized during memmap_init() and got their zone/node links right.
However, after that commit nothing updates the struct pages representing
such holes.

On a system that has firmware reserved holes in a zone above ZONE_DMA, for
instance in a configuration below:

# grep -A1 E820 /proc/iomem
7a17b000-7a216fff : Unknown E820 type
7a217000-7bff : System RAM

unset zone link in struct page will trigger

VM_BUG_ON_PAGE(!zone_spans_pfn(page_zone(page), pfn), page);

in set_pfnblock_flags_mask() when called with a struct page from a range
other than E820_TYPE_RAM because there are pages in the range of
ZONE_DMA32 but the unset zone link in struct page makes them appear as a
part of ZONE_DMA.

Interleave initialization of the unavailable pages with the normal
initialization of memory map, so that zone and node information will be
properly set on struct pages that are not backed by the actual memory.

With this change the pages for holes inside a zone will get proper
zone/node links and the pages that are not spanned by any node will get
links to the adjacent zone/node.  The holes between nodes will be
prepended to the zone/node above the hole and the trailing pages in the
last section that will be appended to the zone/node below.

[a...@linux-foundation.org: don't initialize static to zero, use %llu for u64]

Link: https://lkml.kernel.org/r/20210225224351.7356-2-r...@kernel.org
Fixes: 73a6e474cb37 ("mm: memmap_init: iterate over memblock regions rather 
that check each PFN")
Signed-off-by: Mike Rapoport 
Reported-by: Qian Cai 
Reported-by: Andrea Arcangeli 
Reviewed-by: Baoquan He 
Acked-by: Vlastimil Babka 
Reviewed-by: David Hildenbrand 
Cc: Borislav Petkov 
Cc: Chris Wilson 
Cc: "H. Peter Anvin" 
Cc: Łukasz Majczak 
Cc: Ingo Molnar 
Cc: Mel Gorman 
Cc: Michal Hocko 
Cc: "Sarvela, Tomi P" 
Cc: Thomas Gleixner 
Cc: 
Signed-off-by: Andrew Morton 
Signed-off-by: Linus Torvalds 
Signed-off-by: Mike Rapoport 
Signed-off-by: Greg Kroah-Hartman 
---
 mm/page_alloc.c |  158 ++--
 1 file changed, 75 insertions(+), 83 deletions(-)

--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6262,13 +6262,66 @@ static void __meminit zone_init_free_lis
}
 }
 
+#if !defined(CONFIG_FLAT_NODE_MEM_MAP)
+/*
+ * Only struct pages that correspond to ranges defined by memblock.memory
+ * are zeroed and initialized by going through __init_single_page() during
+ * memmap_init_zone().
+ *
+ * But, there could be struct pages that correspond to holes in
+ * memblock.memory. This can happen because of the following reasons:
+ * - physical memory bank size is not necessarily the exact multiple of the
+ *   arbitrary section size
+ * - early reserved memory may not be listed in memblock.memory
+ * - memory layouts defined with memmap= kernel parameter may not align
+ *   nicely with memmap sections
+ *
+ * Explicitly initialize those struct pages so that:
+ * - PG_Reserved is set
+ * - zone and node links point to zone and node that span the page if the
+ *   hole is in the middle of a zone
+ * - zone and node links point to adjacent zone/node if the hole falls on
+ *   the zone boundary; the pages in such holes will be prepended to the
+ *   zone/node above the hole except for the trailing pages in the last
+ *   section that will be appended to the zone/node below.
+ */
+static u64 __meminit init_unavailable_range(unsigned long spfn,
+   unsigned long epfn,
+   int zone, int node)
+{
+   unsigned long pfn;
+   u64 pgcnt = 0;
+
+   for (pfn = spfn; pfn < epfn; pfn++) {
+   if (!pfn_valid(ALIGN_DOWN(pfn, pageblock_nr_pages))) {
+   pfn = ALIGN_DOWN(pfn, pageblock_nr_pages)
+   + pageblock_nr_pages - 1;
+   continue;
+   }
+   __init_single_page(pfn_to_page(pfn), pfn, zone, node);
+   __SetPageReserved(pfn_to_page(pfn));
+   pgcnt++;
+   }
+
+  

[PATCH 5.11 301/306] mm/userfaultfd: fix memory corruption due to writeprotect

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Nadav Amit 

commit 6ce64428d62026a10cb5d80138ff2f90cc21d367 upstream.

Userfaultfd self-test fails occasionally, indicating a memory corruption.

Analyzing this problem indicates that there is a real bug since mmap_lock
is only taken for read in mwriteprotect_range() and defers flushes, and
since there is insufficient consideration of concurrent deferred TLB
flushes in wp_page_copy().  Although the PTE is flushed from the TLBs in
wp_page_copy(), this flush takes place after the copy has already been
performed, and therefore changes of the page are possible between the time
of the copy and the time in which the PTE is flushed.

To make matters worse, memory-unprotection using userfaultfd also poses a
problem.  Although memory unprotection is logically a promotion of PTE
permissions, and therefore should not require a TLB flush, the current
userrfaultfd code might actually cause a demotion of the architectural PTE
permission: when userfaultfd_writeprotect() unprotects memory region, it
unintentionally *clears* the RW-bit if it was already set.  Note that this
unprotecting a PTE that is not write-protected is a valid use-case: the
userfaultfd monitor might ask to unprotect a region that holds both
write-protected and write-unprotected PTEs.

The scenario that happens in selftests/vm/userfaultfd is as follows:

cpu0cpu1cpu2

[ Writable PTE
  cached in TLB ]
userfaultfd_writeprotect()
[ write-*unprotect* ]
mwriteprotect_range()
mmap_read_lock()
change_protection()

change_protection_range()
...
change_pte_range()
[ *clear* “write”-bit ]
[ defer TLB flushes ]
[ page-fault ]
...
wp_page_copy()
 cow_user_page()
  [ copy page ]
[ write to old
  page ]
...
 set_pte_at_notify()

A similar scenario can happen:

cpu0cpu1cpu2cpu3

[ Writable PTE
  cached in TLB ]
userfaultfd_writeprotect()
[ write-protect ]
[ deferred TLB flush ]
userfaultfd_writeprotect()
[ write-unprotect ]
[ deferred TLB flush]
[ page-fault ]
wp_page_copy()
 cow_user_page()
 [ copy page ]
 ...[ write to page ]
set_pte_at_notify()

This race exists since commit 292924b26024 ("userfaultfd: wp: apply
_PAGE_UFFD_WP bit").  Yet, as Yu Zhao pointed, these races became apparent
since commit 09854ba94c6a ("mm: do_wp_page() simplification") which made
wp_page_copy() more likely to take place, specifically if page_count(page)
> 1.

To resolve the aforementioned races, check whether there are pending
flushes on uffd-write-protected VMAs, and if there are, perform a flush
before doing the COW.

Further optimizations will follow to avoid during uffd-write-unprotect
unnecessary PTE write-protection and TLB flushes.

Link: https://lkml.kernel.org/r/20210304095423.3825684-1-na...@vmware.com
Fixes: 09854ba94c6a ("mm: do_wp_page() simplification")
Signed-off-by: Nadav Amit 
Suggested-by: Yu Zhao 
Reviewed-by: Peter Xu 
Tested-by: Peter Xu 
Cc: Andrea Arcangeli 
Cc: Andy Lutomirski 
Cc: Pavel Emelyanov 
Cc: Mike Kravetz 
Cc: Mike Rapoport 
Cc: Minchan Kim 
Cc: Will Deacon 
Cc: Peter Zijlstra 
Cc: [5.9+]
Signed-off-by: Andrew Morton 
Signed-off-by: Linus Torvalds 
Signed-off-by: Greg Kroah-Hartman 
---
 mm/memory.c |8 
 1 file changed, 8 insertions(+)

--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3092,6 +3092,14 @@ static vm_fault_t do_wp_page(struct vm_f
return handle_userfault(vmf, VM_UFFD_WP);
}
 
+   /*
+* Userfaultfd write-protect can defer flushes. Ensure the TLB
+* is flushed in this case before copying.
+*/
+   if (unlikely(userfaultfd_wp(vmf->vma) &&
+mm_tlb_flush_pending(vmf->vma->vm_mm)))
+   flush_tlb_page(vmf->vma, vmf->address);
+
vmf->page = vm_normal_page(vma, vmf->address, vmf->orig_pte);
if (!vmf->page) {
/*




[PATCH 5.11 306/306] KVM: arm64: Fix nVHE hyp panic host context restore

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Andrew Scull 

Commit c4b000c3928d4f20acef79dccf3a65ae3795e0b0 upstream.

When panicking from the nVHE hyp and restoring the host context, x29 is
expected to hold a pointer to the host context. This wasn't being done
so fix it to make sure there's a valid pointer the host context being
used.

Rather than passing a boolean indicating whether or not the host context
should be restored, instead pass the pointer to the host context. NULL
is passed to indicate that no context should be restored.

Fixes: a2e102e20fd6 ("KVM: arm64: nVHE: Handle hyp panics")
Cc: sta...@vger.kernel.org # 5.11.y only
Signed-off-by: Andrew Scull 
Signed-off-by: Marc Zyngier 
Link: https://lore.kernel.org/r/20210219122406.1337626-1-asc...@google.com
Signed-off-by: Greg Kroah-Hartman 
---
 arch/arm64/include/asm/kvm_hyp.h |3 ++-
 arch/arm64/kvm/hyp/nvhe/host.S   |   20 ++--
 arch/arm64/kvm/hyp/nvhe/switch.c |3 +--
 3 files changed, 13 insertions(+), 13 deletions(-)

--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -102,7 +102,8 @@ bool kvm_host_psci_handler(struct kvm_cp
 
 void __noreturn hyp_panic(void);
 #ifdef __KVM_NVHE_HYPERVISOR__
-void __noreturn __hyp_do_panic(bool restore_host, u64 spsr, u64 elr, u64 par);
+void __noreturn __hyp_do_panic(struct kvm_cpu_context *host_ctxt, u64 spsr,
+  u64 elr, u64 par);
 #endif
 
 #endif /* __ARM64_KVM_HYP_H__ */
--- a/arch/arm64/kvm/hyp/nvhe/host.S
+++ b/arch/arm64/kvm/hyp/nvhe/host.S
@@ -71,10 +71,15 @@ SYM_FUNC_START(__host_enter)
 SYM_FUNC_END(__host_enter)
 
 /*
- * void __noreturn __hyp_do_panic(bool restore_host, u64 spsr, u64 elr, u64 
par);
+ * void __noreturn __hyp_do_panic(struct kvm_cpu_context *host_ctxt, u64 spsr,
+ *   u64 elr, u64 par);
  */
 SYM_FUNC_START(__hyp_do_panic)
-   /* Load the format arguments into x1-7 */
+   mov x29, x0
+
+   /* Load the format string into x0 and arguments into x1-7 */
+   ldr x0, =__hyp_panic_string
+
mov x6, x3
get_vcpu_ptr x7, x3
 
@@ -89,13 +94,8 @@ SYM_FUNC_START(__hyp_do_panic)
ldr lr, =panic
msr elr_el2, lr
 
-   /*
-* Set the panic format string and enter the host, conditionally
-* restoring the host context.
-*/
-   cmp x0, xzr
-   ldr x0, =__hyp_panic_string
-   b.eq__host_enter_without_restoring
+   /* Enter the host, conditionally restoring the host context. */
+   cbz x29, __host_enter_without_restoring
b   __host_enter_for_panic
 SYM_FUNC_END(__hyp_do_panic)
 
@@ -150,7 +150,7 @@ SYM_FUNC_END(__hyp_do_panic)
 
 .macro invalid_host_el1_vect
.align 7
-   mov x0, xzr /* restore_host = false */
+   mov x0, xzr /* host_ctxt = NULL */
mrs x1, spsr_el2
mrs x2, elr_el2
mrs x3, par_el1
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -266,7 +266,6 @@ void __noreturn hyp_panic(void)
u64 spsr = read_sysreg_el2(SYS_SPSR);
u64 elr = read_sysreg_el2(SYS_ELR);
u64 par = read_sysreg_par();
-   bool restore_host = true;
struct kvm_cpu_context *host_ctxt;
struct kvm_vcpu *vcpu;
 
@@ -280,7 +279,7 @@ void __noreturn hyp_panic(void)
__sysreg_restore_state_nvhe(host_ctxt);
}
 
-   __hyp_do_panic(restore_host, spsr, elr, par);
+   __hyp_do_panic(host_ctxt, spsr, elr, par);
unreachable();
 }
 




[PATCH 5.11 302/306] mm/madvise: replace ptrace attach requirement for process_madvise

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Suren Baghdasaryan 

commit 96cfe2c0fd23ea7c2368d14f769d287e7ae1082e upstream.

process_madvise currently requires ptrace attach capability.
PTRACE_MODE_ATTACH gives one process complete control over another
process.  It effectively removes the security boundary between the two
processes (in one direction).  Granting ptrace attach capability even to a
system process is considered dangerous since it creates an attack surface.
This severely limits the usage of this API.

The operations process_madvise can perform do not affect the correctness
of the operation of the target process; they only affect where the data is
physically located (and therefore, how fast it can be accessed).  What we
want is the ability for one process to influence another process in order
to optimize performance across the entire system while leaving the
security boundary intact.

Replace PTRACE_MODE_ATTACH with a combination of PTRACE_MODE_READ and
CAP_SYS_NICE.  PTRACE_MODE_READ to prevent leaking ASLR metadata and
CAP_SYS_NICE for influencing process performance.

Link: https://lkml.kernel.org/r/20210303185807.2160264-1-sur...@google.com
Signed-off-by: Suren Baghdasaryan 
Reviewed-by: Kees Cook 
Acked-by: Minchan Kim 
Acked-by: David Rientjes 
Cc: Jann Horn 
Cc: Jeff Vander Stoep 
Cc: Michal Hocko 
Cc: Shakeel Butt 
Cc: Tim Murray 
Cc: Florian Weimer 
Cc: Oleg Nesterov 
Cc: James Morris 
Cc: [5.10+]
Signed-off-by: Andrew Morton 
Signed-off-by: Linus Torvalds 
Signed-off-by: Greg Kroah-Hartman 
---
 mm/madvise.c |   13 -
 1 file changed, 12 insertions(+), 1 deletion(-)

--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -1197,12 +1197,22 @@ SYSCALL_DEFINE5(process_madvise, int, pi
goto release_task;
}
 
-   mm = mm_access(task, PTRACE_MODE_ATTACH_FSCREDS);
+   /* Require PTRACE_MODE_READ to avoid leaking ASLR metadata. */
+   mm = mm_access(task, PTRACE_MODE_READ_FSCREDS);
if (IS_ERR_OR_NULL(mm)) {
ret = IS_ERR(mm) ? PTR_ERR(mm) : -ESRCH;
goto release_task;
}
 
+   /*
+* Require CAP_SYS_NICE for influencing process performance. Note that
+* only non-destructive hints are currently supported.
+*/
+   if (!capable(CAP_SYS_NICE)) {
+   ret = -EPERM;
+   goto release_mm;
+   }
+
total_len = iov_iter_count();
 
while (iov_iter_count()) {
@@ -1217,6 +1227,7 @@ SYSCALL_DEFINE5(process_madvise, int, pi
if (ret == 0)
ret = total_len - iov_iter_count();
 
+release_mm:
mmput(mm);
 release_task:
put_task_struct(task);




[PATCH 5.11 300/306] mm/highmem.c: fix zero_user_segments() with start > end

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: OGAWA Hirofumi 

commit 184cee516f3e24019a08ac8eb5c7cf04c00933cb upstream.

zero_user_segments() is used from __block_write_begin_int(), for example
like the following

zero_user_segments(page, 4096, 1024, 512, 918)

But the new zero_user_segments() implementation for HIGHMEM +
TRANSPARENT_HUGEPAGE doesn't handle the "start > end" case correctly, and hits
BUG_ON().  (we can fix __block_write_begin_int() instead though, it is the
old and multiple usage)

Also it calls kmap_atomic() unnecessarily while start == end == 0.

Link: https://lkml.kernel.org/r/87v9ab60r4@mail.parknet.co.jp
Fixes: 0060ef3b4e6d ("mm: support THPs in zero_user_segments")
Signed-off-by: OGAWA Hirofumi 
Cc: Matthew Wilcox 
Cc: 
Signed-off-by: Andrew Morton 
Signed-off-by: Linus Torvalds 
Signed-off-by: Greg Kroah-Hartman 
---
 mm/highmem.c |   17 -
 1 file changed, 12 insertions(+), 5 deletions(-)

--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -368,20 +368,24 @@ void zero_user_segments(struct page *pag
 
BUG_ON(end1 > page_size(page) || end2 > page_size(page));
 
+   if (start1 >= end1)
+   start1 = end1 = 0;
+   if (start2 >= end2)
+   start2 = end2 = 0;
+
for (i = 0; i < compound_nr(page); i++) {
void *kaddr = NULL;
 
-   if (start1 < PAGE_SIZE || start2 < PAGE_SIZE)
-   kaddr = kmap_atomic(page + i);
-
if (start1 >= PAGE_SIZE) {
start1 -= PAGE_SIZE;
end1 -= PAGE_SIZE;
} else {
unsigned this_end = min_t(unsigned, end1, PAGE_SIZE);
 
-   if (end1 > start1)
+   if (end1 > start1) {
+   kaddr = kmap_atomic(page + i);
memset(kaddr + start1, 0, this_end - start1);
+   }
end1 -= this_end;
start1 = 0;
}
@@ -392,8 +396,11 @@ void zero_user_segments(struct page *pag
} else {
unsigned this_end = min_t(unsigned, end2, PAGE_SIZE);
 
-   if (end2 > start2)
+   if (end2 > start2) {
+   if (!kaddr)
+   kaddr = kmap_atomic(page + i);
memset(kaddr + start2, 0, this_end - start2);
+   }
end2 -= this_end;
start2 = 0;
}




[PATCH 5.11 299/306] KVM: arm64: Fix exclusive limit for IPA size

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Marc Zyngier 

commit 262b003d059c6671601a19057e9fe1a5e7f23722 upstream.

When registering a memslot, we check the size and location of that
memslot against the IPA size to ensure that we can provide guest
access to the whole of the memory.

Unfortunately, this check rejects memslot that end-up at the exact
limit of the addressing capability for a given IPA size. For example,
it refuses the creation of a 2GB memslot at 0x800 with a 32bit
IPA space.

Fix it by relaxing the check to accept a memslot reaching the
limit of the IPA space.

Fixes: c3058d5da222 ("arm/arm64: KVM: Ensure memslots are within KVM_PHYS_SIZE")
Reviewed-by: Eric Auger 
Signed-off-by: Marc Zyngier 
Cc: sta...@vger.kernel.org
Reviewed-by: Andrew Jones 
Link: https://lore.kernel.org/r/20210311100016.3830038-3-...@kernel.org
Signed-off-by: Greg Kroah-Hartman 
---
 arch/arm64/kvm/mmu.c |3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1309,8 +1309,7 @@ int kvm_arch_prepare_memory_region(struc
 * Prevent userspace from creating a memory region outside of the IPA
 * space addressable by the KVM guest IPA space.
 */
-   if (memslot->base_gfn + memslot->npages >=
-   (kvm_phys_size(kvm) >> PAGE_SHIFT))
+   if ((memslot->base_gfn + memslot->npages) > (kvm_phys_size(kvm) >> 
PAGE_SHIFT))
return -EFAULT;
 
mmap_read_lock(current->mm);




[PATCH 5.11 298/306] KVM: arm64: Reject VM creation when the default IPA size is unsupported

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Marc Zyngier 

commit 7d717558dd5ef10d28866750d5c24ff892ea3778 upstream.

KVM/arm64 has forever used a 40bit default IPA space, partially
due to its 32bit heritage (where the only choice is 40bit).

However, there are implementations in the wild that have a *cough*
much smaller *cough* IPA space, which leads to a misprogramming of
VTCR_EL2, and a guest that is stuck on its first memory access
if userspace dares to ask for the default IPA setting (which most
VMMs do).

Instead, bluntly reject the creation of such a VM, as we can't
satisfy the requirements from userspace (with a one-off warning).
Also clarify the boot warning, and document that the VM creation
will fail when an unsupported IPA size is provided.

Although this is an ABI change, it doesn't really change much
for userspace:

- the guest couldn't run before this change, but no error was
  returned. At least userspace knows what is happening.

- a memory slot that was accepted because it did fit the default
  IPA space now doesn't even get a chance to be registered.

The other thing left to do is to convince userspace to
actually use the IPA space setting instead of relying on the
antiquated default.

Fixes: 233a7cb23531 ("kvm: arm64: Allow tuning the physical address size for 
VM")
Signed-off-by: Marc Zyngier 
Cc: sta...@vger.kernel.org
Reviewed-by: Andrew Jones 
Reviewed-by: Eric Auger 
Link: https://lore.kernel.org/r/20210311100016.3830038-2-...@kernel.org
Signed-off-by: Greg Kroah-Hartman 
---
 Documentation/virt/kvm/api.rst |3 +++
 arch/arm64/kvm/reset.c |   12 
 2 files changed, 11 insertions(+), 4 deletions(-)

--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -182,6 +182,9 @@ is dependent on the CPU capability and t
 be retrieved using KVM_CAP_ARM_VM_IPA_SIZE of the KVM_CHECK_EXTENSION
 ioctl() at run-time.
 
+Creation of the VM will fail if the requested IPA size (whether it is
+implicit or explicit) is unsupported on the host.
+
 Please note that configuring the IPA size does not affect the capability
 exposed by the guest CPUs in ID_AA64MMFR0_EL1[PARange]. It only affects
 size of the address translated by the stage2 level (guest physical to
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -324,10 +324,9 @@ int kvm_set_ipa_limit(void)
}
 
kvm_ipa_limit = id_aa64mmfr0_parange_to_phys_shift(parange);
-   WARN(kvm_ipa_limit < KVM_PHYS_SHIFT,
-"KVM IPA Size Limit (%d bits) is smaller than default size\n",
-kvm_ipa_limit);
-   kvm_info("IPA Size Limit: %d bits\n", kvm_ipa_limit);
+   kvm_info("IPA Size Limit: %d bits%s\n", kvm_ipa_limit,
+((kvm_ipa_limit < KVM_PHYS_SHIFT) ?
+ " (Reduced IPA size, limited VM/VMM compatibility)" : ""));
 
return 0;
 }
@@ -356,6 +355,11 @@ int kvm_arm_setup_stage2(struct kvm *kvm
return -EINVAL;
} else {
phys_shift = KVM_PHYS_SHIFT;
+   if (phys_shift > kvm_ipa_limit) {
+   pr_warn_once("%s using unsupported default IPA limit, 
upgrade your VMM\n",
+current->comm);
+   return -EINVAL;
+   }
}
 
mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);




[PATCH 5.11 297/306] KVM: arm64: nvhe: Save the SPE context early

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Suzuki K Poulose 

commit b96b0c5de685df82019e16826a282d53d86d112c upstream.

The nVHE KVM hyp drains and disables the SPE buffer, before
entering the guest, as the EL1&0 translation regime
is going to be loaded with that of the guest.

But this operation is performed way too late, because :
  - The owning translation regime of the SPE buffer
is transferred to EL2. (MDCR_EL2_E2PB == 0)
  - The guest Stage1 is loaded.

Thus the flush could use the host EL1 virtual address,
but use the EL2 translations instead of host EL1, for writing
out any cached data.

Fix this by moving the SPE buffer handling early enough.
The restore path is doing the right thing.

Fixes: 014c4c77aad7 ("KVM: arm64: Improve debug register save/restore flow")
Cc: sta...@vger.kernel.org
Cc: Christoffer Dall 
Cc: Marc Zyngier 
Cc: Will Deacon 
Cc: Catalin Marinas 
Cc: Mark Rutland 
Cc: Alexandru Elisei 
Reviewed-by: Alexandru Elisei 
Signed-off-by: Suzuki K Poulose 
Signed-off-by: Marc Zyngier 
Link: https://lore.kernel.org/r/20210302120345.3102874-1-suzuki.poul...@arm.com
Message-Id: <20210305185254.3730990-2-...@kernel.org>
Signed-off-by: Paolo Bonzini 
Signed-off-by: Greg Kroah-Hartman 
---
 arch/arm64/include/asm/kvm_hyp.h   |5 +
 arch/arm64/kvm/hyp/nvhe/debug-sr.c |   12 ++--
 arch/arm64/kvm/hyp/nvhe/switch.c   |   11 ++-
 3 files changed, 25 insertions(+), 3 deletions(-)

--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -83,6 +83,11 @@ void sysreg_restore_guest_state_vhe(stru
 void __debug_switch_to_guest(struct kvm_vcpu *vcpu);
 void __debug_switch_to_host(struct kvm_vcpu *vcpu);
 
+#ifdef __KVM_NVHE_HYPERVISOR__
+void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu);
+void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu);
+#endif
+
 void __fpsimd_save_state(struct user_fpsimd_state *fp_regs);
 void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs);
 
--- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c
+++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
@@ -58,16 +58,24 @@ static void __debug_restore_spe(u64 pmsc
write_sysreg_s(pmscr_el1, SYS_PMSCR_EL1);
 }
 
-void __debug_switch_to_guest(struct kvm_vcpu *vcpu)
+void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu)
 {
/* Disable and flush SPE data generation */
__debug_save_spe(>arch.host_debug_state.pmscr_el1);
+}
+
+void __debug_switch_to_guest(struct kvm_vcpu *vcpu)
+{
__debug_switch_to_guest_common(vcpu);
 }
 
-void __debug_switch_to_host(struct kvm_vcpu *vcpu)
+void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu)
 {
__debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1);
+}
+
+void __debug_switch_to_host(struct kvm_vcpu *vcpu)
+{
__debug_switch_to_host_common(vcpu);
 }
 
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -192,6 +192,14 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu
pmu_switch_needed = __pmu_switch_to_guest(host_ctxt);
 
__sysreg_save_state_nvhe(host_ctxt);
+   /*
+* We must flush and disable the SPE buffer for nVHE, as
+* the translation regime(EL1&0) is going to be loaded with
+* that of the guest. And we must do this before we change the
+* translation regime to EL2 (via MDCR_EL2_E2PB == 0) and
+* before we load guest Stage1.
+*/
+   __debug_save_host_buffers_nvhe(vcpu);
 
__adjust_pc(vcpu);
 
@@ -234,11 +242,12 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu
if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)
__fpsimd_save_fpexc32(vcpu);
 
+   __debug_switch_to_host(vcpu);
/*
 * This must come after restoring the host sysregs, since a non-VHE
 * system may enable SPE here and make use of the TTBRs.
 */
-   __debug_switch_to_host(vcpu);
+   __debug_restore_host_buffers_nvhe(vcpu);
 
if (pmu_switch_needed)
__pmu_switch_to_host(host_ctxt);




[PATCH 5.11 293/306] KVM: kvmclock: Fix vCPUs > 64 cant be online/hotpluged

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Wanpeng Li 

commit d7eb79c6290c7ae4561418544072e0a3266e7384 upstream.

# lscpu
Architecture:  x86_64
CPU op-mode(s):32-bit, 64-bit
Byte Order:Little Endian
CPU(s):88
On-line CPU(s) list:   0-63
Off-line CPU(s) list:  64-87

# cat /proc/cmdline
BOOT_IMAGE=/vmlinuz-5.10.0-rc3-tlinux2-0050+ root=/dev/mapper/cl-root ro
rd.lvm.lv=cl/root rhgb quiet console=ttyS0 LANG=en_US .UTF-8 
no-kvmclock-vsyscall

# echo 1 > /sys/devices/system/cpu/cpu76/online
-bash: echo: write error: Cannot allocate memory

The per-cpu vsyscall pvclock data pointer assigns either an element of the
static array hv_clock_boot (#vCPU <= 64) or dynamically allocated memory
hvclock_mem (vCPU > 64). The dynamic memory will not be allocated if the
kvmclock vsyscall is disabled, which can result in CPU hotplug failing in
kvmclock_setup_percpu() with -ENOMEM. It's broken for no-vsyscall,
and sometimes you end up with vsyscall disabled if the host does something
strange. This patch fixes it by allocating this dynamic memory
unconditionally even if vsyscall is disabled.

Fixes: 6a1cac56f4 ("x86/kvm: Use __bss_decrypted attribute in shared variables")
Reported-by: Zelin Deng 
Cc: Brijesh Singh 
Cc: sta...@vger.kernel.org
Signed-off-by: Wanpeng Li 
Message-Id: <1614130683-24137-1-git-send-email-wanpen...@tencent.com>
Signed-off-by: Paolo Bonzini 
Signed-off-by: Greg Kroah-Hartman 
---
 arch/x86/kernel/kvmclock.c |   19 +--
 1 file changed, 9 insertions(+), 10 deletions(-)

--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -268,21 +268,20 @@ static void __init kvmclock_init_mem(voi
 
 static int __init kvm_setup_vsyscall_timeinfo(void)
 {
-#ifdef CONFIG_X86_64
-   u8 flags;
+   kvmclock_init_mem();
 
-   if (!per_cpu(hv_clock_per_cpu, 0) || !kvmclock_vsyscall)
-   return 0;
+#ifdef CONFIG_X86_64
+   if (per_cpu(hv_clock_per_cpu, 0) && kvmclock_vsyscall) {
+   u8 flags;
 
-   flags = pvclock_read_flags(_clock_boot[0].pvti);
-   if (!(flags & PVCLOCK_TSC_STABLE_BIT))
-   return 0;
+   flags = pvclock_read_flags(_clock_boot[0].pvti);
+   if (!(flags & PVCLOCK_TSC_STABLE_BIT))
+   return 0;
 
-   kvm_clock.vdso_clock_mode = VDSO_CLOCKMODE_PVCLOCK;
+   kvm_clock.vdso_clock_mode = VDSO_CLOCKMODE_PVCLOCK;
+   }
 #endif
 
-   kvmclock_init_mem();
-
return 0;
 }
 early_initcall(kvm_setup_vsyscall_timeinfo);




[PATCH 5.11 294/306] KVM: arm64: Ensure I-cache isolation between vcpus of a same VM

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Marc Zyngier 

commit 01dc9262ff5797b675c32c0c6bc682777d23de05 upstream.

It recently became apparent that the ARMv8 architecture has interesting
rules regarding attributes being used when fetching instructions
if the MMU is off at Stage-1.

In this situation, the CPU is allowed to fetch from the PoC and
allocate into the I-cache (unless the memory is mapped with
the XN attribute at Stage-2).

If we transpose this to vcpus sharing a single physical CPU,
it is possible for a vcpu running with its MMU off to influence
another vcpu running with its MMU on, as the latter is expected to
fetch from the PoU (and self-patching code doesn't flush below that
level).

In order to solve this, reuse the vcpu-private TLB invalidation
code to apply the same policy to the I-cache, nuking it every time
the vcpu runs on a physical CPU that ran another vcpu of the same
VM in the past.

This involve renaming __kvm_tlb_flush_local_vmid() to
__kvm_flush_cpu_context(), and inserting a local i-cache invalidation
there.

Cc: sta...@vger.kernel.org
Signed-off-by: Marc Zyngier 
Acked-by: Will Deacon 
Acked-by: Catalin Marinas 
Link: https://lore.kernel.org/r/20210303164505.68492-1-...@kernel.org
Signed-off-by: Greg Kroah-Hartman 
---
 arch/arm64/include/asm/kvm_asm.h   |4 ++--
 arch/arm64/kvm/arm.c   |7 ++-
 arch/arm64/kvm/hyp/nvhe/hyp-main.c |6 +++---
 arch/arm64/kvm/hyp/nvhe/tlb.c  |3 ++-
 arch/arm64/kvm/hyp/vhe/tlb.c   |3 ++-
 5 files changed, 15 insertions(+), 8 deletions(-)

--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -47,7 +47,7 @@
 #define __KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context   2
 #define __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa 3
 #define __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid 4
-#define __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_local_vmid   5
+#define __KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context  5
 #define __KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff  6
 #define __KVM_HOST_SMCCC_FUNC___kvm_enable_ssbs7
 #define __KVM_HOST_SMCCC_FUNC___vgic_v3_get_ich_vtr_el28
@@ -183,10 +183,10 @@ DECLARE_KVM_HYP_SYM(__bp_harden_hyp_vecs
 #define __bp_harden_hyp_vecs   CHOOSE_HYP_SYM(__bp_harden_hyp_vecs)
 
 extern void __kvm_flush_vm_context(void);
+extern void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu);
 extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa,
 int level);
 extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu);
-extern void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu);
 
 extern void __kvm_timer_set_cntvoff(u64 cntvoff);
 
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -385,11 +385,16 @@ void kvm_arch_vcpu_load(struct kvm_vcpu
last_ran = this_cpu_ptr(mmu->last_vcpu_ran);
 
/*
+* We guarantee that both TLBs and I-cache are private to each
+* vcpu. If detecting that a vcpu from the same VM has
+* previously run on the same physical CPU, call into the
+* hypervisor code to nuke the relevant contexts.
+*
 * We might get preempted before the vCPU actually runs, but
 * over-invalidation doesn't affect correctness.
 */
if (*last_ran != vcpu->vcpu_id) {
-   kvm_call_hyp(__kvm_tlb_flush_local_vmid, mmu);
+   kvm_call_hyp(__kvm_flush_cpu_context, mmu);
*last_ran = vcpu->vcpu_id;
}
 
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -46,11 +46,11 @@ static void handle___kvm_tlb_flush_vmid(
__kvm_tlb_flush_vmid(kern_hyp_va(mmu));
 }
 
-static void handle___kvm_tlb_flush_local_vmid(struct kvm_cpu_context 
*host_ctxt)
+static void handle___kvm_flush_cpu_context(struct kvm_cpu_context *host_ctxt)
 {
DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
 
-   __kvm_tlb_flush_local_vmid(kern_hyp_va(mmu));
+   __kvm_flush_cpu_context(kern_hyp_va(mmu));
 }
 
 static void handle___kvm_timer_set_cntvoff(struct kvm_cpu_context *host_ctxt)
@@ -115,7 +115,7 @@ static const hcall_t *host_hcall[] = {
HANDLE_FUNC(__kvm_flush_vm_context),
HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa),
HANDLE_FUNC(__kvm_tlb_flush_vmid),
-   HANDLE_FUNC(__kvm_tlb_flush_local_vmid),
+   HANDLE_FUNC(__kvm_flush_cpu_context),
HANDLE_FUNC(__kvm_timer_set_cntvoff),
HANDLE_FUNC(__kvm_enable_ssbs),
HANDLE_FUNC(__vgic_v3_get_ich_vtr_el2),
--- a/arch/arm64/kvm/hyp/nvhe/tlb.c
+++ b/arch/arm64/kvm/hyp/nvhe/tlb.c
@@ -123,7 +123,7 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_
__tlb_switch_to_host();
 }
 
-void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu)
+void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu)
 {
struct tlb_inv_context cxt;
 
@@ -131,6 +131,7 @@ void __kvm_tlb_flush_local_vmid(struct k

[PATCH 5.11 296/306] KVM: arm64: Avoid corrupting vCPU context register in guest exit

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Will Deacon 

commit 31948332d5fa392ad933f4a6a10026850649ed76 upstream.

Commit 7db21530479f ("KVM: arm64: Restore hyp when panicking in guest
context") tracks the currently running vCPU, clearing the pointer to
NULL on exit from a guest.

Unfortunately, the use of 'set_loaded_vcpu' clobbers x1 to point at the
kvm_hyp_ctxt instead of the vCPU context, causing the subsequent RAS
code to go off into the weeds when it saves the DISR assuming that the
CPU context is embedded in a struct vCPU.

Leave x1 alone and use x3 as a temporary register instead when clearing
the vCPU on the guest exit path.

Cc: Marc Zyngier 
Cc: Andrew Scull 
Cc: 
Fixes: 7db21530479f ("KVM: arm64: Restore hyp when panicking in guest context")
Suggested-by: Quentin Perret 
Signed-off-by: Will Deacon 
Signed-off-by: Marc Zyngier 
Link: https://lore.kernel.org/r/20210226181211.14542-1-w...@kernel.org
Message-Id: <20210305185254.3730990-3-...@kernel.org>
Signed-off-by: Paolo Bonzini 
Signed-off-by: Greg Kroah-Hartman 
---
 arch/arm64/kvm/hyp/entry.S |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -146,7 +146,7 @@ SYM_INNER_LABEL(__guest_exit, SYM_L_GLOB
// Now restore the hyp regs
restore_callee_saved_regs x2
 
-   set_loaded_vcpu xzr, x1, x2
+   set_loaded_vcpu xzr, x2, x3
 
 alternative_if ARM64_HAS_RAS_EXTN
// If we have the RAS extensions we can consume a pending error




[PATCH 5.11 295/306] KVM: arm64: Fix range alignment when walking page tables

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Jia He 

commit 357ad203d45c0f9d76a8feadbd5a1c5d460c638b upstream.

When walking the page tables at a given level, and if the start
address for the range isn't aligned for that level, we propagate
the misalignment on each iteration at that level.

This results in the walker ignoring a number of entries (depending
on the original misalignment) on each subsequent iteration.

Properly aligning the address before the next iteration addresses
this issue.

Cc: sta...@vger.kernel.org
Reported-by: Howard Zhang 
Acked-by: Will Deacon 
Signed-off-by: Jia He 
Fixes: b1e57de62cfb ("KVM: arm64: Add stand-alone page-table walker 
infrastructure")
[maz: rewrite commit message]
Signed-off-by: Marc Zyngier 
Link: https://lore.kernel.org/r/20210303024225.2591-1-justin...@arm.com
Message-Id: <20210305185254.3730990-9-...@kernel.org>
Signed-off-by: Paolo Bonzini 
Signed-off-by: Greg Kroah-Hartman 
---
 arch/arm64/kvm/hyp/pgtable.c |1 +
 1 file changed, 1 insertion(+)

--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -225,6 +225,7 @@ static inline int __kvm_pgtable_visit(st
goto out;
 
if (!table) {
+   data->addr = ALIGN_DOWN(data->addr, kvm_granule_size(level));
data->addr += kvm_granule_size(level);
goto out;
}




[PATCH 5.10 282/290] KVM: arm64: Fix exclusive limit for IPA size

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Marc Zyngier 

commit 262b003d059c6671601a19057e9fe1a5e7f23722 upstream.

When registering a memslot, we check the size and location of that
memslot against the IPA size to ensure that we can provide guest
access to the whole of the memory.

Unfortunately, this check rejects memslot that end-up at the exact
limit of the addressing capability for a given IPA size. For example,
it refuses the creation of a 2GB memslot at 0x800 with a 32bit
IPA space.

Fix it by relaxing the check to accept a memslot reaching the
limit of the IPA space.

Fixes: c3058d5da222 ("arm/arm64: KVM: Ensure memslots are within KVM_PHYS_SIZE")
Reviewed-by: Eric Auger 
Signed-off-by: Marc Zyngier 
Cc: sta...@vger.kernel.org
Reviewed-by: Andrew Jones 
Link: https://lore.kernel.org/r/20210311100016.3830038-3-...@kernel.org
Signed-off-by: Greg Kroah-Hartman 
---
 arch/arm64/kvm/mmu.c |3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1309,8 +1309,7 @@ int kvm_arch_prepare_memory_region(struc
 * Prevent userspace from creating a memory region outside of the IPA
 * space addressable by the KVM guest IPA space.
 */
-   if (memslot->base_gfn + memslot->npages >=
-   (kvm_phys_size(kvm) >> PAGE_SHIFT))
+   if ((memslot->base_gfn + memslot->npages) > (kvm_phys_size(kvm) >> 
PAGE_SHIFT))
return -EFAULT;
 
mmap_read_lock(current->mm);




[PATCH 5.10 281/290] KVM: arm64: Reject VM creation when the default IPA size is unsupported

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Marc Zyngier 

commit 7d717558dd5ef10d28866750d5c24ff892ea3778 upstream.

KVM/arm64 has forever used a 40bit default IPA space, partially
due to its 32bit heritage (where the only choice is 40bit).

However, there are implementations in the wild that have a *cough*
much smaller *cough* IPA space, which leads to a misprogramming of
VTCR_EL2, and a guest that is stuck on its first memory access
if userspace dares to ask for the default IPA setting (which most
VMMs do).

Instead, bluntly reject the creation of such a VM, as we can't
satisfy the requirements from userspace (with a one-off warning).
Also clarify the boot warning, and document that the VM creation
will fail when an unsupported IPA size is provided.

Although this is an ABI change, it doesn't really change much
for userspace:

- the guest couldn't run before this change, but no error was
  returned. At least userspace knows what is happening.

- a memory slot that was accepted because it did fit the default
  IPA space now doesn't even get a chance to be registered.

The other thing left to do is to convince userspace to
actually use the IPA space setting instead of relying on the
antiquated default.

Fixes: 233a7cb23531 ("kvm: arm64: Allow tuning the physical address size for 
VM")
Signed-off-by: Marc Zyngier 
Cc: sta...@vger.kernel.org
Reviewed-by: Andrew Jones 
Reviewed-by: Eric Auger 
Link: https://lore.kernel.org/r/20210311100016.3830038-2-...@kernel.org
Signed-off-by: Greg Kroah-Hartman 
---
 Documentation/virt/kvm/api.rst |3 +++
 arch/arm64/kvm/reset.c |   12 
 2 files changed, 11 insertions(+), 4 deletions(-)

--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -182,6 +182,9 @@ is dependent on the CPU capability and t
 be retrieved using KVM_CAP_ARM_VM_IPA_SIZE of the KVM_CHECK_EXTENSION
 ioctl() at run-time.
 
+Creation of the VM will fail if the requested IPA size (whether it is
+implicit or explicit) is unsupported on the host.
+
 Please note that configuring the IPA size does not affect the capability
 exposed by the guest CPUs in ID_AA64MMFR0_EL1[PARange]. It only affects
 size of the address translated by the stage2 level (guest physical to
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -373,10 +373,9 @@ int kvm_set_ipa_limit(void)
}
 
kvm_ipa_limit = id_aa64mmfr0_parange_to_phys_shift(parange);
-   WARN(kvm_ipa_limit < KVM_PHYS_SHIFT,
-"KVM IPA Size Limit (%d bits) is smaller than default size\n",
-kvm_ipa_limit);
-   kvm_info("IPA Size Limit: %d bits\n", kvm_ipa_limit);
+   kvm_info("IPA Size Limit: %d bits%s\n", kvm_ipa_limit,
+((kvm_ipa_limit < KVM_PHYS_SHIFT) ?
+ " (Reduced IPA size, limited VM/VMM compatibility)" : ""));
 
return 0;
 }
@@ -405,6 +404,11 @@ int kvm_arm_setup_stage2(struct kvm *kvm
return -EINVAL;
} else {
phys_shift = KVM_PHYS_SHIFT;
+   if (phys_shift > kvm_ipa_limit) {
+   pr_warn_once("%s using unsupported default IPA limit, 
upgrade your VMM\n",
+current->comm);
+   return -EINVAL;
+   }
}
 
mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);




[PATCH 5.10 277/290] KVM: kvmclock: Fix vCPUs > 64 cant be online/hotpluged

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Wanpeng Li 

commit d7eb79c6290c7ae4561418544072e0a3266e7384 upstream.

# lscpu
Architecture:  x86_64
CPU op-mode(s):32-bit, 64-bit
Byte Order:Little Endian
CPU(s):88
On-line CPU(s) list:   0-63
Off-line CPU(s) list:  64-87

# cat /proc/cmdline
BOOT_IMAGE=/vmlinuz-5.10.0-rc3-tlinux2-0050+ root=/dev/mapper/cl-root ro
rd.lvm.lv=cl/root rhgb quiet console=ttyS0 LANG=en_US .UTF-8 
no-kvmclock-vsyscall

# echo 1 > /sys/devices/system/cpu/cpu76/online
-bash: echo: write error: Cannot allocate memory

The per-cpu vsyscall pvclock data pointer assigns either an element of the
static array hv_clock_boot (#vCPU <= 64) or dynamically allocated memory
hvclock_mem (vCPU > 64). The dynamic memory will not be allocated if the
kvmclock vsyscall is disabled; this can result in cpu hotplug failing in
kvmclock_setup_percpu(), which returns -ENOMEM. It's broken for no-vsyscall
and sometimes you end up with vsyscall disabled if the host does something
strange. This patch fixes it by allocating this memory unconditionally,
even if vsyscall is disabled.

Fixes: 6a1cac56f4 ("x86/kvm: Use __bss_decrypted attribute in shared variables")
Reported-by: Zelin Deng 
Cc: Brijesh Singh 
Cc: sta...@vger.kernel.org
Signed-off-by: Wanpeng Li 
Message-Id: <1614130683-24137-1-git-send-email-wanpen...@tencent.com>
Signed-off-by: Paolo Bonzini 
Signed-off-by: Greg Kroah-Hartman 
---
 arch/x86/kernel/kvmclock.c |   19 +--
 1 file changed, 9 insertions(+), 10 deletions(-)

--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -269,21 +269,20 @@ static void __init kvmclock_init_mem(voi
 
 static int __init kvm_setup_vsyscall_timeinfo(void)
 {
-#ifdef CONFIG_X86_64
-   u8 flags;
+   kvmclock_init_mem();
 
-   if (!per_cpu(hv_clock_per_cpu, 0) || !kvmclock_vsyscall)
-   return 0;
+#ifdef CONFIG_X86_64
+   if (per_cpu(hv_clock_per_cpu, 0) && kvmclock_vsyscall) {
+   u8 flags;
 
-   flags = pvclock_read_flags(_clock_boot[0].pvti);
-   if (!(flags & PVCLOCK_TSC_STABLE_BIT))
-   return 0;
+   flags = pvclock_read_flags(_clock_boot[0].pvti);
+   if (!(flags & PVCLOCK_TSC_STABLE_BIT))
+   return 0;
 
-   kvm_clock.vdso_clock_mode = VDSO_CLOCKMODE_PVCLOCK;
+   kvm_clock.vdso_clock_mode = VDSO_CLOCKMODE_PVCLOCK;
+   }
 #endif
 
-   kvmclock_init_mem();
-
return 0;
 }
 early_initcall(kvm_setup_vsyscall_timeinfo);




[PATCH 5.10 289/290] KVM: arm64: Fix nVHE hyp panic host context restore

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Andrew Scull 

Commit c4b000c3928d4f20acef79dccf3a65ae3795e0b0 upstream.

When panicking from the nVHE hyp and restoring the host context, x29 is
expected to hold a pointer to the host context. This wasn't being done,
so fix it to make sure there's a valid pointer to the host context being
used.

Rather than passing a boolean indicating whether or not the host context
should be restored, instead pass the pointer to the host context. NULL
is passed to indicate that no context should be restored.

Fixes: a2e102e20fd6 ("KVM: arm64: nVHE: Handle hyp panics")
Cc: sta...@vger.kernel.org # 5.10.y only
Signed-off-by: Andrew Scull 
Signed-off-by: Marc Zyngier 
Link: https://lore.kernel.org/r/20210219122406.1337626-1-asc...@google.com
Signed-off-by: Greg Kroah-Hartman 
---
 arch/arm64/include/asm/kvm_hyp.h |3 ++-
 arch/arm64/kvm/hyp/nvhe/host.S   |   20 ++--
 arch/arm64/kvm/hyp/nvhe/switch.c |3 +--
 3 files changed, 13 insertions(+), 13 deletions(-)

--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -99,7 +99,8 @@ u64 __guest_enter(struct kvm_vcpu *vcpu)
 
 void __noreturn hyp_panic(void);
 #ifdef __KVM_NVHE_HYPERVISOR__
-void __noreturn __hyp_do_panic(bool restore_host, u64 spsr, u64 elr, u64 par);
+void __noreturn __hyp_do_panic(struct kvm_cpu_context *host_ctxt, u64 spsr,
+  u64 elr, u64 par);
 #endif
 
 #endif /* __ARM64_KVM_HYP_H__ */
--- a/arch/arm64/kvm/hyp/nvhe/host.S
+++ b/arch/arm64/kvm/hyp/nvhe/host.S
@@ -64,10 +64,15 @@ __host_enter_without_restoring:
 SYM_FUNC_END(__host_exit)
 
 /*
- * void __noreturn __hyp_do_panic(bool restore_host, u64 spsr, u64 elr, u64 
par);
+ * void __noreturn __hyp_do_panic(struct kvm_cpu_context *host_ctxt, u64 spsr,
+ *   u64 elr, u64 par);
  */
 SYM_FUNC_START(__hyp_do_panic)
-   /* Load the format arguments into x1-7 */
+   mov x29, x0
+
+   /* Load the format string into x0 and arguments into x1-7 */
+   ldr x0, =__hyp_panic_string
+
mov x6, x3
get_vcpu_ptr x7, x3
 
@@ -82,13 +87,8 @@ SYM_FUNC_START(__hyp_do_panic)
ldr lr, =panic
msr elr_el2, lr
 
-   /*
-* Set the panic format string and enter the host, conditionally
-* restoring the host context.
-*/
-   cmp x0, xzr
-   ldr x0, =__hyp_panic_string
-   b.eq__host_enter_without_restoring
+   /* Enter the host, conditionally restoring the host context. */
+   cbz x29, __host_enter_without_restoring
b   __host_enter_for_panic
 SYM_FUNC_END(__hyp_do_panic)
 
@@ -144,7 +144,7 @@ SYM_FUNC_END(__hyp_do_panic)
 
 .macro invalid_host_el1_vect
.align 7
-   mov x0, xzr /* restore_host = false */
+   mov x0, xzr /* host_ctxt = NULL */
mrs x1, spsr_el2
mrs x2, elr_el2
mrs x3, par_el1
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -260,7 +260,6 @@ void __noreturn hyp_panic(void)
u64 spsr = read_sysreg_el2(SYS_SPSR);
u64 elr = read_sysreg_el2(SYS_ELR);
u64 par = read_sysreg_par();
-   bool restore_host = true;
struct kvm_cpu_context *host_ctxt;
struct kvm_vcpu *vcpu;
 
@@ -274,7 +273,7 @@ void __noreturn hyp_panic(void)
__sysreg_restore_state_nvhe(host_ctxt);
}
 
-   __hyp_do_panic(restore_host, spsr, elr, par);
+   __hyp_do_panic(host_ctxt, spsr, elr, par);
unreachable();
 }
 




[PATCH 5.10 288/290] xen/events: avoid handling the same event on two cpus at the same time

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Juergen Gross 

commit b6622798bc50b625a1e62f82c7190df40c1f5b21 upstream.

When changing the cpu affinity of an event it can happen today that
(with some unlucky timing) the same event will be handled on the old
and the new cpu at the same time.

Avoid that by adding an "event active" flag to the per-event data and
call the handler only if this flag isn't set.

Cc: sta...@vger.kernel.org
Reported-by: Julien Grall 
Signed-off-by: Juergen Gross 
Reviewed-by: Julien Grall 
Link: https://lore.kernel.org/r/20210306161833.4552-4-jgr...@suse.com
Signed-off-by: Boris Ostrovsky 
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/xen/events/events_base.c |   26 ++
 1 file changed, 18 insertions(+), 8 deletions(-)

--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -101,6 +101,7 @@ struct irq_info {
 #define EVT_MASK_REASON_EXPLICIT   0x01
 #define EVT_MASK_REASON_TEMPORARY  0x02
 #define EVT_MASK_REASON_EOI_PENDING0x04
+   u8 is_active;   /* Is event just being handled? */
unsigned irq;
evtchn_port_t evtchn;   /* event channel */
unsigned short cpu; /* cpu bound */
@@ -751,6 +752,12 @@ static void xen_evtchn_close(evtchn_port
BUG();
 }
 
+static void event_handler_exit(struct irq_info *info)
+{
+   smp_store_release(>is_active, 0);
+   clear_evtchn(info->evtchn);
+}
+
 static void pirq_query_unmask(int irq)
 {
struct physdev_irq_status_query irq_status;
@@ -781,13 +788,13 @@ static void eoi_pirq(struct irq_data *da
likely(!irqd_irq_disabled(data))) {
do_mask(info, EVT_MASK_REASON_TEMPORARY);
 
-   clear_evtchn(evtchn);
+   event_handler_exit(info);
 
irq_move_masked_irq(data);
 
do_unmask(info, EVT_MASK_REASON_TEMPORARY);
} else
-   clear_evtchn(evtchn);
+   event_handler_exit(info);
 
if (pirq_needs_eoi(data->irq)) {
rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, );
@@ -1603,6 +1610,8 @@ void handle_irq_for_port(evtchn_port_t p
}
 
info = info_for_irq(irq);
+   if (xchg_acquire(>is_active, 1))
+   return;
 
if (ctrl->defer_eoi) {
info->eoi_cpu = smp_processor_id();
@@ -1778,13 +1787,13 @@ static void ack_dynirq(struct irq_data *
likely(!irqd_irq_disabled(data))) {
do_mask(info, EVT_MASK_REASON_TEMPORARY);
 
-   clear_evtchn(evtchn);
+   event_handler_exit(info);
 
irq_move_masked_irq(data);
 
do_unmask(info, EVT_MASK_REASON_TEMPORARY);
} else
-   clear_evtchn(evtchn);
+   event_handler_exit(info);
 }
 
 static void mask_ack_dynirq(struct irq_data *data)
@@ -1800,7 +1809,7 @@ static void lateeoi_ack_dynirq(struct ir
 
if (VALID_EVTCHN(evtchn)) {
do_mask(info, EVT_MASK_REASON_EOI_PENDING);
-   clear_evtchn(evtchn);
+   event_handler_exit(info);
}
 }
 
@@ -1811,7 +1820,7 @@ static void lateeoi_mask_ack_dynirq(stru
 
if (VALID_EVTCHN(evtchn)) {
do_mask(info, EVT_MASK_REASON_EXPLICIT);
-   clear_evtchn(evtchn);
+   event_handler_exit(info);
}
 }
 
@@ -1922,10 +1931,11 @@ static void restore_cpu_ipis(unsigned in
 /* Clear an irq's pending state, in preparation for polling on it */
 void xen_clear_irq_pending(int irq)
 {
-   evtchn_port_t evtchn = evtchn_from_irq(irq);
+   struct irq_info *info = info_for_irq(irq);
+   evtchn_port_t evtchn = info ? info->evtchn : 0;
 
if (VALID_EVTCHN(evtchn))
-   clear_evtchn(evtchn);
+   event_handler_exit(info);
 }
 EXPORT_SYMBOL(xen_clear_irq_pending);
 void xen_set_irq_pending(int irq)




[PATCH 5.10 290/290] RDMA/umem: Use ib_dma_max_seg_size instead of dma_get_max_seg_size

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Christoph Hellwig 

commit b116c702791a9834e6485f67ca6267d9fdf59b87 upstream.

RDMA ULPs must not call DMA mapping APIs directly but instead use the
ib_dma_* wrappers.

Fixes: 0c16d9635e3a ("RDMA/umem: Move to allocate SG table from pages")
Link: https://lore.kernel.org/r/20201106181941.1878556-3-...@lst.de
Reported-by: Jason Gunthorpe 
Signed-off-by: Christoph Hellwig 
Signed-off-by: Jason Gunthorpe 
Cc: "Marciniszyn, Mike" 
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/infiniband/core/umem.c |8 
 1 file changed, 4 insertions(+), 4 deletions(-)

--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -220,10 +220,10 @@ struct ib_umem *ib_umem_get(struct ib_de
 
cur_base += ret * PAGE_SIZE;
npages -= ret;
-   sg = __sg_alloc_table_from_pages(
-   >sg_head, page_list, ret, 0, ret << PAGE_SHIFT,
-   dma_get_max_seg_size(device->dma_device), sg, npages,
-   GFP_KERNEL);
+   sg = __sg_alloc_table_from_pages(>sg_head, page_list, ret,
+   0, ret << PAGE_SHIFT,
+   ib_dma_max_seg_size(device), sg, npages,
+   GFP_KERNEL);
umem->sg_nents = umem->sg_head.nents;
if (IS_ERR(sg)) {
unpin_user_pages_dirty_lock(page_list, ret, 0);




[PATCH 5.10 287/290] xen/events: don't unmask an event channel when an eoi is pending

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Juergen Gross 

commit 25da4618af240fbec6112401498301a6f2bc9702 upstream.

An event channel should be kept masked when an eoi is pending for it.
When being migrated to another cpu it might be unmasked, though.

In order to avoid this keep three different flags for each event channel
to be able to distinguish "normal" masking/unmasking from eoi related
masking/unmasking and temporary masking. The event channel should only
be able to generate an interrupt if all flags are cleared.

Cc: sta...@vger.kernel.org
Fixes: 54c9de89895e ("xen/events: add a new "late EOI" evtchn framework")
Reported-by: Julien Grall 
Signed-off-by: Juergen Gross 
Reviewed-by: Julien Grall 
Reviewed-by: Boris Ostrovsky 
Tested-by: Ross Lagerwall 
Link: https://lore.kernel.org/r/20210306161833.4552-3-jgr...@suse.com
[boris -- corrected Fixed tag format]
Signed-off-by: Boris Ostrovsky 
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/xen/events/events_2l.c   |7 --
 drivers/xen/events/events_base.c |  117 ++-
 drivers/xen/events/events_fifo.c |7 --
 drivers/xen/events/events_internal.h |6 -
 4 files changed, 88 insertions(+), 49 deletions(-)

--- a/drivers/xen/events/events_2l.c
+++ b/drivers/xen/events/events_2l.c
@@ -77,12 +77,6 @@ static bool evtchn_2l_is_pending(evtchn_
return sync_test_bit(port, BM(>evtchn_pending[0]));
 }
 
-static bool evtchn_2l_test_and_set_mask(evtchn_port_t port)
-{
-   struct shared_info *s = HYPERVISOR_shared_info;
-   return sync_test_and_set_bit(port, BM(>evtchn_mask[0]));
-}
-
 static void evtchn_2l_mask(evtchn_port_t port)
 {
struct shared_info *s = HYPERVISOR_shared_info;
@@ -376,7 +370,6 @@ static const struct evtchn_ops evtchn_op
.clear_pending = evtchn_2l_clear_pending,
.set_pending   = evtchn_2l_set_pending,
.is_pending= evtchn_2l_is_pending,
-   .test_and_set_mask = evtchn_2l_test_and_set_mask,
.mask  = evtchn_2l_mask,
.unmask= evtchn_2l_unmask,
.handle_events = evtchn_2l_handle_events,
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -96,13 +96,18 @@ struct irq_info {
struct list_head eoi_list;
short refcnt;
short spurious_cnt;
-   enum xen_irq_type type; /* type */
+   short type; /* type */
+   u8 mask_reason; /* Why is event channel masked */
+#define EVT_MASK_REASON_EXPLICIT   0x01
+#define EVT_MASK_REASON_TEMPORARY  0x02
+#define EVT_MASK_REASON_EOI_PENDING0x04
unsigned irq;
evtchn_port_t evtchn;   /* event channel */
unsigned short cpu; /* cpu bound */
unsigned short eoi_cpu; /* EOI must happen on this cpu-1 */
unsigned int irq_epoch; /* If eoi_cpu valid: irq_epoch of event */
u64 eoi_time;   /* Time in jiffies when to EOI. */
+   spinlock_t lock;
 
union {
unsigned short virq;
@@ -151,6 +156,7 @@ static DEFINE_RWLOCK(evtchn_rwlock);
  *   evtchn_rwlock
  * IRQ-desc lock
  *   percpu eoi_list_lock
+ * irq_info->lock
  */
 
 static LIST_HEAD(xen_irq_list_head);
@@ -272,6 +278,8 @@ static int xen_irq_info_common_setup(str
info->irq = irq;
info->evtchn = evtchn;
info->cpu = cpu;
+   info->mask_reason = EVT_MASK_REASON_EXPLICIT;
+   spin_lock_init(>lock);
 
ret = set_evtchn_to_irq(evtchn, irq);
if (ret < 0)
@@ -419,6 +427,34 @@ unsigned int cpu_from_evtchn(evtchn_port
return ret;
 }
 
+static void do_mask(struct irq_info *info, u8 reason)
+{
+   unsigned long flags;
+
+   spin_lock_irqsave(>lock, flags);
+
+   if (!info->mask_reason)
+   mask_evtchn(info->evtchn);
+
+   info->mask_reason |= reason;
+
+   spin_unlock_irqrestore(>lock, flags);
+}
+
+static void do_unmask(struct irq_info *info, u8 reason)
+{
+   unsigned long flags;
+
+   spin_lock_irqsave(>lock, flags);
+
+   info->mask_reason &= ~reason;
+
+   if (!info->mask_reason)
+   unmask_evtchn(info->evtchn);
+
+   spin_unlock_irqrestore(>lock, flags);
+}
+
 #ifdef CONFIG_X86
 static bool pirq_check_eoi_map(unsigned irq)
 {
@@ -546,7 +582,7 @@ static void xen_irq_lateeoi_locked(struc
}
 
info->eoi_time = 0;
-   unmask_evtchn(evtchn);
+   do_unmask(info, EVT_MASK_REASON_EOI_PENDING);
 }
 
 static void xen_irq_lateeoi_worker(struct work_struct *work)
@@ -733,7 +769,8 @@ static void pirq_query_unmask(int irq)
 
 static void eoi_pirq(struct irq_data *data)
 {
-   evtchn_port_t evtchn = evtchn_from_irq(data->irq);
+   struct irq_info *info = info_for_irq(data->irq);
+   evtchn_port_t evtchn = info ? info->evtchn : 0;
struct physdev_eoi eoi = { .irq = pirq_from_irq(data->irq) };
int rc = 0;
 
@@ -742,14 +779,13 @@ static void eoi_pirq(struct irq_data *da
 
if 

[PATCH 5.10 286/290] mm/page_alloc.c: refactor initialization of struct page for holes in memory layout

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Mike Rapoport 

commit 0740a50b9baa4472cfb12442df4b39e2712a64a4 upstream.

There could be struct pages that are not backed by actual physical memory.
This can happen when the actual memory bank is not a multiple of
SECTION_SIZE or when an architecture does not register memory holes
reserved by the firmware as memblock.memory.

Such pages are currently initialized using init_unavailable_mem() function
that iterates through PFNs in holes in memblock.memory and if there is a
struct page corresponding to a PFN, the fields of this page are set to
default values and it is marked as Reserved.

init_unavailable_mem() does not take into account zone and node the page
belongs to and sets both zone and node links in struct page to zero.

Before commit 73a6e474cb37 ("mm: memmap_init: iterate over memblock
regions rather that check each PFN") the holes inside a zone were
re-initialized during memmap_init() and got their zone/node links right.
However, after that commit nothing updates the struct pages representing
such holes.

On a system that has firmware reserved holes in a zone above ZONE_DMA, for
instance in a configuration below:

# grep -A1 E820 /proc/iomem
7a17b000-7a216fff : Unknown E820 type
7a217000-7bff : System RAM

unset zone link in struct page will trigger

VM_BUG_ON_PAGE(!zone_spans_pfn(page_zone(page), pfn), page);

in set_pfnblock_flags_mask() when called with a struct page from a range
other than E820_TYPE_RAM because there are pages in the range of
ZONE_DMA32 but the unset zone link in struct page makes them appear as a
part of ZONE_DMA.

Interleave initialization of the unavailable pages with the normal
initialization of memory map, so that zone and node information will be
properly set on struct pages that are not backed by the actual memory.

With this change the pages for holes inside a zone will get proper
zone/node links and the pages that are not spanned by any node will get
links to the adjacent zone/node.  The holes between nodes will be
prepended to the zone/node above the hole and the trailing pages in the
last section that will be appended to the zone/node below.

[a...@linux-foundation.org: don't initialize static to zero, use %llu for u64]

Link: https://lkml.kernel.org/r/20210225224351.7356-2-r...@kernel.org
Fixes: 73a6e474cb37 ("mm: memmap_init: iterate over memblock regions rather 
that check each PFN")
Signed-off-by: Mike Rapoport 
Reported-by: Qian Cai 
Reported-by: Andrea Arcangeli 
Reviewed-by: Baoquan He 
Acked-by: Vlastimil Babka 
Reviewed-by: David Hildenbrand 
Cc: Borislav Petkov 
Cc: Chris Wilson 
Cc: "H. Peter Anvin" 
Cc: Łukasz Majczak 
Cc: Ingo Molnar 
Cc: Mel Gorman 
Cc: Michal Hocko 
Cc: "Sarvela, Tomi P" 
Cc: Thomas Gleixner 
Cc: 
Signed-off-by: Andrew Morton 
Signed-off-by: Linus Torvalds 
Signed-off-by: Mike Rapoport 
Signed-off-by: Greg Kroah-Hartman 
---
 mm/page_alloc.c |  158 ++--
 1 file changed, 75 insertions(+), 83 deletions(-)

--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6189,13 +6189,66 @@ static void __meminit zone_init_free_lis
}
 }
 
+#if !defined(CONFIG_FLAT_NODE_MEM_MAP)
+/*
+ * Only struct pages that correspond to ranges defined by memblock.memory
+ * are zeroed and initialized by going through __init_single_page() during
+ * memmap_init_zone().
+ *
+ * But, there could be struct pages that correspond to holes in
+ * memblock.memory. This can happen because of the following reasons:
+ * - physical memory bank size is not necessarily the exact multiple of the
+ *   arbitrary section size
+ * - early reserved memory may not be listed in memblock.memory
+ * - memory layouts defined with memmap= kernel parameter may not align
+ *   nicely with memmap sections
+ *
+ * Explicitly initialize those struct pages so that:
+ * - PG_Reserved is set
+ * - zone and node links point to zone and node that span the page if the
+ *   hole is in the middle of a zone
+ * - zone and node links point to adjacent zone/node if the hole falls on
+ *   the zone boundary; the pages in such holes will be prepended to the
+ *   zone/node above the hole except for the trailing pages in the last
+ *   section that will be appended to the zone/node below.
+ */
+static u64 __meminit init_unavailable_range(unsigned long spfn,
+   unsigned long epfn,
+   int zone, int node)
+{
+   unsigned long pfn;
+   u64 pgcnt = 0;
+
+   for (pfn = spfn; pfn < epfn; pfn++) {
+   if (!pfn_valid(ALIGN_DOWN(pfn, pageblock_nr_pages))) {
+   pfn = ALIGN_DOWN(pfn, pageblock_nr_pages)
+   + pageblock_nr_pages - 1;
+   continue;
+   }
+   __init_single_page(pfn_to_page(pfn), pfn, zone, node);
+   __SetPageReserved(pfn_to_page(pfn));
+   pgcnt++;
+   }
+
+  

[PATCH 5.10 285/290] KVM: arm64: Ensure I-cache isolation between vcpus of a same VM

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Marc Zyngier 

Commit 01dc9262ff5797b675c32c0c6bc682777d23de05 upstream.

It recently became apparent that the ARMv8 architecture has interesting
rules regarding attributes being used when fetching instructions
if the MMU is off at Stage-1.

In this situation, the CPU is allowed to fetch from the PoC and
allocate into the I-cache (unless the memory is mapped with
the XN attribute at Stage-2).

If we transpose this to vcpus sharing a single physical CPU,
it is possible for a vcpu running with its MMU off to influence
another vcpu running with its MMU on, as the latter is expected to
fetch from the PoU (and self-patching code doesn't flush below that
level).

In order to solve this, reuse the vcpu-private TLB invalidation
code to apply the same policy to the I-cache, nuking it every time
the vcpu runs on a physical CPU that ran another vcpu of the same
VM in the past.

This involve renaming __kvm_tlb_flush_local_vmid() to
__kvm_flush_cpu_context(), and inserting a local i-cache invalidation
there.

Cc: sta...@vger.kernel.org
Signed-off-by: Marc Zyngier 
Acked-by: Will Deacon 
Acked-by: Catalin Marinas 
Link: https://lore.kernel.org/r/20210303164505.68492-1-...@kernel.org
Signed-off-by: Greg Kroah-Hartman 
---
 arch/arm64/include/asm/kvm_asm.h   |4 ++--
 arch/arm64/kvm/arm.c   |7 ++-
 arch/arm64/kvm/hyp/nvhe/hyp-main.c |4 ++--
 arch/arm64/kvm/hyp/nvhe/tlb.c  |3 ++-
 arch/arm64/kvm/hyp/vhe/tlb.c   |3 ++-
 5 files changed, 14 insertions(+), 7 deletions(-)

--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -49,7 +49,7 @@
 #define __KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context   2
 #define __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa 3
 #define __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid 4
-#define __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_local_vmid   5
+#define __KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context  5
 #define __KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff  6
 #define __KVM_HOST_SMCCC_FUNC___kvm_enable_ssbs7
 #define __KVM_HOST_SMCCC_FUNC___vgic_v3_get_ich_vtr_el28
@@ -180,10 +180,10 @@ DECLARE_KVM_HYP_SYM(__bp_harden_hyp_vecs
 #define __bp_harden_hyp_vecs   CHOOSE_HYP_SYM(__bp_harden_hyp_vecs)
 
 extern void __kvm_flush_vm_context(void);
+extern void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu);
 extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa,
 int level);
 extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu);
-extern void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu);
 
 extern void __kvm_timer_set_cntvoff(u64 cntvoff);
 
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -352,11 +352,16 @@ void kvm_arch_vcpu_load(struct kvm_vcpu
last_ran = this_cpu_ptr(mmu->last_vcpu_ran);
 
/*
+* We guarantee that both TLBs and I-cache are private to each
+* vcpu. If detecting that a vcpu from the same VM has
+* previously run on the same physical CPU, call into the
+* hypervisor code to nuke the relevant contexts.
+*
 * We might get preempted before the vCPU actually runs, but
 * over-invalidation doesn't affect correctness.
 */
if (*last_ran != vcpu->vcpu_id) {
-   kvm_call_hyp(__kvm_tlb_flush_local_vmid, mmu);
+   kvm_call_hyp(__kvm_flush_cpu_context, mmu);
*last_ran = vcpu->vcpu_id;
}
 
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -46,11 +46,11 @@ static void handle_host_hcall(unsigned l
__kvm_tlb_flush_vmid(kern_hyp_va(mmu));
break;
}
-   case KVM_HOST_SMCCC_FUNC(__kvm_tlb_flush_local_vmid): {
+   case KVM_HOST_SMCCC_FUNC(__kvm_flush_cpu_context): {
unsigned long r1 = host_ctxt->regs.regs[1];
struct kvm_s2_mmu *mmu = (struct kvm_s2_mmu *)r1;
 
-   __kvm_tlb_flush_local_vmid(kern_hyp_va(mmu));
+   __kvm_flush_cpu_context(kern_hyp_va(mmu));
break;
}
case KVM_HOST_SMCCC_FUNC(__kvm_timer_set_cntvoff): {
--- a/arch/arm64/kvm/hyp/nvhe/tlb.c
+++ b/arch/arm64/kvm/hyp/nvhe/tlb.c
@@ -123,7 +123,7 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_
__tlb_switch_to_host();
 }
 
-void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu)
+void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu)
 {
struct tlb_inv_context cxt;
 
@@ -131,6 +131,7 @@ void __kvm_tlb_flush_local_vmid(struct k
__tlb_switch_to_guest(mmu, );
 
__tlbi(vmalle1);
+   asm volatile("ic iallu");
dsb(nsh);
isb();
 
--- a/arch/arm64/kvm/hyp/vhe/tlb.c
+++ b/arch/arm64/kvm/hyp/vhe/tlb.c
@@ -127,7 +127,7 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_
__tlb_switch_to_host();
 }
 
-void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu)
+void 

[PATCH 5.10 284/290] mm/madvise: replace ptrace attach requirement for process_madvise

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Suren Baghdasaryan 

commit 96cfe2c0fd23ea7c2368d14f769d287e7ae1082e upstream.

process_madvise currently requires ptrace attach capability.
PTRACE_MODE_ATTACH gives one process complete control over another
process.  It effectively removes the security boundary between the two
processes (in one direction).  Granting ptrace attach capability even to a
system process is considered dangerous since it creates an attack surface.
This severely limits the usage of this API.

The operations process_madvise can perform do not affect the correctness
of the operation of the target process; they only affect where the data is
physically located (and therefore, how fast it can be accessed).  What we
want is the ability for one process to influence another process in order
to optimize performance across the entire system while leaving the
security boundary intact.

Replace PTRACE_MODE_ATTACH with a combination of PTRACE_MODE_READ and
CAP_SYS_NICE.  PTRACE_MODE_READ to prevent leaking ASLR metadata and
CAP_SYS_NICE for influencing process performance.

Link: https://lkml.kernel.org/r/20210303185807.2160264-1-sur...@google.com
Signed-off-by: Suren Baghdasaryan 
Reviewed-by: Kees Cook 
Acked-by: Minchan Kim 
Acked-by: David Rientjes 
Cc: Jann Horn 
Cc: Jeff Vander Stoep 
Cc: Michal Hocko 
Cc: Shakeel Butt 
Cc: Tim Murray 
Cc: Florian Weimer 
Cc: Oleg Nesterov 
Cc: James Morris 
Cc: [5.10+]
Signed-off-by: Andrew Morton 
Signed-off-by: Linus Torvalds 
Signed-off-by: Greg Kroah-Hartman 
---
 mm/madvise.c |   13 -
 1 file changed, 12 insertions(+), 1 deletion(-)

--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -1202,12 +1202,22 @@ SYSCALL_DEFINE5(process_madvise, int, pi
goto release_task;
}
 
-   mm = mm_access(task, PTRACE_MODE_ATTACH_FSCREDS);
+   /* Require PTRACE_MODE_READ to avoid leaking ASLR metadata. */
+   mm = mm_access(task, PTRACE_MODE_READ_FSCREDS);
if (IS_ERR_OR_NULL(mm)) {
ret = IS_ERR(mm) ? PTR_ERR(mm) : -ESRCH;
goto release_task;
}
 
+   /*
+* Require CAP_SYS_NICE for influencing process performance. Note that
+* only non-destructive hints are currently supported.
+*/
+   if (!capable(CAP_SYS_NICE)) {
+   ret = -EPERM;
+   goto release_mm;
+   }
+
total_len = iov_iter_count();
 
while (iov_iter_count()) {
@@ -1222,6 +1232,7 @@ SYSCALL_DEFINE5(process_madvise, int, pi
if (ret == 0)
ret = total_len - iov_iter_count();
 
+release_mm:
mmput(mm);
 release_task:
put_task_struct(task);




[PATCH 5.10 283/290] mm/userfaultfd: fix memory corruption due to writeprotect

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Nadav Amit 

commit 6ce64428d62026a10cb5d80138ff2f90cc21d367 upstream.

Userfaultfd self-test fails occasionally, indicating a memory corruption.

Analyzing this problem indicates that there is a real bug since mmap_lock
is only taken for read in mwriteprotect_range() and defers flushes, and
since there is insufficient consideration of concurrent deferred TLB
flushes in wp_page_copy().  Although the PTE is flushed from the TLBs in
wp_page_copy(), this flush takes place after the copy has already been
performed, and therefore changes of the page are possible between the time
of the copy and the time in which the PTE is flushed.

To make matters worse, memory-unprotection using userfaultfd also poses a
problem.  Although memory unprotection is logically a promotion of PTE
permissions, and therefore should not require a TLB flush, the current
userrfaultfd code might actually cause a demotion of the architectural PTE
permission: when userfaultfd_writeprotect() unprotects a memory region, it
unintentionally *clears* the RW-bit if it was already set.  Note that
unprotecting a PTE that is not write-protected is a valid use-case: the
userfaultfd monitor might ask to unprotect a region that holds both
write-protected and write-unprotected PTEs.

The scenario that happens in selftests/vm/userfaultfd is as follows:

cpu0cpu1cpu2

[ Writable PTE
  cached in TLB ]
userfaultfd_writeprotect()
[ write-*unprotect* ]
mwriteprotect_range()
mmap_read_lock()
change_protection()

change_protection_range()
...
change_pte_range()
[ *clear* “write”-bit ]
[ defer TLB flushes ]
[ page-fault ]
...
wp_page_copy()
 cow_user_page()
  [ copy page ]
[ write to old
  page ]
...
 set_pte_at_notify()

A similar scenario can happen:

cpu0cpu1cpu2cpu3

[ Writable PTE
  cached in TLB ]
userfaultfd_writeprotect()
[ write-protect ]
[ deferred TLB flush ]
userfaultfd_writeprotect()
[ write-unprotect ]
[ deferred TLB flush]
[ page-fault ]
wp_page_copy()
 cow_user_page()
 [ copy page ]
 ...[ write to page ]
set_pte_at_notify()

This race exists since commit 292924b26024 ("userfaultfd: wp: apply
_PAGE_UFFD_WP bit").  Yet, as Yu Zhao pointed, these races became apparent
since commit 09854ba94c6a ("mm: do_wp_page() simplification") which made
wp_page_copy() more likely to take place, specifically if page_count(page)
> 1.

To resolve the aforementioned races, check whether there are pending
flushes on uffd-write-protected VMAs, and if there are, perform a flush
before doing the COW.

Further optimizations will follow to avoid unnecessary PTE write-protection
and TLB flushes during uffd-write-unprotect.

Link: https://lkml.kernel.org/r/20210304095423.3825684-1-na...@vmware.com
Fixes: 09854ba94c6a ("mm: do_wp_page() simplification")
Signed-off-by: Nadav Amit 
Suggested-by: Yu Zhao 
Reviewed-by: Peter Xu 
Tested-by: Peter Xu 
Cc: Andrea Arcangeli 
Cc: Andy Lutomirski 
Cc: Pavel Emelyanov 
Cc: Mike Kravetz 
Cc: Mike Rapoport 
Cc: Minchan Kim 
Cc: Will Deacon 
Cc: Peter Zijlstra 
Cc: [5.9+]
Signed-off-by: Andrew Morton 
Signed-off-by: Linus Torvalds 
Signed-off-by: Greg Kroah-Hartman 
---
 mm/memory.c |8 
 1 file changed, 8 insertions(+)

--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3090,6 +3090,14 @@ static vm_fault_t do_wp_page(struct vm_f
return handle_userfault(vmf, VM_UFFD_WP);
}
 
+   /*
+* Userfaultfd write-protect can defer flushes. Ensure the TLB
+* is flushed in this case before copying.
+*/
+   if (unlikely(userfaultfd_wp(vmf->vma) &&
+mm_tlb_flush_pending(vmf->vma->vm_mm)))
+   flush_tlb_page(vmf->vma, vmf->address);
+
vmf->page = vm_normal_page(vma, vmf->address, vmf->orig_pte);
if (!vmf->page) {
/*




[PATCH 5.11 250/306] drm/ttm: Fix TTM page pool accounting

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Anthony DeRossi 

[ Upstream commit ca63d76fd2319db984f2875992643f900caf2c72 ]

Freed pages are not subtracted from the allocated_pages counter in
ttm_pool_type_fini(), causing a leak in the count on device removal.
The next shrinker invocation loops forever trying to free pages that are
no longer in the pool:

  rcu: INFO: rcu_sched self-detected stall on CPU
  rcu:  3-: (9998 ticks this GP) idle=54e/1/0x4000 
softirq=434857/434857 fqs=2237
(t=10001 jiffies g=2194533 q=49211)
  NMI backtrace for cpu 3
  CPU: 3 PID: 1034 Comm: kswapd0 Tainted: P   O  5.11.0-com #1
  Hardware name: System manufacturer System Product Name/PRIME X570-PRO, BIOS 
1405 11/19/2019
  Call Trace:
   
   ...
   
   sysvec_apic_timer_interrupt+0x77/0x80
   asm_sysvec_apic_timer_interrupt+0x12/0x20
  RIP: 0010:mutex_unlock+0x16/0x20
  Code: e7 48 8b 70 10 e8 7a 53 77 ff eb aa e8 43 6c ff ff 0f 1f 00 65 48 8b 14 
25 00 6d 01 00 31 c9 48 89 d0 f0 48 0f b1 0f 48 39 c2 <74> 05 e9 e3 fe ff ff c3 
66 90 48 8b 47 20 48 85 c0 74 0f 8b 50 10
  RSP: 0018:bdb840797be8 EFLAGS: 0246
  RAX: 9ff445a41c00 RBX: c02a9ef8 RCX: 
  RDX: 9ff445a41c00 RSI: bdb840797c78 RDI: c02a9ac0
  RBP: 0080 R08:  R09: bdb840797c80
  R10:  R11: fff5 R12: 
  R13:  R14: 0084 R15: c02a9a60
   ttm_pool_shrink+0x7d/0x90 [ttm]
   ttm_pool_shrinker_scan+0x5/0x20 [ttm]
   do_shrink_slab+0x13a/0x1a0
...

debugfs shows the incorrect total:

  $ cat /sys/kernel/debug/dri/0/ttm_page_pool
--- 0--- --- 1--- --- 2--- --- 3--- --- 4--- --- 5--- --- 6--- --- 
7--- --- 8--- --- 9--- ---10---
  wc  :0000000  
  0000
  uc  :0000000  
  0000
  wc 32   :0000000  
  0000
  uc 32   :0000000  
  0000
  DMA uc  :0000000  
  0000
  DMA wc  :0000000  
  0000
  DMA :0000000  
  0000

  total   : 3029 of  8244261

Using ttm_pool_type_take() to remove pages from the pool before freeing
them correctly accounts for the freed pages.

Fixes: d099fc8f540a ("drm/ttm: new TT backend allocation pool v3")
Signed-off-by: Anthony DeRossi 
Link: 
https://patchwork.freedesktop.org/patch/msgid/20210303011723.22512-1-ajdero...@gmail.com
Reviewed-by: Christian König 
Signed-off-by: Christian König 
Signed-off-by: Maarten Lankhorst 
Signed-off-by: Sasha Levin 
---
 drivers/gpu/drm/ttm/ttm_pool.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index 6e27cb1bf48b..4eb6efb8b8c0 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -268,13 +268,13 @@ static void ttm_pool_type_init(struct ttm_pool_type *pt, 
struct ttm_pool *pool,
 /* Remove a pool_type from the global shrinker list and free all pages */
 static void ttm_pool_type_fini(struct ttm_pool_type *pt)
 {
-   struct page *p, *tmp;
+   struct page *p;
 
 	mutex_lock(&shrinker_lock);
 	list_del(&pt->shrinker_list);
 	mutex_unlock(&shrinker_lock);
 
-	list_for_each_entry_safe(p, tmp, &pt->pages, lru)
+   while ((p = ttm_pool_type_take(pt)))
ttm_pool_free_page(pt->pool, pt->caching, pt->order, p);
 }
 
-- 
2.30.1





[PATCH 5.10 278/290] KVM: arm64: Fix range alignment when walking page tables

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Jia He 

commit 357ad203d45c0f9d76a8feadbd5a1c5d460c638b upstream.

When walking the page tables at a given level, and if the start
address for the range isn't aligned for that level, we propagate
the misalignment on each iteration at that level.

This results in the walker ignoring a number of entries (depending
on the original misalignment) on each subsequent iteration.

Properly aligning the address before the next iteration addresses
this issue.

Cc: sta...@vger.kernel.org
Reported-by: Howard Zhang 
Acked-by: Will Deacon 
Signed-off-by: Jia He 
Fixes: b1e57de62cfb ("KVM: arm64: Add stand-alone page-table walker 
infrastructure")
[maz: rewrite commit message]
Signed-off-by: Marc Zyngier 
Link: https://lore.kernel.org/r/20210303024225.2591-1-justin...@arm.com
Message-Id: <20210305185254.3730990-9-...@kernel.org>
Signed-off-by: Paolo Bonzini 
Signed-off-by: Greg Kroah-Hartman 
---
 arch/arm64/kvm/hyp/pgtable.c |1 +
 1 file changed, 1 insertion(+)

--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -225,6 +225,7 @@ static inline int __kvm_pgtable_visit(st
goto out;
 
if (!table) {
+   data->addr = ALIGN_DOWN(data->addr, kvm_granule_size(level));
data->addr += kvm_granule_size(level);
goto out;
}




[PATCH 5.11 245/306] SUNRPC: Set memalloc_nofs_save() for sync tasks

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Benjamin Coddington 

[ Upstream commit f0940f4b3284a00f38a5d42e6067c2aaa20e1f2e ]

We could recurse into NFS doing memory reclaim while sending a sync task,
which might result in a deadlock.  Set memalloc_nofs_save for sync task
execution.

Fixes: a1231fda7e94 ("SUNRPC: Set memalloc_nofs_save() on all rpciod/xprtiod 
jobs")
Signed-off-by: Benjamin Coddington 
Signed-off-by: Anna Schumaker 
Signed-off-by: Sasha Levin 
---
 net/sunrpc/sched.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index cf702a5f7fe5..39ed0e0afe6d 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -963,8 +963,11 @@ void rpc_execute(struct rpc_task *task)
 
rpc_set_active(task);
rpc_make_runnable(rpciod_workqueue, task);
-   if (!is_async)
+   if (!is_async) {
+   unsigned int pflags = memalloc_nofs_save();
__rpc_execute(task);
+   memalloc_nofs_restore(pflags);
+   }
 }
 
 static void rpc_async_schedule(struct work_struct *work)
-- 
2.30.1





[PATCH 5.10 279/290] KVM: arm64: Avoid corrupting vCPU context register in guest exit

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Will Deacon 

commit 31948332d5fa392ad933f4a6a10026850649ed76 upstream.

Commit 7db21530479f ("KVM: arm64: Restore hyp when panicking in guest
context") tracks the currently running vCPU, clearing the pointer to
NULL on exit from a guest.

Unfortunately, the use of 'set_loaded_vcpu' clobbers x1 to point at the
kvm_hyp_ctxt instead of the vCPU context, causing the subsequent RAS
code to go off into the weeds when it saves the DISR assuming that the
CPU context is embedded in a struct vCPU.

Leave x1 alone and use x3 as a temporary register instead when clearing
the vCPU on the guest exit path.

Cc: Marc Zyngier 
Cc: Andrew Scull 
Cc: 
Fixes: 7db21530479f ("KVM: arm64: Restore hyp when panicking in guest context")
Suggested-by: Quentin Perret 
Signed-off-by: Will Deacon 
Signed-off-by: Marc Zyngier 
Link: https://lore.kernel.org/r/20210226181211.14542-1-w...@kernel.org
Message-Id: <20210305185254.3730990-3-...@kernel.org>
Signed-off-by: Paolo Bonzini 
Signed-off-by: Greg Kroah-Hartman 
---
 arch/arm64/kvm/hyp/entry.S |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -146,7 +146,7 @@ SYM_INNER_LABEL(__guest_exit, SYM_L_GLOB
// Now restore the hyp regs
restore_callee_saved_regs x2
 
-   set_loaded_vcpu xzr, x1, x2
+   set_loaded_vcpu xzr, x2, x3
 
 alternative_if ARM64_HAS_RAS_EXTN
// If we have the RAS extensions we can consume a pending error




[PATCH 5.11 244/306] arm64/mm: Fix pfn_valid() for ZONE_DEVICE based memory

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Anshuman Khandual 

[ Upstream commit eeb0753ba27b26f609e61f9950b14f1b934fe429 ]

pfn_valid() validates a pfn but basically it checks for a valid struct page
backing for that pfn. It should always return positive for memory ranges
backed with struct page mapping. But currently pfn_valid() fails for all
ZONE_DEVICE based memory types even though they have struct page mapping.

pfn_valid() asserts that there is a memblock entry for a given pfn without
MEMBLOCK_NOMAP flag being set. The problem with ZONE_DEVICE based memory is
that they do not have memblock entries. Hence memblock_is_map_memory() will
invariably fail via memblock_search() for a ZONE_DEVICE based address. This
eventually fails pfn_valid() which is wrong. memblock_is_map_memory() needs
to be skipped for such memory ranges. As ZONE_DEVICE memory gets hotplugged
into the system via memremap_pages() called from a driver, their respective
memory sections will not have SECTION_IS_EARLY set.

Normal hotplug memory will never have MEMBLOCK_NOMAP set in their memblock
regions. Because the flag MEMBLOCK_NOMAP was specifically designed and set
for firmware reserved memory regions. memblock_is_map_memory() can just be
skipped as its always going to be positive and that will be an optimization
for the normal hotplug memory. Like ZONE_DEVICE based memory, all normal
hotplugged memory too will not have SECTION_IS_EARLY set for their sections

Skipping memblock_is_map_memory() for all non early memory sections would
fix pfn_valid() problem for ZONE_DEVICE based memory and also improve its
performance for normal hotplug memory as well.

Cc: Catalin Marinas 
Cc: Will Deacon 
Cc: Ard Biesheuvel 
Cc: Robin Murphy 
Cc: linux-arm-ker...@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Acked-by: David Hildenbrand 
Fixes: 73b20c84d42d ("arm64: mm: implement pte_devmap support")
Signed-off-by: Anshuman Khandual 
Acked-by: Catalin Marinas 
Link: 
https://lore.kernel.org/r/1614921898-4099-2-git-send-email-anshuman.khand...@arm.com
Signed-off-by: Will Deacon 
Signed-off-by: Sasha Levin 
---
 arch/arm64/mm/init.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 709d98fea90c..1141075e4d53 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -230,6 +230,18 @@ int pfn_valid(unsigned long pfn)
 
if (!valid_section(__pfn_to_section(pfn)))
return 0;
+
+   /*
+* ZONE_DEVICE memory does not have the memblock entries.
+* memblock_is_map_memory() check for ZONE_DEVICE based
+* addresses will always fail. Even the normal hotplugged
+* memory will never have MEMBLOCK_NOMAP flag set in their
+* memblock entries. Skip memblock search for all non early
+* memory sections covering all of hotplug memory including
+* both normal and ZONE_DEVICE based.
+*/
+   if (!early_section(__pfn_to_section(pfn)))
+   return pfn_section_valid(__pfn_to_section(pfn), pfn);
 #endif
return memblock_is_map_memory(addr);
 }
-- 
2.30.1





[PATCH 5.10 280/290] KVM: arm64: nvhe: Save the SPE context early

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Suzuki K Poulose 

commit b96b0c5de685df82019e16826a282d53d86d112c upstream.

The nVHE KVM hyp drains and disables the SPE buffer, before
entering the guest, as the EL1&0 translation regime
is going to be loaded with that of the guest.

But this operation is performed way too late, because :
  - The owning translation regime of the SPE buffer
is transferred to EL2. (MDCR_EL2_E2PB == 0)
  - The guest Stage1 is loaded.

Thus the flush could use the host EL1 virtual address,
but use the EL2 translations instead of host EL1, for writing
out any cached data.

Fix this by moving the SPE buffer handling early enough.
The restore path is doing the right thing.

Fixes: 014c4c77aad7 ("KVM: arm64: Improve debug register save/restore flow")
Cc: sta...@vger.kernel.org
Cc: Christoffer Dall 
Cc: Marc Zyngier 
Cc: Will Deacon 
Cc: Catalin Marinas 
Cc: Mark Rutland 
Cc: Alexandru Elisei 
Reviewed-by: Alexandru Elisei 
Signed-off-by: Suzuki K Poulose 
Signed-off-by: Marc Zyngier 
Link: https://lore.kernel.org/r/20210302120345.3102874-1-suzuki.poul...@arm.com
Message-Id: <20210305185254.3730990-2-...@kernel.org>
Signed-off-by: Paolo Bonzini 
Signed-off-by: Greg Kroah-Hartman 
---
 arch/arm64/include/asm/kvm_hyp.h   |5 +
 arch/arm64/kvm/hyp/nvhe/debug-sr.c |   12 ++--
 arch/arm64/kvm/hyp/nvhe/switch.c   |   11 ++-
 3 files changed, 25 insertions(+), 3 deletions(-)

--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -82,6 +82,11 @@ void sysreg_restore_guest_state_vhe(stru
 void __debug_switch_to_guest(struct kvm_vcpu *vcpu);
 void __debug_switch_to_host(struct kvm_vcpu *vcpu);
 
+#ifdef __KVM_NVHE_HYPERVISOR__
+void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu);
+void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu);
+#endif
+
 void __fpsimd_save_state(struct user_fpsimd_state *fp_regs);
 void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs);
 
--- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c
+++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
@@ -58,16 +58,24 @@ static void __debug_restore_spe(u64 pmsc
write_sysreg_s(pmscr_el1, SYS_PMSCR_EL1);
 }
 
-void __debug_switch_to_guest(struct kvm_vcpu *vcpu)
+void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu)
 {
/* Disable and flush SPE data generation */
 	__debug_save_spe(&vcpu->arch.host_debug_state.pmscr_el1);
+}
+
+void __debug_switch_to_guest(struct kvm_vcpu *vcpu)
+{
__debug_switch_to_guest_common(vcpu);
 }
 
-void __debug_switch_to_host(struct kvm_vcpu *vcpu)
+void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu)
 {
__debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1);
+}
+
+void __debug_switch_to_host(struct kvm_vcpu *vcpu)
+{
__debug_switch_to_host_common(vcpu);
 }
 
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -188,6 +188,14 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu
pmu_switch_needed = __pmu_switch_to_guest(host_ctxt);
 
__sysreg_save_state_nvhe(host_ctxt);
+   /*
+* We must flush and disable the SPE buffer for nVHE, as
+* the translation regime(EL1&0) is going to be loaded with
+* that of the guest. And we must do this before we change the
+* translation regime to EL2 (via MDCR_EL2_E2PB == 0) and
+* before we load guest Stage1.
+*/
+   __debug_save_host_buffers_nvhe(vcpu);
 
/*
 * We must restore the 32-bit state before the sysregs, thanks
@@ -228,11 +236,12 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu
if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)
__fpsimd_save_fpexc32(vcpu);
 
+   __debug_switch_to_host(vcpu);
/*
 * This must come after restoring the host sysregs, since a non-VHE
 * system may enable SPE here and make use of the TTBRs.
 */
-   __debug_switch_to_host(vcpu);
+   __debug_restore_host_buffers_nvhe(vcpu);
 
if (pmu_switch_needed)
__pmu_switch_to_host(host_ctxt);




[PATCH 5.11 233/306] staging: comedi: dmm32at: Fix endian problem for AI command data

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Ian Abbott 

commit 54999c0d94b3c26625f896f8e3460bc029821578 upstream.

The analog input subdevice supports Comedi asynchronous commands that
use Comedi's 16-bit sample format.  However, the call to
`comedi_buf_write_samples()` is passing the address of a 32-bit integer
variable.  On bigendian machines, this will copy 2 bytes from the wrong
end of the 32-bit value.  Fix it by changing the type of the variable
holding the sample value to `unsigned short`.

[Note: the bug was introduced in commit 1700529b24cc ("staging: comedi:
dmm32at: use comedi_buf_write_samples()") but the patch applies better
to the later (but in the same kernel release) commit 0c0eadadcbe6e
("staging: comedi: dmm32at: introduce dmm32_ai_get_sample()").]

Fixes: 0c0eadadcbe6e ("staging: comedi: dmm32at: introduce 
dmm32_ai_get_sample()")
Cc:  # 3.19+
Signed-off-by: Ian Abbott 
Link: https://lore.kernel.org/r/20210223143055.257402-7-abbo...@mev.co.uk
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/staging/comedi/drivers/dmm32at.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/drivers/staging/comedi/drivers/dmm32at.c
+++ b/drivers/staging/comedi/drivers/dmm32at.c
@@ -404,7 +404,7 @@ static irqreturn_t dmm32at_isr(int irq,
 {
struct comedi_device *dev = d;
unsigned char intstat;
-   unsigned int val;
+   unsigned short val;
int i;
 
if (!dev->attached) {




[PATCH 5.11 226/306] staging: rtl8712: Fix possible buffer overflow in r8712_sitesurvey_cmd

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Lee Gibson 

commit b93c1e3981af19527beee1c10a2bef67a228c48c upstream.

Function r8712_sitesurvey_cmd calls memcpy without checking the length.
A user could control that length and trigger a buffer overflow.
Fix by checking the length is within the maximum allowed size.

Signed-off-by: Lee Gibson 
Link: https://lore.kernel.org/r/20210301132648.420296-1-lee...@gmail.com
Cc: stable 
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/staging/rtl8712/rtl871x_cmd.c |6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

--- a/drivers/staging/rtl8712/rtl871x_cmd.c
+++ b/drivers/staging/rtl8712/rtl871x_cmd.c
@@ -192,8 +192,10 @@ u8 r8712_sitesurvey_cmd(struct _adapter
psurveyPara->ss_ssidlen = 0;
memset(psurveyPara->ss_ssid, 0, IW_ESSID_MAX_SIZE + 1);
if (pssid && pssid->SsidLength) {
-   memcpy(psurveyPara->ss_ssid, pssid->Ssid, pssid->SsidLength);
-   psurveyPara->ss_ssidlen = cpu_to_le32(pssid->SsidLength);
+   int len = min_t(int, pssid->SsidLength, IW_ESSID_MAX_SIZE);
+
+   memcpy(psurveyPara->ss_ssid, pssid->Ssid, len);
+   psurveyPara->ss_ssidlen = cpu_to_le32(len);
}
set_fwstate(pmlmepriv, _FW_UNDER_SURVEY);
r8712_enqueue_cmd(pcmdpriv, ph2c);




[PATCH 5.10 210/290] usb: xhci: Fix ASMedia ASM1042A and ASM3242 DMA addressing

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Forest Crossman 

commit b71c669ad8390dd1c866298319ff89fe68b45653 upstream.

I've confirmed that both the ASMedia ASM1042A and ASM3242 have the same
problem as the ASM1142 and ASM2142/ASM3142, where they lose some of the
upper bits of 64-bit DMA addresses. As with the other chips, this can
cause problems on systems where the upper bits matter, and adding the
XHCI_NO_64BIT_SUPPORT quirk completely fixes the issue.

Cc: sta...@vger.kernel.org
Signed-off-by: Forest Crossman 
Signed-off-by: Mathias Nyman 
Link: 
https://lore.kernel.org/r/2021035353.2137560-4-mathias.ny...@linux.intel.com
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/usb/host/xhci-pci.c |8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -66,6 +66,7 @@
 #define PCI_DEVICE_ID_ASMEDIA_1042A_XHCI   0x1142
 #define PCI_DEVICE_ID_ASMEDIA_1142_XHCI0x1242
 #define PCI_DEVICE_ID_ASMEDIA_2142_XHCI0x2142
+#define PCI_DEVICE_ID_ASMEDIA_3242_XHCI0x3242
 
 static const char hcd_name[] = "xhci_hcd";
 
@@ -276,11 +277,14 @@ static void xhci_pci_quirks(struct devic
pdev->device == PCI_DEVICE_ID_ASMEDIA_1042_XHCI)
xhci->quirks |= XHCI_BROKEN_STREAMS;
if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA &&
-   pdev->device == PCI_DEVICE_ID_ASMEDIA_1042A_XHCI)
+   pdev->device == PCI_DEVICE_ID_ASMEDIA_1042A_XHCI) {
xhci->quirks |= XHCI_TRUST_TX_LENGTH;
+   xhci->quirks |= XHCI_NO_64BIT_SUPPORT;
+   }
if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA &&
(pdev->device == PCI_DEVICE_ID_ASMEDIA_1142_XHCI ||
-pdev->device == PCI_DEVICE_ID_ASMEDIA_2142_XHCI))
+pdev->device == PCI_DEVICE_ID_ASMEDIA_2142_XHCI ||
+pdev->device == PCI_DEVICE_ID_ASMEDIA_3242_XHCI))
xhci->quirks |= XHCI_NO_64BIT_SUPPORT;
 
if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA &&




[PATCH 5.10 253/290] perf/core: Flush PMU internal buffers for per-CPU events

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Kan Liang 

[ Upstream commit a5398bffc01fe044848c5024e5e867e407f239b8 ]

Sometimes the PMU internal buffers have to be flushed for per-CPU events
during a context switch, e.g., large PEBS. Otherwise, the perf tool may
report samples in locations that do not belong to the process where the
samples are processed in, because PEBS does not tag samples with PID/TID.

The current code only flush the buffers for a per-task event. It doesn't
check a per-CPU event.

Add a new event state flag, PERF_ATTACH_SCHED_CB, to indicate that the
PMU internal buffers have to be flushed for this event during a context
switch.

Add sched_cb_entry and perf_sched_cb_usages back to track the PMU/cpuctx
which is required to be flushed.

Only need to invoke the sched_task() for per-CPU events in this patch.
The per-task events have been handled in perf_event_context_sched_in/out
already.

Fixes: 9c964efa4330 ("perf/x86/intel: Drain the PEBS buffer during context 
switches")
Reported-by: Gabriel Marin 
Originally-by: Namhyung Kim 
Signed-off-by: Kan Liang 
Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Ingo Molnar 
Link: https://lkml.kernel.org/r/20201130193842.10569-1-kan.li...@linux.intel.com
Signed-off-by: Sasha Levin 
---
 include/linux/perf_event.h |  2 ++
 kernel/events/core.c   | 42 ++
 2 files changed, 40 insertions(+), 4 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 96450f6fb1de..22ce0604b448 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -606,6 +606,7 @@ struct swevent_hlist {
 #define PERF_ATTACH_TASK   0x04
 #define PERF_ATTACH_TASK_DATA  0x08
 #define PERF_ATTACH_ITRACE 0x10
+#define PERF_ATTACH_SCHED_CB   0x20
 
 struct perf_cgroup;
 struct perf_buffer;
@@ -872,6 +873,7 @@ struct perf_cpu_context {
struct list_headcgrp_cpuctx_entry;
 #endif
 
+   struct list_headsched_cb_entry;
int sched_cb_usage;
 
int online;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index c3ba29d058b7..4af161b3f322 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -383,6 +383,7 @@ static DEFINE_MUTEX(perf_sched_mutex);
 static atomic_t perf_sched_count;
 
 static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
+static DEFINE_PER_CPU(int, perf_sched_cb_usages);
 static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
 
 static atomic_t nr_mmap_events __read_mostly;
@@ -3466,11 +3467,16 @@ static void perf_event_context_sched_out(struct 
task_struct *task, int ctxn,
}
 }
 
+static DEFINE_PER_CPU(struct list_head, sched_cb_list);
+
 void perf_sched_cb_dec(struct pmu *pmu)
 {
struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
 
-   --cpuctx->sched_cb_usage;
+   this_cpu_dec(perf_sched_cb_usages);
+
+   if (!--cpuctx->sched_cb_usage)
+		list_del(&cpuctx->sched_cb_entry);
 }
 
 
@@ -3478,7 +3484,10 @@ void perf_sched_cb_inc(struct pmu *pmu)
 {
struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
 
-   cpuctx->sched_cb_usage++;
+   if (!cpuctx->sched_cb_usage++)
+   list_add(>sched_cb_entry, this_cpu_ptr(_cb_list));
+
+   this_cpu_inc(perf_sched_cb_usages);
 }
 
 /*
@@ -3507,6 +3516,24 @@ static void __perf_pmu_sched_task(struct 
perf_cpu_context *cpuctx, bool sched_in
perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
 }
 
+static void perf_pmu_sched_task(struct task_struct *prev,
+   struct task_struct *next,
+   bool sched_in)
+{
+   struct perf_cpu_context *cpuctx;
+
+   if (prev == next)
+   return;
+
+	list_for_each_entry(cpuctx, this_cpu_ptr(&sched_cb_list), sched_cb_entry) {
+   /* will be handled in perf_event_context_sched_in/out */
+   if (cpuctx->task_ctx)
+   continue;
+
+   __perf_pmu_sched_task(cpuctx, sched_in);
+   }
+}
+
 static void perf_event_switch(struct task_struct *task,
  struct task_struct *next_prev, bool sched_in);
 
@@ -3529,6 +3556,9 @@ void __perf_event_task_sched_out(struct task_struct *task,
 {
int ctxn;
 
+   if (__this_cpu_read(perf_sched_cb_usages))
+   perf_pmu_sched_task(task, next, false);
+
 	if (atomic_read(&nr_switch_events))
perf_event_switch(task, next, false);
 
@@ -3837,6 +3867,9 @@ void __perf_event_task_sched_in(struct task_struct *prev,
 
 	if (atomic_read(&nr_switch_events))
perf_event_switch(task, prev, true);
+
+   if (__this_cpu_read(perf_sched_cb_usages))
+   perf_pmu_sched_task(prev, task, true);
 }
 
 static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
@@ -4661,7 +4694,7 @@ static void unaccount_event(struct perf_event *event)
   

[PATCH 5.10 217/290] usbip: fix vhci_hcd to check for stream socket

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Shuah Khan 

commit f55a0571690c4aae03180e001522538c0927432f upstream.

Fix attach_store() to validate the passed in file descriptor is a
stream socket. If the file descriptor passed was a SOCK_DGRAM socket,
sock_recvmsg() can't detect end of stream.

Cc: sta...@vger.kernel.org
Suggested-by: Tetsuo Handa 
Signed-off-by: Shuah Khan 
Link: 
https://lore.kernel.org/r/52712aa308915bda02cece1589e04ee8b401d1f3.1615171203.git.sk...@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/usb/usbip/vhci_sysfs.c |   10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

--- a/drivers/usb/usbip/vhci_sysfs.c
+++ b/drivers/usb/usbip/vhci_sysfs.c
@@ -349,8 +349,16 @@ static ssize_t attach_store(struct devic
 
/* Extract socket from fd. */
 	socket = sockfd_lookup(sockfd, &err);
-   if (!socket)
+   if (!socket) {
+   dev_err(dev, "failed to lookup sock");
return -EINVAL;
+   }
+   if (socket->type != SOCK_STREAM) {
+   dev_err(dev, "Expecting SOCK_STREAM - found %d",
+   socket->type);
+   sockfd_put(socket);
+   return -EINVAL;
+   }
 
/* now need lock until setting vdev status as used */
 




[PATCH 5.10 208/290] usb: xhci: do not perform Soft Retry for some xHCI hosts

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Stanislaw Gruszka 

commit a4a251f8c23518899d2078c320cf9ce2fa459c9f upstream.

On some systems rt2800usb and mt7601u devices are unable to operate since
commit f8f80be501aa ("xhci: Use soft retry to recover faster from
transaction errors")

Seems that some xHCI controllers can not perform Soft Retry correctly,
affecting those devices.

To avoid the problem add xhci->quirks flag that restore pre soft retry
xhci behaviour for affected xHCI controllers. Currently those are
AMD_PROMONTORYA_4 and AMD_PROMONTORYA_2, since it was confirmed
by the users: on those xHCI hosts issue happen and is gone after
disabling Soft Retry.

[minor commit message rewording for checkpatch -Mathias]

Fixes: f8f80be501aa ("xhci: Use soft retry to recover faster from transaction 
errors")
Cc:  # 4.20+
Reported-by: Bernhard 
Tested-by: Bernhard 
Signed-off-by: Stanislaw Gruszka 
Signed-off-by: Mathias Nyman 
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=202541
Link: 
https://lore.kernel.org/r/2021035353.2137560-2-mathias.ny...@linux.intel.com
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/usb/host/xhci-pci.c  |5 +
 drivers/usb/host/xhci-ring.c |3 ++-
 drivers/usb/host/xhci.h  |1 +
 3 files changed, 8 insertions(+), 1 deletion(-)

--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -295,6 +295,11 @@ static void xhci_pci_quirks(struct devic
 pdev->device == 0x9026)
xhci->quirks |= XHCI_RESET_PLL_ON_DISCONNECT;
 
+   if (pdev->vendor == PCI_VENDOR_ID_AMD &&
+   (pdev->device == PCI_DEVICE_ID_AMD_PROMONTORYA_2 ||
+pdev->device == PCI_DEVICE_ID_AMD_PROMONTORYA_4))
+   xhci->quirks |= XHCI_NO_SOFT_RETRY;
+
if (xhci->quirks & XHCI_RESET_ON_RESUME)
xhci_dbg_trace(xhci, trace_xhci_dbg_quirks,
"QUIRK: Resetting on resume");
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -2307,7 +2307,8 @@ static int process_bulk_intr_td(struct x
remaining   = 0;
break;
case COMP_USB_TRANSACTION_ERROR:
-   if ((ep_ring->err_count++ > MAX_SOFT_RETRY) ||
+   if (xhci->quirks & XHCI_NO_SOFT_RETRY ||
+   (ep_ring->err_count++ > MAX_SOFT_RETRY) ||
le32_to_cpu(slot_ctx->tt_info) & TT_SLOT)
break;
*status = 0;
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -1879,6 +1879,7 @@ struct xhci_hcd {
 #define XHCI_SKIP_PHY_INIT BIT_ULL(37)
 #define XHCI_DISABLE_SPARSEBIT_ULL(38)
 #define XHCI_SG_TRB_CACHE_SIZE_QUIRK   BIT_ULL(39)
+#define XHCI_NO_SOFT_RETRY BIT_ULL(40)
 
unsigned intnum_active_eps;
unsigned intlimit_active_eps;




[PATCH 5.10 212/290] USB: serial: io_edgeport: fix memory leak in edge_startup

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Pavel Skripkin 

commit cfdc67acc785e01a8719eeb7012709d245564701 upstream.

sysbot found memory leak in edge_startup().
The problem was that when an error was received from the usb_submit_urb(),
nothing was cleaned up.

Reported-by: syzbot+59f777bdcbdd7eea5...@syzkaller.appspotmail.com
Signed-off-by: Pavel Skripkin 
Fixes: 6e8cf7751f9f ("USB: add EPIC support to the io_edgeport driver")
Cc: sta...@vger.kernel.org  # 2.6.21: c5c0c55598ce
Signed-off-by: Johan Hovold 
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/usb/serial/io_edgeport.c |   26 --
 1 file changed, 16 insertions(+), 10 deletions(-)

--- a/drivers/usb/serial/io_edgeport.c
+++ b/drivers/usb/serial/io_edgeport.c
@@ -3003,26 +3003,32 @@ static int edge_startup(struct usb_seria
response = -ENODEV;
}
 
-   usb_free_urb(edge_serial->interrupt_read_urb);
-   kfree(edge_serial->interrupt_in_buffer);
-
-   usb_free_urb(edge_serial->read_urb);
-   kfree(edge_serial->bulk_in_buffer);
-
-   kfree(edge_serial);
-
-   return response;
+   goto error;
}
 
/* start interrupt read for this edgeport this interrupt will
 * continue as long as the edgeport is connected */
response = usb_submit_urb(edge_serial->interrupt_read_urb,
GFP_KERNEL);
-   if (response)
+   if (response) {
dev_err(ddev, "%s - Error %d submitting control urb\n",
__func__, response);
+
+   goto error;
+   }
}
return response;
+
+error:
+   usb_free_urb(edge_serial->interrupt_read_urb);
+   kfree(edge_serial->interrupt_in_buffer);
+
+   usb_free_urb(edge_serial->read_urb);
+   kfree(edge_serial->bulk_in_buffer);
+
+   kfree(edge_serial);
+
+   return response;
 }
 
 




[PATCH 5.11 234/306] staging: comedi: me4000: Fix endian problem for AI command data

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Ian Abbott 

commit b39dfcced399d31e7c4b7341693b18e01c8f655e upstream.

The analog input subdevice supports Comedi asynchronous commands that
use Comedi's 16-bit sample format.  However, the calls to
`comedi_buf_write_samples()` are passing the address of a 32-bit integer
variable.  On bigendian machines, this will copy 2 bytes from the wrong
end of the 32-bit value.  Fix it by changing the type of the variable
holding the sample value to `unsigned short`.

Fixes: de88924f67d1 ("staging: comedi: me4000: use comedi_buf_write_samples()")
Cc:  # 3.19+
Signed-off-by: Ian Abbott 
Link: https://lore.kernel.org/r/20210223143055.257402-8-abbo...@mev.co.uk
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/staging/comedi/drivers/me4000.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/drivers/staging/comedi/drivers/me4000.c
+++ b/drivers/staging/comedi/drivers/me4000.c
@@ -924,7 +924,7 @@ static irqreturn_t me4000_ai_isr(int irq
struct comedi_subdevice *s = dev->read_subdev;
int i;
int c = 0;
-   unsigned int lval;
+   unsigned short lval;
 
if (!dev->attached)
return IRQ_NONE;




[PATCH 5.11 224/306] staging: rtl8188eu: fix potential memory corruption in rtw_check_beacon_data()

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Dan Carpenter 

commit d4ac640322b06095128a5c45ba4a1e80929fe7f3 upstream.

The "ie_len" is a value in the 1-255 range that comes from the user.  We
have to cap it to ensure that it's not too large or it could lead to
memory corruption.

Fixes: 9a7fe54ddc3a ("staging: r8188eu: Add source files for new driver - part 
1")
Signed-off-by: Dan Carpenter 
Cc: stable 
Link: https://lore.kernel.org/r/YEHyQCrFZKTXyT7J@mwanda
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/staging/rtl8188eu/core/rtw_ap.c |5 +
 1 file changed, 5 insertions(+)

--- a/drivers/staging/rtl8188eu/core/rtw_ap.c
+++ b/drivers/staging/rtl8188eu/core/rtw_ap.c
@@ -791,6 +791,7 @@ int rtw_check_beacon_data(struct adapter
p = rtw_get_ie(ie + _BEACON_IE_OFFSET_, WLAN_EID_SSID, _len,
   pbss_network->ie_length - _BEACON_IE_OFFSET_);
if (p && ie_len > 0) {
+   ie_len = min_t(int, ie_len, sizeof(pbss_network->ssid.ssid));
 		memset(&pbss_network->ssid, 0, sizeof(struct ndis_802_11_ssid));
memcpy(pbss_network->ssid.ssid, p + 2, ie_len);
pbss_network->ssid.ssid_length = ie_len;
@@ -811,6 +812,7 @@ int rtw_check_beacon_data(struct adapter
p = rtw_get_ie(ie + _BEACON_IE_OFFSET_, WLAN_EID_SUPP_RATES, _len,
   pbss_network->ie_length - _BEACON_IE_OFFSET_);
if (p) {
+   ie_len = min_t(int, ie_len, NDIS_802_11_LENGTH_RATES_EX);
memcpy(supportRate, p + 2, ie_len);
supportRateNum = ie_len;
}
@@ -819,6 +821,8 @@ int rtw_check_beacon_data(struct adapter
p = rtw_get_ie(ie + _BEACON_IE_OFFSET_, WLAN_EID_EXT_SUPP_RATES,
   _len, pbss_network->ie_length - _BEACON_IE_OFFSET_);
if (p) {
+   ie_len = min_t(int, ie_len,
+  NDIS_802_11_LENGTH_RATES_EX - supportRateNum);
memcpy(supportRate + supportRateNum, p + 2, ie_len);
supportRateNum += ie_len;
}
@@ -934,6 +938,7 @@ int rtw_check_beacon_data(struct adapter
 
pht_cap->mcs.rx_mask[0] = 0xff;
pht_cap->mcs.rx_mask[1] = 0x0;
+   ie_len = min_t(int, ie_len, sizeof(pmlmepriv->htpriv.ht_cap));
 		memcpy(&pmlmepriv->htpriv.ht_cap, p + 2, ie_len);
}
 




[PATCH 5.10 252/290] arm64: mm: use a 48-bit ID map when possible on 52-bit VA builds

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Ard Biesheuvel 

[ Upstream commit 7ba8f2b2d652cd8d8a2ab61f4be66973e70f9f88 ]

52-bit VA kernels can run on hardware that is only 48-bit capable, but
configure the ID map as 52-bit by default. This was not a problem until
recently, because the special T0SZ value for a 52-bit VA space was never
programmed into the TCR register anwyay, and because a 52-bit ID map
happens to use the same number of translation levels as a 48-bit one.

This behavior was changed by commit 1401bef703a4 ("arm64: mm: Always update
TCR_EL1 from __cpu_set_tcr_t0sz()"), which causes the unsupported T0SZ
value for a 52-bit VA to be programmed into TCR_EL1. While some hardware
simply ignores this, Mark reports that Amberwing systems choke on this,
resulting in a broken boot. But even before that commit, the unsupported
idmap_t0sz value was exposed to KVM and used to program TCR_EL2 incorrectly
as well.

Given that we already have to deal with address spaces being either 48-bit
or 52-bit in size, the cleanest approach seems to be to simply default to
a 48-bit VA ID map, and only switch to a 52-bit one if the placement of the
kernel in DRAM requires it. This is guaranteed not to happen unless the
system is actually 52-bit VA capable.

Fixes: 90ec95cda91a ("arm64: mm: Introduce VA_BITS_MIN")
Reported-by: Mark Salter 
Link: http://lore.kernel.org/r/20210310003216.410037-1-msal...@redhat.com
Signed-off-by: Ard Biesheuvel 
Link: https://lore.kernel.org/r/20210310171515.416643-2-a...@kernel.org
Signed-off-by: Will Deacon 
Signed-off-by: Sasha Levin 
---
 arch/arm64/include/asm/mmu_context.h | 5 +
 arch/arm64/kernel/head.S | 2 +-
 arch/arm64/mm/mmu.c  | 2 +-
 3 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/include/asm/mmu_context.h 
b/arch/arm64/include/asm/mmu_context.h
index 0672236e1aea..4e2ba9477845 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -65,10 +65,7 @@ extern u64 idmap_ptrs_per_pgd;
 
 static inline bool __cpu_uses_extended_idmap(void)
 {
-   if (IS_ENABLED(CONFIG_ARM64_VA_BITS_52))
-   return false;
-
-   return unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS));
+   return unlikely(idmap_t0sz != TCR_T0SZ(vabits_actual));
 }
 
 /*
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index e7550a5289fe..78cdd6b24172 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -334,7 +334,7 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
 */
adrpx5, __idmap_text_end
clz x5, x5
-   cmp x5, TCR_T0SZ(VA_BITS)   // default T0SZ small enough?
+   cmp x5, TCR_T0SZ(VA_BITS_MIN) // default T0SZ small enough?
b.ge1f  // .. then skip VA range extension
 
adr_l   x6, idmap_t0sz
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index f0125bb09fa3..6aabf1eced31 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -40,7 +40,7 @@
 #define NO_BLOCK_MAPPINGS  BIT(0)
 #define NO_CONT_MAPPINGS   BIT(1)
 
-u64 idmap_t0sz = TCR_T0SZ(VA_BITS);
+u64 idmap_t0sz = TCR_T0SZ(VA_BITS_MIN);
 u64 idmap_ptrs_per_pgd = PTRS_PER_PGD;
 
 u64 __section(".mmuoff.data.write") vabits_actual;
-- 
2.30.1





[PATCH 5.10 250/290] nvme-fc: fix racing controller reset and create association

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: James Smart 

[ Upstream commit f20ef34d71abc1fc56b322aaa251f90f94320140 ]

Recent patch to prevent calling __nvme_fc_abort_outstanding_ios in
interrupt context results in a possible race condition. A controller
reset results in errored io completions, which schedules error
work. The change of error work to a work element allows it to fire
after the ctrl state transition to NVME_CTRL_CONNECTING, causing
any outstanding io (used to initialize the controller) to fail and
cause problems for connect_work.

Add a state check to only schedule error work if not in the RESETTING
state.

Fixes: 19fce0470f05 ("nvme-fc: avoid calling _nvme_fc_abort_outstanding_ios 
from interrupt context")
Signed-off-by: Nigel Kirkland 
Signed-off-by: James Smart 
Signed-off-by: Christoph Hellwig 
Signed-off-by: Sasha Levin 
---
 drivers/nvme/host/fc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 5ead217ac2bc..fab068c8ba02 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2055,7 +2055,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
nvme_fc_complete_rq(rq);
 
 check_error:
-   if (terminate_assoc)
+   if (terminate_assoc && ctrl->ctrl.state != NVME_CTRL_RESETTING)
 	queue_work(nvme_reset_wq, &ctrl->ioerr_work);
 }
 
-- 
2.30.1





[PATCH 5.11 242/306] cpufreq: qcom-hw: fix dereferencing freed memory data

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Shawn Guo 

[ Upstream commit 02fc409540303801994d076fcdb7064bd634dbf3 ]

Commit 67fc209b527d ("cpufreq: qcom-hw: drop devm_xxx() calls from
init/exit hooks") introduces an issue of dereferencing freed memory
'data'.  Fix it.

Fixes: 67fc209b527d ("cpufreq: qcom-hw: drop devm_xxx() calls from init/exit 
hooks")
Reported-by: kernel test robot 
Reported-by: Dan Carpenter 
Signed-off-by: Shawn Guo 
Signed-off-by: Viresh Kumar 
Signed-off-by: Sasha Levin 
---
 drivers/cpufreq/qcom-cpufreq-hw.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c 
b/drivers/cpufreq/qcom-cpufreq-hw.c
index 2726e77c9e5a..5cdd20e38771 100644
--- a/drivers/cpufreq/qcom-cpufreq-hw.c
+++ b/drivers/cpufreq/qcom-cpufreq-hw.c
@@ -368,7 +368,7 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy 
*policy)
 error:
kfree(data);
 unmap_base:
-   iounmap(data->base);
+   iounmap(base);
 release_region:
release_mem_region(res->start, resource_size(res));
return ret;
-- 
2.30.1





[PATCH 5.11 223/306] staging: rtl8712: unterminated string leads to read overflow

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Dan Carpenter 

commit d660f4f42ccea50262c6ee90c8e7ad19a69fb225 upstream.

The memdup_user() function does not necessarily return a NUL terminated
string so this can lead to a read overflow.  Switch from memdup_user()
to strndup_user() to fix this bug.

Fixes: c6dc001f2add ("staging: r8712u: Merging Realtek's latest (v2.6.6). 
Various fixes.")
Cc: stable 
Signed-off-by: Dan Carpenter 
Link: https://lore.kernel.org/r/YDYSR+1rj26NRhvb@mwanda
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/staging/rtl8712/rtl871x_ioctl_linux.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/drivers/staging/rtl8712/rtl871x_ioctl_linux.c
+++ b/drivers/staging/rtl8712/rtl871x_ioctl_linux.c
@@ -924,7 +924,7 @@ static int r871x_wx_set_priv(struct net_
struct iw_point *dwrq = (struct iw_point *)awrq;
 
len = dwrq->length;
-   ext = memdup_user(dwrq->pointer, len);
+   ext = strndup_user(dwrq->pointer, len);
if (IS_ERR(ext))
return PTR_ERR(ext);
 




[PATCH 5.11 229/306] staging: comedi: addi_apci_1500: Fix endian problem for command sample

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Ian Abbott 

commit ac0bbf55ed3be75fde1f8907e91ecd2fd589bde3 upstream.

The digital input subdevice supports Comedi asynchronous commands that
read interrupt status information.  This uses 16-bit Comedi samples (of
which only the bottom 8 bits contain status information).  However, the
interrupt handler is calling `comedi_buf_write_samples()` with the
address of a 32-bit variable `unsigned int status`.  On a bigendian
machine, this will copy 2 bytes from the wrong end of the variable.  Fix
it by changing the type of the variable to `unsigned short`.

Fixes: a8c66b684efa ("staging: comedi: addi_apci_1500: rewrite the subdevice 
support functions")
Cc:  #4.0+
Signed-off-by: Ian Abbott 
Link: https://lore.kernel.org/r/20210223143055.257402-3-abbo...@mev.co.uk
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/staging/comedi/drivers/addi_apci_1500.c |   18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)

--- a/drivers/staging/comedi/drivers/addi_apci_1500.c
+++ b/drivers/staging/comedi/drivers/addi_apci_1500.c
@@ -208,7 +208,7 @@ static irqreturn_t apci1500_interrupt(in
struct comedi_device *dev = d;
struct apci1500_private *devpriv = dev->private;
struct comedi_subdevice *s = dev->read_subdev;
-   unsigned int status = 0;
+   unsigned short status = 0;
unsigned int val;
 
val = inl(devpriv->amcc + AMCC_OP_REG_INTCSR);
@@ -238,14 +238,14 @@ static irqreturn_t apci1500_interrupt(in
 *
 *Mask Meaning
 * --  --
-* 0x0001  Event 1 has occurred
-* 0x0010  Event 2 has occurred
-* 0x0100  Counter/timer 1 has run down (not implemented)
-* 0x1000  Counter/timer 2 has run down (not implemented)
-* 0x0001  Counter 3 has run down (not implemented)
-* 0x0010  Watchdog has run down (not implemented)
-* 0x0100  Voltage error
-* 0x1000  Short-circuit error
+* 0b0001  Event 1 has occurred
+* 0b0010  Event 2 has occurred
+* 0b0100  Counter/timer 1 has run down (not implemented)
+* 0b1000  Counter/timer 2 has run down (not implemented)
+* 0b0001  Counter 3 has run down (not implemented)
+* 0b0010  Watchdog has run down (not implemented)
+* 0b0100  Voltage error
+* 0b1000  Short-circuit error
 */
 	comedi_buf_write_samples(s, &status, 1);
comedi_handle_events(dev, s);




[PATCH 5.11 253/306] arm64: mm: use a 48-bit ID map when possible on 52-bit VA builds

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Ard Biesheuvel 

[ Upstream commit 7ba8f2b2d652cd8d8a2ab61f4be66973e70f9f88 ]

52-bit VA kernels can run on hardware that is only 48-bit capable, but
configure the ID map as 52-bit by default. This was not a problem until
recently, because the special T0SZ value for a 52-bit VA space was never
programmed into the TCR register anyway, and because a 52-bit ID map
happens to use the same number of translation levels as a 48-bit one.

This behavior was changed by commit 1401bef703a4 ("arm64: mm: Always update
TCR_EL1 from __cpu_set_tcr_t0sz()"), which causes the unsupported T0SZ
value for a 52-bit VA to be programmed into TCR_EL1. While some hardware
simply ignores this, Mark reports that Amberwing systems choke on this,
resulting in a broken boot. But even before that commit, the unsupported
idmap_t0sz value was exposed to KVM and used to program TCR_EL2 incorrectly
as well.

Given that we already have to deal with address spaces being either 48-bit
or 52-bit in size, the cleanest approach seems to be to simply default to
a 48-bit VA ID map, and only switch to a 52-bit one if the placement of the
kernel in DRAM requires it. This is guaranteed not to happen unless the
system is actually 52-bit VA capable.

Fixes: 90ec95cda91a ("arm64: mm: Introduce VA_BITS_MIN")
Reported-by: Mark Salter 
Link: http://lore.kernel.org/r/20210310003216.410037-1-msal...@redhat.com
Signed-off-by: Ard Biesheuvel 
Link: https://lore.kernel.org/r/20210310171515.416643-2-a...@kernel.org
Signed-off-by: Will Deacon 
Signed-off-by: Sasha Levin 
---
 arch/arm64/include/asm/mmu_context.h | 5 +
 arch/arm64/kernel/head.S | 2 +-
 arch/arm64/mm/mmu.c  | 2 +-
 3 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/include/asm/mmu_context.h 
b/arch/arm64/include/asm/mmu_context.h
index 0b3079fd28eb..1c364ec0ad31 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -65,10 +65,7 @@ extern u64 idmap_ptrs_per_pgd;
 
 static inline bool __cpu_uses_extended_idmap(void)
 {
-   if (IS_ENABLED(CONFIG_ARM64_VA_BITS_52))
-   return false;
-
-   return unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS));
+   return unlikely(idmap_t0sz != TCR_T0SZ(vabits_actual));
 }
 
 /*
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 7ec430e18f95..a0b3bfe67609 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -319,7 +319,7 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
 */
adrpx5, __idmap_text_end
clz x5, x5
-   cmp x5, TCR_T0SZ(VA_BITS)   // default T0SZ small enough?
+   cmp x5, TCR_T0SZ(VA_BITS_MIN) // default T0SZ small enough?
b.ge1f  // .. then skip VA range extension
 
adr_l   x6, idmap_t0sz
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index cb78343181db..6f0648777d34 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -40,7 +40,7 @@
 #define NO_BLOCK_MAPPINGS  BIT(0)
 #define NO_CONT_MAPPINGS   BIT(1)
 
-u64 idmap_t0sz = TCR_T0SZ(VA_BITS);
+u64 idmap_t0sz = TCR_T0SZ(VA_BITS_MIN);
 u64 idmap_ptrs_per_pgd = PTRS_PER_PGD;
 
 u64 __section(".mmuoff.data.write") vabits_actual;
-- 
2.30.1





[PATCH 5.11 254/306] io_uring: perform IOPOLL reaping if canceler is thread itself

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Jens Axboe 

[ Upstream commit d052d1d685f5125249ab4ff887562c88ba959638 ]

We bypass IOPOLL completion polling (and reaping) for the SQPOLL thread,
but if it's the thread itself invoking cancelations, then we still need
to perform it or no one will.

Fixes: 9936c7c2bc76 ("io_uring: deduplicate core cancellations sequence")
Signed-off-by: Jens Axboe 
Signed-off-by: Sasha Levin 
---
 fs/io_uring.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 241313278e5a..00ef0b90d149 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -8891,7 +8891,8 @@ static void io_uring_try_cancel_requests(struct 
io_ring_ctx *ctx,
}
 
/* SQPOLL thread does its own polling */
-   if (!(ctx->flags & IORING_SETUP_SQPOLL) && !files) {
+   if ((!(ctx->flags & IORING_SETUP_SQPOLL) && !files) ||
+   (ctx->sq_data && ctx->sq_data->thread == current)) {
 		while (!list_empty_careful(&ctx->iopoll_list)) {
io_iopoll_try_reap_events(ctx);
ret = true;
-- 
2.30.1





[PATCH 5.10 214/290] USB: serial: cp210x: add ID for Acuity Brands nLight Air Adapter

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Karan Singhal 

commit ca667a33207daeaf9c62b106815728718def60ec upstream.

IDs of nLight Air Adapter, Acuity Brands, Inc.:
vid: 10c4
pid: 88d8

Signed-off-by: Karan Singhal 
Cc: sta...@vger.kernel.org
Signed-off-by: Johan Hovold 
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/usb/serial/cp210x.c |1 +
 1 file changed, 1 insertion(+)

--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -149,6 +149,7 @@ static const struct usb_device_id id_tab
{ USB_DEVICE(0x10C4, 0x8857) }, /* CEL EM357 ZigBee USB Stick */
{ USB_DEVICE(0x10C4, 0x88A4) }, /* MMB Networks ZigBee USB Device */
{ USB_DEVICE(0x10C4, 0x88A5) }, /* Planet Innovation Ingeni ZigBee USB 
Device */
+   { USB_DEVICE(0x10C4, 0x88D8) }, /* Acuity Brands nLight Air Adapter */
{ USB_DEVICE(0x10C4, 0x88FB) }, /* CESINEL MEDCAL STII Network Analyzer 
*/
{ USB_DEVICE(0x10C4, 0x8938) }, /* CESINEL MEDCAL S II Network Analyzer 
*/
{ USB_DEVICE(0x10C4, 0x8946) }, /* Ketra N1 Wireless Interface */




[PATCH 5.10 213/290] USB: serial: ch341: add new Product ID

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Niv Sardi 

commit 5563b3b6420362c8a1f468ca04afe6d5f0a8d0a3 upstream.

Add PID for CH340 that's found on cheap programmers.

The driver works flawlessly as soon as the new PID (0x9986) is added to it.
These look like ANU232MI but ship with a ch341 inside. They have no special
identifiers (mine only has the string "DB9D20130716" printed on the PCB and
nothing identifiable on the packaging. The merchant i bought it from
doesn't sell these anymore).

the lsusb -v output is:
Bus 001 Device 009: ID 9986:7523
Device Descriptor:
  bLength18
  bDescriptorType 1
  bcdUSB   1.10
  bDeviceClass  255 Vendor Specific Class
  bDeviceSubClass 0
  bDeviceProtocol 0
  bMaxPacketSize0 8
  idVendor   0x9986
  idProduct  0x7523
  bcdDevice2.54
  iManufacturer   0
  iProduct0
  iSerial 0
  bNumConfigurations  1
  Configuration Descriptor:
bLength 9
bDescriptorType 2
wTotalLength   0x0027
bNumInterfaces  1
bConfigurationValue 1
iConfiguration  0
bmAttributes 0x80
  (Bus Powered)
MaxPower   96mA
Interface Descriptor:
  bLength 9
  bDescriptorType 4
  bInterfaceNumber0
  bAlternateSetting   0
  bNumEndpoints   3
  bInterfaceClass   255 Vendor Specific Class
  bInterfaceSubClass  1
  bInterfaceProtocol  2
  iInterface  0
  Endpoint Descriptor:
bLength 7
bDescriptorType 5
bEndpointAddress 0x82  EP 2 IN
bmAttributes2
  Transfer TypeBulk
  Synch Type   None
  Usage Type   Data
wMaxPacketSize 0x0020  1x 32 bytes
bInterval   0
  Endpoint Descriptor:
bLength 7
bDescriptorType 5
bEndpointAddress 0x02  EP 2 OUT
bmAttributes2
  Transfer TypeBulk
  Synch Type   None
  Usage Type   Data
wMaxPacketSize 0x0020  1x 32 bytes
bInterval   0
  Endpoint Descriptor:
bLength 7
bDescriptorType 5
bEndpointAddress 0x81  EP 1 IN
bmAttributes3
  Transfer TypeInterrupt
  Synch Type   None
  Usage Type   Data
wMaxPacketSize 0x0008  1x 8 bytes
bInterval   1

Signed-off-by: Niv Sardi 
Cc: sta...@vger.kernel.org
Signed-off-by: Johan Hovold 
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/usb/serial/ch341.c |1 +
 1 file changed, 1 insertion(+)

--- a/drivers/usb/serial/ch341.c
+++ b/drivers/usb/serial/ch341.c
@@ -86,6 +86,7 @@ static const struct usb_device_id id_tab
{ USB_DEVICE(0x1a86, 0x7522) },
{ USB_DEVICE(0x1a86, 0x7523) },
{ USB_DEVICE(0x4348, 0x5523) },
+   { USB_DEVICE(0x9986, 0x7523) },
{ },
 };
 MODULE_DEVICE_TABLE(usb, id_table);




[PATCH 5.11 227/306] staging: rtl8192e: Fix possible buffer overflow in _rtl92e_wx_set_scan

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Lee Gibson 

commit 8687bf9ef9551bcf93897e33364d121667b1aadf upstream.

Function _rtl92e_wx_set_scan calls memcpy without checking the length.
A user could control that length and trigger a buffer overflow.
Fix by checking the length is within the maximum allowed size.

Reviewed-by: Dan Carpenter 
Signed-off-by: Lee Gibson 
Cc: stable 
Link: https://lore.kernel.org/r/20210226145157.424065-1-lee...@gmail.com
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/staging/rtl8192e/rtl8192e/rtl_wx.c |7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

--- a/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c
@@ -406,9 +406,10 @@ static int _rtl92e_wx_set_scan(struct ne
struct iw_scan_req *req = (struct iw_scan_req *)b;
 
if (req->essid_len) {
-   ieee->current_network.ssid_len = req->essid_len;
-   memcpy(ieee->current_network.ssid, req->essid,
-  req->essid_len);
+   int len = min_t(int, req->essid_len, IW_ESSID_MAX_SIZE);
+
+   ieee->current_network.ssid_len = len;
+   memcpy(ieee->current_network.ssid, req->essid, len);
}
}
 




[PATCH 5.10 206/290] USB: usblp: fix a hang in poll() if disconnected

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Pete Zaitcev 

commit 9de2c43acf37a17dc4c69ff78bb099b80fb74325 upstream.

Apparently an application that opens a device and calls select()
on it, will hang if the decice is disconnected. It's a little
surprising that we had this bug for 15 years, but apparently
nobody ever uses select() with a printer: only write() and read(),
and those work fine. Well, you can also select() with a timeout.

The fix is modeled after devio.c. A few other drivers check the
condition first, then do not add the wait queue in case the
device is disconnected. We doubt that's completely race-free.
So, this patch adds the process first, then locks properly
and checks for the disconnect.

Reviewed-by: Zqiang 
Signed-off-by: Pete Zaitcev 
Cc: stable 
Link: https://lore.kernel.org/r/20210303221053.1cf33...@suzdal.zaitcev.lan
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/usb/class/usblp.c |   16 
 1 file changed, 12 insertions(+), 4 deletions(-)

--- a/drivers/usb/class/usblp.c
+++ b/drivers/usb/class/usblp.c
@@ -494,16 +494,24 @@ static int usblp_release(struct inode *i
 /* No kernel lock - fine */
 static __poll_t usblp_poll(struct file *file, struct poll_table_struct *wait)
 {
-   __poll_t ret;
+   struct usblp *usblp = file->private_data;
+   __poll_t ret = 0;
unsigned long flags;
 
-   struct usblp *usblp = file->private_data;
/* Should we check file->f_mode & FMODE_WRITE before poll_wait()? */
 	poll_wait(file, &usblp->rwait, wait);
 	poll_wait(file, &usblp->wwait, wait);
+
+	mutex_lock(&usblp->mut);
+	if (!usblp->present)
+		ret |= EPOLLHUP;
+	mutex_unlock(&usblp->mut);
+
 	spin_lock_irqsave(&usblp->lock, flags);
-	ret = ((usblp->bidir && usblp->rcomplete) ? EPOLLIN  | EPOLLRDNORM : 0) |
-	      ((usblp->no_paper || usblp->wcomplete) ? EPOLLOUT | EPOLLWRNORM : 0);
+	if (usblp->bidir && usblp->rcomplete)
+		ret |= EPOLLIN  | EPOLLRDNORM;
+	if (usblp->no_paper || usblp->wcomplete)
+		ret |= EPOLLOUT | EPOLLWRNORM;
 	spin_unlock_irqrestore(&usblp->lock, flags);
return ret;
 }




[PATCH 5.10 216/290] usbip: fix stub_dev to check for stream socket

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Shuah Khan 

commit 47ccc8fc2c9c94558b27b6f9e2582df32d29e6e8 upstream.

Fix usbip_sockfd_store() to validate the passed in file descriptor is
a stream socket. If the file descriptor passed was a SOCK_DGRAM socket,
sock_recvmsg() can't detect end of stream.

Cc: sta...@vger.kernel.org
Suggested-by: Tetsuo Handa 
Signed-off-by: Shuah Khan 
Link: 
https://lore.kernel.org/r/e942d2bd03afb8e8552bd2a5d84e18d17670d521.1615171203.git.sk...@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/usb/usbip/stub_dev.c |   12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

--- a/drivers/usb/usbip/stub_dev.c
+++ b/drivers/usb/usbip/stub_dev.c
@@ -69,8 +69,16 @@ static ssize_t usbip_sockfd_store(struct
}
 
 	socket = sockfd_lookup(sockfd, &err);
-   if (!socket)
+   if (!socket) {
+   dev_err(dev, "failed to lookup sock");
goto err;
+   }
+
+   if (socket->type != SOCK_STREAM) {
+   dev_err(dev, "Expecting SOCK_STREAM - found %d",
+   socket->type);
+   goto sock_err;
+   }
 
sdev->ud.tcp_socket = socket;
sdev->ud.sockfd = sockfd;
@@ -100,6 +108,8 @@ static ssize_t usbip_sockfd_store(struct
 
return count;
 
+sock_err:
+   sockfd_put(socket);
 err:
 	spin_unlock_irq(&sdev->ud.lock);
return -EINVAL;




[PATCH 5.10 204/290] usb: dwc3: qcom: add ACPI device id for sc8180x

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Shawn Guo 

commit 1edbff9c80ed32071fffa7dbaaea507fdb21ff2d upstream.

It enables USB Host support for sc8180x ACPI boot, both the standalone
one and the one behind URS (USB Role Switch).  And they share the
the same dwc3_acpi_pdata with sdm845.

Signed-off-by: Shawn Guo 
Link: https://lore.kernel.org/r/20210301075745.20544-1-shawn@linaro.org
Cc: stable 
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/usb/dwc3/dwc3-qcom.c |2 ++
 1 file changed, 2 insertions(+)

--- a/drivers/usb/dwc3/dwc3-qcom.c
+++ b/drivers/usb/dwc3/dwc3-qcom.c
@@ -935,6 +935,8 @@ static const struct dwc3_acpi_pdata sdm8
 static const struct acpi_device_id dwc3_qcom_acpi_match[] = {
 	{ "QCOM2430", (unsigned long)&sdm845_acpi_pdata },
 	{ "QCOM0304", (unsigned long)&sdm845_acpi_urs_pdata },
+	{ "QCOM0497", (unsigned long)&sdm845_acpi_urs_pdata },
+	{ "QCOM04A6", (unsigned long)&sdm845_acpi_pdata },
{ },
 };
 MODULE_DEVICE_TABLE(acpi, dwc3_qcom_acpi_match);




[PATCH 5.11 251/306] nvme-fc: fix racing controller reset and create association

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: James Smart 

[ Upstream commit f20ef34d71abc1fc56b322aaa251f90f94320140 ]

Recent patch to prevent calling __nvme_fc_abort_outstanding_ios in
interrupt context results in a possible race condition. A controller
reset results in errored io completions, which schedules error
work. The change of error work to a work element allows it to fire
after the ctrl state transition to NVME_CTRL_CONNECTING, causing
any outstanding io (used to initialize the controller) to fail and
cause problems for connect_work.

Add a state check to only schedule error work if not in the RESETTING
state.

Fixes: 19fce0470f05 ("nvme-fc: avoid calling _nvme_fc_abort_outstanding_ios 
from interrupt context")
Signed-off-by: Nigel Kirkland 
Signed-off-by: James Smart 
Signed-off-by: Christoph Hellwig 
Signed-off-by: Sasha Levin 
---
 drivers/nvme/host/fc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 5f36cfa8136c..7ec6869b3e5b 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2055,7 +2055,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
nvme_fc_complete_rq(rq);
 
 check_error:
-   if (terminate_assoc)
+   if (terminate_assoc && ctrl->ctrl.state != NVME_CTRL_RESETTING)
 	queue_work(nvme_reset_wq, &ctrl->ioerr_work);
 }
 
-- 
2.30.1





[PATCH 5.10 240/290] staging: comedi: pcl818: Fix endian problem for AI command data

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Ian Abbott 

commit 148e34fd33d53740642db523724226de14ee5281 upstream.

The analog input subdevice supports Comedi asynchronous commands that
use Comedi's 16-bit sample format.  However, the call to
`comedi_buf_write_samples()` is passing the address of a 32-bit integer
parameter.  On bigendian machines, this will copy 2 bytes from the wrong
end of the 32-bit value.  Fix it by changing the type of the parameter
holding the sample value to `unsigned short`.

[Note: the bug was introduced in commit edf4537bcbf5 ("staging: comedi:
pcl818: use comedi_buf_write_samples()") but the patch applies better to
commit d615416de615 ("staging: comedi: pcl818: introduce
pcl818_ai_write_sample()").]

Fixes: d615416de615 ("staging: comedi: pcl818: introduce 
pcl818_ai_write_sample()")
Cc:  # 4.0+
Signed-off-by: Ian Abbott 
Link: https://lore.kernel.org/r/20210223143055.257402-10-abbo...@mev.co.uk
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/staging/comedi/drivers/pcl818.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/drivers/staging/comedi/drivers/pcl818.c
+++ b/drivers/staging/comedi/drivers/pcl818.c
@@ -423,7 +423,7 @@ static int pcl818_ai_eoc(struct comedi_d
 
 static bool pcl818_ai_write_sample(struct comedi_device *dev,
   struct comedi_subdevice *s,
-  unsigned int chan, unsigned int val)
+  unsigned int chan, unsigned short val)
 {
struct pcl818_private *devpriv = dev->private;
 	struct comedi_cmd *cmd = &s->async->cmd;




[PATCH 5.10 189/290] s390/dasd: fix hanging DASD driver unbind

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Stefan Haberland 

commit 7d365bd0bff3c0310c39ebaffc9a8458e036d666 upstream.

In case of an unbind of the DASD device driver the function
dasd_generic_remove() is called which shuts down the device.
Among others this functions removes the int_handler from the cdev.
During shutdown the device cancels all outstanding IO requests and waits
for completion of the clear request.
Unfortunately the clear interrupt will never be received when there is no
interrupt handler connected.

Fix by moving the int_handler removal after the call to the state machine
where no request or interrupt is outstanding.

Cc: sta...@vger.kernel.org
Signed-off-by: Stefan Haberland 
Tested-by: Bjoern Walk 
Reviewed-by: Jan Hoeppner 
Signed-off-by: Jens Axboe 
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/s390/block/dasd.c |3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -3522,8 +3522,6 @@ void dasd_generic_remove(struct ccw_devi
struct dasd_device *device;
struct dasd_block *block;
 
-   cdev->handler = NULL;
-
device = dasd_device_from_cdev(cdev);
if (IS_ERR(device)) {
dasd_remove_sysfs_files(cdev);
@@ -3542,6 +3540,7 @@ void dasd_generic_remove(struct ccw_devi
 * no quite down yet.
 */
dasd_set_target_state(device, DASD_STATE_NEW);
+   cdev->handler = NULL;
/* dasd_delete_device destroys the device reference. */
block = device->block;
dasd_delete_device(device);




[PATCH 5.11 225/306] staging: ks7010: prevent buffer overflow in ks_wlan_set_scan()

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Dan Carpenter 

commit e163b9823a0b08c3bb8dc4f5b4b5c221c24ec3e5 upstream.

The user can specify a "req->essid_len" of up to 255 but if it's
over IW_ESSID_MAX_SIZE (32) that can lead to memory corruption.

Fixes: 13a9930d15b4 ("staging: ks7010: add driver from Nanonote 
extra-repository")
Signed-off-by: Dan Carpenter 
Cc: stable 
Link: https://lore.kernel.org/r/YD4fS8+HmM/Qmrw6@mwanda
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/staging/ks7010/ks_wlan_net.c |6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

--- a/drivers/staging/ks7010/ks_wlan_net.c
+++ b/drivers/staging/ks7010/ks_wlan_net.c
@@ -1120,6 +1120,7 @@ static int ks_wlan_set_scan(struct net_d
 {
struct ks_wlan_private *priv = netdev_priv(dev);
struct iw_scan_req *req = NULL;
+   int len;
 
if (priv->sleep_mode == SLP_SLEEP)
return -EPERM;
@@ -1129,8 +1130,9 @@ static int ks_wlan_set_scan(struct net_d
if (wrqu->data.length == sizeof(struct iw_scan_req) &&
wrqu->data.flags & IW_SCAN_THIS_ESSID) {
req = (struct iw_scan_req *)extra;
-   priv->scan_ssid_len = req->essid_len;
-   memcpy(priv->scan_ssid, req->essid, priv->scan_ssid_len);
+   len = min_t(int, req->essid_len, IW_ESSID_MAX_SIZE);
+   priv->scan_ssid_len = len;
+   memcpy(priv->scan_ssid, req->essid, len);
} else {
priv->scan_ssid_len = 0;
}




[PATCH 5.11 217/306] usbip: fix vudc usbip_sockfd_store races leading to gpf

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Shuah Khan 

commit 46613c9dfa964c0c60b5385dbdf5aaa18be52a9c upstream.

usbip_sockfd_store() is invoked when user requests attach (import)
detach (unimport) usb gadget device from usbip host. vhci_hcd sends
import request and usbip_sockfd_store() exports the device if it is
free for export.

Export and unexport are governed by local state and shared state
- Shared state (usbip device status, sockfd) - sockfd and Device
  status are used to determine if stub should be brought up or shut
  down. Device status is shared between host and client.
- Local state (tcp_socket, rx and tx thread task_struct ptrs)
  A valid tcp_socket controls rx and tx thread operations while the
  device is in exported state.
- While the device is exported, device status is marked used and socket,
  sockfd, and thread pointers are valid.

Export sequence (stub-up) includes validating the socket and creating
receive (rx) and transmit (tx) threads to talk to the client to provide
access to the exported device. rx and tx threads depends on local and
shared state to be correct and in sync.

Unexport (stub-down) sequence shuts the socket down and stops the rx and
tx threads. Stub-down sequence relies on local and shared states to be
in sync.

There are races in updating the local and shared status in the current
stub-up sequence resulting in crashes. These stem from starting rx and
tx threads before local and global state is updated correctly to be in
sync.

1. Doesn't handle kthread_create() error and saves invalid ptr in local
   state that drives rx and tx threads.
2. Updates tcp_socket and sockfd,  starts stub_rx and stub_tx threads
   before updating usbip_device status to SDEV_ST_USED. This opens up a
   race condition between the threads and usbip_sockfd_store() stub up
   and down handling.

Fix the above problems:
- Stop using kthread_get_run() macro to create/start threads.
- Create threads and get task struct reference.
- Add kthread_create() failure handling and bail out.
- Hold usbip_device lock to update local and shared states after
  creating rx and tx threads.
- Update usbip_device status to SDEV_ST_USED.
- Update usbip_device tcp_socket, sockfd, tcp_rx, and tcp_tx
- Start threads after usbip_device (tcp_socket, sockfd, tcp_rx, tcp_tx,
  and status) is complete.

Credit goes to syzbot and Tetsuo Handa for finding and root-causing the
kthread_get_run() improper error handling problem and others. This is a
hard problem to find and debug since the races aren't seen in a normal
case. Fuzzing forces the race window to be small enough for the
kthread_get_run() error path bug and starting threads before updating the
local and shared state bug in the stub-up sequence.

Fixes: 9720b4bc76a83807 ("staging/usbip: convert to kthread")
Cc: sta...@vger.kernel.org
Reported-by: syzbot 
Reported-by: syzbot 
Reported-by: syzbot 
Reported-by: Tetsuo Handa 
Signed-off-by: Shuah Khan 
Link: 
https://lore.kernel.org/r/b1c08b983ffa185449c9f0f7d1021dc8c8454b60.1615171203.git.sk...@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/usb/usbip/vudc_sysfs.c |   42 +
 1 file changed, 34 insertions(+), 8 deletions(-)

--- a/drivers/usb/usbip/vudc_sysfs.c
+++ b/drivers/usb/usbip/vudc_sysfs.c
@@ -90,8 +90,9 @@ unlock:
 }
 static BIN_ATTR_RO(dev_desc, sizeof(struct usb_device_descriptor));
 
-static ssize_t usbip_sockfd_store(struct device *dev, struct device_attribute 
*attr,
-const char *in, size_t count)
+static ssize_t usbip_sockfd_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *in, size_t count)
 {
struct vudc *udc = (struct vudc *) dev_get_drvdata(dev);
int rv;
@@ -100,6 +101,8 @@ static ssize_t usbip_sockfd_store(struct
struct socket *socket;
unsigned long flags;
int ret;
+   struct task_struct *tcp_rx = NULL;
+   struct task_struct *tcp_tx = NULL;
 
 	rv = kstrtoint(in, 0, &sockfd);
if (rv != 0)
@@ -145,24 +148,47 @@ static ssize_t usbip_sockfd_store(struct
goto sock_err;
}
 
-	udc->ud.tcp_socket = socket;
-
+	/* unlock and create threads and get tasks */
 	spin_unlock_irq(&udc->ud.lock);
 	spin_unlock_irqrestore(&udc->lock, flags);
 
-	udc->ud.tcp_rx = kthread_get_run(&v_rx_loop,
-					 &udc->ud, "vudc_rx");
-	udc->ud.tcp_tx = kthread_get_run(&v_tx_loop,
-					 &udc->ud, "vudc_tx");
+	tcp_rx = kthread_create(&v_rx_loop, &udc->ud, "vudc_rx");
+	if (IS_ERR(tcp_rx)) {
+		sockfd_put(socket);
+		return -EINVAL;
+	}
+	tcp_tx = kthread_create(&v_tx_loop, &udc->ud, "vudc_tx");
+	if (IS_ERR(tcp_tx)) {
+

[PATCH 5.10 202/290] usb: dwc3: qcom: Add missing DWC3 OF node refcount decrement

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Serge Semin 

commit 1cffb1c66499a9db9a735473778abf8427d16287 upstream.

of_get_child_by_name() increments the reference counter of the OF node it
managed to find. So after the code is done using the device node, the
refcount must be decremented. Add missing of_node_put() invocation then
to the dwc3_qcom_of_register_core() method, since DWC3 OF node is being
used only there.

Fixes: a4333c3a6ba9 ("usb: dwc3: Add Qualcomm DWC3 glue driver")
Signed-off-by: Serge Semin 
Link: 
https://lore.kernel.org/r/20210212205521.14280-1-sergey.se...@baikalelectronics.ru
Cc: stable 
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/usb/dwc3/dwc3-qcom.c |9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

--- a/drivers/usb/dwc3/dwc3-qcom.c
+++ b/drivers/usb/dwc3/dwc3-qcom.c
@@ -639,16 +639,19 @@ static int dwc3_qcom_of_register_core(st
ret = of_platform_populate(np, NULL, NULL, dev);
if (ret) {
dev_err(dev, "failed to register dwc3 core - %d\n", ret);
-   return ret;
+   goto node_put;
}
 
qcom->dwc3 = of_find_device_by_node(dwc3_np);
if (!qcom->dwc3) {
+   ret = -ENODEV;
dev_err(dev, "failed to get dwc3 platform device\n");
-   return -ENODEV;
}
 
-   return 0;
+node_put:
+   of_node_put(dwc3_np);
+
+   return ret;
 }
 
 static int dwc3_qcom_probe(struct platform_device *pdev)




[PATCH 5.11 215/306] usbip: fix stub_dev usbip_sockfd_store() races leading to gpf

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Shuah Khan 

commit 9380afd6df70e24eacbdbde33afc6a3950965d22 upstream.

usbip_sockfd_store() is invoked when user requests attach (import)
detach (unimport) usb device from usbip host. vhci_hcd sends import
request and usbip_sockfd_store() exports the device if it is free
for export.

Export and unexport are governed by local state and shared state
- Shared state (usbip device status, sockfd) - sockfd and Device
  status are used to determine if stub should be brought up or shut
  down.
- Local state (tcp_socket, rx and tx thread task_struct ptrs)
  A valid tcp_socket controls rx and tx thread operations while the
  device is in exported state.
- While the device is exported, device status is marked used and socket,
  sockfd, and thread pointers are valid.

Export sequence (stub-up) includes validating the socket and creating
receive (rx) and transmit (tx) threads to talk to the client to provide
access to the exported device. rx and tx threads depends on local and
shared state to be correct and in sync.

Unexport (stub-down) sequence shuts the socket down and stops the rx and
tx threads. Stub-down sequence relies on local and shared states to be
in sync.

There are races in updating the local and shared status in the current
stub-up sequence resulting in crashes. These stem from starting rx and
tx threads before local and global state is updated correctly to be in
sync.

1. Doesn't handle kthread_create() error and saves invalid ptr in local
   state that drives rx and tx threads.
2. Updates tcp_socket and sockfd,  starts stub_rx and stub_tx threads
   before updating usbip_device status to SDEV_ST_USED. This opens up a
   race condition between the threads and usbip_sockfd_store() stub up
   and down handling.

Fix the above problems:
- Stop using kthread_get_run() macro to create/start threads.
- Create threads and get task struct reference.
- Add kthread_create() failure handling and bail out.
- Hold usbip_device lock to update local and shared states after
  creating rx and tx threads.
- Update usbip_device status to SDEV_ST_USED.
- Update usbip_device tcp_socket, sockfd, tcp_rx, and tcp_tx
- Start threads after usbip_device (tcp_socket, sockfd, tcp_rx, tcp_tx,
  and status) is complete.

Credit goes to syzbot and Tetsuo Handa for finding and root-causing the
kthread_get_run() improper error handling problem and others. This is a
hard problem to find and debug since the races aren't seen in a normal
case. Fuzzing forces the race window to be small enough for the
kthread_get_run() error path bug and starting threads before updating the
local and shared state bug in the stub-up sequence.

Tested with syzbot reproducer:
- https://syzkaller.appspot.com/text?tag=ReproC=14801034d0

Fixes: 9720b4bc76a83807 ("staging/usbip: convert to kthread")
Cc: sta...@vger.kernel.org
Reported-by: syzbot 
Reported-by: syzbot 
Reported-by: syzbot 
Reported-by: Tetsuo Handa 
Signed-off-by: Shuah Khan 
Link: 
https://lore.kernel.org/r/268a0668144d5ff36ec7d87fdfa90faf583b7ccc.1615171203.git.sk...@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/usb/usbip/stub_dev.c |   32 +---
 1 file changed, 25 insertions(+), 7 deletions(-)

--- a/drivers/usb/usbip/stub_dev.c
+++ b/drivers/usb/usbip/stub_dev.c
@@ -46,6 +46,8 @@ static ssize_t usbip_sockfd_store(struct
int sockfd = 0;
struct socket *socket;
int rv;
+   struct task_struct *tcp_rx = NULL;
+   struct task_struct *tcp_tx = NULL;
 
if (!sdev) {
dev_err(dev, "sdev is null\n");
@@ -80,20 +82,36 @@ static ssize_t usbip_sockfd_store(struct
goto sock_err;
}
 
-   sdev->ud.tcp_socket = socket;
-   sdev->ud.sockfd = sockfd;
-
+   /* unlock and create threads and get tasks */
spin_unlock_irq(>ud.lock);
+   tcp_rx = kthread_create(stub_rx_loop, >ud, "stub_rx");
+   if (IS_ERR(tcp_rx)) {
+   sockfd_put(socket);
+   return -EINVAL;
+   }
+   tcp_tx = kthread_create(stub_tx_loop, >ud, "stub_tx");
+   if (IS_ERR(tcp_tx)) {
+   kthread_stop(tcp_rx);
+   sockfd_put(socket);
+   return -EINVAL;
+   }
 
-   sdev->ud.tcp_rx = kthread_get_run(stub_rx_loop, >ud,
- "stub_rx");
-   sdev->ud.tcp_tx = kthread_get_run(stub_tx_loop, >ud,
- "stub_tx");
+   /* get task structs now */
+   get_task_struct(tcp_rx);
+   get_task_struct(tcp_tx);
 
+   /* lock and update sdev->ud state */
spin_lock_irq(>ud.lock);
+   sdev->ud.tcp_socket = socket;
+   sdev->ud.sockfd = sockfd;
+   sdev->ud.tcp_rx = 

[PATCH 5.11 209/306] USB: serial: ch341: add new Product ID

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Niv Sardi 

commit 5563b3b6420362c8a1f468ca04afe6d5f0a8d0a3 upstream.

Add PID for CH340 that's found on cheap programmers.

The driver works flawlessly as soon as the new PID (0x9986) is added to it.
These look like ANU232MI but ship with a ch341 inside. They have no special
identifiers (mine only has the string "DB9D20130716" printed on the PCB and
nothing identifiable on the packaging. The merchant i bought it from
doesn't sell these anymore).

the lsusb -v output is:
Bus 001 Device 009: ID 9986:7523
Device Descriptor:
  bLength18
  bDescriptorType 1
  bcdUSB   1.10
  bDeviceClass  255 Vendor Specific Class
  bDeviceSubClass 0
  bDeviceProtocol 0
  bMaxPacketSize0 8
  idVendor   0x9986
  idProduct  0x7523
  bcdDevice2.54
  iManufacturer   0
  iProduct0
  iSerial 0
  bNumConfigurations  1
  Configuration Descriptor:
bLength 9
bDescriptorType 2
wTotalLength   0x0027
bNumInterfaces  1
bConfigurationValue 1
iConfiguration  0
bmAttributes 0x80
  (Bus Powered)
MaxPower   96mA
Interface Descriptor:
  bLength 9
  bDescriptorType 4
  bInterfaceNumber0
  bAlternateSetting   0
  bNumEndpoints   3
  bInterfaceClass   255 Vendor Specific Class
  bInterfaceSubClass  1
  bInterfaceProtocol  2
  iInterface  0
  Endpoint Descriptor:
bLength 7
bDescriptorType 5
bEndpointAddress 0x82  EP 2 IN
bmAttributes2
  Transfer TypeBulk
  Synch Type   None
  Usage Type   Data
wMaxPacketSize 0x0020  1x 32 bytes
bInterval   0
  Endpoint Descriptor:
bLength 7
bDescriptorType 5
bEndpointAddress 0x02  EP 2 OUT
bmAttributes2
  Transfer TypeBulk
  Synch Type   None
  Usage Type   Data
wMaxPacketSize 0x0020  1x 32 bytes
bInterval   0
  Endpoint Descriptor:
bLength 7
bDescriptorType 5
bEndpointAddress 0x81  EP 1 IN
bmAttributes3
  Transfer TypeInterrupt
  Synch Type   None
  Usage Type   Data
wMaxPacketSize 0x0008  1x 8 bytes
bInterval   1

Signed-off-by: Niv Sardi 
Cc: sta...@vger.kernel.org
Signed-off-by: Johan Hovold 
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/usb/serial/ch341.c |1 +
 1 file changed, 1 insertion(+)

--- a/drivers/usb/serial/ch341.c
+++ b/drivers/usb/serial/ch341.c
@@ -86,6 +86,7 @@ static const struct usb_device_id id_tab
{ USB_DEVICE(0x1a86, 0x7522) },
{ USB_DEVICE(0x1a86, 0x7523) },
{ USB_DEVICE(0x4348, 0x5523) },
+   { USB_DEVICE(0x9986, 0x7523) },
{ },
 };
 MODULE_DEVICE_TABLE(usb, id_table);




[PATCH 5.11 150/306] PCI/LINK: Remove bandwidth notification

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Bjorn Helgaas 

[ Upstream commit b4c7d2076b4e767dd2e075a2b3a9e57753fc67f5 ]

The PCIe Bandwidth Change Notification feature logs messages when the link
bandwidth changes.  Some users have reported that these messages occur
often enough to significantly reduce NVMe performance.  GPUs also seem to
generate these messages.

We don't know why the link bandwidth changes, but in the reported cases
there's no indication that it's caused by hardware failures.

Remove the bandwidth change notifications for now.  Hopefully we can add
this back when we have a better understanding of why this happens and how
we can make the messages useful instead of overwhelming.

Link: https://lore.kernel.org/r/20200115221008.ga191...@google.com/
Link: 
https://lore.kernel.org/r/155605909349.3575.13433421148215616375.st...@gimli.home/
Link: https://bugzilla.kernel.org/show_bug.cgi?id=206197
Signed-off-by: Bjorn Helgaas 
Signed-off-by: Sasha Levin 
---
 drivers/pci/pcie/Kconfig   |   8 --
 drivers/pci/pcie/Makefile  |   1 -
 drivers/pci/pcie/bw_notification.c | 138 -
 drivers/pci/pcie/portdrv.h |   6 --
 drivers/pci/pcie/portdrv_pci.c |   1 -
 5 files changed, 154 deletions(-)
 delete mode 100644 drivers/pci/pcie/bw_notification.c

diff --git a/drivers/pci/pcie/Kconfig b/drivers/pci/pcie/Kconfig
index 3946555a6042..45a2ef702b45 100644
--- a/drivers/pci/pcie/Kconfig
+++ b/drivers/pci/pcie/Kconfig
@@ -133,14 +133,6 @@ config PCIE_PTM
  This is only useful if you have devices that support PTM, but it
  is safe to enable even if you don't.
 
-config PCIE_BW
-   bool "PCI Express Bandwidth Change Notification"
-   depends on PCIEPORTBUS
-   help
- This enables PCI Express Bandwidth Change Notification.  If
- you know link width or rate changes occur only to correct
- unreliable links, you may answer Y.
-
 config PCIE_EDR
bool "PCI Express Error Disconnect Recover support"
depends on PCIE_DPC && ACPI
diff --git a/drivers/pci/pcie/Makefile b/drivers/pci/pcie/Makefile
index d9697892fa3e..b2980db88cc0 100644
--- a/drivers/pci/pcie/Makefile
+++ b/drivers/pci/pcie/Makefile
@@ -12,5 +12,4 @@ obj-$(CONFIG_PCIEAER_INJECT)  += aer_inject.o
 obj-$(CONFIG_PCIE_PME) += pme.o
 obj-$(CONFIG_PCIE_DPC) += dpc.o
 obj-$(CONFIG_PCIE_PTM) += ptm.o
-obj-$(CONFIG_PCIE_BW)  += bw_notification.o
 obj-$(CONFIG_PCIE_EDR) += edr.o
diff --git a/drivers/pci/pcie/bw_notification.c 
b/drivers/pci/pcie/bw_notification.c
deleted file mode 100644
index 565d23cccb8b..
--- a/drivers/pci/pcie/bw_notification.c
+++ /dev/null
@@ -1,138 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * PCI Express Link Bandwidth Notification services driver
- * Author: Alexandru Gagniuc 
- *
- * Copyright (C) 2019, Dell Inc
- *
- * The PCIe Link Bandwidth Notification provides a way to notify the
- * operating system when the link width or data rate changes.  This
- * capability is required for all root ports and downstream ports
- * supporting links wider than x1 and/or multiple link speeds.
- *
- * This service port driver hooks into the bandwidth notification interrupt
- * and warns when links become degraded in operation.
- */
-
-#define dev_fmt(fmt) "bw_notification: " fmt
-
-#include "../pci.h"
-#include "portdrv.h"
-
-static bool pcie_link_bandwidth_notification_supported(struct pci_dev *dev)
-{
-   int ret;
-   u32 lnk_cap;
-
-   ret = pcie_capability_read_dword(dev, PCI_EXP_LNKCAP, _cap);
-   return (ret == PCIBIOS_SUCCESSFUL) && (lnk_cap & PCI_EXP_LNKCAP_LBNC);
-}
-
-static void pcie_enable_link_bandwidth_notification(struct pci_dev *dev)
-{
-   u16 lnk_ctl;
-
-   pcie_capability_write_word(dev, PCI_EXP_LNKSTA, PCI_EXP_LNKSTA_LBMS);
-
-   pcie_capability_read_word(dev, PCI_EXP_LNKCTL, _ctl);
-   lnk_ctl |= PCI_EXP_LNKCTL_LBMIE;
-   pcie_capability_write_word(dev, PCI_EXP_LNKCTL, lnk_ctl);
-}
-
-static void pcie_disable_link_bandwidth_notification(struct pci_dev *dev)
-{
-   u16 lnk_ctl;
-
-   pcie_capability_read_word(dev, PCI_EXP_LNKCTL, _ctl);
-   lnk_ctl &= ~PCI_EXP_LNKCTL_LBMIE;
-   pcie_capability_write_word(dev, PCI_EXP_LNKCTL, lnk_ctl);
-}
-
-static irqreturn_t pcie_bw_notification_irq(int irq, void *context)
-{
-   struct pcie_device *srv = context;
-   struct pci_dev *port = srv->port;
-   u16 link_status, events;
-   int ret;
-
-   ret = pcie_capability_read_word(port, PCI_EXP_LNKSTA, _status);
-   events = link_status & PCI_EXP_LNKSTA_LBMS;
-
-   if (ret != PCIBIOS_SUCCESSFUL || !events)
-   return IRQ_NONE;
-
-   pcie_capability_write_word(port, PCI_EXP_LNKSTA, events);
-   pcie_update_link_speed(port->subordinate, link_status);
-   return IRQ_WAKE_THREAD;
-}
-
-static irqreturn_t pcie_bw_notification_handler(int irq, void *context)
-{
-   struct 

[PATCH 5.11 137/306] spi: stm32: make spurious and overrun interrupts visible

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Alain Volmat 

[ Upstream commit c64e7efe46b7de21937ef4b3594d9b1fc74f07df ]

We do not expect to receive spurious interrupts so raise a warning
if it happens.

RX overrun is an error condition that signals a corrupted RX
stream both in dma and in irq modes. Report the error and
abort the transfer in either cases.

Signed-off-by: Alain Volmat 
Link: 
https://lore.kernel.org/r/1612551572-495-9-git-send-email-alain.vol...@foss.st.com
Signed-off-by: Mark Brown 
Signed-off-by: Sasha Levin 
---
 drivers/spi/spi-stm32.c | 15 ---
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c
index 6eeb39669a86..53c4311cc6ab 100644
--- a/drivers/spi/spi-stm32.c
+++ b/drivers/spi/spi-stm32.c
@@ -928,8 +928,8 @@ static irqreturn_t stm32h7_spi_irq_thread(int irq, void 
*dev_id)
mask |= STM32H7_SPI_SR_RXP;
 
if (!(sr & mask)) {
-   dev_dbg(spi->dev, "spurious IT (sr=0x%08x, ier=0x%08x)\n",
-   sr, ier);
+   dev_warn(spi->dev, "spurious IT (sr=0x%08x, ier=0x%08x)\n",
+sr, ier);
spin_unlock_irqrestore(>lock, flags);
return IRQ_NONE;
}
@@ -956,15 +956,8 @@ static irqreturn_t stm32h7_spi_irq_thread(int irq, void 
*dev_id)
}
 
if (sr & STM32H7_SPI_SR_OVR) {
-   dev_warn(spi->dev, "Overrun: received value discarded\n");
-   if (!spi->cur_usedma && (spi->rx_buf && (spi->rx_len > 0)))
-   stm32h7_spi_read_rxfifo(spi, false);
-   /*
-* If overrun is detected while using DMA, it means that
-* something went wrong, so stop the current transfer
-*/
-   if (spi->cur_usedma)
-   end = true;
+   dev_err(spi->dev, "Overrun: RX data lost\n");
+   end = true;
}
 
if (sr & STM32H7_SPI_SR_EOT) {
-- 
2.30.1





[PATCH 5.4 072/168] HID: logitech-dj: add support for the new lightspeed connection iteration

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Filipe Laíns 

[ Upstream commit fab3a95654eea01d6b0204995be8b7492a00d001 ]

This new connection type is the new iteration of the Lightspeed
connection and will probably be used in some of the newer gaming
devices. It is currently used in the G Pro X Superlight.

This patch should be backported to older versions, as currently the
driver will panic when seeing the unsupported connection. This isn't
an issue when using the receiver that came with the device, as Logitech
has been using different PIDs when they change the connection type, but
is an issue when using a generic receiver (well, generic Lightspeed
receiver), which is the case of the one in the Powerplay mat. Currently,
the only generic Ligthspeed receiver we support, and the only one that
exists AFAIK, is ther Powerplay.

As it stands, the driver will panic when seeing a G Pro X Superlight
connected to the Powerplay receiver and won't send any input events to
userspace! The kernel will warn about this so the issue should be easy
to identify, but it is still very worrying how hard it will fail :(

[915977.398471] logitech-djreceiver 0003:046D:C53A.0107: unusable device of 
type UNKNOWN (0x0f) connected on slot 1

Signed-off-by: Filipe Laíns 
Signed-off-by: Jiri Kosina 
Signed-off-by: Sasha Levin 
---
 drivers/hid/hid-logitech-dj.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c
index 86001cfbdb6f..b499ac37dc7b 100644
--- a/drivers/hid/hid-logitech-dj.c
+++ b/drivers/hid/hid-logitech-dj.c
@@ -995,7 +995,12 @@ static void logi_hidpp_recv_queue_notif(struct hid_device 
*hdev,
workitem.reports_supported |= STD_KEYBOARD;
break;
case 0x0d:
-   device_type = "eQUAD Lightspeed 1_1";
+   device_type = "eQUAD Lightspeed 1.1";
+   logi_hidpp_dev_conn_notif_equad(hdev, hidpp_report, );
+   workitem.reports_supported |= STD_KEYBOARD;
+   break;
+   case 0x0f:
+   device_type = "eQUAD Lightspeed 1.2";
logi_hidpp_dev_conn_notif_equad(hdev, hidpp_report, );
workitem.reports_supported |= STD_KEYBOARD;
break;
-- 
2.30.1





[PATCH 4.14 21/95] net: lapbether: Remove netif_start_queue / netif_stop_queue

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Xie He 

commit f7d9d4854519fdf4d45c70a4d953438cd88e7e58 upstream.

For the devices in this driver, the default qdisc is "noqueue",
because their "tx_queue_len" is 0.

In function "__dev_queue_xmit" in "net/core/dev.c", devices with the
"noqueue" qdisc are specially handled. Packets are transmitted without
being queued after a "dev->flags & IFF_UP" check. However, it's possible
that even if this check succeeds, "ops->ndo_stop" may still have already
been called. This is because in "__dev_close_many", "ops->ndo_stop" is
called before clearing the "IFF_UP" flag.

If we call "netif_stop_queue" in "ops->ndo_stop", then it's possible in
"__dev_queue_xmit", it sees the "IFF_UP" flag is present, and then it
checks "netif_xmit_stopped" and finds that the queue is already stopped.
In this case, it will complain that:
"Virtual device ... asks to queue packet!"

To prevent "__dev_queue_xmit" from generating this complaint, we should
not call "netif_stop_queue" in "ops->ndo_stop".

We also don't need to call "netif_start_queue" in "ops->ndo_open",
because after a netdev is allocated and registered, the
"__QUEUE_STATE_DRV_XOFF" flag is initially not set, so there is no need
to call "netif_start_queue" to clear it.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Xie He 
Acked-by: Martin Schiller 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/net/wan/lapbether.c |3 ---
 1 file changed, 3 deletions(-)

--- a/drivers/net/wan/lapbether.c
+++ b/drivers/net/wan/lapbether.c
@@ -286,7 +286,6 @@ static int lapbeth_open(struct net_devic
return -ENODEV;
}
 
-   netif_start_queue(dev);
return 0;
 }
 
@@ -294,8 +293,6 @@ static int lapbeth_close(struct net_devi
 {
int err;
 
-   netif_stop_queue(dev);
-
if ((err = lapb_unregister(dev)) != LAPB_OK)
pr_err("lapb_unregister error: %d\n", err);
 




[PATCH 4.19 030/120] net: stmmac: fix watchdog timeout during suspend/resume stress test

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Joakim Zhang 

commit c511819d138de38e1637eedb645c207e09680d0f upstream.

stmmac_xmit() calls stmmac_tx_timer_arm() at the end to modify the tx timer to
do the transmission cleanup work. Imagine such a situation: stmmac enters
suspend immediately after the tx timer is modified; its expiry callback
stmmac_tx_clean() would not be invoked. This could affect BQL, since
netdev_tx_sent_queue() has been called, but netdev_tx_completed_queue()
have not been involved, as a result, dql_avail(_queue->dql) finally
always return a negative value.

__dev_queue_xmit->__dev_xmit_skb->qdisc_run->__qdisc_run->qdisc_restart->dequeue_skb:
if ((q->flags & TCQ_F_ONETXQUEUE) &&
netif_xmit_frozen_or_stopped(txq)) // __QUEUE_STATE_STACK_XOFF 
is set

The net core will stop transmitting any more. Finally, the net watchdog would time out.
To fix this issue, we should call netdev_tx_reset_queue() in stmmac_resume().

Fixes: 54139cf3bb33 ("net: stmmac: adding multiple buffers for rx")
Signed-off-by: Joakim Zhang 
Signed-off-by: Jakub Kicinski 
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c |2 ++
 1 file changed, 2 insertions(+)

--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -4566,6 +4566,8 @@ static void stmmac_reset_queues_param(st
tx_q->cur_tx = 0;
tx_q->dirty_tx = 0;
tx_q->mss = 0;
+
+   netdev_tx_reset_queue(netdev_get_tx_queue(priv->dev, queue));
}
 }
 




[PATCH 5.10 054/290] net: stmmac: Fix VLAN filter delete timeout issue in Intel mGBE SGMII

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Ong Boon Leong 

commit 9a7b3950c7e15968e23d83be215e95ccc7c92a53 upstream.

For Intel mGbE controller, MAC VLAN filter delete operation will time-out
if serdes power-down sequence happened first during driver remove() with
below message.

[82294.764958] intel-eth-pci :00:1e.4 eth2: stmmac_dvr_remove: removing 
driver
[82294.778677] intel-eth-pci :00:1e.4 eth2: Timeout accessing 
MAC_VLAN_Tag_Filter
[82294.779997] intel-eth-pci :00:1e.4 eth2: failed to kill vid 0081/0
[82294.947053] intel-eth-pci :00:1d.2 eth1: stmmac_dvr_remove: removing 
driver
[82295.002091] intel-eth-pci :00:1d.1 eth0: stmmac_dvr_remove: removing 
driver

Therefore, we delay the serdes power-down to be after unregister_netdev()
which triggers the VLAN filter delete.

Fixes: b9663b7ca6ff ("net: stmmac: Enable SERDES power up/down sequence")
Signed-off-by: Ong Boon Leong 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c |9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -5114,13 +5114,16 @@ int stmmac_dvr_remove(struct device *dev
netdev_info(priv->dev, "%s: removing driver", __func__);
 
stmmac_stop_all_dma(priv);
+   stmmac_mac_set(priv, priv->ioaddr, false);
+   netif_carrier_off(ndev);
+   unregister_netdev(ndev);
 
+   /* Serdes power down needs to happen after VLAN filter
+* is deleted that is triggered by unregister_netdev().
+*/
if (priv->plat->serdes_powerdown)
priv->plat->serdes_powerdown(ndev, priv->plat->bsp_priv);
 
-   stmmac_mac_set(priv, priv->ioaddr, false);
-   netif_carrier_off(ndev);
-   unregister_netdev(ndev);
 #ifdef CONFIG_DEBUG_FS
stmmac_exit_fs(ndev);
 #endif




[PATCH 4.19 031/120] selftests: forwarding: Fix race condition in mirror installation

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Danielle Ratson 

commit edcbf5137f093b5502f5f6b97cce3cbadbde27aa upstream.

When mirroring to a gretap in hardware the device expects to be
programmed with the egress port and all the encapsulating headers. This
requires the driver to resolve the path the packet will take in the
software data path and program the device accordingly.

If the path cannot be resolved (in this case because of an unresolved
neighbor), then mirror installation fails until the path is resolved.
This results in a race that causes the test to sometimes fail.

Fix this by setting the neighbor's state to permanent, so that it is
always valid.

Fixes: b5b029399fa6d ("selftests: forwarding: mirror_gre_bridge_1d_vlan: Add 
STP test")
Signed-off-by: Danielle Ratson 
Reviewed-by: Petr Machata 
Signed-off-by: Ido Schimmel 
Signed-off-by: Jakub Kicinski 
Signed-off-by: Greg Kroah-Hartman 
---
 tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh |9 
+
 1 file changed, 9 insertions(+)

--- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh
@@ -86,11 +86,20 @@ test_ip6gretap()
 
 test_gretap_stp()
 {
+   # Sometimes after mirror installation, the neighbor's state is not 
valid.
+   # The reason is that there is no SW datapath activity related to the
+   # neighbor for the remote GRE address. Therefore whether the 
corresponding
+   # neighbor will be valid is a matter of luck, and the test is thus racy.
+   # Set the neighbor's state to permanent, so it would be always valid.
+   ip neigh replace 192.0.2.130 lladdr $(mac_get $h3) \
+   nud permanent dev br2
full_test_span_gre_stp gt4 $swp3.555 "mirror to gretap"
 }
 
 test_ip6gretap_stp()
 {
+   ip neigh replace 2001:db8:2::2 lladdr $(mac_get $h3) \
+   nud permanent dev br2
full_test_span_gre_stp gt6 $swp3.555 "mirror to ip6gretap"
 }
 




[PATCH 5.4 046/168] bnxt_en: reliably allocate IRQ table on reset to avoid crash

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Edwin Peer 

commit 20d7d1c5c9b11e9f538ed4a2289be106de970d3e upstream.

The following trace excerpt corresponds with a NULL pointer dereference
of 'bp->irq_tbl' in bnxt_setup_inta() on an Aarch64 system after many
device resets:

Unable to handle kernel NULL pointer dereference at ... 00d
...
pc : string+0x3c/0x80
lr : vsnprintf+0x294/0x7e0
sp : 0f61ba70 pstate : 2145
x29: 0f61ba70 x28: 000d
x27: 009c8b5a x26: 0f61bb80
x25: 009c8b5a x24: 0012
x23: ffe0 x22: 08990428
x21: 0f61bb80 x20: 000d
x19: 001f x18: 
x17:  x16: 800b6d0fb400
x15:  x14: 800b7fe31ae8
x13: 1ed16472c920 x12: 08c6b1c9
x11: 08cf0580 x10: 0f61bb80
x9 : ffd8 x8 : 000c
x7 : 800b684b8000 x6 : 
x5 : 0065 x4 : 0001
x3 : 0a00ff04 x2 : 001f
x1 :  x0 : 000d
Call trace:
string+0x3c/0x80
vsnprintf+0x294/0x7e0
snprintf+0x44/0x50
__bnxt_open_nic+0x34c/0x928 [bnxt_en]
bnxt_open+0xe8/0x238 [bnxt_en]
__dev_open+0xbc/0x130
__dev_change_flags+0x12c/0x168
dev_change_flags+0x20/0x60
...

Ordinarily, a call to bnxt_setup_inta() (not in trace due to inlining)
would not be expected on a system supporting MSIX at all. However, if
bnxt_init_int_mode() does not end up being called after the call to
bnxt_clear_int_mode() in bnxt_fw_reset_close(), then the driver will
think that only INTA is supported and bp->irq_tbl will be NULL,
causing the above crash.

In the error recovery scenario, we call bnxt_clear_int_mode() in
bnxt_fw_reset_close() early in the sequence. Ordinarily, we will
call bnxt_init_int_mode() in bnxt_hwrm_if_change() after we
reestablish communication with the firmware after reset.  However,
if the sequence has to abort before we call bnxt_init_int_mode() and
if the user later attempts to re-open the device, then it will cause
the crash above.

We fix it in 2 ways:

1. Check for bp->irq_tbl in bnxt_setup_int_mode(). If it is NULL, call
bnxt_init_init_mode().

2. If we need to abort in bnxt_hwrm_if_change() and cannot complete
the error recovery sequence, set the BNXT_STATE_ABORT_ERR flag.  This
will cause more drastic recovery at the next attempt to re-open the
device, including a call to bnxt_init_int_mode().

Fixes: 3bc7d4a352ef ("bnxt_en: Add BNXT_STATE_IN_FW_RESET state.")
Reviewed-by: Scott Branden 
Signed-off-by: Edwin Peer 
Signed-off-by: Michael Chan 
Signed-off-by: Jakub Kicinski 
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c |   14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -7925,10 +7925,18 @@ static void bnxt_setup_inta(struct bnxt
bp->irq_tbl[0].handler = bnxt_inta;
 }
 
+static int bnxt_init_int_mode(struct bnxt *bp);
+
 static int bnxt_setup_int_mode(struct bnxt *bp)
 {
int rc;
 
+   if (!bp->irq_tbl) {
+   rc = bnxt_init_int_mode(bp);
+   if (rc || !bp->irq_tbl)
+   return rc ?: -ENODEV;
+   }
+
if (bp->flags & BNXT_FLAG_USING_MSIX)
bnxt_setup_msix(bp);
else
@@ -8113,7 +8121,7 @@ static int bnxt_init_inta(struct bnxt *b
 
 static int bnxt_init_int_mode(struct bnxt *bp)
 {
-   int rc = 0;
+   int rc = -ENODEV;
 
if (bp->flags & BNXT_FLAG_MSIX_CAP)
rc = bnxt_init_msix(bp);
@@ -8748,7 +8756,8 @@ static int bnxt_hwrm_if_change(struct bn
 {
struct hwrm_func_drv_if_change_output *resp = bp->hwrm_cmd_resp_addr;
struct hwrm_func_drv_if_change_input req = {0};
-   bool resc_reinit = false, fw_reset = false;
+   bool fw_reset = !bp->irq_tbl;
+   bool resc_reinit = false;
u32 flags = 0;
int rc;
 
@@ -8776,6 +8785,7 @@ static int bnxt_hwrm_if_change(struct bn
 
if (test_bit(BNXT_STATE_IN_FW_RESET, >state) && !fw_reset) {
netdev_err(bp->dev, "RESET_DONE not set during FW reset.\n");
+   set_bit(BNXT_STATE_ABORT_ERR, >state);
return -ENODEV;
}
if (resc_reinit || fw_reset) {




[PATCH 5.11 065/306] net: stmmac: Fix VLAN filter delete timeout issue in Intel mGBE SGMII

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Ong Boon Leong 

commit 9a7b3950c7e15968e23d83be215e95ccc7c92a53 upstream.

For Intel mGbE controller, MAC VLAN filter delete operation will time-out
if serdes power-down sequence happened first during driver remove() with
below message.

[82294.764958] intel-eth-pci :00:1e.4 eth2: stmmac_dvr_remove: removing 
driver
[82294.778677] intel-eth-pci :00:1e.4 eth2: Timeout accessing 
MAC_VLAN_Tag_Filter
[82294.779997] intel-eth-pci :00:1e.4 eth2: failed to kill vid 0081/0
[82294.947053] intel-eth-pci :00:1d.2 eth1: stmmac_dvr_remove: removing 
driver
[82295.002091] intel-eth-pci :00:1d.1 eth0: stmmac_dvr_remove: removing 
driver

Therefore, we delay the serdes power-down to be after unregister_netdev()
which triggers the VLAN filter delete.

Fixes: b9663b7ca6ff ("net: stmmac: Enable SERDES power up/down sequence")
Signed-off-by: Ong Boon Leong 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c |9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -5144,13 +5144,16 @@ int stmmac_dvr_remove(struct device *dev
netdev_info(priv->dev, "%s: removing driver", __func__);
 
stmmac_stop_all_dma(priv);
+   stmmac_mac_set(priv, priv->ioaddr, false);
+   netif_carrier_off(ndev);
+   unregister_netdev(ndev);
 
+   /* Serdes power down needs to happen after VLAN filter
+* is deleted that is triggered by unregister_netdev().
+*/
if (priv->plat->serdes_powerdown)
priv->plat->serdes_powerdown(ndev, priv->plat->bsp_priv);
 
-   stmmac_mac_set(priv, priv->ioaddr, false);
-   netif_carrier_off(ndev);
-   unregister_netdev(ndev);
 #ifdef CONFIG_DEBUG_FS
stmmac_exit_fs(ndev);
 #endif




[PATCH 5.11 291/306] x86/entry: Fix entry/exit mismatch on failed fast 32-bit syscalls

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Andy Lutomirski 

commit 5d5675df792ff67e74a500c4c94db0f99e6a10ef upstream.

On a 32-bit fast syscall that fails to read its arguments from user
memory, the kernel currently does syscall exit work but not
syscall entry work.  This confuses audit and ptrace.  For example:

$ ./tools/testing/selftests/x86/syscall_arg_fault_32
...
strace: pid 264258: entering, ptrace_syscall_info.op == 2
...

This is a minimal fix intended for ease of backporting.  A more
complete cleanup is coming.

Fixes: 0b085e68f407 ("x86/entry: Consolidate 32/64 bit syscall entry")
Signed-off-by: Andy Lutomirski 
Signed-off-by: Thomas Gleixner 
Signed-off-by: Borislav Petkov 
Cc: sta...@vger.kernel.org
Link: 
https://lore.kernel.org/r/8c82296ddf803b91f8d1e5eac89e5803ba54ab0e.1614884673.git.l...@kernel.org
Signed-off-by: Greg Kroah-Hartman 
---
 arch/x86/entry/common.c |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -128,7 +128,8 @@ static noinstr bool __do_fast_syscall_32
regs->ax = -EFAULT;
 
instrumentation_end();
-   syscall_exit_to_user_mode(regs);
+   local_irq_disable();
+   irqentry_exit_to_user_mode(regs);
return false;
}
 




[PATCH 5.11 292/306] KVM: x86: Ensure deadline timer has truly expired before posting its IRQ

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Sean Christopherson 

commit beda430177f56656e7980dcce93456ffaa35676b upstream.

When posting a deadline timer interrupt, open code the checks guarding
__kvm_wait_lapic_expire() in order to skip the lapic_timer_int_injected()
check in kvm_wait_lapic_expire().  The injection check will always fail
since the interrupt has not yet be injected.  Moving the call after
injection would also be wrong as that wouldn't actually delay delivery
of the IRQ if it is indeed sent via posted interrupt.

Fixes: 010fd37fddf6 ("KVM: LAPIC: Reduce world switch latency caused by 
timer_advance_ns")
Cc: sta...@vger.kernel.org
Signed-off-by: Sean Christopherson 
Message-Id: <20210305021808.3769732-1-sea...@google.com>
Signed-off-by: Paolo Bonzini 
Signed-off-by: Greg Kroah-Hartman 
---
 arch/x86/kvm/lapic.c |   11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1641,7 +1641,16 @@ static void apic_timer_expired(struct kv
}
 
if (kvm_use_posted_timer_interrupt(apic->vcpu)) {
-   kvm_wait_lapic_expire(vcpu);
+   /*
+* Ensure the guest's timer has truly expired before posting an
+* interrupt.  Open code the relevant checks to avoid querying
+* lapic_timer_int_injected(), which will be false since the
+* interrupt isn't yet injected.  Waiting until after injecting
+* is not an option since that won't help a posted interrupt.
+*/
+   if (vcpu->arch.apic->lapic_timer.expired_tscdeadline &&
+   vcpu->arch.apic->lapic_timer.timer_advance_ns)
+   __kvm_wait_lapic_expire(vcpu);
kvm_apic_inject_pending_timer_irqs(apic);
return;
}




[PATCH 5.10 274/290] x86/sev-es: Use __copy_from_user_inatomic()

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Joerg Roedel 

commit bffe30dd9f1f3b2608a87ac909a224d6be472485 upstream.

The #VC handler must run in atomic context and cannot sleep. This is a
problem when it tries to fetch instruction bytes from user-space via
copy_from_user().

Introduce a insn_fetch_from_user_inatomic() helper which uses
__copy_from_user_inatomic() to safely copy the instruction bytes to
kernel memory in the #VC handler.

Fixes: 5e3427a7bc432 ("x86/sev-es: Handle instruction fetches from user-space")
Signed-off-by: Joerg Roedel 
Signed-off-by: Borislav Petkov 
Cc: sta...@vger.kernel.org # v5.10+
Link: https://lkml.kernel.org/r/20210303141716.29223-6-j...@8bytes.org
Signed-off-by: Greg Kroah-Hartman 
---
 arch/x86/include/asm/insn-eval.h |  2 +
 arch/x86/kernel/sev-es.c |  2 +-
 arch/x86/lib/insn-eval.c | 66 +---
 3 files changed, 55 insertions(+), 15 deletions(-)

diff --git a/arch/x86/include/asm/insn-eval.h b/arch/x86/include/asm/insn-eval.h
index a0f839aa144d..98b4dae5e8bc 100644
--- a/arch/x86/include/asm/insn-eval.h
+++ b/arch/x86/include/asm/insn-eval.h
@@ -23,6 +23,8 @@ unsigned long insn_get_seg_base(struct pt_regs *regs, int 
seg_reg_idx);
 int insn_get_code_seg_params(struct pt_regs *regs);
 int insn_fetch_from_user(struct pt_regs *regs,
 unsigned char buf[MAX_INSN_SIZE]);
+int insn_fetch_from_user_inatomic(struct pt_regs *regs,
+ unsigned char buf[MAX_INSN_SIZE]);
 bool insn_decode(struct insn *insn, struct pt_regs *regs,
 unsigned char buf[MAX_INSN_SIZE], int buf_size);
 
diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c
index c3fd8fa79838..04a780abb512 100644
--- a/arch/x86/kernel/sev-es.c
+++ b/arch/x86/kernel/sev-es.c
@@ -258,7 +258,7 @@ static enum es_result vc_decode_insn(struct es_em_ctxt 
*ctxt)
int res;
 
if (user_mode(ctxt->regs)) {
-   res = insn_fetch_from_user(ctxt->regs, buffer);
+   res = insn_fetch_from_user_inatomic(ctxt->regs, buffer);
if (!res) {
ctxt->fi.vector = X86_TRAP_PF;
ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER;
diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c
index 4229950a5d78..bb0b3fe1e0a0 100644
--- a/arch/x86/lib/insn-eval.c
+++ b/arch/x86/lib/insn-eval.c
@@ -1415,6 +1415,25 @@ void __user *insn_get_addr_ref(struct insn *insn, struct 
pt_regs *regs)
}
 }
 
+static unsigned long insn_get_effective_ip(struct pt_regs *regs)
+{
+   unsigned long seg_base = 0;
+
+   /*
+* If not in user-space long mode, a custom code segment could be in
+* use. This is true in protected mode (if the process defined a local
+* descriptor table), or virtual-8086 mode. In most of the cases
+* seg_base will be zero as in USER_CS.
+*/
+   if (!user_64bit_mode(regs)) {
+   seg_base = insn_get_seg_base(regs, INAT_SEG_REG_CS);
+   if (seg_base == -1L)
+   return 0;
+   }
+
+   return seg_base + regs->ip;
+}
+
 /**
  * insn_fetch_from_user() - Copy instruction bytes from user-space memory
  * @regs:  Structure with register values as seen when entering kernel mode
@@ -1431,24 +1450,43 @@ void __user *insn_get_addr_ref(struct insn *insn, 
struct pt_regs *regs)
  */
 int insn_fetch_from_user(struct pt_regs *regs, unsigned char 
buf[MAX_INSN_SIZE])
 {
-   unsigned long seg_base = 0;
+   unsigned long ip;
int not_copied;
 
-   /*
-* If not in user-space long mode, a custom code segment could be in
-* use. This is true in protected mode (if the process defined a local
-* descriptor table), or virtual-8086 mode. In most of the cases
-* seg_base will be zero as in USER_CS.
-*/
-   if (!user_64bit_mode(regs)) {
-   seg_base = insn_get_seg_base(regs, INAT_SEG_REG_CS);
-   if (seg_base == -1L)
-   return 0;
-   }
+   ip = insn_get_effective_ip(regs);
+   if (!ip)
+   return 0;
+
+   not_copied = copy_from_user(buf, (void __user *)ip, MAX_INSN_SIZE);
 
+   return MAX_INSN_SIZE - not_copied;
+}
+
+/**
+ * insn_fetch_from_user_inatomic() - Copy instruction bytes from user-space 
memory
+ *   while in atomic code
+ * @regs:  Structure with register values as seen when entering kernel mode
+ * @buf:   Array to store the fetched instruction
+ *
+ * Gets the linear address of the instruction and copies the instruction bytes
+ * to the buf. This function must be used in atomic context.
+ *
+ * Returns:
+ *
+ * Number of instruction bytes copied.
+ *
+ * 0 if nothing was copied.
+ */
+int insn_fetch_from_user_inatomic(struct pt_regs *regs, unsigned char 
buf[MAX_INSN_SIZE])
+{
+   unsigned long ip;
+   int not_copied;
+
+   ip = 

[PATCH 5.11 290/306] x86/sev-es: Use __copy_from_user_inatomic()

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Joerg Roedel 

commit bffe30dd9f1f3b2608a87ac909a224d6be472485 upstream.

The #VC handler must run in atomic context and cannot sleep. This is a
problem when it tries to fetch instruction bytes from user-space via
copy_from_user().

Introduce a insn_fetch_from_user_inatomic() helper which uses
__copy_from_user_inatomic() to safely copy the instruction bytes to
kernel memory in the #VC handler.

Fixes: 5e3427a7bc432 ("x86/sev-es: Handle instruction fetches from user-space")
Signed-off-by: Joerg Roedel 
Signed-off-by: Borislav Petkov 
Cc: sta...@vger.kernel.org # v5.10+
Link: https://lkml.kernel.org/r/20210303141716.29223-6-j...@8bytes.org
Signed-off-by: Greg Kroah-Hartman 
---
 arch/x86/include/asm/insn-eval.h |2 +
 arch/x86/kernel/sev-es.c |2 -
 arch/x86/lib/insn-eval.c |   66 ++-
 3 files changed, 55 insertions(+), 15 deletions(-)

--- a/arch/x86/include/asm/insn-eval.h
+++ b/arch/x86/include/asm/insn-eval.h
@@ -23,6 +23,8 @@ unsigned long insn_get_seg_base(struct p
 int insn_get_code_seg_params(struct pt_regs *regs);
 int insn_fetch_from_user(struct pt_regs *regs,
 unsigned char buf[MAX_INSN_SIZE]);
+int insn_fetch_from_user_inatomic(struct pt_regs *regs,
+ unsigned char buf[MAX_INSN_SIZE]);
 bool insn_decode(struct insn *insn, struct pt_regs *regs,
 unsigned char buf[MAX_INSN_SIZE], int buf_size);
 
--- a/arch/x86/kernel/sev-es.c
+++ b/arch/x86/kernel/sev-es.c
@@ -258,7 +258,7 @@ static enum es_result vc_decode_insn(str
int res;
 
if (user_mode(ctxt->regs)) {
-   res = insn_fetch_from_user(ctxt->regs, buffer);
+   res = insn_fetch_from_user_inatomic(ctxt->regs, buffer);
if (!res) {
ctxt->fi.vector = X86_TRAP_PF;
ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER;
--- a/arch/x86/lib/insn-eval.c
+++ b/arch/x86/lib/insn-eval.c
@@ -1415,6 +1415,25 @@ void __user *insn_get_addr_ref(struct in
}
 }
 
+static unsigned long insn_get_effective_ip(struct pt_regs *regs)
+{
+   unsigned long seg_base = 0;
+
+   /*
+* If not in user-space long mode, a custom code segment could be in
+* use. This is true in protected mode (if the process defined a local
+* descriptor table), or virtual-8086 mode. In most of the cases
+* seg_base will be zero as in USER_CS.
+*/
+   if (!user_64bit_mode(regs)) {
+   seg_base = insn_get_seg_base(regs, INAT_SEG_REG_CS);
+   if (seg_base == -1L)
+   return 0;
+   }
+
+   return seg_base + regs->ip;
+}
+
 /**
  * insn_fetch_from_user() - Copy instruction bytes from user-space memory
  * @regs:  Structure with register values as seen when entering kernel mode
@@ -1431,24 +1450,43 @@ void __user *insn_get_addr_ref(struct in
  */
 int insn_fetch_from_user(struct pt_regs *regs, unsigned char 
buf[MAX_INSN_SIZE])
 {
-   unsigned long seg_base = 0;
+   unsigned long ip;
int not_copied;
 
-   /*
-* If not in user-space long mode, a custom code segment could be in
-* use. This is true in protected mode (if the process defined a local
-* descriptor table), or virtual-8086 mode. In most of the cases
-* seg_base will be zero as in USER_CS.
-*/
-   if (!user_64bit_mode(regs)) {
-   seg_base = insn_get_seg_base(regs, INAT_SEG_REG_CS);
-   if (seg_base == -1L)
-   return 0;
-   }
+   ip = insn_get_effective_ip(regs);
+   if (!ip)
+   return 0;
+
+   not_copied = copy_from_user(buf, (void __user *)ip, MAX_INSN_SIZE);
+
+   return MAX_INSN_SIZE - not_copied;
+}
+
+/**
+ * insn_fetch_from_user_inatomic() - Copy instruction bytes from user-space 
memory
+ *   while in atomic code
+ * @regs:  Structure with register values as seen when entering kernel mode
+ * @buf:   Array to store the fetched instruction
+ *
+ * Gets the linear address of the instruction and copies the instruction bytes
+ * to the buf. This function must be used in atomic context.
+ *
+ * Returns:
+ *
+ * Number of instruction bytes copied.
+ *
+ * 0 if nothing was copied.
+ */
+int insn_fetch_from_user_inatomic(struct pt_regs *regs, unsigned char 
buf[MAX_INSN_SIZE])
+{
+   unsigned long ip;
+   int not_copied;
 
+   ip = insn_get_effective_ip(regs);
+   if (!ip)
+   return 0;
 
-   not_copied = copy_from_user(buf, (void __user *)(seg_base + regs->ip),
-   MAX_INSN_SIZE);
+   not_copied = __copy_from_user_inatomic(buf, (void __user *)ip, 
MAX_INSN_SIZE);
 
return MAX_INSN_SIZE - not_copied;
 }




[PATCH 5.11 288/306] x86/sev-es: Check regs->sp is trusted before adjusting #VC IST stack

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Joerg Roedel 

commit 545ac14c16b5dbd909d5a90ddf5b5a629a40fa94 upstream.

The code in the NMI handler to adjust the #VC handler IST stack is
needed in case an NMI hits when the #VC handler is still using its IST
stack.

But the check for this condition also needs to look if the regs->sp
value is trusted, meaning it was not set by user-space. Extend the check
to not use regs->sp when the NMI interrupted user-space code or the
SYSCALL gap.

Fixes: 315562c9af3d5 ("x86/sev-es: Adjust #VC IST Stack on entering NMI 
handler")
Reported-by: Andy Lutomirski 
Signed-off-by: Joerg Roedel 
Signed-off-by: Borislav Petkov 
Cc: sta...@vger.kernel.org # 5.10+
Link: https://lkml.kernel.org/r/20210303141716.29223-3-j...@8bytes.org
Signed-off-by: Greg Kroah-Hartman 
---
 arch/x86/kernel/sev-es.c |   14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

--- a/arch/x86/kernel/sev-es.c
+++ b/arch/x86/kernel/sev-es.c
@@ -121,8 +121,18 @@ static void __init setup_vc_stacks(int c
cea_set_pte((void *)vaddr, pa, PAGE_KERNEL);
 }
 
-static __always_inline bool on_vc_stack(unsigned long sp)
+static __always_inline bool on_vc_stack(struct pt_regs *regs)
 {
+   unsigned long sp = regs->sp;
+
+   /* User-mode RSP is not trusted */
+   if (user_mode(regs))
+   return false;
+
+   /* SYSCALL gap still has user-mode RSP */
+   if (ip_within_syscall_gap(regs))
+   return false;
+
return ((sp >= __this_cpu_ist_bottom_va(VC)) && (sp < 
__this_cpu_ist_top_va(VC)));
 }
 
@@ -144,7 +154,7 @@ void noinstr __sev_es_ist_enter(struct p
old_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);
 
/* Make room on the IST stack */
-   if (on_vc_stack(regs->sp))
+   if (on_vc_stack(regs))
new_ist = ALIGN_DOWN(regs->sp, 8) - sizeof(old_ist);
else
new_ist = old_ist - sizeof(old_ist);




[PATCH 5.11 287/306] x86/sev-es: Introduce ip_within_syscall_gap() helper

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Joerg Roedel 

commit 78a81d88f60ba773cbe890205e1ee67f00502948 upstream.

Introduce a helper to check whether an exception came from the syscall
gap and use it in the SEV-ES code. Extend the check to also cover the
compatibility SYSCALL entry path.

Fixes: 315562c9af3d5 ("x86/sev-es: Adjust #VC IST Stack on entering NMI 
handler")
Signed-off-by: Joerg Roedel 
Signed-off-by: Borislav Petkov 
Cc: sta...@vger.kernel.org # 5.10+
Link: https://lkml.kernel.org/r/20210303141716.29223-2-j...@8bytes.org
Signed-off-by: Greg Kroah-Hartman 
---
 arch/x86/entry/entry_64_compat.S |2 ++
 arch/x86/include/asm/proto.h |1 +
 arch/x86/include/asm/ptrace.h|   15 +++
 arch/x86/kernel/traps.c  |3 +--
 4 files changed, 19 insertions(+), 2 deletions(-)

--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -210,6 +210,8 @@ SYM_CODE_START(entry_SYSCALL_compat)
/* Switch to the kernel stack */
movqPER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
+SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL)
+
/* Construct struct pt_regs on stack */
pushq   $__USER32_DS/* pt_regs->ss */
pushq   %r8 /* pt_regs->sp */
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -25,6 +25,7 @@ void __end_SYSENTER_singlestep_region(vo
 void entry_SYSENTER_compat(void);
 void __end_entry_SYSENTER_compat(void);
 void entry_SYSCALL_compat(void);
+void entry_SYSCALL_compat_safe_stack(void);
 void entry_INT80_compat(void);
 #ifdef CONFIG_XEN_PV
 void xen_entry_INT80_compat(void);
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -94,6 +94,8 @@ struct pt_regs {
 #include 
 #endif
 
+#include 
+
 struct cpuinfo_x86;
 struct task_struct;
 
@@ -175,6 +177,19 @@ static inline bool any_64bit_mode(struct
 #ifdef CONFIG_X86_64
 #define current_user_stack_pointer()   current_pt_regs()->sp
 #define compat_user_stack_pointer()current_pt_regs()->sp
+
+static inline bool ip_within_syscall_gap(struct pt_regs *regs)
+{
+   bool ret = (regs->ip >= (unsigned long)entry_SYSCALL_64 &&
+   regs->ip <  (unsigned long)entry_SYSCALL_64_safe_stack);
+
+#ifdef CONFIG_IA32_EMULATION
+   ret = ret || (regs->ip >= (unsigned long)entry_SYSCALL_compat &&
+ regs->ip <  (unsigned 
long)entry_SYSCALL_compat_safe_stack);
+#endif
+
+   return ret;
+}
 #endif
 
 static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -694,8 +694,7 @@ asmlinkage __visible noinstr struct pt_r
 * In the SYSCALL entry path the RSP value comes from user-space - don't
 * trust it and switch to the current kernel stack
 */
-   if (regs->ip >= (unsigned long)entry_SYSCALL_64 &&
-   regs->ip <  (unsigned long)entry_SYSCALL_64_safe_stack) {
+   if (ip_within_syscall_gap(regs)) {
sp = this_cpu_read(cpu_current_top_of_stack);
goto sync;
}




[PATCH 5.10 275/290] x86/entry: Fix entry/exit mismatch on failed fast 32-bit syscalls

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Andy Lutomirski 

commit 5d5675df792ff67e74a500c4c94db0f99e6a10ef upstream.

On a 32-bit fast syscall that fails to read its arguments from user
memory, the kernel currently does syscall exit work but not
syscall entry work.  This confuses audit and ptrace.  For example:

$ ./tools/testing/selftests/x86/syscall_arg_fault_32
...
strace: pid 264258: entering, ptrace_syscall_info.op == 2
...

This is a minimal fix intended for ease of backporting.  A more
complete cleanup is coming.

Fixes: 0b085e68f407 ("x86/entry: Consolidate 32/64 bit syscall entry")
Signed-off-by: Andy Lutomirski 
Signed-off-by: Thomas Gleixner 
Signed-off-by: Borislav Petkov 
Cc: sta...@vger.kernel.org
Link: 
https://lore.kernel.org/r/8c82296ddf803b91f8d1e5eac89e5803ba54ab0e.1614884673.git.l...@kernel.org
Signed-off-by: Greg Kroah-Hartman 
---
 arch/x86/entry/common.c |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -128,7 +128,8 @@ static noinstr bool __do_fast_syscall_32
regs->ax = -EFAULT;
 
instrumentation_end();
-   syscall_exit_to_user_mode(regs);
+   local_irq_disable();
+   irqentry_exit_to_user_mode(regs);
return false;
}
 




[PATCH 5.11 286/306] x86/unwind/orc: Disable KASAN checking in the ORC unwinder, part 2

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Josh Poimboeuf 

commit e504e74cc3a2c092b05577ce3e8e013fae7d94e6 upstream.

KASAN reserves "redzone" areas between stack frames in order to detect
stack overruns.  A read or write to such an area triggers a KASAN
"stack-out-of-bounds" BUG.

Normally, the ORC unwinder stays in-bounds and doesn't access the
redzone.  But sometimes it can't find ORC metadata for a given
instruction.  This can happen for code which is missing ORC metadata, or
for generated code.  In such cases, the unwinder attempts to fall back
to frame pointers, as a best-effort type thing.

This fallback often works, but when it doesn't, the unwinder can get
confused and go off into the weeds into the KASAN redzone, triggering
the aforementioned KASAN BUG.

But in this case, the unwinder's confusion is actually harmless and
working as designed.  It already has checks in place to prevent
off-stack accesses, but those checks get short-circuited by the KASAN
BUG.  And a BUG is a lot more disruptive than a harmless unwinder
warning.

Disable the KASAN checks by using READ_ONCE_NOCHECK() for all stack
accesses.  This finishes the job started by commit 881125bfe65b
("x86/unwind: Disable KASAN checking in the ORC unwinder"), which only
partially fixed the issue.

Fixes: ee9f8fce9964 ("x86/unwind: Add the ORC unwinder")
Reported-by: Ivan Babrou 
Signed-off-by: Josh Poimboeuf 
Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Borislav Petkov 
Reviewed-by: Steven Rostedt (VMware) 
Tested-by: Ivan Babrou 
Cc: sta...@kernel.org
Link: 
https://lkml.kernel.org/r/9583327904ebbbeda399eca9c56d6c7085ac20fe.1612534649.git.jpoim...@redhat.com
Signed-off-by: Greg Kroah-Hartman 
---
 arch/x86/kernel/unwind_orc.c |   12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -367,8 +367,8 @@ static bool deref_stack_regs(struct unwi
if (!stack_access_ok(state, addr, sizeof(struct pt_regs)))
return false;
 
-   *ip = regs->ip;
-   *sp = regs->sp;
+   *ip = READ_ONCE_NOCHECK(regs->ip);
+   *sp = READ_ONCE_NOCHECK(regs->sp);
return true;
 }
 
@@ -380,8 +380,8 @@ static bool deref_stack_iret_regs(struct
if (!stack_access_ok(state, addr, IRET_FRAME_SIZE))
return false;
 
-   *ip = regs->ip;
-   *sp = regs->sp;
+   *ip = READ_ONCE_NOCHECK(regs->ip);
+   *sp = READ_ONCE_NOCHECK(regs->sp);
return true;
 }
 
@@ -402,12 +402,12 @@ static bool get_reg(struct unwind_state
return false;
 
if (state->full_regs) {
-   *val = ((unsigned long *)state->regs)[reg];
+   *val = READ_ONCE_NOCHECK(((unsigned long *)state->regs)[reg]);
return true;
}
 
if (state->prev_regs) {
-   *val = ((unsigned long *)state->prev_regs)[reg];
+   *val = READ_ONCE_NOCHECK(((unsigned long 
*)state->prev_regs)[reg]);
return true;
}
 




[PATCH 5.11 289/306] x86/sev-es: Correctly track IRQ states in runtime #VC handler

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Joerg Roedel 

commit 62441a1fb53263bda349b6e5997c3cc5c120d89e upstream.

Call irqentry_nmi_enter()/irqentry_nmi_exit() in the #VC handler to
correctly track the IRQ state during its execution.

Fixes: 0786138c78e79 ("x86/sev-es: Add a Runtime #VC Exception Handler")
Reported-by: Andy Lutomirski 
Signed-off-by: Joerg Roedel 
Signed-off-by: Borislav Petkov 
Cc: sta...@vger.kernel.org # v5.10+
Link: https://lkml.kernel.org/r/20210303141716.29223-5-j...@8bytes.org
Signed-off-by: Greg Kroah-Hartman 
---
 arch/x86/kernel/sev-es.c |6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

--- a/arch/x86/kernel/sev-es.c
+++ b/arch/x86/kernel/sev-es.c
@@ -1258,13 +1258,12 @@ static __always_inline bool on_vc_fallba
 DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
 {
struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
+   irqentry_state_t irq_state;
struct ghcb_state state;
struct es_em_ctxt ctxt;
enum es_result result;
struct ghcb *ghcb;
 
-   lockdep_assert_irqs_disabled();
-
/*
 * Handle #DB before calling into !noinstr code to avoid recursive #DB.
 */
@@ -1273,6 +1272,8 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_co
return;
}
 
+   irq_state = irqentry_nmi_enter(regs);
+   lockdep_assert_irqs_disabled();
instrumentation_begin();
 
/*
@@ -1335,6 +1336,7 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_co
 
 out:
instrumentation_end();
+   irqentry_nmi_exit(regs, irq_state);
 
return;
 




[PATCH 5.11 285/306] kasan: fix KASAN_STACK dependency for HW_TAGS

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Andrey Konovalov 

commit d9b571c885a8974fbb7d4ee639dbc643fd000f9e upstream.

There's a runtime failure when running HW_TAGS-enabled kernel built with
GCC on hardware that doesn't support MTE.  GCC-built kernels always have
CONFIG_KASAN_STACK enabled, even though stack instrumentation isn't
supported by HW_TAGS.  Having that config enabled causes KASAN to issue
MTE-only instructions to unpoison kernel stacks, which causes the failure.

Fix the issue by disallowing CONFIG_KASAN_STACK when HW_TAGS is used.

(The commit that introduced CONFIG_KASAN_HW_TAGS specified proper
 dependency for CONFIG_KASAN_STACK_ENABLE but not for CONFIG_KASAN_STACK.)

Link: 
https://lkml.kernel.org/r/59e75426241dbb5611277758c8d4d6f5f9298dac.1615215441.git.andreyk...@google.com
Fixes: 6a63a63ff1ac ("kasan: introduce CONFIG_KASAN_HW_TAGS")
Signed-off-by: Andrey Konovalov 
Reported-by: Catalin Marinas 
Cc: 
Cc: Will Deacon 
Cc: Vincenzo Frascino 
Cc: Dmitry Vyukov 
Cc: Andrey Ryabinin 
Cc: Alexander Potapenko 
Cc: Marco Elver 
Cc: Peter Collingbourne 
Cc: Evgenii Stepanov 
Cc: Branislav Rankov 
Cc: Kevin Brodsky 
Signed-off-by: Andrew Morton 
Signed-off-by: Linus Torvalds 
Signed-off-by: Greg Kroah-Hartman 
---
 lib/Kconfig.kasan |1 +
 1 file changed, 1 insertion(+)

--- a/lib/Kconfig.kasan
+++ b/lib/Kconfig.kasan
@@ -156,6 +156,7 @@ config KASAN_STACK_ENABLE
 
 config KASAN_STACK
int
+   depends on KASAN_GENERIC || KASAN_SW_TAGS
default 1 if KASAN_STACK_ENABLE || CC_IS_GCC
default 0
 




[PATCH 5.10 276/290] KVM: x86: Ensure deadline timer has truly expired before posting its IRQ

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Sean Christopherson 

commit beda430177f56656e7980dcce93456ffaa35676b upstream.

When posting a deadline timer interrupt, open code the checks guarding
__kvm_wait_lapic_expire() in order to skip the lapic_timer_int_injected()
check in kvm_wait_lapic_expire().  The injection check will always fail
since the interrupt has not yet been injected.  Moving the call after
injection would also be wrong as that wouldn't actually delay delivery
of the IRQ if it is indeed sent via posted interrupt.

Fixes: 010fd37fddf6 ("KVM: LAPIC: Reduce world switch latency caused by 
timer_advance_ns")
Cc: sta...@vger.kernel.org
Signed-off-by: Sean Christopherson 
Message-Id: <20210305021808.3769732-1-sea...@google.com>
Signed-off-by: Paolo Bonzini 
Signed-off-by: Greg Kroah-Hartman 
---
 arch/x86/kvm/lapic.c |   11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1641,7 +1641,16 @@ static void apic_timer_expired(struct kv
}
 
if (kvm_use_posted_timer_interrupt(apic->vcpu)) {
-   kvm_wait_lapic_expire(vcpu);
+   /*
+* Ensure the guest's timer has truly expired before posting an
+* interrupt.  Open code the relevant checks to avoid querying
+* lapic_timer_int_injected(), which will be false since the
+* interrupt isn't yet injected.  Waiting until after injecting
+* is not an option since that won't help a posted interrupt.
+*/
+   if (vcpu->arch.apic->lapic_timer.expired_tscdeadline &&
+   vcpu->arch.apic->lapic_timer.timer_advance_ns)
+   __kvm_wait_lapic_expire(vcpu);
kvm_apic_inject_pending_timer_irqs(apic);
return;
}




[PATCH 5.11 284/306] kasan, mm: fix crash with HW_TAGS and DEBUG_PAGEALLOC

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Andrey Konovalov 

commit f9d79e8dce4077d3c6ab739c808169dfa99af9ef upstream.

Currently, kasan_free_nondeferred_pages()->kasan_free_pages() is called
after debug_pagealloc_unmap_pages(). This causes a crash when
debug_pagealloc is enabled, as HW_TAGS KASAN can't set tags on an
unmapped page.

This patch puts kasan_free_nondeferred_pages() before
debug_pagealloc_unmap_pages() and arch_free_page(), which can also make
the page unavailable.

Link: 
https://lkml.kernel.org/r/24cd7db274090f0e5bc3adcdc7399243668e3171.1614987311.git.andreyk...@google.com
Fixes: 94ab5b61ee16 ("kasan, arm64: enable CONFIG_KASAN_HW_TAGS")
Signed-off-by: Andrey Konovalov 
Cc: Catalin Marinas 
Cc: Will Deacon 
Cc: Vincenzo Frascino 
Cc: Dmitry Vyukov 
Cc: Andrey Ryabinin 
Cc: Alexander Potapenko 
Cc: Marco Elver 
Cc: Peter Collingbourne 
Cc: Evgenii Stepanov 
Cc: Branislav Rankov 
Cc: Kevin Brodsky 
Cc: Christoph Hellwig 
Cc: 
Signed-off-by: Andrew Morton 
Signed-off-by: Linus Torvalds 
Signed-off-by: Greg Kroah-Hartman 
---
 mm/page_alloc.c |8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1282,6 +1282,12 @@ static __always_inline bool free_pages_p
kernel_poison_pages(page, 1 << order);
 
/*
+* With hardware tag-based KASAN, memory tags must be set before the
+* page becomes unavailable via debug_pagealloc or arch_free_page.
+*/
+   kasan_free_nondeferred_pages(page, order);
+
+   /*
 * arch_free_page() can make the page's contents inaccessible.  s390
 * does this.  So nothing which can access the page's contents should
 * happen after this.
@@ -1290,8 +1296,6 @@ static __always_inline bool free_pages_p
 
debug_pagealloc_unmap_pages(page, 1 << order);
 
-   kasan_free_nondeferred_pages(page, order);
-
return true;
 }
 




[PATCH 5.10 273/290] x86/sev-es: Correctly track IRQ states in runtime #VC handler

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Joerg Roedel 

commit 62441a1fb53263bda349b6e5997c3cc5c120d89e upstream.

Call irqentry_nmi_enter()/irqentry_nmi_exit() in the #VC handler to
correctly track the IRQ state during its execution.

Fixes: 0786138c78e79 ("x86/sev-es: Add a Runtime #VC Exception Handler")
Reported-by: Andy Lutomirski 
Signed-off-by: Joerg Roedel 
Signed-off-by: Borislav Petkov 
Cc: sta...@vger.kernel.org # v5.10+
Link: https://lkml.kernel.org/r/20210303141716.29223-5-j...@8bytes.org
Signed-off-by: Greg Kroah-Hartman 
---
 arch/x86/kernel/sev-es.c |6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

--- a/arch/x86/kernel/sev-es.c
+++ b/arch/x86/kernel/sev-es.c
@@ -1258,13 +1258,12 @@ static __always_inline bool on_vc_fallba
 DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
 {
struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
+   irqentry_state_t irq_state;
struct ghcb_state state;
struct es_em_ctxt ctxt;
enum es_result result;
struct ghcb *ghcb;
 
-   lockdep_assert_irqs_disabled();
-
/*
 * Handle #DB before calling into !noinstr code to avoid recursive #DB.
 */
@@ -1273,6 +1272,8 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_co
return;
}
 
+   irq_state = irqentry_nmi_enter(regs);
+   lockdep_assert_irqs_disabled();
instrumentation_begin();
 
/*
@@ -1335,6 +1336,7 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_co
 
 out:
instrumentation_end();
+   irqentry_nmi_exit(regs, irq_state);
 
return;
 




[PATCH 5.10 270/290] x86/sev-es: Introduce ip_within_syscall_gap() helper

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Joerg Roedel 

commit 78a81d88f60ba773cbe890205e1ee67f00502948 upstream.

Introduce a helper to check whether an exception came from the syscall
gap and use it in the SEV-ES code. Extend the check to also cover the
compatibility SYSCALL entry path.

Fixes: 315562c9af3d5 ("x86/sev-es: Adjust #VC IST Stack on entering NMI 
handler")
Signed-off-by: Joerg Roedel 
Signed-off-by: Borislav Petkov 
Cc: sta...@vger.kernel.org # 5.10+
Link: https://lkml.kernel.org/r/20210303141716.29223-2-j...@8bytes.org
Signed-off-by: Greg Kroah-Hartman 
---
 arch/x86/entry/entry_64_compat.S |2 ++
 arch/x86/include/asm/proto.h |1 +
 arch/x86/include/asm/ptrace.h|   15 +++
 arch/x86/kernel/traps.c  |3 +--
 4 files changed, 19 insertions(+), 2 deletions(-)

--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -210,6 +210,8 @@ SYM_CODE_START(entry_SYSCALL_compat)
/* Switch to the kernel stack */
movqPER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
+SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL)
+
/* Construct struct pt_regs on stack */
pushq   $__USER32_DS/* pt_regs->ss */
pushq   %r8 /* pt_regs->sp */
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -25,6 +25,7 @@ void __end_SYSENTER_singlestep_region(vo
 void entry_SYSENTER_compat(void);
 void __end_entry_SYSENTER_compat(void);
 void entry_SYSCALL_compat(void);
+void entry_SYSCALL_compat_safe_stack(void);
 void entry_INT80_compat(void);
 #ifdef CONFIG_XEN_PV
 void xen_entry_INT80_compat(void);
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -94,6 +94,8 @@ struct pt_regs {
 #include 
 #endif
 
+#include 
+
 struct cpuinfo_x86;
 struct task_struct;
 
@@ -175,6 +177,19 @@ static inline bool any_64bit_mode(struct
 #ifdef CONFIG_X86_64
 #define current_user_stack_pointer()   current_pt_regs()->sp
 #define compat_user_stack_pointer()current_pt_regs()->sp
+
+static inline bool ip_within_syscall_gap(struct pt_regs *regs)
+{
+   bool ret = (regs->ip >= (unsigned long)entry_SYSCALL_64 &&
+   regs->ip <  (unsigned long)entry_SYSCALL_64_safe_stack);
+
+#ifdef CONFIG_IA32_EMULATION
+   ret = ret || (regs->ip >= (unsigned long)entry_SYSCALL_compat &&
+ regs->ip <  (unsigned 
long)entry_SYSCALL_compat_safe_stack);
+#endif
+
+   return ret;
+}
 #endif
 
 static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -686,8 +686,7 @@ asmlinkage __visible noinstr struct pt_r
 * In the SYSCALL entry path the RSP value comes from user-space - don't
 * trust it and switch to the current kernel stack
 */
-   if (regs->ip >= (unsigned long)entry_SYSCALL_64 &&
-   regs->ip <  (unsigned long)entry_SYSCALL_64_safe_stack) {
+   if (ip_within_syscall_gap(regs)) {
sp = this_cpu_read(cpu_current_top_of_stack);
goto sync;
}




[PATCH 5.10 268/290] binfmt_misc: fix possible deadlock in bm_register_write

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Lior Ribak 

commit e7850f4d844e0acfac7e570af611d89deade3146 upstream.

There is a deadlock in bm_register_write:

First, in the begining of the function, a lock is taken on the binfmt_misc
root inode with inode_lock(d_inode(root)).

Then, if the user used the MISC_FMT_OPEN_FILE flag, the function will call
open_exec on the user-provided interpreter.

open_exec will call a path lookup, and if the path lookup process includes
the root of binfmt_misc, it will try to take a shared lock on its inode
again, but it is already locked, and the code will get stuck in a deadlock

To reproduce the bug:
$ echo ":i:E::ii::/proc/sys/fs/binfmt_misc/bla:F" > 
/proc/sys/fs/binfmt_misc/register

backtrace of where the lock occurs (#5):
0  schedule () at ./arch/x86/include/asm/current.h:15
1  0x81b51237 in rwsem_down_read_slowpath (sem=0x888003b202e0, 
count=, state=state@entry=2) at kernel/locking/rwsem.c:992
2  0x81b5150a in __down_read_common (state=2, sem=) at 
kernel/locking/rwsem.c:1213
3  __down_read (sem=) at kernel/locking/rwsem.c:1222
4  down_read (sem=) at kernel/locking/rwsem.c:1355
5  0x811ee22a in inode_lock_shared (inode=) at 
./include/linux/fs.h:783
6  open_last_lookups (op=0xc922fe34, file=0x888004098600, 
nd=0xc922fd10) at fs/namei.c:3177
7  path_openat (nd=nd@entry=0xc922fd10, op=op@entry=0xc922fe34, 
flags=flags@entry=65) at fs/namei.c:3366
8  0x811efe1c in do_filp_open (dfd=, 
pathname=pathname@entry=0x8880031b9000, op=op@entry=0xc922fe34) at 
fs/namei.c:3396
9  0x811e493f in do_open_execat (fd=fd@entry=-100, 
name=name@entry=0x8880031b9000, flags=, flags@entry=0) at 
fs/exec.c:913
10 0x811e4a92 in open_exec (name=) at fs/exec.c:948
11 0x8124aa84 in bm_register_write (file=, 
buffer=, count=19, ppos=) at fs/binfmt_misc.c:682
12 0x811decd2 in vfs_write (file=file@entry=0x888004098500, 
buf=buf@entry=0xa758d0 ":i:E::ii::i:CF
", count=count@entry=19, pos=pos@entry=0xc922ff10) at 
fs/read_write.c:603
13 0x811defda in ksys_write (fd=, buf=0xa758d0 
":i:E::ii::i:CF
", count=19) at fs/read_write.c:658
14 0x81b49813 in do_syscall_64 (nr=, 
regs=0xc922ff58) at arch/x86/entry/common.c:46
15 0x81c0007c in entry_SYSCALL_64 () at arch/x86/entry/entry_64.S:120

To solve the issue, the open_exec call is moved to before the write
lock is taken by bm_register_write

Link: https://lkml.kernel.org/r/20210228224414.95962-1-liorri...@gmail.com
Fixes: 948b701a607f1 ("binfmt_misc: add persistent opened binary handler for 
containers")
Signed-off-by: Lior Ribak 
Acked-by: Helge Deller 
Cc: Al Viro 
Cc: 
Signed-off-by: Andrew Morton 
Signed-off-by: Linus Torvalds 
Signed-off-by: Greg Kroah-Hartman 
---
 fs/binfmt_misc.c |   29 ++---
 1 file changed, 14 insertions(+), 15 deletions(-)

--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -647,12 +647,24 @@ static ssize_t bm_register_write(struct
struct super_block *sb = file_inode(file)->i_sb;
struct dentry *root = sb->s_root, *dentry;
int err = 0;
+   struct file *f = NULL;
 
e = create_entry(buffer, count);
 
if (IS_ERR(e))
return PTR_ERR(e);
 
+   if (e->flags & MISC_FMT_OPEN_FILE) {
+   f = open_exec(e->interpreter);
+   if (IS_ERR(f)) {
+   pr_notice("register: failed to install interpreter file 
%s\n",
+e->interpreter);
+   kfree(e);
+   return PTR_ERR(f);
+   }
+   e->interp_file = f;
+   }
+
inode_lock(d_inode(root));
dentry = lookup_one_len(e->name, root, strlen(e->name));
err = PTR_ERR(dentry);
@@ -676,21 +688,6 @@ static ssize_t bm_register_write(struct
goto out2;
}
 
-   if (e->flags & MISC_FMT_OPEN_FILE) {
-   struct file *f;
-
-   f = open_exec(e->interpreter);
-   if (IS_ERR(f)) {
-   err = PTR_ERR(f);
-   pr_notice("register: failed to install interpreter file 
%s\n", e->interpreter);
-   simple_release_fs(_mnt, _count);
-   iput(inode);
-   inode = NULL;
-   goto out2;
-   }
-   e->interp_file = f;
-   }
-
e->dentry = dget(dentry);
inode->i_private = e;
inode->i_fop = _entry_operations;
@@ -707,6 +704,8 @@ out:
inode_unlock(d_inode(root));
 
if (err) {
+   if (f)
+   filp_close(f, NULL);
kfree(e);
return err;
}




[PATCH 5.10 269/290] x86/unwind/orc: Disable KASAN checking in the ORC unwinder, part 2

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Josh Poimboeuf 

commit e504e74cc3a2c092b05577ce3e8e013fae7d94e6 upstream.

KASAN reserves "redzone" areas between stack frames in order to detect
stack overruns.  A read or write to such an area triggers a KASAN
"stack-out-of-bounds" BUG.

Normally, the ORC unwinder stays in-bounds and doesn't access the
redzone.  But sometimes it can't find ORC metadata for a given
instruction.  This can happen for code which is missing ORC metadata, or
for generated code.  In such cases, the unwinder attempts to fall back
to frame pointers, as a best-effort type thing.

This fallback often works, but when it doesn't, the unwinder can get
confused and go off into the weeds into the KASAN redzone, triggering
the aforementioned KASAN BUG.

But in this case, the unwinder's confusion is actually harmless and
working as designed.  It already has checks in place to prevent
off-stack accesses, but those checks get short-circuited by the KASAN
BUG.  And a BUG is a lot more disruptive than a harmless unwinder
warning.

Disable the KASAN checks by using READ_ONCE_NOCHECK() for all stack
accesses.  This finishes the job started by commit 881125bfe65b
("x86/unwind: Disable KASAN checking in the ORC unwinder"), which only
partially fixed the issue.

Fixes: ee9f8fce9964 ("x86/unwind: Add the ORC unwinder")
Reported-by: Ivan Babrou 
Signed-off-by: Josh Poimboeuf 
Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Borislav Petkov 
Reviewed-by: Steven Rostedt (VMware) 
Tested-by: Ivan Babrou 
Cc: sta...@kernel.org
Link: 
https://lkml.kernel.org/r/9583327904ebbbeda399eca9c56d6c7085ac20fe.1612534649.git.jpoim...@redhat.com
Signed-off-by: Greg Kroah-Hartman 
---
 arch/x86/kernel/unwind_orc.c |   12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -367,8 +367,8 @@ static bool deref_stack_regs(struct unwi
if (!stack_access_ok(state, addr, sizeof(struct pt_regs)))
return false;
 
-   *ip = regs->ip;
-   *sp = regs->sp;
+   *ip = READ_ONCE_NOCHECK(regs->ip);
+   *sp = READ_ONCE_NOCHECK(regs->sp);
return true;
 }
 
@@ -380,8 +380,8 @@ static bool deref_stack_iret_regs(struct
if (!stack_access_ok(state, addr, IRET_FRAME_SIZE))
return false;
 
-   *ip = regs->ip;
-   *sp = regs->sp;
+   *ip = READ_ONCE_NOCHECK(regs->ip);
+   *sp = READ_ONCE_NOCHECK(regs->sp);
return true;
 }
 
@@ -402,12 +402,12 @@ static bool get_reg(struct unwind_state
return false;
 
if (state->full_regs) {
-   *val = ((unsigned long *)state->regs)[reg];
+   *val = READ_ONCE_NOCHECK(((unsigned long *)state->regs)[reg]);
return true;
}
 
if (state->prev_regs) {
-   *val = ((unsigned long *)state->prev_regs)[reg];
+   *val = READ_ONCE_NOCHECK(((unsigned long 
*)state->prev_regs)[reg]);
return true;
}
 




[PATCH 5.11 248/306] NFSv4.2: fix return value of _nfs4_get_security_label()

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Ondrej Mosnacek 

[ Upstream commit 53cb245454df5b13d7063162afd7a785aed6ebf2 ]

An xattr 'get' handler is expected to return the length of the value on
success, yet _nfs4_get_security_label() (and consequently also
nfs4_xattr_get_nfs4_label(), which is used as an xattr handler) returns
just 0 on success.

Fix this by returning label.len instead, which contains the length of
the result.

Fixes: aa9c2669626c ("NFS: Client implementation of Labeled-NFS")
Signed-off-by: Ondrej Mosnacek 
Reviewed-by: James Morris 
Reviewed-by: Paul Moore 
Signed-off-by: Anna Schumaker 
Signed-off-by: Sasha Levin 
---
 fs/nfs/nfs4proc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index fc8bbfd9beb3..7eb44f37558c 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -5972,7 +5972,7 @@ static int _nfs4_get_security_label(struct inode *inode, 
void *buf,
return ret;
if (!(fattr.valid & NFS_ATTR_FATTR_V4_SECURITY_LABEL))
return -ENOENT;
-   return 0;
+   return label.len;
 }
 
 static int nfs4_get_security_label(struct inode *inode, void *buf,
-- 
2.30.1





[PATCH 5.10 232/290] staging: comedi: addi_apci_1032: Fix endian problem for COS sample

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Ian Abbott 

commit 25317f428a78fde71b2bf3f24d05850f08a73a52 upstream.

The Change-Of-State (COS) subdevice supports Comedi asynchronous
commands to read 16-bit change-of-state values.  However, the interrupt
handler is calling `comedi_buf_write_samples()` with the address of a
32-bit integer `>state`.  On bigendian architectures, it will copy 2
bytes from the wrong end of the 32-bit integer.  Fix it by transferring
the value via a 16-bit integer.

Fixes: 6bb45f2b0c86 ("staging: comedi: addi_apci_1032: use 
comedi_buf_write_samples()")
Cc:  # 3.19+
Signed-off-by: Ian Abbott 
Link: https://lore.kernel.org/r/20210223143055.257402-2-abbo...@mev.co.uk
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/staging/comedi/drivers/addi_apci_1032.c |4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

--- a/drivers/staging/comedi/drivers/addi_apci_1032.c
+++ b/drivers/staging/comedi/drivers/addi_apci_1032.c
@@ -260,6 +260,7 @@ static irqreturn_t apci1032_interrupt(in
struct apci1032_private *devpriv = dev->private;
struct comedi_subdevice *s = dev->read_subdev;
unsigned int ctrl;
+   unsigned short val;
 
/* check interrupt is from this device */
if ((inl(devpriv->amcc_iobase + AMCC_OP_REG_INTCSR) &
@@ -275,7 +276,8 @@ static irqreturn_t apci1032_interrupt(in
outl(ctrl & ~APCI1032_CTRL_INT_ENA, dev->iobase + APCI1032_CTRL_REG);
 
s->state = inl(dev->iobase + APCI1032_STATUS_REG) & 0x;
-   comedi_buf_write_samples(s, >state, 1);
+   val = s->state;
+   comedi_buf_write_samples(s, , 1);
comedi_handle_events(dev, s);
 
/* enable the interrupt */




[PATCH 5.11 249/306] block: rsxx: fix error return code of rsxx_pci_probe()

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Jia-Ju Bai 

[ Upstream commit df66617bfe87487190a60783d26175b65d2502ce ]

When create_singlethread_workqueue returns NULL to card->event_wq, no
error return code of rsxx_pci_probe() is assigned.

To fix this bug, st is assigned with -ENOMEM in this case.

Fixes: 8722ff8cdbfa ("block: IBM RamSan 70/80 device driver")
Reported-by: TOTE Robot 
Signed-off-by: Jia-Ju Bai 
Link: https://lore.kernel.org/r/20210310033017.4023-1-baijiaju1...@gmail.com
Signed-off-by: Jens Axboe 
Signed-off-by: Sasha Levin 
---
 drivers/block/rsxx/core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c
index 5ac1881396af..227e1be4c6f9 100644
--- a/drivers/block/rsxx/core.c
+++ b/drivers/block/rsxx/core.c
@@ -871,6 +871,7 @@ static int rsxx_pci_probe(struct pci_dev *dev,
card->event_wq = create_singlethread_workqueue(DRIVER_NAME"_event");
if (!card->event_wq) {
dev_err(CARD_TO_DEV(card), "Failed card event setup.\n");
+   st = -ENOMEM;
goto failed_event_handler;
}
 
-- 
2.30.1





[PATCH 5.11 247/306] NFS: Don't gratuitously clear the inode cache when lookup failed

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Trond Myklebust 

[ Upstream commit 47397915ede0192235474b145ebcd81b37b03624 ]

The fact that the lookup revalidation failed, does not mean that the
inode contents have changed.

Fixes: 5ceb9d7fdaaf ("NFS: Refactor nfs_lookup_revalidate()")
Signed-off-by: Trond Myklebust 
Signed-off-by: Anna Schumaker 
Signed-off-by: Sasha Levin 
---
 fs/nfs/dir.c | 20 
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 7bcc6fcf1096..4db3018776f6 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1444,18 +1444,14 @@ nfs_lookup_revalidate_done(struct inode *dir, struct 
dentry *dentry,
__func__, dentry);
return 1;
case 0:
-   if (inode && S_ISDIR(inode->i_mode)) {
-   /* Purge readdir caches. */
-   nfs_zap_caches(inode);
-   /*
-* We can't d_drop the root of a disconnected tree:
-* its d_hash is on the s_anon list and d_drop() would 
hide
-* it from shrink_dcache_for_unmount(), leading to busy
-* inodes on unmount and further oopses.
-*/
-   if (IS_ROOT(dentry))
-   return 1;
-   }
+   /*
+* We can't d_drop the root of a disconnected tree:
+* its d_hash is on the s_anon list and d_drop() would hide
+* it from shrink_dcache_for_unmount(), leading to busy
+* inodes on unmount and further oopses.
+*/
+   if (inode && IS_ROOT(dentry))
+   return 1;
dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is invalid\n",
__func__, dentry);
return 0;
-- 
2.30.1





[PATCH 5.10 230/290] staging: rtl8712: Fix possible buffer overflow in r8712_sitesurvey_cmd

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Lee Gibson 

commit b93c1e3981af19527beee1c10a2bef67a228c48c upstream.

Function r8712_sitesurvey_cmd calls memcpy without checking the length.
A user could control that length and trigger a buffer overflow.
Fix by checking the length is within the maximum allowed size.

Signed-off-by: Lee Gibson 
Link: https://lore.kernel.org/r/20210301132648.420296-1-lee...@gmail.com
Cc: stable 
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/staging/rtl8712/rtl871x_cmd.c |6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

--- a/drivers/staging/rtl8712/rtl871x_cmd.c
+++ b/drivers/staging/rtl8712/rtl871x_cmd.c
@@ -192,8 +192,10 @@ u8 r8712_sitesurvey_cmd(struct _adapter
psurveyPara->ss_ssidlen = 0;
memset(psurveyPara->ss_ssid, 0, IW_ESSID_MAX_SIZE + 1);
if (pssid && pssid->SsidLength) {
-   memcpy(psurveyPara->ss_ssid, pssid->Ssid, pssid->SsidLength);
-   psurveyPara->ss_ssidlen = cpu_to_le32(pssid->SsidLength);
+   int len = min_t(int, pssid->SsidLength, IW_ESSID_MAX_SIZE);
+
+   memcpy(psurveyPara->ss_ssid, pssid->Ssid, len);
+   psurveyPara->ss_ssidlen = cpu_to_le32(len);
}
set_fwstate(pmlmepriv, _FW_UNDER_SURVEY);
r8712_enqueue_cmd(pcmdpriv, ph2c);




[PATCH 5.10 225/290] staging: rtl8192u: fix ->ssid overflow in r8192_wx_set_scan()

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Dan Carpenter 

commit 87107518d7a93fec6cdb2559588862afeee800fb upstream.

We need to cap len at IW_ESSID_MAX_SIZE (32) to avoid memory corruption.
This can be controlled by the user via the ioctl.

Fixes: 5f53d8ca3d5d ("Staging: add rtl8192SU wireless usb driver")
Signed-off-by: Dan Carpenter 
Cc: stable 
Link: https://lore.kernel.org/r/YEHoAWMOSZBUw91F@mwanda
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/staging/rtl8192u/r8192U_wx.c |6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

--- a/drivers/staging/rtl8192u/r8192U_wx.c
+++ b/drivers/staging/rtl8192u/r8192U_wx.c
@@ -331,8 +331,10 @@ static int r8192_wx_set_scan(struct net_
struct iw_scan_req *req = (struct iw_scan_req *)b;
 
if (req->essid_len) {
-   ieee->current_network.ssid_len = req->essid_len;
-   memcpy(ieee->current_network.ssid, req->essid, 
req->essid_len);
+   int len = min_t(int, req->essid_len, IW_ESSID_MAX_SIZE);
+
+   ieee->current_network.ssid_len = len;
+   memcpy(ieee->current_network.ssid, req->essid, len);
}
}
 




[PATCH 5.10 229/290] staging: ks7010: prevent buffer overflow in ks_wlan_set_scan()

2021-03-15 Thread gregkh
From: Greg Kroah-Hartman 

From: Dan Carpenter 

commit e163b9823a0b08c3bb8dc4f5b4b5c221c24ec3e5 upstream.

The user can specify a "req->essid_len" of up to 255 but if it's
over IW_ESSID_MAX_SIZE (32) that can lead to memory corruption.

Fixes: 13a9930d15b4 ("staging: ks7010: add driver from Nanonote 
extra-repository")
Signed-off-by: Dan Carpenter 
Cc: stable 
Link: https://lore.kernel.org/r/YD4fS8+HmM/Qmrw6@mwanda
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/staging/ks7010/ks_wlan_net.c |6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

--- a/drivers/staging/ks7010/ks_wlan_net.c
+++ b/drivers/staging/ks7010/ks_wlan_net.c
@@ -1120,6 +1120,7 @@ static int ks_wlan_set_scan(struct net_d
 {
struct ks_wlan_private *priv = netdev_priv(dev);
struct iw_scan_req *req = NULL;
+   int len;
 
if (priv->sleep_mode == SLP_SLEEP)
return -EPERM;
@@ -1129,8 +1130,9 @@ static int ks_wlan_set_scan(struct net_d
if (wrqu->data.length == sizeof(struct iw_scan_req) &&
wrqu->data.flags & IW_SCAN_THIS_ESSID) {
req = (struct iw_scan_req *)extra;
-   priv->scan_ssid_len = req->essid_len;
-   memcpy(priv->scan_ssid, req->essid, priv->scan_ssid_len);
+   len = min_t(int, req->essid_len, IW_ESSID_MAX_SIZE);
+   priv->scan_ssid_len = len;
+   memcpy(priv->scan_ssid, req->essid, len);
} else {
priv->scan_ssid_len = 0;
}




  1   2   3   4   5   6   7   8   9   10   >