[PATCH 2/2] x86/microcode/intel: Save pointer to ucode patch for early AP loading

2017-06-12 Thread Borislav Petkov
From: Borislav Petkov 

Normally, when the initrd is gone, we can't search it for microcode
blobs to apply anymore. For that we need to stash away the patch in our
own storage.

And save_microcode_in_initrd_intel() looks like the proper place to
do that from. So in order for early loading to work, invalidate the
intel_ucode_patch pointer to the patch *before* scanning the initrd one
last time.

If the scanning code finds a microcode patch, it will assign that
pointer again, this time with our own storage's address.

This way, early microcode application during resume-from-RAM works too,
even after the initrd is long gone.

Signed-off-by: Borislav Petkov 
---
 arch/x86/kernel/cpu/microcode/intel.c | 25 +
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index f522415bf9e5..d525a0bd7d28 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -166,7 +166,7 @@ static struct ucode_patch *__alloc_microcode_buf(void *data, unsigned int size)
 static void save_microcode_patch(void *data, unsigned int size)
 {
struct microcode_header_intel *mc_hdr, *mc_saved_hdr;
-   struct ucode_patch *iter, *tmp, *p;
+   struct ucode_patch *iter, *tmp, *p = NULL;
bool prev_found = false;
unsigned int sig, pf;
 
@@ -202,6 +202,18 @@ static void save_microcode_patch(void *data, unsigned int size)
	else
		list_add_tail(&p->plist, &microcode_cache);
}
+
+   /*
+* Save for early loading. On 32-bit, that needs to be a physical
+* address as the APs are running from physical addresses, before
+* paging has been enabled.
+*/
+   if (p) {
+   if (IS_ENABLED(CONFIG_X86_32))
+   intel_ucode_patch = (struct microcode_intel *)__pa_nodebug(p->data);
+   else
+   intel_ucode_patch = p->data;
+   }
 }
 
 static int microcode_sanity_check(void *mc, int print_err)
@@ -607,6 +619,14 @@ int __init save_microcode_in_initrd_intel(void)
struct ucode_cpu_info uci;
struct cpio_data cp;
 
+   /*
+* initrd is going away, clear patch ptr. We will scan the microcode one
+* last time before jettisoning and save a patch, if found. Then we will
+* update that pointer too, with a stable patch address to use when
+* resuming the cores.
+*/
+   intel_ucode_patch = NULL;
+
if (!load_builtin_intel_microcode())
cp = find_microcode_in_initrd(ucode_path, false);
 
@@ -619,9 +639,6 @@ int __init save_microcode_in_initrd_intel(void)
 
show_saved_mc();
 
-   /* initrd is going away, clear patch ptr. */
-   intel_ucode_patch = NULL;
-
return 0;
 }
 
-- 
2.13.0



[PATCH 1/2] x86/microcode: Look for the initrd at the proper address on 32-bit

2017-06-12 Thread Borislav Petkov
From: Borislav Petkov 

Early during boot, the BSP finds the ramdisk's position from boot_params
but by the time the APs get to boot, the BSP has continued executing and has
potentially managed to relocate that ramdisk.

And in that case, the APs need to find the ramdisk at its new position,
in *physical* memory as they're running before paging has been enabled.

Thus, get the updated physical location of the ramdisk which is in the
relocated_ramdisk variable.

Signed-off-by: Borislav Petkov 
---
 arch/x86/kernel/cpu/microcode/core.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index e53d3c909840..053e5cd1dce0 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -290,6 +290,18 @@ struct cpio_data find_microcode_in_initrd(const char *path, bool use_pa)
return (struct cpio_data){ NULL, 0, "" };
if (initrd_start)
start = initrd_start;
+   } else {
+   /*
+* The picture on 32-bit is a bit different: we need to get
+* the *physical* address to which the ramdisk was relocated,
+* i.e., relocated_ramdisk (not initrd_start) and since we're
+* running from physical addresses, we need to access
+* relocated_ramdisk through its *physical* address.
+*/
+   u64 *rr = (u64 *)__pa_nodebug(&relocated_ramdisk);
+
+   if (*rr)
+   start = *rr;
}
 
return find_cpio_data(path, (void *)start, size, NULL);
-- 
2.13.0



Re: [HMM 07/15] mm/ZONE_DEVICE: new type of ZONE_DEVICE for unaddressable memory v3

2017-06-12 Thread Jerome Glisse
On Thu, Jun 08, 2017 at 08:55:05PM -0700, John Hubbard wrote:
> On 05/24/2017 10:20 AM, Jérôme Glisse wrote:
> [...8<...]
> > +#if IS_ENABLED(CONFIG_DEVICE_PRIVATE)
> > +int device_private_entry_fault(struct vm_area_struct *vma,
> > +  unsigned long addr,
> > +  swp_entry_t entry,
> > +  unsigned int flags,
> > +  pmd_t *pmdp)
> > +{
> > +   struct page *page = device_private_entry_to_page(entry);
> > +
> > +   /*
> > +* The page_fault() callback must migrate page back to system memory
> > +* so that CPU can access it. This might fail for various reasons
> > +* (device issue, device was unsafely unplugged, ...). When such
> > +* error conditions happen, the callback must return VM_FAULT_SIGBUS.
> > +*
> > +* Note that because memory cgroup charges are accounted to the device
> > +* memory, this should never fail because of memory restrictions (but
> > +* allocation of regular system page might still fail because we are
> > +* out of memory).
> > +*
> > +* There is a more in-depth description of what that callback can and
> > +* cannot do, in include/linux/memremap.h
> > +*/
> > +   return page->pgmap->page_fault(vma, addr, page, flags, pmdp);
> > +}
> > +EXPORT_SYMBOL(device_private_entry_fault);
> > +#endif /* CONFIG_DEVICE_PRIVATE */
> > +
> >   static void pgmap_radix_release(struct resource *res)
> >   {
> > resource_size_t key, align_start, align_size, align_end;
> > @@ -321,6 +351,10 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
> > }
> > pgmap->ref = ref;
> > pgmap->res = &page_map->res;
> > +   pgmap->type = MEMORY_DEVICE_PUBLIC;
> > +   pgmap->page_fault = NULL;
> > +   pgmap->page_free = NULL;
> > +   pgmap->data = NULL;
> > mutex_lock(&pgmap_lock);
> > error = 0;
> > diff --git a/mm/Kconfig b/mm/Kconfig
> > index d744cff..f5357ff 100644
> > --- a/mm/Kconfig
> > +++ b/mm/Kconfig
> > @@ -736,6 +736,19 @@ config ZONE_DEVICE
> >   If FS_DAX is enabled, then say Y.
> > +config DEVICE_PRIVATE
> > +   bool "Unaddressable device memory (GPU memory, ...)"
> > +   depends on X86_64
> > +   depends on ZONE_DEVICE
> > +   depends on MEMORY_HOTPLUG
> > +   depends on MEMORY_HOTREMOVE
> > +   depends on SPARSEMEM_VMEMMAP
> > +
> > +   help
> > + Allows creation of struct pages to represent unaddressable device
> > + memory; i.e., memory that is only accessible from the device (or
> > + group of devices).
> > +
> 
> Hi Jerome,
> 
> CONFIG_DEVICE_PRIVATE has caused me some problems, because it's not coupled 
> to HMM_DEVMEM.
> 
> To fix this, my first choice would be to just s/DEVICE_PRIVATE/HMM_DEVMEM/g
> , because I don't see any value to DEVICE_PRIVATE as an independent Kconfig
> choice. It's complicating the Kconfig choices, and adding problems. However,
> if DEVICE_PRIVATE must be kept, then something like this also fixes my HMM
> tests:


Better is a "depends on", so that you cannot select HMM_DEVMEM if you do not have
DEVICE_PRIVATE. But maybe these can be merged under one config option; I do not
have any strong preference personally. HMM_DEVMEM just enables helper code
that makes using CONFIG_DEVICE_PRIVATE easier for device drivers, but it is not
strictly needed, i.e. a device driver can reimplement what HMM_DEVMEM provides.

I might just merge this kernel option as part of the CDM patchset that I am about
to send.

Cheers,
Jérôme

> 
> From: John Hubbard 
> Date: Thu, 8 Jun 2017 20:13:13 -0700
> Subject: [PATCH] hmm: select CONFIG_DEVICE_PRIVATE with HMM_DEVMEM
> 
> The HMM_DEVMEM feature is useless without the various
> features that are guarded with CONFIG_DEVICE_PRIVATE.
> Therefore, auto-select DEVICE_PRIVATE when selecting
> HMM_DEVMEM.
> 
> Otherwise, you can easily end up with a partially
> working HMM installation: if you select HMM_DEVMEM,
> but do not select DEVICE_PRIVATE, then faulting and
> migrating to a device (such as a GPU) works, but CPU
> page faults are ignored, so the page never migrates
> back to the CPU.
> 
> Signed-off-by: John Hubbard 
> ---
>  mm/Kconfig | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/mm/Kconfig b/mm/Kconfig
> index 46296d5d7570..23d2f5ec865e 100644
> --- a/mm/Kconfig
> +++ b/mm/Kconfig
> @@ -318,6 +318,8 @@ config HMM_DEVMEM
>   bool "HMM device memory helpers (to leverage ZONE_DEVICE)"
>   depends on ARCH_HAS_HMM
>   select HMM
> + select DEVICE_PRIVATE
> +
>   help
> HMM devmem is a set of helper routines to leverage the ZONE_DEVICE
> feature. This is just to avoid having device drivers to replicating a 
> lot
> -- 
> 2.13.1
> 
> This is a minor thing, and I don't think this needs to hold up merging HMM
> v23 into -mm, IMHO. But I would like it fixed at some point.
> 
> thanks,
> --
> John Hubbard
> NVIDIA
> 
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majord...@kvack.org.  

Re: [kernel-hardening] [PATCH 01/11] S.A.R.A. Documentation

2017-06-12 Thread Jann Horn
On Mon, Jun 12, 2017 at 6:56 PM, Salvatore Mesoraca
 wrote:
> Adding documentation for S.A.R.A. LSM.
>
> Signed-off-by: Salvatore Mesoraca 
[...]
> +/proc/PID/attr/sara/wxprot interface
> +
> +The `procattr` interface can be used by a program to discover which
> +WX Protection features are enabled and/or to tighten them: protection
> +can't be softened via procattr.
> +The interface is simple: it's a text file with an hexadecimal
> +number in it representing enabled features (more information can be
> +found in the `Flags values`_ section). Via this interface it is also
> +possible to perform a complete memory scan to remove the write permission
> +from pages that are both writable and executable.
> +
> +Protections that prevent the runtime creation of executable code
> +can be troublesome for all those programs that actually need to do it
> +e.g. programs shipping with a JIT compiler built-in.
> +Given that it's possible to segregate the part that runs untrusted
> +code from the rest through a fork, this feature can be use to run the JIT
> +compiler with few restrictions while enforcing full WX Protection in the
> +rest of the program.

As far as I can tell, the wxprot interface in procfs, when used as
/proc/PID/attr/sara/wxprot, actually only sets restrictions on one of the
threads.
The documentation doesn't seem to mention this.


> +.. [3] `saralib`_

This link is broken.


Re: [PATCH V3 1/7] tty: serial: lpuart: introduce lpuart_soc_data to represent SoC property

2017-06-12 Thread Andy Shevchenko
On Mon, Jun 12, 2017 at 6:37 PM, Dong Aisheng  wrote:
> This is used to dynamically check the SoC specific lpuart properies.
> Currently only the iotype is added, it functions the same as before.
> With this, new chips with different iotype will be more easily added.


> +struct lpuart_soc_data {
> +   char	iotype;
> +};
> +
> +static const struct lpuart_soc_data vf_data = {
> +   .iotype = UPIO_MEM,
> +};
> +
> +static const struct lpuart_soc_data ls_data = {
> +   .iotype = UPIO_MEM32BE,

> +

Redundant.

> +};

And now most interesting part...

> -   if (sport->lpuart32)
> +   if (sport->port.iotype & UPIO_MEM32BE)
> lpuart32_write(sport->port.x_char, 
> sport->port.membase + UARTDATA);
> else
> writeb(sport->port.x_char, sport->port.membase + 
> UARTDR);

> -   if (sport->lpuart32)
> +   if (sport->port.iotype & UPIO_MEM32BE)
> lpuart32_stop_tx(&sport->port);
> else
> lpuart_stop_tx(&sport->port);

> -   if (sport->lpuart32)
> +   if (sport->port.iotype & UPIO_MEM32BE)
> lpuart32_transmit_buffer(sport);
> else
> lpuart_transmit_buffer(sport);

> -   if (sport->lpuart32)
> +   if (sport->port.iotype & UPIO_MEM32BE)
> lpuart32_console_get_options(sport, &baud, &parity, &bits);
> else
> lpuart_console_get_options(sport, &baud, &parity, &bits);

> -   if (sport->lpuart32)
> +   if (sport->port.iotype & UPIO_MEM32BE)
> lpuart32_setup_watermark(sport);
> else
> lpuart_setup_watermark(sport);

> -   if (sport->lpuart32)
> +   sport->port.iotype = sdata->iotype;
> +   if (sport->port.iotype & UPIO_MEM32BE)
> sport->port.ops = &lpuart32_pops;
> else
> sport->port.ops = &lpuart_pops;

> -   if (sport->lpuart32)
> +   if (sport->port.iotype & UPIO_MEM32BE)
> lpuart_reg.cons = LPUART32_CONSOLE;
> else
> lpuart_reg.cons = LPUART_CONSOLE;

...all of the above: since you introduced a nice struct, you can get rid of the
conditionals. Instead they might be members of the struct above (see the sketch
after these remarks).

(I dunno if it's good to have in this patch, but at least a follow-up
could be nice to have)

> -   if (sport->lpuart32) {
> +   if (sport->port.iotype & UPIO_MEM32BE) {
> /* disable Rx/Tx and interrupts */
> temp = lpuart32_read(sport->port.membase + UARTCTRL);
> temp &= ~(UARTCTRL_TE | UARTCTRL_TIE | UARTCTRL_TCIE);

> -   if (sport->lpuart32) {
> +   if (sport->port.iotype & UPIO_MEM32BE) {
> lpuart32_setup_watermark(sport);
> temp = lpuart32_read(sport->port.membase + UARTCTRL);
> temp |= (UARTCTRL_RIE | UARTCTRL_TIE | UARTCTRL_RE |

The ones above are questionable; there might be no need to convert them.

So, in any case, the above is a sighting which you could address (separately).
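
For illustration, a minimal sketch of what carrying those per-SoC pointers in
lpuart_soc_data could look like (the ops/cons members and their wiring are
hypothetical, not part of this patch):

struct lpuart_soc_data {
	char iotype;
	const struct uart_ops *ops;	/* e.g. &lpuart32_pops vs. &lpuart_pops */
	struct console *cons;		/* e.g. LPUART32_CONSOLE vs. LPUART_CONSOLE */
};

static const struct lpuart_soc_data ls_data = {
	.iotype	= UPIO_MEM32BE,
	.ops	= &lpuart32_pops,
	.cons	= LPUART32_CONSOLE,
};

	/* probe() would then copy the pointers instead of testing the iotype: */
	sport->port.iotype = sdata->iotype;
	sport->port.ops = sdata->ops;
	lpuart_reg.cons = sdata->cons;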

-- 
With Best Regards,
Andy Shevchenko



[PATCH] edac, i5000, i5400: fix definition of nrecmemb register

2017-06-12 Thread Jérémy Lefaure
In the i5000 and i5400 edac drivers, the register nrecmemb is defined as a
16-bit value, which results in wrong shifts in the code:
  CHECK   drivers/edac/i5000_edac.c
drivers/edac/i5000_edac.c:485:15: warning: right shift by bigger than
source value
drivers/edac/i5000_edac.c:580:23: warning: right shift by bigger than
source value
  CC  drivers/edac/i5000_edac.o
  CHECK   drivers/edac/i5400_edac.c
drivers/edac/i5400_edac.c:391:36: warning: right shift by bigger than
source value
drivers/edac/i5400_edac.c:401:37: warning: right shift by bigger than
source value
  CC  drivers/edac/i5400_edac.o

In the datasheets ([1], section 3.9.22.20 and [2], section 3.9.22.21),
this register is a 32-bit register. Using a u32 value for the register fixes
the wrong-shift warnings and matches the datasheet.

This patch also fixes the mask used to access the CAS bits [16 to 28] in
the i5000 edac driver.

[1]: 
https://www.intel.com/content/dam/doc/datasheet/5000p-5000v-5000z-chipset-memory-controller-hub-datasheet.pdf
[2]: 
https://www.intel.se/content/dam/doc/datasheet/5400-chipset-memory-controller-hub-datasheet.pdf

Signed-off-by: Jérémy Lefaure 
---

I have found this error thanks to the sparse tool. Please note that this patch
hasn't been tested on real hardware.
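
As a standalone illustration of the problem (not code from the driver): with a
16-bit copy of the register, the CAS shift can never extract anything, which is
exactly what sparse warns about.

#include <stdint.h>
#include <stdio.h>

#define NREC_CAS(x)	(((x) >> 16) & 0x1FFF)

int main(void)
{
	uint16_t reg16 = 0xABCD;	/* too narrow: shifting right by 16 always yields 0 */
	uint32_t reg32 = 0x1234ABCD;	/* full 32-bit register value: field is recovered */

	printf("u16: 0x%x, u32: 0x%x\n",
	       (unsigned)NREC_CAS(reg16), (unsigned)NREC_CAS(reg32));
	return 0;
}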


 drivers/edac/i5000_edac.c | 6 +++---
 drivers/edac/i5400_edac.c | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/edac/i5000_edac.c b/drivers/edac/i5000_edac.c
index f683919981b0..c79016ade51e 100644
--- a/drivers/edac/i5000_edac.c
+++ b/drivers/edac/i5000_edac.c
@@ -227,7 +227,7 @@
 #defineNREC_RDWR(x)(((x)>>11) & 1)
 #defineNREC_RANK(x)(((x)>>8) & 0x7)
 #defineNRECMEMB0xC0
-#defineNREC_CAS(x) (((x)>>16) & 0xFF)
+#defineNREC_CAS(x) (((x)>>16) & 0x1FFF)
 #defineNREC_RAS(x) ((x) & 0x7FFF)
 #defineNRECFGLOG   0xC4
 #defineNREEECFBDA  0xC8
@@ -371,7 +371,7 @@ struct i5000_error_info {
/* These registers are input ONLY if there was a
 * Non-Recoverable Error */
u16 nrecmema;   /* Non-Recoverable Mem log A */
-   u16 nrecmemb;   /* Non-Recoverable Mem log B */
+   u32 nrecmemb;   /* Non-Recoverable Mem log B */
 
 };
 
@@ -407,7 +407,7 @@ static void i5000_get_error_info(struct mem_ctl_info *mci,
			NERR_FAT_FBD, &info->nerr_fat_fbd);
	pci_read_config_word(pvt->branchmap_werrors,
			NRECMEMA, &info->nrecmema);
-	pci_read_config_word(pvt->branchmap_werrors,
+	pci_read_config_dword(pvt->branchmap_werrors,
			NRECMEMB, &info->nrecmemb);
 
/* Clear the error bits, by writing them back */
diff --git a/drivers/edac/i5400_edac.c b/drivers/edac/i5400_edac.c
index 37a9ba71da44..cd889edc8516 100644
--- a/drivers/edac/i5400_edac.c
+++ b/drivers/edac/i5400_edac.c
@@ -368,7 +368,7 @@ struct i5400_error_info {
 
/* These registers are input ONLY if there was a Non-Rec Error */
u16 nrecmema;   /* Non-Recoverable Mem log A */
-   u16 nrecmemb;   /* Non-Recoverable Mem log B */
+   u32 nrecmemb;   /* Non-Recoverable Mem log B */
 
 };
 
@@ -458,7 +458,7 @@ static void i5400_get_error_info(struct mem_ctl_info *mci,
			NERR_FAT_FBD, &info->nerr_fat_fbd);
	pci_read_config_word(pvt->branchmap_werrors,
			NRECMEMA, &info->nrecmema);
-	pci_read_config_word(pvt->branchmap_werrors,
+	pci_read_config_dword(pvt->branchmap_werrors,
			NRECMEMB, &info->nrecmemb);
 
/* Clear the error bits, by writing them back */
-- 
2.13.1




Re: [PATCH v2 11/11] ARC: [plat-eznps] Handle memory error as an exception

2017-06-12 Thread Vineet Gupta

On 06/08/2017 08:17 PM, Noam Camus wrote:


> From: Vineet Gupta

> Sent: Thursday, June 8, 2017 10:00 PM


>> With EZsim we try to simulate NPS400 CTOP core and not ARC core, and as such
>> we strive to have a similar ecosystem for both silicon and its simulator.

>Right, but if you are using nSIM which generates L2 interrupt for user mode error
>- then it is already different from silicon and needs to be handled as such.
Why so, this is something I wish to minimize and not just live with.
So I handle difference by redirecting back to the track I want to.

>> If we could, we would alter nSIM to behave just like our silicon.
>> So in the current situation, where we cannot do so, we make do with a single
>> pretty small adjustment in the OS (platform specific code).

>You are saying contradicting things here. Above u want EZSim to simulate CTOP
>(i.e. generate machinechk for U errors) but here you claim u use nSIM which
>will generate L2 intr.

>So I'm still grossly confused.
Sorry for the confusion.
I am using nSIM, which creates an L2 interrupt and not a machine check, because
I have no alternative.
At the first chance I get, I put execution back onto my desired direction.
Since the direction our silicon dictates ends with die(), it is a clear path
without risk.


OK I understand now - you want the die semantics, not try-and-continue... but the
patch is simply wrong. From the L2 interrupt you jump to the machine check vector,
which reads ECR. In this case it would be bogus - some old stale value. What you
really want is an NPS version of do_memory_error - since platforms can have their
own way of dealing with such errors - but the low-level machine handling is just
not in their control.


I'd propose the patch below and then you can define your own version of 
do_memory_error() and handle as appropriate !


diff --git a/arch/arc/kernel/traps.c b/arch/arc/kernel/traps.c
index ff83e78d0cfb..5a8042784ee9 100644
--- a/arch/arc/kernel/traps.c
+++ b/arch/arc/kernel/traps.c
@@ -80,11 +80,26 @@ int name(unsigned long address, struct pt_regs *regs) \
 DO_ERROR_INFO(SIGILL, "Priv Op/Disabled Extn", do_privilege_fault, ILL_PRVOPC)
 DO_ERROR_INFO(SIGILL, "Invalid Extn Insn", do_extension_fault, ILL_ILLOPC)
 DO_ERROR_INFO(SIGILL, "Illegal Insn (or Seq)", insterror_is_error, ILL_ILLOPC)
-DO_ERROR_INFO(SIGBUS, "Invalid Mem Access", do_memory_error, BUS_ADRERR)
 DO_ERROR_INFO(SIGTRAP, "Breakpoint Set", trap_is_brkpt, TRAP_BRKPT)
 DO_ERROR_INFO(SIGBUS, "Misaligned Access", do_misaligned_error, BUS_ADRALN)

 /*
+ * Entry point for bus errors
+ * opencoded DO_ERROR_INFO() but __weak to allow possible platform override
+ */
+int __weak do_memory_error(unsigned long address, struct pt_regs *regs)
+{
+   siginfo_t info = {
+   .si_signo = SIGBUS,
+   .si_errno = 0,
+   .si_code  = BUS_ADRERR,
+   .si_addr = (void __user *)address,
+   };
+
+   return unhandled_exception("Invalid Mem Access", regs, &info);
+}
+
+/*
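
With that in place, an eznps platform override could be as simple as the rough
sketch below (the placement and the die() call are illustrative only, assuming
the ARC die() helper; this is not part of the proposed patch):

/* e.g. somewhere in arch/arc/plat-eznps/, overriding the __weak default above */
int do_memory_error(unsigned long address, struct pt_regs *regs)
{
	/* NPS silicon semantics: a memory error is fatal, so just die() */
	die("Invalid Mem Access", regs, address);

	return 1;
}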



Re: [RFC PATCH 1/8] Documentation: add DT binding for ARM System Control and Management Interface(SCMI) protocol

2017-06-12 Thread Sudeep Holla
Hi Matt,


Thanks for starting this discussion on the list.

On Fri, Jun 09, 2017 at 01:12:50PM -0500, Matt Sealey wrote:
> Hullo all,
> 
> This is a long one.. apologies for odd linefeeds and so on.
> 
> On Wed, Jun 7, 2017 at 11:10 AM, Sudeep Holla  wrote:
> 
> > +Clock/Performance bindings for the clocks/OPPs based on SCMI Message Protocol
> > +
> > +
> > +This binding uses the common clock binding[1].
> > +
> > +Required properties:
> > +- compatible : shall be "arm,scmi-clocks" or "arm,scmi-perf-domains"
> 
> After a little thought, there are a couple objections to be made here.
> Firstly, the SCMI protocol families are  discoverable - you only really need
> to know that it is usable (and where to use it, mboxes etc.) - whether it
> supports the clock management, performance domains, power domains et al.
> protocols is a function of querying the base protocol for a list.
>
> These protocols are identified by a value, several of which are
> standardized, some being vendor extension numbers. All protocols must be
> able to be queried for information.
>

Agreed on most the above.

> As such, defining compatible properties for each protocol is treading that
> fine line of tying device trees to particular driver subsystems and giving
> operating systems an ability to ignore any discovery procedure. While I
> can't make a case for clock management (which should obviously conform with
> a particular clock management definition in DT, as already defined), there
> is plenty of past evidence of bindings for particular devices being mis-used
> or used in non-intended ways (regulators as reset GPIOs is the one that
> immediately came to mind) in lieu of a more fleshed out way of defining a
> particular class of device for a binding. The same would be true of tying a
> 'performance domain' to the concept of clock management.
>

As I mentioned in private, I reused clocks for simplicity, and most of the
platforms already do that. But yes, that's no valid argument to just
continue the legacy. I am fine with splitting clock and performance domains.
The main problem IMO is that it's not well defined either in the
specification or the architecture to an extent that we can define a standard
binding and live with it. We have already seen lots of churn around
these bindings, and that's one of the reasons I chose to reuse existing
bindings.

> From the point of view of being able to specify things against a particular
> binding (whatever that might be), one could imagine something that mapped
> protocols to those bindings without introducing compatible names. SCMI ids
> would be verbatim, and per-protocol. Things like clock-indices are therefore
> not relevant and defining which indices go with which protocol at the SCMI
> level isn't needed anymore. It is really up to the protocol how many cells
> it would need to define it's protocol behavior but for the purpose of some
> standardization, we could imagine a binding that defined protocols as such:
> 
> scmi: arm_scmi {
> compatible = "arm,scmi,1.0";

I prefer v1.0 dropped based on the same argument that it's discoverable.
If we don't agree on that then the whole discussion falls apart. I
assume you agree on dropping versioning just to continue the discussion
here.

> mboxes = ;
> shmem = ;
> protocols {
>   scmi_clocks: protocol@0x14 {
> #whatever-cells = 3;
>};
>foo_smic: protocol@0x89 {
> #foo-cells = 4;
> #bar-cells = 5;
> };
> };
> };
> 
> uart: myuart@8000 {
> compatible = "arm,pl011";
> clocks = <&foo_smic 3>;
> };
> 
> If you manage to get a device tree that specifies a clock but there is no
> protocol 0x89 then you're just as hosed as if you specified an
> arm,scmi-clocks node when the protocol was not supported by SCMI itself, so
> we don't gain any new dangers, but we do gain the ability to instantiate
> SCMI, discover protocols, and then load drivers against those protocols,
> without duplicating the discovery process with a hardcoded tree. Device
> trees, from my point of view, are a contract between the SoC & board
> designer and the OS (helped along by firmware, hopefully). They shouldn't be
> dictating the driver behavior to be applied at this kind of level. 

Agreed.

> Device trees need to be rock solid - agile development is fine but as soon
> as you ship, changing the device tree means cutting off support for existing
> software, or only working with augmented features on new software and
> severely reduced functionality on old software. That can be as simple as not
> being able to go to Turbo mode, or as bad as an inability to apply thermal
> limits and burning someone's board. If we define a specific binding of a
> specific protocol to a specific way of interacting with that device which is
> 

[PATCH 4.11 000/150] 4.11.5-stable review

2017-06-12 Thread Greg Kroah-Hartman
This is the start of the stable review cycle for the 4.11.5 release.
There are 150 patches in this series, all will be posted as a response
to this one.  If anyone has any issues with these being applied, please
let me know.

Responses should be made by Wed Jun 14 15:24:44 UTC 2017.
Anything received after that time might be too late.

The whole patch series can be found in one patch at:
kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.11.5-rc1.gz
or in the git tree and branch at:
  git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
linux-4.11.y
and the diffstat can be found below.

thanks,

greg k-h

-
Pseudo-Shortlog of commits:

Greg Kroah-Hartman 
Linux 4.11.5-rc1

Pablo Neira Ayuso 
netfilter: nft_set_rbtree: handle element re-addition after deletion

Jani Nikula 
drm/i915/vbt: split out defaults that are set when there is no VBT

Jani Nikula 
drm/i915/vbt: don't propagate errors from intel_bios_init()

Paul Moore 
audit: fix the RCU locking for the auditd_connection structure

Thomas Gleixner 
hwmon: (coretemp) Handle frozen hotplug state correctly

Amey Telawane 
tracing: Use strlcpy() instead of strcpy() in __trace_find_cmdline()

Chandan Rajendra 
iomap_dio_rw: Prevent reading file data beyond iomap_dio->i_size

Tejun Heo 
cgroup: mark cgroup_get() with __maybe_unused

Wei Yongjun 
pinctrl: cherryview: Add terminate entry for dmi_system_id tables

Takatoshi Akiyama 
serial: sh-sci: Fix panic when serial console and DMA are enabled

Michał Winiarski 
drm/i915/skl: Add missing SKL ID

Ville Syrjälä 
drm/i915: Fix runtime PM for LPE audio

Julius Werner 
drivers: char: mem: Fix wraparound check to allow mappings up to the end

Sebastian Andrzej Siewior 
cpu/hotplug: Drop the device lock on error

Takashi Iwai 
ASoC: Fix use-after-free at card unregistration

Takashi Iwai 
ALSA: timer: Fix missing queue indices reset at SNDRV_TIMER_IOCTL_SELECT

Takashi Iwai 
ALSA: timer: Fix race between read and ioctl

Ben Skeggs 
drm/nouveau/tmr: fully separate alarm execution/pending lists

Dominik Brodowski 
x86/microcode/intel: Clear patch pointer before jettisoning the initrd

Sinclair Yeh 
drm/vmwgfx: Make sure backup_handle is always valid

Vladis Dronov 
drm/vmwgfx: limit the number of mip levels in vmw_gb_surface_define_ioctl()

Dan Carpenter 
drm/vmwgfx: Handle vmalloc() failure in vmw_local_fifo_reserve()

Timur Tabi 
net: qcom/emac: do not use hardware mdio automatic polling

Paolo Bonzini 
srcu: Allow use of Classic SRCU from both process and interrupt context

Jin Yao 
perf/core: Drop kernel samples even though :u is specified

Andrew Lunn 
Revert "ata: sata_mv: Convert to devm_ioremap_resource()"

Breno Leitao 
powerpc/kernel: Initialize load_tm on task creation

Breno Leitao 
powerpc/kernel: Fix FP and vector register restoration

Michael Bringmann 
powerpc/hotplug-mem: Fix missing endian conversion of aa_index

Michael Ellerman 
powerpc/numa: Fix percpu allocations to be NUMA aware

Christophe Leroy 
powerpc/sysdev/simple_gpio: Fix oops in gpio save_regs function

Joe Carnuccio 
scsi: qla2xxx: Fix mailbox pointer error in fwdump capture

Joe Carnuccio 
scsi: qla2xxx: Set bit 15 for DIAG_ECHO_TEST MBC

Joe Carnuccio 
scsi: qla2xxx: Modify T262 FW dump template to specify same start/end to 
debug customer issues

Quinn Tran 
scsi: qla2xxx: Fix NULL pointer access due to redundant fc_host_port_name 
call

Sawan Chandak 
scsi: qla2xxx: Fix crash due to mismatch mumber of Q-pair creation for 
Multi queue

himanshu.madh...@cavium.com 
scsi: qla2xxx: Fix recursive loop during target mode configuration for 
ISP25XX leaving system unresponsive

Johannes Thumshirn 
scsi: qla2xxx: don't disable a not previously enabled PCI device

Marc Zyngier 
KVM: arm/arm64: Handle possible NULL stage2 pud when ageing pages

Omar Sandoval 
Btrfs: fix delalloc accounting leak caused by u32 overflow

Jeff Mahoney 
btrfs: fix race with relocation recovery and fs_root setup

Jeff Mahoney 
btrfs: fix memory leak in update_space_info failure path

David Sterba 
btrfs: use correct types for page indices in btrfs_page_exists_in_range

Vaibhav Jain 
cxl: Avoid double free_irq() for psl,slice interrupts

Frederic Barrat 
cxl: Fix error path on bad ioctl

Al Viro 
excessive checks in ufs_write_failed() and ufs_evict_inode()

Al Viro 
ufs_getfrag_block(): we only grab ->truncate_mutex on block creation path

Al Viro 
ufs_extend_tail(): fix the braino in calling conventions of 
ufs_new_fragments()

Al Viro 
ufs: set correct ->s_maxsize

Al Viro 
ufs: restore maintaining ->i_blocks

Al Viro 
fix ufs_isblockset()

Al Viro 
ufs: restore proper tail allocation

Tejun Heo 
cpuset: consider dying css as offline

Ulrik De Bie 
Input: elantech - add Fujitsu Lifebook E546/E557 to 

[PATCH 4.11 016/150] ravb: Fix use-after-free on `ifconfig eth0 down`

2017-06-12 Thread Greg Kroah-Hartman
4.11-stable review patch.  If anyone has any objections, please let me know.

--

From: Eugeniu Rosca 


[ Upstream commit 79514ef670e9e575a1fe36922268c439d0f0ca8a ]

Commit a47b70ea86bd ("ravb: unmap descriptors when freeing rings") has
introduced the issue seen in [1] reproduced on H3ULCB board.

Fix this by relocating the RX skb ringbuffer free operation, so that
swiotlb page unmapping can be done first. Freeing of aligned TX buffers
is not relevant to the issue seen in [1]. Still, reposition TX free
calls as well, to have all kfree() operations performed consistently
_after_ dma_unmap_*()/dma_free_*().

[1] Console screenshot with the problem reproduced:

salvator-x login: root
root@salvator-x:~# ifconfig eth0 up
Micrel KSZ9031 Gigabit PHY e680.ethernet-:00: \
   attached PHY driver [Micrel KSZ9031 Gigabit PHY]   \
   (mii_bus:phy_addr=e680.ethernet-:00, irq=235)
IPv6: ADDRCONF(NETDEV_UP): eth0: link is not ready
root@salvator-x:~#
root@salvator-x:~# ifconfig eth0 down

==
BUG: KASAN: use-after-free in swiotlb_tbl_unmap_single+0xc4/0x35c
Write of size 1538 at addr 8006d884f780 by task ifconfig/1649

CPU: 0 PID: 1649 Comm: ifconfig Not tainted 4.12.0-rc4-4-g112eb07287d1 #32
Hardware name: Renesas H3ULCB board based on r8a7795 (DT)
Call trace:
[] dump_backtrace+0x0/0x3a4
[] show_stack+0x14/0x1c
[] dump_stack+0xf8/0x150
[] print_address_description+0x7c/0x330
[] kasan_report+0x2e0/0x2f4
[] check_memory_region+0x20/0x14c
[] memcpy+0x48/0x68
[] swiotlb_tbl_unmap_single+0xc4/0x35c
[] unmap_single+0x90/0xa4
[] swiotlb_unmap_page+0xc/0x14
[] __swiotlb_unmap_page+0xcc/0xe4
[] ravb_ring_free+0x514/0x870
[] ravb_close+0x288/0x36c
[] __dev_close_many+0x14c/0x174
[] __dev_close+0xc8/0x144
[] __dev_change_flags+0xd8/0x194
[] dev_change_flags+0x60/0xb0
[] devinet_ioctl+0x484/0x9d4
[] inet_ioctl+0x190/0x194
[] sock_do_ioctl+0x78/0xa8
[] sock_ioctl+0x110/0x3c4
[] vfs_ioctl+0x90/0xa0
[] do_vfs_ioctl+0x148/0xc38
[] SyS_ioctl+0x44/0x74
[] el0_svc_naked+0x24/0x28

The buggy address belongs to the page:
page:7e001b6213c0 count:0 mapcount:0 mapping:  (null) index:0x0
flags: 0x4000()
raw: 4000   
raw:  7e001b6213e0  
page dumped because: kasan: bad access detected

Memory state around the buggy address:
 8006d884f680: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
 8006d884f700: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
>8006d884f780: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
   ^
 8006d884f800: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
 8006d884f880: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
==
Disabling lock debugging due to kernel taint
root@salvator-x:~#

Fixes: a47b70ea86bd ("ravb: unmap descriptors when freeing rings")
Signed-off-by: Eugeniu Rosca 
Acked-by: Sergei Shtylyov 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/net/ethernet/renesas/ravb_main.c |   24 
 1 file changed, 12 insertions(+), 12 deletions(-)

--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -230,18 +230,6 @@ static void ravb_ring_free(struct net_de
int ring_size;
int i;
 
-   /* Free RX skb ringbuffer */
-   if (priv->rx_skb[q]) {
-   for (i = 0; i < priv->num_rx_ring[q]; i++)
-   dev_kfree_skb(priv->rx_skb[q][i]);
-   }
-   kfree(priv->rx_skb[q]);
-   priv->rx_skb[q] = NULL;
-
-   /* Free aligned TX buffers */
-   kfree(priv->tx_align[q]);
-   priv->tx_align[q] = NULL;
-
if (priv->rx_ring[q]) {
for (i = 0; i < priv->num_rx_ring[q]; i++) {
struct ravb_ex_rx_desc *desc = &priv->rx_ring[q][i];
@@ -270,6 +258,18 @@ static void ravb_ring_free(struct net_de
priv->tx_ring[q] = NULL;
}
 
+   /* Free RX skb ringbuffer */
+   if (priv->rx_skb[q]) {
+   for (i = 0; i < priv->num_rx_ring[q]; i++)
+   dev_kfree_skb(priv->rx_skb[q][i]);
+   }
+   kfree(priv->rx_skb[q]);
+   priv->rx_skb[q] = NULL;
+
+   /* Free aligned TX buffers */
+   kfree(priv->tx_align[q]);
+   priv->tx_align[q] = NULL;
+
/* Free TX skb ringbuffer.
 * SKBs are freed by ravb_tx_free() call above.
 */




[PATCH 4.11 002/150] net: bridge: start hello timer only if device is up

2017-06-12 Thread Greg Kroah-Hartman
4.11-stable review patch.  If anyone has any objections, please let me know.

--

From: Nikolay Aleksandrov 


[ Upstream commit aeb073241fe7a2b932e04e20c60e47718332877f ]

When the transition of NO_STP -> KERNEL_STP was fixed by always calling
mod_timer in br_stp_start, it introduced a new regression which causes
the timer to be armed even when the bridge is down, and since we stop
the timers in its ndo_stop() function, they never get disabled if the
device is destroyed before it's upped.

To reproduce:
$ while :; do ip l add br0 type bridge hello_time 100; brctl stp br0 on;
ip l del br0; done;
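
A minimal sketch of the invariant being restored (names below are illustrative,
not the bridge code): the matching del_timer() lives in ndo_stop(), so a timer
may only be armed while the device is actually up.

#include <linux/netdevice.h>
#include <linux/timer.h>
#include <linux/jiffies.h>

/* Illustrative only: arm the per-bridge timer only when IFF_UP is set,
 * so that ndo_stop() is guaranteed to run later and cancel it. */
static void example_start_hello_timer(struct net_device *dev,
                                      struct timer_list *t, unsigned long delay)
{
        if (dev->flags & IFF_UP)
                mod_timer(t, jiffies + delay);
}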

CC: Xin Long 
CC: Ivan Vecera 
CC: Sebastian Ott 
Reported-by: Sebastian Ott 
Fixes: 6d18c732b95c ("bridge: start hello_timer when enabling KERNEL_STP in 
br_stp_start")
Signed-off-by: Nikolay Aleksandrov 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 net/bridge/br_stp_if.c |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -179,7 +179,8 @@ static void br_stp_start(struct net_brid
br_debug(br, "using kernel STP\n");
 
/* To start timers on any ports left in blocking */
-   mod_timer(&br->hello_timer, jiffies + br->hello_time);
+   if (br->dev->flags & IFF_UP)
+   mod_timer(&br->hello_timer, jiffies + br->hello_time);
br_port_state_selection(br);
}
 




[PATCH 4.11 017/150] net: bridge: fix a null pointer dereference in br_afspec

2017-06-12 Thread Greg Kroah-Hartman
4.11-stable review patch.  If anyone has any objections, please let me know.

--

From: Nikolay Aleksandrov 


[ Upstream commit 1020ce3108cc26fbf09d70550ea2937cb1a211d2 ]

We might call br_afspec() with p == NULL which is a valid use case if
the action is on the bridge device itself, but the bridge tunnel code
dereferences the p pointer without checking, so check if p is null
first.

Reported-by: Gustavo A. R. Silva 
Fixes: efa5356b0d97 ("bridge: per vlan dst_metadata netlink support")
Signed-off-by: Nikolay Aleksandrov 
Acked-by: Roopa Prabhu 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 net/bridge/br_netlink.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -591,7 +591,7 @@ static int br_afspec(struct net_bridge *
err = 0;
switch (nla_type(attr)) {
case IFLA_BRIDGE_VLAN_TUNNEL_INFO:
-   if (!(p->flags & BR_VLAN_TUNNEL))
+   if (!p || !(p->flags & BR_VLAN_TUNNEL))
return -EINVAL;
err = br_parse_vlan_tunnel_info(attr, _curr);
if (err)




[PATCH 4.11 013/150] net: dsa: Fix stale cpu_switch reference after unbind then bind

2017-06-12 Thread Greg Kroah-Hartman
4.11-stable review patch.  If anyone has any objections, please let me know.

--

From: Florian Fainelli 


[ Upstream commit b07ac9894644202614ca87c69f3f45e424a82fef ]

Commit 9520ed8fb841 ("net: dsa: use cpu_switch instead of ds[0]")
replaced the use of dst->ds[0] with dst->cpu_switch since that is
functionally equivalent, however, we can now run into an use after free
scenario after unbinding then rebinding the switch driver.

The use after free happens because we do correctly initialize
dst->cpu_switch the first time we probe in dsa_cpu_parse(), then we
unbind the driver: dsa_dst_unapply() is called, and we rebind again.
dst->cpu_switch now points to a freed "ds" structure, and so when we
finally dereference it in dsa_cpu_port_ethtool_setup(), we oops.

To fix this, simply set dst->cpu_switch to NULL in dsa_dst_unapply()
which guarantees that we always correctly re-assign dst->cpu_switch in
dsa_cpu_parse().
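
Sketched generically (types and names below are illustrative, not the dsa2.c
code), the pattern is: on teardown, clear any cached pointer into an object
that is about to be freed, so the next bind has to re-derive it instead of
dereferencing stale memory.

struct example_switch;

struct example_tree {
        struct example_switch *cpu_switch;      /* cached at bind/parse time */
};

/* Illustrative teardown: after this, a rebind cannot see the old pointer. */
static void example_unbind(struct example_tree *dst)
{
        if (dst->cpu_switch) {
                /* per-switch cleanup would go here */
                dst->cpu_switch = NULL;
        }
}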

Fixes: 9520ed8fb841 ("net: dsa: use cpu_switch instead of ds[0]")
Signed-off-by: Florian Fainelli 
Reviewed-by: Vivien Didelot 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 net/dsa/dsa2.c |4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -440,8 +440,10 @@ static void dsa_dst_unapply(struct dsa_s
dsa_ds_unapply(dst, ds);
}
 
-   if (dst->cpu_switch)
+   if (dst->cpu_switch) {
dsa_cpu_port_ethtool_restore(dst->cpu_switch);
+   dst->cpu_switch = NULL;
+   }
 
pr_info("DSA: tree %d unapplied\n", dst->tree);
dst->applied = false;




[PATCH 4.11 005/150] ipv6: xfrm: Handle errors reported by xfrm6_find_1stfragopt()

2017-06-12 Thread Greg Kroah-Hartman
4.11-stable review patch.  If anyone has any objections, please let me know.

--

From: Ben Hutchings 


[ Upstream commit 6e80ac5cc992ab6256c3dae87f7e57db15e1a58c ]

xfrm6_find_1stfragopt() may now return an error code and we must
not treat it as a length.

Fixes: 2423496af35d ("ipv6: Prevent overrun when parsing v6 header options")
Signed-off-by: Ben Hutchings 
Acked-by: Craig Gallek 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 net/ipv6/xfrm6_mode_ro.c|2 ++
 net/ipv6/xfrm6_mode_transport.c |2 ++
 2 files changed, 4 insertions(+)

--- a/net/ipv6/xfrm6_mode_ro.c
+++ b/net/ipv6/xfrm6_mode_ro.c
@@ -47,6 +47,8 @@ static int xfrm6_ro_output(struct xfrm_s
iph = ipv6_hdr(skb);
 
hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
+   if (hdr_len < 0)
+   return hdr_len;
skb_set_mac_header(skb, (prevhdr - x->props.header_len) - skb->data);
skb_set_network_header(skb, -x->props.header_len);
skb->transport_header = skb->network_header + hdr_len;
--- a/net/ipv6/xfrm6_mode_transport.c
+++ b/net/ipv6/xfrm6_mode_transport.c
@@ -28,6 +28,8 @@ static int xfrm6_transport_output(struct
iph = ipv6_hdr(skb);
 
hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
+   if (hdr_len < 0)
+   return hdr_len;
skb_set_mac_header(skb, (prevhdr - x->props.header_len) - skb->data);
skb_set_network_header(skb, -x->props.header_len);
skb->transport_header = skb->network_header + hdr_len;




Re: [PATCH 03/11] Creation of "usb_device_auth" LSM hook

2017-06-12 Thread Krzysztof Opasiak

Hi,

On 06/12/2017 06:56 PM, Salvatore Mesoraca wrote:

Creation of a new LSM hook that can be used to authorize or deauthorize
new USB devices via the usb authorization interface.
The same hook can also prevent the authorization of a USB device via
"/sys/bus/usb/devices/DEVICE/authorized".
Using this hook, an LSM could provide a higher level of granularity
than the current authorization interface.



Could you please explain to me why we need an LSM hook for this?

There are tools like usbguard[1] and as far as I can tell it looks like 
they can do everything for you...

Without kernel modification...
without matching and storing rules inside the kernel...
just pure userspace which uses device/interface authorization

Footnote:
1 - https://dkopecek.github.io/usbguard/

Best regards,
--
Krzysztof Opasiak
Samsung R&D Institute Poland
Samsung Electronics


[PATCH 4.11 022/150] sparc: Machine description indices can vary

2017-06-12 Thread Greg Kroah-Hartman
4.11-stable review patch.  If anyone has any objections, please let me know.

--

From: James Clarke 


[ Upstream commit c982aa9c304bf0b9a7522fd118fed4afa5a0263c ]

VIO devices were being looked up by their index in the machine
description node block, but this often varies over time as devices are
added and removed. Instead, store the ID and look up using the type,
config handle and ID.

Signed-off-by: James Clarke 
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=112541
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 arch/sparc/include/asm/vio.h |1 
 arch/sparc/kernel/vio.c  |   68 ---
 2 files changed, 65 insertions(+), 4 deletions(-)

--- a/arch/sparc/include/asm/vio.h
+++ b/arch/sparc/include/asm/vio.h
@@ -327,6 +327,7 @@ struct vio_dev {
int compat_len;
 
u64 dev_no;
+   u64 id;
 
unsigned long   channel_id;
 
--- a/arch/sparc/kernel/vio.c
+++ b/arch/sparc/kernel/vio.c
@@ -302,13 +302,16 @@ static struct vio_dev *vio_create_one(st
if (!id) {
dev_set_name(&vdev->dev, "%s", bus_id_name);
vdev->dev_no = ~(u64)0;
+   vdev->id = ~(u64)0;
} else if (!cfg_handle) {
dev_set_name(&vdev->dev, "%s-%llu", bus_id_name, *id);
vdev->dev_no = *id;
+   vdev->id = ~(u64)0;
} else {
dev_set_name(&vdev->dev, "%s-%llu-%llu", bus_id_name,
 *cfg_handle, *id);
vdev->dev_no = *cfg_handle;
+   vdev->id = *id;
}
 
vdev->dev.parent = parent;
@@ -351,27 +354,84 @@ static void vio_add(struct mdesc_handle
(void) vio_create_one(hp, node, &root_vdev->dev);
 }
 
+struct vio_md_node_query {
+   const char *type;
+   u64 dev_no;
+   u64 id;
+};
+
 static int vio_md_node_match(struct device *dev, void *arg)
 {
+   struct vio_md_node_query *query = (struct vio_md_node_query *) arg;
struct vio_dev *vdev = to_vio_dev(dev);
 
-   if (vdev->mp == (u64) arg)
-   return 1;
+   if (vdev->dev_no != query->dev_no)
+   return 0;
+   if (vdev->id != query->id)
+   return 0;
+   if (strcmp(vdev->type, query->type))
+   return 0;
 
-   return 0;
+   return 1;
 }
 
 static void vio_remove(struct mdesc_handle *hp, u64 node)
 {
+   const char *type;
+   const u64 *id, *cfg_handle;
+   u64 a;
+   struct vio_md_node_query query;
struct device *dev;
 
-   dev = device_find_child(&root_vdev->dev, (void *) node,
+   type = mdesc_get_property(hp, node, "device-type", NULL);
+   if (!type) {
+   type = mdesc_get_property(hp, node, "name", NULL);
+   if (!type)
+   type = mdesc_node_name(hp, node);
+   }
+
+   query.type = type;
+
+   id = mdesc_get_property(hp, node, "id", NULL);
+   cfg_handle = NULL;
+   mdesc_for_each_arc(a, hp, node, MDESC_ARC_TYPE_BACK) {
+   u64 target;
+
+   target = mdesc_arc_target(hp, a);
+   cfg_handle = mdesc_get_property(hp, target,
+   "cfg-handle", NULL);
+   if (cfg_handle)
+   break;
+   }
+
+   if (!id) {
+   query.dev_no = ~(u64)0;
+   query.id = ~(u64)0;
+   } else if (!cfg_handle) {
+   query.dev_no = *id;
+   query.id = ~(u64)0;
+   } else {
+   query.dev_no = *cfg_handle;
+   query.id = *id;
+   }
+
+   dev = device_find_child(&root_vdev->dev, &query,
vio_md_node_match);
if (dev) {
printk(KERN_INFO "VIO: Removing device %s\n", dev_name(dev));
 
device_unregister(dev);
put_device(dev);
+   } else {
+   if (!id)
+   printk(KERN_ERR "VIO: Removed unknown %s node.\n",
+  type);
+   else if (!cfg_handle)
+   printk(KERN_ERR "VIO: Removed unknown %s node %llu.\n",
+  type, *id);
+   else
+   printk(KERN_ERR "VIO: Removed unknown %s node 
%llu-%llu.\n",
+  type, *cfg_handle, *id);
}
 }
 




[PATCH 4.11 021/150] sparc64: mm: fix copy_tsb to correctly copy huge page TSBs

2017-06-12 Thread Greg Kroah-Hartman
4.11-stable review patch.  If anyone has any objections, please let me know.

--

From: Mike Kravetz 


[ Upstream commit 654f4807624a657f364417c2a7454f0df9961734 ]

When a TSB grows beyond its current capacity, a new TSB is allocated
and copy_tsb is called to copy entries from the old TSB to the new.
A hash shift based on page size is used to calculate the index of an
entry in the TSB.  copy_tsb has hard coded PAGE_SHIFT in these
calculations.  However, for huge page TSBs the value REAL_HPAGE_SHIFT
should be used.  As a result, when copy_tsb is called for a huge page
TSB the entries are placed at the incorrect index in the newly
allocated TSB.  When doing hardware table walk, the MMU does not
match these entries and we end up in the TSB miss handling code.
This code will then create and write an entry to the correct index
in the TSB.  We take a performance hit for the table walk miss and
recreation of these entries.
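
In C terms, the slot selection the assembly performs is roughly the following
(illustrative helper, not kernel code); the point of the fix is that 'shift'
must be PAGE_SHIFT for the base TSB but REAL_HPAGE_SHIFT for the huge-page TSB:

/* Illustrative model of picking a TSB slot for a virtual address.
 * nentries is a power of two, so the AND acts as a modulo. */
static unsigned long example_tsb_slot(unsigned long vaddr, unsigned long shift,
                                      unsigned long nentries)
{
        return (vaddr >> shift) & (nentries - 1);
}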

Pass a new parameter to copy_tsb that is the page size shift to be
used when copying the TSB.

Suggested-by: Anthony Yznaga 
Signed-off-by: Mike Kravetz 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 arch/sparc/kernel/tsb.S |   11 +++
 arch/sparc/mm/tsb.c |7 +--
 2 files changed, 12 insertions(+), 6 deletions(-)

--- a/arch/sparc/kernel/tsb.S
+++ b/arch/sparc/kernel/tsb.S
@@ -455,13 +455,16 @@ __tsb_context_switch:
.type   copy_tsb,#function
 copy_tsb:  /* %o0=old_tsb_base, %o1=old_tsb_size
 * %o2=new_tsb_base, %o3=new_tsb_size
+* %o4=page_size_shift
 */
sethi   %uhi(TSB_PASS_BITS), %g7
srlx%o3, 4, %o3
-   add %o0, %o1, %g1   /* end of old tsb */
+   add %o0, %o1, %o1   /* end of old tsb */
sllx%g7, 32, %g7
sub %o3, 1, %o3 /* %o3 == new tsb hash mask */
 
+   mov %o4, %g1/* page_size_shift */
+
 661:   prefetcha   [%o0] ASI_N, #one_read
.section.tsb_phys_patch, "ax"
.word   661b
@@ -486,9 +489,9 @@ copy_tsb:   /* %o0=old_tsb_base, %o1=old_
/* This can definitely be computed faster... */
srlx%o0, 4, %o5 /* Build index */
and %o5, 511, %o5   /* Mask index */
-   sllx%o5, PAGE_SHIFT, %o5 /* Put into vaddr position */
+   sllx%o5, %g1, %o5   /* Put into vaddr position */
or  %o4, %o5, %o4   /* Full VADDR. */
-   srlx%o4, PAGE_SHIFT, %o4 /* Shift down to create index */
+   srlx%o4, %g1, %o4   /* Shift down to create index */
and %o4, %o3, %o4   /* Mask with new_tsb_nents-1 */
sllx%o4, 4, %o4 /* Shift back up into tsb ent offset */
TSB_STORE(%o2 + %o4, %g2)   /* Store TAG */
@@ -496,7 +499,7 @@ copy_tsb:   /* %o0=old_tsb_base, %o1=old_
TSB_STORE(%o2 + %o4, %g3)   /* Store TTE */
 
 80:add %o0, 16, %o0
-   cmp %o0, %g1
+   cmp %o0, %o1
bne,pt  %xcc, 90b
 nop
 
--- a/arch/sparc/mm/tsb.c
+++ b/arch/sparc/mm/tsb.c
@@ -496,7 +496,8 @@ retry_tsb_alloc:
extern void copy_tsb(unsigned long old_tsb_base,
 unsigned long old_tsb_size,
 unsigned long new_tsb_base,
-unsigned long new_tsb_size);
+unsigned long new_tsb_size,
+unsigned long page_size_shift);
unsigned long old_tsb_base = (unsigned long) old_tsb;
unsigned long new_tsb_base = (unsigned long) new_tsb;
 
@@ -504,7 +505,9 @@ retry_tsb_alloc:
old_tsb_base = __pa(old_tsb_base);
new_tsb_base = __pa(new_tsb_base);
}
-   copy_tsb(old_tsb_base, old_size, new_tsb_base, new_size);
+   copy_tsb(old_tsb_base, old_size, new_tsb_base, new_size,
+   tsb_index == MM_TSB_BASE ?
+   PAGE_SHIFT : REAL_HPAGE_SHIFT);
}
 
mm->context.tsb_block[tsb_index].tsb = new_tsb;




[PATCH 4.11 023/150] sparc/mm/hugepages: Fix setup_hugepagesz for invalid values.

2017-06-12 Thread Greg Kroah-Hartman
4.11-stable review patch.  If anyone has any objections, please let me know.

--

From: "Liam R. Howlett" 


[ Upstream commit f322980b74a15e08f8c70a34a5864ecdbf957251 ]

hugetlb_bad_size needs to be called on invalid values.  Also change the
pr_warn to a pr_err to better align with other platforms.

Signed-off-by: Liam R. Howlett 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 arch/sparc/mm/init_64.c |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -358,7 +358,8 @@ static int __init setup_hugepagesz(char
}
 
if ((hv_pgsz_mask & cpu_pgsz_mask) == 0U) {
-   pr_warn("hugepagesz=%llu not supported by MMU.\n",
+   hugetlb_bad_size();
+   pr_err("hugepagesz=%llu not supported by MMU.\n",
hugepage_size);
goto out;
}




[PATCH 4.11 026/150] sparc64: redefine first version

2017-06-12 Thread Greg Kroah-Hartman
4.11-stable review patch.  If anyone has any objections, please let me know.

--

From: Pavel Tatashin 


[ Upstream commit c4415235b2be0cc791572e8e7f7466ab8f73a2bf ]

CTX_FIRST_VERSION defines the first context version, but also it defines
first context. This patch redefines it to only include the first context
version.

Signed-off-by: Pavel Tatashin 
Reviewed-by: Bob Picco 
Reviewed-by: Steven Sistare 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 arch/sparc/include/asm/mmu_64.h |2 +-
 arch/sparc/mm/init_64.c |6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

--- a/arch/sparc/include/asm/mmu_64.h
+++ b/arch/sparc/include/asm/mmu_64.h
@@ -52,7 +52,7 @@
 #define CTX_NR_MASK		TAG_CONTEXT_BITS
 #define CTX_HW_MASK		(CTX_NR_MASK | CTX_PGSZ_MASK)
 
-#define CTX_FIRST_VERSION  ((_AC(1,UL) << CTX_VERSION_SHIFT) + _AC(1,UL))
+#define CTX_FIRST_VERSION  BIT(CTX_VERSION_SHIFT)
 #define CTX_VALID(__ctx)   \
 (!(((__ctx.sparc64_ctx_val) ^ tlb_context_cache) & CTX_VERSION_MASK))
 #define CTX_HWBITS(__ctx)  ((__ctx.sparc64_ctx_val) & CTX_HW_MASK)
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -707,7 +707,7 @@ EXPORT_SYMBOL(__flush_dcache_range);
 
 /* get_new_mmu_context() uses "cache + 1".  */
 DEFINE_SPINLOCK(ctx_alloc_lock);
-unsigned long tlb_context_cache = CTX_FIRST_VERSION - 1;
+unsigned long tlb_context_cache = CTX_FIRST_VERSION;
 #define MAX_CTX_NR (1UL << CTX_NR_BITS)
 #define CTX_BMAP_SLOTS BITS_TO_LONGS(MAX_CTX_NR)
 DECLARE_BITMAP(mmu_context_bmap, MAX_CTX_NR);
@@ -738,9 +738,9 @@ void get_new_mmu_context(struct mm_struc
if (new_ctx >= ctx) {
int i;
new_ctx = (tlb_context_cache & CTX_VERSION_MASK) +
-   CTX_FIRST_VERSION;
+   CTX_FIRST_VERSION + 1;
if (new_ctx == 1)
-   new_ctx = CTX_FIRST_VERSION;
+   new_ctx = CTX_FIRST_VERSION + 1;
 
/* Don't call memset, for 16 entries that's just
 * plain silly...




[PATCH 4.11 027/150] sparc64: add per-cpu mm of secondary contexts

2017-06-12 Thread Greg Kroah-Hartman
4.11-stable review patch.  If anyone has any objections, please let me know.

--

From: Pavel Tatashin 


[ Upstream commit 7a5b4bbf49fe86ce77488a70c5dccfe2d50d7a2d ]

The new wrap is going to use information from this array to figure out
mm's that currently have valid secondary contexts setup.

Signed-off-by: Pavel Tatashin 
Reviewed-by: Bob Picco 
Reviewed-by: Steven Sistare 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 arch/sparc/include/asm/mmu_context_64.h |5 +++--
 arch/sparc/mm/init_64.c |1 +
 2 files changed, 4 insertions(+), 2 deletions(-)

--- a/arch/sparc/include/asm/mmu_context_64.h
+++ b/arch/sparc/include/asm/mmu_context_64.h
@@ -19,6 +19,7 @@ extern spinlock_t ctx_alloc_lock;
 extern unsigned long tlb_context_cache;
 extern unsigned long mmu_context_bmap[];
 
+DECLARE_PER_CPU(struct mm_struct *, per_cpu_secondary_mm);
 void get_new_mmu_context(struct mm_struct *mm);
 #ifdef CONFIG_SMP
 void smp_new_mmu_context_version(void);
@@ -76,8 +77,9 @@ void __flush_tlb_mm(unsigned long, unsig
 static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, 
struct task_struct *tsk)
 {
unsigned long ctx_valid, flags;
-   int cpu;
+   int cpu = smp_processor_id();
 
+   per_cpu(per_cpu_secondary_mm, cpu) = mm;
if (unlikely(mm == &init_mm))
return;
 
@@ -123,7 +125,6 @@ static inline void switch_mm(struct mm_s
 * for the first time, we must flush that context out of the
 * local TLB.
 */
-   cpu = smp_processor_id();
if (!ctx_valid || !cpumask_test_cpu(cpu, mm_cpumask(mm))) {
cpumask_set_cpu(cpu, mm_cpumask(mm));
__flush_tlb_mm(CTX_HWBITS(mm->context),
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -711,6 +711,7 @@ unsigned long tlb_context_cache = CTX_FI
 #define MAX_CTX_NR (1UL << CTX_NR_BITS)
 #define CTX_BMAP_SLOTS BITS_TO_LONGS(MAX_CTX_NR)
 DECLARE_BITMAP(mmu_context_bmap, MAX_CTX_NR);
+DEFINE_PER_CPU(struct mm_struct *, per_cpu_secondary_mm) = {0};
 
 /* Caller does TLB context flushing on local CPU if necessary.
  * The caller also ensures that CTX_VALID(mm->context) is false.




[PATCH 4.11 006/150] cxgb4: avoid enabling napi twice to the same queue

2017-06-12 Thread Greg Kroah-Hartman
4.11-stable review patch.  If anyone has any objections, please let me know.

--

From: Ganesh Goudar 


[ Upstream commit e7519f9926f1d0d11c776eb0475eb098c7760f68 ]

Take uld mutex to avoid race between cxgb_up() and
cxgb4_register_uld() to enable napi for the same uld
queue.
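
Abstracted away from the driver (names below are illustrative), the shape of
the fix is simply that both paths which can enable napi on a ULD queue now run
under the same mutex:

#include <linux/mutex.h>

static DEFINE_MUTEX(example_uld_mutex); /* illustrative stand-in for uld_mutex */

/* Illustrative: the bring-up path takes the same lock the ULD registration
 * path holds, so a given queue cannot be enabled twice concurrently. */
static void example_bring_up(void (*enable_uld_queues)(void))
{
        mutex_lock(&example_uld_mutex);
        enable_uld_queues();
        mutex_unlock(&example_uld_mutex);
}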

Signed-off-by: Ganesh Goudar 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c |4 
 1 file changed, 4 insertions(+)

--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -2217,10 +2217,14 @@ static int cxgb_up(struct adapter *adap)
if (err)
goto irq_err;
}
+
+   mutex_lock(&uld_mutex);
enable_rx(adap);
t4_sge_start(adap);
t4_intr_enable(adap);
adap->flags |= FULL_INIT_DONE;
+   mutex_unlock(&uld_mutex);
+
notify_ulds(adap, CXGB4_STATE_UP);
 #if IS_ENABLED(CONFIG_IPV6)
update_clip(adap);




[PATCH 4.11 029/150] sparc64: delete old wrap code

2017-06-12 Thread Greg Kroah-Hartman
4.11-stable review patch.  If anyone has any objections, please let me know.

--

From: Pavel Tatashin 


[ Upstream commit 0197e41ce70511dc3b71f7fefa1a676e2b5cd60b ]

The old method that is using xcall and softint to get new context id is
deleted, as it is replaced by a method of using per_cpu_secondary_mm
without xcall to perform the context wrap.

Signed-off-by: Pavel Tatashin 
Reviewed-by: Bob Picco 
Reviewed-by: Steven Sistare 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 arch/sparc/include/asm/mmu_context_64.h |6 --
 arch/sparc/include/asm/pil.h|1 -
 arch/sparc/kernel/kernel.h  |1 -
 arch/sparc/kernel/smp_64.c  |   31 ---
 arch/sparc/kernel/ttable_64.S   |2 +-
 arch/sparc/mm/ultra.S   |5 -
 6 files changed, 1 insertion(+), 45 deletions(-)

--- a/arch/sparc/include/asm/mmu_context_64.h
+++ b/arch/sparc/include/asm/mmu_context_64.h
@@ -21,12 +21,6 @@ extern unsigned long mmu_context_bmap[];
 
 DECLARE_PER_CPU(struct mm_struct *, per_cpu_secondary_mm);
 void get_new_mmu_context(struct mm_struct *mm);
-#ifdef CONFIG_SMP
-void smp_new_mmu_context_version(void);
-#else
-#define smp_new_mmu_context_version() do { } while (0)
-#endif
-
 int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
 void destroy_context(struct mm_struct *mm);
 
--- a/arch/sparc/include/asm/pil.h
+++ b/arch/sparc/include/asm/pil.h
@@ -20,7 +20,6 @@
 #define PIL_SMP_CALL_FUNC  1
 #define PIL_SMP_RECEIVE_SIGNAL 2
 #define PIL_SMP_CAPTURE		3
-#define PIL_SMP_CTX_NEW_VERSION	4
 #define PIL_DEVICE_IRQ 5
 #define PIL_SMP_CALL_FUNC_SNGL 6
 #define PIL_DEFERRED_PCR_WORK  7
--- a/arch/sparc/kernel/kernel.h
+++ b/arch/sparc/kernel/kernel.h
@@ -37,7 +37,6 @@ void handle_stdfmna(struct pt_regs *regs
 /* smp_64.c */
 void __irq_entry smp_call_function_client(int irq, struct pt_regs *regs);
 void __irq_entry smp_call_function_single_client(int irq, struct pt_regs 
*regs);
-void __irq_entry smp_new_mmu_context_version_client(int irq, struct pt_regs 
*regs);
 void __irq_entry smp_penguin_jailcell(int irq, struct pt_regs *regs);
 void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs);
 
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -964,37 +964,6 @@ void flush_dcache_page_all(struct mm_str
preempt_enable();
 }
 
-void __irq_entry smp_new_mmu_context_version_client(int irq, struct pt_regs 
*regs)
-{
-   struct mm_struct *mm;
-   unsigned long flags;
-
-   clear_softint(1 << irq);
-
-   /* See if we need to allocate a new TLB context because
-* the version of the one we are using is now out of date.
-*/
-   mm = current->active_mm;
-   if (unlikely(!mm || (mm == &init_mm)))
-   return;
-
-   spin_lock_irqsave(&mm->context.lock, flags);
-
-   if (unlikely(!CTX_VALID(mm->context)))
-   get_new_mmu_context(mm);
-
-   spin_unlock_irqrestore(&mm->context.lock, flags);
-
-   load_secondary_context(mm);
-   __flush_tlb_mm(CTX_HWBITS(mm->context),
-  SECONDARY_CONTEXT);
-}
-
-void smp_new_mmu_context_version(void)
-{
-   smp_cross_call(&xcall_new_mmu_context_version, 0, 0, 0);
-}
-
 #ifdef CONFIG_KGDB
 void kgdb_roundup_cpus(unsigned long flags)
 {
--- a/arch/sparc/kernel/ttable_64.S
+++ b/arch/sparc/kernel/ttable_64.S
@@ -50,7 +50,7 @@ tl0_resv03e:  BTRAP(0x3e) BTRAP(0x3f) BTR
 tl0_irq1:  TRAP_IRQ(smp_call_function_client, 1)
 tl0_irq2:  TRAP_IRQ(smp_receive_signal_client, 2)
 tl0_irq3:  TRAP_IRQ(smp_penguin_jailcell, 3)
-tl0_irq4:  TRAP_IRQ(smp_new_mmu_context_version_client, 4)
+tl0_irq4:   BTRAP(0x44)
 #else
 tl0_irq1:  BTRAP(0x41)
 tl0_irq2:  BTRAP(0x42)
--- a/arch/sparc/mm/ultra.S
+++ b/arch/sparc/mm/ultra.S
@@ -971,11 +971,6 @@ xcall_capture:
wr  %g0, (1 << PIL_SMP_CAPTURE), %set_softint
retry
 
-   .globl  xcall_new_mmu_context_version
-xcall_new_mmu_context_version:
-   wr  %g0, (1 << PIL_SMP_CTX_NEW_VERSION), %set_softint
-   retry
-
 #ifdef CONFIG_KGDB
.globl  xcall_kgdb_capture
 xcall_kgdb_capture:




[PATCH 4.11 030/150] arch/sparc: support NR_CPUS = 4096

2017-06-12 Thread Greg Kroah-Hartman
4.11-stable review patch.  If anyone has any objections, please let me know.

--

From: Jane Chu 


[ Upstream commit c79a13734d104b5b147d7cb0870276ccdd660dae ]

Linux SPARC64 limits NR_CPUS to 4064 because init_cpu_send_mondo_info()
only allocates a single page for NR_CPUS mondo entries. Thus we cannot
use all 4096 CPUs on some SPARC platforms.

To fix, allocate (2^order) pages where order is set according to the size
of cpu_list for possible cpus. Since cpu_list_pa and cpu_mondo_block_pa
are not used in asm code, there are no imm13 offsets from the base PA
that will break because they can only reach one page.
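
One detail of the patch below, spelled out (helper name illustrative): a
127-byte allocation always contains a 64-byte-aligned, 64-byte block, since in
the worst case the start is one byte past a boundary and 63 + 64 = 127 bytes
are needed.

/* Illustrative: round a pointer up to the next 64-byte boundary. */
static void *example_align64(void *p)
{
        return (void *)(((unsigned long)p + 63) & ~0x3fUL);
}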

Orabug: 25505750

Signed-off-by: Jane Chu 

Reviewed-by: Bob Picco 
Reviewed-by: Atish Patra 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 arch/sparc/Kconfig |4 ++--
 arch/sparc/kernel/irq_64.c |   17 +
 2 files changed, 15 insertions(+), 6 deletions(-)

--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -192,9 +192,9 @@ config NR_CPUS
int "Maximum number of CPUs"
depends on SMP
range 2 32 if SPARC32
-   range 2 1024 if SPARC64
+   range 2 4096 if SPARC64
default 32 if SPARC32
-   default 64 if SPARC64
+   default 4096 if SPARC64
 
 source kernel/Kconfig.hz
 
--- a/arch/sparc/kernel/irq_64.c
+++ b/arch/sparc/kernel/irq_64.c
@@ -1034,17 +1034,26 @@ static void __init init_cpu_send_mondo_i
 {
 #ifdef CONFIG_SMP
unsigned long page;
+   void *mondo, *p;
 
-   BUILD_BUG_ON((NR_CPUS * sizeof(u16)) > (PAGE_SIZE - 64));
+   BUILD_BUG_ON((NR_CPUS * sizeof(u16)) > PAGE_SIZE);
+
+   /* Make sure mondo block is 64byte aligned */
+   p = kzalloc(127, GFP_KERNEL);
+   if (!p) {
+   prom_printf("SUN4V: Error, cannot allocate mondo block.\n");
+   prom_halt();
+   }
+   mondo = (void *)(((unsigned long)p + 63) & ~0x3f);
+   tb->cpu_mondo_block_pa = __pa(mondo);
 
page = get_zeroed_page(GFP_KERNEL);
if (!page) {
-   prom_printf("SUN4V: Error, cannot allocate cpu mondo page.\n");
+   prom_printf("SUN4V: Error, cannot allocate cpu list page.\n");
prom_halt();
}
 
-   tb->cpu_mondo_block_pa = __pa(page);
-   tb->cpu_list_pa = __pa(page + 64);
+   tb->cpu_list_pa = __pa(page);
 #endif
 }
 




[PATCH 4.11 028/150] sparc64: new context wrap

2017-06-12 Thread Greg Kroah-Hartman
4.11-stable review patch.  If anyone has any objections, please let me know.

--

From: Pavel Tatashin 


[ Upstream commit a0582f26ec9dfd5360ea2f35dd9a1b026f8adda0 ]

The current wrap implementation has a race issue: it is called outside of
the ctx_alloc_lock, and also does not wait for all CPUs to complete the
wrap.  This means that a thread can get a new context with a new version
and another thread might still be running with the same context. The
problem is especially severe on CPUs with shared TLBs, like sun4v. I used
the following test to very quickly reproduce the problem:
- start over 8K processes (must be more than context IDs)
- write and read values at a  memory location in every process.

Very quickly memory corruptions start happening, and what we read back
does not equal what we wrote.
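
A rough idea of the kind of reproducer described above (editorial sketch only, not Pavel's actual test program; the process count, the private mapping, and the pid-based check are illustrative assumptions):

	#include <stdio.h>
	#include <unistd.h>
	#include <sys/mman.h>
	#include <sys/wait.h>

	int main(void)
	{
		int i;

		/* start more processes than there are context IDs */
		for (i = 0; i < 9000; i++) {
			if (fork() == 0) {
				pid_t me = getpid();
				pid_t *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
						MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

				if (p == MAP_FAILED)
					_exit(1);
				*p = me;		/* write a per-process value */
				for (;;) {
					if (*p != me) {	/* and keep reading it back */
						fprintf(stderr, "corruption in %d\n", me);
						_exit(1);
					}
				}
			}
		}
		while (wait(NULL) > 0)
			;
		return 0;
	}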

Several approaches were explored before settling on this one:

Approach 1:
Move smp_new_mmu_context_version() inside ctx_alloc_lock, and wait for
every process to complete the wrap. (Note: every CPU must WAIT before
leaving smp_new_mmu_context_version_client() until every one arrives).

This approach ends up with deadlocks, as some threads own locks which other
threads are waiting for, and they never receive softint until these threads
exit smp_new_mmu_context_version_client(). Since we do not allow the exit,
deadlock happens.

Approach 2:
Handle wrap right during mondo interrupt. Use etrap/rtrap to enter
into C code, and issue new versions to every CPU.
This approach adds some overhead to runtime: in switch_mm() we must add
some checks to make sure that versions have not changed due to wrap while
we were loading the new secondary context. (could be protected by PSTATE_IE
but that degrades performance as on M7 and older CPUs as it takes 50 cycles
for each access). Also, we still need a global per-cpu array of MMs to know
where we need to load new contexts, otherwise we can change context to a
thread that is going away (if we received mondo between switch_mm() and
switch_to() time). Finally, there are some issues with window registers in
rtrap() when context IDs are changed during CPU mondo time.

The approach in this patch is the simplest and has almost no impact on
runtime.  We use the array with mm's where last secondary contexts were
loaded onto CPUs and bump their versions to the new generation without
changing context IDs. If a new process comes in to get a context ID, it
will go through get_new_mmu_context() because of version mismatch. But the
running processes do not need to be interrupted. And wrap is quicker as we
do not need to xcall and wait for everyone to receive and complete wrap.

Signed-off-by: Pavel Tatashin 
Reviewed-by: Bob Picco 
Reviewed-by: Steven Sistare 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 arch/sparc/mm/init_64.c |   81 
 1 file changed, 54 insertions(+), 27 deletions(-)

--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -713,6 +713,53 @@ unsigned long tlb_context_cache = CTX_FI
 DECLARE_BITMAP(mmu_context_bmap, MAX_CTX_NR);
 DEFINE_PER_CPU(struct mm_struct *, per_cpu_secondary_mm) = {0};
 
+static void mmu_context_wrap(void)
+{
+   unsigned long old_ver = tlb_context_cache & CTX_VERSION_MASK;
+   unsigned long new_ver, new_ctx, old_ctx;
+   struct mm_struct *mm;
+   int cpu;
+
+   bitmap_zero(mmu_context_bmap, 1 << CTX_NR_BITS);
+
+   /* Reserve kernel context */
+   set_bit(0, mmu_context_bmap);
+
+   new_ver = (tlb_context_cache & CTX_VERSION_MASK) + CTX_FIRST_VERSION;
+   if (unlikely(new_ver == 0))
+   new_ver = CTX_FIRST_VERSION;
+   tlb_context_cache = new_ver;
+
+   /*
+* Make sure that any new mm that are added into per_cpu_secondary_mm,
+* are going to go through get_new_mmu_context() path.
+*/
+   mb();
+
+   /*
+* Updated versions to current on those CPUs that had valid secondary
+* contexts
+*/
+   for_each_online_cpu(cpu) {
+   /*
+* If a new mm is stored after we took this mm from the array,
+* it will go into get_new_mmu_context() path, because we
+* already bumped the version in tlb_context_cache.
+*/
+   mm = per_cpu(per_cpu_secondary_mm, cpu);
+
+   if (unlikely(!mm || mm == &init_mm))
+   continue;
+
+   old_ctx = mm->context.sparc64_ctx_val;
+   if (likely((old_ctx & CTX_VERSION_MASK) == old_ver)) {
+   new_ctx = (old_ctx & ~CTX_VERSION_MASK) | new_ver;
+   set_bit(new_ctx & CTX_NR_MASK, mmu_context_bmap);
+   mm->context.sparc64_ctx_val = new_ctx;
+   }
+   }
+}
+
 /* Caller does TLB context flushing on local CPU if necessary.
  * The caller also ensures that CTX_VALID(mm->context) is false.
  *
@@ -727,50 +774,30 @@ void 

Re: [PATCH 08/11] Creation of "pagefault_handler_x86" LSM hook

2017-06-12 Thread Thomas Gleixner
On Mon, 12 Jun 2017, Salvatore Mesoraca wrote:
> Creation of a new hook to let LSM modules handle user-space pagefaults on
> x86.
> It can be used to avoid segfaulting the originating process.
> If it's the case it can modify process registers before returning.

That explains what you could do with it, but it completely lacks any
rationale WHY this is desired and good behaviour and how that is a security
feature.

Thanks,

tglx



Re: [PATCH] workqueue: Ensure that cpumask set for pools created after boot

2017-06-12 Thread Tejun Heo
Hello,

On Mon, Jun 12, 2017 at 12:10:49PM -0500, Michael Bringmann wrote:
> > The reason why we're ending up with empty masks is because
> > wq_calc_node_cpumask() is assuming that the possible node cpumask is
> > always a superset of online (as it should).  We can trigger a fat
> > warning there if that isn't so and just return false from that
> > function.
> 
> What would that look like?  I should be able to test it on top of the
> other changes / corrections.

So, the function looks like the following now.

  static bool wq_calc_node_cpumask(const struct workqueue_attrs *attrs, int 
node,
   int cpu_going_down, cpumask_t *cpumask)
  {
  if (!wq_numa_enabled || attrs->no_numa)
  goto use_dfl;

  /* does @node have any online CPUs @attrs wants? */
A:  cpumask_and(cpumask, cpumask_of_node(node), attrs->cpumask);
  if (cpu_going_down >= 0)
  cpumask_clear_cpu(cpu_going_down, cpumask);

B:  if (cpumask_empty(cpumask))
  goto use_dfl;

  /* yeap, return possible CPUs in @node that @attrs wants */
C:  cpumask_and(cpumask, attrs->cpumask, wq_numa_possible_cpumask[node]);
  return !cpumask_equal(cpumask, attrs->cpumask);

  use_dfl:
  cpumask_copy(cpumask, attrs->cpumask);
  return false;
  }

A is calculating the target cpumask to use using the online map.  B
falls back to dfl mask if the intersection is empty.  C calculates the
eventual mask to use from the intersection of possible mask and what's
requested.  The assumption is that because possible is a superset of
online, C's result can't be smaller than A.

So, what we can do is to calculate the possible intersection,
compare it against the online intersection, and if the latter is
bigger, trigger a big fat warning and return false there.
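
One way that check could look, sketched against the function quoted above (illustration only; it assumes an extra local, online_mask, that keeps the result of step A around so the step C result can be compared against it):

	/* yeap, return possible CPUs in @node that @attrs wants */
	cpumask_and(cpumask, attrs->cpumask, wq_numa_possible_cpumask[node]);

	/*
	 * possible is expected to be a superset of online, so the mask
	 * computed here can never be smaller than the one from step A
	 * (saved in online_mask).  If it is, the topology data is bogus:
	 * warn once and fall back to the default mask.
	 */
	if (WARN_ON_ONCE(!cpumask_subset(online_mask, cpumask)))
		goto use_dfl;

	return !cpumask_equal(cpumask, attrs->cpumask);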

> > I have no idea about the specifics of ppc but at least the code base
> > we have currently expect all possible cpus and nodes and their
> > mappings to be established on boot.
> 
> Hopefully, the new properties will fix the holes in the current implementation
> with regard to hot-add.

Yeah, that's the only proper fix here.

Thanks.

-- 
tejun



[PATCH 4.11 009/150] ip6_tunnel: fix traffic class routing for tunnels

2017-06-12 Thread Greg Kroah-Hartman
4.11-stable review patch.  If anyone has any objections, please let me know.

--

From: Liam McBirnie 


[ Upstream commit 5f733ee68f9a4df94775299ac6a7ab260704f6ed ]

ip6_route_output() requires that the flowlabel contains the traffic
class for policy routing.

Commit 0e9a709560db ("ip6_tunnel, ip6_gre: fix setting of DSCP on
encapsulated packets") removed the code which previously added the
traffic class to the flowlabel.

The traffic class is added here because only route lookup needs the
flowlabel to contain the traffic class.

Fixes: 0e9a709560db ("ip6_tunnel, ip6_gre: fix setting of DSCP on encapsulated 
packets")
Signed-off-by: Liam McBirnie 
Acked-by: Peter Dawson 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 net/ipv6/ip6_tunnel.c |3 +++
 1 file changed, 3 insertions(+)

--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1095,6 +1095,9 @@ int ip6_tnl_xmit(struct sk_buff *skb, st
 
if (!dst) {
 route_lookup:
+   /* add dsfield to flowlabel for route lookup */
+   fl6->flowlabel = ip6_make_flowinfo(dsfield, fl6->flowlabel);
+
dst = ip6_route_output(net, NULL, fl6);
 
if (dst->error)




[PATCH 4.11 011/150] geneve: fix needed_headroom and max_mtu for collect_metadata

2017-06-12 Thread Greg Kroah-Hartman
4.11-stable review patch.  If anyone has any objections, please let me know.

--

From: Eric Garver 


[ Upstream commit 9a1c44d989bff4c992b8b9a112d9fda275ea5515 ]

Since commit 9b4437a5b870 ("geneve: Unify LWT and netdev handling.")
when using COLLECT_METADATA geneve devices are created with too small of
a needed_headroom and too large of a max_mtu. This is because
ip_tunnel_info_af() is not valid with the device level info when using
COLLECT_METADATA and we mistakenly fall into the IPv4 case.

For COLLECT_METADATA, always use the worst case of ipv6 since both
sockets are created.

Fixes: 9b4437a5b870 ("geneve: Unify LWT and netdev handling.")
Signed-off-by: Eric Garver 
Acked-by: Pravin B Shelar 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/net/geneve.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -1133,7 +1133,7 @@ static int geneve_configure(struct net *
 
/* make enough headroom for basic scenario */
encap_len = GENEVE_BASE_HLEN + ETH_HLEN;
-   if (ip_tunnel_info_af(info) == AF_INET) {
+   if (!metadata && ip_tunnel_info_af(info) == AF_INET) {
encap_len += sizeof(struct iphdr);
dev->max_mtu -= sizeof(struct iphdr);
} else {




[PATCH 4.11 010/150] sock: reset sk_err when the error queue is empty

2017-06-12 Thread Greg Kroah-Hartman
4.11-stable review patch.  If anyone has any objections, please let me know.

--

From: Soheil Hassas Yeganeh 


[ Upstream commit 38b257938ac6655d0d6333743303231b9c465ec1 ]

Prior to f5f99309fa74 (sock: do not set sk_err in
sock_dequeue_err_skb), sk_err was reset to the error of
the skb on the head of the error queue.

Applications, most notably ping, are relying on this
behavior to reset sk_err for ICMP packets.

Set sk_err to the ICMP error when there is an ICMP packet
at the head of the error queue.

Fixes: f5f99309fa74 (sock: do not set sk_err in sock_dequeue_err_skb)
Reported-by: Cyril Hrubis 
Tested-by: Cyril Hrubis 
Signed-off-by: Soheil Hassas Yeganeh 
Signed-off-by: Eric Dumazet 
Signed-off-by: Willem de Bruijn 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 net/core/skbuff.c |5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3755,8 +3755,11 @@ struct sk_buff *sock_dequeue_err_skb(str
 
spin_lock_irqsave(&q->lock, flags);
skb = __skb_dequeue(q);
-   if (skb && (skb_next = skb_peek(q)))
+   if (skb && (skb_next = skb_peek(q))) {
icmp_next = is_icmp_err_skb(skb_next);
+   if (icmp_next)
+   sk->sk_err = SKB_EXT_ERR(skb_next)->ee.ee_origin;
+   }
spin_unlock_irqrestore(&q->lock, flags);
 
if (is_icmp_err_skb(skb) && !icmp_next)




[PATCH 4.11 003/150] vxlan: eliminate cached dst leak

2017-06-12 Thread Greg Kroah-Hartman
4.11-stable review patch.  If anyone has any objections, please let me know.

--

From: Lance Richardson 


[ Upstream commit 35cf2845563c1aaa01d27bd34d64795c4ae72700 ]

After commit 0c1d70af924b ("net: use dst_cache for vxlan device"),
cached dst entries could be leaked when more than one remote was
present for a given vxlan_fdb entry, causing subsequent netns
operations to block indefinitely and "unregister_netdevice: waiting
for lo to become free." messages to appear in the kernel log.

Fix by properly releasing cached dst and freeing resources in this
case.

Fixes: 0c1d70af924b ("net: use dst_cache for vxlan device")
Signed-off-by: Lance Richardson 
Acked-by: Paolo Abeni 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/net/vxlan.c |   20 +---
 1 file changed, 17 insertions(+), 3 deletions(-)

--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -740,6 +740,22 @@ static void vxlan_fdb_destroy(struct vxl
call_rcu(&f->rcu, vxlan_fdb_free);
 }
 
+static void vxlan_dst_free(struct rcu_head *head)
+{
+   struct vxlan_rdst *rd = container_of(head, struct vxlan_rdst, rcu);
+
+   dst_cache_destroy(&rd->dst_cache);
+   kfree(rd);
+}
+
+static void vxlan_fdb_dst_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f,
+ struct vxlan_rdst *rd)
+{
+   list_del_rcu(&rd->list);
+   vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH);
+   call_rcu(&rd->rcu, vxlan_dst_free);
+}
+
 static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan,
   union vxlan_addr *ip, __be16 *port, __be32 *src_vni,
   __be32 *vni, u32 *ifindex)
@@ -864,9 +880,7 @@ static int __vxlan_fdb_delete(struct vxl
 * otherwise destroy the fdb entry
 */
if (rd && !list_is_singular(&f->remotes)) {
-   list_del_rcu(&rd->list);
-   vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH);
-   kfree_rcu(rd, rcu);
+   vxlan_fdb_dst_destroy(vxlan, f, rd);
goto out;
}
 




[PATCH 4.11 032/150] serial: exar: Fix stuck MSIs

2017-06-12 Thread Greg Kroah-Hartman
4.11-stable review patch.  If anyone has any objections, please let me know.

--

From: Jan Kiszka 

commit 2c0ac5b48a3586f612b85755b041ed7733dc8e6b upstream.

After migrating 8250_exar to MSI in 172c33cb61da, we can get stuck
without further interrupts because of the special wake-up event these
chips send. They are only cleared by reading INT0. As we fail to do so
during startup and shutdown, we can leave the interrupt line asserted,
which is fatal with edge-triggered MSIs.

Add the required reading of INT0 to startup and shutdown. Also account
for the fact that a pending wake-up interrupt means we have to return 1
from exar_handle_irq. Drop the unneeded reading of INT1..3 along with
this - those never reset anything.

An alternative approach would have been disabling the wake-up interrupt.
Unfortunately, this feature (REGB[17] = 1) is not available on the
XR17D15X.

Fixes: 172c33cb61da ("serial: exar: Enable MSI support")
Signed-off-by: Jan Kiszka 
Reviewed-by: Andy Shevchenko 
Signed-off-by: Greg Kroah-Hartman 

---
 drivers/tty/serial/8250/8250_port.c |   19 ++-
 1 file changed, 10 insertions(+), 9 deletions(-)

--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -47,6 +47,7 @@
 /*
  * These are definitions for the Exar XR17V35X and XR17(C|D)15X
  */
+#define UART_EXAR_INT0 0x80
 #define UART_EXAR_SLEEP0x8b/* Sleep mode */
 #define UART_EXAR_DVID 0x8d/* Device identification */
 
@@ -1869,17 +1870,13 @@ static int serial8250_default_handle_irq
 static int exar_handle_irq(struct uart_port *port)
 {
unsigned int iir = serial_port_in(port, UART_IIR);
-   int ret;
+   int ret = 0;
 
-   ret = serial8250_handle_irq(port, iir);
+   if (((port->type == PORT_XR17V35X) || (port->type == PORT_XR17D15X)) &&
+   serial_port_in(port, UART_EXAR_INT0) != 0)
+   ret = 1;
 
-   if ((port->type == PORT_XR17V35X) ||
-  (port->type == PORT_XR17D15X)) {
-   serial_port_in(port, 0x80);
-   serial_port_in(port, 0x81);
-   serial_port_in(port, 0x82);
-   serial_port_in(port, 0x83);
-   }
+   ret |= serial8250_handle_irq(port, iir);
 
return ret;
 }
@@ -2177,6 +2174,8 @@ int serial8250_do_startup(struct uart_po
serial_port_in(port, UART_RX);
serial_port_in(port, UART_IIR);
serial_port_in(port, UART_MSR);
+   if ((port->type == PORT_XR17V35X) || (port->type == PORT_XR17D15X))
+   serial_port_in(port, UART_EXAR_INT0);
 
/*
 * At this point, there's no way the LSR could still be 0xff;
@@ -2335,6 +2334,8 @@ dont_test_tx_en:
serial_port_in(port, UART_RX);
serial_port_in(port, UART_IIR);
serial_port_in(port, UART_MSR);
+   if ((port->type == PORT_XR17V35X) || (port->type == PORT_XR17D15X))
+   serial_port_in(port, UART_EXAR_INT0);
up->lsr_saved_flags = 0;
up->msr_saved_flags = 0;
 




[PATCH 4.11 047/150] gfs2: Make flush bios explicitely sync

2017-06-12 Thread Greg Kroah-Hartman
4.11-stable review patch.  If anyone has any objections, please let me know.

--

From: Jan Kara 

commit 0f0b9b63e14fc3f66e4d342df016c9b071c5abed upstream.

Commit b685d3d65ac7 "block: treat REQ_FUA and REQ_PREFLUSH as
synchronous" removed REQ_SYNC flag from WRITE_{FUA|PREFLUSH|...}
definitions.  generic_make_request_checks() however strips REQ_FUA and
REQ_PREFLUSH flags from a bio when the storage doesn't report volatile
write cache and thus write effectively becomes asynchronous which can
lead to performance regressions

Fix the problem by making sure all bios which are synchronous are
properly marked with REQ_SYNC.

Fixes: b685d3d65ac791406e0dfd8779cc9b3707fea5a3
CC: Steven Whitehouse 
CC: cluster-de...@redhat.com
Acked-by: Bob Peterson 
Signed-off-by: Jan Kara 
Signed-off-by: Greg Kroah-Hartman 

---
 fs/gfs2/log.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -659,7 +659,7 @@ static void log_write_header(struct gfs2
struct gfs2_log_header *lh;
unsigned int tail;
u32 hash;
-   int op_flags = REQ_PREFLUSH | REQ_FUA | REQ_META;
+   int op_flags = REQ_PREFLUSH | REQ_FUA | REQ_META | REQ_SYNC;
struct page *page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);
lh = page_address(page);




[PATCH 4.11 048/150] efi: Don't issue error message when booted under Xen

2017-06-12 Thread Greg Kroah-Hartman
4.11-stable review patch.  If anyone has any objections, please let me know.

--

From: Juergen Gross 

commit 1ea34adb87c969b89dfd83f1905a79161e9ada26 upstream.

When booted as Xen dom0 there won't be an EFI memmap allocated. Avoid
issuing an error message in this case:

  [0.144079] efi: Failed to allocate new EFI memmap

Signed-off-by: Juergen Gross 
Signed-off-by: Matt Fleming 
Cc: Ard Biesheuvel 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: linux-...@vger.kernel.org
Link: http://lkml.kernel.org/r/20170526113652.21339-2-m...@codeblueprint.co.uk
Signed-off-by: Ingo Molnar 
Signed-off-by: Greg Kroah-Hartman 

---
 arch/x86/platform/efi/quirks.c |3 +++
 1 file changed, 3 insertions(+)

--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -358,6 +358,9 @@ void __init efi_free_boot_services(void)
free_bootmem_late(start, size);
}
 
+   if (!num_entries)
+   return;
+
new_size = efi.memmap.desc_size * num_entries;
new_phys = efi_memmap_alloc(num_entries);
if (!new_phys) {




[PATCH 4.11 046/150] nfsd4: fix null dereference on replay

2017-06-12 Thread Greg Kroah-Hartman
4.11-stable review patch.  If anyone has any objections, please let me know.

--

From: J. Bruce Fields 

commit 9a307403d374b993061f5992a6e260c944920d0b upstream.

if we receive a compound such that:

- the sessionid, slot, and sequence number in the SEQUENCE op
  match a cached successful reply with N ops, and
- the Nth operation of the compound is a PUTFH, PUTPUBFH,
  PUTROOTFH, or RESTOREFH,

then nfsd4_sequence will return 0 and set cstate->status to
nfserr_replay_cache.  The current filehandle will not be set.  This will
cause us to call check_nfsd_access with first argument NULL.

To nfsd4_compound it looks like we just successfully executed an
operation that set a filehandle, but the current filehandle is not set.

Fix this by moving the nfserr_replay_cache earlier.  There was never any
reason to have it after the encode_op label, since the only case where
we hit that is when opdesc->op_func sets it.

Note that there are two ways we could hit this case:

- a client is resending a previously sent compound that ended
  with one of the four PUTFH-like operations, or
- a client is sending a *new* compound that (incorrectly) shares
  sessionid, slot, and sequence number with a previously sent
  compound, and the length of the previously sent compound
  happens to match the position of a PUTFH-like operation in the
  new compound.

The second is obviously incorrect client behavior.  The first is also
very strange--the only purpose of a PUTFH-like operation is to set the
current filehandle to be used by the following operation, so there's no
point in having it as the last in a compound.

So it's likely this requires a buggy or malicious client to reproduce.

Reported-by: Scott Mayhew 
Signed-off-by: J. Bruce Fields 
Signed-off-by: Greg Kroah-Hartman 

---
 fs/nfsd/nfs4proc.c |   13 ++---
 1 file changed, 6 insertions(+), 7 deletions(-)

--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1769,6 +1769,12 @@ nfsd4_proc_compound(struct svc_rqst *rqs
opdesc->op_get_currentstateid(cstate, &op->u);
op->status = opdesc->op_func(rqstp, cstate, &op->u);
 
+   /* Only from SEQUENCE */
+   if (cstate->status == nfserr_replay_cache) {
+   dprintk("%s NFS4.1 replay from cache\n", __func__);
+   status = op->status;
+   goto out;
+   }
if (!op->status) {
if (opdesc->op_set_currentstateid)
opdesc->op_set_currentstateid(cstate, &op->u);
@@ -1779,14 +1785,7 @@ nfsd4_proc_compound(struct svc_rqst *rqs
if (need_wrongsec_check(rqstp))
op->status = 
check_nfsd_access(current_fh->fh_export, rqstp);
}
-
 encode_op:
-   /* Only from SEQUENCE */
-   if (cstate->status == nfserr_replay_cache) {
-   dprintk("%s NFS4.1 replay from cache\n", __func__);
-   status = op->status;
-   goto out;
-   }
if (op->status == nfserr_replay_me) {
op->replay = &cstate->replay_owner->so_replay;
nfsd4_encode_replay(&resp->xdr, op);




[PATCH 4.11 044/150] kthread: Fix use-after-free if kthread fork fails

2017-06-12 Thread Greg Kroah-Hartman
4.11-stable review patch.  If anyone has any objections, please let me know.

--

From: Vegard Nossum 

commit 4d6501dce079c1eb6bf0b1d8f528a5e81770109e upstream.

If a kthread forks (e.g. usermodehelper since commit 1da5c46fa965) but
fails in copy_process() between calling dup_task_struct() and setting
p->set_child_tid, then the value of p->set_child_tid will be inherited
from the parent and get prematurely freed by free_kthread_struct().

kthread()
 - worker_thread()
- process_one_work()
|  - call_usermodehelper_exec_work()
| - kernel_thread()
|- _do_fork()
|   - copy_process()
|  - dup_task_struct()
| - arch_dup_task_struct()
|- tsk->set_child_tid = current->set_child_tid // 
implied
|  - ...
|  - goto bad_fork_*
|  - ...
|  - free_task(tsk)
| - free_kthread_struct(tsk)
|- kfree(tsk->set_child_tid)
- ...
- schedule()
   - __schedule()
  - wq_worker_sleeping()
 - kthread_data(task)->flags // UAF

The problem started showing up with commit 1da5c46fa965 since it reused
->set_child_tid for the kthread worker data.

A better long-term solution might be to get rid of the ->set_child_tid
abuse. The comment in set_kthread_struct() also looks slightly wrong.

Debugged-by: Jamie Iles 
Fixes: 1da5c46fa965 ("kthread: Make struct kthread kmalloc'ed")
Signed-off-by: Vegard Nossum 
Acked-by: Oleg Nesterov 
Cc: Peter Zijlstra 
Cc: Greg Kroah-Hartman 
Cc: Andy Lutomirski 
Cc: Frederic Weisbecker 
Cc: Jamie Iles 
Link: http://lkml.kernel.org/r/20170509073959.17858-1-vegard.nos...@oracle.com
Signed-off-by: Thomas Gleixner 
Signed-off-by: Greg Kroah-Hartman 

---
 kernel/fork.c |   17 -
 1 file changed, 12 insertions(+), 5 deletions(-)

--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1552,6 +1552,18 @@ static __latent_entropy struct task_stru
if (!p)
goto fork_out;
 
+   /*
+* This _must_ happen before we call free_task(), i.e. before we jump
+* to any of the bad_fork_* labels. This is to avoid freeing
+* p->set_child_tid which is (ab)used as a kthread's data pointer for
+* kernel threads (PF_KTHREAD).
+*/
+   p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : 
NULL;
+   /*
+* Clear TID on mm_release()?
+*/
+   p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? 
child_tidptr : NULL;
+
ftrace_graph_init_task(p);
 
rt_mutex_init_task(p);
@@ -1715,11 +1727,6 @@ static __latent_entropy struct task_stru
}
}
 
-   p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : 
NULL;
-   /*
-* Clear TID on mm_release()?
-*/
-   p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? 
child_tidptr : NULL;
 #ifdef CONFIG_BLOCK
p->plug = NULL;
 #endif




[PATCH 4.11 053/150] arm64: KVM: Allow unaligned accesses at EL2

2017-06-12 Thread Greg Kroah-Hartman
4.11-stable review patch.  If anyone has any objections, please let me know.

--

From: Marc Zyngier 

commit 78fd6dcf11468a5a131b8365580d0c613bcc02cb upstream.

We currently have the SCTLR_EL2.A bit set, trapping unaligned accesses
at EL2, but we're not really prepared to deal with it. So far, this
has been unnoticed, until GCC 7 started emitting those (in particular
64bit writes on a 32bit boundary).

Since the rest of the kernel is pretty happy about that, let's follow
its example and set SCTLR_EL2.A to zero. Modern CPUs don't really
care.

Reported-by: Alexander Graf 
Signed-off-by: Marc Zyngier 
Signed-off-by: Christoffer Dall 
Signed-off-by: Greg Kroah-Hartman 

---
 arch/arm64/kvm/hyp-init.S |5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp-init.S
@@ -104,9 +104,10 @@ __do_hyp_init:
 
/*
 * Preserve all the RES1 bits while setting the default flags,
-* as well as the EE bit on BE.
+* as well as the EE bit on BE. Drop the A flag since the compiler
+* is allowed to generate unaligned accesses.
 */
-   ldr x4, =(SCTLR_EL2_RES1 | SCTLR_ELx_FLAGS)
+   ldr x4, =(SCTLR_EL2_RES1 | (SCTLR_ELx_FLAGS & ~SCTLR_ELx_A))
 CPU_BE(orr x4, x4, #SCTLR_ELx_EE)
msr sctlr_el2, x4
isb




[PATCH 2/8] usb: gadget: u_serial: propagate poll() to the next layer

2017-06-12 Thread Tal Shorer
In order for a serial function to add flags to the poll() mask of their
tty files, propagate the poll() callback to the next layer so it can
return a mask if it sees fit to do so.

Signed-off-by: Tal Shorer 
---
 drivers/usb/gadget/function/u_serial.c | 16 
 drivers/usb/gadget/function/u_serial.h |  3 +++
 2 files changed, 19 insertions(+)

diff --git a/drivers/usb/gadget/function/u_serial.c 
b/drivers/usb/gadget/function/u_serial.c
index 9b0805f..d466f58 100644
--- a/drivers/usb/gadget/function/u_serial.c
+++ b/drivers/usb/gadget/function/u_serial.c
@@ -1025,6 +1025,21 @@ static int gs_break_ctl(struct tty_struct *tty, int 
duration)
return status;
 }
 
+static unsigned int gs_poll(struct tty_struct *tty, struct file *file,
+   poll_table *wait)
+{
+   struct gs_port *port = tty->driver_data;
+   struct gserial *gser;
+   unsigned int mask = 0;
+
+   spin_lock_irq(&port->port_lock);
+   gser = port->port_usb;
+   if (gser && gser->poll)
+   mask |= gser->poll(gser, file, wait);
+   spin_unlock_irq(&port->port_lock);
+   return mask;
+}
+
 static const struct tty_operations gs_tty_ops = {
.open = gs_open,
.close =gs_close,
@@ -1035,6 +1050,7 @@ static const struct tty_operations gs_tty_ops = {
.chars_in_buffer =  gs_chars_in_buffer,
.unthrottle =   gs_unthrottle,
.break_ctl =gs_break_ctl,
+   .poll = gs_poll,
 };
 
 /*-*/
diff --git a/drivers/usb/gadget/function/u_serial.h 
b/drivers/usb/gadget/function/u_serial.h
index c20210c..ce00840 100644
--- a/drivers/usb/gadget/function/u_serial.h
+++ b/drivers/usb/gadget/function/u_serial.h
@@ -12,6 +12,7 @@
 #ifndef __U_SERIAL_H
 #define __U_SERIAL_H
 
+#include 
 #include 
 #include 
 
@@ -50,6 +51,8 @@ struct gserial {
void (*connect)(struct gserial *p);
void (*disconnect)(struct gserial *p);
int (*send_break)(struct gserial *p, int duration);
+   unsigned int (*poll)(struct gserial *p, struct file *file,
+   poll_table *wait);
 };
 
 /* utilities to allocate/free request and buffer */
-- 
2.7.4



[PATCH 4.11 033/150] serial: ifx6x60: fix use-after-free on module unload

2017-06-12 Thread Greg Kroah-Hartman
4.11-stable review patch.  If anyone has any objections, please let me know.

--

From: Johan Hovold 

commit 1e948479b3d63e3ac0ecca13cbf4921c7d17c168 upstream.

Make sure to deregister the SPI driver before releasing the tty driver
to avoid use-after-free in the SPI remove callback where the tty
devices are deregistered.

Fixes: 72d4724ea54c ("serial: ifx6x60: Add modem power off function in the 
platform reboot process")
Cc: Jun Chen 
Signed-off-by: Johan Hovold 
Signed-off-by: Greg Kroah-Hartman 

---
 drivers/tty/serial/ifx6x60.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/drivers/tty/serial/ifx6x60.c
+++ b/drivers/tty/serial/ifx6x60.c
@@ -1382,9 +1382,9 @@ static struct spi_driver ifx_spi_driver
 static void __exit ifx_spi_exit(void)
 {
/* unregister */
+   spi_unregister_driver(&ifx_spi_driver);
tty_unregister_driver(tty_drv);
put_tty_driver(tty_drv);
-   spi_unregister_driver(_spi_driver);
unregister_reboot_notifier(_modem_reboot_notifier_block);
 }
 




[PATCH 7/8] usb: gadget: f_acm: notify the user on SetLineCoding

2017-06-12 Thread Tal Shorer
Notify the user with a POLLPRI event when the host issues a
SetLineCoding request so that they can act upon it, for example by
configuring the line coding on a real serial port.

The event is cleared when the user reads the line coding with the
USB_F_ACM_GET_LINE_CODING ioctl().
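
For illustration, a rough userspace sketch of how a gadget-side application
might consume this event (the /dev/ttyGS0 node name and the uapi header from
patch 6/8 are assumptions about the setup, not part of this patch):

#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/usb/cdc.h>
#include <linux/usb/f_acm.h>	/* USB_F_ACM_GET_LINE_CODING, from patch 6/8 */

int main(void)
{
	struct usb_cdc_line_coding lc;
	struct pollfd pfd;
	int fd = open("/dev/ttyGS0", O_RDWR | O_NOCTTY);

	if (fd < 0)
		return 1;

	pfd.fd = fd;
	pfd.events = POLLPRI;
	while (poll(&pfd, 1, -1) > 0) {
		if (!(pfd.revents & POLLPRI))
			continue;
		/* Reading the coding also clears the pending event. */
		if (ioctl(fd, USB_F_ACM_GET_LINE_CODING, &lc) == 0)
			printf("host set %u baud, %u data bits\n",
			       (unsigned int)lc.dwDTERate, lc.bDataBits);
	}
	return 0;
}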

Signed-off-by: Tal Shorer 
---
 drivers/usb/gadget/function/f_acm.c | 38 ++---
 1 file changed, 27 insertions(+), 11 deletions(-)

diff --git a/drivers/usb/gadget/function/f_acm.c 
b/drivers/usb/gadget/function/f_acm.c
index 5feea7c..0983999 100644
--- a/drivers/usb/gadget/function/f_acm.c
+++ b/drivers/usb/gadget/function/f_acm.c
@@ -58,6 +58,9 @@ struct f_acm {
struct usb_request  *notify_req;
 
struct usb_cdc_line_coding  port_line_coding;   /* 8-N-1 etc */
+   /* we have a SetLineCoding request that the user hasn't read yet */
+   bool set_line_coding_pending;
+   wait_queue_head_t set_line_coding_waitq;
 
/* SetControlLineState request -- CDC 1.1 section 6.2.14 (INPUT) */
u16 port_handshake_bits;
@@ -326,23 +329,19 @@ static void acm_complete_set_line_coding(struct usb_ep 
*ep,
} else {
struct usb_cdc_line_coding  *value = req->buf;
 
-   /* REVISIT:  we currently just remember this data.
-   * If we change that,
-   * (a) update whatever hardware needs updating,
-   * (b) worry about locking.  This is information on
-   * the order of 9600-8-N-1 ... most of which means
-   * nothing unless we control a real RS232 line.
-   */
dev_dbg(>gadget->dev,
"acm ttyGS%d set_line_coding: %d %d %d %d\n",
acm->port_num, le32_to_cpu(value->dwDTERate),
value->bCharFormat, value->bParityType,
value->bDataBits);
if (value->bCharFormat > 2 || value->bParityType > 4 ||
-   value->bDataBits < 5 || value->bDataBits > 8)
+   value->bDataBits < 5 || value->bDataBits > 8) {
usb_ep_set_halt(ep);
-   else
+   } else {
acm->port_line_coding = *value;
+   acm->set_line_coding_pending = true;
+   wake_up_interruptible(>set_line_coding_waitq);
+   }
}
 }
 
@@ -598,6 +597,19 @@ static void acm_disconnect(struct gserial *port)
acm_notify_serial_state(acm);
 }
 
+static unsigned int acm_poll(struct gserial *port, struct file *file,
+   poll_table *wait)
+{
+   unsigned int mask = 0;
+   struct f_acm *acm = port_to_acm(port);
+
+   poll_wait(file, >set_line_coding_waitq, wait);
+   if (acm->set_line_coding_pending)
+   mask |= POLLPRI;
+   return mask;
+}
+
+
 static int acm_send_break(struct gserial *port, int duration)
 {
struct f_acm*acm = port_to_acm(port);
@@ -620,10 +632,12 @@ static int acm_ioctl(struct gserial *port, unsigned int 
cmd, unsigned long arg)
switch (cmd) {
case USB_F_ACM_GET_LINE_CODING:
if (copy_to_user((__user void *)arg, >port_line_coding,
-   sizeof(acm->port_line_coding)))
+   sizeof(acm->port_line_coding))) {
ret = -EFAULT;
-   else
+   } else {
ret = 0;
+   acm->set_line_coding_pending = false;
+   }
break;
}
return ret;
@@ -763,11 +777,13 @@ static struct usb_function *acm_alloc_func(struct 
usb_function_instance *fi)
return ERR_PTR(-ENOMEM);
 
spin_lock_init(>lock);
+   init_waitqueue_head(>set_line_coding_waitq);
 
acm->port.connect = acm_connect;
acm->port.disconnect = acm_disconnect;
acm->port.send_break = acm_send_break;
acm->port.ioctl = acm_ioctl;
+   acm->port.poll = acm_poll;
 
acm->port.func.name = "acm";
acm->port.func.strings = acm_strings;
-- 
2.7.4



[PATCH 3/8] usb: gadget: f_acm: validate set_line_coding requests

2017-06-12 Thread Tal Shorer
We shouldn't accept malformed set_line_coding requests.
The valid values are taken from table 17 (section 6.3.11) of the CDC 1.2
spec available at http://www.usb.org/developers/docs/devclass_docs/
The table is in the file PSTN120.pdf.
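
For reference, a standalone restatement of the check introduced below (an
illustrative helper only, not part of the patch; the hunk open-codes the same
test):

/* Valid ranges per CDC PSTN subclass, table 17. */
static bool line_coding_is_valid(const struct usb_cdc_line_coding *lc)
{
	/* bCharFormat: 0 = 1 stop bit, 1 = 1.5 stop bits, 2 = 2 stop bits  */
	/* bParityType: 0 = none, 1 = odd, 2 = even, 3 = mark, 4 = space    */
	/* bDataBits:   5, 6, 7 or 8 (16 is also in the spec, rejected here) */
	return lc->bCharFormat <= 2 &&
	       lc->bParityType <= 4 &&
	       lc->bDataBits >= 5 && lc->bDataBits <= 8;
}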

Signed-off-by: Tal Shorer 
---
 drivers/usb/gadget/function/f_acm.c | 23 ---
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/drivers/usb/gadget/function/f_acm.c 
b/drivers/usb/gadget/function/f_acm.c
index 5e3828d..e023313 100644
--- a/drivers/usb/gadget/function/f_acm.c
+++ b/drivers/usb/gadget/function/f_acm.c
@@ -326,13 +326,22 @@ static void acm_complete_set_line_coding(struct usb_ep 
*ep,
struct usb_cdc_line_coding  *value = req->buf;
 
/* REVISIT:  we currently just remember this data.
-* If we change that, (a) validate it first, then
-* (b) update whatever hardware needs updating,
-* (c) worry about locking.  This is information on
-* the order of 9600-8-N-1 ... most of which means
-* nothing unless we control a real RS232 line.
-*/
-   acm->port_line_coding = *value;
+   * If we change that,
+   * (a) update whatever hardware needs updating,
+   * (b) worry about locking.  This is information on
+   * the order of 9600-8-N-1 ... most of which means
+   * nothing unless we control a real RS232 line.
+   */
+   dev_dbg(>gadget->dev,
+   "acm ttyGS%d set_line_coding: %d %d %d %d\n",
+   acm->port_num, le32_to_cpu(value->dwDTERate),
+   value->bCharFormat, value->bParityType,
+   value->bDataBits);
+   if (value->bCharFormat > 2 || value->bParityType > 4 ||
+   value->bDataBits < 5 || value->bDataBits > 8)
+   usb_ep_set_halt(ep);
+   else
+   acm->port_line_coding = *value;
}
 }
 
-- 
2.7.4



[PATCH 5/8] usb: gadget: f_acm: initialize port_line_coding when creating an instance

2017-06-12 Thread Tal Shorer
Initialize acm->port_line_coding with something that makes sense so
that we can return a valid line coding if the host requests
GetLineCoding before requesting SetLineCoding

Signed-off-by: Tal Shorer 
---
 drivers/usb/gadget/function/f_acm.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/usb/gadget/function/f_acm.c 
b/drivers/usb/gadget/function/f_acm.c
index e023313..b7a1466 100644
--- a/drivers/usb/gadget/function/f_acm.c
+++ b/drivers/usb/gadget/function/f_acm.c
@@ -763,6 +763,12 @@ static struct usb_function *acm_alloc_func(struct 
usb_function_instance *fi)
acm->port.func.unbind = acm_unbind;
acm->port.func.free_func = acm_free_func;
 
+   /* initialize port_line_coding with something that makes sense */
+   coding.dwDTERate = cpu_to_le32(9600);
+   coding.bCharFormat = USB_CDC_1_STOP_BITS;
+   coding.bParityType = USB_CDC_NO_PARITY;
+   coding.bDataBits = 8;
+
return >port.func;
 }
 
-- 
2.7.4



[PATCH 8/8] usb: gadget: u_serial: remove port_line_config from struct gserial

2017-06-12 Thread Tal Shorer
GetLineCoding and SetLineCoding are a cdc-acm thing. It doesn't make
sense to keep that state in the generic u_serial layer. Moreover, f_acm
has its own port_line_coding in its own struct and uses that, while the
copy in struct gserial is set once upon initialization and then never
used. Also, the initial, never-used values were invalid, with bDataBits
and bCharFormat having each other's value.

Signed-off-by: Tal Shorer 
---
 drivers/usb/gadget/function/u_serial.c | 22 ++
 drivers/usb/gadget/function/u_serial.h |  3 ---
 2 files changed, 2 insertions(+), 23 deletions(-)

diff --git a/drivers/usb/gadget/function/u_serial.c 
b/drivers/usb/gadget/function/u_serial.c
index 8d9abf1..654d4a6 100644
--- a/drivers/usb/gadget/function/u_serial.c
+++ b/drivers/usb/gadget/function/u_serial.c
@@ -129,9 +129,6 @@ struct gs_port {
wait_queue_head_t   drain_wait; /* wait while writes drain */
boolwrite_busy;
wait_queue_head_t   close_wait;
-
-   /* REVISIT this state ... */
-   struct usb_cdc_line_coding port_line_coding;/* 8-N-1 etc */
 };
 
 static struct portmaster {
@@ -1314,7 +1311,7 @@ static void gserial_console_exit(void)
 #endif
 
 static int
-gs_port_alloc(unsigned port_num, struct usb_cdc_line_coding *coding)
+gs_port_alloc(unsigned port_num)
 {
struct gs_port  *port;
int ret = 0;
@@ -1343,7 +1340,6 @@ gs_port_alloc(unsigned port_num, struct 
usb_cdc_line_coding *coding)
INIT_LIST_HEAD(>write_pool);
 
port->port_num = port_num;
-   port->port_line_coding = *coding;
 
ports[port_num].port = port;
 out:
@@ -1392,18 +1388,12 @@ EXPORT_SYMBOL_GPL(gserial_free_line);
 
 int gserial_alloc_line(unsigned char *line_num)
 {
-   struct usb_cdc_line_coding  coding;
struct device   *tty_dev;
int ret;
int port_num;
 
-   coding.dwDTERate = cpu_to_le32(9600);
-   coding.bCharFormat = 8;
-   coding.bParityType = USB_CDC_NO_PARITY;
-   coding.bDataBits = USB_CDC_1_STOP_BITS;
-
for (port_num = 0; port_num < MAX_U_SERIAL_PORTS; port_num++) {
-   ret = gs_port_alloc(port_num, );
+   ret = gs_port_alloc(port_num);
if (ret == -EBUSY)
continue;
if (ret)
@@ -1491,11 +1481,6 @@ int gserial_connect(struct gserial *gser, u8 port_num)
gser->ioport = port;
port->port_usb = gser;
 
-   /* REVISIT unclear how best to handle this state...
-* we don't really couple it with the Linux TTY.
-*/
-   gser->port_line_coding = port->port_line_coding;
-
/* REVISIT if waiting on "carrier detect", signal. */
 
/* if it's already open, start I/O ... and notify the serial
@@ -1543,9 +1528,6 @@ void gserial_disconnect(struct gserial *gser)
/* tell the TTY glue not to do I/O here any more */
spin_lock_irqsave(>port_lock, flags);
 
-   /* REVISIT as above: how best to track this? */
-   port->port_line_coding = gser->port_line_coding;
-
port->port_usb = NULL;
gser->ioport = NULL;
if (port->port.count > 0 || port->openclose) {
diff --git a/drivers/usb/gadget/function/u_serial.h 
b/drivers/usb/gadget/function/u_serial.h
index 8d0901e..0549efe 100644
--- a/drivers/usb/gadget/function/u_serial.h
+++ b/drivers/usb/gadget/function/u_serial.h
@@ -44,9 +44,6 @@ struct gserial {
struct usb_ep   *in;
struct usb_ep   *out;
 
-   /* REVISIT avoid this CDC-ACM support harder ... */
-   struct usb_cdc_line_coding port_line_coding;/* 9600-8-N-1 etc */
-
/* notification callbacks */
void (*connect)(struct gserial *p);
void (*disconnect)(struct gserial *p);
-- 
2.7.4



[PATCH 4/8] usb: gadget: u_serial: propagate ioctl() to the next layer

2017-06-12 Thread Tal Shorer
In order for a serial function to implement its own ioctl() calls,
propagate the ioctl() callback to the next layer so it can handle it if
it sees fit to do so.
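
For illustration, a hypothetical function driver would hook the new callback
roughly like this ("foo" and its ioctl number are made-up names; f_acm does
the real thing later in this series). Returning -ENOIOCTLCMD for unknown
commands lets the tty core fall back to its default ioctl handling:

static int foo_ioctl(struct gserial *port, unsigned int cmd, unsigned long arg)
{
	switch (cmd) {
	case FOO_GET_STATE:	/* hypothetical ioctl number */
		return put_user(port_to_foo(port)->state, (int __user *)arg);
	default:
		return -ENOIOCTLCMD;	/* not ours, let the core handle it */
	}
}

	/* ... and at allocation time: */
	foo->port.ioctl = foo_ioctl;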

Signed-off-by: Tal Shorer 
---
 drivers/usb/gadget/function/u_serial.c | 15 +++
 drivers/usb/gadget/function/u_serial.h |  1 +
 2 files changed, 16 insertions(+)

diff --git a/drivers/usb/gadget/function/u_serial.c 
b/drivers/usb/gadget/function/u_serial.c
index d466f58..8d9abf1 100644
--- a/drivers/usb/gadget/function/u_serial.c
+++ b/drivers/usb/gadget/function/u_serial.c
@@ -1040,6 +1040,20 @@ static unsigned int gs_poll(struct tty_struct *tty, 
struct file *file,
return mask;
 }
 
+static int gs_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long 
arg)
+{
+   struct gs_port *port = tty->driver_data;
+   struct gserial *gser;
+   int ret = -ENOIOCTLCMD;
+
+   spin_lock_irq(>port_lock);
+   gser = port->port_usb;
+   if (gser && gser->ioctl)
+   ret = gser->ioctl(gser, cmd, arg);
+   spin_unlock_irq(>port_lock);
+   return ret;
+}
+
 static const struct tty_operations gs_tty_ops = {
.open = gs_open,
.close =gs_close,
@@ -1051,6 +1065,7 @@ static const struct tty_operations gs_tty_ops = {
.unthrottle =   gs_unthrottle,
.break_ctl =gs_break_ctl,
.poll = gs_poll,
+   .ioctl =gs_ioctl,
 };
 
 /*-*/
diff --git a/drivers/usb/gadget/function/u_serial.h 
b/drivers/usb/gadget/function/u_serial.h
index ce00840..8d0901e 100644
--- a/drivers/usb/gadget/function/u_serial.h
+++ b/drivers/usb/gadget/function/u_serial.h
@@ -53,6 +53,7 @@ struct gserial {
int (*send_break)(struct gserial *p, int duration);
unsigned int (*poll)(struct gserial *p, struct file *file,
poll_table *wait);
+   int (*ioctl)(struct gserial *p, unsigned int cmd, unsigned long arg);
 };
 
 /* utilities to allocate/free request and buffer */
-- 
2.7.4



[PATCH 6/8] usb: gadget: f_acm: add an ioctl to get the current line coding

2017-06-12 Thread Tal Shorer
The user can issue the USB_F_ACM_GET_LINE_CODING ioctl() to get the current
line coding as set by the host (or the default if it hasn't been set yet).
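
A minimal userspace sketch (the /dev/ttyGS0 node name is an assumption about
how the function is bound; the header is the new uapi file added below):

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/usb/cdc.h>
#include <linux/usb/f_acm.h>

int main(void)
{
	struct usb_cdc_line_coding lc;
	int fd = open("/dev/ttyGS0", O_RDWR);

	if (fd < 0 || ioctl(fd, USB_F_ACM_GET_LINE_CODING, &lc) < 0)
		return 1;
	printf("%u baud, %u data bits, parity code %u, stop bits code %u\n",
	       (unsigned int)lc.dwDTERate, lc.bDataBits,
	       lc.bParityType, lc.bCharFormat);
	return 0;
}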

Signed-off-by: Tal Shorer 
---
 Documentation/ioctl/ioctl-number.txt |  1 +
 drivers/usb/gadget/function/f_acm.c  | 27 +++
 include/uapi/linux/usb/f_acm.h   | 12 
 3 files changed, 36 insertions(+), 4 deletions(-)
 create mode 100644 include/uapi/linux/usb/f_acm.h

diff --git a/Documentation/ioctl/ioctl-number.txt 
b/Documentation/ioctl/ioctl-number.txt
index 1e9fcb4..3d70680 100644
--- a/Documentation/ioctl/ioctl-number.txt
+++ b/Documentation/ioctl/ioctl-number.txt
@@ -329,6 +329,7 @@ Code  Seq#(hex) Include File    Comments
 0xCA   80-8F   uapi/scsi/cxlflash_ioctl.h
 0xCB   00-1F   CBM serial IEC bus  in development:


+0xCD   10-1F   linux/usb/f_acm.h
 0xCD   01  linux/reiserfs_fs.h
 0xCF   02  fs/cifs/ioctl.c
 0xDB   00-0F   drivers/char/mwave/mwavepub.h
diff --git a/drivers/usb/gadget/function/f_acm.c 
b/drivers/usb/gadget/function/f_acm.c
index b7a1466..5feea7c 100644
--- a/drivers/usb/gadget/function/f_acm.c
+++ b/drivers/usb/gadget/function/f_acm.c
@@ -19,6 +19,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "u_serial.h"
 
@@ -611,6 +612,23 @@ static int acm_send_break(struct gserial *port, int 
duration)
return acm_notify_serial_state(acm);
 }
 
+static int acm_ioctl(struct gserial *port, unsigned int cmd, unsigned long arg)
+{
+   struct f_acm*acm = port_to_acm(port);
+   int ret = -ENOIOCTLCMD;
+
+   switch (cmd) {
+   case USB_F_ACM_GET_LINE_CODING:
+   if (copy_to_user((__user void *)arg, >port_line_coding,
+   sizeof(acm->port_line_coding)))
+   ret = -EFAULT;
+   else
+   ret = 0;
+   break;
+   }
+   return ret;
+}
+
 /*-*/
 
 /* ACM function driver setup/binding */
@@ -749,6 +767,7 @@ static struct usb_function *acm_alloc_func(struct 
usb_function_instance *fi)
acm->port.connect = acm_connect;
acm->port.disconnect = acm_disconnect;
acm->port.send_break = acm_send_break;
+   acm->port.ioctl = acm_ioctl;
 
acm->port.func.name = "acm";
acm->port.func.strings = acm_strings;
@@ -764,10 +783,10 @@ static struct usb_function *acm_alloc_func(struct 
usb_function_instance *fi)
acm->port.func.free_func = acm_free_func;
 
/* initialize port_line_coding with something that makes sense */
-   coding.dwDTERate = cpu_to_le32(9600);
-   coding.bCharFormat = USB_CDC_1_STOP_BITS;
-   coding.bParityType = USB_CDC_NO_PARITY;
-   coding.bDataBits = 8;
+   acm->port_line_coding.dwDTERate = cpu_to_le32(9600);
+   acm->port_line_coding.bCharFormat = USB_CDC_1_STOP_BITS;
+   acm->port_line_coding.bParityType = USB_CDC_NO_PARITY;
+   acm->port_line_coding.bDataBits = 8;
 
return >port.func;
 }
diff --git a/include/uapi/linux/usb/f_acm.h b/include/uapi/linux/usb/f_acm.h
new file mode 100644
index 000..51f96f0
--- /dev/null
+++ b/include/uapi/linux/usb/f_acm.h
@@ -0,0 +1,12 @@
+/* f_acm.h -- Header file for USB CDC-ACM gadget function */
+
+#ifndef __UAPI_LINUX_USB_F_ACM_H
+#define __UAPI_LINUX_USB_F_ACM_H
+
+#include 
+#include 
+
+/* The 0xCD code is also used by reiserfs. We use the 0x10-0x1F range. */
+#define USB_F_ACM_GET_LINE_CODING _IOR(0xCD, 0x10, struct usb_cdc_line_coding)
+
+#endif /* __UAPI_LINUX_USB_F_ACM_H */
-- 
2.7.4



[PATCH 1/8] tty: add a poll() callback in struct tty_operations

2017-06-12 Thread Tal Shorer
If a tty driver wants to notify the user of some exceptional event,
such as a usb cdc acm device set_line_coding event, it needs a way to
modify the mask returned by poll() and possibly also add wait queues.
In order to do that, we allow the driver to supply a poll() callback
of its own, which will be called in n_tty_poll().
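
For illustration, a driver using the new hook might look like this (everything
named foo_* is hypothetical; only the tty_operations entry is from this patch):

static unsigned int foo_tty_poll(struct tty_struct *tty, struct file *file,
				 poll_table *wait)
{
	struct foo_port *fp = tty->driver_data;
	unsigned int mask = 0;

	/* Register an extra waitqueue so poll() sleeps until our event fires. */
	poll_wait(file, &fp->event_wait, wait);
	if (fp->event_pending)
		mask |= POLLPRI;	/* OR-ed into the mask built by n_tty_poll() */
	return mask;
}

static const struct tty_operations foo_tty_ops = {
	/* ... the usual open/close/write callbacks ... */
	.poll	= foo_tty_poll,
};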

Signed-off-by: Tal Shorer 
---
 drivers/tty/n_tty.c| 2 ++
 include/linux/tty_driver.h | 3 +++
 2 files changed, 5 insertions(+)

diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c
index bdf0e6e..7af8c29 100644
--- a/drivers/tty/n_tty.c
+++ b/drivers/tty/n_tty.c
@@ -2394,6 +2394,8 @@ static unsigned int n_tty_poll(struct tty_struct *tty, 
struct file *file,
tty_chars_in_buffer(tty) < WAKEUP_CHARS &&
tty_write_room(tty) > 0)
mask |= POLLOUT | POLLWRNORM;
+   if (tty->ops->poll)
+   mask |= tty->ops->poll(tty, file, wait);
return mask;
 }
 
diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h
index b742b5e..630ef03 100644
--- a/include/linux/tty_driver.h
+++ b/include/linux/tty_driver.h
@@ -243,6 +243,7 @@
 #include 
 #include 
 #include 
+#include 
 
 struct tty_struct;
 struct tty_driver;
@@ -285,6 +286,8 @@ struct tty_operations {
int (*set_termiox)(struct tty_struct *tty, struct termiox *tnew);
int (*get_icount)(struct tty_struct *tty,
struct serial_icounter_struct *icount);
+   unsigned int (*poll)(struct tty_struct *tty, struct file *file,
+   poll_table *wait);
 #ifdef CONFIG_CONSOLE_POLL
int (*poll_init)(struct tty_driver *driver, int line, char *options);
int (*poll_get_char)(struct tty_driver *driver, int line);
-- 
2.7.4



[PATCH 4.11 052/150] arm64: KVM: Preserve RES1 bits in SCTLR_EL2

2017-06-12 Thread Greg Kroah-Hartman
4.11-stable review patch.  If anyone has any objections, please let me know.

--

From: Marc Zyngier 

commit d68c1f7fd1b7148dab5fe658321d511998969f2d upstream.

__do_hyp_init has the rather bad habit of ignoring RES1 bits and
writing them back as zero. On a v8.0-8.2 CPU, this doesn't do anything
bad, but may end-up being pretty nasty on future revisions of the
architecture.

Let's preserve those bits so that we don't have to fix this later on.

Signed-off-by: Marc Zyngier 
Signed-off-by: Christoffer Dall 
Signed-off-by: Greg Kroah-Hartman 

---
 arch/arm64/include/asm/sysreg.h |4 
 arch/arm64/kvm/hyp-init.S   |   10 ++
 2 files changed, 10 insertions(+), 4 deletions(-)

--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -138,6 +138,10 @@
 #define SCTLR_ELx_A(1 << 1)
 #define SCTLR_ELx_M1
 
+#define SCTLR_EL2_RES1 ((1 << 4)  | (1 << 5)  | (1 << 11) | (1 << 16) | \
+(1 << 16) | (1 << 18) | (1 << 22) | (1 << 23) | \
+(1 << 28) | (1 << 29))
+
 #define SCTLR_ELx_FLAGS(SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \
 SCTLR_ELx_SA | SCTLR_ELx_I)
 
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp-init.S
@@ -102,10 +102,12 @@ __do_hyp_init:
tlbialle2
dsb sy
 
-   mrs x4, sctlr_el2
-   and x4, x4, #SCTLR_ELx_EE   // preserve endianness of EL2
-   ldr x5, =SCTLR_ELx_FLAGS
-   orr x4, x4, x5
+   /*
+* Preserve all the RES1 bits while setting the default flags,
+* as well as the EE bit on BE.
+*/
+   ldr x4, =(SCTLR_EL2_RES1 | SCTLR_ELx_FLAGS)
+CPU_BE(orr x4, x4, #SCTLR_ELx_EE)
msr sctlr_el2, x4
isb
 




[PATCH 0/8] Allow f_acm gadgets to notify the user about SetLineCoding requests

2017-06-12 Thread Tal Shorer
I'm currently working on a project where I'd like to have an OMAP board
running Linux act as a usb-to-uart converter (using f_acm), and I've run
into an issue: there's no way for the application to know whether the host
has issued a SetLineCoding request (after which parity/baudrate should
be changed to match the host's request).

This series adds the support necessary to achieve that:
- Allowing tty drivers to supply a poll() function to notify the user of
driver-specific events.
- Propagating poll() and ioctl() from u_serial to the next layer (f_acm
in this case).
- Letting the user read the current line coding set by the host (via an
ioctl() call).
- Notifying the user when there's a pending SetLineCoding request they
haven't read yet.

The last patch also removes the port_line_coding field from
struct gserial. It made no sense to have it there (and had a REVISIT
comment at every turn); it was never used and was initialized with
invalid values.

Tal Shorer (8):
  tty: add a poll() callback in struct tty_operations
  usb: gadget: u_serial: propagate poll() to the next layer
  usb: gadget: f_acm: validate set_line_coding requests
  usb: gadget: u_serial: propagate ioctl() to the next layer
  usb: gadget: f_acm: initialize port_line_coding when creating an
instance
  usb: gadget: f_acm: add an ioctl to get the current line coding
  usb: gadget: f_acm: notify the user on SetLineCoding
  usb: gadget: u_serial: remove port_line_config from struct gserial

 Documentation/ioctl/ioctl-number.txt   |  1 +
 drivers/tty/n_tty.c|  2 ++
 drivers/usb/gadget/function/f_acm.c| 66 +-
 drivers/usb/gadget/function/u_serial.c | 53 ---
 drivers/usb/gadget/function/u_serial.h |  7 ++--
 include/linux/tty_driver.h |  3 ++
 include/uapi/linux/usb/f_acm.h | 12 +++
 7 files changed, 113 insertions(+), 31 deletions(-)
 create mode 100644 include/uapi/linux/usb/f_acm.h

--
2.7.4


[PATCH] x86/mm: Split read_cr3() into read_cr3_pa() and __read_cr3()

2017-06-12 Thread Andy Lutomirski
The kernel has several code paths that read CR3.  Most of them assume that
CR3 contains the PGD's physical address, whereas some of them awkwardly
use PHYSICAL_PAGE_MASK to mask off low bits.

Add explicit mask macros for CR3 and convert all of the CR3 readers.
This will keep them from breaking when PCID is enabled.
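
The split presumably boils down to something like the following (sketch only;
the processor.h hunk is not shown in full here, and CR3_ADDR_MASK is one of
the new mask macros this patch introduces):

static inline unsigned long read_cr3_pa(void)
{
	/* __read_cr3() is the raw register value, PCID and flag bits included. */
	return __read_cr3() & CR3_ADDR_MASK;
}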

Cc: Tom Lendacky 
Cc: Juergen Gross 
Cc: xen-devel 
Cc: Boris Ostrovsky 
Signed-off-by: Andy Lutomirski 
---

Hi Ingo-

I broke this out because Tom's SME series and my PCID series both need it.
Please consider applying it to tip:x86/mm.

I'll send PCID v2 soon.  It'll apply to x86/mm + sched/urgent + this patch.

Thanks,
Andy

 arch/x86/boot/compressed/pagetable.c   |  2 +-
 arch/x86/include/asm/efi.h |  2 +-
 arch/x86/include/asm/mmu_context.h |  4 ++--
 arch/x86/include/asm/paravirt.h|  2 +-
 arch/x86/include/asm/processor-flags.h | 36 ++
 arch/x86/include/asm/processor.h   |  8 
 arch/x86/include/asm/special_insns.h   | 10 +++---
 arch/x86/include/asm/tlbflush.h|  4 ++--
 arch/x86/kernel/head64.c   |  3 ++-
 arch/x86/kernel/paravirt.c |  2 +-
 arch/x86/kernel/process_32.c   |  2 +-
 arch/x86/kernel/process_64.c   |  2 +-
 arch/x86/kvm/vmx.c |  2 +-
 arch/x86/mm/fault.c| 10 +-
 arch/x86/mm/ioremap.c  |  2 +-
 arch/x86/platform/efi/efi_64.c |  4 ++--
 arch/x86/platform/olpc/olpc-xo1-pm.c   |  2 +-
 arch/x86/power/cpu.c   |  2 +-
 arch/x86/power/hibernate_64.c  |  3 ++-
 arch/x86/xen/mmu_pv.c  |  6 +++---
 20 files changed, 79 insertions(+), 29 deletions(-)

diff --git a/arch/x86/boot/compressed/pagetable.c 
b/arch/x86/boot/compressed/pagetable.c
index 1d78f1739087..8e69df96492e 100644
--- a/arch/x86/boot/compressed/pagetable.c
+++ b/arch/x86/boot/compressed/pagetable.c
@@ -92,7 +92,7 @@ void initialize_identity_maps(void)
 * and we must append to the existing area instead of entirely
 * overwriting it.
 */
-   level4p = read_cr3();
+   level4p = read_cr3_pa();
if (level4p == (unsigned long)_pgtable) {
debug_putstr("booted via startup_32()\n");
pgt_data.pgt_buf = _pgtable + BOOT_INIT_PGT_SIZE;
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 2f77bcefe6b4..d2ff779f347e 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -74,7 +74,7 @@ struct efi_scratch {
__kernel_fpu_begin();   \
\
if (efi_scratch.use_pgd) {  \
-   efi_scratch.prev_cr3 = read_cr3();  \
+   efi_scratch.prev_cr3 = __read_cr3();\
write_cr3((unsigned long)efi_scratch.efi_pgt);  \
__flush_tlb_all();  \
}   \
diff --git a/arch/x86/include/asm/mmu_context.h 
b/arch/x86/include/asm/mmu_context.h
index 5a93f6261302..cfe6034ebfc6 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -269,7 +269,7 @@ static inline bool arch_vma_access_permitted(struct 
vm_area_struct *vma,
 
 /*
  * This can be used from process context to figure out what the value of
- * CR3 is without needing to do a (slow) read_cr3().
+ * CR3 is without needing to do a (slow) __read_cr3().
  *
  * It's intended to be used for code like KVM that sneakily changes CR3
  * and needs to restore it.  It needs to be used very carefully.
@@ -281,7 +281,7 @@ static inline unsigned long __get_current_cr3_fast(void)
/* For now, be very restrictive about when this can be called. */
VM_WARN_ON(in_nmi() || !in_atomic());
 
-   VM_BUG_ON(cr3 != read_cr3());
+   VM_BUG_ON(cr3 != __read_cr3());
return cr3;
 }
 
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 9a15739d9f4b..a63e77f8eb41 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -61,7 +61,7 @@ static inline void write_cr2(unsigned long x)
PVOP_VCALL1(pv_mmu_ops.write_cr2, x);
 }
 
-static inline unsigned long read_cr3(void)
+static inline unsigned long __read_cr3(void)
 {
return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr3);
 }
diff --git a/arch/x86/include/asm/processor-flags.h 
b/arch/x86/include/asm/processor-flags.h
index 39fb618e2211..79aa2f98398d 100644
--- a/arch/x86/include/asm/processor-flags.h
+++ b/arch/x86/include/asm/processor-flags.h
@@ -8,4 +8,40 @@
 #else
 #define X86_VM_MASK0 /* No VM86 support */
 #endif
+
+/*
+ * CR3's layout varies depending on several things.
+ *
+ * If CR4.PCIDE is set (64-bit only), then CR3[11:0] is the address 

Re: usb/gadget: potential deadlock in gadgetfs_suspend

2017-06-12 Thread Andrey Konovalov
On Mon, Jun 12, 2017 at 6:11 PM, Andrey Konovalov  wrote:
> On Fri, Jun 9, 2017 at 9:14 PM, Alan Stern  wrote:
>> On Fri, 9 Jun 2017, Andrey Konovalov wrote:
>>
>>> On Fri, Jun 9, 2017 at 2:41 PM, Andrey Konovalov  
>>> wrote:
>>> > Hi,
>>> >
>>> > I'm getting some hangs while fuzzing the kernel with syzkaller.
>>> >
>>> > Possibly it happens during the execution of the following syzkaller 
>>> > program:
>>> >
>>> > mmap(&(0x7f00/0xb9)=nil, (0xb9), 0x3, 0x32,
>>> > 0x, 0x0)
>>> > r0 = 
>>> > open$usb(&(0x7f001000)="2f6465762f6761646765742f64756d6d795f75646300",
>>> > 0xc002, 0x0)
>>> > r1 = 
>>> > open$usb(&(0x7f002000)="2f6465762f6761646765742f64756d6d795f75646300",
>>> > 0x1, 0x102)
>>> > write$usb(r1, &(0x7f003000)={0x0, {0x9, 0x2, 0x1b, 0x0, 0x5, 0x0,
>>> > 0x80, 0x8, 0x9, 0x4, 0x1000, 0xfef9, 0x1, 0xff, 0x0,
>>
>> I don't understand these large constants.  They're supposed to be __u8
>> values.  Do they get truncated to the least significant byte?
>
> This program doesn't lead to crashes, it was a bug in syzkaller. The
> format keeps changing, I'll explain it if I send another program.
>
>>
>>> > 0x8, 0x80, [{0x9, 0x5, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}]}, {0x12, 0x1,
>>> > 0x0, 0x0, 0x4b5, 0x7c, 0x0, 0x3, 0x4, 0x0, 0x8, 0xd686, 0x0, 0x1}},
>>> > 0x31)
>>> >
>>> > I haven't managed to get the exact same stack trace (or any at all
>>> > actually) while trying to reproduce the bug with this program, but the
>>> > kernel definitely hangs.
>
> It seems that I get the stall reports quite rarely and I can't
> reproduce them. However I get the "bad spinlock magic" crashes quite
> often and am able to reproduce them. So I can apply debug patches.
>
> Reproducing sometimes requires quite some time (~10 minutes), so this
> seems to be some kind of race. I noticed that I often unmount
> gadgetfs before the USB device has enough time to properly initialize,
> so this might be a race of unmount vs device initialization or
> something similar.

Aha, got KASAN report:

==================================================================
BUG: KASAN: use-after-free in __lock_acquire+0x3069/0x3690
kernel/locking/lockdep.c:3246
Read of size 8 at addr 88003a2bdaf8 by task kworker/3:1/903

CPU: 3 PID: 903 Comm: kworker/3:1 Not tainted 4.12.0-rc4+ #35
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
Workqueue: usb_hub_wq hub_event
Call Trace:
 __dump_stack lib/dump_stack.c:16 [inline]
 dump_stack+0x292/0x395 lib/dump_stack.c:52
 print_address_description+0x78/0x280 mm/kasan/report.c:252
 kasan_report_error mm/kasan/report.c:351 [inline]
 kasan_report+0x230/0x340 mm/kasan/report.c:408
 __asan_report_load8_noabort+0x19/0x20 mm/kasan/report.c:429
 __lock_acquire+0x3069/0x3690 kernel/locking/lockdep.c:3246
 lock_acquire+0x22d/0x560 kernel/locking/lockdep.c:3855
 __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline]
 _raw_spin_lock+0x2f/0x40 kernel/locking/spinlock.c:151
 spin_lock include/linux/spinlock.h:299 [inline]
 gadgetfs_suspend+0x89/0x130 drivers/usb/gadget/legacy/inode.c:1682
 set_link_state+0x88e/0xae0 drivers/usb/gadget/udc/dummy_hcd.c:455
 dummy_hub_control+0xd7e/0x1fb0 drivers/usb/gadget/udc/dummy_hcd.c:2074
 rh_call_control drivers/usb/core/hcd.c:689 [inline]
 rh_urb_enqueue drivers/usb/core/hcd.c:846 [inline]
 usb_hcd_submit_urb+0x92f/0x20b0 drivers/usb/core/hcd.c:1650
 usb_submit_urb+0x8b2/0x12c0 drivers/usb/core/urb.c:542
 usb_start_wait_urb+0x148/0x5b0 drivers/usb/core/message.c:56
 usb_internal_control_msg drivers/usb/core/message.c:100 [inline]
 usb_control_msg+0x341/0x4d0 drivers/usb/core/message.c:151
 usb_clear_port_feature+0x74/0xa0 drivers/usb/core/hub.c:412
 hub_port_disable+0x123/0x510 drivers/usb/core/hub.c:4177
 hub_port_init+0x1ed/0x2940 drivers/usb/core/hub.c:4648
 hub_port_connect drivers/usb/core/hub.c:4826 [inline]
 hub_port_connect_change drivers/usb/core/hub.c:4999 [inline]
 port_event drivers/usb/core/hub.c:5105 [inline]
 hub_event+0x1ae1/0x3d40 drivers/usb/core/hub.c:5185
 process_one_work+0xc08/0x1bd0 kernel/workqueue.c:2097
 process_scheduled_works kernel/workqueue.c:2157 [inline]
 worker_thread+0xb2b/0x1860 kernel/workqueue.c:2233
 kthread+0x363/0x440 kernel/kthread.c:231
 ret_from_fork+0x2a/0x40 arch/x86/entry/entry_64.S:424

Allocated by task 9958:
 save_stack_trace+0x1b/0x20 arch/x86/kernel/stacktrace.c:59
 save_stack+0x43/0xd0 mm/kasan/kasan.c:513
 set_track mm/kasan/kasan.c:525 [inline]
 kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:617
 kmem_cache_alloc_trace+0x87/0x280 mm/slub.c:2745
 kmalloc include/linux/slab.h:492 [inline]
 kzalloc include/linux/slab.h:665 [inline]
 dev_new drivers/usb/gadget/legacy/inode.c:170 [inline]
 gadgetfs_fill_super+0x24f/0x540 drivers/usb/gadget/legacy/inode.c:1993
 mount_single+0xf6/0x160 fs/super.c:1192
 gadgetfs_mount+0x31/0x40 drivers/usb/gadget/legacy/inode.c:2019
 mount_fs+0x9c/0x2d0 fs/super.c:1223
 vfs_kern_mount.part.25+0xcb/0x490 fs/namespace.c:976
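
Reading the trace: the spinlock taken in gadgetfs_suspend() (inode.c:1682) lives inside
the dev object that gadgetfs_fill_super()/dev_new() allocated at mount time, and that
object can be freed on unmount while dummy_hcd is still driving link-state changes
through the hub worker. A minimal userspace sketch of that pattern follows; gadget_dev,
emulate_unmount() and emulate_suspend() are made-up stand-ins for illustration, not the
gadgetfs or dummy_hcd code:

/*
 * Userspace analogue of the use-after-free: one thread locks a mutex that
 * lives inside an object another thread has already freed. Build with
 * -pthread and -fsanitize=address to see the report.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct gadget_dev {
	pthread_mutex_t lock;	/* plays the role of dev->lock */
	int state;
};

static void *emulate_unmount(void *arg)
{
	struct gadget_dev *dev = arg;

	/* like killing the gadgetfs superblock: the object goes away */
	pthread_mutex_destroy(&dev->lock);
	free(dev);
	return NULL;
}

static void *emulate_suspend(void *arg)
{
	struct gadget_dev *dev = arg;

	/* like gadgetfs_suspend(): locks whatever the stale pointer points at */
	pthread_mutex_lock(&dev->lock);
	dev->state = 0;
	pthread_mutex_unlock(&dev->lock);
	return NULL;
}

int main(void)
{
	struct gadget_dev *dev = calloc(1, sizeof(*dev));
	pthread_t unmounter, suspender;

	pthread_mutex_init(&dev->lock, NULL);

	/* no synchronization between the two users: this is the race */
	pthread_create(&unmounter, NULL, emulate_unmount, dev);
	pthread_create(&suspender, NULL, emulate_suspend, dev);

	pthread_join(unmounter, NULL);
	pthread_join(suspender, NULL);
	return 0;
}

Whatever the eventual fix looks like, the unmount path and the suspend callback have to
agree on the object's lifetime before the lock inside it is touched.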
 

[PATCH 4.11 034/150] serial: core: fix crash in uart_suspend_port

2017-06-12 Thread Greg Kroah-Hartman
4.11-stable review patch.  If anyone has any objections, please let me know.

--

From: Lucas Stach 

commit 88e2582e90bb89fe895ff0dceeb5d5ab65d07997 upstream.

With serdev we might end up with serial ports that have no cdev exported
to userspace, as they are used as the bus interface to other devices. In
that case serial_match_port() won't be able to find a matching tty_dev.

Skip the irq wakeup enabling in that case, as serdev will make sure to
keep the port active, as long as there are devices depending on it.

Fixes: 8ee3fde04758 (tty_port: register tty ports with serdev bus)
Signed-off-by: Lucas Stach 
Signed-off-by: Greg Kroah-Hartman 

---
 drivers/tty/serial/serial_core.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -2083,7 +2083,7 @@ int uart_suspend_port(struct uart_driver
	mutex_lock(&port->mutex);
 
	tty_dev = device_find_child(uport->dev, &match, serial_match_port);
-   if (device_may_wakeup(tty_dev)) {
+   if (tty_dev && device_may_wakeup(tty_dev)) {
if (!enable_irq_wake(uport->irq))
uport->irq_wake = 1;
put_device(tty_dev);
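
The one-line change is the usual guarded-lookup idiom: device_find_child() can
legitimately return NULL for a serdev-controlled port, so the result must be checked
before it is used. A self-contained sketch of the idiom, with hypothetical
find_child()/may_wakeup() helpers rather than the driver-core API:

/*
 * Guarded-lookup idiom from the patch above: a lookup that may return NULL
 * must be checked before the result is dereferenced. find_child() and
 * may_wakeup() are illustrative stand-ins, not device_find_child() and
 * device_may_wakeup() themselves.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct child_dev {
	bool can_wakeup;
};

/* serdev-only ports have no cdev, so the lookup finds nothing */
static struct child_dev *find_child(bool has_cdev)
{
	static struct child_dev tty_child = { .can_wakeup = true };

	return has_cdev ? &tty_child : NULL;
}

static bool may_wakeup(const struct child_dev *c)
{
	return c->can_wakeup;	/* unconditional dereference: NULL crashes here */
}

int main(void)
{
	struct child_dev *tty_dev = find_child(false);

	/* before the fix: may_wakeup(tty_dev) was called even when tty_dev is NULL */
	if (tty_dev && may_wakeup(tty_dev))
		puts("arm wakeup irq");
	else
		puts("no cdev for this port, skip wakeup setup");

	return 0;
}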




[PATCH 4.11 035/150] ptrace: Properly initialize ptracer_cred on fork

2017-06-12 Thread Greg Kroah-Hartman
4.11-stable review patch.  If anyone has any objections, please let me know.

--

From: Eric W. Biederman 

commit c70d9d809fdeecedb96972457ee45c49a232d97f upstream.

When I introduced ptracer_cred I failed to consider the weirdness of
fork where the task_struct copies the old value by default.  This
winds up leaving ptracer_cred set even when a process forks and
the child process does not wind up being ptraced.

Because ptracer_cred is not set on non-ptraced processes whose
parents were ptraced this has broken the ability of the enlightenment
window manager to start setuid children.

Fix this by properly initializing ptracer_cred in ptrace_init_task

This must be done with a little bit of care to preserve the current value
of ptracer_cred when ptrace carries through fork.  Re-reading the
ptracer_cred from the ptracing process at this point is inconsistent
with how PT_PTRACE_CAP has been maintained all of these years.

Tested-by: Takashi Iwai 
Fixes: 64b875f7ac8a ("ptrace: Capture the ptracer's creds not PT_PTRACE_CAP")
Signed-off-by: "Eric W. Biederman" 
Cc: Ralph Sennhauser 
Signed-off-by: Greg Kroah-Hartman 

---
 include/linux/ptrace.h |    7 +++++--
 kernel/ptrace.c        |   20 +++++++++++++-------
 2 files changed, 18 insertions(+), 9 deletions(-)

--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -54,7 +54,8 @@ extern int ptrace_request(struct task_st
  unsigned long addr, unsigned long data);
 extern void ptrace_notify(int exit_code);
 extern void __ptrace_link(struct task_struct *child,
- struct task_struct *new_parent);
+ struct task_struct *new_parent,
+ const struct cred *ptracer_cred);
 extern void __ptrace_unlink(struct task_struct *child);
 extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead);
 #define PTRACE_MODE_READ   0x01
@@ -206,7 +207,7 @@ static inline void ptrace_init_task(stru
 
if (unlikely(ptrace) && current->ptrace) {
child->ptrace = current->ptrace;
-   __ptrace_link(child, current->parent);
+   __ptrace_link(child, current->parent, current->ptracer_cred);
 
if (child->ptrace & PT_SEIZED)
task_set_jobctl_pending(child, JOBCTL_TRAP_STOP);
@@ -215,6 +216,8 @@ static inline void ptrace_init_task(stru
 
set_tsk_thread_flag(child, TIF_SIGPENDING);
}
+   else
+   child->ptracer_cred = NULL;
 }
 
 /**
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -60,19 +60,25 @@ int ptrace_access_vm(struct task_struct
 }
 
 
+void __ptrace_link(struct task_struct *child, struct task_struct *new_parent,
+  const struct cred *ptracer_cred)
+{
+   BUG_ON(!list_empty(&child->ptrace_entry));
+   list_add(&child->ptrace_entry, &new_parent->ptraced);
+   child->parent = new_parent;
+   child->ptracer_cred = get_cred(ptracer_cred);
+}
+
 /*
  * ptrace a task: make the debugger its new parent and
  * move it to the ptrace list.
  *
  * Must be called with the tasklist lock write-held.
  */
-void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
+static void ptrace_link(struct task_struct *child, struct task_struct *new_parent)
 {
-   BUG_ON(!list_empty(&child->ptrace_entry));
-   list_add(&child->ptrace_entry, &new_parent->ptraced);
-   child->parent = new_parent;
rcu_read_lock();
-   child->ptracer_cred = get_cred(__task_cred(new_parent));
+   __ptrace_link(child, new_parent, __task_cred(new_parent));
rcu_read_unlock();
 }
 
@@ -386,7 +392,7 @@ static int ptrace_attach(struct task_str
flags |= PT_SEIZED;
task->ptrace = flags;
 
-   __ptrace_link(task, current);
+   ptrace_link(task, current);
 
/* SEIZE doesn't trap tracee on attach */
if (!seize)
@@ -459,7 +465,7 @@ static int ptrace_traceme(void)
 */
if (!ret && !(current->real_parent->flags & PF_EXITING)) {
current->ptrace = PT_PTRACED;
-   __ptrace_link(current, current->real_parent);
+   ptrace_link(current, current->real_parent);
}
}
write_unlock_irq(&tasklist_lock);
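
The subtle part is that fork duplicates the whole task_struct, so ptracer_cred silently
carries over into children that are not ptraced at all; ptrace_init_task() is where the
inherited value is either re-referenced (when ptrace carries through fork) or cleared.
A compact userspace sketch of that copy-then-reset pattern, with simplified task/cred
types standing in for the kernel structures:

/*
 * Sketch of the pitfall the patch fixes: a forked child starts as a byte
 * copy of its parent, so a field that is only meaningful under some
 * condition (here: being ptraced) must be re-initialized when that
 * condition does not carry over. struct task / struct cred below are
 * simplified stand-ins, not the kernel structures.
 */
#include <stdio.h>
#include <stdlib.h>

struct cred {
	int refcount;
};

struct task {
	int ptraced;			/* is this task being traced? */
	struct cred *ptracer_cred;	/* only valid while ptraced */
};

static struct cred *get_cred(struct cred *c)
{
	c->refcount++;
	return c;
}

/* analogue of ptrace_init_task() running after the implicit copy */
static struct task *copy_task(const struct task *parent, int ptrace_carries_over)
{
	struct task *child = malloc(sizeof(*child));

	*child = *parent;		/* what fork() effectively does */
	if (ptrace_carries_over && parent->ptraced) {
		child->ptracer_cred = get_cred(parent->ptracer_cred);
	} else {
		child->ptraced = 0;
		child->ptracer_cred = NULL;	/* the fix: no stale creds in the child */
	}
	return child;
}

int main(void)
{
	struct cred tracer_cred = { .refcount = 1 };
	struct task parent = { .ptraced = 1, .ptracer_cred = &tracer_cred };
	struct task *child = copy_task(&parent, 0);

	printf("child->ptracer_cred = %p (expect NULL)\n",
	       (void *)child->ptracer_cred);
	free(child);
	return 0;
}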



