Re: [PATCH 2/5] soc/fsl/qe: qe.c: reduce static memory footprint by 1.7K

2019-04-30 Thread Rasmus Villemoes
On 30/04/2019 19.12, Christophe Leroy wrote:
>  
> Le 30/04/2019 à 15:36, Rasmus Villemoes a écrit :
>> The current array of struct qe_snum use 256*4 bytes for just keeping
>> track of the free/used state of each index, and the struct layout
>> means there's another 768 bytes of padding. If we just unzip that
>> structure, the array of snum values just use 256 bytes, while the
>> free/inuse state can be tracked in a 32 byte bitmap.
>>
>> So this reduces the .data footprint by 1760 bytes. It also serves as
>> preparation for introducing another DT binding for specifying the snum
>> values.
>>
>> Signed-off-by: Rasmus Villemoes 
>> ---
>> -
>>   /* We allocate this here because it is used almost exclusively for
>>    * the communication processor devices.
>>    */
>>   struct qe_immap __iomem *qe_immr;
>>   EXPORT_SYMBOL(qe_immr);
>>   -static struct qe_snum snums[QE_NUM_OF_SNUM];    /* Dynamically
>> allocated SNUMs */
>> +static u8 snums[QE_NUM_OF_SNUM];    /* Dynamically allocated SNUMs */
>> +static DECLARE_BITMAP(snum_state, QE_NUM_OF_SNUM);
>>   static unsigned int qe_num_of_snum;
>>     static phys_addr_t qebase = -1;
>> @@ -308,6 +298,7 @@ static void qe_snums_init(void)
>>   };
>>   const u8 *snum_init;
>>   +    bitmap_zero(snum_state, QE_NUM_OF_SNUM);
> 
> Doesn't make much importance, but wouldn't it be more logical to add
> this line where the setting of .state = QE_SNUM_STATE_FREE was done
> previously, ie around the for() loop below ?

This was on purpose, to avoid having to move it up in patch 4, where we
don't necessarily reach the for loop.

>>   qe_num_of_snum = qe_get_num_of_snums();
>>     if (qe_num_of_snum == 76)
>> @@ -315,10 +306,8 @@ static void qe_snums_init(void)
>>   else
>>   snum_init = snum_init_46;
>>   -    for (i = 0; i < qe_num_of_snum; i++) {
>> -    snums[i].num = snum_init[i];
>> -    snums[i].state = QE_SNUM_STATE_FREE;
>> -    }
>> +    for (i = 0; i < qe_num_of_snum; i++)
>> +    snums[i] = snum_init[i];
> 
> Could use memcpy() instead ?

Yes, I switch to that in 5/5. Sure, I could do it here already, but I
did it this way to keep close to the current style. I don't care either
way, so if you prefer introducing memcpy here, fine by me.


>>   spin_unlock_irqrestore(_lock, flags);
>>   @@ -346,8 +333,8 @@ void qe_put_snum(u8 snum)
>>   int i;
>>     for (i = 0; i < qe_num_of_snum; i++) {
>> -    if (snums[i].num == snum) {
>> -    snums[i].state = QE_SNUM_STATE_FREE;
>> +    if (snums[i] == snum) {
>> +    clear_bit(i, snum_state);
>>   break;
>>   }
>>   }
> 
> Can we replace this loop by memchr() ?

Hm, yes. So that would be

  const u8 *p = memchr(snums, snum, qe_num_of_snum)
  if (p)
clear_bit(p - snums, snum_state);

I guess. Let me fold that in and see how it looks.

Thanks,
Rasmus


Re: [PATCH] crypto: caam/jr - Remove extra memory barrier during job ring dequeue

2019-04-30 Thread Michael Ellerman
Vakul Garg wrote:
> In function caam_jr_dequeue(), a full memory barrier is used before
> writing response job ring's register to signal removal of the completed
> job. Therefore for writing the register, we do not need another write
> memory barrier. Hence it is removed by replacing the call to wr_reg32()
> with a newly defined function wr_reg32_relaxed().
> 
> Signed-off-by: Vakul Garg 
> ---
>  drivers/crypto/caam/jr.c   | 2 +-
>  drivers/crypto/caam/regs.h | 8 
>  2 files changed, 9 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c
> index 4e9b3fca5627..2ce6d7d2ad72 100644
> --- a/drivers/crypto/caam/jr.c
> +++ b/drivers/crypto/caam/jr.c
> @@ -266,7 +266,7 @@ static void caam_jr_dequeue(unsigned long devarg)
>   mb();
>  
>   /* set done */
> - wr_reg32(>rregs->outring_rmvd, 1);
> + wr_reg32_relaxed(>rregs->outring_rmvd, 1);
>  
>   jrp->out_ring_read_index = (jrp->out_ring_read_index + 1) &
>  (JOBR_DEPTH - 1);
> diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h
> index 3cd0822ea819..9e912c722e33 100644
> --- a/drivers/crypto/caam/regs.h
> +++ b/drivers/crypto/caam/regs.h
> @@ -96,6 +96,14 @@ cpu_to_caam(16)
>  cpu_to_caam(32)
>  cpu_to_caam(64)
>  
> +static inline void wr_reg32_relaxed(void __iomem *reg, u32 data)
> +{
> + if (caam_little_end)
> + writel_relaxed(data, reg);
> + else
> + writel_relaxed(cpu_to_be32(data), reg);
> +}
> +
>  static inline void wr_reg32(void __iomem *reg, u32 data)
>  {
>   if (caam_little_end)

This crashes on my p5020ds. Did you test on powerpc?

# first bad commit: [bbfcac5ff5f26aafa51935a62eb86b6eacfe8a49] crypto: caam/jr 
- Remove extra memory barrier during job ring dequeue

Log:

  [ cut here ]
  kernel BUG at drivers/crypto/caam/jr.c:191!
  Oops: Exception in kernel mode, sig: 5 [#1]
  BE PAGE_SIZE=4K SMP NR_CPUS=24 CoreNet Generic
  Modules linked in:
  CPU: 1 PID: 0 Comm: swapper/1 Not tainted 
5.1.0-rc1-gcc-8.2.0-00060-gbbfcac5ff5f2 #31
  NIP:  c079d704 LR: c079d498 CTR: c0086914
  REGS: c000fffc7970 TRAP: 0700   Not tainted  
(5.1.0-rc1-gcc-8.2.0-00060-gbbfcac5ff5f2)
  MSR:  80029000   CR: 28008484  XER: 
  IRQMASK: 0
  GPR00: c079d6b0 c000fffc7c00 c0fbc800 0001
  GPR04: 7e080080 ffc0 0001 67d7
  GPR08: 880401a9  0001 fa83b2da
  GPR12: 28008224 c0003800 c0fc20b0 0100
  GPR16: 8920f09520bea117 c0def480  0001
  GPR20: c0fc3940 c000f3537e18 0001 c1026cc5
  GPR24: 0001 c000f3328000 0001 c000f3451010
  GPR28:  0001  
  NIP [c079d704] .caam_jr_dequeue+0x2f0/0x410
  LR [c079d498] .caam_jr_dequeue+0x84/0x410
  Call Trace:
  [c000fffc7c00] [c079d6b0] .caam_jr_dequeue+0x29c/0x410 
(unreliable)
  [c000fffc7cd0] [c004fef0] .tasklet_action_common.isra.3+0xac/0x180
  [c000fffc7d80] [c0a2f99c] .__do_softirq+0x174/0x3f8
  [c000fffc7e90] [c004fb94] .irq_exit+0xc4/0xdc
  [c000fffc7f00] [c0007348] .__do_irq+0x8c/0x1b0
  [c000fffc7f90] [c00150c4] .call_do_irq+0x14/0x24
  [c000f3137930] [c00074e4] .do_IRQ+0x78/0xd4
  [c000f31379c0] [c0019998] exc_0x500_common+0xfc/0x100
  --- interrupt: 501 at .book3e_idle+0x24/0x5c
  LR = .book3e_idle+0x24/0x5c
  [c000f3137cc0] [c000a6a4] .arch_cpu_idle+0x34/0xa0 (unreliable)
  [c000f3137d30] [c0a2f2e8] .default_idle_call+0x5c/0x70
  [c000f3137da0] [c0084210] .do_idle+0x1b0/0x1f4
  [c000f3137e40] [c0084434] .cpu_startup_entry+0x28/0x30
  [c000f3137eb0] [c0021538] .start_secondary+0x59c/0x5b0
  [c000f3137f90] [c45c] start_secondary_prolog+0x10/0x14
  Instruction dump:
  7d284a14 e9290018 2fa9 40de001c 3bbd0001 57bd05fe 7d3db050 712901ff
  7fbd07b4 40e2ffcc 93b500dc 4b94 <0fe0> 78890022 79270020 41d600ec
  ---[ end trace 7bedbdf37a95ab35 ]---

That's hitting:

/* we should never fail to find a matching descriptor */
BUG_ON(CIRC_CNT(head, tail + i, JOBR_DEPTH) <= 0);

cheers


[PATCH kernel v2 2/2] powerpc/powernv/ioda2: Create bigger default window with 64k IOMMU pages

2019-04-30 Thread Alexey Kardashevskiy
At the moment we create a small window only for 32bit devices, the window
maps 0..2GB of the PCI space only. For other devices we either use
a sketchy bypass or hardware bypass but the former can only work if
the amount of RAM is no bigger than the device's DMA mask and the latter
requires devices to support at least 59bit DMA.

This extends the default DMA window to the maximum size possible to allow
a wider DMA mask than just 32bit. The default window size is now limited
by the iommu_table::it_map allocation bitmap which is a contiguous
array, 1 bit per an IOMMU page.

This increases the default IOMMU page size from hard coded 4K to
the system page size to allow wider DMA masks.

This increases the level number to not exceed the max order allocation
limit per TCE level. At the same time, this keeps the minimal number of levels
at 2 in order to save memory.

As the extended window now overlaps the 32bit MMIO region, this adds
an area reservation to iommu_init_table().

After this change the default window size is 0x800==1<<43 so
devices limited to DMA mask smaller than the amount of system RAM can
still use more than just 2GB of memory for DMA.

With the on-demand allocation of indirect TCE table levels enabled and
2 levels, the first TCE level size is just
1<
---
Changes:
v2:
* adjusted level number to the max order
---
 arch/powerpc/include/asm/iommu.h  |  8 +++-
 arch/powerpc/kernel/iommu.c   | 58 +++
 arch/powerpc/platforms/powernv/pci-ioda.c | 40 +---
 3 files changed, 79 insertions(+), 27 deletions(-)

diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index 0ac52392ed99..5ea782e04803 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -124,6 +124,8 @@ struct iommu_table {
struct iommu_table_ops *it_ops;
struct krefit_kref;
int it_nid;
+   unsigned long it_reserved_start; /* Start of not-DMA-able (MMIO) area */
+   unsigned long it_reserved_end;
 };
 
 #define IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry) \
@@ -162,8 +164,10 @@ extern int iommu_tce_table_put(struct iommu_table *tbl);
 /* Initializes an iommu_table based in values set in the passed-in
  * structure
  */
-extern struct iommu_table *iommu_init_table(struct iommu_table * tbl,
-   int nid);
+extern struct iommu_table *iommu_init_table_res(struct iommu_table *tbl,
+   int nid, unsigned long res_start, unsigned long res_end);
+#define iommu_init_table(tbl, nid) iommu_init_table_res((tbl), (nid), 0, 0)
+
 #define IOMMU_TABLE_GROUP_MAX_TABLES   2
 
 struct iommu_table_group;
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 33bbd59cff79..209306ce7f4b 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -646,11 +646,43 @@ static void iommu_table_clear(struct iommu_table *tbl)
 #endif
 }
 
+static void iommu_table_reserve_pages(struct iommu_table *tbl)
+{
+   int i;
+
+   /*
+* Reserve page 0 so it will not be used for any mappings.
+* This avoids buggy drivers that consider page 0 to be invalid
+* to crash the machine or even lose data.
+*/
+   if (tbl->it_offset == 0)
+   set_bit(0, tbl->it_map);
+
+   for (i = tbl->it_reserved_start; i < tbl->it_reserved_end; ++i)
+   set_bit(i, tbl->it_map);
+}
+
+static void iommu_table_release_pages(struct iommu_table *tbl)
+{
+   int i;
+
+   /*
+* In case we have reserved the first bit, we should not emit
+* the warning below.
+*/
+   if (tbl->it_offset == 0)
+   clear_bit(0, tbl->it_map);
+
+   for (i = tbl->it_reserved_start; i < tbl->it_reserved_end; ++i)
+   clear_bit(i, tbl->it_map);
+}
+
 /*
  * Build a iommu_table structure.  This contains a bit map which
  * is used to manage allocation of the tce space.
  */
-struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
+struct iommu_table *iommu_init_table_res(struct iommu_table *tbl, int nid,
+   unsigned long res_start, unsigned long res_end)
 {
unsigned long sz;
static int welcomed = 0;
@@ -669,13 +701,9 @@ struct iommu_table *iommu_init_table(struct iommu_table 
*tbl, int nid)
tbl->it_map = page_address(page);
memset(tbl->it_map, 0, sz);
 
-   /*
-* Reserve page 0 so it will not be used for any mappings.
-* This avoids buggy drivers that consider page 0 to be invalid
-* to crash the machine or even lose data.
-*/
-   if (tbl->it_offset == 0)
-   set_bit(0, tbl->it_map);
+   tbl->it_reserved_start = res_start;
+   tbl->it_reserved_end = res_end;
+   iommu_table_reserve_pages(tbl);
 
/* We only split the IOMMU table if we have 1GB or more of space */
if ((tbl->it_size << tbl->it_page_shift) >= (1UL * 1024 * 1024 * 1024))

[PATCH kernel v2 1/2] powerpc/powernv/ioda2: Allocate TCE table levels on demand for default DMA window

2019-04-30 Thread Alexey Kardashevskiy
We allocate only the first level of multilevel TCE tables for KVM
already (alloc_userspace_copy==true), and the rest is allocated on demand.
This is not enabled though for baremetal.

This removes the KVM limitation (implicit, via the alloc_userspace_copy
parameter) and always allocates just the first level. The on-demand
allocation of missing levels is already implemented.

As from now on DMA map might happen with disabled interrupts, this
allocates TCEs with GFP_ATOMIC.

To save time when creating a new clean table, this skips non-allocated
indirect TCE entries in pnv_tce_free just like we already do in
the VFIO IOMMU TCE driver.

This changes the default level number from 1 to 2 to reduce the amount
of memory required for the default 32bit DMA window at the boot time.
The default window size is up to 2GB which requires 4MB of TCEs which is
unlikely to be used entirely or at all as most devices these days are
64bit capable so by switching to 2 levels by default we save 4032KB of
RAM per a device.

While at this, add __GFP_NOWARN to alloc_pages_node() as the userspace
can trigger this path via VFIO, see the failure and try creating a table
again with different parameters which might succeed.

Signed-off-by: Alexey Kardashevskiy 
---
Changes:
v2:
* added __GFP_NOWARN to alloc_pages_node
---
 arch/powerpc/platforms/powernv/pci.h  |  2 +-
 arch/powerpc/platforms/powernv/pci-ioda-tce.c | 20 +--
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci.h 
b/arch/powerpc/platforms/powernv/pci.h
index 8e36da379252..f44987b90ac2 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -223,7 +223,7 @@ extern struct iommu_table_group *pnv_npu_compound_attach(
struct pnv_ioda_pe *pe);
 
 /* pci-ioda-tce.c */
-#define POWERNV_IOMMU_DEFAULT_LEVELS   1
+#define POWERNV_IOMMU_DEFAULT_LEVELS   2
 #define POWERNV_IOMMU_MAX_LEVELS   5
 
 extern int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
diff --git a/arch/powerpc/platforms/powernv/pci-ioda-tce.c 
b/arch/powerpc/platforms/powernv/pci-ioda-tce.c
index e28f03e1eb5e..c75ec37bf0cd 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda-tce.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda-tce.c
@@ -36,7 +36,8 @@ static __be64 *pnv_alloc_tce_level(int nid, unsigned int 
shift)
struct page *tce_mem = NULL;
__be64 *addr;
 
-   tce_mem = alloc_pages_node(nid, GFP_KERNEL, shift - PAGE_SHIFT);
+   tce_mem = alloc_pages_node(nid, GFP_ATOMIC | __GFP_NOWARN,
+   shift - PAGE_SHIFT);
if (!tce_mem) {
pr_err("Failed to allocate a TCE memory, level shift=%d\n",
shift);
@@ -161,6 +162,9 @@ void pnv_tce_free(struct iommu_table *tbl, long index, long 
npages)
 
if (ptce)
*ptce = cpu_to_be64(0);
+   else
+   /* Skip the rest of the level */
+   i |= tbl->it_level_size - 1;
}
 }
 
@@ -260,7 +264,6 @@ long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 
bus_offset,
unsigned int table_shift = max_t(unsigned int, entries_shift + 3,
PAGE_SHIFT);
const unsigned long tce_table_size = 1UL << table_shift;
-   unsigned int tmplevels = levels;
 
if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS))
return -EINVAL;
@@ -268,9 +271,6 @@ long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 
bus_offset,
if (!is_power_of_2(window_size))
return -EINVAL;
 
-   if (alloc_userspace_copy && (window_size > (1ULL << 32)))
-   tmplevels = 1;
-
/* Adjust direct table size from window_size and levels */
entries_shift = (entries_shift + levels - 1) / levels;
level_shift = entries_shift + 3;
@@ -281,7 +281,7 @@ long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 
bus_offset,
 
/* Allocate TCE table */
addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
-   tmplevels, tce_table_size, , _allocated);
+   1, tce_table_size, , _allocated);
 
/* addr==NULL means that the first level allocation failed */
if (!addr)
@@ -292,18 +292,18 @@ long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 
bus_offset,
 * we did not allocate as much as we wanted,
 * release partially allocated table.
 */
-   if (tmplevels == levels && offset < tce_table_size)
+   if (levels == 1 && offset < tce_table_size)
goto free_tces_exit;
 
/* Allocate userspace view of the TCE table */
if (alloc_userspace_copy) {
offset = 0;
uas = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
-   tmplevels, tce_table_size, ,
+   1, tce_table_size, ,
  

[PATCH kernel v2 0/2] powerpc/ioda2: Another attempt to allow DMA masks between 32 and 59

2019-04-30 Thread Alexey Kardashevskiy
This is an attempt to allow DMA masks between 32..59 which are not large
enough to use either a PHB3 bypass mode or a sketchy bypass. Depending
on the max order, up to 40 is usually available.


This is based on sha1
37624b58542f Linus Torvalds "Linux 5.1-rc7".

Please comment. Thanks.



Alexey Kardashevskiy (2):
  powerpc/powernv/ioda2: Allocate TCE table levels on demand for default
DMA window
  powerpc/powernv/ioda2: Create bigger default window with 64k IOMMU
pages

 arch/powerpc/include/asm/iommu.h  |  8 ++-
 arch/powerpc/platforms/powernv/pci.h  |  2 +-
 arch/powerpc/kernel/iommu.c   | 58 +--
 arch/powerpc/platforms/powernv/pci-ioda-tce.c | 20 +++
 arch/powerpc/platforms/powernv/pci-ioda.c | 40 +++--
 5 files changed, 90 insertions(+), 38 deletions(-)

-- 
2.17.1



[PATCH kernel] prom_init: Fetch flatten device tree from the system firmware

2019-04-30 Thread Alexey Kardashevskiy
At the moment, on 256CPU + 256 PCI devices guest, it takes the guest
about 8.5sec to fetch the entire device tree via the client interface
as the DT is traversed twice - for strings blob and for struct blob.
Also, "getprop" is quite slow too as SLOF stores properties in a linked
list.

However, since [1] SLOF builds flattened device tree (FDT) for another
purpose. [2] adds a new "fdt-fetch" client interface for the OS to fetch
the FDT.

This tries the new method; if not supported, this falls back to
the old method.

There is a change in the FDT layout - the old method produced
(reserved map, strings, structs), the new one receives only strings and
structs from the firmware and adds the final reserved map to the end,
so it is (fw reserved map, strings, structs, reserved map).
This still produces the same unflattened device tree.

This merges the reserved map from the firmware into the kernel's reserved
map. At the moment SLOF generates an empty reserved map so this does not
change the existing behaviour in regard of reservations.

This supports only v17 onward as only that version provides dt_struct_size
which works as "fdt-fetch" only produces v17 blobs.

If "fdt-fetch" is not available, the old method of fetching the DT is used.

[1] https://git.qemu.org/?p=SLOF.git;a=commitdiff;h=e6fc84652c9c00
[2] https://git.qemu.org/?p=SLOF.git;a=commit;h=ecda95906930b80

Signed-off-by: Alexey Kardashevskiy 
---
 arch/powerpc/kernel/prom_init.c | 43 +
 1 file changed, 43 insertions(+)

diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index f33ff4163a51..72e7a602b68e 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -2457,6 +2457,48 @@ static void __init flatten_device_tree(void)
prom_panic("Can't allocate initial device-tree chunk\n");
mem_end = mem_start + room;
 
+   hdr = (void *) mem_start;
+   if (!call_prom_ret("fdt-fetch", 2, 1, NULL, mem_start,
+   room - sizeof(mem_reserve_map)) &&
+   hdr->version >= 17) {
+   u32 size;
+   struct mem_map_entry *fwrmap;
+
+   /* Fixup the boot cpuid */
+   hdr->boot_cpuid_phys = cpu_to_be32(prom.cpu);
+
+   /*
+* Store the struct and strings addresses, mostly
+* for consistency, only dt_header_start actually matters later.
+*/
+   dt_header_start = mem_start;
+   dt_string_start = mem_start + be32_to_cpu(hdr->off_dt_strings);
+   dt_string_end = dt_string_start +
+   be32_to_cpu(hdr->dt_strings_size);
+   dt_struct_start = mem_start + be32_to_cpu(hdr->off_dt_struct);
+   dt_struct_end = dt_struct_start +
+   be32_to_cpu(hdr->dt_struct_size);
+
+   /*
+* Calculate the reserved map location (which we put
+* at the blob end) and update total size.
+*/
+   fwrmap = (void *)(mem_start + be32_to_cpu(hdr->off_mem_rsvmap));
+   hdr->off_mem_rsvmap = hdr->totalsize;
+   size = be32_to_cpu(hdr->totalsize);
+   hdr->totalsize = cpu_to_be32(size + sizeof(mem_reserve_map));
+
+   /* Merge reserved map from firmware to ours */
+   for ( ; fwrmap->size; ++fwrmap)
+   reserve_mem(be64_to_cpu(fwrmap->base),
+   be64_to_cpu(fwrmap->size));
+
+   rsvmap = (u64 *)(mem_start + size);
+
+   prom_debug("Fetched DTB: %d bytes to @%lx\n", size, mem_start);
+   goto finalize_exit;
+   }
+
/* Get root of tree */
root = call_prom("peer", 1, 1, (phandle)0);
if (root == (phandle)0)
@@ -2504,6 +2546,7 @@ static void __init flatten_device_tree(void)
/* Version 16 is not backward compatible */
hdr->last_comp_version = cpu_to_be32(0x10);
 
+finalize_exit:
/* Copy the reserve map in */
memcpy(rsvmap, mem_reserve_map, sizeof(mem_reserve_map));
 
-- 
2.17.1



Re: [PATCH 06/41] drivers: tty: serial: sb1250-duart: use dev_err() instead of printk()

2019-04-30 Thread Maciej W. Rozycki
On Sat, 27 Apr 2019, Enrico Weigelt, metux IT consult wrote:

> diff --git a/drivers/tty/serial/sb1250-duart.c 
> b/drivers/tty/serial/sb1250-duart.c
> index 329aced..655961c 100644
> --- a/drivers/tty/serial/sb1250-duart.c
> +++ b/drivers/tty/serial/sb1250-duart.c
> @@ -663,7 +663,6 @@ static void sbd_release_port(struct uart_port *uport)
>  
>  static int sbd_map_port(struct uart_port *uport)
>  {
> - const char *err = KERN_ERR "sbd: Cannot map MMIO\n";
>   struct sbd_port *sport = to_sport(uport);
>   struct sbd_duart *duart = sport->duart;
>  
> @@ -671,7 +670,7 @@ static int sbd_map_port(struct uart_port *uport)
>   uport->membase = ioremap_nocache(uport->mapbase,
>DUART_CHANREG_SPACING);
>   if (!uport->membase) {
> - printk(err);
> + dev_err(uport->dev, "Cannot map MMIO (base)\n");
>   return -ENOMEM;
>   }
>  
> @@ -679,7 +678,7 @@ static int sbd_map_port(struct uart_port *uport)
>   sport->memctrl = ioremap_nocache(duart->mapctrl,
>DUART_CHANREG_SPACING);
>   if (!sport->memctrl) {
> - printk(err);
> + dev_err(uport->dev, "Cannot map MMIO (ctrl)\n");
>   iounmap(uport->membase);
>   uport->membase = NULL;
>   return -ENOMEM;

 Hmm, what's the point to have separate messages, which consume extra 
memory, for a hardly if at all possible error condition?

  Maciej


Re: [PATCH 01/41] drivers: tty: serial: dz: use dev_err() instead of printk()

2019-04-30 Thread Maciej W. Rozycki
On Mon, 29 Apr 2019, Greg KH wrote:

> > >>  drivers/tty/serial/dz.c | 8 
> > > 
> > > Do you have this hardware to test any of these changes with?
> > 
> > Unfortunately not :(
> 
> Then I can take the "basic" types of patches for the driver (like this
> one), but not any others, sorry.

 I can verify changes to dz.c, sb1250-duart.c and zs.c with real hardware, 
but regrettably not right away: the hardware is in a remote location and 
while I have it wired for remote operation unfortunately its connectivity 
has been cut off by an unfriendly ISP.

 I'm not sure if all the changes make sense though: if there is a compiler 
warning or a usability issue, then a patch is surely welcome, otherwise: 
"If it ain't broke, don't fix it".

  Maciej


Re: [PATCH] powerpc/mm/radix: Fix kernel crash when running subpage protect test

2019-04-30 Thread Michael Ellerman
"Aneesh Kumar K.V"  writes:

> This patch fixes the below crash by making sure we touch the subpage 
> protection
> related structures only if we know they are allocated on the platform. With
> radix translation we don't allocate hash context at all and trying to access
> subpage_prot_table results in
>
>  Faulting instruction address: 0xc008bdb4
>  Oops: Kernel access of bad area, sig: 11 [#1]
>  LE PAGE_SIZE=64K MMU=Radix MMU=Hash SMP NR_CPUS=2048 NUMA PowerNV
>  
>  NIP [c008bdb4] sys_subpage_prot+0x74/0x590
>  LR [c000b688] system_call+0x5c/0x70
>  Call Trace:
>  [c00020002c6b7d30] [c00020002c6b7d90] 0xc00020002c6b7d90 (unreliable)
>  [c00020002c6b7e20] [c000b688] system_call+0x5c/0x70
>  Instruction dump:
>  fb61ffd8 fb81ffe0 fba1ffe8 fbc1fff0 fbe1fff8 f821ff11 e92d1178 f9210068
>  3920 e92d0968 ebe90630 e93f03e8  6000 3860fffe e9410068
>
> We also move the subpage_prot_table with mmap_sem held to avoid races
> between two parallel subpage_prot syscalls.
>
> Reported-by: Sachin Sant 
> Signed-off-by: Aneesh Kumar K.V 

Presumably it was:

701101865f5d ("powerpc/mm: Reduce memory usage for mm_context_t for radix")

That caused the breakage?

cheers


Re: [PATCH v2] powerpc/32s: fix BATs setting with CONFIG_STRICT_KERNEL_RWX

2019-04-30 Thread Michael Ellerman
Christophe Leroy  writes:
> Serge reported some crashes with CONFIG_STRICT_KERNEL_RWX enabled
> on a book3s32 machine.
>
> Analysis shows two issues:
> - BATs addresses and sizes are not properly aligned.
> - There is a gap between the last address covered by BATs and the
> first address covered by pages.
>
> Memory mapped with DBATs:
> 0: 0xc000-0xc07f 0x Kernel RO coherent
> 1: 0xc080-0xc0bf 0x0080 Kernel RO coherent
> 2: 0xc0c0-0xc13f 0x00c0 Kernel RW coherent
> 3: 0xc140-0xc23f 0x0140 Kernel RW coherent
> 4: 0xc240-0xc43f 0x0240 Kernel RW coherent
> 5: 0xc440-0xc83f 0x0440 Kernel RW coherent
> 6: 0xc840-0xd03f 0x0840 Kernel RW coherent
> 7: 0xd040-0xe03f 0x1040 Kernel RW coherent
>
> Memory mapped with pages:
> 0xe100-0xefff  0x2100   240Mrw   present  
>  dirty  accessed
>
> This patch fixes both issues. With the patch, we get the following
> which is as expected:
>
> Memory mapped with DBATs:
> 0: 0xc000-0xc07f 0x Kernel RO coherent
> 1: 0xc080-0xc0bf 0x0080 Kernel RO coherent
> 2: 0xc0c0-0xc0ff 0x00c0 Kernel RW coherent
> 3: 0xc100-0xc1ff 0x0100 Kernel RW coherent
> 4: 0xc200-0xc3ff 0x0200 Kernel RW coherent
> 5: 0xc400-0xc7ff 0x0400 Kernel RW coherent
> 6: 0xc800-0xcfff 0x0800 Kernel RW coherent
> 7: 0xd000-0xdfff 0x1000 Kernel RW coherent
>
> Memory mapped with pages:
> 0xe000-0xefff  0x2000   256Mrw   present  
>  dirty  accessed
>
> Reported-by: Serge Belyshev 
> Fixes: 63b2bc619565 ("powerpc/mm/32s: Use BATs for STRICT_KERNEL_RWX")
> Cc: sta...@vger.kernel.org

I could probably still get this into v5.1 if you're confident it's a
good fix.

cheers


Re: [PATCH] powerpc: vdso: drop unnecessary cc-ldoption

2019-04-30 Thread Nicholas Piggin
Nick Desaulniers's on May 1, 2019 6:25 am:
> On Tue, Apr 23, 2019 at 2:11 PM Nick Desaulniers
>  wrote:
>>
>> Towards the goal of removing cc-ldoption, it seems that --hash-style=
>> was added to binutils 2.17.50.0.2 in 2006. The minimal required version
>> of binutils for the kernel according to
>> Documentation/process/changes.rst is 2.20.
>>
>> Link: https://gcc.gnu.org/ml/gcc/2007-01/msg01141.html
>> Cc: clang-built-li...@googlegroups.com
>> Suggested-by: Masahiro Yamada 
>> Signed-off-by: Nick Desaulniers 
>> ---
>>  arch/powerpc/kernel/vdso32/Makefile | 5 ++---
>>  arch/powerpc/kernel/vdso64/Makefile | 5 ++---
>>  2 files changed, 4 insertions(+), 6 deletions(-)
>>
>> diff --git a/arch/powerpc/kernel/vdso32/Makefile 
>> b/arch/powerpc/kernel/vdso32/Makefile
>> index ce199f6e4256..06f54d947057 100644
>> --- a/arch/powerpc/kernel/vdso32/Makefile
>> +++ b/arch/powerpc/kernel/vdso32/Makefile
>> @@ -26,9 +26,8 @@ GCOV_PROFILE := n
>>  KCOV_INSTRUMENT := n
>>  UBSAN_SANITIZE := n
>>
>> -ccflags-y := -shared -fno-common -fno-builtin
>> -ccflags-y += -nostdlib -Wl,-soname=linux-vdso32.so.1 \
>> -   $(call cc-ldoption, -Wl$(comma)--hash-style=both)
>> +ccflags-y := -shared -fno-common -fno-builtin -nostdlib \
>> +   -Wl,-soname=linux-vdso32.so.1 -Wl,--hash-style=both
>>  asflags-y := -D__VDSO32__ -s
>>
>>  obj-y += vdso32_wrapper.o
>> diff --git a/arch/powerpc/kernel/vdso64/Makefile 
>> b/arch/powerpc/kernel/vdso64/Makefile
>> index 28e7d112aa2f..32ebb3522ea1 100644
>> --- a/arch/powerpc/kernel/vdso64/Makefile
>> +++ b/arch/powerpc/kernel/vdso64/Makefile
>> @@ -12,9 +12,8 @@ GCOV_PROFILE := n
>>  KCOV_INSTRUMENT := n
>>  UBSAN_SANITIZE := n
>>
>> -ccflags-y := -shared -fno-common -fno-builtin
>> -ccflags-y += -nostdlib -Wl,-soname=linux-vdso64.so.1 \
>> -   $(call cc-ldoption, -Wl$(comma)--hash-style=both)
>> +ccflags-y := -shared -fno-common -fno-builtin -nostdlib \
>> +   -Wl,-soname=linux-vdso64.so.1 -Wl,--hash-style=both
>>  asflags-y := -D__VDSO64__ -s
>>
>>  obj-y += vdso64_wrapper.o
>> --
>> 2.21.0.593.g511ec345e18-goog
>>
> 
> bumping for review

This looks like a good cleanup.

Reviewed-by: Nicholas Piggin 



Re: [PATCH 41/41] drivers: tty: serial: lpc32xx_hs: fill mapsize and use it

2019-04-30 Thread Vladimir Zapolskiy
Hi Enrico,

On 04/27/2019 03:52 PM, Enrico Weigelt, metux IT consult wrote:
> Fill the struct uart_port->mapsize field and use it, insteaf of

typo, s/insteaf/instead/

> hardcoded values in many places. This makes the code layout a bit
> more consistent and easily allows using generic helpers for the
> io memory handling.
> 
> Candidates for such helpers could be eg. the request+ioremap and
> iounmap+release combinations.
> 
> Signed-off-by: Enrico Weigelt 

Acked-by: Vladimir Zapolskiy 

--
Best wishes,
Vladimir


Re: [PATCH] powerpc: Fix kobject memleak

2019-04-30 Thread Tyrel Datwyler
On 04/29/2019 06:09 PM, Tobin C. Harding wrote:
> Currently error return from kobject_init_and_add() is not followed by a
> call to kobject_put().  This means there is a memory leak.
> 
> Add call to kobject_put() in error path of kobject_init_and_add().
> 
> Signed-off-by: Tobin C. Harding 
> ---

Reviewed-by: Tyrel Datwyler 



Re: [PATCH] powerpc: vdso: drop unnecessary cc-ldoption

2019-04-30 Thread Nick Desaulniers
On Tue, Apr 23, 2019 at 2:11 PM Nick Desaulniers
 wrote:
>
> Towards the goal of removing cc-ldoption, it seems that --hash-style=
> was added to binutils 2.17.50.0.2 in 2006. The minimal required version
> of binutils for the kernel according to
> Documentation/process/changes.rst is 2.20.
>
> Link: https://gcc.gnu.org/ml/gcc/2007-01/msg01141.html
> Cc: clang-built-li...@googlegroups.com
> Suggested-by: Masahiro Yamada 
> Signed-off-by: Nick Desaulniers 
> ---
>  arch/powerpc/kernel/vdso32/Makefile | 5 ++---
>  arch/powerpc/kernel/vdso64/Makefile | 5 ++---
>  2 files changed, 4 insertions(+), 6 deletions(-)
>
> diff --git a/arch/powerpc/kernel/vdso32/Makefile 
> b/arch/powerpc/kernel/vdso32/Makefile
> index ce199f6e4256..06f54d947057 100644
> --- a/arch/powerpc/kernel/vdso32/Makefile
> +++ b/arch/powerpc/kernel/vdso32/Makefile
> @@ -26,9 +26,8 @@ GCOV_PROFILE := n
>  KCOV_INSTRUMENT := n
>  UBSAN_SANITIZE := n
>
> -ccflags-y := -shared -fno-common -fno-builtin
> -ccflags-y += -nostdlib -Wl,-soname=linux-vdso32.so.1 \
> -   $(call cc-ldoption, -Wl$(comma)--hash-style=both)
> +ccflags-y := -shared -fno-common -fno-builtin -nostdlib \
> +   -Wl,-soname=linux-vdso32.so.1 -Wl,--hash-style=both
>  asflags-y := -D__VDSO32__ -s
>
>  obj-y += vdso32_wrapper.o
> diff --git a/arch/powerpc/kernel/vdso64/Makefile 
> b/arch/powerpc/kernel/vdso64/Makefile
> index 28e7d112aa2f..32ebb3522ea1 100644
> --- a/arch/powerpc/kernel/vdso64/Makefile
> +++ b/arch/powerpc/kernel/vdso64/Makefile
> @@ -12,9 +12,8 @@ GCOV_PROFILE := n
>  KCOV_INSTRUMENT := n
>  UBSAN_SANITIZE := n
>
> -ccflags-y := -shared -fno-common -fno-builtin
> -ccflags-y += -nostdlib -Wl,-soname=linux-vdso64.so.1 \
> -   $(call cc-ldoption, -Wl$(comma)--hash-style=both)
> +ccflags-y := -shared -fno-common -fno-builtin -nostdlib \
> +   -Wl,-soname=linux-vdso64.so.1 -Wl,--hash-style=both
>  asflags-y := -D__VDSO64__ -s
>
>  obj-y += vdso64_wrapper.o
> --
> 2.21.0.593.g511ec345e18-goog
>

bumping for review
-- 
Thanks,
~Nick Desaulniers


Re: [PATCH v4] powerpc/pseries: Remove limit in wait for dying CPU

2019-04-30 Thread Thiago Jung Bauermann


Hello Nathan,

Thanks for reviewing the patch!

Nathan Lynch  writes:

> Thiago Jung Bauermann  writes:
>> This can be a problem because if the busy loop finishes too early, then the
>> kernel may offline another CPU before the previous one finished dying,
>> which would lead to two concurrent calls to rtas-stop-self, which is
>> prohibited by the PAPR.
>>
>> Since the hotplug machinery already assumes that cpu_die() is going to
>> work, we can simply loop until the CPU stops.
>>
>> Also change the loop to wait 100 µs between each call to
>> smp_query_cpu_stopped() to avoid querying RTAS too often.
>
> [...]
>
>> diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c 
>> b/arch/powerpc/platforms/pseries/hotplug-cpu.c
>> index 97feb6e79f1a..d75cee60644c 100644
>> --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
>> +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
>> @@ -214,13 +214,17 @@ static void pseries_cpu_die(unsigned int cpu)
>>  msleep(1);
>>  }
>>  } else if (get_preferred_offline_state(cpu) == CPU_STATE_OFFLINE) {
>> -
>> -for (tries = 0; tries < 25; tries++) {
>> +/*
>> + * rtas_stop_self() panics if the CPU fails to stop and our
>> + * callers already assume that we are going to succeed, so we
>> + * can just loop until the CPU stops.
>> + */
>> +while (true) {
>>  cpu_status = smp_query_cpu_stopped(pcpu);
>>  if (cpu_status == QCSS_STOPPED ||
>>  cpu_status == QCSS_HARDWARE_ERROR)
>>  break;
>> -cpu_relax();
>> +udelay(100);
>>  }
>>  }
>
> I agree with looping indefinitely but doesn't it need a cond_resched()
> or similar check?

If there's no kernel or hypervisor bug, it shouldn't take more than a
few tens of ms for this loop to complete (Gautham measured a maximum of
10 ms on a POWER9 with an earlier version of this patch).

In case of bugs related to CPU hotplug (either in the kernel or the
hypervisor), I was hoping that the resulting lockup warnings would be a
good indicator that something is wrong. :-)

Though perhaps adding a cond_resched() every 10 ms or so, with a
WARN_ON() if it loops for more than 50 ms would be better.

I'll send an alternative patch.

--
Thiago Jung Bauermann
IBM Linux Technology Center


[PATCH v2] powerpc: remove the __kernel_io_end export

2019-04-30 Thread Christoph Hellwig
This export was added in this merge window, but without any actual
user, or justification for a modular user.

Fixes: a35a3c6f6065 ("powerpc/mm/hash64: Add a variable to track the end of IO 
mapping")
Signed-off-by: Christoph Hellwig 
---

Changes since v1:

 - actually compiles now..

 arch/powerpc/mm/pgtable_64.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 72f58c076e26..dd610dab98e0 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -97,7 +97,6 @@ EXPORT_SYMBOL(__vmalloc_end);
 unsigned long __kernel_io_start;
 EXPORT_SYMBOL(__kernel_io_start);
 unsigned long __kernel_io_end;
-EXPORT_SYMBOL(__kernel_io_end);
 struct page *vmemmap;
 EXPORT_SYMBOL(vmemmap);
 unsigned long __pte_frag_nr;
-- 
2.20.1



Re: [PATCH 5/5] soc/fsl/qe: qe.c: fold qe_get_num_of_snums into qe_snums_init

2019-04-30 Thread Christophe Leroy




Le 30/04/2019 à 15:36, Rasmus Villemoes a écrit :

The comment "No QE ever has fewer than 28 SNUMs" is false; e.g. the
MPC8309 has 14. The code path returning -EINVAL is also a recipe for
instant disaster, since the caller (qe_snums_init) uncritically
assigns the return value to the unsigned qe_num_of_snum, and would
thus proceed to attempt to copy 4GB from snum_init_46[] to the snum[]
array.

So fold the handling of the legacy fsl,qe-num-snums into
qe_snums_init, and make sure we do not end up using the snum_init_46
array in cases other than the two where we know it makes sense.

Signed-off-by: Rasmus Villemoes 
---
  drivers/net/ethernet/freescale/ucc_geth.c |  2 +-
  drivers/soc/fsl/qe/qe.c   | 54 +++
  include/soc/fsl/qe/qe.h   |  2 +-
  3 files changed, 19 insertions(+), 39 deletions(-)

diff --git a/drivers/net/ethernet/freescale/ucc_geth.c 
b/drivers/net/ethernet/freescale/ucc_geth.c
index eb3e65e8868f..5748eb8464d0 100644
--- a/drivers/net/ethernet/freescale/ucc_geth.c
+++ b/drivers/net/ethernet/freescale/ucc_geth.c
@@ -3837,7 +3837,7 @@ static int ucc_geth_probe(struct platform_device* ofdev)
}
  
  	if (max_speed == SPEED_1000) {

-   unsigned int snums = qe_get_num_of_snums();
+   unsigned int snums = qe_num_of_snum;
  
  		/* configure muram FIFOs for gigabit operation */

ug_info->uf_info.urfs = UCC_GETH_URFS_GIGA_INIT;
diff --git a/drivers/soc/fsl/qe/qe.c b/drivers/soc/fsl/qe/qe.c
index af3c2b2b268f..8c3b3c62d81b 100644
--- a/drivers/soc/fsl/qe/qe.c
+++ b/drivers/soc/fsl/qe/qe.c
@@ -52,7 +52,8 @@ EXPORT_SYMBOL(qe_immr);
  
  static u8 snums[QE_NUM_OF_SNUM];	/* Dynamically allocated SNUMs */

  static DECLARE_BITMAP(snum_state, QE_NUM_OF_SNUM);
-static unsigned int qe_num_of_snum;
+unsigned int qe_num_of_snum;
+EXPORT_SYMBOL(qe_num_of_snum);


By exporting the object you allow other drivers to modify it. Is that 
really what we want ?


Why not keep qe_get_num_of_snums() as a helper that simply returns 
qe_num_of_snum ?


  
  static phys_addr_t qebase = -1;
  
@@ -308,26 +309,34 @@ static void qe_snums_init(void)

int i;
  
  	bitmap_zero(snum_state, QE_NUM_OF_SNUM);

+   qe_num_of_snum = 28; /* The default number of snum for threads is 28 */
qe = qe_get_device_node();
if (qe) {
i = of_property_read_variable_u8_array(qe, "fsl,qe-snums",
   snums, 1, 
QE_NUM_OF_SNUM);
-   of_node_put(qe);
if (i > 0) {
+   of_node_put(qe);
qe_num_of_snum = i;
return;
}
+   /*
+* Fall back to legacy binding of using the value of
+* fsl,qe-num-snums to choose one of the static arrays
+* above.
+*/
+   of_property_read_u32(qe, "fsl,qe-num-snums", &qe_num_of_snum);
+   of_node_put(qe);
}
  
-	qe_num_of_snum = qe_get_num_of_snums();

-
if (qe_num_of_snum == 76)
snum_init = snum_init_76;
-   else
+   else if (qe_num_of_snum == 28 || qe_num_of_snum == 46)
snum_init = snum_init_46;
-
-   for (i = 0; i < qe_num_of_snum; i++)
-   snums[i] = snum_init[i];
+   else {
+   pr_err("QE: unsupported value of fsl,qe-num-snums: %u\n", 
qe_num_of_snum);
+   return;
+   }


The first leg of the if/else must have {} too when the second leg has them.


+   memcpy(snums, snum_init, qe_num_of_snum);
  }
  
  int qe_get_snum(void)

@@ -645,35 +654,6 @@ unsigned int qe_get_num_of_risc(void)
  }
  EXPORT_SYMBOL(qe_get_num_of_risc);
  
-unsigned int qe_get_num_of_snums(void)


I think this function should remain and just return num_of_snums, see my 
other comment above.


Christophe



-{
-   struct device_node *qe;
-   int size;
-   unsigned int num_of_snums;
-   const u32 *prop;
-
-   num_of_snums = 28; /* The default number of snum for threads is 28 */
-   qe = qe_get_device_node();
-   if (!qe)
-   return num_of_snums;
-
-   prop = of_get_property(qe, "fsl,qe-num-snums", &size);
-   if (prop && size == sizeof(*prop)) {
-   num_of_snums = *prop;
-   if ((num_of_snums < 28) || (num_of_snums > QE_NUM_OF_SNUM)) {
-   /* No QE ever has fewer than 28 SNUMs */
-   pr_err("QE: number of snum is invalid\n");
-   of_node_put(qe);
-   return -EINVAL;
-   }
-   }
-
-   of_node_put(qe);
-
-   return num_of_snums;
-}
-EXPORT_SYMBOL(qe_get_num_of_snums);
-
  static int __init qe_init(void)
  {
struct device_node *np;
diff --git a/include/soc/fsl/qe/qe.h b/include/soc/fsl/qe/qe.h
index b3d1aff5e8ad..af5739850bf4 100644
--- a/include/soc/fsl/qe/qe.h
+++ 

Re: [PATCH 4/5] soc/fsl/qe: qe.c: support fsl,qe-snums property

2019-04-30 Thread Christophe Leroy




Le 30/04/2019 à 15:36, Rasmus Villemoes a écrit :

The current code assumes that the set of snum _values_ to populate the
snums[] array with is a function of the _number_ of snums
alone. However, reading table 4-30, and its footnotes, of the QUICC
Engine Block Reference Manual shows that that is a bit too naive.

As an alternative, this introduces a new binding fsl,qe-snums, which
automatically encodes both the number of snums and the actual values to
use. Conveniently, of_property_read_variable_u8_array does exactly
what we need.

For example, for the MPC8309, one would specify the property as

fsl,qe-snums = /bits/ 8 <
0x88 0x89 0x98 0x99 0xa8 0xa9 0xb8 0xb9
0xc8 0xc9 0xd8 0xd9 0xe8 0xe9>;

Signed-off-by: Rasmus Villemoes 
---
  .../devicetree/bindings/soc/fsl/cpm_qe/qe.txt  |  8 +++-
  drivers/soc/fsl/qe/qe.c| 14 +-
  2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/Documentation/devicetree/bindings/soc/fsl/cpm_qe/qe.txt 
b/Documentation/devicetree/bindings/soc/fsl/cpm_qe/qe.txt
index d7afaff5faff..05f5f485562a 100644
--- a/Documentation/devicetree/bindings/soc/fsl/cpm_qe/qe.txt
+++ b/Documentation/devicetree/bindings/soc/fsl/cpm_qe/qe.txt
@@ -18,7 +18,8 @@ Required properties:
  - reg : offset and length of the device registers.
  - bus-frequency : the clock frequency for QUICC Engine.
  - fsl,qe-num-riscs: define how many RISC engines the QE has.
-- fsl,qe-num-snums: define how many serial number(SNUM) the QE can use for the
+- fsl,qe-snums: This property has to be specified as '/bits/ 8' value,
+  defining the array of serial number (SNUM) values for the virtual
threads.
  
  Optional properties:

@@ -34,6 +35,11 @@ Recommended properties
  - brg-frequency : the internal clock source frequency for baud-rate
generators in Hz.
  
+Deprecated properties

+- fsl,qe-num-snums: define how many serial number(SNUM) the QE can use
+  for the threads. Use fsl,qe-snums instead to not only specify the
+  number of snums, but also their values.
+
  Example:
   qe@e010 {
#address-cells = <1>;
diff --git a/drivers/soc/fsl/qe/qe.c b/drivers/soc/fsl/qe/qe.c
index aff9d1373529..af3c2b2b268f 100644
--- a/drivers/soc/fsl/qe/qe.c
+++ b/drivers/soc/fsl/qe/qe.c
@@ -283,7 +283,6 @@ EXPORT_SYMBOL(qe_clock_source);
   */
  static void qe_snums_init(void)
  {
-   int i;


Why do you move this one ?


static const u8 snum_init_76[] = {
0x04, 0x05, 0x0C, 0x0D, 0x14, 0x15, 0x1C, 0x1D,
0x24, 0x25, 0x2C, 0x2D, 0x34, 0x35, 0x88, 0x89,
@@ -304,9 +303,22 @@ static void qe_snums_init(void)
0x28, 0x29, 0x38, 0x39, 0x48, 0x49, 0x58, 0x59,
0x68, 0x69, 0x78, 0x79, 0x80, 0x81,
};
+   struct device_node *qe;
const u8 *snum_init;
+   int i;
  
  	bitmap_zero(snum_state, QE_NUM_OF_SNUM);

+   qe = qe_get_device_node();
+   if (qe) {
+   i = of_property_read_variable_u8_array(qe, "fsl,qe-snums",
+  snums, 1, 
QE_NUM_OF_SNUM);
+   of_node_put(qe);
+   if (i > 0) {
+   qe_num_of_snum = i;
+   return;


In that case you skip the rest of the init ? Can you explain ?

Christophe


+   }
+   }
+
qe_num_of_snum = qe_get_num_of_snums();
  
  	if (qe_num_of_snum == 76)




Re: [PATCH 3/5] soc/fsl/qe: qe.c: introduce qe_get_device_node helper

2019-04-30 Thread Christophe Leroy




Le 30/04/2019 à 15:36, Rasmus Villemoes a écrit :

The 'try of_find_compatible_node(NULL, NULL, "fsl,qe"), fall back to
of_find_node_by_type(NULL, "qe")' pattern is repeated five
times. Factor it into a common helper.

Signed-off-by: Rasmus Villemoes 


Reviewed-by: Christophe Leroy 



---
  drivers/soc/fsl/qe/qe.c | 71 +
  1 file changed, 29 insertions(+), 42 deletions(-)

diff --git a/drivers/soc/fsl/qe/qe.c b/drivers/soc/fsl/qe/qe.c
index d0393f83145c..aff9d1373529 100644
--- a/drivers/soc/fsl/qe/qe.c
+++ b/drivers/soc/fsl/qe/qe.c
@@ -56,6 +56,20 @@ static unsigned int qe_num_of_snum;
  
  static phys_addr_t qebase = -1;
  
+static struct device_node *qe_get_device_node(void)

+{
+   struct device_node *qe;
+
+   /*
+* Newer device trees have an "fsl,qe" compatible property for the QE
+* node, but we still need to support older device trees.
+*/
+   qe = of_find_compatible_node(NULL, NULL, "fsl,qe");
+   if (qe)
+   return qe;
+   return of_find_node_by_type(NULL, "qe");
+}
+
  static phys_addr_t get_qe_base(void)
  {
struct device_node *qe;
@@ -65,12 +79,9 @@ static phys_addr_t get_qe_base(void)
if (qebase != -1)
return qebase;
  
-	qe = of_find_compatible_node(NULL, NULL, "fsl,qe");

-   if (!qe) {
-   qe = of_find_node_by_type(NULL, "qe");
-   if (!qe)
-   return qebase;
-   }
+   qe = qe_get_device_node();
+   if (!qe)
+   return qebase;
  
  	ret = of_address_to_resource(qe, 0, &res);

if (!ret)
@@ -164,12 +175,9 @@ unsigned int qe_get_brg_clk(void)
if (brg_clk)
return brg_clk;
  
-	qe = of_find_compatible_node(NULL, NULL, "fsl,qe");

-   if (!qe) {
-   qe = of_find_node_by_type(NULL, "qe");
-   if (!qe)
-   return brg_clk;
-   }
+   qe = qe_get_device_node();
+   if (!qe)
+   return brg_clk;
  
  	prop = of_get_property(qe, "brg-frequency", &size);

if (prop && size == sizeof(*prop))
@@ -563,16 +571,9 @@ struct qe_firmware_info *qe_get_firmware_info(void)
  
  	initialized = 1;
  
-	/*

-* Newer device trees have an "fsl,qe" compatible property for the QE
-* node, but we still need to support older device trees.
-   */
-   qe = of_find_compatible_node(NULL, NULL, "fsl,qe");
-   if (!qe) {
-   qe = of_find_node_by_type(NULL, "qe");
-   if (!qe)
-   return NULL;
-   }
+   qe = qe_get_device_node();
+   if (!qe)
+   return NULL;
  
  	/* Find the 'firmware' child node */

fw = of_get_child_by_name(qe, "firmware");
@@ -618,16 +619,9 @@ unsigned int qe_get_num_of_risc(void)
unsigned int num_of_risc = 0;
const u32 *prop;
  
-	qe = of_find_compatible_node(NULL, NULL, "fsl,qe");

-   if (!qe) {
-   /* Older devices trees did not have an "fsl,qe"
-* compatible property, so we need to look for
-* the QE node by name.
-*/
-   qe = of_find_node_by_type(NULL, "qe");
-   if (!qe)
-   return num_of_risc;
-   }
+   qe = qe_get_device_node();
+   if (!qe)
+   return num_of_risc;
  
  	prop = of_get_property(qe, "fsl,qe-num-riscs", &size);

if (prop && size == sizeof(*prop))
@@ -647,16 +641,9 @@ unsigned int qe_get_num_of_snums(void)
const u32 *prop;
  
  	num_of_snums = 28; /* The default number of snum for threads is 28 */

-   qe = of_find_compatible_node(NULL, NULL, "fsl,qe");
-   if (!qe) {
-   /* Older devices trees did not have an "fsl,qe"
-* compatible property, so we need to look for
-* the QE node by name.
-*/
-   qe = of_find_node_by_type(NULL, "qe");
-   if (!qe)
-   return num_of_snums;
-   }
+   qe = qe_get_device_node();
+   if (!qe)
+   return num_of_snums;
  
  	prop = of_get_property(qe, "fsl,qe-num-snums", &size);

if (prop && size == sizeof(*prop)) {



Re: [PATCH 2/5] soc/fsl/qe: qe.c: reduce static memory footprint by 1.7K

2019-04-30 Thread Christophe Leroy




Le 30/04/2019 à 15:36, Rasmus Villemoes a écrit :

The current array of struct qe_snum use 256*4 bytes for just keeping
track of the free/used state of each index, and the struct layout
means there's another 768 bytes of padding. If we just unzip that
structure, the array of snum values just use 256 bytes, while the
free/inuse state can be tracked in a 32 byte bitmap.

So this reduces the .data footprint by 1760 bytes. It also serves as
preparation for introducing another DT binding for specifying the snum
values.

Signed-off-by: Rasmus Villemoes 
---
  drivers/soc/fsl/qe/qe.c | 37 -
  1 file changed, 12 insertions(+), 25 deletions(-)

diff --git a/drivers/soc/fsl/qe/qe.c b/drivers/soc/fsl/qe/qe.c
index 855373deb746..d0393f83145c 100644
--- a/drivers/soc/fsl/qe/qe.c
+++ b/drivers/soc/fsl/qe/qe.c
@@ -14,6 +14,7 @@
   * Free Software Foundation;  either version 2 of the  License, or (at your
   * option) any later version.
   */
+#include 
  #include 
  #include 
  #include 
@@ -43,25 +44,14 @@ static DEFINE_SPINLOCK(qe_lock);
  DEFINE_SPINLOCK(cmxgcr_lock);
  EXPORT_SYMBOL(cmxgcr_lock);
  
-/* QE snum state */

-enum qe_snum_state {
-   QE_SNUM_STATE_USED,
-   QE_SNUM_STATE_FREE
-};
-
-/* QE snum */
-struct qe_snum {
-   u8 num;
-   enum qe_snum_state state;
-};
-
  /* We allocate this here because it is used almost exclusively for
   * the communication processor devices.
   */
  struct qe_immap __iomem *qe_immr;
  EXPORT_SYMBOL(qe_immr);
  
-static struct qe_snum snums[QE_NUM_OF_SNUM];	/* Dynamically allocated SNUMs */

+static u8 snums[QE_NUM_OF_SNUM];   /* Dynamically allocated SNUMs */
+static DECLARE_BITMAP(snum_state, QE_NUM_OF_SNUM);
  static unsigned int qe_num_of_snum;
  
  static phys_addr_t qebase = -1;

@@ -308,6 +298,7 @@ static void qe_snums_init(void)
};
const u8 *snum_init;
  
+	bitmap_zero(snum_state, QE_NUM_OF_SNUM);


Doesn't make much importance, but wouldn't it be more logical to add 
this line where the setting of .state = QE_SNUM_STATE_FREE was done 
previously, ie around the for() loop below ?



qe_num_of_snum = qe_get_num_of_snums();
  
  	if (qe_num_of_snum == 76)

@@ -315,10 +306,8 @@ static void qe_snums_init(void)
else
snum_init = snum_init_46;
  
-	for (i = 0; i < qe_num_of_snum; i++) {

-   snums[i].num = snum_init[i];
-   snums[i].state = QE_SNUM_STATE_FREE;
-   }
+   for (i = 0; i < qe_num_of_snum; i++)
+   snums[i] = snum_init[i];


Could use memcpy() instead ?


  }
  
  int qe_get_snum(void)

@@ -328,12 +317,10 @@ int qe_get_snum(void)
int i;
  
  	spin_lock_irqsave(&qe_lock, flags);

-   for (i = 0; i < qe_num_of_snum; i++) {
-   if (snums[i].state == QE_SNUM_STATE_FREE) {
-   snums[i].state = QE_SNUM_STATE_USED;
-   snum = snums[i].num;
-   break;
-   }
+   i = find_first_zero_bit(snum_state, qe_num_of_snum);
+   if (i < qe_num_of_snum) {
+   set_bit(i, snum_state);
+   snum = snums[i];
}
	spin_unlock_irqrestore(&qe_lock, flags);
  
@@ -346,8 +333,8 @@ void qe_put_snum(u8 snum)

int i;
  
  	for (i = 0; i < qe_num_of_snum; i++) {

-   if (snums[i].num == snum) {
-   snums[i].state = QE_SNUM_STATE_FREE;
+   if (snums[i] == snum) {
+   clear_bit(i, snum_state);
break;
}
}


Can we replace this loop by memchr() ?

Christophe



Re: [PATCH v2 3/6] x86: clean up _TIF_SYSCALL_EMU handling using ptrace_syscall_enter hook

2019-04-30 Thread Sudeep Holla



On 30/04/2019 17:46, Andy Lutomirski wrote:
> On Mon, Mar 18, 2019 at 3:49 AM Sudeep Holla  wrote:
>>
>> Now that we have a new hook ptrace_syscall_enter that can be called from
>> syscall entry code and it handles PTRACE_SYSEMU in generic code, we
>> can do some cleanup using the same in syscall_trace_enter.
>>
>> Further the extra logic to find single stepping PTRACE_SYSEMU_SINGLESTEP
>> in syscall_slow_exit_work seems unnecessary. Let's remove the same.
>>
> 
> Unless the patch set contains a selftest that exercises all the
> interesting cases here, NAK.  To be clear, there needs to be a test
> that passes on an unmodified kernel and still passes on a patched
> kernel.  And that test case needs to *fail* if, for example, you force
> "emulated" to either true or false rather than reading out the actual
> value.
> 

Tested using tools/testing/selftests/x86/ptrace_syscall.c

Also v3 doesn't change any logic or additional call to new function as
in v2. It's just simple cleanup as suggested by Oleg.

-- 
Regards,
Sudeep


Re: [PATCH 1/5] soc/fsl/qe: qe.c: drop useless static qualifier

2019-04-30 Thread Christophe Leroy




Le 30/04/2019 à 15:36, Rasmus Villemoes a écrit :

The local variable snum_init has no reason to have static storage duration.

Signed-off-by: Rasmus Villemoes 


Reviewed-by: Christophe Leroy 


---
  drivers/soc/fsl/qe/qe.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/soc/fsl/qe/qe.c b/drivers/soc/fsl/qe/qe.c
index 612d9c551be5..855373deb746 100644
--- a/drivers/soc/fsl/qe/qe.c
+++ b/drivers/soc/fsl/qe/qe.c
@@ -306,7 +306,7 @@ static void qe_snums_init(void)
0x28, 0x29, 0x38, 0x39, 0x48, 0x49, 0x58, 0x59,
0x68, 0x69, 0x78, 0x79, 0x80, 0x81,
};
-   static const u8 *snum_init;
+   const u8 *snum_init;
  
  	qe_num_of_snum = qe_get_num_of_snums();
  



Re: [PATCH v2 3/6] x86: clean up _TIF_SYSCALL_EMU handling using ptrace_syscall_enter hook

2019-04-30 Thread Andy Lutomirski
On Mon, Mar 18, 2019 at 3:49 AM Sudeep Holla  wrote:
>
> Now that we have a new hook ptrace_syscall_enter that can be called from
> syscall entry code and it handles PTRACE_SYSEMU in generic code, we
> can do some cleanup using the same in syscall_trace_enter.
>
> Further the extra logic to find single stepping PTRACE_SYSEMU_SINGLESTEP
> in syscall_slow_exit_work seems unnecessary. Let's remove the same.
>

Unless the patch set contains a selftest that exercises all the
interesting cases here, NAK.  To be clear, there needs to be a test
that passes on an unmodified kernel and still passes on a patched
kernel.  And that test case needs to *fail* if, for example, you force
"emulated" to either true or false rather than reading out the actual
value.

--Andy


Re: [PATCH v2 3/6] x86: clean up _TIF_SYSCALL_EMU handling using ptrace_syscall_enter hook

2019-04-30 Thread Sudeep Holla
On Mon, Mar 18, 2019 at 04:33:22PM +0100, Oleg Nesterov wrote:
> On 03/18, Sudeep Holla wrote:
> >
> > --- a/arch/x86/entry/common.c
> > +++ b/arch/x86/entry/common.c
> > @@ -70,22 +70,16 @@ static long syscall_trace_enter(struct pt_regs *regs)
> >
> > struct thread_info *ti = current_thread_info();
> > unsigned long ret = 0;
> > -   bool emulated = false;
> > u32 work;
> >
> > if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
> > BUG_ON(regs != task_pt_regs(current));
> >
> > -   work = READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY;
> > -
> > -   if (unlikely(work & _TIF_SYSCALL_EMU))
> > -   emulated = true;
> > -
> > -   if ((emulated || (work & _TIF_SYSCALL_TRACE)) &&
> > -   tracehook_report_syscall_entry(regs))
> > +   if (unlikely(ptrace_syscall_enter(regs)))
> > return -1L;
> >
> > -   if (emulated)
> > +   work = READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY;
> > +   if ((work & _TIF_SYSCALL_TRACE) && tracehook_report_syscall_entry(regs))
> > return -1L;
>
[...]

>
> And it seems that _TIF_WORK_SYSCALL_ENTRY needs some cleanups too... We don't 
> need
> "& _TIF_WORK_SYSCALL_ENTRY" in syscall_trace_enter, and 
> _TIF_WORK_SYSCALL_ENTRY
> should not include _TIF_NOHZ?
>

I was about to post the updated version and checked this to make sure I have
covered everything or not. I had missed the above comment. All architectures
have _TIF_NOHZ in their mask that they check to do work. And from x86, I read
"...syscall_trace_enter(). Also includes TIF_NOHZ for enter_from_user_mode()"
So I don't understand why _TIF_NOHZ needs to be dropped.

Also if we need to drop, we can address that separately examining all archs.
I will post the cleanup as you suggested for now.

--
Regards,
Sudeep


[PATCH 5/5] soc/fsl/qe: qe.c: fold qe_get_num_of_snums into qe_snums_init

2019-04-30 Thread Rasmus Villemoes
The comment "No QE ever has fewer than 28 SNUMs" is false; e.g. the
MPC8309 has 14. The code path returning -EINVAL is also a recipe for
instant disaster, since the caller (qe_snums_init) uncritically
assigns the return value to the unsigned qe_num_of_snum, and would
thus proceed to attempt to copy 4GB from snum_init_46[] to the snum[]
array.

So fold the handling of the legacy fsl,qe-num-snums into
qe_snums_init, and make sure we do not end up using the snum_init_46
array in cases other than the two where we know it makes sense.

Signed-off-by: Rasmus Villemoes 
---
 drivers/net/ethernet/freescale/ucc_geth.c |  2 +-
 drivers/soc/fsl/qe/qe.c   | 54 +++
 include/soc/fsl/qe/qe.h   |  2 +-
 3 files changed, 19 insertions(+), 39 deletions(-)

diff --git a/drivers/net/ethernet/freescale/ucc_geth.c 
b/drivers/net/ethernet/freescale/ucc_geth.c
index eb3e65e8868f..5748eb8464d0 100644
--- a/drivers/net/ethernet/freescale/ucc_geth.c
+++ b/drivers/net/ethernet/freescale/ucc_geth.c
@@ -3837,7 +3837,7 @@ static int ucc_geth_probe(struct platform_device* ofdev)
}
 
if (max_speed == SPEED_1000) {
-   unsigned int snums = qe_get_num_of_snums();
+   unsigned int snums = qe_num_of_snum;
 
/* configure muram FIFOs for gigabit operation */
ug_info->uf_info.urfs = UCC_GETH_URFS_GIGA_INIT;
diff --git a/drivers/soc/fsl/qe/qe.c b/drivers/soc/fsl/qe/qe.c
index af3c2b2b268f..8c3b3c62d81b 100644
--- a/drivers/soc/fsl/qe/qe.c
+++ b/drivers/soc/fsl/qe/qe.c
@@ -52,7 +52,8 @@ EXPORT_SYMBOL(qe_immr);
 
 static u8 snums[QE_NUM_OF_SNUM];   /* Dynamically allocated SNUMs */
 static DECLARE_BITMAP(snum_state, QE_NUM_OF_SNUM);
-static unsigned int qe_num_of_snum;
+unsigned int qe_num_of_snum;
+EXPORT_SYMBOL(qe_num_of_snum);
 
 static phys_addr_t qebase = -1;
 
@@ -308,26 +309,34 @@ static void qe_snums_init(void)
int i;
 
bitmap_zero(snum_state, QE_NUM_OF_SNUM);
+   qe_num_of_snum = 28; /* The default number of snum for threads is 28 */
qe = qe_get_device_node();
if (qe) {
i = of_property_read_variable_u8_array(qe, "fsl,qe-snums",
   snums, 1, 
QE_NUM_OF_SNUM);
-   of_node_put(qe);
if (i > 0) {
+   of_node_put(qe);
qe_num_of_snum = i;
return;
}
+   /*
+* Fall back to legacy binding of using the value of
+* fsl,qe-num-snums to choose one of the static arrays
+* above.
+*/
+   of_property_read_u32(qe, "fsl,qe-num-snums", &qe_num_of_snum);
+   of_node_put(qe);
}
 
-   qe_num_of_snum = qe_get_num_of_snums();
-
if (qe_num_of_snum == 76)
snum_init = snum_init_76;
-   else
+   else if (qe_num_of_snum == 28 || qe_num_of_snum == 46)
snum_init = snum_init_46;
-
-   for (i = 0; i < qe_num_of_snum; i++)
-   snums[i] = snum_init[i];
+   else {
+   pr_err("QE: unsupported value of fsl,qe-num-snums: %u\n", 
qe_num_of_snum);
+   return;
+   }
+   memcpy(snums, snum_init, qe_num_of_snum);
 }
 
 int qe_get_snum(void)
@@ -645,35 +654,6 @@ unsigned int qe_get_num_of_risc(void)
 }
 EXPORT_SYMBOL(qe_get_num_of_risc);
 
-unsigned int qe_get_num_of_snums(void)
-{
-   struct device_node *qe;
-   int size;
-   unsigned int num_of_snums;
-   const u32 *prop;
-
-   num_of_snums = 28; /* The default number of snum for threads is 28 */
-   qe = qe_get_device_node();
-   if (!qe)
-   return num_of_snums;
-
-   prop = of_get_property(qe, "fsl,qe-num-snums", &size);
-   if (prop && size == sizeof(*prop)) {
-   num_of_snums = *prop;
-   if ((num_of_snums < 28) || (num_of_snums > QE_NUM_OF_SNUM)) {
-   /* No QE ever has fewer than 28 SNUMs */
-   pr_err("QE: number of snum is invalid\n");
-   of_node_put(qe);
-   return -EINVAL;
-   }
-   }
-
-   of_node_put(qe);
-
-   return num_of_snums;
-}
-EXPORT_SYMBOL(qe_get_num_of_snums);
-
 static int __init qe_init(void)
 {
struct device_node *np;
diff --git a/include/soc/fsl/qe/qe.h b/include/soc/fsl/qe/qe.h
index b3d1aff5e8ad..af5739850bf4 100644
--- a/include/soc/fsl/qe/qe.h
+++ b/include/soc/fsl/qe/qe.h
@@ -212,7 +212,7 @@ int qe_setbrg(enum qe_clock brg, unsigned int rate, 
unsigned int multiplier);
 int qe_get_snum(void);
 void qe_put_snum(u8 snum);
 unsigned int qe_get_num_of_risc(void);
-unsigned int qe_get_num_of_snums(void);
+extern unsigned int qe_num_of_snum;
 
 static inline int qe_alive_during_sleep(void)
 {
-- 
2.20.1



[PATCH 4/5] soc/fsl/qe: qe.c: support fsl,qe-snums property

2019-04-30 Thread Rasmus Villemoes
The current code assumes that the set of snum _values_ to populate the
snums[] array with is a function of the _number_ of snums
alone. However, reading table 4-30, and its footnotes, of the QUICC
Engine Block Reference Manual shows that that is a bit too naive.

As an alternative, this introduces a new binding fsl,qe-snums, which
automatically encodes both the number of snums and the actual values to
use. Conveniently, of_property_read_variable_u8_array does exactly
what we need.

For example, for the MPC8309, one would specify the property as

   fsl,qe-snums = /bits/ 8 <
   0x88 0x89 0x98 0x99 0xa8 0xa9 0xb8 0xb9
   0xc8 0xc9 0xd8 0xd9 0xe8 0xe9>;

Signed-off-by: Rasmus Villemoes 
---
 .../devicetree/bindings/soc/fsl/cpm_qe/qe.txt  |  8 +++-
 drivers/soc/fsl/qe/qe.c| 14 +-
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/Documentation/devicetree/bindings/soc/fsl/cpm_qe/qe.txt 
b/Documentation/devicetree/bindings/soc/fsl/cpm_qe/qe.txt
index d7afaff5faff..05f5f485562a 100644
--- a/Documentation/devicetree/bindings/soc/fsl/cpm_qe/qe.txt
+++ b/Documentation/devicetree/bindings/soc/fsl/cpm_qe/qe.txt
@@ -18,7 +18,8 @@ Required properties:
 - reg : offset and length of the device registers.
 - bus-frequency : the clock frequency for QUICC Engine.
 - fsl,qe-num-riscs: define how many RISC engines the QE has.
-- fsl,qe-num-snums: define how many serial number(SNUM) the QE can use for the
+- fsl,qe-snums: This property has to be specified as '/bits/ 8' value,
+  defining the array of serial number (SNUM) values for the virtual
   threads.
 
 Optional properties:
@@ -34,6 +35,11 @@ Recommended properties
 - brg-frequency : the internal clock source frequency for baud-rate
   generators in Hz.
 
+Deprecated properties
+- fsl,qe-num-snums: define how many serial number(SNUM) the QE can use
+  for the threads. Use fsl,qe-snums instead to not only specify the
+  number of snums, but also their values.
+
 Example:
  qe@e010 {
#address-cells = <1>;
diff --git a/drivers/soc/fsl/qe/qe.c b/drivers/soc/fsl/qe/qe.c
index aff9d1373529..af3c2b2b268f 100644
--- a/drivers/soc/fsl/qe/qe.c
+++ b/drivers/soc/fsl/qe/qe.c
@@ -283,7 +283,6 @@ EXPORT_SYMBOL(qe_clock_source);
  */
 static void qe_snums_init(void)
 {
-   int i;
static const u8 snum_init_76[] = {
0x04, 0x05, 0x0C, 0x0D, 0x14, 0x15, 0x1C, 0x1D,
0x24, 0x25, 0x2C, 0x2D, 0x34, 0x35, 0x88, 0x89,
@@ -304,9 +303,22 @@ static void qe_snums_init(void)
0x28, 0x29, 0x38, 0x39, 0x48, 0x49, 0x58, 0x59,
0x68, 0x69, 0x78, 0x79, 0x80, 0x81,
};
+   struct device_node *qe;
const u8 *snum_init;
+   int i;
 
bitmap_zero(snum_state, QE_NUM_OF_SNUM);
+   qe = qe_get_device_node();
+   if (qe) {
+   i = of_property_read_variable_u8_array(qe, "fsl,qe-snums",
+  snums, 1, 
QE_NUM_OF_SNUM);
+   of_node_put(qe);
+   if (i > 0) {
+   qe_num_of_snum = i;
+   return;
+   }
+   }
+
qe_num_of_snum = qe_get_num_of_snums();
 
if (qe_num_of_snum == 76)
-- 
2.20.1



[PATCH 1/5] soc/fsl/qe: qe.c: drop useless static qualifier

2019-04-30 Thread Rasmus Villemoes
The local variable snum_init has no reason to have static storage duration.

Signed-off-by: Rasmus Villemoes 
---
 drivers/soc/fsl/qe/qe.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/soc/fsl/qe/qe.c b/drivers/soc/fsl/qe/qe.c
index 612d9c551be5..855373deb746 100644
--- a/drivers/soc/fsl/qe/qe.c
+++ b/drivers/soc/fsl/qe/qe.c
@@ -306,7 +306,7 @@ static void qe_snums_init(void)
0x28, 0x29, 0x38, 0x39, 0x48, 0x49, 0x58, 0x59,
0x68, 0x69, 0x78, 0x79, 0x80, 0x81,
};
-   static const u8 *snum_init;
+   const u8 *snum_init;
 
qe_num_of_snum = qe_get_num_of_snums();
 
-- 
2.20.1



[PATCH 2/5] soc/fsl/qe: qe.c: reduce static memory footprint by 1.7K

2019-04-30 Thread Rasmus Villemoes
The current array of struct qe_snum use 256*4 bytes for just keeping
track of the free/used state of each index, and the struct layout
means there's another 768 bytes of padding. If we just unzip that
structure, the array of snum values just use 256 bytes, while the
free/inuse state can be tracked in a 32 byte bitmap.

So this reduces the .data footprint by 1760 bytes. It also serves as
preparation for introducing another DT binding for specifying the snum
values.

Signed-off-by: Rasmus Villemoes 
---
 drivers/soc/fsl/qe/qe.c | 37 -
 1 file changed, 12 insertions(+), 25 deletions(-)

diff --git a/drivers/soc/fsl/qe/qe.c b/drivers/soc/fsl/qe/qe.c
index 855373deb746..d0393f83145c 100644
--- a/drivers/soc/fsl/qe/qe.c
+++ b/drivers/soc/fsl/qe/qe.c
@@ -14,6 +14,7 @@
  * Free Software Foundation;  either version 2 of the  License, or (at your
  * option) any later version.
  */
+#include 
 #include 
 #include 
 #include 
@@ -43,25 +44,14 @@ static DEFINE_SPINLOCK(qe_lock);
 DEFINE_SPINLOCK(cmxgcr_lock);
 EXPORT_SYMBOL(cmxgcr_lock);
 
-/* QE snum state */
-enum qe_snum_state {
-   QE_SNUM_STATE_USED,
-   QE_SNUM_STATE_FREE
-};
-
-/* QE snum */
-struct qe_snum {
-   u8 num;
-   enum qe_snum_state state;
-};
-
 /* We allocate this here because it is used almost exclusively for
  * the communication processor devices.
  */
 struct qe_immap __iomem *qe_immr;
 EXPORT_SYMBOL(qe_immr);
 
-static struct qe_snum snums[QE_NUM_OF_SNUM];   /* Dynamically allocated SNUMs 
*/
+static u8 snums[QE_NUM_OF_SNUM];   /* Dynamically allocated SNUMs */
+static DECLARE_BITMAP(snum_state, QE_NUM_OF_SNUM);
 static unsigned int qe_num_of_snum;
 
 static phys_addr_t qebase = -1;
@@ -308,6 +298,7 @@ static void qe_snums_init(void)
};
const u8 *snum_init;
 
+   bitmap_zero(snum_state, QE_NUM_OF_SNUM);
qe_num_of_snum = qe_get_num_of_snums();
 
if (qe_num_of_snum == 76)
@@ -315,10 +306,8 @@ static void qe_snums_init(void)
else
snum_init = snum_init_46;
 
-   for (i = 0; i < qe_num_of_snum; i++) {
-   snums[i].num = snum_init[i];
-   snums[i].state = QE_SNUM_STATE_FREE;
-   }
+   for (i = 0; i < qe_num_of_snum; i++)
+   snums[i] = snum_init[i];
 }
 
 int qe_get_snum(void)
@@ -328,12 +317,10 @@ int qe_get_snum(void)
int i;
 
	spin_lock_irqsave(&qe_lock, flags);
-   for (i = 0; i < qe_num_of_snum; i++) {
-   if (snums[i].state == QE_SNUM_STATE_FREE) {
-   snums[i].state = QE_SNUM_STATE_USED;
-   snum = snums[i].num;
-   break;
-   }
+   i = find_first_zero_bit(snum_state, qe_num_of_snum);
+   if (i < qe_num_of_snum) {
+   set_bit(i, snum_state);
+   snum = snums[i];
}
	spin_unlock_irqrestore(&qe_lock, flags);
 
@@ -346,8 +333,8 @@ void qe_put_snum(u8 snum)
int i;
 
for (i = 0; i < qe_num_of_snum; i++) {
-   if (snums[i].num == snum) {
-   snums[i].state = QE_SNUM_STATE_FREE;
+   if (snums[i] == snum) {
+   clear_bit(i, snum_state);
break;
}
}
-- 
2.20.1



[PATCH 3/5] soc/fsl/qe: qe.c: introduce qe_get_device_node helper

2019-04-30 Thread Rasmus Villemoes
The 'try of_find_compatible_node(NULL, NULL, "fsl,qe"), fall back to
of_find_node_by_type(NULL, "qe")' pattern is repeated five
times. Factor it into a common helper.

Signed-off-by: Rasmus Villemoes 
---
 drivers/soc/fsl/qe/qe.c | 71 +
 1 file changed, 29 insertions(+), 42 deletions(-)

diff --git a/drivers/soc/fsl/qe/qe.c b/drivers/soc/fsl/qe/qe.c
index d0393f83145c..aff9d1373529 100644
--- a/drivers/soc/fsl/qe/qe.c
+++ b/drivers/soc/fsl/qe/qe.c
@@ -56,6 +56,20 @@ static unsigned int qe_num_of_snum;
 
 static phys_addr_t qebase = -1;
 
+static struct device_node *qe_get_device_node(void)
+{
+   struct device_node *qe;
+
+   /*
+* Newer device trees have an "fsl,qe" compatible property for the QE
+* node, but we still need to support older device trees.
+*/
+   qe = of_find_compatible_node(NULL, NULL, "fsl,qe");
+   if (qe)
+   return qe;
+   return of_find_node_by_type(NULL, "qe");
+}
+
 static phys_addr_t get_qe_base(void)
 {
struct device_node *qe;
@@ -65,12 +79,9 @@ static phys_addr_t get_qe_base(void)
if (qebase != -1)
return qebase;
 
-   qe = of_find_compatible_node(NULL, NULL, "fsl,qe");
-   if (!qe) {
-   qe = of_find_node_by_type(NULL, "qe");
-   if (!qe)
-   return qebase;
-   }
+   qe = qe_get_device_node();
+   if (!qe)
+   return qebase;
 
	ret = of_address_to_resource(qe, 0, &res);
if (!ret)
@@ -164,12 +175,9 @@ unsigned int qe_get_brg_clk(void)
if (brg_clk)
return brg_clk;
 
-   qe = of_find_compatible_node(NULL, NULL, "fsl,qe");
-   if (!qe) {
-   qe = of_find_node_by_type(NULL, "qe");
-   if (!qe)
-   return brg_clk;
-   }
+   qe = qe_get_device_node();
+   if (!qe)
+   return brg_clk;
 
prop = of_get_property(qe, "brg-frequency", );
if (prop && size == sizeof(*prop))
@@ -563,16 +571,9 @@ struct qe_firmware_info *qe_get_firmware_info(void)
 
initialized = 1;
 
-   /*
-* Newer device trees have an "fsl,qe" compatible property for the QE
-* node, but we still need to support older device trees.
-   */
-   qe = of_find_compatible_node(NULL, NULL, "fsl,qe");
-   if (!qe) {
-   qe = of_find_node_by_type(NULL, "qe");
-   if (!qe)
-   return NULL;
-   }
+   qe = qe_get_device_node();
+   if (!qe)
+   return NULL;
 
/* Find the 'firmware' child node */
fw = of_get_child_by_name(qe, "firmware");
@@ -618,16 +619,9 @@ unsigned int qe_get_num_of_risc(void)
unsigned int num_of_risc = 0;
const u32 *prop;
 
-   qe = of_find_compatible_node(NULL, NULL, "fsl,qe");
-   if (!qe) {
-   /* Older devices trees did not have an "fsl,qe"
-* compatible property, so we need to look for
-* the QE node by name.
-*/
-   qe = of_find_node_by_type(NULL, "qe");
-   if (!qe)
-   return num_of_risc;
-   }
+   qe = qe_get_device_node();
+   if (!qe)
+   return num_of_risc;
 
prop = of_get_property(qe, "fsl,qe-num-riscs", );
if (prop && size == sizeof(*prop))
@@ -647,16 +641,9 @@ unsigned int qe_get_num_of_snums(void)
const u32 *prop;
 
num_of_snums = 28; /* The default number of snum for threads is 28 */
-   qe = of_find_compatible_node(NULL, NULL, "fsl,qe");
-   if (!qe) {
-   /* Older devices trees did not have an "fsl,qe"
-* compatible property, so we need to look for
-* the QE node by name.
-*/
-   qe = of_find_node_by_type(NULL, "qe");
-   if (!qe)
-   return num_of_snums;
-   }
+   qe = qe_get_device_node();
+   if (!qe)
+   return num_of_snums;
 
prop = of_get_property(qe, "fsl,qe-num-snums", );
if (prop && size == sizeof(*prop)) {
-- 
2.20.1



[PATCH RESEND 0/5] soc/fsl/qe: cleanups and new DT binding

2019-04-30 Thread Rasmus Villemoes
This small series consists of some small cleanups and simplifications
of the QUICC engine driver, and introduces a new DT binding that makes
it much easier to support other variants of the QUICC engine IP block
that appears in the wild: There's no reason to expect in general that
the number of valid SNUMs uniquely determines the set of such, so it's
better to simply let the device tree specify the values (and,
implicitly via the array length, also the count).

I sent these two months ago, but mostly as POC inside another
thread. Resending as proper patch series.

Rasmus Villemoes (5):
  soc/fsl/qe: qe.c: drop useless static qualifier
  soc/fsl/qe: qe.c: reduce static memory footprint by 1.7K
  soc/fsl/qe: qe.c: introduce qe_get_device_node helper
  soc/fsl/qe: qe.c: support fsl,qe-snums property
  soc/fsl/qe: qe.c: fold qe_get_num_of_snums into qe_snums_init

 .../devicetree/bindings/soc/fsl/cpm_qe/qe.txt |   8 +-
 drivers/net/ethernet/freescale/ucc_geth.c |   2 +-
 drivers/soc/fsl/qe/qe.c   | 162 +++---
 include/soc/fsl/qe/qe.h   |   2 +-
 4 files changed, 73 insertions(+), 101 deletions(-)

-- 
2.20.1



Re: [PATCH] powerpc/mm/radix: Fix kernel crash when running subpage protect test

2019-04-30 Thread Sachin Sant


> On 30-Apr-2019, at 1:29 PM, Aneesh Kumar K.V  
> wrote:
> 
> This patch fixes the below crash by making sure we touch the subpage 
> protection
> related structures only if we know they are allocated on the platform. With
> radix translation we don't allocate hash context at all and trying to access
> subpage_prot_table results in
> 
> Faulting instruction address: 0xc008bdb4
> Oops: Kernel access of bad area, sig: 11 [#1]
> LE PAGE_SIZE=64K MMU=Radix MMU=Hash SMP NR_CPUS=2048 NUMA PowerNV
> 
> NIP [c008bdb4] sys_subpage_prot+0x74/0x590
> LR [c000b688] system_call+0x5c/0x70
> Call Trace:
> [c00020002c6b7d30] [c00020002c6b7d90] 0xc00020002c6b7d90 (unreliable)
> [c00020002c6b7e20] [c000b688] system_call+0x5c/0x70
> Instruction dump:
> fb61ffd8 fb81ffe0 fba1ffe8 fbc1fff0 fbe1fff8 f821ff11 e92d1178 f9210068
> 3920 e92d0968 ebe90630 e93f03e8  6000 3860fffe e9410068
> 
> We also move the subpage_prot_table with mmap_sem held to avoid race
> between two parallel subpage_prot syscalls.
> 
> Reported-by: Sachin Sant 
> Signed-off-by: Aneesh Kumar K.V 
> —

Thanks for the patch. Fixes the kernel crash.

Tested-by: Sachin Sant mailto:sach...@linux.vnet.ibm.com>>

Thanks
-Sachin

Re: [PATCH v4] powerpc/pseries: Remove limit in wait for dying CPU

2019-04-30 Thread Nathan Lynch
Thiago Jung Bauermann  writes:
> This can be a problem because if the busy loop finishes too early, then the
> kernel may offline another CPU before the previous one finished dying,
> which would lead to two concurrent calls to rtas-stop-self, which is
> prohibited by the PAPR.
>
> Since the hotplug machinery already assumes that cpu_die() is going to
> work, we can simply loop until the CPU stops.
>
> Also change the loop to wait 100 µs between each call to
> smp_query_cpu_stopped() to avoid querying RTAS too often.

[...]

> diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c 
> b/arch/powerpc/platforms/pseries/hotplug-cpu.c
> index 97feb6e79f1a..d75cee60644c 100644
> --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
> +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
> @@ -214,13 +214,17 @@ static void pseries_cpu_die(unsigned int cpu)
>   msleep(1);
>   }
>   } else if (get_preferred_offline_state(cpu) == CPU_STATE_OFFLINE) {
> -
> - for (tries = 0; tries < 25; tries++) {
> + /*
> +  * rtas_stop_self() panics if the CPU fails to stop and our
> +  * callers already assume that we are going to succeed, so we
> +  * can just loop until the CPU stops.
> +  */
> + while (true) {
>   cpu_status = smp_query_cpu_stopped(pcpu);
>   if (cpu_status == QCSS_STOPPED ||
>   cpu_status == QCSS_HARDWARE_ERROR)
>   break;
> - cpu_relax();
> + udelay(100);
>   }
>   }

I agree with looping indefinitely but doesn't it need a cond_resched()
or similar check?



[PATCH v2] powerpc/32s: fix BATs setting with CONFIG_STRICT_KERNEL_RWX

2019-04-30 Thread Christophe Leroy
Serge reported some crashes with CONFIG_STRICT_KERNEL_RWX enabled
on a book3s32 machine.

Analysis shows two issues:
- BATs addresses and sizes are not properly aligned.
- There is a gap between the last address covered by BATs and the
first address covered by pages.

Memory mapped with DBATs:
0: 0xc000-0xc07f 0x Kernel RO coherent
1: 0xc080-0xc0bf 0x0080 Kernel RO coherent
2: 0xc0c0-0xc13f 0x00c0 Kernel RW coherent
3: 0xc140-0xc23f 0x0140 Kernel RW coherent
4: 0xc240-0xc43f 0x0240 Kernel RW coherent
5: 0xc440-0xc83f 0x0440 Kernel RW coherent
6: 0xc840-0xd03f 0x0840 Kernel RW coherent
7: 0xd040-0xe03f 0x1040 Kernel RW coherent

Memory mapped with pages:
0xe100-0xefff  0x2100   240Mrw   present   
dirty  accessed

This patch fixes both issues. With the patch, we get the following
which is as expected:

Memory mapped with DBATs:
0: 0xc000-0xc07f 0x Kernel RO coherent
1: 0xc080-0xc0bf 0x0080 Kernel RO coherent
2: 0xc0c0-0xc0ff 0x00c0 Kernel RW coherent
3: 0xc100-0xc1ff 0x0100 Kernel RW coherent
4: 0xc200-0xc3ff 0x0200 Kernel RW coherent
5: 0xc400-0xc7ff 0x0400 Kernel RW coherent
6: 0xc800-0xcfff 0x0800 Kernel RW coherent
7: 0xd000-0xdfff 0x1000 Kernel RW coherent

Memory mapped with pages:
0xe000-0xefff  0x2000   256Mrw   present   
dirty  accessed

Reported-by: Serge Belyshev 
Fixes: 63b2bc619565 ("powerpc/mm/32s: Use BATs for STRICT_KERNEL_RWX")
Cc: sta...@vger.kernel.org
Acked-by: Segher Boessenkool 
Signed-off-by: Christophe Leroy 
---
 v2: Added comment to explain block_size() function as recommended by Segher.

 arch/powerpc/mm/ppc_mmu_32.c | 18 ++
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c
index bf1de3ca39bc..afd8dcb11432 100644
--- a/arch/powerpc/mm/ppc_mmu_32.c
+++ b/arch/powerpc/mm/ppc_mmu_32.c
@@ -98,10 +98,20 @@ static int find_free_bat(void)
return -1;
 }
 
+/*
+ * This function calculates the size of the larger block usable to map the
+ * beginning of an area based on the start address and size of that area:
+ * - max block size is 8M on 601 and 256M on other 6xx.
+ * - base address must be aligned to the block size. So the maximum block size
+ *   is identified by the lowest bit set to 1 in the base address (for instance
+ *   if base is 0x1600, max size is 0x0200).
+ * - block size has to be a power of two. This is calculated by finding the
+ *   highest bit set to 1.
+ */
 static unsigned int block_size(unsigned long base, unsigned long top)
 {
unsigned int max_size = (cpu_has_feature(CPU_FTR_601) ? 8 : 256) << 20;
-   unsigned int base_shift = (fls(base) - 1) & 31;
+   unsigned int base_shift = (ffs(base) - 1) & 31;
unsigned int block_shift = (fls(top - base) - 1) & 31;
 
return min3(max_size, 1U << base_shift, 1U << block_shift);
@@ -157,7 +167,7 @@ static unsigned long __init __mmu_mapin_ram(unsigned long 
base, unsigned long to
 
 unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
 {
-   int done;
+   unsigned long done;
unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET;
 
if (__map_without_bats) {
@@ -169,10 +179,10 @@ unsigned long __init mmu_mapin_ram(unsigned long base, 
unsigned long top)
return __mmu_mapin_ram(base, top);
 
done = __mmu_mapin_ram(base, border);
-   if (done != border - base)
+   if (done != border)
return done;
 
-   return done + __mmu_mapin_ram(border, top);
+   return __mmu_mapin_ram(border, top);
 }
 
 void mmu_mark_initmem_nx(void)
-- 
2.13.3



Re: [PATCH 22/41] drivers: tty: serial: cpm_uart: fix logging calls

2019-04-30 Thread Andy Shevchenko
On Mon, Apr 29, 2019 at 05:59:04PM +0200, Christophe Leroy wrote:
> Le 27/04/2019 à 14:52, Enrico Weigelt, metux IT consult a écrit :
> > Fix checkpatch warnings by using pr_err():
> > 
> >  WARNING: Prefer [subsystem eg: netdev]_err([subsystem]dev, ... then 
> > dev_err(dev, ... then pr_err(...  to printk(KERN_ERR ...
> >  #109: FILE: drivers/tty/serial/cpm_uart/cpm_uart_cpm2.c:109:
> >  +  printk(KERN_ERR
> > 
> >  WARNING: Prefer [subsystem eg: netdev]_err([subsystem]dev, ... then 
> > dev_err(dev, ... then pr_err(...  to printk(KERN_ERR ...
> >  #128: FILE: drivers/tty/serial/cpm_uart/cpm_uart_cpm2.c:128:
> >  +  printk(KERN_ERR
> > 
> >  WARNING: Prefer [subsystem eg: netdev]_err([subsystem]dev, ... then 
> > dev_err(dev, ... then pr_err(...  to printk(KERN_ERR ...
> >  +   printk(KERN_ERR
> > 
> >  WARNING: Prefer [subsystem eg: netdev]_err([subsystem]dev, ... then 
> > dev_err(dev, ... then pr_err(...  to printk(KERN_ERR ...
> >  +   printk(KERN_ERR
> > 
> > Signed-off-by: Enrico Weigelt 
> 
> Reviewed-by: Christophe Leroy 
> 
> But is that really worth doing those changes ?
> 
> If we want to do something useful, wouldn't it make more sense to introduce
> the use of dev_err() in order to identify the faulting device in the message
> ?

+1 for switching to dev_*().

-- 
With Best Regards,
Andy Shevchenko




Re: [PATCH v2 stable v4.4 2/2] Documentation: Add nospectre_v1 parameter

2019-04-30 Thread Greg KH
On Tue, Apr 30, 2019 at 03:42:27PM +0300, Diana Craciun wrote:
> commit 26cb1f36c43ee6e89d2a9f48a5a7500d5248f836 upstream.
> 
> Currently only supported on powerpc.
> 
> Signed-off-by: Diana Craciun 
> Signed-off-by: Michael Ellerman 
> ---
>  Documentation/kernel-parameters.txt | 4 
>  1 file changed, 4 insertions(+)
> 
> diff --git a/Documentation/kernel-parameters.txt 
> b/Documentation/kernel-parameters.txt
> index f0bdf78420a0..3ff87d5d6fea 100644
> --- a/Documentation/kernel-parameters.txt
> +++ b/Documentation/kernel-parameters.txt
> @@ -2449,6 +2449,10 @@ bytes respectively. Such letter suffixes can also be 
> entirely omitted.
>   legacy floating-point registers on task switch.
>  
>   nohugeiomap [KNL,x86] Disable kernel huge I/O mappings.
> + 
> + nospectre_v1[PPC] Disable mitigations for Spectre Variant 1 (bounds
> + check bypass). With this option data leaks are possible
> + in the system.
>  
>   nospectre_v2[X86,PPC_FSL_BOOK3E] Disable all mitigations for the 
> Spectre variant 2
>   (indirect branch prediction) vulnerability. System may
> -- 
> 2.17.1
>

Both of these patches needed to be added to a bunch of the stable trees,
so I've now done that.

thanks,

greg k-h


Re: [PATCH v2 stable v4.4 2/2] Documentation: Add nospectre_v1 parameter

2019-04-30 Thread Greg KH
On Tue, Apr 30, 2019 at 03:42:27PM +0300, Diana Craciun wrote:
> commit 26cb1f36c43ee6e89d2a9f48a5a7500d5248f836 upstream.
> 
> Currently only supported on powerpc.
> 
> Signed-off-by: Diana Craciun 
> Signed-off-by: Michael Ellerman 
> ---
>  Documentation/kernel-parameters.txt | 4 
>  1 file changed, 4 insertions(+)
> 
> diff --git a/Documentation/kernel-parameters.txt 
> b/Documentation/kernel-parameters.txt
> index f0bdf78420a0..3ff87d5d6fea 100644
> --- a/Documentation/kernel-parameters.txt
> +++ b/Documentation/kernel-parameters.txt
> @@ -2449,6 +2449,10 @@ bytes respectively. Such letter suffixes can also be 
> entirely omitted.
>   legacy floating-point registers on task switch.
>  
>   nohugeiomap [KNL,x86] Disable kernel huge I/O mappings.
> + 

Trailing whitespace :(

Fix up your editor to flag this as RED or something.  I'll go fix it
up...



Re: [PATCH kernel v3] powerpc/powernv: Isolate NVLinks between GV100GL on Witherspoon

2019-04-30 Thread Alex Williamson
On Tue, 30 Apr 2019 16:14:35 +1000
Alexey Kardashevskiy  wrote:

> On 30/04/2019 15:45, Alistair Popple wrote:
> > Alexey,
> >   
> > +void pnv_try_isolate_nvidia_v100(struct pci_dev *bridge)
> > +{
> > +   u32 mask, val;
> > +   void __iomem *bar0_0, *bar0_12, *bar0_a0;
> > +   struct pci_dev *pdev;
> > +   u16 cmd = 0, cmdmask = PCI_COMMAND_MEMORY;
> > +
> > +   if (!bridge->subordinate)
> > +   return;
> > +
> > +   pdev = list_first_entry_or_null(>subordinate->devices,
> > +   struct pci_dev, bus_list);
> > +   if (!pdev)
> > +   return;
> > +
> > +   if (pdev->vendor != PCI_VENDOR_ID_NVIDIA)  
> > 
> > Don't you also need to check the PCIe devid to match only [PV]100 devices 
> > as 
> > well? I doubt there's any guarantee these registers will remain the same 
> > for 
> > all future (or older) NVIDIA devices.  
> 
> 
> I do not have the complete list of IDs and I already saw 3 different
> device ids and this only works for machines with ibm,npu/gpu/nvlinks
> properties so for now it works and for the future we are hoping to
> either have an open source nvidia driver or some small minidriver (also
> from nvidia, or may be a spec allowing us to write one) to allow
> topology discovery on the host so we would not depend on the skiboot's
> powernv DT.
> 
> > IMHO this should really be done in the device driver in the guest. A 
> > malicious 
> > guest could load a modified driver that doesn't do this, but that should 
> > not 
> > compromise other guests which presumably load a non-compromised driver that 
> > disables the links on that guests GPU. However I guess in practice what you 
> > have here should work equally well.  
> 
> Doing it in the guest means a good guest needs to have an updated
> driver, we do not really want to depend on this. The idea of IOMMU
> groups is that the hypervisor provides isolation irrespective to what
> the guest does.

+1 It's not the user/guest driver's responsibility to maintain the
isolation of the device.  Thanks,

Alex

> Also vfio+qemu+slof needs to convey the nvlink topology to the guest,
> seems like an unnecessary complication.
> 
> 
> 
> > - Alistair
> >   
> > +   return;
> > +
> > +   mask = nvlinkgpu_get_disable_mask(>dev);
> > +   if (!mask)
> > +   return;
> > +
> > +   bar0_0 = pci_iomap_range(pdev, 0, 0, 0x1);
> > +   if (!bar0_0) {
> > +   pci_err(pdev, "Error mapping BAR0 @0\n");
> > +   return;
> > +   }
> > +   bar0_12 = pci_iomap_range(pdev, 0, 0x12, 0x1);
> > +   if (!bar0_12) {
> > +   pci_err(pdev, "Error mapping BAR0 @12\n");
> > +   goto bar0_0_unmap;
> > +   }
> > +   bar0_a0 = pci_iomap_range(pdev, 0, 0xA0, 0x1);
> > +   if (!bar0_a0) {
> > +   pci_err(pdev, "Error mapping BAR0 @A0\n");
> > +   goto bar0_12_unmap;
> > +   }  
> 
>  Is it really necessary to do three separate ioremaps vs one that would
>  cover them all here?  I suspect you're just sneaking in PAGE_SIZE with
>  the 0x1 size mappings anyway.  Seems like it would simplify setup,
>  error reporting, and cleanup to to ioremap to the PAGE_ALIGN'd range
>  of the highest register accessed. Thanks,  
> >>>
> >>> Sure I can map it once, I just do not see the point in mapping/unmapping
> >>> all 0xa1>>16=161 system pages for a very short period of time while
> >>> we know precisely that we need just 3 pages.
> >>>
> >>> Repost?  
> >>
> >> Ping?
> >>
> >> Can this go in as it is (i.e. should I ping Michael) or this needs
> >> another round? It would be nice to get some formal acks. Thanks,
> >>  
>  Alex
>   
> > +
> > +   pci_restore_state(pdev);
> > +   pci_read_config_word(pdev, PCI_COMMAND, );
> > +   if ((cmd & cmdmask) != cmdmask)
> > +   pci_write_config_word(pdev, PCI_COMMAND, cmd | cmdmask);
> > +
> > +   /*
> > +* The sequence is from "Tesla P100 and V100 SXM2 NVLink 
> > Isolation on
> > +* Multi-Tenant Systems".
> > +* The register names are not provided there either, hence raw 
> > values.
> > +*/
> > +   iowrite32(0x4, bar0_12 + 0x4C);
> > +   iowrite32(0x2, bar0_12 + 0x2204);
> > +   val = ioread32(bar0_0 + 0x200);
> > +   val |= 0x0200;
> > +   iowrite32(val, bar0_0 + 0x200);
> > +   val = ioread32(bar0_a0 + 0x148);
> > +   val |= mask;
> > +   iowrite32(val, bar0_a0 + 0x148);
> > +
> > +   if ((cmd | cmdmask) != cmd)
> > +   pci_write_config_word(pdev, PCI_COMMAND, cmd);
> > +
> > +   

[PATCH v2 stable v4.4 2/2] Documentation: Add nospectre_v1 parameter

2019-04-30 Thread Diana Craciun
commit 26cb1f36c43ee6e89d2a9f48a5a7500d5248f836 upstream.

Currently only supported on powerpc.

Signed-off-by: Diana Craciun 
Signed-off-by: Michael Ellerman 
---
 Documentation/kernel-parameters.txt | 4 
 1 file changed, 4 insertions(+)

diff --git a/Documentation/kernel-parameters.txt 
b/Documentation/kernel-parameters.txt
index f0bdf78420a0..3ff87d5d6fea 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2449,6 +2449,10 @@ bytes respectively. Such letter suffixes can also be 
entirely omitted.
legacy floating-point registers on task switch.
 
nohugeiomap [KNL,x86] Disable kernel huge I/O mappings.
+   
+   nospectre_v1[PPC] Disable mitigations for Spectre Variant 1 (bounds
+   check bypass). With this option data leaks are possible
+   in the system.
 
nospectre_v2[X86,PPC_FSL_BOOK3E] Disable all mitigations for the 
Spectre variant 2
(indirect branch prediction) vulnerability. System may
-- 
2.17.1



[PATCH v2 stable v4.4 1/2] powerpc/fsl: Add FSL_PPC_BOOK3E as supported arch for nospectre_v2 boot arg

2019-04-30 Thread Diana Craciun
commit e59f5bd759b7dee57593c5b6c0441609bda5d530 upstream.

Signed-off-by: Diana Craciun 
Signed-off-by: Michael Ellerman 
---
 Documentation/kernel-parameters.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/kernel-parameters.txt 
b/Documentation/kernel-parameters.txt
index da515c535e62..f0bdf78420a0 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2450,7 +2450,7 @@ bytes respectively. Such letter suffixes can also be 
entirely omitted.
 
nohugeiomap [KNL,x86] Disable kernel huge I/O mappings.
 
-   nospectre_v2[X86] Disable all mitigations for the Spectre variant 2
+   nospectre_v2[X86,PPC_FSL_BOOK3E] Disable all mitigations for the 
Spectre variant 2
(indirect branch prediction) vulnerability. System may
allow data leaks with this option, which is equivalent
to spectre_v2=off.
-- 
2.17.1



[PATCH v3 16/16] powerpc/32: Don't add dummy frames when calling trace_hardirqs_on/off

2019-04-30 Thread Christophe Leroy
No need to add dummy frames when calling trace_hardirqs_on or
trace_hardirqs_off. GCC properly handles empty stacks.

In addition, powerpc doesn't set CONFIG_FRAME_POINTER, therefore
__builtin_return_address(1..) returns NULL at all time. So the
dummy frames are definitely unneeded here.

In the meantime, avoid reading memory for loading r1 with a value
we already know.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/entry_32.S | 16 ++--
 1 file changed, 2 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index e65c3e70c648..235a01d34b6d 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -243,12 +243,7 @@ transfer_to_handler_cont:
 
 reenable_mmu:
/*
-* The trace_hardirqs_off will use CALLER_ADDR0 and CALLER_ADDR1.
-* If from user mode there is only one stack frame on the stack, and
-* accessing CALLER_ADDR1 will cause oops. So we need create a dummy
-* stack frame to make trace_hardirqs_off happy.
-*
-* This is handy because we also need to save a bunch of GPRs,
+* We save a bunch of GPRs,
 * r3 can be different from GPR3(r1) at this point, r9 and r11
 * contains the old MSR and handler address respectively,
 * r4 & r5 can contain page fault arguments that need to be passed
@@ -950,18 +945,11 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x)
 */
andi.   r10,r9,MSR_EE
beq 1f
-   /*
-* Since the ftrace irqsoff latency trace checks CALLER_ADDR1,
-* which is the stack frame here, we need to force a stack frame
-* in case we came from user space.
-*/
stwur1,-32(r1)
mflrr0
stw r0,4(r1)
-   stwur1,-32(r1)
bl  trace_hardirqs_on
-   lwz r1,0(r1)
-   lwz r1,0(r1)
+   addir1, r1, 32
lwz r9,_MSR(r1)
 1:
 #endif /* CONFIG_TRACE_IRQFLAGS */
-- 
2.13.3



[PATCH v3 14/16] powerpc/32: implement fast entry for syscalls on BOOKE

2019-04-30 Thread Christophe Leroy
This patch implements a fast entry for syscalls.

Syscalls don't have to preserve non volatile registers except LR.

This patch then implements a fast entry for syscalls, where
volatile registers get clobbered.

As this entry is dedicated to syscalls, it always sets MSR_EE
and warns in case MSR_EE was previously off.

It also assumes that the call is always from user, system calls are
unexpected from kernel.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/entry_32.S   |   7 ---
 arch/powerpc/kernel/head_44x.S   |   3 +-
 arch/powerpc/kernel/head_booke.h | 103 +--
 arch/powerpc/kernel/head_fsl_booke.S |   3 +-
 4 files changed, 100 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 184cc1de2f37..dc58fec51ed6 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -342,7 +342,6 @@ stack_ovf:
SYNC
RFI
 
-#ifndef CONFIG_BOOKE   /* to be removed once BOOKE uses fast syscall entry */
 #ifdef CONFIG_TRACE_IRQFLAGS
 trace_syscall_entry_irq_off:
/*
@@ -369,7 +368,6 @@ transfer_to_syscall:
andi.   r12,r9,MSR_EE
beq-trace_syscall_entry_irq_off
 #endif /* CONFIG_TRACE_IRQFLAGS */
-#endif /* !CONFIG_BOOKE */
 
 /*
  * Handle a system call.
@@ -382,11 +380,6 @@ _GLOBAL(DoSyscall)
stw r3,ORIG_GPR3(r1)
li  r12,0
stw r12,RESULT(r1)
-#ifdef CONFIG_BOOKE/* to be removed once BOOKE uses fast syscall entry */
-   lwz r11,_CCR(r1)/* Clear SO bit in CR */
-   rlwinm  r11,r11,0,4,2
-   stw r11,_CCR(r1)
-#endif
 #ifdef CONFIG_TRACE_IRQFLAGS
/* Make sure interrupts are enabled */
mfmsr   r11
diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
index e06cb1c84951..7d73c7e39afe 100644
--- a/arch/powerpc/kernel/head_44x.S
+++ b/arch/powerpc/kernel/head_44x.S
@@ -282,8 +282,7 @@ interrupt_base:
 #endif
/* System Call Interrupt */
START_EXCEPTION(SystemCall)
-   NORMAL_EXCEPTION_PROLOG(BOOKE_INTERRUPT_SYSCALL)
-   EXC_XFER_SYS(0x0c00, DoSyscall)
+   SYSCALL_ENTRY   0xc00 BOOKE_INTERRUPT_SYSCALL
 
/* Auxiliary Processor Unavailable Interrupt */
EXCEPTION(0x2020, BOOKE_INTERRUPT_AP_UNAVAIL, \
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index 56dd1341eb3d..bfeb469e8106 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -6,6 +6,8 @@
 #include 
 #include 
 
+#ifdef __ASSEMBLY__
+
 /*
  * Macros used for common Book-e exception handling
  */
@@ -81,6 +83,101 @@ END_BTB_FLUSH_SECTION
SAVE_4GPRS(3, r11);  \
SAVE_2GPRS(7, r11)
 
+.macro SYSCALL_ENTRY trapno intno
+   mfspr   r10, SPRN_SPRG_THREAD
+#ifdef CONFIG_KVM_BOOKE_HV
+BEGIN_FTR_SECTION
+   mtspr   SPRN_SPRG_WSCRATCH0, r10
+   stw r11, THREAD_NORMSAVE(0)(r10)
+   stw r13, THREAD_NORMSAVE(2)(r10)
+   mfcrr13 /* save CR in r13 for now  */
+   mfspr   r11, SPRN_SRR1
+   mtocrf  0x80, r11   /* check MSR[GS] without clobbering reg */
+   bf  3, 1975f
+   b   kvmppc_handler_BOOKE_INTERRUPT_\intno\()_SPRN_SRR1
+1975:
+   mr  r12, r13
+   lwz r13, THREAD_NORMSAVE(2)(r10)
+FTR_SECTION_ELSE
+#endif
+   mfcrr12
+#ifdef CONFIG_KVM_BOOKE_HV
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
+#endif
+   BOOKE_CLEAR_BTB(r11)
+   lwz r11, TASK_STACK - THREAD(r10)
+   rlwinm  r12,r12,0,4,2   /* Clear SO bit in CR */
+   ALLOC_STACK_FRAME(r11, THREAD_SIZE - INT_FRAME_SIZE)
+   stw r12, _CCR(r11)  /* save various registers */
+   mflrr12
+   stw r12,_LINK(r11)
+   mfspr   r12,SPRN_SRR0
+   stw r1, GPR1(r11)
+   mfspr   r9,SPRN_SRR1
+   stw r1, 0(r11)
+   mr  r1, r11
+   stw r12,_NIP(r11)
+   rlwinm  r9,r9,0,14,12   /* clear MSR_WE (necessary?)   */
+   lis r12, STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
+   stw r2,GPR2(r11)
+   addir12, r12, STACK_FRAME_REGS_MARKER@l
+   stw r9,_MSR(r11)
+   li  r2, \trapno + 1
+   stw r12, 8(r11)
+   stw r2,_TRAP(r11)
+   SAVE_GPR(0, r11)
+   SAVE_4GPRS(3, r11)
+   SAVE_2GPRS(7, r11)
+
+   addir11,r1,STACK_FRAME_OVERHEAD
+   addir2,r10,-THREAD
+   stw r11,PT_REGS(r10)
+   /* Check to see if the dbcr0 register is set up to debug.  Use the
+  internal debug mode bit to do this. */
+   lwz r12,THREAD_DBCR0(r10)
+   andis.  r12,r12,DBCR0_IDM@h
+   ACCOUNT_CPU_USER_ENTRY(r2, r11, r12)
+   beq+3f
+   /* From user and task is ptraced - load up global dbcr0 */
+   li  r12,-1  /* clear all pending debug events */
+   mtspr   SPRN_DBSR,r12
+ 

[PATCH v3 15/16] powerpc/32: don't do syscall stuff in transfer_to_handler

2019-04-30 Thread Christophe Leroy
As syscalls are now handled via a fast entry path, syscall related
actions can be removed from the generic transfer_to_handler path.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/entry_32.S | 19 ---
 1 file changed, 19 deletions(-)

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index dc58fec51ed6..e65c3e70c648 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -217,7 +217,6 @@ transfer_to_handler_cont:
 */
tophys(r12, r1)
lwz r12,_MSR(r12)
-   xor r12,r10,r12
andi.   r12,r12,MSR_EE
bne 1f
 
@@ -258,9 +257,6 @@ reenable_mmu:
 * the rest is restored from the exception frame.
 */
 
-   /* Are we enabling or disabling interrupts ? */
-   andi.   r0,r10,MSR_EE
-
stwur1,-32(r1)
stw r9,8(r1)
stw r11,12(r1)
@@ -268,8 +264,6 @@ reenable_mmu:
stw r4,20(r1)
stw r5,24(r1)
 
-   bne-0f
-
/* If we are disabling interrupts (normal case), simply log it with
 * lockdep
 */
@@ -287,19 +281,6 @@ reenable_mmu:
mtctr   r11
mtlrr9
bctr/* jump to handler */
-
-   /* If we are enabling interrupt, this is a syscall. They shouldn't
-* happen while interrupts are disabled, so let's do a warning here.
-*/
-0: trap
-   EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING
-   bl  trace_hardirqs_on
-
-   /* Now enable for real */
-   mfmsr   r10
-   ori r10,r10,MSR_EE
-   mtmsr   r10
-   b   2b
 #endif /* CONFIG_TRACE_IRQFLAGS */
 
 #if defined (CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500)
-- 
2.13.3



[PATCH v3 12/16] powerpc: Fix 32-bit handling of MSR_EE on exceptions

2019-04-30 Thread Christophe Leroy
[text mostly copied from benh's RFC/WIP]

ppc32 are still doing something rather gothic and wrong on 32-bit
which we stopped doing on 64-bit a while ago.

We have that thing where some handlers "copy" the EE value from the
original stack frame into the new MSR before transferring to the
handler.

Thus for a number of exceptions, we enter the handlers with interrupts
enabled.

This is rather fishy, some of the stuff that handlers might do early
on such as irq_enter/exit or user_exit, context tracking, etc...
should be run with interrupts off afaik.

Generally our handlers know when to re-enable interrupts if needed.

The problem we were having is that we assumed these interrupts would
return with interrupts enabled. However that isn't the case.

Instead, this patch changes things so that we always enter exception
handlers with interrupts *off* with the notable exception of syscalls
which are special (and get a fast path).

Suggested-by: Benjamin Herrenschmidt 
Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/entry_32.S | 116 -
 1 file changed, 67 insertions(+), 49 deletions(-)

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index d0cea3deb86c..0c555f9f1543 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -37,6 +37,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "head_32.h"
 
@@ -206,19 +207,42 @@ transfer_to_handler_cont:
mtspr   SPRN_NRI, r0
 #endif
 #ifdef CONFIG_TRACE_IRQFLAGS
+   /*
+* When tracing IRQ state (lockdep) we enable the MMU before we call
+* the IRQ tracing functions as they might access vmalloc space or
+* perform IOs for console output.
+*
+* To speed up the syscall path where interrupts stay on, let's check
+* first if we are changing the MSR value at all.
+*/
+   tophys(r12, r1)
+   lwz r12,_MSR(r12)
+   xor r12,r10,r12
+   andi.   r12,r12,MSR_EE
+   bne 1f
+
+   /* MSR isn't changing, just transition directly */
+#endif
+   mtspr   SPRN_SRR0,r11
+   mtspr   SPRN_SRR1,r10
+   mtlrr9
+   SYNC
+   RFI /* jump to handler, enable MMU */
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+1: /* MSR is changing, re-enable MMU so we can notify lockdep. We need to
+* keep interrupts disabled at this point otherwise we might risk
+* taking an interrupt before we tell lockdep they are enabled.
+*/
lis r12,reenable_mmu@h
ori r12,r12,reenable_mmu@l
+   LOAD_MSR_KERNEL(r0, MSR_KERNEL)
mtspr   SPRN_SRR0,r12
-   mtspr   SPRN_SRR1,r10
+   mtspr   SPRN_SRR1,r0
SYNC
RFI
-reenable_mmu:  /* re-enable mmu so we can */
-   mfmsr   r10
-   lwz r12,_MSR(r1)
-   xor r10,r10,r12
-   andi.   r10,r10,MSR_EE  /* Did EE change? */
-   beq 1f
 
+reenable_mmu:
/*
 * The trace_hardirqs_off will use CALLER_ADDR0 and CALLER_ADDR1.
 * If from user mode there is only one stack frame on the stack, and
@@ -233,14 +257,24 @@ reenable_mmu: /* re-enable 
mmu so we can */
 * they aren't useful past this point (aren't syscall arguments),
 * the rest is restored from the exception frame.
 */
+
+   /* Are we enabling or disabling interrupts ? */
+   andi.   r0,r10,MSR_EE
+
stwur1,-32(r1)
stw r9,8(r1)
stw r11,12(r1)
stw r3,16(r1)
stw r4,20(r1)
stw r5,24(r1)
-   bl  trace_hardirqs_off
-   lwz r5,24(r1)
+
+   bne-0f
+
+   /* If we are disabling interrupts (normal case), simply log it with
+* lockdep
+*/
+1: bl  trace_hardirqs_off
+2: lwz r5,24(r1)
lwz r4,20(r1)
lwz r3,16(r1)
lwz r11,12(r1)
@@ -250,15 +284,22 @@ reenable_mmu: /* re-enable 
mmu so we can */
lwz r6,GPR6(r1)
lwz r7,GPR7(r1)
lwz r8,GPR8(r1)
-1: mtctr   r11
+   mtctr   r11
mtlrr9
bctr/* jump to handler */
-#else /* CONFIG_TRACE_IRQFLAGS */
-   mtspr   SPRN_SRR0,r11
-   mtspr   SPRN_SRR1,r10
-   mtlrr9
-   SYNC
-   RFI /* jump to handler, enable MMU */
+
+   /* If we are enabling interrupt, this is a syscall. They shouldn't
+* happen while interrupts are disabled, so let's do a warning here.
+*/
+0: trap
+   EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING
+   bl  trace_hardirqs_on
+
+   /* Now enable for real */
+   mfmsr   r10
+   ori r10,r10,MSR_EE
+   mtmsr   r10
+   b   2b
 #endif /* CONFIG_TRACE_IRQFLAGS */
 
 #if defined (CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500)
@@ -316,29 

[PATCH v3 13/16] powerpc/32: implement fast entry for syscalls on non BOOKE

2019-04-30 Thread Christophe Leroy
This patch implements a fast entry for syscalls.

Syscalls don't have to preserve non volatile registers except LR.

This patch then implements a fast entry for syscalls, where
volatile registers get clobbered.

As this entry is dedicated to syscalls, it always sets MSR_EE
and warns in case MSR_EE was previously off.

It also assumes that the call is always from user mode; system calls
are unexpected from the kernel.

The overall series improves the null_syscall selftest by 12.5% on an 83xx
and by 17% on an 8xx.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/entry_32.S | 32 
 arch/powerpc/kernel/head_32.S  |  3 +-
 arch/powerpc/kernel/head_32.h  | 85 --
 arch/powerpc/kernel/head_40x.S |  3 +-
 arch/powerpc/kernel/head_8xx.S |  3 +-
 5 files changed, 116 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 0c555f9f1543..184cc1de2f37 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -342,6 +342,35 @@ stack_ovf:
SYNC
RFI
 
+#ifndef CONFIG_BOOKE   /* to be removed once BOOKE uses fast syscall entry */
+#ifdef CONFIG_TRACE_IRQFLAGS
+trace_syscall_entry_irq_off:
+   /*
+* Syscall shouldn't happen while interrupts are disabled,
+* so let's do a warning here.
+*/
+0: trap
+   EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING
+   bl  trace_hardirqs_on
+
+   /* Now enable for real */
+   LOAD_MSR_KERNEL(r10, MSR_KERNEL | MSR_EE)
+   mtmsr   r10
+
+   REST_GPR(0, r1)
+   REST_4GPRS(3, r1)
+   REST_2GPRS(7, r1)
+   b   DoSyscall
+#endif /* CONFIG_TRACE_IRQFLAGS */
+
+   .globl  transfer_to_syscall
+transfer_to_syscall:
+#ifdef CONFIG_TRACE_IRQFLAGS
+   andi.   r12,r9,MSR_EE
+   beq-trace_syscall_entry_irq_off
+#endif /* CONFIG_TRACE_IRQFLAGS */
+#endif /* !CONFIG_BOOKE */
+
 /*
  * Handle a system call.
  */
@@ -353,9 +382,11 @@ _GLOBAL(DoSyscall)
stw r3,ORIG_GPR3(r1)
li  r12,0
stw r12,RESULT(r1)
+#ifdef CONFIG_BOOKE/* to be removed once BOOKE uses fast syscall entry */
lwz r11,_CCR(r1)/* Clear SO bit in CR */
rlwinm  r11,r11,0,4,2
stw r11,_CCR(r1)
+#endif
 #ifdef CONFIG_TRACE_IRQFLAGS
/* Make sure interrupts are enabled */
mfmsr   r11
@@ -1219,6 +1250,7 @@ load_dbcr0:
 
.section .bss
.align  4
+   .global global_dbcr0
 global_dbcr0:
.space  8*NR_CPUS
.previous
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index 2404c39373d3..f1da8fef726a 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -370,8 +370,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE)
. = 0xc00
DO_KVM  0xc00
 SystemCall:
-   EXCEPTION_PROLOG
-   EXC_XFER_SYS(0xc00, DoSyscall)
+   SYSCALL_ENTRY   0xc00
 
 /* Single step - not used on 601 */
EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD)
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index 14cb0af2f494..4a692553651f 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -73,6 +73,87 @@
SAVE_2GPRS(7, r11)
 .endm
 
+.macro SYSCALL_ENTRY trapno
+   mfspr   r12,SPRN_SPRG_THREAD
+   mfcrr10
+   lwz r11,TASK_STACK-THREAD(r12)
+   mflrr9
+   addir11,r11,THREAD_SIZE - INT_FRAME_SIZE
+   rlwinm  r10,r10,0,4,2   /* Clear SO bit in CR */
+   tophys(r11,r11)
+   stw r10,_CCR(r11)   /* save registers */
+   mfspr   r10,SPRN_SRR0
+   stw r9,_LINK(r11)
+   mfspr   r9,SPRN_SRR1
+   stw r1,GPR1(r11)
+   stw r1,0(r11)
+   tovirt(r1,r11)  /* set new kernel sp */
+   stw r10,_NIP(r11)
+#ifdef CONFIG_40x
+   rlwinm  r9,r9,0,14,12   /* clear MSR_WE (necessary?) */
+#else
+   LOAD_MSR_KERNEL(r10, MSR_KERNEL & ~(MSR_IR|MSR_DR)) /* can take 
exceptions */
+   MTMSRD(r10) /* (except for mach check in rtas) */
+#endif
+   lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
+   stw r2,GPR2(r11)
+   addir10,r10,STACK_FRAME_REGS_MARKER@l
+   stw r9,_MSR(r11)
+   li  r2, \trapno + 1
+   stw r10,8(r11)
+   stw r2,_TRAP(r11)
+   SAVE_GPR(0, r11)
+   SAVE_4GPRS(3, r11)
+   SAVE_2GPRS(7, r11)
+   addir11,r1,STACK_FRAME_OVERHEAD
+   addir2,r12,-THREAD
+   stw r11,PT_REGS(r12)
+#if defined(CONFIG_40x)
+   /* Check to see if the dbcr0 register is set up to debug.  Use the
+  internal debug mode bit to do this. */
+   lwz r12,THREAD_DBCR0(r12)
+   andis.  r12,r12,DBCR0_IDM@h
+#endif
+   ACCOUNT_CPU_USER_ENTRY(r2, r11, r12)
+#if defined(CONFIG_40x)
+   beq+3f
+   /* From user and task is 

[PATCH v3 11/16] powerpc/32: get rid of COPY_EE in exception entry

2019-04-30 Thread Christophe Leroy
EXC_XFER_TEMPLATE() is not called with COPY_EE anymore so
we can get rid of copyee parameters and related COPY_EE and NOCOPY
macros.

Suggested-by: Benjamin Herrenschmidt 
[split out from benh's RFC patch]

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/head_32.h| 12 
 arch/powerpc/kernel/head_40x.S   |  8 +++-
 arch/powerpc/kernel/head_booke.h | 22 --
 3 files changed, 15 insertions(+), 27 deletions(-)

diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index 8881b6887841..14cb0af2f494 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -103,28 +103,24 @@
addir3,r1,STACK_FRAME_OVERHEAD; \
xfer(n, hdlr)
 
-#define EXC_XFER_TEMPLATE(hdlr, trap, msr, copyee, tfer, ret)  \
+#define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret)  \
li  r10,trap;   \
stw r10,_TRAP(r11); \
LOAD_MSR_KERNEL(r10, msr);  \
-   copyee(r10, r9);\
bl  tfer;   \
.long   hdlr;   \
.long   ret
 
-#define COPY_EE(d, s)  rlwimi d,s,0,MSR_EE
-#define NOCOPY(d, s)
-
 #define EXC_XFER_STD(n, hdlr)  \
-   EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, NOCOPY, 
transfer_to_handler_full,\
+   EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, transfer_to_handler_full,
\
  ret_from_except_full)
 
 #define EXC_XFER_LITE(n, hdlr) \
-   EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, NOCOPY, transfer_to_handler, \
+   EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, transfer_to_handler, \
  ret_from_except)
 
 #define EXC_XFER_SYS(n, hdlr)  \
-   EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL | MSR_EE, NOCOPY, 
transfer_to_handler, \
+   EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL | MSR_EE, transfer_to_handler, \
  ret_from_except)
 
 #endif /* __HEAD_32_H__ */
diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index 71597fb7cd89..b68de183faf1 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -166,8 +166,7 @@ _ENTRY(saved_ksp_limit)
CRITICAL_EXCEPTION_PROLOG;  \
addir3,r1,STACK_FRAME_OVERHEAD; \
EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
- NOCOPY, crit_transfer_to_handler, \
- ret_from_crit_exc)
+ crit_transfer_to_handler, ret_from_crit_exc)
 
 /*
  * 0x0100 - Critical Interrupt Exception
@@ -651,7 +650,7 @@ _ENTRY(saved_ksp_limit)
addir3,r1,STACK_FRAME_OVERHEAD
EXC_XFER_TEMPLATE(DebugException, 0x2002, \
(MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
-   NOCOPY, crit_transfer_to_handler, ret_from_crit_exc)
+   crit_transfer_to_handler, ret_from_crit_exc)
 
/* Programmable Interval Timer (PIT) Exception. (from 0x1000) */
 Decrementer:
@@ -673,8 +672,7 @@ WDTException:
addir3,r1,STACK_FRAME_OVERHEAD;
EXC_XFER_TEMPLATE(WatchdogException, 0x1020+2,
  (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)),
- NOCOPY, crit_transfer_to_handler,
- ret_from_crit_exc)
+ crit_transfer_to_handler, ret_from_crit_exc)
 
 /*
  * The other Data TLB exceptions bail out to this point
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index 264976c43f34..56dd1341eb3d 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -217,8 +217,7 @@ END_BTB_FLUSH_SECTION
CRITICAL_EXCEPTION_PROLOG(intno);   \
addir3,r1,STACK_FRAME_OVERHEAD; \
EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
- NOCOPY, crit_transfer_to_handler, \
- ret_from_crit_exc)
+ crit_transfer_to_handler, ret_from_crit_exc)
 
 #define MCHECK_EXCEPTION(n, label, hdlr)   \
START_EXCEPTION(label); \
@@ -227,32 +226,27 @@ END_BTB_FLUSH_SECTION
stw r5,_ESR(r11);   \
addir3,r1,STACK_FRAME_OVERHEAD; \
EXC_XFER_TEMPLATE(hdlr, n+4, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
- NOCOPY, mcheck_transfer_to_handler,   \
- ret_from_mcheck_exc)
+ mcheck_transfer_to_handler, ret_from_mcheck_exc)
 
-#define EXC_XFER_TEMPLATE(hdlr, trap, msr, copyee, tfer, ret)  \
+#define 

[PATCH v3 10/16] powerpc/32: Enter exceptions with MSR_EE unset

2019-04-30 Thread Christophe Leroy
All exception handlers know when to re-enable interrupts, so
it is safer to enter all of them with MSR_EE unset, except
for syscalls.

Suggested-by: Benjamin Herrenschmidt 
[split out from benh's RFC patch]

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/head_32.S| 68 ++--
 arch/powerpc/kernel/head_32.h|  8 -
 arch/powerpc/kernel/head_40x.S   | 44 +++
 arch/powerpc/kernel/head_44x.S   |  6 ++--
 arch/powerpc/kernel/head_8xx.S   | 32 -
 arch/powerpc/kernel/head_booke.h | 12 ++-
 arch/powerpc/kernel/head_fsl_booke.S | 26 +++---
 7 files changed, 90 insertions(+), 106 deletions(-)

diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index 6aa8addce296..2404c39373d3 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -337,7 +337,7 @@ Alignment:
mfspr   r5,SPRN_DSISR
stw r5,_DSISR(r11)
addir3,r1,STACK_FRAME_OVERHEAD
-   EXC_XFER_EE(0x600, alignment_exception)
+   EXC_XFER_STD(0x600, alignment_exception)
 
 /* Program check exception */
EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD)
@@ -358,13 +358,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE)
bl  load_up_fpu /* if from user, just load it up */
b   fast_exception_return
 1: addir3,r1,STACK_FRAME_OVERHEAD
-   EXC_XFER_EE_LITE(0x800, kernel_fp_unavailable_exception)
+   EXC_XFER_LITE(0x800, kernel_fp_unavailable_exception)
 
 /* Decrementer */
EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE)
 
-   EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_EE)
-   EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_EE)
+   EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_STD)
+   EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_STD)
 
 /* System call */
. = 0xc00
@@ -375,7 +375,7 @@ SystemCall:
 
 /* Single step - not used on 601 */
EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD)
-   EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_EE)
+   EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_STD)
 
 /*
  * The Altivec unavailable trap is at 0x0f20.  Foo.
@@ -607,35 +607,35 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
 #define altivec_assist_exception   unknown_exception
 #endif
 
-   EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception, 
EXC_XFER_EE)
-   EXCEPTION(0x1400, SMI, SMIException, EXC_XFER_EE)
-   EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE)
-   EXCEPTION(0x1600, Trap_16, altivec_assist_exception, EXC_XFER_EE)
+   EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception, 
EXC_XFER_STD)
+   EXCEPTION(0x1400, SMI, SMIException, EXC_XFER_STD)
+   EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD)
+   EXCEPTION(0x1600, Trap_16, altivec_assist_exception, EXC_XFER_STD)
EXCEPTION(0x1700, Trap_17, TAUException, EXC_XFER_STD)
-   EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_EE)
-   EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_EE)
-   EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_EE)
-   EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_EE)
-   EXCEPTION(0x1c00, Trap_1c, unknown_exception, EXC_XFER_EE)
-   EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_EE)
-   EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_EE)
-   EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_EE)
-   EXCEPTION(0x2000, RunMode, RunModeException, EXC_XFER_EE)
-   EXCEPTION(0x2100, Trap_21, unknown_exception, EXC_XFER_EE)
-   EXCEPTION(0x2200, Trap_22, unknown_exception, EXC_XFER_EE)
-   EXCEPTION(0x2300, Trap_23, unknown_exception, EXC_XFER_EE)
-   EXCEPTION(0x2400, Trap_24, unknown_exception, EXC_XFER_EE)
-   EXCEPTION(0x2500, Trap_25, unknown_exception, EXC_XFER_EE)
-   EXCEPTION(0x2600, Trap_26, unknown_exception, EXC_XFER_EE)
-   EXCEPTION(0x2700, Trap_27, unknown_exception, EXC_XFER_EE)
-   EXCEPTION(0x2800, Trap_28, unknown_exception, EXC_XFER_EE)
-   EXCEPTION(0x2900, Trap_29, unknown_exception, EXC_XFER_EE)
-   EXCEPTION(0x2a00, Trap_2a, unknown_exception, EXC_XFER_EE)
-   EXCEPTION(0x2b00, Trap_2b, unknown_exception, EXC_XFER_EE)
-   EXCEPTION(0x2c00, Trap_2c, unknown_exception, EXC_XFER_EE)
-   EXCEPTION(0x2d00, Trap_2d, unknown_exception, EXC_XFER_EE)
-   EXCEPTION(0x2e00, Trap_2e, unknown_exception, EXC_XFER_EE)
-   EXCEPTION(0x2f00, Trap_2f, unknown_exception, EXC_XFER_EE)
+   EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_STD)
+   EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_STD)
+   EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_STD)
+   EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_STD)
+   

[PATCH v3 06/16] powerpc/40x: Split and rename NORMAL_EXCEPTION_PROLOG

2019-04-30 Thread Christophe Leroy
This patch splits NORMAL_EXCEPTION_PROLOG in the same way as in
head_8xx.S and head_32.S and renames it EXCEPTION_PROLOG() as well
to match head_32.h

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/head_40x.S | 26 --
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index cb95a5c17cea..1547750567b6 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -103,10 +103,14 @@ _ENTRY(saved_ksp_limit)
  * turned off (i.e. using physical addresses). We assume SPRG_THREAD has
  * the physical address of the current task thread_struct.
  */
-#define NORMAL_EXCEPTION_PROLOG
 \
+#define EXCEPTION_PROLOG\
mtspr   SPRN_SPRG_SCRATCH0,r10; /* save two registers to work with */\
mtspr   SPRN_SPRG_SCRATCH1,r11;  \
mfcrr10;/* save CR in r10 for now  */\
+   EXCEPTION_PROLOG_1;  \
+   EXCEPTION_PROLOG_2
+
+#define EXCEPTION_PROLOG_1  \
mfspr   r11,SPRN_SRR1;  /* check whether user or kernel*/\
andi.   r11,r11,MSR_PR;  \
tophys(r11,r1);  \
@@ -115,7 +119,9 @@ _ENTRY(saved_ksp_limit)
lwz r11,TASK_STACK-THREAD(r11); /* this thread's kernel stack */\
addir11,r11,THREAD_SIZE; \
tophys(r11,r11); \
-1: subir11,r11,INT_FRAME_SIZE; /* Allocate an exception frame */\
+1: subir11,r11,INT_FRAME_SIZE  /* Allocate an exception frame */
+
+#define EXCEPTION_PROLOG_2  \
stw r10,_CCR(r11);  /* save various registers  */\
stw r12,GPR12(r11);  \
stw r9,GPR9(r11);\
@@ -205,7 +211,7 @@ label:
 
 #define EXCEPTION(n, label, hdlr, xfer)\
START_EXCEPTION(n, label);  \
-   NORMAL_EXCEPTION_PROLOG;\
+   EXCEPTION_PROLOG;   \
addir3,r1,STACK_FRAME_OVERHEAD; \
xfer(n, hdlr)
 
@@ -396,7 +402,7 @@ label:
  * This is caused by a fetch from non-execute or guarded pages.
  */
START_EXCEPTION(0x0400, InstructionAccess)
-   NORMAL_EXCEPTION_PROLOG
+   EXCEPTION_PROLOG
mr  r4,r12  /* Pass SRR0 as arg2 */
li  r5,0/* Pass zero as arg3 */
EXC_XFER_LITE(0x400, handle_page_fault)
@@ -406,7 +412,7 @@ label:
 
 /* 0x0600 - Alignment Exception */
START_EXCEPTION(0x0600, Alignment)
-   NORMAL_EXCEPTION_PROLOG
+   EXCEPTION_PROLOG
mfspr   r4,SPRN_DEAR/* Grab the DEAR and save it */
stw r4,_DEAR(r11)
addir3,r1,STACK_FRAME_OVERHEAD
@@ -414,7 +420,7 @@ label:
 
 /* 0x0700 - Program Exception */
START_EXCEPTION(0x0700, ProgramCheck)
-   NORMAL_EXCEPTION_PROLOG
+   EXCEPTION_PROLOG
mfspr   r4,SPRN_ESR /* Grab the ESR and save it */
stw r4,_ESR(r11)
addir3,r1,STACK_FRAME_OVERHEAD
@@ -427,7 +433,7 @@ label:
 
 /* 0x0C00 - System Call Exception */
START_EXCEPTION(0x0C00, SystemCall)
-   NORMAL_EXCEPTION_PROLOG
+   EXCEPTION_PROLOG
EXC_XFER_EE_LITE(0xc00, DoSyscall)
 
EXCEPTION(0x0D00, Trap_0D, unknown_exception, EXC_XFER_EE)
@@ -733,7 +739,7 @@ label:
 
/* Programmable Interval Timer (PIT) Exception. (from 0x1000) */
 Decrementer:
-   NORMAL_EXCEPTION_PROLOG
+   EXCEPTION_PROLOG
lis r0,TSR_PIS@h
mtspr   SPRN_TSR,r0 /* Clear the PIT exception */
addir3,r1,STACK_FRAME_OVERHEAD
@@ -741,7 +747,7 @@ Decrementer:
 
/* Fixed Interval Timer (FIT) Exception. (from 0x1010) */
 FITException:
-   NORMAL_EXCEPTION_PROLOG
+   EXCEPTION_PROLOG
addir3,r1,STACK_FRAME_OVERHEAD;
EXC_XFER_EE(0x1010, unknown_exception)
 
@@ -759,7 +765,7 @@ WDTException:
  * if they can't resolve the lightweight TLB fault.
  */
 DataAccess:
-   NORMAL_EXCEPTION_PROLOG
+   EXCEPTION_PROLOG
mfspr   r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */
stw r5,_ESR(r11)
mfspr   r4,SPRN_DEAR/* Grab the DEAR, save it, pass arg2 */
-- 
2.13.3



[PATCH v3 09/16] powerpc/32: enter syscall with MSR_EE unconditionally set

2019-04-30 Thread Christophe Leroy
Syscalls are expected to be entered with MSR_EE set. Let's
make it unconditional by forcing MSR_EE on syscalls.

This patch adds EXC_XFER_SYS for that.

Suggested-by: Benjamin Herrenschmidt 
[split out from benh's RFC patch]

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/head_32.S| 2 +-
 arch/powerpc/kernel/head_32.h| 4 
 arch/powerpc/kernel/head_40x.S   | 2 +-
 arch/powerpc/kernel/head_44x.S   | 2 +-
 arch/powerpc/kernel/head_8xx.S   | 2 +-
 arch/powerpc/kernel/head_booke.h | 4 
 arch/powerpc/kernel/head_fsl_booke.S | 2 +-
 7 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index fbc655aa0acf..6aa8addce296 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -371,7 +371,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE)
DO_KVM  0xc00
 SystemCall:
EXCEPTION_PROLOG
-   EXC_XFER_EE_LITE(0xc00, DoSyscall)
+   EXC_XFER_SYS(0xc00, DoSyscall)
 
 /* Single step - not used on 601 */
EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD)
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index aa0131bb09b5..7221418a883f 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -123,6 +123,10 @@
EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, NOCOPY, transfer_to_handler, \
  ret_from_except)
 
+#define EXC_XFER_SYS(n, hdlr)  \
+   EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL | MSR_EE, NOCOPY, 
transfer_to_handler, \
+ ret_from_except)
+
 #define EXC_XFER_EE(n, hdlr)   \
EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, COPY_EE, 
transfer_to_handler_full, \
  ret_from_except_full)
diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index cce9bd33a176..1a80a3e45e44 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -350,7 +350,7 @@ _ENTRY(saved_ksp_limit)
 /* 0x0C00 - System Call Exception */
START_EXCEPTION(0x0C00, SystemCall)
EXCEPTION_PROLOG
-   EXC_XFER_EE_LITE(0xc00, DoSyscall)
+   EXC_XFER_SYS(0xc00, DoSyscall)
 
EXCEPTION(0x0D00, Trap_0D, unknown_exception, EXC_XFER_EE)
EXCEPTION(0x0E00, Trap_0E, unknown_exception, EXC_XFER_EE)
diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
index 37117ab11584..9cc01948651f 100644
--- a/arch/powerpc/kernel/head_44x.S
+++ b/arch/powerpc/kernel/head_44x.S
@@ -283,7 +283,7 @@ interrupt_base:
/* System Call Interrupt */
START_EXCEPTION(SystemCall)
NORMAL_EXCEPTION_PROLOG(BOOKE_INTERRUPT_SYSCALL)
-   EXC_XFER_EE_LITE(0x0c00, DoSyscall)
+   EXC_XFER_SYS(0x0c00, DoSyscall)
 
/* Auxiliary Processor Unavailable Interrupt */
EXCEPTION(0x2020, BOOKE_INTERRUPT_AP_UNAVAIL, \
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 7b76ad1b9620..19ad6484f198 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -186,7 +186,7 @@ Alignment:
. = 0xc00
 SystemCall:
EXCEPTION_PROLOG
-   EXC_XFER_EE_LITE(0xc00, DoSyscall)
+   EXC_XFER_SYS(0xc00, DoSyscall)
 
 /* Single step - not used on 601 */
EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD)
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index 1b22a8dea399..612f54ba1125 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -251,6 +251,10 @@ END_BTB_FLUSH_SECTION
EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, NOCOPY, transfer_to_handler, \
  ret_from_except)
 
+#define EXC_XFER_SYS(n, hdlr)  \
+   EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL | MSR_EE, NOCOPY, 
transfer_to_handler, \
+ ret_from_except)
+
 #define EXC_XFER_EE(n, hdlr)   \
EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, COPY_EE, 
transfer_to_handler_full, \
  ret_from_except_full)
diff --git a/arch/powerpc/kernel/head_fsl_booke.S 
b/arch/powerpc/kernel/head_fsl_booke.S
index 32332e24e421..e77a2ed94642 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -411,7 +411,7 @@ interrupt_base:
/* System Call Interrupt */
START_EXCEPTION(SystemCall)
NORMAL_EXCEPTION_PROLOG(SYSCALL)
-   EXC_XFER_EE_LITE(0x0c00, DoSyscall)
+   EXC_XFER_SYS(0x0c00, DoSyscall)
 
/* Auxiliary Processor Unavailable Interrupt */
EXCEPTION(0x2900, AP_UNAVAIL, AuxillaryProcessorUnavailable, \
-- 
2.13.3



[PATCH v3 08/16] powerpc/fsl_booke: ensure SPEFloatingPointException() reenables interrupts

2019-04-30 Thread Christophe Leroy
SPEFloatingPointException() is the only exception handler which 'forgets' to
re-enable interrupts. This patch makes sure it does.

Suggested-by: Benjamin Herrenschmidt 
Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/traps.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 1fd45a8650e1..665f294725cb 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -2088,6 +2088,10 @@ void SPEFloatingPointException(struct pt_regs *regs)
int code = FPE_FLTUNK;
int err;
 
+   /* We restore the interrupt state now */
+   if (!arch_irq_disabled_regs(regs))
+   local_irq_enable();
+
flush_spe_to_thread(current);
 
spefscr = current->thread.spefscr;
@@ -2133,6 +2137,10 @@ void SPEFloatingPointRoundException(struct pt_regs *regs)
extern int speround_handler(struct pt_regs *regs);
int err;
 
+   /* We restore the interrupt state now */
+   if (!arch_irq_disabled_regs(regs))
+   local_irq_enable();
+
preempt_disable();
if (regs->msr & MSR_SPE)
giveup_spe(current);
-- 
2.13.3



[PATCH v3 05/16] powerpc/40x: add exception frame marker

2019-04-30 Thread Christophe Leroy
This patch adds STACK_FRAME_REGS_MARKER in the stack at exception entry
in order to see interrupts in call traces as below:

[0.013964] Call Trace:
[0.014014] [c0745db0] [c007a9d4] tick_periodic.constprop.5+0xd8/0x104 
(unreliable)
[0.014086] [c0745dc0] [c007aa20] tick_handle_periodic+0x20/0x9c
[0.014181] [c0745de0] [c0009cd0] timer_interrupt+0xa0/0x264
[0.014258] [c0745e10] [c000e484] ret_from_except+0x0/0x14
[0.014390] --- interrupt: 901 at console_unlock.part.7+0x3f4/0x528
[0.014390] LR = console_unlock.part.7+0x3f0/0x528
[0.014455] [c0745ee0] [c0050334] console_unlock.part.7+0x114/0x528 
(unreliable)
[0.014542] [c0745f30] [c00524e0] register_console+0x3d8/0x44c
[0.014625] [c0745f60] [c0675aac] cpm_uart_console_init+0x18/0x2c
[0.014709] [c0745f70] [c06614f4] console_init+0x114/0x1cc
[0.014795] [c0745fb0] [c0658b68] start_kernel+0x300/0x3d8
[0.014864] [c0745ff0] [c00022cc] start_here+0x44/0x98

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/head_40x.S | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index f49b0278e995..cb95a5c17cea 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -132,6 +132,9 @@ _ENTRY(saved_ksp_limit)
tovirt(r1,r11); /* set new kernel sp */ \
rlwinm  r9,r9,0,14,12;  /* clear MSR_WE (necessary?)   */\
stw r0,GPR0(r11);\
+   lis r10, STACK_FRAME_REGS_MARKER@ha; /* exception frame marker */\
+   addir10, r10, STACK_FRAME_REGS_MARKER@l; \
+   stw r10, 8(r11); \
SAVE_4GPRS(3, r11);  \
SAVE_2GPRS(7, r11)
 
@@ -174,6 +177,9 @@ _ENTRY(saved_ksp_limit)
tovirt(r1,r11);  \
rlwinm  r9,r9,0,14,12;  /* clear MSR_WE (necessary?)   */\
stw r0,GPR0(r11);\
+   lis r10, STACK_FRAME_REGS_MARKER@ha; /* exception frame marker */\
+   addir10, r10, STACK_FRAME_REGS_MARKER@l; \
+   stw r10, 8(r11); \
SAVE_4GPRS(3, r11);  \
SAVE_2GPRS(7, r11)
 
-- 
2.13.3



[PATCH v3 07/16] powerpc/40x: Refactor exception entry macros by using head_32.h

2019-04-30 Thread Christophe Leroy
Refactor exception entry macros by using the ones defined in head_32.h

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/head_32.h  |  4 ++
 arch/powerpc/kernel/head_40x.S | 88 +-
 2 files changed, 6 insertions(+), 86 deletions(-)

diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index 985758cbf577..aa0131bb09b5 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -59,8 +59,12 @@
stw r1,GPR1(r11)
stw r1,0(r11)
tovirt(r1,r11)  /* set new kernel sp */
+#ifdef CONFIG_40x
+   rlwinm  r9,r9,0,14,12   /* clear MSR_WE (necessary?) */
+#else
li  r10,MSR_KERNEL & ~(MSR_IR|MSR_DR) /* can take exceptions */
MTMSRD(r10) /* (except for mach check in rtas) */
+#endif
stw r0,GPR0(r11)
lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
addir10,r10,STACK_FRAME_REGS_MARKER@l
diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index 1547750567b6..cce9bd33a176 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -44,6 +44,8 @@
 #include 
 #include 
 
+#include "head_32.h"
+
 /* As with the other PowerPC ports, it is expected that when code
  * execution begins here, the following registers contain valid, yet
  * optional, information:
@@ -99,52 +101,6 @@ _ENTRY(saved_ksp_limit)
.space  4
 
 /*
- * Exception vector entry code. This code runs with address translation
- * turned off (i.e. using physical addresses). We assume SPRG_THREAD has
- * the physical address of the current task thread_struct.
- */
-#define EXCEPTION_PROLOG\
-   mtspr   SPRN_SPRG_SCRATCH0,r10; /* save two registers to work with */\
-   mtspr   SPRN_SPRG_SCRATCH1,r11;  \
-   mfcrr10;/* save CR in r10 for now  */\
-   EXCEPTION_PROLOG_1;  \
-   EXCEPTION_PROLOG_2
-
-#define EXCEPTION_PROLOG_1  \
-   mfspr   r11,SPRN_SRR1;  /* check whether user or kernel*/\
-   andi.   r11,r11,MSR_PR;  \
-   tophys(r11,r1);  \
-   beq 1f;  \
-   mfspr   r11,SPRN_SPRG_THREAD;   /* if from user, start at top of   */\
-   lwz r11,TASK_STACK-THREAD(r11); /* this thread's kernel stack */\
-   addir11,r11,THREAD_SIZE; \
-   tophys(r11,r11); \
-1: subir11,r11,INT_FRAME_SIZE  /* Allocate an exception frame */
-
-#define EXCEPTION_PROLOG_2  \
-   stw r10,_CCR(r11);  /* save various registers  */\
-   stw r12,GPR12(r11);  \
-   stw r9,GPR9(r11);\
-   mfspr   r10,SPRN_SPRG_SCRATCH0;  \
-   stw r10,GPR10(r11);  \
-   mfspr   r12,SPRN_SPRG_SCRATCH1;  \
-   stw r12,GPR11(r11);  \
-   mflrr10; \
-   stw r10,_LINK(r11);  \
-   mfspr   r12,SPRN_SRR0;   \
-   stw r1,GPR1(r11);\
-   mfspr   r9,SPRN_SRR1;\
-   stw r1,0(r11);   \
-   tovirt(r1,r11); /* set new kernel sp */ \
-   rlwinm  r9,r9,0,14,12;  /* clear MSR_WE (necessary?)   */\
-   stw r0,GPR0(r11);\
-   lis r10, STACK_FRAME_REGS_MARKER@ha; /* exception frame marker */\
-   addir10, r10, STACK_FRAME_REGS_MARKER@l; \
-   stw r10, 8(r11); \
-   SAVE_4GPRS(3, r11);  \
-   SAVE_2GPRS(7, r11)
-
-/*
  * Exception prolog for critical exceptions.  This is a little different
  * from the normal exception prolog above since a critical exception
  * can potentially occur at any point during normal exception processing.
@@ -205,16 +161,6 @@ _ENTRY(saved_ksp_limit)
 /*
  * Exception vectors.
  */
-#defineSTART_EXCEPTION(n, label)   
 \

[PATCH v3 04/16] powerpc/40x: Don't use SPRN_SPRG_SCRATCH2 in EXCEPTION_PROLOG

2019-04-30 Thread Christophe Leroy
Contrary to what the comment says, r1 is not reused by the critical
exception handler, as it uses a dedicated critirq_ctx stack.
Decrementing r1 early is therefore unneeded.

Should the above be valid, the code is crap buggy anyway as
r1 gets some intermediate values that would jeopardise the
whole process (for instance after mfspr   r1,SPRN_SPRG_THREAD)

Using SPRN_SPRG_SCRATCH2 to save r1 is then not needed, r11 can be
used instead. This avoids one mtspr and one mfspr and makes the
prolog closer to what's done on 6xx and 8xx.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/head_40x.S | 21 +
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index a9c934f2319b..f49b0278e995 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -102,23 +102,20 @@ _ENTRY(saved_ksp_limit)
  * Exception vector entry code. This code runs with address translation
  * turned off (i.e. using physical addresses). We assume SPRG_THREAD has
  * the physical address of the current task thread_struct.
- * Note that we have to have decremented r1 before we write to any fields
- * of the exception frame, since a critical interrupt could occur at any
- * time, and it will write to the area immediately below the current r1.
  */
 #define NORMAL_EXCEPTION_PROLOG
 \
mtspr   SPRN_SPRG_SCRATCH0,r10; /* save two registers to work with */\
mtspr   SPRN_SPRG_SCRATCH1,r11;  \
-   mtspr   SPRN_SPRG_SCRATCH2,r1;   \
mfcrr10;/* save CR in r10 for now  */\
mfspr   r11,SPRN_SRR1;  /* check whether user or kernel*/\
andi.   r11,r11,MSR_PR;  \
-   beq 1f;  \
-   mfspr   r1,SPRN_SPRG_THREAD;/* if from user, start at top of   */\
-   lwz r1,TASK_STACK-THREAD(r1); /* this thread's kernel stack   */\
-   addir1,r1,THREAD_SIZE;   \
-1: subir1,r1,INT_FRAME_SIZE;   /* Allocate an exception frame */\
tophys(r11,r1);  \
+   beq 1f;  \
+   mfspr   r11,SPRN_SPRG_THREAD;   /* if from user, start at top of   */\
+   lwz r11,TASK_STACK-THREAD(r11); /* this thread's kernel stack */\
+   addir11,r11,THREAD_SIZE; \
+   tophys(r11,r11); \
+1: subir11,r11,INT_FRAME_SIZE; /* Allocate an exception frame */\
stw r10,_CCR(r11);  /* save various registers  */\
stw r12,GPR12(r11);  \
stw r9,GPR9(r11);\
@@ -128,11 +125,11 @@ _ENTRY(saved_ksp_limit)
stw r12,GPR11(r11);  \
mflrr10; \
stw r10,_LINK(r11);  \
-   mfspr   r10,SPRN_SPRG_SCRATCH2;  \
mfspr   r12,SPRN_SRR0;   \
-   stw r10,GPR1(r11);   \
+   stw r1,GPR1(r11);\
mfspr   r9,SPRN_SRR1;\
-   stw r10,0(r11);  \
+   stw r1,0(r11);   \
+   tovirt(r1,r11); /* set new kernel sp */ \
rlwinm  r9,r9,0,14,12;  /* clear MSR_WE (necessary?)   */\
stw r0,GPR0(r11);\
SAVE_4GPRS(3, r11);  \
-- 
2.13.3



[PATCH v3 03/16] powerpc/32: make the 6xx/8xx EXC_XFER_TEMPLATE() similar to the 40x/booke one

2019-04-30 Thread Christophe Leroy
6xx/8xx EXC_XFER_TEMPLATE() macro adds a i##n symbol which is
unused and can be removed.
40x and booke EXC_XFER_TEMPLATE() macros takes msr from the caller
while the 6xx/8xx version uses only MSR_KERNEL as msr value.

This patch modifies the 6xx/8xx version to make it similar to the
40x and booke versions.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/head_32.h | 13 ++---
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index cf3d00844597..985758cbf577 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -99,13 +99,12 @@
addir3,r1,STACK_FRAME_OVERHEAD; \
xfer(n, hdlr)
 
-#define EXC_XFER_TEMPLATE(n, hdlr, trap, copyee, tfer, ret)\
+#define EXC_XFER_TEMPLATE(hdlr, trap, msr, copyee, tfer, ret)  \
li  r10,trap;   \
stw r10,_TRAP(r11); \
-   LOAD_MSR_KERNEL(r10, MSR_KERNEL);   \
+   LOAD_MSR_KERNEL(r10, msr);  \
copyee(r10, r9);\
bl  tfer;   \
-i##n:  \
.long   hdlr;   \
.long   ret
 
@@ -113,19 +112,19 @@ i##n: 
\
 #define NOCOPY(d, s)
 
 #define EXC_XFER_STD(n, hdlr)  \
-   EXC_XFER_TEMPLATE(n, hdlr, n, NOCOPY, transfer_to_handler_full, \
+   EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, NOCOPY, 
transfer_to_handler_full,\
  ret_from_except_full)
 
 #define EXC_XFER_LITE(n, hdlr) \
-   EXC_XFER_TEMPLATE(n, hdlr, n+1, NOCOPY, transfer_to_handler, \
+   EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, NOCOPY, transfer_to_handler, \
  ret_from_except)
 
 #define EXC_XFER_EE(n, hdlr)   \
-   EXC_XFER_TEMPLATE(n, hdlr, n, COPY_EE, transfer_to_handler_full, \
+   EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, COPY_EE, 
transfer_to_handler_full, \
  ret_from_except_full)
 
 #define EXC_XFER_EE_LITE(n, hdlr)  \
-   EXC_XFER_TEMPLATE(n, hdlr, n+1, COPY_EE, transfer_to_handler, \
+   EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, COPY_EE, transfer_to_handler, \
  ret_from_except)
 
 #endif /* __HEAD_32_H__ */
-- 
2.13.3



[PATCH v3 02/16] powerpc/32: move LOAD_MSR_KERNEL() into head_32.h and use it

2019-04-30 Thread Christophe Leroy
As preparation for using head_32.h for head_40x.S, move
LOAD_MSR_KERNEL() there and use it to load r10 with MSR_KERNEL value.

In the meantime, this patch modifies it so that it takes into account
the size of the passed value to determine if 'li' can be used or if
'lis/ori' is needed instead of using the size of MSR_KERNEL. This is
done by using a gas macro.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/entry_32.S |  9 +
 arch/powerpc/kernel/head_32.h  | 15 ++-
 2 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 2f3d159c11d7..d0cea3deb86c 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -38,14 +38,7 @@
 #include 
 #include 
 
-/*
- * MSR_KERNEL is > 0x1 on 4xx/Book-E since it include MSR_CE.
- */
-#if MSR_KERNEL >= 0x1
-#define LOAD_MSR_KERNEL(r, x)  lis r,(x)@h; ori r,r,(x)@l
-#else
-#define LOAD_MSR_KERNEL(r, x)  li r,(x)
-#endif
+#include "head_32.h"
 
 /*
  * Align to 4k in order to ensure that all functions modyfing srr0/srr1
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index 7456e2a45acc..cf3d00844597 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -5,6 +5,19 @@
 #include /* for STACK_FRAME_REGS_MARKER */
 
 /*
+ * MSR_KERNEL is > 0x8000 on 4xx/Book-E since it include MSR_CE.
+ */
+.macro __LOAD_MSR_KERNEL r, x
+.if \x >= 0x8000
+   lis \r, (\x)@h
+   ori \r, \r, (\x)@l
+.else
+   li \r, (\x)
+.endif
+.endm
+#define LOAD_MSR_KERNEL(r, x) __LOAD_MSR_KERNEL r, x
+
+/*
  * Exception entry code.  This code runs with address translation
  * turned off, i.e. using physical addresses.
  * We assume sprg3 has the physical address of the current
@@ -89,7 +102,7 @@
 #define EXC_XFER_TEMPLATE(n, hdlr, trap, copyee, tfer, ret)\
li  r10,trap;   \
stw r10,_TRAP(r11); \
-   li  r10,MSR_KERNEL; \
+   LOAD_MSR_KERNEL(r10, MSR_KERNEL);   \
copyee(r10, r9);\
bl  tfer;   \
 i##n:  \
-- 
2.13.3



[PATCH v3 01/16] powerpc/32: Refactor EXCEPTION entry macros for head_8xx.S and head_32.S

2019-04-30 Thread Christophe Leroy
EXCEPTION_PROLOG is similar in head_8xx.S and head_32.S

This patch creates head_32.h and moves EXCEPTION_PROLOG macro
into it. It also converts it from a GCC macro to a GAS macro
in order to ease refactorisation with 40x later, since
GAS macros allows the use of #ifdef/#else/#endif inside it.
And it also has the advantage of not requiring the uggly "; \"
at the end of each line.

This patch also moves EXCEPTION() and EXC_XFER_() macros which
are also similar while adding START_EXCEPTION() out of EXCEPTION().

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/head_32.S  |  99 +-
 arch/powerpc/kernel/head_32.h  | 118 +
 arch/powerpc/kernel/head_8xx.S |  98 +-
 3 files changed, 122 insertions(+), 193 deletions(-)
 create mode 100644 arch/powerpc/kernel/head_32.h

diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index 40aec3f00a05..fbc655aa0acf 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -37,6 +37,8 @@
 #include 
 #include 
 
+#include "head_32.h"
+
 /* 601 only have IBAT; cr0.eq is set on 601 when using this macro */
 #define LOAD_BAT(n, reg, RA, RB)   \
/* see the comment for clear_bats() -- Cort */ \
@@ -242,103 +244,6 @@ __secondary_hold_spinloop:
 __secondary_hold_acknowledge:
.long   -1
 
-/*
- * Exception entry code.  This code runs with address translation
- * turned off, i.e. using physical addresses.
- * We assume sprg3 has the physical address of the current
- * task's thread_struct.
- */
-#define EXCEPTION_PROLOG   \
-   mtspr   SPRN_SPRG_SCRATCH0,r10; \
-   mtspr   SPRN_SPRG_SCRATCH1,r11; \
-   mfcrr10;\
-   EXCEPTION_PROLOG_1; \
-   EXCEPTION_PROLOG_2
-
-#define EXCEPTION_PROLOG_1 \
-   mfspr   r11,SPRN_SRR1;  /* check whether user or kernel */ \
-   andi.   r11,r11,MSR_PR; \
-   tophys(r11,r1); /* use tophys(r1) if kernel */ \
-   beq 1f; \
-   mfspr   r11,SPRN_SPRG_THREAD;   \
-   lwz r11,TASK_STACK-THREAD(r11); \
-   addir11,r11,THREAD_SIZE;\
-   tophys(r11,r11);\
-1: subir11,r11,INT_FRAME_SIZE  /* alloc exc. frame */
-
-
-#define EXCEPTION_PROLOG_2 \
-   stw r10,_CCR(r11);  /* save registers */ \
-   stw r12,GPR12(r11); \
-   stw r9,GPR9(r11);   \
-   mfspr   r10,SPRN_SPRG_SCRATCH0; \
-   stw r10,GPR10(r11); \
-   mfspr   r12,SPRN_SPRG_SCRATCH1; \
-   stw r12,GPR11(r11); \
-   mflrr10;\
-   stw r10,_LINK(r11); \
-   mfspr   r12,SPRN_SRR0;  \
-   mfspr   r9,SPRN_SRR1;   \
-   stw r1,GPR1(r11);   \
-   stw r1,0(r11);  \
-   tovirt(r1,r11); /* set new kernel sp */ \
-   li  r10,MSR_KERNEL & ~(MSR_IR|MSR_DR); /* can take exceptions */ \
-   MTMSRD(r10);/* (except for mach check in rtas) */ \
-   stw r0,GPR0(r11);   \
-   lis r10,STACK_FRAME_REGS_MARKER@ha; /* exception frame marker */ \
-   addir10,r10,STACK_FRAME_REGS_MARKER@l; \
-   stw r10,8(r11); \
-   SAVE_4GPRS(3, r11); \
-   SAVE_2GPRS(7, r11)
-
-/*
- * Note: code which follows this uses cr0.eq (set if from kernel),
- * r11, r12 (SRR0), and r9 (SRR1).
- *
- * Note2: once we have set r1 we are in a position to take exceptions
- * again, and we could thus set MSR:RI at that point.
- */
-
-/*
- * Exception vectors.
- */
-#define EXCEPTION(n, label, hdlr, xfer)\
-   . = n;  \
-   DO_KVM n;   \
-label: \
-   EXCEPTION_PROLOG;   \
-   addir3,r1,STACK_FRAME_OVERHEAD; \
-   xfer(n, hdlr)
-
-#define EXC_XFER_TEMPLATE(n, hdlr, trap, copyee, tfer, ret)\
-   li  r10,trap;   \
-   stw r10,_TRAP(r11); \
-   li  r10,MSR_KERNEL; \
-   copyee(r10, r9);\
-   bl  tfer;   \
-i##n:  \
-   .long   hdlr;   \
-   .long   ret
-
-#define COPY_EE(d, s)  rlwimi d,s,0,16,16
-#define NOCOPY(d, s)
-
-#define EXC_XFER_STD(n, hdlr)  \
-   EXC_XFER_TEMPLATE(n, hdlr, n, NOCOPY, transfer_to_handler_full, \
- ret_from_except_full)
-
-#define EXC_XFER_LITE(n, hdlr) \
-   EXC_XFER_TEMPLATE(n, hdlr, n+1, NOCOPY, transfer_to_handler, \
- ret_from_except)
-
-#define EXC_XFER_EE(n, hdlr)   \
-   EXC_XFER_TEMPLATE(n, hdlr, n, COPY_EE, 

[PATCH v3 00/16] powerpc/32: Implement fast syscall entry

2019-04-30 Thread Christophe Leroy
The purpose of this series is to implement a fast syscall entry
on ppc32, as already done on ppc64.

Unlike all other exceptions which can happen at any time and
require to preserve all registers, the syscalls do not
require the preservation of volatile registers (except LR).

Syscall entries can then be optimised with lighter entry code
than the general exception handling.

In the meantime this series refactorises the exception entry on
40x/6xx/8xx as they are pretty similar, and it takes benh series
on rationalising the settings of MSR_EE at exceptions/syscall entries
as this change greatly simplifies exception entries.

The refactorisation of exception entry will help when it comes to
implementing VMAP_STACK

On a 8xx, this series improves null_syscall selftest by 17%
On a 83xx, this series improves null_syscall selftest by 12.5%

v3:
- Rebased on latest powerpc/merge branch
- Fixed trivial conflict due to KUP functionality
- Dropped patch 15 (already applied)

v2:
- Rebased on latest powerpc/merge branch.
- Added booke support as well (tested on qemu bamboo).
- Added a patch to get rid of the dummy frames when calling 
trace_hardirqs_on/off.

Christophe Leroy (16):
  powerpc/32: Refactor EXCEPTION entry macros for head_8xx.S and
head_32.S
  powerpc/32: move LOAD_MSR_KERNEL() into head_32.h and use it
  powerpc/32: make the 6xx/8xx EXC_XFER_TEMPLATE() similar to the
40x/booke one
  powerpc/40x: Don't use SPRN_SPRG_SCRATCH2 in EXCEPTION_PROLOG
  powerpc/40x: add exception frame marker
  powerpc/40x: Split and rename NORMAL_EXCEPTION_PROLOG
  powerpc/40x: Refactor exception entry macros by using head_32.h
  powerpc/fsl_booke: ensure SPEFloatingPointException() reenables
interrupts
  powerpc/32: enter syscall with MSR_EE inconditionaly set
  powerpc/32: Enter exceptions with MSR_EE unset
  powerpc/32: get rid of COPY_EE in exception entry
  powerpc: Fix 32-bit handling of MSR_EE on exceptions
  powerpc/32: implement fast entry for syscalls on non BOOKE
  powerpc/32: implement fast entry for syscalls on BOOKE
  powerpc/32: don't do syscall stuff in transfer_to_handler
  powerpc/32: Don't add dummy frames when calling trace_hardirqs_on/off

 arch/powerpc/kernel/entry_32.S   | 153 +-
 arch/powerpc/kernel/head_32.S| 170 +++--
 arch/powerpc/kernel/head_32.h| 203 +++
 arch/powerpc/kernel/head_40x.S   | 152 +++---
 arch/powerpc/kernel/head_44x.S   |   9 +-
 arch/powerpc/kernel/head_8xx.S   | 133 ---
 arch/powerpc/kernel/head_booke.h | 131 +-
 arch/powerpc/kernel/head_fsl_booke.S |  29 +++--
 arch/powerpc/kernel/traps.c  |   8 ++
 9 files changed, 508 insertions(+), 480 deletions(-)
 create mode 100644 arch/powerpc/kernel/head_32.h

-- 
2.13.3



Re: [PATCH v2 0/5] Allow CPU0 to be nohz full

2019-04-30 Thread Peter Zijlstra
On Tue, Apr 30, 2019 at 12:46:40PM +1000, Nicholas Piggin wrote:
> Peter Zijlstra's on April 25, 2019 10:04 pm:
> > On Thu, Apr 11, 2019 at 01:34:43PM +1000, Nicholas Piggin wrote:
> >> Since last time, I added a compile time option to opt-out of this
> >> if the platform does not support suspend on non-zero, and tried to
> >> improve legibility of changelogs and explain the justification
> >> better.
> >> 
> >> I have been testing this on powerpc/pseries and it seems to work
> >> fine (the firmware call to suspend can be called on any CPU and
> >> resumes where it left off), but not included here because the
> >> code has some bitrot unrelated to this series which I hacked to
> >> fix. I will discuss it and either send an acked patch to go with
> >> this series if it is small, or fix it in powerpc tree.
> >> 
> > 
> > Rafael, Frederic, any comments?
> > 
> 
> Sorry to ping again, I guess people are probably busy after vacation.
> Any chance we could get this in next merge window? Peter are you okay
> with the config option as it is, then we can look at adapting it to
> what x86 needs as a follow up (e.g., allow nohz CPU0 for
> cpu0_hotpluggable case)?

Yeah, let me just queue these here patches. Not sure they'll still make
the upcoming merge window, but we can try.


Re: [PATCH stable v4.4 0/8] missing powerpc spectre backports for 4.4

2019-04-30 Thread Greg KH
On Mon, Apr 29, 2019 at 06:49:00PM +0300, Diana Craciun wrote:
> Hi Greg,
> 
> These are missing patches from the initial powerpc spectre backports for 4.4.
> Please queue them as well if you don't have any objections.

I applied the first 6 of these now.  If you could fix up the last two
and resend them, that would be wonderful.

thanks,

greg k-h


Patch "powerpc/fsl: Flush the branch predictor at each kernel entry (32 bit)" has been added to the 4.4-stable tree

2019-04-30 Thread gregkh


This is a note to let you know that I've just added the patch titled

powerpc/fsl: Flush the branch predictor at each kernel entry (32 bit)

to the 4.4-stable tree which can be found at:

http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary

The filename of the patch is:
 powerpc-fsl-flush-the-branch-predictor-at-each-kernel-entry-32-bit.patch
and it can be found in the queue-4.4 subdirectory.

If you, or anyone else, feels it should not be added to the stable tree,
please let  know about it.


>From foo@baz Tue 30 Apr 2019 12:38:50 PM CEST
From: Diana Craciun 
Date: Mon, 29 Apr 2019 18:49:04 +0300
Subject: powerpc/fsl: Flush the branch predictor at each kernel entry (32 bit)
To: sta...@vger.kernel.org, gre...@linuxfoundation.org
Cc: linuxppc-...@ozlabs.org, m...@ellerman.id.au, Diana Craciun 

Message-ID: <1556552948-24957-5-git-send-email-diana.crac...@nxp.com>

From: Diana Craciun 

commit 7fef436295bf6c05effe682c8797dfcb0deb112a upstream.

In order to protect against speculation attacks on
indirect branches, the branch predictor is flushed at
kernel entry to protect for the following situations:
- userspace process attacking another userspace process
- userspace process attacking the kernel
Basically when the privillege level change (i.e.the kernel
is entered), the branch predictor state is flushed.

Signed-off-by: Diana Craciun 
Signed-off-by: Michael Ellerman 
Signed-off-by: Greg Kroah-Hartman 
---
 arch/powerpc/kernel/head_booke.h |6 ++
 arch/powerpc/kernel/head_fsl_booke.S |   15 +++
 2 files changed, 21 insertions(+)

--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -42,6 +42,9 @@
andi.   r11, r11, MSR_PR;   /* check whether user or kernel*/\
mr  r11, r1; \
beq 1f;  \
+START_BTB_FLUSH_SECTION\
+   BTB_FLUSH(r11)  \
+END_BTB_FLUSH_SECTION  \
/* if from user, start at top of this thread's kernel stack */   \
lwz r11, THREAD_INFO-THREAD(r10);\
ALLOC_STACK_FRAME(r11, THREAD_SIZE); \
@@ -127,6 +130,9 @@
stw r9,_CCR(r8);/* save CR on stack*/\
mfspr   r11,exc_level_srr1; /* check whether user or kernel*/\
DO_KVM  BOOKE_INTERRUPT_##intno exc_level_srr1;  \
+START_BTB_FLUSH_SECTION
\
+   BTB_FLUSH(r10)  
\
+END_BTB_FLUSH_SECTION  
\
andi.   r11,r11,MSR_PR;  \
mfspr   r11,SPRN_SPRG_THREAD;   /* if from user, start at top of   */\
lwz r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -451,6 +451,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
mfcrr13
stw r13, THREAD_NORMSAVE(3)(r10)
DO_KVM  BOOKE_INTERRUPT_DTLB_MISS SPRN_SRR1
+START_BTB_FLUSH_SECTION
+   mfspr r11, SPRN_SRR1
+   andi. r10,r11,MSR_PR
+   beq 1f
+   BTB_FLUSH(r10)
+1:
+END_BTB_FLUSH_SECTION
mfspr   r10, SPRN_DEAR  /* Get faulting address */
 
/* If we are faulting a kernel address, we have to use the
@@ -545,6 +552,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
mfcrr13
stw r13, THREAD_NORMSAVE(3)(r10)
DO_KVM  BOOKE_INTERRUPT_ITLB_MISS SPRN_SRR1
+START_BTB_FLUSH_SECTION
+   mfspr r11, SPRN_SRR1
+   andi. r10,r11,MSR_PR
+   beq 1f
+   BTB_FLUSH(r10)
+1:
+END_BTB_FLUSH_SECTION
+
mfspr   r10, SPRN_SRR0  /* Get faulting address */
 
/* If we are faulting a kernel address, we have to use the


Patches currently in stable-queue which might be from diana.crac...@nxp.com are

queue-4.4/powerpc-64s-add-support-for-a-store-forwarding-barrier-at-kernel-entry-exit.patch
queue-4.4/powerpc-fsl-emulate-sprn_bucsr-register.patch
queue-4.4/powerpc-64-make-stf-barrier-ppc_book3s_64-specific.patch
queue-4.4/powerpc-pseries-set-or-clear-security-feature-flags.patch
queue-4.4/powerpc-fsl-fix-spectre_v2-mitigations-reporting.patch
queue-4.4/powerpc-64s-patch-barrier_nospec-in-modules.patch
queue-4.4/powerpc-pseries-support-firmware-disable-of-rfi-flush.patch
queue-4.4/powerpc-rfi-flush-call-setup_rfi_flush-after-lpm-migration.patch
queue-4.4/powerpc-pseries-query-hypervisor-for-count-cache-flush-settings.patch
queue-4.4/powerpc-powernv-set-or-clear-security-feature-flags.patch

Patch "powerpc/fsl: Sanitize the syscall table for NXP PowerPC 32 bit platforms" has been added to the 4.4-stable tree

2019-04-30 Thread gregkh


This is a note to let you know that I've just added the patch titled

powerpc/fsl: Sanitize the syscall table for NXP PowerPC 32 bit platforms

to the 4.4-stable tree which can be found at:

http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary

The filename of the patch is:
 
powerpc-fsl-sanitize-the-syscall-table-for-nxp-powerpc-32-bit-platforms.patch
and it can be found in the queue-4.4 subdirectory.

If you, or anyone else, feels it should not be added to the stable tree,
please let  know about it.


>From foo@baz Tue 30 Apr 2019 12:38:50 PM CEST
From: Diana Craciun 
Date: Mon, 29 Apr 2019 18:49:05 +0300
Subject: powerpc/fsl: Sanitize the syscall table for NXP PowerPC 32 bit 
platforms
To: sta...@vger.kernel.org, gre...@linuxfoundation.org
Cc: linuxppc-...@ozlabs.org, m...@ellerman.id.au, Diana Craciun 

Message-ID: <1556552948-24957-6-git-send-email-diana.crac...@nxp.com>

From: Diana Craciun 

commit c28218d4abbf4f2035495334d8bfcba64bda4787 upstream.

Used barrier_nospec to sanitize the syscall table.

Signed-off-by: Diana Craciun 
Signed-off-by: Michael Ellerman 
Signed-off-by: Greg Kroah-Hartman 
---
 arch/powerpc/kernel/entry_32.S |   10 ++
 1 file changed, 10 insertions(+)

--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -33,6 +33,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /*
  * MSR_KERNEL is > 0x1 on 4xx/Book-E since it include MSR_CE.
@@ -340,6 +341,15 @@ syscall_dotrace_cont:
ori r10,r10,sys_call_table@l
slwir0,r0,2
bge-66f
+
+   barrier_nospec_asm
+   /*
+* Prevent the load of the handler below (based on the user-passed
+* system call number) being speculatively executed until the test
+* against NR_syscalls and branch to .66f above has
+* committed.
+*/
+
lwzxr10,r10,r0  /* Fetch system call handler [ptr] */
mtlrr10
addir9,r1,STACK_FRAME_OVERHEAD


Patches currently in stable-queue which might be from diana.crac...@nxp.com are

queue-4.4/powerpc-64s-add-support-for-a-store-forwarding-barrier-at-kernel-entry-exit.patch
queue-4.4/powerpc-fsl-emulate-sprn_bucsr-register.patch
queue-4.4/powerpc-64-make-stf-barrier-ppc_book3s_64-specific.patch
queue-4.4/powerpc-pseries-set-or-clear-security-feature-flags.patch
queue-4.4/powerpc-fsl-fix-spectre_v2-mitigations-reporting.patch
queue-4.4/powerpc-64s-patch-barrier_nospec-in-modules.patch
queue-4.4/powerpc-pseries-support-firmware-disable-of-rfi-flush.patch
queue-4.4/powerpc-rfi-flush-call-setup_rfi_flush-after-lpm-migration.patch
queue-4.4/powerpc-pseries-query-hypervisor-for-count-cache-flush-settings.patch
queue-4.4/powerpc-powernv-set-or-clear-security-feature-flags.patch
queue-4.4/powerpc-64s-add-support-for-software-count-cache-flush.patch
queue-4.4/powerpc64s-show-ori31-availability-in-spectre_v1-sysfs-file-not-v2.patch
queue-4.4/powerpc-fsl-flush-the-branch-predictor-at-each-kernel-entry-64bit.patch
queue-4.4/powerpc-fsl-update-spectre-v2-reporting.patch
queue-4.4/powerpc-64s-wire-up-cpu_show_spectre_v2.patch
queue-4.4/powerpc-64-make-meltdown-reporting-book3s-64-specific.patch
queue-4.4/powerpc-rfi-flush-make-it-possible-to-call-setup_rfi_flush-again.patch
queue-4.4/powerpc-64s-add-support-for-ori-barrier_nospec-patching.patch
queue-4.4/powerpc-use-barrier_nospec-in-copy_from_user.patch
queue-4.4/powerpc-64s-fix-section-mismatch-warnings-from-setup_rfi_flush.patch
queue-4.4/powerpc-fsl-sanitize-the-syscall-table-for-nxp-powerpc-32-bit-platforms.patch
queue-4.4/powerpc-avoid-code-patching-freed-init-sections.patch
queue-4.4/powerpc-fsl-add-macro-to-flush-the-branch-predictor.patch
queue-4.4/powerpc-xmon-add-rfi-flush-related-fields-to-paca-dump.patch
queue-4.4/powerpc-fsl-add-barrier_nospec-implementation-for-nxp-powerpc-book3e.patch
queue-4.4/powerpc-security-fix-spectre_v2-reporting.patch
queue-4.4/powerpc-add-security-feature-flags-for-spectre-meltdown.patch
queue-4.4/powerpc-powernv-use-the-security-flags-in-pnv_setup_rfi_flush.patch
queue-4.4/powerpc-64-disable-the-speculation-barrier-from-the-command-line.patch
queue-4.4/powerpc-fsl-fix-the-flush-of-branch-predictor.patch
queue-4.4/powerpc-pseries-use-the-security-flags-in-pseries_setup_rfi_flush.patch
queue-4.4/powerpc-64-add-config_ppc_barrier_nospec.patch
queue-4.4/powerpc-64s-move-cpu_show_meltdown.patch
queue-4.4/powerpc-fsl-enable-runtime-patching-if-nospectre_v2-boot-arg-is-used.patch
queue-4.4/powerpc-64-use-barrier_nospec-in-syscall-entry.patch
queue-4.4/powerpc-fsl-fixed-warning-orphan-section-__btb_flush_fixup.patch
queue-4.4/powerpc-fsl-add-nospectre_v2-command-line-argument.patch
queue-4.4/powerpc-64s-add-new-security-feature-flags-for-count-cache-flush.patch
queue-4.4/powerpc-fsl-add-infrastructure-to-fixup-branch-predictor-flush.patch
queue-4.4/powerpc-rfi-flush-differentiate-enabled-and-patched-flush-types.patch

Patch "powerpc/fsl: Flush branch predictor when entering KVM" has been added to the 4.4-stable tree

2019-04-30 Thread gregkh


This is a note to let you know that I've just added the patch titled

powerpc/fsl: Flush branch predictor when entering KVM

to the 4.4-stable tree which can be found at:

http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary

The filename of the patch is:
 powerpc-fsl-flush-branch-predictor-when-entering-kvm.patch
and it can be found in the queue-4.4 subdirectory.

If you, or anyone else, feels it should not be added to the stable tree,
please let  know about it.


>From foo@baz Tue 30 Apr 2019 12:38:50 PM CEST
From: Diana Craciun 
Date: Mon, 29 Apr 2019 18:49:02 +0300
Subject: powerpc/fsl: Flush branch predictor when entering KVM
To: sta...@vger.kernel.org, gre...@linuxfoundation.org
Cc: linuxppc-...@ozlabs.org, m...@ellerman.id.au, Diana Craciun 

Message-ID: <1556552948-24957-3-git-send-email-diana.crac...@nxp.com>

From: Diana Craciun 

commit e7aa61f47b23afbec41031bc47ca8d6cb6516abc upstream.

Switching from the guest to host is another place
where the speculative accesses can be exploited.
Flush the branch predictor when entering KVM.

Signed-off-by: Diana Craciun 
Signed-off-by: Michael Ellerman 
Signed-off-by: Greg Kroah-Hartman 
---
 arch/powerpc/kvm/bookehv_interrupts.S |4 
 1 file changed, 4 insertions(+)

--- a/arch/powerpc/kvm/bookehv_interrupts.S
+++ b/arch/powerpc/kvm/bookehv_interrupts.S
@@ -75,6 +75,10 @@
PPC_LL  r1, VCPU_HOST_STACK(r4)
PPC_LL  r2, HOST_R2(r1)
 
+START_BTB_FLUSH_SECTION
+   BTB_FLUSH(r10)
+END_BTB_FLUSH_SECTION
+
mfspr   r10, SPRN_PID
lwz r8, VCPU_HOST_PID(r4)
PPC_LL  r11, VCPU_SHARED(r4)


Patches currently in stable-queue which might be from diana.crac...@nxp.com are

queue-4.4/powerpc-64s-add-support-for-a-store-forwarding-barrier-at-kernel-entry-exit.patch
queue-4.4/powerpc-fsl-emulate-sprn_bucsr-register.patch
queue-4.4/powerpc-64-make-stf-barrier-ppc_book3s_64-specific.patch
queue-4.4/powerpc-pseries-set-or-clear-security-feature-flags.patch
queue-4.4/powerpc-fsl-fix-spectre_v2-mitigations-reporting.patch
queue-4.4/powerpc-64s-patch-barrier_nospec-in-modules.patch
queue-4.4/powerpc-pseries-support-firmware-disable-of-rfi-flush.patch
queue-4.4/powerpc-rfi-flush-call-setup_rfi_flush-after-lpm-migration.patch
queue-4.4/powerpc-pseries-query-hypervisor-for-count-cache-flush-settings.patch
queue-4.4/powerpc-powernv-set-or-clear-security-feature-flags.patch
queue-4.4/powerpc-64s-add-support-for-software-count-cache-flush.patch
queue-4.4/powerpc64s-show-ori31-availability-in-spectre_v1-sysfs-file-not-v2.patch
queue-4.4/powerpc-fsl-flush-the-branch-predictor-at-each-kernel-entry-64bit.patch
queue-4.4/powerpc-fsl-update-spectre-v2-reporting.patch
queue-4.4/powerpc-64s-wire-up-cpu_show_spectre_v2.patch
queue-4.4/powerpc-64-make-meltdown-reporting-book3s-64-specific.patch
queue-4.4/powerpc-rfi-flush-make-it-possible-to-call-setup_rfi_flush-again.patch
queue-4.4/powerpc-64s-add-support-for-ori-barrier_nospec-patching.patch
queue-4.4/powerpc-use-barrier_nospec-in-copy_from_user.patch
queue-4.4/powerpc-64s-fix-section-mismatch-warnings-from-setup_rfi_flush.patch
queue-4.4/powerpc-fsl-sanitize-the-syscall-table-for-nxp-powerpc-32-bit-platforms.patch
queue-4.4/powerpc-avoid-code-patching-freed-init-sections.patch
queue-4.4/powerpc-fsl-add-macro-to-flush-the-branch-predictor.patch
queue-4.4/powerpc-xmon-add-rfi-flush-related-fields-to-paca-dump.patch
queue-4.4/powerpc-fsl-add-barrier_nospec-implementation-for-nxp-powerpc-book3e.patch
queue-4.4/powerpc-security-fix-spectre_v2-reporting.patch
queue-4.4/powerpc-add-security-feature-flags-for-spectre-meltdown.patch
queue-4.4/powerpc-powernv-use-the-security-flags-in-pnv_setup_rfi_flush.patch
queue-4.4/powerpc-64-disable-the-speculation-barrier-from-the-command-line.patch
queue-4.4/powerpc-fsl-fix-the-flush-of-branch-predictor.patch
queue-4.4/powerpc-pseries-use-the-security-flags-in-pseries_setup_rfi_flush.patch
queue-4.4/powerpc-64-add-config_ppc_barrier_nospec.patch
queue-4.4/powerpc-64s-move-cpu_show_meltdown.patch
queue-4.4/powerpc-fsl-enable-runtime-patching-if-nospectre_v2-boot-arg-is-used.patch
queue-4.4/powerpc-64-use-barrier_nospec-in-syscall-entry.patch
queue-4.4/powerpc-fsl-fixed-warning-orphan-section-__btb_flush_fixup.patch
queue-4.4/powerpc-fsl-add-nospectre_v2-command-line-argument.patch
queue-4.4/powerpc-64s-add-new-security-feature-flags-for-count-cache-flush.patch
queue-4.4/powerpc-fsl-add-infrastructure-to-fixup-branch-predictor-flush.patch
queue-4.4/powerpc-rfi-flush-differentiate-enabled-and-patched-flush-types.patch
queue-4.4/powerpc-64s-enhance-the-information-in-cpu_show_spectre_v1.patch
queue-4.4/powerpc-64-call-setup_barrier_nospec-from-setup_arch.patch
queue-4.4/powerpc-rfi-flush-always-enable-fallback-flush-on-pseries.patch
queue-4.4/powerpc-64s-improve-rfi-l1-d-cache-flush-fallback.patch
queue-4.4/powerpc-asm-add-a-patch_site-macro-helpers-for-patching-instructions.patch

Patch "powerpc/fsl: Fixed warning: orphan section `__btb_flush_fixup'" has been added to the 4.4-stable tree

2019-04-30 Thread gregkh


This is a note to let you know that I've just added the patch titled

powerpc/fsl: Fixed warning: orphan section `__btb_flush_fixup'

to the 4.4-stable tree which can be found at:

http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary

The filename of the patch is:
 powerpc-fsl-fixed-warning-orphan-section-__btb_flush_fixup.patch
and it can be found in the queue-4.4 subdirectory.

If you, or anyone else, feels it should not be added to the stable tree,
please let  know about it.


>From foo@baz Tue 30 Apr 2019 12:38:50 PM CEST
From: Diana Craciun 
Date: Mon, 29 Apr 2019 18:49:06 +0300
Subject: powerpc/fsl: Fixed warning: orphan section `__btb_flush_fixup'
To: sta...@vger.kernel.org, gre...@linuxfoundation.org
Cc: linuxppc-...@ozlabs.org, m...@ellerman.id.au, Diana Craciun 

Message-ID: <1556552948-24957-7-git-send-email-diana.crac...@nxp.com>

From: Diana Craciun 

commit 039daac5526932ec731e4499613018d263af8b3e upstream.

Fixed the following build warning:
powerpc-linux-gnu-ld: warning: orphan section `__btb_flush_fixup' from
`arch/powerpc/kernel/head_44x.o' being placed in section
`__btb_flush_fixup'.

Signed-off-by: Diana Craciun 
Signed-off-by: Michael Ellerman 
Signed-off-by: Greg Kroah-Hartman 
---
 arch/powerpc/kernel/head_booke.h |   18 --
 1 file changed, 12 insertions(+), 6 deletions(-)

--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -31,6 +31,16 @@
  */
 #define THREAD_NORMSAVE(offset)(THREAD_NORMSAVES + (offset * 4))
 
+#ifdef CONFIG_PPC_FSL_BOOK3E
+#define BOOKE_CLEAR_BTB(reg)   
\
+START_BTB_FLUSH_SECTION
\
+   BTB_FLUSH(reg)  
\
+END_BTB_FLUSH_SECTION
+#else
+#define BOOKE_CLEAR_BTB(reg)
+#endif
+
+
 #define NORMAL_EXCEPTION_PROLOG(intno) 
 \
mtspr   SPRN_SPRG_WSCRATCH0, r10;   /* save one register */  \
mfspr   r10, SPRN_SPRG_THREAD;   \
@@ -42,9 +52,7 @@
andi.   r11, r11, MSR_PR;   /* check whether user or kernel*/\
mr  r11, r1; \
beq 1f;  \
-START_BTB_FLUSH_SECTION\
-   BTB_FLUSH(r11)  \
-END_BTB_FLUSH_SECTION  \
+   BOOKE_CLEAR_BTB(r11)\
/* if from user, start at top of this thread's kernel stack */   \
lwz r11, THREAD_INFO-THREAD(r10);\
ALLOC_STACK_FRAME(r11, THREAD_SIZE); \
@@ -130,9 +138,7 @@ END_BTB_FLUSH_SECTION   
\
stw r9,_CCR(r8);/* save CR on stack*/\
mfspr   r11,exc_level_srr1; /* check whether user or kernel*/\
DO_KVM  BOOKE_INTERRUPT_##intno exc_level_srr1;  \
-START_BTB_FLUSH_SECTION
\
-   BTB_FLUSH(r10)  
\
-END_BTB_FLUSH_SECTION  
\
+   BOOKE_CLEAR_BTB(r10)\
andi.   r11,r11,MSR_PR;  \
mfspr   r11,SPRN_SPRG_THREAD;   /* if from user, start at top of   */\
lwz r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\


Patches currently in stable-queue which might be from diana.crac...@nxp.com are

queue-4.4/powerpc-64s-add-support-for-a-store-forwarding-barrier-at-kernel-entry-exit.patch
queue-4.4/powerpc-fsl-emulate-sprn_bucsr-register.patch
queue-4.4/powerpc-64-make-stf-barrier-ppc_book3s_64-specific.patch
queue-4.4/powerpc-pseries-set-or-clear-security-feature-flags.patch
queue-4.4/powerpc-fsl-fix-spectre_v2-mitigations-reporting.patch
queue-4.4/powerpc-64s-patch-barrier_nospec-in-modules.patch
queue-4.4/powerpc-pseries-support-firmware-disable-of-rfi-flush.patch
queue-4.4/powerpc-rfi-flush-call-setup_rfi_flush-after-lpm-migration.patch
queue-4.4/powerpc-pseries-query-hypervisor-for-count-cache-flush-settings.patch
queue-4.4/powerpc-powernv-set-or-clear-security-feature-flags.patch
queue-4.4/powerpc-64s-add-support-for-software-count-cache-flush.patch
queue-4.4/powerpc64s-show-ori31-availability-in-spectre_v1-sysfs-file-not-v2.patch
queue-4.4/powerpc-fsl-flush-the-branch-predictor-at-each-kernel-entry-64bit.patch
queue-4.4/powerpc-fsl-update-spectre-v2-reporting.patch
queue-4.4/powerpc-64s-wire-up-cpu_show_spectre_v2.patch

Patch "powerpc/fsl: Enable runtime patching if nospectre_v2 boot arg is used" has been added to the 4.4-stable tree

2019-04-30 Thread gregkh


This is a note to let you know that I've just added the patch titled

powerpc/fsl: Enable runtime patching if nospectre_v2 boot arg is used

to the 4.4-stable tree which can be found at:

http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary

The filename of the patch is:
 powerpc-fsl-enable-runtime-patching-if-nospectre_v2-boot-arg-is-used.patch
and it can be found in the queue-4.4 subdirectory.

If you, or anyone else, feels it should not be added to the stable tree,
please let  know about it.


>From foo@baz Tue 30 Apr 2019 12:38:50 PM CEST
From: Diana Craciun 
Date: Mon, 29 Apr 2019 18:49:01 +0300
Subject: powerpc/fsl: Enable runtime patching if nospectre_v2 boot arg is used
To: sta...@vger.kernel.org, gre...@linuxfoundation.org
Cc: linuxppc-...@ozlabs.org, m...@ellerman.id.au, Diana Craciun 

Message-ID: <1556552948-24957-2-git-send-email-diana.crac...@nxp.com>

From: Diana Craciun 

commit 3bc8ea8603ae4c1e09aca8de229ad38b8091fcb3 upstream.

If the user choses not to use the mitigations, replace
the code sequence with nops.

Signed-off-by: Diana Craciun 
Signed-off-by: Michael Ellerman 
Signed-off-by: Greg Kroah-Hartman 
---
 arch/powerpc/kernel/setup_32.c |1 +
 arch/powerpc/kernel/setup_64.c |1 +
 2 files changed, 2 insertions(+)

--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -323,6 +323,7 @@ void __init setup_arch(char **cmdline_p)
if ( ppc_md.progress ) ppc_md.progress("arch: exit", 0x3eab);
 
setup_barrier_nospec();
+   setup_spectre_v2();
 
paging_init();
 
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -737,6 +737,7 @@ void __init setup_arch(char **cmdline_p)
ppc_md.setup_arch();
 
setup_barrier_nospec();
+   setup_spectre_v2();
 
paging_init();
 


Patches currently in stable-queue which might be from diana.crac...@nxp.com are

queue-4.4/powerpc-64s-add-support-for-a-store-forwarding-barrier-at-kernel-entry-exit.patch
queue-4.4/powerpc-fsl-emulate-sprn_bucsr-register.patch
queue-4.4/powerpc-64-make-stf-barrier-ppc_book3s_64-specific.patch
queue-4.4/powerpc-pseries-set-or-clear-security-feature-flags.patch
queue-4.4/powerpc-fsl-fix-spectre_v2-mitigations-reporting.patch
queue-4.4/powerpc-64s-patch-barrier_nospec-in-modules.patch
queue-4.4/powerpc-pseries-support-firmware-disable-of-rfi-flush.patch
queue-4.4/powerpc-rfi-flush-call-setup_rfi_flush-after-lpm-migration.patch
queue-4.4/powerpc-pseries-query-hypervisor-for-count-cache-flush-settings.patch
queue-4.4/powerpc-powernv-set-or-clear-security-feature-flags.patch
queue-4.4/powerpc-64s-add-support-for-software-count-cache-flush.patch
queue-4.4/powerpc64s-show-ori31-availability-in-spectre_v1-sysfs-file-not-v2.patch
queue-4.4/powerpc-fsl-flush-the-branch-predictor-at-each-kernel-entry-64bit.patch
queue-4.4/powerpc-fsl-update-spectre-v2-reporting.patch
queue-4.4/powerpc-64s-wire-up-cpu_show_spectre_v2.patch
queue-4.4/powerpc-64-make-meltdown-reporting-book3s-64-specific.patch
queue-4.4/powerpc-rfi-flush-make-it-possible-to-call-setup_rfi_flush-again.patch
queue-4.4/powerpc-64s-add-support-for-ori-barrier_nospec-patching.patch
queue-4.4/powerpc-use-barrier_nospec-in-copy_from_user.patch
queue-4.4/powerpc-64s-fix-section-mismatch-warnings-from-setup_rfi_flush.patch
queue-4.4/powerpc-fsl-sanitize-the-syscall-table-for-nxp-powerpc-32-bit-platforms.patch
queue-4.4/powerpc-avoid-code-patching-freed-init-sections.patch
queue-4.4/powerpc-fsl-add-macro-to-flush-the-branch-predictor.patch
queue-4.4/powerpc-xmon-add-rfi-flush-related-fields-to-paca-dump.patch
queue-4.4/powerpc-fsl-add-barrier_nospec-implementation-for-nxp-powerpc-book3e.patch
queue-4.4/powerpc-security-fix-spectre_v2-reporting.patch
queue-4.4/powerpc-add-security-feature-flags-for-spectre-meltdown.patch
queue-4.4/powerpc-powernv-use-the-security-flags-in-pnv_setup_rfi_flush.patch
queue-4.4/powerpc-64-disable-the-speculation-barrier-from-the-command-line.patch
queue-4.4/powerpc-fsl-fix-the-flush-of-branch-predictor.patch
queue-4.4/powerpc-pseries-use-the-security-flags-in-pseries_setup_rfi_flush.patch
queue-4.4/powerpc-64-add-config_ppc_barrier_nospec.patch
queue-4.4/powerpc-64s-move-cpu_show_meltdown.patch
queue-4.4/powerpc-fsl-enable-runtime-patching-if-nospectre_v2-boot-arg-is-used.patch
queue-4.4/powerpc-64-use-barrier_nospec-in-syscall-entry.patch
queue-4.4/powerpc-fsl-fixed-warning-orphan-section-__btb_flush_fixup.patch
queue-4.4/powerpc-fsl-add-nospectre_v2-command-line-argument.patch
queue-4.4/powerpc-64s-add-new-security-feature-flags-for-count-cache-flush.patch
queue-4.4/powerpc-fsl-add-infrastructure-to-fixup-branch-predictor-flush.patch
queue-4.4/powerpc-rfi-flush-differentiate-enabled-and-patched-flush-types.patch
queue-4.4/powerpc-64s-enhance-the-information-in-cpu_show_spectre_v1.patch
queue-4.4/powerpc-64-call-setup_barrier_nospec-from-setup_arch.patch

Patch "powerpc/fsl: Emulate SPRN_BUCSR register" has been added to the 4.4-stable tree

2019-04-30 Thread gregkh


This is a note to let you know that I've just added the patch titled

powerpc/fsl: Emulate SPRN_BUCSR register

to the 4.4-stable tree which can be found at:

http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary

The filename of the patch is:
 powerpc-fsl-emulate-sprn_bucsr-register.patch
and it can be found in the queue-4.4 subdirectory.

If you, or anyone else, feels it should not be added to the stable tree,
please let <sta...@vger.kernel.org> know about it.


>From foo@baz Tue 30 Apr 2019 12:38:50 PM CEST
From: Diana Craciun 
Date: Mon, 29 Apr 2019 18:49:03 +0300
Subject: powerpc/fsl: Emulate SPRN_BUCSR register
To: sta...@vger.kernel.org, gre...@linuxfoundation.org
Cc: linuxppc-...@ozlabs.org, m...@ellerman.id.au, Diana Craciun 

Message-ID: <1556552948-24957-4-git-send-email-diana.crac...@nxp.com>

From: Diana Craciun 

commit 98518c4d8728656db349f875fcbbc7c126d4c973 upstream.

In order to flush the branch predictor the guest kernel performs
writes to the BUCSR register which is hypervisor privileged. However,
the branch predictor is flushed at each KVM entry, so the branch
predictor has been already flushed, so just return as soon as possible
to guest.

Signed-off-by: Diana Craciun 
[mpe: Tweak comment formatting]
Signed-off-by: Michael Ellerman 
Signed-off-by: Greg Kroah-Hartman 
---
 arch/powerpc/kvm/e500_emulate.c |7 +++
 1 file changed, 7 insertions(+)

--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -277,6 +277,13 @@ int kvmppc_core_emulate_mtspr_e500(struc
vcpu->arch.pwrmgtcr0 = spr_val;
break;
 
+   case SPRN_BUCSR:
+   /*
+* If we are here, it means that we have already flushed the
+* branch predictor, so just return to guest.
+*/
+   break;
+
/* extra exceptions */
 #ifdef CONFIG_SPE_POSSIBLE
case SPRN_IVOR32:


Patches currently in stable-queue which might be from diana.crac...@nxp.com are

queue-4.4/powerpc-64s-add-support-for-a-store-forwarding-barrier-at-kernel-entry-exit.patch
queue-4.4/powerpc-fsl-emulate-sprn_bucsr-register.patch
queue-4.4/powerpc-64-make-stf-barrier-ppc_book3s_64-specific.patch
queue-4.4/powerpc-pseries-set-or-clear-security-feature-flags.patch
queue-4.4/powerpc-fsl-fix-spectre_v2-mitigations-reporting.patch
queue-4.4/powerpc-64s-patch-barrier_nospec-in-modules.patch
queue-4.4/powerpc-pseries-support-firmware-disable-of-rfi-flush.patch
queue-4.4/powerpc-rfi-flush-call-setup_rfi_flush-after-lpm-migration.patch
queue-4.4/powerpc-pseries-query-hypervisor-for-count-cache-flush-settings.patch
queue-4.4/powerpc-powernv-set-or-clear-security-feature-flags.patch
queue-4.4/powerpc-64s-add-support-for-software-count-cache-flush.patch
queue-4.4/powerpc64s-show-ori31-availability-in-spectre_v1-sysfs-file-not-v2.patch
queue-4.4/powerpc-fsl-flush-the-branch-predictor-at-each-kernel-entry-64bit.patch
queue-4.4/powerpc-fsl-update-spectre-v2-reporting.patch
queue-4.4/powerpc-64s-wire-up-cpu_show_spectre_v2.patch
queue-4.4/powerpc-64-make-meltdown-reporting-book3s-64-specific.patch
queue-4.4/powerpc-rfi-flush-make-it-possible-to-call-setup_rfi_flush-again.patch
queue-4.4/powerpc-64s-add-support-for-ori-barrier_nospec-patching.patch
queue-4.4/powerpc-use-barrier_nospec-in-copy_from_user.patch
queue-4.4/powerpc-64s-fix-section-mismatch-warnings-from-setup_rfi_flush.patch
queue-4.4/powerpc-fsl-sanitize-the-syscall-table-for-nxp-powerpc-32-bit-platforms.patch
queue-4.4/powerpc-avoid-code-patching-freed-init-sections.patch
queue-4.4/powerpc-fsl-add-macro-to-flush-the-branch-predictor.patch
queue-4.4/powerpc-xmon-add-rfi-flush-related-fields-to-paca-dump.patch
queue-4.4/powerpc-fsl-add-barrier_nospec-implementation-for-nxp-powerpc-book3e.patch
queue-4.4/powerpc-security-fix-spectre_v2-reporting.patch
queue-4.4/powerpc-add-security-feature-flags-for-spectre-meltdown.patch
queue-4.4/powerpc-powernv-use-the-security-flags-in-pnv_setup_rfi_flush.patch
queue-4.4/powerpc-64-disable-the-speculation-barrier-from-the-command-line.patch
queue-4.4/powerpc-fsl-fix-the-flush-of-branch-predictor.patch
queue-4.4/powerpc-pseries-use-the-security-flags-in-pseries_setup_rfi_flush.patch
queue-4.4/powerpc-64-add-config_ppc_barrier_nospec.patch
queue-4.4/powerpc-64s-move-cpu_show_meltdown.patch
queue-4.4/powerpc-fsl-enable-runtime-patching-if-nospectre_v2-boot-arg-is-used.patch
queue-4.4/powerpc-64-use-barrier_nospec-in-syscall-entry.patch
queue-4.4/powerpc-fsl-fixed-warning-orphan-section-__btb_flush_fixup.patch
queue-4.4/powerpc-fsl-add-nospectre_v2-command-line-argument.patch
queue-4.4/powerpc-64s-add-new-security-feature-flags-for-count-cache-flush.patch
queue-4.4/powerpc-fsl-add-infrastructure-to-fixup-branch-predictor-flush.patch
queue-4.4/powerpc-rfi-flush-differentiate-enabled-and-patched-flush-types.patch
queue-4.4/powerpc-64s-enhance-the-information-in-cpu_show_spectre_v1.patch

Re: [PATCH stable v4.4 8/8] Documentation: Add nospectre_v1 parameter

2019-04-30 Thread Greg KH
On Mon, Apr 29, 2019 at 06:49:08PM +0300, Diana Craciun wrote:
> Currently only supported on powerpc.

No upstream git commit id for this one?

thanks,

greg k-h


Re: [PATCH stable v4.4 7/8] powerpc/fsl: Add FSL_PPC_BOOK3E as supported arch for nospectre_v2 boot arg

2019-04-30 Thread Greg KH
On Mon, Apr 29, 2019 at 06:49:07PM +0300, Diana Craciun wrote:
> commit f633a8ad636efb5d4bba1a047d4a0f1ef719aa06 upstream.

No, the patch below is not that git commit :(

I'll stop here in applying these patches.

thanks,

greg k-h


Re: [PATCH] KVM: PPC: Book3S HV: smb->smp comment fixup

2019-04-30 Thread Paul Mackerras
On Thu, Apr 25, 2019 at 12:53:39PM -0700, Palmer Dabbelt wrote:
> I made the same typo when trying to grep for uses of smp_wmb and figured
> I might as well fix it.
> 
> Signed-off-by: Palmer Dabbelt 

Thanks, patch applied to my kvm-ppc-next tree.

Paul.


Re: [PATCH] powerpc: Fix kobject memleak

2019-04-30 Thread Greg Kroah-Hartman
On Tue, Apr 30, 2019 at 11:09:23AM +1000, Tobin C. Harding wrote:
> Currently error return from kobject_init_and_add() is not followed by a
> call to kobject_put().  This means there is a memory leak.
> 
> Add call to kobject_put() in error path of kobject_init_and_add().
> 
> Signed-off-by: Tobin C. Harding 

Reviewed-by: Greg Kroah-Hartman 



[PATCH] powerpc/mm/radix: Fix kernel crash when running subpage protect test

2019-04-30 Thread Aneesh Kumar K.V
This patch fixes the below crash by making sure we touch the subpage protection
related structures only if we know they are allocated on the platform. With
radix translation we don't allocate hash context at all and trying to access
subpage_prot_table results in

 Faulting instruction address: 0xc008bdb4
 Oops: Kernel access of bad area, sig: 11 [#1]
 LE PAGE_SIZE=64K MMU=Radix MMU=Hash SMP NR_CPUS=2048 NUMA PowerNV
 
 NIP [c008bdb4] sys_subpage_prot+0x74/0x590
 LR [c000b688] system_call+0x5c/0x70
 Call Trace:
 [c00020002c6b7d30] [c00020002c6b7d90] 0xc00020002c6b7d90 (unreliable)
 [c00020002c6b7e20] [c000b688] system_call+0x5c/0x70
 Instruction dump:
 fb61ffd8 fb81ffe0 fba1ffe8 fbc1fff0 fbe1fff8 f821ff11 e92d1178 f9210068
 3920 e92d0968 ebe90630 e93f03e8  6000 3860fffe e9410068

We also move the subpage_prot_table access under mmap_sem held to avoid a race
between two parallel subpage_prot syscalls.

Reported-by: Sachin Sant 
Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/mm/subpage-prot.c | 13 +
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/subpage-prot.c
index c9dff4e1f295..473dd430e306 100644
--- a/arch/powerpc/mm/subpage-prot.c
+++ b/arch/powerpc/mm/subpage-prot.c
@@ -90,16 +90,18 @@ static void hpte_flush_range(struct mm_struct *mm, unsigned 
long addr,
 static void subpage_prot_clear(unsigned long addr, unsigned long len)
 {
struct mm_struct *mm = current->mm;
-   struct subpage_prot_table *spt = mm_ctx_subpage_prot(&mm->context);
+   struct subpage_prot_table *spt;
u32 **spm, *spp;
unsigned long i;
size_t nw;
unsigned long next, limit;
 
+   down_write(&mm->mmap_sem);
+
+   spt = mm_ctx_subpage_prot(&mm->context);
if (!spt)
-   return ;
+   goto err_out;
 
-   down_write(>mmap_sem);
limit = addr + len;
if (limit > spt->maxaddr)
limit = spt->maxaddr;
@@ -127,6 +129,8 @@ static void subpage_prot_clear(unsigned long addr, unsigned 
long len)
/* now flush any existing HPTEs for the range */
hpte_flush_range(mm, addr, nw);
}
+
+err_out:
	up_write(&mm->mmap_sem);
 }
 
@@ -189,7 +193,7 @@ SYSCALL_DEFINE3(subpage_prot, unsigned long, addr,
unsigned long, len, u32 __user *, map)
 {
struct mm_struct *mm = current->mm;
-   struct subpage_prot_table *spt = mm_ctx_subpage_prot(&mm->context);
+   struct subpage_prot_table *spt;
u32 **spm, *spp;
unsigned long i;
size_t nw;
@@ -219,6 +223,7 @@ SYSCALL_DEFINE3(subpage_prot, unsigned long, addr,
 
	down_write(&mm->mmap_sem);
 
+   spt = mm_ctx_subpage_prot(&mm->context);
if (!spt) {
/*
 * Allocate subpage prot table if not already done.
-- 
2.20.1



Re: [PATCH v2 2/2] powerpc/perf: Add generic compat mode pmu driver

2019-04-30 Thread Madhavan Srinivasan



On 29/04/19 11:12 AM, Christophe Leroy wrote:



Le 29/04/2019 à 04:52, Madhavan Srinivasan a écrit :

Most of the power processor generation performance monitoring
unit (PMU) driver code is bundled in the kernel and one of those
is enabled/registered based on the oprofile_cpu_type check at
the boot.

But things get a little tricky in case of "compat" mode boot.
IBM POWER System Server based processors have a compatibility
mode feature, which simply put is, Nth generation processor
(lets say POWER8) will act and appear in a mode consistent
with an earlier generation (N-1) processor (that is POWER7).
And in this "compat" mode boot, kernel modify the
"oprofile_cpu_type" to be Nth generation (POWER8). If Nth
generation pmu driver is bundled (POWER8), it gets registered.

Key dependency here is to have distro support for latest
processor performance monitoring support. Patch here adds
a generic "compat-mode" performance monitoring driver to
be register in absence of powernv platform specific pmu driver.

Driver supports "cycles", "instruction" and "branch-miss" events.
"0x100F0" used as event code for "cycles", "0x2"
used as event code for "instruction" events and "0x400F6"
used as event code for "branch miss". These are architected events
as part of ISA. New file called "generic-compat-pmu.c" is
created to contain the driver specific code. And base raw event
code format modeled on PPMU_ARCH_207S.

Signed-off-by: Madhavan Srinivasan 
---
Changelog v1:
- Updated architected event opcodes
- included branch miss with architected event opcode

  arch/powerpc/perf/Makefile |   3 +-
  arch/powerpc/perf/core-book3s.c    |   2 +-
  arch/powerpc/perf/generic-compat-pmu.c | 245 
+

  arch/powerpc/perf/internal.h   |   1 +
  4 files changed, 249 insertions(+), 2 deletions(-)
  create mode 100644 arch/powerpc/perf/generic-compat-pmu.c

diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile
index ab26df5bacb9..c155dcbb8691 100644
--- a/arch/powerpc/perf/Makefile
+++ b/arch/powerpc/perf/Makefile
@@ -5,7 +5,8 @@ obj-$(CONFIG_PERF_EVENTS)    += callchain.o perf_regs.o
  obj-$(CONFIG_PPC_PERF_CTRS)    += core-book3s.o bhrb.o
  obj64-$(CONFIG_PPC_PERF_CTRS)    += ppc970-pmu.o power5-pmu.o \
 power5+-pmu.o power6-pmu.o power7-pmu.o \
-   isa207-common.o power8-pmu.o power9-pmu.o
+   isa207-common.o power8-pmu.o power9-pmu.o \
+   generic-compat-pmu.o


Isn't that name a bit long ? What about compat-pmu instead ?


yeah I guess. Will fix it.




  obj32-$(CONFIG_PPC_PERF_CTRS)    += mpc7450-pmu.o
    obj-$(CONFIG_PPC_POWERNV)    += imc-pmu.o
diff --git a/arch/powerpc/perf/core-book3s.c 
b/arch/powerpc/perf/core-book3s.c

index a96f9420139c..a66fb9c01c9e 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -2318,7 +2318,7 @@ static int __init init_ppc64_pmu(void)
  else if (!init_ppc970_pmu())
  return 0;
  else
-    return -ENODEV;
+    return init_generic_compat_pmu();
  }
  early_initcall(init_ppc64_pmu);
  #endif
diff --git a/arch/powerpc/perf/generic-compat-pmu.c 
b/arch/powerpc/perf/generic-compat-pmu.c

new file mode 100644
index ..9c2d4bbc5c87
--- /dev/null
+++ b/arch/powerpc/perf/generic-compat-pmu.c
@@ -0,0 +1,245 @@
+/*
+ * Performance counter support.
+ *
+ * Copyright 2019 Madhavan Srinivasan, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or later version.


Shouldn't we use the new licence format for new files ? ie:

// SPDX-License-Identifier: GPL-2.0+


My bad. Thanks for pointing out.
Will fix and re-spin.

Thanks for review
Maddy





+ */
+
+#define pr_fmt(fmt)    "generic-compat-pmu: " fmt
+
+#include "isa207-common.h"
+
+/*
+ * Raw event encoding:
+ *
+ *    60    56    52    48    44 40    
36    32
+ * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - 
- - | - - - - |

+ *
+ *    28    24    20    16    12 8 
4 0
+ * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - 
- - | - - - - |
+ * [ pmc ]   [unit ]   [ ] m   [    
pmcxsel    ]

+ * | |
+ * | *- mark
+ * |
+ * |
+ * *- combine
+ *
+ * Below uses IBM bit numbering.
+ *
+ * MMCR1[x:y] = unit    (PMCxUNIT)
+ * MMCR1[24]   = pmc1combine[0]
+ * MMCR1[25]   = pmc1combine[1]
+ * MMCR1[26]   = pmc2combine[0]
+ * MMCR1[27]   = pmc2combine[1]
+ * MMCR1[28]   = pmc3combine[0]
+ 

Re: [PATCH v2 1/2] powerpc/perf: init pmu from core-book3s

2019-04-30 Thread Madhavan Srinivasan



On 29/04/19 11:08 AM, Christophe Leroy wrote:



Le 29/04/2019 à 04:52, Madhavan Srinivasan a écrit :

Currently the pmu driver file for each ppc64 generation processor
has a __init call in itself. Refactor the code by moving the
__init call to core-book3s.c. This also cleans up compat mode
pmu driver registration.


Can you explain the advantage of doing so ?


Was not comfortable having dependency on the link ordering, so
took this approach. This will avoid registering generic driver
when there is a platform specific driver.


For me it makes more sense to have independant drivers with their own 
init call.





Suggested-by: Michael Ellerman 
Signed-off-by: Madhavan Srinivasan 
---
Changelog v1:
- Added "internal.h" file and moved the extern definitions to that file

  arch/powerpc/perf/core-book3s.c | 28 
  arch/powerpc/perf/internal.h    | 16 
  arch/powerpc/perf/power5+-pmu.c |  4 +---
  arch/powerpc/perf/power5-pmu.c  |  4 +---
  arch/powerpc/perf/power6-pmu.c  |  4 +---
  arch/powerpc/perf/power7-pmu.c  |  4 +---
  arch/powerpc/perf/power8-pmu.c  |  3 +--
  arch/powerpc/perf/power9-pmu.c  |  3 +--
  arch/powerpc/perf/ppc970-pmu.c  |  4 +---
  9 files changed, 51 insertions(+), 19 deletions(-)
  create mode 100644 arch/powerpc/perf/internal.h

diff --git a/arch/powerpc/perf/core-book3s.c 
b/arch/powerpc/perf/core-book3s.c

index b0723002a396..a96f9420139c 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -22,6 +22,10 @@
  #include 
  #include 
  +#ifdef CONFIG_PPC64


Can we avoid that CONFIG_PPC64 ifdef ? Why isn't it compatible with 
PPC32 ?


IIUC, Driver handled here are specific to server side ppc and secondly, 
infrastructure

can be extend for ppc32 if needed.

+#include "internal.h"
+#endif
+
  #define BHRB_MAX_ENTRIES    32
  #define BHRB_TARGET    0x0002
  #define BHRB_PREDICTION    0x0001
@@ -2294,3 +2298,27 @@ int register_power_pmu(struct power_pmu *pmu)
    power_pmu_prepare_cpu, NULL);
  return 0;
  }
+
+#ifdef CONFIG_PPC64


Same, why PPC64 ?


+static int __init init_ppc64_pmu(void)
+{
+    /* run through all the pmu drivers one at a time */
+    if (!init_power5_pmu())
+    return 0;
+    else if (!init_power5p_pmu())
+    return 0;
+    else if (!init_power6_pmu())
+    return 0;
+    else if (!init_power7_pmu())
+    return 0;
+    else if (!init_power8_pmu())
+    return 0;
+    else if (!init_power9_pmu())
+    return 0;
+    else if (!init_ppc970_pmu())
+    return 0;
+    else
+    return -ENODEV;
+}
+early_initcall(init_ppc64_pmu);
+#endif
diff --git a/arch/powerpc/perf/internal.h b/arch/powerpc/perf/internal.h
new file mode 100644
index ..e54d524d4283
--- /dev/null
+++ b/arch/powerpc/perf/internal.h
@@ -0,0 +1,16 @@
+/*
+ * Copyright 2019 Madhavan Srinivasan, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+extern int init_ppc970_pmu(void);
+extern int init_power5_pmu(void);
+extern int init_power5p_pmu(void);
+extern int init_power6_pmu(void);
+extern int init_power7_pmu(void);
+extern int init_power8_pmu(void);
+extern int init_power9_pmu(void);


'extern' keyword is pointless, please remove it (checkpatch --strict 
probably told it to you).


Ok will re-spin it (will use --strict in future patches thanks :) )

Thanks for review
Maddy




Christophe


diff --git a/arch/powerpc/perf/power5+-pmu.c 
b/arch/powerpc/perf/power5+-pmu.c

index 0526dac66007..9aa803504cb2 100644
--- a/arch/powerpc/perf/power5+-pmu.c
+++ b/arch/powerpc/perf/power5+-pmu.c
@@ -677,7 +677,7 @@ static struct power_pmu power5p_pmu = {
  .cache_events    = &power5p_cache_events,
  };
  -static int __init init_power5p_pmu(void)
+int init_power5p_pmu(void)
  {
  if (!cur_cpu_spec->oprofile_cpu_type ||
  (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5+")
@@ -686,5 +686,3 @@ static int __init init_power5p_pmu(void)
    return register_power_pmu(&power5p_pmu);
  }
-
-early_initcall(init_power5p_pmu);
diff --git a/arch/powerpc/perf/power5-pmu.c 
b/arch/powerpc/perf/power5-pmu.c

index 4dc99f9f7962..30cb13d081a9 100644
--- a/arch/powerpc/perf/power5-pmu.c
+++ b/arch/powerpc/perf/power5-pmu.c
@@ -618,7 +618,7 @@ static struct power_pmu power5_pmu = {
  .flags    = PPMU_HAS_SSLOT,
  };
  -static int __init init_power5_pmu(void)
+int init_power5_pmu(void)
  {
  if (!cur_cpu_spec->oprofile_cpu_type ||
  strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5"))
@@ -626,5 +626,3 @@ static int __init init_power5_pmu(void)
    return register_power_pmu(&power5_pmu);
  }
-
-early_initcall(init_power5_pmu);
diff --git a/arch/powerpc/perf/power6-pmu.c 
b/arch/powerpc/perf/power6-pmu.c


Re: [PATCH kernel v3] powerpc/powernv: Isolate NVLinks between GV100GL on Witherspoon

2019-04-30 Thread Alexey Kardashevskiy



On 30/04/2019 15:45, Alistair Popple wrote:
> Alexey,
> 
> +void pnv_try_isolate_nvidia_v100(struct pci_dev *bridge)
> +{
> + u32 mask, val;
> + void __iomem *bar0_0, *bar0_12, *bar0_a0;
> + struct pci_dev *pdev;
> + u16 cmd = 0, cmdmask = PCI_COMMAND_MEMORY;
> +
> + if (!bridge->subordinate)
> + return;
> +
> + pdev = list_first_entry_or_null(&bridge->subordinate->devices,
> + struct pci_dev, bus_list);
> + if (!pdev)
> + return;
> +
> + if (pdev->vendor != PCI_VENDOR_ID_NVIDIA)
> 
> Don't you also need to check the PCIe devid to match only [PV]100 devices as 
> well? I doubt there's any guarantee these registers will remain the same for 
> all future (or older) NVIDIA devices.


I do not have the complete list of IDs and I already saw 3 different
device ids and this only works for machines with ibm,npu/gpu/nvlinks
properties so for now it works and for the future we are hoping to
either have an open source nvidia driver or some small minidriver (also
from nvidia, or may be a spec allowing us to write one) to allow
topology discovery on the host so we would not depend on the skiboot's
powernv DT.

> IMHO this should really be done in the device driver in the guest. A malcious 
> guest could load a modified driver that doesn't do this, but that should not 
> compromise other guests which presumably load a non-compromised driver that 
> disables the links on that guests GPU. However I guess in practice what you 
> have here should work equally well.

Doing it in the guest means a good guest needs to have an updated
driver, we do not really want to depend on this. The idea of IOMMU
groups is that the hypervisor provides isolation irrespective to what
the guest does.

Also vfio+qemu+slof needs to convey the nvlink topology to the guest,
seems like an unnecessary complication.



> - Alistair
> 
> + return;
> +
> + mask = nvlinkgpu_get_disable_mask(&pdev->dev);
> + if (!mask)
> + return;
> +
> + bar0_0 = pci_iomap_range(pdev, 0, 0, 0x1);
> + if (!bar0_0) {
> + pci_err(pdev, "Error mapping BAR0 @0\n");
> + return;
> + }
> + bar0_12 = pci_iomap_range(pdev, 0, 0x12, 0x1);
> + if (!bar0_12) {
> + pci_err(pdev, "Error mapping BAR0 @12\n");
> + goto bar0_0_unmap;
> + }
> + bar0_a0 = pci_iomap_range(pdev, 0, 0xA0, 0x1);
> + if (!bar0_a0) {
> + pci_err(pdev, "Error mapping BAR0 @A0\n");
> + goto bar0_12_unmap;
> + }

 Is it really necessary to do three separate ioremaps vs one that would
 cover them all here?  I suspect you're just sneaking in PAGE_SIZE with
 the 0x1 size mappings anyway.  Seems like it would simplify setup,
 error reporting, and cleanup to to ioremap to the PAGE_ALIGN'd range
 of the highest register accessed. Thanks,
>>>
>>> Sure I can map it once, I just do not see the point in mapping/unmapping
>>> all 0xa1>>16=161 system pages for a very short period of time while
>>> we know precisely that we need just 3 pages.
>>>
>>> Repost?
>>
>> Ping?
>>
>> Can this go in as it is (i.e. should I ping Michael) or this needs
>> another round? It would be nice to get some formal acks. Thanks,
>>
 Alex

> +
> + pci_restore_state(pdev);
> + pci_read_config_word(pdev, PCI_COMMAND, &cmd);
> + if ((cmd & cmdmask) != cmdmask)
> + pci_write_config_word(pdev, PCI_COMMAND, cmd | cmdmask);
> +
> + /*
> +  * The sequence is from "Tesla P100 and V100 SXM2 NVLink Isolation on
> +  * Multi-Tenant Systems".
> +  * The register names are not provided there either, hence raw values.
> +  */
> + iowrite32(0x4, bar0_12 + 0x4C);
> + iowrite32(0x2, bar0_12 + 0x2204);
> + val = ioread32(bar0_0 + 0x200);
> + val |= 0x0200;
> + iowrite32(val, bar0_0 + 0x200);
> + val = ioread32(bar0_a0 + 0x148);
> + val |= mask;
> + iowrite32(val, bar0_a0 + 0x148);
> +
> + if ((cmd | cmdmask) != cmd)
> + pci_write_config_word(pdev, PCI_COMMAND, cmd);
> +
> + pci_iounmap(pdev, bar0_a0);
> +bar0_12_unmap:
> + pci_iounmap(pdev, bar0_12);
> +bar0_0_unmap:
> + pci_iounmap(pdev, bar0_0);
> +}
> 
> 

-- 
Alexey


Re: [PATCH v5 5/8] powerpc/pci/IOV: Add support for runtime enabling the VFs

2019-04-30 Thread Oliver O'Halloran
On Mon, 2019-03-11 at 14:52 +0300, Sergey Miroshnichenko wrote:

> When called within pcibios_sriov_enable(), the pci_sriov_get_totalvfs(pdev)
> returns zero, because the device is yet preparing to enable the VFs.

I don't think this is correct. The earliest pcibios_sriov_enable() can
be called is during a driver probe function. The totalvfs field is
initialised by pci_iov_init() which is called before the device has
been added to the bus. If it's returning zero then maybe the driver
limited the number of VFs to zero?

That said, you need to reset numvfs to zero before changing the value. 
So limiting the number of pci_dns that are created to the number
actually required rather than totalvfs doesn't hurt.

> With this patch it becomes possible to enable VFs via sysfs "sriov_numvfs"
> on PowerNV.

I tested on a few of our lab systems with random kernel versions
spanning from 4.15 to 5.0 and sriov_numvfs seemed to work fine on all
of them. Is there a specific configuration you're testing that needed
this change?

> Signed-off-by: Sergey Miroshnichenko 
> ---
>  arch/powerpc/include/asm/pci-bridge.h |  4 +--
>  arch/powerpc/kernel/pci_dn.c  | 32 ++-
>  arch/powerpc/platforms/powernv/pci-ioda.c |  4 +--
>  arch/powerpc/platforms/pseries/pci.c  |  4 +--
>  4 files changed, 25 insertions(+), 19 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/pci-bridge.h 
> b/arch/powerpc/include/asm/pci-bridge.h
> index fc188e0e9179..6479bc96e0b6 100644
> --- a/arch/powerpc/include/asm/pci-bridge.h
> +++ b/arch/powerpc/include/asm/pci-bridge.h
> @@ -225,8 +225,8 @@ struct pci_dn {
>  extern struct pci_dn *pci_get_pdn_by_devfn(struct pci_bus *bus,
>  int devfn);
>  extern struct pci_dn *pci_get_pdn(struct pci_dev *pdev);
> -extern struct pci_dn *add_dev_pci_data(struct pci_dev *pdev);
> -extern void remove_dev_pci_data(struct pci_dev *pdev);
> +extern struct pci_dn *pci_create_vf_pdns(struct pci_dev *pdev, int num_vfs);
> +extern void pci_destroy_vf_pdns(struct pci_dev *pdev);
>  extern struct pci_dn *pci_add_device_node_info(struct pci_controller *hose,
>  struct device_node *dn);
>  extern void pci_remove_device_node_info(struct device_node *dn);
> diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
> index 7f12882d8882..7fa362f8038d 100644
> --- a/arch/powerpc/kernel/pci_dn.c
> +++ b/arch/powerpc/kernel/pci_dn.c
> @@ -222,18 +222,19 @@ static struct pci_dn *pci_create_pdn_from_dev(struct 
> pci_dev *pdev,
>   return pdn;
>  }
>  
> -struct pci_dn *add_dev_pci_data(struct pci_dev *pdev)
> +struct pci_dn *pci_create_vf_pdns(struct pci_dev *pdev, int num_vfs)
>  {
> + struct pci_dn *pdn = pci_get_pdn(pdev);
> +
>  #ifdef CONFIG_PCI_IOV
> - struct pci_dn *parent, *pdn;
> + struct pci_dn *parent;
>   int i;
>  
>   /* Only support IOV for now */
>   if (!pdev->is_physfn)
> - return pci_get_pdn(pdev);
> + return pdn;
>  
>   /* Check if VFs have been populated */
> - pdn = pci_get_pdn(pdev);
>   if (!pdn || (pdn->flags & PCI_DN_FLAG_IOV_VF))
>   return NULL;
>  
> @@ -242,33 +243,38 @@ struct pci_dn *add_dev_pci_data(struct pci_dev *pdev)
>   if (!parent)
>   return NULL;
>  
> - for (i = 0; i < pci_sriov_get_totalvfs(pdev); i++) {
> + for (i = 0; i < num_vfs; i++) {
>   struct eeh_dev *edev __maybe_unused;
> + struct pci_dn *vpdn;
>  
> - pdn = pci_alloc_pdn(parent,
> - pci_iov_virtfn_bus(pdev, i),
> - pci_iov_virtfn_devfn(pdev, i));
> - if (!pdn) {
> + vpdn = pci_alloc_pdn(parent,
> +  pci_iov_virtfn_bus(pdev, i),
> +  pci_iov_virtfn_devfn(pdev, i));
> + if (!vpdn) {
> + dev_warn(&pdev->dev, "%s: Cannot create firmware data 
> for VF#%d\n",
>__func__, i);
>   return NULL;
>   }
>  
> - pdn->vf_index = i;
> + vpdn->vf_index = i;
> + vpdn->vendor_id = pdn->vendor_id;
> + vpdn->device_id = pdn->device_id;
> + vpdn->class_code = pdn->class_code;
> + vpdn->pci_ext_config_space = 0;
>  
>  #ifdef CONFIG_EEH
>   /* Create the EEH device for the VF */
> - edev = eeh_dev_init(pdn);
> + edev = eeh_dev_init(vpdn);
>   BUG_ON(!edev);
>   edev->physfn = pdev;
>  #endif /* CONFIG_EEH */
>   }
>  #endif /* CONFIG_PCI_IOV */
>  
> - return pci_get_pdn(pdev);
> + return pdn;
>  }
>  
> -void remove_dev_pci_data(struct pci_dev *pdev)
> +void pci_destroy_vf_pdns(struct pci_dev *pdev)
>  {
>  #ifdef CONFIG_PCI_IOV
>   struct pci_dn *parent;
> diff --git