Re: [PATCH 2/5] soc/fsl/qe: qe.c: reduce static memory footprint by 1.7K
On 30/04/2019 19.12, Christophe Leroy wrote: > > Le 30/04/2019 à 15:36, Rasmus Villemoes a écrit : >> The current array of struct qe_snum use 256*4 bytes for just keeping >> track of the free/used state of each index, and the struct layout >> means there's another 768 bytes of padding. If we just unzip that >> structure, the array of snum values just use 256 bytes, while the >> free/inuse state can be tracked in a 32 byte bitmap. >> >> So this reduces the .data footprint by 1760 bytes. It also serves as >> preparation for introducing another DT binding for specifying the snum >> values. >> >> Signed-off-by: Rasmus Villemoes >> --- >> - >> /* We allocate this here because it is used almost exclusively for >> * the communication processor devices. >> */ >> struct qe_immap __iomem *qe_immr; >> EXPORT_SYMBOL(qe_immr); >> -static struct qe_snum snums[QE_NUM_OF_SNUM]; /* Dynamically >> allocated SNUMs */ >> +static u8 snums[QE_NUM_OF_SNUM]; /* Dynamically allocated SNUMs */ >> +static DECLARE_BITMAP(snum_state, QE_NUM_OF_SNUM); >> static unsigned int qe_num_of_snum; >> static phys_addr_t qebase = -1; >> @@ -308,6 +298,7 @@ static void qe_snums_init(void) >> }; >> const u8 *snum_init; >> + bitmap_zero(snum_state, QE_NUM_OF_SNUM); > > Doesn't make much importance, but wouldn't it be more logical to add > this line where the setting of .state = QE_SNUM_STATE_FREE was done > previously, ie around the for() loop below ? This was on purpose, to avoid having to move it up in patch 4, where we don't necessarily reach the for loop. >> qe_num_of_snum = qe_get_num_of_snums(); >> if (qe_num_of_snum == 76) >> @@ -315,10 +306,8 @@ static void qe_snums_init(void) >> else >> snum_init = snum_init_46; >> - for (i = 0; i < qe_num_of_snum; i++) { >> - snums[i].num = snum_init[i]; >> - snums[i].state = QE_SNUM_STATE_FREE; >> - } >> + for (i = 0; i < qe_num_of_snum; i++) >> + snums[i] = snum_init[i]; > > Could use memcpy() instead ? Yes, I switch to that in 5/5. 
Sure, I could do it here already, but I did it this way to keep close to the current style. I don't care either way, so if you prefer introducing memcpy here, fine by me. >> spin_unlock_irqrestore(&qe_lock, flags); >> @@ -346,8 +333,8 @@ void qe_put_snum(u8 snum) >> int i; >> for (i = 0; i < qe_num_of_snum; i++) { >> - if (snums[i].num == snum) { >> - snums[i].state = QE_SNUM_STATE_FREE; >> + if (snums[i] == snum) { >> + clear_bit(i, snum_state); >> break; >> } >> } > > Can we replace this loop by memchr() ? Hm, yes. So that would be const u8 *p = memchr(snums, snum, qe_num_of_snum) if (p) clear_bit(p - snums, snum_state); I guess. Let me fold that in and see how it looks. Thanks, Rasmus
Re: [PATCH] crypto: caam/jr - Remove extra memory barrier during job ring dequeue
Vakul Garg wrote: > In function caam_jr_dequeue(), a full memory barrier is used before > writing response job ring's register to signal removal of the completed > job. Therefore for writing the register, we do not need another write > memory barrier. Hence it is removed by replacing the call to wr_reg32() > with a newly defined function wr_reg32_relaxed(). > > Signed-off-by: Vakul Garg > --- > drivers/crypto/caam/jr.c | 2 +- > drivers/crypto/caam/regs.h | 8 > 2 files changed, 9 insertions(+), 1 deletion(-) > > diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c > index 4e9b3fca5627..2ce6d7d2ad72 100644 > --- a/drivers/crypto/caam/jr.c > +++ b/drivers/crypto/caam/jr.c > @@ -266,7 +266,7 @@ static void caam_jr_dequeue(unsigned long devarg) > mb(); > > /* set done */ > - wr_reg32(&jrp->rregs->outring_rmvd, 1); > + wr_reg32_relaxed(&jrp->rregs->outring_rmvd, 1); > > jrp->out_ring_read_index = (jrp->out_ring_read_index + 1) & > (JOBR_DEPTH - 1); > diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h > index 3cd0822ea819..9e912c722e33 100644 > --- a/drivers/crypto/caam/regs.h > +++ b/drivers/crypto/caam/regs.h > @@ -96,6 +96,14 @@ cpu_to_caam(16) > cpu_to_caam(32) > cpu_to_caam(64) > > +static inline void wr_reg32_relaxed(void __iomem *reg, u32 data) > +{ > + if (caam_little_end) > + writel_relaxed(data, reg); > + else > + writel_relaxed(cpu_to_be32(data), reg); > +} > + > static inline void wr_reg32(void __iomem *reg, u32 data) > { > if (caam_little_end) This crashes on my p5020ds. Did you test on powerpc? # first bad commit: [bbfcac5ff5f26aafa51935a62eb86b6eacfe8a49] crypto: caam/jr - Remove extra memory barrier during job ring dequeue Log: [ cut here ] kernel BUG at drivers/crypto/caam/jr.c:191! 
Oops: Exception in kernel mode, sig: 5 [#1] BE PAGE_SIZE=4K SMP NR_CPUS=24 CoreNet Generic Modules linked in: CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.1.0-rc1-gcc-8.2.0-00060-gbbfcac5ff5f2 #31 NIP: c079d704 LR: c079d498 CTR: c0086914 REGS: c000fffc7970 TRAP: 0700 Not tainted (5.1.0-rc1-gcc-8.2.0-00060-gbbfcac5ff5f2) MSR: 80029000 CR: 28008484 XER: IRQMASK: 0 GPR00: c079d6b0 c000fffc7c00 c0fbc800 0001 GPR04: 7e080080 ffc0 0001 67d7 GPR08: 880401a9 0001 fa83b2da GPR12: 28008224 c0003800 c0fc20b0 0100 GPR16: 8920f09520bea117 c0def480 0001 GPR20: c0fc3940 c000f3537e18 0001 c1026cc5 GPR24: 0001 c000f3328000 0001 c000f3451010 GPR28: 0001 NIP [c079d704] .caam_jr_dequeue+0x2f0/0x410 LR [c079d498] .caam_jr_dequeue+0x84/0x410 Call Trace: [c000fffc7c00] [c079d6b0] .caam_jr_dequeue+0x29c/0x410 (unreliable) [c000fffc7cd0] [c004fef0] .tasklet_action_common.isra.3+0xac/0x180 [c000fffc7d80] [c0a2f99c] .__do_softirq+0x174/0x3f8 [c000fffc7e90] [c004fb94] .irq_exit+0xc4/0xdc [c000fffc7f00] [c0007348] .__do_irq+0x8c/0x1b0 [c000fffc7f90] [c00150c4] .call_do_irq+0x14/0x24 [c000f3137930] [c00074e4] .do_IRQ+0x78/0xd4 [c000f31379c0] [c0019998] exc_0x500_common+0xfc/0x100 --- interrupt: 501 at .book3e_idle+0x24/0x5c LR = .book3e_idle+0x24/0x5c [c000f3137cc0] [c000a6a4] .arch_cpu_idle+0x34/0xa0 (unreliable) [c000f3137d30] [c0a2f2e8] .default_idle_call+0x5c/0x70 [c000f3137da0] [c0084210] .do_idle+0x1b0/0x1f4 [c000f3137e40] [c0084434] .cpu_startup_entry+0x28/0x30 [c000f3137eb0] [c0021538] .start_secondary+0x59c/0x5b0 [c000f3137f90] [c45c] start_secondary_prolog+0x10/0x14 Instruction dump: 7d284a14 e9290018 2fa9 40de001c 3bbd0001 57bd05fe 7d3db050 712901ff 7fbd07b4 40e2ffcc 93b500dc 4b94 <0fe0> 78890022 79270020 41d600ec ---[ end trace 7bedbdf37a95ab35 ]--- That's hitting: /* we should never fail to find a matching descriptor */ BUG_ON(CIRC_CNT(head, tail + i, JOBR_DEPTH) <= 0); cheers
[PATCH kernel v2 2/2] powerpc/powernv/ioda2: Create bigger default window with 64k IOMMU pages
At the moment we create a small window only for 32bit devices, the window maps 0..2GB of the PCI space only. For other devices we either use a sketchy bypass or hardware bypass but the former can only work if the amount of RAM is no bigger than the device's DMA mask and the latter requires devices to support at least 59bit DMA. This extends the default DMA window to the maximum size possible to allow a wider DMA mask than just 32bit. The default window size is now limited by the iommu_table::it_map allocation bitmap which is a contiguous array, 1 bit per an IOMMU page. This increases the default IOMMU page size from hard coded 4K to the system page size to allow wider DMA masks. This increases the level number to not exceed the max order allocation limit per TCE level. By the same time, this keeps minimal levels number as 2 in order to save memory. As the extended window now overlaps the 32bit MMIO region, this adds an area reservation to iommu_init_table(). After this change the default window size is 0x80000000000 == 1<<43 so devices limited to DMA mask smaller than the amount of system RAM can still use more than just 2GB of memory for DMA. 
With the on-demand allocation of indirect TCE table levels enabled and 2 levels, the first TCE level size is just 1< --- Changes: v2: * adjusted level number to the max order --- arch/powerpc/include/asm/iommu.h | 8 +++- arch/powerpc/kernel/iommu.c | 58 +++ arch/powerpc/platforms/powernv/pci-ioda.c | 40 +--- 3 files changed, 79 insertions(+), 27 deletions(-) diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 0ac52392ed99..5ea782e04803 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -124,6 +124,8 @@ struct iommu_table { struct iommu_table_ops *it_ops; struct krefit_kref; int it_nid; + unsigned long it_reserved_start; /* Start of not-DMA-able (MMIO) area */ + unsigned long it_reserved_end; }; #define IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry) \ @@ -162,8 +164,10 @@ extern int iommu_tce_table_put(struct iommu_table *tbl); /* Initializes an iommu_table based in values set in the passed-in * structure */ -extern struct iommu_table *iommu_init_table(struct iommu_table * tbl, - int nid); +extern struct iommu_table *iommu_init_table_res(struct iommu_table *tbl, + int nid, unsigned long res_start, unsigned long res_end); +#define iommu_init_table(tbl, nid) iommu_init_table_res((tbl), (nid), 0, 0) + #define IOMMU_TABLE_GROUP_MAX_TABLES 2 struct iommu_table_group; diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 33bbd59cff79..209306ce7f4b 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -646,11 +646,43 @@ static void iommu_table_clear(struct iommu_table *tbl) #endif } +static void iommu_table_reserve_pages(struct iommu_table *tbl) +{ + int i; + + /* +* Reserve page 0 so it will not be used for any mappings. +* This avoids buggy drivers that consider page 0 to be invalid +* to crash the machine or even lose data. 
+*/ + if (tbl->it_offset == 0) + set_bit(0, tbl->it_map); + + for (i = tbl->it_reserved_start; i < tbl->it_reserved_end; ++i) + set_bit(i, tbl->it_map); +} + +static void iommu_table_release_pages(struct iommu_table *tbl) +{ + int i; + + /* +* In case we have reserved the first bit, we should not emit +* the warning below. +*/ + if (tbl->it_offset == 0) + clear_bit(0, tbl->it_map); + + for (i = tbl->it_reserved_start; i < tbl->it_reserved_end; ++i) + clear_bit(i, tbl->it_map); +} + /* * Build a iommu_table structure. This contains a bit map which * is used to manage allocation of the tce space. */ -struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid) +struct iommu_table *iommu_init_table_res(struct iommu_table *tbl, int nid, + unsigned long res_start, unsigned long res_end) { unsigned long sz; static int welcomed = 0; @@ -669,13 +701,9 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid) tbl->it_map = page_address(page); memset(tbl->it_map, 0, sz); - /* -* Reserve page 0 so it will not be used for any mappings. -* This avoids buggy drivers that consider page 0 to be invalid -* to crash the machine or even lose data. -*/ - if (tbl->it_offset == 0) - set_bit(0, tbl->it_map); + tbl->it_reserved_start = res_start; + tbl->it_reserved_end = res_end; + iommu_table_reserve_pages(tbl); /* We only split the IOMMU table if we have 1GB or more of space */ if ((tbl->it_size << tbl->it_page_shift) >= (1UL * 1024 * 1024 * 1024))
[PATCH kernel v2 1/2] powerpc/powernv/ioda2: Allocate TCE table levels on demand for default DMA window
We allocate only the first level of multilevel TCE tables for KVM already (alloc_userspace_copy==true), and the rest is allocated on demand. This is not enabled though for baremetal. This removes the KVM limitation (implicit, via the alloc_userspace_copy parameter) and always allocates just the first level. The on-demand allocation of missing levels is already implemented. As from now on DMA map might happen with disabled interrupts, this allocates TCEs with GFP_ATOMIC. To save time when creating a new clean table, this skips non-allocated indirect TCE entries in pnv_tce_free just like we already do in the VFIO IOMMU TCE driver. This changes the default level number from 1 to 2 to reduce the amount of memory required for the default 32bit DMA window at the boot time. The default window size is up to 2GB which requires 4MB of TCEs which is unlikely to be used entirely or at all as most devices these days are 64bit capable so by switching to 2 levels by default we save 4032KB of RAM per a device. While at this, add __GFP_NOWARN to alloc_pages_node() as the userspace can trigger this path via VFIO, see the failure and try creating a table again with different parameters which might succeed. 
Signed-off-by: Alexey Kardashevskiy --- Changes: v2: * added __GFP_NOWARN to alloc_pages_node --- arch/powerpc/platforms/powernv/pci.h | 2 +- arch/powerpc/platforms/powernv/pci-ioda-tce.c | 20 +-- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 8e36da379252..f44987b90ac2 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -223,7 +223,7 @@ extern struct iommu_table_group *pnv_npu_compound_attach( struct pnv_ioda_pe *pe); /* pci-ioda-tce.c */ -#define POWERNV_IOMMU_DEFAULT_LEVELS 1 +#define POWERNV_IOMMU_DEFAULT_LEVELS 2 #define POWERNV_IOMMU_MAX_LEVELS 5 extern int pnv_tce_build(struct iommu_table *tbl, long index, long npages, diff --git a/arch/powerpc/platforms/powernv/pci-ioda-tce.c b/arch/powerpc/platforms/powernv/pci-ioda-tce.c index e28f03e1eb5e..c75ec37bf0cd 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda-tce.c +++ b/arch/powerpc/platforms/powernv/pci-ioda-tce.c @@ -36,7 +36,8 @@ static __be64 *pnv_alloc_tce_level(int nid, unsigned int shift) struct page *tce_mem = NULL; __be64 *addr; - tce_mem = alloc_pages_node(nid, GFP_KERNEL, shift - PAGE_SHIFT); + tce_mem = alloc_pages_node(nid, GFP_ATOMIC | __GFP_NOWARN, + shift - PAGE_SHIFT); if (!tce_mem) { pr_err("Failed to allocate a TCE memory, level shift=%d\n", shift); @@ -161,6 +162,9 @@ void pnv_tce_free(struct iommu_table *tbl, long index, long npages) if (ptce) *ptce = cpu_to_be64(0); + else + /* Skip the rest of the level */ + i |= tbl->it_level_size - 1; } } @@ -260,7 +264,6 @@ long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, unsigned int table_shift = max_t(unsigned int, entries_shift + 3, PAGE_SHIFT); const unsigned long tce_table_size = 1UL << table_shift; - unsigned int tmplevels = levels; if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS)) return -EINVAL; @@ -268,9 +271,6 @@ long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, if 
(!is_power_of_2(window_size)) return -EINVAL; - if (alloc_userspace_copy && (window_size > (1ULL << 32))) - tmplevels = 1; - /* Adjust direct table size from window_size and levels */ entries_shift = (entries_shift + levels - 1) / levels; level_shift = entries_shift + 3; @@ -281,7 +281,7 @@ long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, /* Allocate TCE table */ addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift, - tmplevels, tce_table_size, , _allocated); + 1, tce_table_size, , _allocated); /* addr==NULL means that the first level allocation failed */ if (!addr) @@ -292,18 +292,18 @@ long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, * we did not allocate as much as we wanted, * release partially allocated table. */ - if (tmplevels == levels && offset < tce_table_size) + if (levels == 1 && offset < tce_table_size) goto free_tces_exit; /* Allocate userspace view of the TCE table */ if (alloc_userspace_copy) { offset = 0; uas = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift, - tmplevels, tce_table_size, , + 1, tce_table_size, ,
[PATCH kernel v2 0/2] powerpc/ioda2: Another attempt to allow DMA masks between 32 and 59
This is an attempt to allow DMA masks between 32..59 which are not large enough to use either a PHB3 bypass mode or a sketchy bypass. Depending on the max order, up to 40 is usually available. This is based on sha1 37624b58542f Linus Torvalds "Linux 5.1-rc7". Please comment. Thanks. Alexey Kardashevskiy (2): powerpc/powernv/ioda2: Allocate TCE table levels on demand for default DMA window powerpc/powernv/ioda2: Create bigger default window with 64k IOMMU pages arch/powerpc/include/asm/iommu.h | 8 ++- arch/powerpc/platforms/powernv/pci.h | 2 +- arch/powerpc/kernel/iommu.c | 58 +-- arch/powerpc/platforms/powernv/pci-ioda-tce.c | 20 +++ arch/powerpc/platforms/powernv/pci-ioda.c | 40 +++-- 5 files changed, 90 insertions(+), 38 deletions(-) -- 2.17.1
[PATCH kernel] prom_init: Fetch flatten device tree from the system firmware
At the moment, on 256CPU + 256 PCI devices guest, it takes the guest about 8.5sec to fetch the entire device tree via the client interface as the DT is traversed twice - for strings blob and for struct blob. Also, "getprop" is quite slow too as SLOF stores properties in a linked list. However, since [1] SLOF builds flattened device tree (FDT) for another purpose. [2] adds a new "fdt-fetch" client interface for the OS to fetch the FDT. This tries the new method; if not supported, this falls back to the old method. There is a change in the FDT layout - the old method produced (reserved map, strings, structs), the new one receives only strings and structs from the firmware and adds the final reserved map to the end, so it is (fw reserved map, strings, structs, reserved map). This still produces the same unflattened device tree. This merges the reserved map from the firmware into the kernel's reserved map. At the moment SLOF generates an empty reserved map so this does not change the existing behaviour in regard of reservations. This supports only v17 onward as only that version provides dt_struct_size which works as "fdt-fetch" only produces v17 blobs. If "fdt-fetch" is not available, the old method of fetching the DT is used. 
[1] https://git.qemu.org/?p=SLOF.git;a=commitdiff;h=e6fc84652c9c00 [2] https://git.qemu.org/?p=SLOF.git;a=commit;h=ecda95906930b80 Signed-off-by: Alexey Kardashevskiy --- arch/powerpc/kernel/prom_init.c | 43 + 1 file changed, 43 insertions(+) diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index f33ff4163a51..72e7a602b68e 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -2457,6 +2457,48 @@ static void __init flatten_device_tree(void) prom_panic("Can't allocate initial device-tree chunk\n"); mem_end = mem_start + room; + hdr = (void *) mem_start; + if (!call_prom_ret("fdt-fetch", 2, 1, NULL, mem_start, + room - sizeof(mem_reserve_map)) && + hdr->version >= 17) { + u32 size; + struct mem_map_entry *fwrmap; + + /* Fixup the boot cpuid */ + hdr->boot_cpuid_phys = cpu_to_be32(prom.cpu); + + /* +* Store the struct and strings addresses, mostly +* for consistency, only dt_header_start actually matters later. +*/ + dt_header_start = mem_start; + dt_string_start = mem_start + be32_to_cpu(hdr->off_dt_strings); + dt_string_end = dt_string_start + + be32_to_cpu(hdr->dt_strings_size); + dt_struct_start = mem_start + be32_to_cpu(hdr->off_dt_struct); + dt_struct_end = dt_struct_start + + be32_to_cpu(hdr->dt_struct_size); + + /* +* Calculate the reserved map location (which we put +* at the blob end) and update total size. 
+*/ + fwrmap = (void *)(mem_start + be32_to_cpu(hdr->off_mem_rsvmap)); + hdr->off_mem_rsvmap = hdr->totalsize; + size = be32_to_cpu(hdr->totalsize); + hdr->totalsize = cpu_to_be32(size + sizeof(mem_reserve_map)); + + /* Merge reserved map from firmware to ours */ + for ( ; fwrmap->size; ++fwrmap) + reserve_mem(be64_to_cpu(fwrmap->base), + be64_to_cpu(fwrmap->size)); + + rsvmap = (u64 *)(mem_start + size); + + prom_debug("Fetched DTB: %d bytes to @%lx\n", size, mem_start); + goto finalize_exit; + } + /* Get root of tree */ root = call_prom("peer", 1, 1, (phandle)0); if (root == (phandle)0) @@ -2504,6 +2546,7 @@ static void __init flatten_device_tree(void) /* Version 16 is not backward compatible */ hdr->last_comp_version = cpu_to_be32(0x10); +finalize_exit: /* Copy the reserve map in */ memcpy(rsvmap, mem_reserve_map, sizeof(mem_reserve_map)); -- 2.17.1
Re: [PATCH 06/41] drivers: tty: serial: sb1250-duart: use dev_err() instead of printk()
On Sat, 27 Apr 2019, Enrico Weigelt, metux IT consult wrote: > diff --git a/drivers/tty/serial/sb1250-duart.c > b/drivers/tty/serial/sb1250-duart.c > index 329aced..655961c 100644 > --- a/drivers/tty/serial/sb1250-duart.c > +++ b/drivers/tty/serial/sb1250-duart.c > @@ -663,7 +663,6 @@ static void sbd_release_port(struct uart_port *uport) > > static int sbd_map_port(struct uart_port *uport) > { > - const char *err = KERN_ERR "sbd: Cannot map MMIO\n"; > struct sbd_port *sport = to_sport(uport); > struct sbd_duart *duart = sport->duart; > > @@ -671,7 +670,7 @@ static int sbd_map_port(struct uart_port *uport) > uport->membase = ioremap_nocache(uport->mapbase, >DUART_CHANREG_SPACING); > if (!uport->membase) { > - printk(err); > + dev_err(uport->dev, "Cannot map MMIO (base)\n"); > return -ENOMEM; > } > > @@ -679,7 +678,7 @@ static int sbd_map_port(struct uart_port *uport) > sport->memctrl = ioremap_nocache(duart->mapctrl, >DUART_CHANREG_SPACING); > if (!sport->memctrl) { > - printk(err); > + dev_err(uport->dev, "Cannot map MMIO (ctrl)\n"); > iounmap(uport->membase); > uport->membase = NULL; > return -ENOMEM; Hmm, what's the point to have separate messages, which consume extra memory, for a hardly if at all possible error condition? Maciej
Re: [PATCH 01/41] drivers: tty: serial: dz: use dev_err() instead of printk()
On Mon, 29 Apr 2019, Greg KH wrote: > > >> drivers/tty/serial/dz.c | 8 > > > > > > Do you have this hardware to test any of these changes with? > > > > Unfortunately not :( > > Then I can take the "basic" types of patches for the driver (like this > one), but not any others, sorry. I can verify changes to dz.c, sb1250-duart.c and zs.c with real hardware, but regrettably not right away: the hardware is in a remote location and while I have it wired for remote operation unfortunately its connectivity has been cut off by an unfriendly ISP. I'm not sure if all the changes make sense though: if there is a compiler warning or a usability issue, then a patch is surely welcome, otherwise: "If it ain't broke, don't fix it". Maciej
Re: [PATCH] powerpc/mm/radix: Fix kernel crash when running subpage protect test
"Aneesh Kumar K.V" writes: > This patch fixes the below crash by making sure we touch the subpage > protection > related structures only if we know they are allocated on the platform. With > radix translation we don't allocate hash context at all and trying to access > subpage_prot_table results in > > Faulting instruction address: 0xc008bdb4 > Oops: Kernel access of bad area, sig: 11 [#1] > LE PAGE_SIZE=64K MMU=Radix MMU=Hash SMP NR_CPUS=2048 NUMA PowerNV > > NIP [c008bdb4] sys_subpage_prot+0x74/0x590 > LR [c000b688] system_call+0x5c/0x70 > Call Trace: > [c00020002c6b7d30] [c00020002c6b7d90] 0xc00020002c6b7d90 (unreliable) > [c00020002c6b7e20] [c000b688] system_call+0x5c/0x70 > Instruction dump: > fb61ffd8 fb81ffe0 fba1ffe8 fbc1fff0 fbe1fff8 f821ff11 e92d1178 f9210068 > 3920 e92d0968 ebe90630 e93f03e8 6000 3860fffe e9410068 > > We also move the subpage_prot_table with mmap_sem held to avoid a race > between two parallel subpage_prot syscalls. > > Reported-by: Sachin Sant > Signed-off-by: Aneesh Kumar K.V Presumably it was: 701101865f5d ("powerpc/mm: Reduce memory usage for mm_context_t for radix") That caused the breakage? cheers
Re: [PATCH v2] powerpc/32s: fix BATs setting with CONFIG_STRICT_KERNEL_RWX
Christophe Leroy writes: > Serge reported some crashes with CONFIG_STRICT_KERNEL_RWX enabled > on a book3s32 machine. > > Analysis shows two issues: > - BATs addresses and sizes are not properly aligned. > - There is a gap between the last address covered by BATs and the > first address covered by pages. > > Memory mapped with DBATs: > 0: 0xc000-0xc07f 0x Kernel RO coherent > 1: 0xc080-0xc0bf 0x0080 Kernel RO coherent > 2: 0xc0c0-0xc13f 0x00c0 Kernel RW coherent > 3: 0xc140-0xc23f 0x0140 Kernel RW coherent > 4: 0xc240-0xc43f 0x0240 Kernel RW coherent > 5: 0xc440-0xc83f 0x0440 Kernel RW coherent > 6: 0xc840-0xd03f 0x0840 Kernel RW coherent > 7: 0xd040-0xe03f 0x1040 Kernel RW coherent > > Memory mapped with pages: > 0xe100-0xefff 0x2100 240Mrw present > dirty accessed > > This patch fixes both issues. With the patch, we get the following > which is as expected: > > Memory mapped with DBATs: > 0: 0xc000-0xc07f 0x Kernel RO coherent > 1: 0xc080-0xc0bf 0x0080 Kernel RO coherent > 2: 0xc0c0-0xc0ff 0x00c0 Kernel RW coherent > 3: 0xc100-0xc1ff 0x0100 Kernel RW coherent > 4: 0xc200-0xc3ff 0x0200 Kernel RW coherent > 5: 0xc400-0xc7ff 0x0400 Kernel RW coherent > 6: 0xc800-0xcfff 0x0800 Kernel RW coherent > 7: 0xd000-0xdfff 0x1000 Kernel RW coherent > > Memory mapped with pages: > 0xe000-0xefff 0x2000 256Mrw present > dirty accessed > > Reported-by: Serge Belyshev > Fixes: 63b2bc619565 ("powerpc/mm/32s: Use BATs for STRICT_KERNEL_RWX") > Cc: sta...@vger.kernel.org I could probably still get this into v5.1 if you're confident it's a good fix. cheers
Re: [PATCH] powerpc: vdso: drop unnecessary cc-ldoption
Nick Desaulniers's on May 1, 2019 6:25 am: > On Tue, Apr 23, 2019 at 2:11 PM Nick Desaulniers > wrote: >> >> Towards the goal of removing cc-ldoption, it seems that --hash-style= >> was added to binutils 2.17.50.0.2 in 2006. The minimal required version >> of binutils for the kernel according to >> Documentation/process/changes.rst is 2.20. >> >> Link: https://gcc.gnu.org/ml/gcc/2007-01/msg01141.html >> Cc: clang-built-li...@googlegroups.com >> Suggested-by: Masahiro Yamada >> Signed-off-by: Nick Desaulniers >> --- >> arch/powerpc/kernel/vdso32/Makefile | 5 ++--- >> arch/powerpc/kernel/vdso64/Makefile | 5 ++--- >> 2 files changed, 4 insertions(+), 6 deletions(-) >> >> diff --git a/arch/powerpc/kernel/vdso32/Makefile >> b/arch/powerpc/kernel/vdso32/Makefile >> index ce199f6e4256..06f54d947057 100644 >> --- a/arch/powerpc/kernel/vdso32/Makefile >> +++ b/arch/powerpc/kernel/vdso32/Makefile >> @@ -26,9 +26,8 @@ GCOV_PROFILE := n >> KCOV_INSTRUMENT := n >> UBSAN_SANITIZE := n >> >> -ccflags-y := -shared -fno-common -fno-builtin >> -ccflags-y += -nostdlib -Wl,-soname=linux-vdso32.so.1 \ >> - $(call cc-ldoption, -Wl$(comma)--hash-style=both) >> +ccflags-y := -shared -fno-common -fno-builtin -nostdlib \ >> + -Wl,-soname=linux-vdso32.so.1 -Wl,--hash-style=both >> asflags-y := -D__VDSO32__ -s >> >> obj-y += vdso32_wrapper.o >> diff --git a/arch/powerpc/kernel/vdso64/Makefile >> b/arch/powerpc/kernel/vdso64/Makefile >> index 28e7d112aa2f..32ebb3522ea1 100644 >> --- a/arch/powerpc/kernel/vdso64/Makefile >> +++ b/arch/powerpc/kernel/vdso64/Makefile >> @@ -12,9 +12,8 @@ GCOV_PROFILE := n >> KCOV_INSTRUMENT := n >> UBSAN_SANITIZE := n >> >> -ccflags-y := -shared -fno-common -fno-builtin >> -ccflags-y += -nostdlib -Wl,-soname=linux-vdso64.so.1 \ >> - $(call cc-ldoption, -Wl$(comma)--hash-style=both) >> +ccflags-y := -shared -fno-common -fno-builtin -nostdlib \ >> + -Wl,-soname=linux-vdso64.so.1 -Wl,--hash-style=both >> asflags-y := -D__VDSO64__ -s >> >> obj-y += vdso64_wrapper.o 
>> -- >> 2.21.0.593.g511ec345e18-goog >> > > bumping for review This looks like a good cleanup. Reviewed-by: Nicholas Piggin
Re: [PATCH 41/41] drivers: tty: serial: lpc32xx_hs: fill mapsize and use it
Hi Enrico, On 04/27/2019 03:52 PM, Enrico Weigelt, metux IT consult wrote: > Fill the struct uart_port->mapsize field and use it, insteaf of typo, s/insteaf/instead/ > hardcoded values in many places. This makes the code layout a bit > more consistent and easily allows using generic helpers for the > io memory handling. > > Candidates for such helpers could be eg. the request+ioremap and > iounmap+release combinations. > > Signed-off-by: Enrico Weigelt Acked-by: Vladimir Zapolskiy -- Best wishes, Vladimir
Re: [PATCH] powerpc: Fix kobject memleak
On 04/29/2019 06:09 PM, Tobin C. Harding wrote: > Currently error return from kobject_init_and_add() is not followed by a > call to kobject_put(). This means there is a memory leak. > > Add call to kobject_put() in error path of kobject_init_and_add(). > > Signed-off-by: Tobin C. Harding > --- Reviewed-by: Tyrel Datwyler
Re: [PATCH] powerpc: vdso: drop unnecessary cc-ldoption
On Tue, Apr 23, 2019 at 2:11 PM Nick Desaulniers wrote: > > Towards the goal of removing cc-ldoption, it seems that --hash-style= > was added to binutils 2.17.50.0.2 in 2006. The minimal required version > of binutils for the kernel according to > Documentation/process/changes.rst is 2.20. > > Link: https://gcc.gnu.org/ml/gcc/2007-01/msg01141.html > Cc: clang-built-li...@googlegroups.com > Suggested-by: Masahiro Yamada > Signed-off-by: Nick Desaulniers > --- > arch/powerpc/kernel/vdso32/Makefile | 5 ++--- > arch/powerpc/kernel/vdso64/Makefile | 5 ++--- > 2 files changed, 4 insertions(+), 6 deletions(-) > > diff --git a/arch/powerpc/kernel/vdso32/Makefile > b/arch/powerpc/kernel/vdso32/Makefile > index ce199f6e4256..06f54d947057 100644 > --- a/arch/powerpc/kernel/vdso32/Makefile > +++ b/arch/powerpc/kernel/vdso32/Makefile > @@ -26,9 +26,8 @@ GCOV_PROFILE := n > KCOV_INSTRUMENT := n > UBSAN_SANITIZE := n > > -ccflags-y := -shared -fno-common -fno-builtin > -ccflags-y += -nostdlib -Wl,-soname=linux-vdso32.so.1 \ > - $(call cc-ldoption, -Wl$(comma)--hash-style=both) > +ccflags-y := -shared -fno-common -fno-builtin -nostdlib \ > + -Wl,-soname=linux-vdso32.so.1 -Wl,--hash-style=both > asflags-y := -D__VDSO32__ -s > > obj-y += vdso32_wrapper.o > diff --git a/arch/powerpc/kernel/vdso64/Makefile > b/arch/powerpc/kernel/vdso64/Makefile > index 28e7d112aa2f..32ebb3522ea1 100644 > --- a/arch/powerpc/kernel/vdso64/Makefile > +++ b/arch/powerpc/kernel/vdso64/Makefile > @@ -12,9 +12,8 @@ GCOV_PROFILE := n > KCOV_INSTRUMENT := n > UBSAN_SANITIZE := n > > -ccflags-y := -shared -fno-common -fno-builtin > -ccflags-y += -nostdlib -Wl,-soname=linux-vdso64.so.1 \ > - $(call cc-ldoption, -Wl$(comma)--hash-style=both) > +ccflags-y := -shared -fno-common -fno-builtin -nostdlib \ > + -Wl,-soname=linux-vdso64.so.1 -Wl,--hash-style=both > asflags-y := -D__VDSO64__ -s > > obj-y += vdso64_wrapper.o > -- > 2.21.0.593.g511ec345e18-goog > bumping for review -- Thanks, ~Nick Desaulniers
Re: [PATCH v4] powerpc/pseries: Remove limit in wait for dying CPU
Hello Nathan, Thanks for reviewing the patch! Nathan Lynch writes: > Thiago Jung Bauermann writes: >> This can be a problem because if the busy loop finishes too early, then the >> kernel may offline another CPU before the previous one finished dying, >> which would lead to two concurrent calls to rtas-stop-self, which is >> prohibited by the PAPR. >> >> Since the hotplug machinery already assumes that cpu_die() is going to >> work, we can simply loop until the CPU stops. >> >> Also change the loop to wait 100 µs between each call to >> smp_query_cpu_stopped() to avoid querying RTAS too often. > > [...] > >> diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c >> b/arch/powerpc/platforms/pseries/hotplug-cpu.c >> index 97feb6e79f1a..d75cee60644c 100644 >> --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c >> +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c >> @@ -214,13 +214,17 @@ static void pseries_cpu_die(unsigned int cpu) >> msleep(1); >> } >> } else if (get_preferred_offline_state(cpu) == CPU_STATE_OFFLINE) { >> - >> -for (tries = 0; tries < 25; tries++) { >> +/* >> + * rtas_stop_self() panics if the CPU fails to stop and our >> + * callers already assume that we are going to succeed, so we >> + * can just loop until the CPU stops. >> + */ >> +while (true) { >> cpu_status = smp_query_cpu_stopped(pcpu); >> if (cpu_status == QCSS_STOPPED || >> cpu_status == QCSS_HARDWARE_ERROR) >> break; >> -cpu_relax(); >> +udelay(100); >> } >> } > > I agree with looping indefinitely but doesn't it need a cond_resched() > or similar check? If there's no kernel or hypervisor bug, it shouldn't take more than a few tens of ms for this loop to complete (Gautham measured a maximum of 10 ms on a POWER9 with an earlier version of this patch). In case of bugs related to CPU hotplug (either in the kernel or the hypervisor), I was hoping that the resulting lockup warnings would be a good indicator that something is wrong. 
:-) Though perhaps adding a cond_resched() every 10 ms or so, with a WARN_ON() if it loops for more than 50 ms would be better. I'll send an alternative patch. -- Thiago Jung Bauermann IBM Linux Technology Center
[PATCH v2] powerpc: remove the __kernel_io_end export
This export was added in this merge window, but without any actual user, or justification for a modular user. Fixes: a35a3c6f6065 ("powerpc/mm/hash64: Add a variable to track the end of IO mapping") Signed-off-by: Christoph Hellwig --- Changes since v1: - actually compiles now.. arch/powerpc/mm/pgtable_64.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 72f58c076e26..dd610dab98e0 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -97,7 +97,6 @@ EXPORT_SYMBOL(__vmalloc_end); unsigned long __kernel_io_start; EXPORT_SYMBOL(__kernel_io_start); unsigned long __kernel_io_end; -EXPORT_SYMBOL(__kernel_io_end); struct page *vmemmap; EXPORT_SYMBOL(vmemmap); unsigned long __pte_frag_nr; -- 2.20.1
Re: [PATCH 5/5] soc/fsl/qe: qe.c: fold qe_get_num_of_snums into qe_snums_init
Le 30/04/2019 à 15:36, Rasmus Villemoes a écrit : The comment "No QE ever has fewer than 28 SNUMs" is false; e.g. the MPC8309 has 14. The code path returning -EINVAL is also a recipe for instant disaster, since the caller (qe_snums_init) uncritically assigns the return value to the unsigned qe_num_of_snum, and would thus proceed to attempt to copy 4GB from snum_init_46[] to the snum[] array. So fold the handling of the legacy fsl,qe-num-snums into qe_snums_init, and make sure we do not end up using the snum_init_46 array in cases other than the two where we know it makes sense. Signed-off-by: Rasmus Villemoes --- drivers/net/ethernet/freescale/ucc_geth.c | 2 +- drivers/soc/fsl/qe/qe.c | 54 +++ include/soc/fsl/qe/qe.h | 2 +- 3 files changed, 19 insertions(+), 39 deletions(-) diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index eb3e65e8868f..5748eb8464d0 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -3837,7 +3837,7 @@ static int ucc_geth_probe(struct platform_device* ofdev) } if (max_speed == SPEED_1000) { - unsigned int snums = qe_get_num_of_snums(); + unsigned int snums = qe_num_of_snum; /* configure muram FIFOs for gigabit operation */ ug_info->uf_info.urfs = UCC_GETH_URFS_GIGA_INIT; diff --git a/drivers/soc/fsl/qe/qe.c b/drivers/soc/fsl/qe/qe.c index af3c2b2b268f..8c3b3c62d81b 100644 --- a/drivers/soc/fsl/qe/qe.c +++ b/drivers/soc/fsl/qe/qe.c @@ -52,7 +52,8 @@ EXPORT_SYMBOL(qe_immr); static u8 snums[QE_NUM_OF_SNUM]; /* Dynamically allocated SNUMs */ static DECLARE_BITMAP(snum_state, QE_NUM_OF_SNUM); -static unsigned int qe_num_of_snum; +unsigned int qe_num_of_snum; +EXPORT_SYMBOL(qe_num_of_snum); By exporting the object you allow other drivers to modify it. Is that really what we want ? Why not keep qe_get_num_of_snums() as a helper that simply returns qe_num_of_snum ? 
static phys_addr_t qebase = -1; @@ -308,26 +309,34 @@ static void qe_snums_init(void) int i; bitmap_zero(snum_state, QE_NUM_OF_SNUM); + qe_num_of_snum = 28; /* The default number of snum for threads is 28 */ qe = qe_get_device_node(); if (qe) { i = of_property_read_variable_u8_array(qe, "fsl,qe-snums", snums, 1, QE_NUM_OF_SNUM); - of_node_put(qe); if (i > 0) { + of_node_put(qe); qe_num_of_snum = i; return; } + /* +* Fall back to legacy binding of using the value of +* fsl,qe-num-snums to choose one of the static arrays +* above. +*/ + of_property_read_u32(qe, "fsl,qe-num-snums", _num_of_snum); + of_node_put(qe); } - qe_num_of_snum = qe_get_num_of_snums(); - if (qe_num_of_snum == 76) snum_init = snum_init_76; - else + else if (qe_num_of_snum == 28 || qe_num_of_snum == 46) snum_init = snum_init_46; - - for (i = 0; i < qe_num_of_snum; i++) - snums[i] = snum_init[i]; + else { + pr_err("QE: unsupported value of fsl,qe-num-snums: %u\n", qe_num_of_snum); + return; + } The first leg of the if/else must have {} too when the second leg has them. + memcpy(snums, snum_init, qe_num_of_snum); } int qe_get_snum(void) @@ -645,35 +654,6 @@ unsigned int qe_get_num_of_risc(void) } EXPORT_SYMBOL(qe_get_num_of_risc); -unsigned int qe_get_num_of_snums(void) I think this function should remain and just return num_of_snums, see my other comment above. 
Christophe -{ - struct device_node *qe; - int size; - unsigned int num_of_snums; - const u32 *prop; - - num_of_snums = 28; /* The default number of snum for threads is 28 */ - qe = qe_get_device_node(); - if (!qe) - return num_of_snums; - - prop = of_get_property(qe, "fsl,qe-num-snums", ); - if (prop && size == sizeof(*prop)) { - num_of_snums = *prop; - if ((num_of_snums < 28) || (num_of_snums > QE_NUM_OF_SNUM)) { - /* No QE ever has fewer than 28 SNUMs */ - pr_err("QE: number of snum is invalid\n"); - of_node_put(qe); - return -EINVAL; - } - } - - of_node_put(qe); - - return num_of_snums; -} -EXPORT_SYMBOL(qe_get_num_of_snums); - static int __init qe_init(void) { struct device_node *np; diff --git a/include/soc/fsl/qe/qe.h b/include/soc/fsl/qe/qe.h index b3d1aff5e8ad..af5739850bf4 100644 --- a/include/soc/fsl/qe/qe.h +++
Re: [PATCH 4/5] soc/fsl/qe: qe.c: support fsl,qe-snums property
Le 30/04/2019 à 15:36, Rasmus Villemoes a écrit : The current code assumes that the set of snum _values_ to populate the snums[] array with is a function of the _number_ of snums alone. However, reading table 4-30, and its footnotes, of the QUICC Engine Block Reference Manual shows that that is a bit too naive. As an alternative, this introduces a new binding fsl,qe-snums, which automatically encodes both the number of snums and the actual values to use. Conveniently, of_property_read_variable_u8_array does exactly what we need. For example, for the MPC8309, one would specify the property as fsl,qe-snums = /bits/ 8 < 0x88 0x89 0x98 0x99 0xa8 0xa9 0xb8 0xb9 0xc8 0xc9 0xd8 0xd9 0xe8 0xe9>; Signed-off-by: Rasmus Villemoes --- .../devicetree/bindings/soc/fsl/cpm_qe/qe.txt | 8 +++- drivers/soc/fsl/qe/qe.c| 14 +- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/soc/fsl/cpm_qe/qe.txt b/Documentation/devicetree/bindings/soc/fsl/cpm_qe/qe.txt index d7afaff5faff..05f5f485562a 100644 --- a/Documentation/devicetree/bindings/soc/fsl/cpm_qe/qe.txt +++ b/Documentation/devicetree/bindings/soc/fsl/cpm_qe/qe.txt @@ -18,7 +18,8 @@ Required properties: - reg : offset and length of the device registers. - bus-frequency : the clock frequency for QUICC Engine. - fsl,qe-num-riscs: define how many RISC engines the QE has. -- fsl,qe-num-snums: define how many serial number(SNUM) the QE can use for the +- fsl,qe-snums: This property has to be specified as '/bits/ 8' value, + defining the array of serial number (SNUM) values for the virtual threads. Optional properties: @@ -34,6 +35,11 @@ Recommended properties - brg-frequency : the internal clock source frequency for baud-rate generators in Hz. +Deprecated properties +- fsl,qe-num-snums: define how many serial number(SNUM) the QE can use + for the threads. Use fsl,qe-snums instead to not only specify the + number of snums, but also their values. 
+ Example: qe@e010 { #address-cells = <1>; diff --git a/drivers/soc/fsl/qe/qe.c b/drivers/soc/fsl/qe/qe.c index aff9d1373529..af3c2b2b268f 100644 --- a/drivers/soc/fsl/qe/qe.c +++ b/drivers/soc/fsl/qe/qe.c @@ -283,7 +283,6 @@ EXPORT_SYMBOL(qe_clock_source); */ static void qe_snums_init(void) { - int i; Why do you move this one ? static const u8 snum_init_76[] = { 0x04, 0x05, 0x0C, 0x0D, 0x14, 0x15, 0x1C, 0x1D, 0x24, 0x25, 0x2C, 0x2D, 0x34, 0x35, 0x88, 0x89, @@ -304,9 +303,22 @@ static void qe_snums_init(void) 0x28, 0x29, 0x38, 0x39, 0x48, 0x49, 0x58, 0x59, 0x68, 0x69, 0x78, 0x79, 0x80, 0x81, }; + struct device_node *qe; const u8 *snum_init; + int i; bitmap_zero(snum_state, QE_NUM_OF_SNUM); + qe = qe_get_device_node(); + if (qe) { + i = of_property_read_variable_u8_array(qe, "fsl,qe-snums", + snums, 1, QE_NUM_OF_SNUM); + of_node_put(qe); + if (i > 0) { + qe_num_of_snum = i; + return; In that case you skip the rest of the init ? Can you explain ? Christophe + } + } + qe_num_of_snum = qe_get_num_of_snums(); if (qe_num_of_snum == 76)
Re: [PATCH 3/5] soc/fsl/qe: qe.c: introduce qe_get_device_node helper
Le 30/04/2019 à 15:36, Rasmus Villemoes a écrit : The 'try of_find_compatible_node(NULL, NULL, "fsl,qe"), fall back to of_find_node_by_type(NULL, "qe")' pattern is repeated five times. Factor it into a common helper. Signed-off-by: Rasmus Villemoes Reviewed-by: Christophe Leroy --- drivers/soc/fsl/qe/qe.c | 71 + 1 file changed, 29 insertions(+), 42 deletions(-) diff --git a/drivers/soc/fsl/qe/qe.c b/drivers/soc/fsl/qe/qe.c index d0393f83145c..aff9d1373529 100644 --- a/drivers/soc/fsl/qe/qe.c +++ b/drivers/soc/fsl/qe/qe.c @@ -56,6 +56,20 @@ static unsigned int qe_num_of_snum; static phys_addr_t qebase = -1; +static struct device_node *qe_get_device_node(void) +{ + struct device_node *qe; + + /* +* Newer device trees have an "fsl,qe" compatible property for the QE +* node, but we still need to support older device trees. +*/ + qe = of_find_compatible_node(NULL, NULL, "fsl,qe"); + if (qe) + return qe; + return of_find_node_by_type(NULL, "qe"); +} + static phys_addr_t get_qe_base(void) { struct device_node *qe; @@ -65,12 +79,9 @@ static phys_addr_t get_qe_base(void) if (qebase != -1) return qebase; - qe = of_find_compatible_node(NULL, NULL, "fsl,qe"); - if (!qe) { - qe = of_find_node_by_type(NULL, "qe"); - if (!qe) - return qebase; - } + qe = qe_get_device_node(); + if (!qe) + return qebase; ret = of_address_to_resource(qe, 0, ); if (!ret) @@ -164,12 +175,9 @@ unsigned int qe_get_brg_clk(void) if (brg_clk) return brg_clk; - qe = of_find_compatible_node(NULL, NULL, "fsl,qe"); - if (!qe) { - qe = of_find_node_by_type(NULL, "qe"); - if (!qe) - return brg_clk; - } + qe = qe_get_device_node(); + if (!qe) + return brg_clk; prop = of_get_property(qe, "brg-frequency", ); if (prop && size == sizeof(*prop)) @@ -563,16 +571,9 @@ struct qe_firmware_info *qe_get_firmware_info(void) initialized = 1; - /* -* Newer device trees have an "fsl,qe" compatible property for the QE -* node, but we still need to support older device trees. 
- */ - qe = of_find_compatible_node(NULL, NULL, "fsl,qe"); - if (!qe) { - qe = of_find_node_by_type(NULL, "qe"); - if (!qe) - return NULL; - } + qe = qe_get_device_node(); + if (!qe) + return NULL; /* Find the 'firmware' child node */ fw = of_get_child_by_name(qe, "firmware"); @@ -618,16 +619,9 @@ unsigned int qe_get_num_of_risc(void) unsigned int num_of_risc = 0; const u32 *prop; - qe = of_find_compatible_node(NULL, NULL, "fsl,qe"); - if (!qe) { - /* Older devices trees did not have an "fsl,qe" -* compatible property, so we need to look for -* the QE node by name. -*/ - qe = of_find_node_by_type(NULL, "qe"); - if (!qe) - return num_of_risc; - } + qe = qe_get_device_node(); + if (!qe) + return num_of_risc; prop = of_get_property(qe, "fsl,qe-num-riscs", ); if (prop && size == sizeof(*prop)) @@ -647,16 +641,9 @@ unsigned int qe_get_num_of_snums(void) const u32 *prop; num_of_snums = 28; /* The default number of snum for threads is 28 */ - qe = of_find_compatible_node(NULL, NULL, "fsl,qe"); - if (!qe) { - /* Older devices trees did not have an "fsl,qe" -* compatible property, so we need to look for -* the QE node by name. -*/ - qe = of_find_node_by_type(NULL, "qe"); - if (!qe) - return num_of_snums; - } + qe = qe_get_device_node(); + if (!qe) + return num_of_snums; prop = of_get_property(qe, "fsl,qe-num-snums", ); if (prop && size == sizeof(*prop)) {
Re: [PATCH 2/5] soc/fsl/qe: qe.c: reduce static memory footprint by 1.7K
Le 30/04/2019 à 15:36, Rasmus Villemoes a écrit : The current array of struct qe_snum use 256*4 bytes for just keeping track of the free/used state of each index, and the struct layout means there's another 768 bytes of padding. If we just unzip that structure, the array of snum values just use 256 bytes, while the free/inuse state can be tracked in a 32 byte bitmap. So this reduces the .data footprint by 1760 bytes. It also serves as preparation for introducing another DT binding for specifying the snum values. Signed-off-by: Rasmus Villemoes --- drivers/soc/fsl/qe/qe.c | 37 - 1 file changed, 12 insertions(+), 25 deletions(-) diff --git a/drivers/soc/fsl/qe/qe.c b/drivers/soc/fsl/qe/qe.c index 855373deb746..d0393f83145c 100644 --- a/drivers/soc/fsl/qe/qe.c +++ b/drivers/soc/fsl/qe/qe.c @@ -14,6 +14,7 @@ * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. */ +#include #include #include #include @@ -43,25 +44,14 @@ static DEFINE_SPINLOCK(qe_lock); DEFINE_SPINLOCK(cmxgcr_lock); EXPORT_SYMBOL(cmxgcr_lock); -/* QE snum state */ -enum qe_snum_state { - QE_SNUM_STATE_USED, - QE_SNUM_STATE_FREE -}; - -/* QE snum */ -struct qe_snum { - u8 num; - enum qe_snum_state state; -}; - /* We allocate this here because it is used almost exclusively for * the communication processor devices. */ struct qe_immap __iomem *qe_immr; EXPORT_SYMBOL(qe_immr); -static struct qe_snum snums[QE_NUM_OF_SNUM]; /* Dynamically allocated SNUMs */ +static u8 snums[QE_NUM_OF_SNUM]; /* Dynamically allocated SNUMs */ +static DECLARE_BITMAP(snum_state, QE_NUM_OF_SNUM); static unsigned int qe_num_of_snum; static phys_addr_t qebase = -1; @@ -308,6 +298,7 @@ static void qe_snums_init(void) }; const u8 *snum_init; + bitmap_zero(snum_state, QE_NUM_OF_SNUM); Doesn't make much importance, but wouldn't it be more logical to add this line where the setting of .state = QE_SNUM_STATE_FREE was done previously, ie around the for() loop below ? 
qe_num_of_snum = qe_get_num_of_snums(); if (qe_num_of_snum == 76) @@ -315,10 +306,8 @@ static void qe_snums_init(void) else snum_init = snum_init_46; - for (i = 0; i < qe_num_of_snum; i++) { - snums[i].num = snum_init[i]; - snums[i].state = QE_SNUM_STATE_FREE; - } + for (i = 0; i < qe_num_of_snum; i++) + snums[i] = snum_init[i]; Could use memcpy() instead ? } int qe_get_snum(void) @@ -328,12 +317,10 @@ int qe_get_snum(void) int i; spin_lock_irqsave(_lock, flags); - for (i = 0; i < qe_num_of_snum; i++) { - if (snums[i].state == QE_SNUM_STATE_FREE) { - snums[i].state = QE_SNUM_STATE_USED; - snum = snums[i].num; - break; - } + i = find_first_zero_bit(snum_state, qe_num_of_snum); + if (i < qe_num_of_snum) { + set_bit(i, snum_state); + snum = snums[i]; } spin_unlock_irqrestore(_lock, flags); @@ -346,8 +333,8 @@ void qe_put_snum(u8 snum) int i; for (i = 0; i < qe_num_of_snum; i++) { - if (snums[i].num == snum) { - snums[i].state = QE_SNUM_STATE_FREE; + if (snums[i] == snum) { + clear_bit(i, snum_state); break; } } Can we replace this loop by memchr() ? Christophe
Re: [PATCH v2 3/6] x86: clean up _TIF_SYSCALL_EMU handling using ptrace_syscall_enter hook
On 30/04/2019 17:46, Andy Lutomirski wrote: > On Mon, Mar 18, 2019 at 3:49 AM Sudeep Holla wrote: >> >> Now that we have a new hook ptrace_syscall_enter that can be called from >> syscall entry code and it handles PTRACE_SYSEMU in generic code, we >> can do some cleanup using the same in syscall_trace_enter. >> >> Further the extra logic to find single stepping PTRACE_SYSEMU_SINGLESTEP >> in syscall_slow_exit_work seems unnecessary. Let's remove the same. >> > > Unless the patch set contains a selftest that exercises all the > interesting cases here, NAK. To be clear, there needs to be a test > that passes on an unmodified kernel and still passes on a patched > kernel. And that test case needs to *fail* if, for example, you force > "emulated" to either true or false rather than reading out the actual > value. > Tested using tools/testing/selftests/x86/ptrace_syscall.c Also v3 doesn't change any logic or additional call to new function as in v2. It's just simple cleanup as suggested by Oleg. -- Regards, Sudeep
Re: [PATCH 1/5] soc/fsl/qe: qe.c: drop useless static qualifier
Le 30/04/2019 à 15:36, Rasmus Villemoes a écrit : The local variable snum_init has no reason to have static storage duration. Signed-off-by: Rasmus Villemoes Reviewed-by: Christophe Leroy --- drivers/soc/fsl/qe/qe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/soc/fsl/qe/qe.c b/drivers/soc/fsl/qe/qe.c index 612d9c551be5..855373deb746 100644 --- a/drivers/soc/fsl/qe/qe.c +++ b/drivers/soc/fsl/qe/qe.c @@ -306,7 +306,7 @@ static void qe_snums_init(void) 0x28, 0x29, 0x38, 0x39, 0x48, 0x49, 0x58, 0x59, 0x68, 0x69, 0x78, 0x79, 0x80, 0x81, }; - static const u8 *snum_init; + const u8 *snum_init; qe_num_of_snum = qe_get_num_of_snums();
Re: [PATCH v2 3/6] x86: clean up _TIF_SYSCALL_EMU handling using ptrace_syscall_enter hook
On Mon, Mar 18, 2019 at 3:49 AM Sudeep Holla wrote: > > Now that we have a new hook ptrace_syscall_enter that can be called from > syscall entry code and it handles PTRACE_SYSEMU in generic code, we > can do some cleanup using the same in syscall_trace_enter. > > Further the extra logic to find single stepping PTRACE_SYSEMU_SINGLESTEP > in syscall_slow_exit_work seems unnecessary. Let's remove the same. > Unless the patch set contains a selftest that exercises all the interesting cases here, NAK. To be clear, there needs to be a test that passes on an unmodified kernel and still passes on a patched kernel. And that test case needs to *fail* if, for example, you force "emulated" to either true or false rather than reading out the actual value. --Andy
Re: [PATCH v2 3/6] x86: clean up _TIF_SYSCALL_EMU handling using ptrace_syscall_enter hook
On Mon, Mar 18, 2019 at 04:33:22PM +0100, Oleg Nesterov wrote: > On 03/18, Sudeep Holla wrote: > > > > --- a/arch/x86/entry/common.c > > +++ b/arch/x86/entry/common.c > > @@ -70,22 +70,16 @@ static long syscall_trace_enter(struct pt_regs *regs) > > > > struct thread_info *ti = current_thread_info(); > > unsigned long ret = 0; > > - bool emulated = false; > > u32 work; > > > > if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) > > BUG_ON(regs != task_pt_regs(current)); > > > > - work = READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY; > > - > > - if (unlikely(work & _TIF_SYSCALL_EMU)) > > - emulated = true; > > - > > - if ((emulated || (work & _TIF_SYSCALL_TRACE)) && > > - tracehook_report_syscall_entry(regs)) > > + if (unlikely(ptrace_syscall_enter(regs))) > > return -1L; > > > > - if (emulated) > > + work = READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY; > > + if ((work & _TIF_SYSCALL_TRACE) && tracehook_report_syscall_entry(regs)) > > return -1L; > [...] > > And it seems that _TIF_WORK_SYSCALL_ENTRY needs some cleanups too... We don't > need > "& _TIF_WORK_SYSCALL_ENTRY" in syscall_trace_enter, and > _TIF_WORK_SYSCALL_ENTRY > should not include _TIF_NOHZ? > I was about to post the updated version and checked this to make sure I have covered everything or not. I had missed the above comment. All architectures have _TIF_NOHZ in their mask that they check to do work. And from x86, I read "...syscall_trace_enter(). Also includes TIF_NOHZ for enter_from_user_mode()" So I don't understand why _TIF_NOHZ needs to be dropped. Also if we need to drop, we can address that separately examining all archs. I will post the cleanup as you suggested for now. -- Regards, Sudeep
[PATCH 5/5] soc/fsl/qe: qe.c: fold qe_get_num_of_snums into qe_snums_init
The comment "No QE ever has fewer than 28 SNUMs" is false; e.g. the MPC8309 has 14. The code path returning -EINVAL is also a recipe for instant disaster, since the caller (qe_snums_init) uncritically assigns the return value to the unsigned qe_num_of_snum, and would thus proceed to attempt to copy 4GB from snum_init_46[] to the snum[] array. So fold the handling of the legacy fsl,qe-num-snums into qe_snums_init, and make sure we do not end up using the snum_init_46 array in cases other than the two where we know it makes sense. Signed-off-by: Rasmus Villemoes --- drivers/net/ethernet/freescale/ucc_geth.c | 2 +- drivers/soc/fsl/qe/qe.c | 54 +++ include/soc/fsl/qe/qe.h | 2 +- 3 files changed, 19 insertions(+), 39 deletions(-) diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index eb3e65e8868f..5748eb8464d0 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -3837,7 +3837,7 @@ static int ucc_geth_probe(struct platform_device* ofdev) } if (max_speed == SPEED_1000) { - unsigned int snums = qe_get_num_of_snums(); + unsigned int snums = qe_num_of_snum; /* configure muram FIFOs for gigabit operation */ ug_info->uf_info.urfs = UCC_GETH_URFS_GIGA_INIT; diff --git a/drivers/soc/fsl/qe/qe.c b/drivers/soc/fsl/qe/qe.c index af3c2b2b268f..8c3b3c62d81b 100644 --- a/drivers/soc/fsl/qe/qe.c +++ b/drivers/soc/fsl/qe/qe.c @@ -52,7 +52,8 @@ EXPORT_SYMBOL(qe_immr); static u8 snums[QE_NUM_OF_SNUM]; /* Dynamically allocated SNUMs */ static DECLARE_BITMAP(snum_state, QE_NUM_OF_SNUM); -static unsigned int qe_num_of_snum; +unsigned int qe_num_of_snum; +EXPORT_SYMBOL(qe_num_of_snum); static phys_addr_t qebase = -1; @@ -308,26 +309,34 @@ static void qe_snums_init(void) int i; bitmap_zero(snum_state, QE_NUM_OF_SNUM); + qe_num_of_snum = 28; /* The default number of snum for threads is 28 */ qe = qe_get_device_node(); if (qe) { i = of_property_read_variable_u8_array(qe, "fsl,qe-snums", snums, 
1, QE_NUM_OF_SNUM); - of_node_put(qe); if (i > 0) { + of_node_put(qe); qe_num_of_snum = i; return; } + /* +* Fall back to legacy binding of using the value of +* fsl,qe-num-snums to choose one of the static arrays +* above. +*/ + of_property_read_u32(qe, "fsl,qe-num-snums", _num_of_snum); + of_node_put(qe); } - qe_num_of_snum = qe_get_num_of_snums(); - if (qe_num_of_snum == 76) snum_init = snum_init_76; - else + else if (qe_num_of_snum == 28 || qe_num_of_snum == 46) snum_init = snum_init_46; - - for (i = 0; i < qe_num_of_snum; i++) - snums[i] = snum_init[i]; + else { + pr_err("QE: unsupported value of fsl,qe-num-snums: %u\n", qe_num_of_snum); + return; + } + memcpy(snums, snum_init, qe_num_of_snum); } int qe_get_snum(void) @@ -645,35 +654,6 @@ unsigned int qe_get_num_of_risc(void) } EXPORT_SYMBOL(qe_get_num_of_risc); -unsigned int qe_get_num_of_snums(void) -{ - struct device_node *qe; - int size; - unsigned int num_of_snums; - const u32 *prop; - - num_of_snums = 28; /* The default number of snum for threads is 28 */ - qe = qe_get_device_node(); - if (!qe) - return num_of_snums; - - prop = of_get_property(qe, "fsl,qe-num-snums", ); - if (prop && size == sizeof(*prop)) { - num_of_snums = *prop; - if ((num_of_snums < 28) || (num_of_snums > QE_NUM_OF_SNUM)) { - /* No QE ever has fewer than 28 SNUMs */ - pr_err("QE: number of snum is invalid\n"); - of_node_put(qe); - return -EINVAL; - } - } - - of_node_put(qe); - - return num_of_snums; -} -EXPORT_SYMBOL(qe_get_num_of_snums); - static int __init qe_init(void) { struct device_node *np; diff --git a/include/soc/fsl/qe/qe.h b/include/soc/fsl/qe/qe.h index b3d1aff5e8ad..af5739850bf4 100644 --- a/include/soc/fsl/qe/qe.h +++ b/include/soc/fsl/qe/qe.h @@ -212,7 +212,7 @@ int qe_setbrg(enum qe_clock brg, unsigned int rate, unsigned int multiplier); int qe_get_snum(void); void qe_put_snum(u8 snum); unsigned int qe_get_num_of_risc(void); -unsigned int qe_get_num_of_snums(void); +extern unsigned int qe_num_of_snum; static inline 
int qe_alive_during_sleep(void) { -- 2.20.1
[PATCH 4/5] soc/fsl/qe: qe.c: support fsl,qe-snums property
The current code assumes that the set of snum _values_ to populate the snums[] array with is a function of the _number_ of snums alone. However, reading table 4-30, and its footnotes, of the QUICC Engine Block Reference Manual shows that that is a bit too naive. As an alternative, this introduces a new binding fsl,qe-snums, which automatically encodes both the number of snums and the actual values to use. Conveniently, of_property_read_variable_u8_array does exactly what we need. For example, for the MPC8309, one would specify the property as fsl,qe-snums = /bits/ 8 < 0x88 0x89 0x98 0x99 0xa8 0xa9 0xb8 0xb9 0xc8 0xc9 0xd8 0xd9 0xe8 0xe9>; Signed-off-by: Rasmus Villemoes --- .../devicetree/bindings/soc/fsl/cpm_qe/qe.txt | 8 +++- drivers/soc/fsl/qe/qe.c| 14 +- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/soc/fsl/cpm_qe/qe.txt b/Documentation/devicetree/bindings/soc/fsl/cpm_qe/qe.txt index d7afaff5faff..05f5f485562a 100644 --- a/Documentation/devicetree/bindings/soc/fsl/cpm_qe/qe.txt +++ b/Documentation/devicetree/bindings/soc/fsl/cpm_qe/qe.txt @@ -18,7 +18,8 @@ Required properties: - reg : offset and length of the device registers. - bus-frequency : the clock frequency for QUICC Engine. - fsl,qe-num-riscs: define how many RISC engines the QE has. -- fsl,qe-num-snums: define how many serial number(SNUM) the QE can use for the +- fsl,qe-snums: This property has to be specified as '/bits/ 8' value, + defining the array of serial number (SNUM) values for the virtual threads. Optional properties: @@ -34,6 +35,11 @@ Recommended properties - brg-frequency : the internal clock source frequency for baud-rate generators in Hz. +Deprecated properties +- fsl,qe-num-snums: define how many serial number(SNUM) the QE can use + for the threads. Use fsl,qe-snums instead to not only specify the + number of snums, but also their values. 
+ Example: qe@e010 { #address-cells = <1>; diff --git a/drivers/soc/fsl/qe/qe.c b/drivers/soc/fsl/qe/qe.c index aff9d1373529..af3c2b2b268f 100644 --- a/drivers/soc/fsl/qe/qe.c +++ b/drivers/soc/fsl/qe/qe.c @@ -283,7 +283,6 @@ EXPORT_SYMBOL(qe_clock_source); */ static void qe_snums_init(void) { - int i; static const u8 snum_init_76[] = { 0x04, 0x05, 0x0C, 0x0D, 0x14, 0x15, 0x1C, 0x1D, 0x24, 0x25, 0x2C, 0x2D, 0x34, 0x35, 0x88, 0x89, @@ -304,9 +303,22 @@ static void qe_snums_init(void) 0x28, 0x29, 0x38, 0x39, 0x48, 0x49, 0x58, 0x59, 0x68, 0x69, 0x78, 0x79, 0x80, 0x81, }; + struct device_node *qe; const u8 *snum_init; + int i; bitmap_zero(snum_state, QE_NUM_OF_SNUM); + qe = qe_get_device_node(); + if (qe) { + i = of_property_read_variable_u8_array(qe, "fsl,qe-snums", + snums, 1, QE_NUM_OF_SNUM); + of_node_put(qe); + if (i > 0) { + qe_num_of_snum = i; + return; + } + } + qe_num_of_snum = qe_get_num_of_snums(); if (qe_num_of_snum == 76) -- 2.20.1
[PATCH 1/5] soc/fsl/qe: qe.c: drop useless static qualifier
The local variable snum_init has no reason to have static storage duration. Signed-off-by: Rasmus Villemoes --- drivers/soc/fsl/qe/qe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/soc/fsl/qe/qe.c b/drivers/soc/fsl/qe/qe.c index 612d9c551be5..855373deb746 100644 --- a/drivers/soc/fsl/qe/qe.c +++ b/drivers/soc/fsl/qe/qe.c @@ -306,7 +306,7 @@ static void qe_snums_init(void) 0x28, 0x29, 0x38, 0x39, 0x48, 0x49, 0x58, 0x59, 0x68, 0x69, 0x78, 0x79, 0x80, 0x81, }; - static const u8 *snum_init; + const u8 *snum_init; qe_num_of_snum = qe_get_num_of_snums(); -- 2.20.1
[PATCH 2/5] soc/fsl/qe: qe.c: reduce static memory footprint by 1.7K
The current array of struct qe_snum use 256*4 bytes for just keeping track of the free/used state of each index, and the struct layout means there's another 768 bytes of padding. If we just unzip that structure, the array of snum values just use 256 bytes, while the free/inuse state can be tracked in a 32 byte bitmap. So this reduces the .data footprint by 1760 bytes. It also serves as preparation for introducing another DT binding for specifying the snum values. Signed-off-by: Rasmus Villemoes --- drivers/soc/fsl/qe/qe.c | 37 - 1 file changed, 12 insertions(+), 25 deletions(-) diff --git a/drivers/soc/fsl/qe/qe.c b/drivers/soc/fsl/qe/qe.c index 855373deb746..d0393f83145c 100644 --- a/drivers/soc/fsl/qe/qe.c +++ b/drivers/soc/fsl/qe/qe.c @@ -14,6 +14,7 @@ * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. */ +#include #include #include #include @@ -43,25 +44,14 @@ static DEFINE_SPINLOCK(qe_lock); DEFINE_SPINLOCK(cmxgcr_lock); EXPORT_SYMBOL(cmxgcr_lock); -/* QE snum state */ -enum qe_snum_state { - QE_SNUM_STATE_USED, - QE_SNUM_STATE_FREE -}; - -/* QE snum */ -struct qe_snum { - u8 num; - enum qe_snum_state state; -}; - /* We allocate this here because it is used almost exclusively for * the communication processor devices. 
*/ struct qe_immap __iomem *qe_immr; EXPORT_SYMBOL(qe_immr); -static struct qe_snum snums[QE_NUM_OF_SNUM]; /* Dynamically allocated SNUMs */ +static u8 snums[QE_NUM_OF_SNUM]; /* Dynamically allocated SNUMs */ +static DECLARE_BITMAP(snum_state, QE_NUM_OF_SNUM); static unsigned int qe_num_of_snum; static phys_addr_t qebase = -1; @@ -308,6 +298,7 @@ static void qe_snums_init(void) }; const u8 *snum_init; + bitmap_zero(snum_state, QE_NUM_OF_SNUM); qe_num_of_snum = qe_get_num_of_snums(); if (qe_num_of_snum == 76) @@ -315,10 +306,8 @@ static void qe_snums_init(void) else snum_init = snum_init_46; - for (i = 0; i < qe_num_of_snum; i++) { - snums[i].num = snum_init[i]; - snums[i].state = QE_SNUM_STATE_FREE; - } + for (i = 0; i < qe_num_of_snum; i++) + snums[i] = snum_init[i]; } int qe_get_snum(void) @@ -328,12 +317,10 @@ int qe_get_snum(void) int i; spin_lock_irqsave(_lock, flags); - for (i = 0; i < qe_num_of_snum; i++) { - if (snums[i].state == QE_SNUM_STATE_FREE) { - snums[i].state = QE_SNUM_STATE_USED; - snum = snums[i].num; - break; - } + i = find_first_zero_bit(snum_state, qe_num_of_snum); + if (i < qe_num_of_snum) { + set_bit(i, snum_state); + snum = snums[i]; } spin_unlock_irqrestore(_lock, flags); @@ -346,8 +333,8 @@ void qe_put_snum(u8 snum) int i; for (i = 0; i < qe_num_of_snum; i++) { - if (snums[i].num == snum) { - snums[i].state = QE_SNUM_STATE_FREE; + if (snums[i] == snum) { + clear_bit(i, snum_state); break; } } -- 2.20.1
[PATCH 3/5] soc/fsl/qe: qe.c: introduce qe_get_device_node helper
The 'try of_find_compatible_node(NULL, NULL, "fsl,qe"), fall back to of_find_node_by_type(NULL, "qe")' pattern is repeated five times. Factor it into a common helper. Signed-off-by: Rasmus Villemoes --- drivers/soc/fsl/qe/qe.c | 71 + 1 file changed, 29 insertions(+), 42 deletions(-) diff --git a/drivers/soc/fsl/qe/qe.c b/drivers/soc/fsl/qe/qe.c index d0393f83145c..aff9d1373529 100644 --- a/drivers/soc/fsl/qe/qe.c +++ b/drivers/soc/fsl/qe/qe.c @@ -56,6 +56,20 @@ static unsigned int qe_num_of_snum; static phys_addr_t qebase = -1; +static struct device_node *qe_get_device_node(void) +{ + struct device_node *qe; + + /* +* Newer device trees have an "fsl,qe" compatible property for the QE +* node, but we still need to support older device trees. +*/ + qe = of_find_compatible_node(NULL, NULL, "fsl,qe"); + if (qe) + return qe; + return of_find_node_by_type(NULL, "qe"); +} + static phys_addr_t get_qe_base(void) { struct device_node *qe; @@ -65,12 +79,9 @@ static phys_addr_t get_qe_base(void) if (qebase != -1) return qebase; - qe = of_find_compatible_node(NULL, NULL, "fsl,qe"); - if (!qe) { - qe = of_find_node_by_type(NULL, "qe"); - if (!qe) - return qebase; - } + qe = qe_get_device_node(); + if (!qe) + return qebase; ret = of_address_to_resource(qe, 0, ); if (!ret) @@ -164,12 +175,9 @@ unsigned int qe_get_brg_clk(void) if (brg_clk) return brg_clk; - qe = of_find_compatible_node(NULL, NULL, "fsl,qe"); - if (!qe) { - qe = of_find_node_by_type(NULL, "qe"); - if (!qe) - return brg_clk; - } + qe = qe_get_device_node(); + if (!qe) + return brg_clk; prop = of_get_property(qe, "brg-frequency", ); if (prop && size == sizeof(*prop)) @@ -563,16 +571,9 @@ struct qe_firmware_info *qe_get_firmware_info(void) initialized = 1; - /* -* Newer device trees have an "fsl,qe" compatible property for the QE -* node, but we still need to support older device trees. 
- */ - qe = of_find_compatible_node(NULL, NULL, "fsl,qe"); - if (!qe) { - qe = of_find_node_by_type(NULL, "qe"); - if (!qe) - return NULL; - } + qe = qe_get_device_node(); + if (!qe) + return NULL; /* Find the 'firmware' child node */ fw = of_get_child_by_name(qe, "firmware"); @@ -618,16 +619,9 @@ unsigned int qe_get_num_of_risc(void) unsigned int num_of_risc = 0; const u32 *prop; - qe = of_find_compatible_node(NULL, NULL, "fsl,qe"); - if (!qe) { - /* Older devices trees did not have an "fsl,qe" -* compatible property, so we need to look for -* the QE node by name. -*/ - qe = of_find_node_by_type(NULL, "qe"); - if (!qe) - return num_of_risc; - } + qe = qe_get_device_node(); + if (!qe) + return num_of_risc; prop = of_get_property(qe, "fsl,qe-num-riscs", ); if (prop && size == sizeof(*prop)) @@ -647,16 +641,9 @@ unsigned int qe_get_num_of_snums(void) const u32 *prop; num_of_snums = 28; /* The default number of snum for threads is 28 */ - qe = of_find_compatible_node(NULL, NULL, "fsl,qe"); - if (!qe) { - /* Older devices trees did not have an "fsl,qe" -* compatible property, so we need to look for -* the QE node by name. -*/ - qe = of_find_node_by_type(NULL, "qe"); - if (!qe) - return num_of_snums; - } + qe = qe_get_device_node(); + if (!qe) + return num_of_snums; prop = of_get_property(qe, "fsl,qe-num-snums", ); if (prop && size == sizeof(*prop)) { -- 2.20.1
[PATCH RESEND 0/5] soc/fsl/qe: cleanups and new DT binding
This small series consists of some small cleanups and simplifications of the QUICC engine driver, and introduces a new DT binding that makes it much easier to support other variants of the QUICC engine IP block that appears in the wild: There's no reason to expect in general that the number of valid SNUMs uniquely determines the set of such, so it's better to simply let the device tree specify the values (and, implicitly via the array length, also the count). I sent these two months ago, but mostly as POC inside another thread. Resending as proper patch series. Rasmus Villemoes (5): soc/fsl/qe: qe.c: drop useless static qualifier soc/fsl/qe: qe.c: reduce static memory footprint by 1.7K soc/fsl/qe: qe.c: introduce qe_get_device_node helper soc/fsl/qe: qe.c: support fsl,qe-snums property soc/fsl/qe: qe.c: fold qe_get_num_of_snums into qe_snums_init .../devicetree/bindings/soc/fsl/cpm_qe/qe.txt | 8 +- drivers/net/ethernet/freescale/ucc_geth.c | 2 +- drivers/soc/fsl/qe/qe.c | 162 +++--- include/soc/fsl/qe/qe.h | 2 +- 4 files changed, 73 insertions(+), 101 deletions(-) -- 2.20.1
Re: [PATCH] powerpc/mm/radix: Fix kernel crash when running subpage protect test
> On 30-Apr-2019, at 1:29 PM, Aneesh Kumar K.V > wrote: > > This patch fixes the below crash by making sure we touch the subpage > protection > related structures only if we know they are allocated on the platform. With > radix translation we don't allocate hash context at all and trying to access > subpage_prot_table results in > > Faulting instruction address: 0xc008bdb4 > Oops: Kernel access of bad area, sig: 11 [#1] > LE PAGE_SIZE=64K MMU=Radix MMU=Hash SMP NR_CPUS=2048 NUMA PowerNV > > NIP [c008bdb4] sys_subpage_prot+0x74/0x590 > LR [c000b688] system_call+0x5c/0x70 > Call Trace: > [c00020002c6b7d30] [c00020002c6b7d90] 0xc00020002c6b7d90 (unreliable) > [c00020002c6b7e20] [c000b688] system_call+0x5c/0x70 > Instruction dump: > fb61ffd8 fb81ffe0 fba1ffe8 fbc1fff0 fbe1fff8 f821ff11 e92d1178 f9210068 > 3920 e92d0968 ebe90630 e93f03e8 6000 3860fffe e9410068 > > We also move the subpage_prot_table with mmap_sem held to avoid races > between two parallel subpage_prot syscalls. > > Reported-by: Sachin Sant > Signed-off-by: Aneesh Kumar K.V > — Thanks for the patch. Fixes the kernel crash. Tested-by: Sachin Sant mailto:sach...@linux.vnet.ibm.com>> Thanks -Sachin
Re: [PATCH v4] powerpc/pseries: Remove limit in wait for dying CPU
Thiago Jung Bauermann writes: > This can be a problem because if the busy loop finishes too early, then the > kernel may offline another CPU before the previous one finished dying, > which would lead to two concurrent calls to rtas-stop-self, which is > prohibited by the PAPR. > > Since the hotplug machinery already assumes that cpu_die() is going to > work, we can simply loop until the CPU stops. > > Also change the loop to wait 100 µs between each call to > smp_query_cpu_stopped() to avoid querying RTAS too often. [...] > diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c > b/arch/powerpc/platforms/pseries/hotplug-cpu.c > index 97feb6e79f1a..d75cee60644c 100644 > --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c > +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c > @@ -214,13 +214,17 @@ static void pseries_cpu_die(unsigned int cpu) > msleep(1); > } > } else if (get_preferred_offline_state(cpu) == CPU_STATE_OFFLINE) { > - > - for (tries = 0; tries < 25; tries++) { > + /* > + * rtas_stop_self() panics if the CPU fails to stop and our > + * callers already assume that we are going to succeed, so we > + * can just loop until the CPU stops. > + */ > + while (true) { > cpu_status = smp_query_cpu_stopped(pcpu); > if (cpu_status == QCSS_STOPPED || > cpu_status == QCSS_HARDWARE_ERROR) > break; > - cpu_relax(); > + udelay(100); > } > } I agree with looping indefinitely but doesn't it need a cond_resched() or similar check?
[PATCH v2] powerpc/32s: fix BATs setting with CONFIG_STRICT_KERNEL_RWX
Serge reported some crashes with CONFIG_STRICT_KERNEL_RWX enabled on a book3s32 machine. Analysis shows two issues: - BATs addresses and sizes are not properly aligned. - There is a gap between the last address covered by BATs and the first address covered by pages. Memory mapped with DBATs: 0: 0xc000-0xc07f 0x Kernel RO coherent 1: 0xc080-0xc0bf 0x0080 Kernel RO coherent 2: 0xc0c0-0xc13f 0x00c0 Kernel RW coherent 3: 0xc140-0xc23f 0x0140 Kernel RW coherent 4: 0xc240-0xc43f 0x0240 Kernel RW coherent 5: 0xc440-0xc83f 0x0440 Kernel RW coherent 6: 0xc840-0xd03f 0x0840 Kernel RW coherent 7: 0xd040-0xe03f 0x1040 Kernel RW coherent Memory mapped with pages: 0xe100-0xefff 0x2100 240Mrw present dirty accessed This patch fixes both issues. With the patch, we get the following which is as expected: Memory mapped with DBATs: 0: 0xc000-0xc07f 0x Kernel RO coherent 1: 0xc080-0xc0bf 0x0080 Kernel RO coherent 2: 0xc0c0-0xc0ff 0x00c0 Kernel RW coherent 3: 0xc100-0xc1ff 0x0100 Kernel RW coherent 4: 0xc200-0xc3ff 0x0200 Kernel RW coherent 5: 0xc400-0xc7ff 0x0400 Kernel RW coherent 6: 0xc800-0xcfff 0x0800 Kernel RW coherent 7: 0xd000-0xdfff 0x1000 Kernel RW coherent Memory mapped with pages: 0xe000-0xefff 0x2000 256Mrw present dirty accessed Reported-by: Serge Belyshev Fixes: 63b2bc619565 ("powerpc/mm/32s: Use BATs for STRICT_KERNEL_RWX") Cc: sta...@vger.kernel.org Acked-by: Segher Boessenkool Signed-off-by: Christophe Leroy --- v2: Added comment to explain block_size() function as recommended by Segher. 
arch/powerpc/mm/ppc_mmu_32.c | 18 ++ 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c index bf1de3ca39bc..afd8dcb11432 100644 --- a/arch/powerpc/mm/ppc_mmu_32.c +++ b/arch/powerpc/mm/ppc_mmu_32.c @@ -98,10 +98,20 @@ static int find_free_bat(void) return -1; } +/* + * This function calculates the size of the larger block usable to map the + * beginning of an area based on the start address and size of that area: + * - max block size is 8M on 601 and 256M on other 6xx. + * - base address must be aligned to the block size. So the maximum block size + * is identified by the lowest bit set to 1 in the base address (for instance + * if base is 0x16000000, max size is 0x02000000). + * - block size has to be a power of two. This is calculated by finding the + * highest bit set to 1. + */ static unsigned int block_size(unsigned long base, unsigned long top) { unsigned int max_size = (cpu_has_feature(CPU_FTR_601) ? 8 : 256) << 20; - unsigned int base_shift = (fls(base) - 1) & 31; + unsigned int base_shift = (ffs(base) - 1) & 31; unsigned int block_shift = (fls(top - base) - 1) & 31; return min3(max_size, 1U << base_shift, 1U << block_shift); @@ -157,7 +167,7 @@ static unsigned long __init __mmu_mapin_ram(unsigned long base, unsigned long to unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) { - int done; + unsigned long done; unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET; if (__map_without_bats) { @@ -169,10 +179,10 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) return __mmu_mapin_ram(base, top); done = __mmu_mapin_ram(base, border); - if (done != border - base) + if (done != border) return done; - return done + __mmu_mapin_ram(border, top); + return __mmu_mapin_ram(border, top); } void mmu_mark_initmem_nx(void) -- 2.13.3
Re: [PATCH 22/41] drivers: tty: serial: cpm_uart: fix logging calls
On Mon, Apr 29, 2019 at 05:59:04PM +0200, Christophe Leroy wrote: > Le 27/04/2019 à 14:52, Enrico Weigelt, metux IT consult a écrit : > > Fix checkpatch warnings by using pr_err(): > > > > WARNING: Prefer [subsystem eg: netdev]_err([subsystem]dev, ... then > > dev_err(dev, ... then pr_err(... to printk(KERN_ERR ... > > #109: FILE: drivers/tty/serial/cpm_uart/cpm_uart_cpm2.c:109: > > + printk(KERN_ERR > > > > WARNING: Prefer [subsystem eg: netdev]_err([subsystem]dev, ... then > > dev_err(dev, ... then pr_err(... to printk(KERN_ERR ... > > #128: FILE: drivers/tty/serial/cpm_uart/cpm_uart_cpm2.c:128: > > + printk(KERN_ERR > > > > WARNING: Prefer [subsystem eg: netdev]_err([subsystem]dev, ... then > > dev_err(dev, ... then pr_err(... to printk(KERN_ERR ... > > + printk(KERN_ERR > > > > WARNING: Prefer [subsystem eg: netdev]_err([subsystem]dev, ... then > > dev_err(dev, ... then pr_err(... to printk(KERN_ERR ... > > + printk(KERN_ERR > > > > Signed-off-by: Enrico Weigelt > > Reviewed-by: Christophe Leroy > > But is that really worth doing those changes ? > > If we want to do something useful, wouldn't it make more sense to introduce > the use of dev_err() in order to identify the faulting device in the message > ? +1 for switching to dev_*(). -- With Best Regards, Andy Shevchenko
Re: [PATCH v2 stable v4.4 2/2] Documentation: Add nospectre_v1 parameter
On Tue, Apr 30, 2019 at 03:42:27PM +0300, Diana Craciun wrote: > commit 26cb1f36c43ee6e89d2a9f48a5a7500d5248f836 upstream. > > Currently only supported on powerpc. > > Signed-off-by: Diana Craciun > Signed-off-by: Michael Ellerman > --- > Documentation/kernel-parameters.txt | 4 > 1 file changed, 4 insertions(+) > > diff --git a/Documentation/kernel-parameters.txt > b/Documentation/kernel-parameters.txt > index f0bdf78420a0..3ff87d5d6fea 100644 > --- a/Documentation/kernel-parameters.txt > +++ b/Documentation/kernel-parameters.txt > @@ -2449,6 +2449,10 @@ bytes respectively. Such letter suffixes can also be > entirely omitted. > legacy floating-point registers on task switch. > > nohugeiomap [KNL,x86] Disable kernel huge I/O mappings. > + > + nospectre_v1[PPC] Disable mitigations for Spectre Variant 1 (bounds > + check bypass). With this option data leaks are possible > + in the system. > > nospectre_v2[X86,PPC_FSL_BOOK3E] Disable all mitigations for the > Spectre variant 2 > (indirect branch prediction) vulnerability. System may > -- > 2.17.1 > Both of these patches needed to be added to a bunch of the stable trees, so I've now done that. thanks, greg k-h
Re: [PATCH v2 stable v4.4 2/2] Documentation: Add nospectre_v1 parameter
On Tue, Apr 30, 2019 at 03:42:27PM +0300, Diana Craciun wrote: > commit 26cb1f36c43ee6e89d2a9f48a5a7500d5248f836 upstream. > > Currently only supported on powerpc. > > Signed-off-by: Diana Craciun > Signed-off-by: Michael Ellerman > --- > Documentation/kernel-parameters.txt | 4 > 1 file changed, 4 insertions(+) > > diff --git a/Documentation/kernel-parameters.txt > b/Documentation/kernel-parameters.txt > index f0bdf78420a0..3ff87d5d6fea 100644 > --- a/Documentation/kernel-parameters.txt > +++ b/Documentation/kernel-parameters.txt > @@ -2449,6 +2449,10 @@ bytes respectively. Such letter suffixes can also be > entirely omitted. > legacy floating-point registers on task switch. > > nohugeiomap [KNL,x86] Disable kernel huge I/O mappings. > + Trailing whitespace :( Fix up your editor to flag this as RED or something. I'll go fix it up...
Re: [PATCH kernel v3] powerpc/powernv: Isolate NVLinks between GV100GL on Witherspoon
On Tue, 30 Apr 2019 16:14:35 +1000 Alexey Kardashevskiy wrote: > On 30/04/2019 15:45, Alistair Popple wrote: > > Alexey, > > > > +void pnv_try_isolate_nvidia_v100(struct pci_dev *bridge) > > +{ > > + u32 mask, val; > > + void __iomem *bar0_0, *bar0_12, *bar0_a0; > > + struct pci_dev *pdev; > > + u16 cmd = 0, cmdmask = PCI_COMMAND_MEMORY; > > + > > + if (!bridge->subordinate) > > + return; > > + > > + pdev = list_first_entry_or_null(>subordinate->devices, > > + struct pci_dev, bus_list); > > + if (!pdev) > > + return; > > + > > + if (pdev->vendor != PCI_VENDOR_ID_NVIDIA) > > > > Don't you also need to check the PCIe devid to match only [PV]100 devices > > as > > well? I doubt there's any guarantee these registers will remain the same > > for > > all future (or older) NVIDIA devices. > > > I do not have the complete list of IDs and I already saw 3 different > device ids and this only works for machines with ibm,npu/gpu/nvlinks > properties so for now it works and for the future we are hoping to > either have an open source nvidia driver or some small minidriver (also > from nvidia, or may be a spec allowing us to write one) to allow > topology discovery on the host so we would not depend on the skiboot's > powernv DT. > > > IMHO this should really be done in the device driver in the guest. A > > malcious > > guest could load a modified driver that doesn't do this, but that should > > not > > compromise other guests which presumably load a non-compromised driver that > > disables the links on that guests GPU. However I guess in practice what you > > have here should work equally well. > > Doing it in the guest means a good guest needs to have an updated > driver, we do not really want to depend on this. The idea of IOMMU > groups is that the hypervisor provides isolation irrespective to what > the guest does. +1 It's not the user/guest driver's responsibility to maintain the isolation of the device. 
Thanks, Alex > Also vfio+qemu+slof needs to convey the nvlink topology to the guest, > seems like an unnecessary complication. > > > > > - Alistair > > > > + return; > > + > > + mask = nvlinkgpu_get_disable_mask(>dev); > > + if (!mask) > > + return; > > + > > + bar0_0 = pci_iomap_range(pdev, 0, 0, 0x1); > > + if (!bar0_0) { > > + pci_err(pdev, "Error mapping BAR0 @0\n"); > > + return; > > + } > > + bar0_12 = pci_iomap_range(pdev, 0, 0x12, 0x1); > > + if (!bar0_12) { > > + pci_err(pdev, "Error mapping BAR0 @12\n"); > > + goto bar0_0_unmap; > > + } > > + bar0_a0 = pci_iomap_range(pdev, 0, 0xA0, 0x1); > > + if (!bar0_a0) { > > + pci_err(pdev, "Error mapping BAR0 @A0\n"); > > + goto bar0_12_unmap; > > + } > > Is it really necessary to do three separate ioremaps vs one that would > cover them all here? I suspect you're just sneaking in PAGE_SIZE with > the 0x1 size mappings anyway. Seems like it would simplify setup, > error reporting, and cleanup to to ioremap to the PAGE_ALIGN'd range > of the highest register accessed. Thanks, > >>> > >>> Sure I can map it once, I just do not see the point in mapping/unmapping > >>> all 0xa1>>16=161 system pages for a very short period of time while > >>> we know precisely that we need just 3 pages. > >>> > >>> Repost? > >> > >> Ping? > >> > >> Can this go in as it is (i.e. should I ping Michael) or this needs > >> another round? It would be nice to get some formal acks. Thanks, > >> > Alex > > > + > > + pci_restore_state(pdev); > > + pci_read_config_word(pdev, PCI_COMMAND, ); > > + if ((cmd & cmdmask) != cmdmask) > > + pci_write_config_word(pdev, PCI_COMMAND, cmd | cmdmask); > > + > > + /* > > +* The sequence is from "Tesla P100 and V100 SXM2 NVLink > > Isolation on > > +* Multi-Tenant Systems". > > +* The register names are not provided there either, hence raw > > values. 
> > +*/ > > + iowrite32(0x4, bar0_12 + 0x4C); > > + iowrite32(0x2, bar0_12 + 0x2204); > > + val = ioread32(bar0_0 + 0x200); > > + val |= 0x0200; > > + iowrite32(val, bar0_0 + 0x200); > > + val = ioread32(bar0_a0 + 0x148); > > + val |= mask; > > + iowrite32(val, bar0_a0 + 0x148); > > + > > + if ((cmd | cmdmask) != cmd) > > + pci_write_config_word(pdev, PCI_COMMAND, cmd); > > + > > +
[PATCH v2 stable v4.4 2/2] Documentation: Add nospectre_v1 parameter
commit 26cb1f36c43ee6e89d2a9f48a5a7500d5248f836 upstream. Currently only supported on powerpc. Signed-off-by: Diana Craciun Signed-off-by: Michael Ellerman --- Documentation/kernel-parameters.txt | 4 1 file changed, 4 insertions(+) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index f0bdf78420a0..3ff87d5d6fea 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2449,6 +2449,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. legacy floating-point registers on task switch. nohugeiomap [KNL,x86] Disable kernel huge I/O mappings. + + nospectre_v1[PPC] Disable mitigations for Spectre Variant 1 (bounds + check bypass). With this option data leaks are possible + in the system. nospectre_v2[X86,PPC_FSL_BOOK3E] Disable all mitigations for the Spectre variant 2 (indirect branch prediction) vulnerability. System may -- 2.17.1
[PATCH v2 stable v4.4 1/2] powerpc/fsl: Add FSL_PPC_BOOK3E as supported arch for nospectre_v2 boot arg
commit e59f5bd759b7dee57593c5b6c0441609bda5d530 upstream. Signed-off-by: Diana Craciun Signed-off-by: Michael Ellerman --- Documentation/kernel-parameters.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index da515c535e62..f0bdf78420a0 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2450,7 +2450,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. nohugeiomap [KNL,x86] Disable kernel huge I/O mappings. - nospectre_v2[X86] Disable all mitigations for the Spectre variant 2 + nospectre_v2[X86,PPC_FSL_BOOK3E] Disable all mitigations for the Spectre variant 2 (indirect branch prediction) vulnerability. System may allow data leaks with this option, which is equivalent to spectre_v2=off. -- 2.17.1
[PATCH v3 16/16] powerpc/32: Don't add dummy frames when calling trace_hardirqs_on/off
No need to add dummy frames when calling trace_hardirqs_on or trace_hardirqs_off. GCC properly handles empty stacks. In addition, powerpc doesn't set CONFIG_FRAME_POINTER, therefore __builtin_return_address(1..) returns NULL at all time. So the dummy frames are definitely unneeded here. In the meantime, avoid reading memory for loading r1 with a value we already know. Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/entry_32.S | 16 ++-- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index e65c3e70c648..235a01d34b6d 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -243,12 +243,7 @@ transfer_to_handler_cont: reenable_mmu: /* -* The trace_hardirqs_off will use CALLER_ADDR0 and CALLER_ADDR1. -* If from user mode there is only one stack frame on the stack, and -* accessing CALLER_ADDR1 will cause oops. So we need create a dummy -* stack frame to make trace_hardirqs_off happy. -* -* This is handy because we also need to save a bunch of GPRs, +* We save a bunch of GPRs, * r3 can be different from GPR3(r1) at this point, r9 and r11 * contains the old MSR and handler address respectively, * r4 & r5 can contain page fault arguments that need to be passed @@ -950,18 +945,11 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x) */ andi. r10,r9,MSR_EE beq 1f - /* -* Since the ftrace irqsoff latency trace checks CALLER_ADDR1, -* which is the stack frame here, we need to force a stack frame -* in case we came from user space. -*/ stwur1,-32(r1) mflrr0 stw r0,4(r1) - stwur1,-32(r1) bl trace_hardirqs_on - lwz r1,0(r1) - lwz r1,0(r1) + addir1, r1, 32 lwz r9,_MSR(r1) 1: #endif /* CONFIG_TRACE_IRQFLAGS */ -- 2.13.3
[PATCH v3 14/16] powerpc/32: implement fast entry for syscalls on BOOKE
This patch implements a fast entry for syscalls. Syscalls don't have to preserve non volatile registers except LR. This patch then implement a fast entry for syscalls, where volatile registers get clobbered. As this entry is dedicated to syscall it always sets MSR_EE and warns in case MSR_EE was previously off It also assumes that the call is always from user, system calls are unexpected from kernel. Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/entry_32.S | 7 --- arch/powerpc/kernel/head_44x.S | 3 +- arch/powerpc/kernel/head_booke.h | 103 +-- arch/powerpc/kernel/head_fsl_booke.S | 3 +- 4 files changed, 100 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 184cc1de2f37..dc58fec51ed6 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -342,7 +342,6 @@ stack_ovf: SYNC RFI -#ifndef CONFIG_BOOKE /* to be removed once BOOKE uses fast syscall entry */ #ifdef CONFIG_TRACE_IRQFLAGS trace_syscall_entry_irq_off: /* @@ -369,7 +368,6 @@ transfer_to_syscall: andi. r12,r9,MSR_EE beq-trace_syscall_entry_irq_off #endif /* CONFIG_TRACE_IRQFLAGS */ -#endif /* !CONFIG_BOOKE */ /* * Handle a system call. 
@@ -382,11 +380,6 @@ _GLOBAL(DoSyscall) stw r3,ORIG_GPR3(r1) li r12,0 stw r12,RESULT(r1) -#ifdef CONFIG_BOOKE/* to be removed once BOOKE uses fast syscall entry */ - lwz r11,_CCR(r1)/* Clear SO bit in CR */ - rlwinm r11,r11,0,4,2 - stw r11,_CCR(r1) -#endif #ifdef CONFIG_TRACE_IRQFLAGS /* Make sure interrupts are enabled */ mfmsr r11 diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index e06cb1c84951..7d73c7e39afe 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -282,8 +282,7 @@ interrupt_base: #endif /* System Call Interrupt */ START_EXCEPTION(SystemCall) - NORMAL_EXCEPTION_PROLOG(BOOKE_INTERRUPT_SYSCALL) - EXC_XFER_SYS(0x0c00, DoSyscall) + SYSCALL_ENTRY 0xc00 BOOKE_INTERRUPT_SYSCALL /* Auxiliary Processor Unavailable Interrupt */ EXCEPTION(0x2020, BOOKE_INTERRUPT_AP_UNAVAIL, \ diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 56dd1341eb3d..bfeb469e8106 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -6,6 +6,8 @@ #include #include +#ifdef __ASSEMBLY__ + /* * Macros used for common Book-e exception handling */ @@ -81,6 +83,101 @@ END_BTB_FLUSH_SECTION SAVE_4GPRS(3, r11); \ SAVE_2GPRS(7, r11) +.macro SYSCALL_ENTRY trapno intno + mfspr r10, SPRN_SPRG_THREAD +#ifdef CONFIG_KVM_BOOKE_HV +BEGIN_FTR_SECTION + mtspr SPRN_SPRG_WSCRATCH0, r10 + stw r11, THREAD_NORMSAVE(0)(r10) + stw r13, THREAD_NORMSAVE(2)(r10) + mfcrr13 /* save CR in r13 for now */ + mfspr r11, SPRN_SRR1 + mtocrf 0x80, r11 /* check MSR[GS] without clobbering reg */ + bf 3, 1975f + b kvmppc_handler_BOOKE_INTERRUPT_\intno\()_SPRN_SRR1 +1975: + mr r12, r13 + lwz r13, THREAD_NORMSAVE(2)(r10) +FTR_SECTION_ELSE +#endif + mfcrr12 +#ifdef CONFIG_KVM_BOOKE_HV +ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) +#endif + BOOKE_CLEAR_BTB(r11) + lwz r11, TASK_STACK - THREAD(r10) + rlwinm r12,r12,0,4,2 /* Clear SO bit in CR */ + ALLOC_STACK_FRAME(r11, THREAD_SIZE - INT_FRAME_SIZE) + stw r12, 
_CCR(r11) /* save various registers */ + mflrr12 + stw r12,_LINK(r11) + mfspr r12,SPRN_SRR0 + stw r1, GPR1(r11) + mfspr r9,SPRN_SRR1 + stw r1, 0(r11) + mr r1, r11 + stw r12,_NIP(r11) + rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */ + lis r12, STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ + stw r2,GPR2(r11) + addir12, r12, STACK_FRAME_REGS_MARKER@l + stw r9,_MSR(r11) + li r2, \trapno + 1 + stw r12, 8(r11) + stw r2,_TRAP(r11) + SAVE_GPR(0, r11) + SAVE_4GPRS(3, r11) + SAVE_2GPRS(7, r11) + + addir11,r1,STACK_FRAME_OVERHEAD + addir2,r10,-THREAD + stw r11,PT_REGS(r10) + /* Check to see if the dbcr0 register is set up to debug. Use the + internal debug mode bit to do this. */ + lwz r12,THREAD_DBCR0(r10) + andis. r12,r12,DBCR0_IDM@h + ACCOUNT_CPU_USER_ENTRY(r2, r11, r12) + beq+3f + /* From user and task is ptraced - load up global dbcr0 */ + li r12,-1 /* clear all pending debug events */ + mtspr SPRN_DBSR,r12 +
[PATCH v3 15/16] powerpc/32: don't do syscall stuff in transfer_to_handler
As syscalls are now handled via a fast entry path, syscall related actions can be removed from the generic transfer_to_handler path. Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/entry_32.S | 19 --- 1 file changed, 19 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index dc58fec51ed6..e65c3e70c648 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -217,7 +217,6 @@ transfer_to_handler_cont: */ tophys(r12, r1) lwz r12,_MSR(r12) - xor r12,r10,r12 andi. r12,r12,MSR_EE bne 1f @@ -258,9 +257,6 @@ reenable_mmu: * the rest is restored from the exception frame. */ - /* Are we enabling or disabling interrupts ? */ - andi. r0,r10,MSR_EE - stwur1,-32(r1) stw r9,8(r1) stw r11,12(r1) @@ -268,8 +264,6 @@ reenable_mmu: stw r4,20(r1) stw r5,24(r1) - bne-0f - /* If we are disabling interrupts (normal case), simply log it with * lockdep */ @@ -287,19 +281,6 @@ reenable_mmu: mtctr r11 mtlrr9 bctr/* jump to handler */ - - /* If we are enabling interrupt, this is a syscall. They shouldn't -* happen while interrupts are disabled, so let's do a warning here. -*/ -0: trap - EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING - bl trace_hardirqs_on - - /* Now enable for real */ - mfmsr r10 - ori r10,r10,MSR_EE - mtmsr r10 - b 2b #endif /* CONFIG_TRACE_IRQFLAGS */ #if defined (CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500) -- 2.13.3
[PATCH v3 12/16] powerpc: Fix 32-bit handling of MSR_EE on exceptions
[text mostly copied from benh's RFC/WIP] ppc32 are still doing something rather gothic and wrong on 32-bit which we stopped doing on 64-bit a while ago. We have that thing where some handlers "copy" the EE value from the original stack frame into the new MSR before transferring to the handler. Thus for a number of exceptions, we enter the handlers with interrupts enabled. This is rather fishy, some of the stuff that handlers might do early on such as irq_enter/exit or user_exit, context tracking, etc... should be run with interrupts off afaik. Generally our handlers know when to re-enable interrupts if needed. The problem we were having is that we assumed these interrupts would return with interrupts enabled. However that isn't the case. Instead, this patch changes things so that we always enter exception handlers with interrupts *off* with the notable exception of syscalls which are special (and get a fast path). Suggested-by: Benjamin Herrenschmidt Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/entry_32.S | 116 - 1 file changed, 67 insertions(+), 49 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index d0cea3deb86c..0c555f9f1543 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -37,6 +37,7 @@ #include #include #include +#include #include "head_32.h" @@ -206,19 +207,42 @@ transfer_to_handler_cont: mtspr SPRN_NRI, r0 #endif #ifdef CONFIG_TRACE_IRQFLAGS + /* +* When tracing IRQ state (lockdep) we enable the MMU before we call +* the IRQ tracing functions as they might access vmalloc space or +* perform IOs for console output. +* +* To speed up the syscall path where interrupts stay on, let's check +* first if we are changing the MSR value at all. +*/ + tophys(r12, r1) + lwz r12,_MSR(r12) + xor r12,r10,r12 + andi. 
r12,r12,MSR_EE + bne 1f + + /* MSR isn't changing, just transition directly */ +#endif + mtspr SPRN_SRR0,r11 + mtspr SPRN_SRR1,r10 + mtlrr9 + SYNC + RFI /* jump to handler, enable MMU */ + +#ifdef CONFIG_TRACE_IRQFLAGS +1: /* MSR is changing, re-enable MMU so we can notify lockdep. We need to +* keep interrupts disabled at this point otherwise we might risk +* taking an interrupt before we tell lockdep they are enabled. +*/ lis r12,reenable_mmu@h ori r12,r12,reenable_mmu@l + LOAD_MSR_KERNEL(r0, MSR_KERNEL) mtspr SPRN_SRR0,r12 - mtspr SPRN_SRR1,r10 + mtspr SPRN_SRR1,r0 SYNC RFI -reenable_mmu: /* re-enable mmu so we can */ - mfmsr r10 - lwz r12,_MSR(r1) - xor r10,r10,r12 - andi. r10,r10,MSR_EE /* Did EE change? */ - beq 1f +reenable_mmu: /* * The trace_hardirqs_off will use CALLER_ADDR0 and CALLER_ADDR1. * If from user mode there is only one stack frame on the stack, and @@ -233,14 +257,24 @@ reenable_mmu: /* re-enable mmu so we can */ * they aren't useful past this point (aren't syscall arguments), * the rest is restored from the exception frame. */ + + /* Are we enabling or disabling interrupts ? */ + andi. r0,r10,MSR_EE + stwur1,-32(r1) stw r9,8(r1) stw r11,12(r1) stw r3,16(r1) stw r4,20(r1) stw r5,24(r1) - bl trace_hardirqs_off - lwz r5,24(r1) + + bne-0f + + /* If we are disabling interrupts (normal case), simply log it with +* lockdep +*/ +1: bl trace_hardirqs_off +2: lwz r5,24(r1) lwz r4,20(r1) lwz r3,16(r1) lwz r11,12(r1) @@ -250,15 +284,22 @@ reenable_mmu: /* re-enable mmu so we can */ lwz r6,GPR6(r1) lwz r7,GPR7(r1) lwz r8,GPR8(r1) -1: mtctr r11 + mtctr r11 mtlrr9 bctr/* jump to handler */ -#else /* CONFIG_TRACE_IRQFLAGS */ - mtspr SPRN_SRR0,r11 - mtspr SPRN_SRR1,r10 - mtlrr9 - SYNC - RFI /* jump to handler, enable MMU */ + + /* If we are enabling interrupt, this is a syscall. They shouldn't +* happen while interrupts are disabled, so let's do a warning here. 
+*/ +0: trap + EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING + bl trace_hardirqs_on + + /* Now enable for real */ + mfmsr r10 + ori r10,r10,MSR_EE + mtmsr r10 + b 2b #endif /* CONFIG_TRACE_IRQFLAGS */ #if defined (CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500) @@ -316,29
[PATCH v3 13/16] powerpc/32: implement fast entry for syscalls on non BOOKE
This patch implements a fast entry for syscalls. Syscalls don't have to preserve non volatile registers except LR. This patch then implement a fast entry for syscalls, where volatile registers get clobbered. As this entry is dedicated to syscall it always sets MSR_EE and warns in case MSR_EE was previously off It also assumes that the call is always from user, system calls are unexpected from kernel. The overall series improves null_syscall selftest by 12,5% on an 83xx and by 17% on a 8xx. Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/entry_32.S | 32 arch/powerpc/kernel/head_32.S | 3 +- arch/powerpc/kernel/head_32.h | 85 -- arch/powerpc/kernel/head_40x.S | 3 +- arch/powerpc/kernel/head_8xx.S | 3 +- 5 files changed, 116 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 0c555f9f1543..184cc1de2f37 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -342,6 +342,35 @@ stack_ovf: SYNC RFI +#ifndef CONFIG_BOOKE /* to be removed once BOOKE uses fast syscall entry */ +#ifdef CONFIG_TRACE_IRQFLAGS +trace_syscall_entry_irq_off: + /* +* Syscall shouldn't happen while interrupts are disabled, +* so let's do a warning here. +*/ +0: trap + EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING + bl trace_hardirqs_on + + /* Now enable for real */ + LOAD_MSR_KERNEL(r10, MSR_KERNEL | MSR_EE) + mtmsr r10 + + REST_GPR(0, r1) + REST_4GPRS(3, r1) + REST_2GPRS(7, r1) + b DoSyscall +#endif /* CONFIG_TRACE_IRQFLAGS */ + + .globl transfer_to_syscall +transfer_to_syscall: +#ifdef CONFIG_TRACE_IRQFLAGS + andi. r12,r9,MSR_EE + beq-trace_syscall_entry_irq_off +#endif /* CONFIG_TRACE_IRQFLAGS */ +#endif /* !CONFIG_BOOKE */ + /* * Handle a system call. 
*/ @@ -353,9 +382,11 @@ _GLOBAL(DoSyscall) stw r3,ORIG_GPR3(r1) li r12,0 stw r12,RESULT(r1) +#ifdef CONFIG_BOOKE/* to be removed once BOOKE uses fast syscall entry */ lwz r11,_CCR(r1)/* Clear SO bit in CR */ rlwinm r11,r11,0,4,2 stw r11,_CCR(r1) +#endif #ifdef CONFIG_TRACE_IRQFLAGS /* Make sure interrupts are enabled */ mfmsr r11 @@ -1219,6 +1250,7 @@ load_dbcr0: .section .bss .align 4 + .global global_dbcr0 global_dbcr0: .space 8*NR_CPUS .previous diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index 2404c39373d3..f1da8fef726a 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -370,8 +370,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE) . = 0xc00 DO_KVM 0xc00 SystemCall: - EXCEPTION_PROLOG - EXC_XFER_SYS(0xc00, DoSyscall) + SYSCALL_ENTRY 0xc00 /* Single step - not used on 601 */ EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD) diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 14cb0af2f494..4a692553651f 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -73,6 +73,87 @@ SAVE_2GPRS(7, r11) .endm +.macro SYSCALL_ENTRY trapno + mfspr r12,SPRN_SPRG_THREAD + mfcrr10 + lwz r11,TASK_STACK-THREAD(r12) + mflrr9 + addir11,r11,THREAD_SIZE - INT_FRAME_SIZE + rlwinm r10,r10,0,4,2 /* Clear SO bit in CR */ + tophys(r11,r11) + stw r10,_CCR(r11) /* save registers */ + mfspr r10,SPRN_SRR0 + stw r9,_LINK(r11) + mfspr r9,SPRN_SRR1 + stw r1,GPR1(r11) + stw r1,0(r11) + tovirt(r1,r11) /* set new kernel sp */ + stw r10,_NIP(r11) +#ifdef CONFIG_40x + rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) 
*/ +#else + LOAD_MSR_KERNEL(r10, MSR_KERNEL & ~(MSR_IR|MSR_DR)) /* can take exceptions */ + MTMSRD(r10) /* (except for mach check in rtas) */ +#endif + lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ + stw r2,GPR2(r11) + addir10,r10,STACK_FRAME_REGS_MARKER@l + stw r9,_MSR(r11) + li r2, \trapno + 1 + stw r10,8(r11) + stw r2,_TRAP(r11) + SAVE_GPR(0, r11) + SAVE_4GPRS(3, r11) + SAVE_2GPRS(7, r11) + addir11,r1,STACK_FRAME_OVERHEAD + addir2,r12,-THREAD + stw r11,PT_REGS(r12) +#if defined(CONFIG_40x) + /* Check to see if the dbcr0 register is set up to debug. Use the + internal debug mode bit to do this. */ + lwz r12,THREAD_DBCR0(r12) + andis. r12,r12,DBCR0_IDM@h +#endif + ACCOUNT_CPU_USER_ENTRY(r2, r11, r12) +#if defined(CONFIG_40x) + beq+3f + /* From user and task is
[PATCH v3 11/16] powerpc/32: get rid of COPY_EE in exception entry
EXC_XFER_TEMPLATE() is not called with COPY_EE anymore so we can get rid of copyee parameters and related COPY_EE and NOCOPY macros. Suggested-by: Benjamin Herrenschmidt [splited out from benh RFC patch] Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_32.h| 12 arch/powerpc/kernel/head_40x.S | 8 +++- arch/powerpc/kernel/head_booke.h | 22 -- 3 files changed, 15 insertions(+), 27 deletions(-) diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 8881b6887841..14cb0af2f494 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -103,28 +103,24 @@ addir3,r1,STACK_FRAME_OVERHEAD; \ xfer(n, hdlr) -#define EXC_XFER_TEMPLATE(hdlr, trap, msr, copyee, tfer, ret) \ +#define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret) \ li r10,trap; \ stw r10,_TRAP(r11); \ LOAD_MSR_KERNEL(r10, msr); \ - copyee(r10, r9);\ bl tfer; \ .long hdlr; \ .long ret -#define COPY_EE(d, s) rlwimi d,s,0,MSR_EE -#define NOCOPY(d, s) - #define EXC_XFER_STD(n, hdlr) \ - EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, NOCOPY, transfer_to_handler_full,\ + EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, transfer_to_handler_full, \ ret_from_except_full) #define EXC_XFER_LITE(n, hdlr) \ - EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, NOCOPY, transfer_to_handler, \ + EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, transfer_to_handler, \ ret_from_except) #define EXC_XFER_SYS(n, hdlr) \ - EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL | MSR_EE, NOCOPY, transfer_to_handler, \ + EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL | MSR_EE, transfer_to_handler, \ ret_from_except) #endif /* __HEAD_32_H__ */ diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 71597fb7cd89..b68de183faf1 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -166,8 +166,7 @@ _ENTRY(saved_ksp_limit) CRITICAL_EXCEPTION_PROLOG; \ addir3,r1,STACK_FRAME_OVERHEAD; \ EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ - NOCOPY, 
crit_transfer_to_handler, \ - ret_from_crit_exc) + crit_transfer_to_handler, ret_from_crit_exc) /* * 0x0100 - Critical Interrupt Exception @@ -651,7 +650,7 @@ _ENTRY(saved_ksp_limit) addir3,r1,STACK_FRAME_OVERHEAD EXC_XFER_TEMPLATE(DebugException, 0x2002, \ (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ - NOCOPY, crit_transfer_to_handler, ret_from_crit_exc) + crit_transfer_to_handler, ret_from_crit_exc) /* Programmable Interval Timer (PIT) Exception. (from 0x1000) */ Decrementer: @@ -673,8 +672,7 @@ WDTException: addir3,r1,STACK_FRAME_OVERHEAD; EXC_XFER_TEMPLATE(WatchdogException, 0x1020+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), - NOCOPY, crit_transfer_to_handler, - ret_from_crit_exc) + crit_transfer_to_handler, ret_from_crit_exc) /* * The other Data TLB exceptions bail out to this point diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 264976c43f34..56dd1341eb3d 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -217,8 +217,7 @@ END_BTB_FLUSH_SECTION CRITICAL_EXCEPTION_PROLOG(intno); \ addir3,r1,STACK_FRAME_OVERHEAD; \ EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ - NOCOPY, crit_transfer_to_handler, \ - ret_from_crit_exc) + crit_transfer_to_handler, ret_from_crit_exc) #define MCHECK_EXCEPTION(n, label, hdlr) \ START_EXCEPTION(label); \ @@ -227,32 +226,27 @@ END_BTB_FLUSH_SECTION stw r5,_ESR(r11); \ addir3,r1,STACK_FRAME_OVERHEAD; \ EXC_XFER_TEMPLATE(hdlr, n+4, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ - NOCOPY, mcheck_transfer_to_handler, \ - ret_from_mcheck_exc) + mcheck_transfer_to_handler, ret_from_mcheck_exc) -#define EXC_XFER_TEMPLATE(hdlr, trap, msr, copyee, tfer, ret) \ +#define
[PATCH v3 10/16] powerpc/32: Enter exceptions with MSR_EE unset
All exceptions handlers know when to reenable interrupts, so it is safer to enter all of them with MSR_EE unset, except for syscalls. Suggested-by: Benjamin Herrenschmidt [splited out from benh RFC patch] Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_32.S| 68 ++-- arch/powerpc/kernel/head_32.h| 8 - arch/powerpc/kernel/head_40x.S | 44 +++ arch/powerpc/kernel/head_44x.S | 6 ++-- arch/powerpc/kernel/head_8xx.S | 32 - arch/powerpc/kernel/head_booke.h | 12 ++- arch/powerpc/kernel/head_fsl_booke.S | 26 +++--- 7 files changed, 90 insertions(+), 106 deletions(-) diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index 6aa8addce296..2404c39373d3 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -337,7 +337,7 @@ Alignment: mfspr r5,SPRN_DSISR stw r5,_DSISR(r11) addir3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_EE(0x600, alignment_exception) + EXC_XFER_STD(0x600, alignment_exception) /* Program check exception */ EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD) @@ -358,13 +358,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE) bl load_up_fpu /* if from user, just load it up */ b fast_exception_return 1: addir3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_EE_LITE(0x800, kernel_fp_unavailable_exception) + EXC_XFER_LITE(0x800, kernel_fp_unavailable_exception) /* Decrementer */ EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE) - EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_EE) - EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_EE) + EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_STD) + EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_STD) /* System call */ . = 0xc00 @@ -375,7 +375,7 @@ SystemCall: /* Single step - not used on 601 */ EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD) - EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_EE) + EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_STD) /* * The Altivec unavailable trap is at 0x0f20. Foo. 
@@ -607,35 +607,35 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) #define altivec_assist_exception unknown_exception #endif - EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception, EXC_XFER_EE) - EXCEPTION(0x1400, SMI, SMIException, EXC_XFER_EE) - EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1600, Trap_16, altivec_assist_exception, EXC_XFER_EE) + EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception, EXC_XFER_STD) + EXCEPTION(0x1400, SMI, SMIException, EXC_XFER_STD) + EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1600, Trap_16, altivec_assist_exception, EXC_XFER_STD) EXCEPTION(0x1700, Trap_17, TAUException, EXC_XFER_STD) - EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1c00, Trap_1c, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2000, RunMode, RunModeException, EXC_XFER_EE) - EXCEPTION(0x2100, Trap_21, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2200, Trap_22, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2300, Trap_23, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2400, Trap_24, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2500, Trap_25, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2600, Trap_26, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2700, Trap_27, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2800, Trap_28, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2900, Trap_29, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2a00, Trap_2a, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2b00, Trap_2b, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2c00, Trap_2c, unknown_exception, EXC_XFER_EE) - 
EXCEPTION(0x2d00, Trap_2d, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2e00, Trap_2e, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2f00, Trap_2f, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_STD) +
[PATCH v3 06/16] powerpc/40x: Split and rename NORMAL_EXCEPTION_PROLOG
This patch splits NORMAL_EXCEPTION_PROLOG in the same way as in head_8xx.S and head_32.S and renames it EXCEPTION_PROLOG() as well to match head_32.h Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_40x.S | 26 -- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index cb95a5c17cea..1547750567b6 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -103,10 +103,14 @@ _ENTRY(saved_ksp_limit) * turned off (i.e. using physical addresses). We assume SPRG_THREAD has * the physical address of the current task thread_struct. */ -#define NORMAL_EXCEPTION_PROLOG \ +#define EXCEPTION_PROLOG\ mtspr SPRN_SPRG_SCRATCH0,r10; /* save two registers to work with */\ mtspr SPRN_SPRG_SCRATCH1,r11; \ mfcrr10;/* save CR in r10 for now */\ + EXCEPTION_PROLOG_1; \ + EXCEPTION_PROLOG_2 + +#define EXCEPTION_PROLOG_1 \ mfspr r11,SPRN_SRR1; /* check whether user or kernel*/\ andi. r11,r11,MSR_PR; \ tophys(r11,r1); \ @@ -115,7 +119,9 @@ _ENTRY(saved_ksp_limit) lwz r11,TASK_STACK-THREAD(r11); /* this thread's kernel stack */\ addir11,r11,THREAD_SIZE; \ tophys(r11,r11); \ -1: subir11,r11,INT_FRAME_SIZE; /* Allocate an exception frame */\ +1: subir11,r11,INT_FRAME_SIZE /* Allocate an exception frame */ + +#define EXCEPTION_PROLOG_2 \ stw r10,_CCR(r11); /* save various registers */\ stw r12,GPR12(r11); \ stw r9,GPR9(r11);\ @@ -205,7 +211,7 @@ label: #define EXCEPTION(n, label, hdlr, xfer)\ START_EXCEPTION(n, label); \ - NORMAL_EXCEPTION_PROLOG;\ + EXCEPTION_PROLOG; \ addir3,r1,STACK_FRAME_OVERHEAD; \ xfer(n, hdlr) @@ -396,7 +402,7 @@ label: * This is caused by a fetch from non-execute or guarded pages. 
*/ START_EXCEPTION(0x0400, InstructionAccess) - NORMAL_EXCEPTION_PROLOG + EXCEPTION_PROLOG mr r4,r12 /* Pass SRR0 as arg2 */ li r5,0/* Pass zero as arg3 */ EXC_XFER_LITE(0x400, handle_page_fault) @@ -406,7 +412,7 @@ label: /* 0x0600 - Alignment Exception */ START_EXCEPTION(0x0600, Alignment) - NORMAL_EXCEPTION_PROLOG + EXCEPTION_PROLOG mfspr r4,SPRN_DEAR/* Grab the DEAR and save it */ stw r4,_DEAR(r11) addir3,r1,STACK_FRAME_OVERHEAD @@ -414,7 +420,7 @@ label: /* 0x0700 - Program Exception */ START_EXCEPTION(0x0700, ProgramCheck) - NORMAL_EXCEPTION_PROLOG + EXCEPTION_PROLOG mfspr r4,SPRN_ESR /* Grab the ESR and save it */ stw r4,_ESR(r11) addir3,r1,STACK_FRAME_OVERHEAD @@ -427,7 +433,7 @@ label: /* 0x0C00 - System Call Exception */ START_EXCEPTION(0x0C00, SystemCall) - NORMAL_EXCEPTION_PROLOG + EXCEPTION_PROLOG EXC_XFER_EE_LITE(0xc00, DoSyscall) EXCEPTION(0x0D00, Trap_0D, unknown_exception, EXC_XFER_EE) @@ -733,7 +739,7 @@ label: /* Programmable Interval Timer (PIT) Exception. (from 0x1000) */ Decrementer: - NORMAL_EXCEPTION_PROLOG + EXCEPTION_PROLOG lis r0,TSR_PIS@h mtspr SPRN_TSR,r0 /* Clear the PIT exception */ addir3,r1,STACK_FRAME_OVERHEAD @@ -741,7 +747,7 @@ Decrementer: /* Fixed Interval Timer (FIT) Exception. (from 0x1010) */ FITException: - NORMAL_EXCEPTION_PROLOG + EXCEPTION_PROLOG addir3,r1,STACK_FRAME_OVERHEAD; EXC_XFER_EE(0x1010, unknown_exception) @@ -759,7 +765,7 @@ WDTException: * if they can't resolve the lightweight TLB fault. */ DataAccess: - NORMAL_EXCEPTION_PROLOG + EXCEPTION_PROLOG mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */ stw r5,_ESR(r11) mfspr r4,SPRN_DEAR/* Grab the DEAR, save it, pass arg2 */ -- 2.13.3
[PATCH v3 09/16] powerpc/32: enter syscall with MSR_EE unconditionally set
syscalls are expected to be entered with MSR_EE set. Lets make it inconditional by forcing MSR_EE on syscalls. This patch adds EXC_XFER_SYS for that. Suggested-by: Benjamin Herrenschmidt [splited out from benh RFC patch] Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_32.S| 2 +- arch/powerpc/kernel/head_32.h| 4 arch/powerpc/kernel/head_40x.S | 2 +- arch/powerpc/kernel/head_44x.S | 2 +- arch/powerpc/kernel/head_8xx.S | 2 +- arch/powerpc/kernel/head_booke.h | 4 arch/powerpc/kernel/head_fsl_booke.S | 2 +- 7 files changed, 13 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index fbc655aa0acf..6aa8addce296 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -371,7 +371,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE) DO_KVM 0xc00 SystemCall: EXCEPTION_PROLOG - EXC_XFER_EE_LITE(0xc00, DoSyscall) + EXC_XFER_SYS(0xc00, DoSyscall) /* Single step - not used on 601 */ EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD) diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index aa0131bb09b5..7221418a883f 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -123,6 +123,10 @@ EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, NOCOPY, transfer_to_handler, \ ret_from_except) +#define EXC_XFER_SYS(n, hdlr) \ + EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL | MSR_EE, NOCOPY, transfer_to_handler, \ + ret_from_except) + #define EXC_XFER_EE(n, hdlr) \ EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, COPY_EE, transfer_to_handler_full, \ ret_from_except_full) diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index cce9bd33a176..1a80a3e45e44 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -350,7 +350,7 @@ _ENTRY(saved_ksp_limit) /* 0x0C00 - System Call Exception */ START_EXCEPTION(0x0C00, SystemCall) EXCEPTION_PROLOG - EXC_XFER_EE_LITE(0xc00, DoSyscall) + EXC_XFER_SYS(0xc00, DoSyscall) 
EXCEPTION(0x0D00, Trap_0D, unknown_exception, EXC_XFER_EE) EXCEPTION(0x0E00, Trap_0E, unknown_exception, EXC_XFER_EE) diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index 37117ab11584..9cc01948651f 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -283,7 +283,7 @@ interrupt_base: /* System Call Interrupt */ START_EXCEPTION(SystemCall) NORMAL_EXCEPTION_PROLOG(BOOKE_INTERRUPT_SYSCALL) - EXC_XFER_EE_LITE(0x0c00, DoSyscall) + EXC_XFER_SYS(0x0c00, DoSyscall) /* Auxiliary Processor Unavailable Interrupt */ EXCEPTION(0x2020, BOOKE_INTERRUPT_AP_UNAVAIL, \ diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 7b76ad1b9620..19ad6484f198 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -186,7 +186,7 @@ Alignment: . = 0xc00 SystemCall: EXCEPTION_PROLOG - EXC_XFER_EE_LITE(0xc00, DoSyscall) + EXC_XFER_SYS(0xc00, DoSyscall) /* Single step - not used on 601 */ EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD) diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 1b22a8dea399..612f54ba1125 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -251,6 +251,10 @@ END_BTB_FLUSH_SECTION EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, NOCOPY, transfer_to_handler, \ ret_from_except) +#define EXC_XFER_SYS(n, hdlr) \ + EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL | MSR_EE, NOCOPY, transfer_to_handler, \ + ret_from_except) + #define EXC_XFER_EE(n, hdlr) \ EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, COPY_EE, transfer_to_handler_full, \ ret_from_except_full) diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 32332e24e421..e77a2ed94642 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -411,7 +411,7 @@ interrupt_base: /* System Call Interrupt */ START_EXCEPTION(SystemCall) NORMAL_EXCEPTION_PROLOG(SYSCALL) - 
EXC_XFER_EE_LITE(0x0c00, DoSyscall) + EXC_XFER_SYS(0x0c00, DoSyscall) /* Auxiliary Processor Unavailable Interrupt */ EXCEPTION(0x2900, AP_UNAVAIL, AuxillaryProcessorUnavailable, \ -- 2.13.3
[PATCH v3 08/16] powerpc/fsl_booke: ensure SPEFloatingPointException() reenables interrupts
SPEFloatingPointException() is the only exception handler which 'forgets' to re-enable interrupts. This patch makes sure it does. Suggested-by: Benjamin Herrenschmidt Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/traps.c | 8 1 file changed, 8 insertions(+) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 1fd45a8650e1..665f294725cb 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -2088,6 +2088,10 @@ void SPEFloatingPointException(struct pt_regs *regs) int code = FPE_FLTUNK; int err; + /* We restore the interrupt state now */ + if (!arch_irq_disabled_regs(regs)) + local_irq_enable(); + flush_spe_to_thread(current); spefscr = current->thread.spefscr; @@ -2133,6 +2137,10 @@ void SPEFloatingPointRoundException(struct pt_regs *regs) extern int speround_handler(struct pt_regs *regs); int err; + /* We restore the interrupt state now */ + if (!arch_irq_disabled_regs(regs)) + local_irq_enable(); + preempt_disable(); if (regs->msr & MSR_SPE) giveup_spe(current); -- 2.13.3
[PATCH v3 05/16] powerpc/40x: add exception frame marker
This patch adds STACK_FRAME_REGS_MARKER in the stack at exception entry in order to see interrupts in call traces as below: [0.013964] Call Trace: [0.014014] [c0745db0] [c007a9d4] tick_periodic.constprop.5+0xd8/0x104 (unreliable) [0.014086] [c0745dc0] [c007aa20] tick_handle_periodic+0x20/0x9c [0.014181] [c0745de0] [c0009cd0] timer_interrupt+0xa0/0x264 [0.014258] [c0745e10] [c000e484] ret_from_except+0x0/0x14 [0.014390] --- interrupt: 901 at console_unlock.part.7+0x3f4/0x528 [0.014390] LR = console_unlock.part.7+0x3f0/0x528 [0.014455] [c0745ee0] [c0050334] console_unlock.part.7+0x114/0x528 (unreliable) [0.014542] [c0745f30] [c00524e0] register_console+0x3d8/0x44c [0.014625] [c0745f60] [c0675aac] cpm_uart_console_init+0x18/0x2c [0.014709] [c0745f70] [c06614f4] console_init+0x114/0x1cc [0.014795] [c0745fb0] [c0658b68] start_kernel+0x300/0x3d8 [0.014864] [c0745ff0] [c00022cc] start_here+0x44/0x98 Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_40x.S | 6 ++ 1 file changed, 6 insertions(+) diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index f49b0278e995..cb95a5c17cea 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -132,6 +132,9 @@ _ENTRY(saved_ksp_limit) tovirt(r1,r11); /* set new kernel sp */ \ rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\ stw r0,GPR0(r11);\ + lis r10, STACK_FRAME_REGS_MARKER@ha; /* exception frame marker */\ + addir10, r10, STACK_FRAME_REGS_MARKER@l; \ + stw r10, 8(r11); \ SAVE_4GPRS(3, r11); \ SAVE_2GPRS(7, r11) @@ -174,6 +177,9 @@ _ENTRY(saved_ksp_limit) tovirt(r1,r11); \ rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\ stw r0,GPR0(r11);\ + lis r10, STACK_FRAME_REGS_MARKER@ha; /* exception frame marker */\ + addir10, r10, STACK_FRAME_REGS_MARKER@l; \ + stw r10, 8(r11); \ SAVE_4GPRS(3, r11); \ SAVE_2GPRS(7, r11) -- 2.13.3
[PATCH v3 07/16] powerpc/40x: Refactor exception entry macros by using head_32.h
Refactor exception entry macros by using the ones defined in head_32.h Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_32.h | 4 ++ arch/powerpc/kernel/head_40x.S | 88 +- 2 files changed, 6 insertions(+), 86 deletions(-) diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 985758cbf577..aa0131bb09b5 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -59,8 +59,12 @@ stw r1,GPR1(r11) stw r1,0(r11) tovirt(r1,r11) /* set new kernel sp */ +#ifdef CONFIG_40x + rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */ +#else li r10,MSR_KERNEL & ~(MSR_IR|MSR_DR) /* can take exceptions */ MTMSRD(r10) /* (except for mach check in rtas) */ +#endif stw r0,GPR0(r11) lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ addir10,r10,STACK_FRAME_REGS_MARKER@l diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 1547750567b6..cce9bd33a176 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -44,6 +44,8 @@ #include #include +#include "head_32.h" + /* As with the other PowerPC ports, it is expected that when code * execution begins here, the following registers contain valid, yet * optional, information: @@ -99,52 +101,6 @@ _ENTRY(saved_ksp_limit) .space 4 /* - * Exception vector entry code. This code runs with address translation - * turned off (i.e. using physical addresses). We assume SPRG_THREAD has - * the physical address of the current task thread_struct. - */ -#define EXCEPTION_PROLOG\ - mtspr SPRN_SPRG_SCRATCH0,r10; /* save two registers to work with */\ - mtspr SPRN_SPRG_SCRATCH1,r11; \ - mfcrr10;/* save CR in r10 for now */\ - EXCEPTION_PROLOG_1; \ - EXCEPTION_PROLOG_2 - -#define EXCEPTION_PROLOG_1 \ - mfspr r11,SPRN_SRR1; /* check whether user or kernel*/\ - andi. 
r11,r11,MSR_PR; \ - tophys(r11,r1); \ - beq 1f; \ - mfspr r11,SPRN_SPRG_THREAD; /* if from user, start at top of */\ - lwz r11,TASK_STACK-THREAD(r11); /* this thread's kernel stack */\ - addir11,r11,THREAD_SIZE; \ - tophys(r11,r11); \ -1: subir11,r11,INT_FRAME_SIZE /* Allocate an exception frame */ - -#define EXCEPTION_PROLOG_2 \ - stw r10,_CCR(r11); /* save various registers */\ - stw r12,GPR12(r11); \ - stw r9,GPR9(r11);\ - mfspr r10,SPRN_SPRG_SCRATCH0; \ - stw r10,GPR10(r11); \ - mfspr r12,SPRN_SPRG_SCRATCH1; \ - stw r12,GPR11(r11); \ - mflrr10; \ - stw r10,_LINK(r11); \ - mfspr r12,SPRN_SRR0; \ - stw r1,GPR1(r11);\ - mfspr r9,SPRN_SRR1;\ - stw r1,0(r11); \ - tovirt(r1,r11); /* set new kernel sp */ \ - rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\ - stw r0,GPR0(r11);\ - lis r10, STACK_FRAME_REGS_MARKER@ha; /* exception frame marker */\ - addir10, r10, STACK_FRAME_REGS_MARKER@l; \ - stw r10, 8(r11); \ - SAVE_4GPRS(3, r11); \ - SAVE_2GPRS(7, r11) - -/* * Exception prolog for critical exceptions. This is a little different * from the normal exception prolog above since a critical exception * can potentially occur at any point during normal exception processing. @@ -205,16 +161,6 @@ _ENTRY(saved_ksp_limit) /* * Exception vectors. */ -#defineSTART_EXCEPTION(n, label) \
[PATCH v3 04/16] powerpc/40x: Don't use SPRN_SPRG_SCRATCH2 in EXCEPTION_PROLOG
Unlike said in the comment, r1 is not reused by the critical exception handler, as it uses a dedicated critirq_ctx stack. Decrementing r1 early is then unneeded. Should the above be valid, the code is crap buggy anyway as r1 gets some intermediate values that would jeopardise the whole process (for instance after mfspr r1,SPRN_SPRG_THREAD) Using SPRN_SPRG_SCRATCH2 to save r1 is then not needed, r11 can be used instead. This avoids one mtspr and one mfspr and makes the prolog closer to what's done on 6xx and 8xx. Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_40x.S | 21 + 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index a9c934f2319b..f49b0278e995 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -102,23 +102,20 @@ _ENTRY(saved_ksp_limit) * Exception vector entry code. This code runs with address translation * turned off (i.e. using physical addresses). We assume SPRG_THREAD has * the physical address of the current task thread_struct. - * Note that we have to have decremented r1 before we write to any fields - * of the exception frame, since a critical interrupt could occur at any - * time, and it will write to the area immediately below the current r1. */ #define NORMAL_EXCEPTION_PROLOG \ mtspr SPRN_SPRG_SCRATCH0,r10; /* save two registers to work with */\ mtspr SPRN_SPRG_SCRATCH1,r11; \ - mtspr SPRN_SPRG_SCRATCH2,r1; \ mfcrr10;/* save CR in r10 for now */\ mfspr r11,SPRN_SRR1; /* check whether user or kernel*/\ andi. 
r11,r11,MSR_PR; \ - beq 1f; \ - mfspr r1,SPRN_SPRG_THREAD;/* if from user, start at top of */\ - lwz r1,TASK_STACK-THREAD(r1); /* this thread's kernel stack */\ - addir1,r1,THREAD_SIZE; \ -1: subir1,r1,INT_FRAME_SIZE; /* Allocate an exception frame */\ tophys(r11,r1); \ + beq 1f; \ + mfspr r11,SPRN_SPRG_THREAD; /* if from user, start at top of */\ + lwz r11,TASK_STACK-THREAD(r11); /* this thread's kernel stack */\ + addir11,r11,THREAD_SIZE; \ + tophys(r11,r11); \ +1: subir11,r11,INT_FRAME_SIZE; /* Allocate an exception frame */\ stw r10,_CCR(r11); /* save various registers */\ stw r12,GPR12(r11); \ stw r9,GPR9(r11);\ @@ -128,11 +125,11 @@ _ENTRY(saved_ksp_limit) stw r12,GPR11(r11); \ mflrr10; \ stw r10,_LINK(r11); \ - mfspr r10,SPRN_SPRG_SCRATCH2; \ mfspr r12,SPRN_SRR0; \ - stw r10,GPR1(r11); \ + stw r1,GPR1(r11);\ mfspr r9,SPRN_SRR1;\ - stw r10,0(r11); \ + stw r1,0(r11); \ + tovirt(r1,r11); /* set new kernel sp */ \ rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\ stw r0,GPR0(r11);\ SAVE_4GPRS(3, r11); \ -- 2.13.3
[PATCH v3 03/16] powerpc/32: make the 6xx/8xx EXC_XFER_TEMPLATE() similar to the 40x/booke one
6xx/8xx EXC_XFER_TEMPLATE() macro adds a i##n symbol which is unused and can be removed. 40x and booke EXC_XFER_TEMPLATE() macros takes msr from the caller while the 6xx/8xx version uses only MSR_KERNEL as msr value. This patch modifies the 6xx/8xx version to make it similar to the 40x and booke versions. Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_32.h | 13 ++--- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index cf3d00844597..985758cbf577 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -99,13 +99,12 @@ addir3,r1,STACK_FRAME_OVERHEAD; \ xfer(n, hdlr) -#define EXC_XFER_TEMPLATE(n, hdlr, trap, copyee, tfer, ret)\ +#define EXC_XFER_TEMPLATE(hdlr, trap, msr, copyee, tfer, ret) \ li r10,trap; \ stw r10,_TRAP(r11); \ - LOAD_MSR_KERNEL(r10, MSR_KERNEL); \ + LOAD_MSR_KERNEL(r10, msr); \ copyee(r10, r9);\ bl tfer; \ -i##n: \ .long hdlr; \ .long ret @@ -113,19 +112,19 @@ i##n: \ #define NOCOPY(d, s) #define EXC_XFER_STD(n, hdlr) \ - EXC_XFER_TEMPLATE(n, hdlr, n, NOCOPY, transfer_to_handler_full, \ + EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, NOCOPY, transfer_to_handler_full,\ ret_from_except_full) #define EXC_XFER_LITE(n, hdlr) \ - EXC_XFER_TEMPLATE(n, hdlr, n+1, NOCOPY, transfer_to_handler, \ + EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, NOCOPY, transfer_to_handler, \ ret_from_except) #define EXC_XFER_EE(n, hdlr) \ - EXC_XFER_TEMPLATE(n, hdlr, n, COPY_EE, transfer_to_handler_full, \ + EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, COPY_EE, transfer_to_handler_full, \ ret_from_except_full) #define EXC_XFER_EE_LITE(n, hdlr) \ - EXC_XFER_TEMPLATE(n, hdlr, n+1, COPY_EE, transfer_to_handler, \ + EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, COPY_EE, transfer_to_handler, \ ret_from_except) #endif /* __HEAD_32_H__ */ -- 2.13.3
[PATCH v3 02/16] powerpc/32: move LOAD_MSR_KERNEL() into head_32.h and use it
As preparation for using head_32.h for head_40x.S, move LOAD_MSR_KERNEL() there and use it to load r10 with MSR_KERNEL value. In the mean time, this patch modifies it so that it takes into account the size of the passed value to determine if 'li' can be used or if 'lis/ori' is needed instead of using the size of MSR_KERNEL. This is done by using gas macro. Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/entry_32.S | 9 + arch/powerpc/kernel/head_32.h | 15 ++- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 2f3d159c11d7..d0cea3deb86c 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -38,14 +38,7 @@ #include #include -/* - * MSR_KERNEL is > 0x1 on 4xx/Book-E since it include MSR_CE. - */ -#if MSR_KERNEL >= 0x1 -#define LOAD_MSR_KERNEL(r, x) lis r,(x)@h; ori r,r,(x)@l -#else -#define LOAD_MSR_KERNEL(r, x) li r,(x) -#endif +#include "head_32.h" /* * Align to 4k in order to ensure that all functions modyfing srr0/srr1 diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 7456e2a45acc..cf3d00844597 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -5,6 +5,19 @@ #include /* for STACK_FRAME_REGS_MARKER */ /* + * MSR_KERNEL is > 0x8000 on 4xx/Book-E since it include MSR_CE. + */ +.macro __LOAD_MSR_KERNEL r, x +.if \x >= 0x8000 + lis \r, (\x)@h + ori \r, \r, (\x)@l +.else + li \r, (\x) +.endif +.endm +#define LOAD_MSR_KERNEL(r, x) __LOAD_MSR_KERNEL r, x + +/* * Exception entry code. This code runs with address translation * turned off, i.e. using physical addresses. * We assume sprg3 has the physical address of the current @@ -89,7 +102,7 @@ #define EXC_XFER_TEMPLATE(n, hdlr, trap, copyee, tfer, ret)\ li r10,trap; \ stw r10,_TRAP(r11); \ - li r10,MSR_KERNEL; \ + LOAD_MSR_KERNEL(r10, MSR_KERNEL); \ copyee(r10, r9);\ bl tfer; \ i##n: \ -- 2.13.3
[PATCH v3 01/16] powerpc/32: Refactor EXCEPTION entry macros for head_8xx.S and head_32.S
EXCEPTION_PROLOG is similar in head_8xx.S and head_32.S This patch creates head_32.h and moves EXCEPTION_PROLOG macro into it. It also converts it from a GCC macro to a GAS macro in order to ease refactorisation with 40x later, since GAS macros allows the use of #ifdef/#else/#endif inside it. And it also has the advantage of not requiring the uggly "; \" at the end of each line. This patch also moves EXCEPTION() and EXC_XFER_() macros which are also similar while adding START_EXCEPTION() out of EXCEPTION(). Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_32.S | 99 +- arch/powerpc/kernel/head_32.h | 118 + arch/powerpc/kernel/head_8xx.S | 98 +- 3 files changed, 122 insertions(+), 193 deletions(-) create mode 100644 arch/powerpc/kernel/head_32.h diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index 40aec3f00a05..fbc655aa0acf 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -37,6 +37,8 @@ #include #include +#include "head_32.h" + /* 601 only have IBAT; cr0.eq is set on 601 when using this macro */ #define LOAD_BAT(n, reg, RA, RB) \ /* see the comment for clear_bats() -- Cort */ \ @@ -242,103 +244,6 @@ __secondary_hold_spinloop: __secondary_hold_acknowledge: .long -1 -/* - * Exception entry code. This code runs with address translation - * turned off, i.e. using physical addresses. - * We assume sprg3 has the physical address of the current - * task's thread_struct. - */ -#define EXCEPTION_PROLOG \ - mtspr SPRN_SPRG_SCRATCH0,r10; \ - mtspr SPRN_SPRG_SCRATCH1,r11; \ - mfcrr10;\ - EXCEPTION_PROLOG_1; \ - EXCEPTION_PROLOG_2 - -#define EXCEPTION_PROLOG_1 \ - mfspr r11,SPRN_SRR1; /* check whether user or kernel */ \ - andi. r11,r11,MSR_PR; \ - tophys(r11,r1); /* use tophys(r1) if kernel */ \ - beq 1f; \ - mfspr r11,SPRN_SPRG_THREAD; \ - lwz r11,TASK_STACK-THREAD(r11); \ - addir11,r11,THREAD_SIZE;\ - tophys(r11,r11);\ -1: subir11,r11,INT_FRAME_SIZE /* alloc exc. 
frame */ - - -#define EXCEPTION_PROLOG_2 \ - stw r10,_CCR(r11); /* save registers */ \ - stw r12,GPR12(r11); \ - stw r9,GPR9(r11); \ - mfspr r10,SPRN_SPRG_SCRATCH0; \ - stw r10,GPR10(r11); \ - mfspr r12,SPRN_SPRG_SCRATCH1; \ - stw r12,GPR11(r11); \ - mflrr10;\ - stw r10,_LINK(r11); \ - mfspr r12,SPRN_SRR0; \ - mfspr r9,SPRN_SRR1; \ - stw r1,GPR1(r11); \ - stw r1,0(r11); \ - tovirt(r1,r11); /* set new kernel sp */ \ - li r10,MSR_KERNEL & ~(MSR_IR|MSR_DR); /* can take exceptions */ \ - MTMSRD(r10);/* (except for mach check in rtas) */ \ - stw r0,GPR0(r11); \ - lis r10,STACK_FRAME_REGS_MARKER@ha; /* exception frame marker */ \ - addir10,r10,STACK_FRAME_REGS_MARKER@l; \ - stw r10,8(r11); \ - SAVE_4GPRS(3, r11); \ - SAVE_2GPRS(7, r11) - -/* - * Note: code which follows this uses cr0.eq (set if from kernel), - * r11, r12 (SRR0), and r9 (SRR1). - * - * Note2: once we have set r1 we are in a position to take exceptions - * again, and we could thus set MSR:RI at that point. - */ - -/* - * Exception vectors. - */ -#define EXCEPTION(n, label, hdlr, xfer)\ - . = n; \ - DO_KVM n; \ -label: \ - EXCEPTION_PROLOG; \ - addir3,r1,STACK_FRAME_OVERHEAD; \ - xfer(n, hdlr) - -#define EXC_XFER_TEMPLATE(n, hdlr, trap, copyee, tfer, ret)\ - li r10,trap; \ - stw r10,_TRAP(r11); \ - li r10,MSR_KERNEL; \ - copyee(r10, r9);\ - bl tfer; \ -i##n: \ - .long hdlr; \ - .long ret - -#define COPY_EE(d, s) rlwimi d,s,0,16,16 -#define NOCOPY(d, s) - -#define EXC_XFER_STD(n, hdlr) \ - EXC_XFER_TEMPLATE(n, hdlr, n, NOCOPY, transfer_to_handler_full, \ - ret_from_except_full) - -#define EXC_XFER_LITE(n, hdlr) \ - EXC_XFER_TEMPLATE(n, hdlr, n+1, NOCOPY, transfer_to_handler, \ - ret_from_except) - -#define EXC_XFER_EE(n, hdlr) \ - EXC_XFER_TEMPLATE(n, hdlr, n, COPY_EE,
[PATCH v3 00/16] powerpc/32: Implement fast syscall entry
The purpose of this series is to implement a fast syscall entry on ppc32, as already done on ppc64. Unlike all other exceptions which can happen at any time and require to preserve all registers, the syscalls do not require the preservation of volatile registers (except LR). Syscall entries can then be optimised with lighter entry code than the general exception handling. In the meantime this series refactorises the exception entry on 40x/6xx/8xx as they are pretty similar, and it takes benh series on rationalising the settings of MSR_EE at exceptions/syscall entries as this change pretty simplies exception entries. The refactorisation of exception entry will help when it comes to implementing VMAP_STACK On a 8xx, this series improves null_syscall selftest by 17% On a 83xx, this series improves null_syscall selftest by 12,5% v3: - Rebased on latest powerpc/merge branch - Fixed trivial conflict due to KUP functionnality - Dropped patch 15 (already applied) v2: - Rebased on latest powerpc/merge branch. - Added booke support as well (tested on qemu bamboo). - Added a patch to get rid of the dummy frames when calling trace_hardirqs_on/off. 
Christophe Leroy (16): powerpc/32: Refactor EXCEPTION entry macros for head_8xx.S and head_32.S powerpc/32: move LOAD_MSR_KERNEL() into head_32.h and use it powerpc/32: make the 6xx/8xx EXC_XFER_TEMPLATE() similar to the 40x/booke one powerpc/40x: Don't use SPRN_SPRG_SCRATCH2 in EXCEPTION_PROLOG powerpc/40x: add exception frame marker powerpc/40x: Split and rename NORMAL_EXCEPTION_PROLOG powerpc/40x: Refactor exception entry macros by using head_32.h powerpc/fsl_booke: ensure SPEFloatingPointException() reenables interrupts powerpc/32: enter syscall with MSR_EE inconditionaly set powerpc/32: Enter exceptions with MSR_EE unset powerpc/32: get rid of COPY_EE in exception entry powerpc: Fix 32-bit handling of MSR_EE on exceptions powerpc/32: implement fast entry for syscalls on non BOOKE powerpc/32: implement fast entry for syscalls on BOOKE powerpc/32: don't do syscall stuff in transfer_to_handler powerpc/32: Don't add dummy frames when calling trace_hardirqs_on/off arch/powerpc/kernel/entry_32.S | 153 +- arch/powerpc/kernel/head_32.S| 170 +++-- arch/powerpc/kernel/head_32.h| 203 +++ arch/powerpc/kernel/head_40x.S | 152 +++--- arch/powerpc/kernel/head_44x.S | 9 +- arch/powerpc/kernel/head_8xx.S | 133 --- arch/powerpc/kernel/head_booke.h | 131 +- arch/powerpc/kernel/head_fsl_booke.S | 29 +++-- arch/powerpc/kernel/traps.c | 8 ++ 9 files changed, 508 insertions(+), 480 deletions(-) create mode 100644 arch/powerpc/kernel/head_32.h -- 2.13.3
Re: [PATCH v2 0/5] Allow CPU0 to be nohz full
On Tue, Apr 30, 2019 at 12:46:40PM +1000, Nicholas Piggin wrote: > Peter Zijlstra's on April 25, 2019 10:04 pm: > > On Thu, Apr 11, 2019 at 01:34:43PM +1000, Nicholas Piggin wrote: > >> Since last time, I added a compile time option to opt-out of this > >> if the platform does not support suspend on non-zero, and tried to > >> improve legibility of changelogs and explain the justification > >> better. > >> > >> I have been testing this on powerpc/pseries and it seems to work > >> fine (the firmware call to suspend can be called on any CPU and > >> resumes where it left off), but not included here because the > >> code has some bitrot unrelated to this series which I hacked to > >> fix. I will discuss it and either send an acked patch to go with > >> this series if it is small, or fix it in powerpc tree. > >> > > > > Rafael, Frederic, any comments? > > > > Sorry to ping again, I guess people are probably busy after vacation. > Any chance we could get this in next merge window? Peter are you okay > with the config option as it is, then we can look at adapting it to > what x86 needs as a follow up (e.g., allow nohz CPU0 for > cpu0_hotpluggable case)? Yeah, let me just queue these here patches. Not sure they'll still make the upcoming merge window, but we can try.
Re: [PATCH stable v4.4 0/8] missing powerpc spectre backports for 4.4
On Mon, Apr 29, 2019 at 06:49:00PM +0300, Diana Craciun wrote: > Hi Greg, > > These are missing patches from the initial powerpc spectre backports for 4.4. > Please queue them as well if you don't have any objections. I applied the first 6 of these now. If you could fix up the last two and resend them, that would be wonderful. thanks, greg k-h
Patch "powerpc/fsl: Flush the branch predictor at each kernel entry (32 bit)" has been added to the 4.4-stable tree
This is a note to let you know that I've just added the patch titled powerpc/fsl: Flush the branch predictor at each kernel entry (32 bit) to the 4.4-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary The filename of the patch is: powerpc-fsl-flush-the-branch-predictor-at-each-kernel-entry-32-bit.patch and it can be found in the queue-4.4 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let know about it. >From foo@baz Tue 30 Apr 2019 12:38:50 PM CEST From: Diana Craciun Date: Mon, 29 Apr 2019 18:49:04 +0300 Subject: powerpc/fsl: Flush the branch predictor at each kernel entry (32 bit) To: sta...@vger.kernel.org, gre...@linuxfoundation.org Cc: linuxppc-...@ozlabs.org, m...@ellerman.id.au, Diana Craciun Message-ID: <1556552948-24957-5-git-send-email-diana.crac...@nxp.com> From: Diana Craciun commit 7fef436295bf6c05effe682c8797dfcb0deb112a upstream. In order to protect against speculation attacks on indirect branches, the branch predictor is flushed at kernel entry to protect for the following situations: - userspace process attacking another userspace process - userspace process attacking the kernel Basically when the privillege level change (i.e.the kernel is entered), the branch predictor state is flushed. Signed-off-by: Diana Craciun Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/head_booke.h |6 ++ arch/powerpc/kernel/head_fsl_booke.S | 15 +++ 2 files changed, 21 insertions(+) --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -42,6 +42,9 @@ andi. 
r11, r11, MSR_PR; /* check whether user or kernel*/\ mr r11, r1; \ beq 1f; \ +START_BTB_FLUSH_SECTION\ + BTB_FLUSH(r11) \ +END_BTB_FLUSH_SECTION \ /* if from user, start at top of this thread's kernel stack */ \ lwz r11, THREAD_INFO-THREAD(r10);\ ALLOC_STACK_FRAME(r11, THREAD_SIZE); \ @@ -127,6 +130,9 @@ stw r9,_CCR(r8);/* save CR on stack*/\ mfspr r11,exc_level_srr1; /* check whether user or kernel*/\ DO_KVM BOOKE_INTERRUPT_##intno exc_level_srr1; \ +START_BTB_FLUSH_SECTION \ + BTB_FLUSH(r10) \ +END_BTB_FLUSH_SECTION \ andi. r11,r11,MSR_PR; \ mfspr r11,SPRN_SPRG_THREAD; /* if from user, start at top of */\ lwz r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\ --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -451,6 +451,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) mfcrr13 stw r13, THREAD_NORMSAVE(3)(r10) DO_KVM BOOKE_INTERRUPT_DTLB_MISS SPRN_SRR1 +START_BTB_FLUSH_SECTION + mfspr r11, SPRN_SRR1 + andi. r10,r11,MSR_PR + beq 1f + BTB_FLUSH(r10) +1: +END_BTB_FLUSH_SECTION mfspr r10, SPRN_DEAR /* Get faulting address */ /* If we are faulting a kernel address, we have to use the @@ -545,6 +552,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) mfcrr13 stw r13, THREAD_NORMSAVE(3)(r10) DO_KVM BOOKE_INTERRUPT_ITLB_MISS SPRN_SRR1 +START_BTB_FLUSH_SECTION + mfspr r11, SPRN_SRR1 + andi. 
r10,r11,MSR_PR + beq 1f + BTB_FLUSH(r10) +1: +END_BTB_FLUSH_SECTION + mfspr r10, SPRN_SRR0 /* Get faulting address */ /* If we are faulting a kernel address, we have to use the Patches currently in stable-queue which might be from diana.crac...@nxp.com are queue-4.4/powerpc-64s-add-support-for-a-store-forwarding-barrier-at-kernel-entry-exit.patch queue-4.4/powerpc-fsl-emulate-sprn_bucsr-register.patch queue-4.4/powerpc-64-make-stf-barrier-ppc_book3s_64-specific.patch queue-4.4/powerpc-pseries-set-or-clear-security-feature-flags.patch queue-4.4/powerpc-fsl-fix-spectre_v2-mitigations-reporting.patch queue-4.4/powerpc-64s-patch-barrier_nospec-in-modules.patch queue-4.4/powerpc-pseries-support-firmware-disable-of-rfi-flush.patch queue-4.4/powerpc-rfi-flush-call-setup_rfi_flush-after-lpm-migration.patch queue-4.4/powerpc-pseries-query-hypervisor-for-count-cache-flush-settings.patch queue-4.4/powerpc-powernv-set-or-clear-security-feature-flags.patch
Patch "powerpc/fsl: Sanitize the syscall table for NXP PowerPC 32 bit platforms" has been added to the 4.4-stable tree
This is a note to let you know that I've just added the patch titled powerpc/fsl: Sanitize the syscall table for NXP PowerPC 32 bit platforms to the 4.4-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary The filename of the patch is: powerpc-fsl-sanitize-the-syscall-table-for-nxp-powerpc-32-bit-platforms.patch and it can be found in the queue-4.4 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let know about it. >From foo@baz Tue 30 Apr 2019 12:38:50 PM CEST From: Diana Craciun Date: Mon, 29 Apr 2019 18:49:05 +0300 Subject: powerpc/fsl: Sanitize the syscall table for NXP PowerPC 32 bit platforms To: sta...@vger.kernel.org, gre...@linuxfoundation.org Cc: linuxppc-...@ozlabs.org, m...@ellerman.id.au, Diana Craciun Message-ID: <1556552948-24957-6-git-send-email-diana.crac...@nxp.com> From: Diana Craciun commit c28218d4abbf4f2035495334d8bfcba64bda4787 upstream. Used barrier_nospec to sanitize the syscall table. Signed-off-by: Diana Craciun Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/entry_32.S | 10 ++ 1 file changed, 10 insertions(+) --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -33,6 +33,7 @@ #include #include #include +#include /* * MSR_KERNEL is > 0x1 on 4xx/Book-E since it include MSR_CE. @@ -340,6 +341,15 @@ syscall_dotrace_cont: ori r10,r10,sys_call_table@l slwir0,r0,2 bge-66f + + barrier_nospec_asm + /* +* Prevent the load of the handler below (based on the user-passed +* system call number) being speculatively executed until the test +* against NR_syscalls and branch to .66f above has +* committed. 
+*/ + lwzxr10,r10,r0 /* Fetch system call handler [ptr] */ mtlrr10 addir9,r1,STACK_FRAME_OVERHEAD Patches currently in stable-queue which might be from diana.crac...@nxp.com are queue-4.4/powerpc-64s-add-support-for-a-store-forwarding-barrier-at-kernel-entry-exit.patch queue-4.4/powerpc-fsl-emulate-sprn_bucsr-register.patch queue-4.4/powerpc-64-make-stf-barrier-ppc_book3s_64-specific.patch queue-4.4/powerpc-pseries-set-or-clear-security-feature-flags.patch queue-4.4/powerpc-fsl-fix-spectre_v2-mitigations-reporting.patch queue-4.4/powerpc-64s-patch-barrier_nospec-in-modules.patch queue-4.4/powerpc-pseries-support-firmware-disable-of-rfi-flush.patch queue-4.4/powerpc-rfi-flush-call-setup_rfi_flush-after-lpm-migration.patch queue-4.4/powerpc-pseries-query-hypervisor-for-count-cache-flush-settings.patch queue-4.4/powerpc-powernv-set-or-clear-security-feature-flags.patch queue-4.4/powerpc-64s-add-support-for-software-count-cache-flush.patch queue-4.4/powerpc64s-show-ori31-availability-in-spectre_v1-sysfs-file-not-v2.patch queue-4.4/powerpc-fsl-flush-the-branch-predictor-at-each-kernel-entry-64bit.patch queue-4.4/powerpc-fsl-update-spectre-v2-reporting.patch queue-4.4/powerpc-64s-wire-up-cpu_show_spectre_v2.patch queue-4.4/powerpc-64-make-meltdown-reporting-book3s-64-specific.patch queue-4.4/powerpc-rfi-flush-make-it-possible-to-call-setup_rfi_flush-again.patch queue-4.4/powerpc-64s-add-support-for-ori-barrier_nospec-patching.patch queue-4.4/powerpc-use-barrier_nospec-in-copy_from_user.patch queue-4.4/powerpc-64s-fix-section-mismatch-warnings-from-setup_rfi_flush.patch queue-4.4/powerpc-fsl-sanitize-the-syscall-table-for-nxp-powerpc-32-bit-platforms.patch queue-4.4/powerpc-avoid-code-patching-freed-init-sections.patch queue-4.4/powerpc-fsl-add-macro-to-flush-the-branch-predictor.patch queue-4.4/powerpc-xmon-add-rfi-flush-related-fields-to-paca-dump.patch queue-4.4/powerpc-fsl-add-barrier_nospec-implementation-for-nxp-powerpc-book3e.patch 
queue-4.4/powerpc-security-fix-spectre_v2-reporting.patch queue-4.4/powerpc-add-security-feature-flags-for-spectre-meltdown.patch queue-4.4/powerpc-powernv-use-the-security-flags-in-pnv_setup_rfi_flush.patch queue-4.4/powerpc-64-disable-the-speculation-barrier-from-the-command-line.patch queue-4.4/powerpc-fsl-fix-the-flush-of-branch-predictor.patch queue-4.4/powerpc-pseries-use-the-security-flags-in-pseries_setup_rfi_flush.patch queue-4.4/powerpc-64-add-config_ppc_barrier_nospec.patch queue-4.4/powerpc-64s-move-cpu_show_meltdown.patch queue-4.4/powerpc-fsl-enable-runtime-patching-if-nospectre_v2-boot-arg-is-used.patch queue-4.4/powerpc-64-use-barrier_nospec-in-syscall-entry.patch queue-4.4/powerpc-fsl-fixed-warning-orphan-section-__btb_flush_fixup.patch queue-4.4/powerpc-fsl-add-nospectre_v2-command-line-argument.patch queue-4.4/powerpc-64s-add-new-security-feature-flags-for-count-cache-flush.patch queue-4.4/powerpc-fsl-add-infrastructure-to-fixup-branch-predictor-flush.patch queue-4.4/powerpc-rfi-flush-differentiate-enabled-and-patched-flush-types.patch
Patch "powerpc/fsl: Flush branch predictor when entering KVM" has been added to the 4.4-stable tree
This is a note to let you know that I've just added the patch titled powerpc/fsl: Flush branch predictor when entering KVM to the 4.4-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary The filename of the patch is: powerpc-fsl-flush-branch-predictor-when-entering-kvm.patch and it can be found in the queue-4.4 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let us know about it. >From foo@baz Tue 30 Apr 2019 12:38:50 PM CEST From: Diana Craciun Date: Mon, 29 Apr 2019 18:49:02 +0300 Subject: powerpc/fsl: Flush branch predictor when entering KVM To: sta...@vger.kernel.org, gre...@linuxfoundation.org Cc: linuxppc-...@ozlabs.org, m...@ellerman.id.au, Diana Craciun Message-ID: <1556552948-24957-3-git-send-email-diana.crac...@nxp.com> From: Diana Craciun commit e7aa61f47b23afbec41031bc47ca8d6cb6516abc upstream. Switching from the guest to host is another place where the speculative accesses can be exploited. Flush the branch predictor when entering KVM. 
Signed-off-by: Diana Craciun Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kvm/bookehv_interrupts.S |4 1 file changed, 4 insertions(+) --- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -75,6 +75,10 @@ PPC_LL r1, VCPU_HOST_STACK(r4) PPC_LL r2, HOST_R2(r1) +START_BTB_FLUSH_SECTION + BTB_FLUSH(r10) +END_BTB_FLUSH_SECTION + mfspr r10, SPRN_PID lwz r8, VCPU_HOST_PID(r4) PPC_LL r11, VCPU_SHARED(r4) Patches currently in stable-queue which might be from diana.crac...@nxp.com are queue-4.4/powerpc-64s-add-support-for-a-store-forwarding-barrier-at-kernel-entry-exit.patch queue-4.4/powerpc-fsl-emulate-sprn_bucsr-register.patch queue-4.4/powerpc-64-make-stf-barrier-ppc_book3s_64-specific.patch queue-4.4/powerpc-pseries-set-or-clear-security-feature-flags.patch queue-4.4/powerpc-fsl-fix-spectre_v2-mitigations-reporting.patch queue-4.4/powerpc-64s-patch-barrier_nospec-in-modules.patch queue-4.4/powerpc-pseries-support-firmware-disable-of-rfi-flush.patch queue-4.4/powerpc-rfi-flush-call-setup_rfi_flush-after-lpm-migration.patch queue-4.4/powerpc-pseries-query-hypervisor-for-count-cache-flush-settings.patch queue-4.4/powerpc-powernv-set-or-clear-security-feature-flags.patch queue-4.4/powerpc-64s-add-support-for-software-count-cache-flush.patch queue-4.4/powerpc64s-show-ori31-availability-in-spectre_v1-sysfs-file-not-v2.patch queue-4.4/powerpc-fsl-flush-the-branch-predictor-at-each-kernel-entry-64bit.patch queue-4.4/powerpc-fsl-update-spectre-v2-reporting.patch queue-4.4/powerpc-64s-wire-up-cpu_show_spectre_v2.patch queue-4.4/powerpc-64-make-meltdown-reporting-book3s-64-specific.patch queue-4.4/powerpc-rfi-flush-make-it-possible-to-call-setup_rfi_flush-again.patch queue-4.4/powerpc-64s-add-support-for-ori-barrier_nospec-patching.patch queue-4.4/powerpc-use-barrier_nospec-in-copy_from_user.patch queue-4.4/powerpc-64s-fix-section-mismatch-warnings-from-setup_rfi_flush.patch 
queue-4.4/powerpc-fsl-sanitize-the-syscall-table-for-nxp-powerpc-32-bit-platforms.patch queue-4.4/powerpc-avoid-code-patching-freed-init-sections.patch queue-4.4/powerpc-fsl-add-macro-to-flush-the-branch-predictor.patch queue-4.4/powerpc-xmon-add-rfi-flush-related-fields-to-paca-dump.patch queue-4.4/powerpc-fsl-add-barrier_nospec-implementation-for-nxp-powerpc-book3e.patch queue-4.4/powerpc-security-fix-spectre_v2-reporting.patch queue-4.4/powerpc-add-security-feature-flags-for-spectre-meltdown.patch queue-4.4/powerpc-powernv-use-the-security-flags-in-pnv_setup_rfi_flush.patch queue-4.4/powerpc-64-disable-the-speculation-barrier-from-the-command-line.patch queue-4.4/powerpc-fsl-fix-the-flush-of-branch-predictor.patch queue-4.4/powerpc-pseries-use-the-security-flags-in-pseries_setup_rfi_flush.patch queue-4.4/powerpc-64-add-config_ppc_barrier_nospec.patch queue-4.4/powerpc-64s-move-cpu_show_meltdown.patch queue-4.4/powerpc-fsl-enable-runtime-patching-if-nospectre_v2-boot-arg-is-used.patch queue-4.4/powerpc-64-use-barrier_nospec-in-syscall-entry.patch queue-4.4/powerpc-fsl-fixed-warning-orphan-section-__btb_flush_fixup.patch queue-4.4/powerpc-fsl-add-nospectre_v2-command-line-argument.patch queue-4.4/powerpc-64s-add-new-security-feature-flags-for-count-cache-flush.patch queue-4.4/powerpc-fsl-add-infrastructure-to-fixup-branch-predictor-flush.patch queue-4.4/powerpc-rfi-flush-differentiate-enabled-and-patched-flush-types.patch queue-4.4/powerpc-64s-enhance-the-information-in-cpu_show_spectre_v1.patch queue-4.4/powerpc-64-call-setup_barrier_nospec-from-setup_arch.patch queue-4.4/powerpc-rfi-flush-always-enable-fallback-flush-on-pseries.patch queue-4.4/powerpc-64s-improve-rfi-l1-d-cache-flush-fallback.patch queue-4.4/powerpc-asm-add-a-patch_site-macro-helpers-for-patching-instructions.patch
Patch "powerpc/fsl: Fixed warning: orphan section `__btb_flush_fixup'" has been added to the 4.4-stable tree
This is a note to let you know that I've just added the patch titled powerpc/fsl: Fixed warning: orphan section `__btb_flush_fixup' to the 4.4-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary The filename of the patch is: powerpc-fsl-fixed-warning-orphan-section-__btb_flush_fixup.patch and it can be found in the queue-4.4 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let know about it. >From foo@baz Tue 30 Apr 2019 12:38:50 PM CEST From: Diana Craciun Date: Mon, 29 Apr 2019 18:49:06 +0300 Subject: powerpc/fsl: Fixed warning: orphan section `__btb_flush_fixup' To: sta...@vger.kernel.org, gre...@linuxfoundation.org Cc: linuxppc-...@ozlabs.org, m...@ellerman.id.au, Diana Craciun Message-ID: <1556552948-24957-7-git-send-email-diana.crac...@nxp.com> From: Diana Craciun commit 039daac5526932ec731e4499613018d263af8b3e upstream. Fixed the following build warning: powerpc-linux-gnu-ld: warning: orphan section `__btb_flush_fixup' from `arch/powerpc/kernel/head_44x.o' being placed in section `__btb_flush_fixup'. Signed-off-by: Diana Craciun Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/head_booke.h | 18 -- 1 file changed, 12 insertions(+), 6 deletions(-) --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -31,6 +31,16 @@ */ #define THREAD_NORMSAVE(offset)(THREAD_NORMSAVES + (offset * 4)) +#ifdef CONFIG_PPC_FSL_BOOK3E +#define BOOKE_CLEAR_BTB(reg) \ +START_BTB_FLUSH_SECTION \ + BTB_FLUSH(reg) \ +END_BTB_FLUSH_SECTION +#else +#define BOOKE_CLEAR_BTB(reg) +#endif + + #define NORMAL_EXCEPTION_PROLOG(intno) \ mtspr SPRN_SPRG_WSCRATCH0, r10; /* save one register */ \ mfspr r10, SPRN_SPRG_THREAD; \ @@ -42,9 +52,7 @@ andi. 
r11, r11, MSR_PR; /* check whether user or kernel*/\ mr r11, r1; \ beq 1f; \ -START_BTB_FLUSH_SECTION\ - BTB_FLUSH(r11) \ -END_BTB_FLUSH_SECTION \ + BOOKE_CLEAR_BTB(r11)\ /* if from user, start at top of this thread's kernel stack */ \ lwz r11, THREAD_INFO-THREAD(r10);\ ALLOC_STACK_FRAME(r11, THREAD_SIZE); \ @@ -130,9 +138,7 @@ END_BTB_FLUSH_SECTION \ stw r9,_CCR(r8);/* save CR on stack*/\ mfspr r11,exc_level_srr1; /* check whether user or kernel*/\ DO_KVM BOOKE_INTERRUPT_##intno exc_level_srr1; \ -START_BTB_FLUSH_SECTION \ - BTB_FLUSH(r10) \ -END_BTB_FLUSH_SECTION \ + BOOKE_CLEAR_BTB(r10)\ andi. r11,r11,MSR_PR; \ mfspr r11,SPRN_SPRG_THREAD; /* if from user, start at top of */\ lwz r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\ Patches currently in stable-queue which might be from diana.crac...@nxp.com are queue-4.4/powerpc-64s-add-support-for-a-store-forwarding-barrier-at-kernel-entry-exit.patch queue-4.4/powerpc-fsl-emulate-sprn_bucsr-register.patch queue-4.4/powerpc-64-make-stf-barrier-ppc_book3s_64-specific.patch queue-4.4/powerpc-pseries-set-or-clear-security-feature-flags.patch queue-4.4/powerpc-fsl-fix-spectre_v2-mitigations-reporting.patch queue-4.4/powerpc-64s-patch-barrier_nospec-in-modules.patch queue-4.4/powerpc-pseries-support-firmware-disable-of-rfi-flush.patch queue-4.4/powerpc-rfi-flush-call-setup_rfi_flush-after-lpm-migration.patch queue-4.4/powerpc-pseries-query-hypervisor-for-count-cache-flush-settings.patch queue-4.4/powerpc-powernv-set-or-clear-security-feature-flags.patch queue-4.4/powerpc-64s-add-support-for-software-count-cache-flush.patch queue-4.4/powerpc64s-show-ori31-availability-in-spectre_v1-sysfs-file-not-v2.patch queue-4.4/powerpc-fsl-flush-the-branch-predictor-at-each-kernel-entry-64bit.patch queue-4.4/powerpc-fsl-update-spectre-v2-reporting.patch queue-4.4/powerpc-64s-wire-up-cpu_show_spectre_v2.patch
Patch "powerpc/fsl: Enable runtime patching if nospectre_v2 boot arg is used" has been added to the 4.4-stable tree
This is a note to let you know that I've just added the patch titled powerpc/fsl: Enable runtime patching if nospectre_v2 boot arg is used to the 4.4-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary The filename of the patch is: powerpc-fsl-enable-runtime-patching-if-nospectre_v2-boot-arg-is-used.patch and it can be found in the queue-4.4 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let us know about it. >From foo@baz Tue 30 Apr 2019 12:38:50 PM CEST From: Diana Craciun Date: Mon, 29 Apr 2019 18:49:01 +0300 Subject: powerpc/fsl: Enable runtime patching if nospectre_v2 boot arg is used To: sta...@vger.kernel.org, gre...@linuxfoundation.org Cc: linuxppc-...@ozlabs.org, m...@ellerman.id.au, Diana Craciun Message-ID: <1556552948-24957-2-git-send-email-diana.crac...@nxp.com> From: Diana Craciun commit 3bc8ea8603ae4c1e09aca8de229ad38b8091fcb3 upstream. If the user chooses not to use the mitigations, replace the code sequence with nops. 
Signed-off-by: Diana Craciun Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/setup_32.c |1 + arch/powerpc/kernel/setup_64.c |1 + 2 files changed, 2 insertions(+) --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -323,6 +323,7 @@ void __init setup_arch(char **cmdline_p) if ( ppc_md.progress ) ppc_md.progress("arch: exit", 0x3eab); setup_barrier_nospec(); + setup_spectre_v2(); paging_init(); --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -737,6 +737,7 @@ void __init setup_arch(char **cmdline_p) ppc_md.setup_arch(); setup_barrier_nospec(); + setup_spectre_v2(); paging_init(); Patches currently in stable-queue which might be from diana.crac...@nxp.com are queue-4.4/powerpc-64s-add-support-for-a-store-forwarding-barrier-at-kernel-entry-exit.patch queue-4.4/powerpc-fsl-emulate-sprn_bucsr-register.patch queue-4.4/powerpc-64-make-stf-barrier-ppc_book3s_64-specific.patch queue-4.4/powerpc-pseries-set-or-clear-security-feature-flags.patch queue-4.4/powerpc-fsl-fix-spectre_v2-mitigations-reporting.patch queue-4.4/powerpc-64s-patch-barrier_nospec-in-modules.patch queue-4.4/powerpc-pseries-support-firmware-disable-of-rfi-flush.patch queue-4.4/powerpc-rfi-flush-call-setup_rfi_flush-after-lpm-migration.patch queue-4.4/powerpc-pseries-query-hypervisor-for-count-cache-flush-settings.patch queue-4.4/powerpc-powernv-set-or-clear-security-feature-flags.patch queue-4.4/powerpc-64s-add-support-for-software-count-cache-flush.patch queue-4.4/powerpc64s-show-ori31-availability-in-spectre_v1-sysfs-file-not-v2.patch queue-4.4/powerpc-fsl-flush-the-branch-predictor-at-each-kernel-entry-64bit.patch queue-4.4/powerpc-fsl-update-spectre-v2-reporting.patch queue-4.4/powerpc-64s-wire-up-cpu_show_spectre_v2.patch queue-4.4/powerpc-64-make-meltdown-reporting-book3s-64-specific.patch queue-4.4/powerpc-rfi-flush-make-it-possible-to-call-setup_rfi_flush-again.patch 
queue-4.4/powerpc-64s-add-support-for-ori-barrier_nospec-patching.patch queue-4.4/powerpc-use-barrier_nospec-in-copy_from_user.patch queue-4.4/powerpc-64s-fix-section-mismatch-warnings-from-setup_rfi_flush.patch queue-4.4/powerpc-fsl-sanitize-the-syscall-table-for-nxp-powerpc-32-bit-platforms.patch queue-4.4/powerpc-avoid-code-patching-freed-init-sections.patch queue-4.4/powerpc-fsl-add-macro-to-flush-the-branch-predictor.patch queue-4.4/powerpc-xmon-add-rfi-flush-related-fields-to-paca-dump.patch queue-4.4/powerpc-fsl-add-barrier_nospec-implementation-for-nxp-powerpc-book3e.patch queue-4.4/powerpc-security-fix-spectre_v2-reporting.patch queue-4.4/powerpc-add-security-feature-flags-for-spectre-meltdown.patch queue-4.4/powerpc-powernv-use-the-security-flags-in-pnv_setup_rfi_flush.patch queue-4.4/powerpc-64-disable-the-speculation-barrier-from-the-command-line.patch queue-4.4/powerpc-fsl-fix-the-flush-of-branch-predictor.patch queue-4.4/powerpc-pseries-use-the-security-flags-in-pseries_setup_rfi_flush.patch queue-4.4/powerpc-64-add-config_ppc_barrier_nospec.patch queue-4.4/powerpc-64s-move-cpu_show_meltdown.patch queue-4.4/powerpc-fsl-enable-runtime-patching-if-nospectre_v2-boot-arg-is-used.patch queue-4.4/powerpc-64-use-barrier_nospec-in-syscall-entry.patch queue-4.4/powerpc-fsl-fixed-warning-orphan-section-__btb_flush_fixup.patch queue-4.4/powerpc-fsl-add-nospectre_v2-command-line-argument.patch queue-4.4/powerpc-64s-add-new-security-feature-flags-for-count-cache-flush.patch queue-4.4/powerpc-fsl-add-infrastructure-to-fixup-branch-predictor-flush.patch queue-4.4/powerpc-rfi-flush-differentiate-enabled-and-patched-flush-types.patch queue-4.4/powerpc-64s-enhance-the-information-in-cpu_show_spectre_v1.patch queue-4.4/powerpc-64-call-setup_barrier_nospec-from-setup_arch.patch
Patch "powerpc/fsl: Emulate SPRN_BUCSR register" has been added to the 4.4-stable tree
This is a note to let you know that I've just added the patch titled powerpc/fsl: Emulate SPRN_BUCSR register to the 4.4-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary The filename of the patch is: powerpc-fsl-emulate-sprn_bucsr-register.patch and it can be found in the queue-4.4 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let us know about it. >From foo@baz Tue 30 Apr 2019 12:38:50 PM CEST From: Diana Craciun Date: Mon, 29 Apr 2019 18:49:03 +0300 Subject: powerpc/fsl: Emulate SPRN_BUCSR register To: sta...@vger.kernel.org, gre...@linuxfoundation.org Cc: linuxppc-...@ozlabs.org, m...@ellerman.id.au, Diana Craciun Message-ID: <1556552948-24957-4-git-send-email-diana.crac...@nxp.com> From: Diana Craciun commit 98518c4d8728656db349f875fcbbc7c126d4c973 upstream. In order to flush the branch predictor the guest kernel performs writes to the BUCSR register which is hypervisor privileged. However, the branch predictor is flushed at each KVM entry, so the branch predictor has been already flushed, so just return as soon as possible to guest. Signed-off-by: Diana Craciun [mpe: Tweak comment formatting] Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kvm/e500_emulate.c |7 +++ 1 file changed, 7 insertions(+) --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -277,6 +277,13 @@ int kvmppc_core_emulate_mtspr_e500(struc vcpu->arch.pwrmgtcr0 = spr_val; break; + case SPRN_BUCSR: + /* +* If we are here, it means that we have already flushed the +* branch predictor, so just return to guest. 
+*/ + break; + /* extra exceptions */ #ifdef CONFIG_SPE_POSSIBLE case SPRN_IVOR32: Patches currently in stable-queue which might be from diana.crac...@nxp.com are queue-4.4/powerpc-64s-add-support-for-a-store-forwarding-barrier-at-kernel-entry-exit.patch queue-4.4/powerpc-fsl-emulate-sprn_bucsr-register.patch queue-4.4/powerpc-64-make-stf-barrier-ppc_book3s_64-specific.patch queue-4.4/powerpc-pseries-set-or-clear-security-feature-flags.patch queue-4.4/powerpc-fsl-fix-spectre_v2-mitigations-reporting.patch queue-4.4/powerpc-64s-patch-barrier_nospec-in-modules.patch queue-4.4/powerpc-pseries-support-firmware-disable-of-rfi-flush.patch queue-4.4/powerpc-rfi-flush-call-setup_rfi_flush-after-lpm-migration.patch queue-4.4/powerpc-pseries-query-hypervisor-for-count-cache-flush-settings.patch queue-4.4/powerpc-powernv-set-or-clear-security-feature-flags.patch queue-4.4/powerpc-64s-add-support-for-software-count-cache-flush.patch queue-4.4/powerpc64s-show-ori31-availability-in-spectre_v1-sysfs-file-not-v2.patch queue-4.4/powerpc-fsl-flush-the-branch-predictor-at-each-kernel-entry-64bit.patch queue-4.4/powerpc-fsl-update-spectre-v2-reporting.patch queue-4.4/powerpc-64s-wire-up-cpu_show_spectre_v2.patch queue-4.4/powerpc-64-make-meltdown-reporting-book3s-64-specific.patch queue-4.4/powerpc-rfi-flush-make-it-possible-to-call-setup_rfi_flush-again.patch queue-4.4/powerpc-64s-add-support-for-ori-barrier_nospec-patching.patch queue-4.4/powerpc-use-barrier_nospec-in-copy_from_user.patch queue-4.4/powerpc-64s-fix-section-mismatch-warnings-from-setup_rfi_flush.patch queue-4.4/powerpc-fsl-sanitize-the-syscall-table-for-nxp-powerpc-32-bit-platforms.patch queue-4.4/powerpc-avoid-code-patching-freed-init-sections.patch queue-4.4/powerpc-fsl-add-macro-to-flush-the-branch-predictor.patch queue-4.4/powerpc-xmon-add-rfi-flush-related-fields-to-paca-dump.patch queue-4.4/powerpc-fsl-add-barrier_nospec-implementation-for-nxp-powerpc-book3e.patch 
queue-4.4/powerpc-security-fix-spectre_v2-reporting.patch queue-4.4/powerpc-add-security-feature-flags-for-spectre-meltdown.patch queue-4.4/powerpc-powernv-use-the-security-flags-in-pnv_setup_rfi_flush.patch queue-4.4/powerpc-64-disable-the-speculation-barrier-from-the-command-line.patch queue-4.4/powerpc-fsl-fix-the-flush-of-branch-predictor.patch queue-4.4/powerpc-pseries-use-the-security-flags-in-pseries_setup_rfi_flush.patch queue-4.4/powerpc-64-add-config_ppc_barrier_nospec.patch queue-4.4/powerpc-64s-move-cpu_show_meltdown.patch queue-4.4/powerpc-fsl-enable-runtime-patching-if-nospectre_v2-boot-arg-is-used.patch queue-4.4/powerpc-64-use-barrier_nospec-in-syscall-entry.patch queue-4.4/powerpc-fsl-fixed-warning-orphan-section-__btb_flush_fixup.patch queue-4.4/powerpc-fsl-add-nospectre_v2-command-line-argument.patch queue-4.4/powerpc-64s-add-new-security-feature-flags-for-count-cache-flush.patch queue-4.4/powerpc-fsl-add-infrastructure-to-fixup-branch-predictor-flush.patch queue-4.4/powerpc-rfi-flush-differentiate-enabled-and-patched-flush-types.patch queue-4.4/powerpc-64s-enhance-the-information-in-cpu_show_spectre_v1.patch
Re: [PATCH stable v4.4 8/8] Documentation: Add nospectre_v1 parameter
On Mon, Apr 29, 2019 at 06:49:08PM +0300, Diana Craciun wrote: > Currently only supported on powerpc. No upstream git commit id for this one? thanks, greg k-h
Re: [PATCH stable v4.4 7/8] powerpc/fsl: Add FSL_PPC_BOOK3E as supported arch for nospectre_v2 boot arg
On Mon, Apr 29, 2019 at 06:49:07PM +0300, Diana Craciun wrote: > commit f633a8ad636efb5d4bba1a047d4a0f1ef719aa06 upstream. No, the patch below is not that git commit :( I'll stop here in applying these patches. thanks, greg k-h
Re: [PATCH] KVM: PPC: Book3S HV: smb->smp comment fixup
On Thu, Apr 25, 2019 at 12:53:39PM -0700, Palmer Dabbelt wrote: > I made the same typo when trying to grep for uses of smp_wmb and figured > I might as well fix it. > > Signed-off-by: Palmer Dabbelt Thanks, patch applied to my kvm-ppc-next tree. Paul.
Re: [PATCH] powerpc: Fix kobject memleak
On Tue, Apr 30, 2019 at 11:09:23AM +1000, Tobin C. Harding wrote: > Currently error return from kobject_init_and_add() is not followed by a > call to kobject_put(). This means there is a memory leak. > > Add call to kobject_put() in error path of kobject_init_and_add(). > > Signed-off-by: Tobin C. Harding Reviewed-by: Greg Kroah-Hartman
[PATCH] powerpc/mm/radix: Fix kernel crash when running subpage protect test
This patch fixes the below crash by making sure we touch the subpage protection related structures only if we know they are allocated on the platform. With radix translation we don't allocate hash context at all and trying to access subpage_prot_table results in Faulting instruction address: 0xc008bdb4 Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Radix MMU=Hash SMP NR_CPUS=2048 NUMA PowerNV NIP [c008bdb4] sys_subpage_prot+0x74/0x590 LR [c000b688] system_call+0x5c/0x70 Call Trace: [c00020002c6b7d30] [c00020002c6b7d90] 0xc00020002c6b7d90 (unreliable) [c00020002c6b7e20] [c000b688] system_call+0x5c/0x70 Instruction dump: fb61ffd8 fb81ffe0 fba1ffe8 fbc1fff0 fbe1fff8 f821ff11 e92d1178 f9210068 3920 e92d0968 ebe90630 e93f03e8 6000 3860fffe e9410068 We also access the subpage_prot_table with mmap_sem held to avoid a race between two parallel subpage_prot syscalls. Reported-by: Sachin Sant Signed-off-by: Aneesh Kumar K.V --- arch/powerpc/mm/subpage-prot.c | 13 + 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/subpage-prot.c index c9dff4e1f295..473dd430e306 100644 --- a/arch/powerpc/mm/subpage-prot.c +++ b/arch/powerpc/mm/subpage-prot.c @@ -90,16 +90,18 @@ static void hpte_flush_range(struct mm_struct *mm, unsigned long addr, static void subpage_prot_clear(unsigned long addr, unsigned long len) { struct mm_struct *mm = current->mm; - struct subpage_prot_table *spt = mm_ctx_subpage_prot(&mm->context); + struct subpage_prot_table *spt; u32 **spm, *spp; unsigned long i; size_t nw; unsigned long next, limit; + down_write(&mm->mmap_sem); + + spt = mm_ctx_subpage_prot(&mm->context); if (!spt) - return ; + goto err_out; - down_write(&mm->mmap_sem); limit = addr + len; if (limit > spt->maxaddr) limit = spt->maxaddr; @@ -127,6 +129,8 @@ static void subpage_prot_clear(unsigned long addr, unsigned long len) /* now flush any existing HPTEs for the range */ hpte_flush_range(mm, addr, nw); } + +err_out: up_write(&mm->mmap_sem); } 
@@ -189,7 +193,7 @@ SYSCALL_DEFINE3(subpage_prot, unsigned long, addr, unsigned long, len, u32 __user *, map) { struct mm_struct *mm = current->mm; - struct subpage_prot_table *spt = mm_ctx_subpage_prot(&mm->context); + struct subpage_prot_table *spt; u32 **spm, *spp; unsigned long i; size_t nw; @@ -219,6 +223,7 @@ SYSCALL_DEFINE3(subpage_prot, unsigned long, addr, down_write(&mm->mmap_sem); + spt = mm_ctx_subpage_prot(&mm->context); if (!spt) { /* * Allocate subpage prot table if not already done. -- 2.20.1
Re: [PATCH v2 2/2] powerpc/perf: Add generic compat mode pmu driver
On 29/04/19 11:12 AM, Christophe Leroy wrote: Le 29/04/2019 à 04:52, Madhavan Srinivasan a écrit : Most of the power processor generation performance monitoring unit (PMU) driver code is bundled in the kernel and one of those is enabled/registered based on the oprofile_cpu_type check at the boot. But things get a little tricky in case of "compat" mode boot. IBM POWER System Server based processors have a compatibility mode feature, which simply put is, Nth generation processor (lets say POWER8) will act and appear in a mode consistent with an earlier generation (N-1) processor (that is POWER7). And in this "compat" mode boot, kernel modifies the "oprofile_cpu_type" to be Nth generation (POWER8). If Nth generation pmu driver is bundled (POWER8), it gets registered. Key dependency here is to have distro support for latest processor performance monitoring support. Patch here adds a generic "compat-mode" performance monitoring driver to be registered in the absence of a powernv platform specific pmu driver. Driver supports "cycles", "instruction" and "branch-miss" events. "0x100F0" used as event code for "cycles", "0x2" used as event code for "instruction" events and "0x400F6" used as event code for "branch miss". These are architected events as part of ISA. New file called "generic-compat-pmu.c" is created to contain the driver specific code. And base raw event code format modeled on PPMU_ARCH_207S. 
Signed-off-by: Madhavan Srinivasan --- Changelog v1: - Updated architected event opcodes - included branch miss with architected event opcode arch/powerpc/perf/Makefile | 3 +- arch/powerpc/perf/core-book3s.c | 2 +- arch/powerpc/perf/generic-compat-pmu.c | 245 + arch/powerpc/perf/internal.h | 1 + 4 files changed, 249 insertions(+), 2 deletions(-) create mode 100644 arch/powerpc/perf/generic-compat-pmu.c diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile index ab26df5bacb9..c155dcbb8691 100644 --- a/arch/powerpc/perf/Makefile +++ b/arch/powerpc/perf/Makefile @@ -5,7 +5,8 @@ obj-$(CONFIG_PERF_EVENTS) += callchain.o perf_regs.o obj-$(CONFIG_PPC_PERF_CTRS) += core-book3s.o bhrb.o obj64-$(CONFIG_PPC_PERF_CTRS) += ppc970-pmu.o power5-pmu.o \ power5+-pmu.o power6-pmu.o power7-pmu.o \ - isa207-common.o power8-pmu.o power9-pmu.o + isa207-common.o power8-pmu.o power9-pmu.o \ + generic-compat-pmu.o Isn't that name a bit long ? What about compat-pmu instead ? yeah I guess. Will fix it. obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o obj-$(CONFIG_PPC_POWERNV) += imc-pmu.o diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index a96f9420139c..a66fb9c01c9e 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -2318,7 +2318,7 @@ static int __init init_ppc64_pmu(void) else if (!init_ppc970_pmu()) return 0; else - return -ENODEV; + return init_generic_compat_pmu(); } early_initcall(init_ppc64_pmu); #endif diff --git a/arch/powerpc/perf/generic-compat-pmu.c b/arch/powerpc/perf/generic-compat-pmu.c new file mode 100644 index ..9c2d4bbc5c87 --- /dev/null +++ b/arch/powerpc/perf/generic-compat-pmu.c @@ -0,0 +1,245 @@ +/* + * Performance counter support. + * + * Copyright 2019 Madhavan Srinivasan, IBM Corporation. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or later version. Shouldn't we use the new licence format for new files ? ie: // SPDX-License-Identifier: GPL-2.0+ My bad. Thanks for pointing out. Will fix and re-spin. Thanks for review Maddy + */ + +#define pr_fmt(fmt) "generic-compat-pmu: " fmt + +#include "isa207-common.h" + +/* + * Raw event encoding: + * + * 60 56 52 48 44 40 36 32 + * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | + * + * 28 24 20 16 12 8 4 0 + * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | + * [ pmc ] [unit ] [ ] m [ pmcxsel ] + * | | + * | *- mark + * | + * | + * *- combine + * + * Below uses IBM bit numbering. + * + * MMCR1[x:y] = unit (PMCxUNIT) + * MMCR1[24] = pmc1combine[0] + * MMCR1[25] = pmc1combine[1] + * MMCR1[26] = pmc2combine[0] + * MMCR1[27] = pmc2combine[1] + * MMCR1[28] = pmc3combine[0] +
Re: [PATCH v2 1/2] powerpc/perf: init pmu from core-book3s
On 29/04/19 11:08 AM, Christophe Leroy wrote: Le 29/04/2019 à 04:52, Madhavan Srinivasan a écrit : Currently pmu driver file for each ppc64 generation processor has a __init call in itself. Refactor the code by moving the __init call to core-book3s.c. This also cleans up compat mode pmu driver registration. Can you explain the advantage of doing so ? Was not comfortable having dependency on the link ordering, so took this approach. This will avoid registering generic driver when there is a platform specific driver. For me it makes more sense to have independent drivers with their own init call. Suggested-by: Michael Ellerman Signed-off-by: Madhavan Srinivasan --- Changelog v1: - Added "internal.h" file and moved the extern definitions to that file arch/powerpc/perf/core-book3s.c | 28 arch/powerpc/perf/internal.h | 16 arch/powerpc/perf/power5+-pmu.c | 4 +--- arch/powerpc/perf/power5-pmu.c | 4 +--- arch/powerpc/perf/power6-pmu.c | 4 +--- arch/powerpc/perf/power7-pmu.c | 4 +--- arch/powerpc/perf/power8-pmu.c | 3 +-- arch/powerpc/perf/power9-pmu.c | 3 +-- arch/powerpc/perf/ppc970-pmu.c | 4 +--- 9 files changed, 51 insertions(+), 19 deletions(-) create mode 100644 arch/powerpc/perf/internal.h diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index b0723002a396..a96f9420139c 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -22,6 +22,10 @@ #include <asm/ptrace.h> #include <asm/code-patching.h> +#ifdef CONFIG_PPC64 Can we avoid that CONFIG_PPC64 ifdef ? Why isn't it compatible with PPC32 ? IIUC, Driver handled here are specific to server side ppc and secondly, infrastructure can be extend for ppc32 if needed. +#include "internal.h" +#endif + #define BHRB_MAX_ENTRIES 32 #define BHRB_TARGET 0x0000000000000002 #define BHRB_PREDICTION 0x0000000000000001 @@ -2294,3 +2298,27 @@ int register_power_pmu(struct power_pmu *pmu) power_pmu_prepare_cpu, NULL); return 0; } + +#ifdef CONFIG_PPC64 Same, why PPC64 ? 
+static int __init init_ppc64_pmu(void) +{ + /* run through all the pmu drivers one at a time */ + if (!init_power5_pmu()) + return 0; + else if (!init_power5p_pmu()) + return 0; + else if (!init_power6_pmu()) + return 0; + else if (!init_power7_pmu()) + return 0; + else if (!init_power8_pmu()) + return 0; + else if (!init_power9_pmu()) + return 0; + else if (!init_ppc970_pmu()) + return 0; + else + return -ENODEV; +} +early_initcall(init_ppc64_pmu); +#endif diff --git a/arch/powerpc/perf/internal.h b/arch/powerpc/perf/internal.h new file mode 100644 index ..e54d524d4283 --- /dev/null +++ b/arch/powerpc/perf/internal.h @@ -0,0 +1,16 @@ +/* + * Copyright 2019 Madhavan Srinivasan, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +extern int init_ppc970_pmu(void); +extern int init_power5_pmu(void); +extern int init_power5p_pmu(void); +extern int init_power6_pmu(void); +extern int init_power7_pmu(void); +extern int init_power8_pmu(void); +extern int init_power9_pmu(void); 'extern' keyword is pointless, please remove it (checkpatch --strict probably told it to you). 
Ok will re-spin it (will use --strict in future patches thanks :) ) Thanks for review Maddy Christophe diff --git a/arch/powerpc/perf/power5+-pmu.c b/arch/powerpc/perf/power5+-pmu.c index 0526dac66007..9aa803504cb2 100644 --- a/arch/powerpc/perf/power5+-pmu.c +++ b/arch/powerpc/perf/power5+-pmu.c @@ -677,7 +677,7 @@ static struct power_pmu power5p_pmu = { .cache_events = _cache_events, }; -static int __init init_power5p_pmu(void) +int init_power5p_pmu(void) { if (!cur_cpu_spec->oprofile_cpu_type || (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5+") @@ -686,5 +686,3 @@ static int __init init_power5p_pmu(void) return register_power_pmu(_pmu); } - -early_initcall(init_power5p_pmu); diff --git a/arch/powerpc/perf/power5-pmu.c b/arch/powerpc/perf/power5-pmu.c index 4dc99f9f7962..30cb13d081a9 100644 --- a/arch/powerpc/perf/power5-pmu.c +++ b/arch/powerpc/perf/power5-pmu.c @@ -618,7 +618,7 @@ static struct power_pmu power5_pmu = { .flags = PPMU_HAS_SSLOT, }; -static int __init init_power5_pmu(void) +int init_power5_pmu(void) { if (!cur_cpu_spec->oprofile_cpu_type || strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5")) @@ -626,5 +626,3 @@ static int __init init_power5_pmu(void) return register_power_pmu(_pmu); } - -early_initcall(init_power5_pmu); diff --git a/arch/powerpc/perf/power6-pmu.c b/arch/powerpc/perf/power6-pmu.c
Re: [PATCH kernel v3] powerpc/powernv: Isolate NVLinks between GV100GL on Witherspoon
On 30/04/2019 15:45, Alistair Popple wrote: > Alexey, > > +void pnv_try_isolate_nvidia_v100(struct pci_dev *bridge) > +{ > + u32 mask, val; > + void __iomem *bar0_0, *bar0_12, *bar0_a0; > + struct pci_dev *pdev; > + u16 cmd = 0, cmdmask = PCI_COMMAND_MEMORY; > + > + if (!bridge->subordinate) > + return; > + > + pdev = list_first_entry_or_null(&bridge->subordinate->devices, > + struct pci_dev, bus_list); > + if (!pdev) > + return; > + > + if (pdev->vendor != PCI_VENDOR_ID_NVIDIA) > > Don't you also need to check the PCIe devid to match only [PV]100 devices as > well? I doubt there's any guarantee these registers will remain the same for > all future (or older) NVIDIA devices. I do not have the complete list of IDs and I already saw 3 different device ids and this only works for machines with ibm,npu/gpu/nvlinks properties so for now it works and for the future we are hoping to either have an open source nvidia driver or some small minidriver (also from nvidia, or may be a spec allowing us to write one) to allow topology discovery on the host so we would not depend on the skiboot's powernv DT. > IMHO this should really be done in the device driver in the guest. A malcious > guest could load a modified driver that doesn't do this, but that should not > compromise other guests which presumably load a non-compromised driver that > disables the links on that guests GPU. However I guess in practice what you > have here should work equally well. Doing it in the guest means a good guest needs to have an updated driver, we do not really want to depend on this. The idea of IOMMU groups is that the hypervisor provides isolation irrespective to what the guest does. Also vfio+qemu+slof needs to convey the nvlink topology to the guest, seems like an unnecessary complication. 
> - Alistair > > + return; > + > + mask = nvlinkgpu_get_disable_mask(&pdev->dev); > + if (!mask) > + return; > + > + bar0_0 = pci_iomap_range(pdev, 0, 0, 0x10000); > + if (!bar0_0) { > + pci_err(pdev, "Error mapping BAR0 @0\n"); > + return; > + } > + bar0_12 = pci_iomap_range(pdev, 0, 0x120000, 0x10000); > + if (!bar0_12) { > + pci_err(pdev, "Error mapping BAR0 @120000\n"); > + goto bar0_0_unmap; > + } > + bar0_a0 = pci_iomap_range(pdev, 0, 0xA00000, 0x10000); > + if (!bar0_a0) { > + pci_err(pdev, "Error mapping BAR0 @A00000\n"); > + goto bar0_12_unmap; > + } Is it really necessary to do three separate ioremaps vs one that would cover them all here? I suspect you're just sneaking in PAGE_SIZE with the 0x10000 size mappings anyway. Seems like it would simplify setup, error reporting, and cleanup to ioremap to the PAGE_ALIGN'd range of the highest register accessed. Thanks, >>> >>> Sure I can map it once, I just do not see the point in mapping/unmapping >>> all 0xa1>>16=161 system pages for a very short period of time while >>> we know precisely that we need just 3 pages. >>> >>> Repost? >> >> Ping? >> >> Can this go in as it is (i.e. should I ping Michael) or this needs >> another round? It would be nice to get some formal acks. Thanks, >> Alex > + > + pci_restore_state(pdev); > + pci_read_config_word(pdev, PCI_COMMAND, &cmd); > + if ((cmd & cmdmask) != cmdmask) > + pci_write_config_word(pdev, PCI_COMMAND, cmd | cmdmask); > + > + /* > + * The sequence is from "Tesla P100 and V100 SXM2 NVLink Isolation on > + * Multi-Tenant Systems". > + * The register names are not provided there either, hence raw values. 
> + */ > + iowrite32(0x4, bar0_12 + 0x4C); > + iowrite32(0x2, bar0_12 + 0x2204); > + val = ioread32(bar0_0 + 0x200); > + val |= 0x0200; > + iowrite32(val, bar0_0 + 0x200); > + val = ioread32(bar0_a0 + 0x148); > + val |= mask; > + iowrite32(val, bar0_a0 + 0x148); > + > + if ((cmd | cmdmask) != cmd) > + pci_write_config_word(pdev, PCI_COMMAND, cmd); > + > + pci_iounmap(pdev, bar0_a0); > +bar0_12_unmap: > + pci_iounmap(pdev, bar0_12); > +bar0_0_unmap: > + pci_iounmap(pdev, bar0_0); > +} > > -- Alexey
Re: [PATCH v5 5/8] powerpc/pci/IOV: Add support for runtime enabling the VFs
On Mon, 2019-03-11 at 14:52 +0300, Sergey Miroshnichenko wrote: > When called within pcibios_sriov_enable(), the pci_sriov_get_totalvfs(pdev) > returns zero, because the device is yet preparing to enable the VFs. I don't think this is correct. The earliest pcibios_sriov_enable() can be called is during a driver probe function. The totalvfs field is initialised by pci_iov_init() which is called before the device has been added to the bus. If it's returning zero then maybe the driver limited the number of VFs to zero? That said, you need to reset numvfs to zero before changing the value. So limiting the number of pci_dns that are created to the number actually required rather than totalvfs doesn't hurt. > With this patch it becomes possible to enable VFs via sysfs "sriov_numvfs" > on PowerNV. I tested on a few of our lab systems with random kernel versions spanning from 4.15 to 5.0 and sriov_numvfs seemed to work fine on all of them. Is there a specific configuration you're testing that needed this change? 
> Signed-off-by: Sergey Miroshnichenko > --- > arch/powerpc/include/asm/pci-bridge.h | 4 +-- > arch/powerpc/kernel/pci_dn.c | 32 ++- > arch/powerpc/platforms/powernv/pci-ioda.c | 4 +-- > arch/powerpc/platforms/pseries/pci.c | 4 +-- > 4 files changed, 25 insertions(+), 19 deletions(-) > > diff --git a/arch/powerpc/include/asm/pci-bridge.h > b/arch/powerpc/include/asm/pci-bridge.h > index fc188e0e9179..6479bc96e0b6 100644 > --- a/arch/powerpc/include/asm/pci-bridge.h > +++ b/arch/powerpc/include/asm/pci-bridge.h > @@ -225,8 +225,8 @@ struct pci_dn { > extern struct pci_dn *pci_get_pdn_by_devfn(struct pci_bus *bus, > int devfn); > extern struct pci_dn *pci_get_pdn(struct pci_dev *pdev); > -extern struct pci_dn *add_dev_pci_data(struct pci_dev *pdev); > -extern void remove_dev_pci_data(struct pci_dev *pdev); > +extern struct pci_dn *pci_create_vf_pdns(struct pci_dev *pdev, int num_vfs); > +extern void pci_destroy_vf_pdns(struct pci_dev *pdev); > extern struct pci_dn *pci_add_device_node_info(struct pci_controller *hose, > struct device_node *dn); > extern void pci_remove_device_node_info(struct device_node *dn); > diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c > index 7f12882d8882..7fa362f8038d 100644 > --- a/arch/powerpc/kernel/pci_dn.c > +++ b/arch/powerpc/kernel/pci_dn.c > @@ -222,18 +222,19 @@ static struct pci_dn *pci_create_pdn_from_dev(struct > pci_dev *pdev, > return pdn; > } > > -struct pci_dn *add_dev_pci_data(struct pci_dev *pdev) > +struct pci_dn *pci_create_vf_pdns(struct pci_dev *pdev, int num_vfs) > { > + struct pci_dn *pdn = pci_get_pdn(pdev); > + > #ifdef CONFIG_PCI_IOV > - struct pci_dn *parent, *pdn; > + struct pci_dn *parent; > int i; > > /* Only support IOV for now */ > if (!pdev->is_physfn) > - return pci_get_pdn(pdev); > + return pdn; > > /* Check if VFs have been populated */ > - pdn = pci_get_pdn(pdev); > if (!pdn || (pdn->flags & PCI_DN_FLAG_IOV_VF)) > return NULL; > > @@ -242,33 +243,38 @@ struct pci_dn 
*add_dev_pci_data(struct pci_dev *pdev) > if (!parent) > return NULL; > > - for (i = 0; i < pci_sriov_get_totalvfs(pdev); i++) { > + for (i = 0; i < num_vfs; i++) { > struct eeh_dev *edev __maybe_unused; > + struct pci_dn *vpdn; > > - pdn = pci_alloc_pdn(parent, > - pci_iov_virtfn_bus(pdev, i), > - pci_iov_virtfn_devfn(pdev, i)); > - if (!pdn) { > + vpdn = pci_alloc_pdn(parent, > + pci_iov_virtfn_bus(pdev, i), > + pci_iov_virtfn_devfn(pdev, i)); > + if (!vpdn) { > dev_warn(>dev, "%s: Cannot create firmware data > for VF#%d\n", >__func__, i); > return NULL; > } > > - pdn->vf_index = i; > + vpdn->vf_index = i; > + vpdn->vendor_id = pdn->vendor_id; > + vpdn->device_id = pdn->device_id; > + vpdn->class_code = pdn->class_code; > + vpdn->pci_ext_config_space = 0; > > #ifdef CONFIG_EEH > /* Create the EEH device for the VF */ > - edev = eeh_dev_init(pdn); > + edev = eeh_dev_init(vpdn); > BUG_ON(!edev); > edev->physfn = pdev; > #endif /* CONFIG_EEH */ > } > #endif /* CONFIG_PCI_IOV */ > > - return pci_get_pdn(pdev); > + return pdn; > } > > -void remove_dev_pci_data(struct pci_dev *pdev) > +void pci_destroy_vf_pdns(struct pci_dev *pdev) > { > #ifdef CONFIG_PCI_IOV > struct pci_dn *parent; > diff --git