[PATCH 5/7] PCI: Remove unnecessary #includes of asm/pci.h
In include/linux/pci.h, we already #include asm/pci.h, so we don't need to include asm/pci.h directly. Remove the unnecessary includes. All the files here already include linux/pci.h. Signed-off-by: Bjorn Helgaas bhelg...@google.com CC: linux-al...@vger.kernel.org CC: linux-m...@linux-mips.org CC: linuxppc-dev@lists.ozlabs.org CC: linux...@vger.kernel.org CC: x...@kernel.org --- arch/alpha/kernel/core_irongate.c |1 - arch/alpha/kernel/sys_eiger.c |1 - arch/alpha/kernel/sys_nautilus.c |1 - arch/mips/pci/fixup-cobalt.c |1 - arch/mips/pci/ops-mace.c |1 - arch/mips/pci/pci-lantiq.c|1 - arch/powerpc/kernel/prom.c|1 - arch/powerpc/kernel/prom_init.c |1 - arch/sh/drivers/pci/ops-sh5.c |1 - arch/sh/drivers/pci/pci-sh5.c |1 - arch/x86/kernel/x86_init.c|1 - 11 files changed, 11 deletions(-) diff --git a/arch/alpha/kernel/core_irongate.c b/arch/alpha/kernel/core_irongate.c index 00096df..83d0a35 100644 --- a/arch/alpha/kernel/core_irongate.c +++ b/arch/alpha/kernel/core_irongate.c @@ -22,7 +22,6 @@ #include linux/bootmem.h #include asm/ptrace.h -#include asm/pci.h #include asm/cacheflush.h #include asm/tlbflush.h diff --git a/arch/alpha/kernel/sys_eiger.c b/arch/alpha/kernel/sys_eiger.c index 79d69d7..15f4208 100644 --- a/arch/alpha/kernel/sys_eiger.c +++ b/arch/alpha/kernel/sys_eiger.c @@ -22,7 +22,6 @@ #include asm/irq.h #include asm/mmu_context.h #include asm/io.h -#include asm/pci.h #include asm/pgtable.h #include asm/core_tsunami.h #include asm/hwrpb.h diff --git a/arch/alpha/kernel/sys_nautilus.c b/arch/alpha/kernel/sys_nautilus.c index 700686d..2cfaa0e 100644 --- a/arch/alpha/kernel/sys_nautilus.c +++ b/arch/alpha/kernel/sys_nautilus.c @@ -39,7 +39,6 @@ #include asm/irq.h #include asm/mmu_context.h #include asm/io.h -#include asm/pci.h #include asm/pgtable.h #include asm/core_irongate.h #include asm/hwrpb.h diff --git a/arch/mips/pci/fixup-cobalt.c b/arch/mips/pci/fixup-cobalt.c index a138e8e..b3ab593 100644 --- a/arch/mips/pci/fixup-cobalt.c +++ 
b/arch/mips/pci/fixup-cobalt.c @@ -13,7 +13,6 @@ #include linux/kernel.h #include linux/init.h -#include asm/pci.h #include asm/io.h #include asm/gt64120.h diff --git a/arch/mips/pci/ops-mace.c b/arch/mips/pci/ops-mace.c index 6b5821f..951d807 100644 --- a/arch/mips/pci/ops-mace.c +++ b/arch/mips/pci/ops-mace.c @@ -8,7 +8,6 @@ #include linux/kernel.h #include linux/pci.h #include linux/types.h -#include asm/pci.h #include asm/ip32/mace.h #if 0 diff --git a/arch/mips/pci/pci-lantiq.c b/arch/mips/pci/pci-lantiq.c index 8b117e6..c5347d9 100644 --- a/arch/mips/pci/pci-lantiq.c +++ b/arch/mips/pci/pci-lantiq.c @@ -20,7 +20,6 @@ #include linux/of_irq.h #include linux/of_pci.h -#include asm/pci.h #include asm/gpio.h #include asm/addrspace.h diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 308c5e1..00fdea2 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -46,7 +46,6 @@ #include asm/mmu.h #include asm/paca.h #include asm/pgtable.h -#include asm/pci.h #include asm/iommu.h #include asm/btext.h #include asm/sections.h diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index fd1fe4c..fcca807 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -37,7 +37,6 @@ #include asm/smp.h #include asm/mmu.h #include asm/pgtable.h -#include asm/pci.h #include asm/iommu.h #include asm/btext.h #include asm/sections.h diff --git a/arch/sh/drivers/pci/ops-sh5.c b/arch/sh/drivers/pci/ops-sh5.c index 4ce95a0..4536194 100644 --- a/arch/sh/drivers/pci/ops-sh5.c +++ b/arch/sh/drivers/pci/ops-sh5.c @@ -18,7 +18,6 @@ #include linux/delay.h #include linux/types.h #include linux/irq.h -#include asm/pci.h #include asm/io.h #include pci-sh5.h diff --git a/arch/sh/drivers/pci/pci-sh5.c b/arch/sh/drivers/pci/pci-sh5.c index 16c1e72..8229114 100644 --- a/arch/sh/drivers/pci/pci-sh5.c +++ b/arch/sh/drivers/pci/pci-sh5.c @@ -20,7 +20,6 @@ #include linux/types.h #include linux/irq.h #include 
cpu/irq.h -#include asm/pci.h #include asm/io.h #include pci-sh5.h diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 234b072..eed5625 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -11,7 +11,6 @@ #include asm/bios_ebda.h #include asm/paravirt.h #include asm/pci_x86.h -#include asm/pci.h #include asm/mpspec.h #include asm/setup.h #include asm/apic.h ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH kernel v11 21/34] powerpc/powernv/ioda2: Add TCE invalidation for all attached groups
On Fri, May 29, 2015 at 06:44:45PM +1000, Alexey Kardashevskiy wrote: The iommu_table struct keeps a list of IOMMU groups it is used for. At the moment there is just a single group attached but further patches will add TCE table sharing. When sharing is enabled, TCE cache in each PE needs to be invalidated so does the patch. This does not change pnv_pci_ioda1_tce_invalidate() as there is no plan to enable TCE table sharing on PHBs older than IODA2. Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru Reviewed-by: Gavin Shan gws...@linux.vnet.ibm.com --- Changes: v10: * new to the series --- arch/powerpc/platforms/powernv/pci-ioda.c | 35 --- 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 3fd8b18..94fccc8 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -24,6 +24,7 @@ #include linux/msi.h #include linux/memblock.h #include linux/iommu.h +#include linux/rculist.h #include asm/sections.h #include asm/io.h @@ -1764,23 +1765,15 @@ static inline void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_ioda_pe *pe) __raw_writeq(cpu_to_be64(val), phb-ioda.tce_inval_reg); } -static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl, - unsigned long index, unsigned long npages, bool rm) +static void pnv_pci_ioda2_tce_do_invalidate(unsigned pe_number, bool rm, + __be64 __iomem *invalidate, unsigned shift, + unsigned long index, unsigned long npages) The better function name would be: pnv_pci_ioda2_do_tce_invalidate(), and it seems we needn't bool rm any more since invalidate has been assigned with virtual/real address by caller. 
Thanks, Gavin { - struct iommu_table_group_link *tgl = list_first_entry_or_null( - tbl-it_group_list, struct iommu_table_group_link, - next); - struct pnv_ioda_pe *pe = container_of(tgl-table_group, - struct pnv_ioda_pe, table_group); unsigned long start, end, inc; - __be64 __iomem *invalidate = rm ? - (__be64 __iomem *)pe-phb-ioda.tce_inval_reg_phys : - pe-phb-ioda.tce_inval_reg; - const unsigned shift = tbl-it_page_shift; /* We'll invalidate DMA address in PE scope */ start = 0x2ull 60; - start |= (pe-pe_number 0xFF); + start |= (pe_number 0xFF); end = start; /* Figure out the start, end and step */ @@ -1798,6 +1791,24 @@ static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl, } } +static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl, + unsigned long index, unsigned long npages, bool rm) +{ + struct iommu_table_group_link *tgl; + + list_for_each_entry_rcu(tgl, tbl-it_group_list, next) { + struct pnv_ioda_pe *pe = container_of(tgl-table_group, + struct pnv_ioda_pe, table_group); + __be64 __iomem *invalidate = rm ? + (__be64 __iomem *)pe-phb-ioda.tce_inval_reg_phys : + pe-phb-ioda.tce_inval_reg; + + pnv_pci_ioda2_tce_do_invalidate(pe-pe_number, rm, + invalidate, tbl-it_page_shift, + index, npages); + } +} + static int pnv_ioda2_tce_build(struct iommu_table *tbl, long index, long npages, unsigned long uaddr, enum dma_data_direction direction, -- 2.4.0.rc3.8.gfb3e7d5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 5/7] PCI: Remove unnecessary #includes of asm/pci.h
On Thu, Jun 04, 2015 at 04:49:57PM -0500, Bjorn Helgaas wrote: In include/linux/pci.h, we already #include asm/pci.h, so we don't need to include asm/pci.h directly. Remove the unnecessary includes. All the files here already include linux/pci.h. Signed-off-by: Bjorn Helgaas bhelg...@google.com CC: linux-al...@vger.kernel.org CC: linux-m...@linux-mips.org CC: linuxppc-dev@lists.ozlabs.org CC: linux...@vger.kernel.org CC: x...@kernel.org --- arch/alpha/kernel/core_irongate.c |1 - arch/alpha/kernel/sys_eiger.c |1 - arch/alpha/kernel/sys_nautilus.c |1 - arch/mips/pci/fixup-cobalt.c |1 - arch/mips/pci/ops-mace.c |1 - arch/mips/pci/pci-lantiq.c|1 - arch/powerpc/kernel/prom.c|1 - arch/powerpc/kernel/prom_init.c |1 - arch/sh/drivers/pci/ops-sh5.c |1 - arch/sh/drivers/pci/pci-sh5.c |1 - arch/x86/kernel/x86_init.c|1 - 11 files changed, 11 deletions(-) arch/sh/drivers portion: Acked-by: Simon Horman horms+rene...@verge.net.au ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH kernel v11 21/34] powerpc/powernv/ioda2: Add TCE invalidation for all attached groups
On 06/05/2015 10:27 AM, Gavin Shan wrote: On Fri, May 29, 2015 at 06:44:45PM +1000, Alexey Kardashevskiy wrote: The iommu_table struct keeps a list of IOMMU groups it is used for. At the moment there is just a single group attached but further patches will add TCE table sharing. When sharing is enabled, TCE cache in each PE needs to be invalidated so does the patch. This does not change pnv_pci_ioda1_tce_invalidate() as there is no plan to enable TCE table sharing on PHBs older than IODA2. Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru Reviewed-by: Gavin Shan gws...@linux.vnet.ibm.com --- Changes: v10: * new to the series --- arch/powerpc/platforms/powernv/pci-ioda.c | 35 --- 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 3fd8b18..94fccc8 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -24,6 +24,7 @@ #include linux/msi.h #include linux/memblock.h #include linux/iommu.h +#include linux/rculist.h #include asm/sections.h #include asm/io.h @@ -1764,23 +1765,15 @@ static inline void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_ioda_pe *pe) __raw_writeq(cpu_to_be64(val), phb-ioda.tce_inval_reg); } -static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl, - unsigned long index, unsigned long npages, bool rm) +static void pnv_pci_ioda2_tce_do_invalidate(unsigned pe_number, bool rm, + __be64 __iomem *invalidate, unsigned shift, + unsigned long index, unsigned long npages) The better function name would be: pnv_pci_ioda2_do_tce_invalidate(), and Ok. it seems we needn't bool rm any more since invalidate has been assigned with virtual/real address by caller. We still need @rm here as different helpers are used for real and virt modes - __raw_rm_writeq and __raw_writeq. 
Thanks, Gavin { - struct iommu_table_group_link *tgl = list_first_entry_or_null( - tbl-it_group_list, struct iommu_table_group_link, - next); - struct pnv_ioda_pe *pe = container_of(tgl-table_group, - struct pnv_ioda_pe, table_group); unsigned long start, end, inc; - __be64 __iomem *invalidate = rm ? - (__be64 __iomem *)pe-phb-ioda.tce_inval_reg_phys : - pe-phb-ioda.tce_inval_reg; - const unsigned shift = tbl-it_page_shift; /* We'll invalidate DMA address in PE scope */ start = 0x2ull 60; - start |= (pe-pe_number 0xFF); + start |= (pe_number 0xFF); end = start; /* Figure out the start, end and step */ @@ -1798,6 +1791,24 @@ static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl, } } +static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl, + unsigned long index, unsigned long npages, bool rm) +{ + struct iommu_table_group_link *tgl; + + list_for_each_entry_rcu(tgl, tbl-it_group_list, next) { + struct pnv_ioda_pe *pe = container_of(tgl-table_group, + struct pnv_ioda_pe, table_group); + __be64 __iomem *invalidate = rm ? + (__be64 __iomem *)pe-phb-ioda.tce_inval_reg_phys : + pe-phb-ioda.tce_inval_reg; + + pnv_pci_ioda2_tce_do_invalidate(pe-pe_number, rm, + invalidate, tbl-it_page_shift, + index, npages); + } +} + static int pnv_ioda2_tce_build(struct iommu_table *tbl, long index, long npages, unsigned long uaddr, enum dma_data_direction direction, -- 2.4.0.rc3.8.gfb3e7d5 -- Alexey ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: of/dynamic: Fix test for PPC_PSERIES
On Thu, 4 Jun 2015 20:57:32 +1000 (AEST) , Michael Ellerman m...@ellerman.id.au wrote: On Thu, 2015-04-06 at 09:34:41 UTC, Geert Uytterhoeven wrote: IS_ENABLED(PPC_PSERIES) always evaluates to false, as IS_ENABLED() is supposed to be used with the full Kconfig symbol name, including the CONFIG_ prefix. Add the missing CONFIG_ prefix to fix this. Fixes: a25095d451ece23b (of: Move dynamic node fixups out of powerpc and into common code) Signed-off-by: Geert Uytterhoeven geert+rene...@glider.be --- Did this bug cause any breakage? If yes, the fix should go to stable (for v3.17 and later). Yikes. Not that I've heard of. But it's reasonably new so possibly it's not hit distros that folks tend to run on those machines. I'm also not clear how it would break, it could be subtle and we've not noticed. Nathan might have more of an idea (on CC). On my machine here everything that has an ibm,phandle also has a linux,phandle, so we wouldn't hit that code path. But I'm not sure how representative that box is. cheers Still, an obvious bug. I've picked it up and marked for stable. g. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH V3] drivers/mtd: add powernv flash MTD abstraction driver
On Tue, 2015-06-02 at 14:26 +1000, Cyril Bur wrote: Powerpc powernv platforms allow access to certain system flash devices through a firmware interface. This change adds an mtd driver for these flash devices. Minor updates from Jeremy Kerr and Joel Stanley. Signed-off-by: Cyril Bur cyril...@gmail.com Signed-off-by: Joel Stanley j...@jms.id.au Signed-off-by: Jeremy Kerr j...@ozlabs.org --- Hello Brian and MTD folk, Could I please get an ACK for Michael to take this through the powerpc tree. Thanks. Hello Brian, As we have some deadlines approaching, I am getting pressure to ensure this gets merged upstream as quickly as possible, please let me know if there is anything more which can be done. Thanks very much, Cyril V2: Address Brian Norris' review Fix typos Change from NAND flash type to NOR flash type Correctness tweaks V3: Address Neelesh Gupta's review Minor corrections Release the opal token on error Unregister mtd device on module remove --- drivers/mtd/devices/Kconfig | 8 + drivers/mtd/devices/Makefile| 1 + drivers/mtd/devices/powernv_flash.c | 286 3 files changed, 295 insertions(+) create mode 100644 drivers/mtd/devices/powernv_flash.c diff --git a/drivers/mtd/devices/Kconfig b/drivers/mtd/devices/Kconfig index c49d0b1..f73c416 100644 --- a/drivers/mtd/devices/Kconfig +++ b/drivers/mtd/devices/Kconfig @@ -195,6 +195,14 @@ config MTD_BLOCK2MTD Testing MTD users (eg JFFS2) on large media and media that might be removed during a write (using the floppy drive). +config MTD_POWERNV_FLASH + tristate powernv flash MTD driver + depends on PPC_POWERNV + help + This provides an MTD device to access flash on powernv OPAL + platforms from Linux. This device abstracts away the + firmware interface for flash access. 
+ comment Disk-On-Chip Device Drivers config MTD_DOCG3 diff --git a/drivers/mtd/devices/Makefile b/drivers/mtd/devices/Makefile index f0b0e61..7912d3a 100644 --- a/drivers/mtd/devices/Makefile +++ b/drivers/mtd/devices/Makefile @@ -16,6 +16,7 @@ obj-$(CONFIG_MTD_SPEAR_SMI) += spear_smi.o obj-$(CONFIG_MTD_SST25L) += sst25l.o obj-$(CONFIG_MTD_BCM47XXSFLASH) += bcm47xxsflash.o obj-$(CONFIG_MTD_ST_SPI_FSM)+= st_spi_fsm.o +obj-$(CONFIG_MTD_POWERNV_FLASH) += powernv_flash.o CFLAGS_docg3.o += -I$(src) diff --git a/drivers/mtd/devices/powernv_flash.c b/drivers/mtd/devices/powernv_flash.c new file mode 100644 index 000..777e09f --- /dev/null +++ b/drivers/mtd/devices/powernv_flash.c @@ -0,0 +1,286 @@ +/* + * OPAL PNOR flash MTD abstraction + * + * IBM 2015 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#include linux/kernel.h +#include linux/module.h +#include linux/errno.h +#include linux/of.h +#include linux/of_address.h +#include linux/platform_device.h +#include linux/string.h +#include linux/slab.h +#include linux/mtd/mtd.h +#include linux/mtd/partitions.h + +#include linux/debugfs.h +#include linux/seq_file.h + +#include asm/opal.h + + +/* + * This driver creates the a Linux MTD abstraction for platform PNOR flash + * backed by OPAL calls + */ + +struct powernv_flash { + struct mtd_info mtd; + u32 id; +}; + +enum flash_op { + FLASH_OP_READ, + FLASH_OP_WRITE, + FLASH_OP_ERASE, +}; + +static int powernv_flash_async_op(struct mtd_info *mtd, enum flash_op op, + loff_t offset, size_t len, size_t *retlen, u_char *buf) +{ + struct powernv_flash *info = (struct powernv_flash *)mtd-priv; + struct device *dev = mtd-dev; + int token; + struct opal_msg msg; + int rc; + + dev_dbg(dev, %s(op=%d, offset=0x%llx, len=%zu)\n, + __func__, op, offset, len); + + token = opal_async_get_token_interruptible(); + if (token 0) { + if (token != -ERESTARTSYS) + dev_err(dev, Failed to get an async token\n); + + return token; + } + + switch (op) { + case FLASH_OP_READ: + rc = opal_flash_read(info-id, offset, __pa(buf), len, token); + break; + case FLASH_OP_WRITE: + rc = opal_flash_write(info-id, offset, __pa(buf), len, token); + break; + case FLASH_OP_ERASE: + rc = opal_flash_erase(info-id, offset, len, token); +
[PATCH] cxl: Reset default context for vPHB on release
When we release the device, we should also invalidate the default context. With this, cxl_get_context() will return NULL after removal. Signed-off-by: Michael Neuling mi...@neuling.org --- drivers/misc/cxl/vphb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c index f733309..b1d1983a 100644 --- a/drivers/misc/cxl/vphb.c +++ b/drivers/misc/cxl/vphb.c @@ -72,6 +72,7 @@ static void cxl_pci_disable_device(struct pci_dev *dev) dev_err(&dev->dev, "Default context started\n"); return; } + dev->dev.archdata.cxl_ctx = NULL; cxl_release_context(ctx); } } -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: of/dynamic: Fix test for PPC_PSERIES
On 06/04/2015 05:57 AM, Michael Ellerman wrote: On Thu, 2015-04-06 at 09:34:41 UTC, Geert Uytterhoeven wrote: IS_ENABLED(PPC_PSERIES) always evaluates to false, as IS_ENABLED() is supposed to be used with the full Kconfig symbol name, including the CONFIG_ prefix. Add the missing CONFIG_ prefix to fix this. Fixes: a25095d451ece23b (of: Move dynamic node fixups out of powerpc and into common code) Signed-off-by: Geert Uytterhoeven geert+rene...@glider.be --- Did this bug cause any breakage? If yes, the fix should go to stable (for v3.17 and later). Yikes. Not that I've heard of. But it's reasonably new so possibly it's not hit distros that folks tend to run on those machines. I think we do have some distros that have picked this up. I'm also not clear how it would break, it could be subtle and we've not noticed. The only place I can find that this might cause an issue is during device tree updating that pseries does after a live migration or suspend/resume. When removing or updating a device tree node we look up the node by ibm,phandle and without this patch we wouldn't find these nodes. I have not seen any issues because of this but I think pushing this to stable would be good. -Nathan ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH kernel v11 16/34] powerpc/spapr: vfio: Replace iommu_table with iommu_table_group
On Fri, May 29, 2015 at 06:44:40PM +1000, Alexey Kardashevskiy wrote: Modern IBM POWERPC systems support multiple (currently two) TCE tables per IOMMU group (a.k.a. PE). This adds a iommu_table_group container for TCE tables. Right now just one table is supported. This defines iommu_table_group struct which stores pointers to iommu_group and iommu_table(s). This replaces iommu_table with iommu_table_group where iommu_table was used to identify a group: - iommu_register_group(); - iommudata of generic iommu_group; This removes @data from iommu_table as it_table_group provides same access to pnv_ioda_pe. For IODA, instead of embedding iommu_table, the new iommu_table_group keeps pointers to those. The iommu_table structs are allocated dynamically. For P5IOC2, both iommu_table_group and iommu_table are embedded into PE struct. As there is no EEH and SRIOV support for P5IOC2, iommu_free_table() should not be called on iommu_table struct pointers so we can keep it embedded in pnv_phb::p5ioc2. For pSeries, this replaces multiple calls of kzalloc_node() with a new iommu_pseries_alloc_group() helper and stores the table group struct pointer into the pci_dn struct. For release, a iommu_table_free_group() helper is added. This moves iommu_table struct allocation from SR-IOV code to the generic DMA initialization code in pnv_pci_ioda_setup_dma_pe and pnv_pci_ioda2_setup_dma_pe as this is where DMA is actually initialized. This change is here because those lines had to be changed anyway. This should cause no behavioural change. 
Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru [aw: for the vfio related changes] Acked-by: Alex Williamson alex.william...@redhat.com Reviewed-by: Gavin Shan gws...@linux.vnet.ibm.com --- Changes: v11: * iommu_table_group moved outside #ifdef CONFIG_IOMMU_API as iommu_table is dynamically allocated and it needs a pointer to PE and iommu_table_group is this pointer v10: * new to the series, separated from powerpc/spapr: vfio: Switch from iommu_table to new iommu_table_group * iommu_table is not embedded into iommu_table_group but allocated dynamically in most cases * iommu_table allocation is moved to a single place for IODA2's pnv_pci_ioda_setup_dma_pe where it belongs to * added list of groups into iommu_table; most of the code just looks at the first item to keep the patch simpler --- arch/powerpc/include/asm/iommu.h| 19 ++--- arch/powerpc/include/asm/pci-bridge.h | 2 +- arch/powerpc/kernel/iommu.c | 17 ++--- arch/powerpc/platforms/powernv/pci-ioda.c | 55 +++--- arch/powerpc/platforms/powernv/pci-p5ioc2.c | 18 +++-- arch/powerpc/platforms/powernv/pci.h| 3 +- arch/powerpc/platforms/pseries/iommu.c | 107 +++- drivers/vfio/vfio_iommu_spapr_tce.c | 23 +++--- 8 files changed, 152 insertions(+), 92 deletions(-) diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index e2a45c3..5a7267f 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -91,14 +91,9 @@ struct iommu_table { struct iommu_pool pools[IOMMU_NR_POOLS]; unsigned long *it_map; /* A simple allocation bitmap for now */ unsigned long it_page_shift;/* table iommu page size */ -#ifdef CONFIG_IOMMU_API - struct iommu_group *it_group; -#endif + struct iommu_table_group *it_table_group; struct iommu_table_ops *it_ops; void (*set_bypass)(struct iommu_table *tbl, bool enable); -#ifdef CONFIG_PPC_POWERNV - void *data; -#endif }; /* Pure 2^n version of get_order */ @@ -129,14 +124,22 @@ extern void iommu_free_table(struct iommu_table *tbl, const char 
*node_name); */ extern struct iommu_table *iommu_init_table(struct iommu_table * tbl, int nid); +#define IOMMU_TABLE_GROUP_MAX_TABLES 1 + +struct iommu_table_group { + struct iommu_group *group; + struct iommu_table *tables[IOMMU_TABLE_GROUP_MAX_TABLES]; +}; + Number of TCE tables supported in group would be worthy to be changed dynamically in long run, but not for now. P7IOC has one table per group while PHB3 has two tables per group. Thanks, Gavin #ifdef CONFIG_IOMMU_API -extern void iommu_register_group(struct iommu_table *tbl, + +extern void iommu_register_group(struct iommu_table_group *table_group, int pci_domain_number, unsigned long pe_num); extern int iommu_add_device(struct device *dev); extern void iommu_del_device(struct device *dev); extern int __init tce_iommu_bus_notifier_init(void); #else -static inline void iommu_register_group(struct iommu_table *tbl, +static inline void iommu_register_group(struct iommu_table_group *table_group, int pci_domain_number, unsigned long pe_num) { diff --git a/arch/powerpc/include/asm/pci-bridge.h
Re: [PATCH kernel v11 20/34] powerpc/powernv/ioda2: Move TCE kill register address to PE
On Fri, May 29, 2015 at 06:44:44PM +1000, Alexey Kardashevskiy wrote: At the moment the DMA setup code looks for the ibm,opal-tce-kill property which contains the TCE kill register address. Writing to this register invalidates TCE cache on IODA/IODA2 hub. This moves the register address from iommu_table to pnv_phb as this register belongs to PHB and invalidates TCE cache for all tables of all attached PEs. This moves the property reading/remapping code to a helper which is called when DMA is being configured for PE and which does DMA setup for both IODA1 and IODA2. This adds a new pnv_pci_ioda2_tce_invalidate_entire() helper which invalidates cache for the entire table. It should be called after every call to opal_pci_map_pe_dma_window(). It was not required before because there was just a single TCE table and 64bit DMA was handled via bypass window (which has no table so no cache was used) but this is going to change with Dynamic DMA windows (DDW). Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru Reviewed-by: Gavin Shan gws...@linux.vnet.ibm.com Thanks, Gavin --- Changes: v11: * s/pnv_pci_ioda2_tvt_invalidate/pnv_pci_ioda2_tce_invalidate_entire/g (cannot think of better-and-shorter name) * moved tce_inval_reg_phys/tce_inval_reg to pnv_phb v10: * fixed error from checkpatch.pl * removed comment at ibm,opal-tce-kill parsing as irrelevant * s/addr/val/ in pnv_pci_ioda2_tvt_invalidate() as it was not a kernel address v9: * new in the series --- arch/powerpc/platforms/powernv/pci-ioda.c | 66 ++- arch/powerpc/platforms/powernv/pci.h | 7 +++- 2 files changed, 44 insertions(+), 29 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 1d0bb5b..3fd8b18 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1679,8 +1679,8 @@ static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl, struct pnv_ioda_pe *pe = container_of(tgl-table_group, struct 
pnv_ioda_pe, table_group); __be64 __iomem *invalidate = rm ? - (__be64 __iomem *)pe-tce_inval_reg_phys : - (__be64 __iomem *)tbl-it_index; + (__be64 __iomem *)pe-phb-ioda.tce_inval_reg_phys : + pe-phb-ioda.tce_inval_reg; unsigned long start, end, inc; const unsigned shift = tbl-it_page_shift; @@ -1751,6 +1751,19 @@ static struct iommu_table_ops pnv_ioda1_iommu_ops = { .get = pnv_tce_get, }; +static inline void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_ioda_pe *pe) +{ + /* 01xb - invalidate TCEs that match the specified PE# */ + unsigned long val = (0x4ull 60) | (pe-pe_number 0xFF); + struct pnv_phb *phb = pe-phb; + + if (!phb-ioda.tce_inval_reg) + return; + + mb(); /* Ensure above stores are visible */ + __raw_writeq(cpu_to_be64(val), phb-ioda.tce_inval_reg); +} + static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl, unsigned long index, unsigned long npages, bool rm) { @@ -1761,8 +1774,8 @@ static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl, struct pnv_ioda_pe, table_group); unsigned long start, end, inc; __be64 __iomem *invalidate = rm ? - (__be64 __iomem *)pe-tce_inval_reg_phys : - (__be64 __iomem *)tbl-it_index; + (__be64 __iomem *)pe-phb-ioda.tce_inval_reg_phys : + pe-phb-ioda.tce_inval_reg; const unsigned shift = tbl-it_page_shift; /* We'll invalidate DMA address in PE scope */ @@ -1820,7 +1833,6 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, { struct page *tce_mem = NULL; - const __be64 *swinvp; struct iommu_table *tbl; unsigned int i; int64_t rc; @@ -1877,20 +1889,11 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, base 28, IOMMU_PAGE_SHIFT_4K); /* OPAL variant of P7IOC SW invalidated TCEs */ - swinvp = of_get_property(phb-hose-dn, ibm,opal-tce-kill, NULL); - if (swinvp) { - /* We need a couple more fields -- an address and a data - * to or. Since the bus is only printed out on table free - * errors, and on the first pass the data will be a relative - * bus number, print that out instead. 
- */ - pe-tce_inval_reg_phys = be64_to_cpup(swinvp); - tbl-it_index = (unsigned long)ioremap(pe-tce_inval_reg_phys, - 8); + if (phb-ioda.tce_inval_reg) tbl-it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE | TCE_PCI_SWINV_PAIR); - } + tbl-it_ops = pnv_ioda1_iommu_ops; iommu_init_table(tbl, phb-hose-node); @@ -1971,12 +1974,24 @@ static struct
Re: [v6] powerpc/powernv: Add poweroff (EPOW, DPO) events support for PowerNV platform
On Thu, 2015-04-06 at 12:03:17 UTC, Vipin K Parashar wrote: This patch adds support for FSP (Flexible Service Processor) EPOW (Early Power Off Warning) and DPO (Delayed Power Off) events for the PowerNV platform. EPOW events are generated by FSP due to various critical system conditions that require system shutdown. A few examples of these conditions are high ambient temperature or system running on UPS power with low UPS battery. DPO event is generated in response to admin initiated system shutdown request. Upon receipt of EPOW and DPO events the host kernel invokes orderly_poweroff() for performing graceful system shutdown. Reviewed-by: Joel Stanley j...@jms.id.au Reviewed-by: Vaibhav Jain vaib...@linux.vnet.ibm.com Reviewed-by: Michael Ellerman m...@ellerman.id.au Signed-off-by: Vipin K Parashar vi...@linux.vnet.ibm.com Hi Vipin, One issue, on mambo I'm seeing: [666973573,3] OPAL: Called with bad token 105 ! opal-power: Existing DPO event detected. reboot: Failed to start orderly shutdown: forcing the issue reboot: Power down [684431322,5] OPAL: Shutdown request type 0x0... ie. at boot it shuts down immediately. The problem is in here I think: + /* Check for DPO event */ + rc = opal_get_dpo_status(opal_dpo_timeout); + if (rc != OPAL_WRONG_STATE) { + pr_info(Existing DPO event detected.\n); + return true; + } This also makes me think you probably haven't tested this on a BMC machine? cheers ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 2/2] rheap: move rheap.c from arch/powerpc/lib/ to lib/
On Thu, 2015-06-04 at 04:27 -0500, Zhao Qiang-B45475 wrote: On Thu, 2015-06-04 at 2:14PM, Wood Scott wrote: -Original Message- From: Wood Scott-B07421 Sent: Thursday, June 04, 2015 2:14 PM To: Zhao Qiang-B45475 Cc: linuxppc-dev@lists.ozlabs.org; Xie Xiaobo-R63061 Subject: Re: [PATCH 2/2] rheap: move rheap.c from arch/powerpc/lib/ to lib/ On Thu, 2015-06-04 at 00:56 -0500, Zhao Qiang-B45475 wrote: On Thu, 2015-05-28 at 1:37AM +0800, Wood Scott wrote: -Original Message- From: Wood Scott-B07421 Sent: Thursday, May 28, 2015 1:37 AM To: Zhao Qiang-B45475 Cc: linuxppc-dev@lists.ozlabs.org; Wood Scott-B07421; Xie Xiaobo-R63061 Subject: Re: [PATCH 2/2] rheap: move rheap.c from arch/powerpc/lib/ to lib/ On Wed, 2015-05-27 at 17:12 +0800, Zhao Qiang wrote: qe need to use the rheap, so move it to public directory. You've been previously asked to use lib/genalloc.c rather than introduce duplicate functionality into /lib. NACK. Can't use lib/genalloc.c instead of rheap.c. Qe need to alloc muram of qe, not DIMM. lib/genalloc.h is not for allocating main memory. It is for allocating special regions. It is serving the same purpose as rheap. I need to use the func rh_alloc_align, what is the similar func? I just find a func gen_pool_first_fit_order_align. I don't see anywhere the QE code currently calls rh_alloc_align() -- and cpm_muram_init() calls rh_init() with an alignment of 1. If you do need some functionality that genalloc doesn't offer, add the functionality to genalloc. -Scott ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH kernel v11 33/34] vfio: powerpc/spapr: Register memory and define IOMMU v2
On Fri, May 29, 2015 at 06:44:57PM +1000, Alexey Kardashevskiy wrote: The existing implementation accounts the whole DMA window in the locked_vm counter. This is going to be worse with multiple containers and huge DMA windows. Also, real-time accounting would require additional tracking of accounted pages due to the page size difference - IOMMU uses 4K pages and system uses 4K or 64K pages. Another issue is that actual pages pinning/unpinning happens on every DMA map/unmap request. This does not affect the performance much now as we spend way too much time now on switching context between guest/userspace/host but this will start to matter when we add in-kernel DMA map/unmap acceleration. This introduces a new IOMMU type for SPAPR - VFIO_SPAPR_TCE_v2_IOMMU. New IOMMU deprecates VFIO_IOMMU_ENABLE/VFIO_IOMMU_DISABLE and introduces 2 new ioctls to register/unregister DMA memory - VFIO_IOMMU_SPAPR_REGISTER_MEMORY and VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY - which receive user space address and size of a memory region which needs to be pinned/unpinned and counted in locked_vm. New IOMMU splits physical pages pinning and TCE table update into 2 different operations. It requires: 1) guest pages to be registered first 2) subsequent map/unmap requests to work only with pre-registered memory. For the default single window case this means that the entire guest (instead of 2GB) needs to be pinned before using VFIO. When a huge DMA window is added, no additional pinning will be required, otherwise it would be guest RAM + 2GB. The new memory registration ioctls are not supported by VFIO_SPAPR_TCE_IOMMU. Dynamic DMA window and in-kernel acceleration will require memory to be preregistered in order to work. The accounting is done per the user process. This advertises v2 SPAPR TCE IOMMU and restricts what the userspace can do with v1 or v2 IOMMUs. 
In order to support memory pre-registration, we need a way to track the use of every registered memory region and only allow unregistration if a region is not in use anymore. So we need a way to tell from what region the just cleared TCE was from. This adds a userspace view of the TCE table into iommu_table struct. It contains userspace address, one per TCE entry. The table is only allocated when the ownership over an IOMMU group is taken which means it is only used from outside of the powernv code (such as VFIO). Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru [aw: for the vfio related changes] Acked-by: Alex Williamson alex.william...@redhat.com --- Changes: v11: * mm_iommu_put() does not return a code so this does not check it * moved v2 in tce_container to pack the struct v10: * moved it_userspace allocation to vfio_iommu_spapr_tce as it VFIO specific thing * squashed powerpc/iommu: Add userspace view of TCE table into this as it is a part of IOMMU v2 * s/tce_iommu_use_page_v2/tce_iommu_prereg_ua_to_hpa/ * fixed some function names to have tce_iommu_ in the beginning rather just tce_ * as mm_iommu_mapped_inc() can now fail, check for the return code v9: * s/tce_get_hva_cached/tce_iommu_use_page_v2/ v7: * now memory is registered per mm (i.e. 
process) * moved memory registration code to powerpc/mmu * merged vfio: powerpc/spapr: Define v2 IOMMU into this * limited new ioctls to v2 IOMMU * updated doc * unsupported ioclts return -ENOTTY instead of -EPERM v6: * tce_get_hva_cached() returns hva via a pointer v4: * updated docs * s/kzmalloc/vzalloc/ * in tce_pin_pages()/tce_unpin_pages() removed @vaddr, @size and replaced offset with index * renamed vfio_iommu_type_register_memory to vfio_iommu_spapr_register_memory and removed duplicating vfio_iommu_spapr_register_memory --- Documentation/vfio.txt | 31 ++- arch/powerpc/include/asm/iommu.h| 6 + drivers/vfio/vfio_iommu_spapr_tce.c | 512 ++-- include/uapi/linux/vfio.h | 27 ++ 4 files changed, 487 insertions(+), 89 deletions(-) diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt index 96978ec..7dcf2b5 100644 --- a/Documentation/vfio.txt +++ b/Documentation/vfio.txt @@ -289,10 +289,12 @@ PPC64 sPAPR implementation note This implementation has some specifics: -1) Only one IOMMU group per container is supported as an IOMMU group -represents the minimal entity which isolation can be guaranteed for and -groups are allocated statically, one per a Partitionable Endpoint (PE) +1) On older systems (POWER7 with P5IOC2/IODA1) only one IOMMU group per +container is supported as an IOMMU table is allocated at the boot time, +one table per a IOMMU group which is a Partitionable Endpoint (PE) (PE is often a PCI domain but not always). +Newer systems (POWER8 with IODA2) have improved hardware design which allows +to remove this limitation and have multiple IOMMU groups per a VFIO container. 2) The hardware supports so called DMA windows - the PCI address range within which DMA transfer is allowed, any attempt to access address space @@ -427,6 +429,29 @@
Re: [PATCH kernel v11 05/34] powerpc/iommu: Always release iommu_table in iommu_free_table()
On Fri, May 29, 2015 at 06:44:29PM +1000, Alexey Kardashevskiy wrote: At the moment iommu_free_table() only releases memory if the table was initialized for the platform code use, i.e. it had it_map initialized (whose purpose is to track DMA memory space use). With dynamic DMA windows, we will need to be able to release iommu_table even if it was used for VFIO, in which case it_map is NULL; this patch makes that possible. Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru Reviewed-by: Gavin Shan gws...@linux.vnet.ibm.com Thanks, Gavin --- Changes: v11: * fixed parameter checks --- arch/powerpc/kernel/iommu.c | 8 +--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 3d47eb3..73eb39a 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -713,9 +713,11 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name) unsigned long bitmap_sz; unsigned int order; - if (!tbl || !tbl->it_map) { - printk(KERN_ERR "%s: expected TCE map for %s\n", __func__, - node_name); + if (!tbl) + return; + + if (!tbl->it_map) { + kfree(tbl); return; } -- 2.4.0.rc3.8.gfb3e7d5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 4/7] PCI: Include linux/pci.h, not asm/pci.h
We already include <asm/pci.h> from <linux/pci.h>, so just include <linux/pci.h> directly. Signed-off-by: Bjorn Helgaas bhelg...@google.com CC: linuxppc-dev@lists.ozlabs.org CC: linux-s...@vger.kernel.org --- arch/powerpc/platforms/52xx/mpc52xx_pci.c |2 +- arch/s390/kernel/suspend.c|2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pci.c b/arch/powerpc/platforms/52xx/mpc52xx_pci.c index e2d401a..6eb3b2a 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_pci.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_pci.c @@ -12,7 +12,7 @@ #undef DEBUG -#include <asm/pci.h> +#include <linux/pci.h> #include <asm/mpc52xx.h> #include <asm/delay.h> #include <asm/machdep.h> diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c index d3236c9..39e2f41 100644 --- a/arch/s390/kernel/suspend.c +++ b/arch/s390/kernel/suspend.c @@ -9,10 +9,10 @@ #include <linux/pfn.h> #include <linux/suspend.h> #include <linux/mm.h> +#include <linux/pci.h> #include <asm/ctl_reg.h> #include <asm/ipl.h> #include <asm/cio.h> -#include <asm/pci.h> #include <asm/sections.h> #include "entry.h" ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [v6] powerpc/powernv: Add poweroff (EPOW, DPO) events support for PowerNV platform
On Fri, 2015-06-05 at 08:01 +1000, Michael Ellerman wrote: On Thu, 2015-04-06 at 12:03:17 UTC, Vipin K Parashar wrote: This patch adds support for FSP (Flexible Service Processor) EPOW (Early Power Off Warning) and DPO (Delayed Power Off) events for the PowerNV platform. EPOW events are generated by FSP due to various critical system conditions that require system shutdown. A few examples of these conditions are high ambient temperature or system running on UPS power with low UPS battery. DPO event is generated in response to admin initiated system shutdown request. Upon receipt of EPOW and DPO events the host kernel invokes orderly_poweroff() for performing graceful system shutdown. Reviewed-by: Joel Stanley j...@jms.id.au Reviewed-by: Vaibhav Jain vaib...@linux.vnet.ibm.com Reviewed-by: Michael Ellerman m...@ellerman.id.au Signed-off-by: Vipin K Parashar vi...@linux.vnet.ibm.com Hi Vipin, And on my Tuleta I get: opal-power: OPAL EPOW, DPO support detected. opal-power: Existing EPOW event detected. 
reboot: Failed to start orderly shutdown: forcing the issue [ cut here ] WARNING: at kernel/workqueue.c:818 Modules linked in: CPU: 3 PID: 26 Comm: migration/3 Not tainted 4.1.0-rc3-13669-g704921b #80 task: c00ff1745440 ti: c00ff17ac000 task.ti: c00ff17ac000 NIP: c00ba48c LR: c00ba430 CTR: c00dbac0 REGS: c00ff17af7d0 TRAP: 0700 Not tainted (4.1.0-rc3-13669-g704921b) MSR: 900100029033 SF,HV,EE,ME,IR,DR,RI,LE CR: 2044 XER: CFAR: c00ba44c SOFTE: 0 GPR00: c00cec7c c00ff17afa50 c0d7ce20 c00ff160fcc0 GPR04: c0e10468 GPR08: c0d2ce20 c00ff90dd400 0001 c00ff901dc98 GPR12: 4082 c1dc0d80 c00c0b18 c00ff817a540 GPR16: GPR20: 000ff838 GPR24: c0c9dc00 c0daa8e0 c00ff901dc00 GPR28: 0004 c00ff174ae04 c00ff901dc00 NIP [c00ba48c] wq_worker_waking_up+0x7c/0xa0 LR [c00ba430] wq_worker_waking_up+0x20/0xa0 Call Trace: [c00ff17afa50] [c00ff1749bc0] 0xc00ff1749bc0 (unreliable) [c00ff17afa80] [c00cec7c] ttwu_do_activate.constprop.76+0x6c/0xa0 [c00ff17afab0] [c00d2958] try_to_wake_up+0x208/0x4a0 [c00ff17afb30] [c00eab94] __wake_up_common+0x84/0xf0 [c00ff17afb90] [c00eb744] complete+0x54/0x90 [c00ff17afbd0] [c014bac4] cpu_stop_signal_done+0x54/0x70 [c00ff17afbf0] [c014c324] cpu_stopper_thread+0xd4/0x1f0 [c00ff17afd20] [c00c5b20] smpboot_thread_fn+0x280/0x290 [c00ff17afd80] [c00c0c18] kthread+0x108/0x130 [c00ff17afe30] [c000956c] ret_from_kernel_thread+0x5c/0x70 Instruction dump: 7d00512d 40c2fff4 38210030 e8010010 ebe1fff8 7c0803a6 4e800020 6000 6042 3d02fffb 8948db7e 694a0001 0b0a 2faa 41feffbc 3941 ---[ end trace de25982dcf3cffd9 ]--- reboot: Power down cheers ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: of: clean-up unnecessary libfdt include paths
On Thu, Jun 4, 2015 at 5:20 AM, Michael Ellerman m...@ellerman.id.au wrote: On Wed, 2015-03-06 at 05:10:25 UTC, Rob Herring wrote: With the latest dtc import include fixups, it is no longer necessary to add explicit include paths to use libfdt. Remove these across the kernel. What are the latest dtc import include fixups? Changing the scripts/dtc/libfdt/libfdt.h includes from <> to "". The import script does this now and the recent import in my for-next tree has this. I'll clarify this in the commit message. diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index c1ebbda..c16e836 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -2,7 +2,6 @@ # Makefile for the linux kernel. # -CFLAGS_prom.o= -I$(src)/../../../scripts/dtc/libfdt CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror Acked-by: Michael Ellerman m...@ellerman.id.au Thanks. Rob ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [3/3,v3] powerpc/powernv: Add opal-prd channel
Hi Michael, Sorry, I put this in but then hit the build break, I was going to fix it up but would rather you did and tested it, so we may as well do another review :) whee! @@ -0,0 +1,58 @@ +/* + * OPAL Runtime Diagnostics interface driver + * Supported on POWERNV platform + * + * (C) Copyright IBM 2015 Usual syntax is: Copyright IBM Corporation 2015 OK, fixed. + * + * Author: Vaidyanathan Srinivasan svaidy at linux.vnet.ibm.com + * Author: Jeremy Kerr j...@ozlabs.org I'd rather you dropped these, they'll just bit rot, but if you insist I don't care that much. Yep, I'd rather remove them too. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. As pointed out by Daniel, we should probably be using the version 2 only language on new files. Fixed. +vma-vm_page_prot = phys_mem_access_prot(file, vma-vm_pgoff, + size, vma-vm_page_prot) +| _PAGE_SPECIAL; This doesn't build with CONFIG_STRICT_MM_TYPECHECKS=y: arch/powerpc/platforms/powernv/opal-prd.c:131:5: error: invalid operands to binary | (have ‘pgprot_t’ and ‘int’) | _PAGE_SPECIAL; OK, new patch coming with the proper pgprot macros. +switch(cmd) { ^ space please Fixed. +pr_devel(ioctl SCOM_READ: chip %llx addr %016llx +data %016llx rc %lld\n, Don't split the string please. OK, but this makes our lines 80 chars. Assuming that'll be okay. +struct file_operations opal_prd_fops = { This can be static const I think. Indeed it can! Updated. +static struct miscdevice opal_prd_dev = { +.minor = MISC_DYNAMIC_MINOR, +.name = opal-prd, +.fops = opal_prd_fops, White space is messed up here, should be leading tabs. [tabs-spaces-both.png] Thanks for the review, new patch coming soon. Cheers, Jeremy ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 3/3 v4] powerpc/powernv: Add opal-prd channel
This change adds a char device to access the PRD (processor runtime diagnostics) channel to OPAL firmware. Includes contributions from Vaidyanathan Srinivasan, Neelesh Gupta Vishal Kulkarni. Signed-off-by: Neelesh Gupta neele...@linux.vnet.ibm.com Signed-off-by: Jeremy Kerr j...@ozlabs.org Acked-by: Stewart Smith stew...@linux.vnet.ibm.com --- v4: - Address reviews from mpe: - GPLv2+ - GPLv2, fix copyrights, remove authors - fix pgprot manipulations - formatting space fixes - constify opal_prd_fops v3: - Add versioning description and reserved fields in opal_prd_info for future expansion - Fix node leak in opal_prd_range_is_valid - Explain open() probe() semantics - Fix miscdev_register error path --- arch/powerpc/include/asm/opal-api.h| 21 arch/powerpc/include/asm/opal.h|1 arch/powerpc/include/uapi/asm/opal-prd.h | 58 ++ arch/powerpc/platforms/powernv/Kconfig |7 arch/powerpc/platforms/powernv/Makefile|1 arch/powerpc/platforms/powernv/opal-prd.c | 445 + arch/powerpc/platforms/powernv/opal-wrappers.S |1 arch/powerpc/platforms/powernv/opal.c |4 8 files changed, 536 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index 0321a90..2407f12 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -153,7 +153,8 @@ #define OPAL_FLASH_READ110 #define OPAL_FLASH_WRITE 111 #define OPAL_FLASH_ERASE 112 -#define OPAL_LAST 112 +#define OPAL_PRD_MSG 113 +#define OPAL_LAST 113 /* Device tree flags */ @@ -352,6 +353,7 @@ enum opal_msg_type { OPAL_MSG_SHUTDOWN, /* params[0] = 1 reboot, 0 shutdown */ OPAL_MSG_HMI_EVT, OPAL_MSG_DPO, + OPAL_MSG_PRD, OPAL_MSG_TYPE_MAX, }; @@ -674,6 +676,23 @@ typedef struct oppanel_line { __be64 line_len; } oppanel_line_t; +enum opal_prd_msg_type { + OPAL_PRD_MSG_TYPE_INIT = 0, /* HBRT -- OPAL */ + OPAL_PRD_MSG_TYPE_FINI, /* HBRT/kernel -- OPAL */ + OPAL_PRD_MSG_TYPE_ATTN, /* HBRT -- OPAL */ + OPAL_PRD_MSG_TYPE_ATTN_ACK, /* HBRT -- OPAL */ + 
OPAL_PRD_MSG_TYPE_OCC_ERROR,/* HBRT -- OPAL */ + OPAL_PRD_MSG_TYPE_OCC_RESET,/* HBRT -- OPAL */ +}; + +struct opal_prd_msg_header { + uint8_t type; + uint8_t pad[1]; + __be16 size; +}; + +struct opal_prd_msg; + /* * SG entries * diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 042af1a..93704af 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -193,6 +193,7 @@ int64_t opal_ipmi_recv(uint64_t interface, struct opal_ipmi_msg *msg, uint64_t *msg_len); int64_t opal_i2c_request(uint64_t async_token, uint32_t bus_id, struct opal_i2c_request *oreq); +int64_t opal_prd_msg(struct opal_prd_msg *msg); int64_t opal_flash_read(uint64_t id, uint64_t offset, uint64_t buf, uint64_t size, uint64_t token); diff --git a/arch/powerpc/include/uapi/asm/opal-prd.h b/arch/powerpc/include/uapi/asm/opal-prd.h new file mode 100644 index 000..319ff4a --- /dev/null +++ b/arch/powerpc/include/uapi/asm/opal-prd.h @@ -0,0 +1,58 @@ +/* + * OPAL Runtime Diagnostics interface driver + * Supported on POWERNV platform + * + * (C) Copyright IBM 2015 + * + * Author: Vaidyanathan Srinivasan svaidy at linux.vnet.ibm.com + * Author: Jeremy Kerr j...@ozlabs.org + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _UAPI_ASM_POWERPC_OPAL_PRD_H_ +#define _UAPI_ASM_POWERPC_OPAL_PRD_H_ + +#include linux/types.h + +/** + * The version of the kernel interface of the PRD system. This describes the + * interface available for the /dev/opal-prd device. 
The actual PRD message + * layout and content is private to the firmware -- userspace interface, so + * is not covered by this versioning. + * + * Future interface versions are backwards-compatible; if a later kernel + * version is encountered, functionality provided in earlier versions + * will work. + */ +#define OPAL_PRD_KERNEL_VERSION1 + +#define OPAL_PRD_GET_INFO _IOR('o', 0x01, struct opal_prd_info) +#define OPAL_PRD_SCOM_READ
Re: [PATCH] cpufreq: qoriq: optimize the CPU frequency switching time
On 04-06-15, 14:25, yuantian.t...@freescale.com wrote: From: Tang Yuantian yuantian.t...@freescale.com Each time the CPU switches its frequency, the clock nodes in DTS are walked through to find proper clock source. This is very time-consuming, for example, it is up to 500+ us on T4240. Besides, switching time varies from clock to clock. To optimize this, each input clock of CPU is buffered, so that it can be picked up instantly when needed. Since for each CPU each input clock is stored in a pointer which takes 4 or 8 bytes memory and normally there are several input clocks per CPU, that will not take much memory as well. Not sure how it got included in this form in the first place. :) Signed-off-by: Tang Yuantian yuantian.t...@freescale.com --- drivers/cpufreq/qoriq-cpufreq.c | 32 +--- 1 file changed, 21 insertions(+), 11 deletions(-) Acked-by: Viresh Kumar viresh.ku...@linaro.org -- viresh ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v5 02/42] powerpc/powernv: Enable M64 on P7IOC
The patch enables M64 window on P7IOC, which has been enabled on PHB3. Different from PHB3 where 16 M64 BARs are supported and each of them can be owned by one particular PE# exclusively or divided evenly to 256 segments, each P7IOC PHB has 16 M64 BARs and each of them are divided into 8 segments. So each P7IOC PHB can support 128 M64 segments only. Also, P7IOC has M64DT, which helps mapping one particular M64 segment# to arbitrary PE#. PHB3 doesn't have M64DT, indicating that one M64 segment can only be pinned to the fixed PE#. In order to have similar logic to support M64 for PHB3 and P7IOC, we just provide 128 M64 (16 BARs) segments and fixed mapping between PE# and M64 segment# on P7IOC. In turn, we just need different phb-init_m64() hooks for P7IOC and PHB3 to support M64. Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com --- v5: * Pinned OPAL API return value type to int64_t * Don't initialize M64 callbacks for unknown PHB type * Fixed comments as suggested by aik * Fixed coding style complained by checkpatch.pl --- arch/powerpc/platforms/powernv/pci-ioda.c | 110 ++ 1 file changed, 98 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 573b07a..245ef81 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -174,6 +174,69 @@ static void pnv_ioda_free_pe(struct pnv_phb *phb, int pe) clear_bit(pe, phb-ioda.pe_alloc); } +static int pnv_ioda1_init_m64(struct pnv_phb *phb) +{ + struct resource *r; + int seg; + + /* There are as many M64 segments as the maximum number +* of PEs, which is 128. 
+*/ + for (seg = 0; seg phb-ioda.total_pe; seg += 8) { + unsigned long base; + int64_t rc; + + base = phb-ioda.m64_base + seg * phb-ioda.m64_segsize; + rc = opal_pci_set_phb_mem_window(phb-opal_id, +OPAL_M64_WINDOW_TYPE, +seg / 8, +base, +0, /* unused */ +8 * phb-ioda.m64_segsize); + if (rc != OPAL_SUCCESS) { + pr_warn( Error %lld setting M64 PHB#%d-BAR#%d\n, + rc, phb-hose-global_number, seg / 8); + goto fail; + } + + rc = opal_pci_phb_mmio_enable(phb-opal_id, + OPAL_M64_WINDOW_TYPE, + seg / 8, + OPAL_ENABLE_M64_SPLIT); + if (rc != OPAL_SUCCESS) { + pr_warn( Error %lld enabling M64 PHB#%d-BAR#%d\n, + rc, phb-hose-global_number, seg / 8); + goto fail; + } + } + + /* Strip off the segment used by the reserved PE, which +* is expected to be 0 or last supported PE#. The PHB's +* first memory window traces the 32-bits MMIO range +* while the second one traces the 64-bits prefetchable +* MMIO range that the PHB supports. +*/ + r = phb-hose-mem_resources[1]; + if (phb-ioda.reserved_pe == 0) + r-start += phb-ioda.m64_segsize; + else if (phb-ioda.reserved_pe == (phb-ioda.total_pe - 1)) + r-end -= phb-ioda.m64_segsize; + else + pr_warn( Cannot strip M64 segment for reserved PE#%d\n, + phb-ioda.reserved_pe); + + return 0; + +fail: + for ( ; seg = 0; seg -= 8) + opal_pci_phb_mmio_enable(phb-opal_id, +OPAL_M64_WINDOW_TYPE, +seg / 8, +OPAL_DISABLE_M64); + + return -EIO; +} + /* The default M64 BAR is shared by all PEs */ static int pnv_ioda2_init_m64(struct pnv_phb *phb) { @@ -231,7 +294,7 @@ fail: return -EIO; } -static void pnv_ioda2_reserve_m64_pe(struct pnv_phb *phb) +static void pnv_ioda_reserve_m64_pe(struct pnv_phb *phb) { resource_size_t sgsz = phb-ioda.m64_segsize; struct pci_dev *pdev; @@ -257,8 +320,8 @@ static void pnv_ioda2_reserve_m64_pe(struct pnv_phb *phb) } } -static int pnv_ioda2_pick_m64_pe(struct pnv_phb *phb, -struct pci_bus *bus, int all) +static int pnv_ioda_pick_m64_pe(struct pnv_phb *phb, + struct pci_bus *bus, int all) { resource_size_t segsz = 
phb-ioda.m64_segsize; struct pci_dev *pdev; @@ -355,6 +418,26 @@ done: pe-master = master_pe; list_add_tail(pe-list, master_pe-slaves);
[PATCH v5 26/42] powerpc/powernv: Use PCI slot reset infrastructure
The skiboot firmware might provide the capability of resetting PCI slot by property ibm,reset-by-firmware on the PCI slot associated device node. The patch checks on the property and route the reset to firmware if the property exists. Otherwise, we fail back to the old path as before. Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com --- v5: * Derived from PATCH[v4 09/21] --- arch/powerpc/platforms/powernv/eeh-powernv.c | 44 +++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 4fd8f15..4feb533 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -841,7 +841,7 @@ out: return 0; } -static int pnv_eeh_bridge_reset(struct pci_dev *dev, int option) +static int __pnv_eeh_bridge_reset(struct pci_dev *dev, int option) { struct pci_dn *pdn = pci_get_pdn_by_devfn(dev-bus, dev-devfn); struct eeh_dev *edev = pdn_to_eeh_dev(pdn); @@ -892,6 +892,48 @@ static int pnv_eeh_bridge_reset(struct pci_dev *dev, int option) return 0; } +static int pnv_eeh_bridge_reset(struct pci_dev *pdev, int option) +{ + struct pci_controller *hose; + struct pnv_phb *phb; + struct device_node *dn = pdev ? pci_device_to_OF_node(pdev) : NULL; + uint64_t id = (0x1ul 60); + uint8_t scope; + int64_t rc; + + /* +* If the firmware can't handle it, we will issue hot reset +* on the secondary bus despite the requested reset type. 
+*/ + if (!dn || !of_get_property(dn, ibm,reset-by-firmware, NULL)) + return __pnv_eeh_bridge_reset(pdev, option); + + /* The firmware can handle the request */ + switch (option) { + case EEH_RESET_HOT: + scope = OPAL_RESET_PCI_HOT; + break; + case EEH_RESET_FUNDAMENTAL: + scope = OPAL_RESET_PCI_FUNDAMENTAL; + break; + case EEH_RESET_DEACTIVATE: + return 0; + default: + dev_warn(pdev-dev, %s: Unsupported reset %d\n, +__func__, option); + return -EINVAL; + } + + hose = pci_bus_to_host(pdev-bus); + phb = hose-private_data; + id |= (pdev-bus-number 24) | (pdev-devfn 16) | phb-opal_id; + rc = opal_pci_reset(id, scope, OPAL_ASSERT_RESET); + if (rc 0) + rc = pnv_eeh_poll(id); + + return (rc == OPAL_SUCCESS) ? 0 : -EIO; +} + static void pnv_eeh_wait_for_pending(struct pci_dn *pdn, int pos, u16 mask, bool af_flr_rst) { -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v5 39/42] drivers/of: Unflatten nodes equal or deeper than specified level
unflatten_dt_node() is called recursively to unflatten FDT nodes with the assumption that FDT blob has only one root node, which isn't true when the FDT blob represents device sub-tree. The patch improves the function to supporting device sub-tree that have multiple root nodes: * Rename original unflatten_dt_node() to __unflatten_dt_node(). * Wrapper unflatten_dt_node() calls __unflatten_dt_node() with adjusted current node depth to 1 to avoid underflow. Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com --- v5: * Split from PATCH[v4 19/21] * Fixed line over 80 characters from checkpatch.pl --- drivers/of/fdt.c | 56 ++-- 1 file changed, 38 insertions(+), 18 deletions(-) diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index cde35c5d01..b87c157 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -28,6 +28,8 @@ #include asm/setup.h /* for COMMAND_LINE_SIZE */ #include asm/page.h +static int cur_node_depth; + /* * of_fdt_limit_memory - limit the number of regions in the /memory node * @limit: maximum entries @@ -161,27 +163,26 @@ static void *unflatten_dt_alloc(void **mem, unsigned long size, } /** - * unflatten_dt_node - Alloc and populate a device_node from the flat tree + * __unflatten_dt_node - Alloc and populate a device_node from the flat tree * @blob: The parent device tree blob * @mem: Memory chunk to use for allocating device nodes and properties * @p: pointer to node in flat tree * @dad: Parent struct device_node * @fpsize: Size of the node path up at the current depth. 
*/ -static void * unflatten_dt_node(void *blob, - void *mem, - int *poffset, - struct device_node *dad, - struct device_node **nodepp, - unsigned long fpsize, - bool dryrun) +static void *__unflatten_dt_node(void *blob, +void *mem, +int *poffset, +struct device_node *dad, +struct device_node **nodepp, +unsigned long fpsize, +bool dryrun) { const __be32 *p; struct device_node *np; struct property *pp, **prev_pp = NULL; const char *pathp; unsigned int l, allocl; - static int depth = 0; int old_depth; int offset; int has_name = 0; @@ -334,13 +335,19 @@ static void * unflatten_dt_node(void *blob, np-type = NULL; } - old_depth = depth; - *poffset = fdt_next_node(blob, *poffset, depth); - if (depth 0) - depth = 0; - while (*poffset 0 depth old_depth) - mem = unflatten_dt_node(blob, mem, poffset, np, NULL, - fpsize, dryrun); + old_depth = cur_node_depth; + *poffset = fdt_next_node(blob, *poffset, cur_node_depth); + while (*poffset 0) { + if (cur_node_depth old_depth) + break; + + if (cur_node_depth == old_depth) + mem = __unflatten_dt_node(blob, mem, poffset, + dad, NULL, fpsize, dryrun); + else if (cur_node_depth old_depth) + mem = __unflatten_dt_node(blob, mem, poffset, + np, NULL, fpsize, dryrun); + } if (*poffset 0 *poffset != -FDT_ERR_NOTFOUND) pr_err(unflatten: error %d processing FDT\n, *poffset); @@ -366,6 +373,18 @@ static void * unflatten_dt_node(void *blob, return mem; } +static void *unflatten_dt_node(void *blob, + void *mem, + int *poffset, + struct device_node *dad, + struct device_node **nodepp, + bool dryrun) +{ + cur_node_depth = 1; + return __unflatten_dt_node(blob, mem, poffset, + dad, nodepp, 0, dryrun); +} + /** * __unflatten_device_tree - create tree of device_nodes from flat blob * @@ -405,7 +424,8 @@ static void __unflatten_device_tree(void *blob, /* First pass, scan for size */ start = 0; - size = (unsigned long)unflatten_dt_node(blob, NULL, start, NULL, NULL, 0, true); + size = (unsigned long)unflatten_dt_node(blob, NULL, start, + NULL, NULL, 
true); size = ALIGN(size, 4); pr_debug( size is %lx, allocating...\n, size); @@ -420,7 +440,7 @@ static void __unflatten_device_tree(void *blob, /* Second pass, do actual unflattening */ start = 0; - unflatten_dt_node(blob, mem, start,
[PATCH v5 09/42] powerpc/powernv: pnv_ioda_setup_dma() configure one PE only
The original implementation of pnv_ioda_setup_dma() iterates the list of PEs and configures the DMA32 space for them one by one. The function was designed to be called during PHB fixup time. When configuring PE's DMA32 space in pcibios_setup_bridge(), in order to support PCI hotplug, we have to have the function PE oriented. The patch introduces one more argument struct pnv_ioda_pe *pe to pnv_ioda_setup_dma(). The caller, pnv_pci_ioda_setup_DMA(), gets PE from the list and passes to it. The patch shouldn't cause logic changes. Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com --- v5: * Split from PATCH[v4 06/21] --- arch/powerpc/platforms/powernv/pci-ioda.c | 60 ++- 1 file changed, 27 insertions(+), 33 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 4af3d06..63fad4d 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2558,12 +2558,14 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, pnv_ioda_setup_bus_dma(pe, pe-pbus); } -static void pnv_ioda_setup_dma(struct pnv_phb *phb) +static void pnv_ioda_setup_dma(struct pnv_phb *phb, struct pnv_ioda_pe *pe) { struct pci_controller *hose = phb-hose; - struct pnv_ioda_pe *pe; unsigned int dma_weight; + if (!pe-dma32_weight) + return; + /* Calculate the PHB's DMA weight */ dma_weight = pnv_ioda_phb_dma_weight(phb); pr_info(PCI%04x has %ld DMA32 segments, total weight %d\n, @@ -2571,38 +2573,28 @@ static void pnv_ioda_setup_dma(struct pnv_phb *phb) pnv_pci_ioda_setup_opal_tce_kill(phb); - /* Walk our PE list and configure their DMA segments, hand them -* out one base segment plus any residual segments based on -* weight + /* +* For IODA2 compliant PHB3, we needn't care about the weight. +* The all available 32-bits DMA space will be assigned to +* the specific PE. 
*/ - list_for_each_entry(pe, phb-ioda.pe_dma_list, dma_link) { - if (!pe-dma32_weight) - continue; + if (phb-type == PNV_PHB_IODA1) { + unsigned int segs, base = 0; - /* -* For IODA2 compliant PHB3, we needn't care about the weight. -* The all available 32-bits DMA space will be assigned to -* the specific PE. -*/ - if (phb-type == PNV_PHB_IODA1) { - unsigned int segs, base = 0; - - if (pe-dma32_weight - dma_weight / phb-ioda.dma32_segcount) - segs = 1; - else - segs = (pe-dma32_weight * - phb-ioda.dma32_segcount) / dma_weight; - - pe_info(pe, DMA weight %d, assigned %d DMA32 segments\n, - pe-dma32_weight, segs); - pnv_pci_ioda_setup_dma_pe(phb, pe, base, segs); + if (pe-dma32_weight + dma_weight / phb-ioda.dma32_segcount) + segs = 1; + else + segs = (pe-dma32_weight * + phb-ioda.dma32_segcount) / dma_weight; - base += segs; - } else { - pe_info(pe, Assign DMA32 space\n); - pnv_pci_ioda2_setup_dma_pe(phb, pe); - } + pe_info(pe, DMA weight %d, assigned %d DMA32 segments\n, + pe-dma32_weight, segs); + pnv_pci_ioda_setup_dma_pe(phb, pe, base, segs); + base += segs; + } else { + pe_info(pe, Assign DMA32 space\n); + pnv_pci_ioda2_setup_dma_pe(phb, pe); } } @@ -3073,12 +3065,14 @@ static void pnv_pci_ioda_setup_DMA(void) { struct pci_controller *hose, *tmp; struct pnv_phb *phb; + struct pnv_ioda_pe *pe; list_for_each_entry_safe(hose, tmp, hose_list, list_node) { - pnv_ioda_setup_dma(hose-private_data); + phb = hose-private_data; + list_for_each_entry(pe, phb-ioda.pe_dma_list, dma_link) + pnv_ioda_setup_dma(phb, pe); /* Mark the PHB initialization done */ - phb = hose-private_data; phb-initialized = 1; } } -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v5 24/42] powerpc/powernv: Release PEs dynamically
The patch adds a refcount to the PE, which counts the number of PCI devices included in the PE. When the last device leaves the PE, the PE together with its consumed resources (IO, DMA, PELTM/PELTV) is released, in order to support PCI hotplug. Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com --- v5: * Derived from PATCH[v4 07/21] --- arch/powerpc/include/asm/pci-bridge.h | 1 + arch/powerpc/kernel/pci-hotplug.c | 5 + arch/powerpc/platforms/powernv/pci-ioda.c | 181 +- arch/powerpc/platforms/powernv/pci.h | 2 + 4 files changed, 183 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 1f39ca7..9a83cdb 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -26,6 +26,7 @@ struct pci_controller_ops { /* Called when pci_enable_device() is called. Returns true to * allow assignment/enabling of the device. */ bool(*enable_device_hook)(struct pci_dev *); + void(*release_device)(struct pci_dev *); /* Called during PCI resource reassignment */ resource_size_t (*window_alignment)(struct pci_bus *, unsigned long); diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c index 98f84ed..21973e7 100644 --- a/arch/powerpc/kernel/pci-hotplug.c +++ b/arch/powerpc/kernel/pci-hotplug.c @@ -29,6 +29,11 @@ */ void pcibios_release_device(struct pci_dev *dev) { + struct pci_controller *hose = pci_bus_to_host(dev-bus); + + if (hose-controller_ops.release_device) + hose-controller_ops.release_device(dev); + eeh_remove_device(dev); } diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 2e31472..17ba55c 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -132,6 +132,50 @@ static inline bool pnv_pci_is_mem_pref_64(unsigned long flags) (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH)); } +static void pnv_pci_ioda_release_pe_dma(struct pnv_ioda_pe *pe) +{ + struct
pnv_phb *phb = pe-phb; + struct iommu_table *tbl; + int seg; + int64_t rc; + + /* No DMA32 segments allocated */ + if (pe-dma32_seg 0 || + pe-dma32_segcount = 0) + return; + + /* Unlink IOMMU table from group */ + tbl = pe-table_group.tables[0]; + pnv_pci_unlink_table_and_group(tbl, pe-table_group); + if (pe-table_group.group) { + iommu_group_put(pe-table_group.group); + BUG_ON(pe-table_group.group); + } + + /* Release IOMMU table */ + free_pages(tbl-it_base, + get_order(TCE32_TABLE_SIZE * pe-dma32_segcount)); + iommu_free_table(tbl, + of_node_full_name(pci_bus_to_OF_node(pe-pbus))); + + /* Disable TVE */ + for (seg = pe-dma32_seg; +seg pe-dma32_seg + pe-dma32_segcount; +seg++) { + rc = opal_pci_map_pe_dma_window(phb-opal_id, pe-pe_number, + seg, 0, 0ul, 0ul, 0ul); + if (rc) + pe_warn(pe, Error %ld unmapping DMA32 seg#%d\n, + rc, seg); + } + + /* Free the DMA32 segments */ + bitmap_clear(phb-ioda.dma32_segmap, + pe-dma32_seg, pe-dma32_segcount); + pe-dma32_seg = -1; + pe-dma32_segcount = 0; +} + static inline void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_ioda_pe *pe) { /* 01xb - invalidate TCEs that match the specified PE# */ @@ -203,6 +247,10 @@ static void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe) struct device_node *dn; int64_t rc; + if (pe-dma32_seg 0 || + pe-dma32_segcount = 0) + return; + tbl = pe-table_group.tables[0]; rc = pnv_pci_ioda2_unset_window(pe-table_group, 0); if (rc) @@ -227,6 +275,61 @@ static void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe) pnv_pci_ioda2_table_free_pages(tbl); iommu_free_table(tbl, of_node_full_name(dn)); + pe-dma32_seg = -1; + pe-dma32_segcount = 0; +} + +static void pnv_ioda_release_pe_dma(struct pnv_ioda_pe *pe) +{ + struct pnv_phb *phb = pe-phb; + + if (phb-type == PNV_PHB_IODA1) + pnv_pci_ioda_release_pe_dma(pe); + else if (phb-type == PNV_PHB_IODA2) + pnv_pci_ioda2_release_pe_dma(pe); +} + +static void pnv_ioda_release_pe_seg(struct pnv_ioda_pe *pe) +{ + struct pnv_phb *phb = pe-phb; + unsigned 
long *segmap = NULL; + unsigned long *pe_segmap = NULL; + uint16_t win; + int segno; + + for (win = OPAL_M32_WINDOW_TYPE; win = OPAL_IO_WINDOW_TYPE; win++) { + switch (win) { + case OPAL_IO_WINDOW_TYPE: +
[PATCH v5 36/42] powerpc/pci: Export traverse_pci_device_nodes()
The patch exports following functions, which are derived from their original implementation, so that the PCI hotplug logic can reuse the functions to add or remove pci_dn for all device nodes under specified PCI slot. traverse_pci_device_nodes() traverse_pci_devices() add_pci_device_node_info() update_dn_pci_info() remove_pci_device_node_info() newly added The patch also releases eeh_dev when its corresponding pci_dn is released, indicating they have same life cycle. Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com --- v5: * Derived from PATCH[v4 17/21] * Fixed assignment in if condition from checkpatch.pl --- arch/powerpc/include/asm/pci-bridge.h | 4 +- arch/powerpc/include/asm/ppc-pci.h | 7 ++-- arch/powerpc/kernel/pci_dn.c | 71 -- arch/powerpc/platforms/pseries/msi.c | 4 +- arch/powerpc/platforms/pseries/setup.c | 2 +- 5 files changed, 70 insertions(+), 18 deletions(-) diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 9a83cdb..d0b4b1a 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -222,7 +222,9 @@ extern struct pci_dn *pci_get_pdn_by_devfn(struct pci_bus *bus, extern struct pci_dn *pci_get_pdn(struct pci_dev *pdev); extern struct pci_dn *add_dev_pci_data(struct pci_dev *pdev); extern void remove_dev_pci_data(struct pci_dev *pdev); -extern void *update_dn_pci_info(struct device_node *dn, void *data); +extern void *add_pci_device_node_info(struct device_node *dn, + struct pci_controller *phb); +extern void remove_pci_device_node_info(struct device_node *dn); static inline int pci_device_from_OF_node(struct device_node *np, u8 *bus, u8 *devfn) diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h index 7388316..a5b0ea0 100644 --- a/arch/powerpc/include/asm/ppc-pci.h +++ b/arch/powerpc/include/asm/ppc-pci.h @@ -33,9 +33,10 @@ extern struct pci_dev *isa_bridge_pcidev;/* may be NULL if no ISA bus */ struct device_node; struct pci_dn; 
-typedef void *(*traverse_func)(struct device_node *me, void *data); -void *traverse_pci_devices(struct device_node *start, traverse_func pre, - void *data); +void *traverse_pci_device_nodes(struct device_node *start, + void *(*fn)(struct device_node *, + struct pci_controller *), + void *data); void *traverse_pci_dn(struct pci_dn *root, void *(*fn)(struct pci_dn *, void *), void *data); diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index d4330d2..f821e96 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -276,13 +276,17 @@ void remove_dev_pci_data(struct pci_dev *pdev) #endif /* CONFIG_PCI_IOV */ } -/* - * Traverse_func that inits the PCI fields of the device node. - * NOTE: this *must* be done before read/write config to the device. +/** + * add_pci_device_node_info - Add pci_dn for PCI device node + * @dn: PCI device node + * @phb: PHB + * + * Add pci_dn for the indicated PCI device node. The newly created + * pci_dn will be put into the child list of the parent device node. */ -void *update_dn_pci_info(struct device_node *dn, void *data) +void *add_pci_device_node_info(struct device_node *dn, + struct pci_controller *phb) { - struct pci_controller *phb = data; const __be32 *type = of_get_property(dn, ibm,pci-config-space-type, NULL); const __be32 *regs; struct device_node *parent; @@ -339,8 +343,48 @@ void *update_dn_pci_info(struct device_node *dn, void *data) return NULL; } +EXPORT_SYMBOL(add_pci_device_node_info); -/* +/** + * remove_pci_device_node_info - Remove pci_dn from PCI device node + * @dn: PCI device node + * + * Remove pci_dn from PCI device node. The pci_dn is also removed + * from the child list of the parent pci_dn. + */ +void remove_pci_device_node_info(struct device_node *np) +{ + struct pci_dn *pdn = np ? 
PCI_DN(np) : NULL; +#ifdef CONFIG_EEH + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); +#endif + + if (!pdn) + return; + +#ifdef CONFIG_EEH + if (edev) { + pdn-edev = NULL; + kfree(edev); + } +#endif + + BUG_ON(!list_empty(pdn-child_list)); + list_del(pdn-list); + if (pdn-parent) + of_node_put(pdn-parent-node); + + np-data = NULL; + kfree(pdn); +} +EXPORT_SYMBOL(remove_pci_device_node_info); + +/** + * traverse_pci_device_nodes - Traverse children of indicated device node + * @start: indicated device node + * @pre: callback + * @data: additional parameter to the callback + * * Traverse a device tree stopping each PCI device in the tree. *
[PATCH v5 13/42] powerpc/pci: Override pcibios_setup_bridge()
The patch overrides pcibios_setup_bridge(), which is called to update PCI bridge windows at completion of PCI resource assignment, so that the next patch can use it to assign PEs and set up the various (resource) mappings. Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com --- v5: * Split from [PATCH v5 v4 06/21] --- arch/powerpc/include/asm/pci-bridge.h | 1 + arch/powerpc/kernel/pci-common.c | 8 2 files changed, 9 insertions(+) diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 1252cd5..1f39ca7 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -29,6 +29,7 @@ struct pci_controller_ops { /* Called during PCI resource reassignment */ resource_size_t (*window_alignment)(struct pci_bus *, unsigned long); + void(*setup_bridge)(struct pci_bus *, unsigned long); void(*reset_secondary_bus)(struct pci_dev *); }; diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 0d05406..0358f24 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -122,6 +122,14 @@ resource_size_t pcibios_window_alignment(struct pci_bus *bus, return 1; } +void pcibios_setup_bridge(struct pci_bus *bus, unsigned long type) +{ + struct pci_controller *hose = pci_bus_to_host(bus); + + if (hose-controller_ops.setup_bridge) + hose-controller_ops.setup_bridge(bus, type); +} + void pcibios_reset_secondary_bus(struct pci_dev *dev) { struct pci_controller *phb = pci_bus_to_host(dev-bus); -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v5 17/42] powerpc/powernv: PE oriented during configuration
Except pnv_ioda_configure_pe(), all PE configuration related functions are already PE oriented. The patch changes the return value from PE number to PE instance for its callee for the purpose. Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com --- v5: * Split from PATCH[v4 07/21] --- arch/powerpc/platforms/powernv/pci-ioda.c | 44 --- arch/powerpc/platforms/powernv/pci.h | 3 ++- 2 files changed, 25 insertions(+), 22 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index fd2f898..6187f84 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -132,25 +132,26 @@ static inline bool pnv_pci_is_mem_pref_64(unsigned long flags) (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH)); } -static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no) +static struct pnv_ioda_pe *pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no) { if (!(pe_no = 0 pe_no phb-ioda.total_pe)) { pr_warn(%s: Invalid PE %d on PHB#%x\n, __func__, pe_no, phb-hose-global_number); - return; + return NULL; } if (test_and_set_bit(pe_no, phb-ioda.pe_alloc)) { pr_warn(%s: PE %d was assigned on PHB#%x\n, __func__, pe_no, phb-hose-global_number); - return; + return NULL; } phb-ioda.pe_array[pe_no].phb = phb; phb-ioda.pe_array[pe_no].pe_number = pe_no; + return phb-ioda.pe_array[pe_no]; } -static int pnv_ioda_alloc_pe(struct pnv_phb *phb) +static struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb) { unsigned long pe_no; unsigned long limit = phb-ioda.total_pe - 1; @@ -163,12 +164,12 @@ static int pnv_ioda_alloc_pe(struct pnv_phb *phb) break; if (--limit = phb-ioda.total_pe) - return IODA_INVALID_PE; + return NULL; } while (1); phb-ioda.pe_array[pe_no].phb = phb; phb-ioda.pe_array[pe_no].pe_number = pe_no; - return pe_no; + return phb-ioda.pe_array[pe_no]; } static void pnv_ioda_free_pe(struct pnv_phb *phb, int pe) @@ -389,8 +390,8 @@ static void pnv_ioda_reserve_m64_pe(struct pnv_phb *phb, } } -static 
int pnv_ioda_pick_m64_pe(struct pnv_phb *phb, - struct pci_bus *bus, int all) +static struct pnv_ioda_pe *pnv_ioda_pick_m64_pe(struct pnv_phb *phb, + struct pci_bus *bus, int all) { resource_size_t segsz = phb-ioda.m64_segsize; struct pci_dev *pdev; @@ -401,13 +402,13 @@ static int pnv_ioda_pick_m64_pe(struct pnv_phb *phb, int i; if (!pnv_ioda_need_m64_pe(phb, bus)) - return IODA_INVALID_PE; + return NULL; /* Allocate bitmap */ size = _ALIGN_UP(phb-ioda.total_pe / 8, sizeof(unsigned long)); pe_bitmap = kzalloc(size, GFP_KERNEL); if (!pe_bitmap) - return IODA_INVALID_PE; + return NULL; /* The bridge's M64 window might be extended to PHB's M64 * window by intention to support PCI hotplug. So we have @@ -444,7 +445,7 @@ static int pnv_ioda_pick_m64_pe(struct pnv_phb *phb, /* No M64 window found ? */ if (bitmap_empty(pe_bitmap, phb-ioda.total_pe)) { kfree(pe_bitmap); - return IODA_INVALID_PE; + return NULL; } /* Figure out the master PE and put all slave PEs @@ -495,7 +496,7 @@ static int pnv_ioda_pick_m64_pe(struct pnv_phb *phb, } kfree(pe_bitmap); - return master_pe-pe_number; + return master_pe; } static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb) @@ -1224,7 +1225,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all) { struct pci_controller *hose = pci_bus_to_host(bus); struct pnv_phb *phb = hose-private_data; - struct pnv_ioda_pe *pe; + struct pnv_ioda_pe *pe = NULL; int pe_num = IODA_INVALID_PE; /* For partial hotplug case, the PE instance hasn't been destroyed @@ -1240,24 +1241,25 @@ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all) } /* PE number for root bus should have been reserved */ - if (pci_is_root_bus(bus)) - pe_num = phb-ioda.root_pe; + if (pci_is_root_bus(bus) + phb-ioda.root_pe != IODA_INVALID_PE) + pe = phb-ioda.pe_array[phb-ioda.root_pe]; /* Check if PE is determined by M64 */ - if (pe_num == IODA_INVALID_PE phb-pick_m64_pe) - pe_num = phb-pick_m64_pe(phb, bus, all); + if (!pe 
phb-pick_m64_pe) + pe = phb-pick_m64_pe(phb, bus, all); /* The PE
[PATCH v5 19/42] powerpc/powernv: Remove DMA32 list of PEs
PEs were put into a list, maintained by the PHB, according to their DMA32 weight. After that, the list was iterated to initialize each PE's DMA capability. Now, a PE's DMA capability is initialized right away when the PE is created, so we don't need the list and the patch removes it. Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com --- v5: * Newly introduced --- arch/powerpc/platforms/powernv/pci-ioda.c | 18 -- arch/powerpc/platforms/powernv/pci.h | 6 -- 2 files changed, 24 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index f0b54ab..0447534 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -992,20 +992,6 @@ out: return 0; } -static void pnv_ioda_link_pe_by_weight(struct pnv_phb *phb, - struct pnv_ioda_pe *pe) -{ - struct pnv_ioda_pe *lpe; - - list_for_each_entry(lpe, phb-ioda.pe_dma_list, dma_link) { - if (lpe-dma32_weight pe-dma32_weight) { - list_add_tail(pe-dma_link, lpe-dma_link); - return; - } - } - list_add_tail(pe-dma_link, phb-ioda.pe_dma_list); -} - static unsigned int pnv_ioda_dev_dma_weight(struct pci_dev *dev) { struct pci_controller *hose = pci_bus_to_host(dev-bus); @@ -1296,9 +1282,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all) /* Put PE to the list */ list_add_tail(pe-list, phb-ioda.pe_list); - /* Link the PE */ - pnv_ioda_link_pe_by_weight(phb, pe); - return pe; } @@ -3421,7 +3404,6 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, phb-ioda.root_pe = IODA_INVALID_PE; } - INIT_LIST_HEAD(phb-ioda.pe_dma_list); INIT_LIST_HEAD(phb-ioda.pe_list); mutex_init(phb-ioda.pe_list_mutex); diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 64c7f03..bf63481 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -79,7 +79,6 @@ struct pnv_ioda_pe { struct list_headslaves; /* Link in list of PE#s */ - struct
list_headdma_link; struct list_headlist; }; @@ -186,11 +185,6 @@ struct pnv_phb { /* Number of 32-bit DMA segments */ unsigned long dma32_segcount; - /* Sorted list of used PE's, sorted at -* boot for resource allocation purposes -*/ - struct list_headpe_dma_list; - /* TCE cache invalidate registers (physical and * remapped) */ -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH V7 06/10] powerpc/eeh: Create PE for VFs
On Thu, Jun 04, 2015 at 01:46:15PM +0800, Wei Yang wrote: On Wed, Jun 03, 2015 at 10:46:38AM -0500, Bjorn Helgaas wrote: On Wed, Jun 03, 2015 at 03:10:23PM +1000, Gavin Shan wrote: On Wed, Jun 03, 2015 at 11:31:42AM +0800, Wei Yang wrote: On Mon, Jun 01, 2015 at 06:46:45PM -0500, Bjorn Helgaas wrote: On Tue, May 19, 2015 at 06:50:08PM +0800, Wei Yang wrote: Current EEH recovery code works with the assumption: the PE has primary bus. Unfortunately, that's not true to VF PEs, which generally contains one or multiple VFs (for VF group case). The patch creates PEs for VFs at PCI final fixup time. Those PEs for VFs are indentified with newly introduced flag EEH_PE_VF so that we handle them differently during EEH recovery. [gwshan: changelog and code refactoring] Signed-off-by: Wei Yang weiy...@linux.vnet.ibm.com Acked-by: Gavin Shan gws...@linux.vnet.ibm.com --- arch/powerpc/include/asm/eeh.h |1 + arch/powerpc/kernel/eeh_pe.c | 10 -- arch/powerpc/platforms/powernv/eeh-powernv.c | 17 + 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 1b3614d..c1fde48 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -70,6 +70,7 @@ struct pci_dn; #define EEH_PE_PHB (1 1)/* PHB PE*/ #define EEH_PE_DEVICE (1 2)/* Device PE */ #define EEH_PE_BUS (1 3)/* Bus PE*/ +#define EEH_PE_VF (1 4)/* VF PE */ #define EEH_PE_ISOLATED(1 0)/* Isolated PE */ #define EEH_PE_RECOVERING (1 1)/* Recovering PE */ diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index 35f0b62..260a701 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -299,7 +299,10 @@ static struct eeh_pe *eeh_pe_get_parent(struct eeh_dev *edev) * EEH device already having associated PE, but * the direct parent EEH device doesn't have yet. */ - pdn = pdn ? pdn-parent : NULL; + if (edev-physfn) + pdn = pci_get_pdn(edev-physfn); + else + pdn = pdn ? 
pdn-parent : NULL; while (pdn) { /* We're poking out of PCI territory */ parent = pdn_to_eeh_dev(pdn); @@ -382,7 +385,10 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) } /* Create a new EEH PE */ - pe = eeh_pe_alloc(edev-phb, EEH_PE_DEVICE); + if (edev-physfn) + pe = eeh_pe_alloc(edev-phb, EEH_PE_VF); + else + pe = eeh_pe_alloc(edev-phb, EEH_PE_DEVICE); if (!pe) { pr_err(%s: out of memory!\n, __func__); return -ENOMEM; diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index ce738ab..c505036 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -1520,6 +1520,23 @@ static struct eeh_ops pnv_eeh_ops = { .restore_config = pnv_eeh_restore_config }; +static void pnv_eeh_vf_final_fixup(struct pci_dev *pdev) +{ + struct pci_dn *pdn = pci_get_pdn(pdev); + + if (!pdev-is_virtfn) + return; + + /* +* The following operations will fail if VF's sysfs files +* aren't created or its resources aren't finalized. +*/ I don't understand this comment. The following operations seems to refer to eeh_add_device_early() and eeh_add_device_late(), and VF's sysfs files being created seems to refer to eeh_sysfs_add_device(). So the comment suggests that eeh_add_device_early() and eeh_add_device_late() will fail because they're called before eeh_sysfs_add_device(). So I think you must be talking about some other following operations, not eeh_add_device_early() and eeh_add_device_late(). Sorry for this confusion. The comment here wants to say the eeh_sysfs_add_device() will fail if the VF's sysfs is not created well. Or it will fail if the VF's resources are not set properly, since we would cache the VF's BAR in eeh_add_device_late(). Gavin, If my understanding is not correct please let me know. It's correct. The following operations refers to eeh_add_device_late() and eeh_sysfs_add_device(). 
The former one requires the resources for one particular PCI device (VF here) are finalized (assigned). eeh_sysfs_add_device() will fail if the sysfs entry for the PCI device isn't populated yet. eeh_add_device_late() contains several things that read config space: eeh_save_bars() caches the entire config header, and eeh_addr_cache_insert_dev() looks at the device resources (which are determined by BARs in config
[PATCH v5 01/42] PCI: Add pcibios_setup_bridge()
Currently, the PowerPC PowerNV platform utilizes ppc_md.pcibios_fixup(), which is called once after PCI probing and resource assignment are completed, to allocate platform-required resources for PCI devices: PE#, IO and MMIO mapping, DMA address translation (TCE) table, etc. Obviously, it's not hotplug friendly. The patch adds a weak function pcibios_setup_bridge(), which is called by pci_setup_bridge(). The PowerPC PowerNV platform will reuse the function to assign the above platform-required resources to newly added PCI devices, in order to support PCI hotplug in subsequent patches. Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com --- v5: * Corrected subject as Bjorn suggested * pci_setup_bridge() calls pcibios_setup_bridge() and __pci_setup_bridge() --- drivers/pci/setup-bus.c | 5 + include/linux/pci.h | 1 + 2 files changed, 6 insertions(+) diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 4fd0cac..623dee3 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -693,11 +693,16 @@ static void __pci_setup_bridge(struct pci_bus *bus, unsigned long type) pci_write_config_word(bridge, PCI_BRIDGE_CONTROL, bus-bridge_ctl); } +void __weak pcibios_setup_bridge(struct pci_bus *bus, unsigned long type) +{ +} + void pci_setup_bridge(struct pci_bus *bus) { unsigned long type = IORESOURCE_IO | IORESOURCE_MEM | IORESOURCE_PREFETCH; + pcibios_setup_bridge(bus, type); __pci_setup_bridge(bus, type); } diff --git a/include/linux/pci.h b/include/linux/pci.h index 94bacfa..5aacd0a 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -811,6 +811,7 @@ void pci_stop_and_remove_bus_device_locked(struct pci_dev *dev); void pci_stop_root_bus(struct pci_bus *bus); void pci_remove_root_bus(struct pci_bus *bus); void pci_setup_cardbus(struct pci_bus *bus); +void pcibios_setup_bridge(struct pci_bus *bus, unsigned long type); void pci_sort_breadthfirst(void); #define dev_is_pci(d) ((d)-bus == pci_bus_type) #define dev_is_pf(d) ((dev_is_pci(d) ?
to_pci_dev(d)-is_physfn : false)) -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v5 23/42] powerpc/powernv: Cleanup on pnv_pci_ioda2_release_dma_pe()
The patch applies cleanup on pnv_pci_ioda2_release_dma_pe(): * Rename it to pnv_pci_ioda2_release_pe_dma() to match the function names used to release resources for one PE in the subsequent patches. * Remove the parameter of PCI device, which is used to figure out device node. VFs don't have associated device nodes in SRIOV case. For other cases, the device node can be figured out from the PCI bus or device the PE was allocated for. Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com --- v5: * Newly introduced --- arch/powerpc/platforms/powernv/pci-ioda.c | 26 +++--- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 3d5aec8d..2e31472 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -197,11 +197,11 @@ static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable) pe-tce_bypass_enabled = enable; } -static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, -struct pnv_ioda_pe *pe) +static void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe) { - struct iommu_table*tbl; - int64_t rc; + struct iommu_table *tbl; + struct device_node *dn; + int64_t rc; tbl = pe-table_group.tables[0]; rc = pnv_pci_ioda2_unset_window(pe-table_group, 0); @@ -213,8 +213,20 @@ static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, iommu_group_put(pe-table_group.group); BUG_ON(pe-table_group.group); } + + if (pe-flags (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) + dn = pci_bus_to_OF_node(pe-pbus); + else if (pe-flags PNV_IODA_PE_DEV) + dn = pci_device_to_OF_node(pe-pdev); +#ifdef CONFIG_PCI_IOV + else if (pe-flags PNV_IODA_PE_VF) + dn = pci_device_to_OF_node(pe-parent_dev); +#endif + else + dn = NULL; + pnv_pci_ioda2_table_free_pages(tbl); - iommu_free_table(tbl, of_node_full_name(dev-dev.of_node)); + iommu_free_table(tbl, of_node_full_name(dn)); } static int pnv_ioda_set_one_peltv(struct pnv_phb *phb, @@ -1495,14 
+1507,14 @@ static void pnv_ioda_release_vf_PE(struct pci_dev *pdev, u16 num_vfs) if ((pe-flags PNV_IODA_PE_MASTER) (pe-flags PNV_IODA_PE_VF)) { list_for_each_entry_safe(s, sn, pe-slaves, list) { - pnv_pci_ioda2_release_dma_pe(pdev, s); + pnv_pci_ioda2_release_pe_dma(s); list_del(s-list); pnv_ioda_deconfigure_pe(phb, s); pnv_ioda_free_pe(phb, s-pe_number); } } - pnv_pci_ioda2_release_dma_pe(pdev, pe); + pnv_pci_ioda2_release_pe_dma(pe); /* Remove from list */ mutex_lock(phb-ioda.pe_list_mutex); -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v5 03/42] powerpc/powernv: M64 support improvement
We're having the hardware (on PHB3) or software enforced (on P7IOC) limitation: M64 segment#x can only be assigned to PE#x. IO and M32 segment can be mapped to arbitrary PE# via IODT and M32DT. It means the PE number should be x if M64 segment#x has been assigned to the PE. Also, each PE owns one M64 segment at most. Currently, we are reserving PE# according to root port's M64 window. It won't be reliable once we extend M64 windows of root port, or the upstream port of the PCIE switch behind root port to PHB's M64 window, in order to support PCI hotplug in future. The patch reserves PE# for M64 segments according to the M64 resources of the PCI devices (not bridges) contained in the PE. Besides, it's always worthy to trace the M64 segments consumed by the PE, which can be released at PCI unplugging time. Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com --- v5: * Made the changelog more descriptive on the fixed M64 seg# mapping * Dropped unnecessary and corrected comments pointed by aik * Replace pe_bitsmap with pe_bitmap * Fixed coding style complained by checkpatch.pl --- arch/powerpc/platforms/powernv/pci-ioda.c | 189 ++ arch/powerpc/platforms/powernv/pci.h | 10 +- 2 files changed, 121 insertions(+), 78 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 245ef81..71afb38 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -294,28 +294,78 @@ fail: return -EIO; } -static void pnv_ioda_reserve_m64_pe(struct pnv_phb *phb) +/* We extend the M64 window of root port, or the upstream bridge port + * of the PCIE switch behind root port. So we shouldn't reserve PEs + * for M64 resources because there are no (normal) PCI devices consuming + * M64 resources on the PCI buses leading from root port, or the upstream + * bridge port. The function returns true if the indicated PCI bus needs + * reserved PEs because of M64 resources in advance. 
Otherwise, the + * function returns false. + */ +static bool pnv_ioda_need_m64_pe(struct pnv_phb *phb, +struct pci_bus *bus) { - resource_size_t sgsz = phb-ioda.m64_segsize; + if (!bus || pci_is_root_bus(bus)) + return false; + + /* Bus leading from root port. We need check what types of PCI +* devices on the bus. If it's connecting PCI bridge, we don't +* need reserve M64 PEs for it. Otherwise, we still need to do +* that. +*/ + if (pci_is_root_bus(bus-self-bus)) { + struct pci_dev *pdev; + + list_for_each_entry(pdev, bus-devices, bus_list) { + if (pdev-hdr_type == PCI_HEADER_TYPE_NORMAL) + return true; + } + + return false; + } + + /* Bus leading from the upstream bridge port on top level */ + if (pci_is_root_bus(bus-self-bus-self-bus)) + return false; + + return true; +} + +static void pnv_ioda_reserve_m64_pe(struct pnv_phb *phb, + struct pci_bus *bus) +{ + resource_size_t segsz = phb-ioda.m64_segsize; struct pci_dev *pdev; struct resource *r; - int base, step, i; + unsigned long pe_no, limit; + int i; - /* -* Root bus always has full M64 range and root port has -* M64 range used in reality. So we're checking root port -* instead of root bus. + if (!pnv_ioda_need_m64_pe(phb, bus)) + return; + + /* The bridge's M64 window might have been extended to the +* PHB's M64 window in order to support PCI hotplug. So the +* bridge's M64 window isn't reliable to be used for picking +* PE# for its leading PCI bus. We have to check the M64 +* resources consumed by the PCI devices, which seat on the +* PCI bus. 
*/ - list_for_each_entry(pdev, phb-hose-bus-devices, bus_list) { - for (i = 0; i PCI_BRIDGE_RESOURCE_NUM; i++) { - r = pdev-resource[PCI_BRIDGE_RESOURCES + i]; - if (!r-parent || - !pnv_pci_is_mem_pref_64(r-flags)) + list_for_each_entry(pdev, bus-devices, bus_list) { + for (i = 0; i PCI_NUM_RESOURCES; i++) { +#ifdef CONFIG_PCI_IOV + if (i = PCI_IOV_RESOURCES i = PCI_IOV_RESOURCE_END) + continue; +#endif + r = pdev-resource[i]; + if (!r-flags || r-start = r-end || + !r-parent || !pnv_pci_is_mem_pref_64(r-flags)) continue; - base = (r-start - phb-ioda.m64_base) / sgsz; - for (step = 0; step resource_size(r) / sgsz; step++) -
[PATCH v5 04/42] powerpc/powernv: Trace consumed IO and M32 segments by PE
The patch introduces two bitmaps to trace the IO and M32 segments consumed by one particular PE, which can be released once the PE is destroyed during PCI unplugging time. Also, we're using fixed quantity of bits to trace the used IO and M32 segments by PEs in one particular PHB. Besides, @pe_array is put to the location adjacent to @pe_alloc on account of their close relation. Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com --- v5: * Split from PATCH[v4 04/21] --- arch/powerpc/platforms/powernv/pci-ioda.c | 17 + arch/powerpc/platforms/powernv/pci.h | 11 ++- 2 files changed, 11 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 71afb38..53d0efd 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2992,7 +2992,8 @@ static void pnv_ioda_setup_pe_seg(struct pci_controller *hose, while (index phb-ioda.total_pe region.start = region.end) { - phb-ioda.io_segmap[index] = pe-pe_number; + set_bit(index, phb-ioda.io_segmap); + set_bit(index, pe-io_segmap); rc = opal_pci_map_pe_mmio_window(phb-opal_id, pe-pe_number, OPAL_IO_WINDOW_TYPE, 0, index); if (rc != OPAL_SUCCESS) { @@ -3017,7 +3018,8 @@ static void pnv_ioda_setup_pe_seg(struct pci_controller *hose, while (index phb-ioda.total_pe region.start = region.end) { - phb-ioda.m32_segmap[index] = pe-pe_number; + set_bit(index, phb-ioda.m32_segmap); + set_bit(index, pe-m32_segmap); rc = opal_pci_map_pe_mmio_window(phb-opal_id, pe-pe_number, OPAL_M32_WINDOW_TYPE, 0, index); if (rc != OPAL_SUCCESS) { @@ -3196,7 +3198,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, { struct pci_controller *hose; struct pnv_phb *phb; - unsigned long size, m32map_off, pemap_off, iomap_off = 0; + unsigned long size, pemap_off; const __be64 *prop64; const __be32 *prop32; int len; @@ -3281,19 +3283,10 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, /* Allocate aux data 
arrays. We don't have IO ports on PHB3 */ size = _ALIGN_UP(phb-ioda.total_pe / 8, sizeof(unsigned long)); - m32map_off = size; - size += phb-ioda.total_pe * sizeof(phb-ioda.m32_segmap[0]); - if (phb-type == PNV_PHB_IODA1) { - iomap_off = size; - size += phb-ioda.total_pe * sizeof(phb-ioda.io_segmap[0]); - } pemap_off = size; size += phb-ioda.total_pe * sizeof(struct pnv_ioda_pe); aux = memblock_virt_alloc(size, 0); phb-ioda.pe_alloc = aux; - phb-ioda.m32_segmap = aux + m32map_off; - if (phb-type == PNV_PHB_IODA1) - phb-ioda.io_segmap = aux + iomap_off; phb-ioda.pe_array = aux + pemap_off; set_bit(phb-ioda.reserved_pe, phb-ioda.pe_alloc); diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 54657f4..0a8cecb 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -54,6 +54,8 @@ struct pnv_ioda_pe { * by slave PEs will be contributed to the master PE. One * PE can own multiple IO and M32 segments. */ + unsigned long io_segmap[8]; + unsigned long m32_segmap[8]; unsigned long m64_segmap[8]; /* Weight assigned to the PE for the sake of DMA resource @@ -154,16 +156,15 @@ struct pnv_phb { unsigned intio_segsize; unsigned intio_pci_base; - /* PE allocation bitmap */ + /* PE allocation */ unsigned long *pe_alloc; - /* PE allocation mutex */ + struct pnv_ioda_pe *pe_array; struct mutexpe_alloc_mutex; /* M32 IO segment maps */ + unsigned long io_segmap[8]; + unsigned long m32_segmap[8]; unsigned long m64_segmap[8]; - unsigned int*m32_segmap; - unsigned int*io_segmap; - struct pnv_ioda_pe *pe_array; /* IRQ chip */ int irq_chip_init; --
[PATCH v5 18/42] powerpc/powernv: Helper function pnv_ioda_init_pe()
The patch introduces the helper function pnv_ioda_init_pe(), which initializes the PE instance after reserving or allocating the PE#, to simplify the code. The patch doesn't introduce behavioural changes. Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com --- v5: * Split from PATCH[v4 07/21] --- arch/powerpc/platforms/powernv/pci-ioda.c | 19 +-- 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 6187f84..f0b54ab 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -132,6 +132,17 @@ static inline bool pnv_pci_is_mem_pref_64(unsigned long flags) (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH)); } +static struct pnv_ioda_pe *pnv_ioda_init_pe(struct pnv_phb *phb, int pe_no) +{ + struct pnv_ioda_pe *pe = phb-ioda.pe_array[pe_no]; + + pe-phb = phb; + pe-pe_number = pe_no; + INIT_LIST_HEAD(pe-list); + + return pe; +} + static struct pnv_ioda_pe *pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no) { if (!(pe_no = 0 pe_no phb-ioda.total_pe)) { @@ -146,9 +157,7 @@ static struct pnv_ioda_pe *pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no) return NULL; } - phb-ioda.pe_array[pe_no].phb = phb; - phb-ioda.pe_array[pe_no].pe_number = pe_no; - return phb-ioda.pe_array[pe_no]; + return pnv_ioda_init_pe(phb, pe_no); } static struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb) @@ -167,9 +176,7 @@ static struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb) return NULL; } while (1); - phb-ioda.pe_array[pe_no].phb = phb; - phb-ioda.pe_array[pe_no].pe_number = pe_no; - return phb-ioda.pe_array[pe_no]; + return pnv_ioda_init_pe(phb, pe_no); } static void pnv_ioda_free_pe(struct pnv_phb *phb, int pe) -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v5 21/42] powerpc/powernv: Drop pnv_ioda_setup_dev_PE()
Nobody is using this function. The patch drops it. Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com Reviewed-by: Alexey Kardashevskiy a...@ozlabs.ru --- v5: * Derived from PATCH[v4 08/21] --- arch/powerpc/platforms/powernv/pci-ioda.c | 71 --- 1 file changed, 71 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index e9165fa..8a79403 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -,77 +,6 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset) } #endif /* CONFIG_PCI_IOV */ -#if 0 -static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev) -{ - struct pci_controller *hose = pci_bus_to_host(dev-bus); - struct pnv_phb *phb = hose-private_data; - struct pci_dn *pdn = pci_get_pdn(dev); - struct pnv_ioda_pe *pe; - int pe_num; - - if (!pdn) { - pr_err(%s: Device tree node not associated properly\n, - pci_name(dev)); - return NULL; - } - if (pdn-pe_number != IODA_INVALID_PE) - return NULL; - - /* PE#0 has been pre-set */ - if (dev-bus-number == 0) - pe_num = 0; - else - pe_num = pnv_ioda_alloc_pe(phb); - if (pe_num == IODA_INVALID_PE) { - pr_warning(%s: Not enough PE# available, disabling device\n, - pci_name(dev)); - return NULL; - } - - /* NOTE: We get only one ref to the pci_dev for the pdn, not for the -* pointer in the PE data structure, both should be destroyed at the -* same time. However, this needs to be looked at more closely again -* once we actually start removing things (Hotplug, SR-IOV, ...) -* -* At some point we want to remove the PDN completely anyways -*/ - pe = phb-ioda.pe_array[pe_num]; - pci_dev_get(dev); - pdn-pcidev = dev; - pdn-pe_number = pe_num; - pe-pdev = dev; - pe-pbus = NULL; - pe-tce32_seg = -1; - pe-mve_number = -1; - pe-rid = dev-bus-number 8 | pdn-devfn; - - pe_info(pe, Associated device to PE\n); - - if (pnv_ioda_configure_pe(phb, pe)) { - /* XXX What do we do here ? 
*/ - if (pe_num) - pnv_ioda_free_pe(phb, pe_num); - pdn-pe_number = IODA_INVALID_PE; - pe-pdev = NULL; - pci_dev_put(dev); - return NULL; - } - - /* Assign a DMA weight to the device */ - pe-dma_weight = pnv_ioda_dma_weight(dev); - if (pe-dma_weight != 0) { - phb-ioda.dma_weight += pe-dma_weight; - phb-ioda.dma_pe_count++; - } - - /* Link the PE */ - pnv_ioda_link_pe_by_weight(phb, pe); - - return pe; -} -#endif /* Useful for SRIOV case */ - static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe) { struct pci_dev *dev; -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v5 41/42] drivers/of: Return allocated memory chunk from of_fdt_unflatten_tree()
The patch changes of_fdt_unflatten_tree() so that it returns the allocated memory chunk for unflattened device-tree, which can be released once it's obsoleted. Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com --- v5: * Newly introduced --- drivers/of/fdt.c | 21 +++-- include/linux/of_fdt.h | 6 +++--- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index b6a6c59..a954279 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -405,10 +405,10 @@ static void *unflatten_dt_node(void *blob, * @dt_alloc: An allocator that provides a virtual address to memory * for the resulting tree */ -static void __unflatten_device_tree(void *blob, - struct device_node *dad, - struct device_node **mynodes, - void * (*dt_alloc)(u64 size, u64 align)) +static void *__unflatten_device_tree(void *blob, +struct device_node *dad, +struct device_node **mynodes, +void * (*dt_alloc)(u64 size, u64 align)) { unsigned long size; int start; @@ -418,7 +418,7 @@ static void __unflatten_device_tree(void *blob, if (!blob) { pr_debug(No device tree pointer\n); - return; + return NULL; } pr_debug(Unflattening device tree:\n); @@ -428,7 +428,7 @@ static void __unflatten_device_tree(void *blob, if (fdt_check_header(blob)) { pr_err(Invalid device tree blob header\n); - return; + return NULL; } /* First pass, scan for size */ @@ -455,6 +455,7 @@ static void __unflatten_device_tree(void *blob, be32_to_cpup(mem + size)); pr_debug( - unflatten_device_tree()\n); + return mem; } static void *kernel_tree_alloc(u64 size, u64 align) @@ -470,11 +471,11 @@ static void *kernel_tree_alloc(u64 size, u64 align) * pointers of the nodes so the normal device-tree walking functions * can be used. 
*/ -void of_fdt_unflatten_tree(unsigned long *blob, - struct device_node *dad, - struct device_node **mynodes) +void *of_fdt_unflatten_tree(unsigned long *blob, + struct device_node *dad, + struct device_node **mynodes) { - __unflatten_device_tree(blob, dad, mynodes, kernel_tree_alloc); + return __unflatten_device_tree(blob, dad, mynodes, kernel_tree_alloc); } EXPORT_SYMBOL_GPL(of_fdt_unflatten_tree); diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index 8882640..8a38c6a 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -37,9 +37,9 @@ extern bool of_fdt_is_big_endian(const void *blob, unsigned long node); extern int of_fdt_match(const void *blob, unsigned long node, const char *const *compat); -extern void of_fdt_unflatten_tree(unsigned long *blob, - struct device_node *dad, - struct device_node **mynodes); +extern void *of_fdt_unflatten_tree(unsigned long *blob, + struct device_node *dad, + struct device_node **mynodes); /* TBD: Temporary export of fdt globals - remove when code fully merged */ extern int __initdata dt_root_addr_cells; -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v5 12/42] powerpc/pci: Cleanup on pci_controller_ops
Each PHB maintains one instance of struct pci_controller_ops, which includes various callbacks called by the PCI subsystem. In the definition of this struct, some callbacks have explicit names for their arguments, but the rest don't. The patch removes all explicit names of the arguments to the callbacks in struct pci_controller_ops to keep the code looking consistent. Cc: Daniel Axtens d...@axtens.net Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com --- v5: * Newly introduced --- arch/powerpc/include/asm/pci-bridge.h | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 744884b..1252cd5 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -18,8 +18,8 @@ struct device_node; * PCI controller operations */ struct pci_controller_ops { - void(*dma_dev_setup)(struct pci_dev *dev); - void(*dma_bus_setup)(struct pci_bus *bus); + void(*dma_dev_setup)(struct pci_dev *); + void(*dma_bus_setup)(struct pci_bus *); int (*probe_mode)(struct pci_bus *); @@ -28,8 +28,8 @@ struct pci_controller_ops { bool(*enable_device_hook)(struct pci_dev *); /* Called during PCI resource reassignment */ - resource_size_t (*window_alignment)(struct pci_bus *, unsigned long type); - void(*reset_secondary_bus)(struct pci_dev *dev); + resource_size_t (*window_alignment)(struct pci_bus *, unsigned long); + void(*reset_secondary_bus)(struct pci_dev *); }; /* -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v5 34/42] powerpc/pci: Delay creating pci_dn
The pci_dn instances are allocated from memblock or bootmem when creating PCI controller (hoses) in setup_arch(). The PCI hotplug, which will be supported by subsequent patches, will release PCI device nodes and their corresponding pci_dn on unplugging event. The pci_dn instance memory chunks allocated from memblock or bootmem are hard to reuse after being released. The patch delays creating pci_dn so that they can be allocated from slab. In turn, the memory chunks for them can be reused after being released without problem. The creation of eeh_dev instances, which depends on pci_dn, is delayed a bit as well. Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com --- v5: * Derived from PATCH[v4 15/21] * Dropped unrelated changes moving pci_dev_pdn_setup() around --- arch/powerpc/include/asm/ppc-pci.h | 1 - arch/powerpc/kernel/eeh_dev.c | 2 +- arch/powerpc/kernel/pci_dn.c | 8 +-- arch/powerpc/platforms/maple/pci.c | 35 ++ arch/powerpc/platforms/pasemi/pci.c| 3 --- arch/powerpc/platforms/powermac/pci.c | 39 +- arch/powerpc/platforms/powernv/pci.c | 3 --- arch/powerpc/platforms/pseries/setup.c | 1 - 8 files changed, 52 insertions(+), 40 deletions(-) diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h index 4122a86..7388316 100644 --- a/arch/powerpc/include/asm/ppc-pci.h +++ b/arch/powerpc/include/asm/ppc-pci.h @@ -40,7 +40,6 @@ void *traverse_pci_dn(struct pci_dn *root, void *(*fn)(struct pci_dn *, void *), void *data); -extern void pci_devs_phb_init(void); extern void pci_devs_phb_init_dynamic(struct pci_controller *phb); /* From rtas_pci.h */ diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c index aabba94..f33ce5b 100644 --- a/arch/powerpc/kernel/eeh_dev.c +++ b/arch/powerpc/kernel/eeh_dev.c @@ -110,4 +110,4 @@ static int __init eeh_dev_phb_init(void) return 0; } -core_initcall(eeh_dev_phb_init); +core_initcall_sync(eeh_dev_phb_init); diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index 
0469247..35554c2 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -288,7 +288,7 @@ void *update_dn_pci_info(struct device_node *dn, void *data) struct device_node *parent; struct pci_dn *pdn; - pdn = zalloc_maybe_bootmem(sizeof(*pdn), GFP_KERNEL); + pdn = kzalloc(sizeof(*pdn), GFP_KERNEL); if (pdn == NULL) return NULL; dn-data = pdn; @@ -462,15 +462,19 @@ void pci_devs_phb_init_dynamic(struct pci_controller *phb) * pci device found underneath. This routine runs once, * early in the boot sequence. */ -void __init pci_devs_phb_init(void) +static int __init pci_devs_phb_init(void) { struct pci_controller *phb, *tmp; /* This must be done first so the device nodes have valid pci info! */ list_for_each_entry_safe(phb, tmp, hose_list, list_node) pci_devs_phb_init_dynamic(phb); + + return 0; } +core_initcall(pci_devs_phb_init); + static void pci_dev_pdn_setup(struct pci_dev *pdev) { struct pci_dn *pdn; diff --git a/arch/powerpc/platforms/maple/pci.c b/arch/powerpc/platforms/maple/pci.c index a923230..04a69a8 100644 --- a/arch/powerpc/platforms/maple/pci.c +++ b/arch/powerpc/platforms/maple/pci.c @@ -568,6 +568,26 @@ void maple_pci_irq_fixup(struct pci_dev *dev) DBG( - maple_pci_irq_fixup\n); } +static int maple_pci_root_bridge_prepare(struct pci_host_bridge *bridge) +{ + struct pci_controller *hose = pci_bus_to_host(bridge-bus); + struct device_node *np, *child; + + if (hose != u3_agp) + return 0; + + /* Fixup the PCI-OF mapping for U3 AGP due to bus renumbering. We +* assume there is no P2P bridge on the AGP bus, which should be a +* safe assumptions hopefully. 
+*/ + np = hose-dn; + PCI_DN(np)-busno = 0xf0; + for_each_child_of_node(np, child) + PCI_DN(child)-busno = 0xf0; + + return 0; +} + void __init maple_pci_init(void) { struct device_node *np, *root; @@ -605,20 +625,7 @@ void __init maple_pci_init(void) if (ht maple_add_bridge(ht) != 0) of_node_put(ht); - /* Setup the linkage between OF nodes and PHBs */ - pci_devs_phb_init(); - - /* Fixup the PCI-OF mapping for U3 AGP due to bus renumbering. We -* assume there is no P2P bridge on the AGP bus, which should be a -* safe assumptions hopefully. -*/ - if (u3_agp) { - struct device_node *np = u3_agp-dn; - PCI_DN(np)-busno = 0xf0; - for (np = np-child; np; np = np-sibling) - PCI_DN(np)-busno = 0xf0; - } - + ppc_md.pcibios_root_bridge_prepare = maple_pci_root_bridge_prepare; /* Tell pci.c to
[PATCH v5 33/42] powerpc/powernv: Functions to get/reset PCI slot status
The patch exports 4 functions, which base on corresponding OPAL APIs to get or set PCI slot status. Those functions are going to be used by PCI hotplug module in subsequent patches: pnv_pci_get_overlay_dt() opal_get_overlay_dt() pnv_pci_get_presence_status() opal_pci_get_presence_status() pnv_pci_get_power_status() opal_pci_get_power_status() pnv_pci_set_power_status() opal_pci_set_power_status() Besides, the patch also exports pnv_pci_hotplug_notifier_{register, unregister}() to allow registration and unregistration of PCI hotplug notifier, which will be used to receive PCI hotplug message from skiboot firmware. Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com --- v5: * Derived from PATCH[v4 14/21] * No polling required for pnv_pci_get_presence_status() * Separate functions for registration and unregistration of PCI hotplug notifier * int64_t for value returned from OPAL API --- arch/powerpc/include/asm/opal-api.h| 8 +++- arch/powerpc/include/asm/opal.h| 4 ++ arch/powerpc/include/asm/pnv-pci.h | 7 +++ arch/powerpc/platforms/powernv/opal-wrappers.S | 4 ++ arch/powerpc/platforms/powernv/pci.c | 66 ++ 5 files changed, 88 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index 0321a90..c534dd8 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -153,7 +153,11 @@ #define OPAL_FLASH_READ110 #define OPAL_FLASH_WRITE 111 #define OPAL_FLASH_ERASE 112 -#define OPAL_LAST 112 +#define OPAL_GET_OVERLAY_DT116 +#define OPAL_PCI_GET_PRESENCE_STATUS 117 +#define OPAL_PCI_GET_POWER_STATUS 118 +#define OPAL_PCI_SET_POWER_STATUS 119 +#define OPAL_LAST 119 /* Device tree flags */ @@ -352,6 +356,8 @@ enum opal_msg_type { OPAL_MSG_SHUTDOWN, /* params[0] = 1 reboot, 0 shutdown */ OPAL_MSG_HMI_EVT, OPAL_MSG_DPO, + OPAL_MSG_PRD, + OPAL_MSG_PCI_HOTPLUG, OPAL_MSG_TYPE_MAX, }; diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 6d467df..2d1c825 100644 
--- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -200,6 +200,10 @@ int64_t opal_flash_write(uint64_t id, uint64_t offset, uint64_t buf, uint64_t size, uint64_t token); int64_t opal_flash_erase(uint64_t id, uint64_t offset, uint64_t size, uint64_t token); +int64_t opal_get_overlay_dt(uint64_t *counter, void *buf, uint64_t len); +int64_t opal_pci_get_presence_status(uint64_t id, uint8_t *status); +int64_t opal_pci_get_power_status(uint64_t id, uint8_t *status); +int64_t opal_pci_set_power_status(uint64_t id, uint8_t status); /* Internal functions */ extern int early_init_dt_scan_opal(unsigned long node, const char *uname, diff --git a/arch/powerpc/include/asm/pnv-pci.h b/arch/powerpc/include/asm/pnv-pci.h index f9b4982..9f63375 100644 --- a/arch/powerpc/include/asm/pnv-pci.h +++ b/arch/powerpc/include/asm/pnv-pci.h @@ -13,6 +13,13 @@ #include linux/pci.h #include misc/cxl.h +extern int pnv_pci_get_overlay_dt(uint64_t *counter, void *buf, uint64_t len); +extern int pnv_pci_get_presence_status(uint64_t id, uint8_t *status); +extern int pnv_pci_get_power_status(uint64_t id, uint8_t *status); +extern int pnv_pci_set_power_status(uint64_t id, uint8_t status); +extern int pnv_pci_hotplug_notifier_register(struct notifier_block *nb); +extern int pnv_pci_hotplug_notifier_unregister(struct notifier_block *nb); + int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode); int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq, unsigned int virq); diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index a7ade94..1d87c30 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -295,3 +295,7 @@ OPAL_CALL(opal_i2c_request, OPAL_I2C_REQUEST); OPAL_CALL(opal_flash_read, OPAL_FLASH_READ); OPAL_CALL(opal_flash_write,OPAL_FLASH_WRITE); OPAL_CALL(opal_flash_erase,OPAL_FLASH_ERASE); +OPAL_CALL(opal_get_overlay_dt, 
OPAL_GET_OVERLAY_DT); +OPAL_CALL(opal_pci_get_presence_status, OPAL_PCI_GET_PRESENCE_STATUS); +OPAL_CALL(opal_pci_get_power_status, OPAL_PCI_GET_POWER_STATUS); +OPAL_CALL(opal_pci_set_power_status, OPAL_PCI_SET_POWER_STATUS); diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index bf5df04..c332ea7 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++
[PATCH v5 38/42] powerpc/powernv: Select OF_OVERLAY
The device tree nodes will be changed dynamically on PCI hotplug events on PowerNV platform with the help of overlay mechanism. The patch enables CONFIG_OF_OVERLAY on PowerNV platform to support that. Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com --- v5: * Derived from PATCH[v4 20/21] * Enables OF_OVERLAY instead of OF_DYNAMIC --- arch/powerpc/platforms/powernv/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig index 4b044d8..97d481b 100644 --- a/arch/powerpc/platforms/powernv/Kconfig +++ b/arch/powerpc/platforms/powernv/Kconfig @@ -18,4 +18,5 @@ config PPC_POWERNV select CPU_FREQ_GOV_ONDEMAND select CPU_FREQ_GOV_CONSERVATIVE select PPC_DOORBELL + select OF_OVERLAY default y -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v5 06/42] powerpc/powernv: Improve IO and M32 mapping
The PHB's IO or M32 window is divided evenly to segments, each of them can be mapped to arbitrary PE# by IODT or M32DT. Current code figures out the consumed IO and M32 segments by one particular PE from the windows of the PE's upstream bridge. It won't be reliable once we extend M64 windows of root port, or the upstream port of the PCIE switch behind root port to PHB's IO or M32 window, in order to support PCI hotplug in future. The patch improves the above situation by calculating PE's consumed IO or M32 segments from its contained devices, no PCI bridge windows involved if the PE doesn't contain all the subordinate PCI buses. Otherwise, the PCI bridge windows still contribute to PE's consumed IO or M32 segments. Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com --- arch/powerpc/platforms/powernv/pci-ioda.c | 136 ++ 1 file changed, 80 insertions(+), 56 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 3bb4ce8..46a5e10 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2959,76 +2959,100 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev) } #endif /* CONFIG_PCI_IOV */ -/* - * This function is supposed to be called on basis of PE from top - * to bottom style. So the the I/O or MMIO segment assigned to - * parent PE could be overrided by its child PEs if necessary. - */ -static void pnv_ioda_setup_pe_seg(struct pci_controller *hose, - struct pnv_ioda_pe *pe) +static int pnv_ioda_map_pe_one_res(struct pci_controller *hose, + struct pnv_ioda_pe *pe, + struct resource *res) { struct pnv_phb *phb = hose-private_data; struct pci_bus_region region; - struct resource *res; - int i, index; + int index; unsigned int segsize; unsigned long *segmap, *pe_segmap; uint16_t win; int64_t rc; - /* -* NOTE: We only care PCI bus based PE for now. For PCI -* device based PE, for example SRIOV sensitive VF should -* be figured out later. 
-*/ - BUG_ON(!(pe-flags (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))); + /* Check if we need map the resource */ + if (!res-parent || + !res-flags || + res-start res-end || + pnv_pci_is_mem_pref_64(res-flags)) + return 0; - pci_bus_for_each_resource(pe-pbus, res, i) { - if (!res || !res-flags || - res-start res-end) - continue; + if (res-flags IORESOURCE_IO) { + region.start = res-start - phb-ioda.io_pci_base; + region.end = res-end - phb-ioda.io_pci_base; + segsize = phb-ioda.io_segsize; + segmap = phb-ioda.io_segmap; + pe_segmap= pe-io_segmap; + win = OPAL_IO_WINDOW_TYPE; + } else if ((res-flags IORESOURCE_MEM) + !pnv_pci_is_mem_pref_64(res-flags)) { + region.start = res-start - + hose-mem_offset[0] - + phb-ioda.m32_pci_base; + region.end = res-end - + hose-mem_offset[0] - + phb-ioda.m32_pci_base; + segsize = phb-ioda.m32_segsize; + segmap = phb-ioda.m32_segmap; + pe_segmap= pe-m32_segmap; + win = OPAL_M32_WINDOW_TYPE; + } else { + return 0; + } - if (res-flags IORESOURCE_IO) { - region.start = res-start - phb-ioda.io_pci_base; - region.end = res-end - phb-ioda.io_pci_base; - segsize = phb-ioda.io_segsize; - segmap = phb-ioda.io_segmap; - pe_segmap= pe-io_segmap; - win = OPAL_IO_WINDOW_TYPE; - } else if ((res-flags IORESOURCE_MEM) - !pnv_pci_is_mem_pref_64(res-flags)) { - region.start = res-start - - hose-mem_offset[0] - - phb-ioda.m32_pci_base; - region.end = res-end - - hose-mem_offset[0] - - phb-ioda.m32_pci_base; - segsize = phb-ioda.m32_segsize; - segmap = phb-ioda.m32_segmap; - pe_segmap= pe-m32_segmap; - win = OPAL_M32_WINDOW_TYPE; - } else { - continue; + index = region.start / phb-ioda.io_segsize; + while (index phb-ioda.total_pe +
[PATCH v5 31/42] powerpc/pci: Move pcibios_find_pci_bus() around
The patch moves pcibios_find_pci_bus() to the PPC kernel directory so that it can be reused by hotplug code for pSeries and PowerNV platform at the same time. Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com Acked-by: Benjamin Herrenschmidt b...@kernel.crashing.org --- v5: * Derived from PATCH[v4 12/21] --- arch/powerpc/kernel/pci-hotplug.c | 36 ++ arch/powerpc/platforms/pseries/pci_dlpar.c | 32 -- 2 files changed, 36 insertions(+), 32 deletions(-) diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c index ca392fc..1482bc1 100644 --- a/arch/powerpc/kernel/pci-hotplug.c +++ b/arch/powerpc/kernel/pci-hotplug.c @@ -21,6 +21,42 @@ #include asm/firmware.h #include asm/eeh.h +static struct pci_bus *find_pci_bus(struct pci_bus *bus, + struct device_node *dn) +{ + struct pci_bus *tmp, *child = NULL; + struct device_node *busdn; + + busdn = pci_bus_to_OF_node(bus); + if (busdn == dn) + return bus; + + list_for_each_entry(tmp, bus-children, node) { + child = find_pci_bus(tmp, dn); + if (child) + break; + } + + return child; +} + +/** + * pcibios_find_pci_bus - find PCI bus according to the given device node + * @dn: Device node + * + * Find the corresponding PCI bus according to the given device node. 
+ */ +struct pci_bus *pcibios_find_pci_bus(struct device_node *dn) +{ + struct pci_dn *pdn = PCI_DN(dn); + + if (!pdn || !pdn-phb || !pdn-phb-bus) + return NULL; + + return find_pci_bus(pdn-phb-bus, dn); +} +EXPORT_SYMBOL_GPL(pcibios_find_pci_bus); + /** * pcibios_release_device - release PCI device * @dev: PCI device diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index 5d4a3df..906dbaa 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -34,38 +34,6 @@ #include pseries.h -static struct pci_bus * -find_bus_among_children(struct pci_bus *bus, -struct device_node *dn) -{ - struct pci_bus *child = NULL; - struct pci_bus *tmp; - struct device_node *busdn; - - busdn = pci_bus_to_OF_node(bus); - if (busdn == dn) - return bus; - - list_for_each_entry(tmp, bus-children, node) { - child = find_bus_among_children(tmp, dn); - if (child) - break; - }; - return child; -} - -struct pci_bus * -pcibios_find_pci_bus(struct device_node *dn) -{ - struct pci_dn *pdn = dn-data; - - if (!pdn || !pdn-phb || !pdn-phb-bus) - return NULL; - - return find_bus_among_children(pdn-phb-bus, dn); -} -EXPORT_SYMBOL_GPL(pcibios_find_pci_bus); - struct pci_controller *init_phb_dynamic(struct device_node *dn) { struct pci_controller *phb; -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 2/2] rheap: move rheap.c from arch/powerpc/lib/ to lib/
On Thu, 2015-06-04 at 00:56 -0500, Zhao Qiang-B45475 wrote: On Thu, 2015-05-28 at 1:37AM +0800, Wood Scott wrote: -Original Message- From: Wood Scott-B07421 Sent: Thursday, May 28, 2015 1:37 AM To: Zhao Qiang-B45475 Cc: linuxppc-dev@lists.ozlabs.org; Wood Scott-B07421; Xie Xiaobo- R63061 Subject: Re: [PATCH 2/2] rheap: move rheap.c from arch/powerpc/lib/ to lib/ On Wed, 2015-05-27 at 17:12 +0800, Zhao Qiang wrote: qe need to use the rheap, so move it to public directory. You've been previously asked to use lib/genalloc.c rather than introduce duplicate functionality into /lib. NACK. Can't use lib/genalloc.c instead of rheap.c. Qe need to alloc muram of qe, not DIMM. lib/genalloc.h is not for allocating main memory. It is for allocating special regions. It is serving the same purpose as rheap. -Scott ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v5 32/42] powerpc/powernv: Introduce pnv_pci_poll()
The patch obsoletes pnv_eeh_poll() with pnv_pci_poll(): * The return value from last OPAL API is passed to the pnv_pci_poll() and handled there. * More information (e.g. PCI slot power status) is retrieved if the last argument is valid. Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com --- v5: * Derived from PATCH[v4 13/21] --- arch/powerpc/platforms/powernv/eeh-powernv.c | 46 ++-- arch/powerpc/platforms/powernv/pci.c | 21 + arch/powerpc/platforms/powernv/pci.h | 1 + 3 files changed, 31 insertions(+), 37 deletions(-) diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 4eb53ed..7ee328b 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -743,28 +743,11 @@ static int pnv_eeh_get_state(struct eeh_pe *pe, int *delay) return ret; } -static s64 pnv_eeh_poll(uint64_t id) -{ - s64 rc = OPAL_HARDWARE; - - while (1) { - rc = opal_pci_poll(id, NULL); - if (rc = 0) - break; - - if (system_state SYSTEM_RUNNING) - udelay(1000 * rc); - else - msleep(rc); - } - - return rc; -} - int pnv_eeh_phb_reset(struct pci_controller *hose, int option) { struct pnv_phb *phb = hose-private_data; s64 rc = OPAL_HARDWARE; + int ret; pr_debug(%s: Reset PHB#%x, option=%d\n, __func__, hose-global_number, option); @@ -779,8 +762,6 @@ int pnv_eeh_phb_reset(struct pci_controller *hose, int option) rc = opal_pci_reset(phb-opal_id, OPAL_RESET_PHB_COMPLETE, OPAL_DEASSERT_RESET); - if (rc 0) - goto out; /* * Poll state of the PHB until the request is done @@ -788,24 +769,22 @@ int pnv_eeh_phb_reset(struct pci_controller *hose, int option) * reset followed by hot reset on root bus. So we also * need the PCI bus settlement delay. 
*/ - rc = pnv_eeh_poll(phb-opal_id); - if (option == EEH_RESET_DEACTIVATE) { + ret = pnv_pci_poll(phb-opal_id, rc, NULL); + if (option == EEH_RESET_DEACTIVATE !ret) { if (system_state SYSTEM_RUNNING) udelay(1000 * EEH_PE_RST_SETTLE_TIME); else msleep(EEH_PE_RST_SETTLE_TIME); } -out: - if (rc != OPAL_SUCCESS) - return -EIO; - return 0; + return ret; } static int pnv_eeh_root_reset(struct pci_controller *hose, int option) { struct pnv_phb *phb = hose-private_data; s64 rc = OPAL_HARDWARE; + int ret; pr_debug(%s: Reset PHB#%x, option=%d\n, __func__, hose-global_number, option); @@ -827,18 +806,13 @@ static int pnv_eeh_root_reset(struct pci_controller *hose, int option) rc = opal_pci_reset(phb-opal_id, OPAL_RESET_PCI_HOT, OPAL_DEASSERT_RESET); - if (rc 0) - goto out; /* Poll state of the PHB until the request is done */ - rc = pnv_eeh_poll(phb-opal_id); - if (option == EEH_RESET_DEACTIVATE) + ret = pnv_pci_poll(phb-opal_id, rc, NULL); + if (option == EEH_RESET_DEACTIVATE !ret) msleep(EEH_PE_RST_SETTLE_TIME); -out: - if (rc != OPAL_SUCCESS) - return -EIO; - return 0; + return ret; } static int __pnv_eeh_bridge_reset(struct pci_dev *dev, int option) @@ -928,10 +902,8 @@ static int pnv_eeh_bridge_reset(struct pci_dev *pdev, int option) phb = hose-private_data; id |= (pdev-bus-number 24) | (pdev-devfn 16) | phb-opal_id; rc = opal_pci_reset(id, scope, OPAL_ASSERT_RESET); - if (rc 0) - rc = pnv_eeh_poll(id); - return (rc == OPAL_SUCCESS) ? 0 : -EIO; + return pnv_pci_poll(id, rc, NULL); } static void pnv_eeh_wait_for_pending(struct pci_dn *pdn, int pos, diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 678eb24..bf5df04 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -44,6 +44,27 @@ #define cfg_dbg(fmt...)do { } while(0) //#define cfg_dbg(fmt...) 
printk(fmt) +int pnv_pci_poll(uint64_t id, int64_t rval, uint8_t *pval) +{ + while (rval 0) { + if (system_state SYSTEM_RUNNING) + udelay(1000 * rval); + else + msleep(rval); + + rval = opal_pci_poll(id, pval); + } + + /* +* The caller expects to retrieve additional information +* if the last argument is valid. +*/ + if (rval ==
[PATCH v5 14/42] powerpc/powernv: Allocate PE# in descending order
The available PE#, represented by a bitmap in the PHB, is allocated in ascending order. It conflicts with the fact that M64 segments are assigned in the same order. In order to avoid the conflict, the patch allocates PE# in descending order. Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com --- v5: * Split from [PATCH v5 v4 06/21] --- arch/powerpc/platforms/powernv/pci-ioda.c | 21 + 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index d8b0ef5..0d6539a 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -152,18 +152,23 @@ static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no) static int pnv_ioda_alloc_pe(struct pnv_phb *phb) { - unsigned long pe; + unsigned long pe_no; + unsigned long limit = phb-ioda.total_pe - 1; do { - pe = find_next_zero_bit(phb-ioda.pe_alloc, - phb-ioda.total_pe, 0); - if (pe = phb-ioda.total_pe) + pe_no = find_next_zero_bit(phb-ioda.pe_alloc, + phb-ioda.total_pe, limit); + if (pe_no phb-ioda.total_pe + !test_and_set_bit(pe_no, phb-ioda.pe_alloc)) + break; + + if (--limit = phb-ioda.total_pe) return IODA_INVALID_PE; - } while(test_and_set_bit(pe, phb-ioda.pe_alloc)); + } while (1); - phb-ioda.pe_array[pe].phb = phb; - phb-ioda.pe_array[pe].pe_number = pe; - return pe; + phb-ioda.pe_array[pe_no].phb = phb; + phb-ioda.pe_array[pe_no].pe_number = pe_no; + return pe_no; } static void pnv_ioda_free_pe(struct pnv_phb *phb, int pe) -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v5 35/42] powerpc/pci: Create eeh_dev while creating pci_dn
The eeh_dev is always created based on pci_dn, but with initcall core_initcall_sync(). The patch creates eeh_dev when pci_dn is created, indicating they have same life cycle. Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com --- v5: * Derived from PATCH[v4 16/21] --- arch/powerpc/include/asm/eeh.h | 6 -- arch/powerpc/kernel/eeh_dev.c | 18 -- arch/powerpc/kernel/pci_dn.c | 12 arch/powerpc/platforms/pseries/setup.c | 6 +- 4 files changed, 21 insertions(+), 21 deletions(-) diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index ea1f13c4..c0236a6 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -272,7 +272,8 @@ void eeh_pe_restore_bars(struct eeh_pe *pe); const char *eeh_pe_loc_get(struct eeh_pe *pe); struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe); -void *eeh_dev_init(struct pci_dn *pdn, void *data); +struct eeh_dev *eeh_dev_init(struct pci_dn *pdn, +struct pci_controller *phb); void eeh_dev_phb_init_dynamic(struct pci_controller *phb); int eeh_init(void); int __init eeh_ops_register(struct eeh_ops *ops); @@ -325,7 +326,8 @@ static inline int eeh_init(void) return 0; } -static inline void *eeh_dev_init(struct pci_dn *pdn, void *data) +static inline struct eeh_dev *eeh_dev_init(struct pci_dn *pdn, + struct pci_controller *phb) { return NULL; } diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c index f33ce5b..7486932 100644 --- a/arch/powerpc/kernel/eeh_dev.c +++ b/arch/powerpc/kernel/eeh_dev.c @@ -44,14 +44,14 @@ /** * eeh_dev_init - Create EEH device according to OF node * @pdn: PCI device node - * @data: PHB + * @phb: PCI controller * * It will create EEH device according to the given OF node. The function * might be called by PCI emunation, DR, PHB hotplug. 
*/ -void *eeh_dev_init(struct pci_dn *pdn, void *data) +struct eeh_dev *eeh_dev_init(struct pci_dn *pdn, +struct pci_controller *phb) { - struct pci_controller *phb = data; struct eeh_dev *edev; /* Allocate EEH device */ @@ -68,7 +68,7 @@ void *eeh_dev_init(struct pci_dn *pdn, void *data) edev-phb = phb; INIT_LIST_HEAD(edev-list); - return NULL; + return edev; } /** @@ -80,16 +80,8 @@ void *eeh_dev_init(struct pci_dn *pdn, void *data) */ void eeh_dev_phb_init_dynamic(struct pci_controller *phb) { - struct pci_dn *root = phb-pci_data; - /* EEH PE for PHB */ eeh_phb_pe_create(phb); - - /* EEH device for PHB */ - eeh_dev_init(root, phb); - - /* EEH devices for children OF nodes */ - traverse_pci_dn(root, eeh_dev_init, phb); } /** @@ -105,8 +97,6 @@ static int __init eeh_dev_phb_init(void) list_for_each_entry_safe(phb, tmp, hose_list, list_node) eeh_dev_phb_init_dynamic(phb); - pr_info(EEH: devices created\n); - return 0; } diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index 35554c2..d4330d2 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -287,6 +287,9 @@ void *update_dn_pci_info(struct device_node *dn, void *data) const __be32 *regs; struct device_node *parent; struct pci_dn *pdn; +#ifdef CONFIG_EEH + struct eeh_dev *edev; +#endif pdn = kzalloc(sizeof(*pdn), GFP_KERNEL); if (pdn == NULL) @@ -317,6 +320,15 @@ void *update_dn_pci_info(struct device_node *dn, void *data) /* Extended config space */ pdn-pci_ext_config_space = (type of_read_number(type, 1) == 1); + /* Initialize EEH device */ +#ifdef CONFIG_EEH + edev = eeh_dev_init(pdn, phb); + if (!edev) { + kfree(pdn); + return NULL; + } +#endif + /* Attach to parent node */ INIT_LIST_HEAD(pdn-child_list); INIT_LIST_HEAD(pdn-list); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 5f80758..92974aa 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -261,12 +261,8 @@ 
static int pci_dn_reconfig_notifier(struct notifier_block *nb, unsigned long act switch (action) { case OF_RECONFIG_ATTACH_NODE: pci = np-parent-data; - if (pci) { + if (pci) update_dn_pci_info(np, pci-phb); - - /* Create EEH device for the OF node */ - eeh_dev_init(PCI_DN(np), pci-phb); - } break; default: err = NOTIFY_DONE; -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v5 30/42] powerpc/pci: Don't scan empty slot
In hotplug case, function pcibios_add_pci_devices() is called to rescan the specified PCI bus, which might not have any child devices. Access to the PCI bus's child device node will cause kernel crash without exception. The patch adds condition of skipping scanning PCI bus without child devices, in order to avoid kernel crash. Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com --- v5: * Derived from PATCH[v4 11/21] --- arch/powerpc/kernel/pci-hotplug.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c index 21973e7..ca392fc 100644 --- a/arch/powerpc/kernel/pci-hotplug.c +++ b/arch/powerpc/kernel/pci-hotplug.c @@ -92,7 +92,8 @@ void pcibios_add_pci_devices(struct pci_bus * bus) if (mode == PCI_PROBE_DEVTREE) { /* use ofdt-based probe */ of_rescan_bus(dn, bus); - } else if (mode == PCI_PROBE_NORMAL) { + } else if (mode == PCI_PROBE_NORMAL + dn-child PCI_DN(dn-child)) { /* * Use legacy probe. In the partial hotplug case, we * probably have grandchildren devices unplugged. So -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v5 25/42] powerpc/powernv: Supports slot ID
PowerNV platform is running on top of skiboot firmware, which has changes supporting PCI slots. PCI slots are identified by PHB's OPAL ID (PHB slot) or combo of that and PCI slot ID. The patch changes argument names of opal_pci_reset() and opal_pci_poll() to reflect the firmware's change. pnv_eeh_phb_poll() is also renamed to pnv_eeh_poll() to reflect the firmware's change. Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com --- v5: * Split from PATCH[v4 09/21] --- arch/powerpc/include/asm/opal.h | 4 ++-- arch/powerpc/platforms/powernv/eeh-powernv.c | 8 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 042af1a..6d467df 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -129,7 +129,7 @@ int64_t opal_pci_map_pe_dma_window(uint64_t phb_id, uint16_t pe_number, uint16_t int64_t opal_pci_map_pe_dma_window_real(uint64_t phb_id, uint16_t pe_number, uint16_t dma_window_number, uint64_t pci_start_addr, uint64_t pci_mem_size); -int64_t opal_pci_reset(uint64_t phb_id, uint8_t reset_scope, uint8_t assert_state); +int64_t opal_pci_reset(uint64_t id, uint8_t reset_scope, uint8_t assert_state); int64_t opal_pci_get_hub_diag_data(uint64_t hub_id, void *diag_buffer, uint64_t diag_buffer_len); @@ -145,7 +145,7 @@ int64_t opal_get_epow_status(__be64 *status); int64_t opal_set_system_attention_led(uint8_t led_action); int64_t opal_pci_next_error(uint64_t phb_id, __be64 *first_frozen_pe, __be16 *pci_error_type, __be16 *severity); -int64_t opal_pci_poll(uint64_t phb_id); +int64_t opal_pci_poll(uint64_t id, uint8_t *val); int64_t opal_return_cpu(void); int64_t opal_check_token(uint64_t token); int64_t opal_reinit_cpus(uint64_t flags); diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index dfdb31f..4fd8f15 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ 
-743,12 +743,12 @@ static int pnv_eeh_get_state(struct eeh_pe *pe, int *delay) return ret; } -static s64 pnv_eeh_phb_poll(struct pnv_phb *phb) +static s64 pnv_eeh_poll(uint64_t id) { s64 rc = OPAL_HARDWARE; while (1) { - rc = opal_pci_poll(phb-opal_id); + rc = opal_pci_poll(id, NULL); if (rc = 0) break; @@ -788,7 +788,7 @@ int pnv_eeh_phb_reset(struct pci_controller *hose, int option) * reset followed by hot reset on root bus. So we also * need the PCI bus settlement delay. */ - rc = pnv_eeh_phb_poll(phb); + rc = pnv_eeh_poll(phb-opal_id); if (option == EEH_RESET_DEACTIVATE) { if (system_state SYSTEM_RUNNING) udelay(1000 * EEH_PE_RST_SETTLE_TIME); @@ -831,7 +831,7 @@ static int pnv_eeh_root_reset(struct pci_controller *hose, int option) goto out; /* Poll state of the PHB until the request is done */ - rc = pnv_eeh_phb_poll(phb); + rc = pnv_eeh_poll(phb-opal_id); if (option == EEH_RESET_DEACTIVATE) msleep(EEH_PE_RST_SETTLE_TIME); out: -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v5 20/42] powerpc/powernv: Rename pnv_ioda_get_pe() to pnv_ioda_dev_to_pe()
pnv_ioda_get_pe() indicates it's increasing refcount to the given PE instance from the name. However, it gets the instance of the PE, which contains the indicated PCI device. The patch renames it to pnv_ioda_dev_to_pe() to reflect its purpose. Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com --- v5: * Split from PATCH[v4 07/21] * Fixed do not use assignment in if condition from checkpatch.pl --- arch/powerpc/platforms/powernv/pci-ioda.c | 9 + 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 0447534..e9165fa 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -702,7 +702,7 @@ static int pnv_ioda_get_pe_state(struct pnv_phb *phb, int pe_no) * but in the meantime, we need to protect them to avoid warnings */ #ifdef CONFIG_PCI_MSI -static struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev) +static struct pnv_ioda_pe *pnv_ioda_dev_to_pe(struct pci_dev *dev) { struct pci_controller *hose = pci_bus_to_host(dev-bus); struct pnv_phb *phb = hose-private_data; @@ -2671,7 +2671,7 @@ int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode) struct pnv_ioda_pe *pe; int rc; - pe = pnv_ioda_get_pe(dev); + pe = pnv_ioda_dev_to_pe(dev); if (!pe) return -ENODEV; @@ -2787,7 +2787,8 @@ int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq, struct pnv_ioda_pe *pe; int rc; - if (!(pe = pnv_ioda_get_pe(dev))) + pe = pnv_ioda_dev_to_pe(dev); + if (!pe) return -ENODEV; /* Assign XIVE to PE */ @@ -2809,7 +2810,7 @@ static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, unsigned int hwirq, unsigned int virq, unsigned int is_64, struct msi_msg *msg) { - struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev); + struct pnv_ioda_pe *pe = pnv_ioda_dev_to_pe(dev); unsigned int xive_num = hwirq - phb-msi_base; __be32 data; int rc; -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org 
https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v5 05/42] powerpc/powernv: Simplify pnv_ioda_setup_pe_seg()
The original implementation of pnv_ioda_setup_pe_seg() configures IO and M32 segments with separate logic, which can be merged by caching @seg_bitmap, @seg_size, @win in advance. The patch shouldn't cause any behavioural changes. Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com --- v5: * Split from PATCH[v4 04/21] * Fixed coding style complained by checkpatch.pl --- arch/powerpc/platforms/powernv/pci-ioda.c | 67 +++ 1 file changed, 32 insertions(+), 35 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 53d0efd..3bb4ce8 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2971,7 +2971,10 @@ static void pnv_ioda_setup_pe_seg(struct pci_controller *hose, struct pci_bus_region region; struct resource *res; int i, index; - int rc; + unsigned int segsize; + unsigned long *segmap, *pe_segmap; + uint16_t win; + int64_t rc; /* * NOTE: We only care PCI bus based PE for now. 
For PCI @@ -2988,50 +2991,44 @@ static void pnv_ioda_setup_pe_seg(struct pci_controller *hose, if (res-flags IORESOURCE_IO) { region.start = res-start - phb-ioda.io_pci_base; region.end = res-end - phb-ioda.io_pci_base; - index = region.start / phb-ioda.io_segsize; - - while (index phb-ioda.total_pe - region.start = region.end) { - set_bit(index, phb-ioda.io_segmap); - set_bit(index, pe-io_segmap); - rc = opal_pci_map_pe_mmio_window(phb-opal_id, - pe-pe_number, OPAL_IO_WINDOW_TYPE, 0, index); - if (rc != OPAL_SUCCESS) { - pr_err(%s: OPAL error %d when mapping IO - segment #%d to PE#%d\n, - __func__, rc, index, pe-pe_number); - break; - } - - region.start += phb-ioda.io_segsize; - index++; - } + segsize = phb-ioda.io_segsize; + segmap = phb-ioda.io_segmap; + pe_segmap= pe-io_segmap; + win = OPAL_IO_WINDOW_TYPE; } else if ((res-flags IORESOURCE_MEM) - !pnv_pci_is_mem_pref_64(res-flags)) { + !pnv_pci_is_mem_pref_64(res-flags)) { region.start = res-start - hose-mem_offset[0] - phb-ioda.m32_pci_base; region.end = res-end - hose-mem_offset[0] - phb-ioda.m32_pci_base; - index = region.start / phb-ioda.m32_segsize; + segsize = phb-ioda.m32_segsize; + segmap = phb-ioda.m32_segmap; + pe_segmap= pe-m32_segmap; + win = OPAL_M32_WINDOW_TYPE; + } else { + continue; + } - while (index phb-ioda.total_pe - region.start = region.end) { - set_bit(index, phb-ioda.m32_segmap); - set_bit(index, pe-m32_segmap); - rc = opal_pci_map_pe_mmio_window(phb-opal_id, - pe-pe_number, OPAL_M32_WINDOW_TYPE, 0, index); - if (rc != OPAL_SUCCESS) { - pr_err(%s: OPAL error %d when mapping M32 - segment#%d to PE#%d, - __func__, rc, index, pe-pe_number); - break; - } + index = region.start / phb-ioda.io_segsize; + while (index phb-ioda.total_pe + region.start = region.end) { + set_bit(index, segmap); + set_bit(index, pe_segmap); - region.start += phb-ioda.m32_segsize; - index++; + rc = opal_pci_map_pe_mmio_window(phb-opal_id, + pe-pe_number, win, 0, index); + if (rc != OPAL_SUCCESS) { + pr_warn(%s: Error 
%lld mapping (%d) seg#%d to PHB#%d-PE#%d\n, +
[PATCH v5 08/42] powerpc/powernv: DMA32 cleanup
The patch cleans up DMA32 in pci-ioda.c. It shouldn't introduce behavioural changes: * Rename various fields in struct pnv_phb and struct pnv_ioda_pe as 32-bits DMA should be related to DMA, not TCE, and move them around to reflect their relationship and their relative importance. * Removed struct pnv_ioda_pe::tce32_segcount. Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com --- v5: * Split from PATCH[v4 5/21] --- arch/powerpc/platforms/powernv/pci-ioda.c | 48 +++ arch/powerpc/platforms/powernv/pci.h | 13 +++-- 2 files changed, 28 insertions(+), 33 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index d9ff739..4af3d06 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -971,7 +971,7 @@ static void pnv_ioda_link_pe_by_weight(struct pnv_phb *phb, struct pnv_ioda_pe *lpe; list_for_each_entry(lpe, phb-ioda.pe_dma_list, dma_link) { - if (lpe-dma_weight pe-dma_weight) { + if (lpe-dma32_weight pe-dma32_weight) { list_add_tail(pe-dma_link, lpe-dma_link); return; } @@ -996,14 +996,14 @@ static unsigned int pnv_ioda_dev_dma_weight(struct pci_dev *dev) if (dev-class == PCI_CLASS_SERIAL_USB_UHCI || dev-class == PCI_CLASS_SERIAL_USB_OHCI || dev-class == PCI_CLASS_SERIAL_USB_EHCI) - return 3 * phb-ioda.tce32_count; + return 3 * phb-ioda.dma32_segcount; /* Increase the weight of RAID (includes Obsidian) */ if ((dev-class 8) == PCI_CLASS_STORAGE_RAID) - return 15 * phb-ioda.tce32_count; + return 15 * phb-ioda.dma32_segcount; /* Default */ - return 10 * phb-ioda.tce32_count; + return 10 * phb-ioda.dma32_segcount; } static int __pnv_ioda_phb_dma_weight(struct pci_dev *pdev, void *data) @@ -1182,7 +1182,7 @@ static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe) continue; } pdn-pe_number = pe-pe_number; - pe-dma_weight += pnv_ioda_dev_dma_weight(dev); + pe-dma32_weight += pnv_ioda_dev_dma_weight(dev); if ((pe-flags PNV_IODA_PE_BUS_ALL) 
dev-subordinate) pnv_ioda_setup_same_PE(dev-subordinate, pe); } @@ -1219,10 +1219,10 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all) pe-flags |= (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS); pe-pbus = bus; pe-pdev = NULL; - pe-tce32_seg = -1; + pe-dma32_seg = -1; pe-mve_number = -1; pe-rid = bus-busn_res.start 8; - pe-dma_weight = 0; + pe-dma32_weight = 0; if (all) pe_info(pe, Secondary bus %d..%d associated with PE#%d\n, @@ -1585,7 +1585,7 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) pe-flags = PNV_IODA_PE_VF; pe-pbus = NULL; pe-parent_dev = pdev; - pe-tce32_seg = -1; + pe-dma32_seg = -1; pe-mve_number = -1; pe-rid = (pci_iov_virtfn_bus(pdev, vf_index) 8) | pci_iov_virtfn_devfn(pdev, vf_index); @@ -2061,7 +2061,7 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, /* XXX FIXME: Allocate multi-level tables on PHB3 */ /* We shouldn't already have a 32-bit DMA associated */ - if (WARN_ON(pe-tce32_seg = 0)) + if (WARN_ON(pe-dma32_seg = 0)) return; tbl = pnv_pci_table_alloc(phb-hose-node); @@ -2070,7 +2070,7 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, pnv_pci_link_table_and_group(phb-hose-node, 0, tbl, pe-table_group); /* Grab a 32-bit TCE table */ - pe-tce32_seg = base; + pe-dma32_seg = base; pe_info(pe, Setting up 32-bit TCE table at %08x..%08x\n, (base 28), ((base + segs) 28) - 1); @@ -2131,8 +2131,8 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, return; fail: /* XXX Failure: Try to fallback to 64-bit only ? 
*/ - if (pe-tce32_seg = 0) - pe-tce32_seg = -1; + if (pe-dma32_seg = 0) + pe-dma32_seg = -1; if (tce_mem) __free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs)); if (tbl) { @@ -2520,7 +2520,7 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, int64_t rc; /* We shouldn't already have a 32-bit DMA associated */ - if (WARN_ON(pe-tce32_seg = 0)) + if (WARN_ON(pe-dma32_seg = 0)) return; /* TVE #1 is selected by PCI address bit 59 */ @@ -2530,7 +2530,7 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
[PATCH v5 37/42] powerpc/pci: Update bridge windows on PCI plugging
During the PCI plugging event, the PCI devices are rescanned and their IO and MMIO resources are reassigned. However, the PowerNV platform will assign PE# based on that, which depends on updating to window of bridge of the PE's primary bus. The patch updates the windows of bridge of PE's primary bus if we have valid bridge. Otherwise, we assume it's root bus or SRIOV virtual bus and PE won't be assigned during PCI plugging time. Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com --- v5: * Derived from PATCH[v4 18/21] --- arch/powerpc/kernel/pci-common.c | 8 ++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 0358f24..811eb4d 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1471,8 +1471,12 @@ void pcibios_finish_adding_to_bus(struct pci_bus *bus) /* Allocate bus and devices resources */ pcibios_allocate_bus_resources(bus); pcibios_claim_one_bus(bus); - if (!pci_has_flag(PCI_PROBE_ONLY)) - pci_assign_unassigned_bus_resources(bus); + if (!pci_has_flag(PCI_PROBE_ONLY)) { + if (bus-self) + pci_assign_unassigned_bridge_resources(bus-self); + else + pci_assign_unassigned_bus_resources(bus); + } /* Fixup EEH */ eeh_add_device_tree_late(bus); -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v1 1/9]powerpc/powernv: Data structure and macros definition
On Wednesday 03 June 2015 04:41 AM, Daniel Axtens wrote: + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ I'm not certain about this, but I _think_ this is supposed to be version 2 only: http://git.kernel.org/cgit/linux/kernel/git/stable/linux-stable.git/tree/COPYING#n9 I referred to other files in the same folder for license information. So I could wait for the maintainers' comments on this. +/* + * Device tree parser code detect nest pmu support + * and create new nest pmus. This structure will + * hold the pmu functions and attrs for each nest pmu and + * will be referenced at the time of pmu registering. + */ The first sentence of that comment is unclear: I think you're trying to say Device tree parser code detects nest pmu support and registers new nest pmus? Also s/registering/registration/. Yes. I will rewrite the comment. +struct nest_pmu { + struct pmu pmu; + const struct attribute_group *attr_groups[4]; +}; + Regards, Daniel Axtens Thanks for the review Maddy ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v5 00/42] PowerPC/PowerNV: PCI Slot Management
The series of patches intends to support PCI slots for the PowerPC PowerNV platform, which is running on top of skiboot firmware. The patchset requires corresponding changes from skiboot firmware, which have been sent to skib...@lists.ozlabs.org for review. The PCI slots are exposed by skiboot with device node properties, and the kernel utilizes those properties to populate PCI slots accordingly. The original PCI infrastructure on the PowerNV platform can't support hotplug because the PE is assigned during PHB fixup time, which is called only once during system boot time. For this, the PCI infrastructure on the PowerNV platform has been reworked extensively. After that, the PE and its corresponding resources (IODT, M32DT, M64 segments, DMA32 and bypass window) are assigned upon updating the PCI bridge's resources, which might decide the PE# assigned to the PE (e.g. M64 resources, on P8 strictly speaking). Each PE will maintain a reference count, which is (number of child PCI devices + 1). That indicates that when the last child PCI device leaves the PE, the PE and its included resources will be released and put back into the free pool again. With this design, the PE will be released when the EEH PE is released. PATCH[1 - 24] are related to this part. From the skiboot perspective, a PCI slot provides (hot/fundamental/complete) resets to EEH. The kernel gets to know if skiboot supports various resets on one particular PCI slot through the device-tree node. If it does, EEH will utilize the functionality provided by skiboot. Besides, the device-tree nodes have to change in order to support PCI hotplug. For example, when one PCI adapter is inserted into one slot, its device-tree node should be added to the system dynamically. Conversely, the device-tree node should be removed from the system when the PCI adapter is going to be offline. Since pci_dn and eeh_dev have the same life cycle as PCI device nodes, they should be added/removed accordingly during PCI hotplug. PATCH[25 - 38] are doing the related work. 
The OF driver is changed to support unflattening FDT blob for sub-tree, which is covered by PATCH[39 - 41]. The last patch is the standalone PCI hotplug driver for PowerNV platform. When removing a PCI adapter from one PCI slot, which is invoked by a command in userland, the skiboot will power off the slot to save power and remove all device-tree nodes for all PCI devices behind the slot. Conversely, when the power to the slot is turned on, the PCI devices behind the slot are rescanned, and the device-tree nodes for those newly detected PCI devices will be built in skiboot. In both cases, one message will be sent to the kernel by skiboot so that the kernel can adjust the device-tree accordingly. At the same time, the kernel also has to deallocate or allocate PE# and its related resources (PE# and so on) for the removed/added PCI devices. Changelog = v5: * Rebased to 4.1.rc6 and some unmerged patches as below: Alexey's DDW patchset (v11); Gavin's EEH error injection support (in mpe's next branch); Richard's EEH cleanup patches (in mpe's next branch); Richard's EEH support for VF (v7); Gavin's misc EEH fixes for 4.2; * The revision is based on the corresponding skiboot patches (v7): https://patchwork.ozlabs.org/patch/480437/ * Utilize OF overlay to update device-tree with help of newly introduced OPAL API opal_get_overlay_dt(). * Split patches for easy review according to aik's comments. * Fix coding style from checkpatch.pl as pointed out by aik. * Code cleanup and misc fixup according to aik's input. v4: * Rebased to 4.1.RC1 * Added API to unflatten FDT blob to device node sub-tree, which is attached to the indicated parent device node. The original mechanism based on formatted string stream has been dropped. * The PATCH[v3 09/21] (powerpc/eeh: Delay probing EEH device during hotplug) was picked up and sent to linux-ppc@ separately for review as Richard's VF EEH Support depends on that. 
v3: * Rebased to 4.1.RC0 * PowerNV PCI infrastructure is totally refactored in order to support PCI hotplug. The PowerNV hotplug driver is also reworked a lot because of the changes in skiboot in order to support PCI hotplug. Gavin Shan (42): PCI: Add pcibios_setup_bridge() powerpc/powernv: Enable M64 on P7IOC powerpc/powernv: M64 support improvement powerpc/powernv: Trace consumed IO and M32 segments by PE powerpc/powernv: Simplify pnv_ioda_setup_pe_seg() powerpc/powernv: Improve IO and M32 mapping powerpc/powernv: Calculate PHB's DMA weight dynamically powerpc/powernv: DMA32 cleanup powerpc/powernv: pnv_ioda_setup_dma() configure one PE only powerpc/powernv: Trace DMA32 segments consumed by PE powerpc/powernv: Increase PE# capacity powerpc/pci: Cleanup on pci_controller_ops powerpc/pci: Override pcibios_setup_bridge() powerpc/powernv: Allocate PE# in descending order powerpc/powernv: Reserve PE# for root bus powerpc/powernv: Create PEs dynamically powerpc/powernv: PE
[PATCH v5 15/42] powerpc/powernv: Reserve PE# for root bus
pcibios_setup_bridge(), called to update PCI bridge windows, will allocate PE for PCI buses. The function isn't called for root bus that doesn't have upstream bridge. The patch reserves PE# for root bus in advance so that we can setup it in next patch. Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com --- v5: * Split from [PATCH v5 v4 06/21] * Replace strip of with strip off in comments --- arch/powerpc/platforms/powernv/pci-ioda.c | 31 ++- arch/powerpc/platforms/powernv/pci.h | 1 + 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 0d6539a..2eb8baa 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -230,6 +230,13 @@ static int pnv_ioda1_init_m64(struct pnv_phb *phb) pr_warn( Cannot strip M64 segment for reserved PE#%d\n, phb-ioda.reserved_pe); + /* Strip off the segment used by PE for PCI root bus, +* which is last supported PE#, or one next to the +* reserved PE# +*/ + if (phb-ioda.root_pe != IODA_INVALID_PE) + r-end -= phb-ioda.m64_segsize; + return 0; fail: @@ -287,6 +294,13 @@ static int pnv_ioda2_init_m64(struct pnv_phb *phb) pr_warn( Cannot strip M64 segment for reserved PE#%d\n, phb-ioda.reserved_pe); + /* Strip off the segment used by PE for PCI root bus, +* which is last supported PE#, or one next to the +* reserved PE# +*/ + if (phb-ioda.root_pe != IODA_INVALID_PE) + r-end -= phb-ioda.m64_segsize; + return 0; fail: @@ -3331,7 +3345,22 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, aux = memblock_virt_alloc(size, 0); phb-ioda.pe_alloc = aux; phb-ioda.pe_array = aux + pemap_off; - set_bit(phb-ioda.reserved_pe, phb-ioda.pe_alloc); + + /* Choose number of PE for root bus, which shouldn't consume +* any M64 resource. So we avoid picking low-end PE#, which +* is usually binding with 64-bits prefetchable memory resources +* closely. 
+*/ + pnv_ioda_reserve_pe(phb, phb-ioda.reserved_pe); + if (phb-ioda.reserved_pe == 0) { + phb-ioda.root_pe = phb-ioda.total_pe - 1; + pnv_ioda_reserve_pe(phb, phb-ioda.root_pe); + } else if (phb-ioda.reserved_pe == (phb-ioda.total_pe - 1)) { + phb-ioda.root_pe = phb-ioda.reserved_pe - 1; + pnv_ioda_reserve_pe(phb, phb-ioda.root_pe); + } else { + phb-ioda.root_pe = IODA_INVALID_PE; + } INIT_LIST_HEAD(phb-ioda.pe_dma_list); INIT_LIST_HEAD(phb-ioda.pe_list); diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 590f778..e372b9f 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -133,6 +133,7 @@ struct pnv_phb { struct { /* Global bridge info */ unsigned inttotal_pe; + unsigned introot_pe; unsigned intreserved_pe; /* 32-bit MMIO window */ -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v5 42/42] pci/hotplug: PowerPC PowerNV PCI hotplug driver
The patch intends to add standalone driver to support PCI hotplug for PowerPC PowerNV platform, which runs on top of skiboot firmware. The firmware identified hotpluggable slots and marked their device tree node with proper ibm,slot-pluggable and ibm,reset-by-firmware. The driver simply scans device-tree to create/register PCI hotplug slot accordingly. If the skiboot firmware doesn't support slot status retrieval, the PCI slot device node shouldn't have property ibm,reset-by-firmware. In that case, none of valid PCI slots will be detected from device tree. The skiboot firmware doesn't export the capability to access attention LEDs yet and it's something for TBD. Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com --- v5: * Use OF OVERLAY to update the device-tree * Removed unnecessary header files * More meaningful return value from powernv_php_register_one() * Use pnv_pci_hotplug_notifier_{register, unregister}() * Decimal values for slot's states * Removed struct powernv_php_slot::release() * Merged two bool arguments to one for powernv_php_slot_enable() * Rename release_device_nodes_info() to remove_device_nodes_info() * Don't check on !len in slot_power_on_handler() * Handle return value in get_adapter_status() as suggested by aik * Drop invalid attention status in set_attention_status() * Renaming functions * Fixed coding style and added entry in MAINTAINERS reported by checkpatch.pl --- MAINTAINERS| 6 + drivers/pci/hotplug/Kconfig| 12 + drivers/pci/hotplug/Makefile | 4 + drivers/pci/hotplug/powernv_php.c | 140 +++ drivers/pci/hotplug/powernv_php.h | 90 drivers/pci/hotplug/powernv_php_slot.c | 732 + 6 files changed, 984 insertions(+) create mode 100644 drivers/pci/hotplug/powernv_php.c create mode 100644 drivers/pci/hotplug/powernv_php.h create mode 100644 drivers/pci/hotplug/powernv_php_slot.c diff --git a/MAINTAINERS b/MAINTAINERS index e308718..f5e1dce 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7481,6 +7481,12 @@ L: linux-...@vger.kernel.org S: 
Supported F: Documentation/PCI/pci-error-recovery.txt +PCI HOTPLUG DRIVER FOR POWERNV PLATFORM +M: Gavin Shan gws...@linux.vnet.ibm.com +L: linux-...@vger.kernel.org +S: Supported +F: drivers/pci/hotplug/powernv_php* + PCI SUBSYSTEM M: Bjorn Helgaas bhelg...@google.com L: linux-...@vger.kernel.org diff --git a/drivers/pci/hotplug/Kconfig b/drivers/pci/hotplug/Kconfig index df8caec..ef55dae 100644 --- a/drivers/pci/hotplug/Kconfig +++ b/drivers/pci/hotplug/Kconfig @@ -113,6 +113,18 @@ config HOTPLUG_PCI_SHPC When in doubt, say N. +config HOTPLUG_PCI_POWERNV + tristate PowerPC PowerNV PCI Hotplug driver + depends on PPC_POWERNV EEH + help + Say Y here if you run PowerPC PowerNV platform that supports + PCI Hotplug + + To compile this driver as a module, choose M here: the + module will be called powernv-php. + + When in doubt, say N. + config HOTPLUG_PCI_RPA tristate RPA PCI Hotplug driver depends on PPC_PSERIES EEH diff --git a/drivers/pci/hotplug/Makefile b/drivers/pci/hotplug/Makefile index 4a9aa08..a69665e 100644 --- a/drivers/pci/hotplug/Makefile +++ b/drivers/pci/hotplug/Makefile @@ -14,6 +14,7 @@ obj-$(CONFIG_HOTPLUG_PCI_PCIE)+= pciehp.o obj-$(CONFIG_HOTPLUG_PCI_CPCI_ZT5550) += cpcihp_zt5550.o obj-$(CONFIG_HOTPLUG_PCI_CPCI_GENERIC) += cpcihp_generic.o obj-$(CONFIG_HOTPLUG_PCI_SHPC) += shpchp.o +obj-$(CONFIG_HOTPLUG_PCI_POWERNV) += powernv-php.o obj-$(CONFIG_HOTPLUG_PCI_RPA) += rpaphp.o obj-$(CONFIG_HOTPLUG_PCI_RPA_DLPAR)+= rpadlpar_io.o obj-$(CONFIG_HOTPLUG_PCI_SGI) += sgi_hotplug.o @@ -50,6 +51,9 @@ ibmphp-objs := ibmphp_core.o \ acpiphp-objs := acpiphp_core.o \ acpiphp_glue.o +powernv-php-objs := powernv_php.o \ + powernv_php_slot.o + rpaphp-objs:= rpaphp_core.o \ rpaphp_pci.o\ rpaphp_slot.o diff --git a/drivers/pci/hotplug/powernv_php.c b/drivers/pci/hotplug/powernv_php.c new file mode 100644 index 000..4cbff7a --- /dev/null +++ b/drivers/pci/hotplug/powernv_php.c @@ -0,0 +1,140 @@ +/* + * PCI Hotplug Driver for PowerPC PowerNV platform. 
+ * + * Copyright Gavin Shan, IBM Corporation 2015. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include linux/module.h + +#include asm/opal.h +#include asm/pnv-pci.h + +#include powernv_php.h + +#define DRIVER_VERSION 0.1 +#define DRIVER_AUTHOR Gavin
[PATCH v5 22/42] powerpc/powernv: Move functions around
The patch moves functions related to releasing PE around so that we don't need extra declaration for them in subsequent patches. It doesn't introduce any behavioural changes. Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com --- v5: * Split from PATCH[v4 07/21] * Fixed coding style complained by checkpatch.pl --- arch/powerpc/platforms/powernv/pci-ioda.c | 735 +++--- 1 file changed, 369 insertions(+), 366 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 8a79403..3d5aec8d 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -132,6 +132,285 @@ static inline bool pnv_pci_is_mem_pref_64(unsigned long flags) (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH)); } +static inline void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_ioda_pe *pe) +{ + /* 01xb - invalidate TCEs that match the specified PE# */ + unsigned long val = (0x4ull 60) | (pe-pe_number 0xFF); + struct pnv_phb *phb = pe-phb; + + if (!phb-ioda.tce_inval_reg) + return; + + mb(); /* Ensure above stores are visible */ + __raw_writeq(cpu_to_be64(val), phb-ioda.tce_inval_reg); +} + +static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group, + int num) +{ + struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe, + table_group); + struct pnv_phb *phb = pe-phb; + long ret; + + pe_info(pe, Removing DMA window #%d\n, num); + + ret = opal_pci_map_pe_dma_window(phb-opal_id, pe-pe_number, + (pe-pe_number 1) + num, + 0/* levels */, 0/* table address */, + 0/* table size */, 0/* page size */); + if (ret) + pe_warn(pe, Unmapping failed, ret = %ld\n, ret); + else + pnv_pci_ioda2_tce_invalidate_entire(pe); + + pnv_pci_unlink_table_and_group(table_group-tables[num], table_group); + + return ret; +} + +static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable) +{ + uint16_t window_id = (pe-pe_number 1) + 1; + int64_t rc; + + pe_info(pe, %sabling 64-bit DMA 
bypass\n, enable ? En : Dis); + if (enable) { + phys_addr_t top = memblock_end_of_DRAM(); + + top = roundup_pow_of_two(top); + rc = opal_pci_map_pe_dma_window_real(pe-phb-opal_id, +pe-pe_number, +window_id, +pe-tce_bypass_base, +top); + } else { + rc = opal_pci_map_pe_dma_window_real(pe-phb-opal_id, +pe-pe_number, +window_id, +pe-tce_bypass_base, +0); + } + if (rc) + pe_err(pe, OPAL error %lld configuring bypass window\n, rc); + else + pe-tce_bypass_enabled = enable; +} + +static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, +struct pnv_ioda_pe *pe) +{ + struct iommu_table*tbl; + int64_t rc; + + tbl = pe-table_group.tables[0]; + rc = pnv_pci_ioda2_unset_window(pe-table_group, 0); + if (rc) + pe_warn(pe, OPAL error %ld release DMA window\n, rc); + + pnv_pci_ioda2_set_bypass(pe, false); + if (pe-table_group.group) { + iommu_group_put(pe-table_group.group); + BUG_ON(pe-table_group.group); + } + pnv_pci_ioda2_table_free_pages(tbl); + iommu_free_table(tbl, of_node_full_name(dev-dev.of_node)); +} + +static int pnv_ioda_set_one_peltv(struct pnv_phb *phb, + struct pnv_ioda_pe *parent, + struct pnv_ioda_pe *child, + bool is_add) +{ + const char *desc = is_add ? adding : removing; + uint8_t op = is_add ? OPAL_ADD_PE_TO_DOMAIN : + OPAL_REMOVE_PE_FROM_DOMAIN; + struct pnv_ioda_pe *slave; + long rc; + + /* Parent PE affects child PE */ + rc = opal_pci_set_peltv(phb-opal_id, parent-pe_number, + child-pe_number, op); + if (rc != OPAL_SUCCESS) { + pe_warn(child, OPAL error %ld %s to parent PELTV\n, + rc, desc); + return -ENXIO; + } + + if (!(child-flags PNV_IODA_PE_MASTER)) + return 0; + + /* Compound case: parent PE affects slave PEs */ + list_for_each_entry(slave, child-slaves, list) { +
[PATCH v5 10/42] powerpc/powernv: Trace DMA32 segments consumed by PE
On P7IOC, the whole DMA32 space is divided evenly to 256MB segments. Each PE can consume one or multiple DMA32 segments. Current code doesn't trace the available DMA32 segments and those consumed by one particular PE. It's conflicting with PCI hotplug. The patch introduces one bitmap to PHB to trace the available DMA32 segments for allocation, more fields to struct pnv_ioda_pe to trace the consumed DMA32 segments by the PE, which is going to be released when the PE is destroyed at PCI unplugging time. Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com --- v5: * Split from PATCH[v4 07/21] * Added space before open parenthesis reported by checkpatch.pl --- arch/powerpc/platforms/powernv/pci-ioda.c | 24 +++- arch/powerpc/platforms/powernv/pci.h | 4 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 63fad4d..2087c5c 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2071,6 +2071,7 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, /* Grab a 32-bit TCE table */ pe-dma32_seg = base; + pe-dma32_segcount = segs; pe_info(pe, Setting up 32-bit TCE table at %08x..%08x\n, (base 28), ((base + segs) 28) - 1); @@ -2131,8 +2132,10 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, return; fail: /* XXX Failure: Try to fallback to 64-bit only ? 
*/ - if (pe-dma32_seg = 0) + if (pe-dma32_seg = 0) { + bitmap_clear(phb-ioda.dma32_segmap, base, segs); pe-dma32_seg = -1; + } if (tce_mem) __free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs)); if (tbl) { @@ -2531,6 +2534,7 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, /* The PE will reserve all possible 32-bits space */ pe-dma32_seg = 0; + pe-dma32_segcount = 1; pe_info(pe, Setting up 32-bit TCE table at 0..%08x\n, phb-ioda.m32_pci_base); @@ -2588,6 +2592,24 @@ static void pnv_ioda_setup_dma(struct pnv_phb *phb, struct pnv_ioda_pe *pe) segs = (pe-dma32_weight * phb-ioda.dma32_segcount) / dma_weight; + /* Allocate DMA32 segments as required. We might not have +* enough available resource. However, we expect at least +* one segment is allocated. +*/ + do { + base = bitmap_find_next_zero_area( + phb-ioda.dma32_segmap, + phb-ioda.dma32_segcount, + 0, segs, 0); + if (base phb-ioda.dma32_segcount) { + bitmap_set(phb-ioda.dma32_segmap, base, segs); + break; + } + } while (--segs); + + if (!segs) + return; + pe_info(pe, DMA weight %d, assigned %d DMA32 segments\n, pe-dma32_weight, segs); pnv_pci_ioda_setup_dma_pe(phb, pe, base, segs); diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 5ea33ca..94ef1df 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -61,6 +61,7 @@ struct pnv_ioda_pe { /* Base iommu table, ie, 4K TCEs, 32-bit DMA */ struct iommu_table_group table_group; int dma32_seg; + int dma32_segcount; unsigned intdma32_weight; /* 64-bit TCE bypass region */ @@ -161,6 +162,9 @@ struct pnv_phb { unsigned long m32_segmap[8]; unsigned long m64_segmap[8]; + /* DMA32 segment maps */ + unsigned long dma32_segmap[8]; + /* IRQ chip */ int irq_chip_init; struct irq_chip irq_chip; -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v5 11/42] powerpc/powernv: Increase PE# capacity
Each PHB maintains an array helping to translate RID (Request ID) to PE# with the assumption that PE# takes 8 bits, indicating that we can't have more than 256 PEs. However, pci_dn-pe_number already had 4-bytes for the PE#. The patch extends the PE# capacity so that each of them will be 4-bytes long. Then we can use IODA_INVALID_PE to check one entry in phb-pe_rmap[] is valid or not. Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com --- v5: * Split from [PATCH v5 v4 06/21] --- arch/powerpc/platforms/powernv/pci-ioda.c | 5 - arch/powerpc/platforms/powernv/pci.h | 5 ++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 2087c5c..d8b0ef5 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -840,7 +840,7 @@ static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) /* Clear the reverse map */ for (rid = pe-rid; rid rid_end; rid++) - phb-ioda.pe_rmap[rid] = 0; + phb-ioda.pe_rmap[rid] = IODA_INVALID_PE; /* Release from all parents PELT-V */ while (parent) { @@ -3303,6 +3303,9 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, if (prop32) phb-ioda.reserved_pe = be32_to_cpup(prop32); + /* Invalidate RID to PE# mapping */ + memset(phb-ioda.pe_rmap, 0xff, sizeof(phb-ioda.pe_rmap)); + /* Parse 64-bit MMIO range */ pnv_ioda_parse_m64_window(phb); diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 94ef1df..590f778 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -175,11 +175,10 @@ struct pnv_phb { struct list_headpe_list; struct mutexpe_list_mutex; - /* Reverse map of PEs, will have to extend if -* we are to support more than 256 PEs, indexed + /* Reverse map of PEs, indexed by * bus { bus, devfn } */ - unsigned char pe_rmap[0x1]; + int pe_rmap[0x1]; /* Number of 32-bit DMA segments */ unsigned 
long dma32_segcount; -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v5 29/42] powerpc/powernv: Issue fundamental reset in pnv_pci_reset_secondary_bus()
There might be PCI devices under the specified PCI bus that ask for a fundamental reset. The patch iterates over all PCI devices under the specified PCI bus and issues a fundamental reset to the PCI bus if any PCI device asks for it. Otherwise, a hot reset is issued to the PCI bus. Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com --- v5: * Derived from PATCH[v4 10/21] --- arch/powerpc/platforms/powernv/eeh-powernv.c | 25 - 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 18167c5..4eb53ed 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -1055,9 +1055,32 @@ static int pnv_eeh_vf_pe_reset(struct eeh_pe *pe, int option) return 0; } +static int pnv_pci_dev_reset_type(struct pci_dev *pdev, void *data) +{ + int *freset = data; + + /* +* Stop the iteration immediately if there is any +* one PCI device requesting fundamental reset +*/ + *freset |= pdev->needs_freset; + return *freset; +} + void pnv_pci_reset_secondary_bus(struct pci_dev *dev) { - pnv_eeh_bridge_reset(dev, EEH_RESET_HOT); + int option = EEH_RESET_HOT; + + if (dev->subordinate) { + int freset = 0; + + pci_walk_bus(dev->subordinate, +pnv_pci_dev_reset_type, +&freset); + option = freset ? EEH_RESET_FUNDAMENTAL : EEH_RESET_HOT; + } + + pnv_eeh_bridge_reset(dev, option); pnv_eeh_bridge_reset(dev, EEH_RESET_DEACTIVATE); } -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v5 16/42] powerpc/powernv: Create PEs dynamically
Currently, the PEs and their associated resources are assigned in ppc_md.pcibios_fixup(), except those consumed by SRIOV VFs. The function is called once, after PCI probing and resource assignment are finished. Obviously, it's not hotplug friendly. The patch creates PEs dynamically in ppc_md.pcibios_setup_bridge(), which is called during system bootup and PCI hotplug when a PCI bridge's windows are updated after resource assignment/reassignment is finished. For the partial hotplug case, where not all PCI devices belonging to the PE are unplugged and plugged again, we just need to unbind/bind the affected PCI devices with the corresponding PE without creating a new one. Besides, it might require additional resources (e.g. M32) in the windows of the PCI bridge when unplugging the current adapter and inserting a different adapter, if there is one PCI slot, which is assumed to be behind the root port or the downstream bridge of the PCIe switch behind the root port. The parent bridge of the newly plugged adapter would reject the request to add more resources, leading to hotplug failure. To address this issue, the patch extends the windows of the root port, or of the upstream port of the PCIe switch behind the root port, to the PHB's windows when ppc_md.pcibios_setup_bridge() is called. There is no upstream bridge for the root bus, so we have to fix it up before any PE is created, because the root bus PE is the ancestor of all other PEs.
Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com --- v5: * Derived from [PATCH v5 v4 06/21] * Correct accommodate reported by checkpatch.pl --- arch/powerpc/platforms/powernv/pci-ioda.c | 203 +++--- arch/powerpc/platforms/powernv/pci.h | 1 + 2 files changed, 128 insertions(+), 76 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 2eb8baa..fd2f898 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1200,6 +1200,13 @@ static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe) pci_name(dev)); continue; } + + /* The PCI device might have been associated with the PE +* in case of partial hotplug. +*/ + if (pdn-pe_number != IODA_INVALID_PE) + continue; + pdn-pe_number = pe-pe_number; pe-dma32_weight += pnv_ioda_dev_dma_weight(dev); if ((pe-flags PNV_IODA_PE_BUS_ALL) dev-subordinate) @@ -1213,15 +1220,31 @@ static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe) * subordinate PCI devices and buses. The second type of PE is normally * orgiriated by PCIe-to-PCI bridge or PLX switch downstream ports. */ -static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all) +static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all) { struct pci_controller *hose = pci_bus_to_host(bus); struct pnv_phb *phb = hose-private_data; struct pnv_ioda_pe *pe; int pe_num = IODA_INVALID_PE; + /* For partial hotplug case, the PE instance hasn't been destroyed +* yet. We shouldn't allocated a new one and assign resources to +* it. The existing PE instance should be reused, but we should +* associate the devices to the PE. 
+*/ + pe_num = phb-ioda.pe_rmap[bus-number 8]; + if (pe_num != IODA_INVALID_PE) { + pe = phb-ioda.pe_array[pe_num]; + pnv_ioda_setup_same_PE(bus, pe); + return NULL; + } + + /* PE number for root bus should have been reserved */ + if (pci_is_root_bus(bus)) + pe_num = phb-ioda.root_pe; + /* Check if PE is determined by M64 */ - if (phb-pick_m64_pe) + if (pe_num == IODA_INVALID_PE phb-pick_m64_pe) pe_num = phb-pick_m64_pe(phb, bus, all); /* The PE number isn't pinned by M64 */ @@ -1231,7 +1254,7 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all) if (pe_num == IODA_INVALID_PE) { pr_warning(%s: Not enough PE# available for PCI bus %04x:%02x\n, __func__, pci_domain_nr(bus), bus-number); - return; + return NULL; } pe = phb-ioda.pe_array[pe_num]; @@ -1255,7 +1278,7 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all) if (pe_num) pnv_ioda_free_pe(phb, pe_num); pe-pbus = NULL; - return; + return NULL; } /* Associate it with all child devices */ @@ -1266,46 +1289,8 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all) /* Link the PE */ pnv_ioda_link_pe_by_weight(phb, pe); -} - -static void pnv_ioda_setup_PEs(struct pci_bus *bus) -{ - struct
[PATCH v5 28/42] powerpc/powernv: Don't cover root bus in pnv_pci_reset_secondary_bus()
There should be an upstream bridge for the PCI bus for which pnv_pci_reset_secondary_bus() is called. It's impossible to call the function for root buses. So we needn't do a reset for root buses in pnv_pci_reset_secondary_bus(); simply drop that logic. Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com --- v5: * Split from PATCH[v4 10/21] --- arch/powerpc/platforms/powernv/eeh-powernv.c | 12 ++-- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 4669122..18167c5 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -1057,16 +1057,8 @@ static int pnv_eeh_vf_pe_reset(struct eeh_pe *pe, int option) void pnv_pci_reset_secondary_bus(struct pci_dev *dev) { - struct pci_controller *hose; - - if (pci_is_root_bus(dev->bus)) { - hose = pci_bus_to_host(dev->bus); - pnv_eeh_root_reset(hose, EEH_RESET_HOT); - pnv_eeh_root_reset(hose, EEH_RESET_DEACTIVATE); - } else { - pnv_eeh_bridge_reset(dev, EEH_RESET_HOT); - pnv_eeh_bridge_reset(dev, EEH_RESET_DEACTIVATE); - } + pnv_eeh_bridge_reset(dev, EEH_RESET_HOT); + pnv_eeh_bridge_reset(dev, EEH_RESET_DEACTIVATE); } /** -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v5 40/42] drivers/of: Allow to specify root node in of_fdt_unflatten_tree()
The patch introduces one more argument to of_fdt_unflatten_tree() to specify the root node for the FDT blob, which is going to be unflattened. In the result, the function can be used to unflatten FDT blob, which represents device sub-tree in subsequent patches. Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com --- v5: * Newly introduced --- drivers/of/fdt.c | 26 ++ drivers/of/unittest.c | 2 +- include/linux/of_fdt.h | 3 ++- 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index b87c157..b6a6c59 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -380,9 +380,16 @@ static void *unflatten_dt_node(void *blob, struct device_node **nodepp, bool dryrun) { + unsigned long fpsize = 0; + + if (dad) + fpsize = strlen(of_node_full_name(dad)); + else + fpsize = 0; + cur_node_depth = 1; return __unflatten_dt_node(blob, mem, poffset, - dad, nodepp, 0, dryrun); + dad, nodepp, fpsize, dryrun); } /** @@ -393,13 +400,15 @@ static void *unflatten_dt_node(void *blob, * pointers of the nodes so the normal device-tree walking functions * can be used. 
* @blob: The blob to expand + * @dad: The root node of the created device_node tree * @mynodes: The device_node tree created by the call * @dt_alloc: An allocator that provides a virtual address to memory * for the resulting tree */ static void __unflatten_device_tree(void *blob, -struct device_node **mynodes, -void * (*dt_alloc)(u64 size, u64 align)) + struct device_node *dad, + struct device_node **mynodes, + void * (*dt_alloc)(u64 size, u64 align)) { unsigned long size; int start; @@ -425,7 +434,7 @@ static void __unflatten_device_tree(void *blob, /* First pass, scan for size */ start = 0; size = (unsigned long)unflatten_dt_node(blob, NULL, start, - NULL, NULL, true); + dad, NULL, true); size = ALIGN(size, 4); pr_debug( size is %lx, allocating...\n, size); @@ -440,7 +449,7 @@ static void __unflatten_device_tree(void *blob, /* Second pass, do actual unflattening */ start = 0; - unflatten_dt_node(blob, mem, start, NULL, mynodes, false); + unflatten_dt_node(blob, mem, start, dad, mynodes, false); if (be32_to_cpup(mem + size) != 0xdeadbeef) pr_warning(End of tree marker overwritten: %08x\n, be32_to_cpup(mem + size)); @@ -462,9 +471,10 @@ static void *kernel_tree_alloc(u64 size, u64 align) * can be used. 
*/ void of_fdt_unflatten_tree(unsigned long *blob, - struct device_node **mynodes) + struct device_node *dad, + struct device_node **mynodes) { - __unflatten_device_tree(blob, mynodes, kernel_tree_alloc); + __unflatten_device_tree(blob, dad, mynodes, kernel_tree_alloc); } EXPORT_SYMBOL_GPL(of_fdt_unflatten_tree); @@ -1095,7 +1105,7 @@ bool __init early_init_dt_scan(void *params) */ void __init unflatten_device_tree(void) { - __unflatten_device_tree(initial_boot_params, of_root, + __unflatten_device_tree(initial_boot_params, NULL, of_root, early_init_dt_alloc_memory_arch); /* Get pointer to /chosen and /aliases nodes for use everywhere */ diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c index 1801634..2270830 100644 --- a/drivers/of/unittest.c +++ b/drivers/of/unittest.c @@ -907,7 +907,7 @@ static int __init unittest_data_add(void) not running tests\n, __func__); return -ENOMEM; } - of_fdt_unflatten_tree(unittest_data, unittest_data_node); + of_fdt_unflatten_tree(unittest_data, NULL, unittest_data_node); if (!unittest_data_node) { pr_warn(%s: No tree to attach; not running tests\n, __func__); return -ENODATA; diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index 587ee50..8882640 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -38,7 +38,8 @@ extern bool of_fdt_is_big_endian(const void *blob, extern int of_fdt_match(const void *blob, unsigned long node, const char *const *compat); extern void of_fdt_unflatten_tree(unsigned long *blob, - struct device_node **mynodes); + struct device_node *dad, + struct device_node **mynodes); /* TBD: Temporary export of fdt globals - remove when code fully
[PATCH v5 27/42] powerpc/powernv: Simplify pnv_eeh_reset()
The patch simplifies pnv_eeh_reset() by dropping unnecessary nested if statement. No logic changed by the patch. Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com --- v5: * Split from PATCH[v4 09/21] * Fixed quoted string split across lines from checkpatch.pl --- arch/powerpc/platforms/powernv/eeh-powernv.c | 65 +--- 1 file changed, 31 insertions(+), 34 deletions(-) diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 4feb533..4669122 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -1084,7 +1084,9 @@ void pnv_pci_reset_secondary_bus(struct pci_dev *dev) static int pnv_eeh_reset(struct eeh_pe *pe, int option) { struct pci_controller *hose = pe-phb; + struct pnv_phb *phb = hose-private_data; struct pci_bus *bus; + int64_t rc; int ret; /* @@ -1101,44 +1103,39 @@ static int pnv_eeh_reset(struct eeh_pe *pe, int option) * reset. The side effect is that EEH core has to clear the frozen * state explicitly after BAR restore. */ - if (pe-type EEH_PE_PHB) { - ret = pnv_eeh_phb_reset(hose, option); - } else { - struct pnv_phb *phb; - s64 rc; + if (pe-type EEH_PE_PHB) + return pnv_eeh_phb_reset(hose, option); - /* -* The frozen PE might be caused by PAPR error injection -* registers, which are expected to be cleared after hitting -* frozen PE as stated in the hardware spec. Unfortunately, -* that's not true on P7IOC. So we have to clear it manually -* to avoid recursive EEH errors during recovery. 
-*/ - phb = hose-private_data; - if (phb-model == PNV_PHB_MODEL_P7IOC - (option == EEH_RESET_HOT || - option == EEH_RESET_FUNDAMENTAL)) { - rc = opal_pci_reset(phb-opal_id, - OPAL_RESET_PHB_ERROR, - OPAL_ASSERT_RESET); - if (rc != OPAL_SUCCESS) { - pr_warn(%s: Failure %lld clearing - error injection registers\n, - __func__, rc); - return -EIO; - } + /* +* The frozen PE might be caused by PAPR error injection +* registers, which are expected to be cleared after hitting +* frozen PE as stated in the hardware spec. Unfortunately, +* that's not true on P7IOC. So we have to clear it manually +* to avoid recursive EEH errors during recovery. +*/ + phb = hose-private_data; + if (phb-model == PNV_PHB_MODEL_P7IOC + (option == EEH_RESET_HOT || + option == EEH_RESET_FUNDAMENTAL)) { + rc = opal_pci_reset(phb-opal_id, + OPAL_RESET_PHB_ERROR, + OPAL_ASSERT_RESET); + if (rc != OPAL_SUCCESS) { + pr_warn(%s: Error %lld clearing errinjct registers\n, + __func__, rc); + return -EIO; } - - bus = eeh_pe_bus_get(pe); - if (pe-type EEH_PE_VF) - ret = pnv_eeh_vf_pe_reset(pe, option); - else if (pci_is_root_bus(bus) || - pci_is_root_bus(bus-parent)) - ret = pnv_eeh_root_reset(hose, option); - else - ret = pnv_eeh_bridge_reset(bus-self, option); } + bus = eeh_pe_bus_get(pe); + if (pe-type EEH_PE_VF) + ret = pnv_eeh_vf_pe_reset(pe, option); + else if (pci_is_root_bus(bus) || + pci_is_root_bus(bus-parent)) + ret = pnv_eeh_root_reset(hose, option); + else + ret = pnv_eeh_bridge_reset(bus-self, option); + return ret; } -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v5 07/42] powerpc/powernv: Calculate PHB's DMA weight dynamically
For P7IOC, the whole available DMA32 space, which is below the MEM32 space, is divided evenly into 256MB segments. How many continuous segments assigned to one particular PE depends on the PE's DMA weight that is figured out from the type of each PCI devices contained in the PE, and PHB's DMA weight which is accumulative DMA weight of PEs contained in the PHB. It means that the PHB's DMA weight calculation depends on existing PEs, which works perfectly now, but not hotplug friendly. As the whole available DMA32 space can be assigned to one PE on PHB3, so we don't have the issue on PHB3. The patch calculates PHB's DMA weight based on the PCI devices contained in the PHB dynamically so that it's hotplug friendly. At the meanwhile, the patch removes the code handling DMA weight for PHB3 in pnv_ioda_setup_dma(). Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com --- v5: * Split from PATCH[v4 5/21] * Fixed line over 80 characters reported from checkpatch.pl --- arch/powerpc/platforms/powernv/pci-ioda.c | 90 +++ arch/powerpc/platforms/powernv/pci.h | 6 --- 2 files changed, 44 insertions(+), 52 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 46a5e10..d9ff739 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -979,8 +979,11 @@ static void pnv_ioda_link_pe_by_weight(struct pnv_phb *phb, list_add_tail(pe-dma_link, phb-ioda.pe_dma_list); } -static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev) +static unsigned int pnv_ioda_dev_dma_weight(struct pci_dev *dev) { + struct pci_controller *hose = pci_bus_to_host(dev-bus); + struct pnv_phb *phb = hose-private_data; + /* This is quite simplistic. The base weight of a device * is 10. 0 means no DMA is to be accounted for it. 
*/ @@ -993,14 +996,34 @@ static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev) if (dev-class == PCI_CLASS_SERIAL_USB_UHCI || dev-class == PCI_CLASS_SERIAL_USB_OHCI || dev-class == PCI_CLASS_SERIAL_USB_EHCI) - return 3; + return 3 * phb-ioda.tce32_count; /* Increase the weight of RAID (includes Obsidian) */ if ((dev-class 8) == PCI_CLASS_STORAGE_RAID) - return 15; + return 15 * phb-ioda.tce32_count; /* Default */ - return 10; + return 10 * phb-ioda.tce32_count; +} + +static int __pnv_ioda_phb_dma_weight(struct pci_dev *pdev, void *data) +{ + unsigned int *dma_weight = data; + + *dma_weight += pnv_ioda_dev_dma_weight(pdev); + return 0; +} + +static unsigned int pnv_ioda_phb_dma_weight(struct pnv_phb *phb) +{ + unsigned int dma_weight = 0; + + if (!phb-hose-bus) + return dma_weight; + + pci_walk_bus(phb-hose-bus, +__pnv_ioda_phb_dma_weight, dma_weight); + return dma_weight; } #ifdef CONFIG_PCI_IOV @@ -1159,7 +1182,7 @@ static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe) continue; } pdn-pe_number = pe-pe_number; - pe-dma_weight += pnv_ioda_dma_weight(dev); + pe-dma_weight += pnv_ioda_dev_dma_weight(dev); if ((pe-flags PNV_IODA_PE_BUS_ALL) dev-subordinate) pnv_ioda_setup_same_PE(dev-subordinate, pe); } @@ -1222,14 +1245,6 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all) /* Put PE to the list */ list_add_tail(pe-list, phb-ioda.pe_list); - /* Account for one DMA PE if at least one DMA capable device exist -* below the bridge -*/ - if (pe-dma_weight != 0) { - phb-ioda.dma_weight += pe-dma_weight; - phb-ioda.dma_pe_count++; - } - /* Link the PE */ pnv_ioda_link_pe_by_weight(phb, pe); } @@ -2546,24 +2561,13 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, static void pnv_ioda_setup_dma(struct pnv_phb *phb) { struct pci_controller *hose = phb-hose; - unsigned int residual, remaining, segs, tw, base; struct pnv_ioda_pe *pe; + unsigned int dma_weight; - /* If we have more PE# than segments available, hand out 
one -* per PE until we run out and let the rest fail. If not, -* then we assign at least one segment per PE, plus more based -* on the amount of devices under that PE -*/ - if (phb-ioda.dma_pe_count phb-ioda.tce32_count) - residual = 0; - else - residual = phb-ioda.tce32_count - - phb-ioda.dma_pe_count; - - pr_info(PCI: Domain %04x has %ld available 32-bit DMA segments\n, - hose-global_number, phb-ioda.tce32_count); - pr_info(PCI: %d PE# for a total weight of %d\n, -
Re: [PATCH] powerpc: Make doorbell check preemption safe
On Wednesday 20 May 2015 06:30 AM, Michael Neuling wrote: On Wed, 2015-05-20 at 00:30 +0530, Shreyas B. Prabhu wrote: Doorbell can be used to cause ipi on cpus which are sibling threads on the same core. So icp_native_cause_ipi checks if the destination cpu is a sibling thread of the current cpu and uses doorbell in such cases. But while running with CONFIG_PREEMPT=y, since this section is preemptible, we can run into issues if, after we check if the destination cpu is a sibling cpu, the task gets migrated from a sibling cpu to a cpu on another core. Fix this by using get_cpu()/put_cpu(). Thanks. Looks good and it boots for me. Signed-off-by: Michael Neuling mi...@neuling.org mikey, Thanks! mpe, if this looks ok, can you please pick it up? ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
RE: [PATCH 2/2] rheap: move rheap.c from arch/powerpc/lib/ to lib/
On Thu, 2015-06-04 at 2:14PM, Wood Scott wrote: -Original Message- From: Wood Scott-B07421 Sent: Thursday, June 04, 2015 2:14 PM To: Zhao Qiang-B45475 Cc: linuxppc-dev@lists.ozlabs.org; Xie Xiaobo-R63061 Subject: Re: [PATCH 2/2] rheap: move rheap.c from arch/powerpc/lib/ to lib/ On Thu, 2015-06-04 at 00:56 -0500, Zhao Qiang-B45475 wrote: On Thu, 2015-05-28 at 1:37AM +0800, Wood Scott wrote: -Original Message- From: Wood Scott-B07421 Sent: Thursday, May 28, 2015 1:37 AM To: Zhao Qiang-B45475 Cc: linuxppc-dev@lists.ozlabs.org; Wood Scott-B07421; Xie Xiaobo- R63061 Subject: Re: [PATCH 2/2] rheap: move rheap.c from arch/powerpc/lib/ to lib/ On Wed, 2015-05-27 at 17:12 +0800, Zhao Qiang wrote: QE needs to use the rheap, so move it to a public directory. You've been previously asked to use lib/genalloc.c rather than introduce duplicate functionality into /lib. NACK. Can't use lib/genalloc.c instead of rheap.c. QE needs to alloc muram of qe, not DIMM. lib/genalloc.h is not for allocating main memory. It is for allocating special regions. It is serving the same purpose as rheap. I need to use the func rh_alloc_align, what is the similar func? I just found a func gen_pool_first_fit_order_align. -Scott ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH] of/dynamic: Fix test for PPC_PSERIES
IS_ENABLED(PPC_PSERIES) always evaluates to false, as IS_ENABLED() is supposed to be used with the full Kconfig symbol name, including the CONFIG_ prefix. Add the missing CONFIG_ prefix to fix this. Fixes: a25095d451ece23b (of: Move dynamic node fixups out of powerpc and into common code) Signed-off-by: Geert Uytterhoeven geert+rene...@glider.be --- Did this bug cause any breakage? If yes, the fix should go to stable (for v3.17 and later). --- drivers/of/dynamic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c index dee658de72b3b221..1901f8870591fe30 100644 --- a/drivers/of/dynamic.c +++ b/drivers/of/dynamic.c @@ -226,7 +226,7 @@ void __of_attach_node(struct device_node *np) phandle = __of_get_property(np, phandle, sz); if (!phandle) phandle = __of_get_property(np, linux,phandle, sz); - if (IS_ENABLED(PPC_PSERIES) !phandle) + if (IS_ENABLED(CONFIG_PPC_PSERIES) !phandle) phandle = __of_get_property(np, ibm,phandle, sz); np-phandle = (phandle (sz = 4)) ? be32_to_cpup(phandle) : 0; -- 1.9.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v1 5/9]powerpc/powernv: nest pmu feature detection support
On Wednesday 03 June 2015 05:51 AM, Daniel Axtens wrote: On Tue, 2015-06-02 at 21:29 +0530, Madhavan Srinivasan wrote: Patch adds a device tree function to detect the nest pmu support. Function will look for specific dt property ibm,ima-chip as a detection mechanism for the nest pmu. For Nest pmu, device tree will have two set of information. 1) Per-chip Homer address region for nest pmu counter collection area. 2) Supported Nest PMUs and events What's HOMER? Nest PMUs are configured via PORE engine interface and PORE Engine collections the Nest counter value and updates in the main memory which is reserved for this use. +static int nest_ima_detect_parse(void) +{ + const __be32 *gcid; + const __be64 *chip_ima_reg; + const __be64 *chip_ima_size; + struct device_node *dev; + int rc = -EINVAL, idx; + + for_each_node_with_property(dev, ibm,ima-chip) { + gcid = of_get_property(dev, ibm,chip-id, NULL); + chip_ima_reg = of_get_property(dev, reg, NULL); + chip_ima_size = of_get_property(dev, size, NULL); + if ((!gcid) || (!chip_ima_reg) || (!chip_ima_size)) { + pr_err(%s: device %s missing property \n, + __func__, dev-full_name); This is not a particularly informative error message. It'd be good if it mentioned that it was for PMU. Sure will changes. + return rc + } + + idx = (uint32_t)be32_to_cpup(gcid); + p8_perchip_nest_info[idx].pbase = be64_to_cpup(chip_ima_reg); + p8_perchip_nest_info[idx].size = be64_to_cpup(chip_ima_size); + p8_perchip_nest_info[idx].vbase = (uint64_t) + phys_to_virt(p8_perchip_nest_info[idx].pbase); + + rc = 0; + } + + return rc; I'm not sure your rc handling is correct. As I understand it: - Start with rc = -EINVAL. - If your first node is missing a property, return -EINVAL. - Once your first node succeeds, set rc = 0 - If any subsequent node is missing a property, return 0. - Return 0 if any node is successfully processed, otherwise return -EINVAL. Main loop is only for nodes with property ibm,ima-chip. Not all the nodes will have this property. 
If that's what you intended (especially with regards to returning 0 when a subsequent node is missing a property), a comment explaining it would be great. Yes. I will add comment explaining it. But i did add this in the commit message. Also, why bail out if a property is missing on any node? Why not try all of them and see if any succeed? Only the Nest Unit nodes in the device tree will have this property. Commit has the device tree hierarchy for the Nest instrumentation. So if we dont find this property then Nest instrumentation is not supported, hence bail out. +} + static int __init nest_pmu_init(void) { int ret = 0; @@ -256,6 +287,12 @@ static int __init nest_pmu_init(void) cpumask_chip(); + /* +* Detect the Nest PMU feature +*/ + if (nest_ima_detect_parse()) + return 0; + return 0; } Zero is returned regardless of the output of nest_ima_detect_parse. Is that intentional? If so, do you need the 'if'? No it should return ret which should be initialized to error value. WIll fix it device_initcall(nest_pmu_init); Regards, Daniel Axtens Thanks for the review MAddy ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v1 8/9] powerpc/powernv: Add OPAL support for Nest PMU
On Wednesday 03 June 2015 06:24 AM, Daniel Axtens wrote: +int64_t opal_nest_ima_control(uint32_t value); If I'm understanding things correctly, you call this function in patch 3. Quoting from that patch: +static void nest_init(void *dummy) +{ + opal_nest_ima_control(P8_NEST_ENGINE_START); +} Does this patch need to be moved earlier in the series? I applied all the patches together and tested it since the Makefile inclusion is the final patch in the series. I guess it is better rearrange the series. Have you tested that the series compiles at every point? (I've found that this can be done quite easily with git rebase --interactive using x to run the compile) Nice. will try this out. Thanks for the review Maddy + /* Internal functions */ extern int early_init_dt_scan_opal(unsigned long node, const char *uname, int depth, void *data); diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index a7ade94..ce36a68 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -295,3 +295,4 @@ OPAL_CALL(opal_i2c_request, OPAL_I2C_REQUEST); OPAL_CALL(opal_flash_read,OPAL_FLASH_READ); OPAL_CALL(opal_flash_write, OPAL_FLASH_WRITE); OPAL_CALL(opal_flash_erase, OPAL_FLASH_ERASE); +OPAL_CALL(opal_nest_ima_control, OPAL_NEST_IMA_CONTROL); ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: of/dynamic: Fix test for PPC_PSERIES
On Thu, 2015-04-06 at 09:34:41 UTC, Geert Uytterhoeven wrote: IS_ENABLED(PPC_PSERIES) always evaluates to false, as IS_ENABLED() is supposed to be used with the full Kconfig symbol name, including the CONFIG_ prefix. Add the missing CONFIG_ prefix to fix this. Fixes: a25095d451ece23b (of: Move dynamic node fixups out of powerpc and into common code) Signed-off-by: Geert Uytterhoeven geert+rene...@glider.be --- Did this bug cause any breakage? If yes, the fix should go to stable (for v3.17 and later). Yikes. Not that I've heard of. But it's reasonably new so possibly it's not hit distros that folks tend to run on those machines. I'm also not clear how it would break, it could be subtle and we've not noticed. Nathan might have more of an idea (on CC). On my machine here everything that has an ibm,phandle also has a linux,phandle, so we wouldn't hit that code path. But I'm not sure how representative that box is. cheers ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH] cpufreq: qoriq: optimize the CPU frequency switching time
From: Tang Yuantian yuantian.t...@freescale.com Each time the CPU switches its frequency, the clock nodes in DTS are walked through to find proper clock source. This is very time-consuming, for example, it is up to 500+ us on T4240. Besides, switching time varies from clock to clock. To optimize this, each input clock of CPU is buffered, so that it can be picked up instantly when needed. Since for each CPU each input clock is stored in a pointer which takes 4 or 8 bytes memory and normally there are several input clocks per CPU, that will not take much memory as well. Signed-off-by: Tang Yuantian yuantian.t...@freescale.com --- drivers/cpufreq/qoriq-cpufreq.c | 32 +--- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/drivers/cpufreq/qoriq-cpufreq.c b/drivers/cpufreq/qoriq-cpufreq.c index 88b21ae..358f075 100644 --- a/drivers/cpufreq/qoriq-cpufreq.c +++ b/drivers/cpufreq/qoriq-cpufreq.c @@ -27,11 +27,11 @@ /** * struct cpu_data - * @parent: the parent node of cpu clock + * @pclk: the parent clock of cpu * @table: frequency table */ struct cpu_data { - struct device_node *parent; + struct clk **pclk; struct cpufreq_frequency_table *table; }; @@ -196,7 +196,7 @@ static void freq_table_sort(struct cpufreq_frequency_table *freq_table, static int qoriq_cpufreq_cpu_init(struct cpufreq_policy *policy) { - struct device_node *np; + struct device_node *np, *pnode; int i, count, ret; u32 freq, mask; struct clk *clk; @@ -219,17 +219,23 @@ static int qoriq_cpufreq_cpu_init(struct cpufreq_policy *policy) goto err_nomem2; } - data-parent = of_parse_phandle(np, clocks, 0); - if (!data-parent) { + pnode = of_parse_phandle(np, clocks, 0); + if (!pnode) { pr_err(%s: could not get clock information\n, __func__); goto err_nomem2; } - count = of_property_count_strings(data-parent, clock-names); + count = of_property_count_strings(pnode, clock-names); + data-pclk = kcalloc(count, sizeof(struct clk *), GFP_KERNEL); + if (!data-pclk) { + pr_err(%s: no memory\n, __func__); + 
goto err_node; + } + table = kcalloc(count + 1, sizeof(*table), GFP_KERNEL); if (!table) { pr_err(%s: no memory\n, __func__); - goto err_node; + goto err_pclk; } if (fmask) @@ -238,7 +244,8 @@ static int qoriq_cpufreq_cpu_init(struct cpufreq_policy *policy) mask = 0x0; for (i = 0; i count; i++) { - clk = of_clk_get(data-parent, i); + clk = of_clk_get(pnode, i); + data-pclk[i] = clk; freq = clk_get_rate(clk); /* * the clock is valid if its frequency is not masked @@ -273,13 +280,16 @@ static int qoriq_cpufreq_cpu_init(struct cpufreq_policy *policy) policy-cpuinfo.transition_latency = u64temp + 1; of_node_put(np); + of_node_put(pnode); return 0; err_nomem1: kfree(table); +err_pclk: + kfree(data-pclk); err_node: - of_node_put(data-parent); + of_node_put(pnode); err_nomem2: policy-driver_data = NULL; kfree(data); @@ -293,7 +303,7 @@ static int __exit qoriq_cpufreq_cpu_exit(struct cpufreq_policy *policy) { struct cpu_data *data = policy-driver_data; - of_node_put(data-parent); + kfree(data-pclk); kfree(data-table); kfree(data); policy-driver_data = NULL; @@ -307,7 +317,7 @@ static int qoriq_cpufreq_target(struct cpufreq_policy *policy, struct clk *parent; struct cpu_data *data = policy-driver_data; - parent = of_clk_get(data-parent, data-table[index].driver_data); + parent = data-pclk[data-table[index].driver_data]; return clk_set_parent(policy-clk, parent); } -- 2.1.0.27.g96db324 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH] of: return NUMA_NO_NODE from fallback of_node_to_nid()
On Mon, 13 Apr 2015 11:49:31 -0500 , Rob Herring robherri...@gmail.com wrote: On Mon, Apr 13, 2015 at 8:38 AM, Konstantin Khlebnikov khlebni...@yandex-team.ru wrote: On 13.04.2015 16:22, Rob Herring wrote: On Wed, Apr 8, 2015 at 11:59 AM, Konstantin Khlebnikov khlebni...@yandex-team.ru wrote: Node 0 might be offline as well as any other numa node, in this case kernel cannot handle memory allocation and crashes. Signed-off-by: Konstantin Khlebnikov khlebni...@yandex-team.ru Fixes: 0c3f061c195c (of: implement of_node_to_nid as a weak function) --- drivers/of/base.c |2 +- include/linux/of.h |5 - 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/of/base.c b/drivers/of/base.c index 8f165b112e03..51f4bd16e613 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -89,7 +89,7 @@ EXPORT_SYMBOL(of_n_size_cells); #ifdef CONFIG_NUMA int __weak of_node_to_nid(struct device_node *np) { - return numa_node_id(); + return NUMA_NO_NODE; This is going to break any NUMA machine that enables OF and expects the weak function to work. Why? NUMA_NO_NODE == -1 -- this's standard no-affinity signal. As I see powerpc/sparc versions of of_node_to_nid returns -1 if they cannot find out which node should be used. Ah, I was thinking those platforms were relying on the default implementation. I guess any real NUMA support is going to need to override this function. The arm64 patch series does that as well. We need to be sure this change is correct for metag which appears to be the only other OF enabled platform with NUMA support. In that case, then there is little reason to keep the inline and we can just always enable the weak function (with your change). It is slightly less optimal, but the few callers hardly appear to be hot paths. Sounds like you're in agreement with this patch then? Shall I apply it? g. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v1 2/9]powerpc/powernv: nest pmu init function with cpumask attr
On Wednesday 03 June 2015 04:44 AM, Daniel Axtens wrote: On Tue, 2015-06-02 at 21:29 +0530, Madhavan Srinivasan wrote: Patch creates a file nest-pmu-c to contain nest pmu related functions. nest-pmu.c Patch adds nest pmu init function and cpumask function since Nest pmu units are per-chip. First online cpu for a given node is picked as designated thread to read the counter data. Subsequent patch adds the hotplug support. Cc: Michael Ellerman m...@ellerman.id.au Cc: Benjamin Herrenschmidt b...@kernel.crashing.org Cc: Paul Mackerras pau...@samba.org Cc: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com Cc: Anshuman Khandual khand...@linux.vnet.ibm.com Cc: Stephane Eranian eran...@google.com Cc: Preeti U Murthy pre...@linux.vnet.ibm.com Cc: Ingo Molnar mi...@kernel.org Cc: Peter Zijlstra pet...@infradead.org Signed-off-by: Madhavan Srinivasan ma...@linux.vnet.ibm.com --- arch/powerpc/perf/nest-pmu.c | 70 1 file changed, 70 insertions(+) create mode 100644 arch/powerpc/perf/nest-pmu.c diff --git a/arch/powerpc/perf/nest-pmu.c b/arch/powerpc/perf/nest-pmu.c new file mode 100644 index 000..d4413bb --- /dev/null +++ b/arch/powerpc/perf/nest-pmu.c @@ -0,0 +1,70 @@ +/* + * Nest Performance Monitor counter support for POWER8 processors. + * + * Copyright 2015 Madhavan Srinivasan, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + Again, I think this is supposed to be v2 only. 
+#include nest-pmu.h + +static cpumask_t cpu_mask_nest_pmu; + +static ssize_t cpumask_nest_pmu_get_attr(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return cpumap_print_to_pagebuf(true, buf, cpu_mask_nest_pmu); +} + +static DEVICE_ATTR(cpumask, S_IRUGO, cpumask_nest_pmu_get_attr, NULL); + +static struct attribute *cpumask_nest_pmu_attrs[] = { + dev_attr_cpumask.attr, + NULL, +}; + +static struct attribute_group cpumask_nest_pmu_attr_group = { + .attrs = cpumask_nest_pmu_attrs, +}; + +void cpumask_chip(void) +{ + const struct cpumask *l_cpumask; + int cpu, nid; + + if (!cpumask_empty(cpu_mask_nest_pmu)) { + printk(KERN_INFO cpumask not empty\n); + return; + } + + cpu_notifier_register_begin(); + for_each_online_node(nid) { + l_cpumask = cpumask_of_node(nid); + cpu = cpumask_first(l_cpumask); + cpumask_set_cpu(cpu, cpu_mask_nest_pmu); + } + + cpu_notifier_register_done(); +} It's not clear from the name of this function what it does. I don't think I actually understand what it does: it appears to register a notifier on the first cpu of each node; maybe that should be reflected in the name. My bad. Hotplug notification registration happens in the next patch. could merge both as single patch. +static int __init nest_pmu_init(void) +{ + int ret = 0; + + /* +* Lets do this only if we are hypervisor +*/ + if (!cur_cpu_spec-oprofile_cpu_type || + strcmp(cur_cpu_spec-oprofile_cpu_type, ppc64/power8) || + !cpu_has_feature(CPU_FTR_HVMODE)) + return ret; + + cpumask_chip(); + + return 0; +} - Where is ret set? I can only see it set when it's defined: the if statment doesn't change the value of ret as far as I can see... Yes. It should have set to error value. Will fix it. - Would it be clearer if you said !(strcmp(cur_cpu_spec-oprofile_cpu_type, ppc64/power8) == 0) That would make it clearer that you're trying to get a list of possible failure conditions. Yes. Sure will change it. 
- Is there really no better way to check if a CPU is a power 8 than a string comparison? One other way I can think of is using PVR (Processor Version Register), but then we will end up having multiple checks for Power8 itself, so this is a lot simpler. +device_initcall(nest_pmu_init); Regards, Daniel Axtens Thanks for the review Maddy ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH] of: clean-up unnecessary libfdt include paths
On Wed, 3 Jun 2015 10:26:38 +0200 , Ralf Baechle r...@linux-mips.org wrote: On Wed, Jun 03, 2015 at 12:10:25AM -0500, Rob Herring wrote: Date: Wed, 3 Jun 2015 00:10:25 -0500 From: Rob Herring r...@kernel.org To: devicet...@vger.kernel.org, linux-ker...@vger.kernel.org Cc: Grant Likely grant.lik...@linaro.org, Rob Herring r...@kernel.org, Ralf Baechle r...@linux-mips.org, Benjamin Herrenschmidt b...@kernel.crashing.org, Paul Mackerras pau...@samba.org, Michael Ellerman m...@ellerman.id.au, linux-m...@linux-mips.org, linuxppc-dev@lists.ozlabs.org Subject: [PATCH] of: clean-up unnecessary libfdt include paths With the latest dtc import include fixups, it is no longer necessary to add explicit include paths to use libfdt. Remove these across the kernel. Signed-off-by: Rob Herring r...@kernel.org Cc: Ralf Baechle r...@linux-mips.org Cc: Benjamin Herrenschmidt b...@kernel.crashing.org Cc: Paul Mackerras pau...@samba.org Cc: Michael Ellerman m...@ellerman.id.au Cc: Grant Likely grant.lik...@linaro.org Cc: linux-m...@linux-mips.org Cc: linuxppc-dev@lists.ozlabs.org For the MIPS bits; Acked-by: Ralf Baechle r...@linux-mips.org Ralf Acked-by: Grant Likely grant.lik...@linaro.org ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH kernel v11 17/34] powerpc/spapr: vfio: Switch from iommu_table to new iommu_table_group
On 06/01/2015 04:24 PM, David Gibson wrote: On Fri, May 29, 2015 at 06:44:41PM +1000, Alexey Kardashevskiy wrote: Modern IBM POWERPC systems support multiple (currently two) TCE tables per IOMMU group (a.k.a. PE). This adds a iommu_table_group container for TCE tables. Right now just one table is supported. For IODA, instead of embedding iommu_table, the new iommu_table_group keeps pointers to those. The iommu_table structs are allocated dynamically now by a pnv_pci_table_alloc() helper as PCI hotplug code (for EEH recovery) and SRIOV are supported there. For P5IOC2, both iommu_table_group and iommu_table are embedded into PE struct. As there is no EEH and SRIOV support for P5IOC2, iommu_free_table() should not be called on iommu_table struct pointers so we can keep it embedded in pnv_phb::p5ioc2. For pSeries, this replaces multiple calls of kzalloc_node() with a new iommu_pseries_group_alloc() helper and stores the table group struct pointer into the pci_dn struct. For release, a iommu_table_group_free() helper is added. This moves iommu_table struct allocation from SR-IOV code to the generic DMA initialization code in pnv_pci_ioda2_setup_dma_pe. This replaces a single pointer to iommu_group with a list of iommu_table_group structs. For now it is just a single iommu_table_group in this list but later with TCE table sharing enabled, the list will keep all the IOMMU groups which use the particular table. The list uses iommu_table_group_link structs rather than iommu_table_group::next as a VFIO container may have 2 IOMMU tables, each will have its own list head pointer as it is mainly for TCE invalidation code which should walk through all attached groups and invalidate TCE cache so the table has to keep the list head pointer. The other option would be storing list head in a VFIO container but it would not work as the platform code (which does TCE table update and invalidation) has no idea about VFIO. This should cause no behavioural change. 
Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru [aw: for the vfio related changes] Acked-by: Alex Williamson alex.william...@redhat.com Reviewed-by: David Gibson da...@gibson.dropbear.id.au Reviewed-by: Gavin Shan gws...@linux.vnet.ibm.com It looks like this commit message doesn't match the code - it seems like an older or newer version of the message from the previous patch. This patch seems instead to be about changing the table_group - table relationship from 1:1 to many:many. I'll put this: === So far one TCE table could only be used by one IOMMU group. However IODA2 hardware allows programming the same TCE table address to multiple PE allowing sharing tables. This replaces a single pointer to a group in a iommu_table struct with a linked list of groups which provides the way of invalidating TCE cache for every PE when an actual TCE table is updated. This adds pnv_pci_link_table_and_group() and pnv_pci_unlink_table_and_group() helpers to manage the list. However without VFIO, it is still going to be a single IOMMU group per iommu_table. This changes iommu_add_device() to add a device to a first group from the group list of a table as it is only called from the platform init code or PCI bus notifier and at these moments there is only one group per table. This does not change TCE invalidation code to loop through all attached groups in order to simplify this patch and because it is not really needed in most cases. IODA2 is fixed in a later patch. 
=== --- Changes: v10: * iommu_table is not embedded into iommu_table_group but allocated dynamically * iommu_table allocation is moved to a single place for IODA2's pnv_pci_ioda_setup_dma_pe where it belongs to * added list of groups into iommu_table; most of the code just looks at the first item to keep the patch simpler v9: * s/it_group/it_table_group/ * added and used iommu_table_group_free(), from now iommu_free_table() is only used for VIO * added iommu_pseries_group_alloc() * squashed powerpc/iommu: Introduce iommu_table_alloc() helper into this --- arch/powerpc/include/asm/iommu.h| 8 +- arch/powerpc/kernel/iommu.c | 9 +- arch/powerpc/platforms/powernv/pci-ioda.c | 45 ++ arch/powerpc/platforms/powernv/pci-p5ioc2.c | 3 + arch/powerpc/platforms/powernv/pci.c| 76 + arch/powerpc/platforms/powernv/pci.h| 7 ++ arch/powerpc/platforms/pseries/iommu.c | 33 +++- drivers/vfio/vfio_iommu_spapr_tce.c | 122 8 files changed, 242 insertions(+), 61 deletions(-) diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 5a7267f..44a20cc 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -91,7 +91,7 @@ struct iommu_table { struct iommu_pool pools[IOMMU_NR_POOLS]; unsigned long *it_map; /* A simple allocation bitmap for now */ unsigned long it_page_shift;/* table iommu
Re: [PATCH v1 3/9]powerpc/powernv: Add cpu hotplug support
On Wednesday 03 June 2015 05:08 AM, Daniel Axtens wrote: On Tue, 2015-06-02 at 21:29 +0530, Madhavan Srinivasan wrote: Patch adds cpu hotplug support. First online cpu in a node is picked as designated thread to read the Nest pmu counter data, and at the time of hotplug, next online cpu from the same node is picked up. I'm not sure I understand this commit message. I think I understand the first half - I think you're trying to say: At boot, the first online I will rephrase it. CPU in a node is picked as the designated thread to read the Nest PMU counter data. I'm not sure I understand the second half: picked up how and for what? When the designated thread is hotplugged, next online cpu in the same node is picked up as the designated thread to read the PMU counter data. (I did eventually figure it out by reading the patch, but it'd be really nice to have it spelled out nicely in the commit message.) Sure. Will fix the commit message. +static void nest_exit_cpu(int cpu) +{ + int i, nid, target = -1; + const struct cpumask *l_cpumask; + int src_chipid; + + if (!cpumask_test_and_clear_cpu(cpu, cpu_mask_nest_pmu)) + return; + + nid = cpu_to_node(cpu); + src_chipid = topology_physical_package_id(cpu); + l_cpumask = cpumask_of_node(nid); + for_each_cpu(i, l_cpumask) { + if (i == cpu) + continue; + if (src_chipid == topology_physical_package_id(i)) { + target = i; + break; + } + } Some comments here would really help. I think you're looking for the first CPU that's (a) not the cpu you're removing and (b) on the same physical package, so sharing the same nest, but it took me a lot of staring at the code to figure it out. My bad. I will comment it. + + cpumask_set_cpu(target, cpu_mask_nest_pmu); + nest_change_cpu_context (cpu, target); + return; Return is redundant here and in several other functions in this patch. Ok. 
+} + +static void nest_init_cpu(int cpu) +{ + int i, src_chipid; + + src_chipid = topology_physical_package_id(cpu); + for_each_cpu(i, cpu_mask_nest_pmu) + if (src_chipid == topology_physical_package_id(i)) + return; + + cpumask_set_cpu(cpu, cpu_mask_nest_pmu); + nest_change_cpu_context ( -1, cpu); Weird extra spaces here. Yes. Nice catch. Will fix it. + return; +} This function could also do with a comment: AFAICT, you've structured the function so that it only calls nest_change_cpu_context if you've picked up a cpu on a physical package that previously didn't have a nest pmu thread on it. + +static int nest_cpu_notifier(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + unsigned int cpu = (long)hcpu; What's with this cast? You cast it to a long and then assign it to an unsigned int? Facepalm. My bad, will fix it. + + switch (action ~CPU_TASKS_FROZEN) { + case CPU_DOWN_FAILED: Is it necessary to move the thread back if the CPU fails to go down? No. not need. You've moved it to another online CPU already; what's the benefit of paying the time-penalty to move it back? Why should go through that. Because, there is no restriction saying only the first cpu has to read it, why should we complicate it further instead of moving to another cpu in the same node. + case CPU_STARTING: + nest_init_cpu(cpu); + break; + case CPU_DOWN_PREPARE: + nest_exit_cpu(cpu); + break; + default: + break; + } + + return NOTIFY_OK; +} Now, I don't know the details of CPU hotplug _at all_, so this may be stupid, but what happens if you hotplug a lot of CPUs all at once? Is everything properly serialised or is this going to race and end up with either multiple cpus trying to do PMU or no cpus? I did test the code with hotplug test. If all the cpus in the node is offlined, then we will have no cpus designated for that node. Thanks for review Maddy Regards, Daniel Axtens ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v1 4/9]powerpc/powernv: Add generic nest pmu ops
On Wednesday 03 June 2015 05:33 AM, Daniel Axtens wrote: On Tue, 2015-06-02 at 21:29 +0530, Madhavan Srinivasan wrote: Patch adds generic nest pmu functions and format attribute. I'm not sure this commit message accurately reflects the content of the patch. At any rate, please could you: - say what the patch adds the functions and attributes to. - phrase your message as Add generic ... not Patch adds generic ...: see https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/Documentation/SubmittingPatches#n155 Sure. Will rephrase it. +PMU_FORMAT_ATTR(event, config:0-20); +struct attribute *p8_nest_format_attrs[] = { + format_attr_event.attr, + NULL, +}; + +struct attribute_group p8_nest_format_group = { + .name = format, + .attrs = p8_nest_format_attrs, +}; Can these structs be constified? I guess it can. Will check it out. + +int p8_nest_event_init(struct perf_event *event) +{ + int chip_id; + + if (event-attr.type != event-pmu-type) + return -ENOENT; + + /* Sampling not supported yet */ + if (event-hw.sample_period) + return -EINVAL; + + /* unsupported modes and filters */ + if (event-attr.exclude_user || + event-attr.exclude_kernel || + event-attr.exclude_hv || + event-attr.exclude_idle || + event-attr.exclude_host || + event-attr.exclude_guest || + event-attr.sample_period) /* no sampling */ + return -EINVAL; You test for sample period twice here. Yes right. I will remove it. 
+ + if (event-cpu 0) + return -EINVAL; + + chip_id = topology_physical_package_id(event-cpu); + event-hw.event_base = event-attr.config + + p8_perchip_nest_info[chip_id].vbase; + + return 0; +} + +void p8_nest_read_counter(struct perf_event *event) +{ + u64 *addr; + u64 data = 0; + + addr = (u64 *)event-hw.event_base; + data = __be64_to_cpu((uint64_t)*addr); + local64_set(event-hw.prev_count, data); +} + +void p8_nest_perf_event_update(struct perf_event *event) +{ + u64 counter_prev, counter_new, final_count; + uint64_t *addr; + + addr = (u64 *)event-hw.event_base; + counter_prev = local64_read(event-hw.prev_count); + counter_new = __be64_to_cpu((uint64_t)*addr); + final_count = counter_new - counter_prev; + + local64_set(event-hw.prev_count, counter_new); + local64_add(final_count, event-count); +} + +void p8_nest_event_start(struct perf_event *event, int flags) +{ + event-hw.state = 0; + p8_nest_read_counter(event); +} + +void p8_nest_event_stop(struct perf_event *event, int flags) +{ + p8_nest_perf_event_update(event); +} + +int p8_nest_event_add(struct perf_event *event, int flags) +{ + p8_nest_event_start(event, flags); + return 0; +} + +void p8_nest_event_del(struct perf_event *event, int flags) +{ + p8_nest_event_stop(event, flags); Is this necessary? Stop calls update, which I guess makes sense as it finalises the value. But if the event is being deleted anyway, why not just do nothing here? Since these Nest PMUs does not support sampling. IIUC, perf record interface uses the event start/stop ops. Incase of perf stat interface event add/del interface are used to enable and disable the counters. Now, when we disable or delete, we update the event counter with the delta value. +} + Regards, Daniel Axtens Thanks for the review Maddy ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v1 6/9]powerpc/powernv: dt parser function for nest pmu and its events
On Wednesday 03 June 2015 06:16 AM, Daniel Axtens wrote: +static int nest_pmu_create(struct device_node *dev, int pmu_index) +{ + struct ppc64_nest_ima_events **p8_events_arr; + struct ppc64_nest_ima_events *p8_events; + struct property *pp; + char *buf; + const __be32 *lval; + u32 val; + int len, idx = 0; + struct nest_pmu *pmu_ptr; + const char *start, *end; + + if (!dev) + return -EINVAL; + + pmu_ptr = kzalloc(sizeof(struct nest_pmu), GFP_KERNEL); + if (!pmu_ptr) + return -ENOMEM; + + /* Needed for hotplug/migration */ + per_nestpmu_arr[pmu_index] = pmu_ptr; + + p8_events_arr = kzalloc((sizeof(struct ppc64_nest_ima_events) * 64), + GFP_KERNEL); + if (!p8_events_arr) + return -ENOMEM; + p8_events = (struct ppc64_nest_ima_events *)p8_events_arr; I think you're trying to get the first element of the array here: why not just `p8_events = p8_events_arr[0];`? Yes. Will change it. + + /* +* Loop through each property +*/ + for_each_property_of_node(dev, pp) { + start = pp-name; + end = start + strlen(start); + len = strlen(start); + + if (!strcmp(pp-name, name)) { + if (!pp-value || + (strnlen(pp-value, pp-length) = pp-length)) + return -EINVAL; + + buf = kzalloc(MAX_PMU_NAME_LEN, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + sprintf(buf, Nest_%s, (char *)pp-value); + pmu_ptr-pmu.name = (char *)buf; + pmu_ptr-attr_groups[1] = p8_nest_format_group; + pmu_ptr-attr_groups[2] = cpumask_nest_pmu_attr_group; + } + + /* Skip these, we dont need it */ + if (!strcmp(pp-name, name) || + !strcmp(pp-name, phandle) || + !strcmp(pp-name, device_type) || + !strcmp(pp-name, linux,phandle)) + continue; + + buf = kzalloc(MAX_PMU_NAME_LEN, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + if (strncmp(pp-name, unit., 5) == 0) { + start += 5; + len = strlen(start); + strncpy(buf, start, strlen(start)); You've just saved strlen(start), you could just use len. This also applies in the next case below. Yes. That is true. 
+ p8_events-ev_name = buf; + + if (!pp-value || +(strnlen(pp-value, pp-length) = pp-length)) + return -EINVAL; The strnlen will never be greater than pp-length, so the only case this will hit is if strnlen(pp-value, pp-length) == pp-length. This also applies again below. True, will change it. + + buf = kzalloc(MAX_PMU_NAME_LEN, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + strncpy(buf, (const char *)pp-value, pp-length); + p8_events-ev_value = buf; + idx++; + p8_events++; + + } else if (strncmp(pp-name, scale., 6) == 0) { + start += 6; + len = strlen(start); + strncpy(buf, start, strlen(start)); + p8_events-ev_name = buf; + + if (!pp-value || + (strnlen(pp-value, pp-length) = pp-length)) + return -EINVAL; + + buf = kzalloc(MAX_PMU_NAME_LEN, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + strncpy(buf, (const char *)pp-value, pp-length); + p8_events-ev_value = buf; + idx++; + p8_events++; + + } else { + strncpy(buf, start, len); This is the only case where you actually use the original version of len. This makes me think you could drop the variable entirely and just use strlen(start) in all cases. I also don't see where `end` is used anywhere in this function: could that be dropped? Correct. I guess we can drop both len and end. I used end for my prints during debug. + p8_events-ev_name = buf; + lval = of_get_property(dev, pp-name, NULL); + val = (uint32_t)be32_to_cpup(lval); + + /* + * Use DT property value as the event + */ I'm not sure if this is my mailer, but it looks like
Re: of: clean-up unnecessary libfdt include paths
On Wed, 2015-03-06 at 05:10:25 UTC, Rob Herring wrote: With the latest dtc import include fixups, it is no longer necessary to add explicit include paths to use libfdt. Remove these across the kernel. What are the latest dtc import include fixups ? diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index c1ebbda..c16e836 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -2,7 +2,6 @@ # Makefile for the linux kernel. # -CFLAGS_prom.o= -I$(src)/../../../scripts/dtc/libfdt CFLAGS_ptrace.o += -DUTS_MACHINE='$(UTS_MACHINE)' subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror Acked-by: Michael Ellerman m...@ellerman.id.au cheers ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v1 4/9]powerpc/powernv: Add generic nest pmu ops
On Wednesday 03 June 2015 05:33 AM, Daniel Axtens wrote: On Tue, 2015-06-02 at 21:29 +0530, Madhavan Srinivasan wrote: Patch adds generic nest pmu functions and format attribute. I'm not sure this commit message accurately reflects the content of the patch. At any rate, please could you: - say what the patch adds the functions and attributes to. - phrase your message as "Add generic ..." not "Patch adds generic ...": see https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/Documentation/SubmittingPatches#n155 I will rephrase the commit message. +PMU_FORMAT_ATTR(event, config:0-20); +struct attribute *p8_nest_format_attrs[] = { + format_attr_event.attr, + NULL, +}; + +struct attribute_group p8_nest_format_group = { + .name = format, + .attrs = p8_nest_format_attrs, +}; Can these structs be constified? I guess so. Will try it out. + +int p8_nest_event_init(struct perf_event *event) +{ + int chip_id; + + if (event-attr.type != event-pmu-type) + return -ENOENT; + + /* Sampling not supported yet */ + if (event-hw.sample_period) + return -EINVAL; + + /* unsupported modes and filters */ + if (event-attr.exclude_user || + event-attr.exclude_kernel || + event-attr.exclude_hv || + event-attr.exclude_idle || + event-attr.exclude_host || + event-attr.exclude_guest || + event-attr.sample_period) /* no sampling */ + return -EINVAL; You test for sample period twice here. My bad. Will remove it.
+ + if (event-cpu 0) + return -EINVAL; + + chip_id = topology_physical_package_id(event-cpu); + event-hw.event_base = event-attr.config + + p8_perchip_nest_info[chip_id].vbase; + + return 0; +} + +void p8_nest_read_counter(struct perf_event *event) +{ + u64 *addr; + u64 data = 0; + + addr = (u64 *)event-hw.event_base; + data = __be64_to_cpu((uint64_t)*addr); + local64_set(event-hw.prev_count, data); +} + +void p8_nest_perf_event_update(struct perf_event *event) +{ + u64 counter_prev, counter_new, final_count; + uint64_t *addr; + + addr = (u64 *)event-hw.event_base; + counter_prev = local64_read(event-hw.prev_count); + counter_new = __be64_to_cpu((uint64_t)*addr); + final_count = counter_new - counter_prev; + + local64_set(event-hw.prev_count, counter_new); + local64_add(final_count, event-count); +} + +void p8_nest_event_start(struct perf_event *event, int flags) +{ + event-hw.state = 0; + p8_nest_read_counter(event); +} + +void p8_nest_event_stop(struct perf_event *event, int flags) +{ + p8_nest_perf_event_update(event); +} + +int p8_nest_event_add(struct perf_event *event, int flags) +{ + p8_nest_event_start(event, flags); + return 0; +} + +void p8_nest_event_del(struct perf_event *event, int flags) +{ + p8_nest_event_stop(event, flags); Is this necessary? Stop calls update, which I guess makes sense as it finalises the value. But if the event is being deleted anyway, why not just do nothing here? IIUC, perf record will use the event start/stop interface. In case of perf stat (for PMUs which do not support sampling), the event add/del interface is used. Now when an event is disabled or deleted, the event count should get updated with the delta value. +} + Regards, Daniel Axtens Thanks for the review Maddy ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [3/3,v3] powerpc/powernv: Add opal-prd channel
On Fri, 2015-29-05 at 03:55:59 UTC, Jeremy Kerr wrote: This change adds a char device to access the PRD (processor runtime diagnostics) channel to OPAL firmware. Includes contributions from Vaidyanathan Srinivasan, Neelesh Gupta and Vishal Kulkarni. Signed-off-by: Neelesh Gupta neele...@linux.vnet.ibm.com Signed-off-by: Jeremy Kerr j...@ozlabs.org Acked-by: Stewart Smith stew...@linux.vnet.ibm.com Sorry, I put this in but then hit the build break, I was going to fix it up but would rather you did and tested it, so we may as well do another review :) diff --git a/arch/powerpc/include/uapi/asm/opal-prd.h b/arch/powerpc/include/uapi/asm/opal-prd.h new file mode 100644 index 000..319ff4a --- /dev/null +++ b/arch/powerpc/include/uapi/asm/opal-prd.h @@ -0,0 +1,58 @@ +/* + * OPAL Runtime Diagnostics interface driver + * Supported on POWERNV platform + * + * (C) Copyright IBM 2015 Usual syntax is: Copyright IBM Corporation 2015 + * + * Author: Vaidyanathan Srinivasan svaidy at linux.vnet.ibm.com + * Author: Jeremy Kerr j...@ozlabs.org I'd rather you dropped these, they'll just bit rot, but if you insist I don't care that much. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. As pointed out by Daniel, we should probably be using the "version 2 only" language on new files. diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c new file mode 100644 index 000..3004f4a --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-prd.c @@ -0,0 +1,451 @@ ...
+/* + * opal_prd_mmap - maps firmware-provided ranges into userspace + * @file: file structure for the device + * @vma: VMA to map the registers into + */ + +static int opal_prd_mmap(struct file *file, struct vm_area_struct *vma) +{ + size_t addr, size; + int rc; + + pr_devel(opal_prd_mmap(0x%016lx, 0x%016lx, 0x%lx, 0x%lx)\n, + vma-vm_start, vma-vm_end, vma-vm_pgoff, + vma-vm_flags); + + addr = vma-vm_pgoff PAGE_SHIFT; + size = vma-vm_end - vma-vm_start; + + /* ensure we're mapping within one of the allowable ranges */ + if (!opal_prd_range_is_valid(addr, size)) + return -EINVAL; + + vma-vm_page_prot = phys_mem_access_prot(file, vma-vm_pgoff, + size, vma-vm_page_prot) + | _PAGE_SPECIAL; This doesn't build with CONFIG_STRICT_MM_TYPECHECKS=y: arch/powerpc/platforms/powernv/opal-prd.c:131:5: error: invalid operands to binary | (have 'pgprot_t' and 'int') | _PAGE_SPECIAL; +static long opal_prd_ioctl(struct file *file, unsigned int cmd, + unsigned long param) +{ + struct opal_prd_info info; + struct opal_prd_scom scom; + int rc = 0; + + switch(cmd) { ^ space please + case OPAL_PRD_GET_INFO: + memset(info, 0, sizeof(info)); + info.version = OPAL_PRD_KERNEL_VERSION; + rc = copy_to_user((void __user *)param, info, sizeof(info)); + if (rc) + return -EFAULT; + break; + + case OPAL_PRD_SCOM_READ: + rc = copy_from_user(scom, (void __user *)param, sizeof(scom)); + if (rc) + return -EFAULT; + + scom.rc = opal_xscom_read(scom.chip, scom.addr, + (__be64 *)scom.data); + scom.data = be64_to_cpu(scom.data); + pr_devel(ioctl SCOM_READ: chip %llx addr %016llx + data %016llx rc %lld\n, Don't split the string please.
+ scom.chip, scom.addr, scom.data, scom.rc); + + rc = copy_to_user((void __user *)param, scom, sizeof(scom)); + if (rc) + return -EFAULT; + break; + + case OPAL_PRD_SCOM_WRITE: + rc = copy_from_user(scom, (void __user *)param, sizeof(scom)); + if (rc) + return -EFAULT; + + scom.rc = opal_xscom_write(scom.chip, scom.addr, scom.data); + pr_devel(ioctl SCOM_WRITE: chip %llx addr %016llx + data %016llx rc %lld\n, Don't split the string please. + scom.chip, scom.addr, scom.data, scom.rc); + + rc = copy_to_user((void __user *)param, scom, sizeof(scom)); + if (rc) + return -EFAULT; + break; + + default: + rc = -EINVAL; + } + + return rc; +} + +struct file_operations opal_prd_fops = { This can be static const I think. + .open = opal_prd_open, + .mmap
Re: [3/5] mm/hugetlb: remove arch_prepare/release_hugepage from arch headers
On Thu, 2015-28-05 at 11:52:35 UTC, Dominik Dingel wrote: Nobody used these hooks so they were removed from common code, and can now be removed from the architectures. Acked-by: Martin Schwidefsky schwidef...@de.ibm.com Signed-off-by: Dominik Dingel din...@linux.vnet.ibm.com Acked-by: Ralf Baechle r...@linux-mips.org diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index 4bbd3c8..7eac89b 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -168,15 +168,6 @@ static inline pte_t huge_ptep_get(pte_t *ptep) return *ptep; } -static inline int arch_prepare_hugepage(struct page *page) -{ - return 0; -} - -static inline void arch_release_hugepage(struct page *page) -{ -} - static inline void arch_clear_hugepage_flags(struct page *page) { } Acked-by: Michael Ellerman m...@ellerman.id.au cheers ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [1/1] powerpc/perf/hv-24x7: Check support before registering
On Wed, 2015-20-05 at 23:34:16 UTC, Sukadev Bhattiprolu wrote: From 955102eacf035b19080dc659a15d9b8fbd8fae7f Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com Date: Tue, 28 Apr 2015 18:47:58 -0400 Subject: [PATCH 1/1] powerpc/perf/hv-24x7: Check support before registering PMU We currently try to register the 24x7 PMU unconditionally. Not all Power systems support 24x7 counters (eg: Power7). On these systems we get a backtrace during boot when trying to register the 24x7 PMU. Check if the hypervisor supports 24x7 counters before attempting to register the 24x7 PMU. Reported-by: Gustavo Luiz Duarte gu...@br.ibm.com Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com --- Changelog[v2] - [Michael Ellerman] Simplify check with bogus parameters. --- arch/powerpc/perf/hv-24x7.c | 21 + 1 file changed, 21 insertions(+) diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index ec2eb20..c04a332 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -1268,12 +1268,33 @@ static struct pmu h_24x7_pmu = { .read= h_24x7_event_read, }; +/* + * Return 1 if we can access the 24x7 counter catalog from the hypervisor. + * Return 0 otherwise. Comment is wrong. + */ +static bool hv_has_24x7(void) +{ + unsigned long hret; ret would be fine. + + hret = h_get_24x7_catalog_page(0, 0, 0); + + if (hret != H_FUNCTION) + pr_err(Error %ld reading catalog, disabling 24x7 PMU\n, hret); + + return hret == 0; I don't get what you're doing here. You check for something other than H_FUNCTION, and print, and then you just compare against 0. But I wouldn't ever expect that to return 0, because you passed it bogus args. The logic should be: static bool is_24x7_supported(void) { if (h_get_24x7_catalog_page(0, 0, 0) == H_FUNCTION) return false; return true; } static int hv_24x7_init(void) { int r; unsigned long hret; struct hv_perf_caps caps; + if (!hv_has_24x7()) + return -ENODEV; + This is no good. 
You're doing the check, which involves a hcall, before you even check if you're running with a hypervisor (below). if (!firmware_has_feature(FW_FEATURE_LPAR)) { pr_debug(not a virtualized system, not enabling\n); return -ENODEV; cheers ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v6] powerpc/powernv: Poweroff (EPOW, DPO) events support for PowerNV platform
This patch adds support for FSP (Flexible Service Processor) EPOW (Early Power Off Warning) and DPO (Delayed Power Off) events for the PowerNV platform. EPOW events are generated by FSP due to various critical system conditions that require system shutdown. A few examples of these conditions are high ambient temperature or system running on UPS power with low UPS battery. DPO event is generated in response to admin initiated system shutdown request. Upon receipt of EPOW and DPO events the host kernel invokes orderly_poweroff() for performing graceful system shutdown. Reviewed-by: Joel Stanley j...@jms.id.au Reviewed-by: Vaibhav Jain vaib...@linux.vnet.ibm.com Reviewed-by: Michael Ellerman m...@ellerman.id.au Changes in v6: - Made below changes as suggested by Michael Ellerman on previous patch. - Changed EPOW, DPO notifier blocks to use opal_power_control_event() and enhanced opal_power_control_event() to handle EPOW and DPO events. - Reorganized code and added/changed few variable, function names removing older ones. - Minor cleanup like removing unused headers, blank lines etc. Changes in v5: - Made changes to address review comments on previous patch. Changes in v4: - Made changes to address review comments on previous patch. Changes in v3: - Made changes to immediately call orderly_poweroff upon receipt of OPAL EPOW, DPO notifications. - Made code changes to address review comments on previous patch. - Made code changes to use existing OPAL EPOW API. - Removed patch to extract EPOW event timeout from OPAL device-tree. Changes in v2: - Made code changes to improve code as per previous review comments. - Added patch to obtain EPOW event timeout values from OPAL device-tree. 
Vipin K Parashar (1): powerpc/powernv: Add poweroff (EPOW, DPO) events support for PowerNV platform arch/powerpc/include/asm/opal-api.h| 40 arch/powerpc/include/asm/opal.h| 3 +- arch/powerpc/platforms/powernv/opal-power.c| 125 + arch/powerpc/platforms/powernv/opal-wrappers.S | 1 + 4 files changed, 152 insertions(+), 17 deletions(-) -- 1.9.3 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v6] powerpc/powernv: Add poweroff (EPOW, DPO) events support for PowerNV platform
This patch adds support for FSP (Flexible Service Processor) EPOW (Early Power Off Warning) and DPO (Delayed Power Off) events for the PowerNV platform. EPOW events are generated by FSP due to various critical system conditions that require system shutdown. A few examples of these conditions are high ambient temperature or system running on UPS power with low UPS battery. DPO event is generated in response to admin initiated system shutdown request. Upon receipt of EPOW and DPO events the host kernel invokes orderly_poweroff() for performing graceful system shutdown. Reviewed-by: Joel Stanley j...@jms.id.au Reviewed-by: Vaibhav Jain vaib...@linux.vnet.ibm.com Reviewed-by: Michael Ellerman m...@ellerman.id.au Signed-off-by: Vipin K Parashar vi...@linux.vnet.ibm.com --- arch/powerpc/include/asm/opal-api.h| 40 arch/powerpc/include/asm/opal.h| 3 +- arch/powerpc/platforms/powernv/opal-power.c| 125 + arch/powerpc/platforms/powernv/opal-wrappers.S | 1 + 4 files changed, 152 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index 0321a90..f460435 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -730,6 +730,46 @@ struct opal_i2c_request { __be64 buffer_ra; /* Buffer real address */ }; +/* + * EPOW status sharing (OPAL and the host) + * + * The host will pass on OPAL, a buffer of length OPAL_SYSEPOW_MAX + * with individual elements being 16 bits wide to fetch the system + * wide EPOW status. Each element in the buffer will contain the + * EPOW status in its bit representation for a particular EPOW sub + * class as defined here. So multiple detailed EPOW status bits + * specific for any sub class can be represented in a single buffer + * element as its bit representation.
+ */ + +/* System EPOW type */ +enum OpalSysEpow { + OPAL_SYSEPOW_POWER = 0,/* Power EPOW */ + OPAL_SYSEPOW_TEMP = 1,/* Temperature EPOW */ + OPAL_SYSEPOW_COOLING= 2,/* Cooling EPOW */ + OPAL_SYSEPOW_MAX= 3,/* Max EPOW categories */ +}; + +/* Power EPOW */ +enum OpalSysPower { + OPAL_SYSPOWER_UPS = 0x0001, /* System on UPS power */ + OPAL_SYSPOWER_CHNG = 0x0002, /* System power config change */ + OPAL_SYSPOWER_FAIL = 0x0004, /* System impending power failure */ + OPAL_SYSPOWER_INCL = 0x0008, /* System incomplete power */ +}; + +/* Temperature EPOW */ +enum OpalSysTemp { + OPAL_SYSTEMP_AMB= 0x0001, /* System over ambient temperature */ + OPAL_SYSTEMP_INT= 0x0002, /* System over internal temperature */ + OPAL_SYSTEMP_HMD= 0x0004, /* System over ambient humidity */ +}; + +/* Cooling EPOW */ +enum OpalSysCooling { + OPAL_SYSCOOL_INSF = 0x0001, /* System insufficient cooling */ +}; + #endif /* __ASSEMBLY__ */ #endif /* __OPAL_API_H */ diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 042af1a..8b174f3 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -141,7 +141,8 @@ int64_t opal_pci_fence_phb(uint64_t phb_id); int64_t opal_pci_reinit(uint64_t phb_id, uint64_t reinit_scope, uint64_t data); int64_t opal_pci_mask_pe_error(uint64_t phb_id, uint16_t pe_number, uint8_t error_type, uint8_t mask_action); int64_t opal_set_slot_led_status(uint64_t phb_id, uint64_t slot_id, uint8_t led_type, uint8_t led_action); -int64_t opal_get_epow_status(__be64 *status); +int64_t opal_get_epow_status(__be16 *epow_status, __be16 *num_epow_classes); +int64_t opal_get_dpo_status(__be64 *dpo_timeout); int64_t opal_set_system_attention_led(uint8_t led_action); int64_t opal_pci_next_error(uint64_t phb_id, __be64 *first_frozen_pe, __be16 *pci_error_type, __be16 *severity); diff --git a/arch/powerpc/platforms/powernv/opal-power.c b/arch/powerpc/platforms/powernv/opal-power.c index ac46c2c..b9f6620 100644 --- 
a/arch/powerpc/platforms/powernv/opal-power.c +++ b/arch/powerpc/platforms/powernv/opal-power.c @@ -9,9 +9,12 @@ * 2 of the License, or (at your option) any later version. */ +#define pr_fmt(fmt)opal-power: fmt + #include linux/kernel.h #include linux/reboot.h #include linux/notifier.h +#include linux/of.h #include asm/opal.h #include asm/machdep.h @@ -19,30 +22,95 @@ #define SOFT_OFF 0x00 #define SOFT_REBOOT 0x01 +/* Detect existing EPOW, DPO events */ +static bool poweroff_pending(void) +{ + int i, rc; + __be16 epow_classes; + __be16 opal_epow_status[OPAL_SYSEPOW_MAX] = {0}; + __be64 opal_dpo_timeout; + + /* Check for DPO event */ + rc = opal_get_dpo_status(opal_dpo_timeout); + if (rc != OPAL_WRONG_STATE) { + pr_info(Existing DPO event detected.\n); + return true; + } + + /* + * Check for