[PATCH 5/7] PCI: Remove unnecessary #includes of asm/pci.h

2015-06-04 Thread Bjorn Helgaas
In include/linux/pci.h, we already #include asm/pci.h, so we don't need
to include asm/pci.h directly.

Remove the unnecessary includes.  All the files here already include
linux/pci.h.

Signed-off-by: Bjorn Helgaas bhelg...@google.com
CC: linux-al...@vger.kernel.org
CC: linux-m...@linux-mips.org
CC: linuxppc-dev@lists.ozlabs.org
CC: linux...@vger.kernel.org
CC: x...@kernel.org
---
 arch/alpha/kernel/core_irongate.c |1 -
 arch/alpha/kernel/sys_eiger.c |1 -
 arch/alpha/kernel/sys_nautilus.c  |1 -
 arch/mips/pci/fixup-cobalt.c  |1 -
 arch/mips/pci/ops-mace.c  |1 -
 arch/mips/pci/pci-lantiq.c|1 -
 arch/powerpc/kernel/prom.c|1 -
 arch/powerpc/kernel/prom_init.c   |1 -
 arch/sh/drivers/pci/ops-sh5.c |1 -
 arch/sh/drivers/pci/pci-sh5.c |1 -
 arch/x86/kernel/x86_init.c|1 -
 11 files changed, 11 deletions(-)

diff --git a/arch/alpha/kernel/core_irongate.c 
b/arch/alpha/kernel/core_irongate.c
index 00096df..83d0a35 100644
--- a/arch/alpha/kernel/core_irongate.c
+++ b/arch/alpha/kernel/core_irongate.c
@@ -22,7 +22,6 @@
 #include linux/bootmem.h
 
 #include asm/ptrace.h
-#include asm/pci.h
 #include asm/cacheflush.h
 #include asm/tlbflush.h
 
diff --git a/arch/alpha/kernel/sys_eiger.c b/arch/alpha/kernel/sys_eiger.c
index 79d69d7..15f4208 100644
--- a/arch/alpha/kernel/sys_eiger.c
+++ b/arch/alpha/kernel/sys_eiger.c
@@ -22,7 +22,6 @@
 #include asm/irq.h
 #include asm/mmu_context.h
 #include asm/io.h
-#include asm/pci.h
 #include asm/pgtable.h
 #include asm/core_tsunami.h
 #include asm/hwrpb.h
diff --git a/arch/alpha/kernel/sys_nautilus.c b/arch/alpha/kernel/sys_nautilus.c
index 700686d..2cfaa0e 100644
--- a/arch/alpha/kernel/sys_nautilus.c
+++ b/arch/alpha/kernel/sys_nautilus.c
@@ -39,7 +39,6 @@
 #include asm/irq.h
 #include asm/mmu_context.h
 #include asm/io.h
-#include asm/pci.h
 #include asm/pgtable.h
 #include asm/core_irongate.h
 #include asm/hwrpb.h
diff --git a/arch/mips/pci/fixup-cobalt.c b/arch/mips/pci/fixup-cobalt.c
index a138e8e..b3ab593 100644
--- a/arch/mips/pci/fixup-cobalt.c
+++ b/arch/mips/pci/fixup-cobalt.c
@@ -13,7 +13,6 @@
 #include linux/kernel.h
 #include linux/init.h
 
-#include asm/pci.h
 #include asm/io.h
 #include asm/gt64120.h
 
diff --git a/arch/mips/pci/ops-mace.c b/arch/mips/pci/ops-mace.c
index 6b5821f..951d807 100644
--- a/arch/mips/pci/ops-mace.c
+++ b/arch/mips/pci/ops-mace.c
@@ -8,7 +8,6 @@
 #include linux/kernel.h
 #include linux/pci.h
 #include linux/types.h
-#include asm/pci.h
 #include asm/ip32/mace.h
 
 #if 0
diff --git a/arch/mips/pci/pci-lantiq.c b/arch/mips/pci/pci-lantiq.c
index 8b117e6..c5347d9 100644
--- a/arch/mips/pci/pci-lantiq.c
+++ b/arch/mips/pci/pci-lantiq.c
@@ -20,7 +20,6 @@
 #include linux/of_irq.h
 #include linux/of_pci.h
 
-#include asm/pci.h
 #include asm/gpio.h
 #include asm/addrspace.h
 
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 308c5e1..00fdea2 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -46,7 +46,6 @@
 #include asm/mmu.h
 #include asm/paca.h
 #include asm/pgtable.h
-#include asm/pci.h
 #include asm/iommu.h
 #include asm/btext.h
 #include asm/sections.h
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index fd1fe4c..fcca807 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -37,7 +37,6 @@
 #include asm/smp.h
 #include asm/mmu.h
 #include asm/pgtable.h
-#include asm/pci.h
 #include asm/iommu.h
 #include asm/btext.h
 #include asm/sections.h
diff --git a/arch/sh/drivers/pci/ops-sh5.c b/arch/sh/drivers/pci/ops-sh5.c
index 4ce95a0..4536194 100644
--- a/arch/sh/drivers/pci/ops-sh5.c
+++ b/arch/sh/drivers/pci/ops-sh5.c
@@ -18,7 +18,6 @@
 #include linux/delay.h
 #include linux/types.h
 #include linux/irq.h
-#include asm/pci.h
 #include asm/io.h
 #include pci-sh5.h
 
diff --git a/arch/sh/drivers/pci/pci-sh5.c b/arch/sh/drivers/pci/pci-sh5.c
index 16c1e72..8229114 100644
--- a/arch/sh/drivers/pci/pci-sh5.c
+++ b/arch/sh/drivers/pci/pci-sh5.c
@@ -20,7 +20,6 @@
 #include linux/types.h
 #include linux/irq.h
 #include cpu/irq.h
-#include asm/pci.h
 #include asm/io.h
 #include pci-sh5.h
 
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 234b072..eed5625 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -11,7 +11,6 @@
 #include asm/bios_ebda.h
 #include asm/paravirt.h
 #include asm/pci_x86.h
-#include asm/pci.h
 #include asm/mpspec.h
 #include asm/setup.h
 #include asm/apic.h

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH kernel v11 21/34] powerpc/powernv/ioda2: Add TCE invalidation for all attached groups

2015-06-04 Thread Gavin Shan
On Fri, May 29, 2015 at 06:44:45PM +1000, Alexey Kardashevskiy wrote:
The iommu_table struct keeps a list of IOMMU groups it is used for.
At the moment there is just a single group attached but further
patches will add TCE table sharing. When sharing is enabled, the TCE cache
in each PE needs to be invalidated, which is what this patch does.

This does not change pnv_pci_ioda1_tce_invalidate() as there is no plan
to enable TCE table sharing on PHBs older than IODA2.

Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru

Reviewed-by: Gavin Shan gws...@linux.vnet.ibm.com

---
Changes:
v10:
* new to the series
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 35 ---
 1 file changed, 23 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index 3fd8b18..94fccc8 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -24,6 +24,7 @@
 #include linux/msi.h
 #include linux/memblock.h
 #include linux/iommu.h
+#include linux/rculist.h

 #include asm/sections.h
 #include asm/io.h
@@ -1764,23 +1765,15 @@ static inline void 
pnv_pci_ioda2_tce_invalidate_entire(struct pnv_ioda_pe *pe)
   __raw_writeq(cpu_to_be64(val), phb-ioda.tce_inval_reg);
 }

-static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl,
-  unsigned long index, unsigned long npages, bool rm)
+static void pnv_pci_ioda2_tce_do_invalidate(unsigned pe_number, bool rm,
+  __be64 __iomem *invalidate, unsigned shift,
+  unsigned long index, unsigned long npages)

The better function name would be: pnv_pci_ioda2_do_tce_invalidate(), and
it seems we needn't bool rm any more since invalidate has been assigned
with virtual/real address by caller.

Thanks,
Gavin

 {
-  struct iommu_table_group_link *tgl = list_first_entry_or_null(
-  tbl-it_group_list, struct iommu_table_group_link,
-  next);
-  struct pnv_ioda_pe *pe = container_of(tgl-table_group,
-  struct pnv_ioda_pe, table_group);
   unsigned long start, end, inc;
-  __be64 __iomem *invalidate = rm ?
-  (__be64 __iomem *)pe-phb-ioda.tce_inval_reg_phys :
-  pe-phb-ioda.tce_inval_reg;
-  const unsigned shift = tbl-it_page_shift;

   /* We'll invalidate DMA address in PE scope */
   start = 0x2ull << 60;
-  start |= (pe->pe_number & 0xFF);
+  start |= (pe_number & 0xFF);
   end = start;

   /* Figure out the start, end and step */
@@ -1798,6 +1791,24 @@ static void pnv_pci_ioda2_tce_invalidate(struct 
iommu_table *tbl,
   }
 }

+static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl,
+  unsigned long index, unsigned long npages, bool rm)
+{
+  struct iommu_table_group_link *tgl;
+
+  list_for_each_entry_rcu(tgl, tbl-it_group_list, next) {
+  struct pnv_ioda_pe *pe = container_of(tgl-table_group,
+  struct pnv_ioda_pe, table_group);
+  __be64 __iomem *invalidate = rm ?
+  (__be64 __iomem *)pe-phb-ioda.tce_inval_reg_phys :
+  pe-phb-ioda.tce_inval_reg;
+
+  pnv_pci_ioda2_tce_do_invalidate(pe-pe_number, rm,
+  invalidate, tbl-it_page_shift,
+  index, npages);
+  }
+}
+
 static int pnv_ioda2_tce_build(struct iommu_table *tbl, long index,
   long npages, unsigned long uaddr,
   enum dma_data_direction direction,
-- 
2.4.0.rc3.8.gfb3e7d5


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 5/7] PCI: Remove unnecessary #includes of asm/pci.h

2015-06-04 Thread Simon Horman
On Thu, Jun 04, 2015 at 04:49:57PM -0500, Bjorn Helgaas wrote:
 In include/linux/pci.h, we already #include asm/pci.h, so we don't need
 to include asm/pci.h directly.
 
 Remove the unnecessary includes.  All the files here already include
 linux/pci.h.
 
 Signed-off-by: Bjorn Helgaas bhelg...@google.com
 CC: linux-al...@vger.kernel.org
 CC: linux-m...@linux-mips.org
 CC: linuxppc-dev@lists.ozlabs.org
 CC: linux...@vger.kernel.org
 CC: x...@kernel.org
 ---
  arch/alpha/kernel/core_irongate.c |1 -
  arch/alpha/kernel/sys_eiger.c |1 -
  arch/alpha/kernel/sys_nautilus.c  |1 -
  arch/mips/pci/fixup-cobalt.c  |1 -
  arch/mips/pci/ops-mace.c  |1 -
  arch/mips/pci/pci-lantiq.c|1 -
  arch/powerpc/kernel/prom.c|1 -
  arch/powerpc/kernel/prom_init.c   |1 -
  arch/sh/drivers/pci/ops-sh5.c |1 -
  arch/sh/drivers/pci/pci-sh5.c |1 -
  arch/x86/kernel/x86_init.c|1 -
  11 files changed, 11 deletions(-)

arch/sh/drivers portion:

Acked-by: Simon Horman horms+rene...@verge.net.au

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH kernel v11 21/34] powerpc/powernv/ioda2: Add TCE invalidation for all attached groups

2015-06-04 Thread Alexey Kardashevskiy

On 06/05/2015 10:27 AM, Gavin Shan wrote:

On Fri, May 29, 2015 at 06:44:45PM +1000, Alexey Kardashevskiy wrote:

The iommu_table struct keeps a list of IOMMU groups it is used for.
At the moment there is just a single group attached but further
patches will add TCE table sharing. When sharing is enabled, the TCE cache
in each PE needs to be invalidated, which is what this patch does.

This does not change pnv_pci_ioda1_tce_invalidate() as there is no plan
to enable TCE table sharing on PHBs older than IODA2.

Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru


Reviewed-by: Gavin Shan gws...@linux.vnet.ibm.com


---
Changes:
v10:
* new to the series
---
arch/powerpc/platforms/powernv/pci-ioda.c | 35 ---
1 file changed, 23 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index 3fd8b18..94fccc8 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -24,6 +24,7 @@
#include linux/msi.h
#include linux/memblock.h
#include linux/iommu.h
+#include linux/rculist.h

#include asm/sections.h
#include asm/io.h
@@ -1764,23 +1765,15 @@ static inline void 
pnv_pci_ioda2_tce_invalidate_entire(struct pnv_ioda_pe *pe)
__raw_writeq(cpu_to_be64(val), phb-ioda.tce_inval_reg);
}

-static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl,
-   unsigned long index, unsigned long npages, bool rm)
+static void pnv_pci_ioda2_tce_do_invalidate(unsigned pe_number, bool rm,
+   __be64 __iomem *invalidate, unsigned shift,
+   unsigned long index, unsigned long npages)


The better function name would be: pnv_pci_ioda2_do_tce_invalidate(), and


Ok.



it seems we needn't bool rm any more since invalidate has been assigned
with virtual/real address by caller.



We still need @rm here as different helpers are used for real and virt 
modes - __raw_rm_writeq and __raw_writeq.






Thanks,
Gavin


{
-   struct iommu_table_group_link *tgl = list_first_entry_or_null(
-   tbl-it_group_list, struct iommu_table_group_link,
-   next);
-   struct pnv_ioda_pe *pe = container_of(tgl-table_group,
-   struct pnv_ioda_pe, table_group);
unsigned long start, end, inc;
-   __be64 __iomem *invalidate = rm ?
-   (__be64 __iomem *)pe-phb-ioda.tce_inval_reg_phys :
-   pe-phb-ioda.tce_inval_reg;
-   const unsigned shift = tbl-it_page_shift;

/* We'll invalidate DMA address in PE scope */
start = 0x2ull << 60;
-   start |= (pe->pe_number & 0xFF);
+   start |= (pe_number & 0xFF);
end = start;

/* Figure out the start, end and step */
@@ -1798,6 +1791,24 @@ static void pnv_pci_ioda2_tce_invalidate(struct 
iommu_table *tbl,
}
}

+static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl,
+   unsigned long index, unsigned long npages, bool rm)
+{
+   struct iommu_table_group_link *tgl;
+
+   list_for_each_entry_rcu(tgl, tbl-it_group_list, next) {
+   struct pnv_ioda_pe *pe = container_of(tgl-table_group,
+   struct pnv_ioda_pe, table_group);
+   __be64 __iomem *invalidate = rm ?
+   (__be64 __iomem *)pe-phb-ioda.tce_inval_reg_phys :
+   pe-phb-ioda.tce_inval_reg;
+
+   pnv_pci_ioda2_tce_do_invalidate(pe-pe_number, rm,
+   invalidate, tbl-it_page_shift,
+   index, npages);
+   }
+}
+
static int pnv_ioda2_tce_build(struct iommu_table *tbl, long index,
long npages, unsigned long uaddr,
enum dma_data_direction direction,
--
2.4.0.rc3.8.gfb3e7d5






--
Alexey
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: of/dynamic: Fix test for PPC_PSERIES

2015-06-04 Thread Grant Likely
On Thu,  4 Jun 2015 20:57:32 +1000 (AEST)
, Michael Ellerman m...@ellerman.id.au
 wrote:
 On Thu, 2015-04-06 at 09:34:41 UTC, Geert Uytterhoeven wrote:
  IS_ENABLED(PPC_PSERIES) always evaluates to false, as IS_ENABLED() is
  supposed to be used with the full Kconfig symbol name, including the
  CONFIG_ prefix.
  
  Add the missing CONFIG_ prefix to fix this.
  
  Fixes: a25095d451ece23b (of: Move dynamic node fixups out of powerpc and 
  into common code)
  Signed-off-by: Geert Uytterhoeven geert+rene...@glider.be
  ---
 
  Did this bug cause any breakage?
  If yes, the fix should go to stable (for v3.17 and later).
 
 Yikes. Not that I've heard of. But it's reasonably new so possibly it's not 
 hit
 distros that folks tend to run on those machines.
 
 I'm also not clear how it would break, it could be subtle and we've not 
 noticed.
 
 Nathan might have more of an idea (on CC).
 
 On my machine here everything that has an ibm,phandle also has a 
 linux,phandle,
 so we wouldn't hit that code path. But I'm not sure how representative that 
 box
 is.
 
 cheers

Still, an obvious bug. I've picked it up and marked for stable.

g.


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH V3] drivers/mtd: add powernv flash MTD abstraction driver

2015-06-04 Thread Cyril Bur
On Tue, 2015-06-02 at 14:26 +1000, Cyril Bur wrote:
 Powerpc powernv platforms allow access to certain system flash devices
 through a firmware interface. This change adds an mtd driver for these
 flash devices.
 
 Minor updates from Jeremy Kerr and Joel Stanley.
 
 Signed-off-by: Cyril Bur cyril...@gmail.com
 Signed-off-by: Joel Stanley j...@jms.id.au
 Signed-off-by: Jeremy Kerr j...@ozlabs.org
 ---
 Hello Brian and MTD folk,
 Could I please get an ACK for Michael to take this through the powerpc
 tree.
 Thanks.
 

Hello Brian,

As we have some deadlines approaching, I am getting pressure to ensure
this gets merged upstream as quickly as possible, please let me know if
there is anything more which can be done.

Thanks very much,

Cyril
 
 V2: Address Brian Norris' review
 Fix typos
 Change from NAND flash type to NOR flash type
 Correctness tweaks
 V3: Address Neelesh Gupta's review
 Minor corrections
 Release the opal token on error
 Unregister mtd device on module remove
 ---
  drivers/mtd/devices/Kconfig |   8 +
  drivers/mtd/devices/Makefile|   1 +
  drivers/mtd/devices/powernv_flash.c | 286 
 
  3 files changed, 295 insertions(+)
  create mode 100644 drivers/mtd/devices/powernv_flash.c
 
 diff --git a/drivers/mtd/devices/Kconfig b/drivers/mtd/devices/Kconfig
 index c49d0b1..f73c416 100644
 --- a/drivers/mtd/devices/Kconfig
 +++ b/drivers/mtd/devices/Kconfig
 @@ -195,6 +195,14 @@ config MTD_BLOCK2MTD
 Testing MTD users (eg JFFS2) on large media and media that might
 be removed during a write (using the floppy drive).
  
 +config MTD_POWERNV_FLASH
 + tristate powernv flash MTD driver
 + depends on PPC_POWERNV
 + help
 +   This provides an MTD device to access flash on powernv OPAL
 +   platforms from Linux. This device abstracts away the
 +   firmware interface for flash access.
 +
  comment Disk-On-Chip Device Drivers
  
  config MTD_DOCG3
 diff --git a/drivers/mtd/devices/Makefile b/drivers/mtd/devices/Makefile
 index f0b0e61..7912d3a 100644
 --- a/drivers/mtd/devices/Makefile
 +++ b/drivers/mtd/devices/Makefile
 @@ -16,6 +16,7 @@ obj-$(CONFIG_MTD_SPEAR_SMI) += spear_smi.o
  obj-$(CONFIG_MTD_SST25L) += sst25l.o
  obj-$(CONFIG_MTD_BCM47XXSFLASH)  += bcm47xxsflash.o
  obj-$(CONFIG_MTD_ST_SPI_FSM)+= st_spi_fsm.o
 +obj-$(CONFIG_MTD_POWERNV_FLASH)  += powernv_flash.o
  
 
  CFLAGS_docg3.o   += -I$(src)
 diff --git a/drivers/mtd/devices/powernv_flash.c 
 b/drivers/mtd/devices/powernv_flash.c
 new file mode 100644
 index 000..777e09f
 --- /dev/null
 +++ b/drivers/mtd/devices/powernv_flash.c
 @@ -0,0 +1,286 @@
 +/*
 + * OPAL PNOR flash MTD abstraction
 + *
 + * IBM 2015
 + *
 + * This program is free software; you can redistribute it and/or modify
 + * it under the terms of the GNU General Public License as published by
 + * the Free Software Foundation; either version 2 of the License, or
 + * (at your option) any later version.
 + *
 + * This program is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 + * GNU General Public License for more details.
 + *
 + */
 +
 +#include linux/kernel.h
 +#include linux/module.h
 +#include linux/errno.h
 +#include linux/of.h
 +#include linux/of_address.h
 +#include linux/platform_device.h
 +#include linux/string.h
 +#include linux/slab.h
 +#include linux/mtd/mtd.h
 +#include linux/mtd/partitions.h
 +
 +#include linux/debugfs.h
 +#include linux/seq_file.h
 +
 +#include asm/opal.h
 +
 +
 +/*
 + * This driver creates a Linux MTD abstraction for platform PNOR flash
 + * backed by OPAL calls
 + */
 +
 +struct powernv_flash {
 + struct mtd_info mtd;
 + u32 id;
 +};
 +
 +enum flash_op {
 + FLASH_OP_READ,
 + FLASH_OP_WRITE,
 + FLASH_OP_ERASE,
 +};
 +
 +static int powernv_flash_async_op(struct mtd_info *mtd, enum flash_op op,
 + loff_t offset, size_t len, size_t *retlen, u_char *buf)
 +{
 + struct powernv_flash *info = (struct powernv_flash *)mtd-priv;
 + struct device *dev = mtd-dev;
 + int token;
 + struct opal_msg msg;
 + int rc;
 +
 + dev_dbg(dev, %s(op=%d, offset=0x%llx, len=%zu)\n,
 + __func__, op, offset, len);
 +
 + token = opal_async_get_token_interruptible();
 + if (token < 0) {
 + if (token != -ERESTARTSYS)
 + dev_err(dev, Failed to get an async token\n);
 +
 + return token;
 + }
 +
 + switch (op) {
 + case FLASH_OP_READ:
 + rc = opal_flash_read(info-id, offset, __pa(buf), len, token);
 + break;
 + case FLASH_OP_WRITE:
 + rc = opal_flash_write(info-id, offset, __pa(buf), len, token);
 + break;
 + case FLASH_OP_ERASE:
 + rc = opal_flash_erase(info-id, offset, len, token);
 +  

[PATCH] cxl: Reset default context for vPHB on release

2015-06-04 Thread Michael Neuling
When we release the device, we should also invalidate the default context.
With this cxl_get_context() will return null after removal.

Signed-off-by: Michael Neuling mi...@neuling.org
---
 drivers/misc/cxl/vphb.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c
index f733309..b1d1983a 100644
--- a/drivers/misc/cxl/vphb.c
+++ b/drivers/misc/cxl/vphb.c
@@ -72,6 +72,7 @@ static void cxl_pci_disable_device(struct pci_dev *dev)
dev_err(dev-dev, Default context started\n);
return;
}
+   dev-dev.archdata.cxl_ctx = NULL;
cxl_release_context(ctx);
}
 }
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: of/dynamic: Fix test for PPC_PSERIES

2015-06-04 Thread Nathan Fontenot
On 06/04/2015 05:57 AM, Michael Ellerman wrote:
 On Thu, 2015-04-06 at 09:34:41 UTC, Geert Uytterhoeven wrote:
 IS_ENABLED(PPC_PSERIES) always evaluates to false, as IS_ENABLED() is
 supposed to be used with the full Kconfig symbol name, including the
 CONFIG_ prefix.

 Add the missing CONFIG_ prefix to fix this.

 Fixes: a25095d451ece23b (of: Move dynamic node fixups out of powerpc and 
 into common code)
 Signed-off-by: Geert Uytterhoeven geert+rene...@glider.be
 ---
 
 Did this bug cause any breakage?
 If yes, the fix should go to stable (for v3.17 and later).
 
 Yikes. Not that I've heard of. But it's reasonably new so possibly it's not 
 hit
 distros that folks tend to run on those machines.

I think we do have some distros that have picked this up.

 
 I'm also not clear how it would break, it could be subtle and we've not 
 noticed.
 

The only place I can find that this might cause an issue is during device tree
updating that pseries does after a live migration or suspend/resume. When
removing or updating a device tree node we look up the node by ibm,phandle and
without this patch we wouldn't find these nodes.

I have not seen any issues because of this but I think pushing this to stable
would be good.

-Nathan

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH kernel v11 16/34] powerpc/spapr: vfio: Replace iommu_table with iommu_table_group

2015-06-04 Thread Gavin Shan
On Fri, May 29, 2015 at 06:44:40PM +1000, Alexey Kardashevskiy wrote:
Modern IBM POWERPC systems support multiple (currently two) TCE tables
per IOMMU group (a.k.a. PE). This adds a iommu_table_group container
for TCE tables. Right now just one table is supported.

This defines iommu_table_group struct which stores pointers to
iommu_group and iommu_table(s). This replaces iommu_table with
iommu_table_group where iommu_table was used to identify a group:
- iommu_register_group();
- iommudata of generic iommu_group;

This removes @data from iommu_table as it_table_group provides
same access to pnv_ioda_pe.

For IODA, instead of embedding iommu_table, the new iommu_table_group
keeps pointers to those. The iommu_table structs are allocated
dynamically.

For P5IOC2, both iommu_table_group and iommu_table are embedded into
PE struct. As there is no EEH and SRIOV support for P5IOC2,
iommu_free_table() should not be called on iommu_table struct pointers
so we can keep it embedded in pnv_phb::p5ioc2.

For pSeries, this replaces multiple calls of kzalloc_node() with a new
iommu_pseries_alloc_group() helper and stores the table group struct
pointer into the pci_dn struct. For release, a iommu_table_free_group()
helper is added.

This moves iommu_table struct allocation from SR-IOV code to
the generic DMA initialization code in pnv_pci_ioda_setup_dma_pe and
pnv_pci_ioda2_setup_dma_pe as this is where DMA is actually initialized.
This change is here because those lines had to be changed anyway.

This should cause no behavioural change.

Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru
[aw: for the vfio related changes]
Acked-by: Alex Williamson alex.william...@redhat.com

Reviewed-by: Gavin Shan gws...@linux.vnet.ibm.com

---
Changes:
v11:
* iommu_table_group moved outside #ifdef CONFIG_IOMMU_API as iommu_table
is dynamically allocated and it needs a pointer to PE and
iommu_table_group is this pointer

v10:
* new to the series, separated from
powerpc/spapr: vfio: Switch from iommu_table to new iommu_table_group
* iommu_table is not embedded into iommu_table_group but allocated
dynamically in most cases
* iommu_table allocation is moved to a single place for IODA2's
pnv_pci_ioda_setup_dma_pe where it belongs to
* added list of groups into iommu_table; most of the code just looks at
the first item to keep the patch simpler
---
 arch/powerpc/include/asm/iommu.h|  19 ++---
 arch/powerpc/include/asm/pci-bridge.h   |   2 +-
 arch/powerpc/kernel/iommu.c |  17 ++---
 arch/powerpc/platforms/powernv/pci-ioda.c   |  55 +++---
 arch/powerpc/platforms/powernv/pci-p5ioc2.c |  18 +++--
 arch/powerpc/platforms/powernv/pci.h|   3 +-
 arch/powerpc/platforms/pseries/iommu.c  | 107 +++-
 drivers/vfio/vfio_iommu_spapr_tce.c |  23 +++---
 8 files changed, 152 insertions(+), 92 deletions(-)

diff --git a/arch/powerpc/include/asm/iommu.h 
b/arch/powerpc/include/asm/iommu.h
index e2a45c3..5a7267f 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -91,14 +91,9 @@ struct iommu_table {
   struct iommu_pool pools[IOMMU_NR_POOLS];
   unsigned long *it_map;   /* A simple allocation bitmap for now */
   unsigned long  it_page_shift;/* table iommu page size */
-#ifdef CONFIG_IOMMU_API
-  struct iommu_group *it_group;
-#endif
+  struct iommu_table_group *it_table_group;
   struct iommu_table_ops *it_ops;
   void (*set_bypass)(struct iommu_table *tbl, bool enable);
-#ifdef CONFIG_PPC_POWERNV
-  void   *data;
-#endif
 };

 /* Pure 2^n version of get_order */
@@ -129,14 +124,22 @@ extern void iommu_free_table(struct iommu_table *tbl, 
const char *node_name);
  */
 extern struct iommu_table *iommu_init_table(struct iommu_table * tbl,
   int nid);
+#define IOMMU_TABLE_GROUP_MAX_TABLES  1
+
+struct iommu_table_group {
+  struct iommu_group *group;
+  struct iommu_table *tables[IOMMU_TABLE_GROUP_MAX_TABLES];
+};
+

In the long run it would be worth making the number of TCE tables
supported per group dynamic, but not for now. P7IOC has one
table per group while PHB3 has two tables per group.

Thanks,
Gavin

 #ifdef CONFIG_IOMMU_API
-extern void iommu_register_group(struct iommu_table *tbl,
+
+extern void iommu_register_group(struct iommu_table_group *table_group,
int pci_domain_number, unsigned long pe_num);
 extern int iommu_add_device(struct device *dev);
 extern void iommu_del_device(struct device *dev);
 extern int __init tce_iommu_bus_notifier_init(void);
 #else
-static inline void iommu_register_group(struct iommu_table *tbl,
+static inline void iommu_register_group(struct iommu_table_group *table_group,
   int pci_domain_number,
   unsigned long pe_num)
 {
diff --git a/arch/powerpc/include/asm/pci-bridge.h 

Re: [PATCH kernel v11 20/34] powerpc/powernv/ioda2: Move TCE kill register address to PE

2015-06-04 Thread Gavin Shan
On Fri, May 29, 2015 at 06:44:44PM +1000, Alexey Kardashevskiy wrote:
At the moment the DMA setup code looks for the ibm,opal-tce-kill
property which contains the TCE kill register address. Writing to
this register invalidates TCE cache on IODA/IODA2 hub.

This moves the register address from iommu_table to pnv_phb as this
register belongs to PHB and invalidates TCE cache for all tables of
all attached PEs.

This moves the property reading/remapping code to a helper which is
called when DMA is being configured for PE and which does DMA setup
for both IODA1 and IODA2.

This adds a new pnv_pci_ioda2_tce_invalidate_entire() helper which
invalidates cache for the entire table. It should be called after
every call to opal_pci_map_pe_dma_window(). It was not required before
because there was just a single TCE table and 64bit DMA was handled via
bypass window (which has no table so no cache was used) but this is going
to change with Dynamic DMA windows (DDW).

Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru

Reviewed-by: Gavin Shan gws...@linux.vnet.ibm.com

Thanks,
Gavin

---
Changes:
v11:
* s/pnv_pci_ioda2_tvt_invalidate/pnv_pci_ioda2_tce_invalidate_entire/g
(cannot think of better-and-shorter name)
* moved tce_inval_reg_phys/tce_inval_reg to pnv_phb

v10:
* fixed error from checkpatch.pl
* removed comment at ibm,opal-tce-kill parsing as irrelevant
* s/addr/val/ in pnv_pci_ioda2_tvt_invalidate() as it was not a kernel address

v9:
* new in the series
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 66 ++-
 arch/powerpc/platforms/powernv/pci.h  |  7 +++-
 2 files changed, 44 insertions(+), 29 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index 1d0bb5b..3fd8b18 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1679,8 +1679,8 @@ static void pnv_pci_ioda1_tce_invalidate(struct 
iommu_table *tbl,
   struct pnv_ioda_pe *pe = container_of(tgl-table_group,
   struct pnv_ioda_pe, table_group);
   __be64 __iomem *invalidate = rm ?
-  (__be64 __iomem *)pe-tce_inval_reg_phys :
-  (__be64 __iomem *)tbl-it_index;
+  (__be64 __iomem *)pe-phb-ioda.tce_inval_reg_phys :
+  pe-phb-ioda.tce_inval_reg;
   unsigned long start, end, inc;
   const unsigned shift = tbl-it_page_shift;

@@ -1751,6 +1751,19 @@ static struct iommu_table_ops pnv_ioda1_iommu_ops = {
   .get = pnv_tce_get,
 };

+static inline void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_ioda_pe *pe)
+{
+  /* 01xb - invalidate TCEs that match the specified PE# */
+  unsigned long val = (0x4ull << 60) | (pe->pe_number & 0xFF);
+  struct pnv_phb *phb = pe->phb;
+
+  if (!phb->ioda.tce_inval_reg)
+  return;
+
+  mb(); /* Ensure above stores are visible */
+  __raw_writeq(cpu_to_be64(val), phb->ioda.tce_inval_reg);
+}
+
 static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl,
   unsigned long index, unsigned long npages, bool rm)
 {
@@ -1761,8 +1774,8 @@ static void pnv_pci_ioda2_tce_invalidate(struct 
iommu_table *tbl,
   struct pnv_ioda_pe, table_group);
   unsigned long start, end, inc;
   __be64 __iomem *invalidate = rm ?
-  (__be64 __iomem *)pe-tce_inval_reg_phys :
-  (__be64 __iomem *)tbl-it_index;
+  (__be64 __iomem *)pe-phb-ioda.tce_inval_reg_phys :
+  pe-phb-ioda.tce_inval_reg;
   const unsigned shift = tbl-it_page_shift;

   /* We'll invalidate DMA address in PE scope */
@@ -1820,7 +1833,6 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb 
*phb,
 {

   struct page *tce_mem = NULL;
-  const __be64 *swinvp;
   struct iommu_table *tbl;
   unsigned int i;
   int64_t rc;
@@ -1877,20 +1889,11 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb 
*phb,
 base  28, IOMMU_PAGE_SHIFT_4K);

   /* OPAL variant of P7IOC SW invalidated TCEs */
-  swinvp = of_get_property(phb-hose-dn, ibm,opal-tce-kill, NULL);
-  if (swinvp) {
-  /* We need a couple more fields -- an address and a data
-   * to or.  Since the bus is only printed out on table free
-   * errors, and on the first pass the data will be a relative
-   * bus number, print that out instead.
-   */
-  pe-tce_inval_reg_phys = be64_to_cpup(swinvp);
-  tbl-it_index = (unsigned long)ioremap(pe-tce_inval_reg_phys,
-  8);
+  if (phb-ioda.tce_inval_reg)
   tbl-it_type |= (TCE_PCI_SWINV_CREATE |
TCE_PCI_SWINV_FREE   |
TCE_PCI_SWINV_PAIR);
-  }
+
   tbl-it_ops = pnv_ioda1_iommu_ops;
   iommu_init_table(tbl, phb-hose-node);

@@ -1971,12 +1974,24 @@ static struct 

Re: [v6] powerpc/powernv: Add poweroff (EPOW, DPO) events support for PowerNV platform

2015-06-04 Thread Michael Ellerman
On Thu, 2015-04-06 at 12:03:17 UTC, Vipin K Parashar wrote:
 This patch adds support for FSP (Flexible Service Processor)
 EPOW (Early Power Off Warning) and DPO (Delayed Power Off) events for
 the PowerNV platform. EPOW events are generated by FSP due to various
 critical system conditions that require system shutdown. A few examples
 of these conditions are high ambient temperature or system running on
 UPS power with low UPS battery. DPO event is generated in response to
 admin initiated system shutdown request. Upon receipt of EPOW and DPO
 events the host kernel invokes orderly_poweroff() for performing
 graceful system shutdown.
 
 Reviewed-by: Joel Stanley j...@jms.id.au
 Reviewed-by: Vaibhav Jain vaib...@linux.vnet.ibm.com
 Reviewed-by: Michael Ellerman m...@ellerman.id.au
 Signed-off-by: Vipin K Parashar vi...@linux.vnet.ibm.com

Hi Vipin,

One issue, on mambo I'm seeing:

  [666973573,3] OPAL: Called with bad token 105 !
  opal-power: Existing DPO event detected.
  reboot: Failed to start orderly shutdown: forcing the issue
  reboot: Power down
  [684431322,5] OPAL: Shutdown request type 0x0...


ie. at boot it shuts down immediately.

The problem is in here I think:

 + /* Check for DPO event */
 + rc = opal_get_dpo_status(opal_dpo_timeout);
 + if (rc != OPAL_WRONG_STATE) {
 + pr_info(Existing DPO event detected.\n);
 + return true;
 + }


This also makes me think you probably haven't tested this on a BMC machine?

cheers
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 2/2] rheap: move rheap.c from arch/powerpc/lib/ to lib/

2015-06-04 Thread Scott Wood
On Thu, 2015-06-04 at 04:27 -0500, Zhao Qiang-B45475 wrote:
 On Thu, 2015-06-04 at 2:14PM, Wood Scott wrote:
 
 
  -Original Message-
  From: Wood Scott-B07421
  Sent: Thursday, June 04, 2015 2:14 PM
  To: Zhao Qiang-B45475
  Cc: linuxppc-dev@lists.ozlabs.org; Xie Xiaobo-R63061
  Subject: Re: [PATCH 2/2] rheap: move rheap.c from 
  arch/powerpc/lib/ to
  lib/
  
  On Thu, 2015-06-04 at 00:56 -0500, Zhao Qiang-B45475 wrote:
   On Thu, 2015-05-28 at 1:37AM +0800, Wood Scott wrote:
   
   
-Original Message-
From: Wood Scott-B07421
Sent: Thursday, May 28, 2015 1:37 AM
To: Zhao Qiang-B45475
Cc: linuxppc-dev@lists.ozlabs.org; Wood Scott-B07421; Xie 
Xiaobo-
R63061
Subject: Re: [PATCH 2/2] rheap: move rheap.c from 
arch/powerpc/lib/
to lib/

On Wed, 2015-05-27 at 17:12 +0800, Zhao Qiang wrote:
 qe need to use the rheap, so move it to public directory.

You've been previously asked to use lib/genalloc.c rather than
introduce duplicate functionality into /lib.  NACK.
   
   Can't use lib/genalloc.c instead of rheap.c.
   Qe need to alloc muram of qe, not DIMM.
  
  lib/genalloc.h is not for allocating main memory.  It is for 
  allocating
  special regions.  It is serving the same purpose as rheap.
 
 I need to use the func rh_alloc_align, what is the similar func?
 I just find a func gen_pool_first_fit_order_align.

I don't see anywhere the QE code currently calls rh_alloc_align() -- 
and cpm_muram_init() calls rh_init() with an alignment of 1.

If you do need some functionality that genalloc doesn't offer, add the 
functionality to genalloc.

-Scott

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH kernel v11 33/34] vfio: powerpc/spapr: Register memory and define IOMMU v2

2015-06-04 Thread Gavin Shan
On Fri, May 29, 2015 at 06:44:57PM +1000, Alexey Kardashevskiy wrote:
The existing implementation accounts the whole DMA window in
the locked_vm counter. This is going to be worse with multiple
containers and huge DMA windows. Also, real-time accounting would require
additional tracking of accounted pages due to the page size difference -
IOMMU uses 4K pages and system uses 4K or 64K pages.

Another issue is that actual pages pinning/unpinning happens on every
DMA map/unmap request. This does not affect the performance much now as
we spend way too much time now on switching context between
guest/userspace/host but this will start to matter when we add in-kernel
DMA map/unmap acceleration.

This introduces a new IOMMU type for SPAPR - VFIO_SPAPR_TCE_v2_IOMMU.
New IOMMU deprecates VFIO_IOMMU_ENABLE/VFIO_IOMMU_DISABLE and introduces
2 new ioctls to register/unregister DMA memory -
VFIO_IOMMU_SPAPR_REGISTER_MEMORY and VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY -
which receive user space address and size of a memory region which
needs to be pinned/unpinned and counted in locked_vm.
New IOMMU splits physical pages pinning and TCE table update
into 2 different operations. It requires:
1) guest pages to be registered first
2) consequent map/unmap requests to work only with pre-registered memory.
For the default single window case this means that the entire guest
(instead of 2GB) needs to be pinned before using VFIO.
When a huge DMA window is added, no additional pinning will be
required, otherwise it would be guest RAM + 2GB.

The new memory registration ioctls are not supported by
VFIO_SPAPR_TCE_IOMMU. Dynamic DMA window and in-kernel acceleration
will require memory to be preregistered in order to work.

The accounting is done per the user process.

This advertises v2 SPAPR TCE IOMMU and restricts what the userspace
can do with v1 or v2 IOMMUs.

In order to support memory pre-registration, we need a way to track
the use of every registered memory region and only allow unregistration
if a region is not in use anymore. So we need a way to tell from what
region the just cleared TCE was from.

This adds a userspace view of the TCE table into iommu_table struct.
It contains userspace address, one per TCE entry. The table is only
allocated when the ownership over an IOMMU group is taken which means
it is only used from outside of the powernv code (such as VFIO).

Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru
[aw: for the vfio related changes]
Acked-by: Alex Williamson alex.william...@redhat.com
---
Changes:
v11:
* mm_iommu_put() does not return a code so this does not check it
* moved v2 in tce_container to pack the struct

v10:
* moved it_userspace allocation to vfio_iommu_spapr_tce as it is a VFIO
specific thing
* squashed powerpc/iommu: Add userspace view of TCE table into this as
it is
a part of IOMMU v2
* s/tce_iommu_use_page_v2/tce_iommu_prereg_ua_to_hpa/
* fixed some function names to have tce_iommu_ in the beginning rather
just tce_
* as mm_iommu_mapped_inc() can now fail, check for the return code

v9:
* s/tce_get_hva_cached/tce_iommu_use_page_v2/

v7:
* now memory is registered per mm (i.e. process)
* moved memory registration code to powerpc/mmu
* merged vfio: powerpc/spapr: Define v2 IOMMU into this
* limited new ioctls to v2 IOMMU
* updated doc
* unsupported ioctls return -ENOTTY instead of -EPERM

v6:
* tce_get_hva_cached() returns hva via a pointer

v4:
* updated docs
* s/kzmalloc/vzalloc/
* in tce_pin_pages()/tce_unpin_pages() removed @vaddr, @size and
replaced offset with index
* renamed vfio_iommu_type_register_memory to vfio_iommu_spapr_register_memory
and removed duplicating vfio_iommu_spapr_register_memory
---
 Documentation/vfio.txt  |  31 ++-
 arch/powerpc/include/asm/iommu.h|   6 +
 drivers/vfio/vfio_iommu_spapr_tce.c | 512 ++--
 include/uapi/linux/vfio.h   |  27 ++
 4 files changed, 487 insertions(+), 89 deletions(-)

diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt
index 96978ec..7dcf2b5 100644
--- a/Documentation/vfio.txt
+++ b/Documentation/vfio.txt
@@ -289,10 +289,12 @@ PPC64 sPAPR implementation note

 This implementation has some specifics:

-1) Only one IOMMU group per container is supported as an IOMMU group
-represents the minimal entity which isolation can be guaranteed for and
-groups are allocated statically, one per a Partitionable Endpoint (PE)
+1) On older systems (POWER7 with P5IOC2/IODA1) only one IOMMU group per
+container is supported as an IOMMU table is allocated at the boot time,
+one table per a IOMMU group which is a Partitionable Endpoint (PE)
 (PE is often a PCI domain but not always).
+Newer systems (POWER8 with IODA2) have improved hardware design which allows
+to remove this limitation and have multiple IOMMU groups per a VFIO container.

 2) The hardware supports so called DMA windows - the PCI address range
 within which DMA transfer is allowed, any attempt to access address space
@@ -427,6 +429,29 @@ 

Re: [PATCH kernel v11 05/34] powerpc/iommu: Always release iommu_table in iommu_free_table()

2015-06-04 Thread Gavin Shan
On Fri, May 29, 2015 at 06:44:29PM +1000, Alexey Kardashevskiy wrote:
At the moment iommu_free_table() only releases memory if
the table was initialized for the platform code use, i.e. it had
it_map initialized (which purpose is to track DMA memory space use).

With dynamic DMA windows, we will need to be able to release
iommu_table even if it was used for VFIO, in which case it_map is NULL;
this patch makes that possible.

Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru

Reviewed-by: Gavin Shan gws...@linux.vnet.ibm.com

Thanks,
Gavin

---
Changes:
v11:
* fixed parameter checks
---
 arch/powerpc/kernel/iommu.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 3d47eb3..73eb39a 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -713,9 +713,11 @@ void iommu_free_table(struct iommu_table *tbl, const char 
*node_name)
   unsigned long bitmap_sz;
   unsigned int order;

-  if (!tbl || !tbl-it_map) {
-  printk(KERN_ERR %s: expected TCE map for %s\n, __func__,
-  node_name);
+  if (!tbl)
+  return;
+
+  if (!tbl-it_map) {
+  kfree(tbl);
   return;
   }

-- 
2.4.0.rc3.8.gfb3e7d5


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 4/7] PCI: Include linux/pci.h, not asm/pci.h

2015-06-04 Thread Bjorn Helgaas
We already include asm/pci.h from linux/pci.h, so just include
linux/pci.h directly.

Signed-off-by: Bjorn Helgaas bhelg...@google.com
CC: linuxppc-dev@lists.ozlabs.org
CC: linux-s...@vger.kernel.org
---
 arch/powerpc/platforms/52xx/mpc52xx_pci.c |2 +-
 arch/s390/kernel/suspend.c|2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pci.c 
b/arch/powerpc/platforms/52xx/mpc52xx_pci.c
index e2d401a..6eb3b2a 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_pci.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_pci.c
@@ -12,7 +12,7 @@
 
 #undef DEBUG
 
-#include asm/pci.h
+#include linux/pci.h
 #include asm/mpc52xx.h
 #include asm/delay.h
 #include asm/machdep.h
diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c
index d3236c9..39e2f41 100644
--- a/arch/s390/kernel/suspend.c
+++ b/arch/s390/kernel/suspend.c
@@ -9,10 +9,10 @@
 #include linux/pfn.h
 #include linux/suspend.h
 #include linux/mm.h
+#include linux/pci.h
 #include asm/ctl_reg.h
 #include asm/ipl.h
 #include asm/cio.h
-#include asm/pci.h
 #include asm/sections.h
 #include entry.h
 

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [v6] powerpc/powernv: Add poweroff (EPOW, DPO) events support for PowerNV platform

2015-06-04 Thread Michael Ellerman
On Fri, 2015-06-05 at 08:01 +1000, Michael Ellerman wrote:
 On Thu, 2015-04-06 at 12:03:17 UTC, Vipin K Parashar wrote:
  This patch adds support for FSP (Flexible Service Processor)
  EPOW (Early Power Off Warning) and DPO (Delayed Power Off) events for
  the PowerNV platform. EPOW events are generated by FSP due to various
  critical system conditions that require system shutdown. A few examples
  of these conditions are high ambient temperature or system running on
  UPS power with low UPS battery. DPO event is generated in response to
  admin initiated system shutdown request. Upon receipt of EPOW and DPO
  events the host kernel invokes orderly_poweroff() for performing
  graceful system shutdown.
  
  Reviewed-by: Joel Stanley j...@jms.id.au
  Reviewed-by: Vaibhav Jain vaib...@linux.vnet.ibm.com
  Reviewed-by: Michael Ellerman m...@ellerman.id.au
  Signed-off-by: Vipin K Parashar vi...@linux.vnet.ibm.com
 
 Hi Vipin,
 

And on my Tuleta I get:

opal-power: OPAL EPOW, DPO support detected.
opal-power: Existing EPOW event detected.
reboot: Failed to start orderly shutdown: forcing the issue
[ cut here ]
WARNING: at kernel/workqueue.c:818
Modules linked in:
CPU: 3 PID: 26 Comm: migration/3 Not tainted 4.1.0-rc3-13669-g704921b #80
task: c00ff1745440 ti: c00ff17ac000 task.ti: c00ff17ac000
NIP: c00ba48c LR: c00ba430 CTR: c00dbac0
REGS: c00ff17af7d0 TRAP: 0700   Not tainted  (4.1.0-rc3-13669-g704921b)
MSR: 900100029033 SF,HV,EE,ME,IR,DR,RI,LE  CR: 2044  XER: 
CFAR: c00ba44c SOFTE: 0 
GPR00: c00cec7c c00ff17afa50 c0d7ce20 c00ff160fcc0 
GPR04:   c0e10468  
GPR08: c0d2ce20 c00ff90dd400 0001 c00ff901dc98 
GPR12: 4082 c1dc0d80 c00c0b18 c00ff817a540 
GPR16:     
GPR20:    000ff838 
GPR24: c0c9dc00 c0daa8e0 c00ff901dc00  
GPR28: 0004 c00ff174ae04 c00ff901dc00  
NIP [c00ba48c] wq_worker_waking_up+0x7c/0xa0
LR [c00ba430] wq_worker_waking_up+0x20/0xa0
Call Trace:
[c00ff17afa50] [c00ff1749bc0] 0xc00ff1749bc0 (unreliable)
[c00ff17afa80] [c00cec7c] ttwu_do_activate.constprop.76+0x6c/0xa0
[c00ff17afab0] [c00d2958] try_to_wake_up+0x208/0x4a0
[c00ff17afb30] [c00eab94] __wake_up_common+0x84/0xf0
[c00ff17afb90] [c00eb744] complete+0x54/0x90
[c00ff17afbd0] [c014bac4] cpu_stop_signal_done+0x54/0x70
[c00ff17afbf0] [c014c324] cpu_stopper_thread+0xd4/0x1f0
[c00ff17afd20] [c00c5b20] smpboot_thread_fn+0x280/0x290
[c00ff17afd80] [c00c0c18] kthread+0x108/0x130
[c00ff17afe30] [c000956c] ret_from_kernel_thread+0x5c/0x70
Instruction dump:
7d00512d 40c2fff4 38210030 e8010010 ebe1fff8 7c0803a6 4e800020 6000 
6042 3d02fffb 8948db7e 694a0001 0b0a 2faa 41feffbc 3941 
---[ end trace de25982dcf3cffd9 ]---
reboot: Power down


cheers


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: of: clean-up unnecessary libfdt include paths

2015-06-04 Thread Rob Herring
On Thu, Jun 4, 2015 at 5:20 AM, Michael Ellerman m...@ellerman.id.au wrote:
 On Wed, 2015-03-06 at 05:10:25 UTC, Rob Herring wrote:
 With the latest dtc import include fixups, it is no longer necessary to
 add explicit include paths to use libfdt. Remove these across the
 kernel.

 What are the latest dtc import include fixups ?

Changing the scripts/dtc/libfdt/libfdt.h includes from <> to "". The
import script does this now and the recent import in my for-next tree
has this. I'll clarify this in the commit message.


 diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
 index c1ebbda..c16e836 100644
 --- a/arch/powerpc/kernel/Makefile
 +++ b/arch/powerpc/kernel/Makefile
 @@ -2,7 +2,6 @@
  # Makefile for the linux kernel.
  #

 -CFLAGS_prom.o= -I$(src)/../../../scripts/dtc/libfdt
  CFLAGS_ptrace.o  += -DUTS_MACHINE='$(UTS_MACHINE)'

  subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror

 Acked-by: Michael Ellerman m...@ellerman.id.au

Thanks.

Rob
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [3/3,v3] powerpc/powernv: Add opal-prd channel

2015-06-04 Thread Jeremy Kerr
Hi Michael,

 Sorry, I put this in but then hit the build break, I was going to fix it up 
 but
 would rather you did and tested it, so we may as well do another review :)

whee!

 @@ -0,0 +1,58 @@
 +/*
 + * OPAL Runtime Diagnostics interface driver
 + * Supported on POWERNV platform
 + *
 + * (C) Copyright IBM 2015
 
 Usual syntax is: Copyright IBM Corporation 2015

OK, fixed.

 + *
 + * Author: Vaidyanathan Srinivasan svaidy at linux.vnet.ibm.com
 + * Author: Jeremy Kerr j...@ozlabs.org
 
 I'd rather you dropped these, they'll just bit rot, but if you insist I don't
 care that much.

Yep, I'd rather remove them too.

 + *
 + * This program is free software; you can redistribute it and/or modify
 + * it under the terms of the GNU General Public License as published by
 + * the Free Software Foundation; either version 2, or (at your option)
 + * any later version.
 
 As pointed out by Daniel, we should probably be using the version 2 only
 language on new files.

Fixed.

 +vma-vm_page_prot = phys_mem_access_prot(file, vma-vm_pgoff,
 + size, vma-vm_page_prot)
 +| _PAGE_SPECIAL;
 
 This doesn't build with CONFIG_STRICT_MM_TYPECHECKS=y:
 
   arch/powerpc/platforms/powernv/opal-prd.c:131:5: error: invalid operands to 
 binary | (have ‘pgprot_t’ and ‘int’)
   | _PAGE_SPECIAL;

OK, new patch coming with the proper pgprot macros.

 +switch(cmd) {
   ^
 space please

Fixed.


 +pr_devel(ioctl SCOM_READ: chip %llx addr %016llx 
 +data %016llx rc %lld\n,
 
 Don't split the string please.

OK, but this makes our lines > 80 chars. Assuming that'll be okay.

 +struct file_operations opal_prd_fops = {
 
 This can be static const I think.

Indeed it can! Updated.

 +static struct miscdevice opal_prd_dev = {
 +.minor  = MISC_DYNAMIC_MINOR,
 +.name   = opal-prd,
 +.fops   = opal_prd_fops,
 
 White space is messed up here, should be leading tabs.

[tabs-spaces-both.png]

Thanks for the review, new patch coming soon.

Cheers,


Jeremy


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 3/3 v4] powerpc/powernv: Add opal-prd channel

2015-06-04 Thread Jeremy Kerr
This change adds a char device to access the PRD (processor runtime
diagnostics) channel to OPAL firmware.

Includes contributions from Vaidyanathan Srinivasan, Neelesh Gupta &
Vishal Kulkarni.

Signed-off-by: Neelesh Gupta neele...@linux.vnet.ibm.com
Signed-off-by: Jeremy Kerr j...@ozlabs.org
Acked-by: Stewart Smith stew...@linux.vnet.ibm.com

---

v4:
 - Address reviews from mpe:

 - GPLv2+ -> GPLv2, fix copyrights, remove authors

 - fix pgprot manipulations

 - formatting & space fixes

 - constify opal_prd_fops

v3:
 - Add versioning description and reserved fields in opal_prd_info
   for future expansion

 - Fix node leak in opal_prd_range_is_valid

 - Explain open() & probe() semantics

 - Fix miscdev_register error path


---
 arch/powerpc/include/asm/opal-api.h|   21 
 arch/powerpc/include/asm/opal.h|1 
 arch/powerpc/include/uapi/asm/opal-prd.h   |   58 ++
 arch/powerpc/platforms/powernv/Kconfig |7 
 arch/powerpc/platforms/powernv/Makefile|1 
 arch/powerpc/platforms/powernv/opal-prd.c  |  445 +
 arch/powerpc/platforms/powernv/opal-wrappers.S |1 
 arch/powerpc/platforms/powernv/opal.c  |4 
 8 files changed, 536 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/opal-api.h 
b/arch/powerpc/include/asm/opal-api.h
index 0321a90..2407f12 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -153,7 +153,8 @@
 #define OPAL_FLASH_READ110
 #define OPAL_FLASH_WRITE   111
 #define OPAL_FLASH_ERASE   112
-#define OPAL_LAST  112
+#define OPAL_PRD_MSG   113
+#define OPAL_LAST  113
 
 /* Device tree flags */
 
@@ -352,6 +353,7 @@ enum opal_msg_type {
OPAL_MSG_SHUTDOWN,  /* params[0] = 1 reboot, 0 shutdown */
OPAL_MSG_HMI_EVT,
OPAL_MSG_DPO,
+   OPAL_MSG_PRD,
OPAL_MSG_TYPE_MAX,
 };
 
@@ -674,6 +676,23 @@ typedef struct oppanel_line {
__be64 line_len;
 } oppanel_line_t;
 
+enum opal_prd_msg_type {
+   OPAL_PRD_MSG_TYPE_INIT = 0, /* HBRT -- OPAL */
+   OPAL_PRD_MSG_TYPE_FINI, /* HBRT/kernel -- OPAL */
+   OPAL_PRD_MSG_TYPE_ATTN, /* HBRT -- OPAL */
+   OPAL_PRD_MSG_TYPE_ATTN_ACK, /* HBRT -- OPAL */
+   OPAL_PRD_MSG_TYPE_OCC_ERROR,/* HBRT -- OPAL */
+   OPAL_PRD_MSG_TYPE_OCC_RESET,/* HBRT -- OPAL */
+};
+
+struct opal_prd_msg_header {
+   uint8_t type;
+   uint8_t pad[1];
+   __be16  size;
+};
+
+struct opal_prd_msg;
+
 /*
  * SG entries
  *
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 042af1a..93704af 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -193,6 +193,7 @@ int64_t opal_ipmi_recv(uint64_t interface, struct 
opal_ipmi_msg *msg,
uint64_t *msg_len);
 int64_t opal_i2c_request(uint64_t async_token, uint32_t bus_id,
 struct opal_i2c_request *oreq);
+int64_t opal_prd_msg(struct opal_prd_msg *msg);
 
 int64_t opal_flash_read(uint64_t id, uint64_t offset, uint64_t buf,
uint64_t size, uint64_t token);
diff --git a/arch/powerpc/include/uapi/asm/opal-prd.h 
b/arch/powerpc/include/uapi/asm/opal-prd.h
new file mode 100644
index 000..319ff4a
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/opal-prd.h
@@ -0,0 +1,58 @@
+/*
+ * OPAL Runtime Diagnostics interface driver
+ * Supported on POWERNV platform
+ *
+ * (C) Copyright IBM 2015
+ *
+ * Author: Vaidyanathan Srinivasan svaidy at linux.vnet.ibm.com
+ * Author: Jeremy Kerr j...@ozlabs.org
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _UAPI_ASM_POWERPC_OPAL_PRD_H_
+#define _UAPI_ASM_POWERPC_OPAL_PRD_H_
+
+#include linux/types.h
+
+/**
+ * The version of the kernel interface of the PRD system. This describes the
+ * interface available for the /dev/opal-prd device. The actual PRD message
+ * layout and content is private to the firmware -- userspace interface, so
+ * is not covered by this versioning.
+ *
+ * Future interface versions are backwards-compatible; if a later kernel
+ * version is encountered, functionality provided in earlier versions
+ * will work.
+ */
+#define OPAL_PRD_KERNEL_VERSION1
+
+#define OPAL_PRD_GET_INFO  _IOR('o', 0x01, struct opal_prd_info)
+#define OPAL_PRD_SCOM_READ 

Re: [PATCH] cpufreq: qoriq: optimize the CPU frequency switching time

2015-06-04 Thread Viresh Kumar
On 04-06-15, 14:25, yuantian.t...@freescale.com wrote:
 From: Tang Yuantian yuantian.t...@freescale.com
 
 Each time the CPU switches its frequency, the clock nodes in
 DTS are walked through to find proper clock source. This is
 very time-consuming, for example, it is up to 500+ us on T4240.
 Besides, switching time varies from clock to clock.
 To optimize this, each input clock of CPU is buffered, so that
 it can be picked up instantly when needed.
 
 Since for each CPU each input clock is stored in a pointer
 which takes 4 or 8 bytes memory and normally there are several
 input clocks per CPU, that will not take much memory as well.

Not sure how it got included in this form in the first place. :)

 Signed-off-by: Tang Yuantian yuantian.t...@freescale.com
 ---
  drivers/cpufreq/qoriq-cpufreq.c | 32 +---
  1 file changed, 21 insertions(+), 11 deletions(-)

Acked-by: Viresh Kumar viresh.ku...@linaro.org

-- 
viresh
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v5 02/42] powerpc/powernv: Enable M64 on P7IOC

2015-06-04 Thread Gavin Shan
The patch enables M64 window on P7IOC, which has been enabled on
PHB3. Different from PHB3 where 16 M64 BARs are supported and each
of them can be owned by one particular PE# exclusively or divided
evenly to 256 segments, each P7IOC PHB has 16 M64 BARs and each
of them is divided into 8 segments. So each P7IOC PHB can support
128 M64 segments only. Also, P7IOC has M64DT, which helps mapping
one particular M64 segment# to arbitrary PE#. PHB3 doesn't have
M64DT, indicating that one M64 segment can only be pinned to the
fixed PE#. In order to have similar logic to support M64 for PHB3
and P7IOC, we just provide 128 M64 (16 BARs) segments and fixed
mapping between PE# and M64 segment# on P7IOC. In turn, we just
need different phb->init_m64() hooks for P7IOC and PHB3 to support
M64.

Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com
---
v5:
  * Pinned OPAL API return value type to int64_t
  * Don't initialize M64 callbacks for unknown PHB type
  * Fixed comments as suggested by aik
  * Fixed coding style complained by checkpatch.pl
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 110 ++
 1 file changed, 98 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index 573b07a..245ef81 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -174,6 +174,69 @@ static void pnv_ioda_free_pe(struct pnv_phb *phb, int pe)
clear_bit(pe, phb-ioda.pe_alloc);
 }
 
+static int pnv_ioda1_init_m64(struct pnv_phb *phb)
+{
+   struct resource *r;
+   int seg;
+
+   /* There are as many M64 segments as the maximum number
+* of PEs, which is 128.
+*/
+   for (seg = 0; seg  phb-ioda.total_pe; seg += 8) {
+   unsigned long base;
+   int64_t rc;
+
+   base = phb-ioda.m64_base + seg * phb-ioda.m64_segsize;
+   rc = opal_pci_set_phb_mem_window(phb-opal_id,
+OPAL_M64_WINDOW_TYPE,
+seg / 8,
+base,
+0, /* unused */
+8 * phb-ioda.m64_segsize);
+   if (rc != OPAL_SUCCESS) {
+   pr_warn(  Error %lld setting M64 PHB#%d-BAR#%d\n,
+   rc, phb-hose-global_number, seg / 8);
+   goto fail;
+   }
+
+   rc = opal_pci_phb_mmio_enable(phb-opal_id,
+ OPAL_M64_WINDOW_TYPE,
+ seg / 8,
+ OPAL_ENABLE_M64_SPLIT);
+   if (rc != OPAL_SUCCESS) {
+   pr_warn(  Error %lld enabling M64 PHB#%d-BAR#%d\n,
+   rc, phb-hose-global_number, seg / 8);
+   goto fail;
+   }
+   }
+
+   /* Strip off the segment used by the reserved PE, which
+* is expected to be 0 or last supported PE#. The PHB's
+* first memory window traces the 32-bits MMIO range
+* while the second one traces the 64-bits prefetchable
+* MMIO range that the PHB supports.
+*/
+   r = phb-hose-mem_resources[1];
+   if (phb-ioda.reserved_pe == 0)
+   r-start += phb-ioda.m64_segsize;
+   else if (phb-ioda.reserved_pe == (phb-ioda.total_pe - 1))
+   r-end -= phb-ioda.m64_segsize;
+   else
+   pr_warn(  Cannot strip M64 segment for reserved PE#%d\n,
+   phb-ioda.reserved_pe);
+
+   return 0;
+
+fail:
+   for ( ; seg = 0; seg -= 8)
+   opal_pci_phb_mmio_enable(phb-opal_id,
+OPAL_M64_WINDOW_TYPE,
+seg / 8,
+OPAL_DISABLE_M64);
+
+   return -EIO;
+}
+
 /* The default M64 BAR is shared by all PEs */
 static int pnv_ioda2_init_m64(struct pnv_phb *phb)
 {
@@ -231,7 +294,7 @@ fail:
return -EIO;
 }
 
-static void pnv_ioda2_reserve_m64_pe(struct pnv_phb *phb)
+static void pnv_ioda_reserve_m64_pe(struct pnv_phb *phb)
 {
resource_size_t sgsz = phb-ioda.m64_segsize;
struct pci_dev *pdev;
@@ -257,8 +320,8 @@ static void pnv_ioda2_reserve_m64_pe(struct pnv_phb *phb)
}
 }
 
-static int pnv_ioda2_pick_m64_pe(struct pnv_phb *phb,
-struct pci_bus *bus, int all)
+static int pnv_ioda_pick_m64_pe(struct pnv_phb *phb,
+   struct pci_bus *bus, int all)
 {
resource_size_t segsz = phb-ioda.m64_segsize;
struct pci_dev *pdev;
@@ -355,6 +418,26 @@ done:
pe-master = master_pe;
list_add_tail(pe-list, master_pe-slaves);
  

[PATCH v5 26/42] powerpc/powernv: Use PCI slot reset infrastructure

2015-06-04 Thread Gavin Shan
The skiboot firmware might provide the capability of resetting PCI
slot by property ibm,reset-by-firmware on the PCI slot associated
device node. The patch checks on the property and routes the reset
to firmware if the property exists. Otherwise, we fall back to the
old path as before.

Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com
---
v5:
  * Derived from PATCH[v4 09/21]
---
 arch/powerpc/platforms/powernv/eeh-powernv.c | 44 +++-
 1 file changed, 43 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c 
b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 4fd8f15..4feb533 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -841,7 +841,7 @@ out:
return 0;
 }
 
-static int pnv_eeh_bridge_reset(struct pci_dev *dev, int option)
+static int __pnv_eeh_bridge_reset(struct pci_dev *dev, int option)
 {
struct pci_dn *pdn = pci_get_pdn_by_devfn(dev-bus, dev-devfn);
struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
@@ -892,6 +892,48 @@ static int pnv_eeh_bridge_reset(struct pci_dev *dev, int 
option)
return 0;
 }
 
+static int pnv_eeh_bridge_reset(struct pci_dev *pdev, int option)
+{
+   struct pci_controller *hose;
+   struct pnv_phb *phb;
+   struct device_node *dn = pdev ? pci_device_to_OF_node(pdev) : NULL;
+   uint64_t id = (0x1ul  60);
+   uint8_t scope;
+   int64_t rc;
+
+   /*
+* If the firmware can't handle it, we will issue hot reset
+* on the secondary bus despite the requested reset type.
+*/
+   if (!dn || !of_get_property(dn, ibm,reset-by-firmware, NULL))
+   return __pnv_eeh_bridge_reset(pdev, option);
+
+   /* The firmware can handle the request */
+   switch (option) {
+   case EEH_RESET_HOT:
+   scope = OPAL_RESET_PCI_HOT;
+   break;
+   case EEH_RESET_FUNDAMENTAL:
+   scope = OPAL_RESET_PCI_FUNDAMENTAL;
+   break;
+   case EEH_RESET_DEACTIVATE:
+   return 0;
+   default:
+   dev_warn(pdev-dev, %s: Unsupported reset %d\n,
+__func__, option);
+   return -EINVAL;
+   }
+
+   hose = pci_bus_to_host(pdev-bus);
+   phb = hose-private_data;
+   id |= (pdev-bus-number  24) | (pdev-devfn  16) | phb-opal_id;
+   rc = opal_pci_reset(id, scope, OPAL_ASSERT_RESET);
+   if (rc  0)
+   rc = pnv_eeh_poll(id);
+
+   return (rc == OPAL_SUCCESS) ? 0 : -EIO;
+}
+
 static void pnv_eeh_wait_for_pending(struct pci_dn *pdn, int pos,
 u16 mask, bool af_flr_rst)
 {
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v5 39/42] drivers/of: Unflatten nodes equal or deeper than specified level

2015-06-04 Thread Gavin Shan
unflatten_dt_node() is called recursively to unflatten FDT nodes
with the assumption that FDT blob has only one root node, which
isn't true when the FDT blob represents a device sub-tree. The
patch improves the function to support device sub-trees that
have multiple root nodes:

   * Rename original unflatten_dt_node() to __unflatten_dt_node().
   * Wrapper unflatten_dt_node() calls __unflatten_dt_node() with
 adjusted current node depth to 1 to avoid underflow.

Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com
---
v5:
  * Split from PATCH[v4 19/21]
  * Fixed line over 80 characters from checkpatch.pl
---
 drivers/of/fdt.c | 56 ++--
 1 file changed, 38 insertions(+), 18 deletions(-)

diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index cde35c5d01..b87c157 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -28,6 +28,8 @@
 #include asm/setup.h  /* for COMMAND_LINE_SIZE */
 #include asm/page.h
 
+static int cur_node_depth;
+
 /*
  * of_fdt_limit_memory - limit the number of regions in the /memory node
  * @limit: maximum entries
@@ -161,27 +163,26 @@ static void *unflatten_dt_alloc(void **mem, unsigned long 
size,
 }
 
 /**
- * unflatten_dt_node - Alloc and populate a device_node from the flat tree
+ * __unflatten_dt_node - Alloc and populate a device_node from the flat tree
  * @blob: The parent device tree blob
  * @mem: Memory chunk to use for allocating device nodes and properties
  * @p: pointer to node in flat tree
  * @dad: Parent struct device_node
  * @fpsize: Size of the node path up at the current depth.
  */
-static void * unflatten_dt_node(void *blob,
-   void *mem,
-   int *poffset,
-   struct device_node *dad,
-   struct device_node **nodepp,
-   unsigned long fpsize,
-   bool dryrun)
+static void *__unflatten_dt_node(void *blob,
+void *mem,
+int *poffset,
+struct device_node *dad,
+struct device_node **nodepp,
+unsigned long fpsize,
+bool dryrun)
 {
const __be32 *p;
struct device_node *np;
struct property *pp, **prev_pp = NULL;
const char *pathp;
unsigned int l, allocl;
-   static int depth = 0;
int old_depth;
int offset;
int has_name = 0;
@@ -334,13 +335,19 @@ static void * unflatten_dt_node(void *blob,
np-type = NULL;
}
 
-   old_depth = depth;
-   *poffset = fdt_next_node(blob, *poffset, depth);
-   if (depth  0)
-   depth = 0;
-   while (*poffset  0  depth  old_depth)
-   mem = unflatten_dt_node(blob, mem, poffset, np, NULL,
-   fpsize, dryrun);
+   old_depth = cur_node_depth;
+   *poffset = fdt_next_node(blob, *poffset, cur_node_depth);
+   while (*poffset  0) {
+   if (cur_node_depth  old_depth)
+   break;
+
+   if (cur_node_depth == old_depth)
+   mem = __unflatten_dt_node(blob, mem, poffset,
+ dad, NULL, fpsize, dryrun);
+   else if (cur_node_depth  old_depth)
+   mem = __unflatten_dt_node(blob, mem, poffset,
+ np, NULL, fpsize, dryrun);
+   }
 
if (*poffset  0  *poffset != -FDT_ERR_NOTFOUND)
pr_err(unflatten: error %d processing FDT\n, *poffset);
@@ -366,6 +373,18 @@ static void * unflatten_dt_node(void *blob,
return mem;
 }
 
+static void *unflatten_dt_node(void *blob,
+  void *mem,
+  int *poffset,
+  struct device_node *dad,
+  struct device_node **nodepp,
+  bool dryrun)
+{
+   cur_node_depth = 1;
+   return __unflatten_dt_node(blob, mem, poffset,
+  dad, nodepp, 0, dryrun);
+}
+
 /**
  * __unflatten_device_tree - create tree of device_nodes from flat blob
  *
@@ -405,7 +424,8 @@ static void __unflatten_device_tree(void *blob,
 
/* First pass, scan for size */
start = 0;
-   size = (unsigned long)unflatten_dt_node(blob, NULL, start, NULL, NULL, 
0, true);
+   size = (unsigned long)unflatten_dt_node(blob, NULL, start,
+   NULL, NULL, true);
size = ALIGN(size, 4);
 
pr_debug(  size is %lx, allocating...\n, size);
@@ -420,7 +440,7 @@ static void __unflatten_device_tree(void *blob,
 
/* Second pass, do actual unflattening */
start = 0;
-   unflatten_dt_node(blob, mem, start, 

[PATCH v5 09/42] powerpc/powernv: pnv_ioda_setup_dma() configure one PE only

2015-06-04 Thread Gavin Shan
The original implementation of pnv_ioda_setup_dma() iterates the
list of PEs and configures the DMA32 space for them one by one.
The function was designed to be called during PHB fixup time.
When configuring PE's DMA32 space in pcibios_setup_bridge(), in
order to support PCI hotplug, we have to have the function PE
oriented.

The patch introduces one more argument, struct pnv_ioda_pe *pe,
to pnv_ioda_setup_dma(). The caller, pnv_pci_ioda_setup_DMA(),
walks the PHB's PE list and passes each PE to it. The patch
shouldn't cause logic changes.

Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com
---
v5:
  * Split from PATCH[v4 06/21]
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 60 ++-
 1 file changed, 27 insertions(+), 33 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index 4af3d06..63fad4d 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -2558,12 +2558,14 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb 
*phb,
pnv_ioda_setup_bus_dma(pe, pe-pbus);
 }
 
-static void pnv_ioda_setup_dma(struct pnv_phb *phb)
+static void pnv_ioda_setup_dma(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
 {
struct pci_controller *hose = phb-hose;
-   struct pnv_ioda_pe *pe;
unsigned int dma_weight;
 
+   if (!pe-dma32_weight)
+   return;
+
/* Calculate the PHB's DMA weight */
dma_weight = pnv_ioda_phb_dma_weight(phb);
pr_info(PCI%04x has %ld DMA32 segments, total weight %d\n,
@@ -2571,38 +2573,28 @@ static void pnv_ioda_setup_dma(struct pnv_phb *phb)
 
pnv_pci_ioda_setup_opal_tce_kill(phb);
 
-   /* Walk our PE list and configure their DMA segments, hand them
-* out one base segment plus any residual segments based on
-* weight
+   /*
+* For IODA2 compliant PHB3, we needn't care about the weight.
+* The all available 32-bits DMA space will be assigned to
+* the specific PE.
 */
-   list_for_each_entry(pe, phb-ioda.pe_dma_list, dma_link) {
-   if (!pe-dma32_weight)
-   continue;
+   if (phb-type == PNV_PHB_IODA1) {
+   unsigned int segs, base = 0;
 
-   /*
-* For IODA2 compliant PHB3, we needn't care about the weight.
-* The all available 32-bits DMA space will be assigned to
-* the specific PE.
-*/
-   if (phb-type == PNV_PHB_IODA1) {
-   unsigned int segs, base = 0;
-
-   if (pe-dma32_weight 
-   dma_weight / phb-ioda.dma32_segcount)
-   segs = 1;
-   else
-   segs = (pe-dma32_weight *
-   phb-ioda.dma32_segcount) / dma_weight;
-
-   pe_info(pe, DMA weight %d, assigned %d DMA32 
segments\n,
-   pe-dma32_weight, segs);
-   pnv_pci_ioda_setup_dma_pe(phb, pe, base, segs);
+   if (pe-dma32_weight 
+   dma_weight / phb-ioda.dma32_segcount)
+   segs = 1;
+   else
+   segs = (pe-dma32_weight *
+   phb-ioda.dma32_segcount) / dma_weight;
 
-   base += segs;
-   } else {
-   pe_info(pe, Assign DMA32 space\n);
-   pnv_pci_ioda2_setup_dma_pe(phb, pe);
-   }
+   pe_info(pe, DMA weight %d, assigned %d DMA32 segments\n,
+   pe-dma32_weight, segs);
+   pnv_pci_ioda_setup_dma_pe(phb, pe, base, segs);
+   base += segs;
+   } else {
+   pe_info(pe, Assign DMA32 space\n);
+   pnv_pci_ioda2_setup_dma_pe(phb, pe);
}
 }
 
@@ -3073,12 +3065,14 @@ static void pnv_pci_ioda_setup_DMA(void)
 {
struct pci_controller *hose, *tmp;
struct pnv_phb *phb;
+   struct pnv_ioda_pe *pe;
 
list_for_each_entry_safe(hose, tmp, hose_list, list_node) {
-   pnv_ioda_setup_dma(hose-private_data);
+   phb = hose-private_data;
+   list_for_each_entry(pe, phb-ioda.pe_dma_list, dma_link)
+   pnv_ioda_setup_dma(phb, pe);
 
/* Mark the PHB initialization done */
-   phb = hose-private_data;
phb-initialized = 1;
}
 }
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v5 24/42] powerpc/powernv: Release PEs dynamically

2015-06-04 Thread Gavin Shan
The patch adds a refcount to the PE, which counts the number of PCI
devices included in the PE. When the last device leaves the PE, the
PE together with its consumed resources (IO, DMA, PELTM/PELTV) is
released, in order to support PCI hotplug.

Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com
---
v5:
  * Derived from PATCH[v4 07/21]
---
 arch/powerpc/include/asm/pci-bridge.h |   1 +
 arch/powerpc/kernel/pci-hotplug.c |   5 +
 arch/powerpc/platforms/powernv/pci-ioda.c | 181 +-
 arch/powerpc/platforms/powernv/pci.h  |   2 +
 4 files changed, 183 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/pci-bridge.h 
b/arch/powerpc/include/asm/pci-bridge.h
index 1f39ca7..9a83cdb 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -26,6 +26,7 @@ struct pci_controller_ops {
/* Called when pci_enable_device() is called. Returns true to
 * allow assignment/enabling of the device. */
bool(*enable_device_hook)(struct pci_dev *);
+   void(*release_device)(struct pci_dev *);
 
/* Called during PCI resource reassignment */
resource_size_t (*window_alignment)(struct pci_bus *, unsigned long);
diff --git a/arch/powerpc/kernel/pci-hotplug.c 
b/arch/powerpc/kernel/pci-hotplug.c
index 98f84ed..21973e7 100644
--- a/arch/powerpc/kernel/pci-hotplug.c
+++ b/arch/powerpc/kernel/pci-hotplug.c
@@ -29,6 +29,11 @@
  */
 void pcibios_release_device(struct pci_dev *dev)
 {
+   struct pci_controller *hose = pci_bus_to_host(dev-bus);
+
+   if (hose-controller_ops.release_device)
+   hose-controller_ops.release_device(dev);
+
eeh_remove_device(dev);
 }
 
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index 2e31472..17ba55c 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -132,6 +132,50 @@ static inline bool pnv_pci_is_mem_pref_64(unsigned long 
flags)
(IORESOURCE_MEM_64 | IORESOURCE_PREFETCH));
 }
 
+static void pnv_pci_ioda_release_pe_dma(struct pnv_ioda_pe *pe)
+{
+   struct pnv_phb *phb = pe-phb;
+   struct iommu_table *tbl;
+   int seg;
+   int64_t rc;
+
+   /* No DMA32 segments allocated */
+   if (pe-dma32_seg  0 ||
+   pe-dma32_segcount = 0)
+   return;
+
+   /* Unlink IOMMU table from group */
+   tbl = pe-table_group.tables[0];
+   pnv_pci_unlink_table_and_group(tbl, pe-table_group);
+   if (pe-table_group.group) {
+   iommu_group_put(pe-table_group.group);
+   BUG_ON(pe-table_group.group);
+   }
+
+   /* Release IOMMU table */
+   free_pages(tbl-it_base,
+   get_order(TCE32_TABLE_SIZE * pe-dma32_segcount));
+   iommu_free_table(tbl,
+   of_node_full_name(pci_bus_to_OF_node(pe-pbus)));
+
+   /* Disable TVE */
+   for (seg = pe-dma32_seg;
+seg  pe-dma32_seg + pe-dma32_segcount;
+seg++) {
+   rc = opal_pci_map_pe_dma_window(phb-opal_id, pe-pe_number,
+   seg, 0, 0ul, 0ul, 0ul);
+   if (rc)
+   pe_warn(pe, Error %ld unmapping DMA32 seg#%d\n,
+   rc, seg);
+   }
+
+   /* Free the DMA32 segments */
+   bitmap_clear(phb-ioda.dma32_segmap,
+   pe-dma32_seg, pe-dma32_segcount);
+   pe-dma32_seg = -1;
+   pe-dma32_segcount = 0;
+}
+
 static inline void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_ioda_pe *pe)
 {
/* 01xb - invalidate TCEs that match the specified PE# */
@@ -203,6 +247,10 @@ static void pnv_pci_ioda2_release_pe_dma(struct 
pnv_ioda_pe *pe)
struct device_node *dn;
int64_t rc;
 
+   if (pe-dma32_seg  0 ||
+   pe-dma32_segcount = 0)
+   return;
+
tbl = pe-table_group.tables[0];
rc = pnv_pci_ioda2_unset_window(pe-table_group, 0);
if (rc)
@@ -227,6 +275,61 @@ static void pnv_pci_ioda2_release_pe_dma(struct 
pnv_ioda_pe *pe)
 
pnv_pci_ioda2_table_free_pages(tbl);
iommu_free_table(tbl, of_node_full_name(dn));
+   pe-dma32_seg = -1;
+   pe-dma32_segcount = 0;
+}
+
+static void pnv_ioda_release_pe_dma(struct pnv_ioda_pe *pe)
+{
+   struct pnv_phb *phb = pe-phb;
+
+   if (phb-type == PNV_PHB_IODA1)
+   pnv_pci_ioda_release_pe_dma(pe);
+   else if (phb-type == PNV_PHB_IODA2)
+   pnv_pci_ioda2_release_pe_dma(pe);
+}
+
+static void pnv_ioda_release_pe_seg(struct pnv_ioda_pe *pe)
+{
+   struct pnv_phb *phb = pe-phb;
+   unsigned long *segmap = NULL;
+   unsigned long *pe_segmap = NULL;
+   uint16_t win;
+   int segno;
+
+   for (win = OPAL_M32_WINDOW_TYPE; win = OPAL_IO_WINDOW_TYPE; win++) {
+   switch (win) {
+   case OPAL_IO_WINDOW_TYPE:
+ 

[PATCH v5 36/42] powerpc/pci: Export traverse_pci_device_nodes()

2015-06-04 Thread Gavin Shan
The patch exports the following functions, which are derived from
existing implementations, so that the PCI hotplug logic can reuse
them to add or remove pci_dn for all device nodes under the
specified PCI slot.

   traverse_pci_device_nodes()     (was traverse_pci_devices())
   add_pci_device_node_info()      (was update_dn_pci_info())
   remove_pci_device_node_info()   (newly added)

The patch also releases eeh_dev when its corresponding pci_dn
is released, since the two share the same life cycle.

Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com
---
v5:
  * Derived from PATCH[v4 17/21]
  * Fixed assignment in if condition from checkpatch.pl
---
 arch/powerpc/include/asm/pci-bridge.h  |  4 +-
 arch/powerpc/include/asm/ppc-pci.h |  7 ++--
 arch/powerpc/kernel/pci_dn.c   | 71 --
 arch/powerpc/platforms/pseries/msi.c   |  4 +-
 arch/powerpc/platforms/pseries/setup.c |  2 +-
 5 files changed, 70 insertions(+), 18 deletions(-)

diff --git a/arch/powerpc/include/asm/pci-bridge.h 
b/arch/powerpc/include/asm/pci-bridge.h
index 9a83cdb..d0b4b1a 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -222,7 +222,9 @@ extern struct pci_dn *pci_get_pdn_by_devfn(struct pci_bus 
*bus,
 extern struct pci_dn *pci_get_pdn(struct pci_dev *pdev);
 extern struct pci_dn *add_dev_pci_data(struct pci_dev *pdev);
 extern void remove_dev_pci_data(struct pci_dev *pdev);
-extern void *update_dn_pci_info(struct device_node *dn, void *data);
+extern void *add_pci_device_node_info(struct device_node *dn,
+ struct pci_controller *phb);
+extern void remove_pci_device_node_info(struct device_node *dn);
 
 static inline int pci_device_from_OF_node(struct device_node *np,
  u8 *bus, u8 *devfn)
diff --git a/arch/powerpc/include/asm/ppc-pci.h 
b/arch/powerpc/include/asm/ppc-pci.h
index 7388316..a5b0ea0 100644
--- a/arch/powerpc/include/asm/ppc-pci.h
+++ b/arch/powerpc/include/asm/ppc-pci.h
@@ -33,9 +33,10 @@ extern struct pci_dev *isa_bridge_pcidev;/* may be NULL 
if no ISA bus */
 struct device_node;
 struct pci_dn;
 
-typedef void *(*traverse_func)(struct device_node *me, void *data);
-void *traverse_pci_devices(struct device_node *start, traverse_func pre,
-   void *data);
+void *traverse_pci_device_nodes(struct device_node *start,
+   void *(*fn)(struct device_node *,
+   struct pci_controller *),
+   void *data);
 void *traverse_pci_dn(struct pci_dn *root,
  void *(*fn)(struct pci_dn *, void *),
  void *data);
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
index d4330d2..f821e96 100644
--- a/arch/powerpc/kernel/pci_dn.c
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -276,13 +276,17 @@ void remove_dev_pci_data(struct pci_dev *pdev)
 #endif /* CONFIG_PCI_IOV */
 }
 
-/*
- * Traverse_func that inits the PCI fields of the device node.
- * NOTE: this *must* be done before read/write config to the device.
+/**
+ * add_pci_device_node_info - Add pci_dn for PCI device node
+ * @dn: PCI device node
+ * @phb: PHB
+ *
+ * Add pci_dn for the indicated PCI device node. The newly created
+ * pci_dn will be put into the child list of the parent device node.
  */
-void *update_dn_pci_info(struct device_node *dn, void *data)
+void *add_pci_device_node_info(struct device_node *dn,
+  struct pci_controller *phb)
 {
-   struct pci_controller *phb = data;
const __be32 *type = of_get_property(dn, ibm,pci-config-space-type, 
NULL);
const __be32 *regs;
struct device_node *parent;
@@ -339,8 +343,48 @@ void *update_dn_pci_info(struct device_node *dn, void 
*data)
 
return NULL;
 }
+EXPORT_SYMBOL(add_pci_device_node_info);
 
-/*
+/**
+ * remove_pci_device_node_info - Remove pci_dn from PCI device node
+ * @dn: PCI device node
+ *
+ * Remove pci_dn from PCI device node. The pci_dn is also removed
+ * from the child list of the parent pci_dn.
+ */
+void remove_pci_device_node_info(struct device_node *np)
+{
+   struct pci_dn *pdn = np ? PCI_DN(np) : NULL;
+#ifdef CONFIG_EEH
+   struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+#endif
+
+   if (!pdn)
+   return;
+
+#ifdef CONFIG_EEH
+   if (edev) {
+   pdn-edev = NULL;
+   kfree(edev);
+   }
+#endif
+
+   BUG_ON(!list_empty(pdn-child_list));
+   list_del(pdn-list);
+   if (pdn-parent)
+   of_node_put(pdn-parent-node);
+
+   np-data = NULL;
+   kfree(pdn);
+}
+EXPORT_SYMBOL(remove_pci_device_node_info);
+
+/**
+ * traverse_pci_device_nodes - Traverse children of indicated device node
+ * @start: indicated device node
+ * @pre: callback
+ * @data: additional parameter to the callback
+ *
  * Traverse a device tree stopping each PCI device in the tree.
  * 

[PATCH v5 13/42] powerpc/pci: Override pcibios_setup_bridge()

2015-06-04 Thread Gavin Shan
The patch overrides pcibios_setup_bridge(), which is called to update
PCI bridge windows at completion of PCI resource assignment, so that
the next patch can assign PEs and set up various (resource) mappings.

Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com
---
v5:
  * Split from [PATCH v5 v4 06/21]
---
 arch/powerpc/include/asm/pci-bridge.h | 1 +
 arch/powerpc/kernel/pci-common.c  | 8 
 2 files changed, 9 insertions(+)

diff --git a/arch/powerpc/include/asm/pci-bridge.h 
b/arch/powerpc/include/asm/pci-bridge.h
index 1252cd5..1f39ca7 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -29,6 +29,7 @@ struct pci_controller_ops {
 
/* Called during PCI resource reassignment */
resource_size_t (*window_alignment)(struct pci_bus *, unsigned long);
+   void(*setup_bridge)(struct pci_bus *, unsigned long);
void(*reset_secondary_bus)(struct pci_dev *);
 };
 
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 0d05406..0358f24 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -122,6 +122,14 @@ resource_size_t pcibios_window_alignment(struct pci_bus 
*bus,
return 1;
 }
 
+void pcibios_setup_bridge(struct pci_bus *bus, unsigned long type)
+{
+   struct pci_controller *hose = pci_bus_to_host(bus);
+
+   if (hose-controller_ops.setup_bridge)
+   hose-controller_ops.setup_bridge(bus, type);
+}
+
 void pcibios_reset_secondary_bus(struct pci_dev *dev)
 {
struct pci_controller *phb = pci_bus_to_host(dev-bus);
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v5 17/42] powerpc/powernv: PE oriented during configuration

2015-06-04 Thread Gavin Shan
Except pnv_ioda_configure_pe(), all PE configuration related functions
are already PE oriented. The patch changes the return value from PE
number to PE instance for its callee for the purpose.

Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com
---
v5:
  * Split from PATCH[v4 07/21]
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 44 ---
 arch/powerpc/platforms/powernv/pci.h  |  3 ++-
 2 files changed, 25 insertions(+), 22 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index fd2f898..6187f84 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -132,25 +132,26 @@ static inline bool pnv_pci_is_mem_pref_64(unsigned long 
flags)
(IORESOURCE_MEM_64 | IORESOURCE_PREFETCH));
 }
 
-static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no)
+static struct pnv_ioda_pe *pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no)
 {
if (!(pe_no = 0  pe_no  phb-ioda.total_pe)) {
pr_warn(%s: Invalid PE %d on PHB#%x\n,
__func__, pe_no, phb-hose-global_number);
-   return;
+   return NULL;
}
 
if (test_and_set_bit(pe_no, phb-ioda.pe_alloc)) {
pr_warn(%s: PE %d was assigned on PHB#%x\n,
__func__, pe_no, phb-hose-global_number);
-   return;
+   return NULL;
}
 
phb-ioda.pe_array[pe_no].phb = phb;
phb-ioda.pe_array[pe_no].pe_number = pe_no;
+   return phb-ioda.pe_array[pe_no];
 }
 
-static int pnv_ioda_alloc_pe(struct pnv_phb *phb)
+static struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb)
 {
unsigned long pe_no;
unsigned long limit = phb-ioda.total_pe - 1;
@@ -163,12 +164,12 @@ static int pnv_ioda_alloc_pe(struct pnv_phb *phb)
break;
 
if (--limit = phb-ioda.total_pe)
-   return IODA_INVALID_PE;
+   return NULL;
} while (1);
 
phb-ioda.pe_array[pe_no].phb = phb;
phb-ioda.pe_array[pe_no].pe_number = pe_no;
-   return pe_no;
+   return phb-ioda.pe_array[pe_no];
 }
 
 static void pnv_ioda_free_pe(struct pnv_phb *phb, int pe)
@@ -389,8 +390,8 @@ static void pnv_ioda_reserve_m64_pe(struct pnv_phb *phb,
}
 }
 
-static int pnv_ioda_pick_m64_pe(struct pnv_phb *phb,
-   struct pci_bus *bus, int all)
+static struct pnv_ioda_pe *pnv_ioda_pick_m64_pe(struct pnv_phb *phb,
+   struct pci_bus *bus, int all)
 {
resource_size_t segsz = phb-ioda.m64_segsize;
struct pci_dev *pdev;
@@ -401,13 +402,13 @@ static int pnv_ioda_pick_m64_pe(struct pnv_phb *phb,
int i;
 
if (!pnv_ioda_need_m64_pe(phb, bus))
-   return IODA_INVALID_PE;
+   return NULL;
 
/* Allocate bitmap */
size = _ALIGN_UP(phb-ioda.total_pe / 8, sizeof(unsigned long));
pe_bitmap = kzalloc(size, GFP_KERNEL);
if (!pe_bitmap)
-   return IODA_INVALID_PE;
+   return NULL;
 
/* The bridge's M64 window might be extended to PHB's M64
 * window by intention to support PCI hotplug. So we have
@@ -444,7 +445,7 @@ static int pnv_ioda_pick_m64_pe(struct pnv_phb *phb,
/* No M64 window found ? */
if (bitmap_empty(pe_bitmap, phb-ioda.total_pe)) {
kfree(pe_bitmap);
-   return IODA_INVALID_PE;
+   return NULL;
}
 
/* Figure out the master PE and put all slave PEs
@@ -495,7 +496,7 @@ static int pnv_ioda_pick_m64_pe(struct pnv_phb *phb,
}
 
kfree(pe_bitmap);
-   return master_pe-pe_number;
+   return master_pe;
 }
 
 static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb)
@@ -1224,7 +1225,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct 
pci_bus *bus, int all)
 {
struct pci_controller *hose = pci_bus_to_host(bus);
struct pnv_phb *phb = hose-private_data;
-   struct pnv_ioda_pe *pe;
+   struct pnv_ioda_pe *pe = NULL;
int pe_num = IODA_INVALID_PE;
 
/* For partial hotplug case, the PE instance hasn't been destroyed
@@ -1240,24 +1241,25 @@ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct 
pci_bus *bus, int all)
}
 
/* PE number for root bus should have been reserved */
-   if (pci_is_root_bus(bus))
-   pe_num = phb-ioda.root_pe;
+   if (pci_is_root_bus(bus) 
+   phb-ioda.root_pe != IODA_INVALID_PE)
+   pe = phb-ioda.pe_array[phb-ioda.root_pe];
 
/* Check if PE is determined by M64 */
-   if (pe_num == IODA_INVALID_PE  phb-pick_m64_pe)
-   pe_num = phb-pick_m64_pe(phb, bus, all);
+   if (!pe  phb-pick_m64_pe)
+   pe = phb-pick_m64_pe(phb, bus, all);
 
/* The PE 

[PATCH v5 19/42] powerpc/powernv: Remove DMA32 list of PEs

2015-06-04 Thread Gavin Shan
PEs were put into the list, maintained by PHB, according to their DMA32
weight. After that, the list was iterated to initialize each PE's DMA
capability. Now the PE is created and its DMA capability is
initialized right away. So we don't need the list and the patch
removes it.

Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com
---
v5:
  * Newly introduced
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 18 --
 arch/powerpc/platforms/powernv/pci.h  |  6 --
 2 files changed, 24 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index f0b54ab..0447534 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -992,20 +992,6 @@ out:
return 0;
 }
 
-static void pnv_ioda_link_pe_by_weight(struct pnv_phb *phb,
-  struct pnv_ioda_pe *pe)
-{
-   struct pnv_ioda_pe *lpe;
-
-   list_for_each_entry(lpe, phb-ioda.pe_dma_list, dma_link) {
-   if (lpe-dma32_weight  pe-dma32_weight) {
-   list_add_tail(pe-dma_link, lpe-dma_link);
-   return;
-   }
-   }
-   list_add_tail(pe-dma_link, phb-ioda.pe_dma_list);
-}
-
 static unsigned int pnv_ioda_dev_dma_weight(struct pci_dev *dev)
 {
struct pci_controller *hose = pci_bus_to_host(dev-bus);
@@ -1296,9 +1282,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct 
pci_bus *bus, int all)
/* Put PE to the list */
list_add_tail(pe-list, phb-ioda.pe_list);
 
-   /* Link the PE */
-   pnv_ioda_link_pe_by_weight(phb, pe);
-
return pe;
 }
 
@@ -3421,7 +3404,6 @@ static void __init pnv_pci_init_ioda_phb(struct 
device_node *np,
phb-ioda.root_pe = IODA_INVALID_PE;
}
 
-   INIT_LIST_HEAD(phb-ioda.pe_dma_list);
INIT_LIST_HEAD(phb-ioda.pe_list);
mutex_init(phb-ioda.pe_list_mutex);
 
diff --git a/arch/powerpc/platforms/powernv/pci.h 
b/arch/powerpc/platforms/powernv/pci.h
index 64c7f03..bf63481 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -79,7 +79,6 @@ struct pnv_ioda_pe {
struct list_headslaves;
 
/* Link in list of PE#s */
-   struct list_headdma_link;
struct list_headlist;
 };
 
@@ -186,11 +185,6 @@ struct pnv_phb {
/* Number of 32-bit DMA segments */
unsigned long   dma32_segcount;
 
-   /* Sorted list of used PE's, sorted at
-* boot for resource allocation purposes
-*/
-   struct list_headpe_dma_list;
-
/* TCE cache invalidate registers (physical and
 * remapped)
 */
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH V7 06/10] powerpc/eeh: Create PE for VFs

2015-06-04 Thread Gavin Shan
On Thu, Jun 04, 2015 at 01:46:15PM +0800, Wei Yang wrote:
On Wed, Jun 03, 2015 at 10:46:38AM -0500, Bjorn Helgaas wrote:
On Wed, Jun 03, 2015 at 03:10:23PM +1000, Gavin Shan wrote:
 On Wed, Jun 03, 2015 at 11:31:42AM +0800, Wei Yang wrote:
 On Mon, Jun 01, 2015 at 06:46:45PM -0500, Bjorn Helgaas wrote:
 On Tue, May 19, 2015 at 06:50:08PM +0800, Wei Yang wrote:
  Current EEH recovery code works with the assumption: the PE has primary
  bus. Unfortunately, that's not true to VF PEs, which generally contains
  one or multiple VFs (for VF group case). The patch creates PEs for VFs
  at PCI final fixup time. Those PEs for VFs are indentified with newly
  introduced flag EEH_PE_VF so that we handle them differently during
  EEH recovery.
  
  [gwshan: changelog and code refactoring]
  Signed-off-by: Wei Yang weiy...@linux.vnet.ibm.com
  Acked-by: Gavin Shan gws...@linux.vnet.ibm.com
  ---
   arch/powerpc/include/asm/eeh.h   |1 +
   arch/powerpc/kernel/eeh_pe.c |   10 --
   arch/powerpc/platforms/powernv/eeh-powernv.c |   17 +
   3 files changed, 26 insertions(+), 2 deletions(-)
  
  diff --git a/arch/powerpc/include/asm/eeh.h 
  b/arch/powerpc/include/asm/eeh.h
  index 1b3614d..c1fde48 100644
  --- a/arch/powerpc/include/asm/eeh.h
  +++ b/arch/powerpc/include/asm/eeh.h
  @@ -70,6 +70,7 @@ struct pci_dn;
   #define EEH_PE_PHB (1  1)/* PHB PE*/
   #define EEH_PE_DEVICE  (1  2)/* Device PE */
   #define EEH_PE_BUS (1  3)/* Bus PE*/
  +#define EEH_PE_VF  (1  4)/* VF PE */
   
   #define EEH_PE_ISOLATED(1  0)/* Isolated PE  
  */
   #define EEH_PE_RECOVERING  (1  1)/* Recovering PE
  */
  diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
  index 35f0b62..260a701 100644
  --- a/arch/powerpc/kernel/eeh_pe.c
  +++ b/arch/powerpc/kernel/eeh_pe.c
  @@ -299,7 +299,10 @@ static struct eeh_pe *eeh_pe_get_parent(struct 
  eeh_dev *edev)
   * EEH device already having associated PE, but
   * the direct parent EEH device doesn't have yet.
   */
  -   pdn = pdn ? pdn-parent : NULL;
  +   if (edev-physfn)
  +   pdn = pci_get_pdn(edev-physfn);
  +   else
  +   pdn = pdn ? pdn-parent : NULL;
  while (pdn) {
  /* We're poking out of PCI territory */
  parent = pdn_to_eeh_dev(pdn);
  @@ -382,7 +385,10 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
  }
   
  /* Create a new EEH PE */
  -   pe = eeh_pe_alloc(edev-phb, EEH_PE_DEVICE);
  +   if (edev-physfn)
  +   pe = eeh_pe_alloc(edev-phb, EEH_PE_VF);
  +   else
  +   pe = eeh_pe_alloc(edev-phb, EEH_PE_DEVICE);
  if (!pe) {
  pr_err(%s: out of memory!\n, __func__);
  return -ENOMEM;
  diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c 
  b/arch/powerpc/platforms/powernv/eeh-powernv.c
  index ce738ab..c505036 100644
  --- a/arch/powerpc/platforms/powernv/eeh-powernv.c
  +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
  @@ -1520,6 +1520,23 @@ static struct eeh_ops pnv_eeh_ops = {
  .restore_config = pnv_eeh_restore_config
   };
   
  +static void pnv_eeh_vf_final_fixup(struct pci_dev *pdev)
  +{
  +   struct pci_dn *pdn = pci_get_pdn(pdev);
  +
  +   if (!pdev-is_virtfn)
  +   return;
  +
  +   /*
  +* The following operations will fail if VF's sysfs files
  +* aren't created or its resources aren't finalized.
  +*/
 
 I don't understand this comment.  The following operations seems to 
 refer
 to eeh_add_device_early() and eeh_add_device_late(), and
 VF's sysfs files being created seems to refer to eeh_sysfs_add_device().
 
 So the comment suggests that eeh_add_device_early() and
 eeh_add_device_late() will fail because they're called before
 eeh_sysfs_add_device().  So I think you must be talking about some other
 following operations, not eeh_add_device_early() and
 eeh_add_device_late().
 
 Sorry for this confusion.
 
 The comment here wants to say the eeh_sysfs_add_device() will fail if the 
 VF's
 sysfs is not created well. Or it will fail if the VF's resources are not 
 set
 properly, since we would cache the VF's BAR in eeh_add_device_late().
 
 Gavin,
 
 If my understanding is not correct please let me know.
 
 
 It's correct. The following operations refers to eeh_add_device_late()
 and eeh_sysfs_add_device(). The former one requires the resources for
 one particular PCI device (VF here) are finalized (assigned). 
 eeh_sysfs_add_device()
 will fail if the sysfs entry for the PCI device isn't populated yet.

eeh_add_device_late() contains several things that read config space:
eeh_save_bars() caches the entire config header, and
eeh_addr_cache_insert_dev() looks at the device resources (which are
determined by BARs in config 

[PATCH v5 01/42] PCI: Add pcibios_setup_bridge()

2015-06-04 Thread Gavin Shan
Currently, the PowerPC PowerNV platform utilizes ppc_md.pcibios_fixup(),
which is called once after PCI probing and resource assignment
are completed, to allocate platform required resources for PCI devices:
PE#, IO and MMIO mapping, DMA address translation (TCE) table etc.
Obviously, it's not hotplug friendly.

The patch adds weak function pcibios_setup_bridge(), which is called
by pci_setup_bridge(). PowerPC PowerNV platform will reuse the function
to assign above platform required resources to newly added PCI devices,
in order to support PCI hotplug in subsequent patches.

Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com
---
v5:
  * Corrected subject as Bjorn suggested
  * pci_setup_bridge() calls pcibios_setup_bridge() and __pci_setup_bridge()
---
 drivers/pci/setup-bus.c | 5 +
 include/linux/pci.h | 1 +
 2 files changed, 6 insertions(+)

diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index 4fd0cac..623dee3 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -693,11 +693,16 @@ static void __pci_setup_bridge(struct pci_bus *bus, 
unsigned long type)
pci_write_config_word(bridge, PCI_BRIDGE_CONTROL, bus-bridge_ctl);
 }
 
+void __weak pcibios_setup_bridge(struct pci_bus *bus, unsigned long type)
+{
+}
+
 void pci_setup_bridge(struct pci_bus *bus)
 {
unsigned long type = IORESOURCE_IO | IORESOURCE_MEM |
  IORESOURCE_PREFETCH;
 
+   pcibios_setup_bridge(bus, type);
__pci_setup_bridge(bus, type);
 }
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 94bacfa..5aacd0a 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -811,6 +811,7 @@ void pci_stop_and_remove_bus_device_locked(struct pci_dev 
*dev);
 void pci_stop_root_bus(struct pci_bus *bus);
 void pci_remove_root_bus(struct pci_bus *bus);
 void pci_setup_cardbus(struct pci_bus *bus);
+void pcibios_setup_bridge(struct pci_bus *bus, unsigned long type);
 void pci_sort_breadthfirst(void);
 #define dev_is_pci(d) ((d)-bus == pci_bus_type)
 #define dev_is_pf(d) ((dev_is_pci(d) ? to_pci_dev(d)-is_physfn : false))
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v5 23/42] powerpc/powernv: Cleanup on pnv_pci_ioda2_release_dma_pe()

2015-06-04 Thread Gavin Shan
The patch applies cleanup on pnv_pci_ioda2_release_dma_pe():

  * Rename it to pnv_pci_ioda2_release_pe_dma() to match the
function names used to release resources for one PE in the
subsequent patches.
  * Remove the parameter of PCI device, which is used to figure
out device node. VFs don't have associated device nodes in
SRIOV case. For other cases, the device node can be figured
out from the PCI bus or device the PE was allocated for.

Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com
---
v5:
  * Newly introduced
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 26 +++---
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index 3d5aec8d..2e31472 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -197,11 +197,11 @@ static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe 
*pe, bool enable)
pe-tce_bypass_enabled = enable;
 }
 
-static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev,
-struct pnv_ioda_pe *pe)
+static void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe)
 {
-   struct iommu_table*tbl;
-   int64_t   rc;
+   struct iommu_table *tbl;
+   struct device_node *dn;
+   int64_t rc;
 
tbl = pe-table_group.tables[0];
rc = pnv_pci_ioda2_unset_window(pe-table_group, 0);
@@ -213,8 +213,20 @@ static void pnv_pci_ioda2_release_dma_pe(struct pci_dev 
*dev,
iommu_group_put(pe-table_group.group);
BUG_ON(pe-table_group.group);
}
+
+   if (pe-flags  (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))
+   dn = pci_bus_to_OF_node(pe-pbus);
+   else if (pe-flags  PNV_IODA_PE_DEV)
+   dn = pci_device_to_OF_node(pe-pdev);
+#ifdef CONFIG_PCI_IOV
+   else if (pe-flags  PNV_IODA_PE_VF)
+   dn = pci_device_to_OF_node(pe-parent_dev);
+#endif
+   else
+   dn = NULL;
+
pnv_pci_ioda2_table_free_pages(tbl);
-   iommu_free_table(tbl, of_node_full_name(dev-dev.of_node));
+   iommu_free_table(tbl, of_node_full_name(dn));
 }
 
 static int pnv_ioda_set_one_peltv(struct pnv_phb *phb,
@@ -1495,14 +1507,14 @@ static void pnv_ioda_release_vf_PE(struct pci_dev 
*pdev, u16 num_vfs)
if ((pe-flags  PNV_IODA_PE_MASTER) 
(pe-flags  PNV_IODA_PE_VF)) {
list_for_each_entry_safe(s, sn, pe-slaves, list) {
-   pnv_pci_ioda2_release_dma_pe(pdev, s);
+   pnv_pci_ioda2_release_pe_dma(s);
list_del(s-list);
pnv_ioda_deconfigure_pe(phb, s);
pnv_ioda_free_pe(phb, s-pe_number);
}
}
 
-   pnv_pci_ioda2_release_dma_pe(pdev, pe);
+   pnv_pci_ioda2_release_pe_dma(pe);
 
/* Remove from list */
mutex_lock(phb-ioda.pe_list_mutex);
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v5 03/42] powerpc/powernv: M64 support improvement

2015-06-04 Thread Gavin Shan
We have a limitation enforced by hardware (on PHB3) or by software
(on P7IOC): M64 segment#x can only be assigned to PE#x. IO and M32
segments can be mapped to an arbitrary PE# via IODT and M32DT. It means
the PE number should be x if M64 segment#x has been assigned to the
PE. Also, each PE owns one M64 segment at most. Currently, we are
reserving PE# according to root port's M64 window. It won't be reliable
once we extend M64 windows of root port, or the upstream port of the
PCIE switch behind root port to PHB's M64 window, in order to support
PCI hotplug in future.

The patch reserves PE# for M64 segments according to the M64 resources
of the PCI devices (not bridges) contained in the PE. Besides, it's
always worthwhile to trace the M64 segments consumed by the PE, so that
they can be released at PCI unplug time.

Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com
---
v5:
  * Made the changelog more descriptive on the fixed M64 seg# mapping
  * Dropped unnecessary and corrected comments pointed by aik
  * Replace pe_bitsmap with pe_bitmap
  * Fixed coding style complained by checkpatch.pl
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 189 ++
 arch/powerpc/platforms/powernv/pci.h  |  10 +-
 2 files changed, 121 insertions(+), 78 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index 245ef81..71afb38 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -294,28 +294,78 @@ fail:
return -EIO;
 }
 
-static void pnv_ioda_reserve_m64_pe(struct pnv_phb *phb)
+/* We extend the M64 window of root port, or the upstream bridge port
+ * of the PCIE switch behind root port. So we shouldn't reserve PEs
+ * for M64 resources because there are no (normal) PCI devices consuming
+ * M64 resources on the PCI buses leading from root port, or the upstream
+ * bridge port. The function returns true if the indicated PCI bus needs
+ * reserved PEs because of M64 resources in advance. Otherwise, the
+ * function returns false.
+ */
+static bool pnv_ioda_need_m64_pe(struct pnv_phb *phb,
+struct pci_bus *bus)
 {
-   resource_size_t sgsz = phb-ioda.m64_segsize;
+   if (!bus || pci_is_root_bus(bus))
+   return false;
+
+   /* Bus leading from root port. We need check what types of PCI
+* devices on the bus. If it's connecting PCI bridge, we don't
+* need reserve M64 PEs for it. Otherwise, we still need to do
+* that.
+*/
+   if (pci_is_root_bus(bus-self-bus)) {
+   struct pci_dev *pdev;
+
+   list_for_each_entry(pdev, bus-devices, bus_list) {
+   if (pdev-hdr_type == PCI_HEADER_TYPE_NORMAL)
+   return true;
+   }
+
+   return false;
+   }
+
+   /* Bus leading from the upstream bridge port on top level */
+   if (pci_is_root_bus(bus-self-bus-self-bus))
+   return false;
+
+   return true;
+}
+
+static void pnv_ioda_reserve_m64_pe(struct pnv_phb *phb,
+   struct pci_bus *bus)
+{
+   resource_size_t segsz = phb-ioda.m64_segsize;
struct pci_dev *pdev;
struct resource *r;
-   int base, step, i;
+   unsigned long pe_no, limit;
+   int i;
 
-   /*
-* Root bus always has full M64 range and root port has
-* M64 range used in reality. So we're checking root port
-* instead of root bus.
+   if (!pnv_ioda_need_m64_pe(phb, bus))
+   return;
+
+   /* The bridge's M64 window might have been extended to the
+* PHB's M64 window in order to support PCI hotplug. So the
+* bridge's M64 window isn't reliable to be used for picking
+* PE# for its leading PCI bus. We have to check the M64
+* resources consumed by the PCI devices, which seat on the
+* PCI bus.
 */
-   list_for_each_entry(pdev, phb-hose-bus-devices, bus_list) {
-   for (i = 0; i  PCI_BRIDGE_RESOURCE_NUM; i++) {
-   r = pdev-resource[PCI_BRIDGE_RESOURCES + i];
-   if (!r-parent ||
-   !pnv_pci_is_mem_pref_64(r-flags))
+   list_for_each_entry(pdev, bus-devices, bus_list) {
+   for (i = 0; i  PCI_NUM_RESOURCES; i++) {
+#ifdef CONFIG_PCI_IOV
+   if (i = PCI_IOV_RESOURCES  i = PCI_IOV_RESOURCE_END)
+   continue;
+#endif
+   r = pdev-resource[i];
+   if (!r-flags || r-start = r-end ||
+   !r-parent || !pnv_pci_is_mem_pref_64(r-flags))
continue;
 
-   base = (r-start - phb-ioda.m64_base) / sgsz;
-   for (step = 0; step  resource_size(r) / sgsz; step++)
-   

[PATCH v5 04/42] powerpc/powernv: Trace consumed IO and M32 segments by PE

2015-06-04 Thread Gavin Shan
The patch introduces two bitmaps to trace the IO and M32 segments
consumed by one particular PE, which can be released once the PE
is destroyed at PCI unplug time. Also, we're using a fixed
number of bits to trace the IO and M32 segments used by PEs in
one particular PHB. Besides, @pe_array is moved to the location
adjacent to @pe_alloc on account of their close relation.

Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com
---
v5:
  * Split from PATCH[v4 04/21]
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 17 +
 arch/powerpc/platforms/powernv/pci.h  | 11 ++-
 2 files changed, 11 insertions(+), 17 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index 71afb38..53d0efd 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -2992,7 +2992,8 @@ static void pnv_ioda_setup_pe_seg(struct pci_controller 
*hose,
 
while (index  phb-ioda.total_pe 
   region.start = region.end) {
-   phb-ioda.io_segmap[index] = pe-pe_number;
+   set_bit(index, phb-ioda.io_segmap);
+   set_bit(index, pe-io_segmap);
rc = opal_pci_map_pe_mmio_window(phb-opal_id,
pe-pe_number, OPAL_IO_WINDOW_TYPE, 0, 
index);
if (rc != OPAL_SUCCESS) {
@@ -3017,7 +3018,8 @@ static void pnv_ioda_setup_pe_seg(struct pci_controller 
*hose,
 
while (index  phb-ioda.total_pe 
   region.start = region.end) {
-   phb-ioda.m32_segmap[index] = pe-pe_number;
+   set_bit(index, phb-ioda.m32_segmap);
+   set_bit(index, pe-m32_segmap);
rc = opal_pci_map_pe_mmio_window(phb-opal_id,
pe-pe_number, OPAL_M32_WINDOW_TYPE, 0, 
index);
if (rc != OPAL_SUCCESS) {
@@ -3196,7 +3198,7 @@ static void __init pnv_pci_init_ioda_phb(struct 
device_node *np,
 {
struct pci_controller *hose;
struct pnv_phb *phb;
-   unsigned long size, m32map_off, pemap_off, iomap_off = 0;
+   unsigned long size, pemap_off;
const __be64 *prop64;
const __be32 *prop32;
int len;
@@ -3281,19 +3283,10 @@ static void __init pnv_pci_init_ioda_phb(struct 
device_node *np,
 
/* Allocate aux data  arrays. We don't have IO ports on PHB3 */
size = _ALIGN_UP(phb-ioda.total_pe / 8, sizeof(unsigned long));
-   m32map_off = size;
-   size += phb-ioda.total_pe * sizeof(phb-ioda.m32_segmap[0]);
-   if (phb-type == PNV_PHB_IODA1) {
-   iomap_off = size;
-   size += phb-ioda.total_pe * sizeof(phb-ioda.io_segmap[0]);
-   }
pemap_off = size;
size += phb-ioda.total_pe * sizeof(struct pnv_ioda_pe);
aux = memblock_virt_alloc(size, 0);
phb-ioda.pe_alloc = aux;
-   phb-ioda.m32_segmap = aux + m32map_off;
-   if (phb-type == PNV_PHB_IODA1)
-   phb-ioda.io_segmap = aux + iomap_off;
phb-ioda.pe_array = aux + pemap_off;
set_bit(phb-ioda.reserved_pe, phb-ioda.pe_alloc);
 
diff --git a/arch/powerpc/platforms/powernv/pci.h 
b/arch/powerpc/platforms/powernv/pci.h
index 54657f4..0a8cecb 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -54,6 +54,8 @@ struct pnv_ioda_pe {
 * by slave PEs will be contributed to the master PE. One
 * PE can own multiple IO and M32 segments.
 */
+   unsigned long   io_segmap[8];
+   unsigned long   m32_segmap[8];
unsigned long   m64_segmap[8];
 
/* Weight assigned to the PE for the sake of DMA resource
@@ -154,16 +156,15 @@ struct pnv_phb {
unsigned intio_segsize;
unsigned intio_pci_base;
 
-   /* PE allocation bitmap */
+   /* PE allocation */
unsigned long   *pe_alloc;
-   /* PE allocation mutex */
+   struct pnv_ioda_pe  *pe_array;
struct mutexpe_alloc_mutex;
 
/* M32  IO segment maps */
+   unsigned long   io_segmap[8];
+   unsigned long   m32_segmap[8];
unsigned long   m64_segmap[8];
-   unsigned int*m32_segmap;
-   unsigned int*io_segmap;
-   struct pnv_ioda_pe  *pe_array;
 
/* IRQ chip */
int irq_chip_init;
-- 

[PATCH v5 18/42] powerpc/powernv: Helper function pnv_ioda_init_pe()

2015-06-04 Thread Gavin Shan
The patch introduces the helper function pnv_ioda_init_pe(), which
initializes the PE instance after reserving or allocating the PE#, to
simplify the code. The patch doesn't introduce behavioural
changes.

Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com
---
v5:
  * Split from PATCH[v4 07/21]
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 19 +--
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index 6187f84..f0b54ab 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -132,6 +132,17 @@ static inline bool pnv_pci_is_mem_pref_64(unsigned long 
flags)
(IORESOURCE_MEM_64 | IORESOURCE_PREFETCH));
 }
 
+static struct pnv_ioda_pe *pnv_ioda_init_pe(struct pnv_phb *phb, int pe_no)
+{
+   struct pnv_ioda_pe *pe = phb-ioda.pe_array[pe_no];
+
+   pe-phb = phb;
+   pe-pe_number = pe_no;
+   INIT_LIST_HEAD(pe-list);
+
+   return pe;
+}
+
 static struct pnv_ioda_pe *pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no)
 {
if (!(pe_no = 0  pe_no  phb-ioda.total_pe)) {
@@ -146,9 +157,7 @@ static struct pnv_ioda_pe *pnv_ioda_reserve_pe(struct 
pnv_phb *phb, int pe_no)
return NULL;
}
 
-   phb-ioda.pe_array[pe_no].phb = phb;
-   phb-ioda.pe_array[pe_no].pe_number = pe_no;
-   return phb-ioda.pe_array[pe_no];
+   return pnv_ioda_init_pe(phb, pe_no);
 }
 
 static struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb)
@@ -167,9 +176,7 @@ static struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb 
*phb)
return NULL;
} while (1);
 
-   phb-ioda.pe_array[pe_no].phb = phb;
-   phb-ioda.pe_array[pe_no].pe_number = pe_no;
-   return phb-ioda.pe_array[pe_no];
+   return pnv_ioda_init_pe(phb, pe_no);
 }
 
 static void pnv_ioda_free_pe(struct pnv_phb *phb, int pe)
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v5 21/42] powerpc/powernv: Drop pnv_ioda_setup_dev_PE()

2015-06-04 Thread Gavin Shan
Nobody is using this function. The patch drops it.

Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com
Reviewed-by: Alexey Kardashevskiy a...@ozlabs.ru
---
v5:
  * Derived from PATCH[v4 08/21]
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 71 ---
 1 file changed, 71 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index e9165fa..8a79403 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -,77 +,6 @@ static int pnv_pci_vf_resource_shift(struct pci_dev 
*dev, int offset)
 }
 #endif /* CONFIG_PCI_IOV */
 
-#if 0
-static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
-{
-   struct pci_controller *hose = pci_bus_to_host(dev-bus);
-   struct pnv_phb *phb = hose-private_data;
-   struct pci_dn *pdn = pci_get_pdn(dev);
-   struct pnv_ioda_pe *pe;
-   int pe_num;
-
-   if (!pdn) {
-   pr_err(%s: Device tree node not associated properly\n,
-  pci_name(dev));
-   return NULL;
-   }
-   if (pdn-pe_number != IODA_INVALID_PE)
-   return NULL;
-
-   /* PE#0 has been pre-set */
-   if (dev-bus-number == 0)
-   pe_num = 0;
-   else
-   pe_num = pnv_ioda_alloc_pe(phb);
-   if (pe_num == IODA_INVALID_PE) {
-   pr_warning(%s: Not enough PE# available, disabling device\n,
-  pci_name(dev));
-   return NULL;
-   }
-
-   /* NOTE: We get only one ref to the pci_dev for the pdn, not for the
-* pointer in the PE data structure, both should be destroyed at the
-* same time. However, this needs to be looked at more closely again
-* once we actually start removing things (Hotplug, SR-IOV, ...)
-*
-* At some point we want to remove the PDN completely anyways
-*/
-   pe = phb-ioda.pe_array[pe_num];
-   pci_dev_get(dev);
-   pdn-pcidev = dev;
-   pdn-pe_number = pe_num;
-   pe-pdev = dev;
-   pe-pbus = NULL;
-   pe-tce32_seg = -1;
-   pe-mve_number = -1;
-   pe-rid = dev-bus-number  8 | pdn-devfn;
-
-   pe_info(pe, Associated device to PE\n);
-
-   if (pnv_ioda_configure_pe(phb, pe)) {
-   /* XXX What do we do here ? */
-   if (pe_num)
-   pnv_ioda_free_pe(phb, pe_num);
-   pdn-pe_number = IODA_INVALID_PE;
-   pe-pdev = NULL;
-   pci_dev_put(dev);
-   return NULL;
-   }
-
-   /* Assign a DMA weight to the device */
-   pe-dma_weight = pnv_ioda_dma_weight(dev);
-   if (pe-dma_weight != 0) {
-   phb-ioda.dma_weight += pe-dma_weight;
-   phb-ioda.dma_pe_count++;
-   }
-
-   /* Link the PE */
-   pnv_ioda_link_pe_by_weight(phb, pe);
-
-   return pe;
-}
-#endif /* Useful for SRIOV case */
-
 static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe)
 {
struct pci_dev *dev;
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v5 41/42] drivers/of: Return allocated memory chunk from of_fdt_unflatten_tree()

2015-06-04 Thread Gavin Shan
The patch changes of_fdt_unflatten_tree() so that it returns the
allocated memory chunk for unflattened device-tree, which can be
released once it's obsoleted.

Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com
---
v5:
  * Newly introduced
---
 drivers/of/fdt.c   | 21 +++--
 include/linux/of_fdt.h |  6 +++---
 2 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index b6a6c59..a954279 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -405,10 +405,10 @@ static void *unflatten_dt_node(void *blob,
  * @dt_alloc: An allocator that provides a virtual address to memory
  * for the resulting tree
  */
-static void __unflatten_device_tree(void *blob,
-   struct device_node *dad,
-   struct device_node **mynodes,
-   void * (*dt_alloc)(u64 size, u64 align))
+static void *__unflatten_device_tree(void *blob,
+struct device_node *dad,
+struct device_node **mynodes,
+void * (*dt_alloc)(u64 size, u64 align))
 {
unsigned long size;
int start;
@@ -418,7 +418,7 @@ static void __unflatten_device_tree(void *blob,
 
if (!blob) {
pr_debug(No device tree pointer\n);
-   return;
+   return NULL;
}
 
pr_debug(Unflattening device tree:\n);
@@ -428,7 +428,7 @@ static void __unflatten_device_tree(void *blob,
 
if (fdt_check_header(blob)) {
pr_err(Invalid device tree blob header\n);
-   return;
+   return NULL;
}
 
/* First pass, scan for size */
@@ -455,6 +455,7 @@ static void __unflatten_device_tree(void *blob,
   be32_to_cpup(mem + size));
 
pr_debug( - unflatten_device_tree()\n);
+   return mem;
 }
 
 static void *kernel_tree_alloc(u64 size, u64 align)
@@ -470,11 +471,11 @@ static void *kernel_tree_alloc(u64 size, u64 align)
  * pointers of the nodes so the normal device-tree walking functions
  * can be used.
  */
-void of_fdt_unflatten_tree(unsigned long *blob,
-  struct device_node *dad,
-  struct device_node **mynodes)
+void *of_fdt_unflatten_tree(unsigned long *blob,
+   struct device_node *dad,
+   struct device_node **mynodes)
 {
-   __unflatten_device_tree(blob, dad, mynodes, kernel_tree_alloc);
+   return __unflatten_device_tree(blob, dad, mynodes, kernel_tree_alloc);
 }
 EXPORT_SYMBOL_GPL(of_fdt_unflatten_tree);
 
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index 8882640..8a38c6a 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -37,9 +37,9 @@ extern bool of_fdt_is_big_endian(const void *blob,
 unsigned long node);
 extern int of_fdt_match(const void *blob, unsigned long node,
const char *const *compat);
-extern void of_fdt_unflatten_tree(unsigned long *blob,
- struct device_node *dad,
- struct device_node **mynodes);
+extern void *of_fdt_unflatten_tree(unsigned long *blob,
+  struct device_node *dad,
+  struct device_node **mynodes);
 
 /* TBD: Temporary export of fdt globals - remove when code fully merged */
 extern int __initdata dt_root_addr_cells;
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v5 12/42] powerpc/pci: Cleanup on pci_controller_ops

2015-06-04 Thread Gavin Shan
Each PHB maintains one instance of struct pci_controller_ops,
which includes various callbacks called by PCI subsystem. In the
definition of this struct, some callbacks have explicit names for
their arguments, but the rest don't.

The patch removes all explicit names of the arguments to the
callbacks in struct pci_controller_ops to keep the code looking
consistent.

Cc: Daniel Axtens d...@axtens.net
Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com
---
v5:
  * Newly introduced
---
 arch/powerpc/include/asm/pci-bridge.h | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/pci-bridge.h 
b/arch/powerpc/include/asm/pci-bridge.h
index 744884b..1252cd5 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -18,8 +18,8 @@ struct device_node;
  * PCI controller operations
  */
 struct pci_controller_ops {
-   void(*dma_dev_setup)(struct pci_dev *dev);
-   void(*dma_bus_setup)(struct pci_bus *bus);
+   void(*dma_dev_setup)(struct pci_dev *);
+   void(*dma_bus_setup)(struct pci_bus *);
 
int (*probe_mode)(struct pci_bus *);
 
@@ -28,8 +28,8 @@ struct pci_controller_ops {
bool(*enable_device_hook)(struct pci_dev *);
 
/* Called during PCI resource reassignment */
-   resource_size_t (*window_alignment)(struct pci_bus *, unsigned long 
type);
-   void(*reset_secondary_bus)(struct pci_dev *dev);
+   resource_size_t (*window_alignment)(struct pci_bus *, unsigned long);
+   void(*reset_secondary_bus)(struct pci_dev *);
 };
 
 /*
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v5 34/42] powerpc/pci: Delay creating pci_dn

2015-06-04 Thread Gavin Shan
The pci_dn instances are allocated from memblock or bootmem when
creating PCI controllers (hoses) in setup_arch(). PCI hotplug,
which will be supported by subsequent patches, will release PCI
device nodes and their corresponding pci_dn on unplug events.
The pci_dn memory chunks allocated from memblock or bootmem
are hard to reuse after being released.

The patch delays creating pci_dn so that they can be allocated from
slab. In turn, the memory chunks for them can be reused after being
released without problem. The creation of eeh_dev instances, which
depends on pci_dn, is delayed a bit as well.

Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com
---
v5:
  * Derived from PATCH[v4 15/21]
  * Dropped unrelated changes moving pci_dev_pdn_setup() around
---
 arch/powerpc/include/asm/ppc-pci.h |  1 -
 arch/powerpc/kernel/eeh_dev.c  |  2 +-
 arch/powerpc/kernel/pci_dn.c   |  8 +--
 arch/powerpc/platforms/maple/pci.c | 35 ++
 arch/powerpc/platforms/pasemi/pci.c|  3 ---
 arch/powerpc/platforms/powermac/pci.c  | 39 +-
 arch/powerpc/platforms/powernv/pci.c   |  3 ---
 arch/powerpc/platforms/pseries/setup.c |  1 -
 8 files changed, 52 insertions(+), 40 deletions(-)

diff --git a/arch/powerpc/include/asm/ppc-pci.h 
b/arch/powerpc/include/asm/ppc-pci.h
index 4122a86..7388316 100644
--- a/arch/powerpc/include/asm/ppc-pci.h
+++ b/arch/powerpc/include/asm/ppc-pci.h
@@ -40,7 +40,6 @@ void *traverse_pci_dn(struct pci_dn *root,
  void *(*fn)(struct pci_dn *, void *),
  void *data);
 
-extern void pci_devs_phb_init(void);
 extern void pci_devs_phb_init_dynamic(struct pci_controller *phb);
 
 /* From rtas_pci.h */
diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c
index aabba94..f33ce5b 100644
--- a/arch/powerpc/kernel/eeh_dev.c
+++ b/arch/powerpc/kernel/eeh_dev.c
@@ -110,4 +110,4 @@ static int __init eeh_dev_phb_init(void)
return 0;
 }
 
-core_initcall(eeh_dev_phb_init);
+core_initcall_sync(eeh_dev_phb_init);
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
index 0469247..35554c2 100644
--- a/arch/powerpc/kernel/pci_dn.c
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -288,7 +288,7 @@ void *update_dn_pci_info(struct device_node *dn, void *data)
struct device_node *parent;
struct pci_dn *pdn;
 
-   pdn = zalloc_maybe_bootmem(sizeof(*pdn), GFP_KERNEL);
+   pdn = kzalloc(sizeof(*pdn), GFP_KERNEL);
if (pdn == NULL)
return NULL;
dn-data = pdn;
@@ -462,15 +462,19 @@ void pci_devs_phb_init_dynamic(struct pci_controller *phb)
  * pci device found underneath.  This routine runs once,
  * early in the boot sequence.
  */
-void __init pci_devs_phb_init(void)
+static int __init pci_devs_phb_init(void)
 {
struct pci_controller *phb, *tmp;
 
/* This must be done first so the device nodes have valid pci info! */
list_for_each_entry_safe(phb, tmp, hose_list, list_node)
pci_devs_phb_init_dynamic(phb);
+
+   return 0;
 }
 
+core_initcall(pci_devs_phb_init);
+
 static void pci_dev_pdn_setup(struct pci_dev *pdev)
 {
struct pci_dn *pdn;
diff --git a/arch/powerpc/platforms/maple/pci.c 
b/arch/powerpc/platforms/maple/pci.c
index a923230..04a69a8 100644
--- a/arch/powerpc/platforms/maple/pci.c
+++ b/arch/powerpc/platforms/maple/pci.c
@@ -568,6 +568,26 @@ void maple_pci_irq_fixup(struct pci_dev *dev)
DBG( - maple_pci_irq_fixup\n);
 }
 
+static int maple_pci_root_bridge_prepare(struct pci_host_bridge *bridge)
+{
+   struct pci_controller *hose = pci_bus_to_host(bridge-bus);
+   struct device_node *np, *child;
+
+   if (hose != u3_agp)
+   return 0;
+
+   /* Fixup the PCI-OF mapping for U3 AGP due to bus renumbering. We
+* assume there is no P2P bridge on the AGP bus, which should be a
+* safe assumptions hopefully.
+*/
+   np = hose-dn;
+   PCI_DN(np)-busno = 0xf0;
+   for_each_child_of_node(np, child)
+   PCI_DN(child)-busno = 0xf0;
+
+   return 0;
+}
+
 void __init maple_pci_init(void)
 {
struct device_node *np, *root;
@@ -605,20 +625,7 @@ void __init maple_pci_init(void)
if (ht  maple_add_bridge(ht) != 0)
of_node_put(ht);
 
-   /* Setup the linkage between OF nodes and PHBs */ 
-   pci_devs_phb_init();
-
-   /* Fixup the PCI-OF mapping for U3 AGP due to bus renumbering. We
-* assume there is no P2P bridge on the AGP bus, which should be a
-* safe assumptions hopefully.
-*/
-   if (u3_agp) {
-   struct device_node *np = u3_agp-dn;
-   PCI_DN(np)-busno = 0xf0;
-   for (np = np-child; np; np = np-sibling)
-   PCI_DN(np)-busno = 0xf0;
-   }
-
+   ppc_md.pcibios_root_bridge_prepare = maple_pci_root_bridge_prepare;
/* Tell pci.c to 

[PATCH v5 33/42] powerpc/powernv: Functions to get/reset PCI slot status

2015-06-04 Thread Gavin Shan
The patch exports 4 functions, which are based on the corresponding
OPAL APIs to get or set PCI slot status. Those functions are going to
be used by the PCI hotplug module in subsequent patches:

   pnv_pci_get_overlay_dt()   opal_get_overlay_dt()
   pnv_pci_get_presence_status()  opal_pci_get_presence_status()
   pnv_pci_get_power_status() opal_pci_get_power_status()
   pnv_pci_set_power_status() opal_pci_set_power_status()

Besides, the patch also exports pnv_pci_hotplug_notifier_{register,
unregister}() to allow registration and unregistration of PCI hotplug
notifier, which will be used to receive PCI hotplug message from skiboot
firmware.

Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com
---
v5:
  * Derived from PATCH[v4 14/21]
  * No polling required for pnv_pci_get_presence_status()
  * Separate functions for registration and unregistration of PCI
hotplug notifier
  * int64_t for value returned from OPAL API
---
 arch/powerpc/include/asm/opal-api.h|  8 +++-
 arch/powerpc/include/asm/opal.h|  4 ++
 arch/powerpc/include/asm/pnv-pci.h |  7 +++
 arch/powerpc/platforms/powernv/opal-wrappers.S |  4 ++
 arch/powerpc/platforms/powernv/pci.c   | 66 ++
 5 files changed, 88 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/opal-api.h 
b/arch/powerpc/include/asm/opal-api.h
index 0321a90..c534dd8 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -153,7 +153,11 @@
 #define OPAL_FLASH_READ110
 #define OPAL_FLASH_WRITE   111
 #define OPAL_FLASH_ERASE   112
-#define OPAL_LAST  112
+#define OPAL_GET_OVERLAY_DT116
+#define OPAL_PCI_GET_PRESENCE_STATUS   117
+#define OPAL_PCI_GET_POWER_STATUS  118
+#define OPAL_PCI_SET_POWER_STATUS  119
+#define OPAL_LAST  119
 
 /* Device tree flags */
 
@@ -352,6 +356,8 @@ enum opal_msg_type {
OPAL_MSG_SHUTDOWN,  /* params[0] = 1 reboot, 0 shutdown */
OPAL_MSG_HMI_EVT,
OPAL_MSG_DPO,
+   OPAL_MSG_PRD,
+   OPAL_MSG_PCI_HOTPLUG,
OPAL_MSG_TYPE_MAX,
 };
 
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 6d467df..2d1c825 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -200,6 +200,10 @@ int64_t opal_flash_write(uint64_t id, uint64_t offset, 
uint64_t buf,
uint64_t size, uint64_t token);
 int64_t opal_flash_erase(uint64_t id, uint64_t offset, uint64_t size,
uint64_t token);
+int64_t opal_get_overlay_dt(uint64_t *counter, void *buf, uint64_t len);
+int64_t opal_pci_get_presence_status(uint64_t id, uint8_t *status);
+int64_t opal_pci_get_power_status(uint64_t id, uint8_t *status);
+int64_t opal_pci_set_power_status(uint64_t id, uint8_t status);
 
 /* Internal functions */
 extern int early_init_dt_scan_opal(unsigned long node, const char *uname,
diff --git a/arch/powerpc/include/asm/pnv-pci.h 
b/arch/powerpc/include/asm/pnv-pci.h
index f9b4982..9f63375 100644
--- a/arch/powerpc/include/asm/pnv-pci.h
+++ b/arch/powerpc/include/asm/pnv-pci.h
@@ -13,6 +13,13 @@
 #include linux/pci.h
 #include misc/cxl.h
 
+extern int pnv_pci_get_overlay_dt(uint64_t *counter, void *buf, uint64_t len);
+extern int pnv_pci_get_presence_status(uint64_t id, uint8_t *status);
+extern int pnv_pci_get_power_status(uint64_t id, uint8_t *status);
+extern int pnv_pci_set_power_status(uint64_t id, uint8_t status);
+extern int pnv_pci_hotplug_notifier_register(struct notifier_block *nb);
+extern int pnv_pci_hotplug_notifier_unregister(struct notifier_block *nb);
+
 int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode);
 int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq,
   unsigned int virq);
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S 
b/arch/powerpc/platforms/powernv/opal-wrappers.S
index a7ade94..1d87c30 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -295,3 +295,7 @@ OPAL_CALL(opal_i2c_request, 
OPAL_I2C_REQUEST);
 OPAL_CALL(opal_flash_read, OPAL_FLASH_READ);
 OPAL_CALL(opal_flash_write,OPAL_FLASH_WRITE);
 OPAL_CALL(opal_flash_erase,OPAL_FLASH_ERASE);
+OPAL_CALL(opal_get_overlay_dt, OPAL_GET_OVERLAY_DT);
+OPAL_CALL(opal_pci_get_presence_status,
OPAL_PCI_GET_PRESENCE_STATUS);
+OPAL_CALL(opal_pci_get_power_status,   OPAL_PCI_GET_POWER_STATUS);
+OPAL_CALL(opal_pci_set_power_status,   OPAL_PCI_SET_POWER_STATUS);
diff --git a/arch/powerpc/platforms/powernv/pci.c 
b/arch/powerpc/platforms/powernv/pci.c
index bf5df04..c332ea7 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ 

[PATCH v5 38/42] powerpc/powernv: Select OF_OVERLAY

2015-06-04 Thread Gavin Shan
The device tree nodes will be changed dynamically on PCI hotplug
events on PowerNV platform with the help of overlay mechanism.
The patch enables CONFIG_OF_OVERLAY on PowerNV platform to support
that.

Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com
---
v5:
  * Derived from PATCH[v4 20/21]
  * Enables OF_OVERLAY instead of OF_DYNAMIC
---
 arch/powerpc/platforms/powernv/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/platforms/powernv/Kconfig 
b/arch/powerpc/platforms/powernv/Kconfig
index 4b044d8..97d481b 100644
--- a/arch/powerpc/platforms/powernv/Kconfig
+++ b/arch/powerpc/platforms/powernv/Kconfig
@@ -18,4 +18,5 @@ config PPC_POWERNV
select CPU_FREQ_GOV_ONDEMAND
select CPU_FREQ_GOV_CONSERVATIVE
select PPC_DOORBELL
+   select OF_OVERLAY
default y
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v5 06/42] powerpc/powernv: Improve IO and M32 mapping

2015-06-04 Thread Gavin Shan
The PHB's IO or M32 window is divided evenly into segments, each of
which can be mapped to an arbitrary PE# by IODT or M32DT. Current code
figures out the IO and M32 segments consumed by one particular PE
from the windows of the PE's upstream bridge. It won't be reliable
once we extend M64 windows of root port, or the upstream port of
the PCIE switch behind root port to PHB's IO or M32 window, in order
to support PCI hotplug in future.

The patch improves the above situation by calculating PE's consumed
IO or M32 segments from its contained devices, no PCI bridge windows
involved if the PE doesn't contain all the subordinate PCI buses.
Otherwise, the PCI bridge windows still contribute to PE's consumed
IO or M32 segments.

Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 136 ++
 1 file changed, 80 insertions(+), 56 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index 3bb4ce8..46a5e10 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -2959,76 +2959,100 @@ static void pnv_pci_ioda_fixup_iov_resources(struct 
pci_dev *pdev)
 }
 #endif /* CONFIG_PCI_IOV */
 
-/*
- * This function is supposed to be called on basis of PE from top
- * to bottom style. So the the I/O or MMIO segment assigned to
- * parent PE could be overrided by its child PEs if necessary.
- */
-static void pnv_ioda_setup_pe_seg(struct pci_controller *hose,
- struct pnv_ioda_pe *pe)
+static int pnv_ioda_map_pe_one_res(struct pci_controller *hose,
+  struct pnv_ioda_pe *pe,
+  struct resource *res)
 {
struct pnv_phb *phb = hose-private_data;
struct pci_bus_region region;
-   struct resource *res;
-   int i, index;
+   int index;
unsigned int segsize;
unsigned long *segmap, *pe_segmap;
uint16_t win;
int64_t rc;
 
-   /*
-* NOTE: We only care PCI bus based PE for now. For PCI
-* device based PE, for example SRIOV sensitive VF should
-* be figured out later.
-*/
-   BUG_ON(!(pe-flags  (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)));
+   /* Check if we need map the resource */
+   if (!res-parent ||
+   !res-flags ||
+   res-start  res-end ||
+   pnv_pci_is_mem_pref_64(res-flags))
+   return 0;
 
-   pci_bus_for_each_resource(pe-pbus, res, i) {
-   if (!res || !res-flags ||
-   res-start  res-end)
-   continue;
+   if (res-flags  IORESOURCE_IO) {
+   region.start = res-start - phb-ioda.io_pci_base;
+   region.end   = res-end - phb-ioda.io_pci_base;
+   segsize  = phb-ioda.io_segsize;
+   segmap   = phb-ioda.io_segmap;
+   pe_segmap= pe-io_segmap;
+   win  = OPAL_IO_WINDOW_TYPE;
+   } else if ((res-flags  IORESOURCE_MEM) 
+  !pnv_pci_is_mem_pref_64(res-flags)) {
+   region.start = res-start -
+  hose-mem_offset[0] -
+  phb-ioda.m32_pci_base;
+   region.end   = res-end -
+  hose-mem_offset[0] -
+  phb-ioda.m32_pci_base;
+   segsize  = phb-ioda.m32_segsize;
+   segmap   = phb-ioda.m32_segmap;
+   pe_segmap= pe-m32_segmap;
+   win  = OPAL_M32_WINDOW_TYPE;
+   } else {
+   return 0;
+   }
 
-   if (res-flags  IORESOURCE_IO) {
-   region.start = res-start - phb-ioda.io_pci_base;
-   region.end   = res-end - phb-ioda.io_pci_base;
-   segsize  = phb-ioda.io_segsize;
-   segmap   = phb-ioda.io_segmap;
-   pe_segmap= pe-io_segmap;
-   win  = OPAL_IO_WINDOW_TYPE;
-   } else if ((res-flags  IORESOURCE_MEM) 
-   !pnv_pci_is_mem_pref_64(res-flags)) {
-   region.start = res-start -
-  hose-mem_offset[0] -
-  phb-ioda.m32_pci_base;
-   region.end   = res-end -
-  hose-mem_offset[0] -
-  phb-ioda.m32_pci_base;
-   segsize  = phb-ioda.m32_segsize;
-   segmap   = phb-ioda.m32_segmap;
-   pe_segmap= pe-m32_segmap;
-   win  = OPAL_M32_WINDOW_TYPE;
-   } else {
-   continue;
+   index = region.start / phb-ioda.io_segsize;
+   while (index  phb-ioda.total_pe 
+

[PATCH v5 31/42] powerpc/pci: Move pcibios_find_pci_bus() around

2015-06-04 Thread Gavin Shan
The patch moves pcibios_find_pci_bus() to the PPC kernel directory so
that it can be reused by the hotplug code for both the pSeries and
PowerNV platforms.

Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com
Acked-by: Benjamin Herrenschmidt b...@kernel.crashing.org
---
v5:
  * Derived from PATCH[v4 12/21]
---
 arch/powerpc/kernel/pci-hotplug.c  | 36 ++
 arch/powerpc/platforms/pseries/pci_dlpar.c | 32 --
 2 files changed, 36 insertions(+), 32 deletions(-)

diff --git a/arch/powerpc/kernel/pci-hotplug.c 
b/arch/powerpc/kernel/pci-hotplug.c
index ca392fc..1482bc1 100644
--- a/arch/powerpc/kernel/pci-hotplug.c
+++ b/arch/powerpc/kernel/pci-hotplug.c
@@ -21,6 +21,42 @@
 #include asm/firmware.h
 #include asm/eeh.h
 
+static struct pci_bus *find_pci_bus(struct pci_bus *bus,
+   struct device_node *dn)
+{
+   struct pci_bus *tmp, *child = NULL;
+   struct device_node *busdn;
+
+   busdn = pci_bus_to_OF_node(bus);
+   if (busdn == dn)
+   return bus;
+
+   list_for_each_entry(tmp, bus-children, node) {
+   child = find_pci_bus(tmp, dn);
+   if (child)
+   break;
+   }
+
+   return child;
+}
+
+/**
+ * pcibios_find_pci_bus - find PCI bus according to the given device node
+ * @dn: Device node
+ *
+ * Find the corresponding PCI bus according to the given device node.
+ */
+struct pci_bus *pcibios_find_pci_bus(struct device_node *dn)
+{
+   struct pci_dn *pdn = PCI_DN(dn);
+
+   if (!pdn  || !pdn-phb || !pdn-phb-bus)
+   return NULL;
+
+   return find_pci_bus(pdn-phb-bus, dn);
+}
+EXPORT_SYMBOL_GPL(pcibios_find_pci_bus);
+
 /**
  * pcibios_release_device - release PCI device
  * @dev: PCI device
diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c 
b/arch/powerpc/platforms/pseries/pci_dlpar.c
index 5d4a3df..906dbaa 100644
--- a/arch/powerpc/platforms/pseries/pci_dlpar.c
+++ b/arch/powerpc/platforms/pseries/pci_dlpar.c
@@ -34,38 +34,6 @@
 
 #include pseries.h
 
-static struct pci_bus *
-find_bus_among_children(struct pci_bus *bus,
-struct device_node *dn)
-{
-   struct pci_bus *child = NULL;
-   struct pci_bus *tmp;
-   struct device_node *busdn;
-
-   busdn = pci_bus_to_OF_node(bus);
-   if (busdn == dn)
-   return bus;
-
-   list_for_each_entry(tmp, bus-children, node) {
-   child = find_bus_among_children(tmp, dn);
-   if (child)
-   break;
-   };
-   return child;
-}
-
-struct pci_bus *
-pcibios_find_pci_bus(struct device_node *dn)
-{
-   struct pci_dn *pdn = dn-data;
-
-   if (!pdn  || !pdn-phb || !pdn-phb-bus)
-   return NULL;
-
-   return find_bus_among_children(pdn-phb-bus, dn);
-}
-EXPORT_SYMBOL_GPL(pcibios_find_pci_bus);
-
 struct pci_controller *init_phb_dynamic(struct device_node *dn)
 {
struct pci_controller *phb;
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 2/2] rheap: move rheap.c from arch/powerpc/lib/ to lib/

2015-06-04 Thread Scott Wood
On Thu, 2015-06-04 at 00:56 -0500, Zhao Qiang-B45475 wrote:
 On Thu, 2015-05-28 at 1:37AM +0800, Wood Scott wrote:
 
 
  -Original Message-
  From: Wood Scott-B07421
  Sent: Thursday, May 28, 2015 1:37 AM
  To: Zhao Qiang-B45475
  Cc: linuxppc-dev@lists.ozlabs.org; Wood Scott-B07421; Xie Xiaobo-
  R63061
  Subject: Re: [PATCH 2/2] rheap: move rheap.c from 
  arch/powerpc/lib/ to
  lib/
  
  On Wed, 2015-05-27 at 17:12 +0800, Zhao Qiang wrote:
   qe need to use the rheap, so move it to public directory.
  
  You've been previously asked to use lib/genalloc.c rather than 
  introduce
  duplicate functionality into /lib.  NACK.
 
 Can't use lib/genalloc.c instead of rheap.c.
 Qe need to alloc muram of qe, not DIMM.

lib/genalloc.h is not for allocating main memory.  It is for 
allocating special regions.  It is serving the same purpose as rheap.

-Scott

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v5 32/42] powerpc/powernv: Introduce pnv_pci_poll()

2015-06-04 Thread Gavin Shan
The patch obsoletes pnv_eeh_poll() with pnv_pci_poll():

   * The return value from last OPAL API is passed to the
 pnv_pci_poll() and handled there.
   * More information (e.g. PCI slot power status) is retrieved
 if the last argument is valid.

Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com
---
v5:
  * Derived from PATCH[v4 13/21]
---
 arch/powerpc/platforms/powernv/eeh-powernv.c | 46 ++--
 arch/powerpc/platforms/powernv/pci.c | 21 +
 arch/powerpc/platforms/powernv/pci.h |  1 +
 3 files changed, 31 insertions(+), 37 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c 
b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 4eb53ed..7ee328b 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -743,28 +743,11 @@ static int pnv_eeh_get_state(struct eeh_pe *pe, int 
*delay)
return ret;
 }
 
-static s64 pnv_eeh_poll(uint64_t id)
-{
-   s64 rc = OPAL_HARDWARE;
-
-   while (1) {
-   rc = opal_pci_poll(id, NULL);
-   if (rc = 0)
-   break;
-
-   if (system_state  SYSTEM_RUNNING)
-   udelay(1000 * rc);
-   else
-   msleep(rc);
-   }
-
-   return rc;
-}
-
 int pnv_eeh_phb_reset(struct pci_controller *hose, int option)
 {
struct pnv_phb *phb = hose-private_data;
s64 rc = OPAL_HARDWARE;
+   int ret;
 
pr_debug(%s: Reset PHB#%x, option=%d\n,
 __func__, hose-global_number, option);
@@ -779,8 +762,6 @@ int pnv_eeh_phb_reset(struct pci_controller *hose, int 
option)
rc = opal_pci_reset(phb-opal_id,
OPAL_RESET_PHB_COMPLETE,
OPAL_DEASSERT_RESET);
-   if (rc  0)
-   goto out;
 
/*
 * Poll state of the PHB until the request is done
@@ -788,24 +769,22 @@ int pnv_eeh_phb_reset(struct pci_controller *hose, int 
option)
 * reset followed by hot reset on root bus. So we also
 * need the PCI bus settlement delay.
 */
-   rc = pnv_eeh_poll(phb-opal_id);
-   if (option == EEH_RESET_DEACTIVATE) {
+   ret = pnv_pci_poll(phb-opal_id, rc, NULL);
+   if (option == EEH_RESET_DEACTIVATE  !ret) {
if (system_state  SYSTEM_RUNNING)
udelay(1000 * EEH_PE_RST_SETTLE_TIME);
else
msleep(EEH_PE_RST_SETTLE_TIME);
}
-out:
-   if (rc != OPAL_SUCCESS)
-   return -EIO;
 
-   return 0;
+   return ret;
 }
 
 static int pnv_eeh_root_reset(struct pci_controller *hose, int option)
 {
struct pnv_phb *phb = hose-private_data;
s64 rc = OPAL_HARDWARE;
+   int ret;
 
pr_debug(%s: Reset PHB#%x, option=%d\n,
 __func__, hose-global_number, option);
@@ -827,18 +806,13 @@ static int pnv_eeh_root_reset(struct pci_controller 
*hose, int option)
rc = opal_pci_reset(phb-opal_id,
OPAL_RESET_PCI_HOT,
OPAL_DEASSERT_RESET);
-   if (rc  0)
-   goto out;
 
/* Poll state of the PHB until the request is done */
-   rc = pnv_eeh_poll(phb-opal_id);
-   if (option == EEH_RESET_DEACTIVATE)
+   ret = pnv_pci_poll(phb-opal_id, rc, NULL);
+   if (option == EEH_RESET_DEACTIVATE  !ret)
msleep(EEH_PE_RST_SETTLE_TIME);
-out:
-   if (rc != OPAL_SUCCESS)
-   return -EIO;
 
-   return 0;
+   return ret;
 }
 
 static int __pnv_eeh_bridge_reset(struct pci_dev *dev, int option)
@@ -928,10 +902,8 @@ static int pnv_eeh_bridge_reset(struct pci_dev *pdev, int 
option)
phb = hose-private_data;
id |= (pdev-bus-number  24) | (pdev-devfn  16) | phb-opal_id;
rc = opal_pci_reset(id, scope, OPAL_ASSERT_RESET);
-   if (rc  0)
-   rc = pnv_eeh_poll(id);
 
-   return (rc == OPAL_SUCCESS) ? 0 : -EIO;
+   return pnv_pci_poll(id, rc, NULL);
 }
 
 static void pnv_eeh_wait_for_pending(struct pci_dn *pdn, int pos,
diff --git a/arch/powerpc/platforms/powernv/pci.c 
b/arch/powerpc/platforms/powernv/pci.c
index 678eb24..bf5df04 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -44,6 +44,27 @@
 #define cfg_dbg(fmt...)do { } while(0)
 //#define cfg_dbg(fmt...)  printk(fmt)
 
+int pnv_pci_poll(uint64_t id, int64_t rval, uint8_t *pval)
+{
+   while (rval  0) {
+   if (system_state  SYSTEM_RUNNING)
+   udelay(1000 * rval);
+   else
+   msleep(rval);
+
+   rval = opal_pci_poll(id, pval);
+   }
+
+   /*
+* The caller expects to retrieve additional information
+* if the last argument is valid.
+*/
+   if (rval == 

[PATCH v5 14/42] powerpc/powernv: Allocate PE# in descending order

2015-06-04 Thread Gavin Shan
The available PE#, represented by a bitmap in the PHB, is allocated
in ascending order. It conflicts with the fact that M64 segments are
assigned in the same order. In order to avoid the conflict, the patch
allocates PE# in descending order.

Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com
---
v5:
  * Split from [PATCH v5 v4 06/21]
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 21 +
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index d8b0ef5..0d6539a 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -152,18 +152,23 @@ static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int 
pe_no)
 
 static int pnv_ioda_alloc_pe(struct pnv_phb *phb)
 {
-   unsigned long pe;
+   unsigned long pe_no;
+   unsigned long limit = phb-ioda.total_pe - 1;
 
do {
-   pe = find_next_zero_bit(phb-ioda.pe_alloc,
-   phb-ioda.total_pe, 0);
-   if (pe = phb-ioda.total_pe)
+   pe_no = find_next_zero_bit(phb-ioda.pe_alloc,
+  phb-ioda.total_pe, limit);
+   if (pe_no  phb-ioda.total_pe 
+   !test_and_set_bit(pe_no, phb-ioda.pe_alloc))
+   break;
+
+   if (--limit = phb-ioda.total_pe)
return IODA_INVALID_PE;
-   } while(test_and_set_bit(pe, phb-ioda.pe_alloc));
+   } while (1);
 
-   phb-ioda.pe_array[pe].phb = phb;
-   phb-ioda.pe_array[pe].pe_number = pe;
-   return pe;
+   phb-ioda.pe_array[pe_no].phb = phb;
+   phb-ioda.pe_array[pe_no].pe_number = pe_no;
+   return pe_no;
 }
 
 static void pnv_ioda_free_pe(struct pnv_phb *phb, int pe)
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v5 35/42] powerpc/pci: Create eeh_dev while creating pci_dn

2015-06-04 Thread Gavin Shan
The eeh_dev is always created based on pci_dn, but with initcall
core_initcall_sync(). The patch creates eeh_dev when pci_dn is
created, indicating they have same life cycle.

Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com
---
v5:
  * Derived from PATCH[v4 16/21]
---
 arch/powerpc/include/asm/eeh.h |  6 --
 arch/powerpc/kernel/eeh_dev.c  | 18 --
 arch/powerpc/kernel/pci_dn.c   | 12 
 arch/powerpc/platforms/pseries/setup.c |  6 +-
 4 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index ea1f13c4..c0236a6 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -272,7 +272,8 @@ void eeh_pe_restore_bars(struct eeh_pe *pe);
 const char *eeh_pe_loc_get(struct eeh_pe *pe);
 struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe);
 
-void *eeh_dev_init(struct pci_dn *pdn, void *data);
+struct eeh_dev *eeh_dev_init(struct pci_dn *pdn,
+struct pci_controller *phb);
 void eeh_dev_phb_init_dynamic(struct pci_controller *phb);
 int eeh_init(void);
 int __init eeh_ops_register(struct eeh_ops *ops);
@@ -325,7 +326,8 @@ static inline int eeh_init(void)
return 0;
 }
 
-static inline void *eeh_dev_init(struct pci_dn *pdn, void *data)
+static inline struct eeh_dev *eeh_dev_init(struct pci_dn *pdn,
+  struct pci_controller *phb)
 {
return NULL;
 }
diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c
index f33ce5b..7486932 100644
--- a/arch/powerpc/kernel/eeh_dev.c
+++ b/arch/powerpc/kernel/eeh_dev.c
@@ -44,14 +44,14 @@
 /**
  * eeh_dev_init - Create EEH device according to OF node
  * @pdn: PCI device node
- * @data: PHB
+ * @phb: PCI controller
  *
  * It will create EEH device according to the given OF node. The function
  * might be called by PCI emunation, DR, PHB hotplug.
  */
-void *eeh_dev_init(struct pci_dn *pdn, void *data)
+struct eeh_dev *eeh_dev_init(struct pci_dn *pdn,
+struct pci_controller *phb)
 {
-   struct pci_controller *phb = data;
struct eeh_dev *edev;
 
/* Allocate EEH device */
@@ -68,7 +68,7 @@ void *eeh_dev_init(struct pci_dn *pdn, void *data)
edev-phb = phb;
INIT_LIST_HEAD(edev-list);
 
-   return NULL;
+   return edev;
 }
 
 /**
@@ -80,16 +80,8 @@ void *eeh_dev_init(struct pci_dn *pdn, void *data)
  */
 void eeh_dev_phb_init_dynamic(struct pci_controller *phb)
 {
-   struct pci_dn *root = phb-pci_data;
-
/* EEH PE for PHB */
eeh_phb_pe_create(phb);
-
-   /* EEH device for PHB */
-   eeh_dev_init(root, phb);
-
-   /* EEH devices for children OF nodes */
-   traverse_pci_dn(root, eeh_dev_init, phb);
 }
 
 /**
@@ -105,8 +97,6 @@ static int __init eeh_dev_phb_init(void)
list_for_each_entry_safe(phb, tmp, hose_list, list_node)
eeh_dev_phb_init_dynamic(phb);
 
-   pr_info(EEH: devices created\n);
-
return 0;
 }
 
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
index 35554c2..d4330d2 100644
--- a/arch/powerpc/kernel/pci_dn.c
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -287,6 +287,9 @@ void *update_dn_pci_info(struct device_node *dn, void *data)
const __be32 *regs;
struct device_node *parent;
struct pci_dn *pdn;
+#ifdef CONFIG_EEH
+   struct eeh_dev *edev;
+#endif
 
pdn = kzalloc(sizeof(*pdn), GFP_KERNEL);
if (pdn == NULL)
@@ -317,6 +320,15 @@ void *update_dn_pci_info(struct device_node *dn, void 
*data)
/* Extended config space */
pdn-pci_ext_config_space = (type  of_read_number(type, 1) == 1);
 
+   /* Initialize EEH device */
+#ifdef CONFIG_EEH
+   edev = eeh_dev_init(pdn, phb);
+   if (!edev) {
+   kfree(pdn);
+   return NULL;
+   }
+#endif
+
/* Attach to parent node */
INIT_LIST_HEAD(pdn-child_list);
INIT_LIST_HEAD(pdn-list);
diff --git a/arch/powerpc/platforms/pseries/setup.c 
b/arch/powerpc/platforms/pseries/setup.c
index 5f80758..92974aa 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -261,12 +261,8 @@ static int pci_dn_reconfig_notifier(struct notifier_block 
*nb, unsigned long act
switch (action) {
case OF_RECONFIG_ATTACH_NODE:
pci = np-parent-data;
-   if (pci) {
+   if (pci)
update_dn_pci_info(np, pci-phb);
-
-   /* Create EEH device for the OF node */
-   eeh_dev_init(PCI_DN(np), pci-phb);
-   }
break;
default:
err = NOTIFY_DONE;
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v5 30/42] powerpc/pci: Don't scan empty slot

2015-06-04 Thread Gavin Shan
In the hotplug case, pcibios_add_pci_devices() is called to rescan
the specified PCI bus, which might not have any child devices.
Accessing the PCI bus's child device node then always crashes the
kernel. The patch skips scanning a PCI bus that has no child
devices, in order to avoid the kernel crash.

Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com
---
v5:
  * Derived from PATCH[v4 11/21]
---
 arch/powerpc/kernel/pci-hotplug.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/pci-hotplug.c 
b/arch/powerpc/kernel/pci-hotplug.c
index 21973e7..ca392fc 100644
--- a/arch/powerpc/kernel/pci-hotplug.c
+++ b/arch/powerpc/kernel/pci-hotplug.c
@@ -92,7 +92,8 @@ void pcibios_add_pci_devices(struct pci_bus * bus)
if (mode == PCI_PROBE_DEVTREE) {
/* use ofdt-based probe */
of_rescan_bus(dn, bus);
-   } else if (mode == PCI_PROBE_NORMAL) {
+   } else if (mode == PCI_PROBE_NORMAL 
+  dn-child  PCI_DN(dn-child)) {
/*
 * Use legacy probe. In the partial hotplug case, we
 * probably have grandchildren devices unplugged. So
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v5 25/42] powerpc/powernv: Supports slot ID

2015-06-04 Thread Gavin Shan
PowerNV platform is running on top of skiboot firmware, which has
changes supporting PCI slots. PCI slots are identified by PHB's
OPAL ID (PHB slot) or combo of that and PCI slot ID. The patch
changes argument names of opal_pci_reset() and opal_pci_poll()
to reflect the firmware's change. pnv_eeh_phb_poll() is also
renamed to pnv_eeh_poll() to reflect the firmware's change.

Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com
---
v5:
  * Split from PATCH[v4 09/21]
---
 arch/powerpc/include/asm/opal.h  | 4 ++--
 arch/powerpc/platforms/powernv/eeh-powernv.c | 8 
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 042af1a..6d467df 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -129,7 +129,7 @@ int64_t opal_pci_map_pe_dma_window(uint64_t phb_id, 
uint16_t pe_number, uint16_t
 int64_t opal_pci_map_pe_dma_window_real(uint64_t phb_id, uint16_t pe_number,
uint16_t dma_window_number, uint64_t 
pci_start_addr,
uint64_t pci_mem_size);
-int64_t opal_pci_reset(uint64_t phb_id, uint8_t reset_scope, uint8_t 
assert_state);
+int64_t opal_pci_reset(uint64_t id, uint8_t reset_scope, uint8_t assert_state);
 
 int64_t opal_pci_get_hub_diag_data(uint64_t hub_id, void *diag_buffer,
   uint64_t diag_buffer_len);
@@ -145,7 +145,7 @@ int64_t opal_get_epow_status(__be64 *status);
 int64_t opal_set_system_attention_led(uint8_t led_action);
 int64_t opal_pci_next_error(uint64_t phb_id, __be64 *first_frozen_pe,
__be16 *pci_error_type, __be16 *severity);
-int64_t opal_pci_poll(uint64_t phb_id);
+int64_t opal_pci_poll(uint64_t id, uint8_t *val);
 int64_t opal_return_cpu(void);
 int64_t opal_check_token(uint64_t token);
 int64_t opal_reinit_cpus(uint64_t flags);
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c 
b/arch/powerpc/platforms/powernv/eeh-powernv.c
index dfdb31f..4fd8f15 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -743,12 +743,12 @@ static int pnv_eeh_get_state(struct eeh_pe *pe, int 
*delay)
return ret;
 }
 
-static s64 pnv_eeh_phb_poll(struct pnv_phb *phb)
+static s64 pnv_eeh_poll(uint64_t id)
 {
s64 rc = OPAL_HARDWARE;
 
while (1) {
-   rc = opal_pci_poll(phb-opal_id);
+   rc = opal_pci_poll(id, NULL);
if (rc = 0)
break;
 
@@ -788,7 +788,7 @@ int pnv_eeh_phb_reset(struct pci_controller *hose, int 
option)
 * reset followed by hot reset on root bus. So we also
 * need the PCI bus settlement delay.
 */
-   rc = pnv_eeh_phb_poll(phb);
+   rc = pnv_eeh_poll(phb-opal_id);
if (option == EEH_RESET_DEACTIVATE) {
if (system_state  SYSTEM_RUNNING)
udelay(1000 * EEH_PE_RST_SETTLE_TIME);
@@ -831,7 +831,7 @@ static int pnv_eeh_root_reset(struct pci_controller *hose, 
int option)
goto out;
 
/* Poll state of the PHB until the request is done */
-   rc = pnv_eeh_phb_poll(phb);
+   rc = pnv_eeh_poll(phb-opal_id);
if (option == EEH_RESET_DEACTIVATE)
msleep(EEH_PE_RST_SETTLE_TIME);
 out:
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v5 20/42] powerpc/powernv: Rename pnv_ioda_get_pe() to pnv_ioda_dev_to_pe()

2015-06-04 Thread Gavin Shan
The name pnv_ioda_get_pe() suggests that the function takes a
reference on the given PE instance. However, it actually returns the
PE instance that contains the given PCI device. The patch renames it
to pnv_ioda_dev_to_pe() to reflect its purpose.

Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com
---
v5:
  * Split from PATCH[v4 07/21]
  * Fixed do not use assignment in if condition from checkpatch.pl
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index 0447534..e9165fa 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -702,7 +702,7 @@ static int pnv_ioda_get_pe_state(struct pnv_phb *phb, int 
pe_no)
  * but in the meantime, we need to protect them to avoid warnings
  */
 #ifdef CONFIG_PCI_MSI
-static struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev)
+static struct pnv_ioda_pe *pnv_ioda_dev_to_pe(struct pci_dev *dev)
 {
struct pci_controller *hose = pci_bus_to_host(dev-bus);
struct pnv_phb *phb = hose-private_data;
@@ -2671,7 +2671,7 @@ int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t 
mode)
struct pnv_ioda_pe *pe;
int rc;
 
-   pe = pnv_ioda_get_pe(dev);
+   pe = pnv_ioda_dev_to_pe(dev);
if (!pe)
return -ENODEV;
 
@@ -2787,7 +2787,8 @@ int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned 
int hwirq,
struct pnv_ioda_pe *pe;
int rc;
 
-   if (!(pe = pnv_ioda_get_pe(dev)))
+   pe = pnv_ioda_dev_to_pe(dev);
+   if (!pe)
return -ENODEV;
 
/* Assign XIVE to PE */
@@ -2809,7 +2810,7 @@ static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, 
struct pci_dev *dev,
  unsigned int hwirq, unsigned int virq,
  unsigned int is_64, struct msi_msg *msg)
 {
-   struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev);
+   struct pnv_ioda_pe *pe = pnv_ioda_dev_to_pe(dev);
unsigned int xive_num = hwirq - phb-msi_base;
__be32 data;
int rc;
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v5 05/42] powerpc/powernv: Simplify pnv_ioda_setup_pe_seg()

2015-06-04 Thread Gavin Shan
The original implementation of pnv_ioda_setup_pe_seg() configures
IO and M32 segments with separate logic, which can be merged by
caching @seg_bitmap, @seg_size, @win in advance. The patch
shouldn't cause any behavioural changes.

Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com
---
v5:
  * Split from PATCH[v4 04/21]
  * Fixed coding style complained by checkpatch.pl
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 67 +++
 1 file changed, 32 insertions(+), 35 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index 53d0efd..3bb4ce8 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -2971,7 +2971,10 @@ static void pnv_ioda_setup_pe_seg(struct pci_controller 
*hose,
struct pci_bus_region region;
struct resource *res;
int i, index;
-   int rc;
+   unsigned int segsize;
+   unsigned long *segmap, *pe_segmap;
+   uint16_t win;
+   int64_t rc;
 
/*
 * NOTE: We only care PCI bus based PE for now. For PCI
@@ -2988,50 +2991,44 @@ static void pnv_ioda_setup_pe_seg(struct pci_controller 
*hose,
if (res-flags  IORESOURCE_IO) {
region.start = res-start - phb-ioda.io_pci_base;
region.end   = res-end - phb-ioda.io_pci_base;
-   index = region.start / phb-ioda.io_segsize;
-
-   while (index  phb-ioda.total_pe 
-  region.start = region.end) {
-   set_bit(index, phb-ioda.io_segmap);
-   set_bit(index, pe-io_segmap);
-   rc = opal_pci_map_pe_mmio_window(phb-opal_id,
-   pe-pe_number, OPAL_IO_WINDOW_TYPE, 0, 
index);
-   if (rc != OPAL_SUCCESS) {
-   pr_err(%s: OPAL error %d when mapping 
IO 
-  segment #%d to PE#%d\n,
-  __func__, rc, index, 
pe-pe_number);
-   break;
-   }
-
-   region.start += phb-ioda.io_segsize;
-   index++;
-   }
+   segsize  = phb-ioda.io_segsize;
+   segmap   = phb-ioda.io_segmap;
+   pe_segmap= pe-io_segmap;
+   win  = OPAL_IO_WINDOW_TYPE;
} else if ((res-flags  IORESOURCE_MEM) 
-  !pnv_pci_is_mem_pref_64(res-flags)) {
+   !pnv_pci_is_mem_pref_64(res-flags)) {
region.start = res-start -
   hose-mem_offset[0] -
   phb-ioda.m32_pci_base;
region.end   = res-end -
   hose-mem_offset[0] -
   phb-ioda.m32_pci_base;
-   index = region.start / phb-ioda.m32_segsize;
+   segsize  = phb-ioda.m32_segsize;
+   segmap   = phb-ioda.m32_segmap;
+   pe_segmap= pe-m32_segmap;
+   win  = OPAL_M32_WINDOW_TYPE;
+   } else {
+   continue;
+   }
 
-   while (index  phb-ioda.total_pe 
-  region.start = region.end) {
-   set_bit(index, phb-ioda.m32_segmap);
-   set_bit(index, pe-m32_segmap);
-   rc = opal_pci_map_pe_mmio_window(phb-opal_id,
-   pe-pe_number, OPAL_M32_WINDOW_TYPE, 0, 
index);
-   if (rc != OPAL_SUCCESS) {
-   pr_err(%s: OPAL error %d when mapping 
M32 
-  segment#%d to PE#%d,
-  __func__, rc, index, 
pe-pe_number);
-   break;
-   }
+   index = region.start / phb-ioda.io_segsize;
+   while (index  phb-ioda.total_pe 
+  region.start = region.end) {
+   set_bit(index, segmap);
+   set_bit(index, pe_segmap);
 
-   region.start += phb-ioda.m32_segsize;
-   index++;
+   rc = opal_pci_map_pe_mmio_window(phb-opal_id,
+   pe-pe_number, win, 0, index);
+   if (rc != OPAL_SUCCESS) {
+   pr_warn(%s: Error %lld mapping (%d) seg#%d to 
PHB#%d-PE#%d\n,
+

[PATCH v5 08/42] powerpc/powernv: DMA32 cleanup

2015-06-04 Thread Gavin Shan
The patch cleans up DMA32 in pci-ioda.c. It shouldn't introduce
behavioural changes:

   * Rename various fields in struct pnv_phb and struct pnv_ioda_pe
 as 32-bits DMA should be related to DMA, not TCE, and move
 them around to reflect their relationship and their relative
 importance.
   * Removed struct pnv_ioda_pe::tce32_segcount.

Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com
---
v5:
  * Split from PATCH[v4 5/21]
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 48 +++
 arch/powerpc/platforms/powernv/pci.h  | 13 +++--
 2 files changed, 28 insertions(+), 33 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index d9ff739..4af3d06 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -971,7 +971,7 @@ static void pnv_ioda_link_pe_by_weight(struct pnv_phb *phb,
struct pnv_ioda_pe *lpe;
 
list_for_each_entry(lpe, phb-ioda.pe_dma_list, dma_link) {
-   if (lpe-dma_weight  pe-dma_weight) {
+   if (lpe-dma32_weight  pe-dma32_weight) {
list_add_tail(pe-dma_link, lpe-dma_link);
return;
}
@@ -996,14 +996,14 @@ static unsigned int pnv_ioda_dev_dma_weight(struct 
pci_dev *dev)
if (dev-class == PCI_CLASS_SERIAL_USB_UHCI ||
dev-class == PCI_CLASS_SERIAL_USB_OHCI ||
dev-class == PCI_CLASS_SERIAL_USB_EHCI)
-   return 3 * phb-ioda.tce32_count;
+   return 3 * phb-ioda.dma32_segcount;
 
/* Increase the weight of RAID (includes Obsidian) */
if ((dev-class  8) == PCI_CLASS_STORAGE_RAID)
-   return 15 * phb-ioda.tce32_count;
+   return 15 * phb-ioda.dma32_segcount;
 
/* Default */
-   return 10 * phb-ioda.tce32_count;
+   return 10 * phb-ioda.dma32_segcount;
 }
 
 static int __pnv_ioda_phb_dma_weight(struct pci_dev *pdev, void *data)
@@ -1182,7 +1182,7 @@ static void pnv_ioda_setup_same_PE(struct pci_bus *bus, 
struct pnv_ioda_pe *pe)
continue;
}
pdn-pe_number = pe-pe_number;
-   pe-dma_weight += pnv_ioda_dev_dma_weight(dev);
+   pe-dma32_weight += pnv_ioda_dev_dma_weight(dev);
if ((pe-flags  PNV_IODA_PE_BUS_ALL)  dev-subordinate)
pnv_ioda_setup_same_PE(dev-subordinate, pe);
}
@@ -1219,10 +1219,10 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, 
int all)
pe-flags |= (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS);
pe-pbus = bus;
pe-pdev = NULL;
-   pe-tce32_seg = -1;
+   pe-dma32_seg = -1;
pe-mve_number = -1;
pe-rid = bus-busn_res.start  8;
-   pe-dma_weight = 0;
+   pe-dma32_weight = 0;
 
if (all)
pe_info(pe, Secondary bus %d..%d associated with PE#%d\n,
@@ -1585,7 +1585,7 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, 
u16 num_vfs)
pe-flags = PNV_IODA_PE_VF;
pe-pbus = NULL;
pe-parent_dev = pdev;
-   pe-tce32_seg = -1;
+   pe-dma32_seg = -1;
pe-mve_number = -1;
pe-rid = (pci_iov_virtfn_bus(pdev, vf_index)  8) |
   pci_iov_virtfn_devfn(pdev, vf_index);
@@ -2061,7 +2061,7 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
/* XXX FIXME: Allocate multi-level tables on PHB3 */
 
/* We shouldn't already have a 32-bit DMA associated */
-   if (WARN_ON(pe-tce32_seg = 0))
+   if (WARN_ON(pe-dma32_seg = 0))
return;
 
tbl = pnv_pci_table_alloc(phb-hose-node);
@@ -2070,7 +2070,7 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
pnv_pci_link_table_and_group(phb-hose-node, 0, tbl, pe-table_group);
 
/* Grab a 32-bit TCE table */
-   pe-tce32_seg = base;
+   pe-dma32_seg = base;
pe_info(pe,  Setting up 32-bit TCE table at %08x..%08x\n,
(base  28), ((base + segs)  28) - 1);
 
@@ -2131,8 +2131,8 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
return;
  fail:
/* XXX Failure: Try to fallback to 64-bit only ? */
-   if (pe-tce32_seg = 0)
-   pe-tce32_seg = -1;
+   if (pe-dma32_seg = 0)
+   pe-dma32_seg = -1;
if (tce_mem)
__free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs));
if (tbl) {
@@ -2520,7 +2520,7 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb 
*phb,
int64_t rc;
 
/* We shouldn't already have a 32-bit DMA associated */
-   if (WARN_ON(pe-tce32_seg = 0))
+   if (WARN_ON(pe-dma32_seg = 0))
return;
 
/* TVE #1 is selected by PCI address bit 59 */
@@ -2530,7 +2530,7 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb 
*phb,

[PATCH v5 37/42] powerpc/pci: Update bridge windows on PCI plugging

2015-06-04 Thread Gavin Shan
During a PCI plugging event, the PCI devices are rescanned and
their IO and MMIO resources are reassigned. However, the PowerNV
platform assigns PE# based on those resources, which depends on
updating the windows of the bridge of the PE's primary bus.

The patch updates the windows of the bridge of the PE's primary bus
if we have a valid bridge. Otherwise, we assume it's a root bus or an
SRIOV virtual bus, and the PE won't be assigned at PCI plugging time.

Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com
---
v5:
  * Derived from PATCH[v4 18/21]
---
 arch/powerpc/kernel/pci-common.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 0358f24..811eb4d 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -1471,8 +1471,12 @@ void pcibios_finish_adding_to_bus(struct pci_bus *bus)
/* Allocate bus and devices resources */
pcibios_allocate_bus_resources(bus);
pcibios_claim_one_bus(bus);
-   if (!pci_has_flag(PCI_PROBE_ONLY))
-   pci_assign_unassigned_bus_resources(bus);
+   if (!pci_has_flag(PCI_PROBE_ONLY)) {
+   if (bus-self)
+   pci_assign_unassigned_bridge_resources(bus-self);
+   else
+   pci_assign_unassigned_bus_resources(bus);
+   }
 
/* Fixup EEH */
eeh_add_device_tree_late(bus);
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v1 1/9]powerpc/powernv: Data structure and macros definition

2015-06-04 Thread Madhavan Srinivasan



On Wednesday 03 June 2015 04:41 AM, Daniel Axtens wrote:

+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */

I'm not certain about this, but I _think_ this is supposed to be version
2 only:
http://git.kernel.org/cgit/linux/kernel/git/stable/linux-stable.git/tree/COPYING#n9
I referred to other files in the same folder for the license information,
so I can wait for the maintainers' comments on this.



+/*
+ * Device tree parser code detect nest pmu support
+ * and create new nest pmus. This structure will
+ * hold the pmu functions and attrs for each nest pmu and
+ * will be referenced at the time of pmu registering.
+ */

The first sentence of that comment is unclear: I think you're trying to
say "Device tree parser code detects nest pmu support and registers new
nest pmus"? Also s/registering/registration/.

Yes. I will rewrite the comment.


+struct nest_pmu {
+   struct pmu pmu;
+   const struct attribute_group *attr_groups[4];
+};
+

Regards,
Daniel Axtens

Thanks for the review
Maddy

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v5 00/42] PowerPC/PowerNV: PCI Slot Management

2015-06-04 Thread Gavin Shan
This series of patches intends to support PCI slots on the PowerPC PowerNV
platform, which runs on top of skiboot firmware. The patchset requires
corresponding changes to the skiboot firmware, which have been sent to
skib...@lists.ozlabs.org for review. The PCI slots are exposed by skiboot
through device node properties, and the kernel uses those properties to
populate PCI slots accordingly.

The original PCI infrastructure on the PowerNV platform can't support hotplug
because the PE is assigned during PHB fixup time, which happens only once
during system boot. For this reason, the PCI infrastructure on the PowerNV
platform has been reworked substantially. After that, the PE and its
corresponding resources (IODT, M32DT, M64 segments, DMA32 and bypass window)
are assigned upon updating the PCI bridge's resources, which might decide the
PE# assigned to the PE (e.g. M64 resources, on P8 strictly speaking). Each PE
maintains a reference count, which is (number of child PCI devices + 1). That
means that when the last child PCI device leaves the PE, the PE and its
resources are released and put back into the free pool again. With this design,
the PE is released when the EEH PE is released. PATCH[1 - 24] are related to
this part.

From skiboot's perspective, a PCI slot provides (hot/fundamental/complete)
resets to EEH. The kernel learns whether skiboot supports the various resets on
a particular PCI slot through its device-tree node. If it does, EEH will utilize
the functionality provided by skiboot. Besides, the device-tree nodes have to
change in order to support PCI hotplug. For example, when a PCI adapter is
inserted into a slot, its device-tree node should be added to the system
dynamically. Conversely, the device-tree node should be removed from the system
when the PCI adapter is going offline. Since pci_dn and eeh_dev have the same
life cycle as PCI device nodes, they should be added/removed accordingly during
PCI hotplug. PATCH[25 - 38] do the related work.

The OF driver is changed to support unflattening an FDT blob for a sub-tree,
which is covered by PATCH[39 - 41].

The last patch is the standalone PCI hotplug driver for the PowerNV platform.
When a PCI adapter is removed from a PCI slot, which is invoked by a command in
userland, skiboot will power off the slot to save power and remove all
device-tree nodes for all PCI devices behind the slot. Conversely, when an
adapter is added, the power to the slot is turned on, the PCI devices behind
the slot are rescanned, and the device-tree nodes for those newly detected PCI
devices are built in skiboot. In both cases, one message will be sent to the
kernel by skiboot so that the kernel can adjust the device-tree accordingly.
At the same time, the kernel also has to deallocate or allocate the PE# and its
related resources for the removed/added PCI devices.

Changelog
=
v5:
   * Rebased to 4.1.rc6 and some unmerged patches as below:
 Alexey's DDW patchset (v11);
 Gavin's EEH error injection support (in mpe's next branch);
 Richard's EEH cleanup patches (in mpe's next branch);
 Richard's EEH support for VF (v7);
 Gavin's misc EEH fixes for 4.2;
   * The revision bases on skiboot corresponding patches (v7):
 https://patchwork.ozlabs.org/patch/480437/
   * Utilize OF overlay to update device-tree with help of newly introduced
 OPAL API opal_get_overlay_dt().
   * Split patches for easy review according to aik's comments.
   * Fix coding style issues reported by checkpatch.pl, as pointed out by aik.
   * Code cleanup and misc fixup according to aik's input.
v4:
   * Rebased to 4.1.RC1
   * Added API to unflatten FDT blob to device node sub-tree, which is attached
 the indicated parent device node. The original mechanism based on formatted
 string stream has been dropped.
   * PATCH[v3 09/21] (powerpc/eeh: Delay probing EEH device during hotplug)
 was picked out and sent to linux-ppc@ separately for review, as Richard's
 VF EEH support depends on it.
v3:
   * Rebased to 4.1.RC0
   * PowerNV PCI infrastructure is totally refactored in order to support PCI
 hotplug. The PowerNV hotplug driver is also reworked a lot because of
 the changes in skiboot in order to support PCI hotplug.


Gavin Shan (42):
  PCI: Add pcibios_setup_bridge()
  powerpc/powernv: Enable M64 on P7IOC
  powerpc/powernv: M64 support improvement
  powerpc/powernv: Trace consumed IO and M32 segments by PE
  powerpc/powernv: Simplify pnv_ioda_setup_pe_seg()
  powerpc/powernv: Improve IO and M32 mapping
  powerpc/powernv: Calculate PHB's DMA weight dynamically
  powerpc/powernv: DMA32 cleanup
  powerpc/powernv: pnv_ioda_setup_dma() configure one PE only
  powerpc/powernv: Trace DMA32 segments consumed by PE
  powerpc/powernv: Increase PE# capacity
  powerpc/pci: Cleanup on pci_controller_ops
  powerpc/pci: Override pcibios_setup_bridge()
  powerpc/powernv: Allocate PE# in deasending order
  powerpc/powernv: Reserve PE# for root bus
  powerpc/powernv: Create PEs dynamically
  powerpc/powernv: PE 

[PATCH v5 15/42] powerpc/powernv: Reserve PE# for root bus

2015-06-04 Thread Gavin Shan
pcibios_setup_bridge(), which is called to update PCI bridge windows,
will allocate PEs for PCI buses. The function isn't called for the root
bus, which doesn't have an upstream bridge. The patch reserves a PE# for
the root bus in advance so that we can set it up in the next patch.

Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com
---
v5:
  * Split from [PATCH v5 v4 06/21]
  * Replace "strip of" with "strip off" in comments
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 31 ++-
 arch/powerpc/platforms/powernv/pci.h  |  1 +
 2 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index 0d6539a..2eb8baa 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -230,6 +230,13 @@ static int pnv_ioda1_init_m64(struct pnv_phb *phb)
pr_warn(  Cannot strip M64 segment for reserved PE#%d\n,
phb-ioda.reserved_pe);
 
+   /* Strip off the segment used by PE for PCI root bus,
+* which is last supported PE#, or one next to the
+* reserved PE#
+*/
+   if (phb-ioda.root_pe != IODA_INVALID_PE)
+   r-end -= phb-ioda.m64_segsize;
+
return 0;
 
 fail:
@@ -287,6 +294,13 @@ static int pnv_ioda2_init_m64(struct pnv_phb *phb)
pr_warn(  Cannot strip M64 segment for reserved PE#%d\n,
phb-ioda.reserved_pe);
 
+   /* Strip off the segment used by PE for PCI root bus,
+* which is last supported PE#, or one next to the
+* reserved PE#
+*/
+   if (phb-ioda.root_pe != IODA_INVALID_PE)
+   r-end -= phb-ioda.m64_segsize;
+
return 0;
 
 fail:
@@ -3331,7 +3345,22 @@ static void __init pnv_pci_init_ioda_phb(struct 
device_node *np,
aux = memblock_virt_alloc(size, 0);
phb-ioda.pe_alloc = aux;
phb-ioda.pe_array = aux + pemap_off;
-   set_bit(phb-ioda.reserved_pe, phb-ioda.pe_alloc);
+
+   /* Choose number of PE for root bus, which shouldn't consume
+* any M64 resource. So we avoid picking low-end PE#, which
+* is usually binding with 64-bits prefetchable memory resources
+* closely.
+*/
+   pnv_ioda_reserve_pe(phb, phb-ioda.reserved_pe);
+   if (phb-ioda.reserved_pe == 0) {
+   phb-ioda.root_pe = phb-ioda.total_pe - 1;
+   pnv_ioda_reserve_pe(phb, phb-ioda.root_pe);
+   } else if (phb-ioda.reserved_pe == (phb-ioda.total_pe - 1)) {
+   phb-ioda.root_pe = phb-ioda.reserved_pe - 1;
+   pnv_ioda_reserve_pe(phb, phb-ioda.root_pe);
+   } else {
+   phb-ioda.root_pe = IODA_INVALID_PE;
+   }
 
INIT_LIST_HEAD(phb-ioda.pe_dma_list);
INIT_LIST_HEAD(phb-ioda.pe_list);
diff --git a/arch/powerpc/platforms/powernv/pci.h 
b/arch/powerpc/platforms/powernv/pci.h
index 590f778..e372b9f 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -133,6 +133,7 @@ struct pnv_phb {
struct {
/* Global bridge info */
unsigned inttotal_pe;
+   unsigned introot_pe;
unsigned intreserved_pe;
 
/* 32-bit MMIO window */
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v5 42/42] pci/hotplug: PowerPC PowerNV PCI hotplug driver

2015-06-04 Thread Gavin Shan
The patch adds a standalone driver to support PCI hotplug for the
PowerPC PowerNV platform, which runs on top of skiboot firmware.
The firmware identifies hotpluggable slots and marks their device
tree nodes with the properties "ibm,slot-pluggable" and
"ibm,reset-by-firmware". The driver simply scans the device-tree to
create/register PCI hotplug slots accordingly.

If the skiboot firmware doesn't support slot status retrieval, the PCI
slot device node shouldn't have the property "ibm,reset-by-firmware". In
that case, no valid PCI slots will be detected from the device tree.
The skiboot firmware doesn't export the capability to access attention
LEDs yet; that is still to be done.

Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com
---
v5:
  * Use OF OVERLAY to update the device-tree
  * Removed unnecessary header files
  * More meaningful return value from powernv_php_register_one()
  * Use pnv_pci_hotplug_notifier_{register, unregister}()
  * Decimal values for slot's states
  * Removed struct powernv_php_slot::release()
  * Merged two bool arguments to one for powernv_php_slot_enable()
  * Rename release_device_nodes_info() to remove_device_nodes_info()
  * Don't check on !len in slot_power_on_handler()
  * Handle return value in get_adapter_status() as suggested by aik
  * Drop invalid attention status in set_attention_status()
  * Renaming functions
  * Fixed coding style and added entry in MAINTAINERS reported by
checkpatch.pl
---
 MAINTAINERS|   6 +
 drivers/pci/hotplug/Kconfig|  12 +
 drivers/pci/hotplug/Makefile   |   4 +
 drivers/pci/hotplug/powernv_php.c  | 140 +++
 drivers/pci/hotplug/powernv_php.h  |  90 
 drivers/pci/hotplug/powernv_php_slot.c | 732 +
 6 files changed, 984 insertions(+)
 create mode 100644 drivers/pci/hotplug/powernv_php.c
 create mode 100644 drivers/pci/hotplug/powernv_php.h
 create mode 100644 drivers/pci/hotplug/powernv_php_slot.c

diff --git a/MAINTAINERS b/MAINTAINERS
index e308718..f5e1dce 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7481,6 +7481,12 @@ L:   linux-...@vger.kernel.org
 S: Supported
 F: Documentation/PCI/pci-error-recovery.txt
 
+PCI HOTPLUG DRIVER FOR POWERNV PLATFORM
+M: Gavin Shan gws...@linux.vnet.ibm.com
+L: linux-...@vger.kernel.org
+S: Supported
+F: drivers/pci/hotplug/powernv_php*
+
 PCI SUBSYSTEM
 M: Bjorn Helgaas bhelg...@google.com
 L: linux-...@vger.kernel.org
diff --git a/drivers/pci/hotplug/Kconfig b/drivers/pci/hotplug/Kconfig
index df8caec..ef55dae 100644
--- a/drivers/pci/hotplug/Kconfig
+++ b/drivers/pci/hotplug/Kconfig
@@ -113,6 +113,18 @@ config HOTPLUG_PCI_SHPC
 
  When in doubt, say N.
 
+config HOTPLUG_PCI_POWERNV
+   tristate PowerPC PowerNV PCI Hotplug driver
+   depends on PPC_POWERNV  EEH
+   help
+ Say Y here if you run PowerPC PowerNV platform that supports
+  PCI Hotplug
+
+ To compile this driver as a module, choose M here: the
+ module will be called powernv-php.
+
+ When in doubt, say N.
+
 config HOTPLUG_PCI_RPA
tristate RPA PCI Hotplug driver
depends on PPC_PSERIES  EEH
diff --git a/drivers/pci/hotplug/Makefile b/drivers/pci/hotplug/Makefile
index 4a9aa08..a69665e 100644
--- a/drivers/pci/hotplug/Makefile
+++ b/drivers/pci/hotplug/Makefile
@@ -14,6 +14,7 @@ obj-$(CONFIG_HOTPLUG_PCI_PCIE)+= pciehp.o
 obj-$(CONFIG_HOTPLUG_PCI_CPCI_ZT5550)  += cpcihp_zt5550.o
 obj-$(CONFIG_HOTPLUG_PCI_CPCI_GENERIC) += cpcihp_generic.o
 obj-$(CONFIG_HOTPLUG_PCI_SHPC) += shpchp.o
+obj-$(CONFIG_HOTPLUG_PCI_POWERNV)  += powernv-php.o
 obj-$(CONFIG_HOTPLUG_PCI_RPA)  += rpaphp.o
 obj-$(CONFIG_HOTPLUG_PCI_RPA_DLPAR)+= rpadlpar_io.o
 obj-$(CONFIG_HOTPLUG_PCI_SGI)  += sgi_hotplug.o
@@ -50,6 +51,9 @@ ibmphp-objs   :=  ibmphp_core.o   \
 acpiphp-objs   :=  acpiphp_core.o  \
acpiphp_glue.o
 
+powernv-php-objs   :=  powernv_php.o   \
+   powernv_php_slot.o
+
 rpaphp-objs:=  rpaphp_core.o   \
rpaphp_pci.o\
rpaphp_slot.o
diff --git a/drivers/pci/hotplug/powernv_php.c 
b/drivers/pci/hotplug/powernv_php.c
new file mode 100644
index 000..4cbff7a
--- /dev/null
+++ b/drivers/pci/hotplug/powernv_php.c
@@ -0,0 +1,140 @@
+/*
+ * PCI Hotplug Driver for PowerPC PowerNV platform.
+ *
+ * Copyright Gavin Shan, IBM Corporation 2015.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include linux/module.h
+
+#include asm/opal.h
+#include asm/pnv-pci.h
+
+#include powernv_php.h
+
+#define DRIVER_VERSION 0.1
+#define DRIVER_AUTHOR  Gavin 

[PATCH v5 22/42] powerpc/powernv: Move functions around

2015-06-04 Thread Gavin Shan
The patch moves functions related to releasing PE around so that
we don't need extra declaration for them in subsequent patches.
It doesn't introduce any behavioural changes.

Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com
---
v5:
  * Split from PATCH[v4 07/21]
  * Fixed coding style complained by checkpatch.pl
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 735 +++---
 1 file changed, 369 insertions(+), 366 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index 8a79403..3d5aec8d 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -132,6 +132,285 @@ static inline bool pnv_pci_is_mem_pref_64(unsigned long 
flags)
(IORESOURCE_MEM_64 | IORESOURCE_PREFETCH));
 }
 
+static inline void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_ioda_pe *pe)
+{
+   /* 01xb - invalidate TCEs that match the specified PE# */
+   unsigned long val = (0x4ull  60) | (pe-pe_number  0xFF);
+   struct pnv_phb *phb = pe-phb;
+
+   if (!phb-ioda.tce_inval_reg)
+   return;
+
+   mb(); /* Ensure above stores are visible */
+   __raw_writeq(cpu_to_be64(val), phb-ioda.tce_inval_reg);
+}
+
+static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group,
+   int num)
+{
+   struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
+   table_group);
+   struct pnv_phb *phb = pe-phb;
+   long ret;
+
+   pe_info(pe, Removing DMA window #%d\n, num);
+
+   ret = opal_pci_map_pe_dma_window(phb-opal_id, pe-pe_number,
+   (pe-pe_number  1) + num,
+   0/* levels */, 0/* table address */,
+   0/* table size */, 0/* page size */);
+   if (ret)
+   pe_warn(pe, Unmapping failed, ret = %ld\n, ret);
+   else
+   pnv_pci_ioda2_tce_invalidate_entire(pe);
+
+   pnv_pci_unlink_table_and_group(table_group-tables[num], table_group);
+
+   return ret;
+}
+
+static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable)
+{
+   uint16_t window_id = (pe-pe_number  1) + 1;
+   int64_t rc;
+
+   pe_info(pe, %sabling 64-bit DMA bypass\n, enable ? En : Dis);
+   if (enable) {
+   phys_addr_t top = memblock_end_of_DRAM();
+
+   top = roundup_pow_of_two(top);
+   rc = opal_pci_map_pe_dma_window_real(pe-phb-opal_id,
+pe-pe_number,
+window_id,
+pe-tce_bypass_base,
+top);
+   } else {
+   rc = opal_pci_map_pe_dma_window_real(pe-phb-opal_id,
+pe-pe_number,
+window_id,
+pe-tce_bypass_base,
+0);
+   }
+   if (rc)
+   pe_err(pe, OPAL error %lld configuring bypass window\n, rc);
+   else
+   pe-tce_bypass_enabled = enable;
+}
+
+static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev,
+struct pnv_ioda_pe *pe)
+{
+   struct iommu_table*tbl;
+   int64_t   rc;
+
+   tbl = pe-table_group.tables[0];
+   rc = pnv_pci_ioda2_unset_window(pe-table_group, 0);
+   if (rc)
+   pe_warn(pe, OPAL error %ld release DMA window\n, rc);
+
+   pnv_pci_ioda2_set_bypass(pe, false);
+   if (pe-table_group.group) {
+   iommu_group_put(pe-table_group.group);
+   BUG_ON(pe-table_group.group);
+   }
+   pnv_pci_ioda2_table_free_pages(tbl);
+   iommu_free_table(tbl, of_node_full_name(dev-dev.of_node));
+}
+
+static int pnv_ioda_set_one_peltv(struct pnv_phb *phb,
+ struct pnv_ioda_pe *parent,
+ struct pnv_ioda_pe *child,
+ bool is_add)
+{
+   const char *desc = is_add ? adding : removing;
+   uint8_t op = is_add ? OPAL_ADD_PE_TO_DOMAIN :
+ OPAL_REMOVE_PE_FROM_DOMAIN;
+   struct pnv_ioda_pe *slave;
+   long rc;
+
+   /* Parent PE affects child PE */
+   rc = opal_pci_set_peltv(phb-opal_id, parent-pe_number,
+   child-pe_number, op);
+   if (rc != OPAL_SUCCESS) {
+   pe_warn(child, OPAL error %ld %s to parent PELTV\n,
+   rc, desc);
+   return -ENXIO;
+   }
+
+   if (!(child-flags  PNV_IODA_PE_MASTER))
+   return 0;
+
+   /* Compound case: parent PE affects slave PEs */
+   list_for_each_entry(slave, child-slaves, list) {
+ 

[PATCH v5 10/42] powerpc/powernv: Trace DMA32 segments consumed by PE

2015-06-04 Thread Gavin Shan
On P7IOC, the whole DMA32 space is divided evenly into 256MB segments.
Each PE can consume one or multiple DMA32 segments. The current code
doesn't trace the available DMA32 segments or those consumed by
one particular PE, which conflicts with PCI hotplug.

The patch introduces one bitmap in the PHB to trace the DMA32 segments
available for allocation, and more fields in struct pnv_ioda_pe
to trace the DMA32 segments consumed by the PE, which are going to
be released when the PE is destroyed at PCI unplugging time.

Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com
---
v5:
  * Split from PATCH[v4 07/21]
  * Added space before open parenthesis reported by checkpatch.pl
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 24 +++-
 arch/powerpc/platforms/powernv/pci.h  |  4 
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index 63fad4d..2087c5c 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -2071,6 +2071,7 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
 
/* Grab a 32-bit TCE table */
pe-dma32_seg = base;
+   pe-dma32_segcount = segs;
pe_info(pe,  Setting up 32-bit TCE table at %08x..%08x\n,
(base  28), ((base + segs)  28) - 1);
 
@@ -2131,8 +2132,10 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb 
*phb,
return;
  fail:
/* XXX Failure: Try to fallback to 64-bit only ? */
-   if (pe-dma32_seg = 0)
+   if (pe-dma32_seg = 0) {
+   bitmap_clear(phb-ioda.dma32_segmap, base, segs);
pe-dma32_seg = -1;
+   }
if (tce_mem)
__free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs));
if (tbl) {
@@ -2531,6 +2534,7 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb 
*phb,
 
/* The PE will reserve all possible 32-bits space */
pe-dma32_seg = 0;
+   pe-dma32_segcount = 1;
pe_info(pe, Setting up 32-bit TCE table at 0..%08x\n,
phb-ioda.m32_pci_base);
 
@@ -2588,6 +2592,24 @@ static void pnv_ioda_setup_dma(struct pnv_phb *phb, 
struct pnv_ioda_pe *pe)
segs = (pe-dma32_weight *
phb-ioda.dma32_segcount) / dma_weight;
 
+   /* Allocate DMA32 segments as required. We might not have
+* enough available resource. However, we expect at least
+* one segment is allocated.
+*/
+   do {
+   base = bitmap_find_next_zero_area(
+   phb-ioda.dma32_segmap,
+   phb-ioda.dma32_segcount,
+   0, segs, 0);
+   if (base  phb-ioda.dma32_segcount) {
+   bitmap_set(phb-ioda.dma32_segmap, base, segs);
+   break;
+   }
+   } while (--segs);
+
+   if (!segs)
+   return;
+
pe_info(pe, DMA weight %d, assigned %d DMA32 segments\n,
pe-dma32_weight, segs);
pnv_pci_ioda_setup_dma_pe(phb, pe, base, segs);
diff --git a/arch/powerpc/platforms/powernv/pci.h 
b/arch/powerpc/platforms/powernv/pci.h
index 5ea33ca..94ef1df 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -61,6 +61,7 @@ struct pnv_ioda_pe {
/* Base iommu table, ie, 4K TCEs, 32-bit DMA */
struct iommu_table_group table_group;
int dma32_seg;
+   int dma32_segcount;
unsigned intdma32_weight;
 
/* 64-bit TCE bypass region */
@@ -161,6 +162,9 @@ struct pnv_phb {
unsigned long   m32_segmap[8];
unsigned long   m64_segmap[8];
 
+   /* DMA32 segment maps */
+   unsigned long   dma32_segmap[8];
+
/* IRQ chip */
int irq_chip_init;
struct irq_chip irq_chip;
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v5 11/42] powerpc/powernv: Increase PE# capacity

2015-06-04 Thread Gavin Shan
Each PHB maintains an array that helps to translate a RID (Request
ID) to a PE#, with the assumption that the PE# takes 8 bits, meaning
that we can't have more than 256 PEs. However, pci_dn->pe_number
already uses 4 bytes for the PE#.

The patch extends the PE# capacity so that each entry is
4 bytes long. Then we can use IODA_INVALID_PE to check whether an
entry in phb->pe_rmap[] is valid or not.

Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com
---
v5:
  * Split from [PATCH v5 v4 06/21]
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 5 -
 arch/powerpc/platforms/powernv/pci.h  | 5 ++---
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index 2087c5c..d8b0ef5 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -840,7 +840,7 @@ static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, 
struct pnv_ioda_pe *pe)
 
/* Clear the reverse map */
for (rid = pe-rid; rid  rid_end; rid++)
-   phb-ioda.pe_rmap[rid] = 0;
+   phb-ioda.pe_rmap[rid] = IODA_INVALID_PE;
 
/* Release from all parents PELT-V */
while (parent) {
@@ -3303,6 +3303,9 @@ static void __init pnv_pci_init_ioda_phb(struct 
device_node *np,
if (prop32)
phb-ioda.reserved_pe = be32_to_cpup(prop32);
 
+   /* Invalidate RID to PE# mapping */
+   memset(phb-ioda.pe_rmap, 0xff, sizeof(phb-ioda.pe_rmap));
+
/* Parse 64-bit MMIO range */
pnv_ioda_parse_m64_window(phb);
 
diff --git a/arch/powerpc/platforms/powernv/pci.h 
b/arch/powerpc/platforms/powernv/pci.h
index 94ef1df..590f778 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -175,11 +175,10 @@ struct pnv_phb {
struct list_headpe_list;
struct mutexpe_list_mutex;
 
-   /* Reverse map of PEs, will have to extend if
-* we are to support more than 256 PEs, indexed
+   /* Reverse map of PEs, indexed by
 * bus { bus, devfn }
 */
-   unsigned char   pe_rmap[0x1];
+   int pe_rmap[0x1];
 
/* Number of 32-bit DMA segments */
unsigned long   dma32_segcount;
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v5 29/42] powerpc/powernv: Issue fundamental reset in pnv_pci_reset_secondary_bus()

2015-06-04 Thread Gavin Shan
There might be PCI devices under the specified PCI bus that ask
for a fundamental reset. The patch iterates over all PCI devices under
the specified PCI bus and issues a fundamental reset to the PCI bus
if any PCI device is asking for that. Otherwise, a hot reset is
issued to the PCI bus.

Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com
---
v5:
  * Derived from PATCH[v4 10/21]
---
 arch/powerpc/platforms/powernv/eeh-powernv.c | 25 -
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c 
b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 18167c5..4eb53ed 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -1055,9 +1055,32 @@ static int pnv_eeh_vf_pe_reset(struct eeh_pe *pe, int 
option)
return 0;
 }
 
+static int pnv_pci_dev_reset_type(struct pci_dev *pdev, void *data)
+{
+   int *freset = data;
+
+   /*
+* Stop the iteration immediately if there is any
+* one PCI device requesting fundamental reset
+*/
+   *freset |= pdev-needs_freset;
+   return *freset;
+}
+
 void pnv_pci_reset_secondary_bus(struct pci_dev *dev)
 {
-   pnv_eeh_bridge_reset(dev, EEH_RESET_HOT);
+   int option = EEH_RESET_HOT;
+
+   if (dev-subordinate) {
+   int freset = 0;
+
+   pci_walk_bus(dev-subordinate,
+pnv_pci_dev_reset_type,
+freset);
+   option = freset ? EEH_RESET_FUNDAMENTAL : EEH_RESET_HOT;
+   }
+
+   pnv_eeh_bridge_reset(dev, option);
pnv_eeh_bridge_reset(dev, EEH_RESET_DEACTIVATE);
 }
 
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v5 16/42] powerpc/powernv: Create PEs dynamically

2015-06-04 Thread Gavin Shan
Currently, the PEs and their associated resources are assigned
in ppc_md.pcibios_fixup(), except those consumed by SRIOV VFs.
The function is called once after PCI probing and resource
assignment are finished. Obviously, it's not hotplug friendly.

The patch creates PEs dynamically in ppc_md.pcibios_setup_bridge(),
which is called during system bootup and PCI hotplug when the
PCI bridge's windows are updated after resource assignment/reassignment
is finished. For the partial hotplug case, where not all PCI devices
belonging to the PE are unplugged and plugged again, we just need
to unbind/bind the affected PCI devices from/to the corresponding
PE without creating a new one.

Besides, additional resources (e.g. M32) might be required in the
windows of the PCI bridge when the current adapter is unplugged and
a different adapter is inserted into a PCI slot, which is assumed
to be behind the root port or behind a downstream bridge of the PCIe
switch behind the root port. The parent bridge of the newly plugged
adapter would reject the request to add more resources, leading
to hotplug failure. To address the issue, the patch extends the windows
of the root port, or of the upstream port of the PCIe switch behind the
root port, to the PHB's windows when ppc_md.pcibios_setup_bridge() is
called.

There is no upstream bridge for the root bus, so we have to fix it up
before any PE is created, because the root bus PE is the ancestor
of all other PEs.

Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com
---
v5:
  * Derived from [PATCH v5 v4 06/21]
  * Correct accommodate reported by checkpatch.pl
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 203 +++---
 arch/powerpc/platforms/powernv/pci.h  |   1 +
 2 files changed, 128 insertions(+), 76 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index 2eb8baa..fd2f898 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1200,6 +1200,13 @@ static void pnv_ioda_setup_same_PE(struct pci_bus *bus, 
struct pnv_ioda_pe *pe)
pci_name(dev));
continue;
}
+
+   /* The PCI device might have been associated with the PE
+* in case of partial hotplug.
+*/
+   if (pdn-pe_number != IODA_INVALID_PE)
+   continue;
+
pdn-pe_number = pe-pe_number;
pe-dma32_weight += pnv_ioda_dev_dma_weight(dev);
if ((pe-flags  PNV_IODA_PE_BUS_ALL)  dev-subordinate)
@@ -1213,15 +1220,31 @@ static void pnv_ioda_setup_same_PE(struct pci_bus *bus, 
struct pnv_ioda_pe *pe)
  * subordinate PCI devices and buses. The second type of PE is normally
  * orgiriated by PCIe-to-PCI bridge or PLX switch downstream ports.
  */
-static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all)
+static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all)
 {
struct pci_controller *hose = pci_bus_to_host(bus);
struct pnv_phb *phb = hose-private_data;
struct pnv_ioda_pe *pe;
int pe_num = IODA_INVALID_PE;
 
+   /* For partial hotplug case, the PE instance hasn't been destroyed
+* yet. We shouldn't allocated a new one and assign resources to
+* it. The existing PE instance should be reused, but we should
+* associate the devices to the PE.
+*/
+   pe_num = phb-ioda.pe_rmap[bus-number  8];
+   if (pe_num != IODA_INVALID_PE) {
+   pe = phb-ioda.pe_array[pe_num];
+   pnv_ioda_setup_same_PE(bus, pe);
+   return NULL;
+   }
+
+   /* PE number for root bus should have been reserved */
+   if (pci_is_root_bus(bus))
+   pe_num = phb-ioda.root_pe;
+
/* Check if PE is determined by M64 */
-   if (phb-pick_m64_pe)
+   if (pe_num == IODA_INVALID_PE  phb-pick_m64_pe)
pe_num = phb-pick_m64_pe(phb, bus, all);
 
/* The PE number isn't pinned by M64 */
@@ -1231,7 +1254,7 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, 
int all)
if (pe_num == IODA_INVALID_PE) {
pr_warning(%s: Not enough PE# available for PCI bus 
%04x:%02x\n,
__func__, pci_domain_nr(bus), bus-number);
-   return;
+   return NULL;
}
 
pe = phb-ioda.pe_array[pe_num];
@@ -1255,7 +1278,7 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, 
int all)
if (pe_num)
pnv_ioda_free_pe(phb, pe_num);
pe-pbus = NULL;
-   return;
+   return NULL;
}
 
/* Associate it with all child devices */
@@ -1266,46 +1289,8 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, 
int all)
 
/* Link the PE */
pnv_ioda_link_pe_by_weight(phb, pe);
-}
-
-static void pnv_ioda_setup_PEs(struct pci_bus *bus)
-{
-   struct 

[PATCH v5 28/42] powerpc/powernv: Don't cover root bus in pnv_pci_reset_secondary_bus()

2015-06-04 Thread Gavin Shan
There should be an upstream bridge for the PCI bus for which
pnv_pci_reset_secondary_bus() is called. It's impossible to call
the function for root buses. So we needn't do a reset for root buses
in pnv_pci_reset_secondary_bus() and simply drop the logic.

Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com
---
v5:
  * Split from PATCH[v4 10/21]
---
 arch/powerpc/platforms/powernv/eeh-powernv.c | 12 ++--
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c 
b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 4669122..18167c5 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -1057,16 +1057,8 @@ static int pnv_eeh_vf_pe_reset(struct eeh_pe *pe, int 
option)
 
 void pnv_pci_reset_secondary_bus(struct pci_dev *dev)
 {
-   struct pci_controller *hose;
-
-   if (pci_is_root_bus(dev-bus)) {
-   hose = pci_bus_to_host(dev-bus);
-   pnv_eeh_root_reset(hose, EEH_RESET_HOT);
-   pnv_eeh_root_reset(hose, EEH_RESET_DEACTIVATE);
-   } else {
-   pnv_eeh_bridge_reset(dev, EEH_RESET_HOT);
-   pnv_eeh_bridge_reset(dev, EEH_RESET_DEACTIVATE);
-   }
+   pnv_eeh_bridge_reset(dev, EEH_RESET_HOT);
+   pnv_eeh_bridge_reset(dev, EEH_RESET_DEACTIVATE);
 }
 
 /**
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v5 40/42] drivers/of: Allow to specify root node in of_fdt_unflatten_tree()

2015-06-04 Thread Gavin Shan
The patch adds one more argument to of_fdt_unflatten_tree() to
specify the root node for the FDT blob that is going to be
unflattened. As a result, the function can be used in subsequent
patches to unflatten an FDT blob that represents a device sub-tree.

Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com
---
v5:
  * Newly introduced
---
 drivers/of/fdt.c   | 26 ++
 drivers/of/unittest.c  |  2 +-
 include/linux/of_fdt.h |  3 ++-
 3 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index b87c157..b6a6c59 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -380,9 +380,16 @@ static void *unflatten_dt_node(void *blob,
   struct device_node **nodepp,
   bool dryrun)
 {
+   unsigned long fpsize = 0;
+
+   if (dad)
+   fpsize = strlen(of_node_full_name(dad));
+   else
+   fpsize = 0;
+
cur_node_depth = 1;
return __unflatten_dt_node(blob, mem, poffset,
-  dad, nodepp, 0, dryrun);
+  dad, nodepp, fpsize, dryrun);
 }
 
 /**
@@ -393,13 +400,15 @@ static void *unflatten_dt_node(void *blob,
  * pointers of the nodes so the normal device-tree walking functions
  * can be used.
  * @blob: The blob to expand
+ * @dad: The root node of the created device_node tree
  * @mynodes: The device_node tree created by the call
  * @dt_alloc: An allocator that provides a virtual address to memory
  * for the resulting tree
  */
 static void __unflatten_device_tree(void *blob,
-struct device_node **mynodes,
-void * (*dt_alloc)(u64 size, u64 align))
+   struct device_node *dad,
+   struct device_node **mynodes,
+   void * (*dt_alloc)(u64 size, u64 align))
 {
unsigned long size;
int start;
@@ -425,7 +434,7 @@ static void __unflatten_device_tree(void *blob,
/* First pass, scan for size */
start = 0;
size = (unsigned long)unflatten_dt_node(blob, NULL, start,
-   NULL, NULL, true);
+   dad, NULL, true);
size = ALIGN(size, 4);
 
pr_debug(  size is %lx, allocating...\n, size);
@@ -440,7 +449,7 @@ static void __unflatten_device_tree(void *blob,
 
/* Second pass, do actual unflattening */
start = 0;
-   unflatten_dt_node(blob, mem, start, NULL, mynodes, false);
+   unflatten_dt_node(blob, mem, start, dad, mynodes, false);
if (be32_to_cpup(mem + size) != 0xdeadbeef)
pr_warning(End of tree marker overwritten: %08x\n,
   be32_to_cpup(mem + size));
@@ -462,9 +471,10 @@ static void *kernel_tree_alloc(u64 size, u64 align)
  * can be used.
  */
 void of_fdt_unflatten_tree(unsigned long *blob,
-   struct device_node **mynodes)
+  struct device_node *dad,
+  struct device_node **mynodes)
 {
-   __unflatten_device_tree(blob, mynodes, kernel_tree_alloc);
+   __unflatten_device_tree(blob, dad, mynodes, kernel_tree_alloc);
 }
 EXPORT_SYMBOL_GPL(of_fdt_unflatten_tree);
 
@@ -1095,7 +1105,7 @@ bool __init early_init_dt_scan(void *params)
  */
 void __init unflatten_device_tree(void)
 {
-   __unflatten_device_tree(initial_boot_params, of_root,
+   __unflatten_device_tree(initial_boot_params, NULL, of_root,
early_init_dt_alloc_memory_arch);
 
/* Get pointer to /chosen and /aliases nodes for use everywhere */
diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c
index 1801634..2270830 100644
--- a/drivers/of/unittest.c
+++ b/drivers/of/unittest.c
@@ -907,7 +907,7 @@ static int __init unittest_data_add(void)
not running tests\n, __func__);
return -ENOMEM;
}
-   of_fdt_unflatten_tree(unittest_data, unittest_data_node);
+   of_fdt_unflatten_tree(unittest_data, NULL, unittest_data_node);
if (!unittest_data_node) {
pr_warn(%s: No tree to attach; not running tests\n, __func__);
return -ENODATA;
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index 587ee50..8882640 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -38,7 +38,8 @@ extern bool of_fdt_is_big_endian(const void *blob,
 extern int of_fdt_match(const void *blob, unsigned long node,
const char *const *compat);
 extern void of_fdt_unflatten_tree(unsigned long *blob,
-  struct device_node **mynodes);
+ struct device_node *dad,
+ struct device_node **mynodes);
 
 /* TBD: Temporary export of fdt globals - remove when code fully 

[PATCH v5 27/42] powerpc/powernv: Simplify pnv_eeh_reset()

2015-06-04 Thread Gavin Shan
The patch simplifies pnv_eeh_reset() by dropping an unnecessary nested
if statement. No logic is changed by the patch.

Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com
---
v5:
  * Split from PATCH[v4 09/21]
  * Fixed quoted string split across lines from checkpatch.pl
---
 arch/powerpc/platforms/powernv/eeh-powernv.c | 65 +---
 1 file changed, 31 insertions(+), 34 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c 
b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 4feb533..4669122 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -1084,7 +1084,9 @@ void pnv_pci_reset_secondary_bus(struct pci_dev *dev)
 static int pnv_eeh_reset(struct eeh_pe *pe, int option)
 {
struct pci_controller *hose = pe-phb;
+   struct pnv_phb *phb = hose-private_data;
struct pci_bus *bus;
+   int64_t rc;
int ret;
 
/*
@@ -1101,44 +1103,39 @@ static int pnv_eeh_reset(struct eeh_pe *pe, int option)
 * reset. The side effect is that EEH core has to clear the frozen
 * state explicitly after BAR restore.
 */
-   if (pe-type  EEH_PE_PHB) {
-   ret = pnv_eeh_phb_reset(hose, option);
-   } else {
-   struct pnv_phb *phb;
-   s64 rc;
+   if (pe-type  EEH_PE_PHB)
+   return pnv_eeh_phb_reset(hose, option);
 
-   /*
-* The frozen PE might be caused by PAPR error injection
-* registers, which are expected to be cleared after hitting
-* frozen PE as stated in the hardware spec. Unfortunately,
-* that's not true on P7IOC. So we have to clear it manually
-* to avoid recursive EEH errors during recovery.
-*/
-   phb = hose-private_data;
-   if (phb-model == PNV_PHB_MODEL_P7IOC 
-   (option == EEH_RESET_HOT ||
-   option == EEH_RESET_FUNDAMENTAL)) {
-   rc = opal_pci_reset(phb-opal_id,
-   OPAL_RESET_PHB_ERROR,
-   OPAL_ASSERT_RESET);
-   if (rc != OPAL_SUCCESS) {
-   pr_warn(%s: Failure %lld clearing 
-   error injection registers\n,
-   __func__, rc);
-   return -EIO;
-   }
+   /*
+* The frozen PE might be caused by PAPR error injection
+* registers, which are expected to be cleared after hitting
+* frozen PE as stated in the hardware spec. Unfortunately,
+* that's not true on P7IOC. So we have to clear it manually
+* to avoid recursive EEH errors during recovery.
+*/
+   phb = hose-private_data;
+   if (phb-model == PNV_PHB_MODEL_P7IOC 
+   (option == EEH_RESET_HOT ||
+   option == EEH_RESET_FUNDAMENTAL)) {
+   rc = opal_pci_reset(phb-opal_id,
+   OPAL_RESET_PHB_ERROR,
+   OPAL_ASSERT_RESET);
+   if (rc != OPAL_SUCCESS) {
+   pr_warn(%s: Error %lld clearing errinjct registers\n,
+   __func__, rc);
+   return -EIO;
}
-
-   bus = eeh_pe_bus_get(pe);
-   if (pe-type  EEH_PE_VF)
-   ret = pnv_eeh_vf_pe_reset(pe, option);
-   else if (pci_is_root_bus(bus) ||
-   pci_is_root_bus(bus-parent))
-   ret = pnv_eeh_root_reset(hose, option);
-   else
-   ret = pnv_eeh_bridge_reset(bus-self, option);
}
 
+   bus = eeh_pe_bus_get(pe);
+   if (pe-type  EEH_PE_VF)
+   ret = pnv_eeh_vf_pe_reset(pe, option);
+   else if (pci_is_root_bus(bus) ||
+   pci_is_root_bus(bus-parent))
+   ret = pnv_eeh_root_reset(hose, option);
+   else
+   ret = pnv_eeh_bridge_reset(bus-self, option);
+
return ret;
 }
 
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v5 07/42] powerpc/powernv: Calculate PHB's DMA weight dynamically

2015-06-04 Thread Gavin Shan
For P7IOC, the whole available DMA32 space, which is below the
MEM32 space, is divided evenly into 256MB segments. How many
contiguous segments are assigned to one particular PE depends on
the PE's DMA weight, which is derived from the type of each
PCI device contained in the PE, and on the PHB's DMA weight, which
is the accumulated DMA weight of the PEs contained in the PHB. It
means that the PHB's DMA weight calculation depends on existing PEs,
which works perfectly now but is not hotplug friendly. As the
whole available DMA32 space can be assigned to one PE on PHB3,
we don't have the issue on PHB3.

The patch calculates the PHB's DMA weight dynamically, based on the
PCI devices contained in the PHB, so that it's hotplug friendly.
Meanwhile, the patch removes the code handling DMA weight
for PHB3 in pnv_ioda_setup_dma().

Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com
---
v5:
  * Split from PATCH[v4 5/21]
  * Fixed line over 80 characters reported from checkpatch.pl
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 90 +++
 arch/powerpc/platforms/powernv/pci.h  |  6 ---
 2 files changed, 44 insertions(+), 52 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index 46a5e10..d9ff739 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -979,8 +979,11 @@ static void pnv_ioda_link_pe_by_weight(struct pnv_phb *phb,
list_add_tail(pe-dma_link, phb-ioda.pe_dma_list);
 }
 
-static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev)
+static unsigned int pnv_ioda_dev_dma_weight(struct pci_dev *dev)
 {
+   struct pci_controller *hose = pci_bus_to_host(dev-bus);
+   struct pnv_phb *phb = hose-private_data;
+
/* This is quite simplistic. The base weight of a device
 * is 10. 0 means no DMA is to be accounted for it.
 */
@@ -993,14 +996,34 @@ static unsigned int pnv_ioda_dma_weight(struct pci_dev 
*dev)
if (dev-class == PCI_CLASS_SERIAL_USB_UHCI ||
dev-class == PCI_CLASS_SERIAL_USB_OHCI ||
dev-class == PCI_CLASS_SERIAL_USB_EHCI)
-   return 3;
+   return 3 * phb-ioda.tce32_count;
 
/* Increase the weight of RAID (includes Obsidian) */
if ((dev-class  8) == PCI_CLASS_STORAGE_RAID)
-   return 15;
+   return 15 * phb-ioda.tce32_count;
 
/* Default */
-   return 10;
+   return 10 * phb-ioda.tce32_count;
+}
+
+static int __pnv_ioda_phb_dma_weight(struct pci_dev *pdev, void *data)
+{
+   unsigned int *dma_weight = data;
+
+   *dma_weight += pnv_ioda_dev_dma_weight(pdev);
+   return 0;
+}
+
+static unsigned int pnv_ioda_phb_dma_weight(struct pnv_phb *phb)
+{
+   unsigned int dma_weight = 0;
+
+   if (!phb-hose-bus)
+   return dma_weight;
+
+   pci_walk_bus(phb-hose-bus,
+__pnv_ioda_phb_dma_weight, dma_weight);
+   return dma_weight;
 }
 
 #ifdef CONFIG_PCI_IOV
@@ -1159,7 +1182,7 @@ static void pnv_ioda_setup_same_PE(struct pci_bus *bus, 
struct pnv_ioda_pe *pe)
continue;
}
pdn-pe_number = pe-pe_number;
-   pe-dma_weight += pnv_ioda_dma_weight(dev);
+   pe-dma_weight += pnv_ioda_dev_dma_weight(dev);
if ((pe-flags  PNV_IODA_PE_BUS_ALL)  dev-subordinate)
pnv_ioda_setup_same_PE(dev-subordinate, pe);
}
@@ -1222,14 +1245,6 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, 
int all)
/* Put PE to the list */
list_add_tail(pe-list, phb-ioda.pe_list);
 
-   /* Account for one DMA PE if at least one DMA capable device exist
-* below the bridge
-*/
-   if (pe-dma_weight != 0) {
-   phb-ioda.dma_weight += pe-dma_weight;
-   phb-ioda.dma_pe_count++;
-   }
-
/* Link the PE */
pnv_ioda_link_pe_by_weight(phb, pe);
 }
@@ -2546,24 +2561,13 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb 
*phb,
 static void pnv_ioda_setup_dma(struct pnv_phb *phb)
 {
struct pci_controller *hose = phb-hose;
-   unsigned int residual, remaining, segs, tw, base;
struct pnv_ioda_pe *pe;
+   unsigned int dma_weight;
 
-   /* If we have more PE# than segments available, hand out one
-* per PE until we run out and let the rest fail. If not,
-* then we assign at least one segment per PE, plus more based
-* on the amount of devices under that PE
-*/
-   if (phb-ioda.dma_pe_count  phb-ioda.tce32_count)
-   residual = 0;
-   else
-   residual = phb-ioda.tce32_count -
-   phb-ioda.dma_pe_count;
-
-   pr_info(PCI: Domain %04x has %ld available 32-bit DMA segments\n,
-   hose-global_number, phb-ioda.tce32_count);
-   pr_info(PCI: %d PE# for a total weight of %d\n,
-   

Re: [PATCH] powerpc: Make doorbell check preemption safe

2015-06-04 Thread Shreyas B Prabhu


On Wednesday 20 May 2015 06:30 AM, Michael Neuling wrote:
 On Wed, 2015-05-20 at 00:30 +0530, Shreyas B. Prabhu wrote:
 Doorbell can be used to cause ipi on cpus which are sibling threads on
 the same core. So icp_native_cause_ipi checks if the destination cpu
 is a sibling thread of the current cpu and uses doorbell in such cases.

 But while running with CONFIG_PREEMPT=y, since this section is
 preemptible, we can run into issues if, after we check whether the
 destination cpu is a sibling cpu, the task gets migrated from a
 sibling cpu to a cpu on another core.

 Fix this by using get_cpu()/ put_cpu()
 
 Thanks.  Looks good and it boots for me.
 
 Signed-off-by: Michael Neuling mi...@neuling.org
 
mikey, Thanks!


mpe, if this looks ok, can you please pick it up?

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

RE: [PATCH 2/2] rheap: move rheap.c from arch/powerpc/lib/ to lib/

2015-06-04 Thread Zhao Qiang
On Thu, 2015-06-04 at 2:14PM, Wood Scott wrote:


 -Original Message-
 From: Wood Scott-B07421
 Sent: Thursday, June 04, 2015 2:14 PM
 To: Zhao Qiang-B45475
 Cc: linuxppc-dev@lists.ozlabs.org; Xie Xiaobo-R63061
 Subject: Re: [PATCH 2/2] rheap: move rheap.c from arch/powerpc/lib/ to
 lib/
 
 On Thu, 2015-06-04 at 00:56 -0500, Zhao Qiang-B45475 wrote:
  On Thu, 2015-05-28 at 1:37AM +0800, Wood Scott wrote:
 
 
   -Original Message-
   From: Wood Scott-B07421
   Sent: Thursday, May 28, 2015 1:37 AM
   To: Zhao Qiang-B45475
   Cc: linuxppc-dev@lists.ozlabs.org; Wood Scott-B07421; Xie Xiaobo-
   R63061
   Subject: Re: [PATCH 2/2] rheap: move rheap.c from arch/powerpc/lib/
   to lib/
  
   On Wed, 2015-05-27 at 17:12 +0800, Zhao Qiang wrote:
qe need to use the rheap, so move it to public directory.
  
   You've been previously asked to use lib/genalloc.c rather than
   introduce duplicate functionality into /lib.  NACK.
 
  Can't use lib/genalloc.c instead of rheap.c.
  Qe need to alloc muram of qe, not DIMM.
 
 lib/genalloc.h is not for allocating main memory.  It is for allocating
 special regions.  It is serving the same purpose as rheap.

I need to use the function rh_alloc_align; what is the equivalent function?
I only found gen_pool_first_fit_order_align.

 
 -Scott

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH] of/dynamic: Fix test for PPC_PSERIES

2015-06-04 Thread Geert Uytterhoeven
IS_ENABLED(PPC_PSERIES) always evaluates to false, as IS_ENABLED() is
supposed to be used with the full Kconfig symbol name, including the
CONFIG_ prefix.

Add the missing CONFIG_ prefix to fix this.

Fixes: a25095d451ece23b (of: Move dynamic node fixups out of powerpc and into 
common code)
Signed-off-by: Geert Uytterhoeven geert+rene...@glider.be
---
Did this bug cause any breakage?
If yes, the fix should go to stable (for v3.17 and later).
---
 drivers/of/dynamic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c
index dee658de72b3b221..1901f8870591fe30 100644
--- a/drivers/of/dynamic.c
+++ b/drivers/of/dynamic.c
@@ -226,7 +226,7 @@ void __of_attach_node(struct device_node *np)
phandle = __of_get_property(np, phandle, sz);
if (!phandle)
phandle = __of_get_property(np, linux,phandle, sz);
-   if (IS_ENABLED(PPC_PSERIES)  !phandle)
+   if (IS_ENABLED(CONFIG_PPC_PSERIES)  !phandle)
phandle = __of_get_property(np, ibm,phandle, sz);
np-phandle = (phandle  (sz = 4)) ? be32_to_cpup(phandle) : 0;
 
-- 
1.9.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v1 5/9]powerpc/powernv: nest pmu feature detection support

2015-06-04 Thread Madhavan Srinivasan



On Wednesday 03 June 2015 05:51 AM, Daniel Axtens wrote:

On Tue, 2015-06-02 at 21:29 +0530, Madhavan Srinivasan wrote:

Patch adds a device tree function to detect the nest pmu
support. Function will look for specific dt property ibm,ima-chip
as a detection mechanism for the nest pmu.

For Nest pmu, device tree will have two set of information.
1) Per-chip Homer address region for nest pmu counter collection area.
2) Supported Nest PMUs and events

What's HOMER?
Nest PMUs are configured via the PORE engine interface, and the PORE engine
collects the Nest counter values and updates them in the main memory that is
reserved for this use.

  
+static int nest_ima_detect_parse(void)

+{
+   const __be32 *gcid;
+   const __be64 *chip_ima_reg;
+   const __be64 *chip_ima_size;
+   struct device_node *dev;
+   int rc = -EINVAL, idx;
+
+   for_each_node_with_property(dev, ibm,ima-chip) {
+   gcid = of_get_property(dev, ibm,chip-id, NULL);
+   chip_ima_reg = of_get_property(dev, reg, NULL);
+   chip_ima_size = of_get_property(dev, size, NULL);
+   if ((!gcid) || (!chip_ima_reg) || (!chip_ima_size)) {
+   pr_err(%s: device %s missing property \n,
+   __func__, dev-full_name);

This is not a particularly informative error message. It'd be good if it
mentioned that it was for PMU.

Sure, will change it.


+   return rc
+   }
+
+   idx = (uint32_t)be32_to_cpup(gcid);
+   p8_perchip_nest_info[idx].pbase = be64_to_cpup(chip_ima_reg);
+   p8_perchip_nest_info[idx].size = be64_to_cpup(chip_ima_size);
+   p8_perchip_nest_info[idx].vbase = (uint64_t)
+   phys_to_virt(p8_perchip_nest_info[idx].pbase);
+
+   rc = 0;
+   }
+
+   return rc;

I'm not sure your rc handling is correct. As I understand it:
  - Start with rc = -EINVAL.
  - If your first node is missing a property, return -EINVAL.
  - Once your first node succeeds, set rc = 0
  - If any subsequent node is missing a property, return 0.
  - Return 0 if any node is successfully processed, otherwise return
-EINVAL.
Main loop is only for nodes with property ibm,ima-chip.  Not all the 
nodes will have this

property.

If that's what you intended (especially with regards to returning 0 when
a subsequent node is missing a property), a comment explaining it would
be great.
Yes, I will add a comment explaining it. But I did add this in the commit
message.



Also, why bail out if a property is missing on any node? Why not try all
of them and see if any succeed?
Only the Nest Unit nodes in the device tree will have this property.
The commit has the device tree hierarchy for the Nest instrumentation.
So if we don't find this property, then Nest instrumentation is not
supported, hence we bail out.



+}
+
  static int __init nest_pmu_init(void)
  {
int ret = 0;
@@ -256,6 +287,12 @@ static int __init nest_pmu_init(void)
  
  	cpumask_chip();
  
+	/*

+* Detect the Nest PMU feature
+*/
+   if (nest_ima_detect_parse())
+   return 0;
+
return 0;
  }

Zero is returned regardless of the output of nest_ima_detect_parse. Is
that intentional? If so, do you need the 'if'?
No, it should return ret, which should be initialized to an error value.
Will fix it.



  device_initcall(nest_pmu_init);

Regards,
Daniel Axtens


Thanks for the review
MAddy

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v1 8/9] powerpc/powernv: Add OPAL support for Nest PMU

2015-06-04 Thread Madhavan Srinivasan



On Wednesday 03 June 2015 06:24 AM, Daniel Axtens wrote:

+int64_t opal_nest_ima_control(uint32_t value);

If I'm understanding things correctly, you call this function in patch
3. Quoting from that patch:

+static void nest_init(void *dummy)
+{
+   opal_nest_ima_control(P8_NEST_ENGINE_START);
+}

Does this patch need to be moved earlier in the series?
I applied all the patches together and tested them, since the Makefile
inclusion is the final patch in the series. I guess it is better to
rearrange the series.


Have you tested that the series compiles at every point?
(I've found that this can be done quite easily with
  git rebase --interactive using x to run the compile)

Nice. will try this out.

Thanks for the review
Maddy


+
  /* Internal functions */
  extern int early_init_dt_scan_opal(unsigned long node, const char *uname,
   int depth, void *data);
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S 
b/arch/powerpc/platforms/powernv/opal-wrappers.S
index a7ade94..ce36a68 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -295,3 +295,4 @@ OPAL_CALL(opal_i2c_request, 
OPAL_I2C_REQUEST);
  OPAL_CALL(opal_flash_read,OPAL_FLASH_READ);
  OPAL_CALL(opal_flash_write,   OPAL_FLASH_WRITE);
  OPAL_CALL(opal_flash_erase,   OPAL_FLASH_ERASE);
+OPAL_CALL(opal_nest_ima_control,   OPAL_NEST_IMA_CONTROL);


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: of/dynamic: Fix test for PPC_PSERIES

2015-06-04 Thread Michael Ellerman
On Thu, 2015-04-06 at 09:34:41 UTC, Geert Uytterhoeven wrote:
 IS_ENABLED(PPC_PSERIES) always evaluates to false, as IS_ENABLED() is
 supposed to be used with the full Kconfig symbol name, including the
 CONFIG_ prefix.
 
 Add the missing CONFIG_ prefix to fix this.
 
 Fixes: a25095d451ece23b (of: Move dynamic node fixups out of powerpc and 
 into common code)
 Signed-off-by: Geert Uytterhoeven geert+rene...@glider.be
 ---

 Did this bug cause any breakage?
 If yes, the fix should go to stable (for v3.17 and later).

Yikes. Not that I've heard of. But it's reasonably new so possibly it's not hit
distros that folks tend to run on those machines.

I'm also not clear how it would break, it could be subtle and we've not noticed.

Nathan might have more of an idea (on CC).

On my machine here everything that has an ibm,phandle also has a linux,phandle,
so we wouldn't hit that code path. But I'm not sure how representative that box
is.

cheers
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH] cpufreq: qoriq: optimize the CPU frequency switching time

2015-06-04 Thread Yuantian.Tang
From: Tang Yuantian yuantian.t...@freescale.com

Each time the CPU switches its frequency, the clock nodes in
DTS are walked through to find the proper clock source. This is
very time-consuming; for example, it takes up to 500+ us on T4240.
Besides, the switching time varies from clock to clock.
To optimize this, each input clock of the CPU is buffered, so that
it can be picked up instantly when needed.

Each input clock of each CPU is stored in a pointer, which takes
4 or 8 bytes of memory, and normally there are only several
input clocks per CPU, so this will not take much memory either.

Signed-off-by: Tang Yuantian yuantian.t...@freescale.com
---
 drivers/cpufreq/qoriq-cpufreq.c | 32 +---
 1 file changed, 21 insertions(+), 11 deletions(-)

diff --git a/drivers/cpufreq/qoriq-cpufreq.c b/drivers/cpufreq/qoriq-cpufreq.c
index 88b21ae..358f075 100644
--- a/drivers/cpufreq/qoriq-cpufreq.c
+++ b/drivers/cpufreq/qoriq-cpufreq.c
@@ -27,11 +27,11 @@
 
 /**
  * struct cpu_data
- * @parent: the parent node of cpu clock
+ * @pclk: the parent clock of cpu
  * @table: frequency table
  */
 struct cpu_data {
-   struct device_node *parent;
+   struct clk **pclk;
struct cpufreq_frequency_table *table;
 };
 
@@ -196,7 +196,7 @@ static void freq_table_sort(struct cpufreq_frequency_table 
*freq_table,
 
 static int qoriq_cpufreq_cpu_init(struct cpufreq_policy *policy)
 {
-   struct device_node *np;
+   struct device_node *np, *pnode;
int i, count, ret;
u32 freq, mask;
struct clk *clk;
@@ -219,17 +219,23 @@ static int qoriq_cpufreq_cpu_init(struct cpufreq_policy 
*policy)
goto err_nomem2;
}
 
-   data-parent = of_parse_phandle(np, clocks, 0);
-   if (!data-parent) {
+   pnode = of_parse_phandle(np, clocks, 0);
+   if (!pnode) {
pr_err(%s: could not get clock information\n, __func__);
goto err_nomem2;
}
 
-   count = of_property_count_strings(data-parent, clock-names);
+   count = of_property_count_strings(pnode, clock-names);
+   data-pclk = kcalloc(count, sizeof(struct clk *), GFP_KERNEL);
+   if (!data-pclk) {
+   pr_err(%s: no memory\n, __func__);
+   goto err_node;
+   }
+
table = kcalloc(count + 1, sizeof(*table), GFP_KERNEL);
if (!table) {
pr_err(%s: no memory\n, __func__);
-   goto err_node;
+   goto err_pclk;
}
 
if (fmask)
@@ -238,7 +244,8 @@ static int qoriq_cpufreq_cpu_init(struct cpufreq_policy 
*policy)
mask = 0x0;
 
for (i = 0; i  count; i++) {
-   clk = of_clk_get(data-parent, i);
+   clk = of_clk_get(pnode, i);
+   data-pclk[i] = clk;
freq = clk_get_rate(clk);
/*
 * the clock is valid if its frequency is not masked
@@ -273,13 +280,16 @@ static int qoriq_cpufreq_cpu_init(struct cpufreq_policy 
*policy)
policy-cpuinfo.transition_latency = u64temp + 1;
 
of_node_put(np);
+   of_node_put(pnode);
 
return 0;
 
 err_nomem1:
kfree(table);
+err_pclk:
+   kfree(data-pclk);
 err_node:
-   of_node_put(data-parent);
+   of_node_put(pnode);
 err_nomem2:
policy-driver_data = NULL;
kfree(data);
@@ -293,7 +303,7 @@ static int __exit qoriq_cpufreq_cpu_exit(struct 
cpufreq_policy *policy)
 {
struct cpu_data *data = policy-driver_data;
 
-   of_node_put(data-parent);
+   kfree(data-pclk);
kfree(data-table);
kfree(data);
policy-driver_data = NULL;
@@ -307,7 +317,7 @@ static int qoriq_cpufreq_target(struct cpufreq_policy 
*policy,
struct clk *parent;
struct cpu_data *data = policy-driver_data;
 
-   parent = of_clk_get(data-parent, data-table[index].driver_data);
+   parent = data-pclk[data-table[index].driver_data];
return clk_set_parent(policy-clk, parent);
 }
 
-- 
2.1.0.27.g96db324

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH] of: return NUMA_NO_NODE from fallback of_node_to_nid()

2015-06-04 Thread Grant Likely
On Mon, 13 Apr 2015 11:49:31 -0500
, Rob Herring robherri...@gmail.com
 wrote:
 On Mon, Apr 13, 2015 at 8:38 AM, Konstantin Khlebnikov
 khlebni...@yandex-team.ru wrote:
  On 13.04.2015 16:22, Rob Herring wrote:
 
  On Wed, Apr 8, 2015 at 11:59 AM, Konstantin Khlebnikov
  khlebni...@yandex-team.ru wrote:
 
  Node 0 might be offline as well as any other numa node,
  in this case kernel cannot handle memory allocation and crashes.
 
  Signed-off-by: Konstantin Khlebnikov khlebni...@yandex-team.ru
  Fixes: 0c3f061c195c (of: implement of_node_to_nid as a weak function)
  ---
drivers/of/base.c  |2 +-
include/linux/of.h |5 -
2 files changed, 5 insertions(+), 2 deletions(-)
 
  diff --git a/drivers/of/base.c b/drivers/of/base.c
  index 8f165b112e03..51f4bd16e613 100644
  --- a/drivers/of/base.c
  +++ b/drivers/of/base.c
  @@ -89,7 +89,7 @@ EXPORT_SYMBOL(of_n_size_cells);
#ifdef CONFIG_NUMA
int __weak of_node_to_nid(struct device_node *np)
{
  -   return numa_node_id();
  +   return NUMA_NO_NODE;
 
 
  This is going to break any NUMA machine that enables OF and expects
  the weak function to work.
 
 
  Why? NUMA_NO_NODE == -1 -- this is the standard no-affinity signal.
  As I see it, the powerpc/sparc versions of of_node_to_nid return -1 if
  they cannot find out which node should be used.
 
 Ah, I was thinking those platforms were relying on the default
 implementation. I guess any real NUMA support is going to need to
 override this function. The arm64 patch series does that as well. We
 need to be sure this change is correct for metag which appears to be
 the only other OF enabled platform with NUMA support.
 
 In that case, then there is little reason to keep the inline and we
 can just always enable the weak function (with your change). It is
 slightly less optimal, but the few callers hardly appear to be hot
 paths.

Sounds like you're in agreement with this patch then? Shall I apply it?

g.

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v1 2/9]powerpc/powernv: nest pmu init function with cpumask attr

2015-06-04 Thread Madhavan Srinivasan



On Wednesday 03 June 2015 04:44 AM, Daniel Axtens wrote:

On Tue, 2015-06-02 at 21:29 +0530, Madhavan Srinivasan wrote:

Patch creates a file nest-pmu-c to contain nest pmu related functions.

nest-pmu.c

Patch adds nest pmu init function and cpumask function since Nest pmu units
are per-chip. First online cpu for a given node is picked as
designated thread to read the counter data.

Subsequent patch adds the hotplug support.

Cc: Michael Ellerman m...@ellerman.id.au
Cc: Benjamin Herrenschmidt b...@kernel.crashing.org
Cc: Paul Mackerras pau...@samba.org
Cc: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com
Cc: Anshuman Khandual khand...@linux.vnet.ibm.com
Cc: Stephane Eranian eran...@google.com
Cc: Preeti U Murthy pre...@linux.vnet.ibm.com
Cc: Ingo Molnar mi...@kernel.org
Cc: Peter Zijlstra pet...@infradead.org
Signed-off-by: Madhavan Srinivasan ma...@linux.vnet.ibm.com
---
  arch/powerpc/perf/nest-pmu.c | 70 
  1 file changed, 70 insertions(+)
  create mode 100644 arch/powerpc/perf/nest-pmu.c

diff --git a/arch/powerpc/perf/nest-pmu.c b/arch/powerpc/perf/nest-pmu.c
new file mode 100644
index 000..d4413bb
--- /dev/null
+++ b/arch/powerpc/perf/nest-pmu.c
@@ -0,0 +1,70 @@
+/*
+ * Nest Performance Monitor counter support for POWER8 processors.
+ *
+ * Copyright 2015 Madhavan Srinivasan, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+

Again, I think this is supposed to be v2 only.


+#include nest-pmu.h
+
+static cpumask_t cpu_mask_nest_pmu;
+
+static ssize_t cpumask_nest_pmu_get_attr(struct device *dev,
+   struct device_attribute *attr, char *buf)
+{
+   return cpumap_print_to_pagebuf(true, buf, cpu_mask_nest_pmu);
+}
+
+static DEVICE_ATTR(cpumask, S_IRUGO, cpumask_nest_pmu_get_attr, NULL);
+
+static struct attribute *cpumask_nest_pmu_attrs[] = {
+   dev_attr_cpumask.attr,
+   NULL,
+};
+
+static struct attribute_group cpumask_nest_pmu_attr_group = {
+   .attrs = cpumask_nest_pmu_attrs,
+};
+
+void cpumask_chip(void)
+{
+   const struct cpumask *l_cpumask;
+   int cpu, nid;
+
+   if (!cpumask_empty(cpu_mask_nest_pmu)) {
+   printk(KERN_INFO cpumask not empty\n);
+   return;
+   }
+
+   cpu_notifier_register_begin();
+   for_each_online_node(nid) {
+   l_cpumask = cpumask_of_node(nid);
+   cpu = cpumask_first(l_cpumask);
+   cpumask_set_cpu(cpu, cpu_mask_nest_pmu);
+   }
+
+   cpu_notifier_register_done();
+}

It's not clear from the name of this function what it does. I don't
think I actually understand what it does: it appears to register a
notifier on the first cpu of each node; maybe that should be reflected
in the name.


My bad. Hotplug notification registration happens in the next patch.
I could merge both into a single patch.




+static int __init nest_pmu_init(void)
+{
+   int ret = 0;
+
+   /*
+* Lets do this only if we are hypervisor
+*/
+   if (!cur_cpu_spec-oprofile_cpu_type ||
+  strcmp(cur_cpu_spec-oprofile_cpu_type, ppc64/power8) ||
+  !cpu_has_feature(CPU_FTR_HVMODE))
+   return ret;
+
+   cpumask_chip();
+
+   return 0;
+}

  - Where is ret set? I can only see it set when it's defined: the if
statement doesn't change the value of ret as far as I can see...

Yes. It should have been set to an error value. Will fix it.


  - Would it be clearer if you said
 !(strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power8") == 0)
That would make it clearer that you're trying to get a list of
possible failure conditions.
  

Yes. Sure will change it.


  - Is there really no better way to check if a CPU is a power 8 than a
string comparison?
One other way I can think of is using the PVR (Processor Version Register),
but then we will end up having multiple checks for Power8 itself, so this
is a lot simpler.



+device_initcall(nest_pmu_init);

Regards,
Daniel Axtens

Thanks for the review
Maddy

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH] of: clean-up unnecessary libfdt include paths

2015-06-04 Thread Grant Likely
On Wed, 3 Jun 2015 10:26:38 +0200
, Ralf Baechle r...@linux-mips.org
 wrote:
 On Wed, Jun 03, 2015 at 12:10:25AM -0500, Rob Herring wrote:
  Date:   Wed,  3 Jun 2015 00:10:25 -0500
  From: Rob Herring r...@kernel.org
  To: devicet...@vger.kernel.org, linux-ker...@vger.kernel.org
  Cc: Grant Likely grant.lik...@linaro.org, Rob Herring r...@kernel.org,
   Ralf Baechle r...@linux-mips.org, Benjamin Herrenschmidt
   b...@kernel.crashing.org, Paul Mackerras pau...@samba.org, Michael
   Ellerman m...@ellerman.id.au, linux-m...@linux-mips.org,
   linuxppc-dev@lists.ozlabs.org
  Subject: [PATCH] of: clean-up unnecessary libfdt include paths
  
  With the latest dtc import include fixups, it is no longer necessary to
  add explicit include paths to use libfdt. Remove these across the
  kernel.
  
  Signed-off-by: Rob Herring r...@kernel.org
  Cc: Ralf Baechle r...@linux-mips.org
  Cc: Benjamin Herrenschmidt b...@kernel.crashing.org
  Cc: Paul Mackerras pau...@samba.org
  Cc: Michael Ellerman m...@ellerman.id.au
  Cc: Grant Likely grant.lik...@linaro.org
  Cc: linux-m...@linux-mips.org
  Cc: linuxppc-dev@lists.ozlabs.org
 
 For the MIPS bits;
 
 Acked-by: Ralf Baechle r...@linux-mips.org
 
   Ralf


Acked-by: Grant Likely grant.lik...@linaro.org
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH kernel v11 17/34] powerpc/spapr: vfio: Switch from iommu_table to new iommu_table_group

2015-06-04 Thread Alexey Kardashevskiy

On 06/01/2015 04:24 PM, David Gibson wrote:

On Fri, May 29, 2015 at 06:44:41PM +1000, Alexey Kardashevskiy wrote:

Modern IBM POWERPC systems support multiple (currently two) TCE tables
per IOMMU group (a.k.a. PE). This adds a iommu_table_group container
for TCE tables. Right now just one table is supported.

For IODA, instead of embedding iommu_table, the new iommu_table_group
keeps pointers to those. The iommu_table structs are allocated
dynamically now by a pnv_pci_table_alloc() helper as PCI hotplug
code (for EEH recovery) and SRIOV are supported there.

For P5IOC2, both iommu_table_group and iommu_table are embedded into
PE struct. As there is no EEH and SRIOV support for P5IOC2,
iommu_free_table() should not be called on iommu_table struct pointers
so we can keep it embedded in pnv_phb::p5ioc2.

For pSeries, this replaces multiple calls of kzalloc_node() with a new
iommu_pseries_group_alloc() helper and stores the table group struct
pointer into the pci_dn struct. For release, a iommu_table_group_free()
helper is added.

This moves iommu_table struct allocation from SR-IOV code to
the generic DMA initialization code in pnv_pci_ioda2_setup_dma_pe.

This replaces a single pointer to iommu_group with a list of
iommu_table_group structs. For now it is just a single iommu_table_group
in this list but later with TCE table sharing enabled, the list will
keep all the IOMMU groups which use the particular table. The list
uses iommu_table_group_link structs rather than iommu_table_group::next
as a VFIO container may have 2 IOMMU tables, each will have its own list
head pointer as it is mainly for TCE invalidation code which should
walk through all attached groups and invalidate TCE cache so
the table has to keep the list head pointer. The other option would
be storing list head in a VFIO container but it would not work as
the platform code (which does TCE table update and invalidation) has
no idea about VFIO.

This should cause no behavioural change.

Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru
[aw: for the vfio related changes]
Acked-by: Alex Williamson alex.william...@redhat.com
Reviewed-by: David Gibson da...@gibson.dropbear.id.au
Reviewed-by: Gavin Shan gws...@linux.vnet.ibm.com


It looks like this commit message doesn't match the code - it seems
like an older or newer version of the message from the previous patch.



This patch seems instead to be about changing the table_group <-> table
relationship from 1:1 to many:many.



I'll put this:

===
So far one TCE table could only be used by one IOMMU group. However
IODA2 hardware allows programming the same TCE table address to
multiple PE allowing sharing tables.

This replaces a single pointer to a group in a iommu_table struct
with a linked list of groups which provides the way of invalidating
TCE cache for every PE when an actual TCE table is updated. This adds 
pnv_pci_link_table_and_group() and pnv_pci_unlink_table_and_group() helpers 
to manage the list. However without VFIO, it is still going
to be a single IOMMU group per iommu_table.

This changes iommu_add_device() to add a device to a first group
from the group list of a table as it is only called from the platform
init code or PCI bus notifier and at these moments there is only
one group per table.

This does not change TCE invalidation code to loop through all
attached groups in order to simplify this patch and because
it is not really needed in most cases. IODA2 is fixed in a later
patch.

===



---
Changes:
v10:
* iommu_table is not embedded into iommu_table_group but allocated
dynamically
* iommu_table allocation is moved to a single place for IODA2's
pnv_pci_ioda_setup_dma_pe where it belongs to
* added list of groups into iommu_table; most of the code just looks at
the first item to keep the patch simpler

v9:
* s/it_group/it_table_group/
* added and used iommu_table_group_free(), from now iommu_free_table()
is only used for VIO
* added iommu_pseries_group_alloc()
* squashed powerpc/iommu: Introduce iommu_table_alloc() helper into this
---
  arch/powerpc/include/asm/iommu.h|   8 +-
  arch/powerpc/kernel/iommu.c |   9 +-
  arch/powerpc/platforms/powernv/pci-ioda.c   |  45 ++
  arch/powerpc/platforms/powernv/pci-p5ioc2.c |   3 +
  arch/powerpc/platforms/powernv/pci.c|  76 +
  arch/powerpc/platforms/powernv/pci.h|   7 ++
  arch/powerpc/platforms/pseries/iommu.c  |  33 +++-
  drivers/vfio/vfio_iommu_spapr_tce.c | 122 
  8 files changed, 242 insertions(+), 61 deletions(-)

diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index 5a7267f..44a20cc 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -91,7 +91,7 @@ struct iommu_table {
struct iommu_pool pools[IOMMU_NR_POOLS];
unsigned long *it_map;   /* A simple allocation bitmap for now */
unsigned long  it_page_shift;/* table iommu 

Re: [PATCH v1 3/9]powerpc/powernv: Add cpu hotplug support

2015-06-04 Thread Madhavan Srinivasan



On Wednesday 03 June 2015 05:08 AM, Daniel Axtens wrote:

On Tue, 2015-06-02 at 21:29 +0530, Madhavan Srinivasan wrote:

Patch adds cpu hotplug support. First online cpu in a node is picked as
designated thread to read the Nest pmu counter data, and at the time of
hotplug, next online cpu from the same node is picked up.

I'm not sure I understand this commit message. I think I understand the
first half - I think you're trying to say: At boot, the first online

I will rephrase it.


CPU in a node is picked as the designated thread to read the Nest PMU
counter data. I'm not sure I understand the second half: picked up
how and for what?

When the designated thread is hotplugged, next online cpu in the
same node is picked up as the designated thread to read the PMU counter 
data.



(I did eventually figure it out by reading the patch, but it'd be really
nice to have it spelled out nicely in the commit message.)

Sure. Will fix the commit message.


+static void nest_exit_cpu(int cpu)
+{
+   int i, nid, target = -1;
+   const struct cpumask *l_cpumask;
+   int src_chipid;
+
+   if (!cpumask_test_and_clear_cpu(cpu, cpu_mask_nest_pmu))
+   return;
+
+   nid = cpu_to_node(cpu);
+   src_chipid = topology_physical_package_id(cpu);
+   l_cpumask = cpumask_of_node(nid);
+   for_each_cpu(i, l_cpumask) {
+   if (i == cpu)
+   continue;
+   if (src_chipid == topology_physical_package_id(i)) {
+   target = i;
+   break;
+   }
+   }

Some comments here would really help. I think you're looking for the
first CPU that's (a) not the cpu you're removing and (b) on the same
physical package, so sharing the same nest, but it took me a lot of
staring at the code to figure it out.

My bad. I will comment it.


+
+   cpumask_set_cpu(target, cpu_mask_nest_pmu);
+   nest_change_cpu_context (cpu, target);
+   return;

Return is redundant here and in several other functions in this patch.

Ok.


+}
+
+static void nest_init_cpu(int cpu)
+{
+   int i, src_chipid;
+
+   src_chipid = topology_physical_package_id(cpu);
+   for_each_cpu(i, cpu_mask_nest_pmu)
+   if (src_chipid == topology_physical_package_id(i))
+   return;
+
+   cpumask_set_cpu(cpu, cpu_mask_nest_pmu);
+   nest_change_cpu_context ( -1, cpu);

Weird extra spaces here.

Yes. Nice catch. Will fix it.


+   return;
+}

This function could also do with a comment: AFAICT, you've structured
the function so that it only calls nest_change_cpu_context if you've
picked up a cpu on a physical package that previously didn't have a nest
pmu thread on it.


+
+static int nest_cpu_notifier(struct notifier_block *self,
+   unsigned long action, void *hcpu)
+{
+   unsigned int cpu = (long)hcpu;

What's with this cast? You cast it to a long and then assign it to an
unsigned int?


Facepalm. My bad, will fix it.

+
+   switch (action  ~CPU_TASKS_FROZEN) {
+   case CPU_DOWN_FAILED:

Is it necessary to move the thread back if the CPU fails to go down?

No, it is not needed.

You've moved it to another online CPU already; what's the benefit of
paying the time-penalty to move it back?
Why should we go through that? Since there is no restriction saying that
only the first cpu has to read it, why should we complicate it further
instead of moving to another cpu in the same node?


+   case CPU_STARTING:
+   nest_init_cpu(cpu);
+   break;
+   case CPU_DOWN_PREPARE:
+   nest_exit_cpu(cpu);
+   break;
+   default:
+   break;
+   }
+
+   return NOTIFY_OK;
+}
  

Now, I don't know the details of CPU hotplug _at all_, so this may be
stupid, but what happens if you hotplug a lot of CPUs all at once? Is
everything properly serialised or is this going to race and end up with
either multiple cpus trying to do PMU or no cpus?
I did test the code with a hotplug test. If all the cpus in a node are
offlined, then we will have no cpus designated for that node.

Thanks for review
Maddy

Regards,
Daniel Axtens




___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v1 4/9]powerpc/powernv: Add generic nest pmu ops

2015-06-04 Thread Madhavan Srinivasan



On Wednesday 03 June 2015 05:33 AM, Daniel Axtens wrote:

On Tue, 2015-06-02 at 21:29 +0530, Madhavan Srinivasan wrote:

Patch adds generic nest pmu functions and format attribute.


I'm not sure this commit message accurately reflects the content of the
patch. At any rate, please could you:
  - say what the patch adds the functions and attributes to.
  - phrase your message as "Add generic ..." not "Patch adds
generic ...": see
https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/Documentation/SubmittingPatches#n155


Sure. Will rephrase it.


  
+PMU_FORMAT_ATTR(event, config:0-20);

+struct attribute *p8_nest_format_attrs[] = {
+   format_attr_event.attr,
+   NULL,
+};
+
+struct attribute_group p8_nest_format_group = {
+   .name = format,
+   .attrs = p8_nest_format_attrs,
+};

Can these structs be constified?

I guess it can. Will check it out.


+
+int p8_nest_event_init(struct perf_event *event)
+{
+   int chip_id;
+
+   if (event-attr.type != event-pmu-type)
+   return -ENOENT;
+
+   /* Sampling not supported yet */
+   if (event-hw.sample_period)
+   return -EINVAL;
+
+   /* unsupported modes and filters */
+   if (event-attr.exclude_user   ||
+   event-attr.exclude_kernel ||
+   event-attr.exclude_hv ||
+   event-attr.exclude_idle   ||
+   event-attr.exclude_host   ||
+   event-attr.exclude_guest  ||
+   event-attr.sample_period) /* no sampling */
+   return -EINVAL;

You test for sample period twice here.

Yes right. I will remove it.


+
+   if (event-cpu  0)
+   return -EINVAL;
+
+   chip_id = topology_physical_package_id(event-cpu);
+   event-hw.event_base = event-attr.config +
+   p8_perchip_nest_info[chip_id].vbase;
+
+   return 0;
+}
+
+void p8_nest_read_counter(struct perf_event *event)
+{
+   u64 *addr;
+   u64 data = 0;
+
+   addr = (u64 *)event-hw.event_base;
+   data = __be64_to_cpu((uint64_t)*addr);
+   local64_set(event-hw.prev_count, data);
+}
+
+void p8_nest_perf_event_update(struct perf_event *event)
+{
+   u64 counter_prev, counter_new, final_count;
+   uint64_t *addr;
+
+   addr = (u64 *)event-hw.event_base;
+   counter_prev = local64_read(event-hw.prev_count);
+   counter_new = __be64_to_cpu((uint64_t)*addr);
+   final_count = counter_new - counter_prev;
+
+   local64_set(event-hw.prev_count, counter_new);
+   local64_add(final_count, event-count);
+}
+
+void p8_nest_event_start(struct perf_event *event, int flags)
+{
+   event-hw.state = 0;
+   p8_nest_read_counter(event);
+}
+
+void p8_nest_event_stop(struct perf_event *event, int flags)
+{
+   p8_nest_perf_event_update(event);
+}
+
+int p8_nest_event_add(struct perf_event *event, int flags)
+{
+   p8_nest_event_start(event, flags);
+   return 0;
+}
+
+void p8_nest_event_del(struct perf_event *event, int flags)
+{
+   p8_nest_event_stop(event, flags);

Is this necessary?

Stop calls update, which I guess makes sense as it finalises the value.
But if the event is being deleted anyway, why not just do nothing here?
These Nest PMUs do not support sampling. IIUC, the perf record
interface uses the event start/stop ops. In the case of the perf stat
interface, the event add/del ops are used to enable and disable the
counters. So when we disable or delete an event, we update the event
counter with the delta value.



+}
+

Regards,
Daniel Axtens


Thanks for the review
Maddy

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v1 6/9]powerpc/powernv: dt parser function for nest pmu and its events

2015-06-04 Thread Madhavan Srinivasan



On Wednesday 03 June 2015 06:16 AM, Daniel Axtens wrote:

+static int nest_pmu_create(struct device_node *dev, int pmu_index)
+{
+   struct ppc64_nest_ima_events **p8_events_arr;
+   struct ppc64_nest_ima_events *p8_events;
+   struct property *pp;
+   char *buf;
+   const __be32 *lval;
+   u32 val;
+   int len, idx = 0;
+   struct nest_pmu *pmu_ptr;
+   const char *start, *end;
+
+   if (!dev)
+   return -EINVAL;
+
+   pmu_ptr = kzalloc(sizeof(struct nest_pmu), GFP_KERNEL);
+   if (!pmu_ptr)
+   return -ENOMEM;
+
+   /* Needed for hotplug/migration */
+   per_nestpmu_arr[pmu_index] = pmu_ptr;
+
+   p8_events_arr = kzalloc((sizeof(struct ppc64_nest_ima_events) * 64),
+   GFP_KERNEL);
+   if (!p8_events_arr)
+   return -ENOMEM;
+   p8_events = (struct ppc64_nest_ima_events *)p8_events_arr;

I think you're trying to get the first element of the array here: why
not just `p8_events = p8_events_arr[0];`?

Yes. Will change it.

+
+   /*
+* Loop through each property
+*/
+   for_each_property_of_node(dev, pp) {
+   start = pp-name;
+   end = start + strlen(start);
+   len = strlen(start);
+
+   if (!strcmp(pp-name, name)) {
+   if (!pp-value ||
+  (strnlen(pp-value, pp-length) = pp-length))
+   return -EINVAL;
+
+   buf = kzalloc(MAX_PMU_NAME_LEN, GFP_KERNEL);
+   if (!buf)
+   return -ENOMEM;
+
+   sprintf(buf, Nest_%s, (char *)pp-value);
+   pmu_ptr-pmu.name = (char *)buf;
+   pmu_ptr-attr_groups[1] = p8_nest_format_group;
+   pmu_ptr-attr_groups[2] = cpumask_nest_pmu_attr_group;
+   }
+
+   /* Skip these, we dont need it */
+   if (!strcmp(pp-name, name) ||
+   !strcmp(pp-name, phandle) ||
+   !strcmp(pp-name, device_type) ||
+   !strcmp(pp-name, linux,phandle))
+   continue;
+
+   buf = kzalloc(MAX_PMU_NAME_LEN, GFP_KERNEL);
+   if (!buf)
+   return -ENOMEM;
+
+   if (strncmp(pp-name, unit., 5) == 0) {
+   start += 5;
+   len = strlen(start);
+   strncpy(buf, start, strlen(start));

You've just saved strlen(start), you could just use len. This also
applies in the next case below.

Yes. That is true.


+   p8_events-ev_name = buf;
+
+   if (!pp-value ||
+(strnlen(pp-value, pp-length) = pp-length))
+   return -EINVAL;

The strnlen will never be greater than pp->length, so the only case this
will hit is if strnlen(pp->value, pp->length) == pp->length. This also
applies again below.


True will change it.



+
+   buf = kzalloc(MAX_PMU_NAME_LEN, GFP_KERNEL);
+   if (!buf)
+   return -ENOMEM;
+
+   strncpy(buf, (const char *)pp-value, pp-length);
+   p8_events-ev_value = buf;
+   idx++;
+   p8_events++;
+
+   } else if (strncmp(pp-name, scale., 6) == 0) {
+   start += 6;
+   len = strlen(start);
+   strncpy(buf, start, strlen(start));
+   p8_events-ev_name = buf;
+
+   if (!pp-value ||
+  (strnlen(pp-value, pp-length) = pp-length))
+   return -EINVAL;
+
+   buf = kzalloc(MAX_PMU_NAME_LEN, GFP_KERNEL);
+   if (!buf)
+   return -ENOMEM;
+
+   strncpy(buf, (const char *)pp-value, pp-length);
+   p8_events-ev_value = buf;
+   idx++;
+   p8_events++;
+
+   } else {
+   strncpy(buf, start, len);

This is the only case where you actually use the original version of len.
This makes me think you could drop the variable entirely and just use
strlen(start) in all cases. I also don't see where `end` is used
anywhere in this function: could that be dropped?
Correct. I guess we can drop both len and end. I used end for my 
prints during debug.



+   p8_events-ev_name = buf;
+   lval = of_get_property(dev, pp-name, NULL);
+   val = (uint32_t)be32_to_cpup(lval);
+
+   /*
+   * Use DT property value as the event
+   */

I'm not sure if this is my mailer, but it looks like 

Re: of: clean-up unnecessary libfdt include paths

2015-06-04 Thread Michael Ellerman
On Wed, 2015-03-06 at 05:10:25 UTC, Rob Herring wrote:
 With the latest dtc import include fixups, it is no longer necessary to
 add explicit include paths to use libfdt. Remove these across the
 kernel.

What are the "latest dtc import include fixups"?

 diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
 index c1ebbda..c16e836 100644
 --- a/arch/powerpc/kernel/Makefile
 +++ b/arch/powerpc/kernel/Makefile
 @@ -2,7 +2,6 @@
  # Makefile for the linux kernel.
  #
  
 -CFLAGS_prom.o= -I$(src)/../../../scripts/dtc/libfdt
  CFLAGS_ptrace.o  += -DUTS_MACHINE='$(UTS_MACHINE)'
  
  subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror

Acked-by: Michael Ellerman m...@ellerman.id.au


cheers
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v1 4/9]powerpc/powernv: Add generic nest pmu ops

2015-06-04 Thread Madhavan Srinivasan



On Wednesday 03 June 2015 05:33 AM, Daniel Axtens wrote:

On Tue, 2015-06-02 at 21:29 +0530, Madhavan Srinivasan wrote:

Patch adds generic nest pmu functions and format attribute.


I'm not sure this commit message accurately reflects the content of the
patch. At any rate, please could you:
  - say what the patch adds the functions and attributes to.
  - phrase your message as "Add generic ..." not "Patch adds
generic ...": see
https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/Documentation/SubmittingPatches#n155


I will rephrase the commit message.

  
+PMU_FORMAT_ATTR(event, config:0-20);

+struct attribute *p8_nest_format_attrs[] = {
+   format_attr_event.attr,
+   NULL,
+};
+
+struct attribute_group p8_nest_format_group = {
+   .name = format,
+   .attrs = p8_nest_format_attrs,
+};

Can these structs be constified?

I guess so. Will try it out.


+
+int p8_nest_event_init(struct perf_event *event)
+{
+   int chip_id;
+
+   if (event-attr.type != event-pmu-type)
+   return -ENOENT;
+
+   /* Sampling not supported yet */
+   if (event-hw.sample_period)
+   return -EINVAL;
+
+   /* unsupported modes and filters */
+   if (event-attr.exclude_user   ||
+   event-attr.exclude_kernel ||
+   event-attr.exclude_hv ||
+   event-attr.exclude_idle   ||
+   event-attr.exclude_host   ||
+   event-attr.exclude_guest  ||
+   event-attr.sample_period) /* no sampling */
+   return -EINVAL;

You test for sample period twice here.

My bad. Will remove it.


+
+   if (event-cpu  0)
+   return -EINVAL;
+
+   chip_id = topology_physical_package_id(event-cpu);
+   event-hw.event_base = event-attr.config +
+   p8_perchip_nest_info[chip_id].vbase;
+
+   return 0;
+}
+
+void p8_nest_read_counter(struct perf_event *event)
+{
+   u64 *addr;
+   u64 data = 0;
+
+   addr = (u64 *)event-hw.event_base;
+   data = __be64_to_cpu((uint64_t)*addr);
+   local64_set(event-hw.prev_count, data);
+}
+
+void p8_nest_perf_event_update(struct perf_event *event)
+{
+   u64 counter_prev, counter_new, final_count;
+   uint64_t *addr;
+
+   addr = (u64 *)event-hw.event_base;
+   counter_prev = local64_read(event-hw.prev_count);
+   counter_new = __be64_to_cpu((uint64_t)*addr);
+   final_count = counter_new - counter_prev;
+
+   local64_set(event-hw.prev_count, counter_new);
+   local64_add(final_count, event-count);
+}
+
+void p8_nest_event_start(struct perf_event *event, int flags)
+{
+   event-hw.state = 0;
+   p8_nest_read_counter(event);
+}
+
+void p8_nest_event_stop(struct perf_event *event, int flags)
+{
+   p8_nest_perf_event_update(event);
+}
+
+int p8_nest_event_add(struct perf_event *event, int flags)
+{
+   p8_nest_event_start(event, flags);
+   return 0;
+}
+
+void p8_nest_event_del(struct perf_event *event, int flags)
+{
+   p8_nest_event_stop(event, flags);

Is this necessary?

Stop calls update, which I guess makes sense as it finalises the value.
But if the event is being deleted anyway, why not just do nothing here?
IIUC, perf record will use the event start/stop interface. In the case of
perf stat (for PMUs which do not support sampling), the event add/del
interface is used. So when an event is disabled or deleted, the event
count should get updated with the delta value.


+}
+

Regards,
Daniel Axtens

Thanks for the review
Maddy

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [3/3,v3] powerpc/powernv: Add opal-prd channel

2015-06-04 Thread Michael Ellerman
On Fri, 2015-29-05 at 03:55:59 UTC, Jeremy Kerr wrote:
 This change adds a char device to access the PRD (processor runtime
 diagnostics) channel to OPAL firmware.
 
 Includes contributions from Vaidyanathan Srinivasan, Neelesh Gupta &
 Vishal Kulkarni.
 
 Signed-off-by: Neelesh Gupta neele...@linux.vnet.ibm.com
 Signed-off-by: Jeremy Kerr j...@ozlabs.org
 Acked-by: Stewart Smith stew...@linux.vnet.ibm.com

Sorry, I put this in but then hit the build break, I was going to fix it up but
would rather you did and tested it, so we may as well do another review :)

 diff --git a/arch/powerpc/include/uapi/asm/opal-prd.h 
 b/arch/powerpc/include/uapi/asm/opal-prd.h
 new file mode 100644
 index 000..319ff4a
 --- /dev/null
 +++ b/arch/powerpc/include/uapi/asm/opal-prd.h
 @@ -0,0 +1,58 @@
 +/*
 + * OPAL Runtime Diagnostics interface driver
 + * Supported on POWERNV platform
 + *
 + * (C) Copyright IBM 2015

Usual syntax is: Copyright IBM Corporation 2015

 + *
 + * Author: Vaidyanathan Srinivasan svaidy at linux.vnet.ibm.com
 + * Author: Jeremy Kerr j...@ozlabs.org

I'd rather you dropped these, they'll just bit rot, but if you insist I don't
care that much.

 + *
 + * This program is free software; you can redistribute it and/or modify
 + * it under the terms of the GNU General Public License as published by
 + * the Free Software Foundation; either version 2, or (at your option)
 + * any later version.

As pointed out by Daniel, we should probably be using the "version 2 only"
language on new files.

 diff --git a/arch/powerpc/platforms/powernv/opal-prd.c 
 b/arch/powerpc/platforms/powernv/opal-prd.c
 new file mode 100644
 index 000..3004f4a
 --- /dev/null
 +++ b/arch/powerpc/platforms/powernv/opal-prd.c
 @@ -0,0 +1,451 @@

...

 +/*
 + * opal_prd_mmap - maps firmware-provided ranges into userspace
 + * @file: file structure for the device
 + * @vma: VMA to map the registers into
 + */
 +
 +static int opal_prd_mmap(struct file *file, struct vm_area_struct *vma)
 +{
 + size_t addr, size;
 + int rc;
 +
 + pr_devel(opal_prd_mmap(0x%016lx, 0x%016lx, 0x%lx, 0x%lx)\n,
 + vma-vm_start, vma-vm_end, vma-vm_pgoff,
 + vma-vm_flags);
 +
 + addr = vma-vm_pgoff  PAGE_SHIFT;
 + size = vma-vm_end - vma-vm_start;
 +
 + /* ensure we're mapping within one of the allowable ranges */
 + if (!opal_prd_range_is_valid(addr, size))
 + return -EINVAL;
 +
 + vma-vm_page_prot = phys_mem_access_prot(file, vma-vm_pgoff,
 +  size, vma-vm_page_prot)
 + | _PAGE_SPECIAL;

This doesn't build with CONFIG_STRICT_MM_TYPECHECKS=y:

  arch/powerpc/platforms/powernv/opal-prd.c:131:5: error: invalid operands to 
binary | (have ‘pgprot_t’ and ‘int’)
  | _PAGE_SPECIAL;


 +static long opal_prd_ioctl(struct file *file, unsigned int cmd,
 + unsigned long param)
 +{
 + struct opal_prd_info info;
 + struct opal_prd_scom scom;
 + int rc = 0;
 +
 + switch(cmd) {
  ^
  space please

 + case OPAL_PRD_GET_INFO:
 + memset(info, 0, sizeof(info));
 + info.version = OPAL_PRD_KERNEL_VERSION;
 + rc = copy_to_user((void __user *)param, info, sizeof(info));
 + if (rc)
 + return -EFAULT;
 + break;
 +
 + case OPAL_PRD_SCOM_READ:
 + rc = copy_from_user(scom, (void __user *)param, sizeof(scom));
 + if (rc)
 + return -EFAULT;
 +
 + scom.rc = opal_xscom_read(scom.chip, scom.addr,
 + (__be64 *)scom.data);
 + scom.data = be64_to_cpu(scom.data);
 + pr_devel(ioctl SCOM_READ: chip %llx addr %016llx 
 + data %016llx rc %lld\n,

Don't split the string please.

 + scom.chip, scom.addr, scom.data, scom.rc);
 +
 + rc = copy_to_user((void __user *)param, scom, sizeof(scom));
 + if (rc)
 + return -EFAULT;
 + break;
 +
 + case OPAL_PRD_SCOM_WRITE:
 + rc = copy_from_user(scom, (void __user *)param, sizeof(scom));
 + if (rc)
 + return -EFAULT;
 +
 + scom.rc = opal_xscom_write(scom.chip, scom.addr, scom.data);
 + pr_devel(ioctl SCOM_WRITE: chip %llx addr %016llx 
 + data %016llx rc %lld\n,

Don't split the string please.

 + scom.chip, scom.addr, scom.data, scom.rc);
 +
 + rc = copy_to_user((void __user *)param, scom, sizeof(scom));
 + if (rc)
 + return -EFAULT;
 + break;
 +
 + default:
 + rc = -EINVAL;
 + }
 +
 + return rc;
 +}
 +
 +struct file_operations opal_prd_fops = {

This can be static const I think.

 + .open   = opal_prd_open,
 + .mmap   

Re: [3/5] mm/hugetlb: remove arch_prepare/release_hugepage from arch headers

2015-06-04 Thread Michael Ellerman
On Thu, 2015-28-05 at 11:52:35 UTC, Dominik Dingel wrote:
 Nobody used these hooks so they were removed from common code,
 and can now be removed from the architectures.
 
 Acked-by: Martin Schwidefsky schwidef...@de.ibm.com
 Signed-off-by: Dominik Dingel din...@linux.vnet.ibm.com
 Acked-by: Ralf Baechle r...@linux-mips.org

 diff --git a/arch/powerpc/include/asm/hugetlb.h 
 b/arch/powerpc/include/asm/hugetlb.h
 index 4bbd3c8..7eac89b 100644
 --- a/arch/powerpc/include/asm/hugetlb.h
 +++ b/arch/powerpc/include/asm/hugetlb.h
 @@ -168,15 +168,6 @@ static inline pte_t huge_ptep_get(pte_t *ptep)
   return *ptep;
  }
  
 -static inline int arch_prepare_hugepage(struct page *page)
 -{
 - return 0;
 -}
 -
 -static inline void arch_release_hugepage(struct page *page)
 -{
 -}
 -
  static inline void arch_clear_hugepage_flags(struct page *page)
  {
  }

Acked-by: Michael Ellerman m...@ellerman.id.au

cheers
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [1/1] powerpc/perf/hv-24x7: Check support before registering

2015-06-04 Thread Michael Ellerman
On Wed, 2015-20-05 at 23:34:16 UTC, Sukadev Bhattiprolu wrote:
 From 955102eacf035b19080dc659a15d9b8fbd8fae7f Mon Sep 17 00:00:00 2001
 From: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com
 Date: Tue, 28 Apr 2015 18:47:58 -0400
 Subject: [PATCH 1/1] powerpc/perf/hv-24x7: Check support before registering
  PMU
 
 We currently try to register the 24x7 PMU unconditionally. Not all
 Power systems support 24x7 counters (eg: Power7). On these systems
 we get a backtrace during boot when trying to register the 24x7 PMU.
 
 Check if the hypervisor supports 24x7 counters before attempting to
 register the 24x7 PMU.
 
 Reported-by: Gustavo Luiz Duarte gu...@br.ibm.com
 Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com
 ---
 
 Changelog[v2]
   - [Michael Ellerman] Simplify check with bogus parameters.
 ---
  arch/powerpc/perf/hv-24x7.c |   21 +
  1 file changed, 21 insertions(+)
 
 diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
 index ec2eb20..c04a332 100644
 --- a/arch/powerpc/perf/hv-24x7.c
 +++ b/arch/powerpc/perf/hv-24x7.c
 @@ -1268,12 +1268,33 @@ static struct pmu h_24x7_pmu = {
   .read= h_24x7_event_read,
  };
  
 +/*
 + * Return 1 if we can access the 24x7 counter catalog from the hypervisor.
 + * Return 0 otherwise.

Comment is wrong.

 + */
 +static bool hv_has_24x7(void)
 +{
 + unsigned long hret;

ret would be fine.

 +
 + hret = h_get_24x7_catalog_page(0, 0, 0);
 +
 + if (hret != H_FUNCTION)
 + pr_err(Error %ld reading catalog, disabling 24x7 PMU\n, hret);
 +
 + return hret == 0;

I don't get what you're doing here.

You check for something other than H_FUNCTION, and print, and then you just
compare against 0. But I wouldn't ever expect that to return 0, because you
passed it bogus args.

The logic should be:

static bool is_24x7_supported(void)
{
if (h_get_24x7_catalog_page(0, 0, 0) == H_FUNCTION)
return false;

return true;
}

  static int hv_24x7_init(void)
  {
   int r;
   unsigned long hret;
   struct hv_perf_caps caps;
  
 + if (!hv_has_24x7())
 + return -ENODEV;
 +

This is no good. You're doing the check, which involves a hcall, before you
even check if you're running with a hypervisor (below).

   if (!firmware_has_feature(FW_FEATURE_LPAR)) {
   pr_debug(not a virtualized system, not enabling\n);
   return -ENODEV;

cheers
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v6] powerpc/powernv: Poweroff (EPOW, DPO) events support for PowerNV platform

2015-06-04 Thread Vipin K Parashar
This patch adds support for FSP (Flexible Service Processor)
EPOW (Early Power Off Warning) and DPO (Delayed Power Off) events for
the PowerNV platform. EPOW events are generated by FSP due to various
critical system conditions that require system shutdown. A few examples
of these conditions are high ambient temperature or system running on
UPS power with low UPS battery. DPO event is generated in response to
admin initiated system shutdown request. Upon receipt of EPOW and DPO
events the host kernel invokes orderly_poweroff() for performing
graceful system shutdown.

Reviewed-by: Joel Stanley j...@jms.id.au
Reviewed-by: Vaibhav Jain vaib...@linux.vnet.ibm.com
Reviewed-by: Michael Ellerman m...@ellerman.id.au

Changes in v6:
 - Made below changes as suggested by Michael Ellerman on previous patch.
 - Changed EPOW, DPO notifier blocks to use opal_power_control_event()
   and enhanced opal_power_control_event() to handle EPOW and DPO events.
 - Reorganized code and added/changed a few variable and function names,
   removing older ones.
 - Minor cleanup like removing unused headers, blank lines etc.

Changes in v5:
 - Made changes to address review comments on previous patch.

Changes in v4:
 - Made changes to address review comments on previous patch.

Changes in v3:
 - Made changes to immediately call orderly_poweroff upon receipt of
   OPAL EPOW, DPO notifications.
 - Made code changes to address review comments on previous patch.
 - Made code changes to use existing OPAL EPOW API.
 - Removed patch to extract EPOW event timeout from OPAL device-tree.

Changes in v2:
 - Made code changes to improve code as per previous review comments.
 - Added patch to obtain EPOW event timeout values from OPAL device-tree.

Vipin K Parashar (1):
  powerpc/powernv: Add poweroff (EPOW, DPO) events support for PowerNV
platform

 arch/powerpc/include/asm/opal-api.h|  40 
 arch/powerpc/include/asm/opal.h|   3 +-
 arch/powerpc/platforms/powernv/opal-power.c| 125 +
 arch/powerpc/platforms/powernv/opal-wrappers.S |   1 +
 4 files changed, 152 insertions(+), 17 deletions(-)

-- 
1.9.3

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v6] powerpc/powernv: Add poweroff (EPOW, DPO) events support for PowerNV platform

2015-06-04 Thread Vipin K Parashar
This patch adds support for FSP (Flexible Service Processor)
EPOW (Early Power Off Warning) and DPO (Delayed Power Off) events for
the PowerNV platform. EPOW events are generated by FSP due to various
critical system conditions that require system shutdown. A few examples
of these conditions are high ambient temperature or system running on
UPS power with low UPS battery. DPO event is generated in response to
admin initiated system shutdown request. Upon receipt of EPOW and DPO
events the host kernel invokes orderly_poweroff() for performing
graceful system shutdown.

Reviewed-by: Joel Stanley j...@jms.id.au
Reviewed-by: Vaibhav Jain vaib...@linux.vnet.ibm.com
Reviewed-by: Michael Ellerman m...@ellerman.id.au
Signed-off-by: Vipin K Parashar vi...@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/opal-api.h|  40 
 arch/powerpc/include/asm/opal.h|   3 +-
 arch/powerpc/platforms/powernv/opal-power.c| 125 +
 arch/powerpc/platforms/powernv/opal-wrappers.S |   1 +
 4 files changed, 152 insertions(+), 17 deletions(-)

diff --git a/arch/powerpc/include/asm/opal-api.h 
b/arch/powerpc/include/asm/opal-api.h
index 0321a90..f460435 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -730,6 +730,46 @@ struct opal_i2c_request {
__be64 buffer_ra;   /* Buffer real address */
 };
 
+/*
+ * EPOW status sharing (OPAL and the host)
+ *
+ * The host will pass to OPAL a buffer of length OPAL_SYSEPOW_MAX
+ * with individual elements being 16 bits wide to fetch the system
+ * wide EPOW status. Each element in the buffer will contain the
+ * EPOW status in its bit representation for a particular EPOW sub
+ * class as defined here. So multiple detailed EPOW status bits
+ * specific for any sub class can be represented in a single buffer
+ * element as its bit representation.
+ */
+
+/* System EPOW type */
+enum OpalSysEpow {
+   OPAL_SYSEPOW_POWER  = 0,/* Power EPOW */
+   OPAL_SYSEPOW_TEMP   = 1,/* Temperature EPOW */
+   OPAL_SYSEPOW_COOLING= 2,/* Cooling EPOW */
+   OPAL_SYSEPOW_MAX= 3,/* Max EPOW categories */
+};
+
+/* Power EPOW */
+enum OpalSysPower {
+   OPAL_SYSPOWER_UPS   = 0x0001, /* System on UPS power */
+   OPAL_SYSPOWER_CHNG  = 0x0002, /* System power config change */
+   OPAL_SYSPOWER_FAIL  = 0x0004, /* System impending power failure */
+   OPAL_SYSPOWER_INCL  = 0x0008, /* System incomplete power */
+};
+
+/* Temperature EPOW */
+enum OpalSysTemp {
+   OPAL_SYSTEMP_AMB= 0x0001, /* System over ambient temperature */
+   OPAL_SYSTEMP_INT= 0x0002, /* System over internal temperature */
+   OPAL_SYSTEMP_HMD= 0x0004, /* System over ambient humidity */
+};
+
+/* Cooling EPOW */
+enum OpalSysCooling {
+   OPAL_SYSCOOL_INSF   = 0x0001, /* System insufficient cooling */
+};
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* __OPAL_API_H */
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 042af1a..8b174f3 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -141,7 +141,8 @@ int64_t opal_pci_fence_phb(uint64_t phb_id);
 int64_t opal_pci_reinit(uint64_t phb_id, uint64_t reinit_scope, uint64_t data);
 int64_t opal_pci_mask_pe_error(uint64_t phb_id, uint16_t pe_number, uint8_t 
error_type, uint8_t mask_action);
 int64_t opal_set_slot_led_status(uint64_t phb_id, uint64_t slot_id, uint8_t 
led_type, uint8_t led_action);
-int64_t opal_get_epow_status(__be64 *status);
+int64_t opal_get_epow_status(__be16 *epow_status, __be16 *num_epow_classes);
+int64_t opal_get_dpo_status(__be64 *dpo_timeout);
 int64_t opal_set_system_attention_led(uint8_t led_action);
 int64_t opal_pci_next_error(uint64_t phb_id, __be64 *first_frozen_pe,
__be16 *pci_error_type, __be16 *severity);
diff --git a/arch/powerpc/platforms/powernv/opal-power.c 
b/arch/powerpc/platforms/powernv/opal-power.c
index ac46c2c..b9f6620 100644
--- a/arch/powerpc/platforms/powernv/opal-power.c
+++ b/arch/powerpc/platforms/powernv/opal-power.c
@@ -9,9 +9,12 @@
  * 2 of the License, or (at your option) any later version.
  */
 
+#define pr_fmt(fmt) "opal-power: " fmt
+
 #include linux/kernel.h
 #include linux/reboot.h
 #include linux/notifier.h
+#include linux/of.h
 
 #include asm/opal.h
 #include asm/machdep.h
@@ -19,30 +22,95 @@
 #define SOFT_OFF 0x00
 #define SOFT_REBOOT 0x01
 
+/* Detect existing EPOW, DPO events */
+static bool poweroff_pending(void)
+{
+   int i, rc;
+   __be16 epow_classes;
+   __be16 opal_epow_status[OPAL_SYSEPOW_MAX] = {0};
+   __be64 opal_dpo_timeout;
+
+   /* Check for DPO event */
+   rc = opal_get_dpo_status(opal_dpo_timeout);
+   if (rc != OPAL_WRONG_STATE) {
+   pr_info(Existing DPO event detected.\n);
+   return true;
+   }
+
+   /*
+   * Check for