[PATCH v4 3/3] powerpc/8xx: Implement support of hugepages

2016-12-06 Thread Christophe Leroy
8xx uses a two level page table with two different linux page size
support (4k and 16k). 8xx also support two different hugepage sizes
512k and 8M. In order to support them on linux we define two different
page table layout.

The size of pages is in the PGD entry, using PS field (bits 28-29):
00 : Small pages (4k or 16k)
01 : 512k pages
10 : reserved
11 : 8M pages

For the 512K hugepage size a pgd entry has the below format
[0101] . The hugepte table allocated will contain 8
entries pointing to 512K huge ptes in 4k pages mode and 64 entries in
16k pages mode.

For 8M in 16k mode, a pgd entry has the below format
[1101] . The hugepte table allocated will contain 8
entries pointing to 8M huge ptes.

For 8M in 4k mode, multiple pgd entries point to the same hugepte
address and each pgd entry will have the below format
[1101]. The hugepte table allocated will only have one
entry.

For the time being, we do not support CPU15 ERRATA when HUGETLB is
selected

Signed-off-by: Christophe Leroy 
Reviewed-by: Aneesh Kumar K.V  (v3, for the 
generic bits)
---
v2: This v1 was split in two parts. This part focuses on adding the
support on 8xx. It also fixes an error in TLBmiss handlers in the
case of 8M hugepages in 16k pages mode.

v3: No change

v4: No change

 arch/powerpc/include/asm/hugetlb.h   |  19 -
 arch/powerpc/include/asm/mmu-8xx.h   |  35 
 arch/powerpc/include/asm/mmu.h   |  23 +++---
 arch/powerpc/include/asm/nohash/32/pte-8xx.h |   1 +
 arch/powerpc/include/asm/nohash/pgtable.h|   4 +
 arch/powerpc/include/asm/reg_8xx.h   |   2 +-
 arch/powerpc/kernel/head_8xx.S   | 119 +--
 arch/powerpc/mm/hugetlbpage.c|  29 ---
 arch/powerpc/mm/tlb_nohash.c |  21 -
 arch/powerpc/platforms/8xx/Kconfig   |   1 +
 arch/powerpc/platforms/Kconfig.cputype   |   1 +
 11 files changed, 225 insertions(+), 30 deletions(-)

diff --git a/arch/powerpc/include/asm/hugetlb.h 
b/arch/powerpc/include/asm/hugetlb.h
index c03e0a3..ede2151 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -51,12 +51,20 @@ static inline void __local_flush_hugetlb_page(struct 
vm_area_struct *vma,
 static inline pte_t *hugepd_page(hugepd_t hpd)
 {
BUG_ON(!hugepd_ok(hpd));
+#ifdef CONFIG_PPC_8xx
+   return (pte_t *)__va(hpd.pd & ~(_PMD_PAGE_MASK | _PMD_PRESENT_MASK));
+#else
return (pte_t *)((hpd.pd & ~HUGEPD_SHIFT_MASK) | PD_HUGE);
+#endif
 }
 
 static inline unsigned int hugepd_shift(hugepd_t hpd)
 {
+#ifdef CONFIG_PPC_8xx
+   return ((hpd.pd & _PMD_PAGE_MASK) >> 1) + 17;
+#else
return hpd.pd & HUGEPD_SHIFT_MASK;
+#endif
 }
 
 #endif /* CONFIG_PPC_BOOK3S_64 */
@@ -99,7 +107,15 @@ static inline int is_hugepage_only_range(struct mm_struct 
*mm,
 
 void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea,
pte_t pte);
+#ifdef CONFIG_PPC_8xx
+static inline void flush_hugetlb_page(struct vm_area_struct *vma,
+ unsigned long vmaddr)
+{
+   flush_tlb_page(vma, vmaddr);
+}
+#else
 void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
+#endif
 
 void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
unsigned long end, unsigned long floor,
@@ -205,7 +221,8 @@ static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned 
long addr,
  * are reserved early in the boot process by memblock instead of via
  * the .dts as on IBM platforms.
  */
-#if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_PPC_FSL_BOOK3E)
+#if defined(CONFIG_HUGETLB_PAGE) && (defined(CONFIG_PPC_FSL_BOOK3E) || \
+defined(CONFIG_PPC_8xx))
 extern void __init reserve_hugetlb_gpages(void);
 #else
 static inline void reserve_hugetlb_gpages(void)
diff --git a/arch/powerpc/include/asm/mmu-8xx.h 
b/arch/powerpc/include/asm/mmu-8xx.h
index 3e0e492..798b5bf 100644
--- a/arch/powerpc/include/asm/mmu-8xx.h
+++ b/arch/powerpc/include/asm/mmu-8xx.h
@@ -172,6 +172,41 @@ typedef struct {
 
 #define PHYS_IMMR_BASE (mfspr(SPRN_IMMR) & 0xfff8)
 #define VIRT_IMMR_BASE (__fix_to_virt(FIX_IMMR_BASE))
+
+/* Page size definitions, common between 32 and 64-bit
+ *
+ *shift : is the "PAGE_SHIFT" value for that page size
+ *penc  : is the pte encoding mask
+ *
+ */
+struct mmu_psize_def {
+   unsigned intshift;  /* number of bits */
+   unsigned intenc;/* PTE encoding */
+   unsigned intind;/* Corresponding indirect page size shift */
+   unsigned intflags;
+#define MMU_PAGE_SIZE_DIRECT   0x1 /* Supported as a direct size */
+#define MMU_PAGE_SIZE_INDIRECT 0x2 /* Supported as an indirect size */
+};
+
+extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
+
+static inline int shift_to_mmu_psize(unsigned int shift)
+{
+   int psize;
+
+   for (psize 

[PATCH v4 2/3] powerpc: get hugetlbpage handling more generic

2016-12-06 Thread Christophe Leroy
Today there are two implementations of hugetlbpages which are managed
by exclusive #ifdefs:
* FSL_BOOKE: several directory entries point to the same single hugepage
* BOOK3S: one upper level directory entry points to a table of hugepages

In preparation of implementation of hugepage support on the 8xx, we
need a mix of the two above solutions, because the 8xx needs both cases
depending on the size of pages:
* In 4k page size mode, each PGD entry covers a 4M bytes area. It means
that 2 PGD entries will be necessary to cover an 8M hugepage while a
single PGD entry will cover 8x 512k hugepages.
* In 16k page size mode, each PGD entry covers a 64M bytes area. It means
that 8x 8M hugepages will be covered by one PGD entry and 64x 512k
hugepages will be covered by one PGD entry.

This patch:
* removes #ifdefs in favor of if/else based on the range sizes
* merges the two huge_pte_alloc() functions as they are pretty similar
* merges the two hugetlbpage_init() functions as they are pretty similar

Signed-off-by: Christophe Leroy 
Reviewed-by: Aneesh Kumar K.V  (v3)
---
v2: This part is new and results from a split of the last patch of the v1
series in two parts

v3:
- Only allocate hugepte_cache on FSL_BOOKE. Not needed on BOOK3S_64
- Removed the BUG in the unused hugepd_free(), made it
static inline {} instead.

v4:
- Fixing pdshift calculation on FSL_BOOK3E in hugetlbpage_init() by 
   using HUGEPD_PxD_SHIFT instead of PyD_SHIFT.
- Fixing default hugepage size selection on FSL_BOOK3E by splitting decision
   based on #ifdefs in order to keep previous behaviour.

 arch/powerpc/mm/hugetlbpage.c | 195 ++
 1 file changed, 81 insertions(+), 114 deletions(-)

diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index a5d3ecd..53245aa 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -64,14 +64,16 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t 
*hpdp,
 {
struct kmem_cache *cachep;
pte_t *new;
-
-#ifdef CONFIG_PPC_FSL_BOOK3E
int i;
-   int num_hugepd = 1 << (pshift - pdshift);
-   cachep = hugepte_cache;
-#else
-   cachep = PGT_CACHE(pdshift - pshift);
-#endif
+   int num_hugepd;
+
+   if (pshift >= pdshift) {
+   cachep = hugepte_cache;
+   num_hugepd = 1 << (pshift - pdshift);
+   } else {
+   cachep = PGT_CACHE(pdshift - pshift);
+   num_hugepd = 1;
+   }
 
new = kmem_cache_zalloc(cachep, GFP_KERNEL);
 
@@ -89,7 +91,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t 
*hpdp,
smp_wmb();
 
spin_lock(>page_table_lock);
-#ifdef CONFIG_PPC_FSL_BOOK3E
+
/*
 * We have multiple higher-level entries that point to the same
 * actual pte location.  Fill in each as we go and backtrack on error.
@@ -100,8 +102,13 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t 
*hpdp,
if (unlikely(!hugepd_none(*hpdp)))
break;
else
+#ifdef CONFIG_PPC_BOOK3S_64
+   hpdp->pd = __pa(new) |
+  (shift_to_mmu_psize(pshift) << 2);
+#else
/* We use the old format for PPC_FSL_BOOK3E */
hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
+#endif
}
/* If we bailed from the for loop early, an error occurred, clean up */
if (i < num_hugepd) {
@@ -109,17 +116,6 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t 
*hpdp,
hpdp->pd = 0;
kmem_cache_free(cachep, new);
}
-#else
-   if (!hugepd_none(*hpdp))
-   kmem_cache_free(cachep, new);
-   else {
-#ifdef CONFIG_PPC_BOOK3S_64
-   hpdp->pd = __pa(new) | (shift_to_mmu_psize(pshift) << 2);
-#else
-   hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
-#endif
-   }
-#endif
spin_unlock(>page_table_lock);
return 0;
 }
@@ -136,7 +132,6 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t 
*hpdp,
 #define HUGEPD_PUD_SHIFT PMD_SHIFT
 #endif
 
-#ifdef CONFIG_PPC_BOOK3S_64
 /*
  * At this point we do the placement change only for BOOK3S 64. This would
  * possibly work on other subarchs.
@@ -153,6 +148,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long 
addr, unsigned long sz
addr &= ~(sz-1);
pg = pgd_offset(mm, addr);
 
+#ifdef CONFIG_PPC_BOOK3S_64
if (pshift == PGDIR_SHIFT)
/* 16GB huge page */
return (pte_t *) pg;
@@ -178,32 +174,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long 
addr, unsigned long sz
hpdp = (hugepd_t *)pm;
}
}
-   if (!hpdp)
-   return NULL;
-
-   BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp));
-
-   if 

[PATCH v4 1/3] powerpc: port 64 bits pgtable_cache to 32 bits

2016-12-06 Thread Christophe Leroy
Today powerpc64 uses a set of pgtable_caches while powerpc32 uses
standard pages when using 4k pages and a single pgtable_cache
if using other size pages.

In preparation of implementing huge pages on the 8xx, this patch
replaces the specific powerpc32 handling by the 64 bits approach.

This is done by:
* moving 64 bits pgtable_cache_add() and pgtable_cache_init()
in a new file called init-common.c
* modifying pgtable_cache_init() to also handle the case
without PMD
* removing the 32 bits version of pgtable_cache_add() and
pgtable_cache_init()
* copying related header contents from 64 bits into both the
book3s/32 and nohash/32 header files

On the 8xx, the following cache sizes will be used:
* 4k pages mode:
- PGT_CACHE(10) for PGD
- PGT_CACHE(3) for 512k hugepage tables
* 16k pages mode:
- PGT_CACHE(6) for PGD
- PGT_CACHE(7) for 512k hugepage tables
- PGT_CACHE(3) for 8M hugepage tables

Signed-off-by: Christophe Leroy 
Reviewed-by: Aneesh Kumar K.V 
---
v2: in v1, hugepte_cache was wrongly replaced by PGT_CACHE(1).
This modification has been removed from v2.

v3:
- Not adding anymore MIN_HUGEPTE_SHIFT to 32 bits headers as
this constant was last used on kernel 2.6.32.
- Fixed PMD_TABLE_SIZE and PUD_TABLE_SIZE
- Removed unnecessary includes from init-common.c

v4: No change

 arch/powerpc/include/asm/book3s/32/pgalloc.h |  44 +--
 arch/powerpc/include/asm/book3s/32/pgtable.h |  40 +-
 arch/powerpc/include/asm/book3s/64/pgtable.h |   3 -
 arch/powerpc/include/asm/nohash/32/pgalloc.h |  44 +--
 arch/powerpc/include/asm/nohash/32/pgtable.h |  42 +--
 arch/powerpc/include/asm/nohash/64/pgtable.h |   2 -
 arch/powerpc/include/asm/pgtable.h   |   2 +
 arch/powerpc/mm/Makefile |   3 +-
 arch/powerpc/mm/init-common.c| 107 +++
 arch/powerpc/mm/init_64.c|  77 ---
 arch/powerpc/mm/pgtable_32.c |  37 -
 11 files changed, 227 insertions(+), 174 deletions(-)
 create mode 100644 arch/powerpc/mm/init-common.c

diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h 
b/arch/powerpc/include/asm/book3s/32/pgalloc.h
index 8e21bb4..d310546 100644
--- a/arch/powerpc/include/asm/book3s/32/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h
@@ -2,14 +2,42 @@
 #define _ASM_POWERPC_BOOK3S_32_PGALLOC_H
 
 #include 
+#include 
 
-/* For 32-bit, all levels of page tables are just drawn from get_free_page() */
-#define MAX_PGTABLE_INDEX_SIZE 0
+/*
+ * Functions that deal with pagetables that could be at any level of
+ * the table need to be passed an "index_size" so they know how to
+ * handle allocation.  For PTE pages (which are linked to a struct
+ * page for now, and drawn from the main get_free_pages() pool), the
+ * allocation size will be (2^index_size * sizeof(pointer)) and
+ * allocations are drawn from the kmem_cache in PGT_CACHE(index_size).
+ *
+ * The maximum index size needs to be big enough to allow any
+ * pagetable sizes we need, but small enough to fit in the low bits of
+ * any page table pointer.  In other words all pagetables, even tiny
+ * ones, must be aligned to allow at least enough low 0 bits to
+ * contain this value.  This value is also used as a mask, so it must
+ * be one less than a power of two.
+ */
+#define MAX_PGTABLE_INDEX_SIZE 0xf
 
 extern void __bad_pte(pmd_t *pmd);
 
-extern pgd_t *pgd_alloc(struct mm_struct *mm);
-extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
+extern struct kmem_cache *pgtable_cache[];
+#define PGT_CACHE(shift) ({\
+   BUG_ON(!(shift));   \
+   pgtable_cache[(shift) - 1]; \
+   })
+
+static inline pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+   return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), GFP_KERNEL);
+}
+
+static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+   kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd);
+}
 
 /*
  * We don't have any real pmd's, and this code never triggers because
@@ -68,8 +96,12 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t 
ptepage)
 
 static inline void pgtable_free(void *table, unsigned index_size)
 {
-   BUG_ON(index_size); /* 32-bit doesn't use this */
-   free_page((unsigned long)table);
+   if (!index_size) {
+   free_page((unsigned long)table);
+   } else {
+   BUG_ON(index_size > MAX_PGTABLE_INDEX_SIZE);
+   kmem_cache_free(PGT_CACHE(index_size), table);
+   }
 }
 
 #define check_pgt_cache()  do { } while (0)
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h 
b/arch/powerpc/include/asm/book3s/32/pgtable.h
index dc58980..0122236 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -8,6 +8,23 @@
 /* And here we include common definitions */
 

[PATCH v4 0/3] powerpc: implementation of huge pages for 8xx

2016-12-06 Thread Christophe Leroy
This is v4 of the patch series implementing support of
hugepages for the 8xx.

v2: the last patch has been split in two parts.
v3: Taking into account comments from aneesh
v4: Fixing pdshift calculation on FSL_BOOK3E in hugetlbpage_init()
Fixing default hugepage size selection on FSL_BOOK3E



Christophe Leroy (3):
  powerpc: port 64 bits pgtable_cache to 32 bits
  powerpc: get hugetlbpage handling more generic
  powerpc/8xx: Implement support of hugepages

 arch/powerpc/include/asm/book3s/32/pgalloc.h |  44 +-
 arch/powerpc/include/asm/book3s/32/pgtable.h |  40 ++---
 arch/powerpc/include/asm/book3s/64/pgtable.h |   3 -
 arch/powerpc/include/asm/hugetlb.h   |  19 ++-
 arch/powerpc/include/asm/mmu-8xx.h   |  35 +
 arch/powerpc/include/asm/mmu.h   |  23 +--
 arch/powerpc/include/asm/nohash/32/pgalloc.h |  44 +-
 arch/powerpc/include/asm/nohash/32/pgtable.h |  42 +++---
 arch/powerpc/include/asm/nohash/32/pte-8xx.h |   1 +
 arch/powerpc/include/asm/nohash/64/pgtable.h |   2 -
 arch/powerpc/include/asm/nohash/pgtable.h|   4 +
 arch/powerpc/include/asm/pgtable.h   |   2 +
 arch/powerpc/include/asm/reg_8xx.h   |   2 +-
 arch/powerpc/kernel/head_8xx.S   | 119 ++-
 arch/powerpc/mm/Makefile |   3 +-
 arch/powerpc/mm/hugetlbpage.c| 216 ---
 arch/powerpc/mm/init-common.c| 107 +
 arch/powerpc/mm/init_64.c|  77 --
 arch/powerpc/mm/pgtable_32.c |  37 -
 arch/powerpc/mm/tlb_nohash.c |  21 ++-
 arch/powerpc/platforms/8xx/Kconfig   |   1 +
 arch/powerpc/platforms/Kconfig.cputype   |   1 +
 22 files changed, 529 insertions(+), 314 deletions(-)
 create mode 100644 arch/powerpc/mm/init-common.c

-- 
2.10.1



[PATCH V9 8/8] powerpc: Enable support for new DRC devtree properties

2016-12-06 Thread Michael Bringmann
prom_init.c: Enable support for new DRC device tree properties
"ibm,drc-info" and "ibm,dynamic-memory-v2" in initial handshake
between the Linux kernel and the front end processor.

Signed-off-by: Michael Bringmann 
---
 arch/powerpc/kernel/prom_init.c |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 88ac964..6e6454f 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -696,7 +696,7 @@ static void __init early_cmdline_parse(void)
OV4_MIN_ENT_CAP,/* minimum VP entitled capacity */
 
/* option vector 5: PAPR/OF options */
-   VECTOR_LENGTH(21),  /* length */
+   VECTOR_LENGTH(22),  /* length */
0,  /* don't ignore, don't halt */
OV5_FEAT(OV5_LPAR) | OV5_FEAT(OV5_SPLPAR) | OV5_FEAT(OV5_LARGE_PAGES) |
OV5_FEAT(OV5_DRCONF_MEMORY) | OV5_FEAT(OV5_DONATE_DEDICATE_CPU) |
@@ -732,6 +732,7 @@ static void __init early_cmdline_parse(void)
0,  /* Byte 19 */
0,  /* Byte 20 */
OV5_FEAT(OV5_SUB_PROCESSORS),   /* Byte 21 */
+   OV5_FEAT(OV5_DYN_MEM_V2) | OV5_FEAT(OV5_DRC_INFO),  /* Byte 22 */
 
/* option vector 6: IBM PAPR hints */
VECTOR_LENGTH(3),   /* length */



[PATCH V9 7/8] powerpc: Check arch.vec earlier during boot for memory features

2016-12-06 Thread Michael Bringmann
architecture.vec5 features: The boot-time memory management needs to
know the form of the "ibm,dynamic-memory-v2" property early during
scanning of the flattened device tree.  This patch moves execution of
the function pseries_probe_fw_features() early enough to be before
the scanning of the memory properties in the device tree to allow
recognition of the supported properties.

Signed-off-by: Michael Bringmann 
---
 arch/powerpc/kernel/prom.c |6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 2d49887..bd07157 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -759,6 +759,9 @@ void __init early_init_devtree(void *params)
 */
of_scan_flat_dt(early_init_dt_scan_chosen_ppc, boot_command_line);
 
+   /* Now try to figure out if we are running on LPAR and so on */
+   pseries_probe_fw_features();
+
/* Scan memory nodes and rebuild MEMBLOCKs */
of_scan_flat_dt(early_init_dt_scan_root, NULL);
of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL);
@@ -826,9 +829,6 @@ void __init early_init_devtree(void *params)
 #endif
epapr_paravirt_early_init();
 
-   /* Now try to figure out if we are running on LPAR and so on */
-   pseries_probe_fw_features();
-
 #ifdef CONFIG_PPC_PS3
/* Identify PS3 firmware */
if (of_flat_dt_is_compatible(of_get_flat_dt_root(), "sony,ps3"))



[PATCH V9 6/8] hotplug/drc-info: Add code to search new devtree properties

2016-12-06 Thread Michael Bringmann
rpadlpar_core.c: Provide parallel routines to search the older device-
tree properties ("ibm,drc-indexes", "ibm,drc-names", "ibm,drc-types"
and "ibm,drc-power-domains"), or the new property "ibm,drc-info".

The interface to examine the DRC information is changed from a "get"
function that returns values for local verification elsewhere, to a
"check" function that validates the 'name' and/or 'type' of a device
node.  This update hides the format of the underlying device-tree
properties, and concentrates the value checks into a single function
without requiring the user to verify whether a search was successful.

Signed-off-by: Michael Bringmann 
---
Changes in V9:
  -- Remove unnecessary code from rpaphp_check_drc_props_v2()
---
 drivers/pci/hotplug/rpadlpar_core.c |   13 ++--
 drivers/pci/hotplug/rpaphp.h|4 +
 drivers/pci/hotplug/rpaphp_core.c   |  108 +++
 3 files changed, 90 insertions(+), 35 deletions(-)

diff --git a/drivers/pci/hotplug/rpadlpar_core.c 
b/drivers/pci/hotplug/rpadlpar_core.c
index dc67f39..bea9723 100644
--- a/drivers/pci/hotplug/rpadlpar_core.c
+++ b/drivers/pci/hotplug/rpadlpar_core.c
@@ -27,6 +27,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "../pci.h"
 #include "rpaphp.h"
@@ -44,15 +45,14 @@ static struct device_node *find_vio_slot_node(char 
*drc_name)
 {
struct device_node *parent = of_find_node_by_name(NULL, "vdevice");
struct device_node *dn = NULL;
-   char *name;
int rc;
 
if (!parent)
return NULL;
 
while ((dn = of_get_next_child(parent, dn))) {
-   rc = rpaphp_get_drc_props(dn, NULL, , NULL, NULL);
-   if ((rc == 0) && (!strcmp(drc_name, name)))
+   rc = rpaphp_check_drc_props(dn, drc_name, NULL);
+   if (rc == 0)
break;
}
 
@@ -64,15 +64,12 @@ static struct device_node *find_php_slot_pci_node(char 
*drc_name,
  char *drc_type)
 {
struct device_node *np = NULL;
-   char *name;
-   char *type;
int rc;
 
while ((np = of_find_node_by_name(np, "pci"))) {
-   rc = rpaphp_get_drc_props(np, NULL, , , NULL);
+   rc = rpaphp_check_drc_props(np, drc_name, drc_type);
if (rc == 0)
-   if (!strcmp(drc_name, name) && !strcmp(drc_type, type))
-   break;
+   break;
}
 
return np;
diff --git a/drivers/pci/hotplug/rpaphp.h b/drivers/pci/hotplug/rpaphp.h
index 7db024e..8db5f2e 100644
--- a/drivers/pci/hotplug/rpaphp.h
+++ b/drivers/pci/hotplug/rpaphp.h
@@ -91,8 +91,8 @@ struct slot {
 
 /* rpaphp_core.c */
 int rpaphp_add_slot(struct device_node *dn);
-int rpaphp_get_drc_props(struct device_node *dn, int *drc_index,
-   char **drc_name, char **drc_type, int *drc_power_domain);
+int rpaphp_check_drc_props(struct device_node *dn, char *drc_name,
+   char *drc_type);
 
 /* rpaphp_slot.c */
 void dealloc_slot_struct(struct slot *slot);
diff --git a/drivers/pci/hotplug/rpaphp_core.c 
b/drivers/pci/hotplug/rpaphp_core.c
index 8d13202..f9c9ce5 100644
--- a/drivers/pci/hotplug/rpaphp_core.c
+++ b/drivers/pci/hotplug/rpaphp_core.c
@@ -30,6 +30,7 @@
 #include 
 #include 
 #include 
+#include 
 #include/* for eeh_add_device() */
 #include   /* rtas_call */
 #include /* for pci_controller */
@@ -196,25 +197,21 @@ static int get_children_props(struct device_node *dn, 
const int **drc_indexes,
return 0;
 }
 
-/* To get the DRC props describing the current node, first obtain it's
- * my-drc-index property.  Next obtain the DRC list from it's parent.  Use
- * the my-drc-index for correlation, and obtain the requested properties.
+
+/* Verify the existence of 'drc_name' and/or 'drc_type' within the
+ * current node.  First obtain it's my-drc-index property.  Next,
+ * obtain the DRC info from it's parent.  Use the my-drc-index for
+ * correlation, and obtain/validate the requested properties.
  */
-int rpaphp_get_drc_props(struct device_node *dn, int *drc_index,
-   char **drc_name, char **drc_type, int *drc_power_domain)
+
+static int rpaphp_check_drc_props_v1(struct device_node *dn, char *drc_name,
+   char *drc_type, unsigned int my_index)
 {
+   char *name_tmp, *type_tmp;
const int *indexes, *names;
const int *types, *domains;
-   const unsigned int *my_index;
-   char *name_tmp, *type_tmp;
int i, rc;
 
-   my_index = of_get_property(dn, "ibm,my-drc-index", NULL);
-   if (!my_index) {
-   /* Node isn't DLPAR/hotplug capable */
-   return -EINVAL;
-   }
-
rc = get_children_props(dn->parent, , , , );
if (rc < 0) {
return -EINVAL;
@@ -225,24 +222,85 @@ int rpaphp_get_drc_props(struct device_node 

[PATCH V9 5/8] pseries/drc-info: Search new DRC properties for CPU indexes

2016-12-06 Thread Michael Bringmann
pseries/drc-info: Provide parallel routines to convert between
drc_index and CPU numbers at runtime, using the older device-tree
properties ("ibm,drc-indexes", "ibm,drc-names", "ibm,drc-types"
and "ibm,drc-power-domains"), or the new property "ibm,drc-info".

Signed-off-by: Michael Bringmann 
---
Changes in V9:
  -- Correct cpu-to-thread calculation for drc-info structs in function
 drc_index_to_cpu
  -- Change OR operators in WARN_ON test
---
 arch/powerpc/platforms/pseries/pseries_energy.c |  202 ---
 1 file changed, 176 insertions(+), 26 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/pseries_energy.c 
b/arch/powerpc/platforms/pseries/pseries_energy.c
index 164a13d..549efdb 100644
--- a/arch/powerpc/platforms/pseries/pseries_energy.c
+++ b/arch/powerpc/platforms/pseries/pseries_energy.c
@@ -35,10 +35,73 @@
 
 /* Helper Routines to convert between drc_index to cpu numbers */
 
+void read_one_drc_info(int **info, char **dtype, char **dname,
+   unsigned long int *drc_index_start_p,
+   unsigned long int *num_sequential_elems_p,
+   unsigned long int *sequential_inc_p,
+   unsigned long int *last_drc_index_p)
+{
+   char *drc_type, *drc_name_prefix, *pc;
+   u32 drc_index_start, num_sequential_elems;
+   u32 sequential_inc, last_drc_index;
+
+   drc_index_start = num_sequential_elems = 0;
+   sequential_inc = last_drc_index = 0;
+
+   /* Get drc-type:encode-string */
+   pc = (char *)info;
+   drc_type = pc;
+   pc += (strlen(drc_type) + 1);
+
+   /* Get drc-name-prefix:encode-string */
+   drc_name_prefix = (char *)pc;
+   pc += (strlen(drc_name_prefix) + 1);
+
+   /* Get drc-index-start:encode-int */
+   memcpy(_index_start, pc, 4);
+   drc_index_start = be32_to_cpu(drc_index_start);
+   pc += 4;
+
+   /* Get/skip drc-name-suffix-start:encode-int */
+   pc += 4;
+
+   /* Get number-sequential-elements:encode-int */
+   memcpy(_sequential_elems, pc, 4);
+   num_sequential_elems = be32_to_cpu(num_sequential_elems);
+   pc += 4;
+
+   /* Get sequential-increment:encode-int */
+   memcpy(_inc, pc, 4);
+   sequential_inc = be32_to_cpu(sequential_inc);
+   pc += 4;
+
+   /* Get/skip drc-power-domain:encode-int */
+   pc += 4;
+
+   /* Should now know end of current entry */
+   last_drc_index = drc_index_start +
+   ((num_sequential_elems-1)*sequential_inc);
+
+   (*info) = (int *)pc;
+
+   if (dtype)
+   *dtype = drc_type;
+   if (dname)
+   *dname = drc_name_prefix;
+   if (drc_index_start_p)
+   *drc_index_start_p = drc_index_start;
+   if (num_sequential_elems_p)
+   *num_sequential_elems_p = num_sequential_elems;
+   if (sequential_inc_p)
+   *sequential_inc_p = sequential_inc;
+   if (last_drc_index_p)
+   *last_drc_index_p = last_drc_index;
+}
+EXPORT_SYMBOL(read_one_drc_info);
+
 static u32 cpu_to_drc_index(int cpu)
 {
struct device_node *dn = NULL;
-   const int *indexes;
int i;
int rc = 1;
u32 ret = 0;
@@ -46,18 +109,60 @@ static u32 cpu_to_drc_index(int cpu)
dn = of_find_node_by_path("/cpus");
if (dn == NULL)
goto err;
-   indexes = of_get_property(dn, "ibm,drc-indexes", NULL);
-   if (indexes == NULL)
-   goto err_of_node_put;
+
/* Convert logical cpu number to core number */
i = cpu_core_index_of_thread(cpu);
-   /*
-* The first element indexes[0] is the number of drc_indexes
-* returned in the list.  Hence i+1 will get the drc_index
-* corresponding to core number i.
-*/
-   WARN_ON(i > indexes[0]);
-   ret = indexes[i + 1];
+
+   if (firmware_has_feature(FW_FEATURE_DRC_INFO)) {
+   int *info = (int *)4;
+   unsigned long int num_set_entries, j, check_val = i;
+   unsigned long int drc_index_start = 0;
+   unsigned long int last_drc_index = 0;
+   unsigned long int num_sequential_elems = 0;
+   unsigned long int sequential_inc = 0;
+   char *dtype;
+   char *dname;
+
+   info = (int *)of_get_property(dn, "ibm,drc-info", NULL);
+   if (info == NULL)
+   goto err_of_node_put;
+
+   num_set_entries = be32_to_cpu(*info++);
+
+   for (j = 0; j < num_set_entries; j++) {
+
+   read_one_drc_info(, , ,
+   _index_start,
+   _sequential_elems,
+   _inc, _drc_index);
+   if (strcmp(dtype, "CPU"))
+   goto err;
+
+   if (check_val < 

[PATCH V9 4/8] pseries/hotplug init: Convert new DRC memory property for hotplug runtime

2016-12-06 Thread Michael Bringmann
hotplug_init: Simplify the code needed for runtime memory hotplug and
maintenance with a conversion routine that transforms the compressed
property "ibm,dynamic-memory-v2" to the form of "ibm,dynamic-memory"
within the "ibm,dynamic-reconfiguration-memory" property.  Thus only
a single set of routines should be required at runtime to parse, edit,
and manipulate the memory representation in the device tree.  Similarly,
any userspace applications that need this information will only need
to recognize the older format to be able to continue to operate.

Signed-off-by: Michael Bringmann 
---
Changes in V9:
  - Remove unnecessary field initialization in allocated memory block
---
 arch/powerpc/platforms/pseries/Makefile |4 +
 arch/powerpc/platforms/pseries/hotplug-memory.c |   92 +++
 2 files changed, 94 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/Makefile 
b/arch/powerpc/platforms/pseries/Makefile
index fedc2ccf0..e74cf6c 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -5,14 +5,14 @@ obj-y := lpar.o hvCall.o nvram.o reconfig.o \
   of_helpers.o \
   setup.o iommu.o event_sources.o ras.o \
   firmware.o power.o dlpar.o mobility.o rng.o \
-  pci.o pci_dlpar.o eeh_pseries.o msi.o
+  pci.o pci_dlpar.o eeh_pseries.o msi.o \
+  hotplug-memory.o
 obj-$(CONFIG_SMP)  += smp.o
 obj-$(CONFIG_SCANLOG)  += scanlog.o
 obj-$(CONFIG_KEXEC)+= kexec.o
 obj-$(CONFIG_PSERIES_ENERGY)   += pseries_energy.o
 
 obj-$(CONFIG_HOTPLUG_CPU)  += hotplug-cpu.o
-obj-$(CONFIG_MEMORY_HOTPLUG)   += hotplug-memory.o
 
 obj-$(CONFIG_HVC_CONSOLE)  += hvconsole.o
 obj-$(CONFIG_HVCS) += hvcserver.o
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c 
b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 76ec104..7b63639 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -24,6 +24,8 @@
 #include 
 #include "pseries.h"
 
+#ifdef CONFIG_MEMORY_HOTPLUG
+
 static bool rtas_hp_event;
 
 unsigned long pseries_memory_block_size(void)
@@ -887,11 +889,101 @@ static int pseries_memory_notifier(struct notifier_block 
*nb,
 static struct notifier_block pseries_mem_nb = {
.notifier_call = pseries_memory_notifier,
 };
+#endif /* CONFIG_MEMORY_HOTPLUG */
+
+static int pseries_rewrite_dynamic_memory_v2(void)
+{
+   unsigned long memblock_size;
+   struct device_node *dn;
+   struct property *prop, *prop_v2;
+   __be32 *p;
+   struct of_drconf_cell *lmbs;
+   u32 num_lmb_desc_sets, num_lmbs;
+   int i, j, k;
+
+   dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+   if (!dn)
+   return -EINVAL;
+
+   prop_v2 = of_find_property(dn, "ibm,dynamic-memory-v2", NULL);
+   if (!prop_v2)
+   return -EINVAL;
+
+   memblock_size = pseries_memory_block_size();
+   if (!memblock_size)
+   return -EINVAL;
+
+   /* The first int of the property is the number of lmb sets
+* described by the property.
+*/
+   p = (__be32 *)prop_v2->value;
+   num_lmb_desc_sets = be32_to_cpu(*p++);
+
+   /* Count the number of LMBs for generating the alternate format
+*/
+   for (i = 0, num_lmbs = 0; i < num_lmb_desc_sets; i++) {
+   struct of_drconf_cell_v2 drmem;
+
+   read_drconf_cell_v2(, (const __be32 **));
+   num_lmbs += drmem.num_seq_lmbs;
+   }
+
+   /* Create an empty copy of the new 'ibm,dynamic-memory' property
+*/
+   prop = kzalloc(sizeof(*prop), GFP_KERNEL);
+   if (!prop)
+   return -ENOMEM;
+   prop->name = kstrdup("ibm,dynamic-memory", GFP_KERNEL);
+   prop->length = dyn_mem_v2_len(num_lmbs);
+   prop->value = kzalloc(prop->length, GFP_KERNEL);
+
+   /* Copy/expand the ibm,dynamic-memory-v2 format to produce the
+* ibm,dynamic-memory format.
+*/
+   p = (__be32 *)prop->value;
+   *p = cpu_to_be32(num_lmbs);
+   p++;
+   lmbs = (struct of_drconf_cell *)p;
+
+   p = (__be32 *)prop_v2->value;
+   p++;
+
+   for (i = 0, k = 0; i < num_lmb_desc_sets; i++) {
+   struct of_drconf_cell_v2 drmem;
+
+   read_drconf_cell_v2(, (const __be32 **));
+
+   for (j = 0; j < drmem.num_seq_lmbs; j++) {
+   lmbs[k+j].base_addr = be64_to_cpu(drmem.base_addr);
+   lmbs[k+j].drc_index = be32_to_cpu(drmem.drc_index);
+   lmbs[k+j].aa_index  = be32_to_cpu(drmem.aa_index);
+   lmbs[k+i].flags = be32_to_cpu(drmem.flags);
+
+   drmem.base_addr += memblock_size;
+   

[PATCH V9 3/8] powerpc/memory: Parse new memory property to initialize structures.

2016-12-06 Thread Michael Bringmann
powerpc/memory: Add parallel routines to parse the new property
"ibm,dynamic-memory-v2" property when it is present, and then to
finish initialization of the relevant memory structures with the
operating system.  This code is shared between the boot-time
initialization functions and the runtime functions for memory
hotplug, so it needs to be able to handle both formats.

Signed-off-by: Michael Bringmann 
---
Changes in V9:
 - Remove unnecessary multiplier from a property length calculation
---
 arch/powerpc/include/asm/prom.h |   12 +++
 arch/powerpc/mm/numa.c  |  146 +--
 2 files changed, 135 insertions(+), 23 deletions(-)

diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index 43a002b..01842a7 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -110,6 +110,18 @@ struct of_drconf_cell_v2 {
 extern void read_drconf_cell_v2(struct of_drconf_cell_v2 *drmem,
const __be32 **cellp);
 
+extern void read_one_drc_info(int **info, char **drc_type, char **drc_name,
+   unsigned long int *fdi_p, unsigned long int *nsl_p,
+   unsigned long int *si_p, unsigned long int *ldi_p);
+
+static inline int dyn_mem_v2_len(int entries)
+{
+   /* Calculate for counter + number of cells that follow */
+   int drconf_v2_cells = (n_mem_addr_cells + 4);
+   int drconf_v2_cells_len = (drconf_v2_cells * sizeof(unsigned int));
+   return (((entries) * drconf_v2_cells_len) + sizeof(unsigned int));
+}
+
 /*
  * There are two methods for telling firmware what our capabilities are.
  * Newer machines have an "ibm,client-architecture-support" method on the
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 67dc989..4cbcae8 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -427,30 +427,55 @@ void read_drconf_cell_v2(struct of_drconf_cell_v2 *drmem, 
const __be32 **cellp)
 EXPORT_SYMBOL(read_drconf_cell_v2);
 
 /*
- * Retrieve and validate the ibm,dynamic-memory property of the device tree.
+ * Retrieve and validate the ibm,dynamic-memory[-v2] property of the
+ * device tree.
+ *
+ * The layout of the ibm,dynamic-memory property is a number N of memory
+ * block description list entries followed by N memory block description
+ * list entries.  Each memory block description list entry contains
+ * information as laid out in the of_drconf_cell struct above.
  *
- * The layout of the ibm,dynamic-memory property is a number N of memblock
- * list entries followed by N memblock list entries.  Each memblock list entry
- * contains information as laid out in the of_drconf_cell struct above.
+ * The layout of the ibm,dynamic-memory-v2 property is a number N of memory
+ * block set description list entries, followed by N memory block set
+ * description set entries.
  */
 static int of_get_drconf_memory(struct device_node *memory, const __be32 **dm)
 {
const __be32 *prop;
u32 len, entries;
 
-   prop = of_get_property(memory, "ibm,dynamic-memory", &len);
-   if (!prop || len < sizeof(unsigned int))
-   return 0;
+   if (firmware_has_feature(FW_FEATURE_DYN_MEM_V2)) {
 
-   entries = of_read_number(prop++, 1);
+   prop = of_get_property(memory, "ibm,dynamic-memory-v2", &len);
+   if (!prop || len < sizeof(unsigned int))
+   return 0;
 
-   /* Now that we know the number of entries, revalidate the size
-* of the property read in to ensure we have everything
-*/
-   if (len < (entries * (n_mem_addr_cells + 4) + 1) * sizeof(unsigned int))
-   return 0;
+   entries = of_read_number(prop++, 1);
+
+   /* Now that we know the number of set entries, revalidate the
+* size of the property read in to ensure we have everything.
+*/
+   if (len < dyn_mem_v2_len(entries))
+   return 0;
+
+   *dm = prop;
+   } else {
+   prop = of_get_property(memory, "ibm,dynamic-memory", &len);
+   if (!prop || len < sizeof(unsigned int))
+   return 0;
+
+   entries = of_read_number(prop++, 1);
+
+   /* Now that we know the number of entries, revalidate the size
+* of the property read in to ensure we have everything
+*/
+   if (len < (entries * (n_mem_addr_cells + 4) + 1) *
+  sizeof(unsigned int))
+   return 0;
+
+   *dm = prop;
+   }
 
-   *dm = prop;
return entries;
 }
 
@@ -513,7 +538,7 @@ static int of_get_assoc_arrays(struct device_node *memory,
  * This is like of_node_to_nid_single() for memory represented in the
  * ibm,dynamic-reconfiguration-memory node.
  */
-static int of_drconf_to_nid_single(struct of_drconf_cell *drmem,

[PATCH V9 2/8] powerpc/memory: Parse new memory property to register blocks.

2016-12-06 Thread Michael Bringmann
powerpc/memory: Add parallel routines to parse the new property
"ibm,dynamic-memory-v2" property when it is present, and then to
register the relevant memory blocks with the operating system.
This property format is intended to provide a more compact
representation of memory when communicating with the front end
processor, especially when describing vast amounts of RAM.

Signed-off-by: Michael Bringmann 
---
 arch/powerpc/include/asm/prom.h |   24 --
 arch/powerpc/kernel/prom.c  |   97 ---
 arch/powerpc/mm/numa.c  |   22 -
 3 files changed, 129 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index bc7c4b5..43a002b 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -69,6 +69,8 @@ struct boot_param_header {
  * OF address retreival & translation
  */
 
+extern int n_mem_addr_cells;
+
 /* Parse the ibm,dma-window property of an OF node into the busno, phys and
  * size parameters.
  */
@@ -81,8 +83,9 @@ void of_parse_dma_window(struct device_node *dn, const __be32 
*dma_window,
 extern int of_get_ibm_chip_id(struct device_node *np);
 
 /* The of_drconf_cell struct defines the layout of the LMB array
- * specified in the device tree property
- * ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory
+ * specified in the device tree properties,
+ * ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory
+ * ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory-v2
  */
 struct of_drconf_cell {
u64 base_addr;
@@ -92,9 +95,20 @@ struct of_drconf_cell {
u32 flags;
 };
 
-#define DRCONF_MEM_ASSIGNED0x0008
-#define DRCONF_MEM_AI_INVALID  0x0040
-#define DRCONF_MEM_RESERVED0x0080
+#define DRCONF_MEM_ASSIGNED0x0008
+#define DRCONF_MEM_AI_INVALID  0x0040
+#define DRCONF_MEM_RESERVED0x0080
+
+struct of_drconf_cell_v2 {
+   u32 num_seq_lmbs;
+   u64 base_addr;
+   u32 drc_index;
+   u32 aa_index;
+   u32 flags;
+} __attribute__((packed));
+
+extern void read_drconf_cell_v2(struct of_drconf_cell_v2 *drmem,
+   const __be32 **cellp);
 
 /*
  * There are two methods for telling firmware what our capabilities are.
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index b0245be..2d49887 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -443,23 +443,34 @@ static int __init early_init_dt_scan_chosen_ppc(unsigned 
long node,
 
 #ifdef CONFIG_PPC_PSERIES
 /*
- * Interpret the ibm,dynamic-memory property in the
- * /ibm,dynamic-reconfiguration-memory node.
+ * Retrieve and validate the ibm,lmb-size property for drconf memory
+ * from the flattened device tree.
+ */
+static u64 __init get_lmb_size(unsigned long node)
+{
+   const __be32 *ls;
+   int len;
+   ls = of_get_flat_dt_prop(node, "ibm,lmb-size", &len);
+   if (!ls || len < dt_root_size_cells * sizeof(__be32))
+   return 0;
+   return dt_mem_next_cell(dt_root_size_cells, &ls);
+}
+
+/*
+ * Interpret the ibm,dynamic-memory property/ibm,dynamic-memory-v2
+ * in the /ibm,dynamic-reconfiguration-memory node.
  * This contains a list of memory blocks along with NUMA affinity
  * information.
  */
-static int __init early_init_dt_scan_drconf_memory(unsigned long node)
+static int __init early_init_dt_scan_drconf_memory_v1(unsigned long node)
 {
-   const __be32 *dm, *ls, *usm;
+   const __be32 *dm, *usm;
int l;
unsigned long n, flags;
u64 base, size, memblock_size;
unsigned int is_kexec_kdump = 0, rngs;
 
-   ls = of_get_flat_dt_prop(node, "ibm,lmb-size", &l);
-   if (ls == NULL || l < dt_root_size_cells * sizeof(__be32))
-   return 0;
-   memblock_size = dt_mem_next_cell(dt_root_size_cells, &ls);
+   memblock_size = get_lmb_size(node);
 
	dm = of_get_flat_dt_prop(node, "ibm,dynamic-memory", &l);
if (dm == NULL || l < sizeof(__be32))
@@ -518,6 +529,76 @@ static int __init 
early_init_dt_scan_drconf_memory(unsigned long node)
memblock_dump_all();
return 0;
 }
+
+static int __init early_init_dt_scan_drconf_memory_v2(unsigned long node)
+{
+   const __be32 *dm;
+   int l;
+   unsigned long num_sets;
+   u64 size, base, memblock_size;
+
+   memblock_size = get_lmb_size(node);
+
+   dm = of_get_flat_dt_prop(node, "ibm,dynamic-memory-v2", &l);
+   if (dm == NULL || l < sizeof(__be32))
+   return 0;
+
+   /* Verify expected length of the array of ibm,dynamic-memory-v2
+* structs fits in the actual size of the property data.
+*/
+   num_sets = of_read_number(dm++, 1);
+   if (l < (num_sets * (dt_root_addr_cells + 4) + 1) * sizeof(__be32))
+   return 0;
+
+   if (n_mem_addr_cells == 0)
+   

[PATCH V9 1/8] powerpc/firmware: Add definitions for new firmware features.

2016-12-06 Thread Michael Bringmann
Firmware Features: Define new bit flags representing the presence of
new device tree properties "ibm,drc-info", and "ibm,dynamic-memory-v2".
These flags are used to tell the front end processor when the Linux
kernel supports the new properties, and by the front end processor to
tell the Linux kernel that the new properties are present in the device
tree.

Signed-off-by: Michael Bringmann 
---
 arch/powerpc/include/asm/firmware.h   |5 -
 arch/powerpc/include/asm/prom.h   |2 ++
 arch/powerpc/platforms/pseries/firmware.c |2 ++
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/firmware.h 
b/arch/powerpc/include/asm/firmware.h
index 1e0b5a5..6b5cf38 100644
--- a/arch/powerpc/include/asm/firmware.h
+++ b/arch/powerpc/include/asm/firmware.h
@@ -51,6 +51,8 @@
 #define FW_FEATURE_BEST_ENERGY ASM_CONST(0x8000)
 #define FW_FEATURE_TYPE1_AFFINITY ASM_CONST(0x0001)
 #define FW_FEATURE_PRRNASM_CONST(0x0002)
+#define FW_FEATURE_DYN_MEM_V2  ASM_CONST(0x0004)
+#define FW_FEATURE_DRC_INFOASM_CONST(0x0008)
 
 #ifndef __ASSEMBLY__
 
@@ -66,7 +68,8 @@ enum {
FW_FEATURE_MULTITCE | FW_FEATURE_SPLPAR | FW_FEATURE_LPAR |
FW_FEATURE_CMO | FW_FEATURE_VPHN | FW_FEATURE_XCMO |
FW_FEATURE_SET_MODE | FW_FEATURE_BEST_ENERGY |
-   FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN,
+   FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN |
+   FW_FEATURE_DYN_MEM_V2 | FW_FEATURE_DRC_INFO,
FW_FEATURE_PSERIES_ALWAYS = 0,
FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL,
FW_FEATURE_POWERNV_ALWAYS = 0,
diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index 7f436ba..bc7c4b5 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -155,6 +155,8 @@ struct of_drconf_cell {
 #define OV5_PFO_HW_842 0x0E40  /* PFO Compression Accelerator */
 #define OV5_PFO_HW_ENCR0x0E20  /* PFO Encryption Accelerator */
 #define OV5_SUB_PROCESSORS 0x0F01  /* 1,2,or 4 Sub-Processors supported */
+#define OV5_DYN_MEM_V2 0x1680  /* Redef Prop Structures: dyn-mem-v2 */
+#define OV5_DRC_INFO   0x1640  /* Redef Prop Structures: drc-info   */
 
 /* Option Vector 6: IBM PAPR hints */
 #define OV6_LINUX  0x02/* Linux is our OS */
diff --git a/arch/powerpc/platforms/pseries/firmware.c 
b/arch/powerpc/platforms/pseries/firmware.c
index ea7f09b..d2d23f5 100644
--- a/arch/powerpc/platforms/pseries/firmware.c
+++ b/arch/powerpc/platforms/pseries/firmware.c
@@ -113,6 +113,8 @@ struct vec5_fw_feature {
 vec5_fw_features_table[] = {
{FW_FEATURE_TYPE1_AFFINITY, OV5_TYPE1_AFFINITY},
{FW_FEATURE_PRRN,   OV5_PRRN},
+   {FW_FEATURE_DYN_MEM_V2, OV5_DYN_MEM_V2},
+   {FW_FEATURE_DRC_INFO,   OV5_DRC_INFO},
 };
 
 static void __init fw_vec5_feature_init(const char *vec5, unsigned long len)



[PATCH V9 0/8] powerpc/devtree: Add support for 2 new DRC properties

2016-12-06 Thread Michael Bringmann
Several properties in the DRC device tree format are replaced by
more compact representations to allow, for example, for the encoding
of vast amounts of memory, and/or reduced duplication of information
in related data structures.

"ibm,drc-info": This property, when present, replaces the following
four properties: "ibm,drc-indexes", "ibm,drc-names", "ibm,drc-types"
and "ibm,drc-power-domains".  This property is defined for all
dynamically reconfigurable platform nodes.  The "ibm,drc-info" elements
are intended to provide a more compact representation, and reduce some
search overhead.

"ibm,dynamic-memory-v2": This property replaces the "ibm,dynamic-memory"
node representation within the "ibm,dynamic-reconfiguration-memory"
property provided by the BMC.  This element format is intended to provide
a more compact representation of memory, especially, for systems with
massive amounts of RAM.  To simplify portability, this property is
converted to the "ibm,dynamic-memory" property during system boot.

"ibm,architecture.vec": Bidirectional communication mechanism between
the host system and the front end processor indicating what features
the host system supports and what features the front end processor will
actually provide.  In this case, we are indicating that the host system
can support the new device tree structures "ibm,drc-info" and
"ibm,dynamic-memory-v2".

Signed-off-by: Michael Bringmann 

Michael Bringmann (8):
  powerpc/firmware: Add definitions for new firmware features.
  powerpc/memory: Parse new memory property to register blocks.
  powerpc/memory: Parse new memory property to initialize structures.
  pseries/hotplug init: Convert new DRC memory property for hotplug runtime
  pseries/drc-info: Search new DRC properties for CPU indexes
  hotplug/drc-info: Add code to search new devtree properties
  powerpc: Check arch.vec earlier during boot for memory features
  powerpc: Enable support for new DRC devtree properties
---
Changes in V9:
  -- Various code cleanup measures

 arch/powerpc/include/asm/firmware.h |5 -
 arch/powerpc/include/asm/prom.h |   38 
 arch/powerpc/kernel/prom.c  |  103 ++--
 arch/powerpc/kernel/prom_init.c |3 
 arch/powerpc/mm/numa.c  |  168 ---
 arch/powerpc/platforms/pseries/firmware.c   |2 
 arch/powerpc/platforms/pseries/pseries_energy.c |  202 ---
 drivers/pci/hotplug/rpadlpar_core.c |   13 +
 drivers/pci/hotplug/rpaphp.h|4 
 drivers/pci/hotplug/rpaphp_core.c   |  108 +---
 10 files changed, 543 insertions(+), 103 deletions(-)



RE: [v2, 2/3] powerpc/fsl/dts: add QMan and BMan portal nodes on t1024

2016-12-06 Thread Madalin-Cristian Bucur
> From: Scott Wood [mailto:o...@buserror.net]
> Sent: Wednesday, December 07, 2016 2:59 AM
> 
> On Tue, Dec 06, 2016 at 03:13:38PM +0200, Madalin Bucur wrote:
> > Signed-off-by: Madalin Bucur 
> > ---
> >  arch/powerpc/boot/dts/fsl/t1024qds.dts | 29
> +
> >  arch/powerpc/boot/dts/fsl/t1024rdb.dts | 33
> +
> >  2 files changed, 62 insertions(+)
> 
> So, in patch 1/3 you add qman and bman nodes to t1023si-post.dtsi and
> call it "add QMan and BMan portal nodes on t1023rdb" as if it were
> board-specific (the only board-specific part is the reserved-memory
> nodes, not the portals).
> 
> Then, in this patch you only touch board-specific files, and label it
> "add QMan and BMan portal nodes on t1024"...

That needs fixing...

> > diff --git a/arch/powerpc/boot/dts/fsl/t1024rdb.dts
> b/arch/powerpc/boot/dts/fsl/t1024rdb.dts
> > index 302cdd2..73a6453 100644
> > --- a/arch/powerpc/boot/dts/fsl/t1024rdb.dts
> > +++ b/arch/powerpc/boot/dts/fsl/t1024rdb.dts
> > @@ -41,6 +41,31 @@
> > #size-cells = <2>;
> > interrupt-parent = <>;
> >
> > +   aliases {
> > +   sg_2500_aqr105_phy4 = &sg_2500_aqr105_phy4;
> > +   };
> 
> What does this have to do with the qman and bman portal nodes?  Why is
> this alias needed?
> 
> -Scott

It's needed by u-boot, should be a separate patch.
I'll resend.

Madalin


Re: [PATCH v3 2/3] powerpc: get hugetlbpage handling more generic

2016-12-06 Thread Christophe LEROY



Le 07/12/2016 à 02:06, Scott Wood a écrit :

On Tue, 2016-12-06 at 07:34 +0100, Christophe LEROY wrote:


Le 06/12/2016 à 02:18, Scott Wood a écrit :


On Wed, 2016-09-21 at 10:11 +0200, Christophe Leroy wrote:


Today there are two implementations of hugetlbpages which are managed
by exclusive #ifdefs:
* FSL_BOOKE: several directory entries points to the same single
hugepage
* BOOK3S: one upper level directory entry points to a table of hugepages

In preparation of implementation of hugepage support on the 8xx, we
need a mix of the two above solutions, because the 8xx needs both cases
depending on the size of pages:
* In 4k page size mode, each PGD entry covers a 4M bytes area. It means
that 2 PGD entries will be necessary to cover an 8M hugepage while a
single PGD entry will cover 8x 512k hugepages.
* In 16 page size mode, each PGD entry covers a 64M bytes area. It means
that 8x 8M hugepages will be covered by one PGD entry and 64x 512k
hugepages will be covers by one PGD entry.

This patch:
* removes #ifdefs in favor of if/else based on the range sizes
* merges the two huge_pte_alloc() functions as they are pretty similar
* merges the two hugetlbpage_init() functions as they are pretty similar

[snip]


@@ -860,16 +803,34 @@ static int __init hugetlbpage_init(void)
 * if we have pdshift and shift value same, we don't
 * use pgt cache for hugepd.
 */
-   if (pdshift != shift) {
+   if (pdshift > shift) {
pgtable_cache_add(pdshift - shift, NULL);
if (!PGT_CACHE(pdshift - shift))
panic("hugetlbpage_init(): could not
create
"
  "pgtable cache for %d bit
pagesize\n", shift);
}
+#ifdef CONFIG_PPC_FSL_BOOK3E
+   else if (!hugepte_cache) {

This else never triggers on book3e, because the way this function
calculates
pdshift is wrong for book3e (it uses PyD_SHIFT instead of
HUGEPD_PxD_SHIFT).
 We later get OOMs because huge_pte_alloc() calculates pdshift correctly,
tries to use hugepte_cache, and fails.

Ok, I'll check it again, I was expecting it to still work properly on
book3e, because after applying patch 3 it works properly on the 8xx.


On 8xx you probably happen to have a page size that yields "pdshift <= shift"
even with the incorrect pdshift calculation, causing hugepte_cache to be
allocated.  The smallest hugepage size on 8xx is 512k compared to 4M on fsl-
book3e.



Indeed it works because on 8xx, PUD_SHIFT == PMD_SHIFT == PGDIR_SHIFT

Christophe


Re: [PATCH 3/3] powerpc: enable support for GCC plugins

2016-12-06 Thread Andrew Donnellan

On 07/12/16 08:25, Emese Revfy wrote:

What are these missing headers? Because if they aren't necessary then they can
be removed from gcc-common.h. There were missing headers on arm/arm64 and these
archs are supported. I think this version check is unnecessary because
gcc-plugin.sh also checks the missing headers.


rs6000-cpus.def, included via tm.h - see 
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66840


I realise gcc-plugin.sh does detect this, but the point of the 
additional version check is to provide somewhat more helpful advice to 
the user.



What is the problem on gcc-4.5/gcc-4.6?


On 4.6.4, c-family/c-common.h:

/scratch/ajd/gcc-test-v2/kernel/scripts/gcc-plugins/gcc-common.h:60:31: 
fatal error: c-family/c-common.h: No such file or directory


ajd@ka1:/scratch/ajd/tmp/cross/gcc-4.6.4-nolibc/powerpc64-linux$ find 
-name c-common.*

./lib/gcc/powerpc64-linux/4.6.4/plugin/include/c-common.h
./lib/gcc/powerpc64-linux/4.6.4/plugin/include/c-family/c-common.def

Are we sure the version check in gcc-common.h:59 is correct, or is this 
just a peculiarity of my particular toolchain?


I need to build another 4.5 toolchain, I'll try to do that this week.

--
Andrew Donnellan  OzLabs, ADL Canberra
andrew.donnel...@au1.ibm.com  IBM Australia Limited



Re: [PATCH 3/3] powerpc: enable support for GCC plugins

2016-12-06 Thread Andrew Donnellan

On 06/12/16 17:28, Andrew Donnellan wrote:

Enable support for GCC plugins on powerpc.

Add an additional version check in gcc-plugins-check to advise users to
upgrade to gcc 5.2+ on powerpc to avoid issues with header files (gcc <=
4.6) or missing copies of rs6000-cpus.def (4.8 to 5.1 on 64-bit targets).

Signed-off-by: Andrew Donnellan 

---

Open to bikeshedding on the gcc version check.

Compile tested with all plugins enabled on gcc 4.6-6.2,
x86->ppc{32,64,64le} and 4.8-6.2 ppc64le->ppc{32,64,64le}. Thanks to
Chris Smart for help with this.

I think it's best to take this through powerpc#next with an ACK from
Kees/Emese?
---
 arch/powerpc/Kconfig | 1 +
 scripts/Makefile.gcc-plugins | 8 


Will respin with an update to Documentation/gcc-plugins.txt as well.

--
Andrew Donnellan  OzLabs, ADL Canberra
andrew.donnel...@au1.ibm.com  IBM Australia Limited



Re: [PATCH v3 2/3] powerpc: get hugetlbpage handling more generic

2016-12-06 Thread Scott Wood
On Tue, 2016-12-06 at 07:34 +0100, Christophe LEROY wrote:
> 
> Le 06/12/2016 à 02:18, Scott Wood a écrit :
> > 
> > On Wed, 2016-09-21 at 10:11 +0200, Christophe Leroy wrote:
> > > 
> > > Today there are two implementations of hugetlbpages which are managed
> > > by exclusive #ifdefs:
> > > * FSL_BOOKE: several directory entries points to the same single
> > > hugepage
> > > * BOOK3S: one upper level directory entry points to a table of hugepages
> > > 
> > > In preparation of implementation of hugepage support on the 8xx, we
> > > need a mix of the two above solutions, because the 8xx needs both cases
> > > depending on the size of pages:
> > > * In 4k page size mode, each PGD entry covers a 4M bytes area. It means
> > > that 2 PGD entries will be necessary to cover an 8M hugepage while a
> > > single PGD entry will cover 8x 512k hugepages.
> > > * In 16 page size mode, each PGD entry covers a 64M bytes area. It means
> > > that 8x 8M hugepages will be covered by one PGD entry and 64x 512k
> > > hugepages will be covers by one PGD entry.
> > > 
> > > This patch:
> > > * removes #ifdefs in favor of if/else based on the range sizes
> > > * merges the two huge_pte_alloc() functions as they are pretty similar
> > > * merges the two hugetlbpage_init() functions as they are pretty similar
> > [snip]
> > > 
> > > @@ -860,16 +803,34 @@ static int __init hugetlbpage_init(void)
> > >    * if we have pdshift and shift value same, we don't
> > >    * use pgt cache for hugepd.
> > >    */
> > > - if (pdshift != shift) {
> > > + if (pdshift > shift) {
> > >   pgtable_cache_add(pdshift - shift, NULL);
> > >   if (!PGT_CACHE(pdshift - shift))
> > >   panic("hugetlbpage_init(): could not
> > > create
> > > "
> > >     "pgtable cache for %d bit
> > > pagesize\n", shift);
> > >   }
> > > +#ifdef CONFIG_PPC_FSL_BOOK3E
> > > + else if (!hugepte_cache) {
> > This else never triggers on book3e, because the way this function
> > calculates
> > pdshift is wrong for book3e (it uses PyD_SHIFT instead of
> > HUGEPD_PxD_SHIFT).
> >  We later get OOMs because huge_pte_alloc() calculates pdshift correctly,
> > tries to use hugepte_cache, and fails.
> Ok, I'll check it again, I was expecting it to still work properly on 
> book3e, because after applying patch 3 it works properly on the 8xx.

On 8xx you probably happen to have a page size that yields "pdshift <= shift"
even with the incorrect pdshift calculation, causing hugepte_cache to be
allocated.  The smallest hugepage size on 8xx is 512k compared to 4M on fsl-
book3e.

-Scott



Re: [kernel-hardening] Re: [PATCH 3/3] powerpc: enable support for GCC plugins

2016-12-06 Thread Andrew Donnellan

On 07/12/16 07:40, Kees Cook wrote:

Compile tested with all plugins enabled on gcc 4.6-6.2,
x86->ppc{32,64,64le} and 4.8-6.2 ppc64le->ppc{32,64,64le}. Thanks to
Chris Smart for help with this.


I assume also tested on 5.2? :)


Tested on the latest subrevision of every release branch up till 6.2, so 
yes :)



I think it's best to take this through powerpc#next with an ACK from
Kees/Emese?


That would be fine by me. Please consider the whole series:

Acked-by: Kees Cook 


Thanks!

--
Andrew Donnellan  OzLabs, ADL Canberra
andrew.donnel...@au1.ibm.com  IBM Australia Limited



Re: [v2, 2/3] powerpc/fsl/dts: add QMan and BMan portal nodes on t1024

2016-12-06 Thread Scott Wood
On Tue, Dec 06, 2016 at 03:13:38PM +0200, Madalin Bucur wrote:
> Signed-off-by: Madalin Bucur 
> ---
>  arch/powerpc/boot/dts/fsl/t1024qds.dts | 29 +
>  arch/powerpc/boot/dts/fsl/t1024rdb.dts | 33 +
>  2 files changed, 62 insertions(+)

So, in patch 1/3 you add qman and bman nodes to t1023si-post.dtsi and
call it "add QMan and BMan portal nodes on t1023rdb" as if it were
board-specific (the only board-specific part is the reserved-memory
nodes, not the portals).

Then, in this patch you only touch board-specific files, and label it
"add QMan and BMan portal nodes on t1024"...

> diff --git a/arch/powerpc/boot/dts/fsl/t1024rdb.dts 
> b/arch/powerpc/boot/dts/fsl/t1024rdb.dts
> index 302cdd2..73a6453 100644
> --- a/arch/powerpc/boot/dts/fsl/t1024rdb.dts
> +++ b/arch/powerpc/boot/dts/fsl/t1024rdb.dts
> @@ -41,6 +41,31 @@
>   #size-cells = <2>;
>   interrupt-parent = <>;
>  
> + aliases {
> + sg_2500_aqr105_phy4 = &sg_2500_aqr105_phy4;
> + };

What does this have to do with the qman and bman portal nodes?  Why is
this alias needed?

-Scott


Re: next-20161206 WARN@ mm/hugetlb.c:2918 during boot

2016-12-06 Thread Dave Hansen
On 12/06/2016 02:37 PM, Balbir Singh wrote:
> I think Michael found this as well, its related to adding our gigantic
> page to the hugetlb list
> and the WARN_ON that Dave Hansen added (commit
> a3a18061c987aa9da4f5d3cbb31a9e71e9d7191d)
> 
> I've not looked at this issue yet, Aneesh is aware of the issue as well

It's harmless, and I believe Andrew pulled it out of -mm.  So, just
ignore it for now.

Thanks for the report, though.


Re: next-20161206 WARN@ mm/hugetlb.c:2918 during boot

2016-12-06 Thread Balbir Singh
On Wed, Dec 7, 2016 at 9:27 AM, Stephen Rothwell <s...@canb.auug.org.au> wrote:
> Hi Sachin,
>
> [Just cc'ing a few more people]
>
> On Tue, 6 Dec 2016 21:11:46 +0530 Sachin Sant <sach...@linux.vnet.ibm.com> 
> wrote:
>>
>> Following warning is displayed during todays next boot
>> on a Power8 LPAR. Kernel was built with pseries_le_defconfig.
>>
>> [0.016702] [ cut here ]
>> [0.016708] WARNING: CPU: 1 PID: 1 at mm/hugetlb.c:2918 
>> hugetlb_add_hstate+0x1e8/0x210
>> [0.016709] Modules linked in:
>> [0.016712] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 
>> 4.9.0-rc8-next-20161206 #2
>> [0.016715] task: c003bacc task.stack: c003bad0
>> [0.016717] NIP: c0cd30a0 LR: c0cd303c CTR: 
>> c0000057d910
>> [0.016719] REGS: c003bad03870 TRAP: 0700   Not tainted  
>> (4.9.0-rc8-next-20161206)
>> [0.016721] MSR: 82029033 <SF,VEC,EE,ME,IR,DR,RI,LE>
>> [0.016728]   CR: 88004022  XER: 200e
>> [0.016730] CFAR: c057db9c SOFTE: 1
>> GPR00: c0cd303c c003bad03af0 c0e84000 0014
>> GPR04: c0bb7927 0002 fff4 c0db4000
>> GPR08: c0d14000 0001  0006
>> GPR12: 88004028 cea70400 c000ddd8 
>> GPR16:    
>> GPR20:    
>> GPR24:  c0ca3614 0020 c0ec1228
>> GPR28: 1c78 c0faa9f0 0004 c0fac668
>> [0.016767] NIP [c0cd30a0] hugetlb_add_hstate+0x1e8/0x210
>> [0.016770] LR [c0cd303c] hugetlb_add_hstate+0x184/0x210
>> [0.016771] Call Trace:
>> [0.016773] [c003bad03af0] [c0cd303c] 
>> hugetlb_add_hstate+0x184/0x210 (unreliable)
>> [0.016778] [c003bad03b80] [c0cb26ec] 
>> add_huge_page_size+0xa8/0xc8
>> [0.016782] [c003bad03bb0] [c0cb27d8] 
>> hugetlbpage_init+0x60/0x1cc
>> [0.016785] [c003bad03c40] [c000d4e0] 
>> do_one_initcall+0x60/0x1c0
>> [0.016788] [c003bad03d00] [c0ca4254] 
>> kernel_init_freeable+0x284/0x364
>> [0.016791] [c003bad03dc0] [c000ddf4] kernel_init+0x24/0x150
>> [0.016795] [c003bad03e30] [c000bae0] 
>> ret_from_kernel_thread+0x5c/0x7c
>> [0.016797] Instruction dump:
>> [0.016799] 7d0807b4 394a0010 7d0a5036 3d02ffe9 7fbe5040 fbe83d80 
>> 41fc001c 3ce2fff3
>> [0.016805] 89471fa4 2f8a 40fe000c 99271fa4 <0fe0> 38210090 
>> e8010010 eb61ffd8
>> [0.016811] ---[ end trace 42481f903da18455 ]---
>> [0.016882] pstore: using zlib compression
>>

I think Michael found this as well, its related to adding our gigantic
page to the hugetlb list
and the WARN_ON that Dave Hansen added (commit
a3a18061c987aa9da4f5d3cbb31a9e71e9d7191d)

I've not looked at this issue yet, Aneesh is aware of the issue as well

Balbir Singh.


Re: next-20161206 WARN@ mm/hugetlb.c:2918 during boot

2016-12-06 Thread Stephen Rothwell
Hi Sachin,

[Just cc'ing a few more people]

On Tue, 6 Dec 2016 21:11:46 +0530 Sachin Sant <sach...@linux.vnet.ibm.com> 
wrote:
>
> Following warning is displayed during todays next boot
> on a Power8 LPAR. Kernel was built with pseries_le_defconfig.
> 
> [0.016702] [ cut here ]
> [0.016708] WARNING: CPU: 1 PID: 1 at mm/hugetlb.c:2918 
> hugetlb_add_hstate+0x1e8/0x210
> [0.016709] Modules linked in:
> [0.016712] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 
> 4.9.0-rc8-next-20161206 #2
> [0.016715] task: c003bacc task.stack: c003bad0
> [0.016717] NIP: c0cd30a0 LR: c0cd303c CTR: 
> c057d910
> [0.016719] REGS: c003bad03870 TRAP: 0700   Not tainted  
> (4.9.0-rc8-next-20161206)
> [0.016721] MSR: 82029033 <SF,VEC,EE,ME,IR,DR,RI,LE>
> [0.016728]   CR: 88004022  XER: 200e
> [0.016730] CFAR: c057db9c SOFTE: 1 
> GPR00: c0cd303c c003bad03af0 c0e84000 0014 
> GPR04: c0bb7927 0002 fff4 c0db4000 
> GPR08: c0d14000 0001  0006 
> GPR12: 88004028 cea70400 c000ddd8  
> GPR16:     
> GPR20:     
> GPR24:  c0ca3614 0020 c0ec1228 
> GPR28: 1c78 c0faa9f0 0004 c0fac668 
> [0.016767] NIP [c0cd30a0] hugetlb_add_hstate+0x1e8/0x210
> [0.016770] LR [c0cd303c] hugetlb_add_hstate+0x184/0x210
> [0.016771] Call Trace:
> [0.016773] [c003bad03af0] [c0cd303c] 
> hugetlb_add_hstate+0x184/0x210 (unreliable)
> [0.016778] [c003bad03b80] [c0cb26ec] 
> add_huge_page_size+0xa8/0xc8
> [0.016782] [c003bad03bb0] [c0cb27d8] 
> hugetlbpage_init+0x60/0x1cc
> [0.016785] [c003bad03c40] [c000d4e0] 
> do_one_initcall+0x60/0x1c0
> [0.016788] [c003bad03d00] [c0ca4254] 
> kernel_init_freeable+0x284/0x364
> [0.016791] [c003bad03dc0] [c000ddf4] kernel_init+0x24/0x150
> [0.016795] [c003bad03e30] [c000bae0] 
> ret_from_kernel_thread+0x5c/0x7c
> [0.016797] Instruction dump:
> [0.016799] 7d0807b4 394a0010 7d0a5036 3d02ffe9 7fbe5040 fbe83d80 41fc001c 
> 3ce2fff3 
> [0.016805] 89471fa4 2f8a 40fe000c 99271fa4 <0fe0> 38210090 
> e8010010 eb61ffd8 
> [0.016811] ---[ end trace 42481f903da18455 ]---
> [0.016882] pstore: using zlib compression
> 
> Complete dmesg log is attached.
> 
> Thanks
> -Sachin

-- 
Cheers,
Stephen Rothwell


Re: [PATCH 3/3] powerpc: enable support for GCC plugins

2016-12-06 Thread Emese Revfy
On Tue,  6 Dec 2016 17:28:00 +1100
Andrew Donnellan  wrote:

> +  # Various gccs between 4.5 and 5.1 have bugs on powerpc due to missing
> +  # header files. gcc <= 4.6 doesn't work at all, gccs from 4.8 to 5.1 
> have
> +  # issues with 64-bit targets.
> +  ifeq ($(ARCH),powerpc)
> +ifeq ($(call cc-ifversion, -le, 0501, y), y)
> +   @echo "Cannot use CONFIG_GCC_PLUGINS: plugin support on gcc <= 5.1 is 
> buggy on powerpc, please upgrade to gcc 5.2 or newer" >&2 && exit 1
> +endif
> +  endif

Hi,

What are these missing headers? Because if they aren't necessary then they can
be removed from gcc-common.h. There were missing headers on arm/arm64 and these
archs are supported. I think this version check is unnecessary because
gcc-plugin.sh also checks the missing headers.

What is the problem on gcc-4.5/gcc-4.6?

-- 
Emese


Re: [PATCH 3/3] powerpc: enable support for GCC plugins

2016-12-06 Thread Kees Cook
On Mon, Dec 5, 2016 at 10:28 PM, Andrew Donnellan
 wrote:
> Enable support for GCC plugins on powerpc.
>
> Add an additional version check in gcc-plugins-check to advise users to
> upgrade to gcc 5.2+ on powerpc to avoid issues with header files (gcc <=
> 4.6) or missing copies of rs6000-cpus.def (4.8 to 5.1 on 64-bit targets).
>
> Signed-off-by: Andrew Donnellan 
>
> ---
>
> Open to bikeshedding on the gcc version check.

I think this looks fine. Anyone wanting to use gcc plugins on ppc with
an earlier gcc can send patches if they find a sane way to make it
work. :)

> Compile tested with all plugins enabled on gcc 4.6-6.2,
> x86->ppc{32,64,64le} and 4.8-6.2 ppc64le->ppc{32,64,64le}. Thanks to
> Chris Smart for help with this.

I assume also tested on 5.2? :)

> I think it's best to take this through powerpc#next with an ACK from
> Kees/Emese?

That would be fine by me. Please consider the whole series:

Acked-by: Kees Cook 

Thanks!

-Kees

> ---
>  arch/powerpc/Kconfig | 1 +
>  scripts/Makefile.gcc-plugins | 8 
>  2 files changed, 9 insertions(+)
>
> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> index 65fba4c..6efbc08 100644
> --- a/arch/powerpc/Kconfig
> +++ b/arch/powerpc/Kconfig
> @@ -92,6 +92,7 @@ config PPC
> select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL
> select HAVE_FUNCTION_TRACER
> select HAVE_FUNCTION_GRAPH_TRACER
> +   select HAVE_GCC_PLUGINS
> select SYSCTL_EXCEPTION_TRACE
> select VIRT_TO_BUS if !PPC64
> select HAVE_IDE
> diff --git a/scripts/Makefile.gcc-plugins b/scripts/Makefile.gcc-plugins
> index 26c67b7..9835a75 100644
> --- a/scripts/Makefile.gcc-plugins
> +++ b/scripts/Makefile.gcc-plugins
> @@ -47,6 +47,14 @@ gcc-plugins-check: FORCE
>  ifdef CONFIG_GCC_PLUGINS
>ifeq ($(PLUGINCC),)
>  ifneq ($(GCC_PLUGINS_CFLAGS),)
> +  # Various gccs between 4.5 and 5.1 have bugs on powerpc due to missing
> +  # header files. gcc <= 4.6 doesn't work at all, gccs from 4.8 to 5.1 
> have
> +  # issues with 64-bit targets.
> +  ifeq ($(ARCH),powerpc)
> +ifeq ($(call cc-ifversion, -le, 0501, y), y)
> + @echo "Cannot use CONFIG_GCC_PLUGINS: plugin support on gcc <= 5.1 
> is buggy on powerpc, please upgrade to gcc 5.2 or newer" >&2 && exit 1
> +endif
> +  endif
>ifeq ($(call cc-ifversion, -ge, 0405, y), y)
> $(Q)$(srctree)/scripts/gcc-plugin.sh --show-error "$(__PLUGINCC)" 
> "$(HOSTCXX)" "$(CC)" || true
> @echo "Cannot use CONFIG_GCC_PLUGINS: your gcc installation does not 
> support plugins, perhaps the necessary headers are missing?" >&2 && exit 1
> --
> Andrew Donnellan  OzLabs, ADL Canberra
> andrew.donnel...@au1.ibm.com  IBM Australia Limited
>



-- 
Kees Cook
Nexus Security


Re: [PATCH] PPC: sstep.c: Add modsw, moduw instruction emulation

2016-12-06 Thread Naveen N. Rao
On 2016/12/06 10:18PM, PrasannaKumar Muralidharan wrote:
> > By the way, I missed mentioning previously: please use 'powerpc: '
> > prefix for the subject, rather than PPC.
> 
> I will change it. Wondering how they are different.

It's by convention. Maintainers are picky ;)

- Naveen



Re: [PATCH] PPC: sstep.c: Add modsw, moduw instruction emulation

2016-12-06 Thread PrasannaKumar Muralidharan
> By the way, I missed mentioning previously: please use 'powerpc: '
> prefix for the subject, rather than PPC.

I will change it. Wondering how they are different.

>> Add modsw and moduw instruction emulation support to analyse_instr.
>
> And, it will be better if you can briefly describe what these functions
> do for the benefit of others.

Sure. I will add description.


Re: [PATCH] PPC: sstep.c: Add modsw, moduw instruction emulation

2016-12-06 Thread PrasannaKumar Muralidharan
> I guessed as much, but if you look at the existing function, you'll see
> that things have been arranged in numerical order. As such, it's best to
> stick to that convention.

Makes sense. Will do.


Re: [PATCH] cxl: prevent read/write to AFU config space while AFU not configured

2016-12-06 Thread Frederic Barrat



Le 05/12/2016 à 14:22, Andrew Donnellan a écrit :

During EEH recovery, we deconfigure all AFUs whilst leaving the
corresponding vPHB and virtual PCI device in place.

If something attempts to interact with the AFU's PCI config space (e.g.
running lspci) after the AFU has been deconfigured and before it's
reconfigured, cxl_pcie_{read,write}_config() will read invalid values from
the deconfigured struct cxl_afu and proceed to Oops when they try to
dereference pointers that have been set to NULL during deconfiguration.

Add a rwsem to struct cxl_afu so we can prevent interaction with config
space while the AFU is deconfigured.

Reported-by: Pradipta Ghosh 
Suggested-by: Frederic Barrat 
Cc: sta...@vger.kernel.org # 4.4+
Signed-off-by: Andrew Donnellan 



Thanks for adding the comments!

Acked-by: Frederic Barrat 



Re: [PATCH 1/1] serial/uuc_uart: Set shutdown timeout to CONFIG_HZ independent 2ms

2016-12-06 Thread Timur Tabi

Alexander Stein wrote:

Okay, I was just wondering why the timeout is dependent on the timer tick.
That didn't seem obvious to me.
Rethinking about this, I would rather replace those lines with msleep instead.


What's wrong with leaving it as-is?  The code is five years old, and 
Freescale/NXP barely uses the QE any more.  I don't have access to any 
hardware to test any changes you would propose.


[PATCH v2 3/3] powerpc/fsl/dts: add FMan node for t1042d4rdb

2016-12-06 Thread Madalin Bucur
Signed-off-by: Madalin Bucur 
---
 arch/powerpc/boot/dts/fsl/t1042d4rdb.dts | 52 
 1 file changed, 52 insertions(+)

diff --git a/arch/powerpc/boot/dts/fsl/t1042d4rdb.dts 
b/arch/powerpc/boot/dts/fsl/t1042d4rdb.dts
index 2a5a90d..fcd2aeb 100644
--- a/arch/powerpc/boot/dts/fsl/t1042d4rdb.dts
+++ b/arch/powerpc/boot/dts/fsl/t1042d4rdb.dts
@@ -48,6 +48,58 @@
"fsl,deepsleep-cpld";
};
};
+
+   soc: soc@ffe00 {
+   fman0: fman@40 {
+   ethernet@e {
+   phy-handle = <_sgmii_0>;
+   phy-connection-type = "sgmii";
+   };
+
+   ethernet@e2000 {
+   phy-handle = <_sgmii_1>;
+   phy-connection-type = "sgmii";
+   };
+
+   ethernet@e4000 {
+   phy-handle = <_sgmii_2>;
+   phy-connection-type = "sgmii";
+   };
+
+   ethernet@e6000 {
+   phy-handle = <_rgmii_0>;
+   phy-connection-type = "rgmii";
+   };
+
+   ethernet@e8000 {
+   phy-handle = <_rgmii_1>;
+   phy-connection-type = "rgmii";
+   };
+
+   mdio0: mdio@fc000 {
+   phy_sgmii_0: ethernet-phy@02 {
+   reg = <0x02>;
+   };
+
+   phy_sgmii_1: ethernet-phy@03 {
+   reg = <0x03>;
+   };
+
+   phy_sgmii_2: ethernet-phy@01 {
+   reg = <0x01>;
+   };
+
+   phy_rgmii_0: ethernet-phy@04 {
+   reg = <0x04>;
+   };
+
+   phy_rgmii_1: ethernet-phy@05 {
+   reg = <0x05>;
+   };
+   };
+   };
+   };
+
 };
 
 #include "t1042si-post.dtsi"
-- 
2.1.0



[PATCH v2 2/3] powerpc/fsl/dts: add QMan and BMan portal nodes on t1024

2016-12-06 Thread Madalin Bucur
Signed-off-by: Madalin Bucur 
---
 arch/powerpc/boot/dts/fsl/t1024qds.dts | 29 +
 arch/powerpc/boot/dts/fsl/t1024rdb.dts | 33 +
 2 files changed, 62 insertions(+)

diff --git a/arch/powerpc/boot/dts/fsl/t1024qds.dts 
b/arch/powerpc/boot/dts/fsl/t1024qds.dts
index 772143d..d6858b7 100644
--- a/arch/powerpc/boot/dts/fsl/t1024qds.dts
+++ b/arch/powerpc/boot/dts/fsl/t1024qds.dts
@@ -41,6 +41,27 @@
#size-cells = <2>;
interrupt-parent = <>;
 
+   reserved-memory {
+   #address-cells = <2>;
+   #size-cells = <2>;
+   ranges;
+
+   bman_fbpr: bman-fbpr {
+   size = <0 0x100>;
+   alignment = <0 0x100>;
+   };
+
+   qman_fqd: qman-fqd {
+   size = <0 0x40>;
+   alignment = <0 0x40>;
+   };
+
+   qman_pfdr: qman-pfdr {
+   size = <0 0x200>;
+   alignment = <0 0x200>;
+   };
+   };
+
ifc: localbus@ffe124000 {
reg = <0xf 0xfe124000 0 0x2000>;
ranges = <0 0 0xf 0xe800 0x0800
@@ -80,6 +101,14 @@
ranges = <0x 0xf 0x 0x01072000>;
};
 
+   bportals: bman-portals@ff400 {
+   ranges = <0x0 0xf 0xf400 0x200>;
+   };
+
+   qportals: qman-portals@ff600 {
+   ranges = <0x0 0xf 0xf600 0x200>;
+   };
+
soc: soc@ffe00 {
ranges = <0x 0xf 0xfe00 0x100>;
reg = <0xf 0xfe00 0 0x1000>;
diff --git a/arch/powerpc/boot/dts/fsl/t1024rdb.dts 
b/arch/powerpc/boot/dts/fsl/t1024rdb.dts
index 302cdd2..73a6453 100644
--- a/arch/powerpc/boot/dts/fsl/t1024rdb.dts
+++ b/arch/powerpc/boot/dts/fsl/t1024rdb.dts
@@ -41,6 +41,31 @@
#size-cells = <2>;
interrupt-parent = <>;
 
+   aliases {
+   sg_2500_aqr105_phy4 = _2500_aqr105_phy4;
+   };
+
+   reserved-memory {
+   #address-cells = <2>;
+   #size-cells = <2>;
+   ranges;
+
+   bman_fbpr: bman-fbpr {
+   size = <0 0x100>;
+   alignment = <0 0x100>;
+   };
+
+   qman_fqd: qman-fqd {
+   size = <0 0x40>;
+   alignment = <0 0x40>;
+   };
+
+   qman_pfdr: qman-pfdr {
+   size = <0 0x200>;
+   alignment = <0 0x200>;
+   };
+   };
+
ifc: localbus@ffe124000 {
reg = <0xf 0xfe124000 0 0x2000>;
ranges = <0 0 0xf 0xe800 0x0800
@@ -82,6 +107,14 @@
ranges = <0x 0xf 0x 0x01072000>;
};
 
+   bportals: bman-portals@ff400 {
+   ranges = <0x0 0xf 0xf400 0x200>;
+   };
+
+   qportals: qman-portals@ff600 {
+   ranges = <0x0 0xf 0xf600 0x200>;
+   };
+
soc: soc@ffe00 {
ranges = <0x 0xf 0xfe00 0x100>;
reg = <0xf 0xfe00 0 0x1000>;
-- 
2.1.0



[PATCH v2 1/3] powerpc/fsl/dts: add QMan and BMan portal nodes on t1023rdb

2016-12-06 Thread Madalin Bucur
Signed-off-by: Madalin Bucur 
---
 arch/powerpc/boot/dts/fsl/t1023rdb.dts  |  29 
 arch/powerpc/boot/dts/fsl/t1023si-post.dtsi | 103 
 2 files changed, 132 insertions(+)

diff --git a/arch/powerpc/boot/dts/fsl/t1023rdb.dts 
b/arch/powerpc/boot/dts/fsl/t1023rdb.dts
index 2975762..5ba6fbf 100644
--- a/arch/powerpc/boot/dts/fsl/t1023rdb.dts
+++ b/arch/powerpc/boot/dts/fsl/t1023rdb.dts
@@ -41,6 +41,27 @@
#size-cells = <2>;
interrupt-parent = <>;
 
+   reserved-memory {
+   #address-cells = <2>;
+   #size-cells = <2>;
+   ranges;
+
+   bman_fbpr: bman-fbpr {
+   size = <0 0x100>;
+   alignment = <0 0x100>;
+   };
+
+   qman_fqd: qman-fqd {
+   size = <0 0x40>;
+   alignment = <0 0x40>;
+   };
+
+   qman_pfdr: qman-pfdr {
+   size = <0 0x200>;
+   alignment = <0 0x200>;
+   };
+   };
+
ifc: localbus@ffe124000 {
reg = <0xf 0xfe124000 0 0x2000>;
ranges = <0 0 0xf 0xe800 0x0800
@@ -72,6 +93,14 @@
ranges = <0x 0xf 0x 0x01072000>;
};
 
+   bportals: bman-portals@ff400 {
+   ranges = <0x0 0xf 0xf400 0x200>;
+   };
+
+   qportals: qman-portals@ff600 {
+   ranges = <0x0 0xf 0xf600 0x200>;
+   };
+
soc: soc@ffe00 {
ranges = <0x 0xf 0xfe00 0x100>;
reg = <0xf 0xfe00 0 0x1000>;
diff --git a/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi 
b/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi
index 6e0b489..da2894c 100644
--- a/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi
@@ -34,6 +34,21 @@
 
 #include 
 
+_fbpr {
+   compatible = "fsl,bman-fbpr";
+   alloc-ranges = <0 0 0x1 0>;
+};
+
+_fqd {
+   compatible = "fsl,qman-fqd";
+   alloc-ranges = <0 0 0x1 0>;
+};
+
+_pfdr {
+   compatible = "fsl,qman-pfdr";
+   alloc-ranges = <0 0 0x1 0>;
+};
+
  {
#address-cells = <2>;
#size-cells = <1>;
@@ -180,6 +195,92 @@
};
 };
 
+ {
+   #address-cells = <0x1>;
+   #size-cells = <0x1>;
+   compatible = "simple-bus";
+
+   bman-portal@0 {
+   cell-index = <0x0>;
+   compatible = "fsl,bman-portal";
+   reg = <0x0 0x4000>, <0x100 0x1000>;
+   interrupts = <105 2 0 0>;
+   };
+   bman-portal@4000 {
+   cell-index = <0x1>;
+   compatible = "fsl,bman-portal";
+   reg = <0x4000 0x4000>, <0x1001000 0x1000>;
+   interrupts = <107 2 0 0>;
+   };
+   bman-portal@8000 {
+   cell-index = <2>;
+   compatible = "fsl,bman-portal";
+   reg = <0x8000 0x4000>, <0x1002000 0x1000>;
+   interrupts = <109 2 0 0>;
+   };
+   bman-portal@c000 {
+   cell-index = <0x3>;
+   compatible = "fsl,bman-portal";
+   reg = <0xc000 0x4000>, <0x1003000 0x1000>;
+   interrupts = <111 2 0 0>;
+   };
+   bman-portal@1 {
+   cell-index = <0x4>;
+   compatible = "fsl,bman-portal";
+   reg = <0x1 0x4000>, <0x1004000 0x1000>;
+   interrupts = <113 2 0 0>;
+   };
+   bman-portal@14000 {
+   cell-index = <0x5>;
+   compatible = "fsl,bman-portal";
+   reg = <0x14000 0x4000>, <0x1005000 0x1000>;
+   interrupts = <115 2 0 0>;
+   };
+};
+
+ {
+   #address-cells = <0x1>;
+   #size-cells = <0x1>;
+   compatible = "simple-bus";
+
+   qportal0: qman-portal@0 {
+   compatible = "fsl,qman-portal";
+   reg = <0x0 0x4000>, <0x100 0x1000>;
+   interrupts = <104 0x2 0 0>;
+   cell-index = <0x0>;
+   };
+   qportal1: qman-portal@4000 {
+   compatible = "fsl,qman-portal";
+   reg = <0x4000 0x4000>, <0x1001000 0x1000>;
+   interrupts = <106 0x2 0 0>;
+   cell-index = <0x1>;
+   };
+   qportal2: qman-portal@8000 {
+   compatible = "fsl,qman-portal";
+   reg = <0x8000 0x4000>, <0x1002000 0x1000>;
+   interrupts = <108 0x2 0 0>;
+   cell-index = <0x2>;
+   };
+   qportal3: qman-portal@c000 {
+   compatible = "fsl,qman-portal";
+   reg = <0xc000 0x4000>, <0x1003000 0x1000>;
+   interrupts = <110 0x2 0 0>;
+   cell-index = <0x3>;
+   };
+   qportal4: qman-portal@1 {
+   compatible = "fsl,qman-portal";
+   reg = <0x1 0x4000>, <0x1004000 

[PATCH v9 6/6] powerpc/pv-qspinlock: Optimise native unlock path

2016-12-06 Thread Pan Xinhui
Avoid a function call under the native version of qspinlock. On powerNV,
before applying this patch, every unlock was expensive. This small
optimization enhances the performance.

We use a static_key with jump_label, which removes unnecessary loads of
the lppaca and related data.

Signed-off-by: Pan Xinhui 
---
 arch/powerpc/include/asm/qspinlock_paravirt.h | 18 +-
 arch/powerpc/kernel/paravirt.c|  4 
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/qspinlock_paravirt.h 
b/arch/powerpc/include/asm/qspinlock_paravirt.h
index d87cda0..8d39446 100644
--- a/arch/powerpc/include/asm/qspinlock_paravirt.h
+++ b/arch/powerpc/include/asm/qspinlock_paravirt.h
@@ -6,12 +6,14 @@
 #define _ASM_QSPINLOCK_PARAVIRT_H
 
 #include  
+#include  
 
 extern void pv_lock_init(void);
 extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
 extern void __pv_init_lock_hash(void);
 extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
 extern void __pv_queued_spin_unlock(struct qspinlock *lock);
+extern struct static_key_true sharedprocessor_key;
 
 static inline void pv_queued_spin_lock(struct qspinlock *lock, u32 val)
 {
@@ -20,7 +22,21 @@ static inline void pv_queued_spin_lock(struct qspinlock 
*lock, u32 val)
 
 static inline void pv_queued_spin_unlock(struct qspinlock *lock)
 {
-   pv_lock_op.unlock(lock);
+   /*
+* on powerNV and pSeries with jump_label, code will be
+*  PowerNV:pSeries:
+*  nop;b 2f;
+*  native unlock   2:
+*  pv unlock;
+* In this way, we can do unlock quick in native case.
+*
+* IF jump_label is not enabled, we fall back into
+* if condition, IOW, ld && cmp && bne.
+*/
+   if (static_branch_likely(_key))
+   native_queued_spin_unlock(lock);
+   else
+   pv_lock_op.unlock(lock);
 }
 
 static inline void pv_wait(u8 *ptr, u8 val)
diff --git a/arch/powerpc/kernel/paravirt.c b/arch/powerpc/kernel/paravirt.c
index e697b17..a0a000e 100644
--- a/arch/powerpc/kernel/paravirt.c
+++ b/arch/powerpc/kernel/paravirt.c
@@ -140,6 +140,9 @@ struct pv_lock_ops pv_lock_op = {
 };
 EXPORT_SYMBOL(pv_lock_op);
 
+struct static_key_true sharedprocessor_key = STATIC_KEY_TRUE_INIT;
+EXPORT_SYMBOL(sharedprocessor_key);
+
 void __init pv_lock_init(void)
 {
if (SHARED_PROCESSOR) {
@@ -149,5 +152,6 @@ void __init pv_lock_init(void)
pv_lock_op.unlock = __pv_queued_spin_unlock;
pv_lock_op.wait = __pv_wait;
pv_lock_op.kick = __pv_kick;
+   static_branch_disable(_key);
}
 }
-- 
2.4.11



[PATCH v9 5/6] powerpc: pSeries: Add pv-qspinlock build config/make

2016-12-06 Thread Pan Xinhui
pSeries runs as a guest and might need pv-qspinlock.

Signed-off-by: Pan Xinhui 
---
 arch/powerpc/kernel/Makefile   | 1 +
 arch/powerpc/platforms/pseries/Kconfig | 8 
 2 files changed, 9 insertions(+)

diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 1925341..4780415 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -53,6 +53,7 @@ obj-$(CONFIG_PPC_970_NAP) += idle_power4.o
 obj-$(CONFIG_PPC_P7_NAP)   += idle_book3s.o
 procfs-y   := proc_powerpc.o
 obj-$(CONFIG_PROC_FS)  += $(procfs-y)
+obj-$(CONFIG_PARAVIRT_SPINLOCKS)   += paravirt.o
 rtaspci-$(CONFIG_PPC64)-$(CONFIG_PCI)  := rtas_pci.o
 obj-$(CONFIG_PPC_RTAS) += rtas.o rtas-rtc.o $(rtaspci-y-y)
 obj-$(CONFIG_PPC_RTAS_DAEMON)  += rtasd.o
diff --git a/arch/powerpc/platforms/pseries/Kconfig 
b/arch/powerpc/platforms/pseries/Kconfig
index bec90fb..c9cc064 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -33,6 +33,14 @@ config PPC_SPLPAR
  processors, that is, which share physical processors between
  two or more partitions.
 
+config PARAVIRT_SPINLOCKS
+   bool "Paravirtialization support for qspinlock"
+   depends on PPC_SPLPAR && QUEUED_SPINLOCKS
+   default y
+   help
+ If kernel need run as a guest then enable this option.
+ Generally it can let kernel have a better performace.
+
 config DTL
bool "Dispatch Trace Log"
depends on PPC_SPLPAR && DEBUG_FS
-- 
2.4.11



[PATCH v9 4/6] powerpc/pv-qspinlock: powerpc support pv-qspinlock

2016-12-06 Thread Pan Xinhui
The default pv-qspinlock uses qspinlock (the native version of pv-qspinlock).
pv_lock initialization should be done at boot stage with irqs disabled.
And if we run as a guest with powerKVM/pHyp shared_processor mode,
restore pv_lock_ops callbacks to pv-qspinlock(pv version) which makes
full use of virtualization.

There is a hash table, we store cpu number into it and the key is lock.
So everytime pv_wait can know who is the lock holder by searching the
lock. Also store the lock in a per_cpu struct, and remove it when we own
the lock. Then pv_wait can know which lock we are spinning on. But the
cpu in the hash table might not be the correct lock holder, because for
performance reasons we do not take care of hash conflicts.

Also introduce spin_lock_holder, which tells who owns the lock now.
currently the only user is spin_unlock_wait.

Signed-off-by: Pan Xinhui 
---
 arch/powerpc/include/asm/qspinlock.h   |  29 +++-
 arch/powerpc/include/asm/qspinlock_paravirt.h  |  36 +
 .../powerpc/include/asm/qspinlock_paravirt_types.h |  13 ++
 arch/powerpc/kernel/paravirt.c | 153 +
 arch/powerpc/lib/locks.c   |   8 +-
 arch/powerpc/platforms/pseries/setup.c |   5 +
 6 files changed, 241 insertions(+), 3 deletions(-)
 create mode 100644 arch/powerpc/include/asm/qspinlock_paravirt.h
 create mode 100644 arch/powerpc/include/asm/qspinlock_paravirt_types.h
 create mode 100644 arch/powerpc/kernel/paravirt.c

diff --git a/arch/powerpc/include/asm/qspinlock.h 
b/arch/powerpc/include/asm/qspinlock.h
index 4c89256..8fd6349 100644
--- a/arch/powerpc/include/asm/qspinlock.h
+++ b/arch/powerpc/include/asm/qspinlock.h
@@ -15,7 +15,7 @@ static inline u8 *__qspinlock_lock_byte(struct qspinlock 
*lock)
return (u8 *)lock + 3 * IS_BUILTIN(CONFIG_CPU_BIG_ENDIAN);
 }
 
-static inline void queued_spin_unlock(struct qspinlock *lock)
+static inline void native_queued_spin_unlock(struct qspinlock *lock)
 {
/* release semantics is required */
smp_store_release(__qspinlock_lock_byte(lock), 0);
@@ -27,6 +27,33 @@ static inline int queued_spin_is_locked(struct qspinlock 
*lock)
return atomic_read(>val);
 }
 
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+#include 
+/*
+ * try to know who is the lock holder, however it is not always true
+ * Return:
+ * -1, we did not know the lock holder.
+ * other value, likely is the lock holder.
+ */
+extern int spin_lock_holder(void *lock);
+
+static inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
+{
+   pv_queued_spin_lock(lock, val);
+}
+
+static inline void queued_spin_unlock(struct qspinlock *lock)
+{
+   pv_queued_spin_unlock(lock);
+}
+#else
+#define spin_lock_holder(l) (-1)
+static inline void queued_spin_unlock(struct qspinlock *lock)
+{
+   native_queued_spin_unlock(lock);
+}
+#endif
+
 #include 
 
 /* we need override it as ppc has io_sync stuff */
diff --git a/arch/powerpc/include/asm/qspinlock_paravirt.h 
b/arch/powerpc/include/asm/qspinlock_paravirt.h
new file mode 100644
index 000..d87cda0
--- /dev/null
+++ b/arch/powerpc/include/asm/qspinlock_paravirt.h
@@ -0,0 +1,36 @@
+#ifndef CONFIG_PARAVIRT_SPINLOCKS
+#error "do not include this file"
+#endif
+
+#ifndef _ASM_QSPINLOCK_PARAVIRT_H
+#define _ASM_QSPINLOCK_PARAVIRT_H
+
+#include  
+
+extern void pv_lock_init(void);
+extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
+extern void __pv_init_lock_hash(void);
+extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
+extern void __pv_queued_spin_unlock(struct qspinlock *lock);
+
+static inline void pv_queued_spin_lock(struct qspinlock *lock, u32 val)
+{
+   pv_lock_op.lock(lock, val);
+}
+
+static inline void pv_queued_spin_unlock(struct qspinlock *lock)
+{
+   pv_lock_op.unlock(lock);
+}
+
+static inline void pv_wait(u8 *ptr, u8 val)
+{
+   pv_lock_op.wait(ptr, val);
+}
+
+static inline void pv_kick(int cpu)
+{
+   pv_lock_op.kick(cpu);
+}
+
+#endif
diff --git a/arch/powerpc/include/asm/qspinlock_paravirt_types.h 
b/arch/powerpc/include/asm/qspinlock_paravirt_types.h
new file mode 100644
index 000..83611ed
--- /dev/null
+++ b/arch/powerpc/include/asm/qspinlock_paravirt_types.h
@@ -0,0 +1,13 @@
+#ifndef _ASM_QSPINLOCK_PARAVIRT_TYPES_H
+#define _ASM_QSPINLOCK_PARAVIRT_TYPES_H
+
+struct pv_lock_ops {
+   void (*lock)(struct qspinlock *lock, u32 val);
+   void (*unlock)(struct qspinlock *lock);
+   void (*wait)(u8 *ptr, u8 val);
+   void (*kick)(int cpu);
+};
+
+extern struct pv_lock_ops pv_lock_op;
+
+#endif
diff --git a/arch/powerpc/kernel/paravirt.c b/arch/powerpc/kernel/paravirt.c
new file mode 100644
index 000..e697b17
--- /dev/null
+++ b/arch/powerpc/kernel/paravirt.c
@@ -0,0 +1,153 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * 

[PATCH v9 3/6] powerpc: lib/locks.c: Add cpu yield/wake helper function

2016-12-06 Thread Pan Xinhui
Add two corresponding helper functions to support pv-qspinlock.

For normal use, __spin_yield_cpu will confer the current vcpu's slices to the
target vcpu (say, a lock holder). If the target vcpu is not specified, or it
is in the running state, whether we confer to the lpar depends on the caller.

Because the hcall itself introduces latency and a little overhead, and we
do NOT want to suffer any latency in some cases, e.g. in an interrupt handler,
the second parameter *confer* indicates such cases.

__spin_wake_cpu is simpler: it will wake up one vcpu regardless of that
vcpu's current state.

Signed-off-by: Pan Xinhui 
---
 arch/powerpc/include/asm/spinlock.h |  4 +++
 arch/powerpc/lib/locks.c| 57 +
 2 files changed, 61 insertions(+)

diff --git a/arch/powerpc/include/asm/spinlock.h 
b/arch/powerpc/include/asm/spinlock.h
index 954099e..6426bd5 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -64,9 +64,13 @@ static inline bool vcpu_is_preempted(int cpu)
 /* We only yield to the hypervisor if we are in shared processor mode */
 #define SHARED_PROCESSOR (lppaca_shared_proc(local_paca->lppaca_ptr))
 extern void __spin_yield(arch_spinlock_t *lock);
+extern void __spin_yield_cpu(int cpu, int confer);
+extern void __spin_wake_cpu(int cpu);
 extern void __rw_yield(arch_rwlock_t *lock);
 #else /* SPLPAR */
 #define __spin_yield(x)barrier()
+#define __spin_yield_cpu(x, y) barrier()
+#define __spin_wake_cpu(x) barrier()
 #define __rw_yield(x)  barrier()
 #define SHARED_PROCESSOR   0
 #endif
diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c
index 8f6dbb0..dff0bfa 100644
--- a/arch/powerpc/lib/locks.c
+++ b/arch/powerpc/lib/locks.c
@@ -23,6 +23,63 @@
 #include 
 #include 
 
+/*
+ * confer our slices to a specified cpu and return. If it is in running state
+ * or cpu is -1, then we will check confer. If confer is NULL, we will return
+ * otherwise we confer our slices to lpar.
+ */
+void __spin_yield_cpu(int cpu, int confer)
+{
+   unsigned int yield_count;
+
+   if (cpu == -1)
+   goto yield_to_lpar;
+
+   BUG_ON(cpu >= nr_cpu_ids);
+   yield_count = be32_to_cpu(lppaca_of(cpu).yield_count);
+
+   /* if cpu is running, confer slices to lpar conditionally*/
+   if ((yield_count & 1) == 0)
+   goto yield_to_lpar;
+
+   plpar_hcall_norets(H_CONFER,
+   get_hard_smp_processor_id(cpu), yield_count);
+   return;
+
+yield_to_lpar:
+   if (confer)
+   plpar_hcall_norets(H_CONFER, -1, 0);
+}
+EXPORT_SYMBOL_GPL(__spin_yield_cpu);
+
+void __spin_wake_cpu(int cpu)
+{
+   BUG_ON(cpu >= nr_cpu_ids);
+   /*
+* NOTE: we should always do this hcall regardless of
+* the yield_count of the holder_cpu.
+* as thers might be a case like below;
+*  CPU 1   CPU 2
+*  yielded = true
+* if (yielded)
+*  __spin_wake_cpu()
+*  __spin_yield_cpu()
+*
+* So we might lose a wake if we check the yield_count and
+* return directly if the holder_cpu is running.
+* IOW. do NOT code like below.
+*  yield_count = be32_to_cpu(lppaca_of(cpu).yield_count);
+*  if ((yield_count & 1) == 0)
+*  return;
+*
+* a PROD hcall marks the target_cpu proded, which cause the next cede
+* or confer called on the target_cpu invalid.
+*/
+   plpar_hcall_norets(H_PROD,
+   get_hard_smp_processor_id(cpu));
+}
+EXPORT_SYMBOL_GPL(__spin_wake_cpu);
+
 #ifndef CONFIG_QUEUED_SPINLOCKS
 void __spin_yield(arch_spinlock_t *lock)
 {
-- 
2.4.11



[PATCH v9 2/6] powerpc: platforms/Kconfig: Add qspinlock build config

2016-12-06 Thread Pan Xinhui
pSeries/powerNV will use qspinlock from now on.

Signed-off-by: Pan Xinhui 
---
 arch/powerpc/platforms/Kconfig | 9 +
 1 file changed, 9 insertions(+)

diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index fbdae83..3559bbf 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -20,6 +20,15 @@ source "arch/powerpc/platforms/44x/Kconfig"
 source "arch/powerpc/platforms/40x/Kconfig"
 source "arch/powerpc/platforms/amigaone/Kconfig"
 
+config ARCH_USE_QUEUED_SPINLOCKS
+depends on PPC_PSERIES || PPC_POWERNV
+bool "Enable qspinlock"
+default y
+help
+ Enabling this option will let kernel use qspinlock which is a kind of
+ fairlock.  It has shown a good performance improvement on x86 and also
+ ppc especially in high contention cases.
+
 config KVM_GUEST
bool "KVM Guest support"
default n
-- 
2.4.11



[PATCH v9 1/6] powerpc/qspinlock: powerpc support qspinlock

2016-12-06 Thread Pan Xinhui
This patch adds basic code to enable qspinlock on powerpc. qspinlock is
one kind of fair-lock implementation, and has shown some performance
improvement in some scenarios.

queued_spin_unlock() release the lock by just one write of NULL to the
::locked field which sits at different places in the two endianness
system.

We override some arch_spin_XXX as powerpc has io_sync stuff which makes
sure the io operations are protected by the lock correctly.

There is another special case, see commit
2c610022711 ("locking/qspinlock: Fix spin_unlock_wait() some more")

Signed-off-by: Pan Xinhui 
---
 arch/powerpc/include/asm/qspinlock.h  | 66 +++
 arch/powerpc/include/asm/spinlock.h   | 31 +--
 arch/powerpc/include/asm/spinlock_types.h |  4 ++
 arch/powerpc/lib/locks.c  | 62 +
 4 files changed, 150 insertions(+), 13 deletions(-)
 create mode 100644 arch/powerpc/include/asm/qspinlock.h

diff --git a/arch/powerpc/include/asm/qspinlock.h 
b/arch/powerpc/include/asm/qspinlock.h
new file mode 100644
index 000..4c89256
--- /dev/null
+++ b/arch/powerpc/include/asm/qspinlock.h
@@ -0,0 +1,66 @@
+#ifndef _ASM_POWERPC_QSPINLOCK_H
+#define _ASM_POWERPC_QSPINLOCK_H
+
+#include 
+
+#define SPIN_THRESHOLD (1 << 15)
+#define queued_spin_unlock queued_spin_unlock
+#define queued_spin_is_locked queued_spin_is_locked
+#define queued_spin_unlock_wait queued_spin_unlock_wait
+
+extern void queued_spin_unlock_wait(struct qspinlock *lock);
+
+static inline u8 *__qspinlock_lock_byte(struct qspinlock *lock)
+{
+   return (u8 *)lock + 3 * IS_BUILTIN(CONFIG_CPU_BIG_ENDIAN);
+}
+
+static inline void queued_spin_unlock(struct qspinlock *lock)
+{
+   /* release semantics is required */
+   smp_store_release(__qspinlock_lock_byte(lock), 0);
+}
+
+static inline int queued_spin_is_locked(struct qspinlock *lock)
+{
+   smp_mb();
+   return atomic_read(>val);
+}
+
+#include 
+
+/* we need override it as ppc has io_sync stuff */
+#undef arch_spin_trylock
+#undef arch_spin_lock
+#undef arch_spin_lock_flags
+#undef arch_spin_unlock
+#define arch_spin_trylock arch_spin_trylock
+#define arch_spin_lock arch_spin_lock
+#define arch_spin_lock_flags arch_spin_lock_flags
+#define arch_spin_unlock arch_spin_unlock
+
+static inline int arch_spin_trylock(arch_spinlock_t *lock)
+{
+   CLEAR_IO_SYNC;
+   return queued_spin_trylock(lock);
+}
+
+static inline void arch_spin_lock(arch_spinlock_t *lock)
+{
+   CLEAR_IO_SYNC;
+   queued_spin_lock(lock);
+}
+
+static inline
+void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
+{
+   CLEAR_IO_SYNC;
+   queued_spin_lock(lock);
+}
+
+static inline void arch_spin_unlock(arch_spinlock_t *lock)
+{
+   SYNC_IO;
+   queued_spin_unlock(lock);
+}
+#endif /* _ASM_POWERPC_QSPINLOCK_H */
diff --git a/arch/powerpc/include/asm/spinlock.h 
b/arch/powerpc/include/asm/spinlock.h
index 8c1b913..954099e 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -60,6 +60,23 @@ static inline bool vcpu_is_preempted(int cpu)
 }
 #endif
 
+#if defined(CONFIG_PPC_SPLPAR)
+/* We only yield to the hypervisor if we are in shared processor mode */
+#define SHARED_PROCESSOR (lppaca_shared_proc(local_paca->lppaca_ptr))
+extern void __spin_yield(arch_spinlock_t *lock);
+extern void __rw_yield(arch_rwlock_t *lock);
+#else /* SPLPAR */
+#define __spin_yield(x)barrier()
+#define __rw_yield(x)  barrier()
+#define SHARED_PROCESSOR   0
+#endif
+
+#ifdef CONFIG_QUEUED_SPINLOCKS
+#include 
+#else
+
+#define arch_spin_relax(lock)  __spin_yield(lock)
+
 static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
 {
return lock.slock == 0;
@@ -114,18 +131,6 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock)
  * held.  Conveniently, we have a word in the paca that holds this
  * value.
  */
-
-#if defined(CONFIG_PPC_SPLPAR)
-/* We only yield to the hypervisor if we are in shared processor mode */
-#define SHARED_PROCESSOR (lppaca_shared_proc(local_paca->lppaca_ptr))
-extern void __spin_yield(arch_spinlock_t *lock);
-extern void __rw_yield(arch_rwlock_t *lock);
-#else /* SPLPAR */
-#define __spin_yield(x)barrier()
-#define __rw_yield(x)  barrier()
-#define SHARED_PROCESSOR   0
-#endif
-
 static inline void arch_spin_lock(arch_spinlock_t *lock)
 {
CLEAR_IO_SYNC;
@@ -203,6 +208,7 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t 
*lock)
smp_mb();
 }
 
+#endif /* !CONFIG_QUEUED_SPINLOCKS */
 /*
  * Read-write spinlocks, allowing multiple readers
  * but only one writer.
@@ -338,7 +344,6 @@ static inline void arch_write_unlock(arch_rwlock_t *rw)
 #define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
 #define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
 
-#define arch_spin_relax(lock)  __spin_yield(lock)
 #define arch_read_relax(lock)  

[PATCH v9 0/6] Implement qspinlock/pv-qspinlock on ppc

2016-12-06 Thread Pan Xinhui
Hi All,
  this is the fairlock patchset. You can apply them and build successfully.
patches are based on linux-next
  qspinlock can avoid the waiter starvation issue. It has about the same speed in
single-thread and it can be much faster in high contention situations
especially when the spinlock is embedded within the data structure to be
protected.

v8 -> v9:
mv qspinlock config entry to platforms/kconfig
fix comments and remove unnecessary code; thanks to Boqun for reviewing.
v7 -> v8:
add one patch to drop a function call under native qspinlock unlock.
Enabling qspinlock or not is a compiler option now.
rebase onto linux-next(4.9-rc7)
v6 -> v7:
rebase onto 4.8-rc4
v1 -> v6:
too many details. snip. 

some benchmark result below

perf bench
these numbers are ops per sec, So the higher the better.
***
on pSeries with 32 vcpus, 32Gb memory, pHyp.

test case   | pv-qspinlock  |  qspinlock| 
current-spinlock

futex hash  | 618572| 552332| 553788
futex lock-pi   | 364   | 364   | 364
sched pipe  | 78984 | 76060 | 81454


unix bench:
these numbers are scores, So the higher the better.

on PowerNV with 16 cores(cpus) (smt off), 32Gb memory:
-
pv-qspinlock and qspinlock have very similar results because pv-qspinlock use 
native version
which is only having one callback overhead

test case   | pv-qspinlock and qspinlock | current-spinlock

Execl Throughput   761.1 761.4
File Copy 1024 bufsize 2000 maxblocks 1259.81286.6
File Copy 256 bufsize 500 maxblocks782.2 790.3
File Copy 4096 bufsize 8000 maxblocks 2741.52817.4
Pipe Throughput   1063.21036.7
Pipe-based Context Switching   284.7 281.1
Process Creation   679.6 649.1
Shell Scripts (1 concurrent)  1933.21922.9
Shell Scripts (8 concurrent)  5003.34899.8
System Call Overhead   900.6 896.8
 ==
System Benchmarks Index Score 1139.3 1133.0
--- 
-

***
on pSeries with 32 vcpus, 32Gb memory, pHyp.

test case   |   pv-qspinlock |  qspinlock | 
current-spinlock

Execl Throughput 877.1 891.2 872.8
File Copy 1024 bufsize 2000 maxblocks   1390.41399.21395.0
File Copy 256 bufsize 500 maxblocks  882.4 889.5 881.8
File Copy 4096 bufsize 8000 maxblocks   3112.33113.43121.7
Pipe Throughput 1095.81162.61158.5
Pipe-based Context Switching 194.9 192.7 200.7
Process Creation 518.4 526.4 509.1
Shell Scripts (1 concurrent)1401.91413.91402.2
Shell Scripts (8 concurrent)3215.63246.63229.1
System Call Overhead 833.2 892.4 888.1
  
System Benchmarks Index Score   1033.71052.51047.8


**
on pSeries with 32 vcpus, 16Gb memory, KVM.

test case   |   pv-qspinlock |  qspinlock | 
current-spinlock

Execl Throughput 497.4518.7 497.8
File Copy 1024 bufsize 2000 maxblocks   1368.8   1390.11343.3
File Copy 256 bufsize 500 maxblocks  857.7859.8 831.4
File Copy 4096 bufsize 8000 

RE: [PATCH 3/3] powerpc/fsl/dts: add FMan node for t1042d4rdb

2016-12-06 Thread Madalin-Cristian Bucur
> From: Scott Wood [mailto:o...@buserror.net]
> Sent: Tuesday, November 15, 2016 8:19 AM
> 
> On Fri, 2016-11-11 at 17:53 +0200, Madalin Bucur wrote:
> > Signed-off-by: Madalin Bucur 
> > ---
> >  arch/powerpc/boot/dts/fsl/t1042d4rdb.dts | 47
> > 
> >  1 file changed, 47 insertions(+)
> >
> > diff --git a/arch/powerpc/boot/dts/fsl/t1042d4rdb.dts
> > b/arch/powerpc/boot/dts/fsl/t1042d4rdb.dts
> > index 2a5a90d..8c0c318 100644
> > --- a/arch/powerpc/boot/dts/fsl/t1042d4rdb.dts
> > +++ b/arch/powerpc/boot/dts/fsl/t1042d4rdb.dts
> > @@ -48,6 +48,53 @@
> >     "fsl,deepsleep-cpld";
> >     };
> >     };
> > +   soc: soc@ffe00 {
> 
> Please leave a blank line between nodes, especially here at the top level.
> 
> -Scott

I missed your feedback, will send a v2.

Thanks,
Madalin


Re: [PATCH 1/3] KVM: PPC: Book3S: Change interrupt call to reduce scratch space use on HV

2016-12-06 Thread Nicholas Piggin
On Tue, 6 Dec 2016 17:09:07 +1100
Paul Mackerras  wrote:

> On Thu, Dec 01, 2016 at 06:18:10PM +1100, Nicholas Piggin wrote:
> > Change the calling convention to put the trap number together with
> > CR in two halves of r12, which frees up HSTATE_SCRATCH2 in the HV
> > handler, and r9 free.  
> 
> Cute idea!  Some comments below...
> 
> > The 64-bit PR handler entry translates the calling convention back
> > to match the previous call convention (i.e., shared with 32-bit), for
> > simplicity.
> > 
> > Signed-off-by: Nicholas Piggin 
> > ---
> >  arch/powerpc/include/asm/exception-64s.h | 28 +++-
> >  arch/powerpc/kvm/book3s_hv_rmhandlers.S  | 15 +++
> >  arch/powerpc/kvm/book3s_segment.S| 27 ---
> >  3 files changed, 42 insertions(+), 28 deletions(-)
> > 
> > diff --git a/arch/powerpc/include/asm/exception-64s.h 
> > b/arch/powerpc/include/asm/exception-64s.h
> > index 9a3eee6..bc8fc45 100644
> > --- a/arch/powerpc/include/asm/exception-64s.h
> > +++ b/arch/powerpc/include/asm/exception-64s.h
> > @@ -233,7 +233,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
> >  
> >  #endif
> >  
> > -#define __KVM_HANDLER_PROLOG(area, n)  
> > \
> > +#define __KVM_HANDLER(area, h, n)  \
> > BEGIN_FTR_SECTION_NESTED(947)   \
> > ld  r10,area+EX_CFAR(r13);  \
> > std r10,HSTATE_CFAR(r13);   \
> > @@ -243,30 +243,32 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
> > std r10,HSTATE_PPR(r13);\
> > END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948);\
> > ld  r10,area+EX_R10(r13);   \
> > -   stw r9,HSTATE_SCRATCH1(r13);\
> > -   ld  r9,area+EX_R9(r13); \
> > std r12,HSTATE_SCRATCH0(r13);   \
> > -
> > -#define __KVM_HANDLER(area, h, n)  \
> > -   __KVM_HANDLER_PROLOG(area, n)   \
> > -   li  r12,n;  \
> > +   li  r12,(n);\
> > +   sldir12,r12,32; \
> > +   or  r12,r12,r9; \  
> 
> Did you consider doing it the other way around, i.e. with r12
> containing (cr << 32) | trap?  That would save 1 instruction in each
> handler:

When I tinkered with it I thought it came out slightly nicer this way, but
your suggested versions seem to prove me wrong. I can change it if you'd
like.

> 
> + sldir12,r9,32;  \
> + ori r12,r12,(n);\
> 
> > +   ld  r9,area+EX_R9(r13); \
> > +   std r9,HSTATE_SCRATCH1(r13);\  
> 
> Why not put this std in kvmppc_interrupt[_hv] rather than in each
> handler?

Patch 3/3 uses r9 to load the ctr when CONFIG_RELOCATABLE is turned on, so
this resulted in the smaller difference between the two cases. I agree it's
not ideal when config relocatable is off.

[snip]

Thanks,
Nick


Re: [GIT PULL 00/20] perf/core improvements and fixes

2016-12-06 Thread Ingo Molnar

* Arnaldo Carvalho de Melo  wrote:

> Hi Ingo,
> 
>   Please consider pulling,
> 
> - Arnaldo
> 
> Test results at the end of this message, as usual.
> 
> The following changes since commit e7af7b15121ca08c31a0ab9df71a41b4c53365b4:
> 
>   Merge tag 'perf-core-for-mingo-20161201' of 
> git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core 
> (2016-12-02 10:08:03 +0100)
> 
> are available in the git repository at:
> 
>   git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux.git 
> tags/perf-core-for-mingo-20161205
> 
> for you to fetch changes up to bec60e50af83741cde1786ab475d4bf472aed6f9:
> 
>   perf annotate: Show raw form for jump instruction with indirect target 
> (2016-12-05 17:21:57 -0300)
> 
> 
> perf/core improvements and fixes:
> 
> Fixes:
> 
> - Do not show a bogus target address in 'perf annotate' for targetless powerpc
>   jump instructions such as 'bctr' (Ravi Bangoria)
> 
> - tools/build fixes related to race conditions with the fixdep utility (Jiri 
> Olsa)
> 
> - Fix building objtool with clang (Peter Foley)
> 
> Infrastructure:
> 
> - Support linking perf with clang and LLVM libraries, initially statically, 
> but
>   this limitation will be lifted and shared libraries, when available, will
>   be preferred to the static build, that should, as with other features, be
>   enabled explicitly (Wang Nan)
> 
> Signed-off-by: Arnaldo Carvalho de Melo 
> 
> 
> Jiri Olsa (7):
>   tools build: Make fixdep parsing wait for last target
>   tools build: Make the .cmd file more readable
>   tools build: Move tabs to spaces where suitable
>   perf tools: Move install-gtk target into rules area
>   perf tools: Move python/perf.so target into rules area
>   perf tools: Cleanup build directory before each test
>   perf tools: Add non config targets
> 
> Peter Foley (1):
>   tools build: Fix objtool build with clang
> 
> Ravi Bangoria (1):
>   perf annotate: Show raw form for jump instruction with indirect target
> 
> Wang Nan (11):
>   perf tools: Pass context to perf hook functions
>   perf llvm: Extract helpers in llvm-utils.c
>   tools build: Add feature detection for LLVM
>   tools build: Add feature detection for clang
>   perf build: Add clang and llvm compile and linking support
>   perf clang: Add builtin clang support ant test case
>   perf clang: Use real file system for #include
>   perf clang: Allow passing CFLAGS to builtin clang
>   perf clang: Update test case to use real BPF script
>   perf clang: Support compile IR to BPF object and add testcase
>   perf clang: Compile BPF script using builtin clang support
> 
>  tools/build/Build.include  |  20 ++--
>  tools/build/Makefile.feature   | 138 +-
>  tools/build/feature/Makefile   | 120 +--
>  tools/build/feature/test-clang.cpp |  21 
>  tools/build/feature/test-llvm.cpp  |   8 ++
>  tools/build/fixdep.c   |   5 +-
>  tools/perf/Makefile.config |  62 +---
>  tools/perf/Makefile.perf   |  56 +++
>  tools/perf/tests/Build |   1 +
>  tools/perf/tests/builtin-test.c|   9 ++
>  tools/perf/tests/clang.c   |  46 +
>  tools/perf/tests/llvm.h|   7 ++
>  tools/perf/tests/make  |   4 +-
>  tools/perf/tests/perf-hooks.c  |  14 ++-
>  tools/perf/tests/tests.h   |   3 +
>  tools/perf/util/Build  |   2 +
>  tools/perf/util/annotate.c |   3 +
>  tools/perf/util/bpf-loader.c   |  19 +++-
>  tools/perf/util/c++/Build  |   2 +
>  tools/perf/util/c++/clang-c.h  |  43 
>  tools/perf/util/c++/clang-test.cpp |  62 
>  tools/perf/util/c++/clang.cpp  | 195 
> +
>  tools/perf/util/c++/clang.h|  26 +
>  tools/perf/util/llvm-utils.c   |  76 +++
>  tools/perf/util/llvm-utils.h   |   6 ++
>  tools/perf/util/perf-hooks.c   |  10 +-
>  tools/perf/util/perf-hooks.h   |   6 +-
>  tools/perf/util/util-cxx.h |  26 +
>  28 files changed, 795 insertions(+), 195 deletions(-)
>  create mode 100644 tools/build/feature/test-clang.cpp
>  create mode 100644 tools/build/feature/test-llvm.cpp
>  create mode 100644 tools/perf/tests/clang.c
>  create mode 100644 tools/perf/util/c++/Build
>  create mode 100644 tools/perf/util/c++/clang-c.h
>  create mode 100644 tools/perf/util/c++/clang-test.cpp
>  create mode 100644 tools/perf/util/c++/clang.cpp
>  create mode 100644 tools/perf/util/c++/clang.h
>  create mode 100644 tools/perf/util/util-cxx.h
> 
>   # uname -a
>   Linux jouet 4.8.8-300.fc25.x86_64 #1 SMP Tue Nov 15 18:10:06 UTC 2016 
> x86_64 x86_64 x86_64 GNU/Linux
>   # perf test
>1: vmlinux symtab