[PATCH v4 2/3] powerpc: get hugetlbpage handling more generic

2016-12-06 Thread Christophe Leroy
Today there are two implementations of hugetlbpages which are managed
by exclusive #ifdefs:
* FSL_BOOKE: several directory entries point to the same single hugepage
* BOOK3S: one upper-level directory entry points to a table of hugepages

In preparation for the implementation of hugepage support on the 8xx, we
need a mix of the two above solutions, because the 8xx needs both cases
depending on the page size:
* In 4k page size mode, each PGD entry covers a 4M area. This means
that 2 PGD entries are necessary to cover an 8M hugepage, while a
single PGD entry covers 8x 512k hugepages (see the sketch below).
* In 16k page size mode, each PGD entry covers a 64M area. This means
that 8x 8M hugepages are covered by one PGD entry and 64x 512k
hugepages are covered by one PGD entry.
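As an illustration only (not part of the patch), here is a minimal
standalone sketch of the 4k-mode arithmetic above; the shift values
(22 for the 4M covered by a PGD entry, 23 for 8M pages, 19 for 512k
pages) are assumptions derived from the sizes quoted, not kernel code:

#include <stdio.h>

/* When the hugepage is at least as large as the area one directory
 * entry covers, several entries point to the same hugepage
 * (FSL_BOOKE style); otherwise one entry points to a table of
 * hugepages (BOOK3S style). */
static void show(int pshift, int pdshift)
{
	if (pshift >= pdshift)
		printf("%2d/%2d: %d directory entries per hugepage\n",
		       pshift, pdshift, 1 << (pshift - pdshift));
	else
		printf("%2d/%2d: %d hugepages per directory entry\n",
		       pshift, pdshift, 1 << (pdshift - pshift));
}

int main(void)
{
	show(23, 22);	/* 8M hugepage, 4M coverage: 2 PGD entries */
	show(19, 22);	/* 512k hugepage, 4M coverage: 8 pages per entry */
	return 0;
}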

This patch:
* removes #ifdefs in favor of if/else based on the range sizes
* merges the two huge_pte_alloc() functions as they are pretty similar
* merges the two hugetlbpage_init() functions as they are pretty similar

Signed-off-by: Christophe Leroy 
Reviewed-by: Aneesh Kumar K.V  (v3)
---
v2: This part is new and results from a split of the last patch of the v1
series into two parts

v3:
- Only allocate hugepte_cache on FSL_BOOKE. Not needed on BOOK3S_64
- Removed the BUG in the unused hugepd_free(), made it an empty
static inline {} stub instead (see the sketch below).
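For reference, the hugepd_free() change amounts to something like the
following sketch; the signature is an assumption for illustration, not
quoted from the patch:

#ifdef CONFIG_PPC_FSL_BOOK3E
/* real implementation, needed where hugepds are actually freed */
static void hugepd_free(struct mmu_gather *tlb, void *hugepte);
#else
/* unused on BOOK3S_64: an empty static inline stub replaces the BUG */
static inline void hugepd_free(struct mmu_gather *tlb, void *hugepte) {}
#endif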

v4:
- Fixed the pdshift calculation on FSL_BOOK3E in hugetlbpage_init() by
  using HUGEPD_PxD_SHIFT instead of PyD_SHIFT (sketched below).
- Fixed the default hugepage size selection on FSL_BOOK3E by splitting
  the decision based on #ifdefs in order to keep the previous behaviour.
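A sketch of the shape of the v4 pdshift fix described above; the
threshold comparisons are an assumption based on the HUGEPD_PxD_SHIFT
macros visible in the diff, not a quote from the patch:

/* Derive pdshift from the per-subarch HUGEPD_*_SHIFT thresholds
 * instead of raw P?D_SHIFT values, so FSL_BOOK3E picks the correct
 * directory level for each hugepage size. */
if (shift < HUGEPD_PUD_SHIFT)
	pdshift = PMD_SHIFT;
else if (shift < HUGEPD_PGD_SHIFT)
	pdshift = PUD_SHIFT;
else
	pdshift = PGDIR_SHIFT;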

 arch/powerpc/mm/hugetlbpage.c | 195 ++
 1 file changed, 81 insertions(+), 114 deletions(-)

diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index a5d3ecd..53245aa 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -64,14 +64,16 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
 {
struct kmem_cache *cachep;
pte_t *new;
-
-#ifdef CONFIG_PPC_FSL_BOOK3E
int i;
-   int num_hugepd = 1 << (pshift - pdshift);
-   cachep = hugepte_cache;
-#else
-   cachep = PGT_CACHE(pdshift - pshift);
-#endif
+   int num_hugepd;
+
+   if (pshift >= pdshift) {
+   cachep = hugepte_cache;
+   num_hugepd = 1 << (pshift - pdshift);
+   } else {
+   cachep = PGT_CACHE(pdshift - pshift);
+   num_hugepd = 1;
+   }
 
new = kmem_cache_zalloc(cachep, GFP_KERNEL);
 
@@ -89,7 +91,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
smp_wmb();
 
spin_lock(&mm->page_table_lock);
-#ifdef CONFIG_PPC_FSL_BOOK3E
+
/*
 * We have multiple higher-level entries that point to the same
 * actual pte location.  Fill in each as we go and backtrack on error.
@@ -100,8 +102,13 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
if (unlikely(!hugepd_none(*hpdp)))
break;
else
+#ifdef CONFIG_PPC_BOOK3S_64
+   hpdp->pd = __pa(new) |
+  (shift_to_mmu_psize(pshift) << 2);
+#else
/* We use the old format for PPC_FSL_BOOK3E */
hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
+#endif
}
/* If we bailed from the for loop early, an error occurred, clean up */
if (i < num_hugepd) {
@@ -109,17 +116,6 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
hpdp->pd = 0;
kmem_cache_free(cachep, new);
}
-#else
-   if (!hugepd_none(*hpdp))
-   kmem_cache_free(cachep, new);
-   else {
-#ifdef CONFIG_PPC_BOOK3S_64
-   hpdp->pd = __pa(new) | (shift_to_mmu_psize(pshift) << 2);
-#else
-   hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
-#endif
-   }
-#endif
spin_unlock(&mm->page_table_lock);
return 0;
 }
@@ -136,7 +132,6 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
 #define HUGEPD_PUD_SHIFT PMD_SHIFT
 #endif
 
-#ifdef CONFIG_PPC_BOOK3S_64
 /*
  * At this point we do the placement change only for BOOK3S 64. This would
  * possibly work on other subarchs.
@@ -153,6 +148,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
addr &= ~(sz-1);
pg = pgd_offset(mm, addr);
 
+#ifdef CONFIG_PPC_BOOK3S_64
if (pshift == PGDIR_SHIFT)
/* 16GB huge page */
return (pte_t *) pg;
@@ -178,32 +174,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
hpdp = (hugepd_t *)pm;
}
}
-   if (!hpdp)
-   return NULL;
-
-   BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp));
-
-   if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift,