Re: [PATCH v2 1/3] powerpc: port 64 bits pgtable_cache to 32 bits

2016-09-19 Thread christophe leroy



Le 19/09/2016 à 07:22, Aneesh Kumar K.V a écrit :

Christophe Leroy  writes:


Today powerpc64 uses a set of pgtable_caches, while powerpc32 uses
standard pages when using 4k pages and a single pgtable_cache
when using other page sizes.

In preparation for implementing huge pages on the 8xx, this patch
replaces the powerpc32-specific handling with the 64-bit approach.

This is done by:
* moving 64 bits pgtable_cache_add() and pgtable_cache_init()
in a new file called init-common.c
* modifying pgtable_cache_init() to also handle the case
without PMD
* removing the 32 bits version of pgtable_cache_add() and
pgtable_cache_init()
* copying related header contents from 64 bits into both the
book3s/32 and nohash/32 header files

On the 8xx, the following cache sizes will be used:
* 4k pages mode:
- PGT_CACHE(10) for PGD
- PGT_CACHE(3) for 512k hugepage tables
* 16k pages mode:
- PGT_CACHE(6) for PGD
- PGT_CACHE(7) for 512k hugepage tables
- PGT_CACHE(3) for 8M hugepage tables

Signed-off-by: Christophe Leroy 
---
v2: in v1, hugepte_cache was wrongly replaced by PGT_CACHE(1).
This modification has been removed from v2.

 arch/powerpc/include/asm/book3s/32/pgalloc.h |  44 ++--
 arch/powerpc/include/asm/book3s/32/pgtable.h |  43 
 arch/powerpc/include/asm/book3s/64/pgtable.h |   3 -
 arch/powerpc/include/asm/nohash/32/pgalloc.h |  44 ++--
 arch/powerpc/include/asm/nohash/32/pgtable.h |  45 
 arch/powerpc/include/asm/nohash/64/pgtable.h |   2 -
 arch/powerpc/include/asm/pgtable.h   |   2 +
 arch/powerpc/mm/Makefile |   3 +-
 arch/powerpc/mm/init-common.c| 147 +++
 arch/powerpc/mm/init_64.c|  77 --
 arch/powerpc/mm/pgtable_32.c |  37 ---
 11 files changed, 273 insertions(+), 174 deletions(-)
 create mode 100644 arch/powerpc/mm/init-common.c

diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h
index 8e21bb4..d310546 100644
--- a/arch/powerpc/include/asm/book3s/32/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h
@@ -2,14 +2,42 @@
 #define _ASM_POWERPC_BOOK3S_32_PGALLOC_H

 #include 
+#include 

-/* For 32-bit, all levels of page tables are just drawn from get_free_page() */
-#define MAX_PGTABLE_INDEX_SIZE 0
+/*
+ * Functions that deal with pagetables that could be at any level of
+ * the table need to be passed an "index_size" so they know how to
+ * handle allocation.  For PTE pages (which are linked to a struct
+ * page for now, and drawn from the main get_free_pages() pool), the
+ * allocation size will be (2^index_size * sizeof(pointer)) and
+ * allocations are drawn from the kmem_cache in PGT_CACHE(index_size).
+ *
+ * The maximum index size needs to be big enough to allow any
+ * pagetable sizes we need, but small enough to fit in the low bits of
+ * any page table pointer.  In other words all pagetables, even tiny
+ * ones, must be aligned to allow at least enough low 0 bits to
+ * contain this value.  This value is also used as a mask, so it must
+ * be one less than a power of two.
+ */
+#define MAX_PGTABLE_INDEX_SIZE 0xf

 extern void __bad_pte(pmd_t *pmd);

-extern pgd_t *pgd_alloc(struct mm_struct *mm);
-extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
+extern struct kmem_cache *pgtable_cache[];
+#define PGT_CACHE(shift) ({\
+   BUG_ON(!(shift));   \
+   pgtable_cache[(shift) - 1]; \
+   })
+
+static inline pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+   return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), GFP_KERNEL);
+}
+
+static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+   kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd);
+}

 /*
  * We don't have any real pmd's, and this code never triggers because
@@ -68,8 +96,12 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)

 static inline void pgtable_free(void *table, unsigned index_size)
 {
-   BUG_ON(index_size); /* 32-bit doesn't use this */
-   free_page((unsigned long)table);
+   if (!index_size) {
+   free_page((unsigned long)table);
+   } else {
+   BUG_ON(index_size > MAX_PGTABLE_INDEX_SIZE);
+   kmem_cache_free(PGT_CACHE(index_size), table);
+   }
 }

 #define check_pgt_cache()  do { } while (0)
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 6b8b2d5..f887499 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -8,6 +8,26 @@
 /* And here we include common definitions */
 #include 

+#define PTE_INDEX_SIZE PTE_SHIFT
+#define PMD_INDEX_SIZE 0
+#define PUD_INDEX_SIZE 0
+#define PGD_INDEX_SIZE (32 - PGDIR_SHIFT)
+
+#define PMD_CACHE_INDEX PMD_INDEX_SIZE
+
+#ifndef __ASSEMBLY__
+#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_I

Re: [PATCH v2 1/3] powerpc: port 64 bits pgtable_cache to 32 bits

2016-09-18 Thread Aneesh Kumar K.V
Christophe Leroy  writes:

> Today powerpc64 uses a set of pgtable_caches, while powerpc32 uses
> standard pages when using 4k pages and a single pgtable_cache
> when using other page sizes.
>
> In preparation for implementing huge pages on the 8xx, this patch
> replaces the powerpc32-specific handling with the 64-bit approach.
>
> This is done by:
> * moving 64 bits pgtable_cache_add() and pgtable_cache_init()
> in a new file called init-common.c
> * modifying pgtable_cache_init() to also handle the case
> without PMD
> * removing the 32 bits version of pgtable_cache_add() and
> pgtable_cache_init()
> * copying related header contents from 64 bits into both the
> book3s/32 and nohash/32 header files
>
> On the 8xx, the following cache sizes will be used:
> * 4k pages mode:
> - PGT_CACHE(10) for PGD
> - PGT_CACHE(3) for 512k hugepage tables
> * 16k pages mode:
> - PGT_CACHE(6) for PGD
> - PGT_CACHE(7) for 512k hugepage tables
> - PGT_CACHE(3) for 8M hugepage tables
>
> Signed-off-by: Christophe Leroy 
> ---
> v2: in v1, hugepte_cache was wrongly replaced by PGT_CACHE(1).
> This modification has been removed from v2.
>
>  arch/powerpc/include/asm/book3s/32/pgalloc.h |  44 ++--
>  arch/powerpc/include/asm/book3s/32/pgtable.h |  43 
>  arch/powerpc/include/asm/book3s/64/pgtable.h |   3 -
>  arch/powerpc/include/asm/nohash/32/pgalloc.h |  44 ++--
>  arch/powerpc/include/asm/nohash/32/pgtable.h |  45 
>  arch/powerpc/include/asm/nohash/64/pgtable.h |   2 -
>  arch/powerpc/include/asm/pgtable.h   |   2 +
>  arch/powerpc/mm/Makefile |   3 +-
>  arch/powerpc/mm/init-common.c| 147 +++
>  arch/powerpc/mm/init_64.c|  77 --
>  arch/powerpc/mm/pgtable_32.c |  37 ---
>  11 files changed, 273 insertions(+), 174 deletions(-)
>  create mode 100644 arch/powerpc/mm/init-common.c
>
> diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h
> index 8e21bb4..d310546 100644
> --- a/arch/powerpc/include/asm/book3s/32/pgalloc.h
> +++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h
> @@ -2,14 +2,42 @@
>  #define _ASM_POWERPC_BOOK3S_32_PGALLOC_H
>  
>  #include 
> +#include 
>  
> -/* For 32-bit, all levels of page tables are just drawn from get_free_page() */
> -#define MAX_PGTABLE_INDEX_SIZE   0
> +/*
> + * Functions that deal with pagetables that could be at any level of
> + * the table need to be passed an "index_size" so they know how to
> + * handle allocation.  For PTE pages (which are linked to a struct
> + * page for now, and drawn from the main get_free_pages() pool), the
> + * allocation size will be (2^index_size * sizeof(pointer)) and
> + * allocations are drawn from the kmem_cache in PGT_CACHE(index_size).
> + *
> + * The maximum index size needs to be big enough to allow any
> + * pagetable sizes we need, but small enough to fit in the low bits of
> + * any page table pointer.  In other words all pagetables, even tiny
> + * ones, must be aligned to allow at least enough low 0 bits to
> + * contain this value.  This value is also used as a mask, so it must
> + * be one less than a power of two.
> + */
> +#define MAX_PGTABLE_INDEX_SIZE   0xf
>  
>  extern void __bad_pte(pmd_t *pmd);
>  
> -extern pgd_t *pgd_alloc(struct mm_struct *mm);
> -extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
> +extern struct kmem_cache *pgtable_cache[];
> +#define PGT_CACHE(shift) ({  \
> + BUG_ON(!(shift));   \
> + pgtable_cache[(shift) - 1]; \
> + })
> +
> +static inline pgd_t *pgd_alloc(struct mm_struct *mm)
> +{
> + return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), GFP_KERNEL);
> +}
> +
> +static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
> +{
> + kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd);
> +}
>  
>  /*
>   * We don't have any real pmd's, and this code never triggers because
> @@ -68,8 +96,12 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
>  
>  static inline void pgtable_free(void *table, unsigned index_size)
>  {
> - BUG_ON(index_size); /* 32-bit doesn't use this */
> - free_page((unsigned long)table);
> + if (!index_size) {
> + free_page((unsigned long)table);
> + } else {
> + BUG_ON(index_size > MAX_PGTABLE_INDEX_SIZE);
> + kmem_cache_free(PGT_CACHE(index_size), table);
> + }
>  }
>  
>  #define check_pgt_cache()  do { } while (0)
> diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
> index 6b8b2d5..f887499 100644
> --- a/arch/powerpc/include/asm/book3s/32/pgtable.h
> +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
> @@ -8,6 +8,26 @@
>  /* And here we include common definitions */
>  #include 
>  
> +#define PTE_INDEX_SIZE   PTE_SHIFT
> +#define PMD_INDEX_SIZE   

[PATCH v2 1/3] powerpc: port 64 bits pgtable_cache to 32 bits

2016-09-16 Thread Christophe Leroy
Today powerpc64 uses a set of pgtable_caches, while powerpc32 uses
standard pages when using 4k pages and a single pgtable_cache
when using other page sizes.

In preparation for implementing huge pages on the 8xx, this patch
replaces the powerpc32-specific handling with the 64-bit approach.

This is done by:
* moving 64 bits pgtable_cache_add() and pgtable_cache_init()
in a new file called init-common.c
* modifying pgtable_cache_init() to also handle the case
without PMD
* removing the 32 bits version of pgtable_cache_add() and
pgtable_cache_init()
* copying related header contents from 64 bits into both the
book3s/32 and nohash/32 header files

On the 8xx, the following cache sizes will be used:
* 4k pages mode:
- PGT_CACHE(10) for PGD
- PGT_CACHE(3) for 512k hugepage tables
* 16k pages mode:
- PGT_CACHE(6) for PGD
- PGT_CACHE(7) for 512k hugepage tables
- PGT_CACHE(3) for 8M hugepage tables

Signed-off-by: Christophe Leroy 
---
v2: in v1, hugepte_cache was wrongly replaced by PGT_CACHE(1).
This modification has been removed from v2.

 arch/powerpc/include/asm/book3s/32/pgalloc.h |  44 ++--
 arch/powerpc/include/asm/book3s/32/pgtable.h |  43 
 arch/powerpc/include/asm/book3s/64/pgtable.h |   3 -
 arch/powerpc/include/asm/nohash/32/pgalloc.h |  44 ++--
 arch/powerpc/include/asm/nohash/32/pgtable.h |  45 
 arch/powerpc/include/asm/nohash/64/pgtable.h |   2 -
 arch/powerpc/include/asm/pgtable.h   |   2 +
 arch/powerpc/mm/Makefile |   3 +-
 arch/powerpc/mm/init-common.c| 147 +++
 arch/powerpc/mm/init_64.c|  77 --
 arch/powerpc/mm/pgtable_32.c |  37 ---
 11 files changed, 273 insertions(+), 174 deletions(-)
 create mode 100644 arch/powerpc/mm/init-common.c

diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h
index 8e21bb4..d310546 100644
--- a/arch/powerpc/include/asm/book3s/32/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h
@@ -2,14 +2,42 @@
 #define _ASM_POWERPC_BOOK3S_32_PGALLOC_H
 
 #include 
+#include 
 
-/* For 32-bit, all levels of page tables are just drawn from get_free_page() */
-#define MAX_PGTABLE_INDEX_SIZE 0
+/*
+ * Functions that deal with pagetables that could be at any level of
+ * the table need to be passed an "index_size" so they know how to
+ * handle allocation.  For PTE pages (which are linked to a struct
+ * page for now, and drawn from the main get_free_pages() pool), the
+ * allocation size will be (2^index_size * sizeof(pointer)) and
+ * allocations are drawn from the kmem_cache in PGT_CACHE(index_size).
+ *
+ * The maximum index size needs to be big enough to allow any
+ * pagetable sizes we need, but small enough to fit in the low bits of
+ * any page table pointer.  In other words all pagetables, even tiny
+ * ones, must be aligned to allow at least enough low 0 bits to
+ * contain this value.  This value is also used as a mask, so it must
+ * be one less than a power of two.
+ */
+#define MAX_PGTABLE_INDEX_SIZE 0xf
 
 extern void __bad_pte(pmd_t *pmd);
 
-extern pgd_t *pgd_alloc(struct mm_struct *mm);
-extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
+extern struct kmem_cache *pgtable_cache[];
+#define PGT_CACHE(shift) ({\
+   BUG_ON(!(shift));   \
+   pgtable_cache[(shift) - 1]; \
+   })
+
+static inline pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+   return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), GFP_KERNEL);
+}
+
+static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+   kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd);
+}
 
 /*
  * We don't have any real pmd's, and this code never triggers because
@@ -68,8 +96,12 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
 
 static inline void pgtable_free(void *table, unsigned index_size)
 {
-   BUG_ON(index_size); /* 32-bit doesn't use this */
-   free_page((unsigned long)table);
+   if (!index_size) {
+   free_page((unsigned long)table);
+   } else {
+   BUG_ON(index_size > MAX_PGTABLE_INDEX_SIZE);
+   kmem_cache_free(PGT_CACHE(index_size), table);
+   }
 }
 
 #define check_pgt_cache()  do { } while (0)
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 6b8b2d5..f887499 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -8,6 +8,26 @@
 /* And here we include common definitions */
 #include 
 
+#define PTE_INDEX_SIZE PTE_SHIFT
+#define PMD_INDEX_SIZE 0
+#define PUD_INDEX_SIZE 0
+#define PGD_INDEX_SIZE (32 - PGDIR_SHIFT)
+
+#define PMD_CACHE_INDEXPMD_INDEX_SIZE
+
+#ifndef __ASSEMBLY__
+#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE)
+#define PMD_TABLE_SIZE (sizeof(pmd_t) << PTE_INDEX_SIZE)
+#def