Le 06/03/2018 à 14:25, Nicholas Piggin a écrit :
Calculating the slice mask can become a significant overhead for
get_unmapped_area. This patch adds a struct slice_mask for
each page size in the mm_context, and keeps these in sync with
the slices psize arrays and slb_addr_limit.
On Book3S/64 this adds 288 bytes to the mm_context_t for the
slice mask caches.
On POWER8, this increases vfork+exec+exit performance by 9.9%
and reduces time to mmap+munmap a 64kB page by 28%.
Reduces time to mmap+munmap by about 10% on 8xx.
Cc: Benjamin Herrenschmidt
Cc: Anton Blanchard
---
arch/powerpc/include/asm/book3s/64/mmu.h | 18 +
arch/powerpc/include/asm/mmu-8xx.h | 14
arch/powerpc/mm/slice.c | 118 ---
3 files changed, 107 insertions(+), 43 deletions(-)
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h
b/arch/powerpc/include/asm/book3s/64/mmu.h
index bef6e39ed63a..78579305 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -80,6 +80,16 @@ struct spinlock;
/* Maximum possible number of NPUs in a system. */
#define NV_MAX_NPUS 8
+/*
+ * One bit per slice. We have lower slices which cover 256MB segments
+ * up to 4G range. That gets us 16 low slices. For the rest we track slices
+ * in 1TB size.
+ */
+struct slice_mask {
+ u64 low_slices;
+ DECLARE_BITMAP(high_slices, SLICE_NUM_HIGH);
+};
+
typedef struct {
mm_context_id_t id;
u16 user_psize; /* page size index */
@@ -95,6 +105,14 @@ typedef struct {
unsigned char low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE];
unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
unsigned long slb_addr_limit;
+# ifdef CONFIG_PPC_64K_PAGES
+ struct slice_mask mask_64k;
+# endif
+ struct slice_mask mask_4k;
+# ifdef CONFIG_HUGETLB_PAGE
+ struct slice_mask mask_16m;
+ struct slice_mask mask_16g;
+# endif
#else
u16 sllp; /* SLB page size encoding */
#endif
diff --git a/arch/powerpc/include/asm/mmu-8xx.h
b/arch/powerpc/include/asm/mmu-8xx.h
index d3d7e79140c6..4c3b14703b3e 100644
--- a/arch/powerpc/include/asm/mmu-8xx.h
+++ b/arch/powerpc/include/asm/mmu-8xx.h
@@ -192,6 +192,11 @@
#endif
#ifndef __ASSEMBLY__
+struct slice_mask {
+ u64 low_slices;
+ DECLARE_BITMAP(high_slices, 0);
+};
+
typedef struct {
unsigned int id;
unsigned int active;
@@ -201,6 +206,15 @@ typedef struct {
unsigned char low_slices_psize[SLICE_ARRAY_SIZE];
unsigned char high_slices_psize[0];
unsigned long slb_addr_limit;
+# ifdef CONFIG_PPC_16K_PAGES
+ struct slice_mask mask_16k;
+# else
+ struct slice_mask mask_4k;
+# endif
Could we just call it mask_base or something like that, regardless of the
standard page size?
+# ifdef CONFIG_HUGETLB_PAGE
+ struct slice_mask mask_512k;
+ struct slice_mask mask_8m;
+# endif
#endif
} mm_context_t;
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 233c42d593dc..2115efe5e869 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -37,15 +37,6 @@
#include
static DEFINE_SPINLOCK(slice_convert_lock);
-/*
- * One bit per slice. We have lower slices which cover 256MB segments
- * upto 4G range. That gets us 16 low slices. For the rest we track slices
- * in 1TB size.
- */
-struct slice_mask {
- u64 low_slices;
- DECLARE_BITMAP(high_slices, SLICE_NUM_HIGH);
-};
#ifdef DEBUG
int _slice_debug = 1;
@@ -149,37 +140,44 @@ static void slice_mask_for_free(struct mm_struct *mm,
struct slice_mask *ret,
__set_bit(i, ret->high_slices);
}
-static void slice_mask_for_size(struct mm_struct *mm, int psize,
- struct slice_mask *ret,
- unsigned long high_limit)
+#ifdef CONFIG_PPC_BOOK3S_64
+static struct slice_mask *slice_mask_for_size(struct mm_struct *mm, int psize)
{
- unsigned char *hpsizes, *lpsizes;
- int index, mask_index;
- unsigned long i;
-
- ret->low_slices = 0;
- if (SLICE_NUM_HIGH)
- bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
-
- lpsizes = mm->context.low_slices_psize;
- for (i = 0; i < SLICE_NUM_LOW; i++) {
- mask_index = i & 0x1;
- index = i >> 1;
- if (((lpsizes[index] >> (mask_index * 4)) & 0xf) == psize)
- ret->low_slices |= 1u << i;
- }
-
- if (high_limit <= SLICE_LOW_TOP)
- return;
-
- hpsizes = mm->context.high_slices_psize;
- for (i = 0; i < GET_HIGH_SLICE_INDEX(high_limit); i++) {
- mask_index = i & 0x1;
- index = i >> 1;
- if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == psize)
- __set_bit(i, ret->high_slices);
- }
+#ifdef CONFIG_PPC_64K_PAGES
+