Switching to a different mm with Hash translation causes SLB entries to be preloaded from the current thread_info. This reduces SLB faults, for example when threads share a common mm but operate on different address ranges.
Preloading entries from the thread_info struct may not always be
appropriate - such as when switching to a temporary mm. Introduce a new
boolean in mm_context_t to skip the SLB preload entirely. Also move the
SLB preload code into a separate function since switch_slb() is already
quite long. The default behavior (preloading SLB entries from the
current thread_info struct) remains unchanged.

Signed-off-by: Christopher M. Riedl <c...@linux.ibm.com>

---

v4: * New to series.

---
 arch/powerpc/include/asm/book3s/64/mmu.h |  3 ++
 arch/powerpc/include/asm/mmu_context.h   | 13 ++++++
 arch/powerpc/mm/book3s64/mmu_context.c   |  2 +
 arch/powerpc/mm/book3s64/slb.c           | 56 ++++++++++++++----------
 4 files changed, 50 insertions(+), 24 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index eace8c3f7b0a1..b23a9dcdee5af 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -130,6 +130,9 @@ typedef struct {
 	u32 pkey_allocation_map;
 	s16 execute_only_pkey; /* key holding execute-only protection */
 #endif
+
+	/* Do not preload SLB entries from thread_info during switch_slb() */
+	bool skip_slb_preload;
 } mm_context_t;
 
 static inline u16 mm_ctx_user_psize(mm_context_t *ctx)
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 4bc45d3ed8b0e..264787e90b1a1 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -298,6 +298,19 @@ static inline int arch_dup_mmap(struct mm_struct *oldmm,
 	return 0;
 }
 
+#ifdef CONFIG_PPC_BOOK3S_64
+
+static inline void skip_slb_preload_mm(struct mm_struct *mm)
+{
+	mm->context.skip_slb_preload = true;
+}
+
+#else
+
+static inline void skip_slb_preload_mm(struct mm_struct *mm) {}
+
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
 #include <asm-generic/mmu_context.h>
 
 #endif /* __KERNEL__ */
diff --git a/arch/powerpc/mm/book3s64/mmu_context.c b/arch/powerpc/mm/book3s64/mmu_context.c
index c10fc8a72fb37..3479910264c59 100644
--- a/arch/powerpc/mm/book3s64/mmu_context.c
+++ b/arch/powerpc/mm/book3s64/mmu_context.c
@@ -202,6 +202,8 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 	atomic_set(&mm->context.active_cpus, 0);
 	atomic_set(&mm->context.copros, 0);
 
+	mm->context.skip_slb_preload = false;
+
 	return 0;
 }
 
diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c
index c91bd85eb90e3..da0836cb855af 100644
--- a/arch/powerpc/mm/book3s64/slb.c
+++ b/arch/powerpc/mm/book3s64/slb.c
@@ -441,10 +441,39 @@ static void slb_cache_slbie_user(unsigned int index)
 	asm volatile("slbie %0" : : "r" (slbie_data));
 }
 
+static void preload_slb_entries(struct task_struct *tsk, struct mm_struct *mm)
+{
+	struct thread_info *ti = task_thread_info(tsk);
+	unsigned char i;
+
+	/*
+	 * We gradually age out SLBs after a number of context switches to
+	 * reduce reload overhead of unused entries (like we do with FP/VEC
+	 * reload). Each time we wrap 256 switches, take an entry out of the
+	 * SLB preload cache.
+	 */
+	tsk->thread.load_slb++;
+	if (!tsk->thread.load_slb) {
+		unsigned long pc = KSTK_EIP(tsk);
+
+		preload_age(ti);
+		preload_add(ti, pc);
+	}
+
+	for (i = 0; i < ti->slb_preload_nr; i++) {
+		unsigned char idx;
+		unsigned long ea;
+
+		idx = (ti->slb_preload_tail + i) % SLB_PRELOAD_NR;
+		ea = (unsigned long)ti->slb_preload_esid[idx] << SID_SHIFT;
+
+		slb_allocate_user(mm, ea);
+	}
+}
+
 /* Flush all user entries from the segment table of the current processor. */
 void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
 {
-	struct thread_info *ti = task_thread_info(tsk);
 	unsigned char i;
 
 	/*
@@ -502,29 +531,8 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
 
 	copy_mm_to_paca(mm);
 
-	/*
-	 * We gradually age out SLBs after a number of context switches to
-	 * reduce reload overhead of unused entries (like we do with FP/VEC
-	 * reload). Each time we wrap 256 switches, take an entry out of the
-	 * SLB preload cache.
-	 */
-	tsk->thread.load_slb++;
-	if (!tsk->thread.load_slb) {
-		unsigned long pc = KSTK_EIP(tsk);
-
-		preload_age(ti);
-		preload_add(ti, pc);
-	}
-
-	for (i = 0; i < ti->slb_preload_nr; i++) {
-		unsigned char idx;
-		unsigned long ea;
-
-		idx = (ti->slb_preload_tail + i) % SLB_PRELOAD_NR;
-		ea = (unsigned long)ti->slb_preload_esid[idx] << SID_SHIFT;
-
-		slb_allocate_user(mm, ea);
-	}
+	if (!mm->context.skip_slb_preload)
+		preload_slb_entries(tsk, mm);
 
 	/*
 	 * Synchronize slbmte preloads with possible subsequent user memory
-- 
2.26.1
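
Usage note for reviewers (not part of the patch itself): a minimal sketch of
how a caller that sets up a temporary mm might opt out of the preload. The
create_temp_mm() name and the use of mm_alloc() below are illustrative
assumptions; only skip_slb_preload_mm() comes from the diff above.

/*
 * Hypothetical caller sketch: mark a freshly allocated mm so that
 * switch_slb() skips the thread_info-based SLB preload when entering it.
 * The surrounding setup is illustrative and not introduced by this patch.
 */
#include <linux/sched/mm.h>
#include <asm/mmu_context.h>

static struct mm_struct *create_temp_mm(void)
{
	struct mm_struct *temp_mm = mm_alloc();

	if (!temp_mm)
		return NULL;

	/*
	 * A temporary mm is only entered for short, controlled windows, so
	 * preloading SLB entries from the current thread's user address
	 * space would be wasted (and potentially unwanted) work.
	 */
	skip_slb_preload_mm(temp_mm);

	return temp_mm;
}

Since the flag lives in mm_context_t and defaults to false in
init_new_context(), the ordinary context-switch path is unchanged apart from
a single branch in switch_slb().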