Excerpts from Christophe Leroy's message of May 2, 2021 5:34 pm: > > > Le 02/05/2021 à 06:56, Nicholas Piggin a écrit : >> This reduces TLB misses by nearly 30x on a `git diff` workload on a >> 2-node POWER9 (59,800 -> 2,100) and reduces CPU cycles by 0.54%, due >> to vfs hashes being allocated with 2MB pages. >> >> Acked-by: Michael Ellerman <m...@ellerman.id.au> >> Signed-off-by: Nicholas Piggin <npig...@gmail.com> >> --- >> This was in the -mm tree but was dropped at the last minute after >> clashing with a patch in powerpc next. >> >> Now all prerequisites are upstream, this can be merged as is. Probably >> makes sense now to go via powerpc tree. >> >> This is rebased and retested on upstream. >> >> Documentation/admin-guide/kernel-parameters.txt | 2 ++ >> arch/powerpc/Kconfig | 1 + >> arch/powerpc/include/asm/pgtable.h | 5 +++++ >> arch/powerpc/kernel/module.c | 16 +++++++++++++--- >> 4 files changed, 21 insertions(+), 3 deletions(-) >> >> diff --git a/Documentation/admin-guide/kernel-parameters.txt >> b/Documentation/admin-guide/kernel-parameters.txt >> index 1c0a3cf6fcc9..1be38b25c485 100644 >> --- a/Documentation/admin-guide/kernel-parameters.txt >> +++ b/Documentation/admin-guide/kernel-parameters.txt >> @@ -3250,6 +3250,8 @@ >> >> nohugeiomap [KNL,X86,PPC,ARM64] Disable kernel huge I/O mappings. >> >> + nohugevmalloc [PPC] Disable kernel huge vmalloc mappings. >> + >> nosmt [KNL,S390] Disable symmetric multithreading (SMT). >> Equivalent to smt=1. 
>> >> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig >> index 1e6230bea09d..c547a9d6a2dd 100644 >> --- a/arch/powerpc/Kconfig >> +++ b/arch/powerpc/Kconfig >> @@ -185,6 +185,7 @@ config PPC >> select GENERIC_VDSO_TIME_NS >> select HAVE_ARCH_AUDITSYSCALL >> select HAVE_ARCH_HUGE_VMAP if PPC_BOOK3S_64 && >> PPC_RADIX_MMU >> + select HAVE_ARCH_HUGE_VMALLOC if HAVE_ARCH_HUGE_VMAP >> select HAVE_ARCH_JUMP_LABEL >> select HAVE_ARCH_JUMP_LABEL_RELATIVE >> select HAVE_ARCH_KASAN if PPC32 && PPC_PAGE_SHIFT <= 14 >> diff --git a/arch/powerpc/include/asm/pgtable.h >> b/arch/powerpc/include/asm/pgtable.h >> index c6a676714f04..1678e4b08fc3 100644 >> --- a/arch/powerpc/include/asm/pgtable.h >> +++ b/arch/powerpc/include/asm/pgtable.h >> @@ -39,6 +39,11 @@ struct mm_struct; >> #define __S110 PAGE_SHARED_X >> #define __S111 PAGE_SHARED_X >> >> +#ifndef MODULES_VADDR >> +#define MODULES_VADDR VMALLOC_START >> +#define MODULES_END VMALLOC_END >> +#endif > > This will also require some changes in a few places, see > https://patchwork.ozlabs.org/project/linuxppc-dev/patch/20210429031602.2606654-4-jniet...@gmail.com/
I see. I'll just make the PPC64 version use VMALLOC_START/VMALLOC_END, which avoids that stupid compiler warning found by kbuild robot as well. > >> + >> #ifndef __ASSEMBLY__ >> >> /* Keep these as a macros to avoid include dependency mess */ >> diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c >> index fab84024650c..77aefcbbd276 100644 >> --- a/arch/powerpc/kernel/module.c >> +++ b/arch/powerpc/kernel/module.c >> @@ -8,6 +8,7 @@ >> #include <linux/moduleloader.h> >> #include <linux/err.h> >> #include <linux/vmalloc.h> >> +#include <linux/mm.h> >> #include <linux/bug.h> >> #include <asm/module.h> >> #include <linux/uaccess.h> >> @@ -88,17 +89,24 @@ int module_finalize(const Elf_Ehdr *hdr, >> return 0; >> } >> >> -#ifdef MODULES_VADDR >> static __always_inline void * >> __module_alloc(unsigned long size, unsigned long start, unsigned long end) >> { >> + /* >> + * Don't do huge page allocations for modules yet until more testing >> + * is done. STRICT_MODULE_RWX may require extra work to support this >> + * too. >> + */ >> return __vmalloc_node_range(size, 1, start, end, GFP_KERNEL, >> - PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, >> NUMA_NO_NODE, >> + PAGE_KERNEL_EXEC, >> + VM_FLUSH_RESET_PERMS | VM_NO_HUGE_VMAP, >> + NUMA_NO_NODE, >> __builtin_return_address(0)); > > Can we avoid so many lines ? Doesn't it fit on 3 lines now that 100 chars per > line are tolerated ? It could be squashed onto fewer lines. Is it better? > >> } >> >> void *module_alloc(unsigned long size) >> { >> +#ifdef CONFIG_PPC32 > > Can we just add an IS_ENABLED(CONFIG_PPC32) in the 'if' instead of this > #ifdef/#else ? I guess not if I don't define MODULES_VADDR. Jordan's cleanup patch could change it to IS_ENABLED. 
Thanks, Nick > >> unsigned long limit = (unsigned long)_etext - SZ_32M; >> void *ptr = NULL; >> >> @@ -112,5 +120,7 @@ void *module_alloc(unsigned long size) >> ptr = __module_alloc(size, MODULES_VADDR, MODULES_END); >> >> return ptr; >> -} >> +#else >> + return __module_alloc(size, MODULES_VADDR, MODULES_END); >> #endif >> +} >> >