This option increases the rate of kernel hash faults by limiting the number of kernel HPT entries: entries inserted for kernel hash faults are removed again shortly after insertion (or remembered and removed at the next fault), so subsequent accesses keep missing in the hash page table. This helps stress-test difficult-to-hit paths in the kernel.
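
A rough userspace sketch of the deferred-eviction bookkeeping implemented by
hpt_do_torture() below (illustrative only, not part of the patch; the CPU
count, group numbers and fault sequence are made up):

/*
 * Toy userspace model of the deferred-eviction bookkeeping in
 * hpt_do_torture() below (illustrative only, not kernel code): each CPU
 * remembers the last hash group it could not evict immediately (e.g. a
 * cache-inhibited mapping that must not be touched) and evicts it at the
 * next fault instead.
 */
#include <stdio.h>

#define NR_CPUS		4
#define NO_GROUP	(~0UL)

static unsigned long last_group[NR_CPUS] = {
	[0 ... NR_CPUS - 1] = NO_GROUP,	/* GNU range initializer */
};

/* Stand-in for looping over mmu_hash_ops.hpte_remove() until the group is empty. */
static void evict_group(unsigned long group)
{
	printf("evict all HPTEs in group %lu\n", group);
}

/* Called for every simulated kernel hash fault on 'cpu' that filled 'group'. */
static void torture_fault(int cpu, unsigned long group, int can_evict_now)
{
	if (last_group[cpu] != NO_GROUP) {
		evict_group(last_group[cpu]);
		last_group[cpu] = NO_GROUP;
	}

	if (can_evict_now)
		evict_group(group);		/* TLB preloaded, safe to drop now */
	else
		last_group[cpu] = group;	/* defer eviction to the next fault */
}

int main(void)
{
	torture_fault(0, 10, 1);	/* normal mapping: evicted immediately */
	torture_fault(0, 11, 0);	/* cache-inhibited: eviction deferred */
	torture_fault(0, 12, 1);	/* evicts group 11 first, then group 12 */
	return 0;
}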

Signed-off-by: Nicholas Piggin <npig...@gmail.com>
---
 .../admin-guide/kernel-parameters.txt         |  9 +++
 arch/powerpc/include/asm/book3s/64/mmu-hash.h | 10 +++
 arch/powerpc/mm/book3s64/hash_4k.c            |  3 +
 arch/powerpc/mm/book3s64/hash_64k.c           |  8 +++
 arch/powerpc/mm/book3s64/hash_utils.c         | 66 ++++++++++++++++++-
 5 files changed, 95 insertions(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 5a34b7dd9ebe..1ec6a32a717a 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -876,6 +876,15 @@
 			them frequently to increase the rate of SLB faults
 			on kernel addresses.
 
+	torture_hpt	[PPC]
+			Limits the number of kernel HPT entries in the hash
+			page table to increase the rate of hash page table
+			faults on kernel addresses.
+
+			This may hang when run on processors / emulators which
+			do not have a TLB, or flush it more often than
+			required, QEMU seems to have problems.
+
 	disable=	[IPV6]
 			See Documentation/networking/ipv6.txt.
 
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index 758de1e0f676..539e3d91eac4 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -324,6 +324,16 @@ static inline bool torture_slb(void)
 	return static_branch_unlikely(&torture_slb_key);
 }
 
+extern bool torture_hpt_enabled;
+DECLARE_STATIC_KEY_FALSE(torture_hpt_key);
+static inline bool torture_hpt(void)
+{
+	return static_branch_unlikely(&torture_hpt_key);
+}
+
+void hpt_do_torture(unsigned long ea, unsigned long access,
+		    unsigned long rflags, unsigned long hpte_group);
+
 /*
  * This computes the AVPN and B fields of the first dword of a HPTE,
  * for use when we want to match an existing PTE.  The bottom 7 bits
diff --git a/arch/powerpc/mm/book3s64/hash_4k.c b/arch/powerpc/mm/book3s64/hash_4k.c
index 22e787123cdf..54e4ff8c558d 100644
--- a/arch/powerpc/mm/book3s64/hash_4k.c
+++ b/arch/powerpc/mm/book3s64/hash_4k.c
@@ -118,6 +118,9 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
 		}
 		new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
 		new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE);
+
+		if (torture_hpt())
+			hpt_do_torture(ea, access, rflags, hpte_group);
 	}
 	*ptep = __pte(new_pte & ~H_PAGE_BUSY);
 	return 0;
diff --git a/arch/powerpc/mm/book3s64/hash_64k.c b/arch/powerpc/mm/book3s64/hash_64k.c
index 7084ce2951e6..19ea0fc145a9 100644
--- a/arch/powerpc/mm/book3s64/hash_64k.c
+++ b/arch/powerpc/mm/book3s64/hash_64k.c
@@ -216,6 +216,9 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
 	new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot, PTRS_PER_PTE);
 	new_pte |= H_PAGE_HASHPTE;
 
+	if (torture_hpt())
+		hpt_do_torture(ea, access, rflags, hpte_group);
+
 	*ptep = __pte(new_pte & ~H_PAGE_BUSY);
 	return 0;
 }
@@ -327,7 +330,12 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
 
 		new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
 		new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE);
+
+		if (torture_hpt())
+			hpt_do_torture(ea, access, rflags, hpte_group);
 	}
+
 	*ptep = __pte(new_pte & ~H_PAGE_BUSY);
+
 	return 0;
 }
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index 9c487b5782ef..e9bdf825f897 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -353,8 +353,12 @@ int htab_remove_mapping(unsigned long vstart, unsigned long vend,
 	return ret;
 }
 
-static bool disable_1tb_segments = false;
+static bool disable_1tb_segments __read_mostly = false;
 bool torture_slb_enabled __read_mostly = false;
+bool torture_hpt_enabled __read_mostly = false;
+
+/* per-CPU array allocated if we enable torture_hpt. */
+static unsigned long *torture_hpt_last_group;
 
 static int __init parse_disable_1tb_segments(char *p)
 {
@@ -370,6 +374,13 @@ static int __init parse_torture_slb(char *p)
 }
 early_param("torture_slb", parse_torture_slb);
 
+static int __init parse_torture_hpt(char *p)
+{
+	torture_hpt_enabled = true;
+	return 0;
+}
+early_param("torture_hpt", parse_torture_hpt);
+
 static int __init htab_dt_scan_seg_sizes(unsigned long node,
 					 const char *uname, int depth,
 					 void *data)
@@ -863,6 +874,7 @@ static void __init hash_init_partition_table(phys_addr_t hash_table,
 }
 
 DEFINE_STATIC_KEY_FALSE(torture_slb_key);
+DEFINE_STATIC_KEY_FALSE(torture_hpt_key);
 
 static void __init htab_initialize(void)
 {
@@ -882,6 +894,15 @@ static void __init htab_initialize(void)
 	if (torture_slb_enabled)
 		static_branch_enable(&torture_slb_key);
 
+	if (torture_hpt_enabled) {
+		unsigned long tmp;
+		static_branch_enable(&torture_hpt_key);
+		tmp = memblock_phys_alloc_range(sizeof(unsigned long) * NR_CPUS,
+						0,
+						0, MEMBLOCK_ALLOC_ANYWHERE);
+		memset((void *)tmp, 0xff, sizeof(unsigned long) * NR_CPUS);
+		torture_hpt_last_group = __va(tmp);
+	}
 
 	/*
 	 * Calculate the required size of the htab.  We want the number of
@@ -1901,6 +1922,49 @@ long hpte_insert_repeating(unsigned long hash, unsigned long vpn,
 	return slot;
 }
 
+void hpt_do_torture(unsigned long ea, unsigned long access,
+		    unsigned long rflags, unsigned long hpte_group)
+{
+	unsigned long last_group;
+	int cpu = raw_smp_processor_id();
+
+	last_group = torture_hpt_last_group[cpu];
+	if (last_group != -1UL) {
+		while (mmu_hash_ops.hpte_remove(last_group) != -1)
+			;
+		torture_hpt_last_group[cpu] = -1UL;
+	}
+
+#define QEMU_WORKAROUND	0
+
+	if (ea >= PAGE_OFFSET) {
+		if (!QEMU_WORKAROUND && (access & (_PAGE_READ|_PAGE_WRITE)) &&
+		    !(rflags & (HPTE_R_I|HPTE_R_G))) {
+			/* prefetch / prefetchw does not seem to set up a TLB
+			 * entry with the powerpc systemsim (mambo) emulator,
+			 * though it works with real hardware. An alternative
+			 * approach that would work more reliably on quirky
+			 * emulators like QEMU may be to remember the last
+			 * insertion and remove that, rather than removing the
+			 * current insertion. Then no prefetch is required.
+			 */
+			if ((access & _PAGE_WRITE) && (access & _PAGE_READ))
+				atomic_add(0, (atomic_t *)(ea & ~0x3));
+			else if (access & _PAGE_READ)
+				*(volatile char *)ea;
+
+			mb();
+
+			while (mmu_hash_ops.hpte_remove(hpte_group) != -1)
+				;
+		} else {
+			/* Can't prefetch cache-inhibited so clear next time. */
+			torture_hpt_last_group[cpu] = hpte_group;
+		}
+	}
+}
+
+
 #ifdef CONFIG_DEBUG_PAGEALLOC
 static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi)
 {
-- 
2.23.0