Re: [PATCH V2 19/31] powerpc/mm: Convert 4k hash insert to C
On 10/01/2015 01:48 AM, Benjamin Herrenschmidt wrote: > On Wed, 2015-09-30 at 17:37 +0530, Anshuman Khandual wrote: >>> + if (unlikely(old_pte & _PAGE_BUSY)) >>> + return 0; >>> + /* If PTE permissions don't match, take page fault */ >> >> We are already in page fault interrupt path, will it be better >> if we call it "take Linux page fault" instead as we will go back >> walking the page table. > > A better wording would be "escalate the page fault" Right. There is one more thing. The hash_page_mm function can have the following return values after completion. /* Result code is: * 0 - handled * 1 - normal page fault---> Escalate into linux page fault * -1 - critical hash insertion error * -2 - access not permitted by subpage protection mechanism */ -2 is returned after looking into the sub page protection bits. 0 is returned when hash page insert succeeds - ppc_md.hpte_insert returns actual slot number - Multiple retries after ppc_md.hpte_insert returns -1 indicating that HPTEG is full and try secondary hash 1 is returned when PTE in the page table does not contain PFN -1 is returned when hash page did not succeed - ppc_md.hpte_insert returns -2 when it cannot insert HPTE The point is, there are multiple combinations of (0, 1, -1, -2) out there in various paths without much documentation, which can be cleaned up. Not in this series, but maybe later. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH V2 19/31] powerpc/mm: Convert 4k hash insert to C
On Wed, 2015-09-30 at 17:37 +0530, Anshuman Khandual wrote: > > + if (unlikely(old_pte & _PAGE_BUSY)) > > + return 0; > > + /* If PTE permissions don't match, take page fault */ > > We are already in page fault interrupt path, will it be better > if we call it "take Linux page fault" instead as we will go back > walking the page table. A better wording would be "escalate the page fault" Cheers, Ben. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH V2 19/31] powerpc/mm: Convert 4k hash insert to C
On 09/30/2015 07:57 AM, Aneesh Kumar K.V wrote: > Signed-off-by: Aneesh Kumar K.V> --- > arch/powerpc/mm/Makefile| 3 + > arch/powerpc/mm/hash64_64k.c| 202 + > arch/powerpc/mm/hash_low_64.S | 380 > > arch/powerpc/mm/hash_utils_64.c | 4 +- > 4 files changed, 208 insertions(+), 381 deletions(-) > create mode 100644 arch/powerpc/mm/hash64_64k.c > > diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile > index 3eb73a38220d..f80ad1a76cc8 100644 > --- a/arch/powerpc/mm/Makefile > +++ b/arch/powerpc/mm/Makefile > @@ -18,6 +18,9 @@ obj-$(CONFIG_PPC_STD_MMU_32)+= ppc_mmu_32.o > obj-$(CONFIG_PPC_STD_MMU)+= hash_low_$(CONFIG_WORD_SIZE).o \ > tlb_hash$(CONFIG_WORD_SIZE).o \ > mmu_context_hash$(CONFIG_WORD_SIZE).o > +ifeq ($(CONFIG_PPC_STD_MMU_64),y) > +obj-$(CONFIG_PPC_64K_PAGES) += hash64_64k.o > +endif > obj-$(CONFIG_PPC_ICSWX) += icswx.o > obj-$(CONFIG_PPC_ICSWX_PID) += icswx_pid.o > obj-$(CONFIG_40x)+= 40x_mmu.o > diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c > new file mode 100644 > index ..b137e50a3e57 > --- /dev/null > +++ b/arch/powerpc/mm/hash64_64k.c > @@ -0,0 +1,202 @@ > +/* > + * Copyright IBM Corporation, 2015 > + * Author Aneesh Kumar K.V > + * > + * This program is free software; you can redistribute it and/or modify it > + * under the terms of version 2.1 of the GNU Lesser General Public License > + * as published by the Free Software Foundation. > + * > + * This program is distributed in the hope that it would be useful, but > + * WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
> + * > + */ > + > +#include > +#include > +#include > + > +int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long > vsid, > +pte_t *ptep, unsigned long trap, unsigned long flags, > +int ssize, int subpg_prot) > +{ > + real_pte_t rpte; > + unsigned long *hidxp; > + unsigned long hpte_group; > + unsigned int subpg_index; > + unsigned long shift = 12; /* 4K */ > + unsigned long rflags, pa, hidx; > + unsigned long old_pte, new_pte, subpg_pte; > + unsigned long vpn, hash, slot; > + > + /* > + * atomically mark the linux large page PTE busy and dirty > + */ > + do { > + pte_t pte = READ_ONCE(*ptep); > + > + old_pte = pte_val(pte); > + /* If PTE busy, retry the access */ Small nit, need a gap between the above two lines ? > + if (unlikely(old_pte & _PAGE_BUSY)) > + return 0; > + /* If PTE permissions don't match, take page fault */ We are already in page fault interrupt path, will it be better if we call it "take Linux page fault" instead as we will go back walking the page table. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH V2 19/31] powerpc/mm: Convert 4k hash insert to C
Signed-off-by: Aneesh Kumar K.V--- arch/powerpc/mm/Makefile| 3 + arch/powerpc/mm/hash64_64k.c| 202 + arch/powerpc/mm/hash_low_64.S | 380 arch/powerpc/mm/hash_utils_64.c | 4 +- 4 files changed, 208 insertions(+), 381 deletions(-) create mode 100644 arch/powerpc/mm/hash64_64k.c diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 3eb73a38220d..f80ad1a76cc8 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -18,6 +18,9 @@ obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o obj-$(CONFIG_PPC_STD_MMU) += hash_low_$(CONFIG_WORD_SIZE).o \ tlb_hash$(CONFIG_WORD_SIZE).o \ mmu_context_hash$(CONFIG_WORD_SIZE).o +ifeq ($(CONFIG_PPC_STD_MMU_64),y) +obj-$(CONFIG_PPC_64K_PAGES)+= hash64_64k.o +endif obj-$(CONFIG_PPC_ICSWX)+= icswx.o obj-$(CONFIG_PPC_ICSWX_PID)+= icswx_pid.o obj-$(CONFIG_40x) += 40x_mmu.o diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c new file mode 100644 index ..b137e50a3e57 --- /dev/null +++ b/arch/powerpc/mm/hash64_64k.c @@ -0,0 +1,202 @@ +/* + * Copyright IBM Corporation, 2015 + * Author Aneesh Kumar K.V + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2.1 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + */ + +#include +#include +#include + +int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, + pte_t *ptep, unsigned long trap, unsigned long flags, + int ssize, int subpg_prot) +{ + real_pte_t rpte; + unsigned long *hidxp; + unsigned long hpte_group; + unsigned int subpg_index; + unsigned long shift = 12; /* 4K */ + unsigned long rflags, pa, hidx; + unsigned long old_pte, new_pte, subpg_pte; + unsigned long vpn, hash, slot; + + /* +* atomically mark the linux large page PTE busy and dirty +*/ + do { + pte_t pte = READ_ONCE(*ptep); + + old_pte = pte_val(pte); + /* If PTE busy, retry the access */ + if (unlikely(old_pte & _PAGE_BUSY)) + return 0; + /* If PTE permissions don't match, take page fault */ + if (unlikely(access & ~old_pte)) + return 1; + /* +* Try to lock the PTE, add ACCESSED and DIRTY if it was +* a write access. Since this is 4K insert of 64K page size +* also add _PAGE_COMBO +*/ + new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED | _PAGE_COMBO; + if (access & _PAGE_RW) + new_pte |= _PAGE_DIRTY; + } while (old_pte != __cmpxchg_u64((unsigned long *)ptep, + old_pte, new_pte)); + /* +* Handle the subpage protection bits +*/ + subpg_pte = new_pte & ~subpg_prot; + /* +* PP bits. _PAGE_USER is already PP bit 0x2, so we only +* need to add in 0x1 if it's a read-only user page +*/ + rflags = subpg_pte & _PAGE_USER; + if ((subpg_pte & _PAGE_USER) && !((subpg_pte & _PAGE_RW) && + (subpg_pte & _PAGE_DIRTY))) + rflags |= 0x1; + /* +* _PAGE_EXEC -> HW_NO_EXEC since it's inverted +*/ + rflags |= ((subpg_pte & _PAGE_EXEC) ? 
0 : HPTE_R_N); + /* +* Always add C and Memory coherence bit +*/ + rflags |= HPTE_R_C | HPTE_R_M; + /* +* Add in WIMG bits +*/ + rflags |= (subpg_pte & (_PAGE_WRITETHRU | _PAGE_NO_CACHE | + _PAGE_COHERENT | _PAGE_GUARDED)); + + if (!cpu_has_feature(CPU_FTR_NOEXECUTE) && + !cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) { + + /* +* No CPU has hugepages but lacks no execute, so we +* don't need to worry about that case +*/ + rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap); + } + + subpg_index = (ea & (PAGE_SIZE - 1)) >> shift; + vpn = hpt_vpn(ea, vsid, ssize); + rpte = __real_pte(__pte(old_pte), ptep); + /* +*None of the sub 4k page is hashed +*/ + if (!(old_pte & _PAGE_HASHPTE)) + goto htab_insert_hpte; + /* +* Check if the pte was already inserted into the hash table +* as a 64k HW page, and