Re: [PATCH V2 19/31] powerpc/mm: Convert 4k hash insert to C

2015-10-01 Thread Anshuman Khandual
On 10/01/2015 01:48 AM, Benjamin Herrenschmidt wrote:
> On Wed, 2015-09-30 at 17:37 +0530, Anshuman Khandual wrote:
>>> + if (unlikely(old_pte & _PAGE_BUSY))
>>> + return 0;
>>> + /* If PTE permissions don't match, take page fault */
>>
>> We are already in page fault interrupt path,  will it be better
>> if we call it "take Linux page fault" instead as we will go back
>> walking the page table.
> 
> A better wording would be "escalate the page fault"

Right. There is one more thing. The hash_page_mm() function can have
the following return values after completion.

/* Result code is:
 *  0 - handled
 *  1 - normal page fault---> Escalate into linux page fault
 * -1 - critical hash insertion error 
 * -2 - access not permitted by subpage protection mechanism
 */

-2 is returned after looking into the subpage protection bits.
 0 is returned when the hash page insert succeeds
   - ppc_md.hpte_insert returns the actual slot number
   - Multiple retries after ppc_md.hpte_insert returns -1,
     indicating that the HPTEG is full, and we try the secondary hash

 1 is returned when the PTE in the page table does not contain a PFN
-1 is returned when the hash page insert did not succeed
   - ppc_md.hpte_insert returns -2 when it cannot insert the HPTE

The point is, there are multiple combinations of (0, 1, -1, -2)
out there in various paths without much documentation, which can be
cleaned up. Not in this series, but maybe later.

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH V2 19/31] powerpc/mm: Convert 4k hash insert to C

2015-09-30 Thread Benjamin Herrenschmidt
On Wed, 2015-09-30 at 17:37 +0530, Anshuman Khandual wrote:
> > + if (unlikely(old_pte & _PAGE_BUSY))
> > + return 0;
> > + /* If PTE permissions don't match, take page fault */
> 
> We are already in page fault interrupt path,  will it be better
> if we call it "take Linux page fault" instead as we will go back
> walking the page table.

A better wording would be "escalate the page fault"

Cheers,
Ben.

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH V2 19/31] powerpc/mm: Convert 4k hash insert to C

2015-09-30 Thread Anshuman Khandual
On 09/30/2015 07:57 AM, Aneesh Kumar K.V wrote:
> Signed-off-by: Aneesh Kumar K.V 
> ---
>  arch/powerpc/mm/Makefile|   3 +
>  arch/powerpc/mm/hash64_64k.c| 202 +
>  arch/powerpc/mm/hash_low_64.S   | 380 
> 
>  arch/powerpc/mm/hash_utils_64.c |   4 +-
>  4 files changed, 208 insertions(+), 381 deletions(-)
>  create mode 100644 arch/powerpc/mm/hash64_64k.c
> 
> diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
> index 3eb73a38220d..f80ad1a76cc8 100644
> --- a/arch/powerpc/mm/Makefile
> +++ b/arch/powerpc/mm/Makefile
> @@ -18,6 +18,9 @@ obj-$(CONFIG_PPC_STD_MMU_32)+= ppc_mmu_32.o
>  obj-$(CONFIG_PPC_STD_MMU)+= hash_low_$(CONFIG_WORD_SIZE).o \
>  tlb_hash$(CONFIG_WORD_SIZE).o \
>  mmu_context_hash$(CONFIG_WORD_SIZE).o
> +ifeq ($(CONFIG_PPC_STD_MMU_64),y)
> +obj-$(CONFIG_PPC_64K_PAGES)  += hash64_64k.o
> +endif
>  obj-$(CONFIG_PPC_ICSWX)  += icswx.o
>  obj-$(CONFIG_PPC_ICSWX_PID)  += icswx_pid.o
>  obj-$(CONFIG_40x)+= 40x_mmu.o
> diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c
> new file mode 100644
> index ..b137e50a3e57
> --- /dev/null
> +++ b/arch/powerpc/mm/hash64_64k.c
> @@ -0,0 +1,202 @@
> +/*
> + * Copyright IBM Corporation, 2015
> + * Author Aneesh Kumar K.V 
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms of version 2.1 of the GNU Lesser General Public License
> + * as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it would be useful, but
> + * WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
> + *
> + */
> +
> +#include 
> +#include 
> +#include 
> +
> +int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long 
> vsid,
> +pte_t *ptep, unsigned long trap, unsigned long flags,
> +int ssize, int subpg_prot)
> +{
> + real_pte_t rpte;
> + unsigned long *hidxp;
> + unsigned long hpte_group;
> + unsigned int subpg_index;
> + unsigned long shift = 12; /* 4K */
> + unsigned long rflags, pa, hidx;
> + unsigned long old_pte, new_pte, subpg_pte;
> + unsigned long vpn, hash, slot;
> +
> + /*
> +  * atomically mark the linux large page PTE busy and dirty
> +  */
> + do {
> + pte_t pte = READ_ONCE(*ptep);
> +
> + old_pte = pte_val(pte);
> + /* If PTE busy, retry the access */

Small nit: do we need a blank line between the above two lines?

> + if (unlikely(old_pte & _PAGE_BUSY))
> + return 0;
> + /* If PTE permissions don't match, take page fault */

We are already in the page fault interrupt path. Would it be better
to call it "take Linux page fault" instead, since we will go back
to walking the page table?

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH V2 19/31] powerpc/mm: Convert 4k hash insert to C

2015-09-29 Thread Aneesh Kumar K.V
Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/mm/Makefile|   3 +
 arch/powerpc/mm/hash64_64k.c| 202 +
 arch/powerpc/mm/hash_low_64.S   | 380 
 arch/powerpc/mm/hash_utils_64.c |   4 +-
 4 files changed, 208 insertions(+), 381 deletions(-)
 create mode 100644 arch/powerpc/mm/hash64_64k.c

diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 3eb73a38220d..f80ad1a76cc8 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -18,6 +18,9 @@ obj-$(CONFIG_PPC_STD_MMU_32)  += ppc_mmu_32.o
 obj-$(CONFIG_PPC_STD_MMU)  += hash_low_$(CONFIG_WORD_SIZE).o \
   tlb_hash$(CONFIG_WORD_SIZE).o \
   mmu_context_hash$(CONFIG_WORD_SIZE).o
+ifeq ($(CONFIG_PPC_STD_MMU_64),y)
+obj-$(CONFIG_PPC_64K_PAGES)+= hash64_64k.o
+endif
 obj-$(CONFIG_PPC_ICSWX)+= icswx.o
 obj-$(CONFIG_PPC_ICSWX_PID)+= icswx_pid.o
 obj-$(CONFIG_40x)  += 40x_mmu.o
diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c
new file mode 100644
index ..b137e50a3e57
--- /dev/null
+++ b/arch/powerpc/mm/hash64_64k.c
@@ -0,0 +1,202 @@
+/*
+ * Copyright IBM Corporation, 2015
+ * Author Aneesh Kumar K.V 
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+
+#include 
+#include 
+#include 
+
+int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
+  pte_t *ptep, unsigned long trap, unsigned long flags,
+  int ssize, int subpg_prot)
+{
+   real_pte_t rpte;
+   unsigned long *hidxp;
+   unsigned long hpte_group;
+   unsigned int subpg_index;
+   unsigned long shift = 12; /* 4K */
+   unsigned long rflags, pa, hidx;
+   unsigned long old_pte, new_pte, subpg_pte;
+   unsigned long vpn, hash, slot;
+
+   /*
+* atomically mark the linux large page PTE busy and dirty
+*/
+   do {
+   pte_t pte = READ_ONCE(*ptep);
+
+   old_pte = pte_val(pte);
+   /* If PTE busy, retry the access */
+   if (unlikely(old_pte & _PAGE_BUSY))
+   return 0;
+   /* If PTE permissions don't match, take page fault */
+   if (unlikely(access & ~old_pte))
+   return 1;
+   /*
+* Try to lock the PTE, add ACCESSED and DIRTY if it was
+* a write access. Since this is 4K insert of 64K page size
+* also add _PAGE_COMBO
+*/
+   new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED | _PAGE_COMBO;
+   if (access & _PAGE_RW)
+   new_pte |= _PAGE_DIRTY;
+   } while (old_pte != __cmpxchg_u64((unsigned long *)ptep,
+ old_pte, new_pte));
+   /*
+* Handle the subpage protection bits
+*/
+   subpg_pte = new_pte & ~subpg_prot;
+   /*
+* PP bits. _PAGE_USER is already PP bit 0x2, so we only
+* need to add in 0x1 if it's a read-only user page
+*/
+   rflags = subpg_pte & _PAGE_USER;
+   if ((subpg_pte & _PAGE_USER) && !((subpg_pte & _PAGE_RW) &&
+   (subpg_pte & _PAGE_DIRTY)))
+   rflags |= 0x1;
+   /*
+* _PAGE_EXEC -> HW_NO_EXEC since it's inverted
+*/
+   rflags |= ((subpg_pte & _PAGE_EXEC) ? 0 : HPTE_R_N);
+   /*
+* Always add C and Memory coherence bit
+*/
+   rflags |= HPTE_R_C | HPTE_R_M;
+   /*
+* Add in WIMG bits
+*/
+   rflags |= (subpg_pte & (_PAGE_WRITETHRU | _PAGE_NO_CACHE |
+   _PAGE_COHERENT | _PAGE_GUARDED));
+
+   if (!cpu_has_feature(CPU_FTR_NOEXECUTE) &&
+   !cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) {
+
+   /*
+* No CPU has hugepages but lacks no execute, so we
+* don't need to worry about that case
+*/
+   rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap);
+   }
+
+   subpg_index = (ea & (PAGE_SIZE - 1)) >> shift;
+   vpn  = hpt_vpn(ea, vsid, ssize);
+   rpte = __real_pte(__pte(old_pte), ptep);
+   /*
+*None of the sub 4k page is hashed
+*/
+   if (!(old_pte & _PAGE_HASHPTE))
+   goto htab_insert_hpte;
+   /*
+* Check if the pte was already inserted into the hash table
+* as a 64k HW page, and