[RFC/PATCH] powerpc: Rework I$/D$ coherency

2009-01-29 Thread Benjamin Herrenschmidt
This patch reworks the way we do I and D cache coherency on PowerPC.

The old way was split in 3 different parts depending on the processor type:

   - Hash with per-page exec support (64-bit and >= POWER4 only) does it
at hashing time, by preventing exec on unclean pages and cleaning pages
on exec faults.

   - Everything without per-page exec support (32-bit hash, 8xx, and
64-bit < POWER4) does it for all pages going to user space in update_mmu_cache().

   - Embedded with per-page exec support does it from do_page_fault() on
exec faults, in a way similar to what the hash code does.

That leads to confusion, and bugs. For example, the method using 
update_mmu_cache()
is racy on SMP where another processor can see the new PTE and hash it in before
we have cleaned the cache, and then blow trying to execute. This is hard to hit 
but
I think it has bitten us in the past.

Also, it's inefficient for embedded where we always end up having to do at least
one more page fault.

This reworks the whole thing by moving the cache sync into two main call sites,
though we keep different behaviours depending on the HW capability. The call
sites are set_pte_at() which is now made out of line, and 
ptep_set_access_flags()
which joins the former in pgtable.c

The base idea for Embedded with per-page exec support, is that we now do the
flush at set_pte_at() time when coming from an exec fault, which allows us
to avoid the double fault problem completely (we can even improve the situation
more by implementing TLB preload in update_mmu_cache() but that's for later).

If for some reason we didn't do it there and we try to execute, we'll hit
the page fault, which will do a minor fault, which will hit 
ptep_set_access_flags()
to do things like update _PAGE_ACCESSED or _PAGE_DIRTY if needed, we just make
this guy also perform the I/D cache sync for exec faults now. This second path
is the catch-all for things that weren't cleaned at set_pte_at() time.

For cpus without per-page exec support, we always do the sync at set_pte_at(),
thus guaranteeing that when the PTE is visible to other processors, the cache
is clean.

For the 64-bit hash with per-page exec support case, we keep the old mechanism
for now. I'll look into changing it later, once I've reworked a bit how we
use _PAGE_EXEC.

This is also a first step for adding _PAGE_EXEC support for embedded platforms

Signed-off-by: Benjamin Herrenschmidt b...@kernel.crashing.org
---


 arch/powerpc/include/asm/highmem.h   |2 
 arch/powerpc/include/asm/pgtable-ppc32.h |   53 
 arch/powerpc/include/asm/pgtable-ppc64.h |   28 +-
 arch/powerpc/include/asm/pgtable.h   |   84 +++
 arch/powerpc/mm/fault.c  |   46 +++---
 arch/powerpc/mm/mem.c|   33 ---
 arch/powerpc/mm/pgtable.c|  133 +++
 7 files changed, 245 insertions(+), 134 deletions(-)

--- linux-work.orig/arch/powerpc/include/asm/pgtable-ppc32.h2009-01-28 
15:55:58.0 +1100
+++ linux-work/arch/powerpc/include/asm/pgtable-ppc32.h 2009-01-29 
10:14:53.0 +1100
@@ -429,6 +429,8 @@ extern int icache_44x_need_flush;
 #define PMD_PAGE_SIZE(pmd) bad_call_to_PMD_PAGE_SIZE()
 #endif
 
+#define _PAGE_HPTEFLAGS _PAGE_HASHPTE
+
 #define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
 
 
@@ -667,44 +669,6 @@ static inline unsigned long long pte_upd
 #endif /* CONFIG_PTE_64BIT */
 
 /*
- * set_pte stores a linux PTE into the linux page table.
- * On machines which use an MMU hash table we avoid changing the
- * _PAGE_HASHPTE bit.
- */
-
-static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pte)
-{
-#if (_PAGE_HASHPTE != 0) && defined(CONFIG_SMP) && !defined(CONFIG_PTE_64BIT)
-   pte_update(ptep, ~_PAGE_HASHPTE, pte_val(pte) & ~_PAGE_HASHPTE);
-#elif defined(CONFIG_PTE_64BIT) && defined(CONFIG_SMP)
-#if _PAGE_HASHPTE != 0
-   if (pte_val(*ptep) & _PAGE_HASHPTE)
-   flush_hash_entry(mm, ptep, addr);
-#endif
-   __asm__ __volatile__("\
-   stw%U0%X0 %2,%0\n\
-   eieio\n\
-   stw%U0%X0 %L2,%1"
-   : "=m" (*ptep), "=m" (*((unsigned char *)ptep+4))
-   : "r" (pte) : "memory");
-#else
-   *ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE)
- | (pte_val(pte) & ~_PAGE_HASHPTE));
-#endif
-}
-
-
-static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pte)
-{
-#if defined(CONFIG_PTE_64BIT) && defined(CONFIG_SMP) && defined(CONFIG_DEBUG_VM)
-   WARN_ON(pte_present(*ptep));
-#endif
-   __set_pte_at(mm, addr, ptep, pte);
-}
-
-/*
  * 2.6 calls this without flushing the TLB entry; this is wrong
  * for our hash-based implementation, we fix that up here.
  */
@@ -744,24 +708,13 @@ static inline void huge_ptep_set_wrprote
 }
 
 
-#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
-static inline void 

Re: [RFC/PATCH] powerpc: Rework I$/D$ coherency

2009-01-29 Thread Kumar Gala


On Jan 29, 2009, at 8:26 PM, Benjamin Herrenschmidt wrote:


Index: linux-work/arch/powerpc/include/asm/pgtable-ppc64.h
===
--- linux-work.orig/arch/powerpc/include/asm/pgtable-ppc64.h	 
2009-01-28 16:00:26.0 +1100
+++ linux-work/arch/powerpc/include/asm/pgtable-ppc64.h	2009-01-29  
10:50:58.0 +1100

@@ -125,6 +125,8 @@
#define _PTEIDX_SECONDARY   0x8
#define _PTEIDX_GROUP_IX0x7

+/* To make some generic powerpc code happy */
+#define _PAGE_HWEXEC   0

/*
 * POWER4 and newer have per page execute protection, older chips  
can only

@@ -285,6 +287,10 @@ static inline unsigned long pte_update(s
: "r" (ptep), "r" (clr), "m" (*ptep), "i" (_PAGE_BUSY)
: "cc" );

+   /* huge pages use the old page table lock */
+   if (!huge)
+   assert_pte_locked(mm, addr);
+
if (old & _PAGE_HASHPTE)
hpte_need_flush(mm, addr, ptep, old, huge);
return old;
@@ -359,23 +365,12 @@ static inline void pte_clear(struct mm_s
pte_update(mm, addr, ptep, ~0UL, 0);
}

-/*
- * set_pte stores a linux PTE into the linux page table.
- */
-static inline void set_pte_at(struct mm_struct *mm, unsigned long  
addr,

- pte_t *ptep, pte_t pte)
-{
-   if (pte_present(*ptep))
-   pte_clear(mm, addr, ptep);
-   pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
-   *ptep = pte;
-}

/* Set the dirty and/or accessed bits atomically in a linux PTE, this
 * function doesn't need to flush the hash entry
 */
#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS


should the #define go since its in pgtable.h now?



-static inline void __ptep_set_access_flags(pte_t *ptep, pte_t  
entry, int dirty)

+static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
{
unsigned long bits = pte_val(entry) &
(_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
@@ -392,15 +387,6 @@ static inline void __ptep_set_access_fla
:"r" (bits), "r" (ptep), "m" (*ptep), "i" (_PAGE_BUSY)
:"cc");
}
-#define  ptep_set_access_flags(__vma, __address, __ptep, __entry,  
__dirty) \

-({\
-   int __changed = !pte_same(*(__ptep), __entry); \
-   if (__changed) {   \
-   __ptep_set_access_flags(__ptep, __entry, __dirty); \
-   flush_tlb_page_nohash(__vma, __address);   \
-   }  \
-   __changed; \
-})


___
Linuxppc-dev mailing list
Linuxppc-dev@ozlabs.org
https://ozlabs.org/mailman/listinfo/linuxppc-dev