Re: [PATCH 6/6] i386 virtualization - Attempt to clean up pgtable code motion

2005-08-16 Thread Chris Wright
* [EMAIL PROTECTED] ([EMAIL PROTECTED]) wrote:
> Virtualization aware Linux kernels may need to redefine functions which write
> to hardware page tables at the sub-architecture layer.  Previously, this was
> done by encapsulation in a split mach-xxx/pgtable-{2|3}level-ops.h file, but
> having 8 pgtable header files is simply unacceptable.  This goes some ways
> towards cleaning that up by deprecating the 2/3 level subarch functions.
> This is accomplished by using __HAVE_ARCH_FUNC macros, and allowing
> one sub-arch file, pgtable-ops.h, which gets included before any functions
> which write to hardware page tables, allowing the sub-architecture to override
> any or all definitions it needs.

This looks like a better tradeoff.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 6/6] i386 virtualization - Attempt to clean up pgtable code motion

2005-08-16 Thread Chris Wright
* [EMAIL PROTECTED] ([EMAIL PROTECTED]) wrote:
> Virtualization aware Linux kernels may need to redefine functions which write
> to hardware page tables at the sub-architecture layer.  Previously, this was
> done by encapsulation in a split mach-xxx/pgtable-{2|3}level-ops.h file, but
> having 8 pgtable header files is simply unacceptable.  This goes some ways
> towards cleaning that up by deprecating the 2/3 level subarch functions.
> This is accomplished by using __HAVE_ARCH_FUNC macros, and allowing
> one sub-arch file, pgtable-ops.h, which gets included before any functions
> which write to hardware page tables, allowing the sub-architecture to override
> any or all definitions it needs.

This looks like a better tradeoff.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 6/6] i386 virtualization - Attempt to clean up pgtable code motion

2005-08-15 Thread zach
Virtualization aware Linux kernels may need to redefine functions which write
to hardware page tables at the sub-architecture layer.  Previously, this was
done by encapsulation in a split mach-xxx/pgtable-{2|3}level-ops.h file, but
having 8 pgtable header files is simply unacceptable.  This goes some ways
towards cleaning that up by deprecating the 2/3 level subarch functions.
This is accomplished by using __HAVE_ARCH_FUNC macros, and allowing
one sub-arch file, pgtable-ops.h, which gets included before any functions
which write to hardware page tables, allowing the sub-architecture to override
any or all definitions it needs.

Signed-off-by: Zachary Amsden <[EMAIL PROTECTED]>
Index: linux-2.6.13/include/asm-i386/pgtable-2level.h
===
--- linux-2.6.13.orig/include/asm-i386/pgtable-2level.h 2005-08-15 
14:23:06.0 -0700
+++ linux-2.6.13/include/asm-i386/pgtable-2level.h  2005-08-15 
14:24:11.0 -0700
@@ -55,4 +55,25 @@
 #define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low })
 #define __swp_entry_to_pte(x)  ((pte_t) { (x).val })
 
+/*
+ * Certain architectures need to do special things when PTEs
+ * within a page table are directly modified.  Thus, the following
+ * hook is made available.
+ */
+#ifndef __HAVE_ARCH_SET_PTE
+#define __HAVE_ARCH_SET_PTE
+#define set_pte(pteptr, pteval) (*(pteptr) = pteval)
+#endif
+#define set_pte_atomic(pteptr, pteval) set_pte(pteptr, pteval)
+
+#ifndef __HAVE_ARCH_SET_PMD
+#define __HAVE_ARCH_SET_PMD
+#define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval))
+#endif
+
+#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+#define ptep_get_and_clear(mm,addr,xp) __pte(xchg(&(xp)->pte_low, 0))
+#endif
+
 #endif /* _I386_PGTABLE_2LEVEL_H */
Index: linux-2.6.13/include/asm-i386/pgtable-3level.h
===
--- linux-2.6.13.orig/include/asm-i386/pgtable-3level.h 2005-08-15 
14:23:06.0 -0700
+++ linux-2.6.13/include/asm-i386/pgtable-3level.h  2005-08-15 
14:24:11.0 -0700
@@ -123,4 +123,58 @@
 
 #define __pmd_free_tlb(tlb, x) do { } while (0)
 
+/*
+ * Sub-arch is allowed to override these, so check for definition first.
+ * New functions which write to hardware page table entries should go here.
+ */
+
+/* Rules for using set_pte: the pte being assigned *must* be
+ * either not present or in a state where the hardware will
+ * not attempt to update the pte.  In places where this is
+ * not possible, use pte_get_and_clear to obtain the old pte
+ * value and then use set_pte to update it.  -ben
+ */
+#ifndef __HAVE_ARCH_SET_PTE
+#define __HAVE_ARCH_SET_PTE
+static inline void set_pte(pte_t *ptep, pte_t pte)
+{
+   ptep->pte_high = pte.pte_high;
+   smp_wmb();
+   ptep->pte_low = pte.pte_low;
+}
+#endif
+
+#ifndef __HAVE_ARCH_SET_PTE_ATOMIC
+#define __HAVE_ARCH_SET_PTE_ATOMIC
+#define set_pte_atomic(pteptr,pteval) \
+   set_64bit((unsigned long long *)(pteptr),pte_val(pteval))
+#endif
+
+#ifndef __HAVE_ARCH_SET_PMD
+#define __HAVE_ARCH_SET_PMD
+#define set_pmd(pmdptr,pmdval) \
+   set_64bit((unsigned long long *)(pmdptr),pmd_val(pmdval))
+#endif
+
+#ifndef __HAVE_ARCH_SET_PUD
+#define __HAVE_ARCH_SET_PUD
+#define set_pud(pudptr,pudval) \
+   (*(pudptr) = (pudval))
+#endif
+
+#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+   pte_t res;
+
+   /* xchg acts as a barrier before the setting of the high bits */
+   res.pte_low = xchg(&ptep->pte_low, 0);
+   res.pte_high = ptep->pte_high;
+   ptep->pte_high = 0;
+
+   return res;
+}
+#endif
+
 #endif /* _I386_PGTABLE_3LEVEL_H */
Index: linux-2.6.13/include/asm-i386/pgtable.h
===
--- linux-2.6.13.orig/include/asm-i386/pgtable.h2005-08-15 
14:23:06.0 -0700
+++ linux-2.6.13/include/asm-i386/pgtable.h 2005-08-15 14:24:11.0 
-0700
@@ -236,12 +236,55 @@
 static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte_low |= _PAGE_RW; return pte; }
 static inline pte_t pte_mkhuge(pte_t pte)  { (pte).pte_low |= _PAGE_PRESENT | _PAGE_PSE; return pte; }
 
+#include <pgtable-ops.h>
 #ifdef CONFIG_X86_PAE
 # include <asm/pgtable-3level.h>
 #else
 # include <asm/pgtable-2level.h>
 #endif
-#include <pgtable-ops.h>
+
+/*
+ * We give sub-architectures a chance to override functions which write to page
+ * tables, thus we check for existing definitions first.
+ */
+#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
+static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
+{
+   if (!pte_dirty(*ptep))
+   return 0;
+   return test_and_clear_bit(_PAGE_BIT_DIRTY, &ptep->pte_low);
+}
+#endif
+
+#ifndef 

[PATCH 6/6] i386 virtualization - Attempt to clean up pgtable code motion

2005-08-15 Thread zach
Virtualization aware Linux kernels may need to redefine functions which write
to hardware page tables at the sub-architecture layer.  Previously, this was
done by encapsulation in a split mach-xxx/pgtable-{2|3}level-ops.h file, but
having 8 pgtable header files is simply unacceptable.  This goes some ways
towards cleaning that up by deprecating the 2/3 level subarch functions.
This is accomplished by using __HAVE_ARCH_FUNC macros, and allowing
one sub-arch file, pgtable-ops.h, which gets included before any functions
which write to hardware page tables, allowing the sub-architecture to override
any or all definitions it needs.

Signed-off-by: Zachary Amsden <[EMAIL PROTECTED]>
Index: linux-2.6.13/include/asm-i386/pgtable-2level.h
===
--- linux-2.6.13.orig/include/asm-i386/pgtable-2level.h 2005-08-15 
14:23:06.0 -0700
+++ linux-2.6.13/include/asm-i386/pgtable-2level.h  2005-08-15 
14:24:11.0 -0700
@@ -55,4 +55,25 @@
 #define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low })
 #define __swp_entry_to_pte(x)  ((pte_t) { (x).val })
 
+/*
+ * Certain architectures need to do special things when PTEs
+ * within a page table are directly modified.  Thus, the following
+ * hook is made available.
+ */
+#ifndef __HAVE_ARCH_SET_PTE
+#define __HAVE_ARCH_SET_PTE
+#define set_pte(pteptr, pteval) (*(pteptr) = pteval)
+#endif
+#define set_pte_atomic(pteptr, pteval) set_pte(pteptr, pteval)
+
+#ifndef __HAVE_ARCH_SET_PMD
+#define __HAVE_ARCH_SET_PMD
+#define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval))
+#endif
+
+#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+#define ptep_get_and_clear(mm,addr,xp) __pte(xchg(&(xp)->pte_low, 0))
+#endif
+
 #endif /* _I386_PGTABLE_2LEVEL_H */
Index: linux-2.6.13/include/asm-i386/pgtable-3level.h
===
--- linux-2.6.13.orig/include/asm-i386/pgtable-3level.h 2005-08-15 
14:23:06.0 -0700
+++ linux-2.6.13/include/asm-i386/pgtable-3level.h  2005-08-15 
14:24:11.0 -0700
@@ -123,4 +123,58 @@
 
 #define __pmd_free_tlb(tlb, x) do { } while (0)
 
+/*
+ * Sub-arch is allowed to override these, so check for definition first.
+ * New functions which write to hardware page table entries should go here.
+ */
+
+/* Rules for using set_pte: the pte being assigned *must* be
+ * either not present or in a state where the hardware will
+ * not attempt to update the pte.  In places where this is
+ * not possible, use pte_get_and_clear to obtain the old pte
+ * value and then use set_pte to update it.  -ben
+ */
+#ifndef __HAVE_ARCH_SET_PTE
+#define __HAVE_ARCH_SET_PTE
+static inline void set_pte(pte_t *ptep, pte_t pte)
+{
+   ptep->pte_high = pte.pte_high;
+   smp_wmb();
+   ptep->pte_low = pte.pte_low;
+}
+#endif
+
+#ifndef __HAVE_ARCH_SET_PTE_ATOMIC
+#define __HAVE_ARCH_SET_PTE_ATOMIC
+#define set_pte_atomic(pteptr,pteval) \
+   set_64bit((unsigned long long *)(pteptr),pte_val(pteval))
+#endif
+
+#ifndef __HAVE_ARCH_SET_PMD
+#define __HAVE_ARCH_SET_PMD
+#define set_pmd(pmdptr,pmdval) \
+   set_64bit((unsigned long long *)(pmdptr),pmd_val(pmdval))
+#endif
+
+#ifndef __HAVE_ARCH_SET_PUD
+#define __HAVE_ARCH_SET_PUD
+#define set_pud(pudptr,pudval) \
+   (*(pudptr) = (pudval))
+#endif
+
+#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+   pte_t res;
+
+   /* xchg acts as a barrier before the setting of the high bits */
+   res.pte_low = xchg(&ptep->pte_low, 0);
+   res.pte_high = ptep->pte_high;
+   ptep->pte_high = 0;
+
+   return res;
+}
+#endif
+
 #endif /* _I386_PGTABLE_3LEVEL_H */
Index: linux-2.6.13/include/asm-i386/pgtable.h
===
--- linux-2.6.13.orig/include/asm-i386/pgtable.h2005-08-15 
14:23:06.0 -0700
+++ linux-2.6.13/include/asm-i386/pgtable.h 2005-08-15 14:24:11.0 
-0700
@@ -236,12 +236,55 @@
 static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte_low |= _PAGE_RW; return pte; }
 static inline pte_t pte_mkhuge(pte_t pte)  { (pte).pte_low |= _PAGE_PRESENT | _PAGE_PSE; return pte; }
 
+#include <pgtable-ops.h>
 #ifdef CONFIG_X86_PAE
 # include <asm/pgtable-3level.h>
 #else
 # include <asm/pgtable-2level.h>
 #endif
-#include <pgtable-ops.h>
+
+/*
+ * We give sub-architectures a chance to override functions which write to page
+ * tables, thus we check for existing definitions first.
+ */
+#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
+static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
+{
+   if (!pte_dirty(*ptep))
+   return 0;
+   return