Re: [PATCH 6/6] i386 virtualization - Attempt to clean up pgtable code motion
* [EMAIL PROTECTED] ([EMAIL PROTECTED]) wrote: > Virtualization aware Linux kernels may need to redefine functions which write > to hardware page tables at the sub-architecture layer. Previously, this was > done by encapsulation in a split mach-xxx/pgtable-{2|3}level-ops.h file, but > having 8 pgtable header files is simply unacceptable. This goes some ways > towards cleaning that up by deprecating the 2/3 level subarch functions. > This is accomplished by using __HAVE_ARCH_FUNC macros, and allowing > one sub-arch file, pgtable-ops.h, which gets included before any functions > which write to hardware page tables, allowing the sub-architecture to override > any or all definitions it needs. This looks like a better tradeoff. - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 6/6] i386 virtualization - Attempt to clean up pgtable code motion
* [EMAIL PROTECTED] ([EMAIL PROTECTED]) wrote: > Virtualization aware Linux kernels may need to redefine functions which write > to hardware page tables at the sub-architecture layer. Previously, this was > done by encapsulation in a split mach-xxx/pgtable-{2|3}level-ops.h file, but > having 8 pgtable header files is simply unacceptable. This goes some ways > towards cleaning that up by deprecating the 2/3 level subarch functions. > This is accomplished by using __HAVE_ARCH_FUNC macros, and allowing > one sub-arch file, pgtable-ops.h, which gets included before any functions > which write to hardware page tables, allowing the sub-architecture to override > any or all definitions it needs. This looks like a better tradeoff. - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 6/6] i386 virtualization - Attempt to clean up pgtable code motion
Virtualization aware Linux kernels may need to redefine functions which write to hardware page tables at the sub-architecture layer. Previously, this was done by encapsulation in a split mach-xxx/pgtable-{2|3}level-ops.h file, but having 8 pgtable header files is simply unacceptable. This goes some ways towards cleaning that up by deprecating the 2/3 level subarch functions. This is accomplished by using __HAVE_ARCH_FUNC macros, and allowing one sub-arch file, pgtable-ops.h, which gets included before any functions which write to hardware page tables, allowing the sub-architecture to override any or all definitions it needs. Signed-off-by: Zachary Amsden <[EMAIL PROTECTED]> Index: linux-2.6.13/include/asm-i386/pgtable-2level.h === --- linux-2.6.13.orig/include/asm-i386/pgtable-2level.h 2005-08-15 14:23:06.0 -0700 +++ linux-2.6.13/include/asm-i386/pgtable-2level.h 2005-08-15 14:24:11.0 -0700 @@ -55,4 +55,25 @@ #define __pte_to_swp_entry(pte)((swp_entry_t) { (pte).pte_low }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) +/* + * Certain architectures need to do special things when PTEs + * within a page table are directly modified. Thus, the following + * hook is made available. 
+ */ +#ifndef __HAVE_ARCH_SET_PTE +#define __HAVE_ARCH_SET_PTE +#define set_pte(pteptr, pteval) (*(pteptr) = pteval) +#endif +#define set_pte_atomic(pteptr, pteval) set_pte(pteptr, pteval) + +#ifndef __HAVE_ARCH_SET_PMD +#define __HAVE_ARCH_SET_PMD +#define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval)) +#endif + +#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR +#define ptep_get_and_clear(mm,addr,xp) __pte(xchg(&(xp)->pte_low, 0)) +#endif + #endif /* _I386_PGTABLE_2LEVEL_H */ Index: linux-2.6.13/include/asm-i386/pgtable-3level.h === --- linux-2.6.13.orig/include/asm-i386/pgtable-3level.h 2005-08-15 14:23:06.0 -0700 +++ linux-2.6.13/include/asm-i386/pgtable-3level.h 2005-08-15 14:24:11.0 -0700 @@ -123,4 +123,58 @@ #define __pmd_free_tlb(tlb, x) do { } while (0) +/* + * Sub-arch is allowed to override these, so check for definition first. + * New functions which write to hardware page table entries should go here. + */ + +/* Rules for using set_pte: the pte being assigned *must* be + * either not present or in a state where the hardware will + * not attempt to update the pte. In places where this is + * not possible, use pte_get_and_clear to obtain the old pte + * value and then use set_pte to update it. 
-ben + */ +#ifndef __HAVE_ARCH_SET_PTE +#define __HAVE_ARCH_SET_PTE +static inline void set_pte(pte_t *ptep, pte_t pte) +{ + ptep->pte_high = pte.pte_high; + smp_wmb(); + ptep->pte_low = pte.pte_low; +} +#endif + +#ifndef __HAVE_ARCH_SET_PTE_ATOMIC +#define __HAVE_ARCH_SET_PTE_ATOMIC +#define set_pte_atomic(pteptr,pteval) \ + set_64bit((unsigned long long *)(pteptr),pte_val(pteval)) +#endif + +#ifndef __HAVE_ARCH_SET_PMD +#define __HAVE_ARCH_SET_PMD +#define set_pmd(pmdptr,pmdval) \ + set_64bit((unsigned long long *)(pmdptr),pmd_val(pmdval)) +#endif + +#ifndef __HAVE_ARCH_SET_PUD +#define __HAVE_ARCH_SET_PUD +#define set_pud(pudptr,pudval) \ + (*(pudptr) = (pudval)) +#endif + +#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR +static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +{ + pte_t res; + + /* xchg acts as a barrier before the setting of the high bits */ + res.pte_low = xchg(&ptep->pte_low, 0); + res.pte_high = ptep->pte_high; + ptep->pte_high = 0; + + return res; +} +#endif + #endif /* _I386_PGTABLE_3LEVEL_H */ Index: linux-2.6.13/include/asm-i386/pgtable.h === --- linux-2.6.13.orig/include/asm-i386/pgtable.h 2005-08-15 14:23:06.0 -0700 +++ linux-2.6.13/include/asm-i386/pgtable.h 2005-08-15 14:24:11.0 -0700 @@ -236,12 +236,55 @@ static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte_low |= _PAGE_RW; return pte; } static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= _PAGE_PRESENT | _PAGE_PSE; return pte; } +#include <pgtable-ops.h> #ifdef CONFIG_X86_PAE # include <asm/pgtable-3level.h> #else # include <asm/pgtable-2level.h> #endif -#include <pgtable-ops.h> + +/* + * We give sub-architectures a chance to override functions which write to page + * tables, thus we check for existing definitions first. 
+ */ +#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY +static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) +{ + if (!pte_dirty(*ptep)) + return 0; + return test_and_clear_bit(_PAGE_BIT_DIRTY, &ptep->pte_low); +} +#endif + +#ifndef
[PATCH 6/6] i386 virtualization - Attempt to clean up pgtable code motion
Virtualization aware Linux kernels may need to redefine functions which write to hardware page tables at the sub-architecture layer. Previously, this was done by encapsulation in a split mach-xxx/pgtable-{2|3}level-ops.h file, but having 8 pgtable header files is simply unacceptable. This goes some ways towards cleaning that up by deprecating the 2/3 level subarch functions. This is accomplished by using __HAVE_ARCH_FUNC macros, and allowing one sub-arch file, pgtable-ops.h, which gets included before any functions which write to hardware page tables, allowing the sub-architecture to override any or all definitions it needs. Signed-off-by: Zachary Amsden [EMAIL PROTECTED] Index: linux-2.6.13/include/asm-i386/pgtable-2level.h === --- linux-2.6.13.orig/include/asm-i386/pgtable-2level.h 2005-08-15 14:23:06.0 -0700 +++ linux-2.6.13/include/asm-i386/pgtable-2level.h 2005-08-15 14:24:11.0 -0700 @@ -55,4 +55,25 @@ #define __pte_to_swp_entry(pte)((swp_entry_t) { (pte).pte_low }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) +/* + * Certain architectures need to do special things when PTEs + * within a page table are directly modified. Thus, the following + * hook is made available. 
+ */ +#ifndef __HAVE_ARCH_SET_PTE +#define __HAVE_ARCH_SET_PTE +#define set_pte(pteptr, pteval) (*(pteptr) = pteval) +#endif +#define set_pte_atomic(pteptr, pteval) set_pte(pteptr, pteval) + +#ifndef __HAVE_ARCH_SET_PMD +#define __HAVE_ARCH_SET_PMD +#define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval)) +#endif + +#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR +#define ptep_get_and_clear(mm,addr,xp) __pte(xchg(&(xp)->pte_low, 0)) +#endif + #endif /* _I386_PGTABLE_2LEVEL_H */ Index: linux-2.6.13/include/asm-i386/pgtable-3level.h === --- linux-2.6.13.orig/include/asm-i386/pgtable-3level.h 2005-08-15 14:23:06.0 -0700 +++ linux-2.6.13/include/asm-i386/pgtable-3level.h 2005-08-15 14:24:11.0 -0700 @@ -123,4 +123,58 @@ #define __pmd_free_tlb(tlb, x) do { } while (0) +/* + * Sub-arch is allowed to override these, so check for definition first. + * New functions which write to hardware page table entries should go here. + */ + +/* Rules for using set_pte: the pte being assigned *must* be + * either not present or in a state where the hardware will + * not attempt to update the pte. In places where this is + * not possible, use pte_get_and_clear to obtain the old pte + * value and then use set_pte to update it. 
-ben + */ +#ifndef __HAVE_ARCH_SET_PTE +#define __HAVE_ARCH_SET_PTE +static inline void set_pte(pte_t *ptep, pte_t pte) +{ + ptep->pte_high = pte.pte_high; + smp_wmb(); + ptep->pte_low = pte.pte_low; +} +#endif + +#ifndef __HAVE_ARCH_SET_PTE_ATOMIC +#define __HAVE_ARCH_SET_PTE_ATOMIC +#define set_pte_atomic(pteptr,pteval) \ + set_64bit((unsigned long long *)(pteptr),pte_val(pteval)) +#endif + +#ifndef __HAVE_ARCH_SET_PMD +#define __HAVE_ARCH_SET_PMD +#define set_pmd(pmdptr,pmdval) \ + set_64bit((unsigned long long *)(pmdptr),pmd_val(pmdval)) +#endif + +#ifndef __HAVE_ARCH_SET_PUD +#define __HAVE_ARCH_SET_PUD +#define set_pud(pudptr,pudval) \ + (*(pudptr) = (pudval)) +#endif + +#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR +static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +{ + pte_t res; + + /* xchg acts as a barrier before the setting of the high bits */ + res.pte_low = xchg(&ptep->pte_low, 0); + res.pte_high = ptep->pte_high; + ptep->pte_high = 0; + + return res; +} +#endif + #endif /* _I386_PGTABLE_3LEVEL_H */ Index: linux-2.6.13/include/asm-i386/pgtable.h === --- linux-2.6.13.orig/include/asm-i386/pgtable.h 2005-08-15 14:23:06.0 -0700 +++ linux-2.6.13/include/asm-i386/pgtable.h 2005-08-15 14:24:11.0 -0700 @@ -236,12 +236,55 @@ static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte_low |= _PAGE_RW; return pte; } static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= _PAGE_PRESENT | _PAGE_PSE; return pte; } +#include <pgtable-ops.h> #ifdef CONFIG_X86_PAE # include <asm/pgtable-3level.h> #else # include <asm/pgtable-2level.h> #endif -#include <pgtable-ops.h> + +/* + * We give sub-architectures a chance to override functions which write to page + * tables, thus we check for existing definitions first. 
+ */ +#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY +static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) +{ + if (!pte_dirty(*ptep)) + return 0; + return