Le 30/11/2023 à 03:54, Rohan McLure a écrit : > On creation and clearing of a page table mapping, instrument such calls > by invoking page_table_check_pte_set and page_table_check_pte_clear > respectively. These calls serve as a sanity check against illegal > mappings. > > Enable ARCH_SUPPORTS_PAGE_TABLE_CHECK for all platforms. > > See also: > > riscv support in commit 3fee229a8eb9 ("riscv/mm: enable > ARCH_SUPPORTS_PAGE_TABLE_CHECK") > arm64 in commit 42b2547137f5 ("arm64/mm: enable > ARCH_SUPPORTS_PAGE_TABLE_CHECK") > x86_64 in commit d283d422c6c4 ("x86: mm: add x86_64 support for page table > check") > > Reviewed-by: Christophe Leroy <christophe.le...@csgroup.eu> > Signed-off-by: Rohan McLure <rmcl...@linux.ibm.com> > --- > v9: Updated for new API. Instrument pmdp_collapse_flush's two > constituent calls to avoid header hell > --- > arch/powerpc/Kconfig | 1 + > arch/powerpc/include/asm/book3s/32/pgtable.h | 7 +++- > arch/powerpc/include/asm/book3s/64/pgtable.h | 39 ++++++++++++++++---- > arch/powerpc/mm/book3s64/hash_pgtable.c | 4 ++ > arch/powerpc/mm/book3s64/pgtable.c | 13 +++++-- > arch/powerpc/mm/book3s64/radix_pgtable.c | 3 ++ > arch/powerpc/mm/pgtable.c | 4 ++ > 7 files changed, 58 insertions(+), 13 deletions(-) > > diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig > index 6f105ee4f3cf..5bd6d367ef40 100644 > --- a/arch/powerpc/Kconfig > +++ b/arch/powerpc/Kconfig > @@ -166,6 +166,7 @@ config PPC > select ARCH_STACKWALK > select ARCH_SUPPORTS_ATOMIC_RMW > select ARCH_SUPPORTS_DEBUG_PAGEALLOC if PPC_BOOK3S || PPC_8xx || 40x > + select ARCH_SUPPORTS_PAGE_TABLE_CHECK > select ARCH_USE_BUILTIN_BSWAP > select ARCH_USE_CMPXCHG_LOCKREF if PPC64 > select ARCH_USE_MEMTEST > diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h > b/arch/powerpc/include/asm/book3s/32/pgtable.h > index bd6f8cdd25aa..48f4e7b98340 100644 > --- a/arch/powerpc/include/asm/book3s/32/pgtable.h > +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h > @@ -201,6 +201,7 @@ void 
unmap_kernel_page(unsigned long va); > #ifndef __ASSEMBLY__ > #include <linux/sched.h> > #include <linux/threads.h> > +#include <linux/page_table_check.h> > > /* Bits to mask out from a PGD to get to the PUD page */ > #define PGD_MASKED_BITS 0 > @@ -319,7 +320,11 @@ static inline int __ptep_test_and_clear_young(struct > mm_struct *mm, > static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long > addr, > pte_t *ptep) > { > - return __pte(pte_update(mm, addr, ptep, ~_PAGE_HASHPTE, 0, 0)); > + pte_t old_pte = __pte(pte_update(mm, addr, ptep, ~_PAGE_HASHPTE, 0, 0)); > + > + page_table_check_pte_clear(mm, old_pte); > + > + return old_pte; > } > > #define __HAVE_ARCH_PTEP_SET_WRPROTECT > diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h > b/arch/powerpc/include/asm/book3s/64/pgtable.h > index dd3e7b190ab7..834c997ba657 100644 > --- a/arch/powerpc/include/asm/book3s/64/pgtable.h > +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h > @@ -151,6 +151,8 @@ > #define PAGE_KERNEL_ROX __pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX) > > #ifndef __ASSEMBLY__ > +#include <linux/page_table_check.h> > + > /* > * page table defines > */ > @@ -421,8 +423,11 @@ static inline void huge_ptep_set_wrprotect(struct > mm_struct *mm, > static inline pte_t ptep_get_and_clear(struct mm_struct *mm, > unsigned long addr, pte_t *ptep) > { > - unsigned long old = pte_update(mm, addr, ptep, ~0UL, 0, 0); > - return __pte(old); > + pte_t old_pte = __pte(pte_update(mm, addr, ptep, ~0UL, 0, 0)); > + > + page_table_check_pte_clear(mm, old_pte); > + > + return old_pte; > } > > #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL > @@ -431,11 +436,16 @@ static inline pte_t ptep_get_and_clear_full(struct > mm_struct *mm, > pte_t *ptep, int full) > { > if (full && radix_enabled()) { > + pte_t old_pte; > + > /* > * We know that this is a full mm pte clear and > * hence can be sure there is no parallel set_pte. 
> */ > - return radix__ptep_get_and_clear_full(mm, addr, ptep, full); > + old_pte = radix__ptep_get_and_clear_full(mm, addr, ptep, full); > + page_table_check_pte_clear(mm, old_pte); > + > + return old_pte; > } > return ptep_get_and_clear(mm, addr, ptep); > } > @@ -1339,17 +1349,30 @@ extern int pudp_test_and_clear_young(struct > vm_area_struct *vma, > static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, > unsigned long addr, pmd_t *pmdp) > { > - if (radix_enabled()) > - return radix__pmdp_huge_get_and_clear(mm, addr, pmdp); > - return hash__pmdp_huge_get_and_clear(mm, addr, pmdp); > + pmd_t old_pmd; > + > + if (radix_enabled()) { > + old_pmd = radix__pmdp_huge_get_and_clear(mm, addr, pmdp); > + } else { > + old_pmd = hash__pmdp_huge_get_and_clear(mm, addr, pmdp); > + } > + > + page_table_check_pmd_clear(mm, old_pmd); > + > + return old_pmd; > } > > #define __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR > static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm, > unsigned long addr, pud_t *pudp) > { > - if (radix_enabled()) > - return radix__pudp_huge_get_and_clear(mm, addr, pudp); > + pud_t old_pud;
This declaration should go inside the block below. > + > + if (radix_enabled()) { > + old_pud = radix__pudp_huge_get_and_clear(mm, addr, pudp); > + page_table_check_pud_clear(mm, old_pud); > + return old_pud; > + } Otherwise, it could be implemented as follows in order to be similar to pmdp_huge_get_and_clear() { pud_t old_pud; if (radix_enabled()) old_pud = radix__pudp_huge_get_and_clear(mm, addr, pudp); else BUG(); page_table_check_pud_clear(mm, old_pud); return old_pud; } > BUG(); > return *pudp; > } > diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c > b/arch/powerpc/mm/book3s64/hash_pgtable.c > index ae52c8db45b7..d6bde756e4a6 100644 > --- a/arch/powerpc/mm/book3s64/hash_pgtable.c > +++ b/arch/powerpc/mm/book3s64/hash_pgtable.c > @@ -8,6 +8,7 @@ > #include <linux/sched.h> > #include <linux/mm_types.h> > #include <linux/mm.h> > +#include <linux/page_table_check.h> > #include <linux/stop_machine.h> > > #include <asm/sections.h> > @@ -231,6 +232,9 @@ pmd_t hash__pmdp_collapse_flush(struct vm_area_struct > *vma, unsigned long addres > > pmd = *pmdp; > pmd_clear(pmdp); > + > + page_table_check_pmd_clear(vma->vm_mm, pmd); > + > /* > * Wait for all pending hash_page to finish. This is needed > * in case of subpage collapse. 
When we collapse normal pages > diff --git a/arch/powerpc/mm/book3s64/pgtable.c > b/arch/powerpc/mm/book3s64/pgtable.c > index 9a0a2accb261..194df0e4a33d 100644 > --- a/arch/powerpc/mm/book3s64/pgtable.c > +++ b/arch/powerpc/mm/book3s64/pgtable.c > @@ -10,6 +10,7 @@ > #include <linux/pkeys.h> > #include <linux/debugfs.h> > #include <linux/proc_fs.h> > +#include <linux/page_table_check.h> > #include <misc/cxl-base.h> > > #include <asm/pgalloc.h> > @@ -116,6 +117,7 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, > WARN_ON(!(pmd_large(pmd))); > #endif > trace_hugepage_set_pmd(addr, pmd_val(pmd)); > + page_table_check_pmd_set(mm, pmdp, pmd); > return __set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd), 0); > } > > @@ -133,7 +135,8 @@ void set_pud_at(struct mm_struct *mm, unsigned long addr, > WARN_ON(!(pud_large(pud))); > #endif > trace_hugepage_set_pud(addr, pud_val(pud)); > - return set_pte_at(mm, addr, pudp_ptep(pudp), pud_pte(pud)); > + page_table_check_pud_set(mm, pudp, pud); > + return __set_pte_at(mm, addr, pudp_ptep(pudp), pud_pte(pud), 0); Did you miss that in the previous patch? 
> } > > static void do_serialize(void *arg) > @@ -168,11 +171,13 @@ void serialize_against_pte_lookup(struct mm_struct *mm) > pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, > pmd_t *pmdp) > { > - unsigned long old_pmd; > + pmd_t old_pmd; > > - old_pmd = pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, > _PAGE_INVALID); > + old_pmd = __pmd(pmd_hugepage_update(vma->vm_mm, address, pmdp, > _PAGE_PRESENT, _PAGE_INVALID)); > flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); > - return __pmd(old_pmd); > + page_table_check_pmd_clear(vma->vm_mm, old_pmd); > + > + return old_pmd; > } > > pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma, > diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c > b/arch/powerpc/mm/book3s64/radix_pgtable.c > index ae4a5f66ccd2..9ed38466a99a 100644 > --- a/arch/powerpc/mm/book3s64/radix_pgtable.c > +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c > @@ -14,6 +14,7 @@ > #include <linux/of.h> > #include <linux/of_fdt.h> > #include <linux/mm.h> > +#include <linux/page_table_check.h> > #include <linux/hugetlb.h> > #include <linux/string_helpers.h> > #include <linux/memory.h> > @@ -1404,6 +1405,8 @@ pmd_t radix__pmdp_collapse_flush(struct vm_area_struct > *vma, unsigned long addre > pmd = *pmdp; > pmd_clear(pmdp); > > + page_table_check_pmd_clear(vma->vm_mm, pmd); > + > radix__flush_tlb_collapsed_pmd(vma->vm_mm, address); > > return pmd; > diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c > index e8e0289d7ab0..bccc96ba2471 100644 > --- a/arch/powerpc/mm/pgtable.c > +++ b/arch/powerpc/mm/pgtable.c > @@ -22,6 +22,7 @@ > #include <linux/mm.h> > #include <linux/percpu.h> > #include <linux/hardirq.h> > +#include <linux/page_table_check.h> > #include <linux/hugetlb.h> > #include <asm/tlbflush.h> > #include <asm/tlb.h> > @@ -206,6 +207,9 @@ void set_ptes(struct mm_struct *mm, unsigned long addr, > pte_t *ptep, > * and not hw_valid ptes. 
Hence there is no translation cache flush > * involved that need to be batched. > */ > + > + page_table_check_ptes_set(mm, ptep, pte, nr); > + > for (;;) { > > /*