Re: [PATCH v5 9/9] mm/mmu_notifier: set MMU_NOTIFIER_USE_CHANGE_PTE flag where appropriate v2
On 2/19/19 12:04 PM, jgli...@redhat.com wrote: From: Jérôme Glisse When notifying change for a range use MMU_NOTIFIER_USE_CHANGE_PTE flag for page table update that use set_pte_at_notify() and where the we are going either from read and write to read only with same pfn or read only to read and write with new pfn. Note that set_pte_at_notify() itself should only be use in rare cases ie we do not want to use it when we are updating a significant range of virtual addresses and thus a significant number of pte. Instead for those cases the event provided to mmu notifer invalidate_range_start() callback should be use for optimization. Changes since v1: - Use the new unsigned flags field in struct mmu_notifier_range - Use the new flags parameter to mmu_notifier_range_init() - Explicitly list all the patterns where we can use change_pte() Signed-off-by: Jérôme Glisse Cc: Christian König Cc: Joonas Lahtinen Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Jan Kara Cc: Andrea Arcangeli Cc: Peter Xu Cc: Felix Kuehling Cc: Jason Gunthorpe Cc: Ross Zwisler Cc: Dan Williams Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Michal Hocko Cc: Christian Koenig Cc: Ralph Campbell Cc: John Hubbard Cc: k...@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Cc: linux-r...@vger.kernel.org Cc: Arnd Bergmann --- include/linux/mmu_notifier.h | 34 -- mm/ksm.c | 11 ++- mm/memory.c | 5 +++-- 3 files changed, 41 insertions(+), 9 deletions(-) diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index b6c004bd9f6a..0230a4b06b46 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -40,6 +40,26 @@ enum mmu_notifier_event { MMU_NOTIFY_SOFT_DIRTY, }; +/* + * @MMU_NOTIFIER_RANGE_BLOCKABLE: can the mmu notifier range_start/range_end + * callback block or not ? If set then the callback can block. + * + * @MMU_NOTIFIER_USE_CHANGE_PTE: only set when the page table it updated with + * the set_pte_at_notify() the valid patterns for this are: + * - pte read and write to read only same pfn + * - pte read only to read and write (pfn can change or stay the same) + * - pte read only to read only with different pfn + * It is illegal to set in any other circumstances. + * + * Note that set_pte_at_notify() should not be use outside of the above cases. + * When updating a range in batch (like write protecting a range) it is better + * to rely on invalidate_range_start() and struct mmu_notifier_range to infer + * the kind of update that is happening (as an example you can look at the + * mmu_notifier_range_update_to_read_only() function). + */ +#define MMU_NOTIFIER_RANGE_BLOCKABLE (1 << 0) +#define MMU_NOTIFIER_USE_CHANGE_PTE (1 << 1) + #ifdef CONFIG_MMU_NOTIFIER /* @@ -55,8 +75,6 @@ struct mmu_notifier_mm { spinlock_t lock; }; -#define MMU_NOTIFIER_RANGE_BLOCKABLE (1 << 0) - struct mmu_notifier_range { struct vm_area_struct *vma; struct mm_struct *mm; @@ -268,6 +286,12 @@ mmu_notifier_range_blockable(const struct mmu_notifier_range *range) return (range->flags & MMU_NOTIFIER_RANGE_BLOCKABLE); } +static inline bool +mmu_notifier_range_use_change_pte(const struct mmu_notifier_range *range) +{ + return (range->flags & MMU_NOTIFIER_USE_CHANGE_PTE); +} + static inline void mmu_notifier_release(struct mm_struct *mm) { if (mm_has_notifiers(mm)) @@ -509,6 +533,12 @@ mmu_notifier_range_blockable(const struct mmu_notifier_range *range) return true; } +static inline bool +mmu_notifier_range_use_change_pte(const struct mmu_notifier_range *range) +{ + return false; +} + static inline int mm_has_notifiers(struct mm_struct *mm) { return 0; diff --git a/mm/ksm.c b/mm/ksm.c index b782fadade8f..41e51882f999 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -1066,9 +1066,9 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page, BUG_ON(PageTransCompound(page)); - mmu_notifier_range_init(, MMU_NOTIFY_CLEAR, 0, vma, mm, - pvmw.address, - pvmw.address + PAGE_SIZE); + mmu_notifier_range_init(, MMU_NOTIFY_CLEAR, + MMU_NOTIFIER_USE_CHANGE_PTE, vma, mm, + pvmw.address, pvmw.address + PAGE_SIZE); mmu_notifier_invalidate_range_start(); if (!page_vma_mapped_walk()) @@ -1155,8 +1155,9 @@ static int replace_page(struct vm_area_struct *vma, struct page *page, if (!pmd) goto out; - mmu_notifier_range_init(, MMU_NOTIFY_CLEAR, 0, vma, mm, addr, - addr + PAGE_SIZE); + mmu_notifier_range_init(, MMU_NOTIFY_CLEAR, + MMU_NOTIFIER_USE_CHANGE_PTE, + vma, mm, addr, addr + PAGE_SIZE); mmu_notifier_invalidate_range_start();
[PATCH v5 9/9] mm/mmu_notifier: set MMU_NOTIFIER_USE_CHANGE_PTE flag where appropriate v2
From: Jérôme Glisse When notifying change for a range use MMU_NOTIFIER_USE_CHANGE_PTE flag for page table update that use set_pte_at_notify() and where the we are going either from read and write to read only with same pfn or read only to read and write with new pfn. Note that set_pte_at_notify() itself should only be use in rare cases ie we do not want to use it when we are updating a significant range of virtual addresses and thus a significant number of pte. Instead for those cases the event provided to mmu notifer invalidate_range_start() callback should be use for optimization. Changes since v1: - Use the new unsigned flags field in struct mmu_notifier_range - Use the new flags parameter to mmu_notifier_range_init() - Explicitly list all the patterns where we can use change_pte() Signed-off-by: Jérôme Glisse Cc: Christian König Cc: Joonas Lahtinen Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Jan Kara Cc: Andrea Arcangeli Cc: Peter Xu Cc: Felix Kuehling Cc: Jason Gunthorpe Cc: Ross Zwisler Cc: Dan Williams Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Michal Hocko Cc: Christian Koenig Cc: Ralph Campbell Cc: John Hubbard Cc: k...@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Cc: linux-r...@vger.kernel.org Cc: Arnd Bergmann --- include/linux/mmu_notifier.h | 34 -- mm/ksm.c | 11 ++- mm/memory.c | 5 +++-- 3 files changed, 41 insertions(+), 9 deletions(-) diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index b6c004bd9f6a..0230a4b06b46 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -40,6 +40,26 @@ enum mmu_notifier_event { MMU_NOTIFY_SOFT_DIRTY, }; +/* + * @MMU_NOTIFIER_RANGE_BLOCKABLE: can the mmu notifier range_start/range_end + * callback block or not ? If set then the callback can block. + * + * @MMU_NOTIFIER_USE_CHANGE_PTE: only set when the page table it updated with + * the set_pte_at_notify() the valid patterns for this are: + * - pte read and write to read only same pfn + * - pte read only to read and write (pfn can change or stay the same) + * - pte read only to read only with different pfn + * It is illegal to set in any other circumstances. + * + * Note that set_pte_at_notify() should not be use outside of the above cases. + * When updating a range in batch (like write protecting a range) it is better + * to rely on invalidate_range_start() and struct mmu_notifier_range to infer + * the kind of update that is happening (as an example you can look at the + * mmu_notifier_range_update_to_read_only() function). + */ +#define MMU_NOTIFIER_RANGE_BLOCKABLE (1 << 0) +#define MMU_NOTIFIER_USE_CHANGE_PTE (1 << 1) + #ifdef CONFIG_MMU_NOTIFIER /* @@ -55,8 +75,6 @@ struct mmu_notifier_mm { spinlock_t lock; }; -#define MMU_NOTIFIER_RANGE_BLOCKABLE (1 << 0) - struct mmu_notifier_range { struct vm_area_struct *vma; struct mm_struct *mm; @@ -268,6 +286,12 @@ mmu_notifier_range_blockable(const struct mmu_notifier_range *range) return (range->flags & MMU_NOTIFIER_RANGE_BLOCKABLE); } +static inline bool +mmu_notifier_range_use_change_pte(const struct mmu_notifier_range *range) +{ + return (range->flags & MMU_NOTIFIER_USE_CHANGE_PTE); +} + static inline void mmu_notifier_release(struct mm_struct *mm) { if (mm_has_notifiers(mm)) @@ -509,6 +533,12 @@ mmu_notifier_range_blockable(const struct mmu_notifier_range *range) return true; } +static inline bool +mmu_notifier_range_use_change_pte(const struct mmu_notifier_range *range) +{ + return false; +} + static inline int mm_has_notifiers(struct mm_struct *mm) { return 0; diff --git a/mm/ksm.c b/mm/ksm.c index b782fadade8f..41e51882f999 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -1066,9 +1066,9 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page, BUG_ON(PageTransCompound(page)); - mmu_notifier_range_init(, MMU_NOTIFY_CLEAR, 0, vma, mm, - pvmw.address, - pvmw.address + PAGE_SIZE); + mmu_notifier_range_init(, MMU_NOTIFY_CLEAR, + MMU_NOTIFIER_USE_CHANGE_PTE, vma, mm, + pvmw.address, pvmw.address + PAGE_SIZE); mmu_notifier_invalidate_range_start(); if (!page_vma_mapped_walk()) @@ -1155,8 +1155,9 @@ static int replace_page(struct vm_area_struct *vma, struct page *page, if (!pmd) goto out; - mmu_notifier_range_init(, MMU_NOTIFY_CLEAR, 0, vma, mm, addr, - addr + PAGE_SIZE); + mmu_notifier_range_init(, MMU_NOTIFY_CLEAR, + MMU_NOTIFIER_USE_CHANGE_PTE, + vma, mm, addr, addr + PAGE_SIZE); mmu_notifier_invalidate_range_start(); ptep = pte_offset_map_lock(mm, pmd, addr, ); diff