Re: [PATCH 3/3] mm/mmu_notifier: contextual information for event triggering invalidation
Should be all fixed in v2. I built with and without mmu notifier and did not have any issue in v2. On Fri, Dec 07, 2018 at 05:19:21AM +0800, kbuild test robot wrote: > Hi Jérôme, > > I love your patch! Yet something to improve: > > [auto build test ERROR on linus/master] > [also build test ERROR on v4.20-rc5] > [cannot apply to next-20181206] > [if your patch is applied to the wrong git tree, please drop us a note to > help improve the system] > > url: > https://github.com/0day-ci/linux/commits/jglisse-redhat-com/mmu-notifier-contextual-informations/20181207-031930 > config: x86_64-randconfig-x017-201848 (attached as .config) > compiler: gcc-7 (Debian 7.3.0-1) 7.3.0 > reproduce: > # save the attached .config to linux build tree > make ARCH=x86_64 > > All errors (new ones prefixed by >>): > >fs///proc/task_mmu.c: In function 'clear_refs_write': >fs///proc/task_mmu.c:1099:29: error: storage size of 'range' isn't known > struct mmu_notifier_range range; > ^ > >> fs///proc/task_mmu.c:1147:18: error: 'MMU_NOTIFY_SOFT_DIRTY' undeclared > >> (first use in this function); did you mean 'CLEAR_REFS_SOFT_DIRTY'? 
>range.event = MMU_NOTIFY_SOFT_DIRTY; > ^ > CLEAR_REFS_SOFT_DIRTY >fs///proc/task_mmu.c:1147:18: note: each undeclared identifier is reported > only once for each function it appears in >fs///proc/task_mmu.c:1099:29: warning: unused variable 'range' > [-Wunused-variable] > struct mmu_notifier_range range; > ^ > > vim +1147 fs///proc/task_mmu.c > > 1069 > 1070static ssize_t clear_refs_write(struct file *file, const char > __user *buf, > 1071size_t count, loff_t *ppos) > 1072{ > 1073struct task_struct *task; > 1074char buffer[PROC_NUMBUF]; > 1075struct mm_struct *mm; > 1076struct vm_area_struct *vma; > 1077enum clear_refs_types type; > 1078struct mmu_gather tlb; > 1079int itype; > 1080int rv; > 1081 > 1082memset(buffer, 0, sizeof(buffer)); > 1083if (count > sizeof(buffer) - 1) > 1084count = sizeof(buffer) - 1; > 1085if (copy_from_user(buffer, buf, count)) > 1086return -EFAULT; > 1087rv = kstrtoint(strstrip(buffer), 10, &itype); > 1088if (rv < 0) > 1089return rv; > 1090type = (enum clear_refs_types)itype; > 1091if (type < CLEAR_REFS_ALL || type >= CLEAR_REFS_LAST) > 1092return -EINVAL; > 1093 > 1094task = get_proc_task(file_inode(file)); > 1095if (!task) > 1096return -ESRCH; > 1097mm = get_task_mm(task); > 1098if (mm) { > > 1099struct mmu_notifier_range range; > 1100struct clear_refs_private cp = { > 1101.type = type, > 1102}; > 1103struct mm_walk clear_refs_walk = { > 1104.pmd_entry = clear_refs_pte_range, > 1105.test_walk = clear_refs_test_walk, > 1106.mm = mm, > 1107.private = &cp, > 1108}; > 1109 > 1110if (type == CLEAR_REFS_MM_HIWATER_RSS) { > if (down_write_killable(&mm->mmap_sem)) > { > 1112count = -EINTR; > 1113goto out_mm; > 1114} > 1115 > 1116/* > 1117 * Writing 5 to /proc/pid/clear_refs > resets the peak > 1118 * resident set size to this mm's > current rss value. 
> 1119 */ > 1120reset_mm_hiwater_rss(mm); > 1121up_write(&mm->mmap_sem); > 1122goto out_mm; > 1123} > 1124 > 1125down_read(&mm->mmap_sem); > 1126tlb_gather_mmu(&tlb, mm, 0, -1); > 1127if (type == CLEAR_REFS_SOFT_DIRTY) { > 1128for (vma = mm->mmap; vma; vma = > vma->vm_next) { > 1129if (!(vma->vm_flags & > VM_SOFTDIRTY)) > 1130
Re: [PATCH 3/3] mm/mmu_notifier: contextual information for event triggering invalidation
Hi Jérôme, I love your patch! Yet something to improve: [auto build test ERROR on linus/master] [also build test ERROR on v4.20-rc5] [cannot apply to next-20181206] [if your patch is applied to the wrong git tree, please drop us a note to help improve the system] url: https://github.com/0day-ci/linux/commits/jglisse-redhat-com/mmu-notifier-contextual-informations/20181207-031930 config: x86_64-randconfig-x017-201848 (attached as .config) compiler: gcc-7 (Debian 7.3.0-1) 7.3.0 reproduce: # save the attached .config to linux build tree make ARCH=x86_64 All errors (new ones prefixed by >>): fs///proc/task_mmu.c: In function 'clear_refs_write': fs///proc/task_mmu.c:1099:29: error: storage size of 'range' isn't known struct mmu_notifier_range range; ^ >> fs///proc/task_mmu.c:1147:18: error: 'MMU_NOTIFY_SOFT_DIRTY' undeclared >> (first use in this function); did you mean 'CLEAR_REFS_SOFT_DIRTY'? range.event = MMU_NOTIFY_SOFT_DIRTY; ^ CLEAR_REFS_SOFT_DIRTY fs///proc/task_mmu.c:1147:18: note: each undeclared identifier is reported only once for each function it appears in fs///proc/task_mmu.c:1099:29: warning: unused variable 'range' [-Wunused-variable] struct mmu_notifier_range range; ^ vim +1147 fs///proc/task_mmu.c 1069 1070 static ssize_t clear_refs_write(struct file *file, const char __user *buf, 1071 size_t count, loff_t *ppos) 1072 { 1073 struct task_struct *task; 1074 char buffer[PROC_NUMBUF]; 1075 struct mm_struct *mm; 1076 struct vm_area_struct *vma; 1077 enum clear_refs_types type; 1078 struct mmu_gather tlb; 1079 int itype; 1080 int rv; 1081 1082 memset(buffer, 0, sizeof(buffer)); 1083 if (count > sizeof(buffer) - 1) 1084 count = sizeof(buffer) - 1; 1085 if (copy_from_user(buffer, buf, count)) 1086 return -EFAULT; 1087 rv = kstrtoint(strstrip(buffer), 10, &itype); 1088 if (rv < 0) 1089 return rv; 1090 type = (enum clear_refs_types)itype; 1091 if (type < CLEAR_REFS_ALL || type >= CLEAR_REFS_LAST) 1092 return -EINVAL; 1093 1094 task = 
get_proc_task(file_inode(file)); 1095 if (!task) 1096 return -ESRCH; 1097 mm = get_task_mm(task); 1098 if (mm) { > 1099 struct mmu_notifier_range range; 1100 struct clear_refs_private cp = { 1101 .type = type, 1102 }; 1103 struct mm_walk clear_refs_walk = { 1104 .pmd_entry = clear_refs_pte_range, 1105 .test_walk = clear_refs_test_walk, 1106 .mm = mm, 1107 .private = &cp, 1108 }; 1109 1110 if (type == CLEAR_REFS_MM_HIWATER_RSS) { if (down_write_killable(&mm->mmap_sem)) { 1112 count = -EINTR; 1113 goto out_mm; 1114 } 1115 1116 /* 1117 * Writing 5 to /proc/pid/clear_refs resets the peak 1118 * resident set size to this mm's current rss value. 1119 */ 1120 reset_mm_hiwater_rss(mm); 1121 up_write(&mm->mmap_sem); 1122 goto out_mm; 1123 } 1124 1125 down_read(&mm->mmap_sem); 1126 tlb_gather_mmu(&tlb, mm, 0, -1); 1127 if (type == CLEAR_REFS_SOFT_DIRTY) { 1128 for (vma = mm->mmap; vma; vma = vma->vm_next) { 1129 if (!(vma->vm_flags & VM_SOFTDIRTY)) 1130 continue; 1131 up_read(&mm->mmap_sem); 1132 if (down_write_killable(&mm->mmap_sem)) { 1133 count = -EINTR; 1134 goto out_mm; 1135 } 1136 for (vma = mm->mmap; vma; vma = vma->vm_next) { 1137 vma->vm_flags &= ~VM_SOFTDIRTY; 1138 vma_set_page_prot(vma); 1139 } 1140 downgrade_write(&mm->mmap_sem); 1141 break; 1142
Re: [PATCH 3/3] mm/mmu_notifier: contextual information for event triggering invalidation
Hi Jérôme, I love your patch! Yet something to improve: [auto build test ERROR on linus/master] [also build test ERROR on v4.20-rc5] [cannot apply to next-20181206] [if your patch is applied to the wrong git tree, please drop us a note to help improve the system] url: https://github.com/0day-ci/linux/commits/jglisse-redhat-com/mmu-notifier-contextual-informations/20181207-031930 config: i386-randconfig-x007-201848 (attached as .config) compiler: gcc-7 (Debian 7.3.0-1) 7.3.0 reproduce: # save the attached .config to linux build tree make ARCH=i386 All errors (new ones prefixed by >>): kernel/events/uprobes.c: In function '__replace_page': kernel/events/uprobes.c:174:28: error: storage size of 'range' isn't known struct mmu_notifier_range range; ^ >> kernel/events/uprobes.c:177:16: error: 'MMU_NOTIFY_CLEAR' undeclared (first >> use in this function); did you mean 'VM_ARCH_CLEAR'? range.event = MMU_NOTIFY_CLEAR; ^~~~ VM_ARCH_CLEAR kernel/events/uprobes.c:177:16: note: each undeclared identifier is reported only once for each function it appears in kernel/events/uprobes.c:174:28: warning: unused variable 'range' [-Wunused-variable] struct mmu_notifier_range range; ^ vim +177 kernel/events/uprobes.c 152 153 /** 154 * __replace_page - replace page in vma by new page. 155 * based on replace_page in mm/ksm.c 156 * 157 * @vma: vma that holds the pte pointing to page 158 * @addr: address the old @page is mapped at 159 * @page: the cowed page we are replacing by kpage 160 * @kpage:the modified page we replace page by 161 * 162 * Returns 0 on success, -EFAULT on failure. 
163 */ 164 static int __replace_page(struct vm_area_struct *vma, unsigned long addr, 165 struct page *old_page, struct page *new_page) 166 { 167 struct mm_struct *mm = vma->vm_mm; 168 struct page_vma_mapped_walk pvmw = { 169 .page = old_page, 170 .vma = vma, 171 .address = addr, 172 }; 173 int err; > 174 struct mmu_notifier_range range; 175 struct mem_cgroup *memcg; 176 > 177 range.event = MMU_NOTIFY_CLEAR; 178 range.start = addr; 179 range.end = addr + PAGE_SIZE; 180 range.mm = mm; 181 182 VM_BUG_ON_PAGE(PageTransHuge(old_page), old_page); 183 184 err = mem_cgroup_try_charge(new_page, vma->vm_mm, GFP_KERNEL, &memcg, 185 false); 186 if (err) 187 return err; 188 189 /* For try_to_free_swap() and munlock_vma_page() below */ 190 lock_page(old_page); 191 192 mmu_notifier_invalidate_range_start(&range); 193 err = -EAGAIN; 194 if (!page_vma_mapped_walk(&pvmw)) { 195 mem_cgroup_cancel_charge(new_page, memcg, false); 196 goto unlock; 197 } 198 VM_BUG_ON_PAGE(addr != pvmw.address, old_page); 199 200 get_page(new_page); 201 page_add_new_anon_rmap(new_page, vma, addr, false); 202 mem_cgroup_commit_charge(new_page, memcg, false, false); 203 lru_cache_add_active_or_unevictable(new_page, vma); 204 205 if (!PageAnon(old_page)) { 206 dec_mm_counter(mm, mm_counter_file(old_page)); 207 inc_mm_counter(mm, MM_ANONPAGES); 208 } 209 210 flush_cache_page(vma, addr, pte_pfn(*pvmw.pte)); 211 ptep_clear_flush_notify(vma, addr, pvmw.pte); 212 set_pte_at_notify(mm, addr, pvmw.pte, 213 mk_pte(new_page, vma->vm_page_prot)); 214 215 page_remove_rmap(old_page, false); 216 if (!page_mapped(old_page)) 217 try_to_free_swap(old_page); 218 page_vma_mapped_walk_done(&pvmw); 219 220 if (vma->vm_flags & VM_LOCKED) 221 munlock_vma_page(old_page); 222 put_page(old_page); 223 224 err = 0; 225 unlock: 226 mmu_notifier_invalidate_range_end(&range); 227 unlock_page(old_page); 228 return err; 229 } 230 --- 0-DAY kernel test infrastructureOpen Source Technology Center https://lists.01.org/pipermail/kbuild-all 
Intel Corporation .config.gz Description: application/gzip ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
Re: [PATCH 3/3] mm/mmu_notifier: contextual information for event triggering invalidation
On Mon, 3 Dec 2018 15:18:17 -0500 jgli...@redhat.com wrote: > CPU page table update can happens for many reasons, not only as a result > of a syscall (munmap(), mprotect(), mremap(), madvise(), ...) but also > as a result of kernel activities (memory compression, reclaim, migration, > ...). > > Users of mmu notifier API track changes to the CPU page table and take > specific action for them. While current API only provide range of virtual > address affected by the change, not why the changes is happening. > > This patchset adds event information so that users of mmu notifier can > differentiate among broad category: > - UNMAP: munmap() or mremap() > - CLEAR: page table is cleared (migration, compaction, reclaim, ...) > - PROTECTION_VMA: change in access protections for the range > - PROTECTION_PAGE: change in access protections for page in the range > - SOFT_DIRTY: soft dirtyness tracking > > Being able to identify munmap() and mremap() from other reasons why the > page table is cleared is important to allow user of mmu notifier to > update their own internal tracking structure accordingly (on munmap or > mremap it is not longer needed to track range of virtual address as it > becomes invalid). > > ... > > --- a/mm/oom_kill.c > +++ b/mm/oom_kill.c > @@ -519,6 +519,7 @@ bool __oom_reap_task_mm(struct mm_struct *mm) > struct mmu_notifier_range range; > struct mmu_gather tlb; > > + range.event = MMU_NOTIFY_CLEAR; > range.start = vma->vm_start; > range.end = vma->vm_end; > range.mm = mm; mmu_notifier_range and MMU_NOTIFY_CLEAR aren't defined if CONFIG_MMU_NOTIFIER=n. 
I'll try a temporary bodge: +++ a/include/linux/mmu_notifier.h @@ -10,8 +10,6 @@ struct mmu_notifier; struct mmu_notifier_ops; -#ifdef CONFIG_MMU_NOTIFIER - /* * The mmu notifier_mm structure is allocated and installed in * mm->mmu_notifier_mm inside the mm_take_all_locks() protected @@ -32,6 +30,8 @@ struct mmu_notifier_range { bool blockable; }; +#ifdef CONFIG_MMU_NOTIFIER + struct mmu_notifier_ops { /* * Called either by mmu_notifier_unregister or when the mm is But this new code should vanish altogether if CONFIG_MMU_NOTIFIER=n, please. Or at least, we shouldn't be unnecessarily initializing .mm and .event. Please take a look at debloating this code. ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
Re: [PATCH 3/3] mm/mmu_notifier: contextual information for event triggering invalidation
On Tue, Dec 04, 2018 at 10:17:48AM +0200, Mike Rapoport wrote: > On Mon, Dec 03, 2018 at 03:18:17PM -0500, jgli...@redhat.com wrote: > > From: Jérôme Glisse [...] > > diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h > > index cbeece8e47d4..3077d487be8b 100644 > > --- a/include/linux/mmu_notifier.h > > +++ b/include/linux/mmu_notifier.h > > @@ -25,10 +25,43 @@ struct mmu_notifier_mm { > > spinlock_t lock; > > }; > > > > +/* > > + * What event is triggering the invalidation: > > Can you please make it kernel-doc comment? Sorry should have done that in the first place, Andrew i will post a v2 with that and fixing my one stupid bug. > > + * > > + * MMU_NOTIFY_UNMAP > > + *either munmap() that unmap the range or a mremap() that move the > > range > > + * > > + * MMU_NOTIFY_CLEAR > > + *clear page table entry (many reasons for this like madvise() or > > replacing > > + *a page by another one, ...). > > + * > > + * MMU_NOTIFY_PROTECTION_VMA > > + *update is due to protection change for the range ie using the vma > > access > > + *permission (vm_page_prot) to update the whole range is enough no > > need to > > + *inspect changes to the CPU page table (mprotect() syscall) > > + * > > + * MMU_NOTIFY_PROTECTION_PAGE > > + *update is due to change in read/write flag for pages in the range so > > to > > + *mirror those changes the user must inspect the CPU page table (from > > the > > + *end callback). > > + * > > + * > > + * MMU_NOTIFY_SOFT_DIRTY > > + *soft dirty accounting (still same page and same access flags) > > + */ > > +enum mmu_notifier_event { > > + MMU_NOTIFY_UNMAP = 0, > > + MMU_NOTIFY_CLEAR, > > + MMU_NOTIFY_PROTECTION_VMA, > > + MMU_NOTIFY_PROTECTION_PAGE, > > + MMU_NOTIFY_SOFT_DIRTY, > > +}; ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
Re: [PATCH 3/3] mm/mmu_notifier: contextual information for event triggering invalidation
On Mon, Dec 03, 2018 at 03:18:17PM -0500, jgli...@redhat.com wrote: > From: Jérôme Glisse > > CPU page table update can happens for many reasons, not only as a result > of a syscall (munmap(), mprotect(), mremap(), madvise(), ...) but also > as a result of kernel activities (memory compression, reclaim, migration, > ...). > > Users of mmu notifier API track changes to the CPU page table and take > specific action for them. While current API only provide range of virtual > address affected by the change, not why the changes is happening. > > This patchset adds event information so that users of mmu notifier can > differentiate among broad category: > - UNMAP: munmap() or mremap() > - CLEAR: page table is cleared (migration, compaction, reclaim, ...) > - PROTECTION_VMA: change in access protections for the range > - PROTECTION_PAGE: change in access protections for page in the range > - SOFT_DIRTY: soft dirtyness tracking > > Being able to identify munmap() and mremap() from other reasons why the > page table is cleared is important to allow user of mmu notifier to > update their own internal tracking structure accordingly (on munmap or > mremap it is not longer needed to track range of virtual address as it > becomes invalid). 
> > Signed-off-by: Jérôme Glisse > Cc: Andrew Morton > Cc: Matthew Wilcox > Cc: Ross Zwisler > Cc: Jan Kara > Cc: Dan Williams > Cc: Paolo Bonzini > Cc: Radim Krčmář > Cc: Michal Hocko > Cc: Christian Koenig > Cc: Felix Kuehling > Cc: Ralph Campbell > Cc: John Hubbard > Cc: k...@vger.kernel.org > Cc: linux-r...@vger.kernel.org > Cc: linux-fsde...@vger.kernel.org > Cc: dri-devel@lists.freedesktop.org > --- > fs/dax.c | 1 + > fs/proc/task_mmu.c | 1 + > include/linux/mmu_notifier.h | 33 + > kernel/events/uprobes.c | 1 + > mm/huge_memory.c | 4 > mm/hugetlb.c | 4 > mm/khugepaged.c | 1 + > mm/ksm.c | 2 ++ > mm/madvise.c | 1 + > mm/memory.c | 5 + > mm/migrate.c | 2 ++ > mm/mprotect.c| 1 + > mm/mremap.c | 1 + > mm/oom_kill.c| 1 + > mm/rmap.c| 2 ++ > 15 files changed, 60 insertions(+) > > diff --git a/fs/dax.c b/fs/dax.c > index e22508ee19ec..83092c5ac5f0 100644 > --- a/fs/dax.c > +++ b/fs/dax.c > @@ -761,6 +761,7 @@ static void dax_entry_mkclean(struct address_space > *mapping, pgoff_t index, > struct mmu_notifier_range range; > unsigned long address; > > + range.event = MMU_NOTIFY_PROTECTION_PAGE; > range.mm = vma->vm_mm; > > cond_resched(); > diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c > index 53d625925669..4abb1668eeb3 100644 > --- a/fs/proc/task_mmu.c > +++ b/fs/proc/task_mmu.c > @@ -1144,6 +1144,7 @@ static ssize_t clear_refs_write(struct file *file, > const char __user *buf, > range.start = 0; > range.end = -1UL; > range.mm = mm; > + range.event = MMU_NOTIFY_SOFT_DIRTY; > mmu_notifier_invalidate_range_start(&range); > } > walk_page_range(0, mm->highest_vm_end, &clear_refs_walk); > diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h > index cbeece8e47d4..3077d487be8b 100644 > --- a/include/linux/mmu_notifier.h > +++ b/include/linux/mmu_notifier.h > @@ -25,10 +25,43 @@ struct mmu_notifier_mm { > spinlock_t lock; > }; > > +/* > + * What event is triggering the invalidation: Can you please make it kernel-doc comment? 
> + * > + * MMU_NOTIFY_UNMAP > + *either munmap() that unmap the range or a mremap() that move the range > + * > + * MMU_NOTIFY_CLEAR > + *clear page table entry (many reasons for this like madvise() or > replacing > + *a page by another one, ...). > + * > + * MMU_NOTIFY_PROTECTION_VMA > + *update is due to protection change for the range ie using the vma > access > + *permission (vm_page_prot) to update the whole range is enough no need > to > + *inspect changes to the CPU page table (mprotect() syscall) > + * > + * MMU_NOTIFY_PROTECTION_PAGE > + *update is due to change in read/write flag for pages in the range so to > + *mirror those changes the user must inspect the CPU page table (from the > + *end callback). > + * > + * > + * MMU_NOTIFY_SOFT_DIRTY > + *soft dirty accounting (still same page and same access flags) > + */ > +enum mmu_notifier_event { > + MMU_NOTIFY_UNMAP = 0, > + MMU_NOTIFY_CLEAR, > + MMU_NOTIFY_PROTECTION_VMA, > + MMU_NOTIFY_PROTECTION_PAGE, > + MMU_NOTIFY_SOFT_DIRTY, > +}; > + > struct mmu_notifier_range { > struct mm_struct *mm; > unsigned long start; > unsigned long end; > + enum mmu_notifier_event eve
[PATCH 3/3] mm/mmu_notifier: contextual information for event triggering invalidation
From: Jérôme Glisse CPU page table update can happens for many reasons, not only as a result of a syscall (munmap(), mprotect(), mremap(), madvise(), ...) but also as a result of kernel activities (memory compression, reclaim, migration, ...). Users of mmu notifier API track changes to the CPU page table and take specific action for them. While current API only provide range of virtual address affected by the change, not why the changes is happening. This patchset adds event information so that users of mmu notifier can differentiate among broad category: - UNMAP: munmap() or mremap() - CLEAR: page table is cleared (migration, compaction, reclaim, ...) - PROTECTION_VMA: change in access protections for the range - PROTECTION_PAGE: change in access protections for page in the range - SOFT_DIRTY: soft dirtyness tracking Being able to identify munmap() and mremap() from other reasons why the page table is cleared is important to allow user of mmu notifier to update their own internal tracking structure accordingly (on munmap or mremap it is not longer needed to track range of virtual address as it becomes invalid). 
Signed-off-by: Jérôme Glisse Cc: Andrew Morton Cc: Matthew Wilcox Cc: Ross Zwisler Cc: Jan Kara Cc: Dan Williams Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Michal Hocko Cc: Christian Koenig Cc: Felix Kuehling Cc: Ralph Campbell Cc: John Hubbard Cc: k...@vger.kernel.org Cc: linux-r...@vger.kernel.org Cc: linux-fsde...@vger.kernel.org Cc: dri-devel@lists.freedesktop.org --- fs/dax.c | 1 + fs/proc/task_mmu.c | 1 + include/linux/mmu_notifier.h | 33 + kernel/events/uprobes.c | 1 + mm/huge_memory.c | 4 mm/hugetlb.c | 4 mm/khugepaged.c | 1 + mm/ksm.c | 2 ++ mm/madvise.c | 1 + mm/memory.c | 5 + mm/migrate.c | 2 ++ mm/mprotect.c| 1 + mm/mremap.c | 1 + mm/oom_kill.c| 1 + mm/rmap.c| 2 ++ 15 files changed, 60 insertions(+) diff --git a/fs/dax.c b/fs/dax.c index e22508ee19ec..83092c5ac5f0 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -761,6 +761,7 @@ static void dax_entry_mkclean(struct address_space *mapping, pgoff_t index, struct mmu_notifier_range range; unsigned long address; + range.event = MMU_NOTIFY_PROTECTION_PAGE; range.mm = vma->vm_mm; cond_resched(); diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 53d625925669..4abb1668eeb3 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1144,6 +1144,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, range.start = 0; range.end = -1UL; range.mm = mm; + range.event = MMU_NOTIFY_SOFT_DIRTY; mmu_notifier_invalidate_range_start(&range); } walk_page_range(0, mm->highest_vm_end, &clear_refs_walk); diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index cbeece8e47d4..3077d487be8b 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -25,10 +25,43 @@ struct mmu_notifier_mm { spinlock_t lock; }; +/* + * What event is triggering the invalidation: + * + * MMU_NOTIFY_UNMAP + *either munmap() that unmap the range or a mremap() that move the range + * + * MMU_NOTIFY_CLEAR + *clear page table entry (many reasons for this like madvise() or replacing + *a page 
by another one, ...). + * + * MMU_NOTIFY_PROTECTION_VMA + *update is due to protection change for the range ie using the vma access + *permission (vm_page_prot) to update the whole range is enough no need to + *inspect changes to the CPU page table (mprotect() syscall) + * + * MMU_NOTIFY_PROTECTION_PAGE + *update is due to change in read/write flag for pages in the range so to + *mirror those changes the user must inspect the CPU page table (from the + *end callback). + * + * + * MMU_NOTIFY_SOFT_DIRTY + *soft dirty accounting (still same page and same access flags) + */ +enum mmu_notifier_event { + MMU_NOTIFY_UNMAP = 0, + MMU_NOTIFY_CLEAR, + MMU_NOTIFY_PROTECTION_VMA, + MMU_NOTIFY_PROTECTION_PAGE, + MMU_NOTIFY_SOFT_DIRTY, +}; + struct mmu_notifier_range { struct mm_struct *mm; unsigned long start; unsigned long end; + enum mmu_notifier_event event; bool blockable; }; diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index aa7996ca361e..b6ef3be1c24e 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -174,6 +174,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr, struct mmu_notifier_range range;