Re: [PATCH 3/3] mm/mmu_notifier: contextual information for event triggering invalidation

2018-12-06 Thread Jerome Glisse
Should all be fixed in v2. I built with and without the MMU notifier and
did not hit any issue in v2.

On Fri, Dec 07, 2018 at 05:19:21AM +0800, kbuild test robot wrote:
> Hi Jérôme,
> 
> I love your patch! Yet something to improve:
> 
> [auto build test ERROR on linus/master]
> [also build test ERROR on v4.20-rc5]
> [cannot apply to next-20181206]
> [if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
> 
> url:
> https://github.com/0day-ci/linux/commits/jglisse-redhat-com/mmu-notifier-contextual-informations/20181207-031930
> config: x86_64-randconfig-x017-201848 (attached as .config)
> compiler: gcc-7 (Debian 7.3.0-1) 7.3.0
> reproduce:
> # save the attached .config to linux build tree
> make ARCH=x86_64 
> 
> All errors (new ones prefixed by >>):
> 
>    fs///proc/task_mmu.c: In function 'clear_refs_write':
>    fs///proc/task_mmu.c:1099:29: error: storage size of 'range' isn't known
>      struct mmu_notifier_range range;
>                                ^
> >> fs///proc/task_mmu.c:1147:18: error: 'MMU_NOTIFY_SOFT_DIRTY' undeclared (first use in this function); did you mean 'CLEAR_REFS_SOFT_DIRTY'?
>      range.event = MMU_NOTIFY_SOFT_DIRTY;
>                    ^
>                    CLEAR_REFS_SOFT_DIRTY
>    fs///proc/task_mmu.c:1147:18: note: each undeclared identifier is reported only once for each function it appears in
>    fs///proc/task_mmu.c:1099:29: warning: unused variable 'range' [-Wunused-variable]
>      struct mmu_notifier_range range;
>                                ^
> 
> vim +1147 fs///proc/task_mmu.c
> 

Re: [PATCH 3/3] mm/mmu_notifier: contextual information for event triggering invalidation

2018-12-06 Thread kbuild test robot
Hi Jérôme,

I love your patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v4.20-rc5]
[cannot apply to next-20181206]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:
https://github.com/0day-ci/linux/commits/jglisse-redhat-com/mmu-notifier-contextual-informations/20181207-031930
config: x86_64-randconfig-x017-201848 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-1) 7.3.0
reproduce:
# save the attached .config to linux build tree
make ARCH=x86_64 

All errors (new ones prefixed by >>):

   fs///proc/task_mmu.c: In function 'clear_refs_write':
   fs///proc/task_mmu.c:1099:29: error: storage size of 'range' isn't known
  struct mmu_notifier_range range;
^
>> fs///proc/task_mmu.c:1147:18: error: 'MMU_NOTIFY_SOFT_DIRTY' undeclared (first use in this function); did you mean 'CLEAR_REFS_SOFT_DIRTY'?
      range.event = MMU_NOTIFY_SOFT_DIRTY;
                    ^
                    CLEAR_REFS_SOFT_DIRTY
   fs///proc/task_mmu.c:1147:18: note: each undeclared identifier is reported only once for each function it appears in
   fs///proc/task_mmu.c:1099:29: warning: unused variable 'range' [-Wunused-variable]
  struct mmu_notifier_range range;
^

vim +1147 fs///proc/task_mmu.c

  1069  
  1070  static ssize_t clear_refs_write(struct file *file, const char __user *buf,
  1071  size_t count, loff_t *ppos)
  1072  {
  1073  struct task_struct *task;
  1074  char buffer[PROC_NUMBUF];
  1075  struct mm_struct *mm;
  1076  struct vm_area_struct *vma;
  1077  enum clear_refs_types type;
  1078  struct mmu_gather tlb;
  1079  int itype;
  1080  int rv;
  1081  
  1082  memset(buffer, 0, sizeof(buffer));
  1083  if (count > sizeof(buffer) - 1)
  1084  count = sizeof(buffer) - 1;
  1085  if (copy_from_user(buffer, buf, count))
  1086  return -EFAULT;
  1087  rv = kstrtoint(strstrip(buffer), 10, &itype);
  1088  if (rv < 0)
  1089  return rv;
  1090  type = (enum clear_refs_types)itype;
  1091  if (type < CLEAR_REFS_ALL || type >= CLEAR_REFS_LAST)
  1092  return -EINVAL;
  1093  
  1094  task = get_proc_task(file_inode(file));
  1095  if (!task)
  1096  return -ESRCH;
  1097  mm = get_task_mm(task);
  1098  if (mm) {
> 1099  struct mmu_notifier_range range;
  1100  struct clear_refs_private cp = {
  1101  .type = type,
  1102  };
  1103  struct mm_walk clear_refs_walk = {
  1104  .pmd_entry = clear_refs_pte_range,
  1105  .test_walk = clear_refs_test_walk,
  1106  .mm = mm,
  1107  .private = &cp,
  1108  };
  1109  
  1110  if (type == CLEAR_REFS_MM_HIWATER_RSS) {
  1111  if (down_write_killable(&mm->mmap_sem)) {
  1112  count = -EINTR;
  1113  goto out_mm;
  1114  }
  1115  
  1116  /*
  1117   * Writing 5 to /proc/pid/clear_refs resets the peak
  1118   * resident set size to this mm's current rss value.
  1119   */
  1120  reset_mm_hiwater_rss(mm);
  1121  up_write(&mm->mmap_sem);
  1122  goto out_mm;
  1123  }
  1124  
  1125  down_read(&mm->mmap_sem);
  1126  tlb_gather_mmu(&tlb, mm, 0, -1);
  1127  if (type == CLEAR_REFS_SOFT_DIRTY) {
  1128  for (vma = mm->mmap; vma; vma = vma->vm_next) {
  1129  if (!(vma->vm_flags & VM_SOFTDIRTY))
  1130  continue;
  1131  up_read(&mm->mmap_sem);
  1132  if (down_write_killable(&mm->mmap_sem)) {
  1133  count = -EINTR;
  1134  goto out_mm;
  1135  }
  1136  for (vma = mm->mmap; vma; vma = vma->vm_next) {
  1137  vma->vm_flags &= ~VM_SOFTDIRTY;
  1138  vma_set_page_prot(vma);
  1139  }
  1140  downgrade_write(&mm->mmap_sem);
  1141  break;
  1142

Re: [PATCH 3/3] mm/mmu_notifier: contextual information for event triggering invalidation

2018-12-06 Thread kbuild test robot
Hi Jérôme,

I love your patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v4.20-rc5]
[cannot apply to next-20181206]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:
https://github.com/0day-ci/linux/commits/jglisse-redhat-com/mmu-notifier-contextual-informations/20181207-031930
config: i386-randconfig-x007-201848 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-1) 7.3.0
reproduce:
# save the attached .config to linux build tree
make ARCH=i386 

All errors (new ones prefixed by >>):

   kernel/events/uprobes.c: In function '__replace_page':
   kernel/events/uprobes.c:174:28: error: storage size of 'range' isn't known
 struct mmu_notifier_range range;
   ^
>> kernel/events/uprobes.c:177:16: error: 'MMU_NOTIFY_CLEAR' undeclared (first use in this function); did you mean 'VM_ARCH_CLEAR'?
      range.event = MMU_NOTIFY_CLEAR;
                    ^~~~
                    VM_ARCH_CLEAR
   kernel/events/uprobes.c:177:16: note: each undeclared identifier is reported only once for each function it appears in
   kernel/events/uprobes.c:174:28: warning: unused variable 'range' [-Wunused-variable]
 struct mmu_notifier_range range;
   ^

vim +177 kernel/events/uprobes.c

   152  
   153  /**
   154   * __replace_page - replace page in vma by new page.
   155   * based on replace_page in mm/ksm.c
   156   *
   157   * @vma:  vma that holds the pte pointing to page
   158   * @addr: address the old @page is mapped at
   159   * @page: the cowed page we are replacing by kpage
   160   * @kpage:the modified page we replace page by
   161   *
   162   * Returns 0 on success, -EFAULT on failure.
   163   */
   164  static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
   165  struct page *old_page, struct page *new_page)
   166  {
   167  struct mm_struct *mm = vma->vm_mm;
   168  struct page_vma_mapped_walk pvmw = {
   169  .page = old_page,
   170  .vma = vma,
   171  .address = addr,
   172  };
   173  int err;
 > 174  struct mmu_notifier_range range;
   175  struct mem_cgroup *memcg;
   176  
 > 177  range.event = MMU_NOTIFY_CLEAR;
   178  range.start = addr;
   179  range.end = addr + PAGE_SIZE;
   180  range.mm = mm;
   181  
   182  VM_BUG_ON_PAGE(PageTransHuge(old_page), old_page);
   183  
   184  err = mem_cgroup_try_charge(new_page, vma->vm_mm, GFP_KERNEL, &memcg,
   185  false);
   186  if (err)
   187  return err;
   188  
   189  /* For try_to_free_swap() and munlock_vma_page() below */
   190  lock_page(old_page);
   191  
   192  mmu_notifier_invalidate_range_start(&range);
   193  err = -EAGAIN;
   194  if (!page_vma_mapped_walk(&pvmw)) {
   195  mem_cgroup_cancel_charge(new_page, memcg, false);
   196  goto unlock;
   197  }
   198  VM_BUG_ON_PAGE(addr != pvmw.address, old_page);
   199  
   200  get_page(new_page);
   201  page_add_new_anon_rmap(new_page, vma, addr, false);
   202  mem_cgroup_commit_charge(new_page, memcg, false, false);
   203  lru_cache_add_active_or_unevictable(new_page, vma);
   204  
   205  if (!PageAnon(old_page)) {
   206  dec_mm_counter(mm, mm_counter_file(old_page));
   207  inc_mm_counter(mm, MM_ANONPAGES);
   208  }
   209  
   210  flush_cache_page(vma, addr, pte_pfn(*pvmw.pte));
   211  ptep_clear_flush_notify(vma, addr, pvmw.pte);
   212  set_pte_at_notify(mm, addr, pvmw.pte,
   213  mk_pte(new_page, vma->vm_page_prot));
   214  
   215  page_remove_rmap(old_page, false);
   216  if (!page_mapped(old_page))
   217  try_to_free_swap(old_page);
   218  page_vma_mapped_walk_done(&pvmw);
   219  
   220  if (vma->vm_flags & VM_LOCKED)
   221  munlock_vma_page(old_page);
   222  put_page(old_page);
   223  
   224  err = 0;
   225   unlock:
   226  mmu_notifier_invalidate_range_end(&range);
   227  unlock_page(old_page);
   228  return err;
   229  }
   230  

---
0-DAY kernel test infrastructure            Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation




Re: [PATCH 3/3] mm/mmu_notifier: contextual information for event triggering invalidation

2018-12-04 Thread Andrew Morton
On Mon,  3 Dec 2018 15:18:17 -0500 jgli...@redhat.com wrote:

> CPU page table updates can happen for many reasons, not only as a result
> of a syscall (munmap(), mprotect(), mremap(), madvise(), ...) but also
> as a result of kernel activities (memory compression, reclaim, migration,
> ...).
> 
> Users of the mmu notifier API track changes to the CPU page table and take
> specific actions for them, but the current API only provides the range of
> virtual addresses affected by a change, not why the change is happening.
> 
> This patchset adds event information so that users of mmu notifiers can
> differentiate among broad categories:
> - UNMAP: munmap() or mremap()
> - CLEAR: page table is cleared (migration, compaction, reclaim, ...)
> - PROTECTION_VMA: change in access protections for the range
> - PROTECTION_PAGE: change in access protections for pages in the range
> - SOFT_DIRTY: soft dirtiness tracking
> 
> Being able to distinguish munmap() and mremap() from other reasons why the
> page table is cleared is important to allow users of mmu notifiers to
> update their own internal tracking structures accordingly (on munmap or
> mremap it is no longer necessary to track the range of virtual addresses,
> as it becomes invalid).
> 
> ...
>
> --- a/mm/oom_kill.c
> +++ b/mm/oom_kill.c
> @@ -519,6 +519,7 @@ bool __oom_reap_task_mm(struct mm_struct *mm)
>   struct mmu_notifier_range range;
>   struct mmu_gather tlb;
>  
> + range.event = MMU_NOTIFY_CLEAR;
>   range.start = vma->vm_start;
>   range.end = vma->vm_end;
>   range.mm = mm;

mmu_notifier_range and MMU_NOTIFY_CLEAR aren't defined if
CONFIG_MMU_NOTIFIER=n.

I'll try a temporary bodge:

+++ a/include/linux/mmu_notifier.h
@@ -10,8 +10,6 @@
 struct mmu_notifier;
 struct mmu_notifier_ops;
 
-#ifdef CONFIG_MMU_NOTIFIER
-
 /*
  * The mmu notifier_mm structure is allocated and installed in
  * mm->mmu_notifier_mm inside the mm_take_all_locks() protected
@@ -32,6 +30,8 @@ struct mmu_notifier_range {
bool blockable;
 };
 
+#ifdef CONFIG_MMU_NOTIFIER
+
 struct mmu_notifier_ops {
/*
 * Called either by mmu_notifier_unregister or when the mm is


But this new code should vanish altogether if CONFIG_MMU_NOTIFIER=n,
please.  Or at least, we shouldn't be unnecessarily initializing .mm
and .event.  Please take a look at debloating this code.
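
One way to get there is to hide the initialization behind a helper that compiles away when CONFIG_MMU_NOTIFIER=n. The sketch below is an illustration only, not part of the posted patch: the mmu_notifier_range_init() name is hypothetical, and it assumes the event enum is made visible in both configurations (the missing visibility is exactly what the kbuild errors above trip over).

/* Sketch only -- not from the posted patch; names are illustrative. */

/* Keep the event enum outside any #ifdef so MMU_NOTIFY_* is always defined
 * for call sites, whatever the config. */
enum mmu_notifier_event {
	MMU_NOTIFY_UNMAP = 0,
	MMU_NOTIFY_CLEAR,
	MMU_NOTIFY_PROTECTION_VMA,
	MMU_NOTIFY_PROTECTION_PAGE,
	MMU_NOTIFY_SOFT_DIRTY,
};

#ifdef CONFIG_MMU_NOTIFIER
/* struct mmu_notifier_range here is the one added by the patch. */
static inline void mmu_notifier_range_init(struct mmu_notifier_range *range,
					   struct mm_struct *mm,
					   unsigned long start,
					   unsigned long end,
					   enum mmu_notifier_event event)
{
	range->mm = mm;
	range->start = start;
	range->end = end;
	range->event = event;
}
#else
/* With CONFIG_MMU_NOTIFIER=n the range collapses to an empty struct and the
 * init helper is a no-op, so call sites still compile but nothing is
 * initialized for a variable that nothing will ever read. */
struct mmu_notifier_range {};

static inline void mmu_notifier_range_init(struct mmu_notifier_range *range,
					   struct mm_struct *mm,
					   unsigned long start,
					   unsigned long end,
					   enum mmu_notifier_event event)
{
}
#endif

A call site like the oom_kill.c hunk quoted above could then replace the four open-coded assignments with a single mmu_notifier_range_init(&range, mm, vma->vm_start, vma->vm_end, MMU_NOTIFY_CLEAR) call.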




Re: [PATCH 3/3] mm/mmu_notifier: contextual information for event triggering invalidation

2018-12-04 Thread Jerome Glisse
On Tue, Dec 04, 2018 at 10:17:48AM +0200, Mike Rapoport wrote:
> On Mon, Dec 03, 2018 at 03:18:17PM -0500, jgli...@redhat.com wrote:
> > From: Jérôme Glisse 

[...]

> > diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
> > index cbeece8e47d4..3077d487be8b 100644
> > --- a/include/linux/mmu_notifier.h
> > +++ b/include/linux/mmu_notifier.h
> > @@ -25,10 +25,43 @@ struct mmu_notifier_mm {
> > spinlock_t lock;
> >  };
> > 
> > +/*
> > + * What event is triggering the invalidation:
> 
> Can you please make it kernel-doc comment?

Sorry, I should have done that in the first place. Andrew, I will post a v2
with that and a fix for my one stupid bug.



> > + *
> > + * MMU_NOTIFY_UNMAP
> > + *either munmap() that unmap the range or a mremap() that move the range
> > + *
> > + * MMU_NOTIFY_CLEAR
> > + *clear page table entry (many reasons for this like madvise() or replacing
> > + *a page by another one, ...).
> > + *
> > + * MMU_NOTIFY_PROTECTION_VMA
> > + *update is due to protection change for the range ie using the vma access
> > + *permission (vm_page_prot) to update the whole range is enough no need to
> > + *inspect changes to the CPU page table (mprotect() syscall)
> > + *
> > + * MMU_NOTIFY_PROTECTION_PAGE
> > + *update is due to change in read/write flag for pages in the range so to
> > + *mirror those changes the user must inspect the CPU page table (from the
> > + *end callback).
> > + *
> > + *
> > + * MMU_NOTIFY_SOFT_DIRTY
> > + *soft dirty accounting (still same page and same access flags)
> > + */
> > +enum mmu_notifier_event {
> > +   MMU_NOTIFY_UNMAP = 0,
> > +   MMU_NOTIFY_CLEAR,
> > +   MMU_NOTIFY_PROTECTION_VMA,
> > +   MMU_NOTIFY_PROTECTION_PAGE,
> > +   MMU_NOTIFY_SOFT_DIRTY,
> > +};


Re: [PATCH 3/3] mm/mmu_notifier: contextual information for event triggering invalidation

2018-12-04 Thread Mike Rapoport
On Mon, Dec 03, 2018 at 03:18:17PM -0500, jgli...@redhat.com wrote:
> From: Jérôme Glisse 
> 
> CPU page table updates can happen for many reasons, not only as a result
> of a syscall (munmap(), mprotect(), mremap(), madvise(), ...) but also
> as a result of kernel activities (memory compression, reclaim, migration,
> ...).
> 
> Users of the mmu notifier API track changes to the CPU page table and take
> specific actions for them, but the current API only provides the range of
> virtual addresses affected by a change, not why the change is happening.
> 
> This patchset adds event information so that users of mmu notifiers can
> differentiate among broad categories:
> - UNMAP: munmap() or mremap()
> - CLEAR: page table is cleared (migration, compaction, reclaim, ...)
> - PROTECTION_VMA: change in access protections for the range
> - PROTECTION_PAGE: change in access protections for pages in the range
> - SOFT_DIRTY: soft dirtiness tracking
> 
> Being able to distinguish munmap() and mremap() from other reasons why the
> page table is cleared is important to allow users of mmu notifiers to
> update their own internal tracking structures accordingly (on munmap or
> mremap it is no longer necessary to track the range of virtual addresses,
> as it becomes invalid).
> 
> Signed-off-by: Jérôme Glisse 
> Cc: Andrew Morton 
> Cc: Matthew Wilcox 
> Cc: Ross Zwisler 
> Cc: Jan Kara 
> Cc: Dan Williams 
> Cc: Paolo Bonzini 
> Cc: Radim Krčmář 
> Cc: Michal Hocko 
> Cc: Christian Koenig 
> Cc: Felix Kuehling 
> Cc: Ralph Campbell 
> Cc: John Hubbard 
> Cc: k...@vger.kernel.org
> Cc: linux-r...@vger.kernel.org
> Cc: linux-fsde...@vger.kernel.org
> Cc: dri-devel@lists.freedesktop.org
> ---
>  fs/dax.c                     |  1 +
>  fs/proc/task_mmu.c           |  1 +
>  include/linux/mmu_notifier.h | 33 +++++++++++++++++++++++++++++++++
>  kernel/events/uprobes.c      |  1 +
>  mm/huge_memory.c             |  4 ++++
>  mm/hugetlb.c                 |  4 ++++
>  mm/khugepaged.c              |  1 +
>  mm/ksm.c                     |  2 ++
>  mm/madvise.c                 |  1 +
>  mm/memory.c                  |  5 +++++
>  mm/migrate.c                 |  2 ++
>  mm/mprotect.c                |  1 +
>  mm/mremap.c                  |  1 +
>  mm/oom_kill.c                |  1 +
>  mm/rmap.c                    |  2 ++
>  15 files changed, 60 insertions(+)
> 
> diff --git a/fs/dax.c b/fs/dax.c
> index e22508ee19ec..83092c5ac5f0 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> > @@ -761,6 +761,7 @@ static void dax_entry_mkclean(struct address_space *mapping, pgoff_t index,
>   struct mmu_notifier_range range;
>   unsigned long address;
> 
> + range.event = MMU_NOTIFY_PROTECTION_PAGE;
>   range.mm = vma->vm_mm;
> 
>   cond_resched();
> diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
> index 53d625925669..4abb1668eeb3 100644
> --- a/fs/proc/task_mmu.c
> +++ b/fs/proc/task_mmu.c
> > @@ -1144,6 +1144,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
>   range.start = 0;
>   range.end = -1UL;
>   range.mm = mm;
> + range.event = MMU_NOTIFY_SOFT_DIRTY;
>   mmu_notifier_invalidate_range_start(&range);
>   }
>   walk_page_range(0, mm->highest_vm_end, &clear_refs_walk);
> diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
> index cbeece8e47d4..3077d487be8b 100644
> --- a/include/linux/mmu_notifier.h
> +++ b/include/linux/mmu_notifier.h
> @@ -25,10 +25,43 @@ struct mmu_notifier_mm {
>   spinlock_t lock;
>  };
> 
> +/*
> + * What event is triggering the invalidation:

Can you please make it kernel-doc comment?

> + *
> + * MMU_NOTIFY_UNMAP
> + *either munmap() that unmap the range or a mremap() that move the range
> + *
> + * MMU_NOTIFY_CLEAR
> > + *clear page table entry (many reasons for this like madvise() or replacing
> + *a page by another one, ...).
> + *
> + * MMU_NOTIFY_PROTECTION_VMA
> > + *update is due to protection change for the range ie using the vma access
> > + *permission (vm_page_prot) to update the whole range is enough no need to
> + *inspect changes to the CPU page table (mprotect() syscall)
> + *
> + * MMU_NOTIFY_PROTECTION_PAGE
> + *update is due to change in read/write flag for pages in the range so to
> + *mirror those changes the user must inspect the CPU page table (from the
> + *end callback).
> + *
> + *
> + * MMU_NOTIFY_SOFT_DIRTY
> + *soft dirty accounting (still same page and same access flags)
> + */
> +enum mmu_notifier_event {
> + MMU_NOTIFY_UNMAP = 0,
> + MMU_NOTIFY_CLEAR,
> + MMU_NOTIFY_PROTECTION_VMA,
> + MMU_NOTIFY_PROTECTION_PAGE,
> + MMU_NOTIFY_SOFT_DIRTY,
> +};
> +
>  struct mmu_notifier_range {
>   struct mm_struct *mm;
>   unsigned long start;
>   unsigned long end;
> + enum mmu_notifier_event eve
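
For reference, a sketch of what the comment could look like once converted to kernel-doc; the wording is lifted from the patch and only the markup changes, so treat it as an illustration rather than the actual v2:

/**
 * enum mmu_notifier_event - reason a range invalidation is happening
 *
 * @MMU_NOTIFY_UNMAP: either munmap() that unmaps the range or mremap()
 * that moves the range.
 *
 * @MMU_NOTIFY_CLEAR: clear page table entry (many reasons for this, like
 * madvise() or replacing a page by another one, ...).
 *
 * @MMU_NOTIFY_PROTECTION_VMA: update is due to a protection change for the
 * range, i.e. using the vma access permission (vm_page_prot) to update the
 * whole range is enough, no need to inspect changes to the CPU page table
 * (mprotect() syscall).
 *
 * @MMU_NOTIFY_PROTECTION_PAGE: update is due to a change of the read/write
 * flags for pages in the range, so to mirror those changes the user must
 * inspect the CPU page table (from the end callback).
 *
 * @MMU_NOTIFY_SOFT_DIRTY: soft dirty accounting (still the same page and
 * the same access flags).
 */
enum mmu_notifier_event {
	MMU_NOTIFY_UNMAP = 0,
	MMU_NOTIFY_CLEAR,
	MMU_NOTIFY_PROTECTION_VMA,
	MMU_NOTIFY_PROTECTION_PAGE,
	MMU_NOTIFY_SOFT_DIRTY,
};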

[PATCH 3/3] mm/mmu_notifier: contextual information for event triggering invalidation

2018-12-03 Thread jglisse
From: Jérôme Glisse 

CPU page table updates can happen for many reasons, not only as a result
of a syscall (munmap(), mprotect(), mremap(), madvise(), ...) but also
as a result of kernel activities (memory compression, reclaim, migration,
...).

Users of the mmu notifier API track changes to the CPU page table and take
specific actions for them, but the current API only provides the range of
virtual addresses affected by a change, not why the change is happening.

This patchset adds event information so that users of mmu notifiers can
differentiate among broad categories:
- UNMAP: munmap() or mremap()
- CLEAR: page table is cleared (migration, compaction, reclaim, ...)
- PROTECTION_VMA: change in access protections for the range
- PROTECTION_PAGE: change in access protections for pages in the range
- SOFT_DIRTY: soft dirtiness tracking

Being able to distinguish munmap() and mremap() from other reasons why the
page table is cleared is important to allow users of mmu notifiers to
update their own internal tracking structures accordingly (on munmap or
mremap it is no longer necessary to track the range of virtual addresses,
as it becomes invalid).

Signed-off-by: Jérôme Glisse 
Cc: Andrew Morton 
Cc: Matthew Wilcox 
Cc: Ross Zwisler 
Cc: Jan Kara 
Cc: Dan Williams 
Cc: Paolo Bonzini 
Cc: Radim Krčmář 
Cc: Michal Hocko 
Cc: Christian Koenig 
Cc: Felix Kuehling 
Cc: Ralph Campbell 
Cc: John Hubbard 
Cc: k...@vger.kernel.org
Cc: linux-r...@vger.kernel.org
Cc: linux-fsde...@vger.kernel.org
Cc: dri-devel@lists.freedesktop.org
---
 fs/dax.c                     |  1 +
 fs/proc/task_mmu.c           |  1 +
 include/linux/mmu_notifier.h | 33 +++++++++++++++++++++++++++++++++
 kernel/events/uprobes.c      |  1 +
 mm/huge_memory.c             |  4 ++++
 mm/hugetlb.c                 |  4 ++++
 mm/khugepaged.c              |  1 +
 mm/ksm.c                     |  2 ++
 mm/madvise.c                 |  1 +
 mm/memory.c                  |  5 +++++
 mm/migrate.c                 |  2 ++
 mm/mprotect.c                |  1 +
 mm/mremap.c                  |  1 +
 mm/oom_kill.c                |  1 +
 mm/rmap.c                    |  2 ++
 15 files changed, 60 insertions(+)

diff --git a/fs/dax.c b/fs/dax.c
index e22508ee19ec..83092c5ac5f0 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -761,6 +761,7 @@ static void dax_entry_mkclean(struct address_space *mapping, pgoff_t index,
struct mmu_notifier_range range;
unsigned long address;
 
+   range.event = MMU_NOTIFY_PROTECTION_PAGE;
range.mm = vma->vm_mm;
 
cond_resched();
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 53d625925669..4abb1668eeb3 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1144,6 +1144,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
range.start = 0;
range.end = -1UL;
range.mm = mm;
+   range.event = MMU_NOTIFY_SOFT_DIRTY;
mmu_notifier_invalidate_range_start(&range);
}
walk_page_range(0, mm->highest_vm_end, &clear_refs_walk);
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index cbeece8e47d4..3077d487be8b 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -25,10 +25,43 @@ struct mmu_notifier_mm {
spinlock_t lock;
 };
 
+/*
+ * What event is triggering the invalidation:
+ *
+ * MMU_NOTIFY_UNMAP
+ *either munmap() that unmap the range or a mremap() that move the range
+ *
+ * MMU_NOTIFY_CLEAR
+ *clear page table entry (many reasons for this like madvise() or replacing
+ *a page by another one, ...).
+ *
+ * MMU_NOTIFY_PROTECTION_VMA
+ *update is due to protection change for the range ie using the vma access
+ *permission (vm_page_prot) to update the whole range is enough no need to
+ *inspect changes to the CPU page table (mprotect() syscall)
+ *
+ * MMU_NOTIFY_PROTECTION_PAGE
+ *update is due to change in read/write flag for pages in the range so to
+ *mirror those changes the user must inspect the CPU page table (from the
+ *end callback).
+ *
+ *
+ * MMU_NOTIFY_SOFT_DIRTY
+ *soft dirty accounting (still same page and same access flags)
+ */
+enum mmu_notifier_event {
+   MMU_NOTIFY_UNMAP = 0,
+   MMU_NOTIFY_CLEAR,
+   MMU_NOTIFY_PROTECTION_VMA,
+   MMU_NOTIFY_PROTECTION_PAGE,
+   MMU_NOTIFY_SOFT_DIRTY,
+};
+
 struct mmu_notifier_range {
struct mm_struct *mm;
unsigned long start;
unsigned long end;
+   enum mmu_notifier_event event;
bool blockable;
 };
 
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index aa7996ca361e..b6ef3be1c24e 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -174,6 +174,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
struct mmu_notifier_range range;
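
To make the use of the new field concrete, here is a consumer-side sketch (not part of the series): it assumes the invalidate_range_start() callback already receives the mmu_notifier_range structure, as done by the earlier patches of this set, and the my_mirror_*() names and struct are hypothetical.

struct my_mirror {
	struct mmu_notifier mn;
	/* driver-private tracking of mirrored ranges would live here */
};

static void my_mirror_drop_range(struct my_mirror *m,
				 unsigned long start, unsigned long end);
static void my_mirror_invalidate(struct my_mirror *m,
				 unsigned long start, unsigned long end);

static int my_mirror_invalidate_range_start(struct mmu_notifier *mn,
					    const struct mmu_notifier_range *range)
{
	struct my_mirror *mirror = container_of(mn, struct my_mirror, mn);

	switch (range->event) {
	case MMU_NOTIFY_UNMAP:
		/*
		 * munmap()/mremap(): the virtual address range itself goes
		 * away, so drop the tracking structure for it instead of
		 * just invalidating the mirrored mappings.
		 */
		my_mirror_drop_range(mirror, range->start, range->end);
		break;
	case MMU_NOTIFY_SOFT_DIRTY:
		/*
		 * Per the comment above: same page, same access flags, only
		 * soft-dirty accounting changed, so a read-only mirror has
		 * nothing to do here.
		 */
		break;
	default:
		/* CLEAR / PROTECTION_*: invalidate and re-fault later. */
		my_mirror_invalidate(mirror, range->start, range->end);
		break;
	}
	return 0;
}

Without the event field, every category above would have to be treated as the conservative default case.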