[PATCH v3 4/5] mm: Scan for dirty ptes and update cmtime on MS_ASYNC

2013-08-16 Thread Andy Lutomirski
This is probably unimportant but improves POSIX compliance.

Signed-off-by: Andy Lutomirski <l...@amacapital.net>
---
 mm/msync.c | 83 +-
 1 file changed, 72 insertions(+), 11 deletions(-)

diff --git a/mm/msync.c b/mm/msync.c
index 632df45..9e41acd 100644
--- a/mm/msync.c
+++ b/mm/msync.c
@@ -13,13 +13,16 @@
 #include <linux/file.h>
 #include <linux/syscalls.h>
 #include <linux/sched.h>
+#include <linux/rmap.h>
+#include <linux/pagemap.h>
 
 /*
  * MS_SYNC syncs the entire file - including mappings.
  *
  * MS_ASYNC does not start I/O (it used to, up to 2.5.67).
  * Nor does it marks the relevant pages dirty (it used to up to 2.6.17).
- * Now it doesn't do anything, since dirty pages are properly tracked.
+ * Now all it does is ensure that file timestamps get updated, since POSIX
+ * requires it.  We track dirty pages correctly without MS_ASYNC.
  *
  * The application may now run fsync() to
  * write out the dirty pages and wait on the writeout and check the result.
@@ -28,6 +31,57 @@
  * So by _not_ starting I/O in MS_ASYNC we provide complete flexibility to
  * applications.
  */
+
+static int msync_async_range(struct vm_area_struct *vma,
+ unsigned long *start, unsigned long end)
+{
+   struct mm_struct *mm;
+   struct address_space *mapping;
+   int iters = 0;
+
+   while (*start < end && *start < vma->vm_end && iters < 128) {
+   unsigned int page_mask, page_increm;
+
+   /*
+* Require that the pte is writable (because otherwise it can't
+* be dirty, so there's nothing to clean).
+*
+* In theory we could check the pte dirty bit, but this is
+* awkward and barely worth it.
+*/
+   struct page *page = follow_page_mask(vma, *start,
+FOLL_GET | FOLL_WRITE,
+&page_mask);
+
+   if (page && !IS_ERR(page)) {
+   if (lock_page_killable(page) == 0) {
+   page_mkclean(page);
+   unlock_page(page);
+   }
+   put_page(page);
+   }
+
+   if (IS_ERR(page))
+   return PTR_ERR(page);
+
+   page_increm = 1 + (~(*start >> PAGE_SHIFT) & page_mask);
+   *start += page_increm * PAGE_SIZE;
+   cond_resched();
+   iters++;
+   }
+
+   /* XXX: try to do this only once? */
+   mapping = vma->vm_file->f_mapping;
+   if (mapping->a_ops->flush_cmtime)
+   mapping->a_ops->flush_cmtime(mapping);
+
+   /* Give mmap_sem writers a chance. */
+   mm = current->mm;
+   up_read(&mm->mmap_sem);
+   down_read(&mm->mmap_sem);
+   return 0;
+}
+
 SYSCALL_DEFINE3(msync, unsigned long, start, size_t, len, int, flags)
 {
unsigned long end;
@@ -77,18 +131,25 @@ SYSCALL_DEFINE3(msync, unsigned long, start, size_t, len, int, flags)
goto out_unlock;
}
file = vma->vm_file;
-   start = vma->vm_end;
-   if ((flags & MS_SYNC) && file &&
-   (vma->vm_flags & VM_SHARED)) {
-   get_file(file);
-   up_read(&mm->mmap_sem);
-   error = vfs_fsync(file, 0);
-   fput(file);
-   if (error || start >= end)
-   goto out;
-   down_read(&mm->mmap_sem);
+   if (file && vma->vm_flags & VM_SHARED) {
+   if (flags & MS_SYNC) {
+   start = vma->vm_end;
+   get_file(file);
+   up_read(&mm->mmap_sem);
+   error = vfs_fsync(file, 0);
+   fput(file);
+   if (error || start >= end)
+   goto out;
+   down_read(&mm->mmap_sem);
+   } else if ((vma->vm_flags & VM_WRITE) &&
+  file->f_mapping) {
+   error = msync_async_range(vma, &start, end);
+   } else {
+   start = vma->vm_end;
+   }
vma = find_vma(mm, start);
} else {
+   start = vma->vm_end;
if (start >= end) {
error = 0;
goto out_unlock;
-- 
1.8.3.1
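
For reference, here is a minimal userspace check of the behavior this patch is
aiming for (illustrative only, not part of the patch; the file name, mapping
size, and sleep are arbitrary): map a file MAP_SHARED, dirty it through the
mapping, call msync() with MS_ASYNC, and see via fstat() whether st_mtime
advanced, as POSIX requires.

/* Illustrative sketch -- not part of the patch. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

int main(void)
{
	struct stat before, after;
	int fd = open("msync-test", O_RDWR | O_CREAT, 0644);
	char *p;

	if (fd < 0 || ftruncate(fd, 4096) < 0)
		return 1;
	p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED)
		return 1;

	fstat(fd, &before);
	sleep(1);			/* make a timestamp change observable */
	p[0] = 'x';			/* dirty the page through the mapping */
	if (msync(p, 4096, MS_ASYNC))	/* should flush c/mtime with this series */
		return 1;
	fstat(fd, &after);

	printf("st_mtime %s\n",
	       after.st_mtime > before.st_mtime ? "updated" : "not updated");
	return 0;
}

What such a test reports depends on the rest of this series, which defers
c/mtime updates and provides the flush_cmtime address_space operation that
msync_async_range() calls after cleaning the ptes.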
