From: Pekka Enberg <[EMAIL PROTECTED]>

As pointed out by Nick Piggin, modifying vma->vm_flags without
->mmap_sem can corrupt the flags. This changes revoke_mapping() to use 
down_write_trylock() to acquire ->mmap_sem and then rescan vmas of the 
parent mm.

Also make sure we restart scanning in revoke_mm() if zap_page_range()
did not finish within our process timeslice.

Cc: Nick Piggin <[EMAIL PROTECTED]>
Signed-off-by: Pekka Enberg <[EMAIL PROTECTED]>
---
 fs/revoke.c |  128 +++++++++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 105 insertions(+), 23 deletions(-)

Index: uml-2.6/fs/revoke.c
===================================================================
--- uml-2.6.orig/fs/revoke.c    2007-03-16 11:37:52.000000000 +0200
+++ uml-2.6/fs/revoke.c 2007-03-16 12:02:10.000000000 +0200
@@ -158,6 +158,22 @@    for (fd = 0; fd < fdt->max_fds; fd++) {
        return err;
 }
 
+static inline bool need_revoke(struct vm_area_struct *vma,
+                              struct file *to_exclude)
+{
+       if (vma->vm_flags & VM_REVOKED)
+               return false;
+
+       if (!(vma->vm_flags & VM_SHARED))
+               return false;
+
+       return vma->vm_file != to_exclude;
+}
+
+/*
+ *      LOCKING: down_write(&mm->mmap_sem)
+ *                 -> spin_lock(&mapping->i_mmap_lock)
+ */
 static int revoke_vma(struct vm_area_struct *vma, struct zap_details *details)
 {
        unsigned long restart_addr, start_addr, end_addr;
@@ -166,11 +182,6 @@ static int revoke_vma(struct vm_area_str
        start_addr = vma->vm_start;
        end_addr = vma->vm_end;
 
-       /*
-        * Not holding ->mmap_sem here.
-        */
-       vma->vm_flags |= VM_REVOKED;
-       smp_mb();
   again:
        restart_addr = zap_page_range(vma, start_addr, end_addr - start_addr,
                                      details);
@@ -183,6 +194,7 @@ static int revoke_vma(struct vm_area_str
                start_addr = restart_addr;
                goto again;
        }
+       vma->vm_flags |= VM_REVOKED;
        return 0;
 
   out_need_break:
@@ -192,34 +204,108 @@  return 0;
        return -EINTR;
 }
 
-static int revoke_mapping(struct address_space *mapping, struct file 
*to_exclude)
+/*
+ *     LOCKING: spin_lock(&mapping->i_mmap_lock)
+ */
+static int revoke_mm(struct mm_struct *mm, struct address_space *mapping,
+                    struct file *to_exclude)
 {
        struct vm_area_struct *vma;
-       struct prio_tree_iter iter;
        struct zap_details details;
        int err = 0;
 
        details.i_mmap_lock = &mapping->i_mmap_lock;
 
-       spin_lock(&mapping->i_mmap_lock);
+       /*
+        * If ->mmap_sem is under contention, we continue scanning other
+        * mms and try again later.
+        */
+       if (!down_write_trylock(&mm->mmap_sem)) {
+               err = -EAGAIN;
+               goto out;
+       }
+       for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
+               if (!need_revoke(vma, to_exclude))
+                       continue;
+
+               err = revoke_vma(vma, &details);
+               if (err)
+                       break;
+       }
+       up_write(&mm->mmap_sem);
+  out:
+       return err;
+}
+
+/*
+ *     LOCKING: spin_lock(&mapping->i_mmap_lock)
+ */
+static void revoke_mapping_tree(struct address_space *mapping,
+                               struct file *to_exclude)
+{
+       struct vm_area_struct *vma;
+       struct prio_tree_iter iter;
+       int try_again = 0;
+
+  restart:
        vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, 0, ULONG_MAX) {
-               if ((vma->vm_flags & VM_SHARED) && vma->vm_file != to_exclude) {
-                       err = revoke_vma(vma, &details);
-                       if (err)
-                               goto out;
+               int err;
+
+               if (likely(!need_revoke(vma, to_exclude)))
+                       continue;
+
+               err = revoke_mm(vma->vm_mm, mapping, to_exclude);
+               if (err == -EAGAIN) {
+                       try_again = 1;
+                       continue;
                }
+               if (err == -EINTR)
+                       goto restart;
+       }
+       if (try_again) {
+               cond_resched();
+               goto restart;
        }
+}
 
+/*
+ *     LOCKING: spin_lock(&mapping->i_mmap_lock)
+ */
+static void revoke_mapping_list(struct address_space *mapping,
+                               struct file *to_exclude)
+{
+       struct vm_area_struct *vma;
+       int try_again = 0;
+
+  restart:
        list_for_each_entry(vma, &mapping->i_mmap_nonlinear, 
shared.vm_set.list) {
-               if ((vma->vm_flags & VM_SHARED) && vma->vm_file != to_exclude) {
-                       err = revoke_vma(vma, &details);
-                       if (err)
-                               goto out;
+               int err;
+
+               if (likely(!need_revoke(vma, to_exclude)))
+                       continue;
+
+               err = revoke_mm(vma->vm_mm, mapping, to_exclude);
+               if (err == -EAGAIN) {
+                       try_again = 1;
+                       continue;
                }
+               if (err == -EINTR)
+                       goto restart;
        }
-  out:
+       if (try_again) {
+               cond_resched();
+               goto restart;
+       }
+}
+
+static void revoke_mapping(struct address_space *mapping, struct file 
*to_exclude)
+{
+       spin_lock(&mapping->i_mmap_lock);
+       if (unlikely(!prio_tree_empty(&mapping->i_mmap)))
+               revoke_mapping_tree(mapping, to_exclude);
+       if (unlikely(!list_empty(&mapping->i_mmap_nonlinear)))
+               revoke_mapping_list(mapping, to_exclude);
        spin_unlock(&mapping->i_mmap_lock);
-       return err;
 }
 
 static void restore_file(struct revokefs_inode_info *info)
@@ -439,11 +525,7 @@    int err = 0;
        /*
         * Take down shared memory mappings.
         */
-       err = revoke_mapping(inode->i_mapping, to_exclude);
-       if (err) {
-               restore_files(table);
-               goto out_free_table;
-       }
+       revoke_mapping(inode->i_mapping, to_exclude);
 
        /*
         * Now, revoke the files for good.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to