This is a note to let you know that I've just added the patch titled

    tmpfs: fix race between umount and swapoff

to the 2.6.33-longterm tree which can be found at:
    
http://www.kernel.org/git/?p=linux/kernel/git/longterm/longterm-queue-2.6.33.git;a=summary

The filename of the patch is:
     tmpfs-fix-race-between-umount-and-swapoff.patch
and it can be found in the queue-2.6.33 subdirectory.

If you, or anyone else, feels it should not be added to the 2.6.33 longterm 
tree,
please let <[email protected]> know about it.


>From 778dd893ae785c5fd505dac30b5fc40aae188bf1 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <[email protected]>
Date: Wed, 11 May 2011 15:13:37 -0700
Subject: tmpfs: fix race between umount and swapoff

From: Hugh Dickins <[email protected]>

commit 778dd893ae785c5fd505dac30b5fc40aae188bf1 upstream.

The use of igrab() in swapoff's shmem_unuse_inode() is just as vulnerable
to umount as that in shmem_writepage().

Fix this instance by extending the protection of shmem_swaplist_mutex
right across shmem_unuse_inode(): while it's on the list, the inode cannot
be evicted (and the filesystem cannot be unmounted) without
shmem_evict_inode() taking that mutex to remove it from the list.

But since shmem_writepage() might take that mutex, we should avoid making
memory allocations or memcg charges while holding it: prepare them at the
outer level in shmem_unuse().  When mem_cgroup_cache_charge() was
originally placed, we didn't know until that point that the page from swap
was actually a shmem page; but nowadays it's noted in the swap_map, so
we're safe to charge upfront.  For the radix_tree, do as is done in
shmem_getpage(): preload upfront, but don't pin to the cpu; so we make a
habit of refreshing the node pool, but might dip into GFP_NOWAIT reserves
on occasion if subsequently preempted.

With the allocation and charge moved out from shmem_unuse_inode(),
we can also hold index map and info->lock over from finding the entry.

Signed-off-by: Hugh Dickins <[email protected]>
Cc: Konstantin Khlebnikov <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
Signed-off-by: Greg Kroah-Hartman <[email protected]>

---
 mm/shmem.c |   88 +++++++++++++++++++++++++++++--------------------------------
 1 file changed, 43 insertions(+), 45 deletions(-)

--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -850,7 +850,7 @@ static inline int shmem_find_swp(swp_ent
 
 static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, 
struct page *page)
 {
-       struct inode *inode;
+       struct address_space *mapping;
        unsigned long idx;
        unsigned long size;
        unsigned long limit;
@@ -873,8 +873,10 @@ static int shmem_unuse_inode(struct shme
        if (size > SHMEM_NR_DIRECT)
                size = SHMEM_NR_DIRECT;
        offset = shmem_find_swp(entry, ptr, ptr+size);
-       if (offset >= 0)
+       if (offset >= 0) {
+               shmem_swp_balance_unmap();
                goto found;
+       }
        if (!info->i_indirect)
                goto lost2;
 
@@ -912,11 +914,11 @@ static int shmem_unuse_inode(struct shme
                        if (size > ENTRIES_PER_PAGE)
                                size = ENTRIES_PER_PAGE;
                        offset = shmem_find_swp(entry, ptr, ptr+size);
-                       shmem_swp_unmap(ptr);
                        if (offset >= 0) {
                                shmem_dir_unmap(dir);
                                goto found;
                        }
+                       shmem_swp_unmap(ptr);
                }
        }
 lost1:
@@ -926,8 +928,7 @@ lost2:
        return 0;
 found:
        idx += offset;
-       inode = igrab(&info->vfs_inode);
-       spin_unlock(&info->lock);
+       ptr += offset;
 
        /*
         * Move _head_ to start search for next from here.
@@ -938,37 +939,18 @@ found:
         */
        if (shmem_swaplist.next != &info->swaplist)
                list_move_tail(&shmem_swaplist, &info->swaplist);
-       mutex_unlock(&shmem_swaplist_mutex);
 
-       error = 1;
-       if (!inode)
-               goto out;
        /*
-        * Charge page using GFP_KERNEL while we can wait.
-        * Charged back to the user(not to caller) when swap account is used.
-        * add_to_page_cache() will be called with GFP_NOWAIT.
+        * We rely on shmem_swaplist_mutex, not only to protect the swaplist,
+        * but also to hold up shmem_evict_inode(): so inode cannot be freed
+        * beneath us (pagelock doesn't help until the page is in pagecache).
         */
-       error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
-       if (error)
-               goto out;
-       error = radix_tree_preload(GFP_KERNEL);
-       if (error) {
-               mem_cgroup_uncharge_cache_page(page);
-               goto out;
-       }
-       error = 1;
-
-       spin_lock(&info->lock);
-       ptr = shmem_swp_entry(info, idx, NULL);
-       if (ptr && ptr->val == entry.val) {
-               error = add_to_page_cache_locked(page, inode->i_mapping,
-                                               idx, GFP_NOWAIT);
-               /* does mem_cgroup_uncharge_cache_page on error */
-       } else  /* we must compensate for our precharge above */
-               mem_cgroup_uncharge_cache_page(page);
+       mapping = info->vfs_inode.i_mapping;
+       error = add_to_page_cache_locked(page, mapping, idx, GFP_NOWAIT);
+       /* which does mem_cgroup_uncharge_cache_page on error */
 
        if (error == -EEXIST) {
-               struct page *filepage = find_get_page(inode->i_mapping, idx);
+               struct page *filepage = find_get_page(mapping, idx);
                error = 1;
                if (filepage) {
                        /*
@@ -988,14 +970,8 @@ found:
                swap_free(entry);
                error = 1;      /* not an error, but entry was found */
        }
-       if (ptr)
-               shmem_swp_unmap(ptr);
+       shmem_swp_unmap(ptr);
        spin_unlock(&info->lock);
-       radix_tree_preload_end();
-out:
-       unlock_page(page);
-       page_cache_release(page);
-       iput(inode);            /* allows for NULL */
        return error;
 }
 
@@ -1007,6 +983,26 @@ int shmem_unuse(swp_entry_t entry, struc
        struct list_head *p, *next;
        struct shmem_inode_info *info;
        int found = 0;
+       int error;
+
+       /*
+        * Charge page using GFP_KERNEL while we can wait, before taking
+        * the shmem_swaplist_mutex which might hold up shmem_writepage().
+        * Charged back to the user (not to caller) when swap account is used.
+        * add_to_page_cache() will be called with GFP_NOWAIT.
+        */
+       error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
+       if (error)
+               goto out;
+       /*
+        * Try to preload while we can wait, to not make a habit of
+        * draining atomic reserves; but don't latch on to this cpu,
+        * it's okay if sometimes we get rescheduled after this.
+        */
+       error = radix_tree_preload(GFP_KERNEL);
+       if (error)
+               goto uncharge;
+       radix_tree_preload_end();
 
        mutex_lock(&shmem_swaplist_mutex);
        list_for_each_safe(p, next, &shmem_swaplist) {
@@ -1014,17 +1010,19 @@ int shmem_unuse(swp_entry_t entry, struc
                found = shmem_unuse_inode(info, entry, page);
                cond_resched();
                if (found)
-                       goto out;
+                       break;
        }
        mutex_unlock(&shmem_swaplist_mutex);
-       /*
-        * Can some race bring us here?  We've been holding page lock,
-        * so I think not; but would rather try again later than BUG()
-        */
+
+uncharge:
+       if (!found)
+               mem_cgroup_uncharge_cache_page(page);
+       if (found < 0)
+               error = found;
+out:
        unlock_page(page);
        page_cache_release(page);
-out:
-       return (found < 0) ? found : 0;
+       return error;
 }
 
 /*


Patches currently in longterm-queue-2.6.33 which might be from [email protected] 
are

/home/gregkh/linux/longterm/longterm-queue-2.6.33/queue-2.6.33/tmpfs-fix-race-between-umount-and-swapoff.patch

_______________________________________________
stable mailing list
[email protected]
http://linux.kernel.org/mailman/listinfo/stable

Reply via email to