The patch below does not apply to the .39-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <sta...@kernel.org>.

thanks,

greg k-h

------------------ original commit in Linus's tree ------------------

>From 08142579b6ca35883c1ed066a2681de6f6917062 Mon Sep 17 00:00:00 2001
From: Jan Kara <j...@suse.cz>
Date: Mon, 27 Jun 2011 16:18:10 -0700
Subject: [PATCH] mm: fix assertion mapping->nrpages == 0 in end_writeback()

Under heavy memory and filesystem load, users observe the assertion
mapping->nrpages == 0 in end_writeback() trigger.  This can be caused by
page reclaim reclaiming the last page from a mapping in the following
race:

        CPU0                            CPU1
  ...
  shrink_page_list()
    __remove_mapping()
      __delete_from_page_cache()
        radix_tree_delete()
                                        evict_inode()
                                          truncate_inode_pages()
                                            truncate_inode_pages_range()
                                              pagevec_lookup() - finds nothing
                                          end_writeback()
                                            mapping->nrpages != 0 -> BUG
        page->mapping = NULL
        mapping->nrpages--

Fix the problem by doing a reliable check of mapping->nrpages under
mapping->tree_lock in end_writeback().

Analyzed by Jay <jinshan.xi...@whamcloud.com>, lost in LKML, and dug out
by Miklos Szeredi <mszer...@suse.de>.

Cc: Jay <jinshan.xi...@whamcloud.com>
Cc: Miklos Szeredi <mszer...@suse.de>
Signed-off-by: Jan Kara <j...@suse.cz>
Cc: <sta...@kernel.org>
Signed-off-by: Andrew Morton <a...@linux-foundation.org>
Signed-off-by: Linus Torvalds <torva...@linux-foundation.org>

diff --git a/fs/inode.c b/fs/inode.c
index 0f7e88a..43566d1 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -423,7 +423,14 @@ EXPORT_SYMBOL(remove_inode_hash);
 void end_writeback(struct inode *inode)
 {
        might_sleep();
+       /*
+        * We have to cycle tree_lock here because reclaim can be still in the
+        * process of removing the last page (in __delete_from_page_cache())
+        * and we must not free mapping under it.
+        */
+       spin_lock_irq(&inode->i_data.tree_lock);
        BUG_ON(inode->i_data.nrpages);
+       spin_unlock_irq(&inode->i_data.tree_lock);
        BUG_ON(!list_empty(&inode->i_data.private_list));
        BUG_ON(!(inode->i_state & I_FREEING));
        BUG_ON(inode->i_state & I_CLEAR);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6e73e2e..b5b9792 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -639,6 +639,7 @@ struct address_space {
        struct prio_tree_root   i_mmap;         /* tree of private and shared 
mappings */
        struct list_head        i_mmap_nonlinear;/*list VM_NONLINEAR mappings */
        struct mutex            i_mmap_mutex;   /* protect tree, count, list */
+       /* Protected by tree_lock together with the radix tree */
        unsigned long           nrpages;        /* number of total pages */
        pgoff_t                 writeback_index;/* writeback starts here */
        const struct address_space_operations *a_ops;   /* methods */
diff --git a/mm/truncate.c b/mm/truncate.c
index 29a9b8a..e13f22e 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -304,6 +304,11 @@ EXPORT_SYMBOL(truncate_inode_pages_range);
  * @lstart: offset from which to truncate
  *
  * Called under (and serialised by) inode->i_mutex.
+ *
+ * Note: When this function returns, there can be a page in the process of
+ * deletion (inside __delete_from_page_cache()) in the specified range.  Thus
+ * mapping->nrpages can be non-zero when this function returns even after
+ * truncation of the whole mapping.
  */
 void truncate_inode_pages(struct address_space *mapping, loff_t lstart)
 {

_______________________________________________
stable mailing list
stable@linux.kernel.org
http://linux.kernel.org/mailman/listinfo/stable

Reply via email to