From: Steven Whitehouse <[EMAIL PROTECTED]>

This patch resolves a lock ordering issue where we had been getting
a transaction lock in the wrong order with respect to the page lock.
By using writepages rather than just writepage, it is then possible
to start a transaction before locking the page, and thus matching the
locking order elsewhere in the code.

Signed-off-by: Steven Whitehouse <[EMAIL PROTECTED]>

diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 70b404d..1e1fe8d 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -650,7 +650,7 @@ static void gfs2_ordered_write(struct gfs2_sbd *sdp)
                get_bh(bh);
                gfs2_log_unlock(sdp);
                lock_buffer(bh);
-               if (test_clear_buffer_dirty(bh)) {
+               if (buffer_mapped(bh) && test_clear_buffer_dirty(bh)) {
                        bh->b_end_io = end_buffer_write_sync;
                        submit_bh(WRITE, bh);
                } else {
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 4bf73ed..48913e5 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -21,6 +21,7 @@
 #include <linux/gfs2_ondisk.h>
 #include <linux/lm_interface.h>
 #include <linux/swap.h>
+#include <linux/pagevec.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -189,6 +190,34 @@ static int gfs2_ordered_writepage(struct page *page,
 }
 
 /**
+ * __gfs2_jdata_writepage - The core of jdata writepage
+ * @page: The page to write
+ * @wbc: The writeback control
+ *
+ * This is shared between writepage and writepages and implements the
+ * core of the writepage operation. If a transaction is required then
+ * PageChecked will have been set and the transaction will have
+ * already been started before this is called.
+ */
+
+static int __gfs2_jdata_writepage(struct page *page, struct writeback_control 
*wbc)
+{
+       struct inode *inode = page->mapping->host;
+       struct gfs2_inode *ip = GFS2_I(inode);
+       struct gfs2_sbd *sdp = GFS2_SB(inode);
+
+       if (PageChecked(page)) {
+               ClearPageChecked(page);
+               if (!page_has_buffers(page)) {
+                       create_empty_buffers(page, inode->i_sb->s_blocksize,
+                                            (1 << BH_Dirty)|(1 << 
BH_Uptodate));
+               }
+               gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize-1);
+       }
+       return block_write_full_page(page, gfs2_get_block_noalloc, wbc);
+}
+
+/**
  * gfs2_jdata_writepage - Write complete page
  * @page: Page to write
  *
@@ -199,7 +228,6 @@ static int gfs2_ordered_writepage(struct page *page,
 static int gfs2_jdata_writepage(struct page *page, struct writeback_control 
*wbc)
 {
        struct inode *inode = page->mapping->host;
-       struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_sbd *sdp = GFS2_SB(inode);
        int error;
        int done_trans = 0;
@@ -209,18 +237,14 @@ static int gfs2_jdata_writepage(struct page *page, struct 
writeback_control *wbc
                return error;
 
        if (PageChecked(page)) {
+               if (wbc->sync_mode != WB_SYNC_ALL)
+                       goto out_ignore;
                error = gfs2_trans_begin(sdp, RES_DINODE + 1, 0);
                if (error)
                        goto out_ignore;
-               ClearPageChecked(page);
-               if (!page_has_buffers(page)) {
-                       create_empty_buffers(page, inode->i_sb->s_blocksize,
-                                            (1 << BH_Dirty)|(1 << 
BH_Uptodate));
-               }
-               gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize-1);
                done_trans = 1;
        }
-       error = block_write_full_page(page, gfs2_get_block_noalloc, wbc);
+       error = __gfs2_jdata_writepage(page, wbc);
        if (done_trans)
                gfs2_trans_end(sdp);
        return error;
@@ -247,6 +271,178 @@ static int gfs2_writeback_writepages(struct address_space 
*mapping,
 }
 
 /**
+ * gfs2_write_jdata_pagevec - Write back a pagevec's worth of pages
+ * @mapping: The mapping
+ * @wbc: The writeback control
+ * @writepage: The writepage function to call for each page
+ * @pvec: The vector of pages
+ * @nr_pages: The number of pages to write
+ *
+ * Returns: non-zero if loop should terminate, zero otherwise
+ */
+
+static int gfs2_write_jdata_pagevec(struct address_space *mapping,
+                                   struct writeback_control *wbc,
+                                   struct pagevec *pvec,
+                                   int nr_pages, pgoff_t end)
+{
+       struct inode *inode = mapping->host;
+       struct gfs2_sbd *sdp = GFS2_SB(inode);
+       loff_t i_size = i_size_read(inode);
+       pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
+       unsigned offset = i_size & (PAGE_CACHE_SIZE-1);
+       unsigned nrblocks = nr_pages * 
(PAGE_CACHE_SIZE/inode->i_sb->s_blocksize);
+       struct backing_dev_info *bdi = mapping->backing_dev_info;
+       int i;
+       int ret;
+
+       ret = gfs2_trans_begin(sdp, nrblocks, 0);
+       if (ret < 0)
+               return ret;
+
+       for(i = 0; i < nr_pages; i++) {
+               struct page *page = pvec->pages[i];
+
+               lock_page(page);
+
+               if (unlikely(page->mapping != mapping)) {
+                       unlock_page(page);
+                       continue;
+               }
+
+               if (!wbc->range_cyclic && page->index > end) {
+                       ret = 1;
+                       unlock_page(page);
+                       continue;
+               }
+
+               if (wbc->sync_mode != WB_SYNC_NONE)
+                       wait_on_page_writeback(page);
+
+               if (PageWriteback(page) ||
+                   !clear_page_dirty_for_io(page)) {
+                       unlock_page(page);
+                       continue;
+               }
+
+               /* Is the page fully outside i_size? (truncate in progress) */
+               if (page->index > end_index || (page->index == end_index && 
!offset)) {
+                       page->mapping->a_ops->invalidatepage(page, 0);
+                       unlock_page(page);
+                       continue;
+               }
+
+               ret = __gfs2_jdata_writepage(page, wbc);
+
+               if (ret || (--(wbc->nr_to_write) <= 0))
+                       ret = 1;
+               if (wbc->nonblocking && bdi_write_congested(bdi)) {
+                       wbc->encountered_congestion = 1;
+                       ret = 1;
+               }
+
+       }
+       gfs2_trans_end(sdp);
+       return ret;
+}
+
+/**
+ * gfs2_write_cache_jdata - Like write_cache_pages but different
+ * @mapping: The mapping to write
+ * @wbc: The writeback control
+ * @writepage: The writepage function to call
+ * @data: The data to pass to writepage
+ *
+ * The reason that we use our own function here is that we need to
+ * start transactions before we grab page locks. This allows us
+ * to get the ordering right.
+ */
+
+static int gfs2_write_cache_jdata(struct address_space *mapping,
+                                 struct writeback_control *wbc)
+{
+       struct backing_dev_info *bdi = mapping->backing_dev_info;
+       int ret = 0;
+       int done = 0;
+       struct pagevec pvec;
+       int nr_pages;
+       pgoff_t index;
+       pgoff_t end;
+       int scanned = 0;
+       int range_whole = 0;
+
+       if (wbc->nonblocking && bdi_write_congested(bdi)) {
+               wbc->encountered_congestion = 1;
+               return 0;
+       }
+
+       pagevec_init(&pvec, 0);
+       if (wbc->range_cyclic) {
+               index = mapping->writeback_index; /* Start from prev offset */
+               end = -1;
+       } else {
+               index = wbc->range_start >> PAGE_CACHE_SHIFT;
+               end = wbc->range_end >> PAGE_CACHE_SHIFT;
+               if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
+                       range_whole = 1;
+               scanned = 1;
+       }
+
+retry:
+        while (!done && (index <= end) &&
+               (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
+                                              PAGECACHE_TAG_DIRTY,
+                                              min(end - index, 
(pgoff_t)PAGEVEC_SIZE-1) + 1))) {
+               scanned = 1;
+               ret = gfs2_write_jdata_pagevec(mapping, wbc, &pvec, nr_pages, 
end);
+               if (ret)
+                       done = 1;
+               if (ret > 0)
+                       ret = 0;
+
+               pagevec_release(&pvec);
+               cond_resched();
+       }
+
+       if (!scanned && !done) {
+               /*
+                * We hit the last page and there is more work to be done: wrap
+                * back to the start of the file
+                */
+               scanned = 1;
+               index = 0;
+               goto retry;
+       }
+
+       if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
+               mapping->writeback_index = index;
+       return ret;
+}
+
+
+/**
+ * gfs2_jdata_writepages - Write a bunch of dirty pages back to disk
+ * @mapping: The mapping to write
+ * @wbc: The writeback control
+ * 
+ */
+
+static int gfs2_jdata_writepages(struct address_space *mapping,
+                                struct writeback_control *wbc)
+{
+       struct gfs2_inode *ip = GFS2_I(mapping->host);
+       struct gfs2_sbd *sdp = GFS2_SB(mapping->host);
+       int ret;
+
+       ret = gfs2_write_cache_jdata(mapping, wbc);
+       if (ret == 0 && wbc->sync_mode == WB_SYNC_ALL) {
+               gfs2_log_flush(sdp, ip->i_gl);
+               ret = gfs2_write_cache_jdata(mapping, wbc);
+       }
+       return ret;
+}
+
+/**
  * stuffed_readpage - Fill in a Linux page with stuffed file data
  * @ip: the inode
  * @page: the page
@@ -937,6 +1133,7 @@ static const struct address_space_operations 
gfs2_ordered_aops = {
 
 static const struct address_space_operations gfs2_jdata_aops = {
        .writepage = gfs2_jdata_writepage,
+       .writepages = gfs2_jdata_writepages,
        .readpage = gfs2_readpage,
        .readpages = gfs2_readpages,
        .sync_page = block_sync_page,
-- 
1.5.1.2

Reply via email to