In the case of subpagesize-blocksize, this patch makes it possible to read
only a single metadata block from the disk instead of all the metadata blocks
that map into a page.

Signed-off-by: Chandan Rajendra <[email protected]>
---
 fs/btrfs/disk-io.c   |  65 +++++++++++++++++++------------
 fs/btrfs/disk-io.h   |   3 ++
 fs/btrfs/extent_io.c | 108 +++++++++++++++++++++++++++++++++++++++++++++++----
 3 files changed, 144 insertions(+), 32 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 51fe2ec..b794e33 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -597,28 +597,41 @@ static noinline int check_leaf(struct btrfs_root *root,
        return 0;
 }
 
-static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
-                                     u64 phy_offset, struct page *page,
-                                     u64 start, u64 end, int mirror)
+int verify_extent_buffer_read(struct btrfs_io_bio *io_bio,
+                       struct page *page,
+                       u64 start, u64 end, int mirror)
 {
-       u64 found_start;
-       int found_level;
+       struct address_space *mapping = (io_bio->bio).bi_io_vec->bv_page->mapping;
+       struct extent_io_tree *tree = &BTRFS_I(mapping->host)->io_tree;
+       struct extent_buffer_head *ebh;
        struct extent_buffer *eb;
        struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
-       int ret = 0;
+       unsigned long num_pages;
+       unsigned long i;
+       u64 found_start;
+       int found_level;
        int reads_done;
+       int ret = 0;
 
        if (!page->private)
                goto out;
 
        eb = (struct extent_buffer *)page->private;
+       do {
+               if ((eb->start <= start) && (eb->start + eb->len - 1 > start))
+                       break;
+       } while ((eb = eb->eb_next) != NULL);
+
+       BUG_ON(!eb);
+
+       ebh = eb_head(eb);
 
        /* the pending IO might have been the only thing that kept this buffer
         * in memory.  Make sure we have a ref for all this other checks
         */
        extent_buffer_get(eb);
 
-       reads_done = atomic_dec_and_test(&eb->io_pages);
+       reads_done = atomic_dec_and_test(&ebh->io_bvecs);
        if (!reads_done)
                goto err;
 
@@ -683,28 +696,34 @@ err:
                 * again, we have to make sure it has something
                 * to decrement
                 */
-               atomic_inc(&eb->io_pages);
+               atomic_inc(&eb_head(eb)->io_bvecs);
                clear_extent_buffer_uptodate(eb);
        }
+
+       /*
+         We never read more than one extent buffer from a page at a time.
+         So unlocking the page here should be fine.
+        */
+       if (reads_done) {
+               num_pages = num_extent_pages(eb->start, eb->len);
+               for (i = 0; i < num_pages; i++) {
+                       page = eb_head(eb)->pages[i];
+                       unlock_page(page);
+               }
+
+               /*
+                 We don't need to add a check to see if
+                 extent_io_tree->track_uptodate is set or not, since
+                 this function only deals with extent buffers.
+               */
+               unlock_extent(tree, eb->start, eb->start + eb->len - 1);
+       }
+
        free_extent_buffer(eb);
 out:
        return ret;
 }
 
-static int btree_io_failed_hook(struct page *page, int failed_mirror)
-{
-       struct extent_buffer *eb;
-       struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
-
-       eb = (struct extent_buffer *)page->private;
-       set_bit(EXTENT_BUFFER_READ_ERR, &eb->ebflags);
-       eb->read_mirror = failed_mirror;
-       atomic_dec(&eb->io_pages);
-       if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->ebflags))
-               btree_readahead_hook(root, eb, eb->start, -EIO);
-       return -EIO;    /* we fixed nothing */
-}
-
 static void end_workqueue_bio(struct bio *bio, int err)
 {
        struct btrfs_end_io_wq *end_io_wq = bio->bi_private;
@@ -4349,8 +4368,6 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
 }
 
 static const struct extent_io_ops btree_extent_io_ops = {
-       .readpage_end_io_hook = btree_readpage_end_io_hook,
-       .readpage_io_failed_hook = btree_io_failed_hook,
        .submit_bio_hook = btree_submit_bio_hook,
        /* note we're sharing with inode.c for the merge bio hook */
        .merge_bio_hook = btrfs_merge_bio_hook,
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index d4cbfee..c69076c 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -111,6 +111,9 @@ static inline void btrfs_put_fs_root(struct btrfs_root *root)
                kfree(root);
 }
 
+int verify_extent_buffer_read(struct btrfs_io_bio *io_bio,
+                       struct page *page,
+                       u64 start, u64 end, int mirror);
 void btrfs_mark_buffer_dirty(struct extent_buffer *buf);
 int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
                          int atomic);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index a7e715a..76a6e39 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -14,6 +14,7 @@
 #include "extent_io.h"
 #include "extent_map.h"
 #include "ctree.h"
+#include "disk-io.h"
 #include "btrfs_inode.h"
 #include "volumes.h"
 #include "check-integrity.h"
@@ -2179,7 +2180,7 @@ int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb,
                struct page *p = eb_head(eb)->pages[i];
 
                ret = repair_io_failure(root->fs_info->btree_inode, start,
-                                       PAGE_CACHE_SIZE, start, p,
+                                       eb->len, start, p,
                                        start - page_offset(p), mirror_num);
                if (ret)
                        break;
@@ -3706,6 +3707,77 @@ lock_extent_buffer_for_io(struct extent_buffer *eb,
        return ret;
 }
 
+static void end_bio_extent_buffer_readpage(struct bio *bio, int err)
+{
+       struct address_space *mapping = bio->bi_io_vec->bv_page->mapping;
+       struct extent_io_tree *tree = &BTRFS_I(mapping->host)->io_tree;
+       struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
+       struct extent_buffer *eb;
+       struct btrfs_root *root;
+       struct bio_vec *bvec;
+       struct page *page;
+       int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
+       u64 start;
+       u64 end;
+       int mirror;
+       int ret;
+       int i;
+
+       if (err)
+               uptodate = 0;
+
+       bio_for_each_segment_all(bvec, bio, i) {
+               page = bvec->bv_page;
+               root = BTRFS_I(page->mapping->host)->root;
+
+               start = page_offset(page) + bvec->bv_offset;
+               end = start + bvec->bv_len - 1;
+
+               if (!page->private) {
+                       unlock_page(page);
+                       unlock_extent(tree, start, end);
+                       continue;
+               }
+
+               eb = (struct extent_buffer *)page->private;
+
+               do {
+                       /*
+                         read_extent_buffer_pages() does not start
+                         I/O on PG_uptodate pages. Hence the bio may
+                         map only part of the extent buffer.
+                        */
+                       if ((eb->start <= start) && (eb->start + eb->len - 1 > start))
+                               break;
+               } while ((eb = eb->eb_next) != NULL);
+
+               BUG_ON(!eb);
+
+               mirror = io_bio->mirror_num;
+
+               if (uptodate) {
+                       ret = verify_extent_buffer_read(io_bio, page, start,
+                                                       end, mirror);
+                       if (ret)
+                               uptodate = 0;
+               }
+
+               if (!uptodate) {
+                       set_bit(EXTENT_BUFFER_READ_ERR, &eb->ebflags);
+                       eb->read_mirror = mirror;
+                       atomic_dec(&eb_head(eb)->io_bvecs);
+                       if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD,
+                                               &eb->ebflags))
+                               btree_readahead_hook(root, eb, eb->start,
+                                               -EIO);
+                       ClearPageUptodate(page);
+                       SetPageError(page);
+               }
+       }
+
+       bio_put(bio);
+}
+
 static void end_extent_buffer_writeback(struct extent_buffer *eb)
 {
        clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->ebflags);
@@ -5330,6 +5402,9 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
                             struct extent_buffer *eb, u64 start, int wait,
                             get_extent_t *get_extent, int mirror_num)
 {
+       struct inode *inode = tree->mapping->host;
+       struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
+       struct extent_state *cached_state = NULL;
        unsigned long i;
        unsigned long start_i;
        struct page *page;
@@ -5376,15 +5451,31 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
 
        clear_bit(EXTENT_BUFFER_READ_ERR, &eb->ebflags);
        eb->read_mirror = 0;
-       atomic_set(&eb->io_pages, num_reads);
+       atomic_set(&eb_head(eb)->io_bvecs, num_reads);
        for (i = start_i; i < num_pages; i++) {
                page = eb_head(eb)->pages[i];
                if (!PageUptodate(page)) {
                        ClearPageError(page);
-                       err = __extent_read_full_page(tree, page,
-                                                     get_extent, &bio,
-                                                     mirror_num, &bio_flags,
-                                                     READ | REQ_META);
+                       if (eb->len < PAGE_CACHE_SIZE) {
+                               lock_extent_bits(tree, eb->start, eb->start + eb->len - 1, 0,
+                                                       &cached_state);
+                               err = submit_extent_page(READ | REQ_META, tree,
+                                                       page, eb->start >> 9,
+                                                       eb->len, eb->start - page_offset(page),
+                                                       fs_info->fs_devices->latest_bdev,
+                                                       &bio, -1, end_bio_extent_buffer_readpage,
+                                                       mirror_num, bio_flags, bio_flags);
+                       } else {
+                               lock_extent_bits(tree, page_offset(page),
+                                               page_offset(page) + PAGE_CACHE_SIZE - 1,
+                                               0, &cached_state);
+                               err = submit_extent_page(READ | REQ_META, tree,
+                                                       page, page_offset(page) >> 9,
+                                                       PAGE_CACHE_SIZE, 0,
+                                                       fs_info->fs_devices->latest_bdev,
+                                                       &bio, -1, end_bio_extent_buffer_readpage,
+                                                       mirror_num, bio_flags, bio_flags);
+                       }
                        if (err)
                                ret = err;
                } else {
@@ -5405,10 +5496,11 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
        for (i = start_i; i < num_pages; i++) {
                page = eb_head(eb)->pages[i];
                wait_on_page_locked(page);
-               if (!PageUptodate(page))
-                       ret = -EIO;
        }
 
+       if (!extent_buffer_uptodate(eb))
+               ret = -EIO;
+
        return ret;
 
 unlock_exit:
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to