On Mon, Jun 01, 2015 at 08:52:40PM +0530, Chandan Rajendra wrote:
> In the case of subpagesize-blocksize, this patch makes it possible to read
> only a single metadata block from the disk instead of all the metadata blocks
> that map into a page.

I'm a bit curious about how much benefit is gained from reading a single block
rather reading the whole page.

Thanks,

-liubo

> 
> Signed-off-by: Chandan Rajendra <[email protected]>
> ---
>  fs/btrfs/disk-io.c   |  65 +++++++++++++++++++------------
>  fs/btrfs/disk-io.h   |   3 ++
>  fs/btrfs/extent_io.c | 108 
> +++++++++++++++++++++++++++++++++++++++++++++++----
>  3 files changed, 144 insertions(+), 32 deletions(-)
> 
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index 51fe2ec..b794e33 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -597,28 +597,41 @@ static noinline int check_leaf(struct btrfs_root *root,
>       return 0;
>  }
>  
> -static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
> -                                   u64 phy_offset, struct page *page,
> -                                   u64 start, u64 end, int mirror)
> +int verify_extent_buffer_read(struct btrfs_io_bio *io_bio,
> +                     struct page *page,
> +                     u64 start, u64 end, int mirror)
>  {
> -     u64 found_start;
> -     int found_level;
> +     struct address_space *mapping = 
> (io_bio->bio).bi_io_vec->bv_page->mapping;
> +     struct extent_io_tree *tree = &BTRFS_I(mapping->host)->io_tree;
> +     struct extent_buffer_head *ebh;
>       struct extent_buffer *eb;
>       struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
> -     int ret = 0;
> +     unsigned long num_pages;
> +     unsigned long i;
> +     u64 found_start;
> +     int found_level;
>       int reads_done;
> +     int ret = 0;
>  
>       if (!page->private)
>               goto out;
>  
>       eb = (struct extent_buffer *)page->private;
> +     do {
> +             if ((eb->start <= start) && (eb->start + eb->len - 1 > start))
> +                     break;
> +     } while ((eb = eb->eb_next) != NULL);
> +
> +     BUG_ON(!eb);
> +
> +     ebh = eb_head(eb);
>  
>       /* the pending IO might have been the only thing that kept this buffer
>        * in memory.  Make sure we have a ref for all this other checks
>        */
>       extent_buffer_get(eb);
>  
> -     reads_done = atomic_dec_and_test(&eb->io_pages);
> +     reads_done = atomic_dec_and_test(&ebh->io_bvecs);
>       if (!reads_done)
>               goto err;
>  
> @@ -683,28 +696,34 @@ err:
>                * again, we have to make sure it has something
>                * to decrement
>                */
> -             atomic_inc(&eb->io_pages);
> +             atomic_inc(&eb_head(eb)->io_bvecs);
>               clear_extent_buffer_uptodate(eb);
>       }
> +
> +     /*
> +       We never read more than one extent buffer from a page at a time.
> +       So unlocking the page here should be fine.
> +      */
> +     if (reads_done) {
> +             num_pages = num_extent_pages(eb->start, eb->len);
> +             for (i = 0; i < num_pages; i++) {
> +                     page = eb_head(eb)->pages[i];
> +                     unlock_page(page);
> +             }
> +
> +             /*
> +               We don't need to add a check to see if
> +               extent_io_tree->track_uptodate is set or not, Since
> +               this function only deals with extent buffers.
> +             */
> +             unlock_extent(tree, eb->start, eb->start + eb->len - 1);
> +     }
> +
>       free_extent_buffer(eb);
>  out:
>       return ret;
>  }
>  
> -static int btree_io_failed_hook(struct page *page, int failed_mirror)
> -{
> -     struct extent_buffer *eb;
> -     struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
> -
> -     eb = (struct extent_buffer *)page->private;
> -     set_bit(EXTENT_BUFFER_READ_ERR, &eb->ebflags);
> -     eb->read_mirror = failed_mirror;
> -     atomic_dec(&eb->io_pages);
> -     if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->ebflags))
> -             btree_readahead_hook(root, eb, eb->start, -EIO);
> -     return -EIO;    /* we fixed nothing */
> -}
> -
>  static void end_workqueue_bio(struct bio *bio, int err)
>  {
>       struct btrfs_end_io_wq *end_io_wq = bio->bi_private;
> @@ -4349,8 +4368,6 @@ static int btrfs_cleanup_transaction(struct btrfs_root 
> *root)
>  }
>  
>  static const struct extent_io_ops btree_extent_io_ops = {
> -     .readpage_end_io_hook = btree_readpage_end_io_hook,
> -     .readpage_io_failed_hook = btree_io_failed_hook,
>       .submit_bio_hook = btree_submit_bio_hook,
>       /* note we're sharing with inode.c for the merge bio hook */
>       .merge_bio_hook = btrfs_merge_bio_hook,
> diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
> index d4cbfee..c69076c 100644
> --- a/fs/btrfs/disk-io.h
> +++ b/fs/btrfs/disk-io.h
> @@ -111,6 +111,9 @@ static inline void btrfs_put_fs_root(struct btrfs_root 
> *root)
>               kfree(root);
>  }
>  
> +int verify_extent_buffer_read(struct btrfs_io_bio *io_bio,
> +                     struct page *page,
> +                     u64 start, u64 end, int mirror);
>  void btrfs_mark_buffer_dirty(struct extent_buffer *buf);
>  int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
>                         int atomic);
> diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
> index a7e715a..76a6e39 100644
> --- a/fs/btrfs/extent_io.c
> +++ b/fs/btrfs/extent_io.c
> @@ -14,6 +14,7 @@
>  #include "extent_io.h"
>  #include "extent_map.h"
>  #include "ctree.h"
> +#include "disk-io.h"
>  #include "btrfs_inode.h"
>  #include "volumes.h"
>  #include "check-integrity.h"
> @@ -2179,7 +2180,7 @@ int repair_eb_io_failure(struct btrfs_root *root, 
> struct extent_buffer *eb,
>               struct page *p = eb_head(eb)->pages[i];
>  
>               ret = repair_io_failure(root->fs_info->btree_inode, start,
> -                                     PAGE_CACHE_SIZE, start, p,
> +                                     eb->len, start, p,
>                                       start - page_offset(p), mirror_num);
>               if (ret)
>                       break;
> @@ -3706,6 +3707,77 @@ lock_extent_buffer_for_io(struct extent_buffer *eb,
>       return ret;
>  }
>  
> +static void end_bio_extent_buffer_readpage(struct bio *bio, int err)
> +{
> +     struct address_space *mapping = bio->bi_io_vec->bv_page->mapping;
> +     struct extent_io_tree *tree = &BTRFS_I(mapping->host)->io_tree;
> +     struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
> +     struct extent_buffer *eb;
> +     struct btrfs_root *root;
> +     struct bio_vec *bvec;
> +     struct page *page;
> +     int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
> +     u64 start;
> +     u64 end;
> +     int mirror;
> +     int ret;
> +     int i;
> +
> +     if (err)
> +             uptodate = 0;
> +
> +     bio_for_each_segment_all(bvec, bio, i) {
> +             page = bvec->bv_page;
> +             root = BTRFS_I(page->mapping->host)->root;
> +
> +             start = page_offset(page) + bvec->bv_offset;
> +             end = start + bvec->bv_len - 1;
> +
> +             if (!page->private) {
> +                     unlock_page(page);
> +                     unlock_extent(tree, start, end);
> +                     continue;
> +             }
> +
> +             eb = (struct extent_buffer *)page->private;
> +
> +             do {
> +                     /*
> +                       read_extent_buffer_pages() does not start
> +                       I/O on PG_uptodate pages. Hence the bio may
> +                       map only part of the extent buffer.
> +                      */
> +                     if ((eb->start <= start) && (eb->start + eb->len - 1 > 
> start))
> +                             break;
> +             } while ((eb = eb->eb_next) != NULL);
> +
> +             BUG_ON(!eb);
> +
> +             mirror = io_bio->mirror_num;
> +
> +             if (uptodate) {
> +                     ret = verify_extent_buffer_read(io_bio, page, start,
> +                                                     end, mirror);
> +                     if (ret)
> +                             uptodate = 0;
> +             }
> +
> +             if (!uptodate) {
> +                     set_bit(EXTENT_BUFFER_READ_ERR, &eb->ebflags);
> +                     eb->read_mirror = mirror;
> +                     atomic_dec(&eb_head(eb)->io_bvecs);
> +                     if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD,
> +                                             &eb->ebflags))
> +                             btree_readahead_hook(root, eb, eb->start,
> +                                             -EIO);
> +                     ClearPageUptodate(page);
> +                     SetPageError(page);
> +             }
> +     }
> +
> +     bio_put(bio);
> +}
> +
>  static void end_extent_buffer_writeback(struct extent_buffer *eb)
>  {
>       clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->ebflags);
> @@ -5330,6 +5402,9 @@ int read_extent_buffer_pages(struct extent_io_tree 
> *tree,
>                            struct extent_buffer *eb, u64 start, int wait,
>                            get_extent_t *get_extent, int mirror_num)
>  {
> +     struct inode *inode = tree->mapping->host;
> +     struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
> +     struct extent_state *cached_state = NULL;
>       unsigned long i;
>       unsigned long start_i;
>       struct page *page;
> @@ -5376,15 +5451,31 @@ int read_extent_buffer_pages(struct extent_io_tree 
> *tree,
>  
>       clear_bit(EXTENT_BUFFER_READ_ERR, &eb->ebflags);
>       eb->read_mirror = 0;
> -     atomic_set(&eb->io_pages, num_reads);
> +     atomic_set(&eb_head(eb)->io_bvecs, num_reads);
>       for (i = start_i; i < num_pages; i++) {
>               page = eb_head(eb)->pages[i];
>               if (!PageUptodate(page)) {
>                       ClearPageError(page);
> -                     err = __extent_read_full_page(tree, page,
> -                                                   get_extent, &bio,
> -                                                   mirror_num, &bio_flags,
> -                                                   READ | REQ_META);
> +                     if (eb->len < PAGE_CACHE_SIZE) {
> +                             lock_extent_bits(tree, eb->start, eb->start + 
> eb->len - 1, 0,
> +                                                     &cached_state);
> +                             err = submit_extent_page(READ | REQ_META, tree,
> +                                                     page, eb->start >> 9,
> +                                                     eb->len, eb->start - 
> page_offset(page),
> +                                                     
> fs_info->fs_devices->latest_bdev,
> +                                                     &bio, -1, 
> end_bio_extent_buffer_readpage,
> +                                                     mirror_num, bio_flags, 
> bio_flags);
> +                     } else {
> +                             lock_extent_bits(tree, page_offset(page),
> +                                             page_offset(page) + 
> PAGE_CACHE_SIZE - 1,
> +                                             0, &cached_state);
> +                             err = submit_extent_page(READ | REQ_META, tree,
> +                                                     page, page_offset(page) 
> >> 9,
> +                                                     PAGE_CACHE_SIZE, 0,
> +                                                     
> fs_info->fs_devices->latest_bdev,
> +                                                     &bio, -1, 
> end_bio_extent_buffer_readpage,
> +                                                     mirror_num, bio_flags, 
> bio_flags);
> +                     }
>                       if (err)
>                               ret = err;
>               } else {
> @@ -5405,10 +5496,11 @@ int read_extent_buffer_pages(struct extent_io_tree 
> *tree,
>       for (i = start_i; i < num_pages; i++) {
>               page = eb_head(eb)->pages[i];
>               wait_on_page_locked(page);
> -             if (!PageUptodate(page))
> -                     ret = -EIO;
>       }
>  
> +     if (!extent_buffer_uptodate(eb))
> +             ret = -EIO;
> +
>       return ret;
>  
>  unlock_exit:
> -- 
> 2.1.0
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to