Hi Greg,

Below is a backport of upstream commit f742dc4a32587bff50b13dde9d8894b96851951a
to the 3.6.y branch.

Thanks,
Tao

From f7ff690e8121d5142129d4a7854205287d17f92a Mon Sep 17 00:00:00 2001
From: Peng Tao <[email protected]>
Date: Fri, 24 Aug 2012 00:27:52 +0800
Subject: [PATCH 1/2] pnfsblock: fix non-aligned DIO read

For DIO read, if it is not sector aligned, we should reject it
and resend via MDS. Otherwise there might be data corruption.
Also teach bl_read_pagelist to handle partial page reads for DIO.

Signed-off-by: Peng Tao <[email protected]>
Signed-off-by: Trond Myklebust <[email protected]>
---
 fs/nfs/blocklayout/blocklayout.c |   77 ++++++++++++++++++++++---------------
 1 files changed, 46 insertions(+), 31 deletions(-)

diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index f3d16ad..e5dfef5 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -242,14 +242,6 @@ bl_end_par_io_read(void *data, int unused)
        schedule_work(&rdata->task.u.tk_work);
 }
 
-static bool
-bl_check_alignment(u64 offset, u32 len, unsigned long blkmask)
-{
-       if ((offset & blkmask) || (len & blkmask))
-               return false;
-       return true;
-}
-
 static enum pnfs_try_status
 bl_read_pagelist(struct nfs_read_data *rdata)
 {
@@ -260,15 +252,15 @@ bl_read_pagelist(struct nfs_read_data *rdata)
        sector_t isect, extent_length = 0;
        struct parallel_io *par;
        loff_t f_offset = rdata->args.offset;
+       size_t bytes_left = rdata->args.count;
+       unsigned int pg_offset, pg_len;
        struct page **pages = rdata->args.pages;
        int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT;
+       const bool is_dio = (header->dreq != NULL);
 
        dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__,
               rdata->pages.npages, f_offset, (unsigned int)rdata->args.count);
 
-       if (!bl_check_alignment(f_offset, rdata->args.count, PAGE_CACHE_MASK))
-               goto use_mds;
-
        par = alloc_parallel(rdata);
        if (!par)
                goto use_mds;
@@ -298,36 +290,53 @@ bl_read_pagelist(struct nfs_read_data *rdata)
                                extent_length = min(extent_length, cow_length);
                        }
                }
+
+               if (is_dio) {
+                       pg_offset = f_offset & ~PAGE_CACHE_MASK;
+                       if (pg_offset + bytes_left > PAGE_CACHE_SIZE)
+                               pg_len = PAGE_CACHE_SIZE - pg_offset;
+                       else
+                               pg_len = bytes_left;
+
+                       f_offset += pg_len;
+                       bytes_left -= pg_len;
+                       isect += (pg_offset >> SECTOR_SHIFT);
+               } else {
+                       pg_offset = 0;
+                       pg_len = PAGE_CACHE_SIZE;
+               }
+
                hole = is_hole(be, isect);
                if (hole && !cow_read) {
                        bio = bl_submit_bio(READ, bio);
                        /* Fill hole w/ zeroes w/o accessing device */
                        dprintk("%s Zeroing page for hole\n", __func__);
-                       zero_user_segment(pages[i], 0, PAGE_CACHE_SIZE);
+                       zero_user_segment(pages[i], pg_offset, pg_len);
                        print_page(pages[i]);
                        SetPageUptodate(pages[i]);
                } else {
                        struct pnfs_block_extent *be_read;
 
                        be_read = (hole && cow_read) ? cow_read : be;
-                       bio = bl_add_page_to_bio(bio, rdata->pages.npages - i,
+                       bio = do_add_page_to_bio(bio, rdata->pages.npages - i,
                                                 READ,
                                                 isect, pages[i], be_read,
-                                                bl_end_io_read, par);
+                                                bl_end_io_read, par,
+                                                pg_offset, pg_len);
                        if (IS_ERR(bio)) {
                                header->pnfs_error = PTR_ERR(bio);
                                bio = NULL;
                                goto out;
                        }
                }
-               isect += PAGE_CACHE_SECTORS;
+               isect += (pg_len >> SECTOR_SHIFT);
                extent_length -= PAGE_CACHE_SECTORS;
        }
        if ((isect << SECTOR_SHIFT) >= header->inode->i_size) {
                rdata->res.eof = 1;
-               rdata->res.count = header->inode->i_size - f_offset;
+               rdata->res.count = header->inode->i_size - rdata->args.offset;
        } else {
-               rdata->res.count = (isect << SECTOR_SHIFT) - f_offset;
+               rdata->res.count = (isect << SECTOR_SHIFT) - rdata->args.offset;
        }
 out:
        bl_put_extent(be);
@@ -676,7 +685,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
        struct bio *bio = NULL;
        struct pnfs_block_extent *be = NULL, *cow_read = NULL;
        sector_t isect, last_isect = 0, extent_length = 0;
-       struct parallel_io *par = NULL;
+       struct parallel_io *par;
        loff_t offset = wdata->args.offset;
        size_t count = wdata->args.count;
        unsigned int pg_offset, pg_len, saved_len;
@@ -688,10 +697,6 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
            NFS_SERVER(header->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT;
 
        dprintk("%s enter, %Zu@%lld\n", __func__, count, offset);
-       /* Check for alignment first */
-       if (!bl_check_alignment(offset, count, PAGE_CACHE_MASK))
-               goto out_mds;
-
        /* At this point, wdata->pages is a (sequential) list of nfs_pages.
         * We want to write each, and if there is an error set pnfs_error
         * to have it redone using nfs.
@@ -1164,32 +1169,42 @@ bl_clear_layoutdriver(struct nfs_server *server)
        return 0;
 }
 
+static bool
+is_aligned_req(struct nfs_page *req, unsigned int alignment)
+{
+       return IS_ALIGNED(req->wb_offset, alignment) &&
+              IS_ALIGNED(req->wb_bytes, alignment);
+}
+
 static void
 bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
 {
-       if (!bl_check_alignment(req->wb_offset, req->wb_bytes, PAGE_CACHE_MASK))
+       if (pgio->pg_dreq != NULL &&
+           !is_aligned_req(req, SECTOR_SIZE))
                nfs_pageio_reset_read_mds(pgio);
        else
                pnfs_generic_pg_init_read(pgio, req);
 }
 
-static void
-bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
+static bool
+bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
+               struct nfs_page *req)
 {
-       if (!bl_check_alignment(req->wb_offset, req->wb_bytes, PAGE_CACHE_MASK))
-               nfs_pageio_reset_write_mds(pgio);
-       else
-               pnfs_generic_pg_init_write(pgio, req);
+       if (pgio->pg_dreq != NULL &&
+           !is_aligned_req(req, SECTOR_SIZE))
+               return false;
+
+       return pnfs_generic_pg_test(pgio, prev, req);
 }
 
 static const struct nfs_pageio_ops bl_pg_read_ops = {
        .pg_init = bl_pg_init_read,
-       .pg_test = pnfs_generic_pg_test,
+       .pg_test = bl_pg_test_read,
        .pg_doio = pnfs_generic_pg_readpages,
 };
 
 static const struct nfs_pageio_ops bl_pg_write_ops = {
-       .pg_init = bl_pg_init_write,
+       .pg_init = pnfs_generic_pg_init_write,
        .pg_test = pnfs_generic_pg_test,
        .pg_doio = pnfs_generic_pg_writepages,
 };
-- 
1.7.1


> -----Original Message-----
> From: [email protected] [mailto:[email protected]]
> Sent: Sunday, October 14, 2012 6:29 PM
> To: [email protected]; [email protected]; Peng, Tao; 
> [email protected]
> Cc: [email protected]
> Subject: FAILED: patch "[PATCH] pnfsblock: fix non-aligned DIO read" failed 
> to apply to 3.6-stable
> tree
> 
> 
> The patch below does not apply to the 3.6-stable tree.
> If someone wants it applied there, or to any other stable or longterm
> tree, then please email the backport, including the original git commit
> id to <[email protected]>.
> 
> thanks,
> 
> greg k-h
> 
> ------------------ original commit in Linus's tree ------------------
> 
> From f742dc4a32587bff50b13dde9d8894b96851951a Mon Sep 17 00:00:00 2001
> From: Peng Tao <[email protected]>
> Date: Fri, 24 Aug 2012 00:27:52 +0800
> Subject: [PATCH] pnfsblock: fix non-aligned DIO read
> 
> For DIO read, if it is not sector aligned, we should reject it
> and resend via MDS. Otherwise there might be data corruption.
> Also teach bl_read_pagelist to handle partial page reads for DIO.
> 
> Cc: stable <[email protected]> [since v3.4]
> Signed-off-by: Peng Tao <[email protected]>
> Signed-off-by: Trond Myklebust <[email protected]>
> 
> diff --git a/fs/nfs/blocklayout/blocklayout.c 
> b/fs/nfs/blocklayout/blocklayout.c
> index a9fe644..61e04fb 100644
> --- a/fs/nfs/blocklayout/blocklayout.c
> +++ b/fs/nfs/blocklayout/blocklayout.c
> @@ -252,8 +252,11 @@ bl_read_pagelist(struct nfs_read_data *rdata)
>       sector_t isect, extent_length = 0;
>       struct parallel_io *par;
>       loff_t f_offset = rdata->args.offset;
> +     size_t bytes_left = rdata->args.count;
> +     unsigned int pg_offset, pg_len;
>       struct page **pages = rdata->args.pages;
>       int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT;
> +     const bool is_dio = (header->dreq != NULL);
> 
>       dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__,
>              rdata->pages.npages, f_offset, (unsigned int)rdata->args.count);
> @@ -287,36 +290,53 @@ bl_read_pagelist(struct nfs_read_data *rdata)
>                               extent_length = min(extent_length, cow_length);
>                       }
>               }
> +
> +             if (is_dio) {
> +                     pg_offset = f_offset & ~PAGE_CACHE_MASK;
> +                     if (pg_offset + bytes_left > PAGE_CACHE_SIZE)
> +                             pg_len = PAGE_CACHE_SIZE - pg_offset;
> +                     else
> +                             pg_len = bytes_left;
> +
> +                     f_offset += pg_len;
> +                     bytes_left -= pg_len;
> +                     isect += (pg_offset >> SECTOR_SHIFT);
> +             } else {
> +                     pg_offset = 0;
> +                     pg_len = PAGE_CACHE_SIZE;
> +             }
> +
>               hole = is_hole(be, isect);
>               if (hole && !cow_read) {
>                       bio = bl_submit_bio(READ, bio);
>                       /* Fill hole w/ zeroes w/o accessing device */
>                       dprintk("%s Zeroing page for hole\n", __func__);
> -                     zero_user_segment(pages[i], 0, PAGE_CACHE_SIZE);
> +                     zero_user_segment(pages[i], pg_offset, pg_len);
>                       print_page(pages[i]);
>                       SetPageUptodate(pages[i]);
>               } else {
>                       struct pnfs_block_extent *be_read;
> 
>                       be_read = (hole && cow_read) ? cow_read : be;
> -                     bio = bl_add_page_to_bio(bio, rdata->pages.npages - i,
> +                     bio = do_add_page_to_bio(bio, rdata->pages.npages - i,
>                                                READ,
>                                                isect, pages[i], be_read,
> -                                              bl_end_io_read, par);
> +                                              bl_end_io_read, par,
> +                                              pg_offset, pg_len);
>                       if (IS_ERR(bio)) {
>                               header->pnfs_error = PTR_ERR(bio);
>                               bio = NULL;
>                               goto out;
>                       }
>               }
> -             isect += PAGE_CACHE_SECTORS;
> +             isect += (pg_len >> SECTOR_SHIFT);
>               extent_length -= PAGE_CACHE_SECTORS;
>       }
>       if ((isect << SECTOR_SHIFT) >= header->inode->i_size) {
>               rdata->res.eof = 1;
> -             rdata->res.count = header->inode->i_size - f_offset;
> +             rdata->res.count = header->inode->i_size - rdata->args.offset;
>       } else {
> -             rdata->res.count = (isect << SECTOR_SHIFT) - f_offset;
> +             rdata->res.count = (isect << SECTOR_SHIFT) - rdata->args.offset;
>       }
>  out:
>       bl_put_extent(be);
> @@ -1149,9 +1169,37 @@ bl_clear_layoutdriver(struct nfs_server *server)
>       return 0;
>  }
> 
> +static bool
> +is_aligned_req(struct nfs_page *req, unsigned int alignment)
> +{
> +     return IS_ALIGNED(req->wb_offset, alignment) &&
> +            IS_ALIGNED(req->wb_bytes, alignment);
> +}
> +
> +static void
> +bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
> +{
> +     if (pgio->pg_dreq != NULL &&
> +         !is_aligned_req(req, SECTOR_SIZE))
> +             nfs_pageio_reset_read_mds(pgio);
> +     else
> +             pnfs_generic_pg_init_read(pgio, req);
> +}
> +
> +static bool
> +bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
> +             struct nfs_page *req)
> +{
> +     if (pgio->pg_dreq != NULL &&
> +         !is_aligned_req(req, SECTOR_SIZE))
> +             return false;
> +
> +     return pnfs_generic_pg_test(pgio, prev, req);
> +}
> +
>  static const struct nfs_pageio_ops bl_pg_read_ops = {
> -     .pg_init = pnfs_generic_pg_init_read,
> -     .pg_test = pnfs_generic_pg_test,
> +     .pg_init = bl_pg_init_read,
> +     .pg_test = bl_pg_test_read,
>       .pg_doio = pnfs_generic_pg_readpages,
>  };
> 
> 

Reply via email to