Re: [Cluster-devel] [PATCH V11 15/19] block: enable multipage bvecs

2018-11-23 Thread Ming Lei
On Wed, Nov 21, 2018 at 05:12:06PM +0100, Christoph Hellwig wrote:
> On Wed, Nov 21, 2018 at 11:48:13PM +0800, Ming Lei wrote:
> > I guess the correct check should be:
> > 
> >   end_addr = vec_addr + bv->bv_offset + bv->bv_len;
> >   if (same_page &&
> >   (end_addr & PAGE_MASK) != (page_addr & PAGE_MASK))
> >   return false;
> 
> Indeed.

The above is still not completely correct; it should have been:

	end_addr = vec_addr + bv->bv_offset + bv->bv_len - 1;
	if (same_page && (end_addr & PAGE_MASK) != page_addr)
		return false;

Also, bv->bv_len has to be guaranteed to be greater than zero, otherwise
end_addr - 1 would point before the segment's data.

It also shows how easy it is to get the check on the last page wrong. :-(
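
BTW, here is a throwaway userspace sketch (illustrative only, not kernel
code; PAGE_SIZE/PAGE_MASK are redefined locally and the addresses are made
up) of why the '- 1' matters when the last segment ends exactly on a page
boundary:

#include <stdio.h>

#define PAGE_SIZE	4096UL
#define PAGE_MASK	(~(PAGE_SIZE - 1))

int main(void)
{
	/* pretend the last bvec fills one whole physical page */
	unsigned long vec_addr = 0x10000;	/* page_to_phys(bv->bv_page) */
	unsigned long bv_offset = 0, bv_len = PAGE_SIZE;
	/* the new data starts at offset 0 of the next physical page */
	unsigned long page_addr = vec_addr + PAGE_SIZE;

	unsigned long end_wrong = vec_addr + bv_offset + bv_len;
	unsigned long end_right = vec_addr + bv_offset + bv_len - 1;

	/* without the -1 the end address already masks to the next page,
	 * so a same_page merge across the boundary would look legal */
	printf("without -1: same page? %d\n",
	       (end_wrong & PAGE_MASK) == (page_addr & PAGE_MASK));
	printf("with    -1: same page? %d\n",
	       (end_right & PAGE_MASK) == (page_addr & PAGE_MASK));
	return 0;
}

Only the variant with the '- 1' reports the two bytes as being in different
pages, which is what allows the same_page check to refuse the cross-page
merge.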


Thanks,
Ming



Re: [Cluster-devel] [PATCH V11 15/19] block: enable multipage bvecs

2018-11-21 Thread Christoph Hellwig
On Wed, Nov 21, 2018 at 11:48:13PM +0800, Ming Lei wrote:
> I guess the correct check should be:
> 
>   end_addr = vec_addr + bv->bv_offset + bv->bv_len;
>   if (same_page &&
>   (end_addr & PAGE_MASK) != (page_addr & PAGE_MASK))
>   return false;

Indeed.



Re: [Cluster-devel] [PATCH V11 15/19] block: enable multipage bvecs

2018-11-21 Thread Ming Lei
On Wed, Nov 21, 2018 at 03:55:02PM +0100, Christoph Hellwig wrote:
> On Wed, Nov 21, 2018 at 11:23:23AM +0800, Ming Lei wrote:
> > if (bio->bi_vcnt > 0) {
> > -   struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
> > +   struct bio_vec bv;
> > +   struct bio_vec *seg = &bio->bi_io_vec[bio->bi_vcnt - 1];
> >  
> > -   if (page == bv->bv_page && off == bv->bv_offset + bv->bv_len) {
> > -   bv->bv_len += len;
> > +   bvec_last_segment(seg, &bv);
> > +
> > +   if (page == bv.bv_page && off == bv.bv_offset + bv.bv_len) {
> 
> I think we can simplify the try-to-merge-into-bio case a bit, and also
> document it better, with something like this:
> 
> diff --git a/block/bio.c b/block/bio.c
> index 854676edc438..cc913281a723 100644
> --- a/block/bio.c
> +++ b/block/bio.c
> @@ -822,54 +822,40 @@ EXPORT_SYMBOL(bio_add_pc_page);
>   * @page: page to add
>   * @len: length of the data to add
>   * @off: offset of the data in @page
> + * @same_page: if %true only merge if the new data is in the same physical
> + *   page as the last segment of the bio.
>   *
> - * Try to add the data at @page + @off to the last page of @bio.  This is a
> + * Try to add the data at @page + @off to the last bvec of @bio.  This is a
>   * a useful optimisation for file systems with a block size smaller than the
>   * page size.
>   *
>   * Return %true on success or %false on failure.
>   */
>  bool __bio_try_merge_page(struct bio *bio, struct page *page,
> - unsigned int len, unsigned int off)
> + unsigned int len, unsigned int off, bool same_page)
>  {
>   if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
>   return false;
>  
>   if (bio->bi_vcnt > 0) {
> - struct bio_vec bv;
> - struct bio_vec *seg = &bio->bi_io_vec[bio->bi_vcnt - 1];
> -
> - bvec_last_segment(seg, &bv);
> -
> - if (page == bv.bv_page && off == bv.bv_offset + bv.bv_len) {
> - seg->bv_len += len;
> - bio->bi_iter.bi_size += len;
> - return true;
> - }
> + struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
> + phys_addr_t vec_addr = page_to_phys(bv->bv_page);
> + phys_addr_t page_addr = page_to_phys(page);
> +
> + if (vec_addr + bv->bv_offset + bv->bv_len != page_addr + off)
> + return false;
> + if (same_page &&
> + (vec_addr & PAGE_SIZE) != (page_addr & PAGE_SIZE))
> + return false;

I guess the correct check should be:

	end_addr = vec_addr + bv->bv_offset + bv->bv_len;
	if (same_page &&
	    (end_addr & PAGE_MASK) != (page_addr & PAGE_MASK))
		return false;

And this approach is good; I will take it in V12.
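
As an aside, masking with PAGE_SIZE only isolates a single bit, so it can't
distinguish pages at all. A quick userspace illustration (not kernel code;
constants and addresses are made up):

#include <stdio.h>

#define PAGE_SIZE	4096UL
#define PAGE_MASK	(~(PAGE_SIZE - 1))

int main(void)
{
	/* two addresses in completely different (made-up) pages */
	unsigned long a = 0x10000 + 8;	/* inside page 0x10000 */
	unsigned long b = 0x42000 + 8;	/* inside page 0x42000 */

	/* & PAGE_SIZE only tests bit 12: both results are 0 here, so the
	 * two addresses would wrongly be treated as the same page */
	printf("& PAGE_SIZE: %#lx vs %#lx\n", a & PAGE_SIZE, b & PAGE_SIZE);

	/* & PAGE_MASK yields the page-aligned base, which does differ */
	printf("& PAGE_MASK: %#lx vs %#lx\n", a & PAGE_MASK, b & PAGE_MASK);
	return 0;
}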

Thanks,
Ming



Re: [Cluster-devel] [PATCH V11 15/19] block: enable multipage bvecs

2018-11-21 Thread Christoph Hellwig
On Wed, Nov 21, 2018 at 11:23:23AM +0800, Ming Lei wrote:
>   if (bio->bi_vcnt > 0) {
> - struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
> + struct bio_vec bv;
> + struct bio_vec *seg = &bio->bi_io_vec[bio->bi_vcnt - 1];
>  
> - if (page == bv->bv_page && off == bv->bv_offset + bv->bv_len) {
> - bv->bv_len += len;
> + bvec_last_segment(seg, &bv);
> +
> + if (page == bv.bv_page && off == bv.bv_offset + bv.bv_len) {

I think we can simplify the try-to-merge-into-bio case a bit, and also
document it better, with something like this:

diff --git a/block/bio.c b/block/bio.c
index 854676edc438..cc913281a723 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -822,54 +822,40 @@ EXPORT_SYMBOL(bio_add_pc_page);
  * @page: page to add
  * @len: length of the data to add
  * @off: offset of the data in @page
+ * @same_page: if %true only merge if the new data is in the same physical
+ *   page as the last segment of the bio.
  *
- * Try to add the data at @page + @off to the last page of @bio.  This is a
+ * Try to add the data at @page + @off to the last bvec of @bio.  This is a
  * a useful optimisation for file systems with a block size smaller than the
  * page size.
  *
  * Return %true on success or %false on failure.
  */
 bool __bio_try_merge_page(struct bio *bio, struct page *page,
-   unsigned int len, unsigned int off)
+   unsigned int len, unsigned int off, bool same_page)
 {
if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
return false;
 
if (bio->bi_vcnt > 0) {
-   struct bio_vec bv;
-   struct bio_vec *seg = &bio->bi_io_vec[bio->bi_vcnt - 1];
-
-   bvec_last_segment(seg, &bv);
-
-   if (page == bv.bv_page && off == bv.bv_offset + bv.bv_len) {
-   seg->bv_len += len;
-   bio->bi_iter.bi_size += len;
-   return true;
-   }
+   struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
+   phys_addr_t vec_addr = page_to_phys(bv->bv_page);
+   phys_addr_t page_addr = page_to_phys(page);
+
+   if (vec_addr + bv->bv_offset + bv->bv_len != page_addr + off)
+   return false;
+   if (same_page &&
+   (vec_addr & PAGE_SIZE) != (page_addr & PAGE_SIZE))
+   return false;
+
+   bv->bv_len += len;
+   bio->bi_iter.bi_size += len;
+   return true;
}
return false;
 }
 EXPORT_SYMBOL_GPL(__bio_try_merge_page);
 
-static bool bio_try_merge_segment(struct bio *bio, struct page *page,
- unsigned int len, unsigned int off)
-{
-   if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
-   return false;
-
-   if (bio->bi_vcnt > 0) {
-   struct bio_vec *seg = &bio->bi_io_vec[bio->bi_vcnt - 1];
-
-   if (page_to_phys(seg->bv_page) + seg->bv_offset + seg->bv_len ==
-   page_to_phys(page) + off) {
-   seg->bv_len += len;
-   bio->bi_iter.bi_size += len;
-   return true;
-   }
-   }
-   return false;
-}
-
 /**
  * __bio_add_page - add page to a bio in a new segment
  * @bio: destination bio
@@ -910,7 +896,7 @@ EXPORT_SYMBOL_GPL(__bio_add_page);
 int bio_add_page(struct bio *bio, struct page *page,
 unsigned int len, unsigned int offset)
 {
-   if (!bio_try_merge_segment(bio, page, len, offset)) {
+   if (!__bio_try_merge_page(bio, page, len, offset, false)) {
if (bio_full(bio))
return 0;
__bio_add_page(bio, page, len, offset);
diff --git a/fs/iomap.c b/fs/iomap.c
index ccc2ba115f4d..d918acb9bfc9 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -313,7 +313,7 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 */
sector = iomap_sector(iomap, pos);
if (ctx->bio && bio_end_sector(ctx->bio) == sector) {
-   if (__bio_try_merge_page(ctx->bio, page, plen, poff))
+   if (__bio_try_merge_page(ctx->bio, page, plen, poff, true))
goto done;
is_contig = true;
}
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 5c2190216614..b9fd44168f61 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -616,7 +616,7 @@ xfs_add_to_ioend(
bdev, sector);
}
 
-   if (!__bio_try_merge_page(wpc->ioend->io_bio, page, len, poff)) {
+   if (!__bio_try_merge_page(wpc->ioend->io_bio, page, len, poff, true)) {
if (iop)
atomic_inc(&iop->write_count);
if (bio_full(wpc->ioend->io_bio))
diff --git a/include/linux/bio.h b/include/linux/bio.h
index e5b975fa0558..f08e6940c1ab 100644
--- 

[Cluster-devel] [PATCH V11 15/19] block: enable multipage bvecs

2018-11-20 Thread Ming Lei
This patch pulls the trigger for multi-page bvecs.

Signed-off-by: Ming Lei 
---
 block/bio.c   | 32 +++-
 fs/iomap.c|  2 +-
 fs/xfs/xfs_aops.c |  2 +-
 3 files changed, 29 insertions(+), 7 deletions(-)

diff --git a/block/bio.c b/block/bio.c
index 0f1635b9ec50..854676edc438 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -823,7 +823,7 @@ EXPORT_SYMBOL(bio_add_pc_page);
  * @len: length of the data to add
  * @off: offset of the data in @page
  *
- * Try to add the data at @page + @off to the last bvec of @bio.  This is a
+ * Try to add the data at @page + @off to the last page of @bio.  This is a
  * a useful optimisation for file systems with a block size smaller than the
  * page size.
  *
@@ -836,10 +836,13 @@ bool __bio_try_merge_page(struct bio *bio, struct page *page,
return false;
 
if (bio->bi_vcnt > 0) {
-   struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
+   struct bio_vec bv;
+   struct bio_vec *seg = &bio->bi_io_vec[bio->bi_vcnt - 1];
 
-   if (page == bv->bv_page && off == bv->bv_offset + bv->bv_len) {
-   bv->bv_len += len;
+   bvec_last_segment(seg, &bv);
+
+   if (page == bv.bv_page && off == bv.bv_offset + bv.bv_len) {
+   seg->bv_len += len;
bio->bi_iter.bi_size += len;
return true;
}
@@ -848,6 +851,25 @@ bool __bio_try_merge_page(struct bio *bio, struct page *page,
 }
 EXPORT_SYMBOL_GPL(__bio_try_merge_page);
 
+static bool bio_try_merge_segment(struct bio *bio, struct page *page,
+ unsigned int len, unsigned int off)
+{
+   if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
+   return false;
+
+   if (bio->bi_vcnt > 0) {
+   struct bio_vec *seg = &bio->bi_io_vec[bio->bi_vcnt - 1];
+
+   if (page_to_phys(seg->bv_page) + seg->bv_offset + seg->bv_len ==
+   page_to_phys(page) + off) {
+   seg->bv_len += len;
+   bio->bi_iter.bi_size += len;
+   return true;
+   }
+   }
+   return false;
+}
+
 /**
  * __bio_add_page - add page to a bio in a new segment
  * @bio: destination bio
@@ -888,7 +910,7 @@ EXPORT_SYMBOL_GPL(__bio_add_page);
 int bio_add_page(struct bio *bio, struct page *page,
 unsigned int len, unsigned int offset)
 {
-   if (!__bio_try_merge_page(bio, page, len, offset)) {
+   if (!bio_try_merge_segment(bio, page, len, offset)) {
if (bio_full(bio))
return 0;
__bio_add_page(bio, page, len, offset);
diff --git a/fs/iomap.c b/fs/iomap.c
index f5fb8bf75cc8..ccc2ba115f4d 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -344,7 +344,7 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
ctx->bio->bi_end_io = iomap_read_end_io;
}
 
-   __bio_add_page(ctx->bio, page, plen, poff);
+   bio_add_page(ctx->bio, page, plen, poff);
 done:
/*
 * Move the caller beyond our range so that it keeps making progress.
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 1f1829e506e8..5c2190216614 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -621,7 +621,7 @@ xfs_add_to_ioend(
atomic_inc(&iop->write_count);
if (bio_full(wpc->ioend->io_bio))
xfs_chain_bio(wpc->ioend, wbc, bdev, sector);
-   __bio_add_page(wpc->ioend->io_bio, page, len, poff);
+   bio_add_page(wpc->ioend->io_bio, page, len, poff);
}
 
wpc->ioend->io_size += len;
-- 
2.9.5