Re: [RESEND PATCH v5 1/2] bio: limit bio max size

2021-04-07 Thread Greg KH
On Wed, Apr 07, 2021 at 03:55:07PM +0900, Changheun Lee wrote:
> > On Wed, Apr 07, 2021 at 02:06:33PM +0900, Changheun Lee wrote:
> > > > On Wed, Apr 07, 2021 at 09:16:12AM +0900, Changheun Lee wrote:
> > > > > > On Tue, Apr 06, 2021 at 10:31:28AM +0900, Changheun Lee wrote:
> > > > > > > > bio size can grow up to 4GB when muli-page bvec is enabled.
> > > > > > > > but sometimes it would lead to inefficient behaviors.
> > > > > > > > in case of large chunk direct I/O, - 32MB chunk read in user 
> > > > > > > > space -
> > > > > > > > all pages for 32MB would be merged to a bio structure if the 
> > > > > > > > pages
> > > > > > > > physical addresses are contiguous. it makes some delay to submit
> > > > > > > > until merge complete. bio max size should be limited to a 
> > > > > > > > proper size.
> > > > > > > > 
> > > > > > > > When 32MB chunk read with direct I/O option is coming from 
> > > > > > > > userspace,
> > > > > > > > kernel behavior is below now in do_direct_IO() loop. it's 
> > > > > > > > timeline.
> > > > > > > > 
> > > > > > > >  | bio merge for 32MB. total 8,192 pages are merged.
> > > > > > > >  | total elapsed time is over 2ms.
> > > > > > > >  |-- ... --->|
> > > > > > > >  | 8,192 pages 
> > > > > > > > merged a bio.
> > > > > > > >  | at this 
> > > > > > > > time, first bio submit is done.
> > > > > > > >  | 1 bio is 
> > > > > > > > split to 32 read request and issue.
> > > > > > > >  
> > > > > > > > |--->
> > > > > > > >   
> > > > > > > > |--->
> > > > > > > >
> > > > > > > > |--->
> > > > > > > >   
> > > > > > > > ..
> > > > > > > > 
> > > > > > > >|--->
> > > > > > > > 
> > > > > > > > |--->|
> > > > > > > >   total 19ms elapsed to complete 32MB 
> > > > > > > > read done from device. |
> > > > > > > > 
> > > > > > > > If bio max size is limited with 1MB, behavior is changed below.
> > > > > > > > 
> > > > > > > >  | bio merge for 1MB. 256 pages are merged for each bio.
> > > > > > > >  | total 32 bio will be made.
> > > > > > > >  | total elapsed time is over 2ms. it's same.
> > > > > > > >  | but, first bio submit timing is fast. about 100us.
> > > > > > > >  |--->|--->|--->|---> ... -->|--->|--->|--->|--->|
> > > > > > > >   | 256 pages merged a bio.
> > > > > > > >   | at this time, first bio submit is done.
> > > > > > > >   | and 1 read request is issued for 1 bio.
> > > > > > > >   |--->
> > > > > > > >|--->
> > > > > > > > |--->
> > > > > > > >   ..
> > > > > > > >  
> > > > > > > > |--->
> > > > > > > >   
> > > > > > > > |--->|
> > > > > > > > total 17ms elapsed to complete 32MB read done from 
> > > > > > > > device. |
> > > > > > > > 
> > > > > > > > As a result, read request issue timing is faster if bio max 
> > > > > > > > size is limited.
> > > > > > > > Current kernel behavior with multipage bvec, super large bio 
> > > > > > > > can be created.
> > > > > > > > And it lead to delay first I/O request issue.
> > > > > > > > 
> > > > > > > > Signed-off-by: Changheun Lee 
> > > > > > > > ---
> > > > > > > >  block/bio.c| 13 -
> > > > > > > >  include/linux/bio.h|  2 +-
> > > > > > > >  include/linux/blkdev.h |  3 +++
> > > > > > > >  3 files changed, 16 insertions(+), 2 deletions(-)
> > > > > > > > 
> > > > > > > > diff --git a/block/bio.c b/block/bio.c
> > > > > > > > index 1f2cc1fbe283..c528e1f944c7 100644
> > > > > > > > --- a/block/bio.c
> > > > > > > > +++ b/block/bio.c
> > > > > > > > @@ -287,6 +287,17 @@ void bio_init(struct bio *bio, struct 
> > > > > > > > bio_vec *table,
> > > > > > > >  }
> > > > > > > >  EXPORT_SYMBOL(bio_init);
> > > > > > > >  
> > > > > > > > +unsigned int bio_max_size(struct bio *bio)
> > > > > > > > +{
> > > > > > > > +   struct request_queue *q = bio->bi_disk->queue;
> > > > > > > > +
> > > > > > > > +   if (blk_queue_limit_bio_size(q))
> > > > > > > > +   return blk_queue_get_max_sectors(q, bio_op(bio))
> > > > > > > > +   << SECTOR_SHIFT;
> > > > > > > > +
> > > > > > > > +   return UINT_MAX;
> > > > > > > > +}
> > > > > > > > +
> > > > > > > >  /**
> > > > > > > >   * bio_reset - reinitialize a bio
> > > > > > 

Re: [RESEND PATCH v5 1/2] bio: limit bio max size

2021-04-07 Thread Changheun Lee
> On Wed, Apr 07, 2021 at 02:06:33PM +0900, Changheun Lee wrote:
> > > On Wed, Apr 07, 2021 at 09:16:12AM +0900, Changheun Lee wrote:
> > > > > On Tue, Apr 06, 2021 at 10:31:28AM +0900, Changheun Lee wrote:
> > > > > > > bio size can grow up to 4GB when muli-page bvec is enabled.
> > > > > > > but sometimes it would lead to inefficient behaviors.
> > > > > > > in case of large chunk direct I/O, - 32MB chunk read in user 
> > > > > > > space -
> > > > > > > all pages for 32MB would be merged to a bio structure if the pages
> > > > > > > physical addresses are contiguous. it makes some delay to submit
> > > > > > > until merge complete. bio max size should be limited to a proper 
> > > > > > > size.
> > > > > > > 
> > > > > > > When 32MB chunk read with direct I/O option is coming from 
> > > > > > > userspace,
> > > > > > > kernel behavior is below now in do_direct_IO() loop. it's 
> > > > > > > timeline.
> > > > > > > 
> > > > > > >  | bio merge for 32MB. total 8,192 pages are merged.
> > > > > > >  | total elapsed time is over 2ms.
> > > > > > >  |-- ... --->|
> > > > > > >  | 8,192 pages 
> > > > > > > merged a bio.
> > > > > > >  | at this time, 
> > > > > > > first bio submit is done.
> > > > > > >  | 1 bio is split 
> > > > > > > to 32 read request and issue.
> > > > > > >  |--->
> > > > > > >   
> > > > > > > |--->
> > > > > > >
> > > > > > > |--->
> > > > > > >   
> > > > > > > ..
> > > > > > >   
> > > > > > >  |--->
> > > > > > >   
> > > > > > >   |--->|
> > > > > > >   total 19ms elapsed to complete 32MB 
> > > > > > > read done from device. |
> > > > > > > 
> > > > > > > If bio max size is limited with 1MB, behavior is changed below.
> > > > > > > 
> > > > > > >  | bio merge for 1MB. 256 pages are merged for each bio.
> > > > > > >  | total 32 bio will be made.
> > > > > > >  | total elapsed time is over 2ms. it's same.
> > > > > > >  | but, first bio submit timing is fast. about 100us.
> > > > > > >  |--->|--->|--->|---> ... -->|--->|--->|--->|--->|
> > > > > > >   | 256 pages merged a bio.
> > > > > > >   | at this time, first bio submit is done.
> > > > > > >   | and 1 read request is issued for 1 bio.
> > > > > > >   |--->
> > > > > > >|--->
> > > > > > > |--->
> > > > > > >   ..
> > > > > > >  |--->
> > > > > > >   
> > > > > > > |--->|
> > > > > > > total 17ms elapsed to complete 32MB read done from 
> > > > > > > device. |
> > > > > > > 
> > > > > > > As a result, read request issue timing is faster if bio max size 
> > > > > > > is limited.
> > > > > > > Current kernel behavior with multipage bvec, super large bio can 
> > > > > > > be created.
> > > > > > > And it lead to delay first I/O request issue.
> > > > > > > 
> > > > > > > Signed-off-by: Changheun Lee 
> > > > > > > ---
> > > > > > >  block/bio.c| 13 -
> > > > > > >  include/linux/bio.h|  2 +-
> > > > > > >  include/linux/blkdev.h |  3 +++
> > > > > > >  3 files changed, 16 insertions(+), 2 deletions(-)
> > > > > > > 
> > > > > > > diff --git a/block/bio.c b/block/bio.c
> > > > > > > index 1f2cc1fbe283..c528e1f944c7 100644
> > > > > > > --- a/block/bio.c
> > > > > > > +++ b/block/bio.c
> > > > > > > @@ -287,6 +287,17 @@ void bio_init(struct bio *bio, struct 
> > > > > > > bio_vec *table,
> > > > > > >  }
> > > > > > >  EXPORT_SYMBOL(bio_init);
> > > > > > >  
> > > > > > > +unsigned int bio_max_size(struct bio *bio)
> > > > > > > +{
> > > > > > > + struct request_queue *q = bio->bi_disk->queue;
> > > > > > > +
> > > > > > > + if (blk_queue_limit_bio_size(q))
> > > > > > > + return blk_queue_get_max_sectors(q, bio_op(bio))
> > > > > > > + << SECTOR_SHIFT;
> > > > > > > +
> > > > > > > + return UINT_MAX;
> > > > > > > +}
> > > > > > > +
> > > > > > >  /**
> > > > > > >   * bio_reset - reinitialize a bio
> > > > > > >   * @bio: bio to reset
> > > > > > > @@ -877,7 +888,7 @@ bool __bio_try_merge_page(struct bio *bio, 
> > > > > > > struct page *page,
> > > > > > >   struct bio_vec *bv = >bi_io_vec[bio->bi_vcnt - 1];
> > > > > > >  
> > > > > > >   if (page_is_mergeable(bv, page, len, off, same_page)) {
> > > > > > > -

Re: [RESEND PATCH v5 1/2] bio: limit bio max size

2021-04-06 Thread Greg KH
On Wed, Apr 07, 2021 at 02:06:33PM +0900, Changheun Lee wrote:
> > On Wed, Apr 07, 2021 at 09:16:12AM +0900, Changheun Lee wrote:
> > > > On Tue, Apr 06, 2021 at 10:31:28AM +0900, Changheun Lee wrote:
> > > > > > bio size can grow up to 4GB when muli-page bvec is enabled.
> > > > > > but sometimes it would lead to inefficient behaviors.
> > > > > > in case of large chunk direct I/O, - 32MB chunk read in user space -
> > > > > > all pages for 32MB would be merged to a bio structure if the pages
> > > > > > physical addresses are contiguous. it makes some delay to submit
> > > > > > until merge complete. bio max size should be limited to a proper 
> > > > > > size.
> > > > > > 
> > > > > > When 32MB chunk read with direct I/O option is coming from 
> > > > > > userspace,
> > > > > > kernel behavior is below now in do_direct_IO() loop. it's timeline.
> > > > > > 
> > > > > >  | bio merge for 32MB. total 8,192 pages are merged.
> > > > > >  | total elapsed time is over 2ms.
> > > > > >  |-- ... --->|
> > > > > >  | 8,192 pages 
> > > > > > merged a bio.
> > > > > >  | at this time, 
> > > > > > first bio submit is done.
> > > > > >  | 1 bio is split 
> > > > > > to 32 read request and issue.
> > > > > >  |--->
> > > > > >   |--->
> > > > > >|--->
> > > > > >   ..
> > > > > >
> > > > > > |--->
> > > > > > 
> > > > > > |--->|
> > > > > >   total 19ms elapsed to complete 32MB read 
> > > > > > done from device. |
> > > > > > 
> > > > > > If bio max size is limited with 1MB, behavior is changed below.
> > > > > > 
> > > > > >  | bio merge for 1MB. 256 pages are merged for each bio.
> > > > > >  | total 32 bio will be made.
> > > > > >  | total elapsed time is over 2ms. it's same.
> > > > > >  | but, first bio submit timing is fast. about 100us.
> > > > > >  |--->|--->|--->|---> ... -->|--->|--->|--->|--->|
> > > > > >   | 256 pages merged a bio.
> > > > > >   | at this time, first bio submit is done.
> > > > > >   | and 1 read request is issued for 1 bio.
> > > > > >   |--->
> > > > > >|--->
> > > > > > |--->
> > > > > >   ..
> > > > > >  |--->
> > > > > >   |--->|
> > > > > > total 17ms elapsed to complete 32MB read done from device. |
> > > > > > 
> > > > > > As a result, read request issue timing is faster if bio max size is 
> > > > > > limited.
> > > > > > Current kernel behavior with multipage bvec, super large bio can be 
> > > > > > created.
> > > > > > And it lead to delay first I/O request issue.
> > > > > > 
> > > > > > Signed-off-by: Changheun Lee 
> > > > > > ---
> > > > > >  block/bio.c| 13 -
> > > > > >  include/linux/bio.h|  2 +-
> > > > > >  include/linux/blkdev.h |  3 +++
> > > > > >  3 files changed, 16 insertions(+), 2 deletions(-)
> > > > > > 
> > > > > > diff --git a/block/bio.c b/block/bio.c
> > > > > > index 1f2cc1fbe283..c528e1f944c7 100644
> > > > > > --- a/block/bio.c
> > > > > > +++ b/block/bio.c
> > > > > > @@ -287,6 +287,17 @@ void bio_init(struct bio *bio, struct bio_vec 
> > > > > > *table,
> > > > > >  }
> > > > > >  EXPORT_SYMBOL(bio_init);
> > > > > >  
> > > > > > +unsigned int bio_max_size(struct bio *bio)
> > > > > > +{
> > > > > > +   struct request_queue *q = bio->bi_disk->queue;
> > > > > > +
> > > > > > +   if (blk_queue_limit_bio_size(q))
> > > > > > +   return blk_queue_get_max_sectors(q, bio_op(bio))
> > > > > > +   << SECTOR_SHIFT;
> > > > > > +
> > > > > > +   return UINT_MAX;
> > > > > > +}
> > > > > > +
> > > > > >  /**
> > > > > >   * bio_reset - reinitialize a bio
> > > > > >   * @bio:   bio to reset
> > > > > > @@ -877,7 +888,7 @@ bool __bio_try_merge_page(struct bio *bio, 
> > > > > > struct page *page,
> > > > > > struct bio_vec *bv = >bi_io_vec[bio->bi_vcnt - 1];
> > > > > >  
> > > > > > if (page_is_mergeable(bv, page, len, off, same_page)) {
> > > > > > -   if (bio->bi_iter.bi_size > UINT_MAX - len) {
> > > > > > +   if (bio->bi_iter.bi_size > bio_max_size(bio) - 
> > > > > > len) {
> > > > > > *same_page = false;
> > > > > > return false;
> > > > > >   

Re: [RESEND PATCH v5 1/2] bio: limit bio max size

2021-04-06 Thread Changheun Lee
> On Wed, Apr 07, 2021 at 09:16:12AM +0900, Changheun Lee wrote:
> > > On Tue, Apr 06, 2021 at 10:31:28AM +0900, Changheun Lee wrote:
> > > > > bio size can grow up to 4GB when muli-page bvec is enabled.
> > > > > but sometimes it would lead to inefficient behaviors.
> > > > > in case of large chunk direct I/O, - 32MB chunk read in user space -
> > > > > all pages for 32MB would be merged to a bio structure if the pages
> > > > > physical addresses are contiguous. it makes some delay to submit
> > > > > until merge complete. bio max size should be limited to a proper size.
> > > > > 
> > > > > When 32MB chunk read with direct I/O option is coming from userspace,
> > > > > kernel behavior is below now in do_direct_IO() loop. it's timeline.
> > > > > 
> > > > >  | bio merge for 32MB. total 8,192 pages are merged.
> > > > >  | total elapsed time is over 2ms.
> > > > >  |-- ... --->|
> > > > >  | 8,192 pages merged 
> > > > > a bio.
> > > > >  | at this time, 
> > > > > first bio submit is done.
> > > > >  | 1 bio is split to 
> > > > > 32 read request and issue.
> > > > >  |--->
> > > > >   |--->
> > > > >|--->
> > > > >   ..
> > > > >
> > > > > |--->
> > > > > 
> > > > > |--->|
> > > > >   total 19ms elapsed to complete 32MB read 
> > > > > done from device. |
> > > > > 
> > > > > If bio max size is limited with 1MB, behavior is changed below.
> > > > > 
> > > > >  | bio merge for 1MB. 256 pages are merged for each bio.
> > > > >  | total 32 bio will be made.
> > > > >  | total elapsed time is over 2ms. it's same.
> > > > >  | but, first bio submit timing is fast. about 100us.
> > > > >  |--->|--->|--->|---> ... -->|--->|--->|--->|--->|
> > > > >   | 256 pages merged a bio.
> > > > >   | at this time, first bio submit is done.
> > > > >   | and 1 read request is issued for 1 bio.
> > > > >   |--->
> > > > >|--->
> > > > > |--->
> > > > >   ..
> > > > >  |--->
> > > > >   |--->|
> > > > > total 17ms elapsed to complete 32MB read done from device. |
> > > > > 
> > > > > As a result, read request issue timing is faster if bio max size is 
> > > > > limited.
> > > > > Current kernel behavior with multipage bvec, super large bio can be 
> > > > > created.
> > > > > And it lead to delay first I/O request issue.
> > > > > 
> > > > > Signed-off-by: Changheun Lee 
> > > > > ---
> > > > >  block/bio.c| 13 -
> > > > >  include/linux/bio.h|  2 +-
> > > > >  include/linux/blkdev.h |  3 +++
> > > > >  3 files changed, 16 insertions(+), 2 deletions(-)
> > > > > 
> > > > > diff --git a/block/bio.c b/block/bio.c
> > > > > index 1f2cc1fbe283..c528e1f944c7 100644
> > > > > --- a/block/bio.c
> > > > > +++ b/block/bio.c
> > > > > @@ -287,6 +287,17 @@ void bio_init(struct bio *bio, struct bio_vec 
> > > > > *table,
> > > > >  }
> > > > >  EXPORT_SYMBOL(bio_init);
> > > > >  
> > > > > +unsigned int bio_max_size(struct bio *bio)
> > > > > +{
> > > > > + struct request_queue *q = bio->bi_disk->queue;
> > > > > +
> > > > > + if (blk_queue_limit_bio_size(q))
> > > > > + return blk_queue_get_max_sectors(q, bio_op(bio))
> > > > > + << SECTOR_SHIFT;
> > > > > +
> > > > > + return UINT_MAX;
> > > > > +}
> > > > > +
> > > > >  /**
> > > > >   * bio_reset - reinitialize a bio
> > > > >   * @bio: bio to reset
> > > > > @@ -877,7 +888,7 @@ bool __bio_try_merge_page(struct bio *bio, struct 
> > > > > page *page,
> > > > >   struct bio_vec *bv = >bi_io_vec[bio->bi_vcnt - 1];
> > > > >  
> > > > >   if (page_is_mergeable(bv, page, len, off, same_page)) {
> > > > > - if (bio->bi_iter.bi_size > UINT_MAX - len) {
> > > > > + if (bio->bi_iter.bi_size > bio_max_size(bio) - 
> > > > > len) {
> > > > >   *same_page = false;
> > > > >   return false;
> > > > >   }
> > > > > diff --git a/include/linux/bio.h b/include/linux/bio.h
> > > > > index 1edda614f7ce..13b6f6562a5b 100644
> > > > > --- a/include/linux/bio.h
> > > > > +++ b/include/linux/bio.h
> > > > > @@ -113,7 +113,7 @@ static inline bool 

Re: [RESEND PATCH v5 1/2] bio: limit bio max size

2021-04-06 Thread Greg KH
On Wed, Apr 07, 2021 at 09:16:12AM +0900, Changheun Lee wrote:
> > On Tue, Apr 06, 2021 at 10:31:28AM +0900, Changheun Lee wrote:
> > > > bio size can grow up to 4GB when muli-page bvec is enabled.
> > > > but sometimes it would lead to inefficient behaviors.
> > > > in case of large chunk direct I/O, - 32MB chunk read in user space -
> > > > all pages for 32MB would be merged to a bio structure if the pages
> > > > physical addresses are contiguous. it makes some delay to submit
> > > > until merge complete. bio max size should be limited to a proper size.
> > > > 
> > > > When 32MB chunk read with direct I/O option is coming from userspace,
> > > > kernel behavior is below now in do_direct_IO() loop. it's timeline.
> > > > 
> > > >  | bio merge for 32MB. total 8,192 pages are merged.
> > > >  | total elapsed time is over 2ms.
> > > >  |-- ... --->|
> > > >  | 8,192 pages merged a 
> > > > bio.
> > > >  | at this time, first 
> > > > bio submit is done.
> > > >  | 1 bio is split to 32 
> > > > read request and issue.
> > > >  |--->
> > > >   |--->
> > > >|--->
> > > >   ..
> > > >
> > > > |--->
> > > > 
> > > > |--->|
> > > >   total 19ms elapsed to complete 32MB read done 
> > > > from device. |
> > > > 
> > > > If bio max size is limited with 1MB, behavior is changed below.
> > > > 
> > > >  | bio merge for 1MB. 256 pages are merged for each bio.
> > > >  | total 32 bio will be made.
> > > >  | total elapsed time is over 2ms. it's same.
> > > >  | but, first bio submit timing is fast. about 100us.
> > > >  |--->|--->|--->|---> ... -->|--->|--->|--->|--->|
> > > >   | 256 pages merged a bio.
> > > >   | at this time, first bio submit is done.
> > > >   | and 1 read request is issued for 1 bio.
> > > >   |--->
> > > >|--->
> > > > |--->
> > > >   ..
> > > >  |--->
> > > >   |--->|
> > > > total 17ms elapsed to complete 32MB read done from device. |
> > > > 
> > > > As a result, read request issue timing is faster if bio max size is 
> > > > limited.
> > > > Current kernel behavior with multipage bvec, super large bio can be 
> > > > created.
> > > > And it lead to delay first I/O request issue.
> > > > 
> > > > Signed-off-by: Changheun Lee 
> > > > ---
> > > >  block/bio.c| 13 -
> > > >  include/linux/bio.h|  2 +-
> > > >  include/linux/blkdev.h |  3 +++
> > > >  3 files changed, 16 insertions(+), 2 deletions(-)
> > > > 
> > > > diff --git a/block/bio.c b/block/bio.c
> > > > index 1f2cc1fbe283..c528e1f944c7 100644
> > > > --- a/block/bio.c
> > > > +++ b/block/bio.c
> > > > @@ -287,6 +287,17 @@ void bio_init(struct bio *bio, struct bio_vec 
> > > > *table,
> > > >  }
> > > >  EXPORT_SYMBOL(bio_init);
> > > >  
> > > > +unsigned int bio_max_size(struct bio *bio)
> > > > +{
> > > > +   struct request_queue *q = bio->bi_disk->queue;
> > > > +
> > > > +   if (blk_queue_limit_bio_size(q))
> > > > +   return blk_queue_get_max_sectors(q, bio_op(bio))
> > > > +   << SECTOR_SHIFT;
> > > > +
> > > > +   return UINT_MAX;
> > > > +}
> > > > +
> > > >  /**
> > > >   * bio_reset - reinitialize a bio
> > > >   * @bio:   bio to reset
> > > > @@ -877,7 +888,7 @@ bool __bio_try_merge_page(struct bio *bio, struct 
> > > > page *page,
> > > > struct bio_vec *bv = >bi_io_vec[bio->bi_vcnt - 1];
> > > >  
> > > > if (page_is_mergeable(bv, page, len, off, same_page)) {
> > > > -   if (bio->bi_iter.bi_size > UINT_MAX - len) {
> > > > +   if (bio->bi_iter.bi_size > bio_max_size(bio) - 
> > > > len) {
> > > > *same_page = false;
> > > > return false;
> > > > }
> > > > diff --git a/include/linux/bio.h b/include/linux/bio.h
> > > > index 1edda614f7ce..13b6f6562a5b 100644
> > > > --- a/include/linux/bio.h
> > > > +++ b/include/linux/bio.h
> > > > @@ -113,7 +113,7 @@ static inline bool bio_full(struct bio *bio, 
> > > > unsigned len)
> > > > if (bio->bi_vcnt >= bio->bi_max_vecs)
> > > > return true;
> > > >  
> > > > -   if 

Re: [RESEND PATCH v5 1/2] bio: limit bio max size

2021-04-06 Thread Changheun Lee
> On Tue, Apr 06, 2021 at 10:31:28AM +0900, Changheun Lee wrote:
> > > bio size can grow up to 4GB when muli-page bvec is enabled.
> > > but sometimes it would lead to inefficient behaviors.
> > > in case of large chunk direct I/O, - 32MB chunk read in user space -
> > > all pages for 32MB would be merged to a bio structure if the pages
> > > physical addresses are contiguous. it makes some delay to submit
> > > until merge complete. bio max size should be limited to a proper size.
> > > 
> > > When 32MB chunk read with direct I/O option is coming from userspace,
> > > kernel behavior is below now in do_direct_IO() loop. it's timeline.
> > > 
> > >  | bio merge for 32MB. total 8,192 pages are merged.
> > >  | total elapsed time is over 2ms.
> > >  |-- ... --->|
> > >  | 8,192 pages merged a 
> > > bio.
> > >  | at this time, first 
> > > bio submit is done.
> > >  | 1 bio is split to 32 
> > > read request and issue.
> > >  |--->
> > >   |--->
> > >|--->
> > >   ..
> > >
> > > |--->
> > > 
> > > |--->|
> > >   total 19ms elapsed to complete 32MB read done 
> > > from device. |
> > > 
> > > If bio max size is limited with 1MB, behavior is changed below.
> > > 
> > >  | bio merge for 1MB. 256 pages are merged for each bio.
> > >  | total 32 bio will be made.
> > >  | total elapsed time is over 2ms. it's same.
> > >  | but, first bio submit timing is fast. about 100us.
> > >  |--->|--->|--->|---> ... -->|--->|--->|--->|--->|
> > >   | 256 pages merged a bio.
> > >   | at this time, first bio submit is done.
> > >   | and 1 read request is issued for 1 bio.
> > >   |--->
> > >|--->
> > > |--->
> > >   ..
> > >  |--->
> > >   |--->|
> > > total 17ms elapsed to complete 32MB read done from device. |
> > > 
> > > As a result, read request issue timing is faster if bio max size is 
> > > limited.
> > > Current kernel behavior with multipage bvec, super large bio can be 
> > > created.
> > > And it lead to delay first I/O request issue.
> > > 
> > > Signed-off-by: Changheun Lee 
> > > ---
> > >  block/bio.c| 13 -
> > >  include/linux/bio.h|  2 +-
> > >  include/linux/blkdev.h |  3 +++
> > >  3 files changed, 16 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/block/bio.c b/block/bio.c
> > > index 1f2cc1fbe283..c528e1f944c7 100644
> > > --- a/block/bio.c
> > > +++ b/block/bio.c
> > > @@ -287,6 +287,17 @@ void bio_init(struct bio *bio, struct bio_vec *table,
> > >  }
> > >  EXPORT_SYMBOL(bio_init);
> > >  
> > > +unsigned int bio_max_size(struct bio *bio)
> > > +{
> > > + struct request_queue *q = bio->bi_disk->queue;
> > > +
> > > + if (blk_queue_limit_bio_size(q))
> > > + return blk_queue_get_max_sectors(q, bio_op(bio))
> > > + << SECTOR_SHIFT;
> > > +
> > > + return UINT_MAX;
> > > +}
> > > +
> > >  /**
> > >   * bio_reset - reinitialize a bio
> > >   * @bio: bio to reset
> > > @@ -877,7 +888,7 @@ bool __bio_try_merge_page(struct bio *bio, struct 
> > > page *page,
> > >   struct bio_vec *bv = >bi_io_vec[bio->bi_vcnt - 1];
> > >  
> > >   if (page_is_mergeable(bv, page, len, off, same_page)) {
> > > - if (bio->bi_iter.bi_size > UINT_MAX - len) {
> > > + if (bio->bi_iter.bi_size > bio_max_size(bio) - len) {
> > >   *same_page = false;
> > >   return false;
> > >   }
> > > diff --git a/include/linux/bio.h b/include/linux/bio.h
> > > index 1edda614f7ce..13b6f6562a5b 100644
> > > --- a/include/linux/bio.h
> > > +++ b/include/linux/bio.h
> > > @@ -113,7 +113,7 @@ static inline bool bio_full(struct bio *bio, unsigned 
> > > len)
> > >   if (bio->bi_vcnt >= bio->bi_max_vecs)
> > >   return true;
> > >  
> > > - if (bio->bi_iter.bi_size > UINT_MAX - len)
> > > + if (bio->bi_iter.bi_size > bio_max_size(bio) - len)
> > >   return true;
> > >  
> > >   return false;
> > > diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> > > index f94ee3089e01..3aeab9e7e97b 100644
> > > --- a/include/linux/blkdev.h
> > > +++ b/include/linux/blkdev.h
> > > @@ -621,6 +621,7 @@ struct request_queue {
> > 

Re: [RESEND PATCH v5 1/2] bio: limit bio max size

2021-04-06 Thread Greg KH
On Tue, Apr 06, 2021 at 10:31:28AM +0900, Changheun Lee wrote:
> > bio size can grow up to 4GB when muli-page bvec is enabled.
> > but sometimes it would lead to inefficient behaviors.
> > in case of large chunk direct I/O, - 32MB chunk read in user space -
> > all pages for 32MB would be merged to a bio structure if the pages
> > physical addresses are contiguous. it makes some delay to submit
> > until merge complete. bio max size should be limited to a proper size.
> > 
> > When 32MB chunk read with direct I/O option is coming from userspace,
> > kernel behavior is below now in do_direct_IO() loop. it's timeline.
> > 
> >  | bio merge for 32MB. total 8,192 pages are merged.
> >  | total elapsed time is over 2ms.
> >  |-- ... --->|
> >  | 8,192 pages merged a bio.
> >  | at this time, first bio 
> > submit is done.
> >  | 1 bio is split to 32 
> > read request and issue.
> >  |--->
> >   |--->
> >|--->
> >   ..
> >
> > |--->
> > 
> > |--->|
> >   total 19ms elapsed to complete 32MB read done 
> > from device. |
> > 
> > If bio max size is limited with 1MB, behavior is changed below.
> > 
> >  | bio merge for 1MB. 256 pages are merged for each bio.
> >  | total 32 bio will be made.
> >  | total elapsed time is over 2ms. it's same.
> >  | but, first bio submit timing is fast. about 100us.
> >  |--->|--->|--->|---> ... -->|--->|--->|--->|--->|
> >   | 256 pages merged a bio.
> >   | at this time, first bio submit is done.
> >   | and 1 read request is issued for 1 bio.
> >   |--->
> >|--->
> > |--->
> >   ..
> >  |--->
> >   |--->|
> > total 17ms elapsed to complete 32MB read done from device. |
> > 
> > As a result, read request issue timing is faster if bio max size is limited.
> > Current kernel behavior with multipage bvec, super large bio can be created.
> > And it lead to delay first I/O request issue.
> > 
> > Signed-off-by: Changheun Lee 
> > ---
> >  block/bio.c| 13 -
> >  include/linux/bio.h|  2 +-
> >  include/linux/blkdev.h |  3 +++
> >  3 files changed, 16 insertions(+), 2 deletions(-)
> > 
> > diff --git a/block/bio.c b/block/bio.c
> > index 1f2cc1fbe283..c528e1f944c7 100644
> > --- a/block/bio.c
> > +++ b/block/bio.c
> > @@ -287,6 +287,17 @@ void bio_init(struct bio *bio, struct bio_vec *table,
> >  }
> >  EXPORT_SYMBOL(bio_init);
> >  
> > +unsigned int bio_max_size(struct bio *bio)
> > +{
> > +   struct request_queue *q = bio->bi_disk->queue;
> > +
> > +   if (blk_queue_limit_bio_size(q))
> > +   return blk_queue_get_max_sectors(q, bio_op(bio))
> > +   << SECTOR_SHIFT;
> > +
> > +   return UINT_MAX;
> > +}
> > +
> >  /**
> >   * bio_reset - reinitialize a bio
> >   * @bio:   bio to reset
> > @@ -877,7 +888,7 @@ bool __bio_try_merge_page(struct bio *bio, struct page 
> > *page,
> > struct bio_vec *bv = >bi_io_vec[bio->bi_vcnt - 1];
> >  
> > if (page_is_mergeable(bv, page, len, off, same_page)) {
> > -   if (bio->bi_iter.bi_size > UINT_MAX - len) {
> > +   if (bio->bi_iter.bi_size > bio_max_size(bio) - len) {
> > *same_page = false;
> > return false;
> > }
> > diff --git a/include/linux/bio.h b/include/linux/bio.h
> > index 1edda614f7ce..13b6f6562a5b 100644
> > --- a/include/linux/bio.h
> > +++ b/include/linux/bio.h
> > @@ -113,7 +113,7 @@ static inline bool bio_full(struct bio *bio, unsigned 
> > len)
> > if (bio->bi_vcnt >= bio->bi_max_vecs)
> > return true;
> >  
> > -   if (bio->bi_iter.bi_size > UINT_MAX - len)
> > +   if (bio->bi_iter.bi_size > bio_max_size(bio) - len)
> > return true;
> >  
> > return false;
> > diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> > index f94ee3089e01..3aeab9e7e97b 100644
> > --- a/include/linux/blkdev.h
> > +++ b/include/linux/blkdev.h
> > @@ -621,6 +621,7 @@ struct request_queue {
> >  #define QUEUE_FLAG_RQ_ALLOC_TIME 27/* record rq->alloc_time_ns */
> >  #define QUEUE_FLAG_HCTX_ACTIVE 28  /* at least one blk-mq hctx is 
> > active */
> >  #define QUEUE_FLAG_NOWAIT

[RESEND PATCH v5 1/2] bio: limit bio max size

2021-04-05 Thread Changheun Lee
> bio size can grow up to 4GB when muli-page bvec is enabled.
> but sometimes it would lead to inefficient behaviors.
> in case of large chunk direct I/O, - 32MB chunk read in user space -
> all pages for 32MB would be merged to a bio structure if the pages
> physical addresses are contiguous. it makes some delay to submit
> until merge complete. bio max size should be limited to a proper size.
> 
> When 32MB chunk read with direct I/O option is coming from userspace,
> kernel behavior is below now in do_direct_IO() loop. it's timeline.
> 
>  | bio merge for 32MB. total 8,192 pages are merged.
>  | total elapsed time is over 2ms.
>  |-- ... --->|
>  | 8,192 pages merged a bio.
>  | at this time, first bio 
> submit is done.
>  | 1 bio is split to 32 read 
> request and issue.
>  |--->
>   |--->
>|--->
>   ..
>
> |--->
> 
> |--->|
>   total 19ms elapsed to complete 32MB read done from 
> device. |
> 
> If bio max size is limited with 1MB, behavior is changed below.
> 
>  | bio merge for 1MB. 256 pages are merged for each bio.
>  | total 32 bio will be made.
>  | total elapsed time is over 2ms. it's same.
>  | but, first bio submit timing is fast. about 100us.
>  |--->|--->|--->|---> ... -->|--->|--->|--->|--->|
>   | 256 pages merged a bio.
>   | at this time, first bio submit is done.
>   | and 1 read request is issued for 1 bio.
>   |--->
>|--->
> |--->
>   ..
>  |--->
>   |--->|
> total 17ms elapsed to complete 32MB read done from device. |
> 
> As a result, read request issue timing is faster if bio max size is limited.
> Current kernel behavior with multipage bvec, super large bio can be created.
> And it lead to delay first I/O request issue.
> 
> Signed-off-by: Changheun Lee 
> ---
>  block/bio.c| 13 -
>  include/linux/bio.h|  2 +-
>  include/linux/blkdev.h |  3 +++
>  3 files changed, 16 insertions(+), 2 deletions(-)
> 
> diff --git a/block/bio.c b/block/bio.c
> index 1f2cc1fbe283..c528e1f944c7 100644
> --- a/block/bio.c
> +++ b/block/bio.c
> @@ -287,6 +287,17 @@ void bio_init(struct bio *bio, struct bio_vec *table,
>  }
>  EXPORT_SYMBOL(bio_init);
>  
> +unsigned int bio_max_size(struct bio *bio)
> +{
> + struct request_queue *q = bio->bi_disk->queue;
> +
> + if (blk_queue_limit_bio_size(q))
> + return blk_queue_get_max_sectors(q, bio_op(bio))
> + << SECTOR_SHIFT;
> +
> + return UINT_MAX;
> +}
> +
>  /**
>   * bio_reset - reinitialize a bio
>   * @bio: bio to reset
> @@ -877,7 +888,7 @@ bool __bio_try_merge_page(struct bio *bio, struct page 
> *page,
>   struct bio_vec *bv = >bi_io_vec[bio->bi_vcnt - 1];
>  
>   if (page_is_mergeable(bv, page, len, off, same_page)) {
> - if (bio->bi_iter.bi_size > UINT_MAX - len) {
> + if (bio->bi_iter.bi_size > bio_max_size(bio) - len) {
>   *same_page = false;
>   return false;
>   }
> diff --git a/include/linux/bio.h b/include/linux/bio.h
> index 1edda614f7ce..13b6f6562a5b 100644
> --- a/include/linux/bio.h
> +++ b/include/linux/bio.h
> @@ -113,7 +113,7 @@ static inline bool bio_full(struct bio *bio, unsigned len)
>   if (bio->bi_vcnt >= bio->bi_max_vecs)
>   return true;
>  
> - if (bio->bi_iter.bi_size > UINT_MAX - len)
> + if (bio->bi_iter.bi_size > bio_max_size(bio) - len)
>   return true;
>  
>   return false;
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index f94ee3089e01..3aeab9e7e97b 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -621,6 +621,7 @@ struct request_queue {
>  #define QUEUE_FLAG_RQ_ALLOC_TIME 27  /* record rq->alloc_time_ns */
>  #define QUEUE_FLAG_HCTX_ACTIVE   28  /* at least one blk-mq hctx is 
> active */
>  #define QUEUE_FLAG_NOWAIT   29   /* device supports NOWAIT */
> +#define QUEUE_FLAG_LIMIT_BIO_SIZE 30 /* limit bio size */
>  
>  #define QUEUE_FLAG_MQ_DEFAULT((1 << QUEUE_FLAG_IO_STAT) |
> \
>(1 << QUEUE_FLAG_SAME_COMP) | 

[RESEND PATCH v5 1/2] bio: limit bio max size

2021-03-16 Thread Changheun Lee
bio size can grow up to 4GB when muli-page bvec is enabled.
but sometimes it would lead to inefficient behaviors.
in case of large chunk direct I/O, - 32MB chunk read in user space -
all pages for 32MB would be merged to a bio structure if the pages
physical addresses are contiguous. it makes some delay to submit
until merge complete. bio max size should be limited to a proper size.

When 32MB chunk read with direct I/O option is coming from userspace,
kernel behavior is below now in do_direct_IO() loop. it's timeline.

 | bio merge for 32MB. total 8,192 pages are merged.
 | total elapsed time is over 2ms.
 |-- ... --->|
 | 8,192 pages merged a bio.
 | at this time, first bio 
submit is done.
 | 1 bio is split to 32 read 
request and issue.
 |--->
  |--->
   |--->
  ..
   
|--->

|--->|
  total 19ms elapsed to complete 32MB read done from 
device. |

If bio max size is limited with 1MB, behavior is changed below.

 | bio merge for 1MB. 256 pages are merged for each bio.
 | total 32 bio will be made.
 | total elapsed time is over 2ms. it's same.
 | but, first bio submit timing is fast. about 100us.
 |--->|--->|--->|---> ... -->|--->|--->|--->|--->|
  | 256 pages merged a bio.
  | at this time, first bio submit is done.
  | and 1 read request is issued for 1 bio.
  |--->
   |--->
|--->
  ..
 |--->
  |--->|
total 17ms elapsed to complete 32MB read done from device. |

As a result, read request issue timing is faster if bio max size is limited.
Current kernel behavior with multipage bvec, super large bio can be created.
And it lead to delay first I/O request issue.

Signed-off-by: Changheun Lee 
---
 block/bio.c| 13 -
 include/linux/bio.h|  2 +-
 include/linux/blkdev.h |  3 +++
 3 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/block/bio.c b/block/bio.c
index 1f2cc1fbe283..c528e1f944c7 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -287,6 +287,17 @@ void bio_init(struct bio *bio, struct bio_vec *table,
 }
 EXPORT_SYMBOL(bio_init);
 
+unsigned int bio_max_size(struct bio *bio)
+{
+   struct request_queue *q = bio->bi_disk->queue;
+
+   if (blk_queue_limit_bio_size(q))
+   return blk_queue_get_max_sectors(q, bio_op(bio))
+   << SECTOR_SHIFT;
+
+   return UINT_MAX;
+}
+
 /**
  * bio_reset - reinitialize a bio
  * @bio:   bio to reset
@@ -877,7 +888,7 @@ bool __bio_try_merge_page(struct bio *bio, struct page 
*page,
struct bio_vec *bv = >bi_io_vec[bio->bi_vcnt - 1];
 
if (page_is_mergeable(bv, page, len, off, same_page)) {
-   if (bio->bi_iter.bi_size > UINT_MAX - len) {
+   if (bio->bi_iter.bi_size > bio_max_size(bio) - len) {
*same_page = false;
return false;
}
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 1edda614f7ce..13b6f6562a5b 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -113,7 +113,7 @@ static inline bool bio_full(struct bio *bio, unsigned len)
if (bio->bi_vcnt >= bio->bi_max_vecs)
return true;
 
-   if (bio->bi_iter.bi_size > UINT_MAX - len)
+   if (bio->bi_iter.bi_size > bio_max_size(bio) - len)
return true;
 
return false;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f94ee3089e01..3aeab9e7e97b 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -621,6 +621,7 @@ struct request_queue {
 #define QUEUE_FLAG_RQ_ALLOC_TIME 27/* record rq->alloc_time_ns */
 #define QUEUE_FLAG_HCTX_ACTIVE 28  /* at least one blk-mq hctx is active */
 #define QUEUE_FLAG_NOWAIT   29 /* device supports NOWAIT */
+#define QUEUE_FLAG_LIMIT_BIO_SIZE 30   /* limit bio size */
 
 #define QUEUE_FLAG_MQ_DEFAULT  ((1 << QUEUE_FLAG_IO_STAT) |\
 (1 << QUEUE_FLAG_SAME_COMP) |  \
@@ -667,6 +668,8 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct 
request_queue *q);
 #define blk_queue_fua(q)   test_bit(QUEUE_FLAG_FUA, &(q)->queue_flags)
 #define